File manager - Edit - /home/nandedex/public_html/s.nandedexpress.com/lib.tar
Back
htmlpurifier/VERSION 0000644 00000000006 15121423110 0010310 0 ustar 00 4.13.1 htmlpurifier/library/HTMLPurifier/EntityParser.php 0000644 00000023374 15121423110 0016335 0 ustar 00 <?php // if want to implement error collecting here, we'll need to use some sort // of global data (probably trigger_error) because it's impossible to pass // $config or $context to the callback functions. /** * Handles referencing and derefencing character entities */ class HTMLPurifier_EntityParser { /** * Reference to entity lookup table. * @type HTMLPurifier_EntityLookup */ protected $_entity_lookup; /** * Callback regex string for entities in text. * @type string */ protected $_textEntitiesRegex; /** * Callback regex string for entities in attributes. * @type string */ protected $_attrEntitiesRegex; /** * Tests if the beginning of a string is a semi-optional regex */ protected $_semiOptionalPrefixRegex; public function __construct() { // From // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml"; // NB: three empty captures to put the fourth match in the right // place $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/"; $this->_textEntitiesRegex = '/&(?:'. // hex '[#]x([a-fA-F0-9]+);?|'. // dec '[#]0*(\d+);?|'. 
// string (mandatory semicolon) // NB: order matters: match semicolon preferentially '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'. // string (optional semicolon) "($semi_optional)". ')/'; $this->_attrEntitiesRegex = '/&(?:'. // hex '[#]x([a-fA-F0-9]+);?|'. // dec '[#]0*(\d+);?|'. // string (mandatory semicolon) // NB: order matters: match semicolon preferentially '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'. // string (optional semicolon) // don't match if trailing is equals or alphanumeric (URL // like) "($semi_optional)(?![=;A-Za-z0-9])". ')/'; } /** * Substitute entities with the parsed equivalents. Use this on * textual data in an HTML document (as opposed to attributes.) * * @param string $string String to have entities parsed. * @return string Parsed string. */ public function substituteTextEntities($string) { return preg_replace_callback( $this->_textEntitiesRegex, array($this, 'entityCallback'), $string ); } /** * Substitute entities with the parsed equivalents. Use this on * attribute contents in documents. * * @param string $string String to have entities parsed. * @return string Parsed string. */ public function substituteAttrEntities($string) { return preg_replace_callback( $this->_attrEntitiesRegex, array($this, 'entityCallback'), $string ); } /** * Callback function for substituteNonSpecialEntities() that does the work. * * @param array $matches PCRE matches array, with 0 the entire match, and * either index 1, 2 or 3 set with a hex value, dec value, * or string (respectively). * @return string Replacement string. */ protected function entityCallback($matches) { $entity = $matches[0]; $hex_part = @$matches[1]; $dec_part = @$matches[2]; $named_part = empty($matches[3]) ? (empty($matches[4]) ? 
"" : $matches[4]) : $matches[3]; if ($hex_part !== NULL && $hex_part !== "") { return HTMLPurifier_Encoder::unichr(hexdec($hex_part)); } elseif ($dec_part !== NULL && $dec_part !== "") { return HTMLPurifier_Encoder::unichr((int) $dec_part); } else { if (!$this->_entity_lookup) { $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); } if (isset($this->_entity_lookup->table[$named_part])) { return $this->_entity_lookup->table[$named_part]; } else { // exact match didn't match anything, so test if // any of the semicolon optional match the prefix. // Test that this is an EXACT match is important to // prevent infinite loop if (!empty($matches[3])) { return preg_replace_callback( $this->_semiOptionalPrefixRegex, array($this, 'entityCallback'), $entity ); } return $entity; } } } // LEGACY CODE BELOW /** * Callback regex string for parsing entities. * @type string */ protected $_substituteEntitiesRegex = '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/'; // 1. hex 2. dec 3. string (XML style) /** * Decimal to parsed string conversion table for special entities. * @type array */ protected $_special_dec2str = array( 34 => '"', 38 => '&', 39 => "'", 60 => '<', 62 => '>' ); /** * Stripped entity names to decimal conversion table for special entities. * @type array */ protected $_special_ent2dec = array( 'quot' => 34, 'amp' => 38, 'lt' => 60, 'gt' => 62 ); /** * Substitutes non-special entities with their parsed equivalents. Since * running this whenever you have parsed character is t3h 5uck, we run * it before everything else. * * @param string $string String to have non-special entities parsed. * @return string Parsed string. */ public function substituteNonSpecialEntities($string) { // it will try to detect missing semicolons, but don't rely on it return preg_replace_callback( $this->_substituteEntitiesRegex, array($this, 'nonSpecialEntityCallback'), $string ); } /** * Callback function for substituteNonSpecialEntities() that does the work. 
* * @param array $matches PCRE matches array, with 0 the entire match, and * either index 1, 2 or 3 set with a hex value, dec value, * or string (respectively). * @return string Replacement string. */ protected function nonSpecialEntityCallback($matches) { // replaces all but big five $entity = $matches[0]; $is_num = (@$matches[0][1] === '#'); if ($is_num) { $is_hex = (@$entity[2] === 'x'); $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; // abort for special characters if (isset($this->_special_dec2str[$code])) { return $entity; } return HTMLPurifier_Encoder::unichr($code); } else { if (isset($this->_special_ent2dec[$matches[3]])) { return $entity; } if (!$this->_entity_lookup) { $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); } if (isset($this->_entity_lookup->table[$matches[3]])) { return $this->_entity_lookup->table[$matches[3]]; } else { return $entity; } } } /** * Substitutes only special entities with their parsed equivalents. * * @notice We try to avoid calling this function because otherwise, it * would have to be called a lot (for every parsed section). * * @param string $string String to have non-special entities parsed. * @return string Parsed string. */ public function substituteSpecialEntities($string) { return preg_replace_callback( $this->_substituteEntitiesRegex, array($this, 'specialEntityCallback'), $string ); } /** * Callback function for substituteSpecialEntities() that does the work. * * This callback has same syntax as nonSpecialEntityCallback(). * * @param array $matches PCRE-style matches array, with 0 the entire match, and * either index 1, 2 or 3 set with a hex value, dec value, * or string (respectively). * @return string Replacement string. */ protected function specialEntityCallback($matches) { $entity = $matches[0]; $is_num = (@$matches[0][1] === '#'); if ($is_num) { $is_hex = (@$entity[2] === 'x'); $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; return isset($this->_special_dec2str[$int]) ? 
$this->_special_dec2str[$int] : $entity; } else { return isset($this->_special_ent2dec[$matches[3]]) ? $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] : $entity; } } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/ErrorStruct.php 0000644 00000003545 15121423110 0016200 0 ustar 00 <?php /** * Records errors for particular segments of an HTML document such as tokens, * attributes or CSS properties. They can contain error structs (which apply * to components of what they represent), but their main purpose is to hold * errors applying to whatever struct is being used. */ class HTMLPurifier_ErrorStruct { /** * Possible values for $children first-key. Note that top-level structures * are automatically token-level. */ const TOKEN = 0; const ATTR = 1; const CSSPROP = 2; /** * Type of this struct. * @type string */ public $type; /** * Value of the struct we are recording errors for. There are various * values for this: * - TOKEN: Instance of HTMLPurifier_Token * - ATTR: array('attr-name', 'value') * - CSSPROP: array('prop-name', 'value') * @type mixed */ public $value; /** * Errors registered for this structure. * @type array */ public $errors = array(); /** * Child ErrorStructs that are from this structure. For example, a TOKEN * ErrorStruct would contain ATTR ErrorStructs. 
This is a multi-dimensional * array in structure: [TYPE]['identifier'] * @type array */ public $children = array(); /** * @param string $type * @param string $id * @return mixed */ public function getChild($type, $id) { if (!isset($this->children[$type][$id])) { $this->children[$type][$id] = new HTMLPurifier_ErrorStruct(); $this->children[$type][$id]->type = $type; } return $this->children[$type][$id]; } /** * @param int $severity * @param string $message */ public function addError($severity, $message) { $this->errors[] = array($severity, $message); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/CSSDefinition.php 0000644 00000045175 15121423110 0016350 0 ustar 00 <?php /** * Defines allowed CSS attributes and what their values are. * @see HTMLPurifier_HTMLDefinition */ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition { public $type = 'CSS'; /** * Assoc array of attribute name to definition object. * @type HTMLPurifier_AttrDef[] */ public $info = array(); /** * Constructs the info array. The meat of this class. 
* @param HTMLPurifier_Config $config */ protected function doSetup($config) { $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum( array('left', 'right', 'center', 'justify'), false ); $border_style = $this->info['border-bottom-style'] = $this->info['border-right-style'] = $this->info['border-left-style'] = $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum( array( 'none', 'hidden', 'dotted', 'dashed', 'solid', 'double', 'groove', 'ridge', 'inset', 'outset' ), false ); $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style); $this->info['clear'] = new HTMLPurifier_AttrDef_Enum( array('none', 'left', 'right', 'both'), false ); $this->info['float'] = new HTMLPurifier_AttrDef_Enum( array('none', 'left', 'right'), false ); $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum( array('normal', 'italic', 'oblique'), false ); $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum( array('normal', 'small-caps'), false ); $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('none')), new HTMLPurifier_AttrDef_CSS_URI() ) ); $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum( array('inside', 'outside'), false ); $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum( array( 'disc', 'circle', 'square', 'decimal', 'lower-roman', 'upper-roman', 'lower-alpha', 'upper-alpha', 'none' ), false ); $this->info['list-style-image'] = $uri_or_none; $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config); $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum( array('capitalize', 'uppercase', 'lowercase', 'none'), false ); $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['background-image'] = $uri_or_none; $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum( array('repeat', 'repeat-x', 'repeat-y', 'no-repeat') ); $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum( array('scroll', 
'fixed') ); $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition(); $border_color = $this->info['border-top-color'] = $this->info['border-bottom-color'] = $this->info['border-left-color'] = $this->info['border-right-color'] = $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('transparent')), new HTMLPurifier_AttrDef_CSS_Color() ) ); $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config); $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color); $border_width = $this->info['border-top-width'] = $this->info['border-bottom-width'] = $this->info['border-left-width'] = $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')), new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative ) ); $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width); $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('normal')), new HTMLPurifier_AttrDef_CSS_Length() ) ); $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('normal')), new HTMLPurifier_AttrDef_CSS_Length() ) ); $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum( array( 'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large', 'larger', 'smaller' ) ), new HTMLPurifier_AttrDef_CSS_Percentage(), new HTMLPurifier_AttrDef_CSS_Length() ) ); $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('normal')), new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true) ) ); $margin = $this->info['margin-top'] = $this->info['margin-bottom'] = 
$this->info['margin-left'] = $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage(), new HTMLPurifier_AttrDef_Enum(array('auto')) ) ); $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin); // non-negative $padding = $this->info['padding-top'] = $this->info['padding-bottom'] = $this->info['padding-left'] = $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true) ) ); $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding); $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage() ) ); $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('auto', 'initial', 'inherit')) ) ); $trusted_min_wh = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit')) ) ); $trusted_max_wh = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit')) ) ); $max = $config->get('CSS.MaxImgLength'); $this->info['width'] = $this->info['height'] = $max === null ? $trusted_wh : new HTMLPurifier_AttrDef_Switch( 'img', // For img tags: new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0', $max), new HTMLPurifier_AttrDef_Enum(array('auto')) ) ), // For everyone else: $trusted_wh ); $this->info['min-width'] = $this->info['min-height'] = $max === null ? 
$trusted_min_wh : new HTMLPurifier_AttrDef_Switch( 'img', // For img tags: new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0', $max), new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit')) ) ), // For everyone else: $trusted_min_wh ); $this->info['max-width'] = $this->info['max-height'] = $max === null ? $trusted_max_wh : new HTMLPurifier_AttrDef_Switch( 'img', // For img tags: new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0', $max), new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit')) ) ), // For everyone else: $trusted_max_wh ); $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration(); $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily(); // this could use specialized code $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum( array( 'normal', 'bold', 'bolder', 'lighter', '100', '200', '300', '400', '500', '600', '700', '800', '900' ), false ); // MUST be called after other font properties, as it references // a CSSDefinition object $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config); // same here $this->info['border'] = $this->info['border-bottom'] = $this->info['border-top'] = $this->info['border-left'] = $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config); $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum( array('collapse', 'separate') ); $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum( array('top', 'bottom') ); $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum( array('auto', 'fixed') ); $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum( array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom' ) ), new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage() ) ); $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new 
HTMLPurifier_AttrDef_CSS_Length(), 2); // These CSS properties don't work on many browsers, but we live // in THE FUTURE! $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum( array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line') ); if ($config->get('CSS.Proprietary')) { $this->doSetupProprietary($config); } if ($config->get('CSS.AllowTricky')) { $this->doSetupTricky($config); } if ($config->get('CSS.Trusted')) { $this->doSetupTrusted($config); } $allow_important = $config->get('CSS.AllowImportant'); // wrap all attr-defs with decorator that handles !important foreach ($this->info as $k => $v) { $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important); } $this->setupConfigStuff($config); } /** * @param HTMLPurifier_Config $config */ protected function doSetupProprietary($config) { // Internet Explorer only scrollbar colors $this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); // vendor specific prefixes of opacity $this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); $this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); // only opacity, for now $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter(); // more CSS3 $this->info['page-break-after'] = $this->info['page-break-before'] = new HTMLPurifier_AttrDef_Enum( array( 'auto', 'always', 'avoid', 'left', 'right' ) ); $this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid')); $border_radius = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Percentage(true), // 
disallow negative new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative )); $this->info['border-top-left-radius'] = $this->info['border-top-right-radius'] = $this->info['border-bottom-right-radius'] = $this->info['border-bottom-left-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2); // TODO: support SLASH syntax $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4); } /** * @param HTMLPurifier_Config $config */ protected function doSetupTricky($config) { $this->info['display'] = new HTMLPurifier_AttrDef_Enum( array( 'inline', 'block', 'list-item', 'run-in', 'compact', 'marker', 'table', 'inline-block', 'inline-table', 'table-row-group', 'table-header-group', 'table-footer-group', 'table-row', 'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none' ) ); $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum( array('visible', 'hidden', 'collapse') ); $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll')); $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); } /** * @param HTMLPurifier_Config $config */ protected function doSetupTrusted($config) { $this->info['position'] = new HTMLPurifier_AttrDef_Enum( array('static', 'relative', 'absolute', 'fixed') ); $this->info['top'] = $this->info['left'] = $this->info['right'] = $this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage(), new HTMLPurifier_AttrDef_Enum(array('auto')), ) ); $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Integer(), new HTMLPurifier_AttrDef_Enum(array('auto')), ) ); } /** * Performs extra config-based processing. Based off of * HTMLPurifier_HTMLDefinition. * @param HTMLPurifier_Config $config * @todo Refactor duplicate elements into common class (probably using * composition, not inheritance). 
*/ protected function setupConfigStuff($config) { // setup allowed elements $support = "(for information on implementing this, see the " . "support forums) "; $allowed_properties = $config->get('CSS.AllowedProperties'); if ($allowed_properties !== null) { foreach ($this->info as $name => $d) { if (!isset($allowed_properties[$name])) { unset($this->info[$name]); } unset($allowed_properties[$name]); } // emit errors foreach ($allowed_properties as $name => $d) { // :TODO: Is this htmlspecialchars() call really necessary? $name = htmlspecialchars($name); trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING); } } $forbidden_properties = $config->get('CSS.ForbiddenProperties'); if ($forbidden_properties !== null) { foreach ($this->info as $name => $d) { if (isset($forbidden_properties[$name])) { unset($this->info[$name]); } } } } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/ChildDef.php 0000644 00000003027 15121423110 0015337 0 ustar 00 <?php /** * Defines allowed child nodes and validates nodes against it. */ abstract class HTMLPurifier_ChildDef { /** * Type of child definition, usually right-most part of class name lowercase. * Used occasionally in terms of context. * @type string */ public $type; /** * Indicates whether or not an empty array of children is okay. * * This is necessary for redundant checking when changes affecting * a child node may cause a parent node to now be disallowed. * @type bool */ public $allow_empty; /** * Lookup array of all elements that this definition could possibly allow. * @type array */ public $elements = array(); /** * Get lookup of tag names that should not close this element automatically. * All other elements will do so. * @param HTMLPurifier_Config $config HTMLPurifier_Config object * @return array */ public function getAllowedElements($config) { return $this->elements; } /** * Validates nodes according to definition and returns modification. 
* * @param HTMLPurifier_Node[] $children Array of HTMLPurifier_Node * @param HTMLPurifier_Config $config HTMLPurifier_Config object * @param HTMLPurifier_Context $context HTMLPurifier_Context object * @return bool|array true to leave nodes as is, false to remove parent node, array of replacement children */ abstract public function validateChildren($children, $config, $context); } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php 0000644 00000037112 15121423110 0017104 0 ustar 00 <?php class HTMLPurifier_HTMLModuleManager { /** * @type HTMLPurifier_DoctypeRegistry */ public $doctypes; /** * Instance of current doctype. * @type string */ public $doctype; /** * @type HTMLPurifier_AttrTypes */ public $attrTypes; /** * Active instances of modules for the specified doctype are * indexed, by name, in this array. * @type HTMLPurifier_HTMLModule[] */ public $modules = array(); /** * Array of recognized HTMLPurifier_HTMLModule instances, * indexed by module's class name. This array is usually lazy loaded, but a * user can overload a module by pre-emptively registering it. * @type HTMLPurifier_HTMLModule[] */ public $registeredModules = array(); /** * List of extra modules that were added by the user * using addModule(). These get unconditionally merged into the current doctype, whatever * it may be. * @type HTMLPurifier_HTMLModule[] */ public $userModules = array(); /** * Associative array of element name to list of modules that have * definitions for the element; this array is dynamically filled. * @type array */ public $elementLookup = array(); /** * List of prefixes we should use for registering small names. * @type array */ public $prefixes = array('HTMLPurifier_HTMLModule_'); /** * @type HTMLPurifier_ContentSets */ public $contentSets; /** * @type HTMLPurifier_AttrCollections */ public $attrCollections; /** * If set to true, unsafe elements and attributes will be allowed. 
* @type bool */ public $trusted = false; public function __construct() { // editable internal objects $this->attrTypes = new HTMLPurifier_AttrTypes(); $this->doctypes = new HTMLPurifier_DoctypeRegistry(); // setup basic modules $common = array( 'CommonAttributes', 'Text', 'Hypertext', 'List', 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute', // Unsafe: 'Scripting', 'Object', 'Forms', // Sorta legacy, but present in strict: 'Name', ); $transitional = array('Legacy', 'Target', 'Iframe'); $xml = array('XMLCommonAttributes'); $non_xml = array('NonXMLCommonAttributes'); // setup basic doctypes $this->doctypes->register( 'HTML 4.01 Transitional', false, array_merge($common, $transitional, $non_xml), array('Tidy_Transitional', 'Tidy_Proprietary'), array(), '-//W3C//DTD HTML 4.01 Transitional//EN', 'http://www.w3.org/TR/html4/loose.dtd' ); $this->doctypes->register( 'HTML 4.01 Strict', false, array_merge($common, $non_xml), array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), array(), '-//W3C//DTD HTML 4.01//EN', 'http://www.w3.org/TR/html4/strict.dtd' ); $this->doctypes->register( 'XHTML 1.0 Transitional', true, array_merge($common, $transitional, $xml, $non_xml), array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), array(), '-//W3C//DTD XHTML 1.0 Transitional//EN', 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' ); $this->doctypes->register( 'XHTML 1.0 Strict', true, array_merge($common, $xml, $non_xml), array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), array(), '-//W3C//DTD XHTML 1.0 Strict//EN', 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' ); $this->doctypes->register( 'XHTML 1.1', true, // Iframe is a real XHTML 1.1 module, despite being // "transitional"! 
array_merge($common, $xml, array('Ruby', 'Iframe')), array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1 array(), '-//W3C//DTD XHTML 1.1//EN', 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' ); } /** * Registers a module to the recognized module list, useful for * overloading pre-existing modules. * @param $module Mixed: string module name, with or without * HTMLPurifier_HTMLModule prefix, or instance of * subclass of HTMLPurifier_HTMLModule. * @param $overload Boolean whether or not to overload previous modules. * If this is not set, and you do overload a module, * HTML Purifier will complain with a warning. * @note This function will not call autoload, you must instantiate * (and thus invoke) autoload outside the method. * @note If a string is passed as a module name, different variants * will be tested in this order: * - Check for HTMLPurifier_HTMLModule_$name * - Check all prefixes with $name in order they were added * - Check for literal object name * - Throw fatal error * If your object name collides with an internal class, specify * your module manually. All modules must have been included * externally: registerModule will not perform inclusions for you! */ public function registerModule($module, $overload = false) { if (is_string($module)) { // attempt to load the module $original_module = $module; $ok = false; foreach ($this->prefixes as $prefix) { $module = $prefix . $original_module; if (class_exists($module)) { $ok = true; break; } } if (!$ok) { $module = $original_module; if (!class_exists($module)) { trigger_error( $original_module . ' module does not exist', E_USER_ERROR ); return; } } $module = new $module(); } if (empty($module->name)) { trigger_error('Module instance of ' . get_class($module) . ' must have name'); return; } if (!$overload && isset($this->registeredModules[$module->name])) { trigger_error('Overloading ' . $module->name . 
' without explicit overload parameter', E_USER_WARNING); } $this->registeredModules[$module->name] = $module; } /** * Adds a module to the current doctype by first registering it, * and then tacking it on to the active doctype */ public function addModule($module) { $this->registerModule($module); if (is_object($module)) { $module = $module->name; } $this->userModules[] = $module; } /** * Adds a class prefix that registerModule() will use to resolve a * string name to a concrete class */ public function addPrefix($prefix) { $this->prefixes[] = $prefix; } /** * Performs processing on modules, after being called you may * use getElement() and getElements() * @param HTMLPurifier_Config $config */ public function setup($config) { $this->trusted = $config->get('HTML.Trusted'); // generate $this->doctype = $this->doctypes->make($config); $modules = $this->doctype->modules; // take out the default modules that aren't allowed $lookup = $config->get('HTML.AllowedModules'); $special_cases = $config->get('HTML.CoreModules'); if (is_array($lookup)) { foreach ($modules as $k => $m) { if (isset($special_cases[$m])) { continue; } if (!isset($lookup[$m])) { unset($modules[$k]); } } } // custom modules if ($config->get('HTML.Proprietary')) { $modules[] = 'Proprietary'; } if ($config->get('HTML.SafeObject')) { $modules[] = 'SafeObject'; } if ($config->get('HTML.SafeEmbed')) { $modules[] = 'SafeEmbed'; } if ($config->get('HTML.SafeScripting') !== array()) { $modules[] = 'SafeScripting'; } if ($config->get('HTML.Nofollow')) { $modules[] = 'Nofollow'; } if ($config->get('HTML.TargetBlank')) { $modules[] = 'TargetBlank'; } // NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER HTML.TargetBlank // so that its post-attr-transform gets run afterwards. 
if ($config->get('HTML.TargetNoreferrer')) { $modules[] = 'TargetNoreferrer'; } if ($config->get('HTML.TargetNoopener')) { $modules[] = 'TargetNoopener'; } // merge in custom modules $modules = array_merge($modules, $this->userModules); foreach ($modules as $module) { $this->processModule($module); $this->modules[$module]->setup($config); } foreach ($this->doctype->tidyModules as $module) { $this->processModule($module); $this->modules[$module]->setup($config); } // prepare any injectors foreach ($this->modules as $module) { $n = array(); foreach ($module->info_injector as $injector) { if (!is_object($injector)) { $class = "HTMLPurifier_Injector_$injector"; $injector = new $class; } $n[$injector->name] = $injector; } $module->info_injector = $n; } // setup lookup table based on all valid modules foreach ($this->modules as $module) { foreach ($module->info as $name => $def) { if (!isset($this->elementLookup[$name])) { $this->elementLookup[$name] = array(); } $this->elementLookup[$name][] = $module->name; } } // note the different choice $this->contentSets = new HTMLPurifier_ContentSets( // content set assembly deals with all possible modules, // not just ones deemed to be "safe" $this->modules ); $this->attrCollections = new HTMLPurifier_AttrCollections( $this->attrTypes, // there is no way to directly disable a global attribute, // but using AllowedAttributes or simply not including // the module in your custom doctype should be sufficient $this->modules ); } /** * Takes a module and adds it to the active module collection, * registering it if necessary. */ public function processModule($module) { if (!isset($this->registeredModules[$module]) || is_object($module)) { $this->registerModule($module); } $this->modules[$module] = $this->registeredModules[$module]; } /** * Retrieves merged element definitions. 
* @return Array of HTMLPurifier_ElementDef */ public function getElements() { $elements = array(); foreach ($this->modules as $module) { if (!$this->trusted && !$module->safe) { continue; } foreach ($module->info as $name => $v) { if (isset($elements[$name])) { continue; } $elements[$name] = $this->getElement($name); } } // remove dud elements, this happens when an element that // appeared to be safe actually wasn't foreach ($elements as $n => $v) { if ($v === false) { unset($elements[$n]); } } return $elements; } /** * Retrieves a single merged element definition * @param string $name Name of element * @param bool $trusted Boolean trusted overriding parameter: set to true * if you want the full version of an element * @return HTMLPurifier_ElementDef Merged HTMLPurifier_ElementDef * @note You may notice that modules are getting iterated over twice (once * in getElements() and once here). This * is because */ public function getElement($name, $trusted = null) { if (!isset($this->elementLookup[$name])) { return false; } // setup global state variables $def = false; if ($trusted === null) { $trusted = $this->trusted; } // iterate through each module that has registered itself to this // element foreach ($this->elementLookup[$name] as $module_name) { $module = $this->modules[$module_name]; // refuse to create/merge from a module that is deemed unsafe-- // pretend the module doesn't exist--when trusted mode is not on. if (!$trusted && !$module->safe) { continue; } // clone is used because, ideally speaking, the original // definition should not be modified. Usually, this will // make no difference, but for consistency's sake $new_def = clone $module->info[$name]; if (!$def && $new_def->standalone) { $def = $new_def; } elseif ($def) { // This will occur even if $new_def is standalone. In practice, // this will usually result in a full replacement. 
$def->mergeIn($new_def); } else { // :TODO: // non-standalone definitions that don't have a standalone // to merge into could be deferred to the end // HOWEVER, it is perfectly valid for a non-standalone // definition to lack a standalone definition, even // after all processing: this allows us to safely // specify extra attributes for elements that may not be // enabled all in one place. In particular, this might // be the case for trusted elements. WARNING: care must // be taken that the /extra/ definitions are all safe. continue; } // attribute value expansions $this->attrCollections->performInclusions($def->attr); $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes); // descendants_are_inline, for ChildDef_Chameleon if (is_string($def->content_model) && strpos($def->content_model, 'Inline') !== false) { if ($name != 'del' && $name != 'ins') { // this is for you, ins/del $def->descendants_are_inline = true; } } $this->contentSets->generateChildDef($def, $module); } // This can occur if there is a blank definition, but no base to // mix it in with if (!$def) { return false; } // add information on required attributes foreach ($def->attr as $attr_name => $attr_def) { if ($attr_def->required) { $def->required_attr[] = $attr_name; } } return $def; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/UnitConverter.php 0000644 00000023622 15121423110 0016507 0 ustar 00 <?php /** * Class for converting between different unit-lengths as specified by * CSS. */ class HTMLPurifier_UnitConverter { const ENGLISH = 1; const METRIC = 2; const DIGITAL = 3; /** * Units information array. Units are grouped into measuring systems * (English, Metric), and are assigned an integer representing * the conversion factor between that unit and the smallest unit in * the system. 
Numeric indexes are actually magical constants that * encode conversion data from one system to the next, with a O(n^2) * constraint on memory (this is generally not a problem, since * the number of measuring systems is small.) */ protected static $units = array( self::ENGLISH => array( 'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary 'pt' => 4, 'pc' => 48, 'in' => 288, self::METRIC => array('pt', '0.352777778', 'mm'), ), self::METRIC => array( 'mm' => 1, 'cm' => 10, self::ENGLISH => array('mm', '2.83464567', 'pt'), ), ); /** * Minimum bcmath precision for output. * @type int */ protected $outputPrecision; /** * Bcmath precision for internal calculations. * @type int */ protected $internalPrecision; /** * Whether or not BCMath is available. * @type bool */ private $bcmath; public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false) { $this->outputPrecision = $output_precision; $this->internalPrecision = $internal_precision; $this->bcmath = !$force_no_bcmath && function_exists('bcmul'); } /** * Converts a length object of one unit into another unit. * @param HTMLPurifier_Length $length * Instance of HTMLPurifier_Length to convert. You must validate() * it before passing it here! * @param string $to_unit * Unit to convert to. * @return HTMLPurifier_Length|bool * @note * About precision: This conversion function pays very special * attention to the incoming precision of values and attempts * to maintain a number of significant figure. Results are * fairly accurate up to nine digits. Some caveats: * - If a number is zero-padded as a result of this significant * figure tracking, the zeroes will be eliminated. * - If a number contains less than four sigfigs ($outputPrecision) * and this causes some decimals to be excluded, those * decimals will be added on. 
*/ public function convert($length, $to_unit) { if (!$length->isValid()) { return false; } $n = $length->getN(); $unit = $length->getUnit(); if ($n === '0' || $unit === false) { return new HTMLPurifier_Length('0', false); } $state = $dest_state = false; foreach (self::$units as $k => $x) { if (isset($x[$unit])) { $state = $k; } if (isset($x[$to_unit])) { $dest_state = $k; } } if (!$state || !$dest_state) { return false; } // Some calculations about the initial precision of the number; // this will be useful when we need to do final rounding. $sigfigs = $this->getSigFigs($n); if ($sigfigs < $this->outputPrecision) { $sigfigs = $this->outputPrecision; } // BCMath's internal precision deals only with decimals. Use // our default if the initial number has no decimals, or increase // it by how ever many decimals, thus, the number of guard digits // will always be greater than or equal to internalPrecision. $log = (int)floor(log(abs($n), 10)); $cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision for ($i = 0; $i < 2; $i++) { // Determine what unit IN THIS SYSTEM we need to convert to if ($dest_state === $state) { // Simple conversion $dest_unit = $to_unit; } else { // Convert to the smallest unit, pending a system shift $dest_unit = self::$units[$state][$dest_state][0]; } // Do the conversion if necessary if ($dest_unit !== $unit) { $factor = $this->div(self::$units[$state][$unit], self::$units[$state][$dest_unit], $cp); $n = $this->mul($n, $factor, $cp); $unit = $dest_unit; } // Output was zero, so bail out early. Shouldn't ever happen. if ($n === '') { $n = '0'; $unit = $to_unit; break; } // It was a simple conversion, so bail out if ($dest_state === $state) { break; } if ($i !== 0) { // Conversion failed! Apparently, the system we forwarded // to didn't have this unit. This should never happen! 
return false; } // Pre-condition: $i == 0 // Perform conversion to next system of units $n = $this->mul($n, self::$units[$state][$dest_state][1], $cp); $unit = self::$units[$state][$dest_state][2]; $state = $dest_state; // One more loop around to convert the unit in the new system. } // Post-condition: $unit == $to_unit if ($unit !== $to_unit) { return false; } // Useful for debugging: //echo "<pre>n"; //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n</pre>\n"; $n = $this->round($n, $sigfigs); if (strpos($n, '.') !== false) { $n = rtrim($n, '0'); } $n = rtrim($n, '.'); return new HTMLPurifier_Length($n, $unit); } /** * Returns the number of significant figures in a string number. * @param string $n Decimal number * @return int number of sigfigs */ public function getSigFigs($n) { $n = ltrim($n, '0+-'); $dp = strpos($n, '.'); // decimal position if ($dp === false) { $sigfigs = strlen(rtrim($n, '0')); } else { $sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character if ($dp !== 0) { $sigfigs--; } } return $sigfigs; } /** * Adds two numbers, using arbitrary precision when available. * @param string $s1 * @param string $s2 * @param int $scale * @return string */ private function add($s1, $s2, $scale) { if ($this->bcmath) { return bcadd($s1, $s2, $scale); } else { return $this->scale((float)$s1 + (float)$s2, $scale); } } /** * Multiples two numbers, using arbitrary precision when available. * @param string $s1 * @param string $s2 * @param int $scale * @return string */ private function mul($s1, $s2, $scale) { if ($this->bcmath) { return bcmul($s1, $s2, $scale); } else { return $this->scale((float)$s1 * (float)$s2, $scale); } } /** * Divides two numbers, using arbitrary precision when available. 
* @param string $s1 * @param string $s2 * @param int $scale * @return string */ private function div($s1, $s2, $scale) { if ($this->bcmath) { return bcdiv($s1, $s2, $scale); } else { return $this->scale((float)$s1 / (float)$s2, $scale); } } /** * Rounds a number according to the number of sigfigs it should have, * using arbitrary precision when available. * @param float $n * @param int $sigfigs * @return string */ private function round($n, $sigfigs) { $new_log = (int)floor(log(abs($n), 10)); // Number of digits left of decimal - 1 $rp = $sigfigs - $new_log - 1; // Number of decimal places needed $neg = $n < 0 ? '-' : ''; // Negative sign if ($this->bcmath) { if ($rp >= 0) { $n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1); $n = bcdiv($n, '1', $rp); } else { // This algorithm partially depends on the standardized // form of numbers that comes out of bcmath. $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0); $n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1); } return $n; } else { return $this->scale(round($n, $sigfigs - $new_log - 1), $rp + 1); } } /** * Scales a float to $scale digits right of decimal point, like BCMath. * @param float $r * @param int $scale * @return string */ private function scale($r, $scale) { if ($scale < 0) { // The f sprintf type doesn't support negative numbers, so we // need to cludge things manually. First get the string. $r = sprintf('%.0f', (float)$r); // Due to floating point precision loss, $r will more than likely // look something like 4652999999999.9234. We grab one more digit // than we need to precise from $r and then use that to round // appropriately. $precise = (string)round(substr($r, 0, strlen($r) + $scale), -1); // Now we return it, truncating the zero that was rounded off. return substr($precise, 0, -1) . str_repeat('0', -$scale + 1); } return sprintf('%.' . $scale . 
'f', (float)$r); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/Lexer.php 0000644 00000032363 15121423110 0014761 0 ustar 00 <?php /** * Forgivingly lexes HTML (SGML-style) markup into tokens. * * A lexer parses a string of SGML-style markup and converts them into * corresponding tokens. It doesn't check for well-formedness, although its * internal mechanism may make this automatic (such as the case of * HTMLPurifier_Lexer_DOMLex). There are several implementations to choose * from. * * A lexer is HTML-oriented: it might work with XML, but it's not * recommended, as we adhere to a subset of the specification for optimization * reasons. This might change in the future. Also, most tokenizers are not * expected to handle DTDs or PIs. * * This class should not be directly instantiated, but you may use create() to * retrieve a default copy of the lexer. Being a supertype, this class * does not actually define any implementation, but offers commonly used * convenience functions for subclasses. * * @note The unit tests will instantiate this class for testing purposes, as * many of the utility functions require a class to be instantiated. * This means that, even though this class is not runnable, it will * not be declared abstract. * * @par * * @note * We use tokens rather than create a DOM representation because DOM would: * * @par * -# Require more processing and memory to create, * -# Is not streamable, and * -# Has the entire document structure (html and body not needed). * * @par * However, DOM is helpful in that it makes it easy to move around nodes * without a lot of lookaheads to see when a tag is closed. This is a * limitation of the token system and some workarounds would be nice. */ class HTMLPurifier_Lexer { /** * Whether or not this lexer implements line-number/column-number tracking. * If it does, set to true. 
*/ public $tracksLineNumbers = false; /** * @since 4.13.1 - https://github.com/MetaSlider/metaslider/issues/494 */ private $_entity_parser; // -- STATIC ---------------------------------------------------------- /** * Retrieves or sets the default Lexer as a Prototype Factory. * * By default HTMLPurifier_Lexer_DOMLex will be returned. There are * a few exceptions involving special features that only DirectLex * implements. * * @note The behavior of this class has changed, rather than accepting * a prototype object, it now accepts a configuration object. * To specify your own prototype, set %Core.LexerImpl to it. * This change in behavior de-singletonizes the lexer object. * * @param HTMLPurifier_Config $config * @return HTMLPurifier_Lexer * @throws HTMLPurifier_Exception */ public static function create($config) { if (!($config instanceof HTMLPurifier_Config)) { $lexer = $config; trigger_error( "Passing a prototype to HTMLPurifier_Lexer::create() is deprecated, please instead use %Core.LexerImpl", E_USER_WARNING ); } else { $lexer = $config->get('Core.LexerImpl'); } $needs_tracking = $config->get('Core.MaintainLineNumbers') || $config->get('Core.CollectErrors'); $inst = null; if (is_object($lexer)) { $inst = $lexer; } else { if (is_null($lexer)) { do { // auto-detection algorithm if ($needs_tracking) { $lexer = 'DirectLex'; break; } if (class_exists('DOMDocument', false) && method_exists('DOMDocument', 'loadHTML') && !extension_loaded('domxml') ) { // check for DOM support, because while it's part of the // core, it can be disabled compile time. 
Also, the PECL // domxml extension overrides the default DOM, and is evil // and nasty and we shan't bother to support it $lexer = 'DOMLex'; } else { $lexer = 'DirectLex'; } } while (0); } // do..while so we can break // instantiate recognized string names switch ($lexer) { case 'DOMLex': $inst = new HTMLPurifier_Lexer_DOMLex(); break; case 'DirectLex': $inst = new HTMLPurifier_Lexer_DirectLex(); break; case 'PH5P': $inst = new HTMLPurifier_Lexer_PH5P(); break; default: throw new HTMLPurifier_Exception( "Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer) ); } } if (!$inst) { throw new HTMLPurifier_Exception('No lexer was instantiated'); } // once PHP DOM implements native line numbers, or we // hack out something using XSLT, remove this stipulation if ($needs_tracking && !$inst->tracksLineNumbers) { throw new HTMLPurifier_Exception( 'Cannot use lexer that does not support line numbers with ' . 'Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)' ); } return $inst; } // -- CONVENIENCE MEMBERS --------------------------------------------- public function __construct() { $this->_entity_parser = new HTMLPurifier_EntityParser(); } /** * Most common entity to raw value conversion table for special entities. * @type array */ protected $_special_entity2str = array( '"' => '"', '&' => '&', '<' => '<', '>' => '>', ''' => "'", ''' => "'", ''' => "'" ); public function parseText($string, $config) { return $this->parseData($string, false, $config); } public function parseAttr($string, $config) { return $this->parseData($string, true, $config); } /** * Parses special entities into the proper characters. * * This string will translate escaped versions of the special characters * into the correct ones. * * @param string $string String character data to be parsed. * @return string Parsed character data. 
*/ public function parseData($string, $is_attr, $config) { // following functions require at least one character if ($string === '') { return ''; } // subtracts amps that cannot possibly be escaped $num_amp = substr_count($string, '&') - substr_count($string, '& ') - ($string[strlen($string) - 1] === '&' ? 1 : 0); if (!$num_amp) { return $string; } // abort if no entities $num_esc_amp = substr_count($string, '&'); $string = strtr($string, $this->_special_entity2str); // code duplication for sake of optimization, see above $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') - ($string[strlen($string) - 1] === '&' ? 1 : 0); if ($num_amp_2 <= $num_esc_amp) { return $string; } // hmm... now we have some uncommon entities. Use the callback. if ($config->get('Core.LegacyEntityDecoder')) { $string = $this->_entity_parser->substituteSpecialEntities($string); } else { if ($is_attr) { $string = $this->_entity_parser->substituteAttrEntities($string); } else { $string = $this->_entity_parser->substituteTextEntities($string); } } return $string; } /** * Lexes an HTML string into tokens. * @param $string String HTML. * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return HTMLPurifier_Token[] array representation of HTML. */ public function tokenizeHTML($string, $config, $context) { trigger_error('Call to abstract class', E_USER_ERROR); } /** * Translates CDATA sections into regular sections (through escaping). * @param string $string HTML string to process. * @return string HTML with CDATA sections escaped. */ protected static function escapeCDATA($string) { return preg_replace_callback( '/<!\[CDATA\[(.+?)\]\]>/s', array('HTMLPurifier_Lexer', 'CDATACallback'), $string ); } /** * Special CDATA case that is especially convoluted for <script> * @param string $string HTML string to process. * @return string HTML with CDATA sections escaped. 
*/ protected static function escapeCommentedCDATA($string) { return preg_replace_callback( '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s', array('HTMLPurifier_Lexer', 'CDATACallback'), $string ); } /** * Special Internet Explorer conditional comments should be removed. * @param string $string HTML string to process. * @return string HTML with conditional comments removed. */ protected static function removeIEConditional($string) { return preg_replace( '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings '', $string ); } /** * Callback function for escapeCDATA() that does the work. * * @warning Though this is public in order to let the callback happen, * calling it directly is not recommended. * @param array $matches PCRE matches array, with index 0 the entire match * and 1 the inside of the CDATA section. * @return string Escaped internals of the CDATA section. */ protected static function CDATACallback($matches) { // not exactly sure why the character set is needed, but whatever return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8'); } /** * Takes a piece of HTML and normalizes it by converting entities, fixing * encoding, extracting bits, and other good stuff. * @param string $html HTML. 
* @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return string * @todo Consider making protected */ public function normalize($html, $config, $context) { // normalize newlines to \n if ($config->get('Core.NormalizeNewlines')) { $html = str_replace("\r\n", "\n", $html); $html = str_replace("\r", "\n", $html); } if ($config->get('HTML.Trusted')) { // escape convoluted CDATA $html = $this->escapeCommentedCDATA($html); } // escape CDATA $html = $this->escapeCDATA($html); $html = $this->removeIEConditional($html); // extract body from document if applicable if ($config->get('Core.ConvertDocumentToFragment')) { $e = false; if ($config->get('Core.CollectErrors')) { $e =& $context->get('ErrorCollector'); } $new_html = $this->extractBody($html); if ($e && $new_html != $html) { $e->send(E_WARNING, 'Lexer: Extracted body'); } $html = $new_html; } // expand entities that aren't the big five if ($config->get('Core.LegacyEntityDecoder')) { $html = $this->_entity_parser->substituteNonSpecialEntities($html); } // clean into wellformed UTF-8 string for an SGML context: this has // to be done after entity expansion because the entities sometimes // represent non-SGML characters (horror, horror!) 
$html = HTMLPurifier_Encoder::cleanUTF8($html); // if processing instructions are to removed, remove them now if ($config->get('Core.RemoveProcessingInstructions')) { $html = preg_replace('#<\?.+?\?>#s', '', $html); } $hidden_elements = $config->get('Core.HiddenElements'); if ($config->get('Core.AggressivelyRemoveScript') && !($config->get('HTML.Trusted') || !$config->get('Core.RemoveScriptContents') || empty($hidden_elements["script"]))) { $html = preg_replace('#<script[^>]*>.*?</script>#i', '', $html); } return $html; } /** * Takes a string of HTML (fragment or document) and returns the content * @todo Consider making protected */ public function extractBody($html) { $matches = array(); $result = preg_match('|(.*?)<body[^>]*>(.*)</body>|is', $html, $matches); if ($result) { // Make sure it's not in a comment $comment_start = strrpos($matches[1], '<!--'); $comment_end = strrpos($matches[1], '-->'); if ($comment_start === false || ($comment_end !== false && $comment_end > $comment_start)) { return $matches[2]; } } return $html; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/URI.php 0000644 00000024544 15121423110 0014343 0 ustar 00 <?php /** * HTML Purifier's internal representation of a URI. * @note * Internal data-structures are completely escaped. If the data needs * to be used in a non-URI context (which is very unlikely), be sure * to decode it first. The URI may not necessarily be well-formed until * validate() is called. 
*/ class HTMLPurifier_URI { /** * @type string */ public $scheme; /** * @type string */ public $userinfo; /** * @type string */ public $host; /** * @type int */ public $port; /** * @type string */ public $path; /** * @type string */ public $query; /** * @type string */ public $fragment; /** * @param string $scheme * @param string $userinfo * @param string $host * @param int $port * @param string $path * @param string $query * @param string $fragment * @note Automatically normalizes scheme and port */ public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) { $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme); $this->userinfo = $userinfo; $this->host = $host; $this->port = is_null($port) ? $port : (int)$port; $this->path = $path; $this->query = $query; $this->fragment = $fragment; } /** * Retrieves a scheme object corresponding to the URI's scheme/default * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return HTMLPurifier_URIScheme Scheme object appropriate for validating this URI */ public function getSchemeObj($config, $context) { $registry = HTMLPurifier_URISchemeRegistry::instance(); if ($this->scheme !== null) { $scheme_obj = $registry->getScheme($this->scheme, $config, $context); if (!$scheme_obj) { return false; } // invalid scheme, clean it out } else { // no scheme: retrieve the default one $def = $config->getDefinition('URI'); $scheme_obj = $def->getDefaultScheme($config, $context); if (!$scheme_obj) { if ($def->defaultScheme !== null) { // something funky happened to the default scheme object trigger_error( 'Default scheme object "' . $def->defaultScheme . '" was not readable', E_USER_WARNING ); } // suppress error if it's null return false; } } return $scheme_obj; } /** * Generic validation method applicable for all schemes. May modify * this URI in order to get it into a compliant form. 
* @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool True if validation/filtering succeeds, false if failure */ public function validate($config, $context) { // ABNF definitions from RFC 3986 $chars_sub_delims = '!$&\'()*+,;='; $chars_gen_delims = ':/?#[]@'; $chars_pchar = $chars_sub_delims . ':@'; // validate host if (!is_null($this->host)) { $host_def = new HTMLPurifier_AttrDef_URI_Host(); $this->host = $host_def->validate($this->host, $config, $context); if ($this->host === false) { $this->host = null; } } // validate scheme // NOTE: It's not appropriate to check whether or not this // scheme is in our registry, since a URIFilter may convert a // URI that we don't allow into one we do. So instead, we just // check if the scheme can be dropped because there is no host // and it is our default scheme. if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') { // support for relative paths is pretty abysmal when the // scheme is present, so axe it when possible $def = $config->getDefinition('URI'); if ($def->defaultScheme === $this->scheme) { $this->scheme = null; } } // validate username if (!is_null($this->userinfo)) { $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':'); $this->userinfo = $encoder->encode($this->userinfo); } // validate port if (!is_null($this->port)) { if ($this->port < 1 || $this->port > 65535) { $this->port = null; } } // validate path $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . 
'/'); if (!is_null($this->host)) { // this catches $this->host === '' // path-abempty (hier and relative) // http://www.example.com/my/path // //www.example.com/my/path (looks odd, but works, and // recognized by most browsers) // (this set is valid or invalid on a scheme by scheme // basis, so we'll deal with it later) // file:///my/path // ///my/path $this->path = $segments_encoder->encode($this->path); } elseif ($this->path !== '') { if ($this->path[0] === '/') { // path-absolute (hier and relative) // http:/my/path // /my/path if (strlen($this->path) >= 2 && $this->path[1] === '/') { // This could happen if both the host gets stripped // out // http://my/path // //my/path $this->path = ''; } else { $this->path = $segments_encoder->encode($this->path); } } elseif (!is_null($this->scheme)) { // path-rootless (hier) // http:my/path // Short circuit evaluation means we don't need to check nz $this->path = $segments_encoder->encode($this->path); } else { // path-noscheme (relative) // my/path // (once again, not checking nz) $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@'); $c = strpos($this->path, '/'); if ($c !== false) { $this->path = $segment_nc_encoder->encode(substr($this->path, 0, $c)) . $segments_encoder->encode(substr($this->path, $c)); } else { $this->path = $segment_nc_encoder->encode($this->path); } } } else { // path-empty (hier and relative) $this->path = ''; // just to be safe } // qf = query and fragment $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . 
'/?'); if (!is_null($this->query)) { $this->query = $qf_encoder->encode($this->query); } if (!is_null($this->fragment)) { $this->fragment = $qf_encoder->encode($this->fragment); } return true; } /** * Convert URI back to string * @return string URI appropriate for output */ public function toString() { // reconstruct authority $authority = null; // there is a rendering difference between a null authority // (http:foo-bar) and an empty string authority // (http:///foo-bar). if (!is_null($this->host)) { $authority = ''; if (!is_null($this->userinfo)) { $authority .= $this->userinfo . '@'; } $authority .= $this->host; if (!is_null($this->port)) { $authority .= ':' . $this->port; } } // Reconstruct the result // One might wonder about parsing quirks from browsers after // this reconstruction. Unfortunately, parsing behavior depends // on what *scheme* was employed (file:///foo is handled *very* // differently than http:///foo), so unfortunately we have to // defer to the schemes to do the right thing. $result = ''; if (!is_null($this->scheme)) { $result .= $this->scheme . ':'; } if (!is_null($authority)) { $result .= '//' . $authority; } $result .= $this->path; if (!is_null($this->query)) { $result .= '?' . $this->query; } if (!is_null($this->fragment)) { $result .= '#' . $this->fragment; } return $result; } /** * Returns true if this URL might be considered a 'local' URL given * the current context. This is true when the host is null, or * when it matches the host supplied to the configuration. * * Note that this does not do any scheme checking, so it is mostly * only appropriate for metadata that doesn't care about protocol * security. isBenign is probably what you actually want. 
* @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool */ public function isLocal($config, $context) { if ($this->host === null) { return true; } $uri_def = $config->getDefinition('URI'); if ($uri_def->host === $this->host) { return true; } return false; } /** * Returns true if this URL should be considered a 'benign' URL, * that is: * * - It is a local URL (isLocal), and * - It has a equal or better level of security * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool */ public function isBenign($config, $context) { if (!$this->isLocal($config, $context)) { return false; } $scheme_obj = $this->getSchemeObj($config, $context); if (!$scheme_obj) { return false; } // conservative approach $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context); if ($current_scheme_obj->secure) { if (!$scheme_obj->secure) { return false; } } return true; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/VarParserException.php 0000644 00000000235 15121423110 0017457 0 ustar 00 <?php /** * Exception type for HTMLPurifier_VarParser */ class HTMLPurifier_VarParserException extends HTMLPurifier_Exception { } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/Context.php 0000644 00000005112 15121423110 0015316 0 ustar 00 <?php /** * Registry object that contains information about the current context. * @warning Is a bit buggy when variables are set to null: it thinks * they don't exist! So use false instead, please. * @note Since the variables Context deals with may not be objects, * references are very important here! Do not remove! */ class HTMLPurifier_Context { /** * Private array that stores the references. * @type array */ private $_storage = array(); /** * Registers a variable into the context. 
* @param string $name String name * @param mixed $ref Reference to variable to be registered */ public function register($name, &$ref) { if (array_key_exists($name, $this->_storage)) { trigger_error( "Name $name produces collision, cannot re-register", E_USER_ERROR ); return; } $this->_storage[$name] =& $ref; } /** * Retrieves a variable reference from the context. * @param string $name String name * @param bool $ignore_error Boolean whether or not to ignore error * @return mixed */ public function &get($name, $ignore_error = false) { if (!array_key_exists($name, $this->_storage)) { if (!$ignore_error) { trigger_error( "Attempted to retrieve non-existent variable $name", E_USER_ERROR ); } $var = null; // so we can return by reference return $var; } return $this->_storage[$name]; } /** * Destroys a variable in the context. * @param string $name String name */ public function destroy($name) { if (!array_key_exists($name, $this->_storage)) { trigger_error( "Attempted to destroy non-existent variable $name", E_USER_ERROR ); return; } unset($this->_storage[$name]); } /** * Checks whether or not the variable exists. * @param string $name String name * @return bool */ public function exists($name) { return array_key_exists($name, $this->_storage); } /** * Loads a series of variables from an associative array * @param array $context_array Assoc array of variables to load */ public function loadArray($context_array) { foreach ($context_array as $key => $discard) { $this->register($key, $context_array[$key]); } } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/Node.php 0000644 00000002400 15121423110 0014554 0 ustar 00 <?php /** * Abstract base node class that all others inherit from. * * Why do we not use the DOM extension? (1) It is not always available, * (2) it has funny constraints on the data it can represent, * whereas we want a maximally flexible representation, and (3) its * interface is a bit cumbersome. 
/**
 * Abstract base node class that all others inherit from.
 *
 * Why do we not use the DOM extension? (1) It is not always available,
 * (2) it has funny constraints on the data it can represent,
 * whereas we want a maximally flexible representation, and (3) its
 * interface is a bit cumbersome.
 */
abstract class HTMLPurifier_Node
{
    /**
     * Line number of the start token in the source document
     * @type int
     */
    public $line;

    /**
     * Column number of the start token in the source document. Null if unknown.
     * @type int
     */
    public $col;

    /**
     * Lookup array of processing that this token is exempt from.
     * Currently, valid values are "ValidateAttributes".
     * @type array
     */
    public $armor = array();

    /**
     * When true, this node should be ignored as non-existent.
     *
     * Who is responsible for ignoring dead nodes?  FixNesting is
     * responsible for removing them before passing on to child
     * validators.
     * @type bool
     */
    public $dead = false;

    /**
     * Returns a pair of start and end tokens, where the end token
     * is null if it is not necessary. Does not include children.
     * @return array
     */
    abstract public function toTokenPair();
}
/**
 * Collects the IDs seen so far so that duplicates can be rejected.
 * @note The constructor takes no $config or $context; use the static
 *       build() factory to get an instance preloaded with the blacklist.
 */
class HTMLPurifier_IDAccumulator
{
    /**
     * Lookup table of accumulated IDs (ID => true).
     * @type array
     */
    public $ids = array();

    /**
     * Creates an accumulator and seeds it with the %Attr.IDBlacklist entries.
     * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
     * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
     * @return HTMLPurifier_IDAccumulator Fully initialized HTMLPurifier_IDAccumulator
     */
    public static function build($config, $context)
    {
        $accumulator = new HTMLPurifier_IDAccumulator();
        $blacklist = $config->get('Attr.IDBlacklist');
        $accumulator->load($blacklist);
        return $accumulator;
    }

    /**
     * Records an ID unless it is already present.
     * @param string $id ID to be added.
     * @return bool true when the ID was newly recorded, false on a duplicate
     */
    public function add($id)
    {
        if (!isset($this->ids[$id])) {
            $this->ids[$id] = true;
            return true;
        }
        return false;
    }

    /**
     * Marks every ID of the given list as seen; duplicates are harmless.
     * @param array $array_of_ids Array of IDs to load
     */
    public function load($array_of_ids)
    {
        foreach ($array_of_ids as $known_id) {
            $this->ids[$known_id] = true;
        }
    }
}
/**
 * Implements safety checks for safe iframes.
 *
 * @warning This filter is *critical* for ensuring that %HTML.SafeIframe
 *          works safely.
 */
class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'SafeIframe';

    /**
     * @type bool
     */
    public $always_load = true;

    /**
     * Whitelist pattern read from %URI.SafeIframeRegexp; null means that no
     * whitelist is configured.
     * @type string
     */
    protected $regexp = null;

    // XXX: The not so good bit about how this is all set up now is we
    // can't check HTML.SafeIframe in the 'prepare' step: we have to
    // defer till the actual filtering.

    /**
     * Caches the whitelist pattern from the configuration.
     * @param HTMLPurifier_Config $config
     * @return bool Always true
     */
    public function prepare($config)
    {
        $this->regexp = $config->get('URI.SafeIframeRegexp');
        return true;
    }

    /**
     * Accepts every URI except embedded iframe sources that fail the
     * whitelist while %HTML.SafeIframe is enabled.
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool true to keep the URI, false to reject it
     */
    public function filter(&$uri, $config, $context)
    {
        // check if filter not applicable
        if (!$config->get('HTML.SafeIframe')) {
            return true;
        }
        // check if the filter should actually trigger
        if (!$context->get('EmbeddedURI', true)) {
            return true;
        }
        $token = $context->get('CurrentToken', true);
        if (!($token && $token->name == 'iframe')) {
            return true;
        }
        // check if we actually have some whitelists enabled
        if ($this->regexp === null) {
            return false;
        }
        // preg_match() yields 1, 0 or false; collapse that to the documented
        // bool so that a PCRE failure is an explicit rejection as well
        return preg_match($this->regexp, $uri->toString()) === 1;
    }
}
/**
 * Replaces a browsable, non-benign URI with the munged redirect target.
 *
 * The URI is left untouched when it is an embedded resource and resource
 * munging is off, when its scheme is unknown or not browsable, when it
 * is benign, or when the munged target would point at the same host.
 *
 * @param HTMLPurifier_URI $uri Overwritten with the munged URI on success
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool Always true
 */
public function filter(&$uri, $config, $context)
{
    if ($context->get('EmbeddedURI', true) && !$this->doEmbed) {
        return true;
    }

    $scheme_obj = $uri->getSchemeObj($config, $context);
    if (!$scheme_obj) {
        return true;
    } // ignore unknown schemes, maybe another postfilter did it
    if (!$scheme_obj->browsable) {
        return true;
    } // ignore non-browseable schemes, since we can't munge those in a reasonable way
    if ($uri->isBenign($config, $context)) {
        return true;
    } // don't redirect if a benign URL

    $this->makeReplace($uri, $config, $context);
    // makeReplace() stores null for context values that are not set, and
    // passing null to rawurlencode() is deprecated as of PHP 8.1; the
    // string cast yields the same '' that the implicit coercion produced.
    foreach ($this->replace as $placeholder => $value) {
        $this->replace[$placeholder] = rawurlencode((string)$value);
    }

    $new_uri = strtr($this->target, $this->replace);
    $new_uri = $this->parser->parse($new_uri);
    // don't redirect if the target host is the same as the
    // starting host
    if ($uri->host === $new_uri->host) {
        return true;
    }
    $uri = $new_uri; // overwrite
    return true;
}
/**
 * Resolves a relative URI against the configured base URI.
 *
 * URIs that already carry a host are returned unchanged, as are URIs with
 * a non-hierarchical scheme. A URI whose scheme is not recognized is
 * rejected.
 *
 * @param HTMLPurifier_URI $uri Modified in place (or replaced by a clone of
 *        the base when it is a reference to the current document)
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool false only when the URI has an unrecognized scheme
 */
public function filter(&$uri, $config, $context)
{
    $base = $this->base;
    if ($base === null) {
        // nothing to resolve against
        return true;
    }

    $is_self_reference = $uri->path === ''
        && $uri->scheme === null
        && $uri->host === null
        && $uri->query === null
        && $uri->fragment === null;
    if ($is_self_reference) {
        // reference to current document
        $uri = clone $base;
        return true;
    }

    if ($uri->scheme !== null) {
        if ($uri->host !== null) {
            // absolute URI already: don't change
            return true;
        }
        $scheme_obj = $uri->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            // scheme not recognized
            return false;
        }
        if (!$scheme_obj->hierarchical) {
            // non-hierarchical URI with explicit scheme, don't change
            return true;
        }
        // special case: had a scheme that is hierarchical but no authority
    }

    if ($uri->host !== null) {
        // network path, don't bother
        return true;
    }

    if ($uri->path === '') {
        $uri->path = $base->path;
    } else {
        $segments = explode('/', $uri->path);
        if ($uri->path[0] !== '/') {
            // relative path: resolve against the pre-parsed base path
            $segments = array_merge($this->basePathStack, $segments);
            if ($segments[0] !== '' && $base->host !== null) {
                array_unshift($segments, '');
            }
        }
        // absolute paths are collapsed too
        $uri->path = implode('/', $this->_collapseStack($segments));
    }

    // re-combine: the scheme always comes from the base, the authority
    // parts only where the URI did not supply its own
    $uri->scheme = $base->scheme;
    foreach (array('userinfo', 'host', 'port') as $part) {
        if ($uri->$part === null) {
            $uri->$part = $base->$part;
        }
    }
    return true;
}
/**
 * Applies the DisableExternal host check to embedded resources only;
 * URIs that are not embedded always pass.
 */
class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
{
    /**
     * @type string
     */
    public $name = 'DisableExternalResources';

    /**
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function filter(&$uri, $config, $context)
    {
        $is_embedded = $context->get('EmbeddedURI', true);
        return $is_embedded ? parent::filter($uri, $config, $context) : true;
    }
}
// It's not clear to me whether or not Punycode means that hostnames
// do not have canonical forms anymore. As far as I can tell, it's
// not a problem (punycoding should be identity when no Unicode
// points are involved), but I'm not 100% sure
/**
 * Rejects URIs whose host contains any of the configured
 * %URI.HostBlacklist fragments.
 */
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'HostBlacklist';

    /**
     * Host fragments to reject, read from %URI.HostBlacklist.
     * @type array
     */
    protected $blacklist = array();

    /**
     * @param HTMLPurifier_Config $config
     * @return bool Always true
     */
    public function prepare($config)
    {
        $this->blacklist = $config->get('URI.HostBlacklist');
        return true;
    }

    /**
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool false when the host contains a blacklisted fragment
     */
    public function filter(&$uri, $config, $context)
    {
        // The host is null for host-less URIs; passing null to strpos() is
        // deprecated as of PHP 8.1, so normalise it once up front.
        $host = (string)$uri->host;
        foreach ($this->blacklist as $blacklisted_host_fragment) {
            // An empty fragment is contained in every string on PHP 8 and
            // would reject every URI; it carries no information, skip it.
            if ((string)$blacklisted_host_fragment === '') {
                continue;
            }
            if (strpos($host, $blacklisted_host_fragment) !== false) {
                return false;
            }
        }
        return true;
    }
}
/**
 * Base class for all validating attribute definitions.
 *
 * Subclasses decide what makes a string valid and, where possible, clean
 * it up. The same family is used for HTML attributes, CSS properties and
 * composite definitions.
 */
abstract class HTMLPurifier_AttrDef
{
    /**
     * Whether the HTML attribute is minimized. Meaningless elsewhere.
     * @type bool
     */
    public $minimized = false;

    /**
     * Whether the HTML attribute is required. Meaningless elsewhere.
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Normalizes a string the way HTML 4 treats CDATA attribute values
     * (<http://www.w3.org/TR/html4/types.html#h-6.2>): surrounding
     * whitespace is trimmed, then every newline, tab and carriage return
     * is replaced by a single space.
     *
     * @note trim() strips more characters than the spec lists, so this is
     *       not strictly standards compliant.
     * @warning This differs from XML's whitespace handling (section 3.3.3,
     *          referenced by XHTML 1.0 section 4.7).
     *
     * @param string $string
     * @return string
     */
    public function parseCDATA($string)
    {
        return str_replace(array("\n", "\t", "\r"), ' ', trim($string));
    }

    /**
     * Factory method for creating this class from a string.
     *
     * The default implementation ignores $string and returns this very
     * object as a flyweight. Overrides whose result depends on $string
     * should clone or instantiate instead.
     *
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Strings containing rgba( or hsla( are handled with the four-component
     * pattern; everything else with the three-component rgb/hsl pattern.
     *
     * @param string $string a CSS colour definition
     * @return string
     */
    protected function mungeRgb($string)
    {
        // one numeric component, optionally fractional and/or a percentage
        $component = '\s*(\d+(\.\d+)?([%]?))\s*';
        $has_alpha = preg_match('/(rgba|hsla)\(/', $string);
        if ($has_alpha) {
            $pattern = '/(rgba|hsla)\('
                . implode(',', array($component, $component, $component, $component))
                . '\)/';
            return preg_replace($pattern, '\1(\2,\5,\8,\11)', $string);
        }
        $pattern = '/(rgb|hsl)\('
            . implode(',', array($component, $component, $component))
            . '\)/';
        return preg_replace($pattern, '\1(\2,\5,\8)', $string);
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * A backslash followed by up to six hex digits becomes the matching
     * character, a backslash-newline pair is dropped, a trailing lone
     * backslash is kept, and any other escaped character stands for
     * itself.
     *
     * @param string $string
     * @return string
     */
    protected function expandCSSEscape($string)
    {
        $out = '';
        $len = strlen($string);
        for ($pos = 0; $pos < $len; $pos++) {
            if ($string[$pos] === '\\') {
                $pos++;
                if ($pos >= $len) {
                    // lone backslash at the very end
                    $out .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$pos])) {
                    // gather at most six hex digits
                    $digits = $string[$pos];
                    $pos++;
                    $taken = 1;
                    while ($pos < $len && $taken < 6 && ctype_xdigit($string[$pos])) {
                        $digits .= $string[$pos];
                        $pos++;
                        $taken++;
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($digits));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
                        continue;
                    }
                    $out .= $char;
                    // A whitespace character after the escape is consumed
                    // with it; anything else must be looked at again.
                    if ($pos < $len && trim($string[$pos]) !== '') {
                        $pos--;
                    }
                    continue;
                }
                if ($string[$pos] === "\n") {
                    continue;
                }
            }
            $out .= $string[$pos];
        }
        return $out;
    }
}
/**
 * Decides whether a text token must be wrapped in, or split into,
 * paragraphs and rewrites $token accordingly.
 *
 * @param HTMLPurifier_Token_Text $token Replaced by an array of tokens
 *        when paragraphs are introduced; left untouched otherwise
 */
public function handleText(&$token)
{
    $text = $token->data;

    if (!$this->allowsElement('p')) {
        // <p> cannot be opened here. The one case still acted upon is text
        // sitting directly inside a <p>:
        //   State 3.1: ...<p>PAR1
        //   State 3.2: ...<p>PAR1\n\nPAR2
        // Anything else (State 4.x: ...<b>PAR1) is left alone.
        if (!empty($this->currentNesting)) {
            $parent = $this->currentNesting[count($this->currentNesting) - 1];
            if ($parent->name == 'p') {
                $token = array();
                $this->_splitText($text, $token);
            }
        }
        return;
    }

    if (!empty($this->currentNesting) && strpos($text, "\n\n") === false) {
        // State 2: <div>PAR1...
        // Inside an element that allows paragraphs, without a double
        // newline in this text: a <p> is needed only if a later sibling
        // text carries one (State 2.1: <div>PAR1<b>PAR1\n\nPAR2).
        // States 2.2.x (<div>PAR1<div>, <div>PAR1<b>PAR1</b></div>) need
        // nothing.
        if ($this->_pLookAhead()) {
            $token = array($this->_pStart(), $token);
        }
        return;
    }

    // Text in the anonymous root node, or text containing a double newline.
    $i = $nesting = null;
    $has_next = $this->forwardUntilEndToken($i, $current, $nesting);
    if (!$has_next && $token->is_whitespace) {
        // State 1.1: whitespace followed by the end of the document;
        // a degenerate case that needs no paragraph
        return;
    }
    if (!$token->is_whitespace || $this->_isInline($current)) {
        // State 1.2: PAR1
        // State 1.3: PAR1\n\nPAR2
        // State 1.4: <div>PAR1\n\nPAR2
        $token = array($this->_pStart());
        $this->_splitText($text, $token);
    }
    // otherwise State 1.5: whitespace before a block element (\n<hr />)
}
if ($this->allowsElement('p')) { if (!empty($this->currentNesting)) { if ($this->_isInline($token)) { // State 1: <div>...<b> // --- // Check if this token is adjacent to the parent token // (seek backwards until token isn't whitespace) $i = null; $this->backward($i, $prev); if (!$prev instanceof HTMLPurifier_Token_Start) { // Token wasn't adjacent if ($prev instanceof HTMLPurifier_Token_Text && substr($prev->data, -2) === "\n\n" ) { // State 1.1.4: <div><p>PAR1</p>\n\n<b> // --- // Quite frankly, this should be handled by splitText $token = array($this->_pStart(), $token); } else { // State 1.1.1: <div><p>PAR1</p><b> // --- // State 1.1.2: <div><br /><b> // --- // State 1.1.3: <div>PAR<b> // --- } } else { // State 1.2.1: <div><b> // --- // Lookahead to see if <p> is needed. if ($this->_pLookAhead()) { // State 1.3.1: <div><b>PAR1\n\nPAR2 // --- $token = array($this->_pStart(), $token); } else { // State 1.3.2: <div><b>PAR1</b></div> // --- // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div> // --- } } } else { // State 2.3: ...<div> // ----- } } else { if ($this->_isInline($token)) { // State 3.1: <b> // --- // This is where the {p} tag is inserted, not reflected in // inputTokens yet, however. $token = array($this->_pStart(), $token); } else { // State 3.2: <div> // ----- } $i = null; if ($this->backward($i, $prev)) { if (!$prev instanceof HTMLPurifier_Token_Text) { // State 3.1.1: ...</p>{p}<b> // --- // State 3.2.1: ...</p><div> // ----- if (!is_array($token)) { $token = array($token); } array_unshift($token, new HTMLPurifier_Token_Text("\n\n")); } else { // State 3.1.2: ...</p>\n\n{p}<b> // --- // State 3.2.2: ...</p>\n\n<div> // ----- // Note: PAR<ELEM> cannot occur because PAR would have been // wrapped in <p> tags. 
/**
 * Splits up a text in paragraph tokens and appends them
 * to the result stream that will replace the original.
 *
 * The text is cut at every double newline. Each non-blank piece is emitted
 * as text followed by an end-p, a "\n\n" text token and a start-p; the
 * surplus trailing tokens are popped off again at the end.
 *
 * @param string $data String text data that will be processed
 *        into paragraphs
 * @param HTMLPurifier_Token[] $result Reference to array of tokens that the
 *        tags will be appended onto. An empty array on entry is taken to
 *        mean that the caller did not open a paragraph itself.
 */
private function _splitText($data, &$result)
{
    $raw_paragraphs = explode("\n\n", $data);
    $paragraphs = array(); // without empty paragraphs
    $needs_start = false; // a start-p must precede the first real paragraph
    $needs_end = false;   // the text ended in a double newline

    $c = count($raw_paragraphs);
    if ($c == 1) {
        // There were no double-newlines, abort quickly. In theory this
        // should never happen.
        $result[] = new HTMLPurifier_Token_Text($data);
        return;
    }
    for ($i = 0; $i < $c; $i++) {
        $par = $raw_paragraphs[$i];
        if (trim($par) !== '') {
            $paragraphs[] = $par;
        } else {
            if ($i == 0) {
                // Double newline at the front
                if (empty($result)) {
                    // The empty result indicates that the AutoParagraph
                    // injector did not add any start paragraph tokens.
                    // This means that we have been in a paragraph for
                    // a while, and the newline means we should start a new one.
                    $result[] = new HTMLPurifier_Token_End('p');
                    $result[] = new HTMLPurifier_Token_Text("\n\n");
                    // However, the start token should only be added if
                    // there is more processing to be done (i.e. there are
                    // real paragraphs in here). If there are none, the
                    // next start paragraph tag will be handled by the
                    // next call to the injector
                    $needs_start = true;
                } else {
                    // We just started a new paragraph!
                    // Reinstate a double-newline for presentation's sake, since
                    // it was in the source code.
                    array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
                }
            } elseif ($i + 1 == $c) {
                // Double newline at the end
                // There should be a trailing </p> when we're finally done.
                $needs_end = true;
            }
            // blank pieces in the middle are simply dropped
        }
    }

    // Check if this was just a giant blob of whitespace. Move this earlier,
    // perhaps?
    if (empty($paragraphs)) {
        return;
    }

    // Add the start tag indicated by \n\n at the beginning of $data
    if ($needs_start) {
        $result[] = $this->_pStart();
    }

    // Append the paragraphs onto the result
    foreach ($paragraphs as $par) {
        $result[] = new HTMLPurifier_Token_Text($par);
        $result[] = new HTMLPurifier_Token_End('p');
        $result[] = new HTMLPurifier_Token_Text("\n\n");
        $result[] = $this->_pStart();
    }

    // Remove trailing start token; Injector will handle this later if
    // it was indeed needed. This prevents from needing to do a lookahead,
    // at the cost of a lookbehind later.
    array_pop($result);

    // If there is no need for an end tag, remove all of it and let
    // MakeWellFormed close it later.
    if (!$needs_end) {
        array_pop($result); // removes \n\n
        array_pop($result); // removes </p>
    }
}
/**
 * Determines if a particular token requires an earlier inline token
 * to get a paragraph. This should be used with _forwardUntilEndToken
 * @param HTMLPurifier_Token $current
 * @return bool|null true when a paragraph is needed, false when the scan
 *         can stop without one, null when this token decides nothing
 */
private function _checkNeedsP($current)
{
    if ($current instanceof HTMLPurifier_Token_Start) {
        // <div>PAR1<div>: a block element ends the search with "no";
        // an inline element leaves the question open
        return $this->_isInline($current) ? null : false;
    }
    if ($current instanceof HTMLPurifier_Token_Text
        && strpos($current->data, "\n\n") !== false
    ) {
        // <div>PAR1<b>PAR1\n\nPAR2: a double newline further on
        return true;
    }
    // text without a double newline, or any other token kind
    return null;
}
$bits[$i]); $token[] = new HTMLPurifier_Token_End('a'); } } } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/Injector/DisplayLinkURI.php 0000644 00000001533 15121423110 0020255 0 ustar 00 <?php /** * Injector that displays the URL of an anchor instead of linking to it, in addition to showing the text of the link. */ class HTMLPurifier_Injector_DisplayLinkURI extends HTMLPurifier_Injector { /** * @type string */ public $name = 'DisplayLinkURI'; /** * @type array */ public $needed = array('a'); /** * @param $token */ public function handleElement(&$token) { } /** * @param HTMLPurifier_Token $token */ public function handleEnd(&$token) { if (isset($token->start->attr['href'])) { $url = $token->start->attr['href']; unset($token->start->attr['href']); $token = array($token, new HTMLPurifier_Token_Text(" ($url)")); } else { // nothing to display } } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/Injector/SafeObject.php 0000644 00000007557 15121423110 0017473 0 ustar 00 <?php /** * Adds important param elements to inside of object in order to make * things safe. */ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector { /** * @type string */ public $name = 'SafeObject'; /** * @type array */ public $needed = array('object', 'param'); /** * @type array */ protected $objectStack = array(); /** * @type array */ protected $paramStack = array(); /** * Keep this synchronized with AttrTransform/SafeParam.php. * @type array */ protected $addParam = array( 'allowScriptAccess' => 'never', 'allowNetworking' => 'internal', ); /** * These are all lower-case keys. 
/**
 * Prepares the injector and reports whether it can be enabled.
 *
 * The parent implementation returns false on success, or the name of the
 * missing element/attribute when the definition does not allow what this
 * injector needs. That result is handed back to the caller; it was
 * previously discarded, so a failed check looked like success.
 *
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
 */
public function prepare($config, $context)
{
    return parent::prepare($config, $context);
}
/**
 * Injector that removes spans with no attributes
 */
class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'RemoveSpansWithoutAttributes';

    /**
     * @type array
     */
    public $needed = array('span');

    /**
     * @type HTMLPurifier_AttrValidator
     */
    private $attrValidator;

    /**
     * Used by AttrValidator.
     * @type HTMLPurifier_Config
     */
    private $config;

    /**
     * @type HTMLPurifier_Context
     */
    private $context;

    /**
     * Set of closing span tokens whose opening tag was removed and which
     * must therefore be removed as well. Kept here rather than as an
     * undeclared property on the token objects, because creating dynamic
     * properties is deprecated as of PHP 8.2.
     * @type SplObjectStorage
     */
    private $markForDeletion;

    public function __construct()
    {
        $this->markForDeletion = new SplObjectStorage();
    }

    /**
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Result of the parent implementation
     */
    public function prepare($config, $context)
    {
        $this->attrValidator = new HTMLPurifier_AttrValidator();
        $this->config = $config;
        $this->context = $context;
        // Start each run with an empty set so tokens of an earlier run
        // are not kept alive.
        $this->markForDeletion = new SplObjectStorage();
        return parent::prepare($config, $context);
    }

    /**
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) {
            return;
        }

        // We need to validate the attributes now since this doesn't normally
        // happen until after MakeWellFormed. If all the attributes are removed
        // the span needs to be removed too.
        $this->attrValidator->validateToken($token, $this->config, $this->context);
        $token->armor['ValidateAttributes'] = true;

        if (!empty($token->attr)) {
            return;
        }

        // Walk forward until the iterator stops; $current then holds the
        // last token that was looked at. Both by-reference variables are
        // initialised explicitly, as the iterator requires $i to be null
        // on the first call.
        $nesting = 0;
        $i = null;
        $current = null;
        while ($this->forwardUntilEndToken($i, $current, $nesting)) {
        }

        if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') {
            // Mark closing span tag for deletion
            $this->markForDeletion->offsetSet($current, true);
            // Delete open span tag
            $token = false;
        }
    }

    /**
     * @param HTMLPurifier_Token $token
     */
    public function handleEnd(&$token)
    {
        if (is_object($token) && $this->markForDeletion->offsetExists($token)) {
            $this->markForDeletion->offsetUnset($token);
            $token = false;
        }
    }
}
/**
 * Concrete element node class.
 */
class HTMLPurifier_Node_Element extends HTMLPurifier_Node
{
    /**
     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
     *
     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
     * be lower-casing them, but these tokens cater to HTML tags, which are
     * insensitive.
     * @type string
     */
    public $name;

    /**
     * Associative array of the node's attributes.
     * @type array
     */
    public $attr = array();

    /**
     * List of child elements.
     * @type array
     */
    public $children = array();

    /**
     * Whether the node is converted to a single empty token (true) or to
     * a pair of start/end tokens (false).
     * @type bool
     */
    public $empty = false;

    /** Column passed to the end token. */
    public $endCol = null;

    /** Line passed to the end token. */
    public $endLine = null;

    /** Armor passed to the end token. */
    public $endArmor = array();

    /**
     * @param string $name
     * @param array $attr
     * @param int $line
     * @param int $col
     * @param array $armor
     */
    public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array())
    {
        $this->name  = $name;
        $this->attr  = $attr;
        $this->line  = $line;
        $this->col   = $col;
        $this->armor = $armor;
    }

    /**
     * Converts the node into tokens.
     * @return array Two-element array: (empty token, null) for an empty
     *               node, otherwise (start token, end token)
     */
    public function toTokenPair()
    {
        // XXX inefficiency here, normalization is not necessary
        if (!$this->empty) {
            $opening = new HTMLPurifier_Token_Start($this->name, $this->attr, $this->line, $this->col, $this->armor);
            $closing = new HTMLPurifier_Token_End($this->name, array(), $this->endLine, $this->endCol, $this->endArmor);
            return array($opening, $closing);
        }

        $single = new HTMLPurifier_Token_Empty($this->name, $this->attr, $this->line, $this->col, $this->armor);
        return array($single, null);
    }
}
/**
 * Injects tokens into the document while parsing for well-formedness.
 * This enables "formatter-like" functionality such as auto-paragraphing,
 * smiley-ification and linkification to take place.
 *
 * Handlers create changes by assigning a new value to the $token
 * reference they receive.
 *
 * @todo Allow injectors to request a re-run on their output. This
 *       would help if an operation is recursive.
 */
abstract class HTMLPurifier_Injector
{
    /**
     * Advisory name of injector, this is for friendly error messages.
     * @type string
     */
    public $name;

    /**
     * @type HTMLPurifier_HTMLDefinition
     */
    protected $htmlDefinition;

    /**
     * Reference to CurrentNesting variable in Context. This is an array
     * list of tokens that we are currently "inside"
     * @type array
     */
    protected $currentNesting;

    /**
     * Reference to current token.
     * @type HTMLPurifier_Token
     */
    protected $currentToken;

    /**
     * Reference to InputZipper variable in Context.
     * @type HTMLPurifier_Zipper
     */
    protected $inputZipper;

    /**
     * Array of elements and attributes this injector creates and therefore
     * need to be allowed by the definition. Takes form of
     * array('element' => array('attr', 'attr2'), 'element2')
     * @type array
     */
    public $needed = array();

    /**
     * Number of elements to rewind backwards (relative).
     * @type bool|int
     */
    protected $rewindOffset = false;

    /**
     * Requests a rewind so that earlier tokens are processed again, which
     * is useful after deleting a node. Rewinding does not affect other
     * injectors, and can result in infinite loops if not used carefully.
     * @param bool|int $offset
     * @warning HTML Purifier will prevent you from fast-forwarding with this
     *          function.
     */
    public function rewindOffset($offset)
    {
        $this->rewindOffset = $offset;
    }

    /**
     * Returns the pending rewind offset and clears it.
     * @return bool|int
     */
    public function getRewindOffset()
    {
        $pending = $this->rewindOffset;
        $this->rewindOffset = false;
        return $pending;
    }

    /**
     * Hands the config and context objects to the injector so it can keep
     * references to the variables it needs, and checks that the HTML
     * environment supports the injector (see checkNeeded()).
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
     */
    public function prepare($config, $context)
    {
        $this->htmlDefinition = $config->getHTMLDefinition();

        // The definition is stored before the check on purpose: it stays
        // available even when the check reports a missing element.
        $missing = $this->checkNeeded($config);
        if ($missing !== false) {
            return $missing;
        }

        $this->currentNesting =& $context->get('CurrentNesting');
        $this->currentToken   =& $context->get('CurrentToken');
        $this->inputZipper    =& $context->get('InputZipper');
        return false;
    }

    /**
     * Checks that every element and attribute listed in $needed is
     * present in the HTML definition.
     * @param HTMLPurifier_Config $config
     * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
     */
    public function checkNeeded($config)
    {
        $definition = $config->getHTMLDefinition();
        foreach ($this->needed as $key => $value) {
            // An integer key means the entry is a bare element name.
            $element = is_int($key) ? $value : $key;
            if (!isset($definition->info[$element])) {
                return $element;
            }
            if (!is_array($value)) {
                continue;
            }
            foreach ($value as $attribute) {
                if (!isset($definition->info[$element]->attr[$attribute])) {
                    return "$element.$attribute";
                }
            }
        }
        return false;
    }

    /**
     * Tests if the context node allows a certain element
     * @param string $name Name of element to test for
     * @return bool True if element is allowed, false if it is not
     */
    public function allowsElement($name)
    {
        $depth = empty($this->currentNesting) ? 0 : count($this->currentNesting);

        if ($depth > 0) {
            $innermost = $this->currentNesting[$depth - 1];
            $parent = $this->htmlDefinition->info[$innermost->name];
        } else {
            $parent = $this->htmlDefinition->info_parent_def;
        }

        $permitted = isset($parent->child->elements[$name]) && !isset($parent->excludes[$name]);
        if (!$permitted) {
            return false;
        }

        // Any ancestor above the immediate parent may exclude the element.
        for ($level = $depth - 2; $level >= 0; $level--) {
            $ancestor = $this->currentNesting[$level];
            $ancestorDef = $this->htmlDefinition->info[$ancestor->name];
            if (isset($ancestorDef->excludes[$name])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Iterator function, which starts with the next token and continues until
     * you reach the end of the input tokens.
     * @warning Please prevent previous references from interfering with this
     *          functions by setting $i = null beforehand!
     * @param int $i Current integer index variable for inputTokens
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @return bool
     */
    protected function forward(&$i, &$current)
    {
        // The zipper's "back" list is walked from its last index downwards.
        $i = ($i === null) ? count($this->inputZipper->back) - 1 : $i - 1;
        if ($i < 0) {
            return false;
        }
        $current = $this->inputZipper->back[$i];
        return true;
    }

    /**
     * Similar to forward(), but accepts a third parameter $nesting (which
     * should be initialized at 0) and stops at the end tag that closes
     * the level the scan started in.
     * @param int $i Current integer index variable for inputTokens
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @param int $nesting
     * @return bool
     */
    protected function forwardUntilEndToken(&$i, &$current, &$nesting)
    {
        if (!$this->forward($i, $current)) {
            return false;
        }
        if ($nesting === null) {
            $nesting = 0;
        }
        if ($current instanceof HTMLPurifier_Token_Start) {
            $nesting++;
            return true;
        }
        if ($current instanceof HTMLPurifier_Token_End) {
            if ($nesting <= 0) {
                return false;
            }
            $nesting--;
        }
        return true;
    }

    /**
     * Iterator function, starts with the previous token and continues until
     * you reach the beginning of input tokens.
     * @warning Please prevent previous references from interfering with this
     *          functions by setting $i = null beforehand!
     * @param int $i Current integer index variable for inputTokens
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @return bool
     */
    protected function backward(&$i, &$current)
    {
        $i = ($i === null) ? count($this->inputZipper->front) - 1 : $i - 1;
        if ($i < 0) {
            return false;
        }
        $current = $this->inputZipper->front[$i];
        return true;
    }

    /**
     * Handler that is called when a text token is processed
     */
    public function handleText(&$token)
    {
    }

    /**
     * Handler that is called when a start or empty token is processed
     */
    public function handleElement(&$token)
    {
    }

    /**
     * Handler that is called when an end token is processed
     */
    public function handleEnd(&$token)
    {
        $this->notifyEnd($token);
    }

    /**
     * Notifier that is called when an end token is processed
     * @param HTMLPurifier_Token $token Current token variable.
     * @note This differs from handlers in that the token is read-only
     * @deprecated
     */
    public function notifyEnd($token)
    {
    }
}
/**
 * Factory for token generation.
 *
 * @note Benchmarking indicated that the new operator is much slower than
 *       the clone operator (even discounting the cost of the
 *       constructor), so every token is produced by cloning a blank
 *       prototype and re-running its constructor. Other than that,
 *       there's not much point as we don't maintain parallel
 *       HTMLPurifier_Token hierarchies.
 * @todo Port DirectLex to use this
 */
class HTMLPurifier_TokenFactory
{
    /**
     * Blank start token that is cloned for every new start token.
     * @type HTMLPurifier_Token_Start
     */
    private $startPrototype;

    /**
     * Blank end token that is cloned for every new end token.
     * @type HTMLPurifier_Token_End
     */
    private $endPrototype;

    /**
     * Blank empty token that is cloned for every new empty token.
     * @type HTMLPurifier_Token_Empty
     */
    private $emptyPrototype;

    /**
     * Blank text token that is cloned for every new text token.
     * @type HTMLPurifier_Token_Text
     */
    private $textPrototype;

    /**
     * Blank comment token that is cloned for every new comment token.
     * @type HTMLPurifier_Token_Comment
     */
    private $commentPrototype;

    /**
     * Generates blank prototypes for cloning.
     */
    public function __construct()
    {
        $this->startPrototype   = new HTMLPurifier_Token_Start('', array());
        $this->endPrototype     = new HTMLPurifier_Token_End('');
        $this->emptyPrototype   = new HTMLPurifier_Token_Empty('', array());
        $this->textPrototype    = new HTMLPurifier_Token_Text('');
        $this->commentPrototype = new HTMLPurifier_Token_Comment('');
    }

    /**
     * Creates a HTMLPurifier_Token_Start.
     * @param string $name Tag name
     * @param array $attr Associative array of attributes
     * @return HTMLPurifier_Token_Start Generated HTMLPurifier_Token_Start
     */
    public function createStart($name, $attr = array())
    {
        $token = clone $this->startPrototype;
        $token->__construct($name, $attr);
        return $token;
    }

    /**
     * Creates a HTMLPurifier_Token_End.
     * @param string $name Tag name
     * @return HTMLPurifier_Token_End Generated HTMLPurifier_Token_End
     */
    public function createEnd($name)
    {
        $token = clone $this->endPrototype;
        $token->__construct($name);
        return $token;
    }

    /**
     * Creates a HTMLPurifier_Token_Empty.
     * @param string $name Tag name
     * @param array $attr Associative array of attributes
     * @return HTMLPurifier_Token_Empty Generated HTMLPurifier_Token_Empty
     */
    public function createEmpty($name, $attr = array())
    {
        $token = clone $this->emptyPrototype;
        $token->__construct($name, $attr);
        return $token;
    }

    /**
     * Creates a HTMLPurifier_Token_Text.
     * @param string $data Data of text token
     * @return HTMLPurifier_Token_Text Generated HTMLPurifier_Token_Text
     */
    public function createText($data)
    {
        $token = clone $this->textPrototype;
        $token->__construct($data);
        return $token;
    }

    /**
     * Creates a HTMLPurifier_Token_Comment.
     * @param string $data Data of comment token
     * @return HTMLPurifier_Token_Comment Generated HTMLPurifier_Token_Comment
     */
    public function createComment($data)
    {
        $token = clone $this->commentPrototype;
        $token->__construct($data);
        return $token;
    }
}
/**
 * Validates the HTML attribute style, otherwise known as CSS.
 * @note We don't implement the whole CSS specification, so it might be
 *       difficult to reuse this component in the context of validating
 *       actual stylesheet declarations.
 * @note If we were really serious about validating the CSS, we would
 *       tokenize the styles and then parse the tokens. Obviously, we
 *       are not doing that. Doing that could seriously harm performance,
 *       but would make these components a lot more viable for a CSS
 *       filtering solution.
 */
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
{

    /**
     * Splits the style string into declarations, validates each
     * property/value pair against the CSS definition and reassembles the
     * accepted ones as "property:value;" pairs.
     *
     * @param string $css
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The rebuilt declaration string, or false when
     *                     nothing was accepted
     */
    public function validate($css, $config, $context)
    {
        $css = $this->parseCDATA($css);

        $definition = $config->getCSSDefinition();
        $allow_duplicates = $config->get("CSS.AllowDuplicates");


        // According to the CSS2.1 spec, the places where a
        // non-delimiting semicolon can appear are in strings and
        // escape sequences. So here is some dumb hack to
        // handle quotes.
        //
        // $accum collects the declaration being read; $quoted is false
        // outside a string and otherwise holds the quote character that
        // opened the current string.
        $len = strlen($css);
        $accum = "";
        $declarations = array();
        $quoted = false;
        for ($i = 0; $i < $len; $i++) {
            // Copy everything up to the next semicolon or quote character.
            $c = strcspn($css, ";'\"", $i);
            $accum .= substr($css, $i, $c);
            $i += $c;
            if ($i == $len) break;
            $d = $css[$i];
            if ($quoted) {
                // Inside a string every delimiter is kept literally; only
                // the matching quote character ends the string.
                $accum .= $d;
                if ($d == $quoted) {
                    $quoted = false;
                }
            } else {
                if ($d == ";") {
                    // An unquoted semicolon ends the declaration.
                    $declarations[] = $accum;
                    $accum = "";
                } else {
                    // A quote character opens a string.
                    $accum .= $d;
                    $quoted = $d;
                }
            }
        }
        // Keep a trailing declaration that had no terminating semicolon.
        if ($accum != "") $declarations[] = $accum;

        $propvalues = array();
        $new_declarations = '';

        /**
         * Name of the current CSS property being validated.
         */
        $property = false;
        $context->register('CurrentCSSProperty', $property);

        foreach ($declarations as $declaration) {
            if (!$declaration) {
                continue;
            }
            // strpos() yields false (no colon) or 0 (colon first, i.e. an
            // empty property name); both cases are skipped.
            if (!strpos($declaration, ':')) {
                continue;
            }
            list($property, $value) = explode(':', $declaration, 2);
            $property = trim($property);
            $value = trim($value);
            $ok = false;
            // Single-pass loop used as a breakable block: try the property
            // name as written, then its lower-cased form unless it already
            // is lower case.
            do {
                if (isset($definition->info[$property])) {
                    $ok = true;
                    break;
                }
                if (ctype_lower($property)) {
                    break;
                }
                $property = strtolower($property);
                if (isset($definition->info[$property])) {
                    $ok = true;
                    break;
                }
            } while (0);
            if (!$ok) {
                continue;
            }
            // inefficient call, since the validator will do this again
            if (strtolower(trim($value)) !== 'inherit') {
                // inherit works for everything (but only on the base property)
                $result = $definition->info[$property]->validate(
                    $value,
                    $config,
                    $context
                );
            } else {
                $result = 'inherit';
            }
            if ($result === false) {
                continue;
            }
            if ($allow_duplicates) {
                // Duplicates allowed: emit each accepted declaration
                // immediately, in input order.
                $new_declarations .= "$property:$result;";
            } else {
                // Otherwise a later declaration of the same property
                // overwrites the earlier value.
                $propvalues[$property] = $result;
            }
        }

        $context->destroy('CurrentCSSProperty');

        // procedure does not write the new CSS simultaneously, so it's
        // slightly inefficient, but it's the only way of getting rid of
        // duplicates. Perhaps config to optimize it, but not now.

        foreach ($propvalues as $prop => $value) {
            $new_declarations .= "$prop:$value;";
        }

        return $new_declarations ? $new_declarations : false;

    }

}
/**
 * Decorator that, depending on a token, switches between two definitions.
 */
class HTMLPurifier_AttrDef_Switch
{

    /**
     * Tag name that selects the first definition.
     * @type string
     */
    protected $tag;

    /**
     * Definition used when the current token is the configured tag.
     * @type HTMLPurifier_AttrDef
     */
    protected $withTag;

    /**
     * Definition used in every other case.
     * @type HTMLPurifier_AttrDef
     */
    protected $withoutTag;

    /**
     * @param string $tag Tag name to switch upon
     * @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
     * @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
     */
    public function __construct($tag, $with_tag, $without_tag)
    {
        $this->tag        = $tag;
        $this->withTag    = $with_tag;
        $this->withoutTag = $without_tag;
    }

    /**
     * Delegates validation to one of the two wrapped definitions.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $current = $context->get('CurrentToken', true);
        $isTargetTag = $current && $current->name === $this->tag;
        $delegate = $isTargetTag ? $this->withTag : $this->withoutTag;
        return $delegate->validate($string, $config, $context);
    }
}
/**
 * Validates a URI as defined by RFC 3986.
 * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
 */
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{

    /**
     * @type HTMLPurifier_URIParser
     */
    protected $parser;

    /**
     * @type bool
     */
    protected $embedsResource;

    /**
     * @param bool $embeds_resource Does the URI here result in an extra HTTP request?
     */
    public function __construct($embeds_resource = false)
    {
        $this->parser = new HTMLPurifier_URIParser();
        $this->embedsResource = (bool)$embeds_resource;
    }

    /**
     * Builds a definition from its string form; only the exact string
     * 'embedded' yields a definition for embedded resources.
     * @param string $string
     * @return HTMLPurifier_AttrDef_URI
     */
    public function make($string)
    {
        return new HTMLPurifier_AttrDef_URI($string === 'embedded');
    }

    /**
     * @param string $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($uri, $config, $context)
    {
        if ($config->get('URI.Disable')) {
            return false;
        }

        $cleaned = $this->parseCDATA($uri);

        // parse the URI
        $parsed = $this->parser->parse($cleaned);
        if ($parsed === false) {
            return false;
        }

        // add embedded flag to context for validators; it is removed
        // again whatever the outcome of the checks
        $context->register('EmbeddedURI', $this->embedsResource);
        $accepted = $this->survivesGauntlet($parsed, $config, $context);
        $context->destroy('EmbeddedURI');

        // back to string
        return $accepted ? $parsed->toString() : false;
    }

    /**
     * Runs the URI through every validation and filtering stage and stops
     * at the first failing one.
     *
     * The URI is taken by reference so that a stage receiving it by
     * reference can still replace it for the caller.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True when every stage accepted the URI
     */
    private function survivesGauntlet(&$uri, $config, $context)
    {
        // generic validation
        if (!$uri->validate($config, $context)) {
            return false;
        }

        // chained filtering
        $uri_def = $config->getDefinition('URI');
        if (!$uri_def->filter($uri, $config, $context)) {
            return false;
        }

        // scheme-specific validation
        $scheme_obj = $uri->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return false;
        }
        if ($this->embedsResource && !$scheme_obj->browsable) {
            return false;
        }
        if (!$scheme_obj->validate($uri, $config, $context)) {
            return false;
        }

        // Post chained filtering
        if (!$uri_def->postFilter($uri, $config, $context)) {
            return false;
        }

        // survived gauntlet
        return true;
    }
}
* @type HTMLPurifier_AttrDef */ protected $clone; /** * @param HTMLPurifier_AttrDef $clone */ public function __construct($clone) { $this->clone = $clone; } /** * @param string $v * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($v, $config, $context) { return $this->clone->validate($v, $config, $context); } /** * @param string $string * @return HTMLPurifier_AttrDef */ public function make($string) { return clone $this->clone; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/Enum.php 0000644 00000004243 15121423110 0016133 0 ustar 00 <?php // Enum = Enumerated /** * Validates a keyword against a list of valid values. * @warning The case-insensitive compare of this function uses PHP's * built-in strtolower and ctype_lower functions, which may * cause problems with international comparisons */ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef { /** * Lookup table of valid values. * @type array * @todo Make protected */ public $valid_values = array(); /** * Bool indicating whether or not enumeration is case sensitive. * @note In general this is always case insensitive. */ protected $case_sensitive = false; // values according to W3C spec /** * @param array $valid_values List of valid values * @param bool $case_sensitive Whether or not case sensitive */ public function __construct($valid_values = array(), $case_sensitive = false) { $this->valid_values = array_flip($valid_values); $this->case_sensitive = $case_sensitive; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = trim($string); if (!$this->case_sensitive) { // we may want to do full case-insensitive libraries $string = ctype_lower($string) ? $string : strtolower($string); } $result = isset($this->valid_values[$string]); return $result ? 
$string : false; } /** * @param string $string In form of comma-delimited list of case-insensitive * valid values. Example: "foo,bar,baz". Prepend "s:" to make * case sensitive * @return HTMLPurifier_AttrDef_Enum */ public function make($string) { if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') { $string = substr($string, 2); $sensitive = true; } else { $sensitive = false; } $values = explode(',', $string); return new HTMLPurifier_AttrDef_Enum($values, $sensitive); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php 0000644 00000002253 15121423110 0017050 0 ustar 00 <?php /** * Validates a color according to the HTML spec. */ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef { /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { static $colors = null; if ($colors === null) { $colors = $config->get('Core.ColorKeywords'); } $string = trim($string); if (empty($string)) { return false; } $lower = strtolower($string); if (isset($colors[$lower])) { return $colors[$lower]; } if ($string[0] === '#') { $hex = substr($string, 1); } else { $hex = $string; } $length = strlen($hex); if ($length !== 3 && $length !== 6) { return false; } if (!ctype_xdigit($hex)) { return false; } if ($length === 3) { $hex = $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2]; } return "#$hex"; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php 0000644 00000002464 15121423110 0020232 0 ustar 00 <?php /** * Validates a MultiLength as defined by the HTML spec. * * A multilength is either a integer (pixel count), a percentage, or * a relative number. 
*/ class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length { /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = trim($string); if ($string === '') { return false; } $parent_result = parent::validate($string, $config, $context); if ($parent_result !== false) { return $parent_result; } $length = strlen($string); $last_char = $string[$length - 1]; if ($last_char !== '*') { return false; } $int = substr($string, 0, $length - 1); if ($int == '') { return '*'; } if (!is_numeric($int)) { return false; } $int = (int)$int; if ($int < 0) { return false; } if ($int == 0) { return '0'; } if ($int == 1) { return '*'; } return ((string)$int) . '*'; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Class.php 0000644 00000002715 15121423110 0017042 0 ustar 00 <?php /** * Implements special behavior for class attribute (normally NMTOKENS) */ class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens { /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ protected function split($string, $config, $context) { // really, this twiddle should be lazy loaded $name = $config->getDefinition('HTML')->doctype->name; if ($name == "XHTML 1.1" || $name == "XHTML 2.0") { return parent::split($string, $config, $context); } else { return preg_split('/\s+/', $string); } } /** * @param array $tokens * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ protected function filter($tokens, $config, $context) { $allowed = $config->get('Attr.AllowedClasses'); $forbidden = $config->get('Attr.ForbiddenClasses'); $ret = array(); foreach ($tokens as $token) { if (($allowed === null || isset($allowed[$token])) && !isset($forbidden[$token]) && // We need this O(n) check because of 
PHP's array // implementation that casts -0 to 0. !in_array($token, $ret, true) ) { $ret[] = $token; } } return $ret; } } htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php 0000644 00000004141 15121423110 0017566 0 ustar 00 <?php /** * Validates contents based on NMTOKENS attribute type. */ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef { /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = trim($string); // early abort: '' and '0' (strings that convert to false) are invalid if (!$string) { return false; } $tokens = $this->split($string, $config, $context); $tokens = $this->filter($tokens, $config, $context); if (empty($tokens)) { return false; } return implode(' ', $tokens); } /** * Splits a space separated list of tokens into its constituent parts. * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ protected function split($string, $config, $context) { // OPTIMIZABLE! // do the preg_match, capture all subpatterns for reformulation // we don't support U+00A1 and up codepoints or // escaping because I don't know how to do that with regexps // and plus it would complicate optimization efforts (you never // see that anyway). $pattern = '/(?:(?<=\s)|\A)' . // look behind for space or string start '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)' . '(?:(?=\s)|\z)/'; // look ahead for space or string end preg_match_all($pattern, $string, $matches); return $matches[1]; } /** * Template method for removing certain tokens based on arbitrary criteria. * @note If we wanted to be really functional, we'd do an array_filter * with a callback. But... we're not. 
* @param array $tokens * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ protected function filter($tokens, $config, $context) { return $tokens; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php 0000644 00000001551 15121423110 0016665 0 ustar 00 <?php /** * Validates a boolean attribute */ class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef { /** * @type string */ protected $name; /** * @type bool */ public $minimized = true; /** * @param bool|string $name */ public function __construct($name = false) { $this->name = $name; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { return $this->name; } /** * @param string $string Name of attribute * @return HTMLPurifier_AttrDef_HTML_Bool */ public function make($string) { return new HTMLPurifier_AttrDef_HTML_Bool($string); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php 0000644 00000003350 15121423110 0017713 0 ustar 00 <?php /** * Validates a rel/rev link attribute against a directive of allowed values * @note We cannot use Enum because link types allow multiple * values. * @note Assumes link types are ASCII text */ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef { /** * Name config attribute to pull. * @type string */ protected $name; /** * @param string $name */ public function __construct($name) { $configLookup = array( 'rel' => 'AllowedRel', 'rev' => 'AllowedRev' ); if (!isset($configLookup[$name])) { trigger_error( 'Unrecognized attribute name for link ' . 
'relationship.', E_USER_ERROR ); return; } $this->name = $configLookup[$name]; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $allowed = $config->get('Attr.' . $this->name); if (empty($allowed)) { return false; } $string = $this->parseCDATA($string); $parts = explode(' ', $string); // lookup to prevent duplicates $ret_lookup = array(); foreach ($parts as $part) { $part = strtolower(trim($part)); if (!isset($allowed[$part])) { continue; } $ret_lookup[$part] = true; } if (empty($ret_lookup)) { return false; } $string = implode(' ', array_keys($ret_lookup)); return $string; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php 0000644 00000001502 15121423110 0020167 0 ustar 00 <?php /** * Special-case enum attribute definition that lazy loads allowed frame targets */ class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum { /** * @type array */ public $valid_values = false; // uninitialized value /** * @type bool */ protected $case_sensitive = false; public function __construct() { } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { if ($this->valid_values === false) { $this->valid_values = $config->get('Attr.AllowedFrameTargets'); } return parent::validate($string, $config, $context); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php 0000644 00000006204 15121423110 0016266 0 ustar 00 <?php /** * Validates the HTML attribute ID. * @warning Even though this is the id processor, it * will ignore the directive Attr:IDBlacklist, since it will only * go according to the ID accumulator. Since the accumulator is * automatically generated, it will have already absorbed the * blacklist. 
If you're hacking around, make sure you use load()!
 */
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{

    // selector is NOT a valid thing to use for IDREFs, because IDREFs
    // *must* target IDs that exist, whereas selector #ids do not.

    /**
     * Determines whether or not we're validating an ID in a CSS
     * selector context.
     * @type bool
     */
    protected $selector;

    /**
     * @param bool $selector
     */
    public function __construct($selector = false)
    {
        $this->selector = $selector;
    }

    /**
     * Validates an ID, applies the configured prefix and (outside selector
     * context) records it in the IDAccumulator found in the context.
     *
     * Steps, in order:
     *  - outside selector context, reject everything unless %Attr.EnableID
     *    is set;
     *  - trim the value and reject the empty string;
     *  - when %Attr.IDPrefix is non-empty, prepend
     *    %Attr.IDPrefix . %Attr.IDPrefixLocal unless the ID already starts
     *    with it; a lone %Attr.IDPrefixLocal raises an E_USER_WARNING;
     *  - outside selector context, reject IDs already present in the
     *    accumulator;
     *  - check the character set (HTML5 mode: no whitespace; otherwise a
     *    leading letter followed by letters, digits and ":-._");
     *  - reject IDs matching %Attr.IDBlacklistRegexp;
     *  - outside selector context, add the ID to the accumulator.
     *
     * @param string $id
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The (possibly prefixed) ID, or false if rejected.
     */
    public function validate($id, $config, $context)
    {
        if (!$this->selector && !$config->get('Attr.EnableID')) {
            return false;
        }

        $id = trim($id); // trim it first

        if ($id === '') {
            return false;
        }

        $prefix = $config->get('Attr.IDPrefix');
        if ($prefix !== '') {
            $prefix .= $config->get('Attr.IDPrefixLocal');
            // prevent re-appending the prefix
            if (strpos($id, $prefix) !== 0) {
                $id = $prefix . $id;
            }
        } elseif ($config->get('Attr.IDPrefixLocal') !== '') {
            trigger_error(
                '%Attr.IDPrefixLocal cannot be used unless ' .
                '%Attr.IDPrefix is set',
                E_USER_WARNING
            );
        }

        if (!$this->selector) {
            $id_accumulator =& $context->get('IDAccumulator');
            if (isset($id_accumulator->ids[$id])) {
                return false;
            }
        }

        if ($config->get('Attr.ID.HTML5') === true) {
            // An HTML5 id must not contain ASCII whitespace. CR (\r) was
            // missing from this class, so "a\rb" used to be accepted.
            // \x0b is kept so nothing previously rejected is now allowed.
            if (preg_match('/[\t\n\x0b\x0c\r ]/', $id)) {
                return false;
            }
        } elseif (!ctype_alpha($id)) {
            // we purposely avoid using regex, hopefully this is faster

            // $id is guaranteed non-empty here (checked after trim(), and
            // the prefix only ever lengthens it), so no @-suppression is
            // needed on the offset access.
            if (!ctype_alpha($id[0])) {
                return false;
            }
            // primitive style of regexps, I suppose: strip every allowed
            // character from both ends; anything left over is illegal.
            $trim = trim(
                $id,
                'A..Za..z0..9:-._'
            );
            if ($trim !== '') {
                return false;
            }
        }

        $regexp = $config->get('Attr.IDBlacklistRegexp');
        if ($regexp && preg_match($regexp, $id)) {
            return false;
        }

        if (!$this->selector) {
            $id_accumulator->add($id);
        }

        // The ID survived every check; return it in its trimmed and
        // (if configured) prefixed form.
        return $id;
    }
}

// vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php 0000644 00000002342 15121423110 0017212 0 ustar 00 <?php
/**
 * Validates the HTML type length (not to be confused with CSS's length).
 *
 * This accepts integer pixels or percentages as lengths for certain
 * HTML attributes.
 */
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
{

    /**
     * Accepts whatever the parent validator accepts; otherwise accepts a
     * numeric value followed by "%", truncated to an integer and clamped
     * to the range 0%..100%.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '') {
            return false;
        }

        // Let the parent validator have the first go.
        $parent_result = parent::validate($string, $config, $context);
        if ($parent_result !== false) {
            return $parent_result;
        }

        $length = strlen($string);
        $last_char = $string[$length - 1];

        if ($last_char !== '%') {
            return false;
        }

        $points = substr($string, 0, $length - 1);

        if (!is_numeric($points)) {
            return false;
        }

        // Fractions are dropped by the cast, not rounded.
        $points = (int)$points;

        if ($points < 0) {
            return '0%';
        }
        if ($points > 100) {
            return '100%';
        }
        return ((string)$points) . '%';
    }
}

// vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Pixels.php 0000644 00000003274 15121423110 0017242 0 ustar 00 <?php /** * Validates an integer representation of pixels according to the HTML spec.
*/ class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef { /** * @type int */ protected $max; /** * @param int $max */ public function __construct($max = null) { $this->max = $max; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = trim($string); if ($string === '0') { return $string; } if ($string === '') { return false; } $length = strlen($string); if (substr($string, $length - 2) == 'px') { $string = substr($string, 0, $length - 2); } if (!is_numeric($string)) { return false; } $int = (int)$string; if ($int < 0) { return '0'; } // upper-bound value, extremely high values can // crash operating systems, see <http://ha.ckers.org/imagecrash.html> // WARNING, above link WILL crash you if you're using Windows if ($this->max !== null && $int > $this->max) { return (string)$this->max; } return (string)$int; } /** * @param string $string * @return HTMLPurifier_AttrDef */ public function make($string) { if ($string === '') { $max = null; } else { $max = (int)$string; } $class = get_class($this); return new $class($max); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email.php 0000644 00000000527 15121423110 0016716 0 ustar 00 <?php abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef { /** * Unpacks a mailbox into its display-name and address * @param string $string * @return mixed */ public function unpack($string) { // needs to be implemented } } // sub-implementations // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php 0000644 00000012432 15121423110 0016602 0 ustar 00 <?php /** * Validates a host according to the IPv4, IPv6 and DNS (future) specifications. */ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef { /** * IPv4 sub-validator. * @type HTMLPurifier_AttrDef_URI_IPv4 */ protected $ipv4; /** * IPv6 sub-validator. 
* @type HTMLPurifier_AttrDef_URI_IPv6 */ protected $ipv6; public function __construct() { $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4(); $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6(); } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $length = strlen($string); // empty hostname is OK; it's usually semantically equivalent: // the default host as defined by a URI scheme is used: // // If the URI scheme defines a default for host, then that // default applies when the host subcomponent is undefined // or when the registered name is empty (zero length). if ($string === '') { return ''; } if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') { //IPv6 $ip = substr($string, 1, $length - 2); $valid = $this->ipv6->validate($ip, $config, $context); if ($valid === false) { return false; } return '[' . $valid . ']'; } // need to do checks on unusual encodings too $ipv4 = $this->ipv4->validate($string, $config, $context); if ($ipv4 !== false) { return $ipv4; } // A regular domain name. // This doesn't match I18N domain names, but we don't have proper IRI support, // so force users to insert Punycode. // There is not a good sense in which underscores should be // allowed, since it's technically not! (And if you go as // far to allow everything as specified by the DNS spec... // well, that's literally everything, modulo some space limits // for the components and the overall name (which, by the way, // we are NOT checking!). So we (arbitrarily) decide this: // let's allow underscores wherever we would have allowed // hyphens, if they are enabled. This is a pretty good match // for browser behavior, for example, a large number of browsers // cannot handle foo_.example.com, but foo_bar.example.com is // fairly well supported. $underscore = $config->get('Core.AllowHostnameUnderscore') ? 
'_' : ''; // Based off of RFC 1738, but amended so that // as per RFC 3696, the top label need only not be all numeric. // The productions describing this are: $a = '[a-z]'; // alpha $an = '[a-z0-9]'; // alphanum $and = "[a-z0-9-$underscore]"; // alphanum | "-" // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum $domainlabel = "$an(?:$and*$an)?"; // AMENDED as per RFC 3696 // toplabel = alphanum | alphanum *( alphanum | "-" ) alphanum // side condition: not all numeric $toplabel = "$an(?:$and*$an)?"; // hostname = *( domainlabel "." ) toplabel [ "." ] if (preg_match("/^(?:$domainlabel\.)*($toplabel)\.?$/i", $string, $matches)) { if (!ctype_digit($matches[1])) { return $string; } } // PHP 5.3 and later support this functionality natively if (function_exists('idn_to_ascii')) { if (defined('IDNA_NONTRANSITIONAL_TO_ASCII') && defined('INTL_IDNA_VARIANT_UTS46')) { $string = idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46); } else { $string = idn_to_ascii($string); } // If we have Net_IDNA2 support, we can support IRIs by // punycoding them. 
(This is the most portable thing to do, // since otherwise we have to assume browsers support } elseif ($config->get('Core.EnableIDNA')) { $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true)); // we need to encode each period separately $parts = explode('.', $string); try { $new_parts = array(); foreach ($parts as $part) { $encodable = false; for ($i = 0, $c = strlen($part); $i < $c; $i++) { if (ord($part[$i]) > 0x7a) { $encodable = true; break; } } if (!$encodable) { $new_parts[] = $part; } else { $new_parts[] = $idna->encode($part); } } $string = implode('.', $new_parts); } catch (Exception $e) { // XXX error reporting } } // Try again if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { return $string; } return false; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv6.php 0000644 00000004655 15121423110 0016461 0 ustar 00 <?php /** * Validates an IPv6 address. * @author Feyd @ forums.devnetwork.net (public domain) * @note This function requires brackets to have been removed from address * in URI. */ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4 { /** * @param string $aIP * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($aIP, $config, $context) { if (!$this->ip4) { $this->_loadRegex(); } $original = $aIP; $hex = '[0-9a-fA-F]'; $blk = '(?:' . $hex . '{1,4})'; $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 // prefix check if (strpos($aIP, '/') !== false) { if (preg_match('#' . $pre . '$#s', $aIP, $find)) { $aIP = substr($aIP, 0, 0 - strlen($find[0])); unset($find); } else { return false; } } // IPv4-compatiblity check if (preg_match('#(?<=:' . ')' . $this->ip4 . '$#s', $aIP, $find)) { $aIP = substr($aIP, 0, 0 - strlen($find[0])); $ip = explode('.', $find[0]); $ip = array_map('dechex', $ip); $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . 
$ip[3]; unset($find, $ip); } // compression check $aIP = explode('::', $aIP); $c = count($aIP); if ($c > 2) { return false; } elseif ($c == 2) { list($first, $second) = $aIP; $first = explode(':', $first); $second = explode(':', $second); if (count($first) + count($second) > 8) { return false; } while (count($first) < 8) { array_push($first, '0'); } array_splice($first, 8 - count($second), 8, $second); $aIP = $first; unset($first, $second); } else { $aIP = explode(':', $aIP[0]); } $c = count($aIP); if ($c != 8) { return false; } // All the pieces should be 16-bit hex strings. Are they? foreach ($aIP as $piece) { if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) { return false; } } return $original; } }
// vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php 0000644 00000001470 15121423110 0021103 0 ustar 00 <?php
/**
 * Primitive email validation class based on the regexp found at
 * http://www.regular-expressions.info/email.html
 */
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
{

    /**
     * Accepts a bare "local@domain.tld" address and returns it trimmed.
     *
     * The top-level label may be 2 to 63 letters long. The previous upper
     * bound of 4 rejected valid addresses under longer TLDs such as
     * ".museum"; 63 is the maximum length of a single DNS label.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The trimmed address, or false if it does not match.
     */
    public function validate($string, $config, $context)
    {
        // no support for named mailboxes i.e. "Bob <bob@example.com>"
        // that needs more percent encoding to be done
        if ($string == '') {
            return false;
        }
        $string = trim($string);
        $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,63}$/i', $string);
        return $result ? $string : false;
    }
}

// vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv4.php 0000644 00000001746 15121423110 0016455 0 ustar 00 <?php
/**
 * Validates an IPv4 address
 * @author Feyd @ forums.devnetwork.net (public domain)
 */
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
{

    /**
     * IPv4 regex, protected so that IPv6 can reuse it.
     * @type string
     */
    protected $ip4;

    /**
     * Returns the address unchanged when it is a dotted quad with every
     * octet in 0-255, false otherwise.
     *
     * @param string $aIP
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($aIP, $config, $context)
    {
        if (!$this->ip4) {
            $this->_loadRegex();
        }

        // The D modifier makes "$" match only at the very end of the
        // subject. Without it "1.2.3.4\n" matched and was returned with
        // its trailing newline intact.
        if (preg_match('#^' . $this->ip4 . '$#sD', $aIP)) {
            return $aIP;
        }
        return false;
    }

    /**
     * Lazy load function to prevent regex from being stuffed in
     * cache.
     */
    protected function _loadRegex()
    {
        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
        $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
    }
}

// vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Color.php 0000644 00000011110 15121423110 0016724 0 ustar 00 <?php /** * Validates Color as defined by CSS. */ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef { /** * @type HTMLPurifier_AttrDef_CSS_AlphaValue */ protected $alpha; public function __construct() { $this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue(); } /** * @param string $color * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($color, $config, $context) { static $colors = null; if ($colors === null) { $colors = $config->get('Core.ColorKeywords'); } $color = trim($color); if ($color === '') { return false; } $lower = strtolower($color); if (isset($colors[$lower])) { return $colors[$lower]; } if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) { $length = strlen($color); if (strpos($color, ')') !== $length - 1) { return false; } // get used function : rgb, rgba, hsl or hsla $function = $matches[1]; $parameters_size = 3; $alpha_channel = false; if (substr($function, -1) === 'a') { $parameters_size = 4; $alpha_channel = true; } /* * Allowed types for values : * parameter_position => [type => max_value] */ $allowed_types = array( 1 => array('percentage' => 100, 'integer' => 255), 2 => array('percentage' => 100, 'integer' => 255), 3 => array('percentage' => 100,
'integer' => 255), ); $allow_different_types = false; if (strpos($function, 'hsl') !== false) { $allowed_types = array( 1 => array('integer' => 360), 2 => array('percentage' => 100), 3 => array('percentage' => 100), ); $allow_different_types = true; } $values = trim(str_replace($function, '', $color), ' ()'); $parts = explode(',', $values); if (count($parts) !== $parameters_size) { return false; } $type = false; $new_parts = array(); $i = 0; foreach ($parts as $part) { $i++; $part = trim($part); if ($part === '') { return false; } // different check for alpha channel if ($alpha_channel === true && $i === count($parts)) { $result = $this->alpha->validate($part, $config, $context); if ($result === false) { return false; } $new_parts[] = (string)$result; continue; } if (substr($part, -1) === '%') { $current_type = 'percentage'; } else { $current_type = 'integer'; } if (!array_key_exists($current_type, $allowed_types[$i])) { return false; } if (!$type) { $type = $current_type; } if ($allow_different_types === false && $type != $current_type) { return false; } $max_value = $allowed_types[$i][$current_type]; if ($current_type == 'integer') { // Return value between range 0 -> $max_value $new_parts[] = (int)max(min($part, $max_value), 0); } elseif ($current_type == 'percentage') { $new_parts[] = (float)max(min(rtrim($part, '%'), $max_value), 0) . '%'; } } $new_values = implode(',', $new_parts); $color = $function . '(' . $new_values . ')'; } else { // hexadecimal handling if ($color[0] === '#') { $hex = substr($color, 1); } else { $hex = $color; $color = '#' . 
$color; } $length = strlen($hex); if ($length !== 3 && $length !== 6) { return false; } if (!ctype_xdigit($hex)) { return false; } } return $color; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php 0000644 00000005010 15121423110 0016307 0 ustar 00 <?php /** * Validates a URI in CSS syntax, which uses url('http://example.com') * @note While theoretically speaking a URI in a CSS document could * be non-embedded, as of CSS2 there is no such usage so we're * generalizing it. This may need to be changed in the future. * @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as * the separator, you cannot put a literal semicolon in * in the URI. Try percent encoding it, in that case. */ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI { public function __construct() { parent::__construct(true); // always embedded } /** * @param string $uri_string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($uri_string, $config, $context) { // parse the URI out of the string and then pass it onto // the parent object $uri_string = $this->parseCDATA($uri_string); if (strpos($uri_string, 'url(') !== 0) { return false; } $uri_string = substr($uri_string, 4); if (strlen($uri_string) == 0) { return false; } $new_length = strlen($uri_string) - 1; if ($uri_string[$new_length] != ')') { return false; } $uri = trim(substr($uri_string, 0, $new_length)); if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) { $quote = $uri[0]; $new_length = strlen($uri) - 1; if ($uri[$new_length] !== $quote) { return false; } $uri = substr($uri, 1, $new_length - 1); } $uri = $this->expandCSSEscape($uri); $result = parent::validate($uri, $config, $context); if ($result === false) { return false; } // extra sanity check; should have been done by URI $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result); // suspicious characters are ()'; we're going to percent 
encode // them for safety. $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result); // there's an extra bug where ampersands lose their escaping on // an innerHTML cycle, so a very unlucky query parameter could // then change the meaning of the URL. Unfortunately, there's // not much we can do about that... return "url(\"$result\")"; } }
// vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ListStyle.php 0000644 00000005537 15121423110 0017622 0 ustar 00 <?php
/**
 * Validates shorthand CSS property list-style.
 * @warning Does not support url tokens that have internal spaces.
 */
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
{

    /**
     * Local copy of validators.
     * @type HTMLPurifier_AttrDef[]
     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
     */
    protected $info;

    /**
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $this->info['list-style-type'] = $def->info['list-style-type'];
        $this->info['list-style-position'] = $def->info['list-style-position'];
        $this->info['list-style-image'] = $def->info['list-style-image'];
    }

    /**
     * Splits the value on spaces and offers each token to the type,
     * position and image validators (in that order) until one accepts it.
     * Tokens no validator accepts are dropped. The result is rebuilt as
     * "type image position" from whatever was caught.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The normalised shorthand, or false if nothing
     *         usable was found.
     */
    public function validate($string, $config, $context)
    {
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // assumes URI doesn't have spaces in it
        // NOTE(review): the whole value is lowercased, including any
        // url(...) token. Presumably that alters case-sensitive paths;
        // confirm against the image validator before changing it.
        $bits = explode(' ', strtolower($string)); // bits to process

        $caught = array();
        $caught['type'] = false;
        $caught['position'] = false;
        $caught['image'] = false;

        $i = 0; // number of catches
        $none = false;

        foreach ($bits as $bit) {
            if ($i >= 3) {
                // optimization bit: all three slots are filled, so no
                // later token can be caught. This used to be a bare
                // "return;", which handed null to the caller instead of
                // the documented bool|string whenever a fourth token
                // followed three valid ones. Stopping the scan gives the
                // same result as letting the loop run to the end.
                break;
            }
            if ($bit === '') {
                continue;
            }
            foreach ($caught as $key => $status) {
                if ($status !== false) {
                    continue;
                }
                $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
                if ($r === false) {
                    continue;
                }
                if ($r === 'none') {
                    // "none" is only honoured once, and never recorded
                    // for the image slot.
                    if ($none) {
                        continue;
                    } else {
                        $none = true;
                    }
                    if ($key == 'image') {
                        continue;
                    }
                }
                $caught[$key] = $r;
                $i++;
                break;
            }
        }

        if (!$i) {
            return false;
        }

        $ret = array();

        // construct type
        if ($caught['type']) {
            $ret[] = $caught['type'];
        }

        // construct image
        if ($caught['image']) {
            $ret[] = $caught['image'];
        }

        // construct position
        if ($caught['position']) {
            $ret[] = $caught['position'];
        }

        if (empty($ret)) {
            return false;
        }
        return implode(' ', $ret);
    }
}

// vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php 0000644 00000003075 15121423110 0021501 0 ustar 00 <?php
/**
 * Decorator which enables !important to be used in CSS values.
 */
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{
    /**
     * @type HTMLPurifier_AttrDef
     */
    public $def;

    /**
     * @type bool
     */
    public $allow;

    /**
     * @param HTMLPurifier_AttrDef $def Definition to wrap
     * @param bool $allow Whether or not to allow !important
     */
    public function __construct($def, $allow = false)
    {
        $this->def = $def;
        $this->allow = $allow;
    }

    /**
     * Intercepts and removes !important if necessary.
     *
     * A trailing "!important" (whitespace allowed between "!" and
     * "important") is stripped before the wrapped definition validates
     * the value. It is re-appended only when $allow is true and the
     * wrapped definition accepted the value.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        // test for ! and important tokens
        $string = trim($string);
        $is_important = false;
        // :TODO: optimization: test directly for !important and ! important
        if (strlen($string) >= 9 && substr($string, -9) === 'important') {
            $temp = rtrim(substr($string, 0, -9));
            // use a temp, because we might want to restore important
            if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
                $string = rtrim(substr($temp, 0, -1));
                $is_important = true;
            }
        }

        $result = $this->def->validate($string, $config, $context);

        // A rejected value must stay rejected. Previously the suffix was
        // concatenated onto false, producing the string " !important".
        if ($result === false) {
            return false;
        }

        if ($this->allow && $is_important) {
            $result .= ' !important';
        }
        return $result;
    }
}

// vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Font.php 0000644 00000014721 15121423110 0016567 0 ustar 00 <?php /** * Validates shorthand CSS property font. */ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef { /** * Local copy of validators * @type HTMLPurifier_AttrDef[] * @note If we moved specific CSS property definitions to their own * classes instead of having them be assembled at run time by * CSSDefinition, this wouldn't be necessary. We'd instantiate * our own copies. */ protected $info = array(); /** * @param HTMLPurifier_Config $config */ public function __construct($config) { $def = $config->getCSSDefinition(); $this->info['font-style'] = $def->info['font-style']; $this->info['font-variant'] = $def->info['font-variant']; $this->info['font-weight'] = $def->info['font-weight']; $this->info['font-size'] = $def->info['font-size']; $this->info['line-height'] = $def->info['line-height']; $this->info['font-family'] = $def->info['font-family']; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { static $system_fonts = array( 'caption' => true, 'icon' => true, 'menu' => true, 'message-box' => true, 'small-caption' => true, 'status-bar' => true ); // regular pre-processing $string = $this->parseCDATA($string); if ($string === '') { return false; } // check if it's one of the keywords $lowercase_string = strtolower($string); if
(isset($system_fonts[$lowercase_string])) { return $lowercase_string; } $bits = explode(' ', $string); // bits to process $stage = 0; // this indicates what we're looking for $caught = array(); // which stage 0 properties have we caught? $stage_1 = array('font-style', 'font-variant', 'font-weight'); $final = ''; // output for ($i = 0, $size = count($bits); $i < $size; $i++) { if ($bits[$i] === '') { continue; } switch ($stage) { case 0: // attempting to catch font-style, font-variant or font-weight foreach ($stage_1 as $validator_name) { if (isset($caught[$validator_name])) { continue; } $r = $this->info[$validator_name]->validate( $bits[$i], $config, $context ); if ($r !== false) { $final .= $r . ' '; $caught[$validator_name] = true; break; } } // all three caught, continue on if (count($caught) >= 3) { $stage = 1; } if ($r !== false) { break; } case 1: // attempting to catch font-size and perhaps line-height $found_slash = false; if (strpos($bits[$i], '/') !== false) { list($font_size, $line_height) = explode('/', $bits[$i]); if ($line_height === '') { // ooh, there's a space after the slash! $line_height = false; $found_slash = true; } } else { $font_size = $bits[$i]; $line_height = false; } $r = $this->info['font-size']->validate( $font_size, $config, $context ); if ($r !== false) { $final .= $r; // attempt to catch line-height if ($line_height === false) { // we need to scroll forward for ($j = $i + 1; $j < $size; $j++) { if ($bits[$j] === '') { continue; } if ($bits[$j] === '/') { if ($found_slash) { return false; } else { $found_slash = true; continue; } } $line_height = $bits[$j]; break; } } else { // slash already found $found_slash = true; $j = $i; } if ($found_slash) { $i = $j; $r = $this->info['line-height']->validate( $line_height, $config, $context ); if ($r !== false) { $final .= '/' . 
$r; } } $final .= ' '; $stage = 2; break; } return false; case 2: // attempting to catch font-family $font_family = implode(' ', array_slice($bits, $i, $size - $i)); $r = $this->info['font-family']->validate( $font_family, $config, $context ); if ($r !== false) { $final .= $r . ' '; // processing completed successfully return rtrim($final); } return false; } } return false; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Percentage.php 0000644 00000002377 15121423110 0017742 0 ustar 00 <?php /** * Validates a Percentage as defined by the CSS spec. */ class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef { /** * Instance to defer number validation to. * @type HTMLPurifier_AttrDef_CSS_Number */ protected $number_def; /** * @param bool $non_negative Whether to forbid negative values */ public function __construct($non_negative = false) { $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative); } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = $this->parseCDATA($string); if ($string === '') { return false; } $length = strlen($string); if ($length === 1) { return false; } if ($string[$length - 1] !== '%') { return false; } $number = substr($string, 0, $length - 1); $number = $this->number_def->validate($number, $config, $context); if ($number === false) { return false; } return "$number%"; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php 0000644 00000001324 15121423110 0016717 0 ustar 00 <?php /** * Validates based on {ident} CSS grammar production */ class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef { /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = trim($string); // early abort: 
'' and '0' (strings that convert to false) are invalid if (!$string) { return false; } $pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/'; if (!preg_match($pattern, $string)) { return false; } return $string; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Composite.php 0000644 00000002464 15121423110 0017624 0 ustar 00 <?php /** * Allows multiple validators to attempt to validate attribute. * * Composite is just what it sounds like: a composite of many validators. * This means that multiple HTMLPurifier_AttrDef objects will have a whack * at the string. If one of them passes, that's what is returned. This is * especially useful for CSS values, which often are a choice between * an enumerated set of predefined values or a flexible data type. */ class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef { /** * List of objects that may process strings. * @type HTMLPurifier_AttrDef[] * @todo Make protected */ public $defs; /** * @param HTMLPurifier_AttrDef[] $defs List of HTMLPurifier_AttrDef objects */ public function __construct($defs) { $this->defs = $defs; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { foreach ($this->defs as $i => $def) { $result = $this->defs[$i]->validate($string, $config, $context); if ($result !== false) { return $result; } } return false; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Border.php 0000644 00000003067 15121423110 0017077 0 ustar 00 <?php /** * Validates the border property as defined by CSS. */ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef { /** * Local copy of properties this property is shorthand for. 
* @type HTMLPurifier_AttrDef[] */ protected $info = array(); /** * @param HTMLPurifier_Config $config */ public function __construct($config) { $def = $config->getCSSDefinition(); $this->info['border-width'] = $def->info['border-width']; $this->info['border-style'] = $def->info['border-style']; $this->info['border-top-color'] = $def->info['border-top-color']; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = $this->parseCDATA($string); $string = $this->mungeRgb($string); $bits = explode(' ', $string); $done = array(); // segments we've finished $ret = ''; // return value foreach ($bits as $bit) { foreach ($this->info as $propname => $validator) { if (isset($done[$propname])) { continue; } $r = $validator->validate($bit, $config, $context); if ($r !== false) { $ret .= $r . ' '; $done[$propname] = true; break; } } } return rtrim($ret); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Number.php 0000644 00000004357 15121423110 0017115 0 ustar 00 <?php /** * Validates a number as defined by the CSS spec. */ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef { /** * Indicates whether or not only positive values are allowed. * @type bool */ protected $non_negative = false; /** * @param bool $non_negative indicates whether negatives are forbidden */ public function __construct($non_negative = false) { $this->non_negative = $non_negative; } /** * @param string $number * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return string|bool * @warning Some contexts do not pass $config, $context. 
These * variables should not be used without checking HTMLPurifier_Length */ public function validate($number, $config, $context) { $number = $this->parseCDATA($number); if ($number === '') { return false; } if ($number === '0') { return '0'; } $sign = ''; switch ($number[0]) { case '-': if ($this->non_negative) { return false; } $sign = '-'; case '+': $number = substr($number, 1); } if (ctype_digit($number)) { $number = ltrim($number, '0'); return $number ? $sign . $number : '0'; } // Period is the only non-numeric character allowed if (strpos($number, '.') === false) { return false; } list($left, $right) = explode('.', $number, 2); if ($left === '' && $right === '') { return false; } if ($left !== '' && !ctype_digit($left)) { return false; } // Remove leading zeros until positive number or a zero stays left if (ltrim($left, '0') != '') { $left = ltrim($left, '0'); } else { $left = '0'; } $right = rtrim($right, '0'); if ($right === '') { return $left ? $sign . $left : '0'; } elseif (!ctype_digit($right)) { return false; } return $sign . $left . '.' . 
$right; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php 0000644 00000001431 15121423110 0017675 0 ustar 00 <?php class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number { public function __construct() { parent::__construct(false); // opacity is non-negative, but we will clamp it } /** * @param string $number * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return string */ public function validate($number, $config, $context) { $result = parent::validate($number, $config, $context); if ($result === false) { return $result; } $float = (float)$result; if ($float < 0.0) { $result = '0'; } if ($float > 1.0) { $result = '1'; } return $result; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Filter.php 0000644 00000004426 15121423110 0017107 0 ustar 00 <?php /** * Microsoft's proprietary filter: CSS property * @note Currently supports the alpha filter. In the future, this will * probably need an extensible framework */ class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef { /** * @type HTMLPurifier_AttrDef_Integer */ protected $intValidator; public function __construct() { $this->intValidator = new HTMLPurifier_AttrDef_Integer(); } /** * @param string $value * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($value, $config, $context) { $value = $this->parseCDATA($value); if ($value === 'none') { return $value; } // if we looped this we could support multiple filters $function_length = strcspn($value, '('); $function = trim(substr($value, 0, $function_length)); if ($function !== 'alpha' && $function !== 'Alpha' && $function !== 'progid:DXImageTransform.Microsoft.Alpha' ) { return false; } $cursor = $function_length + 1; $parameters_length = strcspn($value, ')', $cursor); $parameters = substr($value, $cursor, $parameters_length); $params = explode(',', $parameters); 
$ret_params = array(); $lookup = array(); foreach ($params as $param) { list($key, $value) = explode('=', $param); $key = trim($key); $value = trim($value); if (isset($lookup[$key])) { continue; } if ($key !== 'opacity') { continue; } $value = $this->intValidator->validate($value, $config, $context); if ($value === false) { continue; } $int = (int)$value; if ($int > 100) { $value = '100'; } if ($int < 0) { $value = '0'; } $ret_params[] = "$key=$value"; $lookup[$key] = true; } $ret_parameters = implode(',', $ret_params); $ret_function = "$function($ret_parameters)"; return $ret_function; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php 0000644 00000010106 15121423110 0021456 0 ustar 00 <?php /* W3C says: [ // adjective and number must be in correct order, even if // you could switch them without introducing ambiguity. // some browsers support that syntax [ <percentage> | <length> | left | center | right ] [ <percentage> | <length> | top | center | bottom ]? ] | [ // this signifies that the vertical and horizontal adjectives // can be arbitrarily ordered, however, there can only be two, // one of each, or none at all [ left | center | right ] || [ top | center | bottom ] ] top, left = 0% center, (none) = 50% bottom, right = 100% */ /* QuirksMode says: keyword + length/percentage must be ordered correctly, as per W3C Internet Explorer and Opera, however, support arbitrary ordering. We should fix it up. Minor issue though, not strictly necessary. */ // control freaks may appreciate the ability to convert these to // percentages or something, but it's not necessary /** * Validates the value of background-position. 
*/ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef { /** * @type HTMLPurifier_AttrDef_CSS_Length */ protected $length; /** * @type HTMLPurifier_AttrDef_CSS_Percentage */ protected $percentage; public function __construct() { $this->length = new HTMLPurifier_AttrDef_CSS_Length(); $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage(); } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = $this->parseCDATA($string); $bits = explode(' ', $string); $keywords = array(); $keywords['h'] = false; // left, right $keywords['v'] = false; // top, bottom $keywords['ch'] = false; // center (first word) $keywords['cv'] = false; // center (second word) $measures = array(); $i = 0; $lookup = array( 'top' => 'v', 'bottom' => 'v', 'left' => 'h', 'right' => 'h', 'center' => 'c' ); foreach ($bits as $bit) { if ($bit === '') { continue; } // test for keyword $lbit = ctype_lower($bit) ? 
$bit : strtolower($bit); if (isset($lookup[$lbit])) { $status = $lookup[$lbit]; if ($status == 'c') { if ($i == 0) { $status = 'ch'; } else { $status = 'cv'; } } $keywords[$status] = $lbit; $i++; } // test for length $r = $this->length->validate($bit, $config, $context); if ($r !== false) { $measures[] = $r; $i++; } // test for percentage $r = $this->percentage->validate($bit, $config, $context); if ($r !== false) { $measures[] = $r; $i++; } } if (!$i) { return false; } // no valid values were caught $ret = array(); // first keyword if ($keywords['h']) { $ret[] = $keywords['h']; } elseif ($keywords['ch']) { $ret[] = $keywords['ch']; $keywords['cv'] = false; // prevent re-use: center = center center } elseif (count($measures)) { $ret[] = array_shift($measures); } if ($keywords['v']) { $ret[] = $keywords['v']; } elseif ($keywords['cv']) { $ret[] = $keywords['cv']; } elseif (count($measures)) { $ret[] = array_shift($measures); } if (empty($ret)) { return false; } return implode(' ', $ret); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Background.php 0000644 00000006210 15121423110 0017732 0 ustar 00 <?php /** * Validates shorthand CSS property background. * @warning Does not support url tokens that have internal spaces. */ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef { /** * Local copy of component validators. * @type HTMLPurifier_AttrDef[] * @note See HTMLPurifier_AttrDef_Font::$info for a similar impl. 
*/ protected $info; /** * @param HTMLPurifier_Config $config */ public function __construct($config) { $def = $config->getCSSDefinition(); $this->info['background-color'] = $def->info['background-color']; $this->info['background-image'] = $def->info['background-image']; $this->info['background-repeat'] = $def->info['background-repeat']; $this->info['background-attachment'] = $def->info['background-attachment']; $this->info['background-position'] = $def->info['background-position']; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { // regular pre-processing $string = $this->parseCDATA($string); if ($string === '') { return false; } // munge rgb() decl if necessary $string = $this->mungeRgb($string); // assumes URI doesn't have spaces in it $bits = explode(' ', $string); // bits to process $caught = array(); $caught['color'] = false; $caught['image'] = false; $caught['repeat'] = false; $caught['attachment'] = false; $caught['position'] = false; $i = 0; // number of catches foreach ($bits as $bit) { if ($bit === '') { continue; } foreach ($caught as $key => $status) { if ($key != 'position') { if ($status !== false) { continue; } $r = $this->info['background-' . $key]->validate($bit, $config, $context); } else { $r = $bit; } if ($r === false) { continue; } if ($key == 'position') { if ($caught[$key] === false) { $caught[$key] = ''; } $caught[$key] .= $r . 
' '; } else { $caught[$key] = $r; } $i++; break; } } if (!$i) { return false; } if ($caught['position'] !== false) { $caught['position'] = $this->info['background-position']-> validate($caught['position'], $config, $context); } $ret = array(); foreach ($caught as $value) { if ($value === false) { continue; } $ret[] = $value; } if (empty($ret)) { return false; } return implode(' ', $ret); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php 0000644 00000022301 15121423110 0017722 0 ustar 00 <?php /** * Validates a font family list according to CSS spec */ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef { protected $mask = null; public function __construct() { $this->mask = '_- '; for ($c = 'a'; $c <= 'z'; $c++) { $this->mask .= $c; } for ($c = 'A'; $c <= 'Z'; $c++) { $this->mask .= $c; } for ($c = '0'; $c <= '9'; $c++) { $this->mask .= $c; } // cast-y, but should be fine // special bytes used by UTF-8 for ($i = 0x80; $i <= 0xFF; $i++) { // We don't bother excluding invalid bytes in this range, // because the our restriction of well-formed UTF-8 will // prevent these from ever occurring. $this->mask .= chr($i); } /* PHP's internal strcspn implementation is O(length of string * length of mask), making it inefficient for large masks. However, it's still faster than preg_match 8) for (p = s1;;) { spanp = s2; do { if (*spanp == c || p == s1_end) { return p - s1; } } while (spanp++ < (s2_end - 1)); c = *++p; } */ // possible optimization: invert the mask. 
} /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { static $generic_names = array( 'serif' => true, 'sans-serif' => true, 'monospace' => true, 'fantasy' => true, 'cursive' => true ); $allowed_fonts = $config->get('CSS.AllowedFonts'); // assume that no font names contain commas in them $fonts = explode(',', $string); $final = ''; foreach ($fonts as $font) { $font = trim($font); if ($font === '') { continue; } // match a generic name if (isset($generic_names[$font])) { if ($allowed_fonts === null || isset($allowed_fonts[$font])) { $final .= $font . ', '; } continue; } // match a quoted name if ($font[0] === '"' || $font[0] === "'") { $length = strlen($font); if ($length <= 2) { continue; } $quote = $font[0]; if ($font[$length - 1] !== $quote) { continue; } $font = substr($font, 1, $length - 2); } $font = $this->expandCSSEscape($font); // $font is a pure representation of the font name if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) { continue; } if (ctype_alnum($font) && $font !== '') { // very simple font, allow it in unharmed $final .= $font . ', '; continue; } // bugger out on whitespace. form feed (0C) really // shouldn't show up regardless $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font); // Here, there are various classes of characters which need // to be treated differently: // - Alphanumeric characters are essentially safe. We // handled these above. // - Spaces require quoting, though most parsers will do // the right thing if there aren't any characters that // can be misinterpreted // - Dashes rarely occur, but they fairly unproblematic // for parsing/rendering purposes. // The above characters cover the majority of Western font // names. // - Arbitrary Unicode characters not in ASCII. 
Because // most parsers give little thought to Unicode, treatment // of these codepoints is basically uniform, even for // punctuation-like codepoints. These characters can // show up in non-Western pages and are supported by most // major browsers, for example: "MS 明朝" is a // legitimate font-name // <http://ja.wikipedia.org/wiki/MS_明朝>. See // the CSS3 spec for more examples: // <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png> // You can see live samples of these on the Internet: // <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック> // However, most of these fonts have ASCII equivalents: // for example, 'MS Mincho', and it's considered // professional to use ASCII font names instead of // Unicode font names. Thanks Takeshi Terada for // providing this information. // The following characters, to my knowledge, have not been // used to name font names. // - Single quote. While theoretically you might find a // font name that has a single quote in its name (serving // as an apostrophe, e.g. Dave's Scribble), I haven't // been able to find any actual examples of this. // Internet Explorer's cssText translation (which I // believe is invoked by innerHTML) normalizes any // quoting to single quotes, and fails to escape single // quotes. (Note that this is not IE's behavior for all // CSS properties, just some sort of special casing for // font-family). So a single quote *cannot* be used // safely in the font-family context if there will be an // innerHTML/cssText translation. Note that Firefox 3.x // does this too. // - Double quote. In IE, these get normalized to // single-quotes, no matter what the encoding. (Fun // fact, in IE8, the 'content' CSS property gained // support, where they special cased to preserve encoded // double quotes, but still translate unadorned double // quotes into single quotes.) So, because their // fixpoint behavior is identical to single quotes, they // cannot be allowed either. 
Firefox 3.x displays // single-quote style behavior. // - Backslashes are reduced by one (so \\ -> \) every // iteration, so they cannot be used safely. This shows // up in IE7, IE8 and FF3 // - Semicolons, commas and backticks are handled properly. // - The rest of the ASCII punctuation is handled properly. // We haven't checked what browsers do to unadorned // versions, but this is not important as long as the // browser doesn't /remove/ surrounding quotes (as IE does // for HTML). // // With these results in hand, we conclude that there are // various levels of safety: // - Paranoid: alphanumeric, spaces and dashes(?) // - International: Paranoid + non-ASCII Unicode // - Edgy: Everything except quotes, backslashes // - NoJS: Standards compliance, e.g. sod IE. Note that // with some judicious character escaping (since certain // types of escaping doesn't work) this is theoretically // OK as long as innerHTML/cssText is not called. // We believe that international is a reasonable default // (that we will implement now), and once we do more // extensive research, we may feel comfortable with dropping // it down to edgy. // Edgy: alphanumeric, spaces, dashes, underscores and Unicode. Use of // str(c)spn assumes that the string was already well formed // Unicode (which of course it is). if (strspn($font, $this->mask) !== strlen($font)) { continue; } // Historical: // In the absence of innerHTML/cssText, these ugly // transforms don't pose a security risk (as \\ and \" // might--these escapes are not supported by most browsers). // We could try to be clever and use single-quote wrapping // when there is a double quote present, but I have choosen // not to implement that. 
(NOTE: you can reduce the amount // of escapes by one depending on what quoting style you use) // $font = str_replace('\\', '\\5C ', $font); // $font = str_replace('"', '\\22 ', $font); // $font = str_replace("'", '\\27 ', $font); // font possibly with spaces, requires quoting $final .= "'$font', "; } $final = rtrim($final, ', '); if ($final === '') { return false; } return $final; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Length.php 0000644 00000003551 15121423110 0017101 0 ustar 00 <?php /** * Represents a Length as defined by CSS. */ class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef { /** * @type HTMLPurifier_Length|string */ protected $min; /** * @type HTMLPurifier_Length|string */ protected $max; /** * @param HTMLPurifier_Length|string $min Minimum length, or null for no bound. String is also acceptable. * @param HTMLPurifier_Length|string $max Maximum length, or null for no bound. String is also acceptable. */ public function __construct($min = null, $max = null) { $this->min = $min !== null ? HTMLPurifier_Length::make($min) : null; $this->max = $max !== null ? 
HTMLPurifier_Length::make($max) : null; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = $this->parseCDATA($string); // Optimizations if ($string === '') { return false; } if ($string === '0') { return '0'; } if (strlen($string) === 1) { return false; } $length = HTMLPurifier_Length::make($string); if (!$length->isValid()) { return false; } if ($this->min) { $c = $length->compareTo($this->min); if ($c === false) { return false; } if ($c < 0) { return false; } } if ($this->max) { $c = $length->compareTo($this->max); if ($c === false) { return false; } if ($c > 0) { return false; } } return $length->toString(); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php 0000644 00000002063 15121423110 0021731 0 ustar 00 <?php /** * Decorator which enables CSS properties to be disabled for specific elements. */ class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef { /** * @type HTMLPurifier_AttrDef */ public $def; /** * @type string */ public $element; /** * @param HTMLPurifier_AttrDef $def Definition to wrap * @param string $element Element to deny */ public function __construct($def, $element) { $this->def = $def; $this->element = $element; } /** * Checks if CurrentToken is set and equal to $this->element * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $token = $context->get('CurrentToken', true); if ($token && $token->name == $this->element) { return false; } return $this->def->validate($string, $config, $context); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php 0000644 00000002204 15121423110 0020606 0 ustar 00 <?php /** * Validates the value for the CSS property text-decoration * 
@note This class could be generalized into a version that acts sort of * like Enum except you can compound the allowed values. */ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef { /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { static $allowed_values = array( 'line-through' => true, 'overline' => true, 'underline' => true, ); $string = strtolower($this->parseCDATA($string)); if ($string === 'none') { return $string; } $parts = explode(' ', $string); $final = ''; foreach ($parts as $part) { if (isset($allowed_values[$part])) { $final .= $part . ' '; } } $final = rtrim($final); if ($final === '') { return false; } return $final; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Multiple.php 0000644 00000004054 15121423110 0017452 0 ustar 00 <?php /** * Framework class for strings that involve multiple values. * * Certain CSS properties such as border-width and margin allow multiple * lengths to be specified. This class can take a vanilla border-width * definition and multiply it, usually into a max of four. * * @note Even though the CSS specification isn't clear about it, inherit * can only be used alone: it will never manifest as part of a multi * shorthand declaration. Thus, this class does not allow inherit. */ class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef { /** * Instance of component definition to defer validation to. * @type HTMLPurifier_AttrDef * @todo Make protected */ public $single; /** * Max number of values allowed. 
* @todo Make protected */ public $max; /** * @param HTMLPurifier_AttrDef $single HTMLPurifier_AttrDef to multiply * @param int $max Max number of values allowed (usually four) */ public function __construct($single, $max = 4) { $this->single = $single; $this->max = $max; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = $this->mungeRgb($this->parseCDATA($string)); if ($string === '') { return false; } $parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n $length = count($parts); $final = ''; for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) { if (ctype_space($parts[$i])) { continue; } $result = $this->single->validate($parts[$i], $config, $context); if ($result !== false) { $final .= $result . ' '; $num++; } } if ($final === '') { return false; } return rtrim($final); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/Lang.php 0000644 00000004604 15121423110 0016111 0 ustar 00 <?php /** * Validates the HTML attribute lang, effectively a language code. 
* @note Built according to RFC 3066, which obsoleted RFC 1766 */ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef { /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { $string = trim($string); if (!$string) { return false; } $subtags = explode('-', $string); $num_subtags = count($subtags); if ($num_subtags == 0) { // sanity check return false; } // process primary subtag : $subtags[0] $length = strlen($subtags[0]); switch ($length) { case 0: return false; case 1: if (!($subtags[0] == 'x' || $subtags[0] == 'i')) { return false; } break; case 2: case 3: if (!ctype_alpha($subtags[0])) { return false; } elseif (!ctype_lower($subtags[0])) { $subtags[0] = strtolower($subtags[0]); } break; default: return false; } $new_string = $subtags[0]; if ($num_subtags == 1) { return $new_string; } // process second subtag : $subtags[1] $length = strlen($subtags[1]); if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) { return $new_string; } if (!ctype_lower($subtags[1])) { $subtags[1] = strtolower($subtags[1]); } $new_string .= '-' . $subtags[1]; if ($num_subtags == 2) { return $new_string; } // process all other subtags, index 2 and up for ($i = 2; $i < $num_subtags; $i++) { $length = strlen($subtags[$i]); if ($length == 0 || $length > 8 || !ctype_alnum($subtags[$i])) { return $new_string; } if (!ctype_lower($subtags[$i])) { $subtags[$i] = strtolower($subtags[$i]); } $new_string .= '-' . $subtags[$i]; } return $new_string; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/AttrDef/Text.php 0000644 00000000656 15121423110 0016157 0 ustar 00 <?php /** * Validates arbitrary text according to the HTML spec. 
*/ class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef { /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { return $this->parseCDATA($string); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Directive.php 0000644 00000003661 15121423110 0022374 0 ustar 00 <?php /** * Interchange component class describing configuration directives. */ class HTMLPurifier_ConfigSchema_Interchange_Directive { /** * ID of directive. * @type HTMLPurifier_ConfigSchema_Interchange_Id */ public $id; /** * Type, e.g. 'integer' or 'istring'. * @type string */ public $type; /** * Default value, e.g. 3 or 'DefaultVal'. * @type mixed */ public $default; /** * HTML description. * @type string */ public $description; /** * Whether or not null is allowed as a value. * @type bool */ public $typeAllowsNull = false; /** * Lookup table of allowed scalar values. * e.g. array('allowed' => true). * Null if all values are allowed. * @type array */ public $allowed; /** * List of aliases for the directive. * e.g. array(new HTMLPurifier_ConfigSchema_Interchange_Id('Ns', 'Dir'))). * @type HTMLPurifier_ConfigSchema_Interchange_Id[] */ public $aliases = array(); /** * Hash of value aliases, e.g. array('alt' => 'real'). Null if value * aliasing is disabled (necessary for non-scalar types). * @type array */ public $valueAliases; /** * Version of HTML Purifier the directive was introduced, e.g. '1.3.1'. * Null if the directive has always existed. * @type string */ public $version; /** * ID of directive that supercedes this old directive. * Null if not deprecated. * @type HTMLPurifier_ConfigSchema_Interchange_Id */ public $deprecatedUse; /** * Version of HTML Purifier this directive was deprecated. Null if not * deprecated. * @type string */ public $deprecatedVersion; /** * List of external projects this directive depends on, e.g. 
array('CSSTidy'). * @type array */ public $external = array(); } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Id.php 0000644 00000002061 15121423110 0021003 0 ustar 00 <?php /** * Represents a directive ID in the interchange format. */ class HTMLPurifier_ConfigSchema_Interchange_Id { /** * @type string */ public $key; /** * @param string $key */ public function __construct($key) { $this->key = $key; } /** * @return string * @warning This is NOT magic, to ensure that people don't abuse SPL and * cause problems for PHP 5.0 support. */ public function toString() { return $this->key; } /** * @return string */ public function getRootNamespace() { return substr($this->key, 0, strpos($this->key, ".")); } /** * @return string */ public function getDirective() { return substr($this->key, strpos($this->key, ".") + 1); } /** * @param string $id * @return HTMLPurifier_ConfigSchema_Interchange_Id */ public static function make($id) { return new HTMLPurifier_ConfigSchema_Interchange_Id($id); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange.php 0000644 00000002402 15121423110 0020446 0 ustar 00 <?php /** * Generic schema interchange format that can be converted to a runtime * representation (HTMLPurifier_ConfigSchema) or HTML documentation. Members * are completely validated. */ class HTMLPurifier_ConfigSchema_Interchange { /** * Name of the application this schema is describing. 
* @type string */ public $name; /** * Array of Directive ID => array(directive info) * @type HTMLPurifier_ConfigSchema_Interchange_Directive[] */ public $directives = array(); /** * Adds a directive array to $directives * @param HTMLPurifier_ConfigSchema_Interchange_Directive $directive * @throws HTMLPurifier_ConfigSchema_Exception */ public function addDirective($directive) { if (isset($this->directives[$i = $directive->id->toString()])) { throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine directive '$i'"); } $this->directives[$i] = $directive; } /** * Convenience function to perform standard validation. Throws exception * on failed validation. */ public function validate() { $validator = new HTMLPurifier_ConfigSchema_Validator(); return $validator->validate($this); } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/Xml.php 0000644 00000010424 15121423110 0020350 0 ustar 00 <?php /** * Converts HTMLPurifier_ConfigSchema_Interchange to an XML format, * which can be further processed to generate documentation. 
*/ class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter { /** * @type HTMLPurifier_ConfigSchema_Interchange */ protected $interchange; /** * @type string */ private $namespace; /** * @param string $html */ protected function writeHTMLDiv($html) { $this->startElement('div'); $purifier = HTMLPurifier::getInstance(); $html = $purifier->purify($html); $this->writeAttribute('xmlns', 'http://www.w3.org/1999/xhtml'); $this->writeRaw($html); $this->endElement(); // div } /** * @param mixed $var * @return string */ protected function export($var) { if ($var === array()) { return 'array()'; } return var_export($var, true); } /** * @param HTMLPurifier_ConfigSchema_Interchange $interchange */ public function build($interchange) { // global access, only use as last resort $this->interchange = $interchange; $this->setIndent(true); $this->startDocument('1.0', 'UTF-8'); $this->startElement('configdoc'); $this->writeElement('title', $interchange->name); foreach ($interchange->directives as $directive) { $this->buildDirective($directive); } if ($this->namespace) { $this->endElement(); } // namespace $this->endElement(); // configdoc $this->flush(); } /** * @param HTMLPurifier_ConfigSchema_Interchange_Directive $directive */ public function buildDirective($directive) { // Kludge, although I suppose having a notion of a "root namespace" // certainly makes things look nicer when documentation is built. // Depends on things being sorted. 
if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) { if ($this->namespace) { $this->endElement(); } // namespace $this->namespace = $directive->id->getRootNamespace(); $this->startElement('namespace'); $this->writeAttribute('id', $this->namespace); $this->writeElement('name', $this->namespace); } $this->startElement('directive'); $this->writeAttribute('id', $directive->id->toString()); $this->writeElement('name', $directive->id->getDirective()); $this->startElement('aliases'); foreach ($directive->aliases as $alias) { $this->writeElement('alias', $alias->toString()); } $this->endElement(); // aliases $this->startElement('constraints'); if ($directive->version) { $this->writeElement('version', $directive->version); } $this->startElement('type'); if ($directive->typeAllowsNull) { $this->writeAttribute('allow-null', 'yes'); } $this->text($directive->type); $this->endElement(); // type if ($directive->allowed) { $this->startElement('allowed'); foreach ($directive->allowed as $value => $x) { $this->writeElement('value', $value); } $this->endElement(); // allowed } $this->writeElement('default', $this->export($directive->default)); $this->writeAttribute('xml:space', 'preserve'); if ($directive->external) { $this->startElement('external'); foreach ($directive->external as $project) { $this->writeElement('project', $project); } $this->endElement(); } $this->endElement(); // constraints if ($directive->deprecatedVersion) { $this->startElement('deprecated'); $this->writeElement('version', $directive->deprecatedVersion); $this->writeElement('use', $directive->deprecatedUse->toString()); $this->endElement(); // deprecated } $this->startElement('description'); $this->writeHTMLDiv($directive->description); $this->endElement(); // description $this->endElement(); // directive } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php 0000644 00000002375 15121423110 0022144 0 ustar 00 <?php /** * Converts 
HTMLPurifier_ConfigSchema_Interchange to our runtime * representation used to perform checks on user configuration. */ class HTMLPurifier_ConfigSchema_Builder_ConfigSchema { /** * @param HTMLPurifier_ConfigSchema_Interchange $interchange * @return HTMLPurifier_ConfigSchema */ public function build($interchange) { $schema = new HTMLPurifier_ConfigSchema(); foreach ($interchange->directives as $d) { $schema->add( $d->id->key, $d->default, $d->type, $d->typeAllowsNull ); if ($d->allowed !== null) { $schema->addAllowedValues( $d->id->key, $d->allowed ); } foreach ($d->aliases as $alias) { $schema->addAlias( $alias->key, $d->id->key ); } if ($d->valueAliases !== null) { $schema->addValueAliases( $d->id->key, $d->valueAliases ); } } $schema->postProcess(); return $schema; } } // vim: et sw=4 sts=4 htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser 0000644 00000057176 15121423110 0017503 0 ustar 00 O:25:"HTMLPurifier_ConfigSchema":3:{s:8:"defaults";a:127:{s:19:"Attr.AllowedClasses";N;s:24:"Attr.AllowedFrameTargets";a:0:{}s:15:"Attr.AllowedRel";a:0:{}s:15:"Attr.AllowedRev";a:0:{}s:18:"Attr.ClassUseCDATA";N;s:20:"Attr.DefaultImageAlt";N;s:24:"Attr.DefaultInvalidImage";s:0:"";s:27:"Attr.DefaultInvalidImageAlt";s:13:"Invalid 
image";s:19:"Attr.DefaultTextDir";s:3:"ltr";s:13:"Attr.EnableID";b:0;s:21:"Attr.ForbiddenClasses";a:0:{}s:13:"Attr.ID.HTML5";N;s:16:"Attr.IDBlacklist";a:0:{}s:22:"Attr.IDBlacklistRegexp";N;s:13:"Attr.IDPrefix";s:0:"";s:18:"Attr.IDPrefixLocal";s:0:"";s:24:"AutoFormat.AutoParagraph";b:0;s:17:"AutoFormat.Custom";a:0:{}s:25:"AutoFormat.DisplayLinkURI";b:0;s:18:"AutoFormat.Linkify";b:0;s:33:"AutoFormat.PurifierLinkify.DocURL";s:3:"#%s";s:26:"AutoFormat.PurifierLinkify";b:0;s:32:"AutoFormat.RemoveEmpty.Predicate";a:4:{s:8:"colgroup";a:0:{}s:2:"th";a:0:{}s:2:"td";a:0:{}s:6:"iframe";a:1:{i:0;s:3:"src";}}s:44:"AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions";a:2:{s:2:"td";b:1;s:2:"th";b:1;}s:33:"AutoFormat.RemoveEmpty.RemoveNbsp";b:0;s:22:"AutoFormat.RemoveEmpty";b:0;s:39:"AutoFormat.RemoveSpansWithoutAttributes";b:0;s:19:"CSS.AllowDuplicates";b:0;s:18:"CSS.AllowImportant";b:0;s:15:"CSS.AllowTricky";b:0;s:16:"CSS.AllowedFonts";N;s:21:"CSS.AllowedProperties";N;s:17:"CSS.DefinitionRev";i:1;s:23:"CSS.ForbiddenProperties";a:0:{}s:16:"CSS.MaxImgLength";s:6:"1200px";s:15:"CSS.Proprietary";b:0;s:11:"CSS.Trusted";b:0;s:20:"Cache.DefinitionImpl";s:10:"Serializer";s:20:"Cache.SerializerPath";N;s:27:"Cache.SerializerPermissions";i:493;s:22:"Core.AggressivelyFixLt";b:1;s:29:"Core.AggressivelyRemoveScript";b:1;s:28:"Core.AllowHostnameUnderscore";b:0;s:23:"Core.AllowParseManyTags";b:0;s:18:"Core.CollectErrors";b:0;s:18:"Core.ColorKeywords";a:148:{s:9:"aliceblue";s:7:"#F0F8FF";s:12:"antiquewhite";s:7:"#FAEBD7";s:4:"aqua";s:7:"#00FFFF";s:10:"aquamarine";s:7:"#7FFFD4";s:5:"azure";s:7:"#F0FFFF";s:5:"beige";s:7:"#F5F5DC";s:6:"bisque";s:7:"#FFE4C4";s:5:"black";s:7:"#000000";s:14:"blanchedalmond";s:7:"#FFEBCD";s:4:"blue";s:7:"#0000FF";s:10:"blueviolet";s:7:"#8A2BE2";s:5:"brown";s:7:"#A52A2A";s:9:"burlywood";s:7:"#DEB887";s:9:"cadetblue";s:7:"#5F9EA0";s:10:"chartreuse";s:7:"#7FFF00";s:9:"chocolate";s:7:"#D2691E";s:5:"coral";s:7:"#FF7F50";s:14:"cornflowerblue";s:7:"#6495ED";s:8:"cornsilk";s:7:"#FFF8D
C";s:7:"crimson";s:7:"#DC143C";s:4:"cyan";s:7:"#00FFFF";s:8:"darkblue";s:7:"#00008B";s:8:"darkcyan";s:7:"#008B8B";s:13:"darkgoldenrod";s:7:"#B8860B";s:8:"darkgray";s:7:"#A9A9A9";s:8:"darkgrey";s:7:"#A9A9A9";s:9:"darkgreen";s:7:"#006400";s:9:"darkkhaki";s:7:"#BDB76B";s:11:"darkmagenta";s:7:"#8B008B";s:14:"darkolivegreen";s:7:"#556B2F";s:10:"darkorange";s:7:"#FF8C00";s:10:"darkorchid";s:7:"#9932CC";s:7:"darkred";s:7:"#8B0000";s:10:"darksalmon";s:7:"#E9967A";s:12:"darkseagreen";s:7:"#8FBC8F";s:13:"darkslateblue";s:7:"#483D8B";s:13:"darkslategray";s:7:"#2F4F4F";s:13:"darkslategrey";s:7:"#2F4F4F";s:13:"darkturquoise";s:7:"#00CED1";s:10:"darkviolet";s:7:"#9400D3";s:8:"deeppink";s:7:"#FF1493";s:11:"deepskyblue";s:7:"#00BFFF";s:7:"dimgray";s:7:"#696969";s:7:"dimgrey";s:7:"#696969";s:10:"dodgerblue";s:7:"#1E90FF";s:9:"firebrick";s:7:"#B22222";s:11:"floralwhite";s:7:"#FFFAF0";s:11:"forestgreen";s:7:"#228B22";s:7:"fuchsia";s:7:"#FF00FF";s:9:"gainsboro";s:7:"#DCDCDC";s:10:"ghostwhite";s:7:"#F8F8FF";s:4:"gold";s:7:"#FFD700";s:9:"goldenrod";s:7:"#DAA520";s:4:"gray";s:7:"#808080";s:4:"grey";s:7:"#808080";s:5:"green";s:7:"#008000";s:11:"greenyellow";s:7:"#ADFF2F";s:8:"honeydew";s:7:"#F0FFF0";s:7:"hotpink";s:7:"#FF69B4";s:9:"indianred";s:7:"#CD5C5C";s:6:"indigo";s:7:"#4B0082";s:5:"ivory";s:7:"#FFFFF0";s:5:"khaki";s:7:"#F0E68C";s:8:"lavender";s:7:"#E6E6FA";s:13:"lavenderblush";s:7:"#FFF0F5";s:9:"lawngreen";s:7:"#7CFC00";s:12:"lemonchiffon";s:7:"#FFFACD";s:9:"lightblue";s:7:"#ADD8E6";s:10:"lightcoral";s:7:"#F08080";s:9:"lightcyan";s:7:"#E0FFFF";s:20:"lightgoldenrodyellow";s:7:"#FAFAD2";s:9:"lightgray";s:7:"#D3D3D3";s:9:"lightgrey";s:7:"#D3D3D3";s:10:"lightgreen";s:7:"#90EE90";s:9:"lightpink";s:7:"#FFB6C1";s:11:"lightsalmon";s:7:"#FFA07A";s:13:"lightseagreen";s:7:"#20B2AA";s:12:"lightskyblue";s:7:"#87CEFA";s:14:"lightslategray";s:7:"#778899";s:14:"lightslategrey";s:7:"#778899";s:14:"lightsteelblue";s:7:"#B0C4DE";s:11:"lightyellow";s:7:"#FFFFE0";s:4:"lime";s:7:"#00FF00";s:9:"limegreen";
s:7:"#32CD32";s:5:"linen";s:7:"#FAF0E6";s:7:"magenta";s:7:"#FF00FF";s:6:"maroon";s:7:"#800000";s:16:"mediumaquamarine";s:7:"#66CDAA";s:10:"mediumblue";s:7:"#0000CD";s:12:"mediumorchid";s:7:"#BA55D3";s:12:"mediumpurple";s:7:"#9370DB";s:14:"mediumseagreen";s:7:"#3CB371";s:15:"mediumslateblue";s:7:"#7B68EE";s:17:"mediumspringgreen";s:7:"#00FA9A";s:15:"mediumturquoise";s:7:"#48D1CC";s:15:"mediumvioletred";s:7:"#C71585";s:12:"midnightblue";s:7:"#191970";s:9:"mintcream";s:7:"#F5FFFA";s:9:"mistyrose";s:7:"#FFE4E1";s:8:"moccasin";s:7:"#FFE4B5";s:11:"navajowhite";s:7:"#FFDEAD";s:4:"navy";s:7:"#000080";s:7:"oldlace";s:7:"#FDF5E6";s:5:"olive";s:7:"#808000";s:9:"olivedrab";s:7:"#6B8E23";s:6:"orange";s:7:"#FFA500";s:9:"orangered";s:7:"#FF4500";s:6:"orchid";s:7:"#DA70D6";s:13:"palegoldenrod";s:7:"#EEE8AA";s:9:"palegreen";s:7:"#98FB98";s:13:"paleturquoise";s:7:"#AFEEEE";s:13:"palevioletred";s:7:"#DB7093";s:10:"papayawhip";s:7:"#FFEFD5";s:9:"peachpuff";s:7:"#FFDAB9";s:4:"peru";s:7:"#CD853F";s:4:"pink";s:7:"#FFC0CB";s:4:"plum";s:7:"#DDA0DD";s:10:"powderblue";s:7:"#B0E0E6";s:6:"purple";s:7:"#800080";s:13:"rebeccapurple";s:7:"#663399";s:3:"red";s:7:"#FF0000";s:9:"rosybrown";s:7:"#BC8F8F";s:9:"royalblue";s:7:"#4169E1";s:11:"saddlebrown";s:7:"#8B4513";s:6:"salmon";s:7:"#FA8072";s:10:"sandybrown";s:7:"#F4A460";s:8:"seagreen";s:7:"#2E8B57";s:8:"seashell";s:7:"#FFF5EE";s:6:"sienna";s:7:"#A0522D";s:6:"silver";s:7:"#C0C0C0";s:7:"skyblue";s:7:"#87CEEB";s:9:"slateblue";s:7:"#6A5ACD";s:9:"slategray";s:7:"#708090";s:9:"slategrey";s:7:"#708090";s:4:"snow";s:7:"#FFFAFA";s:11:"springgreen";s:7:"#00FF7F";s:9:"steelblue";s:7:"#4682B4";s:3:"tan";s:7:"#D2B48C";s:4:"teal";s:7:"#008080";s:7:"thistle";s:7:"#D8BFD8";s:6:"tomato";s:7:"#FF6347";s:9:"turquoise";s:7:"#40E0D0";s:6:"violet";s:7:"#EE82EE";s:5:"wheat";s:7:"#F5DEB3";s:5:"white";s:7:"#FFFFFF";s:10:"whitesmoke";s:7:"#F5F5F5";s:6:"yellow";s:7:"#FFFF00";s:11:"yellowgreen";s:7:"#9ACD32";}s:30:"Core.ConvertDocumentToFragment";b:1;s:36:"Core.DirectLexLine
NumberSyncInterval";i:0;s:20:"Core.DisableExcludes";b:0;s:15:"Core.EnableIDNA";b:0;s:13:"Core.Encoding";s:5:"utf-8";s:26:"Core.EscapeInvalidChildren";b:0;s:22:"Core.EscapeInvalidTags";b:0;s:29:"Core.EscapeNonASCIICharacters";b:0;s:19:"Core.HiddenElements";a:2:{s:6:"script";b:1;s:5:"style";b:1;}s:13:"Core.Language";s:2:"en";s:24:"Core.LegacyEntityDecoder";b:0;s:14:"Core.LexerImpl";N;s:24:"Core.MaintainLineNumbers";N;s:22:"Core.NormalizeNewlines";b:1;s:21:"Core.RemoveInvalidImg";b:1;s:33:"Core.RemoveProcessingInstructions";b:0;s:25:"Core.RemoveScriptContents";N;s:13:"Filter.Custom";a:0:{}s:34:"Filter.ExtractStyleBlocks.Escaping";b:1;s:31:"Filter.ExtractStyleBlocks.Scope";N;s:34:"Filter.ExtractStyleBlocks.TidyImpl";N;s:25:"Filter.ExtractStyleBlocks";b:0;s:14:"Filter.YouTube";b:0;s:12:"HTML.Allowed";N;s:22:"HTML.AllowedAttributes";N;s:20:"HTML.AllowedComments";a:0:{}s:26:"HTML.AllowedCommentsRegexp";N;s:20:"HTML.AllowedElements";N;s:19:"HTML.AllowedModules";N;s:23:"HTML.Attr.Name.UseCDATA";b:0;s:17:"HTML.BlockWrapper";s:1:"p";s:16:"HTML.CoreModules";a:7:{s:9:"Structure";b:1;s:4:"Text";b:1;s:9:"Hypertext";b:1;s:4:"List";b:1;s:22:"NonXMLCommonAttributes";b:1;s:19:"XMLCommonAttributes";b:1;s:16:"CommonAttributes";b:1;}s:18:"HTML.CustomDoctype";N;s:17:"HTML.DefinitionID";N;s:18:"HTML.DefinitionRev";i:1;s:12:"HTML.Doctype";N;s:25:"HTML.FlashAllowFullScreen";b:0;s:24:"HTML.ForbiddenAttributes";a:0:{}s:22:"HTML.ForbiddenElements";a:0:{}s:10:"HTML.Forms";b:0;s:17:"HTML.MaxImgLength";i:1200;s:13:"HTML.Nofollow";b:0;s:11:"HTML.Parent";s:3:"div";s:16:"HTML.Proprietary";b:0;s:14:"HTML.SafeEmbed";b:0;s:15:"HTML.SafeIframe";b:0;s:15:"HTML.SafeObject";b:0;s:18:"HTML.SafeScripting";a:0:{}s:11:"HTML.Strict";b:0;s:16:"HTML.TargetBlank";b:0;s:19:"HTML.TargetNoopener";b:1;s:21:"HTML.TargetNoreferrer";b:1;s:12:"HTML.TidyAdd";a:0:{}s:14:"HTML.TidyLevel";s:6:"medium";s:15:"HTML.TidyRemove";a:0:{}s:12:"HTML.Trusted";b:0;s:10:"HTML.XHTML";b:1;s:28:"Output.CommentScriptContents";b:1;s:19:"Output
.FixInnerHTML";b:1;s:18:"Output.FlashCompat";b:0;s:14:"Output.Newline";N;s:15:"Output.SortAttr";b:0;s:17:"Output.TidyFormat";b:0;s:17:"Test.ForceNoIconv";b:0;s:18:"URI.AllowedSchemes";a:7:{s:4:"http";b:1;s:5:"https";b:1;s:6:"mailto";b:1;s:3:"ftp";b:1;s:4:"nntp";b:1;s:4:"news";b:1;s:3:"tel";b:1;}s:8:"URI.Base";N;s:17:"URI.DefaultScheme";s:4:"http";s:16:"URI.DefinitionID";N;s:17:"URI.DefinitionRev";i:1;s:11:"URI.Disable";b:0;s:19:"URI.DisableExternal";b:0;s:28:"URI.DisableExternalResources";b:0;s:20:"URI.DisableResources";b:0;s:8:"URI.Host";N;s:17:"URI.HostBlacklist";a:0:{}s:16:"URI.MakeAbsolute";b:0;s:9:"URI.Munge";N;s:18:"URI.MungeResources";b:0;s:18:"URI.MungeSecretKey";N;s:26:"URI.OverrideAllowedSchemes";b:1;s:20:"URI.SafeIframeRegexp";N;}s:12:"defaultPlist";O:25:"HTMLPurifier_PropertyList":3:{s:7:"