|
Server : LiteSpeed System : Linux server51.dnsbootclub.com 4.18.0-553.62.1.lve.el8.x86_64 #1 SMP Mon Jul 21 17:50:35 UTC 2025 x86_64 User : nandedex ( 1060) PHP Version : 8.1.33 Disable Function : NONE Directory : /home/nandedex/www/s.nandedexpress.com/ |
htmlpurifier/VERSION 0000644 00000000006 15121423110 0010310 0 ustar 00 4.13.1 htmlpurifier/library/HTMLPurifier/EntityParser.php 0000644 00000023374 15121423110 0016335 0 ustar 00 <?php
// if want to implement error collecting here, we'll need to use some sort
// of global data (probably trigger_error) because it's impossible to pass
// $config or $context to the callback functions.
/**
 * Handles referencing and dereferencing character entities
 */
class HTMLPurifier_EntityParser
{
    /**
     * Reference to entity lookup table. Lazily fetched from
     * HTMLPurifier_EntityLookup::instance() the first time a named
     * entity has to be resolved.
     * @type HTMLPurifier_EntityLookup
     */
    protected $_entity_lookup;

    /**
     * Callback regex string for entities in text.
     * @type string
     */
    protected $_textEntitiesRegex;

    /**
     * Callback regex string for entities in attributes.
     * @type string
     */
    protected $_attrEntitiesRegex;

    /**
     * Regex matching an ampersand immediately followed by one of the
     * semicolon-optional entity names; the name lands in capture group 4.
     * @type string
     */
    protected $_semiOptionalPrefixRegex;

    /**
     * Builds the three regexes used by the non-legacy substitution methods.
     */
    public function __construct()
    {
        // From
        // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon
        $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml";

        // NB: three empty captures to put the fourth match in the right
        // place
        $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/";

        // Alternatives shared by the text and attribute regexes.
        $shared_alternatives =
            // hex
            '[#]x([a-fA-F0-9]+);?|'.
            // dec
            '[#]0*(\d+);?|'.
            // string (mandatory semicolon)
            // NB: order matters: match semicolon preferentially
            '([A-Za-z_:][A-Za-z0-9.\-_:]*);|';

        $this->_textEntitiesRegex =
            '/&(?:'.
            $shared_alternatives.
            // string (optional semicolon)
            "($semi_optional)".
            ')/';

        $this->_attrEntitiesRegex =
            '/&(?:'.
            $shared_alternatives.
            // string (optional semicolon)
            // don't match if trailing is equals or alphanumeric (URL
            // like)
            "($semi_optional)(?![=;A-Za-z0-9])".
            ')/';
    }

    /**
     * Substitute entities with the parsed equivalents. Use this on
     * textual data in an HTML document (as opposed to attributes.)
     *
     * @param string $string String to have entities parsed.
     * @return string Parsed string.
     */
    public function substituteTextEntities($string)
    {
        return preg_replace_callback(
            $this->_textEntitiesRegex,
            array($this, 'entityCallback'),
            $string
        );
    }

    /**
     * Substitute entities with the parsed equivalents. Use this on
     * attribute contents in documents.
     *
     * @param string $string String to have entities parsed.
     * @return string Parsed string.
     */
    public function substituteAttrEntities($string)
    {
        return preg_replace_callback(
            $this->_attrEntitiesRegex,
            array($this, 'entityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteTextEntities() and
     * substituteAttrEntities() that does the work.
     *
     * @param array $matches PCRE matches array, with 0 the entire match, and
     *                       one of index 1 (hex value), 2 (dec value),
     *                       3 (name followed by a semicolon) or 4 (name from
     *                       the semicolon-optional list) set.
     * @return string Replacement string.
     */
    protected function entityCallback($matches)
    {
        $entity = $matches[0];
        // Groups after the last one that participated in the match may be
        // absent from $matches, so default them instead of suppressing
        // the undefined-index warning.
        $hex_part = isset($matches[1]) ? $matches[1] : '';
        $dec_part = isset($matches[2]) ? $matches[2] : '';

        if ($hex_part !== '') {
            return HTMLPurifier_Encoder::unichr(hexdec($hex_part));
        }
        if ($dec_part !== '') {
            return HTMLPurifier_Encoder::unichr((int) $dec_part);
        }

        if (!empty($matches[3])) {
            $named_part = $matches[3];
        } elseif (!empty($matches[4])) {
            $named_part = $matches[4];
        } else {
            $named_part = '';
        }

        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$named_part])) {
            return $this->_entity_lookup->table[$named_part];
        }

        // exact match didn't match anything, so test if
        // any of the semicolon optional match the prefix.
        // Only retry when the name came from group 3: the retry regex can
        // only fill group 4, which prevents infinite recursion.
        if (!empty($matches[3])) {
            return preg_replace_callback(
                $this->_semiOptionalPrefixRegex,
                array($this, 'entityCallback'),
                $entity
            );
        }
        return $entity;
    }

    // LEGACY CODE BELOW

    /**
     * Callback regex string for parsing entities.
     * @type string
     */
    protected $_substituteEntitiesRegex =
        '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
    //     1. hex             2. dec      3. string (XML style)

    /**
     * Decimal to parsed string conversion table for special entities.
     * @type array
     */
    protected $_special_dec2str =
        array(
            34 => '"',
            38 => '&',
            39 => "'",
            60 => '<',
            62 => '>'
        );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     * @type array
     */
    protected $_special_ent2dec =
        array(
            'quot' => 34,
            'amp' => 38,
            'lt' => 60,
            'gt' => 62
        );

    /**
     * Substitutes non-special entities with their parsed equivalents. Since
     * running this whenever you have parsed character is t3h 5uck, we run
     * it before everything else.
     *
     * @param string $string String to have non-special entities parsed.
     * @return string Parsed string.
     */
    public function substituteNonSpecialEntities($string)
    {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * @param array $matches PCRE matches array, with 0 the entire match, and
     *                       either index 1, 2 or 3 set with a hex value, dec value,
     *                       or string (respectively).
     * @return string Replacement string.
     */
    protected function nonSpecialEntityCallback($matches)
    {
        // replaces all but big five
        $entity = $matches[0];

        if ($this->isNumericReference($entity)) {
            $code = $this->numericReferenceValue($entity, $matches);
            // abort for special characters
            if ($this->isSpecialCodePoint($code)) {
                return $entity;
            }
            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        return $entity;
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     *
     * @notice We try to avoid calling this function because otherwise, it
     * would have to be called a lot (for every parsed section).
     *
     * @param string $string String to have special entities parsed.
     * @return string Parsed string.
     */
    public function substituteSpecialEntities($string)
    {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
     *
     * @param array $matches PCRE-style matches array, with 0 the entire match, and
     *                       either index 1, 2 or 3 set with a hex value, dec value,
     *                       or string (respectively).
     * @return string Replacement string.
     */
    protected function specialEntityCallback($matches)
    {
        $entity = $matches[0];

        if ($this->isNumericReference($entity)) {
            $code = $this->numericReferenceValue($entity, $matches);
            return $this->isSpecialCodePoint($code) ?
                $this->_special_dec2str[$code] :
                $entity;
        }

        $name = $matches[3];
        return isset($this->_special_ent2dec[$name]) ?
            $this->_special_dec2str[$this->_special_ent2dec[$name]] :
            $entity;
    }

    /**
     * Tells whether a matched entity is a numeric reference ("&#...").
     *
     * @param string $entity Entire matched entity text.
     * @return bool
     */
    protected function isNumericReference($entity)
    {
        return isset($entity[1]) && $entity[1] === '#';
    }

    /**
     * Extracts the code point of a numeric reference matched by the
     * legacy regex.
     *
     * @param string $entity Entire matched entity text.
     * @param array $matches PCRE matches array (1 = hex digits, 2 = decimal digits).
     * @return int|float Code point; hexdec() yields a float when the value
     *                   does not fit into an integer.
     */
    protected function numericReferenceValue($entity, $matches)
    {
        $is_hex = isset($entity[2]) && $entity[2] === 'x';
        return $is_hex ? hexdec($matches[1]) : (int) $matches[2];
    }

    /**
     * Tells whether a code point is one of the special characters in
     * $_special_dec2str.
     *
     * Oversized hex references arrive here as floats; they can never be
     * special, and using them as an array offset would trigger an implicit
     * float-to-int conversion, so they are rejected up front.
     *
     * @param int|float $code
     * @return bool
     */
    protected function isSpecialCodePoint($code)
    {
        return is_int($code) && isset($this->_special_dec2str[$code]);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/ErrorStruct.php 0000644 00000003545 15121423110 0016200 0 ustar 00 <?php
/**
 * Records errors for particular segments of an HTML document such as tokens,
 * attributes or CSS properties. They can contain error structs (which apply
 * to components of what they represent), but their main purpose is to hold
 * errors applying to whatever struct is being used.
 */
class HTMLPurifier_ErrorStruct
{
    /**
     * Possible values for $children first-key. Note that top-level structures
     * are automatically token-level.
     */
    const TOKEN = 0;
    const ATTR = 1;
    const CSSPROP = 2;

    /**
     * Type of this struct; getChild() stores the $type it was called with
     * here (presumably one of the constants above).
     * @type mixed
     */
    public $type;

    /**
     * Value of the struct we are recording errors for. There are various
     * values for this:
     *  - TOKEN: Instance of HTMLPurifier_Token
     *  - ATTR: array('attr-name', 'value')
     *  - CSSPROP: array('prop-name', 'value')
     * @type mixed
     */
    public $value;

    /**
     * Errors registered for this structure, each stored as
     * array($severity, $message).
     * @type array
     */
    public $errors = array();

    /**
     * Child ErrorStructs that are from this structure. For example, a TOKEN
     * ErrorStruct would contain ATTR ErrorStructs. This is a multi-dimensional
     * array in structure: [TYPE]['identifier']
     * @type array
     */
    public $children = array();

    /**
     * Returns the child struct stored under [$type][$id], creating and
     * registering an empty one (with its type set) on first access.
     *
     * @param mixed $type First-level key, also recorded as the child's type.
     * @param string $id Identifier of the child within that type.
     * @return HTMLPurifier_ErrorStruct
     */
    public function getChild($type, $id)
    {
        if (isset($this->children[$type][$id])) {
            return $this->children[$type][$id];
        }

        $child = new HTMLPurifier_ErrorStruct();
        $child->type = $type;
        $this->children[$type][$id] = $child;

        return $child;
    }

    /**
     * Appends an error record to this struct.
     *
     * @param int $severity
     * @param string $message
     */
    public function addError($severity, $message)
    {
        $record = array($severity, $message);
        $this->errors[] = $record;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/CSSDefinition.php 0000644 00000045175 15121423110 0016350 0 ustar 00 <?php
/**
* Defines allowed CSS attributes and what their values are.
* @see HTMLPurifier_HTMLDefinition
*/
class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
{
public $type = 'CSS';
/**
* Assoc array of attribute name to definition object.
* @type HTMLPurifier_AttrDef[]
*/
public $info = array();
/**
* Constructs the info array. The meat of this class.
*
* Registers one attribute definition per supported CSS property in
* $this->info, optionally adds the proprietary / tricky / trusted
* property groups depending on configuration, wraps every definition
* in an !important decorator and finally applies the allowed/forbidden
* property filters.
*
* @param HTMLPurifier_Config $config
*/
protected function doSetup($config)
{
$this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
array('left', 'right', 'center', 'justify'),
false
);
// One enum instance is shared by the four per-side border-style
// properties and reused below for the 'border-style' shorthand.
$border_style =
$this->info['border-bottom-style'] =
$this->info['border-right-style'] =
$this->info['border-left-style'] =
$this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum(
array(
'none',
'hidden',
'dotted',
'dashed',
'solid',
'double',
'groove',
'ridge',
'inset',
'outset'
),
false
);
$this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
$this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'left', 'right', 'both'),
false
);
$this->info['float'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'left', 'right'),
false
);
$this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'italic', 'oblique'),
false
);
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'small-caps'),
false
);
// Shared by 'list-style-image' and 'background-image' below.
$uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('none')),
new HTMLPurifier_AttrDef_CSS_URI()
)
);
$this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
array('inside', 'outside'),
false
);
$this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
array(
'disc',
'circle',
'square',
'decimal',
'lower-roman',
'upper-roman',
'lower-alpha',
'upper-alpha',
'none'
),
false
);
$this->info['list-style-image'] = $uri_or_none;
$this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
$this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
array('capitalize', 'uppercase', 'lowercase', 'none'),
false
);
$this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['background-image'] = $uri_or_none;
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
);
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
array('scroll', 'fixed')
);
$this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
// 'transparent' or a color; the same instance serves the per-side
// border colors, 'background-color' and the 'border-color' shorthand.
$border_color =
$this->info['border-top-color'] =
$this->info['border-bottom-color'] =
$this->info['border-left-color'] =
$this->info['border-right-color'] =
$this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('transparent')),
new HTMLPurifier_AttrDef_CSS_Color()
)
);
$this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
$this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
$border_width =
$this->info['border-top-width'] =
$this->info['border-bottom-width'] =
$this->info['border-left-width'] =
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
)
);
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Length()
)
);
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Length()
)
);
$this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(
array(
'xx-small',
'x-small',
'small',
'medium',
'large',
'x-large',
'xx-large',
'larger',
'smaller'
)
),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_CSS_Length()
)
);
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
)
);
// Per-side margins share one definition, reused for the shorthand.
$margin =
$this->info['margin-top'] =
$this->info['margin-bottom'] =
$this->info['margin-left'] =
$this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)
);
$this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
// non-negative
$padding =
$this->info['padding-top'] =
$this->info['padding-bottom'] =
$this->info['padding-left'] =
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
)
);
$this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
$this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage()
)
);
// Width/height definitions used when no CSS.MaxImgLength limit is
// configured, and for every non-img element when one is.
$trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('auto', 'initial', 'inherit'))
)
);
$trusted_min_wh = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit'))
)
);
$trusted_max_wh = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit'))
)
);
// When a maximum image length is configured, img tags get a
// length-capped definition (no percentages) via AttrDef_Switch.
$max = $config->get('CSS.MaxImgLength');
$this->info['width'] =
$this->info['height'] =
$max === null ?
$trusted_wh :
new HTMLPurifier_AttrDef_Switch(
'img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)
),
// For everyone else:
$trusted_wh
);
$this->info['min-width'] =
$this->info['min-height'] =
$max === null ?
$trusted_min_wh :
new HTMLPurifier_AttrDef_Switch(
'img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit'))
)
),
// For everyone else:
$trusted_min_wh
);
$this->info['max-width'] =
$this->info['max-height'] =
$max === null ?
$trusted_max_wh :
new HTMLPurifier_AttrDef_Switch(
'img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit'))
)
),
// For everyone else:
$trusted_max_wh
);
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
$this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
// this could use specialized code
$this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
array(
'normal',
'bold',
'bolder',
'lighter',
'100',
'200',
'300',
'400',
'500',
'600',
'700',
'800',
'900'
),
false
);
// MUST be called after other font properties, as it references
// a CSSDefinition object
$this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
// same here
$this->info['border'] =
$this->info['border-bottom'] =
$this->info['border-top'] =
$this->info['border-left'] =
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(
array('collapse', 'separate')
);
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(
array('top', 'bottom')
);
$this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(
array('auto', 'fixed')
);
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(
array(
'baseline',
'sub',
'super',
'top',
'text-top',
'middle',
'bottom',
'text-bottom'
)
),
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage()
)
);
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
// These CSS properties don't work on many browsers, but we live
// in THE FUTURE!
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(
array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line')
);
// Optional property groups, each enabled by its own directive.
if ($config->get('CSS.Proprietary')) {
$this->doSetupProprietary($config);
}
if ($config->get('CSS.AllowTricky')) {
$this->doSetupTricky($config);
}
if ($config->get('CSS.Trusted')) {
$this->doSetupTrusted($config);
}
$allow_important = $config->get('CSS.AllowImportant');
// wrap all attr-defs with decorator that handles !important
foreach ($this->info as $k => $v) {
$this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
}
// Runs last so the allowed/forbidden filters see the complete set.
$this->setupConfigStuff($config);
}
/**
 * Registers the property definitions that doSetup() only adds when the
 * CSS.Proprietary directive is enabled.
 *
 * @param HTMLPurifier_Config $config
 */
protected function doSetupProprietary($config)
{
    // Internet Explorer only scrollbar colors (one definition each)
    $scrollbar_colors = array(
        'scrollbar-arrow-color',
        'scrollbar-base-color',
        'scrollbar-darkshadow-color',
        'scrollbar-face-color',
        'scrollbar-highlight-color',
        'scrollbar-shadow-color'
    );
    foreach ($scrollbar_colors as $property) {
        $this->info[$property] = new HTMLPurifier_AttrDef_CSS_Color();
    }

    // vendor specific prefixes of opacity
    foreach (array('-moz-opacity', '-khtml-opacity') as $property) {
        $this->info[$property] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
    }

    // only opacity, for now
    $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();

    // more CSS3
    // One enum instance backs both page-break-before and page-break-after.
    $page_break = new HTMLPurifier_AttrDef_Enum(
        array(
            'auto',
            'always',
            'avoid',
            'left',
            'right'
        )
    );
    $this->info['page-break-before'] = $page_break;
    $this->info['page-break-after'] = $page_break;
    $this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid'));

    $percentage = new HTMLPurifier_AttrDef_CSS_Percentage(true); // disallow negative
    $length = new HTMLPurifier_AttrDef_CSS_Length('0'); // disallow negative
    $border_radius = new HTMLPurifier_AttrDef_CSS_Composite(array($percentage, $length));

    // The four corner properties share a single two-value definition.
    $corner_radius = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2);
    $corners = array(
        'border-bottom-left-radius',
        'border-bottom-right-radius',
        'border-top-right-radius',
        'border-top-left-radius'
    );
    foreach ($corners as $property) {
        $this->info[$property] = $corner_radius;
    }

    // TODO: support SLASH syntax
    $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4);
}
/**
 * Registers the property definitions that doSetup() only adds when the
 * CSS.AllowTricky directive is enabled: display, visibility, overflow
 * and opacity.
 *
 * @param HTMLPurifier_Config $config
 */
protected function doSetupTricky($config)
{
    $display_values = array(
        'inline',
        'block',
        'list-item',
        'run-in',
        'compact',
        'marker',
        'table',
        'inline-block',
        'inline-table',
        'table-row-group',
        'table-header-group',
        'table-footer-group',
        'table-row',
        'table-column-group',
        'table-column',
        'table-cell',
        'table-caption',
        'none'
    );
    $visibility_values = array('visible', 'hidden', 'collapse');
    $overflow_values = array('visible', 'hidden', 'auto', 'scroll');

    $this->info['display'] = new HTMLPurifier_AttrDef_Enum($display_values);
    $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum($visibility_values);
    $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum($overflow_values);
    $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
}
/**
 * Registers the property definitions that doSetup() only adds when the
 * CSS.Trusted directive is enabled: position, the four offsets and
 * z-index.
 *
 * @param HTMLPurifier_Config $config
 */
protected function doSetupTrusted($config)
{
    $this->info['position'] = new HTMLPurifier_AttrDef_Enum(
        array('static', 'relative', 'absolute', 'fixed')
    );

    // A single length / percentage / 'auto' definition is shared by all
    // four offset properties.
    $offset = new HTMLPurifier_AttrDef_CSS_Composite(
        array(
            new HTMLPurifier_AttrDef_CSS_Length(),
            new HTMLPurifier_AttrDef_CSS_Percentage(),
            new HTMLPurifier_AttrDef_Enum(array('auto')),
        )
    );
    foreach (array('bottom', 'right', 'left', 'top') as $side) {
        $this->info[$side] = $offset;
    }

    $integer = new HTMLPurifier_AttrDef_Integer();
    $auto = new HTMLPurifier_AttrDef_Enum(array('auto'));
    $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array($integer, $auto));
}
/**
 * Performs extra config-based processing. Based off of
 * HTMLPurifier_HTMLDefinition.
 *
 * Drops every property not listed in CSS.AllowedProperties (when that
 * directive is set), warns about listed properties this definition does
 * not know, then drops everything listed in CSS.ForbiddenProperties.
 *
 * @param HTMLPurifier_Config $config
 * @todo Refactor duplicate elements into common class (probably using
 *       composition, not inheritance).
 */
protected function setupConfigStuff($config)
{
    // setup allowed elements
    $support = "(for information on implementing this, see the " .
        "support forums) ";

    $allowed_properties = $config->get('CSS.AllowedProperties');
    if ($allowed_properties !== null) {
        foreach (array_keys($this->info) as $known) {
            if (!isset($allowed_properties[$known])) {
                unset($this->info[$known]);
            }
            // Whatever survives in the lookup was requested but is unknown.
            unset($allowed_properties[$known]);
        }
        // emit errors
        foreach (array_keys($allowed_properties) as $unknown) {
            // :TODO: Is this htmlspecialchars() call really necessary?
            $escaped = htmlspecialchars($unknown);
            trigger_error(
                "Style attribute '" . $escaped . "' is not supported " . $support,
                E_USER_WARNING
            );
        }
    }

    $forbidden_properties = $config->get('CSS.ForbiddenProperties');
    if ($forbidden_properties === null) {
        return;
    }
    foreach (array_keys($this->info) as $known) {
        if (isset($forbidden_properties[$known])) {
            unset($this->info[$known]);
        }
    }
}
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/ChildDef.php 0000644 00000003027 15121423110 0015337 0 ustar 00 <?php
/**
* Defines allowed child nodes and validates nodes against it.
*
* Abstract base: concrete definitions implement validateChildren().
*/
abstract class HTMLPurifier_ChildDef
{
/**
* Type of child definition, usually right-most part of class name lowercase.
* Used occasionally in terms of context.
* @type string
*/
public $type;
/**
* Indicates whether or not an empty array of children is okay.
*
* This is necessary for redundant checking when changes affecting
* a child node may cause a parent node to now be disallowed.
* @type bool
*/
public $allow_empty;
/**
* Lookup array of all elements that this definition could possibly allow.
* @type array
*/
public $elements = array();
/**
* Returns the lookup of elements this definition could possibly allow.
*
* This base implementation returns $this->elements unchanged and does
* not read $config.
* NOTE(review): the previous description said these are tag names that
* should not auto-close this element - confirm against callers.
* @param HTMLPurifier_Config $config HTMLPurifier_Config object
* @return array
*/
public function getAllowedElements($config)
{
return $this->elements;
}
/**
* Validates nodes according to definition and returns modification.
*
* @param HTMLPurifier_Node[] $children Array of HTMLPurifier_Node
* @param HTMLPurifier_Config $config HTMLPurifier_Config object
* @param HTMLPurifier_Context $context HTMLPurifier_Context object
* @return bool|array true to leave nodes as is, false to remove parent node, array of replacement children
*/
abstract public function validateChildren($children, $config, $context);
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php 0000644 00000037112 15121423110 0017104 0 ustar 00 <?php
class HTMLPurifier_HTMLModuleManager
{
/**
* @type HTMLPurifier_DoctypeRegistry
*/
public $doctypes;
/**
* Instance of current doctype.
* @type string
*/
public $doctype;
/**
* @type HTMLPurifier_AttrTypes
*/
public $attrTypes;
/**
* Active instances of modules for the specified doctype are
* indexed, by name, in this array.
* @type HTMLPurifier_HTMLModule[]
*/
public $modules = array();
/**
* Array of recognized HTMLPurifier_HTMLModule instances,
* indexed by module's class name. This array is usually lazy loaded, but a
* user can overload a module by pre-emptively registering it.
* @type HTMLPurifier_HTMLModule[]
*/
public $registeredModules = array();
/**
* List of extra modules that were added by the user
* using addModule(). These get unconditionally merged into the current doctype, whatever
* it may be.
* @type HTMLPurifier_HTMLModule[]
*/
public $userModules = array();
/**
* Associative array of element name to list of modules that have
* definitions for the element; this array is dynamically filled.
* @type array
*/
public $elementLookup = array();
/**
* List of prefixes we should use for registering small names.
* @type array
*/
public $prefixes = array('HTMLPurifier_HTMLModule_');
/**
* @type HTMLPurifier_ContentSets
*/
public $contentSets;
/**
* @type HTMLPurifier_AttrCollections
*/
public $attrCollections;
/**
* If set to true, unsafe elements and attributes will be allowed.
* @type bool
*/
public $trusted = false;
/**
 * Creates the attribute-type and doctype registries and registers the
 * five built-in doctypes together with their module and tidy-module lists.
 *
 * Each tidy module is listed once per doctype: setup() processes and sets
 * up every entry of the tidy-module list, so a repeated name only caused
 * the same module to be set up twice.
 */
public function __construct()
{
    // editable internal objects
    $this->attrTypes = new HTMLPurifier_AttrTypes();
    $this->doctypes = new HTMLPurifier_DoctypeRegistry();

    // setup basic modules
    $common = array(
        'CommonAttributes', 'Text', 'Hypertext', 'List',
        'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
        'StyleAttribute',
        // Unsafe:
        'Scripting', 'Object', 'Forms',
        // Sorta legacy, but present in strict:
        'Name',
    );
    $transitional = array('Legacy', 'Target', 'Iframe');
    $xml = array('XMLCommonAttributes');
    $non_xml = array('NonXMLCommonAttributes');

    // setup basic doctypes
    $this->doctypes->register(
        'HTML 4.01 Transitional',
        false,
        array_merge($common, $transitional, $non_xml),
        array('Tidy_Transitional', 'Tidy_Proprietary'),
        array(),
        '-//W3C//DTD HTML 4.01 Transitional//EN',
        'http://www.w3.org/TR/html4/loose.dtd'
    );

    $this->doctypes->register(
        'HTML 4.01 Strict',
        false,
        array_merge($common, $non_xml),
        array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
        array(),
        '-//W3C//DTD HTML 4.01//EN',
        'http://www.w3.org/TR/html4/strict.dtd'
    );

    $this->doctypes->register(
        'XHTML 1.0 Transitional',
        true,
        array_merge($common, $transitional, $xml, $non_xml),
        array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
        array(),
        '-//W3C//DTD XHTML 1.0 Transitional//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
    );

    $this->doctypes->register(
        'XHTML 1.0 Strict',
        true,
        array_merge($common, $xml, $non_xml),
        array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
        array(),
        '-//W3C//DTD XHTML 1.0 Strict//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
    );

    $this->doctypes->register(
        'XHTML 1.1',
        true,
        // Iframe is a real XHTML 1.1 module, despite being
        // "transitional"!
        array_merge($common, $xml, array('Ruby', 'Iframe')),
        array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), // Tidy_XHTML1_1
        array(),
        '-//W3C//DTD XHTML 1.1//EN',
        'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
    );
}
/**
 * Registers a module to the recognized module list, useful for
 * overloading pre-existing modules.
 *
 * @param $module Mixed: string module name, with or without
 *                HTMLPurifier_HTMLModule_ prefix, or instance of
 *                subclass of HTMLPurifier_HTMLModule.
 * @param $overload Boolean whether or not to overload previous modules.
 *                  If this is not set, and you do overload a module,
 *                  HTML Purifier will complain with a warning (the
 *                  module is still replaced).
 * @note If a string is passed as a module name, different variants
 *       will be tested in this order:
 *          - Each registered prefix followed by $name, in the order
 *            the prefixes were added
 *          - The literal name
 *          - Otherwise an E_USER_ERROR is raised and nothing is registered
 *       If your object name collides with an internal class, specify
 *       your module manually.
 * @note NOTE(review): class_exists() is called with its default
 *       arguments here; earlier documentation stated that this method
 *       never triggers autoloading - confirm which is intended.
 */
public function registerModule($module, $overload = false)
{
    if (is_string($module)) {
        // attempt to load the module
        $class_name = null;
        foreach ($this->prefixes as $prefix) {
            $candidate = $prefix . $module;
            if (class_exists($candidate)) {
                $class_name = $candidate;
                break;
            }
        }
        if ($class_name === null) {
            // no prefixed class found: fall back to the literal name
            if (!class_exists($module)) {
                trigger_error(
                    $module . ' module does not exist',
                    E_USER_ERROR
                );
                return;
            }
            $class_name = $module;
        }
        $module = new $class_name();
    }

    if (empty($module->name)) {
        trigger_error('Module instance of ' . get_class($module) . ' must have name');
        return;
    }

    $already_registered = isset($this->registeredModules[$module->name]);
    if ($already_registered && !$overload) {
        trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
    }
    $this->registeredModules[$module->name] = $module;
}
/**
 * Adds a module to the current doctype by first registering it,
 * and then tacking its name on to the list of user modules.
 *
 * @param $module Mixed: module name, or module instance (in which case
 *                its name property is what gets recorded).
 */
public function addModule($module)
{
    $this->registerModule($module);
    $this->userModules[] = is_object($module) ? $module->name : $module;
}
/**
* Adds a class prefix that registerModule() will use to resolve a
* string name to a concrete class
*
* The prefix is appended to $this->prefixes, so it is tried after all
* previously registered prefixes.
* @param string $prefix Class name prefix to prepend to module names.
*/
public function addPrefix($prefix)
{
$this->prefixes[] = $prefix;
}
/**
 * Performs processing on modules, after being called you may
 * use getElement() and getElements()
 *
 * Resolves the doctype, works out the list of active modules (doctype
 * modules filtered by HTML.AllowedModules, directive-enabled extras, user
 * modules, then the doctype's tidy modules), sets each of them up,
 * instantiates their injectors, and builds the element lookup, content
 * sets and attribute collections.
 *
 * @param HTMLPurifier_Config $config
 */
public function setup($config)
{
    $this->trusted = $config->get('HTML.Trusted');

    // generate
    $this->doctype = $this->doctypes->make($config);
    $modules = $this->doctype->modules;

    // take out the default modules that aren't allowed
    $lookup = $config->get('HTML.AllowedModules');
    $special_cases = $config->get('HTML.CoreModules');
    if (is_array($lookup)) {
        foreach ($modules as $k => $m) {
            // core modules are always kept
            if (!isset($special_cases[$m]) && !isset($lookup[$m])) {
                unset($modules[$k]);
            }
        }
    }

    // custom modules, in the order they must be appended
    // NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER HTML.TargetBlank
    // so that its post-attr-transform gets run afterwards.
    $optional_modules = array(
        'HTML.Proprietary' => 'Proprietary',
        'HTML.SafeObject' => 'SafeObject',
        'HTML.SafeEmbed' => 'SafeEmbed',
        'HTML.SafeScripting' => 'SafeScripting',
        'HTML.Nofollow' => 'Nofollow',
        'HTML.TargetBlank' => 'TargetBlank',
        'HTML.TargetNoreferrer' => 'TargetNoreferrer',
        'HTML.TargetNoopener' => 'TargetNoopener',
    );
    foreach ($optional_modules as $directive => $module_name) {
        $setting = $config->get($directive);
        if ($directive === 'HTML.SafeScripting') {
            // enabled by anything other than an empty array
            $enabled = ($setting !== array());
        } else {
            $enabled = (bool) $setting;
        }
        if ($enabled) {
            $modules[] = $module_name;
        }
    }

    // merge in custom modules
    $modules = array_merge($modules, $this->userModules);

    foreach ($modules as $module_name) {
        $this->processModule($module_name);
        $this->modules[$module_name]->setup($config);
    }

    foreach ($this->doctype->tidyModules as $module_name) {
        $this->processModule($module_name);
        $this->modules[$module_name]->setup($config);
    }

    // prepare any injectors: turn names into instances, keyed by name
    foreach ($this->modules as $active) {
        $prepared = array();
        foreach ($active->info_injector as $injector) {
            if (!is_object($injector)) {
                $class = "HTMLPurifier_Injector_$injector";
                $injector = new $class();
            }
            $prepared[$injector->name] = $injector;
        }
        $active->info_injector = $prepared;
    }

    // setup lookup table based on all valid modules
    foreach ($this->modules as $active) {
        foreach ($active->info as $element => $def) {
            $this->elementLookup[$element][] = $active->name;
        }
    }

    // note the different choice
    $this->contentSets = new HTMLPurifier_ContentSets(
        // content set assembly deals with all possible modules,
        // not just ones deemed to be "safe"
        $this->modules
    );
    $this->attrCollections = new HTMLPurifier_AttrCollections(
        $this->attrTypes,
        // there is no way to directly disable a global attribute,
        // but using AllowedAttributes or simply not including
        // the module in your custom doctype should be sufficient
        $this->modules
    );
}
/**
 * Takes a module and adds it to the active module collection,
 * registering it if necessary.
 *
 * A module object is always (re-)registered and is then addressed by its
 * name, mirroring addModule(). Previously the object itself was used as an
 * array offset, which raises a TypeError on PHP 8.
 * @param HTMLPurifier_HTMLModule|string $module Module object or module name
 */
public function processModule($module)
{
    if (is_object($module)) {
        $this->registerModule($module);
        // NOTE(review): assumes registerModule() indexes registeredModules
        // by $module->name, as addModule() implies - confirm.
        $module = $module->name;
    } elseif (!isset($this->registeredModules[$module])) {
        $this->registerModule($module);
    }
    $this->modules[$module] = $this->registeredModules[$module];
}
/**
 * Retrieves the merged definitions of every element offered by the
 * active modules.
 * @return array Map of element name to HTMLPurifier_ElementDef
 */
public function getElements()
{
    $merged = array();
    foreach ($this->modules as $module) {
        // unsafe modules only contribute when trusted mode is on
        if ($this->trusted || $module->safe) {
            foreach ($module->info as $name => $unused) {
                if (!isset($merged[$name])) {
                    $merged[$name] = $this->getElement($name);
                }
            }
        }
    }
    // getElement() yields false for an element that looked available but
    // could not be built; drop those duds
    foreach (array_keys($merged, false, true) as $dud) {
        unset($merged[$dud]);
    }
    return $merged;
}
/**
 * Retrieves a single merged element definition.
 * @param string $name Name of element
 * @param bool $trusted Overrides the manager's trusted flag when not null:
 *        set to true if you want the full version of an element
 * @return HTMLPurifier_ElementDef|bool Merged definition, or false when no
 *         usable definition exists
 * @note Modules are iterated twice: getElements() walks them to discover
 *       element names, while this method walks the modules registered for
 *       one name in order to merge their definitions.
 */
public function getElement($name, $trusted = null)
{
    if (!isset($this->elementLookup[$name])) {
        return false;
    }
    if ($trusted === null) {
        $trusted = $this->trusted;
    }

    $def = false;
    // visit every module that registered a definition for this element
    foreach ($this->elementLookup[$name] as $module_name) {
        $module = $this->modules[$module_name];

        // outside trusted mode an unsafe module is treated as absent
        if (!$trusted && !$module->safe) {
            continue;
        }

        // work on a copy so the module's own definition stays untouched
        $candidate = clone $module->info[$name];

        if ($def) {
            // Happens even when the candidate is standalone; in practice
            // that usually amounts to a full replacement.
            $def->mergeIn($candidate);
        } elseif ($candidate->standalone) {
            $def = $candidate;
        } else {
            // :TODO:
            // A non-standalone definition with no base to merge into could
            // be deferred to the end. It is, however, perfectly valid for
            // such a definition never to get a base: this allows extra
            // attributes to be specified for elements that may not be
            // enabled, all in one place (notably for trusted elements).
            // WARNING: care must be taken that the /extra/ definitions
            // are all safe.
            continue;
        }

        // attribute value expansions
        $this->attrCollections->performInclusions($def->attr);
        $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

        // descendants_are_inline, for ChildDef_Chameleon (ins/del excluded)
        if (is_string($def->content_model)
            && strpos($def->content_model, 'Inline') !== false
            && $name != 'del'
            && $name != 'ins'
        ) {
            $def->descendants_are_inline = true;
        }

        $this->contentSets->generateChildDef($def, $module);
    }

    // only blank (non-standalone) definitions were found: nothing to return
    if (!$def) {
        return false;
    }

    // collect the names of required attributes
    foreach ($def->attr as $attr_name => $attr_def) {
        if ($attr_def->required) {
            $def->required_attr[] = $attr_name;
        }
    }
    return $def;
}
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/UnitConverter.php 0000644 00000023622 15121423110 0016507 0 ustar 00 <?php
/**
 * Converts lengths between the units defined by CSS, using BCMath when it
 * is available and floating point arithmetic otherwise.
 */
class HTMLPurifier_UnitConverter
{
    const ENGLISH = 1;
    const METRIC = 2;
    const DIGITAL = 3;

    /**
     * Unit table, grouped by measuring system.
     *
     * String keys are unit names mapped to their relative size inside the
     * system. An integer key names another measuring system and holds the
     * bridge to it as array(unit to reach before shifting, multiplier,
     * unit the value is expressed in after shifting).
     */
    protected static $units = array(
        self::ENGLISH => array(
            'px' => 3, // as per CSS 2.1 and Firefox; your mileage may vary
            'pt' => 4,
            'pc' => 48,
            'in' => 288,
            self::METRIC => array('pt', '0.352777778', 'mm'),
        ),
        self::METRIC => array(
            'mm' => 1,
            'cm' => 10,
            self::ENGLISH => array('mm', '2.83464567', 'pt'),
        ),
    );

    /**
     * Minimum number of significant figures kept in the output.
     * @type int
     */
    protected $outputPrecision;

    /**
     * Scale (decimal places) used for intermediate calculations.
     * @type int
     */
    protected $internalPrecision;

    /**
     * Whether BCMath is used for the arithmetic.
     * @type bool
     */
    private $bcmath;

    /**
     * @param int $output_precision Minimum significant figures of results
     * @param int $internal_precision Scale for intermediate calculations
     * @param bool $force_no_bcmath Use float arithmetic even if BCMath exists
     */
    public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false)
    {
        $this->outputPrecision = $output_precision;
        $this->internalPrecision = $internal_precision;
        $this->bcmath = $force_no_bcmath ? false : function_exists('bcmul');
    }

    /**
     * Converts a length object of one unit into another unit.
     * @param HTMLPurifier_Length $length
     *      Length to convert. You must validate() it before passing it here!
     * @param string $to_unit
     *      Unit to convert to.
     * @return HTMLPurifier_Length|bool Converted length, or false when the
     *      length is invalid or either unit is unknown
     * @note
     *      The number of significant figures of the input is tracked and
     *      kept (at least $outputPrecision of them). Trailing zeroes after
     *      the decimal point are removed from the result.
     */
    public function convert($length, $to_unit)
    {
        if (!$length->isValid()) {
            return false;
        }

        $value = $length->getN();
        $from_unit = $length->getUnit();
        if ($value === '0' || $from_unit === false) {
            return new HTMLPurifier_Length('0', false);
        }

        // locate the measuring system of the source and of the target unit
        $system = false;
        $target_system = false;
        foreach (self::$units as $id => $table) {
            if (isset($table[$from_unit])) {
                $system = $id;
            }
            if (isset($table[$to_unit])) {
                $target_system = $id;
            }
        }
        if (!($system && $target_system)) {
            return false;
        }

        // significant figures to keep when rounding at the very end
        $counted = $this->getSigFigs($value);
        $sigfigs = ($counted < $this->outputPrecision) ? $this->outputPrecision : $counted;

        // BCMath scale only counts decimals, so values below 1 get as many
        // extra digits as they have leading decimal zeroes
        $magnitude = (int)floor(log(abs($value), 10));
        $precision = $this->internalPrecision;
        if ($magnitude < 0) {
            $precision -= $magnitude;
        }

        for ($pass = 0; $pass < 2; $pass++) {
            $same_system = ($target_system === $system);

            // inside one system go straight to the target; otherwise go to
            // the unit from which the bridge to the other system starts
            $step_unit = $same_system ? $to_unit : self::$units[$system][$target_system][0];

            if ($step_unit !== $from_unit) {
                $factor = $this->div(
                    self::$units[$system][$from_unit],
                    self::$units[$system][$step_unit],
                    $precision
                );
                $value = $this->mul($value, $factor, $precision);
                $from_unit = $step_unit;
            }

            // output was zero, so bail out early; shouldn't ever happen
            if ($value === '') {
                $value = '0';
                $from_unit = $to_unit;
                break;
            }

            if ($same_system) {
                break;
            }

            if ($pass !== 0) {
                // the system we shifted into did not have the unit;
                // this should never happen
                return false;
            }

            // first pass only: cross over to the target system, then loop
            // once more to reach the requested unit inside it
            $bridge = self::$units[$system][$target_system];
            $value = $this->mul($value, $bridge[1], $precision);
            $from_unit = $bridge[2];
            $system = $target_system;
        }

        if ($from_unit !== $to_unit) {
            return false;
        }

        $value = $this->round($value, $sigfigs);
        if (strpos($value, '.') !== false) {
            $value = rtrim($value, '0');
        }
        $value = rtrim($value, '.');

        return new HTMLPurifier_Length($value, $from_unit);
    }

    /**
     * Returns the number of significant figures in a string number.
     * @param string $n Decimal number
     * @return int number of sigfigs
     */
    public function getSigFigs($n)
    {
        $digits = ltrim($n, '0+-');
        $point = strpos($digits, '.');
        if ($point === false) {
            // integer: trailing zeroes are not significant
            return strlen(rtrim($digits, '0'));
        }
        $count = strlen(ltrim($digits, '0.'));
        // a decimal point left inside the remaining string was counted too
        return ($point !== 0) ? $count - 1 : $count;
    }

    /**
     * Adds two numbers, using arbitrary precision when available.
     * @param string $s1
     * @param string $s2
     * @param int $scale
     * @return string
     */
    private function add($s1, $s2, $scale)
    {
        return $this->bcmath
            ? bcadd($s1, $s2, $scale)
            : $this->scale((float)$s1 + (float)$s2, $scale);
    }

    /**
     * Multiplies two numbers, using arbitrary precision when available.
     * @param string $s1
     * @param string $s2
     * @param int $scale
     * @return string
     */
    private function mul($s1, $s2, $scale)
    {
        return $this->bcmath
            ? bcmul($s1, $s2, $scale)
            : $this->scale((float)$s1 * (float)$s2, $scale);
    }

    /**
     * Divides two numbers, using arbitrary precision when available.
     * @param string $s1
     * @param string $s2
     * @param int $scale
     * @return string
     */
    private function div($s1, $s2, $scale)
    {
        return $this->bcmath
            ? bcdiv($s1, $s2, $scale)
            : $this->scale((float)$s1 / (float)$s2, $scale);
    }

    /**
     * Rounds a number to the given count of significant figures, using
     * arbitrary precision when available.
     * @param string|float $n
     * @param int $sigfigs
     * @return string
     */
    private function round($n, $sigfigs)
    {
        $new_log = (int)floor(log(abs($n), 10)); // digits left of decimal - 1
        $rp = $sigfigs - $new_log - 1;           // decimal places needed
        $neg = $n < 0 ? '-' : '';                // sign prefix

        if (!$this->bcmath) {
            return $this->scale(round($n, $rp), $rp + 1);
        }

        if ($rp >= 0) {
            // add half a unit of the last kept place, then truncate
            $n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1);
            return bcdiv($n, '1', $rp);
        }

        // Rounding left of the decimal point; this partially depends on the
        // standardized form of numbers that comes out of bcmath.
        $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0);
        return substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1);
    }

    /**
     * Scales a float to $scale digits right of decimal point, like BCMath.
     * @param float $r
     * @param int $scale
     * @return string
     */
    private function scale($r, $scale)
    {
        if ($scale >= 0) {
            return sprintf('%.' . $scale . 'f', (float)$r);
        }

        // sprintf's f type cannot take a negative precision, so the digits
        // are handled as a string instead.
        $r = sprintf('%.0f', (float)$r);
        // Floating point loss means $r more than likely looks something
        // like 4652999999999.9234: take one digit more than needed and use
        // it to round appropriately.
        $precise = (string)round(substr($r, 0, strlen($r) + $scale), -1);
        // drop the zero that rounding left behind and pad with zeroes
        return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Lexer.php 0000644 00000032363 15121423110 0014761 0 ustar 00 <?php
/**
 * Forgivingly lexes HTML (SGML-style) markup into tokens.
 *
 * A lexer parses a string of SGML-style markup and converts it into
 * corresponding tokens. It doesn't check for well-formedness, although its
 * internal mechanism may make this automatic (such as the case of
 * HTMLPurifier_Lexer_DOMLex). There are several implementations to choose
 * from.
 *
 * A lexer is HTML-oriented: it might work with XML, but it's not
 * recommended, as we adhere to a subset of the specification for optimization
 * reasons. This might change in the future. Also, most tokenizers are not
 * expected to handle DTDs or PIs.
 *
 * This class should not be directly instantiated, but you may use create() to
 * retrieve a default copy of the lexer. Being a supertype, this class
 * does not actually define any implementation, but offers commonly used
 * convenience functions for subclasses.
 *
 * @note The unit tests will instantiate this class for testing purposes, as
 *       many of the utility functions require a class to be instantiated.
 *       This means that, even though this class is not runnable, it will
 *       not be declared abstract.
 *
 * @par
 *
 * @note
 * We use tokens rather than create a DOM representation because DOM would:
 *
 * @par
 *  -# Require more processing and memory to create,
 *  -# Is not streamable, and
 *  -# Has the entire document structure (html and body not needed).
 *
 * @par
 * However, DOM is helpful in that it makes it easy to move around nodes
 * without a lot of lookaheads to see when a tag is closed. This is a
 * limitation of the token system and some workarounds would be nice.
 */
class HTMLPurifier_Lexer
{
    /**
     * Whether or not this lexer implements line-number/column-number tracking.
     * If it does, set to true.
     */
    public $tracksLineNumbers = false;

    /**
     * Entity parser used by parseData() and normalize().
     * @since 4.13.1 - https://github.com/MetaSlider/metaslider/issues/494
     */
    private $_entity_parser;

    // -- STATIC ----------------------------------------------------------

    /**
     * Retrieves or sets the default Lexer as a Prototype Factory.
     *
     * By default HTMLPurifier_Lexer_DOMLex will be returned. There are
     * a few exceptions involving special features that only DirectLex
     * implements.
     *
     * @note The behavior of this class has changed, rather than accepting
     *       a prototype object, it now accepts a configuration object.
     *       To specify your own prototype, set %Core.LexerImpl to it.
     *       This change in behavior de-singletonizes the lexer object.
     *
     * @param HTMLPurifier_Config $config
     * @return HTMLPurifier_Lexer
     * @throws HTMLPurifier_Exception
     */
    public static function create($config)
    {
        if (!($config instanceof HTMLPurifier_Config)) {
            $lexer = $config;
            trigger_error(
                "Passing a prototype to
HTMLPurifier_Lexer::create() is deprecated, please instead
use %Core.LexerImpl",
                E_USER_WARNING
            );
            // A prototype carries no configuration, so there is nothing to
            // query for line-number tracking. Calling get() on it, as was
            // done before, fails on an object that is not a config.
            $needs_tracking = false;
        } else {
            $lexer = $config->get('Core.LexerImpl');
            $needs_tracking =
                $config->get('Core.MaintainLineNumbers') ||
                $config->get('Core.CollectErrors');
        }

        $inst = null;
        if (is_object($lexer)) {
            $inst = $lexer;
        } else {
            if (is_null($lexer)) {
                // auto-detection algorithm
                if ($needs_tracking) {
                    $lexer = 'DirectLex';
                } elseif (class_exists('DOMDocument', false) &&
                    method_exists('DOMDocument', 'loadHTML') &&
                    !extension_loaded('domxml')
                ) {
                    // check for DOM support, because while it's part of the
                    // core, it can be disabled compile time. Also, the PECL
                    // domxml extension overrides the default DOM, and is evil
                    // and nasty and we shan't bother to support it
                    $lexer = 'DOMLex';
                } else {
                    $lexer = 'DirectLex';
                }
            }

            // instantiate recognized string names
            switch ($lexer) {
                case 'DOMLex':
                    $inst = new HTMLPurifier_Lexer_DOMLex();
                    break;
                case 'DirectLex':
                    $inst = new HTMLPurifier_Lexer_DirectLex();
                    break;
                case 'PH5P':
                    $inst = new HTMLPurifier_Lexer_PH5P();
                    break;
                default:
                    throw new HTMLPurifier_Exception(
                        "Cannot instantiate unrecognized Lexer type " .
                        htmlspecialchars($lexer)
                    );
            }
        }

        if (!$inst) {
            throw new HTMLPurifier_Exception('No lexer was instantiated');
        }

        // once PHP DOM implements native line numbers, or we
        // hack out something using XSLT, remove this stipulation
        if ($needs_tracking && !$inst->tracksLineNumbers) {
            throw new HTMLPurifier_Exception(
                'Cannot use lexer that does not support line numbers with ' .
                'Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'
            );
        }

        return $inst;
    }

    // -- CONVENIENCE MEMBERS ---------------------------------------------

    public function __construct()
    {
        $this->_entity_parser = new HTMLPurifier_EntityParser();
    }

    /**
     * Most common entity to raw value conversion table for special entities.
     * Keys are the escaped forms, values the characters they stand for.
     * @type array
     */
    protected $_special_entity2str =
        array(
            '&quot;' => '"',
            '&amp;' => '&',
            '&lt;' => '<',
            '&gt;' => '>',
            '&#39;' => "'",
            '&#039;' => "'",
            '&#x27;' => "'"
        );

    /**
     * Parses entities in text content; shorthand for parseData() with
     * $is_attr set to false.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @return string
     */
    public function parseText($string, $config)
    {
        return $this->parseData($string, false, $config);
    }

    /**
     * Parses entities in an attribute value; shorthand for parseData() with
     * $is_attr set to true.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @return string
     */
    public function parseAttr($string, $config)
    {
        return $this->parseData($string, true, $config);
    }

    /**
     * Parses special entities into the proper characters.
     *
     * The common special entities are translated with a plain lookup table;
     * the entity parser is only consulted when other entities may remain.
     *
     * @param string $string String character data to be parsed.
     * @param bool $is_attr Whether the data comes from an attribute value.
     * @param HTMLPurifier_Config $config Only read when the slow path is taken.
     * @return string Parsed character data.
     */
    public function parseData($string, $is_attr, $config)
    {
        // following functions require at least one character
        if ($string === '') {
            return '';
        }

        // subtracts amps that cannot possibly be escaped
        $num_amp = substr_count($string, '&') - substr_count($string, '& ') -
            ($string[strlen($string) - 1] === '&' ? 1 : 0);

        if (!$num_amp) {
            return $string;
        } // abort if no entities

        // Every &amp; leaves a literal ampersand behind after translation;
        // those must not be mistaken for the start of another entity.
        $num_esc_amp = substr_count($string, '&amp;');
        $string = strtr($string, $this->_special_entity2str);

        // code duplication for sake of optimization, see above
        $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
            ($string[strlen($string) - 1] === '&' ? 1 : 0);

        if ($num_amp_2 <= $num_esc_amp) {
            return $string;
        }

        // hmm... now we have some uncommon entities. Use the callback.
        if ($config->get('Core.LegacyEntityDecoder')) {
            $string = $this->_entity_parser->substituteSpecialEntities($string);
        } else {
            if ($is_attr) {
                $string = $this->_entity_parser->substituteAttrEntities($string);
            } else {
                $string = $this->_entity_parser->substituteTextEntities($string);
            }
        }
        return $string;
    }

    /**
     * Lexes an HTML string into tokens. Not implemented in this base class:
     * calling it here only raises an E_USER_ERROR.
     * @param $string String HTML.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] array representation of HTML.
     */
    public function tokenizeHTML($string, $config, $context)
    {
        trigger_error('Call to abstract class', E_USER_ERROR);
    }

    /**
     * Translates CDATA sections into regular sections (through escaping).
     * @param string $string HTML string to process.
     * @return string HTML with CDATA sections escaped.
     */
    protected static function escapeCDATA($string)
    {
        return preg_replace_callback(
            '/<!\[CDATA\[(.+?)\]\]>/s',
            array('HTMLPurifier_Lexer', 'CDATACallback'),
            $string
        );
    }

    /**
     * Special CDATA case that is especially convoluted for <script>
     * @param string $string HTML string to process.
     * @return string HTML with CDATA sections escaped.
     */
    protected static function escapeCommentedCDATA($string)
    {
        return preg_replace_callback(
            '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
            array('HTMLPurifier_Lexer', 'CDATACallback'),
            $string
        );
    }

    /**
     * Special Internet Explorer conditional comments should be removed.
     * @param string $string HTML string to process.
     * @return string HTML with conditional comments removed.
     */
    protected static function removeIEConditional($string)
    {
        return preg_replace(
            '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
            '',
            $string
        );
    }

    /**
     * Callback function for escapeCDATA() that does the work.
     *
     * @param array $matches PCRE matches array, with index 0 the entire match
     *                  and 1 the inside of the CDATA section.
     * @return string Escaped internals of the CDATA section.
     */
    protected static function CDATACallback($matches)
    {
        // not exactly sure why the character set is needed, but whatever
        return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
    }

    /**
     * Takes a piece of HTML and normalizes it by converting entities, fixing
     * encoding, extracting bits, and other good stuff.
     * @param string $html HTML.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string
     * @todo Consider making protected
     */
    public function normalize($html, $config, $context)
    {
        // normalize newlines to \n
        if ($config->get('Core.NormalizeNewlines')) {
            $html = str_replace("\r\n", "\n", $html);
            $html = str_replace("\r", "\n", $html);
        }

        if ($config->get('HTML.Trusted')) {
            // escape convoluted CDATA
            $html = $this->escapeCommentedCDATA($html);
        }

        // escape CDATA
        $html = $this->escapeCDATA($html);

        $html = $this->removeIEConditional($html);

        // extract body from document if applicable
        if ($config->get('Core.ConvertDocumentToFragment')) {
            $e = false;
            if ($config->get('Core.CollectErrors')) {
                $e =& $context->get('ErrorCollector');
            }
            $new_html = $this->extractBody($html);
            if ($e && $new_html != $html) {
                $e->send(E_WARNING, 'Lexer: Extracted body');
            }
            $html = $new_html;
        }

        // expand entities that aren't the big five
        if ($config->get('Core.LegacyEntityDecoder')) {
            $html = $this->_entity_parser->substituteNonSpecialEntities($html);
        }

        // clean into wellformed UTF-8 string for an SGML context: this has
        // to be done after entity expansion because the entities sometimes
        // represent non-SGML characters (horror, horror!)
        $html = HTMLPurifier_Encoder::cleanUTF8($html);

        // if processing instructions are to removed, remove them now
        if ($config->get('Core.RemoveProcessingInstructions')) {
            $html = preg_replace('#<\?.+?\?>#s', '', $html);
        }

        $hidden_elements = $config->get('Core.HiddenElements');
        if ($config->get('Core.AggressivelyRemoveScript') &&
            !($config->get('HTML.Trusted') || !$config->get('Core.RemoveScriptContents')
            || empty($hidden_elements["script"]))) {
            // NOTE(review): the pattern has no "s" modifier, so "." stops at
            // newlines and a script element spanning several lines is not
            // matched by this pass - confirm whether that is intended.
            $html = preg_replace('#<script[^>]*>.*?</script>#i', '', $html);
        }

        return $html;
    }

    /**
     * Takes a string of HTML (fragment or document) and returns the content
     * of its body element; the input is returned unchanged when no body is
     * found or when the opening body tag appears to sit inside a comment.
     * @param string $html
     * @return string
     * @todo Consider making protected
     */
    public function extractBody($html)
    {
        $matches = array();
        $result = preg_match('|(.*?)<body[^>]*>(.*)</body>|is', $html, $matches);
        if ($result) {
            // Make sure it's not in a comment
            $comment_start = strrpos($matches[1], '<!--');
            $comment_end = strrpos($matches[1], '-->');
            if ($comment_start === false ||
                ($comment_end !== false && $comment_end > $comment_start)) {
                return $matches[2];
            }
        }
        return $html;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/URI.php 0000644 00000024544 15121423110 0014343 0 ustar 00 <?php
/**
 * HTML Purifier's internal representation of a URI.
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{
    /**
     * @type string
     */
    public $scheme;

    /**
     * @type string
     */
    public $userinfo;

    /**
     * @type string
     */
    public $host;

    /**
     * @type int
     */
    public $port;

    /**
     * @type string
     */
    public $path;

    /**
     * @type string
     */
    public $query;

    /**
     * @type string
     */
    public $fragment;

    /**
     * @param string $scheme
     * @param string $userinfo
     * @param string $host
     * @param int $port
     * @param string $path
     * @param string $query
     * @param string $fragment
     * @note Automatically normalizes scheme and port: a non-null scheme is
     *       lowercased and a non-null port is cast to int.
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false when none is available
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                return false;
            } // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                if ($def->defaultScheme !== null) {
                    // something funky happened to the default scheme object
                    trigger_error(
                        'Default scheme object "' . $def->defaultScheme . '" was not readable',
                        E_USER_WARNING
                    );
                } // suppress error if it's null
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        // The host test is parenthesised: without it && bound tighter than
        // || and the branch also ran for an empty host with no scheme.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if both the host gets stripped
                    // out
                    // http://my/path
                    // //my/path
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // the first segment must not contain ":" so it cannot be
                // read as a scheme, hence the narrower encoder
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) {
            return true;
        }
        return false;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has a equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return false;
        } // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getSchemeObj() shows that the default scheme object may be
        // unavailable; it is then treated as not secure (the outcome the
        // code had before) without reading a property of a non-object.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/VarParserException.php 0000644 00000000235 15121423110 0017457 0 ustar 00 <?php
/**
 * Exception type for HTMLPurifier_VarParser.
 *
 * Adds no members of its own; it only gives these failures a distinct
 * class, derived from HTMLPurifier_Exception, that callers can catch.
 */
class HTMLPurifier_VarParserException extends HTMLPurifier_Exception
{
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Context.php 0000644 00000005112 15121423110 0015316 0 ustar 00 <?php
/**
 * Registry object that contains information about the current context.
 * @warning Avoid storing null: get() also answers null for a missing
 *          variable when errors are ignored, so the two cases cannot be
 *          told apart there. Use false instead, please.
 * @note Since the variables Context deals with may not be objects,
 *       references are very important here! Do not remove!
 */
class HTMLPurifier_Context
{
    /**
     * Private array that stores the references, keyed by variable name.
     * @type array
     */
    private $_storage = array();

    /**
     * Registers a variable into the context. A name can be registered only
     * once; a second attempt raises an E_USER_ERROR and changes nothing.
     * @param string $name String name
     * @param mixed $ref Reference to variable to be registered
     */
    public function register($name, &$ref)
    {
        $taken = array_key_exists($name, $this->_storage);
        if (!$taken) {
            $this->_storage[$name] =& $ref;
            return;
        }
        trigger_error(
            "Name $name produces collision, cannot re-register",
            E_USER_ERROR
        );
    }

    /**
     * Retrieves a variable reference from the context.
     * @param string $name String name
     * @param bool $ignore_error Boolean whether or not to ignore error
     * @return mixed Reference to the stored variable, or null when absent
     */
    public function &get($name, $ignore_error = false)
    {
        if (array_key_exists($name, $this->_storage)) {
            return $this->_storage[$name];
        }
        if (!$ignore_error) {
            trigger_error(
                "Attempted to retrieve non-existent variable $name",
                E_USER_ERROR
            );
        }
        // a real variable is needed in order to return by reference
        $missing = null;
        return $missing;
    }

    /**
     * Destroys a variable in the context. Destroying an unknown name raises
     * an E_USER_ERROR.
     * @param string $name String name
     */
    public function destroy($name)
    {
        if (array_key_exists($name, $this->_storage)) {
            unset($this->_storage[$name]);
            return;
        }
        trigger_error(
            "Attempted to destroy non-existent variable $name",
            E_USER_ERROR
        );
    }

    /**
     * Checks whether or not the variable exists.
     * @param string $name String name
     * @return bool
     */
    public function exists($name)
    {
        $known = array_key_exists($name, $this->_storage);
        return $known;
    }

    /**
     * Loads a series of variables from an associative array; each entry is
     * registered under its key.
     * @param array $context_array Assoc array of variables to load
     */
    public function loadArray($context_array)
    {
        foreach ($context_array as $name => $ignored) {
            // pass the array slot itself so that a reference is stored
            $this->register($name, $context_array[$name]);
        }
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Node.php 0000644 00000002400 15121423110 0014554 0 ustar 00 <?php
/**
* Abstract base node class that all others inherit from.
*
* Why do we not use the DOM extension? (1) It is not always available,
* (2) it has funny constraints on the data it can represent,
* whereas we want a maximally flexible representation, and (3) its
* interface is a bit cumbersome.
*
* The class itself only declares public bookkeeping properties and one
* abstract method; it carries no behavior of its own.
*/
abstract class HTMLPurifier_Node
{
/**
* Line number of the start token in the source document
* @type int
*/
public $line;
/**
* Column number of the start token in the source document. Null if unknown.
* @type int|null
*/
public $col;
/**
* Lookup array of processing that this token is exempt from.
* Currently, valid values are "ValidateAttributes".
* @type array
*/
public $armor = array();
/**
* When true, this node should be ignored as non-existent.
*
* Who is responsible for ignoring dead nodes? FixNesting is
* responsible for removing them before passing on to child
* validators.
* @type bool
*/
public $dead = false;
/**
* Returns a pair of start and end tokens, where the end token
* is null if it is not necessary. Does not include children.
* @return array
*/
abstract public function toTokenPair();
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/URIFilter.php 0000644 00000004475 15121423110 0015512 0 ustar 00 <?php
/**
* Chainable filters for custom URI processing.
*
* These filters can perform custom actions on a URI object,
* including transformation or blacklisting. A filter named Foo
* must have a corresponding configuration directive %URI.Foo,
* unless always_load is specified to be true.
*
* The following contexts may be available while URIFilters are being
* processed:
*
* - EmbeddedURI: true if URI is an embedded resource that will
* be loaded automatically on page load
* - CurrentToken: a reference to the token that is currently
* being processed
* - CurrentAttr: the name of the attribute that is currently being
* processed
* - CurrentCSSProperty: the name of the CSS property that is
* currently being processed (if applicable)
*
* @warning This filter is called before scheme object validation occurs.
* Make sure, if you require a specific scheme object, that
* you check that it exists. This allows filters to convert
* proprietary URI schemes into regular ones.
*/
abstract class HTMLPurifier_URIFilter
{
/**
* Unique identifier of filter.
* @type string
*/
public $name;
/**
* True if this filter should be run after scheme validation.
* @type bool
*/
public $post = false;
/**
* True if this filter should always be loaded.
* This permits a filter to be named Foo without the corresponding
* %URI.Foo directive existing.
* @type bool
*/
public $always_load = false;
/**
* Performs initialization for the filter. If the filter returns
* false, this means that it shouldn't be considered active.
* The base implementation does no work and always reports true.
* @param HTMLPurifier_Config $config
* @return bool
*/
public function prepare($config)
{
return true;
}
/**
* Filter a URI object
* @param HTMLPurifier_URI $uri Reference to URI object variable
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool Whether or not to continue processing: false indicates
* URL is no good, true indicates continue processing. Note that
* all changes are committed directly on the URI object
*/
abstract public function filter(&$uri, $config, $context);
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/IDAccumulator.php 0000644 00000003157 15121423110 0016375 0 ustar 00 <?php
/**
 * Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
 * @note In Slashdot-speak, dupe means duplicate.
 * @note The default constructor does not accept $config or $context objects:
 *       you must use the static build() factory method to perform initialization.
 */
class HTMLPurifier_IDAccumulator
{
    /**
     * Lookup table of IDs we've accumulated (ID => true).
     * @public
     */
    public $ids = array();

    /**
     * Builds an IDAccumulator, also initializing the default blacklist
     * from the Attr.IDBlacklist configuration value.
     * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
     * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
     * @return HTMLPurifier_IDAccumulator Fully initialized HTMLPurifier_IDAccumulator
     */
    public static function build($config, $context)
    {
        $accumulator = new self();
        $blacklist = $config->get('Attr.IDBlacklist');
        $accumulator->load($blacklist);
        return $accumulator;
    }

    /**
     * Add an ID to the lookup table.
     * @param string $id ID to be added.
     * @return bool status, true if success, false if there's a dupe
     */
    public function add($id)
    {
        $is_new = !isset($this->ids[$id]);
        if ($is_new) {
            $this->ids[$id] = true;
        }
        return $is_new;
    }

    /**
     * Load a list of IDs into the lookup table
     * @param $array_of_ids Array of IDs to load
     * @note This function doesn't care about duplicates
     */
    public function load($array_of_ids)
    {
        foreach ($array_of_ids as $known_id) {
            $this->ids[$known_id] = true;
        }
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternal.php 0000644 00000002404 15121423110 0020546 0 ustar 00 <?php
/**
 * URI filter that rejects URIs whose host is not our own host or a
 * subdomain of it. Host-less URIs are always accepted.
 */
class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'DisableExternal';

    /**
     * Labels of our own host in reverse order (top-level label first),
     * or false when the URI definition carries no host.
     * @type array|false
     */
    protected $ourHostParts = false;

    /**
     * Caches the reversed labels of the host from the URI definition.
     * @param HTMLPurifier_Config $config
     * @return void
     */
    public function prepare($config)
    {
        $our_host = $config->getDefinition('URI')->host;
        if ($our_host !== null) {
            $this->ourHostParts = array_reverse(explode('.', $our_host));
        }
    }

    /**
     * Accepts the URI when it has no host, or when every label of our
     * host matches the corresponding trailing label of the URI's host.
     * @param HTMLPurifier_URI $uri Reference
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function filter(&$uri, $config, $context)
    {
        if (is_null($uri->host)) {
            return true;
        }
        if ($this->ourHostParts === false) {
            // We do not know our own host, so every explicit host is external.
            return false;
        }
        $host_parts = array_reverse(explode('.', $uri->host));
        foreach ($this->ourHostParts as $i => $our_part) {
            if (!isset($host_parts[$i])) {
                return false;
            }
            // Strict comparison: both sides are strings from explode(), and
            // a loose comparison would treat numeric-looking labels such as
            // "1e1" and "10" (or "010" and "10") as the same label.
            if ($host_parts[$i] !== $our_part) {
                return false;
            }
        }
        return true;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php 0000644 00000003262 15121423110 0017505 0 ustar 00 <?php
/**
 * Implements safety checks for safe iframes.
 *
 * @warning This filter is *critical* for ensuring that %HTML.SafeIframe
 *          works safely.
 */
class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'SafeIframe';

    /**
     * @type bool
     */
    public $always_load = true;

    /**
     * Whitelist pattern read from %URI.SafeIframeRegexp; null means no
     * whitelist is configured.
     * @type string
     */
    protected $regexp = null;

    // XXX: The not so good bit about how this is all set up now is we
    // can't check HTML.SafeIframe in the 'prepare' step: we have to
    // defer till the actual filtering.

    /**
     * @param HTMLPurifier_Config $config
     * @return bool
     */
    public function prepare($config)
    {
        $this->regexp = $config->get('URI.SafeIframeRegexp');
        return true;
    }

    /**
     * Lets every URI through unless it is an embedded URI on an iframe
     * token while %HTML.SafeIframe is on; those must match the whitelist.
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function filter(&$uri, $config, $context)
    {
        // Filter not applicable, or the URI is not an embedded resource.
        if (!$config->get('HTML.SafeIframe') || !$context->get('EmbeddedURI', true)) {
            return true;
        }
        // Only iframe tokens are subject to the whitelist.
        $current = $context->get('CurrentToken', true);
        $is_iframe = $current && $current->name == 'iframe';
        if (!$is_iframe) {
            return true;
        }
        // No whitelist configured: nothing can be allowed.
        if ($this->regexp === null) {
            return false;
        }
        // Actually check the whitelist.
        return preg_match($this->regexp, $uri->toString());
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php 0000644 00000006103 15121423110 0016553 0 ustar 00 <?php
/**
 * Post-filter that rewrites ("munges") browsable, non-benign URIs into
 * the URI built from the %URI.Munge target template, substituting the
 * placeholders prepared by makeReplace().
 */
class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'Munge';

    /**
     * @type bool
     */
    public $post = true;

    /**
     * Target template read from the %URI.Munge directive.
     * @type string
     */
    private $target;

    /**
     * @type HTMLPurifier_URIParser
     */
    private $parser;

    /**
     * Value of %URI.MungeResources: whether embedded URIs are munged too.
     * @type bool
     */
    private $doEmbed;

    /**
     * Value of %URI.MungeSecretKey; when set, a %t HMAC placeholder is provided.
     * @type string
     */
    private $secretKey;

    /**
     * Placeholder => value map applied to the target template.
     * @type array
     */
    protected $replace = array();

    /**
     * @param HTMLPurifier_Config $config
     * @return bool
     * @throws Exception when a secret key is configured but hash_hmac() is missing
     */
    public function prepare($config)
    {
        $this->target = $config->get('URI.' . $this->name);
        $this->parser = new HTMLPurifier_URIParser();
        $this->doEmbed = $config->get('URI.MungeResources');
        $this->secretKey = $config->get('URI.MungeSecretKey');
        if ($this->secretKey && !function_exists('hash_hmac')) {
            throw new Exception("Cannot use %URI.MungeSecretKey without hash_hmac support.");
        }
        return true;
    }

    /**
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true; the URI is replaced in place when munged
     */
    public function filter(&$uri, $config, $context)
    {
        if ($context->get('EmbeddedURI', true) && !$this->doEmbed) {
            return true;
        }
        $scheme_obj = $uri->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return true;
        } // ignore unknown schemes, maybe another postfilter did it
        if (!$scheme_obj->browsable) {
            return true;
        } // ignore non-browseable schemes, since we can't munge those in a reasonable way
        if ($uri->isBenign($config, $context)) {
            return true;
        } // don't redirect if a benign URL
        $this->makeReplace($uri, $config, $context);
        // Placeholder values may be null or boolean (context variables that
        // are absent come back as null). Cast to string before encoding:
        // the encoded text is the same as before, but null is no longer
        // passed to rawurlencode(), which is deprecated as of PHP 8.1.
        foreach ($this->replace as $placeholder => $value) {
            $this->replace[$placeholder] = rawurlencode((string)$value);
        }
        $new_uri = strtr($this->target, $this->replace);
        $new_uri = $this->parser->parse($new_uri);
        // don't redirect if the target host is the same as the
        // starting host
        if ($uri->host === $new_uri->host) {
            return true;
        }
        $uri = $new_uri; // overwrite
        return true;
    }

    /**
     * Fills $this->replace with the raw (not yet URL-encoded) placeholder values.
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     */
    protected function makeReplace($uri, $config, $context)
    {
        $string = $uri->toString();
        // always available
        $this->replace['%s'] = $string;
        $this->replace['%r'] = $context->get('EmbeddedURI', true);
        $token = $context->get('CurrentToken', true);
        $this->replace['%n'] = $token ? $token->name : null;
        $this->replace['%m'] = $context->get('CurrentAttr', true);
        $this->replace['%p'] = $context->get('CurrentCSSProperty', true);
        // not always available
        if ($this->secretKey) {
            $this->replace['%t'] = hash_hmac("sha256", $string, $this->secretKey);
        }
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php 0000644 00000011541 15121423110 0020056 0 ustar 00 <?php
// does not support network paths
/**
* URI filter that turns relative URIs into absolute ones by resolving
* them against the base URI taken from the URI definition.
*/
class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
{
/**
* @type string
*/
public $name = 'MakeAbsolute';
/**
* Base URI object copied from the URI definition in prepare();
* presumably an HTMLPurifier_URI (it is cloned into $uri in filter()).
* @type
*/
protected $base;
/**
* Path segments of the base URI with the last segment dropped and
* dot segments already collapsed.
* @type array
*/
protected $basePathStack = array();
/**
* Reads the base URI and precomputes its path stack.
* @param HTMLPurifier_Config $config
* @return bool false (with a warning) when no base URI is available
*/
public function prepare($config)
{
$def = $config->getDefinition('URI');
$this->base = $def->base;
if (is_null($this->base)) {
trigger_error(
'URI.MakeAbsolute is being ignored due to lack of ' .
'value for URI.Base configuration',
E_USER_WARNING
);
return false;
}
// note: this writes to the object obtained from the definition;
// no clone is made here
$this->base->fragment = null; // fragment is invalid for base URI
$stack = explode('/', $this->base->path);
array_pop($stack); // discard last segment
$stack = $this->_collapseStack($stack); // do pre-parsing
$this->basePathStack = $stack;
return true;
}
/**
* Resolves $uri against the base URI, modifying it in place.
* @param HTMLPurifier_URI $uri
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool false only when the URI has an explicit scheme that
* is not recognized and no host; true otherwise
*/
public function filter(&$uri, $config, $context)
{
if (is_null($this->base)) {
return true;
} // abort early
if ($uri->path === '' && is_null($uri->scheme) &&
is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)) {
// reference to current document
$uri = clone $this->base;
return true;
}
if (!is_null($uri->scheme)) {
// absolute URI already: don't change
if (!is_null($uri->host)) {
return true;
}
$scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) {
// scheme not recognized
return false;
}
if (!$scheme_obj->hierarchical) {
// non-hierarchal URI with explicit scheme, don't change
return true;
}
// special case: had a scheme but always is hierarchical and had no authority
}
if (!is_null($uri->host)) {
// network path, don't bother
return true;
}
if ($uri->path === '') {
// no path of its own: inherit the base path unchanged
$uri->path = $this->base->path;
} elseif ($uri->path[0] !== '/') {
// relative path, needs more complicated processing
$stack = explode('/', $uri->path);
$new_stack = array_merge($this->basePathStack, $stack);
// make the merged path start with a slash when the base has a host
if ($new_stack[0] !== '' && !is_null($this->base->host)) {
array_unshift($new_stack, '');
}
$new_stack = $this->_collapseStack($new_stack);
$uri->path = implode('/', $new_stack);
} else {
// absolute path, but still we should collapse
$uri->path = implode('/', $this->_collapseStack(explode('/', $uri->path)));
}
// re-combine
// scheme is always taken from the base; the other authority
// components are only filled in when the URI lacks them
$uri->scheme = $this->base->scheme;
if (is_null($uri->userinfo)) {
$uri->userinfo = $this->base->userinfo;
}
if (is_null($uri->host)) {
$uri->host = $this->base->host;
}
if (is_null($uri->port)) {
$uri->port = $this->base->port;
}
return true;
}
/**
* Resolve dots and double-dots in a path stack
* @param array $stack Path segments as produced by explode('/', ...)
* @return array Collapsed segments; an empty segment is appended when
* the last processed segment was '.' or '..'
*/
private function _collapseStack($stack)
{
$result = array();
// true when the most recently processed segment was '.' or '..'
$is_folder = false;
for ($i = 0; isset($stack[$i]); $i++) {
$is_folder = false;
// absorb an internally duplicated slash
if ($stack[$i] == '' && $i && isset($stack[$i + 1])) {
continue;
}
if ($stack[$i] == '..') {
if (!empty($result)) {
$segment = array_pop($result);
if ($segment === '' && empty($result)) {
// error case: attempted to back out too far:
// restore the leading slash
$result[] = '';
} elseif ($segment === '..') {
$result[] = '..'; // cannot remove .. with ..
}
} else {
// relative path, preserve the double-dots
$result[] = '..';
}
$is_folder = true;
continue;
}
if ($stack[$i] == '.') {
// silently absorb
$is_folder = true;
continue;
}
$result[] = $stack[$i];
}
if ($is_folder) {
// path ended in '.' or '..': keep a trailing slash
$result[] = '';
}
return $result;
}
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/URIFilter/DisableResources.php 0000644 00000000716 15121423110 0020742 0 ustar 00 <?php
/**
 * URI filter that rejects every URI flagged as an embedded resource.
 */
class HTMLPurifier_URIFilter_DisableResources extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'DisableResources';

    /**
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool false when the EmbeddedURI context variable is truthy
     */
    public function filter(&$uri, $config, $context)
    {
        $is_embedded = $context->get('EmbeddedURI', true);
        return !$is_embedded;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php 0000644 00000001110 15121423110 0022432 0 ustar 00 <?php
/**
 * Variant of the DisableExternal filter that only applies its host
 * check to URIs flagged as embedded resources.
 */
class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
{
    /**
     * @type string
     */
    public $name = 'DisableExternalResources';

    /**
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function filter(&$uri, $config, $context)
    {
        // Non-embedded URIs pass untouched; embedded ones are delegated
        // to the parent's host check.
        return $context->get('EmbeddedURI', true)
            ? parent::filter($uri, $config, $context)
            : true;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php 0000644 00000002200 15121423110 0020240 0 ustar 00 <?php
// It's not clear to me whether or not Punycode means that hostnames
// do not have canonical forms anymore. As far as I can tell, it's
// not a problem (punycoding should be identity when no Unicode
// points are involved), but I'm not 100% sure
/**
 * URI filter that rejects URIs whose host contains any of the
 * fragments listed in %URI.HostBlacklist.
 */
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'HostBlacklist';

    /**
     * Host fragments read from %URI.HostBlacklist.
     * @type array
     */
    protected $blacklist = array();

    /**
     * @param HTMLPurifier_Config $config
     * @return bool
     */
    public function prepare($config)
    {
        $this->blacklist = $config->get('URI.HostBlacklist');
        return true;
    }

    /**
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool false when the host contains a blacklisted fragment
     */
    public function filter(&$uri, $config, $context)
    {
        // A URI without a host cannot match any host fragment. Returning
        // early also avoids handing null to strpos(), which is deprecated
        // as of PHP 8.1.
        if (is_null($uri->host)) {
            return true;
        }
        foreach ($this->blacklist as $blacklisted_host_fragment) {
            // substring match anywhere in the host
            if (strpos($uri->host, $blacklisted_host_fragment) !== false) {
                return false;
            }
        }
        return true;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/TagTransform.php 0000644 00000002112 15121423110 0016276 0 ustar 00 <?php
/**
 * Defines a mutation of an obsolete tag into a valid tag.
 */
abstract class HTMLPurifier_TagTransform
{
    /**
     * Tag name to transform the tag to.
     * @type string
     */
    public $transform_to;

    /**
     * Transforms the obsolete tag into the valid tag.
     * @param HTMLPurifier_Token_Tag $tag Tag to be transformed.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
     */
    abstract public function transform($tag, $config, $context);

    /**
     * Prepends CSS properties to the style attribute, creating the
     * attribute if it doesn't exist.
     * @warning Copied over from AttrTransform, be sure to keep in sync
     * @param array $attr Attribute array to process (passed by reference)
     * @param string $css CSS to prepend
     */
    protected function prependCSS(&$attr, $css)
    {
        // Normalize a missing (or null) style to the empty string first,
        // so the key is always present before the new CSS is put in front.
        if (!isset($attr['style'])) {
            $attr['style'] = '';
        }
        $attr['style'] = $css . $attr['style'];
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef.php 0000644 00000012113 15121423110 0015222 0 ustar 00 <?php
/**
 * Base class for all validating attribute definitions.
 *
 * This family of classes forms the core for not only HTML attribute validation,
 * but also any sort of string that needs to be validated or cleaned (which
 * means CSS properties and composite definitions are defined here too).
 * Besides defining (through code) what precisely makes the string valid,
 * subclasses are also responsible for cleaning the code if possible.
 */
abstract class HTMLPurifier_AttrDef
{
    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string roughly in the manner specified at
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed, and every remaining line feed, tab and
     * carriage return is replaced with a space. While most useful for HTML
     * attributes specified as CDATA, it can also be applied to most CSS
     * values.
     *
     * @note This method is not entirely standards compliant, as trim() removes
     *       more types of whitespace than specified in the spec. In practice,
     *       this is rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7.  However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     */
    public function parseCDATA($string)
    {
        $trimmed = trim($string);
        return str_replace(array("\n", "\t", "\r"), ' ', $trimmed);
    }

    /**
     * Factory method for creating this class from a string.
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        // Default implementation: hand back this very object as a flyweight.
        // Subclasses whose result depends on $string should override this
        // and clone or, more safely, instantiate a new copy.
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     * @param string $string a CSS colour definition
     * @return string
     */
    protected function mungeRgb($string)
    {
        // One numeric component with optional fraction and percent sign,
        // surrounded by optional whitespace (three capture groups each).
        $component = '\s*(\d+(\.\d+)?([%]?))\s*';
        $has_alpha = preg_match('/(rgba|hsla)\(/', $string);
        if ($has_alpha) {
            $pattern = '/(rgba|hsla)\(' . implode(',', array_fill(0, 4, $component)) . '\)/';
            return preg_replace($pattern, '\1(\2,\5,\8,\11)', $string);
        }
        $pattern = '/(rgb|hsl)\(' . implode(',', array_fill(0, 3, $component)) . '\)/';
        return preg_replace($pattern, '\1(\2,\5,\8)', $string);
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     * @param string $string
     * @return string
     */
    protected function expandCSSEscape($string)
    {
        $out = '';
        $len = strlen($string);
        $pos = 0;
        while ($pos < $len) {
            $ch = $string[$pos];
            if ($ch !== '\\') {
                // ordinary character: copy through
                $out .= $ch;
                $pos++;
                continue;
            }
            // Step onto the character following the backslash.
            $pos++;
            if ($pos >= $len) {
                // lone trailing backslash is kept literally
                $out .= '\\';
                break;
            }
            $next = $string[$pos];
            if (ctype_xdigit($next)) {
                // Hex escape: collect up to six hex digits in total.
                $hex = $next;
                $pos++;
                for ($digits = 1; $pos < $len && $digits < 6; $pos++, $digits++) {
                    if (!ctype_xdigit($string[$pos])) {
                        break;
                    }
                    $hex .= $string[$pos];
                }
                // We have to be extremely careful when adding
                // new characters, to make sure we're not breaking
                // the encoding.
                $char = HTMLPurifier_Encoder::unichr(hexdec($hex));
                if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
                    // Escape is dropped; the character directly after it
                    // is skipped as well.
                    $pos++;
                    continue;
                }
                $out .= $char;
                // A single whitespace character terminating the escape is
                // swallowed; any other character is processed normally.
                $followed_by_text = $pos < $len && trim($string[$pos]) !== '';
                if (!$followed_by_text) {
                    $pos++;
                }
                continue;
            }
            if ($next === "\n") {
                // backslash-newline: both characters vanish
                $pos++;
                continue;
            }
            // Any other escaped character stands for itself.
            $out .= $next;
            $pos++;
        }
        return $out;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Injector/Linkify.php 0000644 00000003753 15121423110 0017065 0 ustar 00 <?php
/**
 * Injector that converts http, https and ftp text URLs to actual links.
 */
class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'Linkify';

    /**
     * @type array
     */
    public $needed = array('a' => array('href'));

    /**
     * Splits a text token around the URLs it contains and replaces it
     * with an array of text tokens and <a href="URL">URL</a> token
     * triples. The token is left untouched when links are not allowed
     * here, when no URL is found, or when the split fails.
     * @param HTMLPurifier_Token $token
     */
    public function handleText(&$token)
    {
        if (!$this->allowsElement('a')) {
            return;
        }
        if (strpos($token->data, '://') === false) {
            // our really quick heuristic failed, abort
            // this may not work so well if we want to match things like
            // "google.com", but then again, most people don't
            return;
        }
        // there is/are URL(s). Let's split the string.
        // We use this regex:
        // https://gist.github.com/gruber/249502
        // but with @cscott's backtracking fix and also
        // the Unicode characters un-Unicodified.
        $bits = preg_split(
            '/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu',
            $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($bits === false) {
            // preg_split() reports PCRE failures (e.g. malformed UTF-8
            // under /u, or an exhausted backtrack limit) by returning
            // false. Keep the original text token instead of discarding
            // it and calling count() on a non-array.
            return;
        }
        $token = array();
        // $i = index
        // $c = count
        // $l = is link
        // With PREG_SPLIT_DELIM_CAPTURE the pieces alternate between
        // plain text (even index) and a captured URL (odd index).
        for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) {
            if (!$l) {
                if ($bits[$i] === '') {
                    continue;
                }
                $token[] = new HTMLPurifier_Token_Text($bits[$i]);
            } else {
                $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i]));
                $token[] = new HTMLPurifier_Token_Text($bits[$i]);
                $token[] = new HTMLPurifier_Token_End('a');
            }
        }
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Injector/AutoParagraph.php 0000644 00000032744 15121423110 0020220 0 ustar 00 <?php
/**
* Injector that auto paragraphs text in the root node based on
* double-spacing.
* @todo Ensure all states are unit tested, including variations as well.
* @todo Make a graph of the flow control for this Injector.
*/
class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
{
/**
* @type string
*/
public $name = 'AutoParagraph';
/**
* @type array
*/
public $needed = array('p');
/**
* @return HTMLPurifier_Token_Start
*/
private function _pStart()
{
$par = new HTMLPurifier_Token_Start('p');
$par->armor['MakeWellFormed_TagClosedError'] = true;
return $par;
}
/**
* @param HTMLPurifier_Token_Text $token
*/
public function handleText(&$token)
{
$text = $token->data;
// Does the current parent allow <p> tags?
if ($this->allowsElement('p')) {
if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) {
// Note that we have differing behavior when dealing with text
// in the anonymous root node, or a node inside the document.
// If the text as a double-newline, the treatment is the same;
// if it doesn't, see the next if-block if you're in the document.
$i = $nesting = null;
if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) {
// State 1.1: ... ^ (whitespace, then document end)
// ----
// This is a degenerate case
} else {
if (!$token->is_whitespace || $this->_isInline($current)) {
// State 1.2: PAR1
// ----
// State 1.3: PAR1\n\nPAR2
// ------------
// State 1.4: <div>PAR1\n\nPAR2 (see State 2)
// ------------
$token = array($this->_pStart());
$this->_splitText($text, $token);
} else {
// State 1.5: \n<hr />
// --
}
}
} else {
// State 2: <div>PAR1... (similar to 1.4)
// ----
// We're in an element that allows paragraph tags, but we're not
// sure if we're going to need them.
if ($this->_pLookAhead()) {
// State 2.1: <div>PAR1<b>PAR1\n\nPAR2
// ----
// Note: This will always be the first child, since any
// previous inline element would have triggered this very
// same routine, and found the double newline. One possible
// exception would be a comment.
$token = array($this->_pStart(), $token);
} else {
// State 2.2.1: <div>PAR1<div>
// ----
// State 2.2.2: <div>PAR1<b>PAR1</b></div>
// ----
}
}
// Is the current parent a <p> tag?
} elseif (!empty($this->currentNesting) &&
$this->currentNesting[count($this->currentNesting) - 1]->name == 'p') {
// State 3.1: ...<p>PAR1
// ----
// State 3.2: ...<p>PAR1\n\nPAR2
// ------------
$token = array();
$this->_splitText($text, $token);
// Abort!
} else {
// State 4.1: ...<b>PAR1
// ----
// State 4.2: ...<b>PAR1\n\nPAR2
// ------------
}
}
/**
* @param HTMLPurifier_Token $token
*/
public function handleElement(&$token)
{
// We don't have to check if we're already in a <p> tag for block
// tokens, because the tag would have been autoclosed by MakeWellFormed.
if ($this->allowsElement('p')) {
if (!empty($this->currentNesting)) {
if ($this->_isInline($token)) {
// State 1: <div>...<b>
// ---
// Check if this token is adjacent to the parent token
// (seek backwards until token isn't whitespace)
$i = null;
$this->backward($i, $prev);
if (!$prev instanceof HTMLPurifier_Token_Start) {
// Token wasn't adjacent
if ($prev instanceof HTMLPurifier_Token_Text &&
substr($prev->data, -2) === "\n\n"
) {
// State 1.1.4: <div><p>PAR1</p>\n\n<b>
// ---
// Quite frankly, this should be handled by splitText
$token = array($this->_pStart(), $token);
} else {
// State 1.1.1: <div><p>PAR1</p><b>
// ---
// State 1.1.2: <div><br /><b>
// ---
// State 1.1.3: <div>PAR<b>
// ---
}
} else {
// State 1.2.1: <div><b>
// ---
// Lookahead to see if <p> is needed.
if ($this->_pLookAhead()) {
// State 1.3.1: <div><b>PAR1\n\nPAR2
// ---
$token = array($this->_pStart(), $token);
} else {
// State 1.3.2: <div><b>PAR1</b></div>
// ---
// State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div>
// ---
}
}
} else {
// State 2.3: ...<div>
// -----
}
} else {
if ($this->_isInline($token)) {
// State 3.1: <b>
// ---
// This is where the {p} tag is inserted, not reflected in
// inputTokens yet, however.
$token = array($this->_pStart(), $token);
} else {
// State 3.2: <div>
// -----
}
$i = null;
if ($this->backward($i, $prev)) {
if (!$prev instanceof HTMLPurifier_Token_Text) {
// State 3.1.1: ...</p>{p}<b>
// ---
// State 3.2.1: ...</p><div>
// -----
if (!is_array($token)) {
$token = array($token);
}
array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
} else {
// State 3.1.2: ...</p>\n\n{p}<b>
// ---
// State 3.2.2: ...</p>\n\n<div>
// -----
// Note: PAR<ELEM> cannot occur because PAR would have been
// wrapped in <p> tags.
}
}
}
} else {
// State 2.2: <ul><li>
// ----
// State 2.4: <p><b>
// ---
}
}
/**
* Splits up a text in paragraph tokens and appends them
* to the result stream that will replace the original
* @param string $data String text data that will be processed
* into paragraphs
* @param HTMLPurifier_Token[] $result Reference to array of tokens that the
* tags will be appended onto
*/
private function _splitText($data, &$result)
{
$raw_paragraphs = explode("\n\n", $data);
$paragraphs = array(); // without empty paragraphs
$needs_start = false;
$needs_end = false;
$c = count($raw_paragraphs);
if ($c == 1) {
// There were no double-newlines, abort quickly. In theory this
// should never happen.
$result[] = new HTMLPurifier_Token_Text($data);
return;
}
for ($i = 0; $i < $c; $i++) {
$par = $raw_paragraphs[$i];
if (trim($par) !== '') {
$paragraphs[] = $par;
} else {
if ($i == 0) {
// Double newline at the front
if (empty($result)) {
// The empty result indicates that the AutoParagraph
// injector did not add any start paragraph tokens.
// This means that we have been in a paragraph for
// a while, and the newline means we should start a new one.
$result[] = new HTMLPurifier_Token_End('p');
$result[] = new HTMLPurifier_Token_Text("\n\n");
// However, the start token should only be added if
// there is more processing to be done (i.e. there are
// real paragraphs in here). If there are none, the
// next start paragraph tag will be handled by the
// next call to the injector
$needs_start = true;
} else {
// We just started a new paragraph!
// Reinstate a double-newline for presentation's sake, since
// it was in the source code.
array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
}
} elseif ($i + 1 == $c) {
// Double newline at the end
// There should be a trailing </p> when we're finally done.
$needs_end = true;
}
}
}
// Check if this was just a giant blob of whitespace. Move this earlier,
// perhaps?
if (empty($paragraphs)) {
return;
}
// Add the start tag indicated by \n\n at the beginning of $data
if ($needs_start) {
$result[] = $this->_pStart();
}
// Append the paragraphs onto the result
foreach ($paragraphs as $par) {
$result[] = new HTMLPurifier_Token_Text($par);
$result[] = new HTMLPurifier_Token_End('p');
$result[] = new HTMLPurifier_Token_Text("\n\n");
$result[] = $this->_pStart();
}
// Remove trailing start token; Injector will handle this later if
// it was indeed needed. This prevents from needing to do a lookahead,
// at the cost of a lookbehind later.
array_pop($result);
// If there is no need for an end tag, remove all of it and let
// MakeWellFormed close it later.
if (!$needs_end) {
array_pop($result); // removes \n\n
array_pop($result); // removes </p>
}
}
/**
 * Tells whether the given token names an element that is listed among
 * the permitted children of <p> (and may therefore live inside a
 * paragraph).
 * @param HTMLPurifier_Token $token
 * @return bool
 */
private function _isInline($token)
{
    $tagName = $token->name;
    return isset($this->htmlDefinition->info['p']->child->elements[$tagName]);
}
/**
 * Scans the upcoming tokens (up to the end of the current node) to decide
 * whether a <p> tag has to be inserted.
 * @return bool True as soon as a token demands a paragraph, false when a
 *         token rules it out or the scan finishes without a verdict.
 */
private function _pLookAhead()
{
    // When we are sitting on a start tag we are already one level deep.
    $nesting = ($this->currentToken instanceof HTMLPurifier_Token_Start) ? 1 : 0;
    $i = null;
    while ($this->forwardUntilEndToken($i, $current, $nesting)) {
        $verdict = $this->_checkNeedsP($current);
        if ($verdict !== null) {
            // First definite answer wins.
            return $verdict;
        }
    }
    return false;
}
/**
 * Inspects one look-ahead token and reports whether it settles the
 * question of wrapping earlier inline content in a paragraph. Intended to
 * be driven by a forwardUntilEndToken() loop.
 * @param HTMLPurifier_Token $current
 * @return bool|null False when a non-inline start tag is hit, true when a
 *         text token contains a double newline, null when this token does
 *         not decide anything.
 */
private function _checkNeedsP($current)
{
    if ($current instanceof HTMLPurifier_Token_Start) {
        // <div>PAR1<div>
        //          ----
        // A block element ends the search with a "no"; an inline one
        // leaves the question open.
        return $this->_isInline($current) ? null : false;
    }
    if ($current instanceof HTMLPurifier_Token_Text
        && strpos($current->data, "\n\n") !== false
    ) {
        // <div>PAR1<b>PAR1\n\nPAR2
        //             ----
        return true;
    }
    // <div>PAR1<b>PAR1...
    //             ----
    // Nothing conclusive yet.
    return null;
}
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Injector/PurifierLinkify.php 0000644 00000003423 15121423110 0020565 0 ustar 00 <?php
/**
 * Injector that turns configuration directive references written as
 * %Namespace.Directive into links.
 */
class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'PurifierLinkify';

    /**
     * URL template read from %AutoFormat.PurifierLinkify.DocURL; every
     * '%s' in it is replaced by the directive name.
     * @type string
     */
    public $docURL;

    /**
     * @type array
     */
    public $needed = array('a' => array('href'));

    /**
     * Reads the documentation URL template, then defers to the parent.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Whatever the parent prepare() returns
     */
    public function prepare($config, $context)
    {
        $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL');
        return parent::prepare($config, $context);
    }

    /**
     * Replaces the text token by a list of tokens in which every
     * %Namespace.Directive occurrence is wrapped in an <a> element.
     * @param HTMLPurifier_Token $token
     */
    public function handleText(&$token)
    {
        if (!$this->allowsElement('a')) {
            return;
        }
        // Cheap pre-check: no percent sign, nothing to linkify.
        if (strpos($token->data, '%') === false) {
            return;
        }
        // With PREG_SPLIT_DELIM_CAPTURE the result alternates between
        // plain text (even positions) and captured directive names
        // (odd positions).
        $pieces = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
        $replacement = array();
        $total = count($pieces);
        for ($pos = 0; $pos < $total; $pos++) {
            $piece = $pieces[$pos];
            if ($pos % 2 === 1) {
                $replacement[] = new HTMLPurifier_Token_Start(
                    'a',
                    array('href' => str_replace('%s', $piece, $this->docURL))
                );
                $replacement[] = new HTMLPurifier_Token_Text('%' . $piece);
                $replacement[] = new HTMLPurifier_Token_End('a');
                continue;
            }
            // Empty text fragments are dropped.
            if ($piece !== '') {
                $replacement[] = new HTMLPurifier_Token_Text($piece);
            }
        }
        $token = $replacement;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Injector/DisplayLinkURI.php 0000644 00000001533 15121423110 0020255 0 ustar 00 <?php
/**
 * Injector that strips the href from an anchor and prints the URL in
 * parentheses after the link text instead of linking to it.
 */
class HTMLPurifier_Injector_DisplayLinkURI extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'DisplayLinkURI';

    /**
     * @type array
     */
    public $needed = array('a');

    /**
     * Start and empty tokens are left untouched.
     * @param $token
     */
    public function handleElement(&$token)
    {
    }

    /**
     * When the matching start tag carries an href, removes it from the
     * start tag and appends a text token " (URL)" after the end tag.
     * @param HTMLPurifier_Token $token
     */
    public function handleEnd(&$token)
    {
        if (!isset($token->start->attr['href'])) {
            // nothing to display
            return;
        }
        $href = $token->start->attr['href'];
        unset($token->start->attr['href']);
        $suffix = new HTMLPurifier_Token_Text(' (' . $href . ')');
        $token = array($token, $suffix);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Injector/SafeObject.php 0000644 00000007557 15121423110 0017473 0 ustar 00 <?php
/**
 * Adds important param elements to inside of object in order to make
 * things safe.
 */
class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'SafeObject';

    /**
     * @type array
     */
    public $needed = array('object', 'param');

    /**
     * Start tokens of the object elements we are currently inside of,
     * innermost last.
     * @type array
     */
    protected $objectStack = array();

    /**
     * One array per open object, parallel to $objectStack, keyed by param
     * name.
     * @type array
     */
    protected $paramStack = array();

    /**
     * Params injected right after every object start tag.
     * Keep this synchronized with AttrTransform/SafeParam.php.
     * @type array
     */
    protected $addParam = array(
        'allowScriptAccess' => 'never',
        'allowNetworking' => 'internal',
    );

    /**
     * Param names that are passed through untouched.
     * These are all lower-case keys.
     * @type array
     */
    protected $allowedParam = array(
        'wmode' => true,
        'movie' => true,
        'flashvars' => true,
        'src' => true,
        'allowfullscreen' => true, // if omitted, assume to be 'false'
    );

    /**
     * Delegates to the parent implementation. Note that the parent's
     * return value is discarded, so this method always returns null.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return void
     */
    public function prepare($config, $context)
    {
        parent::prepare($config, $context);
    }

    /**
     * For an object tag: pushes it on the stacks and replaces it by the
     * tag followed by the mandatory params from $addParam.
     * For a param tag: keeps it only when it sits directly inside an
     * object, has a name, and that name is whitelisted; otherwise the
     * token is set to false.
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        if ($token->name == 'object') {
            $this->objectStack[] = $token;
            $this->paramStack[] = array();
            $new = array($token);
            foreach ($this->addParam as $name => $value) {
                $new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value));
            }
            $token = $new;
        } elseif ($token->name == 'param') {
            $nest = count($this->currentNesting) - 1;
            if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') {
                $i = count($this->objectStack) - 1;
                if (!isset($token->attr['name'])) {
                    $token = false;
                    return;
                }
                $n = $token->attr['name'];
                // We need this fix because YouTube doesn't supply a data
                // attribute, which we need if a type is specified. This is
                // *very* Flash specific.
                // Only copy when the param actually has a value: reading a
                // missing 'value' key raises a warning and would store
                // null as the data attribute.
                if (!isset($this->objectStack[$i]->attr['data']) &&
                    ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src') &&
                    isset($token->attr['value'])
                ) {
                    $this->objectStack[$i]->attr['data'] = $token->attr['value'];
                }
                // Check if the parameter is the correct value but has not
                // already been added
                // NOTE(review): this compares the param *name* against the
                // required *value* from $addParam, so with the current
                // $addParam entries the branch can never be taken; params
                // named like an $addParam key therefore fall through and
                // are dropped (their lower-cased names are not in
                // $allowedParam). Left as is because comparing
                // $token->attr['value'] instead would keep such params in
                // addition to the injected ones and change the output.
                if (!isset($this->paramStack[$i][$n]) &&
                    isset($this->addParam[$n]) &&
                    $token->attr['name'] === $this->addParam[$n]) {
                    // keep token, and add to param stack
                    $this->paramStack[$i][$n] = true;
                } elseif (isset($this->allowedParam[strtolower($n)])) {
                    // keep token, don't do anything to it
                    // (could possibly check for duplicates here)
                    // Note: In principle, parameters should be case sensitive.
                    // But it seems they are not really; so accept any case.
                } else {
                    $token = false;
                }
            } else {
                // not directly inside an object, DENY!
                $token = false;
            }
        }
    }

    /**
     * Pops the bookkeeping stacks when an object element is closed.
     * @param HTMLPurifier_Token $token
     */
    public function handleEnd(&$token)
    {
        // This is the WRONG way of handling the object and param stacks;
        // we should be inserting them directly on the relevant object tokens
        // so that the global stack handling handles it.
        if ($token->name == 'object') {
            array_pop($this->objectStack);
            array_pop($this->paramStack);
        }
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php 0000644 00000003746 15121423110 0023357 0 ustar 00 <?php
/**
 * Injector that removes spans with no attributes
 */
class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'RemoveSpansWithoutAttributes';

    /**
     * @type array
     */
    public $needed = array('span');

    /**
     * @type HTMLPurifier_AttrValidator
     */
    private $attrValidator;

    /**
     * Used by AttrValidator.
     * @type HTMLPurifier_Config
     */
    private $config;

    /**
     * @type HTMLPurifier_Context
     */
    private $context;

    /**
     * Closing span tokens whose opening tag was removed and which must be
     * removed as well once they are reached. Kept here instead of tagging
     * the tokens with an undeclared property, because creating dynamic
     * properties is deprecated as of PHP 8.2.
     * @type SplObjectStorage
     */
    private $markForDeletion;

    public function __construct()
    {
        $this->markForDeletion = new SplObjectStorage();
    }

    /**
     * Stores config/context for attribute validation and defers to the
     * parent.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Whatever the parent prepare() returns
     */
    public function prepare($config, $context)
    {
        $this->attrValidator = new HTMLPurifier_AttrValidator();
        $this->config = $config;
        $this->context = $context;
        return parent::prepare($config, $context);
    }

    /**
     * Removes an opening span that has no attributes left after validation
     * and remembers its closing tag for removal.
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) {
            return;
        }
        // We need to validate the attributes now since this doesn't normally
        // happen until after MakeWellFormed. If all the attributes are removed
        // the span needs to be removed too.
        $this->attrValidator->validateToken($token, $this->config, $this->context);
        $token->armor['ValidateAttributes'] = true;
        if (!empty($token->attr)) {
            return;
        }
        // Walk forward to the token that ends this node; the loop body is
        // intentionally empty, we only want the final $current.
        $nesting = 0;
        $i = null;
        $current = null;
        while ($this->forwardUntilEndToken($i, $current, $nesting)) {
        }
        if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') {
            // Mark closing span tag for deletion
            $this->markForDeletion[$current] = true;
            // Delete open span tag
            $token = false;
        }
    }

    /**
     * Removes a closing tag that was marked by handleElement().
     * @param HTMLPurifier_Token $token
     */
    public function handleEnd(&$token)
    {
        if (isset($this->markForDeletion[$token])) {
            unset($this->markForDeletion[$token]);
            $token = false;
        }
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Injector/RemoveEmpty.php 0000644 00000006664 15121423110 0017740 0 ustar 00 <?php
/**
* Injector that drops a start tag when nothing but ignorable text stands
* between it and its own end tag (or the end of the input), unless the
* element carries attributes that make it worth keeping.
*/
class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
{
/**
* @type HTMLPurifier_Context
*/
private $context;
/**
* @type HTMLPurifier_Config
*/
private $config;
/**
* @type HTMLPurifier_AttrValidator
*/
private $attrValidator;
/**
* Cached contents of %AutoFormat.RemoveEmpty.RemoveNbsp
* @type bool
*/
private $removeNbsp;
/**
* Cached contents of %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions;
* looked up by element name with isset().
* NOTE(review): the original annotation says bool, but the value is
* indexed like an array in handleElement() - confirm the real type.
* @type bool
*/
private $removeNbspExceptions;
/**
* Cached contents of %AutoFormat.RemoveEmpty.Predicate
* @type array
*/
private $exclude;
/**
* Caches config/context and the RemoveEmpty directives.
* Note that the return value of the parent prepare() is discarded.
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return void
*/
public function prepare($config, $context)
{
parent::prepare($config, $context);
$this->config = $config;
$this->context = $context;
$this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp');
$this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions');
$this->exclude = $config->get('AutoFormat.RemoveEmpty.Predicate');
// Normalize predicate entries given as 'attr1;attr2' strings into lists.
foreach ($this->exclude as $key => $attrs) {
if (!is_array($attrs)) {
// HACK, see HTMLPurifier/Printer/ConfigForm.php
$this->exclude[$key] = explode(';', $attrs);
}
}
$this->attrValidator = new HTMLPurifier_AttrValidator();
}
/**
* Replaces an empty element's start token by a deletion count and
* schedules a rewind; leaves every other token untouched.
* @param HTMLPurifier_Token $token
*/
public function handleElement(&$token)
{
if (!$token instanceof HTMLPurifier_Token_Start) {
return;
}
$next = false;
$deleted = 1; // the current tag
// Walk the upcoming tokens (the end of inputZipper->back is the next
// token, cf. HTMLPurifier_Injector::forward()), skipping ignorable text
// and counting every skipped token in $deleted.
for ($i = count($this->inputZipper->back) - 1; $i >= 0; $i--, $deleted++) {
$next = $this->inputZipper->back[$i];
if ($next instanceof HTMLPurifier_Token_Text) {
if ($next->is_whitespace) {
continue;
}
if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) {
// "\xC2\xA0" is the UTF-8 encoding of U+00A0 (no-break space)
$plain = str_replace("\xC2\xA0", "", $next->data);
$isWsOrNbsp = $plain === '' || ctype_space($plain);
if ($isWsOrNbsp) {
continue;
}
}
}
// First token that is not ignorable text: stop here, $next holds it.
break;
}
// Empty means: no following token at all, or the first significant
// token is this element's own end tag.
if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) {
$this->attrValidator->validateToken($token, $this->config, $this->context);
$token->armor['ValidateAttributes'] = true;
// Keep the element when every attribute listed in the predicate for
// this element name is present.
if (isset($this->exclude[$token->name])) {
$r = true;
foreach ($this->exclude[$token->name] as $elem) {
if (!isset($token->attr[$elem])) $r = false;
}
if ($r) return;
}
// Elements with an id or name attribute are kept as well.
if (isset($token->attr['id']) || isset($token->attr['name'])) {
return;
}
// An integer replacement; presumably the number of tokens to delete
// (start tag, skipped text, end tag) - see
// HTMLPurifier_Strategy_MakeWellFormed->processToken() to confirm.
$token = $deleted + 1;
// Count whitespace-only text tokens at the start of inputZipper->front.
// NOTE(review): HTMLPurifier_Injector::backward() treats the *end* of
// front as the previous token, while this loop starts at index 0 -
// confirm that this is the intended direction.
for ($b = 0, $c = count($this->inputZipper->front); $b < $c; $b++) {
$prev = $this->inputZipper->front[$b];
if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) {
continue;
}
break;
}
// This is safe because we removed the token that triggered this.
$this->rewindOffset($b+$deleted);
return;
}
}
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Node/Element.php 0000644 00000003275 15121423110 0016160 0 ustar 00 <?php
/**
 * Concrete element node class.
 */
class HTMLPurifier_Node_Element extends HTMLPurifier_Node
{
    /**
     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
     *
     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
     * be lower-casing them, but these tokens cater to HTML tags, which are
     * insensitive.
     * @type string
     */
    public $name;

    /**
     * Associative array of the node's attributes.
     * @type array
     */
    public $attr = array();

    /**
     * List of child elements.
     * @type array
     */
    public $children = array();

    /**
     * Does this use the <a></a> form or the <a /> form, i.e.
     * is it a pair of start/end tokens or an empty token.
     * @type bool
     */
    public $empty = false;

    /**
     * Column, line and armor handed to the end token in toTokenPair().
     */
    public $endCol = null;
    public $endLine = null;
    public $endArmor = array();

    /**
     * Stores the given values on the node.
     * @param string $name
     * @param array $attr
     * @param int $line
     * @param int $col
     * @param array $armor
     */
    public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array())
    {
        $this->name = $name;
        $this->attr = $attr;
        $this->line = $line;
        $this->col = $col;
        $this->armor = $armor;
    }

    /**
     * Converts the node back into tokens.
     * @return array Two-element array: (empty token, null) for an empty
     *         element, (start token, end token) otherwise.
     */
    public function toTokenPair()
    {
        // XXX inefficiency here, normalization is not necessary
        if (!$this->empty) {
            $opening = new HTMLPurifier_Token_Start($this->name, $this->attr, $this->line, $this->col, $this->armor);
            $closing = new HTMLPurifier_Token_End($this->name, array(), $this->endLine, $this->endCol, $this->endArmor);
            //$closing->start = $opening;
            return array($opening, $closing);
        }
        $single = new HTMLPurifier_Token_Empty($this->name, $this->attr, $this->line, $this->col, $this->armor);
        return array($single, null);
    }
}
htmlpurifier/library/HTMLPurifier/Node/Text.php 0000644 00000002544 15121423110 0015511 0 ustar 00 <?php
/**
 * Concrete text node class.
 *
 * Text nodes hold regular parsed character data (PCDATA) and raw
 * character data (from CDATA sections). Internally, their data is parsed
 * with all entities expanded. Surprisingly, the text node does have a
 * "tag name" called #PCDATA, which is how the DTD represents it in
 * permissible child nodes.
 */
class HTMLPurifier_Node_Text extends HTMLPurifier_Node
{
    /**
     * PCDATA tag name compatible with DTD, see
     * HTMLPurifier_ChildDef_Custom for details.
     * @type string
     */
    public $name = '#PCDATA';

    /**
     * Parsed character data of text.
     * @type string
     */
    public $data;

    /**
     * Bool indicating if node is whitespace.
     * @type bool
     */
    public $is_whitespace;

    /**
     * Transparent constructor; the whitespace flag is supplied by the
     * caller, not computed here.
     * @param string $data String parsed character data.
     * @param bool $is_whitespace
     * @param int $line
     * @param int $col
     */
    public function __construct($data, $is_whitespace, $line = null, $col = null)
    {
        $this->line = $line;
        $this->col = $col;
        $this->data = $data;
        $this->is_whitespace = $is_whitespace;
    }

    /**
     * Converts the node back into a token.
     * @return array Two-element array: (text token, null)
     */
    public function toTokenPair()
    {
        $textToken = new HTMLPurifier_Token_Text($this->data, $this->line, $this->col);
        return array($textToken, null);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Node/Comment.php 0000644 00000001325 15121423110 0016163 0 ustar 00 <?php
/**
 * Concrete comment node class.
 */
class HTMLPurifier_Node_Comment extends HTMLPurifier_Node
{
    /**
     * Character data within comment.
     * @type string
     */
    public $data;

    /**
     * Always true for comment nodes.
     * @type bool
     */
    public $is_whitespace = true;

    /**
     * Transparent constructor.
     *
     * @param string $data String comment data.
     * @param int $line
     * @param int $col
     */
    public function __construct($data, $line = null, $col = null)
    {
        $this->line = $line;
        $this->col = $col;
        $this->data = $data;
    }

    /**
     * Converts the node back into a token.
     * @return array Two-element array: (comment token, null)
     */
    public function toTokenPair()
    {
        $commentToken = new HTMLPurifier_Token_Comment($this->data, $this->line, $this->col);
        return array($commentToken, null);
    }
}
htmlpurifier/library/HTMLPurifier/Injector.php 0000644 00000021456 15121423110 0015460 0 ustar 00 <?php
/**
 * Injects tokens into the document while parsing for well-formedness.
 * This enables "formatter-like" functionality such as auto-paragraphing,
 * smiley-ification and linkification to take place.
 *
 * A note on how handlers create changes; this is done by assigning a new
 * value to the $token reference. These values can take a variety of forms and
 * are best described in the HTMLPurifier_Strategy_MakeWellFormed->processToken()
 * documentation.
 *
 * @todo Allow injectors to request a re-run on their output. This
 *       would help if an operation is recursive.
 */
abstract class HTMLPurifier_Injector
{
    /**
     * Advisory name of injector, this is for friendly error messages.
     * @type string
     */
    public $name;

    /**
     * @type HTMLPurifier_HTMLDefinition
     */
    protected $htmlDefinition;

    /**
     * Reference to CurrentNesting variable in Context. This is an array
     * list of tokens that we are currently "inside"
     * @type array
     */
    protected $currentNesting;

    /**
     * Reference to current token.
     * @type HTMLPurifier_Token
     */
    protected $currentToken;

    /**
     * Reference to InputZipper variable in Context.
     * @type HTMLPurifier_Zipper
     */
    protected $inputZipper;

    /**
     * Array of elements and attributes this injector creates and therefore
     * need to be allowed by the definition. Takes form of
     * array('element' => array('attr', 'attr2'), 'element2')
     * @type array
     */
    public $needed = array();

    /**
     * Number of elements to rewind backwards (relative).
     * @type bool|int
     */
    protected $rewindOffset = false;

    /**
     * Rewind to a spot to re-perform processing. This is useful if you
     * deleted a node, and now need to see if this change affected any
     * earlier nodes. Rewinding does not affect other injectors, and can
     * result in infinite loops if not used carefully.
     * @param bool|int $offset
     * @warning HTML Purifier will prevent you from fast-forwarding with this
     *          function.
     */
    public function rewindOffset($offset)
    {
        $this->rewindOffset = $offset;
    }

    /**
     * Hands out the pending rewind offset and resets it to false.
     * @return bool|int
     */
    public function getRewindOffset()
    {
        $pending = $this->rewindOffset;
        $this->rewindOffset = false;
        return $pending;
    }

    /**
     * Prepares the injector by giving it the config and context objects:
     * this allows references to important variables to be made within
     * the injector. This function also checks if the HTML environment
     * will work with the Injector (see checkNeeded()).
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
     */
    public function prepare($config, $context)
    {
        $this->htmlDefinition = $config->getHTMLDefinition();
        // Even though this might fail, some unit tests ignore this and
        // still test checkNeeded, so be careful. Maybe get rid of that
        // dependency.
        $missing = $this->checkNeeded($config);
        if ($missing === false) {
            // Only bind to the context variables when everything we need
            // is available.
            $this->currentNesting =& $context->get('CurrentNesting');
            $this->currentToken =& $context->get('CurrentToken');
            $this->inputZipper =& $context->get('InputZipper');
        }
        return $missing;
    }

    /**
     * This function checks if the HTML environment
     * will work with the Injector: if p tags are not allowed, the
     * Auto-Paragraphing injector should not be enabled.
     * @param HTMLPurifier_Config $config
     * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
     */
    public function checkNeeded($config)
    {
        $definition = $config->getHTMLDefinition();
        foreach ($this->needed as $key => $spec) {
            // 'element' entries come with an integer key, while
            // 'element' => array(attributes) entries are keyed by name.
            $elementName = is_int($key) ? $spec : $key;
            if (!isset($definition->info[$elementName])) {
                return $elementName;
            }
            if (is_array($spec)) {
                foreach ($spec as $attrName) {
                    if (!isset($definition->info[$elementName]->attr[$attrName])) {
                        return $elementName . '.' . $attrName;
                    }
                }
            }
        }
        return false;
    }

    /**
     * Tests if the context node allows a certain element
     * @param string $name Name of element to test for
     * @return bool True if element is allowed, false if it is not
     */
    public function allowsElement($name)
    {
        if (empty($this->currentNesting)) {
            $parentDef = $this->htmlDefinition->info_parent_def;
        } else {
            // Peek at the innermost open element (pop, then push it back).
            $innermost = array_pop($this->currentNesting);
            $this->currentNesting[] = $innermost;
            $parentDef = $this->htmlDefinition->info[$innermost->name];
        }
        $isChild = isset($parentDef->child->elements[$name]);
        if (!$isChild || isset($parentDef->excludes[$name])) {
            return false;
        }
        // check for exclusion by any of the outer ancestors
        if (!empty($this->currentNesting)) {
            $depth = count($this->currentNesting) - 2;
            while ($depth >= 0) {
                $ancestor = $this->currentNesting[$depth];
                $ancestorDef = $this->htmlDefinition->info[$ancestor->name];
                if (isset($ancestorDef->excludes[$name])) {
                    return false;
                }
                $depth--;
            }
        }
        return true;
    }

    /**
     * Iterator function, which starts with the next token and continues until
     * you reach the end of the input tokens.
     * @warning Please prevent previous references from interfering with this
     *          functions by setting $i = null beforehand!
     * @param int $i Current integer index variable for inputTokens
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @return bool
     */
    protected function forward(&$i, &$current)
    {
        if ($i !== null) {
            $i--;
        } else {
            // First call: the last entry of "back" is the next token.
            $i = count($this->inputZipper->back) - 1;
        }
        if ($i >= 0) {
            $current = $this->inputZipper->back[$i];
            return true;
        }
        return false;
    }

    /**
     * Similar to forward(), but accepts a third parameter $nesting (which
     * should be initialized at 0) and stops when we hit the end tag
     * for the node the iteration started in.
     * @param int $i Current integer index variable for inputTokens
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @param int $nesting
     * @return bool
     */
    protected function forwardUntilEndToken(&$i, &$current, &$nesting)
    {
        if (!$this->forward($i, $current)) {
            return false;
        }
        if ($nesting === null) {
            $nesting = 0;
        }
        if ($current instanceof HTMLPurifier_Token_Start) {
            $nesting++;
            return true;
        }
        if ($current instanceof HTMLPurifier_Token_End) {
            // An end tag at depth zero closes the node we started in.
            if ($nesting <= 0) {
                return false;
            }
            $nesting--;
        }
        return true;
    }

    /**
     * Iterator function, starts with the previous token and continues until
     * you reach the beginning of input tokens.
     * @warning Please prevent previous references from interfering with this
     *          functions by setting $i = null beforehand!
     * @param int $i Current integer index variable for inputTokens
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @return bool
     */
    protected function backward(&$i, &$current)
    {
        if ($i !== null) {
            $i--;
        } else {
            // First call: the last entry of "front" is the previous token.
            $i = count($this->inputZipper->front) - 1;
        }
        if ($i >= 0) {
            $current = $this->inputZipper->front[$i];
            return true;
        }
        return false;
    }

    /**
     * Handler that is called when a text token is processed
     */
    public function handleText(&$token)
    {
    }

    /**
     * Handler that is called when a start or empty token is processed
     */
    public function handleElement(&$token)
    {
    }

    /**
     * Handler that is called when an end token is processed
     */
    public function handleEnd(&$token)
    {
        $this->notifyEnd($token);
    }

    /**
     * Notifier that is called when an end token is processed
     * @param HTMLPurifier_Token $token Current token variable.
     * @note This differs from handlers in that the token is read-only
     * @deprecated
     */
    public function notifyEnd($token)
    {
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/Doctype.php 0000644 00000003056 15121423110 0015306 0 ustar 00 <?php
/**
 * Represents a document type, contains information on which modules
 * need to be loaded.
 * @note This class is inspected by Printer_HTMLDefinition->renderDoctype.
 *       If structure changes, please update that function.
 */
class HTMLPurifier_Doctype
{
    /**
     * Full name of doctype
     * @type string
     */
    public $name;

    /**
     * List of standard modules (string identifiers or literal objects)
     * that this doctype uses
     * @type array
     */
    public $modules = array();

    /**
     * List of modules to use for tidying up code
     * @type array
     */
    public $tidyModules = array();

    /**
     * Is the language derived from XML (i.e. XHTML)?
     * @type bool
     */
    public $xml = true;

    /**
     * List of aliases for this doctype
     * @type array
     */
    public $aliases = array();

    /**
     * Public DTD identifier
     * @type string
     */
    public $dtdPublic;

    /**
     * System DTD identifier
     * @type string
     */
    public $dtdSystem;

    /**
     * Plain value constructor; every argument is stored on the property
     * of the corresponding name.
     * @param string $name
     * @param bool $xml
     * @param array $modules
     * @param array $tidyModules
     * @param array $aliases
     * @param string $dtd_public
     * @param string $dtd_system
     */
    public function __construct(
        $name = null,
        $xml = true,
        $modules = array(),
        $tidyModules = array(),
        $aliases = array(),
        $dtd_public = null,
        $dtd_system = null
    ) {
        // identification
        $this->name = $name;
        $this->aliases = $aliases;
        $this->dtdPublic = $dtd_public;
        $this->dtdSystem = $dtd_system;
        // language flavour and module lists
        $this->xml = $xml;
        $this->modules = $modules;
        $this->tidyModules = $tidyModules;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/TokenFactory.php 0000644 00000006033 15121423110 0016305 0 ustar 00 <?php
/**
 * Factory for token generation.
 *
 * @note Doing some benchmarking indicates that the new operator is much
 *       slower than the clone operator (even discounting the cost of the
 *       constructor). This class is for that optimization.
 *       Other than that, there's not much point as we don't
 *       maintain parallel HTMLPurifier_Token hierarchies (the main reason why
 *       you'd want to use an abstract factory).
 * @todo Port DirectLex to use this
 */
class HTMLPurifier_TokenFactory
{
    // p stands for prototype

    /**
     * @type HTMLPurifier_Token_Start
     */
    private $p_start;

    /**
     * @type HTMLPurifier_Token_End
     */
    private $p_end;

    /**
     * @type HTMLPurifier_Token_Empty
     */
    private $p_empty;

    /**
     * @type HTMLPurifier_Token_Text
     */
    private $p_text;

    /**
     * @type HTMLPurifier_Token_Comment
     */
    private $p_comment;

    /**
     * Generates blank prototypes for cloning.
     */
    public function __construct()
    {
        $noAttributes = array();
        $this->p_start = new HTMLPurifier_Token_Start('', $noAttributes);
        $this->p_end = new HTMLPurifier_Token_End('');
        $this->p_empty = new HTMLPurifier_Token_Empty('', $noAttributes);
        $this->p_text = new HTMLPurifier_Token_Text('');
        $this->p_comment = new HTMLPurifier_Token_Comment('');
    }

    /**
     * Creates a HTMLPurifier_Token_Start by cloning the prototype and
     * re-running its constructor.
     * @param string $name Tag name
     * @param array $attr Associative array of attributes
     * @return HTMLPurifier_Token_Start Generated HTMLPurifier_Token_Start
     */
    public function createStart($name, $attr = array())
    {
        $startToken = clone $this->p_start;
        $startToken->__construct($name, $attr);
        return $startToken;
    }

    /**
     * Creates a HTMLPurifier_Token_End.
     * @param string $name Tag name
     * @return HTMLPurifier_Token_End Generated HTMLPurifier_Token_End
     */
    public function createEnd($name)
    {
        $endToken = clone $this->p_end;
        $endToken->__construct($name);
        return $endToken;
    }

    /**
     * Creates a HTMLPurifier_Token_Empty.
     * @param string $name Tag name
     * @param array $attr Associative array of attributes
     * @return HTMLPurifier_Token_Empty Generated HTMLPurifier_Token_Empty
     */
    public function createEmpty($name, $attr = array())
    {
        $emptyToken = clone $this->p_empty;
        $emptyToken->__construct($name, $attr);
        return $emptyToken;
    }

    /**
     * Creates a HTMLPurifier_Token_Text.
     * @param string $data Data of text token
     * @return HTMLPurifier_Token_Text Generated HTMLPurifier_Token_Text
     */
    public function createText($data)
    {
        $textToken = clone $this->p_text;
        $textToken->__construct($data);
        return $textToken;
    }

    /**
     * Creates a HTMLPurifier_Token_Comment.
     * @param string $data Data of comment token
     * @return HTMLPurifier_Token_Comment Generated HTMLPurifier_Token_Comment
     */
    public function createComment($data)
    {
        $commentToken = clone $this->p_comment;
        $commentToken->__construct($data);
        return $commentToken;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php 0000644 00000010363 15121423110 0015657 0 ustar 00 <?php
/**
* Validates the HTML attribute style, otherwise known as CSS.
* @note We don't implement the whole CSS specification, so it might be
* difficult to reuse this component in the context of validating
* actual stylesheet declarations.
* @note If we were really serious about validating the CSS, we would
* tokenize the styles and then parse the tokens. Obviously, we
* are not doing that. Doing that could seriously harm performance,
* but would make these components a lot more viable for a CSS
* filtering solution.
*/
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
{
/**
* Splits the style value into declarations, validates each property with
* the validator from the CSS definition and reassembles the accepted
* ones as "property:value;" pairs.
* @param string $css
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string The rebuilt declaration list, or false when no
* declaration survived
*/
public function validate($css, $config, $context)
{
$css = $this->parseCDATA($css);
$definition = $config->getCSSDefinition();
$allow_duplicates = $config->get("CSS.AllowDuplicates");
// According to the CSS2.1 spec, the places where a
// non-delimiting semicolon can appear are in strings and
// escape sequences. So here is some dumb hack to
// handle quotes.
// (Backslash escapes are not interpreted by this loop.)
$len = strlen($css);
$accum = "";
$declarations = array();
$quoted = false;
for ($i = 0; $i < $len; $i++) {
// Copy everything up to the next semicolon or quote character.
$c = strcspn($css, ";'\"", $i);
$accum .= substr($css, $i, $c);
$i += $c;
if ($i == $len) break;
$d = $css[$i];
if ($quoted) {
// Inside a string: semicolons are literal; only the same quote
// character that opened the string closes it.
$accum .= $d;
if ($d == $quoted) {
$quoted = false;
}
} else {
if ($d == ";") {
// An unquoted semicolon terminates the declaration.
$declarations[] = $accum;
$accum = "";
} else {
// A quote character opens a string.
$accum .= $d;
$quoted = $d;
}
}
}
// Trailing declaration without a terminating semicolon.
if ($accum != "") $declarations[] = $accum;
$propvalues = array();
$new_declarations = '';
/**
* Name of the current CSS property being validated.
*/
$property = false;
// presumably registered by reference so that validators can read the
// property currently being processed - confirm against
// HTMLPurifier_Context::register()
$context->register('CurrentCSSProperty', $property);
foreach ($declarations as $declaration) {
if (!$declaration) {
continue;
}
// strpos() yields false (no colon) or 0 (colon first, i.e. empty
// property name); both cases are skipped.
if (!strpos($declaration, ':')) {
continue;
}
list($property, $value) = explode(':', $declaration, 2);
$property = trim($property);
$value = trim($value);
$ok = false;
// Single-pass do/while used as a breakable block: try the name as
// written first, then its lower-cased form.
do {
if (isset($definition->info[$property])) {
$ok = true;
break;
}
if (ctype_lower($property)) {
// Already all lower-case letters, lower-casing cannot help.
break;
}
$property = strtolower($property);
if (isset($definition->info[$property])) {
$ok = true;
break;
}
} while (0);
if (!$ok) {
continue;
}
// inefficient call, since the validator will do this again
if (strtolower(trim($value)) !== 'inherit') {
// inherit works for everything (but only on the base property)
$result = $definition->info[$property]->validate(
$value,
$config,
$context
);
} else {
$result = 'inherit';
}
if ($result === false) {
continue;
}
if ($allow_duplicates) {
// Emit immediately, keeping repeated properties.
$new_declarations .= "$property:$result;";
} else {
// Keyed by property name, so a later declaration of the same
// property overwrites the earlier value.
$propvalues[$property] = $result;
}
}
$context->destroy('CurrentCSSProperty');
// procedure does not write the new CSS simultaneously, so it's
// slightly inefficient, but it's the only way of getting rid of
// duplicates. Perhaps config to optimize it, but not now.
foreach ($propvalues as $prop => $value) {
$new_declarations .= "$prop:$value;";
}
return $new_declarations ? $new_declarations : false;
}
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/Switch.php 0000644 00000002411 15121423110 0016463 0 ustar 00 <?php
/**
 * Decorator that, depending on a token, switches between two definitions.
 */
class HTMLPurifier_AttrDef_Switch
{
    /**
     * Tag name that selects $withTag.
     * @type string
     */
    protected $tag;

    /**
     * @type HTMLPurifier_AttrDef
     */
    protected $withTag;

    /**
     * @type HTMLPurifier_AttrDef
     */
    protected $withoutTag;

    /**
     * @param string $tag Tag name to switch upon
     * @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
     * @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
     */
    public function __construct($tag, $with_tag, $without_tag)
    {
        $this->withoutTag = $without_tag;
        $this->withTag = $with_tag;
        $this->tag = $tag;
    }

    /**
     * Forwards validation to one of the two wrapped definitions, chosen
     * by comparing the name of the context's CurrentToken with the
     * configured tag.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $current = $context->get('CurrentToken', true);
        $tagMatches = $current && $current->name === $this->tag;
        $delegate = $tagMatches ? $this->withTag : $this->withoutTag;
        return $delegate->validate($string, $config, $context);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/Integer.php 0000644 00000004763 15121423110 0016633 0 ustar 00 <?php
/**
 * Validates an integer.
 * @note While this class was modeled off the CSS definition, no currently
 *       allowed CSS uses this type.  The properties that do are: widows,
 *       orphans, z-index, counter-increment, counter-reset.  Some of the
 *       HTML attributes, however, find use for a non-negative version of this.
 */
class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
{
    /**
     * Whether or not negative values are allowed.
     * @type bool
     */
    protected $negative = true;

    /**
     * Whether or not zero is allowed.
     * @type bool
     */
    protected $zero = true;

    /**
     * Whether or not positive values are allowed.
     * @type bool
     */
    protected $positive = true;

    /**
     * @param $negative Bool indicating whether or not negative values are allowed
     * @param $zero Bool indicating whether or not zero is allowed
     * @param $positive Bool indicating whether or not positive values are allowed
     */
    public function __construct($negative = true, $zero = true, $positive = true)
    {
        $this->positive = $positive;
        $this->zero = $zero;
        $this->negative = $negative;
    }

    /**
     * Accepts an optionally signed run of decimal digits that falls in an
     * allowed sign class. A leading '+' is dropped and '-0' becomes '0'.
     * @param string $integer
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The normalized integer string, or false
     */
    public function validate($integer, $config, $context)
    {
        $value = $this->parseCDATA($integer);
        if ($value === '') {
            return false;
        }
        // we could possibly simply typecast it to integer, but there are
        // certain fringe cases that must not return an integer.

        // Separate the digits from a leading sign. A sign whose class is
        // not allowed stays part of $magnitude and fails the digit test.
        $magnitude = $value;
        if ($this->negative && $value[0] === '-') {
            $magnitude = substr($value, 1);
            if ($magnitude === '0') {
                // rm minus sign for zero
                $value = '0';
            }
        } elseif ($this->positive && $value[0] === '+') {
            // rm unnecessary plus
            $value = substr($value, 1);
            $magnitude = $value;
        }

        // test if it's numeric
        if (!ctype_digit($magnitude)) {
            return false;
        }

        // perform scope tests
        $outOfScope = (!$this->zero && $value == 0)
            || (!$this->positive && $value > 0)
            || (!$this->negative && $value < 0);
        return $outOfScope ? false : $value;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/URI.php 0000644 00000005230 15121423110 0015663 0 ustar 00 <?php
/**
* Validates a URI as defined by RFC 3986.
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
*/
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
    /**
     * Parser that turns the raw string into a URI object.
     * @type HTMLPurifier_URIParser
     */
    protected $parser;

    /**
     * True when the URI is fetched as part of rendering the document.
     * @type bool
     */
    protected $embedsResource;

    /**
     * @param bool $embeds_resource Does the URI here result in an extra HTTP request?
     */
    public function __construct($embeds_resource = false)
    {
        $this->parser = new HTMLPurifier_URIParser();
        $this->embedsResource = (bool)$embeds_resource;
    }

    /**
     * Builds a URI validator from a definition string; the literal
     * "embedded" selects the resource-embedding variant.
     * @param string $string
     * @return HTMLPurifier_AttrDef_URI
     */
    public function make($string)
    {
        return new HTMLPurifier_AttrDef_URI($string === 'embedded');
    }

    /**
     * Parses the URI and runs it through generic validation, the URI
     * definition's filters, scheme validation and the post filters.
     * @param string $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Serialized URI, or false when any stage rejects it
     */
    public function validate($uri, $config, $context)
    {
        if ($config->get('URI.Disable')) {
            return false;
        }

        $uri = $this->parser->parse($this->parseCDATA($uri));
        if ($uri === false) {
            return false;
        }

        // validators consult this flag; it is removed again below
        $context->register('EmbeddedURI', $this->embedsResource);

        // Each stage only runs when every previous one succeeded. $uri is
        // always handed over as the same variable because the callees may
        // take it by reference.

        // generic validation
        $ok = (bool)$uri->validate($config, $context);

        // chained filtering
        if ($ok) {
            $uri_def = $config->getDefinition('URI');
            $ok = (bool)$uri_def->filter($uri, $config, $context);
        }

        // scheme lookup; embedded resources need a browsable scheme
        if ($ok) {
            $scheme_obj = $uri->getSchemeObj($config, $context);
            $ok = $scheme_obj && !($this->embedsResource && !$scheme_obj->browsable);
        }

        // scheme-specific validation
        if ($ok) {
            $ok = (bool)$scheme_obj->validate($uri, $config, $context);
        }

        // post chained filtering
        if ($ok) {
            $ok = (bool)$uri_def->postFilter($uri, $config, $context);
        }

        $context->destroy('EmbeddedURI');

        // back to string on success
        return $ok ? $uri->toString() : false;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php 0000644 00000001550 15121423110 0016265 0 ustar 00 <?php
/**
* Dummy AttrDef that mimics another AttrDef, BUT it generates clones
* with make.
*/
class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
{
    /**
     * The wrapped definition: validation is forwarded to it and make()
     * hands out copies of it.
     * @type HTMLPurifier_AttrDef
     */
    protected $clone;

    /**
     * @param HTMLPurifier_AttrDef $clone Definition to wrap
     */
    public function __construct($clone)
    {
        $this->clone = $clone;
    }

    /**
     * Forwards validation to the wrapped definition.
     * @param string $v
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($v, $config, $context)
    {
        $wrapped = $this->clone;
        return $wrapped->validate($v, $config, $context);
    }

    /**
     * Returns a fresh copy of the wrapped definition; the argument is
     * not used.
     * @param string $string
     * @return HTMLPurifier_AttrDef
     */
    public function make($string)
    {
        $copy = clone $this->clone;
        return $copy;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/Enum.php 0000644 00000004243 15121423110 0016133 0 ustar 00 <?php
// Enum = Enumerated
/**
* Validates a keyword against a list of valid values.
* @warning The case-insensitive compare of this function uses PHP's
* built-in strtolower and ctype_lower functions, which may
* cause problems with international comparisons
*/
class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
{
    /**
     * Accepted values, stored as array keys for constant-time lookup.
     * @type array
     * @todo Make protected
     */
    public $valid_values = array();

    /**
     * Whether matching is case sensitive.
     * @note In general this is always case insensitive.
     */
    protected $case_sensitive = false; // values according to W3C spec

    /**
     * @param array $valid_values List of valid values
     * @param bool $case_sensitive Whether or not case sensitive
     */
    public function __construct($valid_values = array(), $case_sensitive = false)
    {
        // flip so that the values become keys
        $this->valid_values = array_flip($valid_values);
        $this->case_sensitive = $case_sensitive;
    }

    /**
     * Trims the value, lowercases it unless the enumeration is case
     * sensitive, and looks it up in the table of valid values.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalized keyword, or false if it is not listed
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        // we may want to do full case-insensitive libraries
        if (!$this->case_sensitive && !ctype_lower($string)) {
            $string = strtolower($string);
        }
        return isset($this->valid_values[$string]) ? $string : false;
    }

    /**
     * @param string $string In form of comma-delimited list of case-insensitive
     *      valid values. Example: "foo,bar,baz". Prepend "s:" to make
     *      case sensitive
     * @return HTMLPurifier_AttrDef_Enum
     */
    public function make($string)
    {
        $sensitive = strlen($string) > 2 && $string[0] == 's' && $string[1] == ':';
        if ($sensitive) {
            // strip the "s:" marker
            $string = substr($string, 2);
        }
        return new HTMLPurifier_AttrDef_Enum(explode(',', $string), $sensitive);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php 0000644 00000002253 15121423110 0017050 0 ustar 00 <?php
/**
* Validates a color according to the HTML spec.
*/
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{
    /**
     * Accepts a color keyword or a 3/6 digit hex value (with or without
     * the leading "#") and returns it as a six digit "#rrggbb" value.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        // keyword table is read from the config once and then reused
        static $colors = null;
        if ($colors === null) {
            $colors = $config->get('Core.ColorKeywords');
        }

        $string = trim($string);
        if (empty($string)) {
            return false;
        }

        $keyword = strtolower($string);
        if (isset($colors[$keyword])) {
            return $colors[$keyword];
        }

        // the hash sign is optional on input
        $hex = ($string[0] === '#') ? substr($string, 1) : $string;
        $length = strlen($hex);
        if (($length !== 3 && $length !== 6) || !ctype_xdigit($hex)) {
            return false;
        }

        if ($length === 3) {
            // expand shorthand "abc" to "aabbcc"
            $hex = $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2];
        }
        return '#' . $hex;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php 0000644 00000002464 15121423110 0020232 0 ustar 00 <?php
/**
* Validates a MultiLength as defined by the HTML spec.
*
* A multilength is either a integer (pixel count), a percentage, or
* a relative number.
*/
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
{
    /**
     * Tries the parent (pixel / percentage) validation first and falls
     * back to the relative "n*" form.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '') {
            return false;
        }

        $as_length = parent::validate($string, $config, $context);
        if ($as_length !== false) {
            return $as_length;
        }

        // relative lengths end with an asterisk
        $length = strlen($string);
        if ($string[$length - 1] !== '*') {
            return false;
        }

        $int = substr($string, 0, $length - 1);
        if ($int == '') {
            // a bare "*" is passed through
            return '*';
        }
        if (!is_numeric($int)) {
            return false;
        }

        $int = (int)$int;
        if ($int < 0) {
            return false;
        }
        if ($int === 0) {
            return '0';
        }
        if ($int === 1) {
            // "1*" is written as plain "*"
            return '*';
        }
        return $int . '*';
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Class.php 0000644 00000002715 15121423110 0017042 0 ustar 00 <?php
/**
* Implements special behavior for class attribute (normally NMTOKENS)
*/
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
{
    /**
     * Uses the parent's NMTOKENS splitting for XHTML 1.1 / XHTML 2.0 and
     * a plain whitespace split for every other doctype.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    protected function split($string, $config, $context)
    {
        // really, this twiddle should be lazy loaded
        $doctype = $config->getDefinition('HTML')->doctype->name;
        $use_nmtokens = ($doctype == "XHTML 1.1" || $doctype == "XHTML 2.0");

        return $use_nmtokens
            ? parent::split($string, $config, $context)
            : preg_split('/\s+/', $string);
    }

    /**
     * Keeps tokens that pass %Attr.AllowedClasses (when set), are not in
     * %Attr.ForbiddenClasses, and have not been seen already.
     * @param array $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function filter($tokens, $config, $context)
    {
        $allowed = $config->get('Attr.AllowedClasses');
        $forbidden = $config->get('Attr.ForbiddenClasses');

        $ret = array();
        foreach ($tokens as $token) {
            if ($allowed !== null && !isset($allowed[$token])) {
                continue;
            }
            if (isset($forbidden[$token])) {
                continue;
            }
            // We need this O(n) check because of PHP's array
            // implementation that casts -0 to 0.
            if (in_array($token, $ret, true)) {
                continue;
            }
            $ret[] = $token;
        }
        return $ret;
    }
}
htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php 0000644 00000004141 15121423110 0017566 0 ustar 00 <?php
/**
* Validates contents based on NMTOKENS attribute type.
*/
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{
    /**
     * Splits the value into tokens, filters them and joins the survivors
     * with single spaces.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);

        // early abort: '' and '0' (strings that convert to false) are invalid
        if (!$string) {
            return false;
        }

        $tokens = $this->filter(
            $this->split($string, $config, $context),
            $config,
            $context
        );

        return empty($tokens) ? false : implode(' ', $tokens);
    }

    /**
     * Splits a space separated list of tokens into its constituent parts.
     * Only tokens matching the pattern below are returned.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function split($string, $config, $context)
    {
        // OPTIMIZABLE!
        // do the preg_match, capture all subpatterns for reformulation
        // we don't support U+00A1 and up codepoints or
        // escaping because I don't know how to do that with regexps
        // and plus it would complicate optimization efforts (you never
        // see that anyway).
        //
        // Layout: (space or string start) (token) (space or string end)
        $pattern = '/(?:(?<=\s)|\A)((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)(?:(?=\s)|\z)/';
        preg_match_all($pattern, $string, $matches);
        return $matches[1];
    }

    /**
     * Template method for removing certain tokens based on arbitrary
     * criteria; this base implementation keeps every token.
     * @note If we wanted to be really functional, we'd do an array_filter
     *       with a callback. But... we're not.
     * @param array $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function filter($tokens, $config, $context)
    {
        return $tokens;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php 0000644 00000001551 15121423110 0016665 0 ustar 00 <?php
/**
* Validates a boolean attribute
*/
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{
    /**
     * Value handed back by validate(); set from the constructor argument.
     * @type string
     */
    protected $name;

    /**
     * @type bool
     */
    public $minimized = true;

    /**
     * @param bool|string $name
     */
    public function __construct($name = false)
    {
        $this->name = $name;
    }

    /**
     * Ignores the supplied value and always returns the configured name.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $canonical = $this->name;
        return $canonical;
    }

    /**
     * @param string $string Name of attribute
     * @return HTMLPurifier_AttrDef_HTML_Bool
     */
    public function make($string)
    {
        $definition = new HTMLPurifier_AttrDef_HTML_Bool($string);
        return $definition;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php 0000644 00000003350 15121423110 0017713 0 ustar 00 <?php
/**
* Validates a rel/rev link attribute against a directive of allowed values
* @note We cannot use Enum because link types allow multiple
* values.
* @note Assumes link types are ASCII text
*/
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{
    /**
     * Suffix of the "Attr." directive holding the allowed values.
     * @type string
     */
    protected $name;

    /**
     * @param string $name Attribute name, either "rel" or "rev"
     */
    public function __construct($name)
    {
        $configLookup = array(
            'rel' => 'AllowedRel',
            'rev' => 'AllowedRev'
        );
        if (isset($configLookup[$name])) {
            $this->name = $configLookup[$name];
            return;
        }
        trigger_error(
            'Unrecognized attribute name for link relationship.',
            E_USER_ERROR
        );
    }

    /**
     * Keeps the space separated link types that appear in the allowed
     * list, lowercased and without duplicates.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $allowed = $config->get('Attr.' . $this->name);
        if (empty($allowed)) {
            return false;
        }

        // keyed by link type so that duplicates collapse
        $kept = array();
        foreach (explode(' ', $this->parseCDATA($string)) as $part) {
            $part = strtolower(trim($part));
            if (isset($allowed[$part])) {
                $kept[$part] = true;
            }
        }

        return empty($kept) ? false : implode(' ', array_keys($kept));
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php 0000644 00000001502 15121423110 0020167 0 ustar 00 <?php
/**
* Special-case enum attribute definition that lazy loads allowed frame targets
*/
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
{
    /**
     * Stays false until the first validate() call fills it from the config.
     * @type array
     */
    public $valid_values = false; // uninitialized value

    /**
     * @type bool
     */
    protected $case_sensitive = false;

    /**
     * Intentionally empty: the value list is loaded lazily in validate().
     */
    public function __construct()
    {
    }

    /**
     * Loads %Attr.AllowedFrameTargets on first use, then validates like
     * a regular enumeration.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $loaded = ($this->valid_values !== false);
        if (!$loaded) {
            $this->valid_values = $config->get('Attr.AllowedFrameTargets');
        }
        return parent::validate($string, $config, $context);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php 0000644 00000006204 15121423110 0016266 0 ustar 00 <?php
/**
* Validates the HTML attribute ID.
* @warning Even though this is the id processor, it
* will ignore the directive Attr:IDBlacklist, since it will only
* go according to the ID accumulator. Since the accumulator is
* automatically generated, it will have already absorbed the
* blacklist. If you're hacking around, make sure you use load()!
*/
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{
    // selector is NOT a valid thing to use for IDREFs, because IDREFs
    // *must* target IDs that exist, whereas selector #ids do not.

    /**
     * Determines whether or not we're validating an ID in a CSS
     * selector context.
     * @type bool
     */
    protected $selector;

    /**
     * @param bool $selector
     */
    public function __construct($selector = false)
    {
        $this->selector = $selector;
    }

    /**
     * Trims the ID, applies the configured prefix, and checks it against
     * the ID accumulator (outside selector context), the syntax rules and
     * the blacklist regexp.
     * @param string $id
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The (possibly prefixed) ID, or false when rejected
     */
    public function validate($id, $config, $context)
    {
        if (!$this->selector && !$config->get('Attr.EnableID')) {
            return false;
        }

        $id = trim($id); // trim it first
        if ($id === '') {
            return false;
        }

        $prefix = $config->get('Attr.IDPrefix');
        if ($prefix !== '') {
            $prefix .= $config->get('Attr.IDPrefixLocal');
            // prevent re-appending the prefix
            if (strpos($id, $prefix) !== 0) {
                $id = $prefix . $id;
            }
        } elseif ($config->get('Attr.IDPrefixLocal') !== '') {
            trigger_error(
                '%Attr.IDPrefixLocal cannot be used unless %Attr.IDPrefix is set',
                E_USER_WARNING
            );
        }

        // IDs that the accumulator already knows are rejected
        if (!$this->selector) {
            $id_accumulator =& $context->get('IDAccumulator');
            if (isset($id_accumulator->ids[$id])) {
                return false;
            }
        }

        if (!$this->isWellFormed($id, $config)) {
            return false;
        }

        $regexp = $config->get('Attr.IDBlacklistRegexp');
        if ($regexp && preg_match($regexp, $id)) {
            return false;
        }

        // record the accepted ID so that later duplicates are caught
        if (!$this->selector) {
            $id_accumulator->add($id);
        }

        return $id;
    }

    /**
     * Syntax check: with %Attr.ID.HTML5 enabled the ID only must not
     * contain whitespace; otherwise it must start with a letter and
     * consist of letters, digits, ":", "-", "." and "_".
     * @param string $id Non-empty ID
     * @param HTMLPurifier_Config $config
     * @return bool
     */
    private function isWellFormed($id, $config)
    {
        if ($config->get('Attr.ID.HTML5') === true) {
            return !preg_match('/[\t\n\x0b\x0c ]/', $id);
        }

        // we purposely avoid using regex, hopefully this is faster
        if (ctype_alpha($id)) {
            return true;
        }
        if (!ctype_alpha($id[0])) {
            return false;
        }
        // primitive style of regexps, I suppose: trim() only strips
        // the whole string if no other character occurs in it
        return trim($id, 'A..Za..z0..9:-._') === '';
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php 0000644 00000002342 15121423110 0017212 0 ustar 00 <?php
/**
* Validates the HTML type length (not to be confused with CSS's length).
*
* This accepts integer pixels or percentages as lengths for certain
* HTML attributes.
*/
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
{
    /**
     * Tries the parent pixel validation first and falls back to a
     * percentage, which is clamped to the range 0% - 100%.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '') {
            return false;
        }

        $as_pixels = parent::validate($string, $config, $context);
        if ($as_pixels !== false) {
            return $as_pixels;
        }

        // not pixels, so it has to end with a percent sign
        $length = strlen($string);
        if ($string[$length - 1] !== '%') {
            return false;
        }

        $points = substr($string, 0, $length - 1);
        if (!is_numeric($points)) {
            return false;
        }

        $points = (int)$points;
        if ($points < 0) {
            return '0%';
        }
        if ($points > 100) {
            return '100%';
        }
        return $points . '%';
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Pixels.php 0000644 00000003274 15121423110 0017242 0 ustar 00 <?php
/**
* Validates an integer representation of pixels according to the HTML spec.
*/
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{
    /**
     * Upper bound for accepted values; null means no bound.
     * @type int
     */
    protected $max;

    /**
     * @param int $max
     */
    public function __construct($max = null)
    {
        $this->max = $max;
    }

    /**
     * Accepts a numeric value with an optional "px" suffix and returns it
     * as an integer string, clamped to 0 and (if set) the maximum.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '0') {
            return $string;
        }
        if ($string === '') {
            return false;
        }

        // drop a trailing "px" unit
        $length = strlen($string);
        if (substr($string, $length - 2) == 'px') {
            $string = substr($string, 0, $length - 2);
        }
        if (!is_numeric($string)) {
            return false;
        }

        $int = (int)$string;
        if ($int < 0) {
            return '0';
        }

        // upper-bound value, extremely high values can
        // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
        // WARNING, above link WILL crash you if you're using Windows
        $too_large = ($this->max !== null && $int > $this->max);

        return (string)($too_large ? $this->max : $int);
    }

    /**
     * Builds an instance of the same concrete class; an empty string
     * means "no maximum".
     * @param string $string
     * @return HTMLPurifier_AttrDef
     */
    public function make($string)
    {
        $max = ($string === '') ? null : (int)$string;
        $class = get_class($this);
        return new $class($max);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email.php 0000644 00000000527 15121423110 0016716 0 ustar 00 <?php
abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
{
/**
* Unpacks a mailbox into its display-name and address.
*
* Placeholder only: the body is empty, so a call currently returns null.
* @param string $string Mailbox string to unpack
* @return mixed Null until an implementation is provided
*/
public function unpack($string)
{
// needs to be implemented
}
}
// sub-implementations
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php 0000644 00000012432 15121423110 0016602 0 ustar 00 <?php
/**
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
*/
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{
    /**
     * IPv4 sub-validator.
     * @type HTMLPurifier_AttrDef_URI_IPv4
     */
    protected $ipv4;

    /**
     * IPv6 sub-validator.
     * @type HTMLPurifier_AttrDef_URI_IPv6
     */
    protected $ipv6;

    public function __construct()
    {
        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
    }

    /**
     * Validates a host: a bracketed IPv6 literal, an IPv4 address or a
     * domain name. Names that fail the plain ASCII check are converted
     * with IDNA (when available) and checked once more.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Validated host, or false when invalid
     */
    public function validate($string, $config, $context)
    {
        // empty hostname is OK; it's usually semantically equivalent:
        // the default host as defined by a URI scheme is used:
        //
        //      If the URI scheme defines a default for host, then that
        //      default applies when the host subcomponent is undefined
        //      or when the registered name is empty (zero length).
        if ($string === '') {
            return '';
        }

        $length = strlen($string);
        if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') {
            // IPv6 literal: validate the part between the brackets
            $ip = substr($string, 1, $length - 2);
            $valid = $this->ipv6->validate($ip, $config, $context);
            return ($valid === false) ? false : '[' . $valid . ']';
        }

        // need to do checks on unusual encodings too
        $ipv4 = $this->ipv4->validate($string, $config, $context);
        if ($ipv4 !== false) {
            return $ipv4;
        }

        // A regular domain name.

        // This doesn't match I18N domain names, but we don't have proper IRI support,
        // so force users to insert Punycode.

        // There is not a good sense in which underscores should be
        // allowed, since it's technically not! (And if you go as
        // far to allow everything as specified by the DNS spec...
        // well, that's literally everything, modulo some space limits
        // for the components and the overall name (which, by the way,
        // we are NOT checking!). So we (arbitrarily) decide this:
        // let's allow underscores wherever we would have allowed
        // hyphens, if they are enabled. This is a pretty good match
        // for browser behavior, for example, a large number of browsers
        // cannot handle foo_.example.com, but foo_bar.example.com is
        // fairly well supported.
        $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';

        // Based off of RFC 1738, but amended so that
        // as per RFC 3696, the top label need only not be all numeric.
        // The productions describing this are:
        $an = '[a-z0-9]'; // alphanum
        $and = "[a-z0-9-$underscore]"; // alphanum | "-"
        // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
        $domainlabel = "$an(?:$and*$an)?";
        // AMENDED as per RFC 3696
        // toplabel    = alphanum | alphanum *( alphanum | "-" ) alphanum
        //      side condition: not all numeric
        $toplabel = "$an(?:$and*$an)?";

        // hostname    = *( domainlabel "." ) toplabel [ "." ]
        // The D modifier makes "$" match only at the very end of the
        // subject, so a host followed by a newline is not accepted.
        if (preg_match("/^(?:$domainlabel\.)*($toplabel)\.?$/iD", $string, $matches)) {
            if (!ctype_digit($matches[1])) {
                return $string;
            }
        }

        $string = $this->toAscii($string, $config);
        if ($string === false) {
            // the IDNA conversion failed
            return false;
        }

        // Try again
        if (preg_match("/^($domainlabel\.)*$toplabel\.?$/iD", $string)) {
            return $string;
        }
        return false;
    }

    /**
     * Converts an internationalized host name to its ASCII form, using
     * idn_to_ascii() when it exists and Net_IDNA2 when %Core.EnableIDNA
     * is set; otherwise the name is returned unchanged.
     * @param string $string Host name
     * @param HTMLPurifier_Config $config
     * @return bool|string Converted name, or false if idn_to_ascii() failed
     */
    private function toAscii($string, $config)
    {
        // PHP 5.3 and later support this functionality natively
        if (function_exists('idn_to_ascii')) {
            if (defined('IDNA_NONTRANSITIONAL_TO_ASCII') && defined('INTL_IDNA_VARIANT_UTS46')) {
                return idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46);
            }
            return idn_to_ascii($string);
        }

        // If we have Net_IDNA2 support, we can support IRIs by
        // punycoding them. (This is the most portable thing to do.)
        if (!$config->get('Core.EnableIDNA')) {
            return $string;
        }

        $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
        // we need to encode each period separately
        $parts = explode('.', $string);
        try {
            $new_parts = array();
            foreach ($parts as $part) {
                // only labels containing a byte above 0x7a get encoded
                $encodable = preg_match('/[\x7b-\xff]/', $part) === 1;
                $new_parts[] = $encodable ? $idna->encode($part) : $part;
            }
            return implode('.', $new_parts);
        } catch (Exception $e) {
            // XXX error reporting
            // best effort: keep the unconverted name and let the final
            // check in validate() decide
            return $string;
        }
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv6.php 0000644 00000004655 15121423110 0016461 0 ustar 00 <?php
/**
* Validates an IPv6 address.
* @author Feyd @ forums.devnetwork.net (public domain)
* @note This function requires brackets to have been removed from address
* in URI.
*/
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
{
    /**
     * Validates the textual form of an IPv6 address (without the URI
     * brackets). An optional "/0" - "/128" prefix length and a trailing
     * dotted IPv4 part are accepted. The input is returned unchanged
     * when it is valid.
     * @param string $aIP
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($aIP, $config, $context)
    {
        if (!$this->ip4) {
            $this->_loadRegex();
        }
        $original = $aIP;
        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128

        // The D modifier is used throughout so that "$" cannot match in
        // front of a trailing newline.

        // prefix check
        if (strpos($aIP, '/') !== false) {
            if (!preg_match('#' . $pre . '$#D', $aIP, $find)) {
                return false;
            }
            $aIP = (string)substr($aIP, 0, 0 - strlen($find[0]));
        }

        // IPv4-compatibility check: a dotted quad after a colon is
        // rewritten as two hex groups. The digits are only used for the
        // syntax check below, so they are not zero padded.
        if (preg_match('#(?<=:)' . $this->ip4 . '$#D', $aIP, $find)) {
            $aIP = substr($aIP, 0, 0 - strlen($find[0]));
            $ip = array_map('dechex', explode('.', $find[0]));
            $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
        }

        // compression check
        $halves = explode('::', $aIP);
        if (count($halves) > 2) {
            // "::" may appear only once
            return false;
        }
        if (count($halves) === 2) {
            // an empty side (leading or trailing "::") has no groups
            $first = ($halves[0] === '') ? array() : explode(':', $halves[0]);
            $second = ($halves[1] === '') ? array() : explode(':', $halves[1]);
            $given = count($first) + count($second);
            // "::" stands for at least one group, so at most seven
            // groups may be written out
            if ($given > 7) {
                return false;
            }
            $groups = array_merge($first, array_fill(0, 8 - $given, '0'), $second);
        } else {
            $groups = explode(':', $halves[0]);
        }

        if (count($groups) !== 8) {
            return false;
        }

        // Every group must consist of one to four hex digits. Empty
        // groups (stray leading/trailing colon, ":::") are rejected.
        foreach ($groups as $group) {
            if (!preg_match('#^[0-9a-fA-F]{1,4}$#D', $group)) {
                return false;
            }
        }
        return $original;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php 0000644 00000001470 15121423110 0021103 0 ustar 00 <?php
/**
* Primitive email validation class based on the regexp found at
* http://www.regular-expressions.info/email.html
*/
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
{
    /**
     * Checks a trimmed address against a simple "local@domain.tld"
     * pattern.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Trimmed address, or false when it does not match
     */
    public function validate($string, $config, $context)
    {
        // no support for named mailboxes i.e. "Bob <bob@example.com>"
        // that needs more percent encoding to be done
        if ($string == '') {
            return false;
        }
        $string = trim($string);
        // The top-level label may be up to 63 letters long (the DNS label
        // limit); a bound of four rejected valid addresses under longer
        // top-level domains such as .museum.
        $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,63}$/i', $string);
        return $result ? $string : false;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv4.php 0000644 00000001746 15121423110 0016455 0 ustar 00 <?php
/**
* Validates an IPv4 address
* @author Feyd @ forums.devnetwork.net (public domain)
*/
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
{
    /**
     * IPv4 regex, protected so that IPv6 can reuse it.
     * Holds the pattern without delimiters or anchors.
     * @type string
     */
    protected $ip4;

    /**
     * Accepts a dotted quad whose four parts are each in the range 0-255.
     * @param string $aIP
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The address unchanged, or false when invalid
     */
    public function validate($aIP, $config, $context)
    {
        if (!$this->ip4) {
            $this->_loadRegex();
        }
        // D modifier: "$" must match at the very end of the subject, so
        // an address followed by a newline is not accepted.
        if (preg_match('#^' . $this->ip4 . '$#D', $aIP)) {
            return $aIP;
        }
        return false;
    }

    /**
     * Lazy load function to prevent regex from being stuffed in
     * cache.
     */
    protected function _loadRegex()
    {
        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
        $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Color.php 0000644 00000011110 15121423110 0016724 0 ustar 00 <?php
/**
* Validates Color as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{
    /**
     * Validator for the alpha component of rgba() / hsla().
     * @type HTMLPurifier_AttrDef_CSS_AlphaValue
     */
    protected $alpha;

    public function __construct()
    {
        $this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue();
    }

    /**
     * Accepts a color keyword, an rgb()/rgba()/hsl()/hsla() expression
     * or a 3/6 digit hex value (the "#" is added when missing).
     * @param string $color
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalized color, or false when invalid
     */
    public function validate($color, $config, $context)
    {
        // keyword table is read from the config once and then reused
        static $colors = null;
        if ($colors === null) {
            $colors = $config->get('Core.ColorKeywords');
        }

        $color = trim($color);
        if ($color === '') {
            return false;
        }

        $lower = strtolower($color);
        if (isset($colors[$lower])) {
            return $colors[$lower];
        }

        // The function name has to open the value; text in front of it
        // ("foo rgb(...)", "rgbrgb(...)") is not a color.
        if (preg_match('#^(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) {
            return $this->validateFunctional($color, $matches[1], $config, $context);
        }

        return $this->validateHex($color);
    }

    /**
     * Validates and normalizes "function(a,b,c[,alpha])" notation.
     * Numeric components are clamped to their allowed range; components
     * that are not numeric make the whole value invalid.
     * @param string $color Trimmed value starting with "$function("
     * @param string $function One of rgb, rgba, hsl, hsla
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    private function validateFunctional($color, $function, $config, $context)
    {
        // the first closing parenthesis must be the last character
        if (strpos($color, ')') !== strlen($color) - 1) {
            return false;
        }

        $alpha_channel = (substr($function, -1) === 'a');
        $parameters_size = $alpha_channel ? 4 : 3;

        /*
         * Allowed types for values :
         * parameter_position => [type => max_value]
         */
        if (strpos($function, 'hsl') !== false) {
            $allowed_types = array(
                1 => array('integer' => 360),
                2 => array('percentage' => 100),
                3 => array('percentage' => 100),
            );
            $allow_different_types = true;
        } else {
            $allowed_types = array(
                1 => array('percentage' => 100, 'integer' => 255),
                2 => array('percentage' => 100, 'integer' => 255),
                3 => array('percentage' => 100, 'integer' => 255),
            );
            $allow_different_types = false;
        }

        // everything between "function(" and the final ")"
        $values = (string)substr($color, strlen($function) + 1, -1);
        $parts = explode(',', $values);
        if (count($parts) !== $parameters_size) {
            return false;
        }

        $type = false;
        $new_parts = array();
        $i = 0;
        foreach ($parts as $part) {
            $i++;
            $part = trim($part);
            if ($part === '') {
                return false;
            }

            // different check for alpha channel (always the last part)
            if ($alpha_channel && $i === $parameters_size) {
                $result = $this->alpha->validate($part, $config, $context);
                if ($result === false) {
                    return false;
                }
                $new_parts[] = (string)$result;
                continue;
            }

            if (substr($part, -1) === '%') {
                $current_type = 'percentage';
                $number = rtrim($part, '%');
            } else {
                $current_type = 'integer';
                $number = $part;
            }

            if (!array_key_exists($current_type, $allowed_types[$i])) {
                return false;
            }

            // rgb() components must all be of the same type
            if (!$type) {
                $type = $current_type;
            }
            if ($allow_different_types === false && $type != $current_type) {
                return false;
            }

            // Reject junk such as "foo" or "5px" instead of letting the
            // clamping below coerce it into some number.
            if (!is_numeric($number)) {
                return false;
            }

            // clamp to the range 0 -> $max_value
            $max_value = $allowed_types[$i][$current_type];
            if ($current_type == 'integer') {
                $new_parts[] = (int)max(min($number, $max_value), 0);
            } else {
                $new_parts[] = (float)max(min($number, $max_value), 0) . '%';
            }
        }

        return $function . '(' . implode(',', $new_parts) . ')';
    }

    /**
     * Validates a 3 or 6 digit hexadecimal color; a missing leading "#"
     * is added to the returned value.
     * @param string $color Trimmed, non-empty value
     * @return bool|string
     */
    private function validateHex($color)
    {
        if ($color[0] === '#') {
            $hex = substr($color, 1);
        } else {
            $hex = $color;
            $color = '#' . $color;
        }

        $length = strlen($hex);
        if ($length !== 3 && $length !== 6) {
            return false;
        }
        if (!ctype_xdigit($hex)) {
            return false;
        }
        return $color;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php 0000644 00000005010 15121423110 0016307 0 ustar 00 <?php
/**
* Validates a URI in CSS syntax, which uses url('http://example.com')
* @note While theoretically speaking a URI in a CSS document could
* be non-embedded, as of CSS2 there is no such usage so we're
* generalizing it. This may need to be changed in the future.
* @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
* the separator, you cannot put a literal semicolon in
* in the URI. Try percent encoding it, in that case.
*/
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{
    public function __construct()
    {
        parent::__construct(true); // always embedded
    }

    /**
     * Extracts the URI from a CSS url(...) token, validates it with the
     * parent URI validator and re-wraps the result as url("...").
     * @param string $uri_string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($uri_string, $config, $context)
    {
        // parse the URI out of the string and then pass it onto
        // the parent object
        $uri_string = $this->parseCDATA($uri_string);
        if (strncmp($uri_string, 'url(', 4) !== 0) {
            return false;
        }

        // what follows "url(" has to end with the closing parenthesis
        $remainder = substr($uri_string, 4);
        if (strlen($remainder) == 0) {
            return false;
        }
        $last = strlen($remainder) - 1;
        if ($remainder[$last] != ')') {
            return false;
        }

        $uri = trim(substr($remainder, 0, $last));

        // strip one pair of matching quotes, if present
        if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
            $quote = $uri[0];
            $last = strlen($uri) - 1;
            if ($uri[$last] !== $quote) {
                return false;
            }
            $uri = substr($uri, 1, $last - 1);
        }

        $uri = $this->expandCSSEscape($uri);

        $result = parent::validate($uri, $config, $context);
        if ($result === false) {
            return false;
        }

        // extra sanity check; should have been done by URI
        $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);

        // suspicious characters are ()'; we're going to percent encode
        // them for safety.
        $result = strtr($result, array('(' => '%28', ')' => '%29', "'" => '%27'));

        // there's an extra bug where ampersands lose their escaping on
        // an innerHTML cycle, so a very unlucky query parameter could
        // then change the meaning of the URL.  Unfortunately, there's
        // not much we can do about that...
        return 'url("' . $result . '")';
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ListStyle.php 0000644 00000005537 15121423110 0017622 0 ustar 00 <?php
/**
* Validates shorthand CSS property list-style.
* @warning Does not support url tokens that have internal spaces.
*/
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
{
    /**
     * Local copy of validators.
     * @type HTMLPurifier_AttrDef[]
     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
     */
    protected $info;

    /**
     * Copies the list-style-type, list-style-position and list-style-image
     * validators out of the CSS definition.
     *
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $this->info['list-style-type'] = $def->info['list-style-type'];
        $this->info['list-style-position'] = $def->info['list-style-position'];
        $this->info['list-style-image'] = $def->info['list-style-image'];
    }

    /**
     * Validates a list-style shorthand. Each space-separated token is
     * offered to the type, position and image validators in that order;
     * the first one that accepts it claims the token. Tokens nobody
     * accepts are dropped.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalized "type image position" string, or
     *         false when no token could be validated.
     */
    public function validate($string, $config, $context)
    {
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // assumes URI doesn't have spaces in it
        $bits = explode(' ', strtolower($string)); // bits to process

        $caught = array();
        $caught['type'] = false;
        $caught['position'] = false;
        $caught['image'] = false;

        $i = 0; // number of catches
        $none = false; // whether a 'none' keyword was already consumed

        foreach ($bits as $bit) {
            if ($i >= 3) {
                // All three components are filled; stop scanning. This
                // used to be a bare `return;`, which handed null back to
                // callers that only test for `=== false`.
                break;
            }
            if ($bit === '') {
                continue;
            }
            foreach ($caught as $key => $status) {
                if ($status !== false) {
                    continue;
                }
                $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
                if ($r === false) {
                    continue;
                }
                if ($r === 'none') {
                    // only the first 'none' counts
                    if ($none) {
                        continue;
                    } else {
                        $none = true;
                    }
                    // a 'none' that reaches the image slot is consumed
                    // without being recorded
                    if ($key == 'image') {
                        continue;
                    }
                }
                $caught[$key] = $r;
                $i++;
                break;
            }
        }

        if (!$i) {
            return false;
        }

        $ret = array();

        // construct type
        if ($caught['type']) {
            $ret[] = $caught['type'];
        }

        // construct image
        if ($caught['image']) {
            $ret[] = $caught['image'];
        }

        // construct position
        if ($caught['position']) {
            $ret[] = $caught['position'];
        }

        if (empty($ret)) {
            return false;
        }
        return implode(' ', $ret);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php 0000644 00000003075 15121423110 0021501 0 ustar 00 <?php
/**
* Decorator which enables !important to be used in CSS values.
*/
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{
    /**
     * Wrapped definition that validates the value itself.
     * @type HTMLPurifier_AttrDef
     */
    public $def;

    /**
     * Whether a trailing !important is re-attached to the validated value.
     * @type bool
     */
    public $allow;

    /**
     * @param HTMLPurifier_AttrDef $def Definition to wrap
     * @param bool $allow Whether or not to allow !important
     */
    public function __construct($def, $allow = false)
    {
        $this->def = $def;
        $this->allow = $allow;
    }

    /**
     * Intercepts and removes !important if necessary, validates the rest
     * with the wrapped definition, and re-appends ' !important' when it
     * was present and is allowed.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Validated value, or false if the wrapped
     *         definition rejected it.
     */
    public function validate($string, $config, $context)
    {
        // test for ! and important tokens
        $string = trim($string);
        $is_important = false;
        // :TODO: optimization: test directly for !important and ! important
        if (strlen($string) >= 9 && substr($string, -9) === 'important') {
            $temp = rtrim(substr($string, 0, -9));
            // use a temp, because we might want to restore important
            if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
                $string = rtrim(substr($temp, 0, -1));
                $is_important = true;
            }
        }

        $result = $this->def->validate($string, $config, $context);
        if ($result === false) {
            // Propagate the rejection unchanged. Concatenating onto false
            // would produce the truthy string ' !important' and make an
            // invalid value look valid.
            return false;
        }
        if ($this->allow && $is_important) {
            $result .= ' !important';
        }
        return $result;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Font.php 0000644 00000014721 15121423110 0016567 0 ustar 00 <?php
/**
* Validates shorthand CSS property font.
*/
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
{
/**
* Local copy of validators
* @type HTMLPurifier_AttrDef[]
* @note If we moved specific CSS property definitions to their own
* classes instead of having them be assembled at run time by
* CSSDefinition, this wouldn't be necessary. We'd instantiate
* our own copies.
*/
protected $info = array();
/**
* Copies the validators of the longhand font properties (font-style,
* font-variant, font-weight, font-size, line-height, font-family) out of
* the CSS definition.
* @param HTMLPurifier_Config $config
*/
public function __construct($config)
{
$def = $config->getCSSDefinition();
$this->info['font-style'] = $def->info['font-style'];
$this->info['font-variant'] = $def->info['font-variant'];
$this->info['font-weight'] = $def->info['font-weight'];
$this->info['font-size'] = $def->info['font-size'];
$this->info['line-height'] = $def->info['line-height'];
$this->info['font-family'] = $def->info['font-family'];
}
/**
* Validates a font shorthand value.
*
* A system font keyword is returned lowercased as-is. Otherwise the
* space-separated tokens are consumed in three stages:
* stage 0 - optional font-style / font-variant / font-weight (any order),
* stage 1 - mandatory font-size, optionally followed by /line-height,
* stage 2 - everything that remains is validated as font-family.
*
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string Normalized shorthand, or false when font-size or
* font-family is missing or invalid.
*/
public function validate($string, $config, $context)
{
// keywords that stand for a complete system font
static $system_fonts = array(
'caption' => true,
'icon' => true,
'menu' => true,
'message-box' => true,
'small-caption' => true,
'status-bar' => true
);
// regular pre-processing
$string = $this->parseCDATA($string);
if ($string === '') {
return false;
}
// check if it's one of the keywords
$lowercase_string = strtolower($string);
if (isset($system_fonts[$lowercase_string])) {
return $lowercase_string;
}
$bits = explode(' ', $string); // bits to process
$stage = 0; // this indicates what we're looking for
$caught = array(); // which stage 0 properties have we caught?
$stage_1 = array('font-style', 'font-variant', 'font-weight');
$final = ''; // output
for ($i = 0, $size = count($bits); $i < $size; $i++) {
// consecutive spaces produce empty bits; ignore them
if ($bits[$i] === '') {
continue;
}
switch ($stage) {
case 0: // attempting to catch font-style, font-variant or font-weight
foreach ($stage_1 as $validator_name) {
if (isset($caught[$validator_name])) {
continue;
}
$r = $this->info[$validator_name]->validate(
$bits[$i],
$config,
$context
);
if ($r !== false) {
$final .= $r . ' ';
$caught[$validator_name] = true;
break;
}
}
// all three caught, continue on
if (count($caught) >= 3) {
$stage = 1;
}
if ($r !== false) {
break;
}
// no stage 0 validator accepted this bit: deliberately fall through
// and try it as a font-size
case 1: // attempting to catch font-size and perhaps line-height
$found_slash = false;
if (strpos($bits[$i], '/') !== false) {
// "size/height" or "size/" written without surrounding spaces
list($font_size, $line_height) =
explode('/', $bits[$i]);
if ($line_height === '') {
// ooh, there's a space after the slash!
$line_height = false;
$found_slash = true;
}
} else {
$font_size = $bits[$i];
$line_height = false;
}
$r = $this->info['font-size']->validate(
$font_size,
$config,
$context
);
if ($r !== false) {
$final .= $r;
// attempt to catch line-height
if ($line_height === false) {
// we need to scroll forward
for ($j = $i + 1; $j < $size; $j++) {
if ($bits[$j] === '') {
continue;
}
if ($bits[$j] === '/') {
// a second slash is malformed
if ($found_slash) {
return false;
} else {
$found_slash = true;
continue;
}
}
// first real token after the size; only used as line-height
// when a slash was seen
$line_height = $bits[$j];
break;
}
} else {
// slash already found
$found_slash = true;
$j = $i;
}
if ($found_slash) {
// skip the outer loop past the tokens consumed for line-height
$i = $j;
$r = $this->info['line-height']->validate(
$line_height,
$config,
$context
);
// an invalid line-height is dropped rather than failing the whole value
if ($r !== false) {
$final .= '/' . $r;
}
}
$final .= ' ';
$stage = 2;
break;
}
// font-size is mandatory
return false;
case 2: // attempting to catch font-family
// re-join all remaining bits, since family names may contain spaces
$font_family =
implode(' ', array_slice($bits, $i, $size - $i));
$r = $this->info['font-family']->validate(
$font_family,
$config,
$context
);
if ($r !== false) {
$final .= $r . ' ';
// processing completed successfully
return rtrim($final);
}
return false;
}
}
// ran out of bits before reaching a font-family
return false;
}
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Percentage.php 0000644 00000002377 15121423110 0017742 0 ustar 00 <?php
/**
* Validates a Percentage as defined by the CSS spec.
*/
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
{
    /**
     * Number validator that checks the part in front of the percent sign.
     * @type HTMLPurifier_AttrDef_CSS_Number
     */
    protected $number_def;

    /**
     * @param bool $non_negative Whether to forbid negative values
     */
    public function __construct($non_negative = false)
    {
        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
    }

    /**
     * Accepts a number immediately followed by '%' and returns it with
     * the number part normalized by the number validator.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);

        // Need at least one character of number plus the trailing '%'.
        if ($string === '' || strlen($string) === 1) {
            return false;
        }
        if (substr($string, -1) !== '%') {
            return false;
        }

        $digits = substr($string, 0, -1);
        $normalized = $this->number_def->validate($digits, $config, $context);

        return ($normalized === false) ? false : $normalized . '%';
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php 0000644 00000001324 15121423110 0016717 0 ustar 00 <?php
/**
* Validates based on {ident} CSS grammar production
*/
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
{
    /**
     * Returns the trimmed string when it matches the identifier pattern:
     * an optional leading dash, then a letter or underscore, then any
     * number of letters, digits, underscores and dashes.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $ident = trim($string);

        // '' and '0' are falsy and rejected before the regex runs
        if ($ident && preg_match('/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/', $ident)) {
            return $ident;
        }
        return false;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Composite.php 0000644 00000002464 15121423110 0017624 0 ustar 00 <?php
/**
* Allows multiple validators to attempt to validate attribute.
*
* Composite is just what it sounds like: a composite of many validators.
* This means that multiple HTMLPurifier_AttrDef objects will have a whack
* at the string. If one of them passes, that's what is returned. This is
* especially useful for CSS values, which often are a choice between
* an enumerated set of predefined values or a flexible data type.
*/
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
{
    /**
     * Validators that are tried in order.
     * @type HTMLPurifier_AttrDef[]
     * @todo Make protected
     */
    public $defs;

    /**
     * @param HTMLPurifier_AttrDef[] $defs List of HTMLPurifier_AttrDef objects
     */
    public function __construct($defs)
    {
        $this->defs = $defs;
    }

    /**
     * Hands the string to each validator in turn and returns the first
     * result that is not false.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $outcome = false;
        foreach ($this->defs as $validator) {
            $outcome = $validator->validate($string, $config, $context);
            if ($outcome !== false) {
                break; // first validator to accept wins
            }
        }
        return $outcome;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Border.php 0000644 00000003067 15121423110 0017077 0 ustar 00 <?php
/**
* Validates the border property as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
{
    /**
     * Local copy of properties this property is shorthand for.
     * @type HTMLPurifier_AttrDef[]
     */
    protected $info = array();

    /**
     * Copies the border-width, border-style and border-top-color
     * validators out of the CSS definition.
     *
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $this->info['border-width'] = $def->info['border-width'];
        $this->info['border-style'] = $def->info['border-style'];
        $this->info['border-top-color'] = $def->info['border-top-color'];
    }

    /**
     * Validates a border shorthand. Each space-separated token is offered
     * to the width, style and color validators that have not matched yet;
     * tokens nobody accepts are dropped.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Space-joined validated components, or false
     *         when no component validated.
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);
        $string = $this->mungeRgb($string);
        $bits = explode(' ', $string);
        $done = array(); // segments we've finished
        $ret = ''; // return value
        foreach ($bits as $bit) {
            if ($bit === '') {
                // artifact of consecutive spaces; nothing to validate
                continue;
            }
            foreach ($this->info as $propname => $validator) {
                if (isset($done[$propname])) {
                    continue;
                }
                $r = $validator->validate($bit, $config, $context);
                if ($r !== false) {
                    $ret .= $r . ' ';
                    $done[$propname] = true;
                    break;
                }
            }
        }
        $ret = rtrim($ret);
        // Signal failure with false, matching the other shorthand
        // validators in this directory, instead of returning an empty
        // string that callers testing `=== false` would treat as valid.
        if ($ret === '') {
            return false;
        }
        return $ret;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Number.php 0000644 00000004357 15121423110 0017115 0 ustar 00 <?php
/**
* Validates a number as defined by the CSS spec.
*/
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
{
    /**
     * Indicates whether or not only positive values are allowed.
     * @type bool
     */
    protected $non_negative = false;

    /**
     * @param bool $non_negative indicates whether negatives are forbidden
     */
    public function __construct($non_negative = false)
    {
        $this->non_negative = $non_negative;
    }

    /**
     * Normalizes a decimal number: an optional sign, digits, and an
     * optional fractional part. Leading zeros of the integer part and
     * trailing zeros of the fraction are removed, and a '+' sign is dropped.
     *
     * @param string $number
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string|bool
     * @warning Some contexts do not pass $config, $context. These
     *          variables should not be used without checking HTMLPurifier_Length
     */
    public function validate($number, $config, $context)
    {
        $number = $this->parseCDATA($number);
        if ($number === '') {
            return false;
        }
        if ($number === '0') {
            return '0';
        }

        // Peel off an explicit sign; only '-' is kept in the output.
        $sign = '';
        $lead = $number[0];
        if ($lead === '-') {
            if ($this->non_negative) {
                return false;
            }
            $sign = '-';
        }
        if ($lead === '-' || $lead === '+') {
            $number = substr($number, 1);
        }

        // Plain integer
        if (ctype_digit($number)) {
            $number = ltrim($number, '0');
            return ($number === '') ? '0' : $sign . $number;
        }

        // Period is the only non-numeric character allowed
        if (strpos($number, '.') === false) {
            return false;
        }

        $pieces = explode('.', $number, 2);
        $left = $pieces[0];
        $right = $pieces[1];

        if ($left === '' && $right === '') {
            return false;
        }
        if ($left !== '' && !ctype_digit($left)) {
            return false;
        }

        // Remove leading zeros until positive number or a zero stays left
        $stripped = ltrim($left, '0');
        $left = ($stripped != '') ? $stripped : '0';

        $right = rtrim($right, '0');
        if ($right === '') {
            // nothing but zeros after the period: emit an integer
            return ($left === '0') ? '0' : $sign . $left;
        }
        if (!ctype_digit($right)) {
            return false;
        }
        return $sign . $left . '.' . $right;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php 0000644 00000001431 15121423110 0017675 0 ustar 00 <?php
class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
{
    /**
     * Negative numbers are let through the number validator on purpose so
     * that validate() can clamp them instead of rejecting them.
     */
    public function __construct()
    {
        parent::__construct(false); // opacity is non-negative, but we will clamp it
    }

    /**
     * Validates a number and clamps it into the range 0..1.
     *
     * @param string $number
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string
     */
    public function validate($number, $config, $context)
    {
        $validated = parent::validate($number, $config, $context);
        if ($validated === false) {
            return false;
        }

        $opacity = (float)$validated;
        if ($opacity > 1.0) {
            return '1';
        }
        if ($opacity < 0.0) {
            return '0';
        }
        return $validated;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Filter.php 0000644 00000004426 15121423110 0017107 0 ustar 00 <?php
/**
* Microsoft's proprietary filter: CSS property
* @note Currently supports the alpha filter. In the future, this will
* probably need an extensible framework
*/
class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
{
    /**
     * Validator used for the opacity parameter.
     * @type HTMLPurifier_AttrDef_Integer
     */
    protected $intValidator;

    public function __construct()
    {
        $this->intValidator = new HTMLPurifier_AttrDef_Integer();
    }

    /**
     * Validates an alpha filter declaration. Only the function names
     * alpha, Alpha and progid:DXImageTransform.Microsoft.Alpha are
     * accepted, and only the first valid opacity parameter is kept,
     * clamped to 0..100.
     *
     * @param string $value
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Rebuilt "function(params)" string, 'none', or
     *         false when the function name is not recognized.
     */
    public function validate($value, $config, $context)
    {
        $value = $this->parseCDATA($value);
        if ($value === 'none') {
            return $value;
        }

        // if we looped this we could support multiple filters
        $function_length = strcspn($value, '(');
        $function = trim(substr($value, 0, $function_length));
        if ($function !== 'alpha' &&
            $function !== 'Alpha' &&
            $function !== 'progid:DXImageTransform.Microsoft.Alpha'
        ) {
            return false;
        }

        // Only look for parameters when an opening parenthesis exists.
        // Otherwise the cursor would point past the end of the string,
        // and strcspn() does not handle such an offset the same way in
        // every PHP version.
        $parameters = '';
        if ($function_length < strlen($value)) {
            $cursor = $function_length + 1;
            $parameters_length = strcspn($value, ')', $cursor);
            $parameters = substr($value, $cursor, $parameters_length);
        }

        $params = explode(',', $parameters);
        $ret_params = array();
        $lookup = array(); // keys that were already emitted
        foreach ($params as $param) {
            $pair = explode('=', $param);
            if (count($pair) < 2) {
                // No '=' in this parameter: there is no value to read.
                // Skipping avoids reading a missing array index.
                continue;
            }
            $key = trim($pair[0]);
            $param_value = trim($pair[1]);
            if (isset($lookup[$key])) {
                continue;
            }
            if ($key !== 'opacity') {
                continue;
            }
            $param_value = $this->intValidator->validate($param_value, $config, $context);
            if ($param_value === false) {
                continue;
            }
            // clamp to the 0..100 range
            $int = (int)$param_value;
            if ($int > 100) {
                $param_value = '100';
            }
            if ($int < 0) {
                $param_value = '0';
            }
            $ret_params[] = "$key=$param_value";
            $lookup[$key] = true;
        }
        $ret_parameters = implode(',', $ret_params);
        $ret_function = "$function($ret_parameters)";
        return $ret_function;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php 0000644 00000010106 15121423110 0021456 0 ustar 00 <?php
/* W3C says:
[ // adjective and number must be in correct order, even if
// you could switch them without introducing ambiguity.
// some browsers support that syntax
[
<percentage> | <length> | left | center | right
]
[
<percentage> | <length> | top | center | bottom
]?
] |
[ // this signifies that the vertical and horizontal adjectives
// can be arbitrarily ordered, however, there can only be two,
// one of each, or none at all
[
left | center | right
] ||
[
top | center | bottom
]
]
top, left = 0%
center, (none) = 50%
bottom, right = 100%
*/
/* QuirksMode says:
keyword + length/percentage must be ordered correctly, as per W3C
Internet Explorer and Opera, however, support arbitrary ordering. We
should fix it up.
Minor issue though, not strictly necessary.
*/
// control freaks may appreciate the ability to convert these to
// percentages or something, but it's not necessary
/**
* Validates the value of background-position.
*/
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{
    /**
     * @type HTMLPurifier_AttrDef_CSS_Length
     */
    protected $length;

    /**
     * @type HTMLPurifier_AttrDef_CSS_Percentage
     */
    protected $percentage;

    public function __construct()
    {
        $this->length = new HTMLPurifier_AttrDef_CSS_Length();
        $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
    }

    /**
     * Collects keywords and measures (lengths and percentages) from the
     * space-separated tokens and emits at most two components: first a
     * horizontal one (left/right, a leading center, or a measure), then a
     * vertical one (top/bottom, a later center, or a measure).
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);
        $tokens = explode(' ', $string);

        // keyword => axis ('c' is resolved to 'ch' or 'cv' by position)
        $axis_of = array(
            'top' => 'v',
            'bottom' => 'v',
            'left' => 'h',
            'right' => 'h',
            'center' => 'c'
        );

        $keywords = array(
            'h' => false,  // left, right
            'v' => false,  // top, bottom
            'ch' => false, // center (first word)
            'cv' => false  // center (second word)
        );
        $measures = array();
        $hits = 0; // number of recognized tokens so far

        foreach ($tokens as $token) {
            if ($token === '') {
                continue;
            }

            // test for keyword
            $lowered = ctype_lower($token) ? $token : strtolower($token);
            if (isset($axis_of[$lowered])) {
                $slot = $axis_of[$lowered];
                if ($slot == 'c') {
                    $slot = ($hits == 0) ? 'ch' : 'cv';
                }
                $keywords[$slot] = $lowered;
                $hits++;
            }

            // test for length
            $as_length = $this->length->validate($token, $config, $context);
            if ($as_length !== false) {
                $measures[] = $as_length;
                $hits++;
            }

            // test for percentage
            $as_percentage = $this->percentage->validate($token, $config, $context);
            if ($as_percentage !== false) {
                $measures[] = $as_percentage;
                $hits++;
            }
        }

        // no valid values were caught
        if (!$hits) {
            return false;
        }

        $output = array();

        // first component
        if ($keywords['h']) {
            $output[] = $keywords['h'];
        } elseif ($keywords['ch']) {
            $output[] = $keywords['ch'];
            $keywords['cv'] = false; // prevent re-use: center = center center
        } elseif (count($measures)) {
            $output[] = array_shift($measures);
        }

        // second component
        if ($keywords['v']) {
            $output[] = $keywords['v'];
        } elseif ($keywords['cv']) {
            $output[] = $keywords['cv'];
        } elseif (count($measures)) {
            $output[] = array_shift($measures);
        }

        return empty($output) ? false : implode(' ', $output);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Background.php 0000644 00000006210 15121423110 0017732 0 ustar 00 <?php
/**
* Validates shorthand CSS property background.
* @warning Does not support url tokens that have internal spaces.
*/
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
{
    /**
     * Local copy of component validators.
     * @type HTMLPurifier_AttrDef[]
     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
     */
    protected $info;

    /**
     * Copies the validators of the background longhand properties out of
     * the CSS definition.
     *
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $css = $config->getCSSDefinition();
        $this->info['background-color'] = $css->info['background-color'];
        $this->info['background-image'] = $css->info['background-image'];
        $this->info['background-repeat'] = $css->info['background-repeat'];
        $this->info['background-attachment'] = $css->info['background-attachment'];
        $this->info['background-position'] = $css->info['background-position'];
    }

    /**
     * Validates a background shorthand. Each token is offered to the
     * color, image, repeat and attachment validators that are still
     * unfilled; whatever they do not take is collected and validated in
     * one go as the background-position.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // munge rgb() decl if necessary
        $string = $this->mungeRgb($string);

        // assumes URI doesn't have spaces in it
        $bits = explode(' ', $string);

        // key order here is also the output order
        $caught = array(
            'color' => false,
            'image' => false,
            'repeat' => false,
            'attachment' => false,
            'position' => false
        );
        $single_value_keys = array('color', 'image', 'repeat', 'attachment');
        $catches = 0;

        foreach ($bits as $bit) {
            if ($bit === '') {
                continue;
            }

            $claimed = false;
            foreach ($single_value_keys as $key) {
                if ($caught[$key] !== false) {
                    continue; // this component is already filled
                }
                $r = $this->info['background-' . $key]->validate($bit, $config, $context);
                if ($r !== false) {
                    $caught[$key] = $r;
                    $claimed = true;
                    break;
                }
            }

            if (!$claimed) {
                // leftovers accumulate as raw position text
                if ($caught['position'] === false) {
                    $caught['position'] = '';
                }
                $caught['position'] .= $bit . ' ';
            }
            $catches++;
        }

        if (!$catches) {
            return false;
        }

        if ($caught['position'] !== false) {
            $caught['position'] = $this->info['background-position']->
                validate($caught['position'], $config, $context);
        }

        $ret = array();
        foreach ($caught as $component) {
            if ($component !== false) {
                $ret[] = $component;
            }
        }

        return empty($ret) ? false : implode(' ', $ret);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php 0000644 00000022301 15121423110 0017722 0 ustar 00 <?php
/**
* Validates a font family list according to CSS spec
*/
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{
    /**
     * Set of bytes allowed in a font name that is not purely alphanumeric:
     * underscore, dash, space, ASCII letters and digits, and every byte
     * from 0x80 to 0xFF.
     * @type string
     */
    protected $mask = null;

    /**
     * Builds the byte mask used by validate().
     */
    public function __construct()
    {
        // range() is used instead of `for ($c = 'a'; $c <= 'z'; $c++)`:
        // incrementing the string 'z' yields 'aa', which still compares
        // <= 'z', so that loop kept appending 'aa', 'ab', ... to the mask.
        // The resulting character set was the same, but the mask was far
        // longer than needed.
        $mask = '_- ';
        $mask .= implode('', range('a', 'z'));
        $mask .= implode('', range('A', 'Z'));
        $mask .= implode('', range('0', '9'));
        // special bytes used by UTF-8
        for ($i = 0x80; $i <= 0xFF; $i++) {
            // We don't bother excluding invalid bytes in this range,
            // because our restriction of well-formed UTF-8 will
            // prevent these from ever occurring.
            $mask .= chr($i);
        }
        $this->mask = $mask;

        /*
            PHP's internal strcspn implementation is
            O(length of string * length of mask), making it inefficient
            for large masks.  However, it's still faster than
            preg_match 8)
              for (p = s1;;) {
                spanp = s2;
                do {
                  if (*spanp == c || p == s1_end) {
                    return p - s1;
                  }
                } while (spanp++ < (s2_end - 1));
                c = *++p;
              }
         */
        // possible optimization: invert the mask.
    }

    /**
     * Validates a comma-separated font-family list. Entries that are not
     * allowed or contain unsafe characters are dropped.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalized list, or false when no entry survived.
     */
    public function validate($string, $config, $context)
    {
        static $generic_names = array(
            'serif' => true,
            'sans-serif' => true,
            'monospace' => true,
            'fantasy' => true,
            'cursive' => true
        );
        $allowed_fonts = $config->get('CSS.AllowedFonts');

        // assume that no font names contain commas in them
        $fonts = explode(',', $string);
        $final = '';
        foreach ($fonts as $font) {
            $font = trim($font);
            if ($font === '') {
                continue;
            }
            // match a generic name
            if (isset($generic_names[$font])) {
                if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
                    $final .= $font . ', ';
                }
                continue;
            }
            // match a quoted name
            if ($font[0] === '"' || $font[0] === "'") {
                $length = strlen($font);
                if ($length <= 2) {
                    continue;
                }
                $quote = $font[0];
                if ($font[$length - 1] !== $quote) {
                    continue;
                }
                $font = substr($font, 1, $length - 2);
            }

            $font = $this->expandCSSEscape($font);

            // $font is a pure representation of the font name

            if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
                continue;
            }

            if (ctype_alnum($font) && $font !== '') {
                // very simple font, allow it in unharmed
                $final .= $font . ', ';
                continue;
            }

            // bugger out on whitespace.  form feed (0C) really
            // shouldn't show up regardless
            $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);

            // Here, there are various classes of characters which need
            // to be treated differently:
            //  - Alphanumeric characters are essentially safe.  We
            //    handled these above.
            //  - Spaces require quoting, though most parsers will do
            //    the right thing if there aren't any characters that
            //    can be misinterpreted
            //  - Dashes rarely occur, but they are fairly unproblematic
            //    for parsing/rendering purposes.
            //  The above characters cover the majority of Western font
            //  names.
            //  - Arbitrary Unicode characters not in ASCII.  Because
            //    most parsers give little thought to Unicode, treatment
            //    of these codepoints is basically uniform, even for
            //    punctuation-like codepoints.  These characters can
            //    show up in non-Western pages and are supported by most
            //    major browsers, for example: "MS 明朝" is a
            //    legitimate font-name
            //    <http://ja.wikipedia.org/wiki/MS_明朝>.  See
            //    the CSS3 spec for more examples:
            //    <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
            //    You can see live samples of these on the Internet:
            //    <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>
            //    However, most of these fonts have ASCII equivalents:
            //    for example, 'MS Mincho', and it's considered
            //    professional to use ASCII font names instead of
            //    Unicode font names.  Thanks Takeshi Terada for
            //    providing this information.
            //  The following characters, to my knowledge, have not been
            //  used to name font names.
            //  - Single quote.  While theoretically you might find a
            //    font name that has a single quote in its name (serving
            //    as an apostrophe, e.g. Dave's Scribble), I haven't
            //    been able to find any actual examples of this.
            //    Internet Explorer's cssText translation (which I
            //    believe is invoked by innerHTML) normalizes any
            //    quoting to single quotes, and fails to escape single
            //    quotes.  (Note that this is not IE's behavior for all
            //    CSS properties, just some sort of special casing for
            //    font-family).  So a single quote *cannot* be used
            //    safely in the font-family context if there will be an
            //    innerHTML/cssText translation.  Note that Firefox 3.x
            //    does this too.
            //  - Double quote.  In IE, these get normalized to
            //    single-quotes, no matter what the encoding.  (Fun
            //    fact, in IE8, the 'content' CSS property gained
            //    support, where they special cased to preserve encoded
            //    double quotes, but still translate unadorned double
            //    quotes into single quotes.)  So, because their
            //    fixpoint behavior is identical to single quotes, they
            //    cannot be allowed either.  Firefox 3.x displays
            //    single-quote style behavior.
            //  - Backslashes are reduced by one (so \\ -> \) every
            //    iteration, so they cannot be used safely.  This shows
            //    up in IE7, IE8 and FF3
            //  - Semicolons, commas and backticks are handled properly.
            //  - The rest of the ASCII punctuation is handled properly.
            // We haven't checked what browsers do to unadorned
            // versions, but this is not important as long as the
            // browser doesn't /remove/ surrounding quotes (as IE does
            // for HTML).
            //
            // With these results in hand, we conclude that there are
            // various levels of safety:
            //  - Paranoid: alphanumeric, spaces and dashes(?)
            //  - International: Paranoid + non-ASCII Unicode
            //  - Edgy: Everything except quotes, backslashes
            //  - NoJS: Standards compliance, e.g. sod IE. Note that
            //    with some judicious character escaping (since certain
            //    types of escaping doesn't work) this is theoretically
            //    OK as long as innerHTML/cssText is not called.
            // We believe that international is a reasonable default
            // (that we will implement now), and once we do more
            // extensive research, we may feel comfortable with dropping
            // it down to edgy.

            // International (plus underscores): alphanumeric, spaces,
            // dashes, underscores and Unicode.  Use of
            // str(c)spn assumes that the string was already well formed
            // Unicode (which of course it is).
            if (strspn($font, $this->mask) !== strlen($font)) {
                continue;
            }

            // Historical:
            // In the absence of innerHTML/cssText, these ugly
            // transforms don't pose a security risk (as \\ and \"
            // might--these escapes are not supported by most browsers).
            // We could try to be clever and use single-quote wrapping
            // when there is a double quote present, but I have chosen
            // not to implement that.  (NOTE: you can reduce the amount
            // of escapes by one depending on what quoting style you use)
            // $font = str_replace('\\', '\\5C ', $font);
            // $font = str_replace('"',  '\\22 ', $font);
            // $font = str_replace("'",  '\\27 ', $font);

            // font possibly with spaces, requires quoting
            $final .= "'$font', ";
        }
        $final = rtrim($final, ', ');
        if ($final === '') {
            return false;
        }
        return $final;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Length.php 0000644 00000003551 15121423110 0017101 0 ustar 00 <?php
/**
* Represents a Length as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
{
    /**
     * Lower bound, or null when unbounded.
     * @type HTMLPurifier_Length|string
     */
    protected $min;

    /**
     * Upper bound, or null when unbounded.
     * @type HTMLPurifier_Length|string
     */
    protected $max;

    /**
     * @param HTMLPurifier_Length|string $min Minimum length, or null for no bound. String is also acceptable.
     * @param HTMLPurifier_Length|string $max Maximum length, or null for no bound. String is also acceptable.
     */
    public function __construct($min = null, $max = null)
    {
        $this->min = null;
        if ($min !== null) {
            $this->min = HTMLPurifier_Length::make($min);
        }
        $this->max = null;
        if ($max !== null) {
            $this->max = HTMLPurifier_Length::make($max);
        }
    }

    /**
     * Parses the string as a length and checks it against the configured
     * bounds.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string String form of the parsed length, or false when
     *         it is invalid, out of bounds, or not comparable to a bound.
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);

        // Optimizations
        if ($string === '') {
            return false;
        }
        if ($string === '0') {
            return '0';
        }
        if (strlen($string) === 1) {
            return false;
        }

        $parsed = HTMLPurifier_Length::make($string);
        if (!$parsed->isValid()) {
            return false;
        }

        if ($this->min) {
            $cmp = $parsed->compareTo($this->min);
            // false means the comparison could not be made
            if ($cmp === false || $cmp < 0) {
                return false;
            }
        }
        if ($this->max) {
            $cmp = $parsed->compareTo($this->max);
            if ($cmp === false || $cmp > 0) {
                return false;
            }
        }

        return $parsed->toString();
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php 0000644 00000002063 15121423110 0021731 0 ustar 00 <?php
/**
* Decorator which enables CSS properties to be disabled for specific elements.
*/
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
    /**
     * Wrapped definition used for every element other than the denied one.
     * @type HTMLPurifier_AttrDef
     */
    public $def;

    /**
     * Name of the element on which the property is rejected.
     * @type string
     */
    public $element;

    /**
     * @param HTMLPurifier_AttrDef $def Definition to wrap
     * @param string $element Element to deny
     */
    public function __construct($def, $element)
    {
        $this->def = $def;
        $this->element = $element;
    }

    /**
     * Rejects the value when the context's CurrentToken is the denied
     * element; otherwise delegates to the wrapped definition.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $current = $context->get('CurrentToken', true);
        $denied = $current && $current->name == $this->element;

        return $denied ? false : $this->def->validate($string, $config, $context);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php 0000644 00000002204 15121423110 0020606 0 ustar 00 <?php
/**
* Validates the value for the CSS property text-decoration
* @note This class could be generalized into a version that acts sort of
* like Enum except you can compound the allowed values.
*/
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{
    /**
     * Accepts 'none' on its own, or any space-separated combination of
     * line-through, overline and underline. Unknown words are dropped.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        static $allowed_values = array(
            'line-through' => true,
            'overline' => true,
            'underline' => true,
        );

        $string = strtolower($this->parseCDATA($string));
        if ($string === 'none') {
            return $string;
        }

        $kept = array();
        foreach (explode(' ', $string) as $word) {
            if (isset($allowed_values[$word])) {
                $kept[] = $word;
            }
        }

        return $kept ? implode(' ', $kept) : false;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Multiple.php 0000644 00000004054 15121423110 0017452 0 ustar 00 <?php
/**
 * Framework class for strings that carry several values of the same kind.
 *
 * Certain CSS properties such as border-width and margin accept more than
 * one length. This class takes a definition for a single value and applies
 * it to each space-separated part, usually up to a maximum of four.
 *
 * @note Even though the CSS specification isn't clear about it, inherit
 *       can only be used alone: it never appears as part of a multi
 *       shorthand declaration. Thus, this class does not allow inherit.
 */
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
{
    /**
     * Component definition each individual part is validated with.
     * @type HTMLPurifier_AttrDef
     * @todo Make protected
     */
    public $single;

    /**
     * Upper bound on the number of accepted values.
     * @todo Make protected
     */
    public $max;

    /**
     * @param HTMLPurifier_AttrDef $single HTMLPurifier_AttrDef to multiply
     * @param int $max Max number of values allowed (usually four)
     */
    public function __construct($single, $max = 4)
    {
        $this->single = $single;
        $this->max = $max;
    }

    /**
     * Validates each space-separated part with $this->single and joins the
     * accepted results with single spaces.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string false when the input is empty or no part validates
     */
    public function validate($string, $config, $context)
    {
        $string = $this->mungeRgb($this->parseCDATA($string));
        if ($string === '') {
            return false;
        }

        $accepted = array();
        $taken = 0;
        // parseCDATA replaced \r, \t and \n, so a plain space split suffices
        foreach (explode(' ', $string) as $candidate) {
            if (!($taken < $this->max)) {
                break; // quota reached: remaining parts are ignored
            }
            if (ctype_space($candidate)) {
                continue;
            }
            $outcome = $this->single->validate($candidate, $config, $context);
            if ($outcome !== false) {
                $accepted[] = $outcome;
                $taken++;
            }
        }

        if (!$accepted) {
            return false;
        }

        return rtrim(implode(' ', $accepted));
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/Lang.php 0000644 00000004604 15121423110 0016111 0 ustar 00 <?php
/**
 * Validates the HTML attribute lang, effectively a language code.
 * @note Built according to RFC 3066, which obsoleted RFC 1766
 */
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{
    /**
     * Normalises a language tag: the primary subtag must be valid or the
     * whole value is rejected; trailing subtags are kept (lower-cased) up to
     * the first invalid one, and everything from there on is dropped.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if (!$string) {
            return false;
        }

        $subtags = explode('-', $string);
        $total = count($subtags);

        // Primary subtag: a lone 'x' or 'i', or two to three letters.
        $primary = $subtags[0];
        $primary_length = strlen($primary);
        if ($primary_length < 1 || $primary_length > 3) {
            return false;
        }
        if ($primary_length === 1) {
            if ($primary != 'x' && $primary != 'i') {
                return false;
            }
        } elseif (!ctype_alpha($primary)) {
            return false;
        } else {
            $primary = strtolower($primary);
        }

        $new_string = $primary;

        // Remaining subtags: 1-8 alphanumeric characters each. The second
        // subtag is stricter: a single character is only allowed if it is 'x'.
        for ($i = 1; $i < $total; $i++) {
            $subtag = $subtags[$i];
            $length = strlen($subtag);

            if ($length == 0 || $length > 8 || !ctype_alnum($subtag)) {
                break;
            }
            if ($i === 1 && $length == 1 && $subtag != 'x') {
                break;
            }

            $new_string .= '-' . strtolower($subtag);
        }

        return $new_string;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/AttrDef/Text.php 0000644 00000000656 15121423110 0016157 0 ustar 00 <?php
/**
 * Validates arbitrary text according to the HTML spec.
 */
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
{
    /**
     * Accepts any text; the value is only passed through parseCDATA().
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $normalized = $this->parseCDATA($string);

        return $normalized;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Directive.php 0000644 00000003661 15121423110 0022374 0 ustar 00 <?php
/**
 * Interchange component class describing a single configuration directive.
 * Plain data holder: every member is public and there is no behaviour.
 */
class HTMLPurifier_ConfigSchema_Interchange_Directive
{
    /**
     * Identifier of this directive.
     * @type HTMLPurifier_ConfigSchema_Interchange_Id
     */
    public $id = null;

    /**
     * Type name, e.g. 'integer' or 'istring'.
     * @type string
     */
    public $type = null;

    /**
     * Default value, e.g. 3 or 'DefaultVal'.
     * @type mixed
     */
    public $default = null;

    /**
     * Description of the directive, as HTML.
     * @type string
     */
    public $description = null;

    /**
     * True when null is an acceptable value for this directive.
     * @type bool
     */
    public $typeAllowsNull = false;

    /**
     * Lookup table of allowed scalar values,
     * e.g. array('allowed' => true).
     * Null when every value is allowed.
     * @type array
     */
    public $allowed = null;

    /**
     * Aliases under which the directive is also known,
     * e.g. array(new HTMLPurifier_ConfigSchema_Interchange_Id('Ns', 'Dir'))).
     * @type HTMLPurifier_ConfigSchema_Interchange_Id[]
     */
    public $aliases = array();

    /**
     * Map of value aliases, e.g. array('alt' => 'real'). Null when value
     * aliasing is disabled (necessary for non-scalar types).
     * @type array
     */
    public $valueAliases = null;

    /**
     * HTML Purifier version that introduced the directive, e.g. '1.3.1'.
     * Null when the directive has always existed.
     * @type string
     */
    public $version = null;

    /**
     * ID of the directive that supersedes this one.
     * Null when not deprecated.
     * @type HTMLPurifier_ConfigSchema_Interchange_Id
     */
    public $deprecatedUse = null;

    /**
     * HTML Purifier version in which this directive was deprecated.
     * Null when not deprecated.
     * @type string
     */
    public $deprecatedVersion = null;

    /**
     * External projects this directive depends on, e.g. array('CSSTidy').
     * @type array
     */
    public $external = array();
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Id.php 0000644 00000002061 15121423110 0021003 0 ustar 00 <?php
/**
 * Represents a directive ID in the interchange format.
 *
 * The key is a dotted string; everything before the first dot is treated as
 * the root namespace and everything after it as the directive name.
 */
class HTMLPurifier_ConfigSchema_Interchange_Id
{
    /**
     * Full dotted key.
     * @type string
     */
    public $key;

    /**
     * @param string $key
     */
    public function __construct($key)
    {
        $this->key = $key;
    }

    /**
     * @return string
     * @warning This is NOT magic, to ensure that people don't abuse SPL and
     *          cause problems for PHP 5.0 support.
     */
    public function toString()
    {
        return $this->key;
    }

    /**
     * Returns the part of the key before the first dot.
     * @return string Empty string when the key contains no dot.
     */
    public function getRootNamespace()
    {
        $dot = strpos($this->key, '.');
        if ($dot === false) {
            // No separator: there is no namespace. Stated explicitly rather
            // than relying on false being coerced to a zero length.
            return '';
        }
        return substr($this->key, 0, $dot);
    }

    /**
     * Returns the part of the key after the first dot.
     * @return string The whole key when it contains no dot.
     */
    public function getDirective()
    {
        $dot = strpos($this->key, '.');
        if ($dot === false) {
            // Without this guard, false + 1 evaluates to 1 and the first
            // character of the key would be silently dropped.
            return $this->key;
        }
        return substr($this->key, $dot + 1);
    }

    /**
     * Factory equivalent of the constructor.
     * @param string $id
     * @return HTMLPurifier_ConfigSchema_Interchange_Id
     */
    public static function make($id)
    {
        return new HTMLPurifier_ConfigSchema_Interchange_Id($id);
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange.php 0000644 00000002402 15121423110 0020446 0 ustar 00 <?php
/**
 * Generic schema interchange format that can be turned into a runtime
 * representation (HTMLPurifier_ConfigSchema) or into HTML documentation.
 * Members are completely validated.
 */
class HTMLPurifier_ConfigSchema_Interchange
{
    /**
     * Name of the application this schema describes.
     * @type string
     */
    public $name;

    /**
     * Directives indexed by their ID string.
     * @type HTMLPurifier_ConfigSchema_Interchange_Directive[]
     */
    public $directives = array();

    /**
     * Registers a directive under the string form of its ID.
     * @param HTMLPurifier_ConfigSchema_Interchange_Directive $directive
     * @throws HTMLPurifier_ConfigSchema_Exception when the ID is already taken
     */
    public function addDirective($directive)
    {
        $i = $directive->id->toString();
        if (!isset($this->directives[$i])) {
            $this->directives[$i] = $directive;
            return;
        }
        throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine directive '$i'");
    }

    /**
     * Convenience function to perform standard validation; hands this object
     * to a fresh HTMLPurifier_ConfigSchema_Validator and returns its result.
     * Throws exception on failed validation.
     */
    public function validate()
    {
        $checker = new HTMLPurifier_ConfigSchema_Validator();
        $outcome = $checker->validate($this);

        return $outcome;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/Xml.php 0000644 00000010424 15121423110 0020350 0 ustar 00 <?php
/**
 * Converts HTMLPurifier_ConfigSchema_Interchange to an XML format,
 * which can be further processed to generate documentation.
 *
 * The caller is expected to have opened an output target on this writer
 * (inherited XMLWriter API) before calling build().
 */
class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter
{
    /**
     * Interchange object currently being serialised.
     * @type HTMLPurifier_ConfigSchema_Interchange
     */
    protected $interchange;

    /**
     * Root namespace whose <namespace> element is currently open, if any.
     * @type string
     */
    private $namespace;

    /**
     * Writes $html, purified by HTMLPurifier, inside an XHTML-namespaced div.
     * @param string $html
     */
    protected function writeHTMLDiv($html)
    {
        $this->startElement('div');
        $purifier = HTMLPurifier::getInstance();
        $html = $purifier->purify($html);
        // The attribute must be written before any content of the div.
        $this->writeAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
        $this->writeRaw($html);
        $this->endElement(); // div
    }

    /**
     * Renders a PHP value as source-like text.
     * @param mixed $var
     * @return string 'array()' for an empty array, var_export() output otherwise
     */
    protected function export($var)
    {
        if ($var === array()) {
            return 'array()';
        }
        return var_export($var, true);
    }

    /**
     * Writes the complete <configdoc> document for the given interchange and
     * flushes the writer.
     * @param HTMLPurifier_ConfigSchema_Interchange $interchange
     */
    public function build($interchange)
    {
        // global access, only use as last resort
        $this->interchange = $interchange;
        $this->setIndent(true);
        $this->startDocument('1.0', 'UTF-8');
        $this->startElement('configdoc');
        $this->writeElement('title', $interchange->name);
        foreach ($interchange->directives as $directive) {
            $this->buildDirective($directive);
        }
        if ($this->namespace) {
            $this->endElement(); // namespace
            // Forget the closed namespace so a later build() on the same
            // instance starts from a clean state instead of mis-nesting.
            $this->namespace = null;
        }
        $this->endElement(); // configdoc
        $this->flush();
    }

    /**
     * Writes one <directive> element, opening a new <namespace> element
     * first whenever the directive's root namespace differs from the one
     * currently open.
     * @param HTMLPurifier_ConfigSchema_Interchange_Directive $directive
     */
    public function buildDirective($directive)
    {
        // Kludge, although I suppose having a notion of a "root namespace"
        // certainly makes things look nicer when documentation is built.
        // Depends on things being sorted.
        if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) {
            if ($this->namespace) {
                $this->endElement(); // namespace
            }
            $this->namespace = $directive->id->getRootNamespace();
            $this->startElement('namespace');
            $this->writeAttribute('id', $this->namespace);
            $this->writeElement('name', $this->namespace);
        }

        $this->startElement('directive');
        $this->writeAttribute('id', $directive->id->toString());
        $this->writeElement('name', $directive->id->getDirective());

        $this->startElement('aliases');
        foreach ($directive->aliases as $alias) {
            $this->writeElement('alias', $alias->toString());
        }
        $this->endElement(); // aliases

        $this->startElement('constraints');
        if ($directive->version) {
            $this->writeElement('version', $directive->version);
        }
        $this->startElement('type');
        if ($directive->typeAllowsNull) {
            $this->writeAttribute('allow-null', 'yes');
        }
        $this->text($directive->type);
        $this->endElement(); // type
        if ($directive->allowed) {
            $this->startElement('allowed');
            foreach ($directive->allowed as $value => $x) {
                $this->writeElement('value', $value);
            }
            $this->endElement(); // allowed
        }
        // The element is opened explicitly so that xml:space can be attached
        // to it: an attribute can only be written while the start tag is
        // still open, which is no longer the case after writeElement().
        $this->startElement('default');
        $this->writeAttribute('xml:space', 'preserve');
        $this->text($this->export($directive->default));
        $this->endElement(); // default
        if ($directive->external) {
            $this->startElement('external');
            foreach ($directive->external as $project) {
                $this->writeElement('project', $project);
            }
            $this->endElement(); // external
        }
        $this->endElement(); // constraints

        if ($directive->deprecatedVersion) {
            $this->startElement('deprecated');
            $this->writeElement('version', $directive->deprecatedVersion);
            $this->writeElement('use', $directive->deprecatedUse->toString());
            $this->endElement(); // deprecated
        }

        $this->startElement('description');
        $this->writeHTMLDiv($directive->description);
        $this->endElement(); // description

        $this->endElement(); // directive
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php 0000644 00000002375 15121423110 0022144 0 ustar 00 <?php
/**
 * Converts HTMLPurifier_ConfigSchema_Interchange to our runtime
 * representation used to perform checks on user configuration.
 */
class HTMLPurifier_ConfigSchema_Builder_ConfigSchema
{
    /**
     * Registers every directive of the interchange (definition, allowed
     * values, aliases, value aliases) on a new schema and post-processes it.
     * @param HTMLPurifier_ConfigSchema_Interchange $interchange
     * @return HTMLPurifier_ConfigSchema
     */
    public function build($interchange)
    {
        $schema = new HTMLPurifier_ConfigSchema();

        foreach ($interchange->directives as $directive) {
            $key = $directive->id->key;

            $schema->add($key, $directive->default, $directive->type, $directive->typeAllowsNull);

            // Null means "no restriction"; only register an explicit table.
            if (!is_null($directive->allowed)) {
                $schema->addAllowedValues($key, $directive->allowed);
            }

            foreach ($directive->aliases as $alias) {
                $schema->addAlias($alias->key, $key);
            }

            // Null means value aliasing is disabled for this directive.
            if (!is_null($directive->valueAliases)) {
                $schema->addValueAliases($key, $directive->valueAliases);
            }
        }

        $schema->postProcess();

        return $schema;
    }
}
// vim: et sw=4 sts=4
htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser 0000644 00000057176 15121423110 0017503 0 ustar 00 O:25:"HTMLPurifier_ConfigSchema":3:{s:8:"defaults";a:127:{s:19:"Attr.AllowedClasses";N;s:24:"Attr.AllowedFrameTargets";a:0:{}s:15:"Attr.AllowedRel";a:0:{}s:15:"Attr.AllowedRev";a:0:{}s:18:"Attr.ClassUseCDATA";N;s:20:"Attr.DefaultImageAlt";N;s:24:"Attr.DefaultInvalidImage";s:0:"";s:27:"Attr.DefaultInvalidImageAlt";s:13:"Invalid image";s:19:"Attr.DefaultTextDir";s:3:"ltr";s:13:"Attr.EnableID";b:0;s:21:"Attr.ForbiddenClasses";a:0:{}s:13:"Attr.ID.HTML5";N;s:16:"Attr.IDBlacklist";a:0:{}s:22:"Attr.IDBlacklistRegexp";N;s:13:"Attr.IDPrefix";s:0:"";s:18:"Attr.IDPrefixLocal";s:0:"";s:24:"AutoFormat.AutoParagraph";b:0;s:17:"AutoFormat.Custom";a:0:{}s:25:"AutoFormat.DisplayLinkURI";b:0;s:18:"AutoFormat.Linkify";b:0;s:33:"AutoFormat.PurifierLinkify.DocURL";s:3:"#%s";s:26:"AutoFormat.PurifierLinkify";b:0;s:32:"AutoFormat.RemoveEmpty.Predicate";a:4:{s:8:"colgroup";a:0:{}s:2:"th";a:0:{}s:2:"td";a:0:{}s:6:"iframe";a:1:{i:0;s:3:"src";}}s:44:"AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions";a:2:{s:2:"td";b:1;s:2:"th";b:1;}s:33:"AutoFormat.RemoveEmpty.RemoveNbsp";b:0;s:22:"AutoFormat.RemoveEmpty";b:0;s:39:"AutoFormat.RemoveSpansWithoutAttributes";b:0;s:19:"CSS.AllowDuplicates";b:0;s:18:"CSS.AllowImportant";b:0;s:15:"CSS.AllowTricky";b:0;s:16:"CSS.AllowedFonts";N;s:21:"CSS.AllowedProperties";N;s:17:"CSS.DefinitionRev";i:1;s:23:"CSS.ForbiddenProperties";a:0:{}s:16:"CSS.MaxImgLength";s:6:"1200px";s:15:"CSS.Proprietary";b:0;s:11:"CSS.Trusted";b:0;s:20:"Cache.DefinitionImpl";s:10:"Serializer";s:20:"Cache.SerializerPath";N;s:27:"Cache.SerializerPermissions";i:493;s:22:"Core.AggressivelyFixLt";b:1;s:29:"Core.AggressivelyRemoveScript";b:1;s:28:"Core.AllowHostnameUnderscore";b:0;s:23:"Core.AllowParseManyTags";b:0;s:18:"Core.CollectErrors";b:0;s:18:"Core.ColorKeywords";a:148:{s:9:"aliceblue";s:7:"#F0F8FF";s:12:"antiquewhite";s:7:"#FAEBD7";s:4:"aqua";s:7:"#00FFFF";s:10:"aquamarine";s:7:"#7FFF
D4";s:5:"azure";s:7:"#F0FFFF";s:5:"beige";s:7:"#F5F5DC";s:6:"bisque";s:7:"#FFE4C4";s:5:"black";s:7:"#000000";s:14:"blanchedalmond";s:7:"#FFEBCD";s:4:"blue";s:7:"#0000FF";s:10:"blueviolet";s:7:"#8A2BE2";s:5:"brown";s:7:"#A52A2A";s:9:"burlywood";s:7:"#DEB887";s:9:"cadetblue";s:7:"#5F9EA0";s:10:"chartreuse";s:7:"#7FFF00";s:9:"chocolate";s:7:"#D2691E";s:5:"coral";s:7:"#FF7F50";s:14:"cornflowerblue";s:7:"#6495ED";s:8:"cornsilk";s:7:"#FFF8DC";s:7:"crimson";s:7:"#DC143C";s:4:"cyan";s:7:"#00FFFF";s:8:"darkblue";s:7:"#00008B";s:8:"darkcyan";s:7:"#008B8B";s:13:"darkgoldenrod";s:7:"#B8860B";s:8:"darkgray";s:7:"#A9A9A9";s:8:"darkgrey";s:7:"#A9A9A9";s:9:"darkgreen";s:7:"#006400";s:9:"darkkhaki";s:7:"#BDB76B";s:11:"darkmagenta";s:7:"#8B008B";s:14:"darkolivegreen";s:7:"#556B2F";s:10:"darkorange";s:7:"#FF8C00";s:10:"darkorchid";s:7:"#9932CC";s:7:"darkred";s:7:"#8B0000";s:10:"darksalmon";s:7:"#E9967A";s:12:"darkseagreen";s:7:"#8FBC8F";s:13:"darkslateblue";s:7:"#483D8B";s:13:"darkslategray";s:7:"#2F4F4F";s:13:"darkslategrey";s:7:"#2F4F4F";s:13:"darkturquoise";s:7:"#00CED1";s:10:"darkviolet";s:7:"#9400D3";s:8:"deeppink";s:7:"#FF1493";s:11:"deepskyblue";s:7:"#00BFFF";s:7:"dimgray";s:7:"#696969";s:7:"dimgrey";s:7:"#696969";s:10:"dodgerblue";s:7:"#1E90FF";s:9:"firebrick";s:7:"#B22222";s:11:"floralwhite";s:7:"#FFFAF0";s:11:"forestgreen";s:7:"#228B22";s:7:"fuchsia";s:7:"#FF00FF";s:9:"gainsboro";s:7:"#DCDCDC";s:10:"ghostwhite";s:7:"#F8F8FF";s:4:"gold";s:7:"#FFD700";s:9:"goldenrod";s:7:"#DAA520";s:4:"gray";s:7:"#808080";s:4:"grey";s:7:"#808080";s:5:"green";s:7:"#008000";s:11:"greenyellow";s:7:"#ADFF2F";s:8:"honeydew";s:7:"#F0FFF0";s:7:"hotpink";s:7:"#FF69B4";s:9:"indianred";s:7:"#CD5C5C";s:6:"indigo";s:7:"#4B0082";s:5:"ivory";s:7:"#FFFFF0";s:5:"khaki";s:7:"#F0E68C";s:8:"lavender";s:7:"#E6E6FA";s:13:"lavenderblush";s:7:"#FFF0F5";s:9:"lawngreen";s:7:"#7CFC00";s:12:"lemonchiffon";s:7:"#FFFACD";s:9:"lightblue";s:7:"#ADD8E6";s:10:"lightcoral";s:7:"#F08080";s:9:"lightcyan";s:7:"#E0FFFF";s:20:"ligh
tgoldenrodyellow";s:7:"#FAFAD2";s:9:"lightgray";s:7:"#D3D3D3";s:9:"lightgrey";s:7:"#D3D3D3";s:10:"lightgreen";s:7:"#90EE90";s:9:"lightpink";s:7:"#FFB6C1";s:11:"lightsalmon";s:7:"#FFA07A";s:13:"lightseagreen";s:7:"#20B2AA";s:12:"lightskyblue";s:7:"#87CEFA";s:14:"lightslategray";s:7:"#778899";s:14:"lightslategrey";s:7:"#778899";s:14:"lightsteelblue";s:7:"#B0C4DE";s:11:"lightyellow";s:7:"#FFFFE0";s:4:"lime";s:7:"#00FF00";s:9:"limegreen";s:7:"#32CD32";s:5:"linen";s:7:"#FAF0E6";s:7:"magenta";s:7:"#FF00FF";s:6:"maroon";s:7:"#800000";s:16:"mediumaquamarine";s:7:"#66CDAA";s:10:"mediumblue";s:7:"#0000CD";s:12:"mediumorchid";s:7:"#BA55D3";s:12:"mediumpurple";s:7:"#9370DB";s:14:"mediumseagreen";s:7:"#3CB371";s:15:"mediumslateblue";s:7:"#7B68EE";s:17:"mediumspringgreen";s:7:"#00FA9A";s:15:"mediumturquoise";s:7:"#48D1CC";s:15:"mediumvioletred";s:7:"#C71585";s:12:"midnightblue";s:7:"#191970";s:9:"mintcream";s:7:"#F5FFFA";s:9:"mistyrose";s:7:"#FFE4E1";s:8:"moccasin";s:7:"#FFE4B5";s:11:"navajowhite";s:7:"#FFDEAD";s:4:"navy";s:7:"#000080";s:7:"oldlace";s:7:"#FDF5E6";s:5:"olive";s:7:"#808000";s:9:"olivedrab";s:7:"#6B8E23";s:6:"orange";s:7:"#FFA500";s:9:"orangered";s:7:"#FF4500";s:6:"orchid";s:7:"#DA70D6";s:13:"palegoldenrod";s:7:"#EEE8AA";s:9:"palegreen";s:7:"#98FB98";s:13:"paleturquoise";s:7:"#AFEEEE";s:13:"palevioletred";s:7:"#DB7093";s:10:"papayawhip";s:7:"#FFEFD5";s:9:"peachpuff";s:7:"#FFDAB9";s:4:"peru";s:7:"#CD853F";s:4:"pink";s:7:"#FFC0CB";s:4:"plum";s:7:"#DDA0DD";s:10:"powderblue";s:7:"#B0E0E6";s:6:"purple";s:7:"#800080";s:13:"rebeccapurple";s:7:"#663399";s:3:"red";s:7:"#FF0000";s:9:"rosybrown";s:7:"#BC8F8F";s:9:"royalblue";s:7:"#4169E1";s:11:"saddlebrown";s:7:"#8B4513";s:6:"salmon";s:7:"#FA8072";s:10:"sandybrown";s:7:"#F4A460";s:8:"seagreen";s:7:"#2E8B57";s:8:"seashell";s:7:"#FFF5EE";s:6:"sienna";s:7:"#A0522D";s:6:"silver";s:7:"#C0C0C0";s:7:"skyblue";s:7:"#87CEEB";s:9:"slateblue";s:7:"#6A5ACD";s:9:"slategray";s:7:"#708090";s:9:"slategrey";s:7:"#708090";s:4:"snow";s:7:"#FFFAF
A";s:11:"springgreen";s:7:"#00FF7F";s:9:"steelblue";s:7:"#4682B4";s:3:"tan";s:7:"#D2B48C";s:4:"teal";s:7:"#008080";s:7:"thistle";s:7:"#D8BFD8";s:6:"tomato";s:7:"#FF6347";s:9:"turquoise";s:7:"#40E0D0";s:6:"violet";s:7:"#EE82EE";s:5:"wheat";s:7:"#F5DEB3";s:5:"white";s:7:"#FFFFFF";s:10:"whitesmoke";s:7:"#F5F5F5";s:6:"yellow";s:7:"#FFFF00";s:11:"yellowgreen";s:7:"#9ACD32";}s:30:"Core.ConvertDocumentToFragment";b:1;s:36:"Core.DirectLexLineNumberSyncInterval";i:0;s:20:"Core.DisableExcludes";b:0;s:15:"Core.EnableIDNA";b:0;s:13:"Core.Encoding";s:5:"utf-8";s:26:"Core.EscapeInvalidChildren";b:0;s:22:"Core.EscapeInvalidTags";b:0;s:29:"Core.EscapeNonASCIICharacters";b:0;s:19:"Core.HiddenElements";a:2:{s:6:"script";b:1;s:5:"style";b:1;}s:13:"Core.Language";s:2:"en";s:24:"Core.LegacyEntityDecoder";b:0;s:14:"Core.LexerImpl";N;s:24:"Core.MaintainLineNumbers";N;s:22:"Core.NormalizeNewlines";b:1;s:21:"Core.RemoveInvalidImg";b:1;s:33:"Core.RemoveProcessingInstructions";b:0;s:25:"Core.RemoveScriptContents";N;s:13:"Filter.Custom";a:0:{}s:34:"Filter.ExtractStyleBlocks.Escaping";b:1;s:31:"Filter.ExtractStyleBlocks.Scope";N;s:34:"Filter.ExtractStyleBlocks.TidyImpl";N;s:25:"Filter.ExtractStyleBlocks";b:0;s:14:"Filter.YouTube";b:0;s:12:"HTML.Allowed";N;s:22:"HTML.AllowedAttributes";N;s:20:"HTML.AllowedComments";a:0:{}s:26:"HTML.AllowedCommentsRegexp";N;s:20:"HTML.AllowedElements";N;s:19:"HTML.AllowedModules";N;s:23:"HTML.Attr.Name.UseCDATA";b:0;s:17:"HTML.BlockWrapper";s:1:"p";s:16:"HTML.CoreModules";a:7:{s:9:"Structure";b:1;s:4:"Text";b:1;s:9:"Hypertext";b:1;s:4:"List";b:1;s:22:"NonXMLCommonAttributes";b:1;s:19:"XMLCommonAttributes";b:1;s:16:"CommonAttributes";b:1;}s:18:"HTML.CustomDoctype";N;s:17:"HTML.DefinitionID";N;s:18:"HTML.DefinitionRev";i:1;s:12:"HTML.Doctype";N;s:25:"HTML.FlashAllowFullScreen";b:0;s:24:"HTML.ForbiddenAttributes";a:0:{}s:22:"HTML.ForbiddenElements";a:0:{}s:10:"HTML.Forms";b:0;s:17:"HTML.MaxImgLength";i:1200;s:13:"HTML.Nofollow";b:0;s:11:"HTML.Parent";s:3:"div";s:16:
"HTML.Proprietary";b:0;s:14:"HTML.SafeEmbed";b:0;s:15:"HTML.SafeIframe";b:0;s:15:"HTML.SafeObject";b:0;s:18:"HTML.SafeScripting";a:0:{}s:11:"HTML.Strict";b:0;s:16:"HTML.TargetBlank";b:0;s:19:"HTML.TargetNoopener";b:1;s:21:"HTML.TargetNoreferrer";b:1;s:12:"HTML.TidyAdd";a:0:{}s:14:"HTML.TidyLevel";s:6:"medium";s:15:"HTML.TidyRemove";a:0:{}s:12:"HTML.Trusted";b:0;s:10:"HTML.XHTML";b:1;s:28:"Output.CommentScriptContents";b:1;s:19:"Output.FixInnerHTML";b:1;s:18:"Output.FlashCompat";b:0;s:14:"Output.Newline";N;s:15:"Output.SortAttr";b:0;s:17:"Output.TidyFormat";b:0;s:17:"Test.ForceNoIconv";b:0;s:18:"URI.AllowedSchemes";a:7:{s:4:"http";b:1;s:5:"https";b:1;s:6:"mailto";b:1;s:3:"ftp";b:1;s:4:"nntp";b:1;s:4:"news";b:1;s:3:"tel";b:1;}s:8:"URI.Base";N;s:17:"URI.DefaultScheme";s:4:"http";s:16:"URI.DefinitionID";N;s:17:"URI.DefinitionRev";i:1;s:11:"URI.Disable";b:0;s:19:"URI.DisableExternal";b:0;s:28:"URI.DisableExternalResources";b:0;s:20:"URI.DisableResources";b:0;s:8:"URI.Host";N;s:17:"URI.HostBlacklist";a:0:{}s:16:"URI.MakeAbsolute";b:0;s:9:"URI.Munge";N;s:18:"URI.MungeResources";b:0;s:18:"URI.MungeSecretKey";N;s:26:"URI.OverrideAllowedSchemes";b:1;s:20:"URI.SafeIframeRegexp";N;}s:12:"defaultPlist";O:25:"HTMLPurifier_PropertyList":3:{s:7:"