diff --git a/src/Pluf/Text/HTML/Filter.php b/src/Pluf/Text/HTML/Filter.php
index e934015..c62c426 100644
--- a/src/Pluf/Text/HTML/Filter.php
+++ b/src/Pluf/Text/HTML/Filter.php
@@ -106,10 +106,25 @@ class Pluf_Text_HTML_Filter
     public $always_make_tags = 0;
 
     /**
-     * entity control options
+     * Allows decimal entities.
+     *
+     * A decimal entity has the format &#NN; (two or more decimal digits).
+     * For example, the entity &#64; is the @ character.
+     *
+     * @var int
      */
     public $allow_numbered_entities = 1;
 
+    /**
+     * Allows hexadecimal entities.
+     *
+     * A hexadecimal entity has the format &#xHH; (two or more hex digits).
+     * For example, the entity &#x40; is the @ character.
+     *
+     * @var int
+     */
+    public $allow_hexadecimal_entities = 1;
+
     public $allowed_entities = array(
         'amp',
         'gt',
@@ -117,7 +132,6 @@ class Pluf_Text_HTML_Filter
         'quot',
     );
 
-
     function go($data)
     {
         $this->tag_counts = array();
@@ -311,27 +325,73 @@ class Pluf_Text_HTML_Filter
 
+    /**
+     * Returns the opening of an entity, escaping the ampersand unless the
+     * sequence is a terminated, valid entity.
+     *
+     * @param string $preamble Entity body without the leading "&", like "amp" or "#64".
+     * @param string $term Character following the body; ";" when terminated.
+     * @return string "&" or "&amp;" followed by $preamble.
+     */
     function check_entity($preamble, $term)
     {
-        if ($term != ';') {
-            return '&amp;'.$preamble;
-        }
-        if ($this->is_valid_entity($preamble)) {
-            return '&'.$preamble;
+        if (';' === $term) {
+            if ($this->is_valid_entity($preamble)) {
+                return '&'.$preamble;
+            }
         }
         return '&amp;'.$preamble;
     }
 
+    /**
+     * Determines if the string provided is a valid entity.
+     *
+     * A numeric entity ("#64" or "#x40") is valid when its kind is enabled
+     * ($allow_numbered_entities or $allow_hexadecimal_entities) and it does
+     * not designate a control character. Any other string is valid when it
+     * is listed in $allowed_entities.
+     *
+     * @param string $entity Entity body without the leading "&" and trailing ";".
+     * @return boolean
+     */
     function is_valid_entity($entity)
     {
-        if (preg_match('!^#([0-9]+)$!i', $entity, $m)) {
-            if ($m[1] > 127) {
-                return 1;
+        if (preg_match('#^\#([0-9]{2,}|x[0-9a-f]{2,})$#i', $entity, $m)) {
+            if (0 === stripos($m[1], 'x')) {
+                // hexadecimal entity ("x" or "X": the pattern is case-insensitive)
+                if ($this->allow_hexadecimal_entities && $this->not_control_caracter($m[1])) {
+                    return true;
+                }
+                return false;
+            } else {
+                // decimal entity
+                if ($this->allow_numbered_entities && $this->not_control_caracter($m[1])) {
+                    return true;
+                }
+                return false;
             }
-            return $this->allow_numbered_entities;
         }
-        if (in_array($entity, $this->allowed_entities)){
-            return 1;
+        // HTML 4.0 character entity
+        return in_array($entity, $this->allowed_entities, true);
+    }
+
+    /**
+     * Determines if the data provided is not a control character.
+     *
+     * Code points 0 to 31, and 127 to 159, are considered control characters.
+     *
+     * @param string|int $data Code point to test, decimal like "64" or
+     *                         hexadecimal with an "x" prefix like "x40".
+     * @return boolean
+     */
+    function not_control_caracter($data)
+    {
+        // cast first: the numeric-entity decoder passes $d, which may be an integer
+        if (0 === stripos((string) $data, 'x')) {
+            $data = substr($data, 1);
+            $data = hexdec($data);
+        } else {
+            $data = intval($data);
         }
-        return 0;
+        return (31 < $data && (127 > $data || 159 < $data));
     }
 
     // within attributes, we want to convert all hex/dec/url escape
@@ -360,7 +420,7 @@ class Pluf_Text_HTML_Filter
     {
         if ($d < 0) { $d = 32; } // space
         // don't mess with huigh chars
-        if ($d > 127) {
+        if ($this->not_control_caracter($d)) {
             if ($orig_type == '%') { return '%'.dechex($d); }
             if ($orig_type == '&') { return "&#$d;"; }
         }