1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
<?php
// Validates and cleans an XHTML snippet. validate_xhtml_snippet() returns false
// on error, in which case $error holds an HTML-formatted message.
// Also checks which tags and attributes are allowed.
/**
 * Validates an XHTML snippet with Tidy, then re-serialises it through an XML
 * parser while checking every element and attribute against a whitelist.
 *
 * Usage: call validate_xhtml_snippet(); on success it returns true and $output
 * holds the re-serialised markup (wrapped in the helper <DIV> the parser is
 * fed). On failure it returns false and $error holds an HTML-formatted message.
 *
 * Element and attribute names in $output are written exactly as the XML parser
 * reports them; the whitelist keys are upper-case to match the parser's
 * default case folding.
 *
 * NOTE(review): URL schemes in HREF/SRC values are not checked here; a
 * "javascript:" URL passes validation. Confirm whether callers filter these.
 */
class XHTML_validator {
    /** @var string|null HTML-formatted error text of the last validation, or null if none. */
    public $error = null;

    /** @var string|null Markup rebuilt by the parser callbacks during the last validation. */
    public $output = null;

    /**
     * Whitelist: upper-case element name => list of upper-case attribute names
     * permitted on it. STYLE is handled separately (silently dropped).
     */
    private static $xml_elements_and_attributes = array(
        'A' => array('HREF', 'TITLE', 'NAME'),
        'B' => array(),
        'BR' => array(),
        'CODE' => array(),
        'DIV' => array(),
        'EM' => array(),
        'H2' => array(),
        'H3' => array(),
        'H4' => array(),
        'IMG' => array('WIDTH', 'HEIGHT', 'SRC', 'ALT', 'TITLE', 'BORDER'),
        'LI' => array(),
        'OL' => array(),
        'P' => array('ALIGN'),
        'SPAN' => array(),
        'STRONG' => array(),
        'TABLE' => array('BORDER'),
        'TBODY' => array(),
        'TD' => array(),
        'TH' => array(),
        'THEAD' => array(),
        'TR' => array(),
        'U' => array(),
        'UL' => array()
    );

    /**
     * Validate and clean one XHTML snippet.
     *
     * @param string $snippet Body-level markup to validate.
     * @return bool True when the snippet is valid and only uses whitelisted
     *              elements/attributes; false otherwise (see $error).
     * @throws Exception When the Tidy or XML extension is not available.
     */
    public function validate_xhtml_snippet($snippet) {
        // Check the functions this method actually calls.
        $required_tidy = array(
            'tidy_parse_string',
            'tidy_clean_repair',
            'tidy_get_output',
            'tidy_warning_count',
            'tidy_error_count',
            'tidy_get_error_buffer'
        );
        foreach ($required_tidy as $fn) {
            if (!function_exists($fn)) {
                throw new Exception('Tidy library not installed. Cannot proceed.');
            }
        }
        if (!function_exists('xml_parser_create')) {
            throw new Exception('XML library not installed. Cannot proceed.');
        }

        // Start from a clean slate so the instance can be reused; otherwise a
        // previous failure would make every later call fail as well.
        $this->error = null;
        $this->output = null;

        $tidycfg = array('doctype'=>'omit', 'drop-font-tags'=>true, 'hide-comments'=>true, 'output-xhtml'=>true, 'show-body-only'=>true, 'break-before-br'=>true, 'indent'=>true, 'indent-spaces'=>1, 'char-encoding'=>'utf8', 'lower-literals'=>true, 'numeric-entities'=>true);
        // Tidy needs a complete document, so wrap the snippet in a minimal
        // XHTML Strict page; 'show-body-only' strips the wrapper again.
        $tidy = tidy_parse_string('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>Temp</title></head><body>' .
            $snippet .
            '</body></html>', $tidycfg);
        if ($tidy === false) {
            $this->error = 'Input is not valid XHTML: <pre></pre>';
            return false;
        }
        tidy_clean_repair($tidy);
        $out = tidy_get_output($tidy);
        if (tidy_warning_count($tidy) || tidy_error_count($tidy)) {
            $this->error = 'Input is not valid XHTML: <pre>' . htmlentities(tidy_get_error_buffer($tidy)) . '</pre>';
            return false;
        }

        $xml_parser = xml_parser_create();
        // Array callables instead of xml_set_object() + method-name strings:
        // same behaviour, and xml_set_object() is deprecated as of PHP 8.4.
        xml_set_element_handler(
            $xml_parser,
            array($this, 'xvalidate_startElement'),
            array($this, 'xvalidate_endElement')
        );
        xml_set_character_data_handler($xml_parser, array($this, 'xvalidate_charData'));

        // Need to specify any non-XML entities here. &nbsp; is the only one
        // found so far; it is declared as the numeric reference &#160; so the
        // declaration does not depend on this file's encoding.
        $parsed = xml_parse($xml_parser, '<!DOCTYPE demo SYSTEM "/demo.dtd" [<!ENTITY nbsp "&#160;"> ]><div>' . $out . '</div>', true);
        // Read the error before releasing the parser.
        $parse_error = $parsed ? null : xml_error_string(xml_get_error_code($xml_parser));
        // Free on every path (the original leaked the parser on failure).
        // From PHP 8.0 the parser is an object and this call has no effect.
        if (PHP_VERSION_ID < 80000) {
            xml_parser_free($xml_parser);
        }

        if (!$parsed) {
            $this->error = 'Failed to parse XML: ' . $parse_error;
            return false;
        }
        // The element handler may have recorded whitelist violations.
        return $this->error === null;
    }

    /**
     * Append one whitelist violation to $error.
     *
     * @param string $errstr      Description of the problem.
     * @param string $elementname Element the problem was found on.
     */
    private function appendError($errstr, $elementname) {
        $this->error .= 'Element ' . $elementname . ': ' . $errstr . '<br>';
    }

    /**
     * XML parser callback for an opening tag: checks the element and its
     * attributes against the whitelist and writes the tag to $output.
     *
     * @param mixed  $parser Parser handle (unused).
     * @param string $name   Element name as reported by the parser.
     * @param array  $attrs  Attribute name => decoded value.
     */
    private function xvalidate_startElement($parser, $name, $attrs) {
        if (!array_key_exists($name, self::$xml_elements_and_attributes)) {
            $this->appendError('Invalid element', $name);
            return;
        }
        // STYLE is tolerated on input but never copied to the output.
        unset($attrs['STYLE']);

        $permitted = self::$xml_elements_and_attributes[$name];
        $this->output .= '<' . $name;
        foreach ($attrs as $attr => $val) {
            if (!in_array($attr, $permitted, true)) {
                // Report it and keep it out of the cleaned output.
                $this->appendError('Invalid attribute "' . $attr . '"', $name);
                continue;
            }
            // The parser hands over entity-decoded values; re-escape so a
            // decoded quote or angle bracket cannot break out of the attribute.
            $this->output .= ' ' . $attr . '="' . htmlspecialchars($val, ENT_QUOTES, 'UTF-8') . '"';
        }
        $this->output .= '>';
    }

    /**
     * XML parser callback for a closing tag.
     *
     * @param mixed  $parser Parser handle (unused).
     * @param string $name   Element name as reported by the parser.
     */
    private function xvalidate_endElement($parser, $name) {
        // A rejected element never got a start tag, so do not emit an orphan
        // end tag for it either.
        if (!array_key_exists($name, self::$xml_elements_and_attributes)) {
            return;
        }
        $this->output .= '</' . $name . '>';
    }

    /**
     * XML parser callback for text content.
     *
     * @param mixed  $parser Parser handle (unused).
     * @param string $data   Entity-decoded character data.
     */
    private function xvalidate_charData($parser, $data) {
        // Re-escape: decoded "&lt;" must not turn back into live markup.
        $this->output .= htmlspecialchars($data, ENT_NOQUOTES, 'UTF-8');
    }
}
?>
|