1 |
wakaba |
1.1 |
package Whatpm::ContentChecker; |
2 |
|
|
use strict; |
3 |
|
|
require Whatpm::ContentChecker; |
4 |
|
|
|
5 |
|
|
my $HTML_NS = q<http://www.w3.org/1999/xhtml>; |
6 |
|
|
|
7 |
wakaba |
1.29 |
## December 2007 HTML5 Classification |
8 |
|
|
|
9 |
|
|
my $HTMLMetadataContent = { |
10 |
|
|
$HTML_NS => { |
11 |
|
|
title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1, |
12 |
|
|
'event-source' => 1, command => 1, datatemplate => 1, |
13 |
|
|
## NOTE: A |meta| with no |name| element is not allowed as |
14 |
|
|
## a metadata content other than |head| element. |
15 |
|
|
meta => 1, |
16 |
|
|
}, |
17 |
|
|
## NOTE: RDF is mentioned in the HTML5 spec. |
18 |
|
|
## TODO: Other RDF elements? |
19 |
|
|
q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1}, |
20 |
|
|
}; |
21 |
|
|
|
22 |
|
|
my $HTMLProseContent = { |
23 |
|
|
$HTML_NS => { |
24 |
|
|
section => 1, nav => 1, article => 1, blockquote => 1, aside => 1, |
25 |
|
|
h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1, |
26 |
|
|
footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1, |
27 |
|
|
ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1, |
28 |
|
|
details => 1, ## ISSUE: "Prose element" in spec. |
29 |
|
|
datagrid => 1, ## ISSUE: "Prose element" in spec. |
30 |
|
|
datatemplate => 1, |
31 |
|
|
div => 1, ## ISSUE: No category in spec. |
32 |
|
|
## NOTE: |style| is only allowed if |scoped| attribute is specified. |
33 |
|
|
## Additionally, it must be before any other element or |
34 |
|
|
## non-inter-element-whitespace text node. |
35 |
|
|
style => 1, |
36 |
|
|
|
37 |
|
|
br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, m => 1, |
38 |
|
|
dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1, |
39 |
|
|
var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1, |
40 |
|
|
b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1, |
41 |
|
|
command => 1, font => 1, |
42 |
|
|
a => 1, |
43 |
|
|
datagrid => 1, ## ISSUE: "Interactive element" in the spec. |
44 |
|
|
## NOTE: |area| is allowed only as a descendant of |map|. |
45 |
|
|
area => 1, |
46 |
|
|
|
47 |
|
|
ins => 1, del => 1, |
48 |
|
|
|
49 |
|
|
## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose. |
50 |
|
|
menu => 1, |
51 |
|
|
|
52 |
|
|
img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1, |
53 |
|
|
canvas => 1, |
54 |
|
|
}, |
55 |
|
|
|
56 |
|
|
## NOTE: Embedded |
57 |
|
|
q<http://www.w3.org/1998/Math/MathML> => {math => 1}, |
58 |
|
|
q<http://www.w3.org/2000/svg> => {svg => 1}, |
59 |
|
|
}; |
60 |
|
|
|
61 |
|
|
my $HTMLSectioningContent = { |
62 |
|
|
$HTML_NS => { |
63 |
|
|
section => 1, nav => 1, article => 1, blockquote => 1, aside => 1, |
64 |
|
|
## NOTE: |body| is only allowed in |html| element. |
65 |
|
|
body => 1, |
66 |
|
|
}, |
67 |
|
|
}; |
68 |
|
|
|
69 |
|
|
my $HTMLHeadingContent = { |
70 |
|
|
$HTML_NS => { |
71 |
|
|
h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1, |
72 |
|
|
}, |
73 |
|
|
}; |
74 |
|
|
|
75 |
|
|
my $HTMLPhrasingContent = { |
76 |
|
|
## NOTE: All phrasing content is also prose content. |
77 |
|
|
$HTML_NS => { |
78 |
|
|
br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, m => 1, |
79 |
|
|
dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1, |
80 |
|
|
var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1, |
81 |
|
|
b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1, |
82 |
|
|
command => 1, font => 1, |
83 |
|
|
a => 1, |
84 |
|
|
datagrid => 1, ## ISSUE: "Interactive element" in the spec. |
85 |
|
|
## NOTE: |area| is allowed only as a descendant of |map|. |
86 |
|
|
area => 1, |
87 |
|
|
|
88 |
|
|
## NOTE: Transparent. |
89 |
|
|
ins => 1, del => 1, |
90 |
|
|
|
91 |
|
|
## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose. |
92 |
|
|
menu => 1, |
93 |
|
|
|
94 |
|
|
img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1, |
95 |
|
|
canvas => 1, |
96 |
|
|
}, |
97 |
|
|
|
98 |
|
|
## NOTE: Embedded |
99 |
|
|
q<http://www.w3.org/1998/Math/MathML> => {math => 1}, |
100 |
|
|
q<http://www.w3.org/2000/svg> => {svg => 1}, |
101 |
|
|
|
102 |
|
|
## NOTE: And non-inter-element-whitespace text nodes. |
103 |
|
|
}; |
104 |
|
|
|
105 |
|
|
my $HTMLEmbeddedContent = { |
106 |
|
|
## NOTE: All embedded content is also phrasing content. |
107 |
|
|
$HTML_NS => { |
108 |
|
|
img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1, |
109 |
|
|
canvas => 1, |
110 |
|
|
}, |
111 |
|
|
## NOTE: MathML is mentioned in the HTML5 spec. |
112 |
|
|
q<http://www.w3.org/1998/Math/MathML> => {math => 1}, |
113 |
|
|
## NOTE: SVG is mentioned in the HTML5 spec. |
114 |
|
|
q<http://www.w3.org/2000/svg> => {svg => 1}, |
115 |
|
|
## NOTE: Foreign elements with content (but no metadata) are |
116 |
|
|
## embedded content. |
117 |
|
|
}; |
118 |
|
|
|
119 |
|
|
my $HTMLInteractiveContent = { |
120 |
|
|
$HTML_NS => { |
121 |
|
|
a => 1, |
122 |
wakaba |
1.36 |
datagrid => 1, ## ISSUE: Categorized as "Inetractive element" |
123 |
wakaba |
1.29 |
}, |
124 |
|
|
}; |
125 |
|
|
|
126 |
wakaba |
1.36 |
## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker. |
127 |
|
|
## NOTE: Semi-transparent elements: See Whatpm::ContentChecker. |
128 |
|
|
|
129 |
wakaba |
1.29 |
## Old HTML5 categories |
130 |
|
|
|
131 |
wakaba |
1.1 |
my $HTMLBlockLevelElements = { |
132 |
|
|
$HTML_NS => { |
133 |
|
|
qw/ |
134 |
|
|
section 1 nav 1 article 1 blockquote 1 aside 1 |
135 |
|
|
h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1 |
136 |
|
|
address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1 |
137 |
|
|
ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1 |
138 |
|
|
event-source 1 details 1 datagrid 1 menu 1 div 1 font 1 |
139 |
wakaba |
1.8 |
datatemplate 1 |
140 |
wakaba |
1.1 |
/, |
141 |
|
|
}, |
142 |
|
|
}; |
143 |
|
|
|
144 |
|
|
my $HTMLStrictlyInlineLevelElements = { |
145 |
|
|
$HTML_NS => { |
146 |
|
|
qw/ |
147 |
|
|
br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1 |
148 |
|
|
time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1 |
149 |
|
|
sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1 |
150 |
|
|
iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1 |
151 |
|
|
script 1 noscript 1 event-source 1 command 1 font 1 |
152 |
|
|
/, |
153 |
|
|
}, |
154 |
|
|
}; |
155 |
|
|
|
156 |
|
|
my $HTMLStructuredInlineLevelElements = { |
157 |
|
|
$HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/}, |
158 |
|
|
}; |
159 |
|
|
|
160 |
wakaba |
1.26 |
my $HTMLSignificantContentErrors = { |
161 |
|
|
significant => sub { |
162 |
|
|
my ($self, $todo) = @_; |
163 |
|
|
$self->{onerror}->(node => $todo->{node}, |
164 |
|
|
level => $self->{should_level}, |
165 |
|
|
type => 'no significant content'); |
166 |
|
|
}, |
167 |
|
|
}; # $HTMLSignificantContentErrors |
168 |
|
|
|
169 |
wakaba |
1.25 |
our $AnyChecker; |
170 |
|
|
my $HTMLAnyChecker = sub { |
171 |
|
|
my ($self, $todo) = @_; |
172 |
|
|
|
173 |
|
|
my $old_values = {significant => |
174 |
|
|
$todo->{flag}->{has_descendant}->{significant}}; |
175 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 0; |
176 |
|
|
|
177 |
|
|
my ($new_todos) = $AnyChecker->($self, $todo); |
178 |
|
|
|
179 |
|
|
push @$new_todos, { |
180 |
|
|
type => 'descendant', node => $todo->{node}, flag => $todo->{flag}, |
181 |
|
|
old_values => $old_values, |
182 |
wakaba |
1.26 |
errors => $HTMLSignificantContentErrors, |
183 |
wakaba |
1.25 |
}; |
184 |
|
|
|
185 |
|
|
return ($new_todos); |
186 |
|
|
}; # $HTMLAnyChecker |
187 |
wakaba |
1.1 |
|
188 |
|
|
## Empty |
189 |
|
|
my $HTMLEmptyChecker = sub { |
190 |
|
|
my ($self, $todo) = @_; |
191 |
|
|
my $el = $todo->{node}; |
192 |
|
|
my $new_todos = []; |
193 |
|
|
my @nodes = (@{$el->child_nodes}); |
194 |
|
|
|
195 |
|
|
while (@nodes) { |
196 |
|
|
my $node = shift @nodes; |
197 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
198 |
|
|
|
199 |
|
|
my $nt = $node->node_type; |
200 |
|
|
if ($nt == 1) { |
201 |
wakaba |
1.8 |
my $node_ns = $node->namespace_uri; |
202 |
|
|
$node_ns = '' unless defined $node_ns; |
203 |
|
|
my $node_ln = $node->manakai_local_name; |
204 |
wakaba |
1.30 |
if ($self->{minuses}->{$node_ns}->{$node_ln}) { |
205 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed:minus', |
206 |
|
|
level => $self->{must_level}); |
207 |
|
|
} elsif ($self->{pluses}->{$node_ns}->{$node_ln}) { |
208 |
wakaba |
1.8 |
# |
209 |
|
|
} else { |
210 |
wakaba |
1.30 |
$self->{onerror}->(node => $node, type => 'element not allowed:empty', |
211 |
|
|
level => $self->{must_level}); |
212 |
wakaba |
1.8 |
} |
213 |
wakaba |
1.30 |
|
214 |
wakaba |
1.1 |
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
215 |
|
|
unshift @nodes, @$sib; |
216 |
|
|
push @$new_todos, @$ch; |
217 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
218 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
219 |
wakaba |
1.30 |
$self->{onerror}->(node => $node, |
220 |
|
|
type => 'character not allowed:empty', |
221 |
|
|
level => $self->{must_level}); |
222 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
223 |
wakaba |
1.1 |
} |
224 |
|
|
} elsif ($nt == 5) { |
225 |
|
|
unshift @nodes, @{$node->child_nodes}; |
226 |
|
|
} |
227 |
|
|
} |
228 |
|
|
return ($new_todos); |
229 |
|
|
}; |
230 |
|
|
|
231 |
|
|
## Text |
232 |
|
|
my $HTMLTextChecker = sub { |
233 |
|
|
my ($self, $todo) = @_; |
234 |
|
|
my $el = $todo->{node}; |
235 |
|
|
my $new_todos = []; |
236 |
|
|
my @nodes = (@{$el->child_nodes}); |
237 |
|
|
|
238 |
wakaba |
1.25 |
my $old_values = {significant => |
239 |
|
|
$todo->{flag}->{has_descendant}->{significant}}; |
240 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 0; |
241 |
|
|
|
242 |
wakaba |
1.1 |
while (@nodes) { |
243 |
|
|
my $node = shift @nodes; |
244 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
245 |
|
|
|
246 |
|
|
my $nt = $node->node_type; |
247 |
|
|
if ($nt == 1) { |
248 |
wakaba |
1.8 |
my $node_ns = $node->namespace_uri; |
249 |
|
|
$node_ns = '' unless defined $node_ns; |
250 |
|
|
my $node_ln = $node->manakai_local_name; |
251 |
|
|
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
252 |
|
|
# |
253 |
|
|
} else { |
254 |
|
|
## NOTE: |minuses| list is not checked since redundant |
255 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
256 |
|
|
} |
257 |
wakaba |
1.1 |
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
258 |
|
|
unshift @nodes, @$sib; |
259 |
|
|
push @$new_todos, @$ch; |
260 |
wakaba |
1.25 |
} elsif ($nt == 3 or $nt == 4) { |
261 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
262 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
263 |
|
|
} |
264 |
wakaba |
1.1 |
} elsif ($nt == 5) { |
265 |
|
|
unshift @nodes, @{$node->child_nodes}; |
266 |
|
|
} |
267 |
|
|
} |
268 |
wakaba |
1.25 |
|
269 |
|
|
push @$new_todos, { |
270 |
|
|
type => 'descendant', node => $todo->{node}, flag => $todo->{flag}, |
271 |
|
|
old_values => $old_values, |
272 |
wakaba |
1.26 |
errors => $HTMLSignificantContentErrors, |
273 |
wakaba |
1.25 |
}; |
274 |
|
|
|
275 |
wakaba |
1.1 |
return ($new_todos); |
276 |
|
|
}; |
277 |
|
|
|
278 |
wakaba |
1.29 |
my $HTMLProseContentChecker = sub { |
279 |
|
|
my ($self, $todo) = @_; |
280 |
|
|
my $el = $todo->{node}; |
281 |
|
|
my $new_todos = []; |
282 |
|
|
my @nodes = (@{$el->child_nodes}); |
283 |
|
|
|
284 |
|
|
my $old_values = {significant => |
285 |
|
|
$todo->{flag}->{has_descendant}->{significant}}; |
286 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 0; |
287 |
|
|
|
288 |
|
|
my $has_non_style; |
289 |
|
|
while (@nodes) { |
290 |
|
|
my $node = shift @nodes; |
291 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
292 |
|
|
|
293 |
|
|
my $nt = $node->node_type; |
294 |
|
|
if ($nt == 1) { |
295 |
|
|
my $node_ns = $node->namespace_uri; |
296 |
|
|
$node_ns = '' unless defined $node_ns; |
297 |
|
|
my $node_ln = $node->manakai_local_name; |
298 |
wakaba |
1.30 |
if ($self->{minuses}->{$node_ns}->{$node_ln}) { |
299 |
|
|
$self->{onerror}->(node => $node, |
300 |
|
|
type => 'element not allowed:minus', |
301 |
|
|
level => $self->{must_level}); |
302 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') { |
303 |
|
|
if ($has_non_style or |
304 |
|
|
not $node->has_attribute_ns (undef, 'scoped')) { |
305 |
|
|
$self->{onerror}->(node => $node, |
306 |
|
|
type => 'element not allowed:prose style', |
307 |
|
|
level => $self->{must_level}); |
308 |
|
|
} |
309 |
wakaba |
1.29 |
} elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) { |
310 |
|
|
$has_non_style = 1; |
311 |
|
|
if ($HTMLEmbeddedContent->{$node_ns}->{$node_ln}) { |
312 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
313 |
|
|
} |
314 |
|
|
} elsif ($self->{pluses}->{$node_ns}->{$node_ln}) { |
315 |
|
|
# |
316 |
|
|
} else { |
317 |
|
|
$has_non_style = 1; |
318 |
wakaba |
1.30 |
$self->{onerror}->(node => $node, |
319 |
|
|
type => 'element not allowed:prose', |
320 |
|
|
level => $self->{must_level}) |
321 |
wakaba |
1.29 |
} |
322 |
wakaba |
1.30 |
|
323 |
wakaba |
1.29 |
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
324 |
|
|
unshift @nodes, @$sib; |
325 |
|
|
push @$new_todos, @$ch; |
326 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
327 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
328 |
|
|
$has_non_style = 1; |
329 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
330 |
|
|
} |
331 |
|
|
} elsif ($nt == 5) { |
332 |
|
|
unshift @nodes, @{$node->child_nodes}; |
333 |
|
|
} |
334 |
|
|
} |
335 |
|
|
|
336 |
|
|
push @$new_todos, { |
337 |
|
|
type => 'descendant', node => $todo->{node}, flag => $todo->{flag}, |
338 |
|
|
old_values => $old_values, |
339 |
|
|
errors => $HTMLSignificantContentErrors, |
340 |
|
|
}; |
341 |
|
|
|
342 |
|
|
return ($new_todos); |
343 |
|
|
}; # $HTMLProseContentChecker |
344 |
|
|
|
345 |
|
|
my $HTMLPhrasingContentChecker = sub { |
346 |
|
|
my ($self, $todo) = @_; |
347 |
|
|
my $el = $todo->{node}; |
348 |
|
|
my $new_todos = []; |
349 |
|
|
my @nodes = (@{$el->child_nodes}); |
350 |
|
|
|
351 |
|
|
my $old_values = {significant => |
352 |
|
|
$todo->{flag}->{has_descendant}->{significant}}; |
353 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 0; |
354 |
|
|
|
355 |
|
|
while (@nodes) { |
356 |
|
|
my $node = shift @nodes; |
357 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
358 |
|
|
|
359 |
|
|
my $nt = $node->node_type; |
360 |
|
|
if ($nt == 1) { |
361 |
|
|
my $node_ns = $node->namespace_uri; |
362 |
|
|
$node_ns = '' unless defined $node_ns; |
363 |
|
|
my $node_ln = $node->manakai_local_name; |
364 |
wakaba |
1.30 |
if ($self->{minuses}->{$node_ns}->{$node_ln}) { |
365 |
|
|
$self->{onerror}->(node => $node, |
366 |
|
|
type => 'element not allowed:minus', |
367 |
|
|
level => $self->{must_level}); |
368 |
|
|
} elsif ($HTMLPhrasingContent->{$node_ns}->{$node_ln} or |
369 |
|
|
$self->{pluses}->{$node_ns}->{$node_ln}) { |
370 |
|
|
# |
371 |
|
|
} else { |
372 |
|
|
$self->{onerror}->(node => $node, |
373 |
|
|
type => 'element not allowed:phrasing', |
374 |
|
|
level => $self->{must_level}); |
375 |
|
|
} |
376 |
|
|
|
377 |
wakaba |
1.29 |
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
378 |
|
|
unshift @nodes, @$sib; |
379 |
|
|
push @$new_todos, @$ch; |
380 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
381 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
382 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
383 |
|
|
} |
384 |
|
|
} elsif ($nt == 5) { |
385 |
|
|
unshift @nodes, @{$node->child_nodes}; |
386 |
|
|
} |
387 |
|
|
} |
388 |
|
|
|
389 |
|
|
push @$new_todos, { |
390 |
|
|
type => 'descendant', node => $todo->{node}, flag => $todo->{flag}, |
391 |
|
|
old_values => $old_values, |
392 |
|
|
errors => $HTMLSignificantContentErrors, |
393 |
|
|
}; |
394 |
|
|
|
395 |
|
|
return ($new_todos); |
396 |
|
|
}; # $HTMLPhrasingContentChecker |
397 |
|
|
|
398 |
wakaba |
1.1 |
## Zero or more XXX element, then either block-level or inline-level |
399 |
|
|
my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) { |
400 |
|
|
my ($elnsuri, $ellname) = @_; |
401 |
|
|
return sub { |
402 |
|
|
my ($self, $todo) = @_; |
403 |
|
|
my $el = $todo->{node}; |
404 |
|
|
my $new_todos = []; |
405 |
|
|
my @nodes = (@{$el->child_nodes}); |
406 |
wakaba |
1.25 |
|
407 |
|
|
my $old_values = {significant => |
408 |
|
|
$todo->{flag}->{has_descendant}->{significant}}; |
409 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 0; |
410 |
wakaba |
1.1 |
|
411 |
|
|
my $has_non_style; |
412 |
|
|
my $content = 'block-or-inline'; # or 'block' or 'inline' |
413 |
|
|
my @block_not_inline; |
414 |
|
|
while (@nodes) { |
415 |
|
|
my $node = shift @nodes; |
416 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
417 |
|
|
|
418 |
|
|
my $nt = $node->node_type; |
419 |
|
|
if ($nt == 1) { |
420 |
|
|
my $node_ns = $node->namespace_uri; |
421 |
|
|
$node_ns = '' unless defined $node_ns; |
422 |
|
|
my $node_ln = $node->manakai_local_name; |
423 |
|
|
my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln}; |
424 |
|
|
if ($node_ns eq $elnsuri and $node_ln eq $ellname) { |
425 |
|
|
$not_allowed = 1 if $has_non_style; |
426 |
|
|
if ($ellname eq 'style' and |
427 |
|
|
not $node->has_attribute_ns (undef, 'scoped')) { |
428 |
|
|
$not_allowed = 1; |
429 |
|
|
} |
430 |
|
|
} elsif ($content eq 'block') { |
431 |
|
|
$has_non_style = 1; |
432 |
|
|
$not_allowed = 1 |
433 |
wakaba |
1.8 |
unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or |
434 |
|
|
$self->{pluses}->{$node_ns}->{$node_ln}; |
435 |
wakaba |
1.1 |
} elsif ($content eq 'inline') { |
436 |
|
|
$has_non_style = 1; |
437 |
|
|
$not_allowed = 1 |
438 |
|
|
unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or |
439 |
wakaba |
1.8 |
$HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or |
440 |
|
|
$self->{pluses}->{$node_ns}->{$node_ln}; |
441 |
wakaba |
1.1 |
} else { |
442 |
wakaba |
1.8 |
$has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln}; |
443 |
wakaba |
1.1 |
my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln}; |
444 |
|
|
my $is_inline |
445 |
|
|
= $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} || |
446 |
|
|
$HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}; |
447 |
|
|
|
448 |
|
|
push @block_not_inline, $node |
449 |
|
|
if $is_block and not $is_inline and not $not_allowed; |
450 |
wakaba |
1.8 |
if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) { |
451 |
wakaba |
1.1 |
$content = 'inline'; |
452 |
|
|
for (@block_not_inline) { |
453 |
|
|
$self->{onerror}->(node => $_, type => 'element not allowed'); |
454 |
|
|
} |
455 |
|
|
$not_allowed = 1 unless $is_inline; |
456 |
|
|
} |
457 |
|
|
} |
458 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed') |
459 |
|
|
if $not_allowed; |
460 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
461 |
|
|
unshift @nodes, @$sib; |
462 |
|
|
push @$new_todos, @$ch; |
463 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
464 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
465 |
|
|
$has_non_style = 1; |
466 |
|
|
if ($content eq 'block') { |
467 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
468 |
|
|
} else { |
469 |
|
|
$content = 'inline'; |
470 |
|
|
for (@block_not_inline) { |
471 |
|
|
$self->{onerror}->(node => $_, type => 'element not allowed'); |
472 |
|
|
} |
473 |
|
|
} |
474 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
475 |
wakaba |
1.1 |
} |
476 |
|
|
} elsif ($nt == 5) { |
477 |
|
|
unshift @nodes, @{$node->child_nodes}; |
478 |
|
|
} |
479 |
|
|
} |
480 |
|
|
|
481 |
|
|
if ($content eq 'inline') { |
482 |
|
|
for (@$new_todos) { |
483 |
|
|
$_->{inline} = 1; |
484 |
|
|
} |
485 |
|
|
} |
486 |
wakaba |
1.25 |
|
487 |
|
|
push @$new_todos, { |
488 |
|
|
type => 'descendant', node => $todo->{node}, flag => $todo->{flag}, |
489 |
|
|
old_values => $old_values, |
490 |
wakaba |
1.26 |
errors => $HTMLSignificantContentErrors, |
491 |
wakaba |
1.25 |
}; |
492 |
|
|
|
493 |
wakaba |
1.1 |
return ($new_todos); |
494 |
|
|
}; |
495 |
|
|
}; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker |
496 |
|
|
|
497 |
wakaba |
1.29 |
my $HTMLTransparentChecker = $HTMLProseContentChecker; |
498 |
wakaba |
1.25 |
## ISSUE: Significant content rule should be applied to transparent element |
499 |
|
|
## with parent? Currently, applied to |video| but not to others. |
500 |
wakaba |
1.1 |
|
501 |
wakaba |
1.36 |
## -- Common attribute syntacx checkers |
502 |
|
|
|
503 |
wakaba |
1.1 |
our $AttrChecker; |
504 |
|
|
|
505 |
|
|
my $GetHTMLEnumeratedAttrChecker = sub { |
506 |
|
|
my $states = shift; # {value => conforming ? 1 : -1} |
507 |
|
|
return sub { |
508 |
|
|
my ($self, $attr) = @_; |
509 |
|
|
my $value = lc $attr->value; ## TODO: ASCII case insensitibility? |
510 |
|
|
if ($states->{$value} > 0) { |
511 |
|
|
# |
512 |
|
|
} elsif ($states->{$value}) { |
513 |
|
|
$self->{onerror}->(node => $attr, type => 'enumerated:non-conforming'); |
514 |
|
|
} else { |
515 |
|
|
$self->{onerror}->(node => $attr, type => 'enumerated:invalid'); |
516 |
|
|
} |
517 |
|
|
}; |
518 |
|
|
}; # $GetHTMLEnumeratedAttrChecker |
519 |
|
|
|
520 |
|
|
my $GetHTMLBooleanAttrChecker = sub { |
521 |
|
|
my $local_name = shift; |
522 |
|
|
return sub { |
523 |
|
|
my ($self, $attr) = @_; |
524 |
|
|
my $value = $attr->value; |
525 |
|
|
unless ($value eq $local_name or $value eq '') { |
526 |
|
|
$self->{onerror}->(node => $attr, type => 'boolean:invalid'); |
527 |
|
|
} |
528 |
|
|
}; |
529 |
|
|
}; # $GetHTMLBooleanAttrChecker |
530 |
|
|
|
531 |
wakaba |
1.8 |
## Unordered set of space-separated tokens |
532 |
wakaba |
1.18 |
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub { |
533 |
wakaba |
1.8 |
my ($self, $attr) = @_; |
534 |
|
|
my %word; |
535 |
|
|
for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) { |
536 |
|
|
unless ($word{$word}) { |
537 |
|
|
$word{$word} = 1; |
538 |
|
|
} else { |
539 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate token:'.$word); |
540 |
|
|
} |
541 |
|
|
} |
542 |
wakaba |
1.18 |
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker |
543 |
wakaba |
1.8 |
|
544 |
wakaba |
1.1 |
## |rel| attribute (unordered set of space separated tokens, |
545 |
|
|
## whose allowed values are defined by the section on link types) |
546 |
|
|
my $HTMLLinkTypesAttrChecker = sub { |
547 |
wakaba |
1.4 |
my ($a_or_area, $todo, $self, $attr) = @_; |
548 |
wakaba |
1.1 |
my %word; |
549 |
|
|
for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) { |
550 |
|
|
unless ($word{$word}) { |
551 |
|
|
$word{$word} = 1; |
552 |
wakaba |
1.18 |
} elsif ($word eq 'up') { |
553 |
|
|
# |
554 |
wakaba |
1.1 |
} else { |
555 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate token:'.$word); |
556 |
|
|
} |
557 |
|
|
} |
558 |
|
|
## NOTE: Case sensitive match (since HTML5 spec does not say link |
559 |
|
|
## types are case-insensitive and it says "The value should not |
560 |
|
|
## be confusingly similar to any other defined value (e.g. |
561 |
|
|
## differing only in case)."). |
562 |
|
|
## NOTE: Though there is no explicit "MUST NOT" for undefined values, |
563 |
|
|
## "MAY"s and "only ... MAY" restrict non-standard non-registered |
564 |
|
|
## values to be used conformingly. |
565 |
|
|
require Whatpm::_LinkTypeList; |
566 |
|
|
our $LinkType; |
567 |
|
|
for my $word (keys %word) { |
568 |
|
|
my $def = $LinkType->{$word}; |
569 |
|
|
if (defined $def) { |
570 |
|
|
if ($def->{status} eq 'accepted') { |
571 |
|
|
if (defined $def->{effect}->[$a_or_area]) { |
572 |
|
|
# |
573 |
|
|
} else { |
574 |
|
|
$self->{onerror}->(node => $attr, |
575 |
|
|
type => 'link type:bad context:'.$word); |
576 |
|
|
} |
577 |
|
|
} elsif ($def->{status} eq 'proposal') { |
578 |
|
|
$self->{onerror}->(node => $attr, level => 's', |
579 |
|
|
type => 'link type:proposed:'.$word); |
580 |
wakaba |
1.20 |
if (defined $def->{effect}->[$a_or_area]) { |
581 |
|
|
# |
582 |
|
|
} else { |
583 |
|
|
$self->{onerror}->(node => $attr, |
584 |
|
|
type => 'link type:bad context:'.$word); |
585 |
|
|
} |
586 |
wakaba |
1.1 |
} else { # rejected or synonym |
587 |
|
|
$self->{onerror}->(node => $attr, |
588 |
|
|
type => 'link type:non-conforming:'.$word); |
589 |
|
|
} |
590 |
wakaba |
1.4 |
if (defined $def->{effect}->[$a_or_area]) { |
591 |
|
|
if ($word eq 'alternate') { |
592 |
|
|
# |
593 |
|
|
} elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') { |
594 |
|
|
$todo->{has_hyperlink_link_type} = 1; |
595 |
|
|
} |
596 |
|
|
} |
597 |
wakaba |
1.1 |
if ($def->{unique}) { |
598 |
|
|
unless ($self->{has_link_type}->{$word}) { |
599 |
|
|
$self->{has_link_type}->{$word} = 1; |
600 |
|
|
} else { |
601 |
|
|
$self->{onerror}->(node => $attr, |
602 |
|
|
type => 'link type:duplicate:'.$word); |
603 |
|
|
} |
604 |
|
|
} |
605 |
|
|
} else { |
606 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
607 |
|
|
type => 'link type:'.$word); |
608 |
|
|
} |
609 |
|
|
} |
610 |
wakaba |
1.4 |
$todo->{has_hyperlink_link_type} = 1 |
611 |
|
|
if $word{alternate} and not $word{stylesheet}; |
612 |
wakaba |
1.1 |
## TODO: The Pingback 1.0 specification, which is referenced by HTML5, |
613 |
|
|
## says that using both X-Pingback: header field and HTML |
614 |
|
|
## <link rel=pingback> is deprecated and if both appears they |
615 |
|
|
## SHOULD contain exactly the same value. |
616 |
|
|
## ISSUE: Pingback 1.0 specification defines the exact representation |
617 |
|
|
## of its link element, which cannot be tested by the current arch. |
618 |
|
|
## ISSUE: Pingback 1.0 specification says that the document MUST NOT |
619 |
|
|
## include any string that matches to the pattern for the rel=pingback link, |
620 |
|
|
## which again inpossible to test. |
621 |
|
|
## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4. |
622 |
wakaba |
1.12 |
|
623 |
|
|
## NOTE: <link rel="up index"><link rel="up up index"> is not an error. |
624 |
wakaba |
1.17 |
## NOTE: We can't check "If the page is part of multiple hierarchies, |
625 |
|
|
## then they SHOULD be described in different paragraphs.". |
626 |
wakaba |
1.1 |
}; # $HTMLLinkTypesAttrChecker |
627 |
wakaba |
1.20 |
|
628 |
|
|
## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status." |
629 |
wakaba |
1.1 |
|
630 |
|
|
## URI (or IRI) |
631 |
|
|
my $HTMLURIAttrChecker = sub { |
632 |
|
|
my ($self, $attr) = @_; |
633 |
|
|
## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.) |
634 |
|
|
my $value = $attr->value; |
635 |
|
|
Whatpm::URIChecker->check_iri_reference ($value, sub { |
636 |
|
|
my %opt = @_; |
637 |
|
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
638 |
|
|
type => 'URI::'.$opt{type}. |
639 |
|
|
(defined $opt{position} ? ':'.$opt{position} : '')); |
640 |
|
|
}); |
641 |
wakaba |
1.17 |
$self->{has_uri_attr} = 1; ## TODO: <html manifest> |
642 |
wakaba |
1.1 |
}; # $HTMLURIAttrChecker |
643 |
|
|
|
644 |
|
|
## A space separated list of one or more URIs (or IRIs) |
645 |
|
|
my $HTMLSpaceURIsAttrChecker = sub { |
646 |
|
|
my ($self, $attr) = @_; |
647 |
|
|
my $i = 0; |
648 |
|
|
for my $value (split /[\x09-\x0D\x20]+/, $attr->value) { |
649 |
|
|
Whatpm::URIChecker->check_iri_reference ($value, sub { |
650 |
|
|
my %opt = @_; |
651 |
|
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
652 |
wakaba |
1.2 |
type => 'URIs:'.':'. |
653 |
|
|
$opt{type}.':'.$i. |
654 |
wakaba |
1.1 |
(defined $opt{position} ? ':'.$opt{position} : '')); |
655 |
|
|
}); |
656 |
|
|
$i++; |
657 |
|
|
} |
658 |
|
|
## ISSUE: Relative references? |
659 |
|
|
## ISSUE: Leading or trailing white spaces are conformant? |
660 |
|
|
## ISSUE: A sequence of white space characters are conformant? |
661 |
|
|
## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.) |
662 |
|
|
## NOTE: Duplication seems not an error. |
663 |
wakaba |
1.4 |
$self->{has_uri_attr} = 1; |
664 |
wakaba |
1.1 |
}; # $HTMLSpaceURIsAttrChecker |
665 |
|
|
|
666 |
|
|
my $HTMLDatetimeAttrChecker = sub { |
667 |
|
|
my ($self, $attr) = @_; |
668 |
|
|
my $value = $attr->value; |
669 |
|
|
## ISSUE: "space", not "space character" (in parsing algorihtm, "space character") |
670 |
|
|
if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) { |
671 |
|
|
my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm) |
672 |
|
|
= ($1, $2, $3, $4, $5, $6, $7, $8, $9); |
673 |
|
|
if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm) |
674 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad day') |
675 |
|
|
if $d < 1 or |
676 |
|
|
$d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M]; |
677 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad day') |
678 |
|
|
if $M == 2 and $d == 29 and |
679 |
|
|
not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0)); |
680 |
|
|
} else { |
681 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad month'); |
682 |
|
|
} |
683 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23; |
684 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59; |
685 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad second') |
686 |
|
|
if defined $s and $s > 59; |
687 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour') |
688 |
|
|
if $zh > 23; |
689 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute') |
690 |
|
|
if $zm > 59; |
691 |
|
|
## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339. |
692 |
|
|
} else { |
693 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:syntax error'); |
694 |
|
|
} |
695 |
|
|
}; # $HTMLDatetimeAttrChecker |
696 |
|
|
|
697 |
|
|
my $HTMLIntegerAttrChecker = sub { |
698 |
|
|
my ($self, $attr) = @_; |
699 |
|
|
my $value = $attr->value; |
700 |
|
|
unless ($value =~ /\A-?[0-9]+\z/) { |
701 |
|
|
$self->{onerror}->(node => $attr, type => 'integer:syntax error'); |
702 |
|
|
} |
703 |
|
|
}; # $HTMLIntegerAttrChecker |
704 |
|
|
|
705 |
|
|
my $GetHTMLNonNegativeIntegerAttrChecker = sub { |
706 |
|
|
my $range_check = shift; |
707 |
|
|
return sub { |
708 |
|
|
my ($self, $attr) = @_; |
709 |
|
|
my $value = $attr->value; |
710 |
|
|
if ($value =~ /\A[0-9]+\z/) { |
711 |
|
|
unless ($range_check->($value + 0)) { |
712 |
|
|
$self->{onerror}->(node => $attr, type => 'nninteger:out of range'); |
713 |
|
|
} |
714 |
|
|
} else { |
715 |
|
|
$self->{onerror}->(node => $attr, |
716 |
|
|
type => 'nninteger:syntax error'); |
717 |
|
|
} |
718 |
|
|
}; |
719 |
|
|
}; # $GetHTMLNonNegativeIntegerAttrChecker |
720 |
|
|
|
721 |
|
|
my $GetHTMLFloatingPointNumberAttrChecker = sub { |
722 |
|
|
my $range_check = shift; |
723 |
|
|
return sub { |
724 |
|
|
my ($self, $attr) = @_; |
725 |
|
|
my $value = $attr->value; |
726 |
|
|
if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) { |
727 |
|
|
unless ($range_check->($value + 0)) { |
728 |
|
|
$self->{onerror}->(node => $attr, type => 'float:out of range'); |
729 |
|
|
} |
730 |
|
|
} else { |
731 |
|
|
$self->{onerror}->(node => $attr, |
732 |
|
|
type => 'float:syntax error'); |
733 |
|
|
} |
734 |
|
|
}; |
735 |
|
|
}; # $GetHTMLFloatingPointNumberAttrChecker |
736 |
|
|
|
737 |
|
|
## "A valid MIME type, optionally with parameters. [RFC 2046]" |
738 |
|
|
## ISSUE: RFC 2046 does not define syntax of media types. |
739 |
|
|
## ISSUE: The definition of "a valid MIME type" is unknown. |
740 |
|
|
## Syntactical correctness? |
741 |
|
|
my $HTMLIMTAttrChecker = sub { |
742 |
|
|
my ($self, $attr) = @_; |
743 |
|
|
my $value = $attr->value; |
744 |
|
|
## ISSUE: RFC 2045 Content-Type header field allows insertion |
745 |
|
|
## of LWS/comments between tokens. Is it allowed in HTML? Maybe no. |
746 |
|
|
## ISSUE: RFC 2231 extension? Maybe no. |
747 |
|
|
my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/; |
748 |
|
|
my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/; |
749 |
|
|
my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/; |
750 |
|
|
if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) { |
751 |
|
|
my @type = ($1, $2); |
752 |
|
|
my $param = $3; |
753 |
|
|
while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) { |
754 |
|
|
if (defined $2) { |
755 |
|
|
push @type, $1 => $2; |
756 |
|
|
} else { |
757 |
|
|
my $n = $1; |
758 |
|
|
my $v = $2; |
759 |
|
|
$v =~ s/\\(.)/$1/gs; |
760 |
|
|
push @type, $n => $v; |
761 |
|
|
} |
762 |
|
|
} |
763 |
|
|
require Whatpm::IMTChecker; |
764 |
|
|
Whatpm::IMTChecker->check_imt (sub { |
765 |
|
|
my %opt = @_; |
766 |
|
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
767 |
|
|
type => 'IMT:'.$opt{type}); |
768 |
|
|
}, @type); |
769 |
|
|
} else { |
770 |
|
|
$self->{onerror}->(node => $attr, type => 'IMT:syntax error'); |
771 |
|
|
} |
772 |
|
|
}; # $HTMLIMTAttrChecker |
773 |
|
|
|
774 |
|
|
my $HTMLLanguageTagAttrChecker = sub { |
775 |
wakaba |
1.7 |
## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm. |
776 |
|
|
|
777 |
wakaba |
1.1 |
my ($self, $attr) = @_; |
778 |
wakaba |
1.6 |
my $value = $attr->value; |
779 |
|
|
require Whatpm::LangTag; |
780 |
|
|
Whatpm::LangTag->check_rfc3066_language_tag ($value, sub { |
781 |
|
|
my %opt = @_; |
782 |
|
|
my $type = 'LangTag:'.$opt{type}; |
783 |
|
|
$type .= ':' . $opt{subtag} if defined $opt{subtag}; |
784 |
|
|
$self->{onerror}->(node => $attr, type => $type, value => $opt{value}, |
785 |
|
|
level => $opt{level}); |
786 |
|
|
}); |
787 |
wakaba |
1.1 |
## ISSUE: RFC 4646 (3066bis)? |
788 |
wakaba |
1.6 |
|
789 |
|
|
## TODO: testdata |
790 |
wakaba |
1.1 |
}; # $HTMLLanguageTagAttrChecker |
791 |
|
|
|
792 |
|
|
## "A valid media query [MQ]" |
793 |
|
|
my $HTMLMQAttrChecker = sub { |
794 |
|
|
my ($self, $attr) = @_; |
795 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
796 |
|
|
type => 'media query'); |
797 |
|
|
## ISSUE: What is "a valid media query"? |
798 |
|
|
}; # $HTMLMQAttrChecker |
799 |
|
|
|
800 |
|
|
my $HTMLEventHandlerAttrChecker = sub { |
801 |
|
|
my ($self, $attr) = @_; |
802 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
803 |
|
|
type => 'event handler'); |
804 |
|
|
## TODO: MUST contain valid ECMAScript code matching the |
805 |
|
|
## ECMAScript |FunctionBody| production. [ECMA262] |
806 |
|
|
## ISSUE: MUST be ES3? E4X? ES4? JS1.x? |
807 |
|
|
## ISSUE: Automatic semicolon insertion does not apply? |
808 |
|
|
## ISSUE: Other script languages? |
809 |
|
|
}; # $HTMLEventHandlerAttrChecker |
810 |
|
|
|
811 |
|
|
my $HTMLUsemapAttrChecker = sub { |
812 |
|
|
my ($self, $attr) = @_; |
813 |
|
|
## MUST be a valid hashed ID reference to a |map| element |
814 |
|
|
my $value = $attr->value; |
815 |
|
|
if ($value =~ s/^#//) { |
816 |
|
|
## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.) |
817 |
|
|
push @{$self->{usemap}}, [$value => $attr]; |
818 |
|
|
} else { |
819 |
|
|
$self->{onerror}->(node => $attr, type => '#idref:syntax error'); |
820 |
|
|
} |
821 |
|
|
## NOTE: Space characters in hashed ID references are conforming. |
822 |
|
|
## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported |
823 |
|
|
}; # $HTMLUsemapAttrChecker |
824 |
|
|
|
825 |
|
|
my $HTMLTargetAttrChecker = sub { |
826 |
|
|
my ($self, $attr) = @_; |
827 |
|
|
my $value = $attr->value; |
828 |
|
|
if ($value =~ /^_/) { |
829 |
|
|
$value = lc $value; ## ISSUE: ASCII case-insentitive? |
830 |
|
|
unless ({ |
831 |
|
|
_self => 1, _parent => 1, _top => 1, |
832 |
|
|
}->{$value}) { |
833 |
|
|
$self->{onerror}->(node => $attr, |
834 |
|
|
type => 'reserved browsing context name'); |
835 |
|
|
} |
836 |
|
|
} else { |
837 |
wakaba |
1.29 |
## NOTE: An empty string is a valid browsing context name (same as _self). |
838 |
wakaba |
1.1 |
} |
839 |
|
|
}; # $HTMLTargetAttrChecker |
840 |
|
|
|
841 |
wakaba |
1.23 |
my $HTMLSelectorsAttrChecker = sub { |
842 |
|
|
my ($self, $attr) = @_; |
843 |
|
|
|
844 |
|
|
## ISSUE: Namespace resolution? |
845 |
|
|
|
846 |
|
|
my $value = $attr->value; |
847 |
|
|
|
848 |
|
|
require Whatpm::CSS::SelectorsParser; |
849 |
|
|
my $p = Whatpm::CSS::SelectorsParser->new; |
850 |
|
|
$p->{pseudo_class}->{$_} = 1 for qw/ |
851 |
|
|
active checked disabled empty enabled first-child first-of-type |
852 |
|
|
focus hover indeterminate last-child last-of-type link only-child |
853 |
|
|
only-of-type root target visited |
854 |
|
|
lang nth-child nth-last-child nth-of-type nth-last-of-type not |
855 |
|
|
-manakai-contains -manakai-current |
856 |
|
|
/; |
857 |
|
|
|
858 |
|
|
$p->{pseudo_element}->{$_} = 1 for qw/ |
859 |
|
|
after before first-letter first-line |
860 |
|
|
/; |
861 |
|
|
|
862 |
|
|
$p->{must_level} = $self->{must_level}; |
863 |
|
|
$p->{onerror} = sub { |
864 |
|
|
my %opt = @_; |
865 |
|
|
$opt{type} = 'selectors:'.$opt{type}; |
866 |
|
|
$self->{onerror}->(%opt, node => $attr); |
867 |
|
|
}; |
868 |
|
|
$p->parse_string ($value); |
869 |
|
|
}; # $HTMLSelectorsAttrChecker |
870 |
|
|
|
871 |
wakaba |
1.1 |
my $HTMLAttrChecker = { |
872 |
|
|
id => sub { |
873 |
|
|
## NOTE: |map| has its own variant of |id=""| checker |
874 |
|
|
my ($self, $attr) = @_; |
875 |
|
|
my $value = $attr->value; |
876 |
|
|
if (length $value > 0) { |
877 |
|
|
if ($self->{id}->{$value}) { |
878 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate ID'); |
879 |
|
|
push @{$self->{id}->{$value}}, $attr; |
880 |
|
|
} else { |
881 |
|
|
$self->{id}->{$value} = [$attr]; |
882 |
|
|
} |
883 |
|
|
if ($value =~ /[\x09-\x0D\x20]/) { |
884 |
|
|
$self->{onerror}->(node => $attr, type => 'space in ID'); |
885 |
|
|
} |
886 |
|
|
} else { |
887 |
|
|
## NOTE: MUST contain at least one character |
888 |
|
|
$self->{onerror}->(node => $attr, type => 'empty attribute value'); |
889 |
|
|
} |
890 |
|
|
}, |
891 |
|
|
title => sub {}, ## NOTE: No conformance creteria |
892 |
|
|
lang => sub { |
893 |
|
|
my ($self, $attr) = @_; |
894 |
wakaba |
1.6 |
my $value = $attr->value; |
895 |
|
|
if ($value eq '') { |
896 |
|
|
# |
897 |
|
|
} else { |
898 |
|
|
require Whatpm::LangTag; |
899 |
|
|
Whatpm::LangTag->check_rfc3066_language_tag ($value, sub { |
900 |
|
|
my %opt = @_; |
901 |
|
|
my $type = 'LangTag:'.$opt{type}; |
902 |
|
|
$type .= ':' . $opt{subtag} if defined $opt{subtag}; |
903 |
|
|
$self->{onerror}->(node => $attr, type => $type, value => $opt{value}, |
904 |
|
|
level => $opt{level}); |
905 |
|
|
}); |
906 |
|
|
} |
907 |
wakaba |
1.1 |
## ISSUE: RFC 4646 (3066bis)? |
908 |
|
|
unless ($attr->owner_document->manakai_is_html) { |
909 |
|
|
$self->{onerror}->(node => $attr, type => 'in XML:lang'); |
910 |
|
|
} |
911 |
wakaba |
1.6 |
|
912 |
|
|
## TODO: test data |
913 |
wakaba |
1.1 |
}, |
914 |
|
|
dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}), |
915 |
|
|
class => sub { |
916 |
|
|
my ($self, $attr) = @_; |
917 |
|
|
my %word; |
918 |
|
|
for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) { |
919 |
|
|
unless ($word{$word}) { |
920 |
|
|
$word{$word} = 1; |
921 |
|
|
push @{$self->{return}->{class}->{$word}||=[]}, $attr; |
922 |
|
|
} else { |
923 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate token:'.$word); |
924 |
|
|
} |
925 |
|
|
} |
926 |
|
|
}, |
927 |
|
|
contextmenu => sub { |
928 |
|
|
my ($self, $attr) = @_; |
929 |
|
|
my $value = $attr->value; |
930 |
|
|
push @{$self->{contextmenu}}, [$value => $attr]; |
931 |
|
|
## ISSUE: "The value must be the ID of a menu element in the DOM." |
932 |
|
|
## What is "in the DOM"? A menu Element node that is not part |
933 |
|
|
## of the Document tree is in the DOM? A menu Element node that |
934 |
|
|
## belong to another Document tree is in the DOM? |
935 |
|
|
}, |
936 |
|
|
irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'), |
937 |
wakaba |
1.8 |
tabindex => $HTMLIntegerAttrChecker |
938 |
|
|
## TODO: ref, template, registrationmark |
939 |
wakaba |
1.1 |
}; |
940 |
|
|
|
941 |
|
|
for (qw/ |
942 |
|
|
onabort onbeforeunload onblur onchange onclick oncontextmenu |
943 |
|
|
ondblclick ondrag ondragend ondragenter ondragleave ondragover |
944 |
|
|
ondragstart ondrop onerror onfocus onkeydown onkeypress |
945 |
|
|
onkeyup onload onmessage onmousedown onmousemove onmouseout |
946 |
|
|
onmouseover onmouseup onmousewheel onresize onscroll onselect |
947 |
|
|
onsubmit onunload |
948 |
|
|
/) { |
949 |
|
|
$HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker; |
950 |
|
|
} |
951 |
|
|
|
952 |
|
|
my $GetHTMLAttrsChecker = sub { |
953 |
|
|
my $element_specific_checker = shift; |
954 |
|
|
return sub { |
955 |
|
|
my ($self, $todo) = @_; |
956 |
|
|
for my $attr (@{$todo->{node}->attributes}) { |
957 |
|
|
my $attr_ns = $attr->namespace_uri; |
958 |
|
|
$attr_ns = '' unless defined $attr_ns; |
959 |
|
|
my $attr_ln = $attr->manakai_local_name; |
960 |
|
|
my $checker; |
961 |
|
|
if ($attr_ns eq '') { |
962 |
|
|
$checker = $element_specific_checker->{$attr_ln} |
963 |
|
|
|| $HTMLAttrChecker->{$attr_ln}; |
964 |
|
|
} |
965 |
|
|
$checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} |
966 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
967 |
|
|
if ($checker) { |
968 |
|
|
$checker->($self, $attr, $todo); |
969 |
|
|
} else { |
970 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
971 |
|
|
type => 'attribute'); |
972 |
|
|
## ISSUE: No comformance createria for unknown attributes in the spec |
973 |
|
|
} |
974 |
|
|
} |
975 |
|
|
}; |
976 |
|
|
}; # $GetHTMLAttrsChecker |
977 |
|
|
|
978 |
|
|
our $Element; |
979 |
|
|
our $ElementDefault; |
980 |
|
|
|
981 |
|
|
$Element->{$HTML_NS}->{''} = { |
982 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
983 |
|
|
checker => $ElementDefault->{checker}, |
984 |
|
|
}; |
985 |
|
|
|
986 |
|
|
$Element->{$HTML_NS}->{html} = { |
987 |
|
|
is_root => 1, |
988 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
989 |
wakaba |
1.16 |
manifest => $HTMLURIAttrChecker, |
990 |
wakaba |
1.1 |
xmlns => sub { |
991 |
|
|
my ($self, $attr) = @_; |
992 |
|
|
my $value = $attr->value; |
993 |
|
|
unless ($value eq $HTML_NS) { |
994 |
|
|
$self->{onerror}->(node => $attr, type => 'invalid attribute value'); |
995 |
|
|
} |
996 |
|
|
unless ($attr->owner_document->manakai_is_html) { |
997 |
|
|
$self->{onerror}->(node => $attr, type => 'in XML:xmlns'); |
998 |
|
|
## TODO: Test |
999 |
|
|
} |
1000 |
|
|
}, |
1001 |
|
|
}), |
1002 |
|
|
checker => sub { |
1003 |
|
|
my ($self, $todo) = @_; |
1004 |
|
|
my $el = $todo->{node}; |
1005 |
|
|
my $new_todos = []; |
1006 |
|
|
my @nodes = (@{$el->child_nodes}); |
1007 |
|
|
|
1008 |
wakaba |
1.25 |
my $old_values = {significant => |
1009 |
|
|
$todo->{flag}->{has_descendant}->{significant}}; |
1010 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 0; |
1011 |
|
|
|
1012 |
wakaba |
1.1 |
my $phase = 'before head'; |
1013 |
|
|
while (@nodes) { |
1014 |
|
|
my $node = shift @nodes; |
1015 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
1016 |
|
|
|
1017 |
|
|
my $nt = $node->node_type; |
1018 |
|
|
if ($nt == 1) { |
1019 |
|
|
my $node_ns = $node->namespace_uri; |
1020 |
|
|
$node_ns = '' unless defined $node_ns; |
1021 |
|
|
my $node_ln = $node->manakai_local_name; |
1022 |
|
|
my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln}; |
1023 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
1024 |
|
|
# |
1025 |
|
|
} elsif ($phase eq 'before head') { |
1026 |
wakaba |
1.1 |
if ($node_ns eq $HTML_NS and $node_ln eq 'head') { |
1027 |
|
|
$phase = 'after head'; |
1028 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') { |
1029 |
|
|
$self->{onerror}->(node => $node, type => 'ps element missing:head'); |
1030 |
|
|
$phase = 'after body'; |
1031 |
|
|
} else { |
1032 |
|
|
$not_allowed = 1; |
1033 |
|
|
# before head |
1034 |
|
|
} |
1035 |
|
|
} elsif ($phase eq 'after head') { |
1036 |
|
|
if ($node_ns eq $HTML_NS and $node_ln eq 'body') { |
1037 |
|
|
$phase = 'after body'; |
1038 |
|
|
} else { |
1039 |
|
|
$not_allowed = 1; |
1040 |
|
|
# after head |
1041 |
|
|
} |
1042 |
|
|
} else { #elsif ($phase eq 'after body') { |
1043 |
|
|
$not_allowed = 1; |
1044 |
|
|
# after body |
1045 |
|
|
} |
1046 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed') |
1047 |
|
|
if $not_allowed; |
1048 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
1049 |
|
|
unshift @nodes, @$sib; |
1050 |
|
|
push @$new_todos, @$ch; |
1051 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
1052 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
1053 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
1054 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
1055 |
wakaba |
1.1 |
} |
1056 |
|
|
} elsif ($nt == 5) { |
1057 |
|
|
unshift @nodes, @{$node->child_nodes}; |
1058 |
|
|
} |
1059 |
|
|
} |
1060 |
|
|
|
1061 |
|
|
if ($phase eq 'before head') { |
1062 |
|
|
$self->{onerror}->(node => $el, type => 'child element missing:head'); |
1063 |
|
|
$self->{onerror}->(node => $el, type => 'child element missing:body'); |
1064 |
|
|
} elsif ($phase eq 'after head') { |
1065 |
|
|
$self->{onerror}->(node => $el, type => 'child element missing:body'); |
1066 |
|
|
} |
1067 |
|
|
|
1068 |
wakaba |
1.25 |
## NOTE: Significant content check - this is performed here since |
1069 |
|
|
## |html| content model allows a block-level element - |body|. |
1070 |
|
|
push @$new_todos, { |
1071 |
|
|
type => 'descendant', node => $todo->{node}, flag => $todo->{flag}, |
1072 |
|
|
old_values => $old_values, |
1073 |
wakaba |
1.26 |
errors => $HTMLSignificantContentErrors, |
1074 |
wakaba |
1.25 |
}; |
1075 |
|
|
|
1076 |
wakaba |
1.1 |
return ($new_todos); |
1077 |
|
|
}, |
1078 |
|
|
}; |
1079 |
|
|
|
1080 |
|
|
$Element->{$HTML_NS}->{head} = { |
1081 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1082 |
|
|
checker => sub { |
1083 |
|
|
my ($self, $todo) = @_; |
1084 |
|
|
my $el = $todo->{node}; |
1085 |
|
|
my $new_todos = []; |
1086 |
|
|
my @nodes = (@{$el->child_nodes}); |
1087 |
|
|
|
1088 |
|
|
my $has_title; |
1089 |
|
|
while (@nodes) { |
1090 |
|
|
my $node = shift @nodes; |
1091 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
1092 |
|
|
|
1093 |
|
|
my $nt = $node->node_type; |
1094 |
|
|
if ($nt == 1) { |
1095 |
|
|
my $node_ns = $node->namespace_uri; |
1096 |
|
|
$node_ns = '' unless defined $node_ns; |
1097 |
|
|
my $node_ln = $node->manakai_local_name; |
1098 |
wakaba |
1.30 |
if ($self->{minuses}->{$node_ns}->{$node_ln}) { |
1099 |
|
|
$self->{onerror}->(node => $node, |
1100 |
|
|
type => 'element not allowed:minus', |
1101 |
|
|
level => $self->{must_level}); |
1102 |
|
|
} elsif ($self->{pluses}->{$node_ns}->{$node_ln}) { |
1103 |
wakaba |
1.8 |
# |
1104 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') { |
1105 |
wakaba |
1.1 |
unless ($has_title) { |
1106 |
|
|
$has_title = 1; |
1107 |
|
|
} else { |
1108 |
wakaba |
1.30 |
$self->{onerror}->(node => $node, |
1109 |
|
|
type => 'element not allowed:head title', |
1110 |
|
|
level => $self->{must_level}); |
1111 |
|
|
} |
1112 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') { |
1113 |
|
|
if ($todo->{node}->has_attribute_ns (undef, 'scoped')) { |
1114 |
|
|
$self->{onerror}->(node => $node, |
1115 |
|
|
type => 'element not allowed:head style', |
1116 |
|
|
level => $self->{must_level}); |
1117 |
wakaba |
1.1 |
} |
1118 |
wakaba |
1.29 |
} elsif ($HTMLMetadataContent->{$node_ns}->{$node_ln}) { |
1119 |
|
|
# |
1120 |
|
|
|
1121 |
|
|
## NOTE: |meta| is a metadata content. However, strictly speaking, |
1122 |
|
|
## a |meta| element with none of |charset|, |name|, |
1123 |
|
|
## or |http-equiv| attribute is not allowed. It is non-conforming |
1124 |
|
|
## anyway. |
1125 |
wakaba |
1.1 |
} else { |
1126 |
wakaba |
1.30 |
$self->{onerror}->(node => $node, |
1127 |
|
|
type => 'element not allowed:metadata', |
1128 |
|
|
level => $self->{must_level}); |
1129 |
wakaba |
1.1 |
} |
1130 |
wakaba |
1.3 |
local $todo->{flag}->{in_head} = 1; |
1131 |
wakaba |
1.1 |
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
1132 |
|
|
unshift @nodes, @$sib; |
1133 |
|
|
push @$new_todos, @$ch; |
1134 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
1135 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
1136 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
1137 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
1138 |
wakaba |
1.1 |
} |
1139 |
|
|
} elsif ($nt == 5) { |
1140 |
|
|
unshift @nodes, @{$node->child_nodes}; |
1141 |
|
|
} |
1142 |
|
|
} |
1143 |
|
|
unless ($has_title) { |
1144 |
|
|
$self->{onerror}->(node => $el, type => 'child element missing:title'); |
1145 |
|
|
} |
1146 |
|
|
return ($new_todos); |
1147 |
|
|
}, |
1148 |
|
|
}; |
1149 |
|
|
|
1150 |
|
|
$Element->{$HTML_NS}->{title} = { |
1151 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1152 |
|
|
checker => $HTMLTextChecker, |
1153 |
|
|
}; |
1154 |
|
|
|
1155 |
|
|
$Element->{$HTML_NS}->{base} = { |
1156 |
wakaba |
1.4 |
attrs_checker => sub { |
1157 |
|
|
my ($self, $todo) = @_; |
1158 |
|
|
|
1159 |
wakaba |
1.29 |
if ($self->{has_base}) { |
1160 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1161 |
|
|
type => 'element not allowed:base'); |
1162 |
|
|
} else { |
1163 |
|
|
$self->{has_base} = 1; |
1164 |
|
|
} |
1165 |
|
|
|
1166 |
wakaba |
1.14 |
my $has_href = $todo->{node}->has_attribute_ns (undef, 'href'); |
1167 |
|
|
my $has_target = $todo->{node}->has_attribute_ns (undef, 'target'); |
1168 |
|
|
|
1169 |
|
|
if ($self->{has_uri_attr} and $has_href) { |
1170 |
wakaba |
1.4 |
## ISSUE: Are these examples conforming? |
1171 |
|
|
## <head profile="a b c"><base href> (except for |profile|'s |
1172 |
|
|
## non-conformance) |
1173 |
|
|
## <title xml:base="relative"/><base href/> (maybe it should be) |
1174 |
|
|
## <unknown xmlns="relative"/><base href/> (assuming that |
1175 |
|
|
## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though)) |
1176 |
|
|
## <style>@import 'relative';</style><base href> |
1177 |
|
|
## <script>location.href = 'relative';</script><base href> |
1178 |
wakaba |
1.14 |
## NOTE: <html manifest=".."><head><base href=""/> is conforming as |
1179 |
|
|
## an exception. |
1180 |
wakaba |
1.4 |
$self->{onerror}->(node => $todo->{node}, |
1181 |
|
|
type => 'basehref after URI attribute'); |
1182 |
|
|
} |
1183 |
wakaba |
1.14 |
if ($self->{has_hyperlink_element} and $has_target) { |
1184 |
wakaba |
1.4 |
## ISSUE: Are these examples conforming? |
1185 |
|
|
## <head><title xlink:href=""/><base target="name"/></head> |
1186 |
|
|
## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/> |
1187 |
|
|
## (assuming that |xbl:xbl| is allowed before |base|) |
1188 |
|
|
## NOTE: These are non-conformant anyway because of |head|'s content model: |
1189 |
|
|
## <link href=""/><base target="name"/> |
1190 |
|
|
## <link rel=unknown href=""><base target=name> |
1191 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1192 |
|
|
type => 'basetarget after hyperlink'); |
1193 |
|
|
} |
1194 |
|
|
|
1195 |
wakaba |
1.14 |
if (not $has_href and not $has_target) { |
1196 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1197 |
|
|
type => 'attribute missing:href|target'); |
1198 |
|
|
} |
1199 |
|
|
|
1200 |
wakaba |
1.4 |
return $GetHTMLAttrsChecker->({ |
1201 |
|
|
href => $HTMLURIAttrChecker, |
1202 |
|
|
target => $HTMLTargetAttrChecker, |
1203 |
|
|
})->($self, $todo); |
1204 |
|
|
}, |
1205 |
wakaba |
1.1 |
checker => $HTMLEmptyChecker, |
1206 |
|
|
}; |
1207 |
|
|
|
1208 |
|
|
$Element->{$HTML_NS}->{link} = { |
1209 |
|
|
attrs_checker => sub { |
1210 |
|
|
my ($self, $todo) = @_; |
1211 |
|
|
$GetHTMLAttrsChecker->({ |
1212 |
|
|
href => $HTMLURIAttrChecker, |
1213 |
wakaba |
1.4 |
rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) }, |
1214 |
wakaba |
1.1 |
media => $HTMLMQAttrChecker, |
1215 |
|
|
hreflang => $HTMLLanguageTagAttrChecker, |
1216 |
|
|
type => $HTMLIMTAttrChecker, |
1217 |
|
|
## NOTE: Though |title| has special semantics, |
1218 |
|
|
## syntactically same as the |title| as global attribute. |
1219 |
|
|
})->($self, $todo); |
1220 |
wakaba |
1.4 |
if ($todo->{node}->has_attribute_ns (undef, 'href')) { |
1221 |
|
|
$self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type}; |
1222 |
|
|
} else { |
1223 |
wakaba |
1.1 |
$self->{onerror}->(node => $todo->{node}, |
1224 |
|
|
type => 'attribute missing:href'); |
1225 |
|
|
} |
1226 |
|
|
unless ($todo->{node}->has_attribute_ns (undef, 'rel')) { |
1227 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1228 |
|
|
type => 'attribute missing:rel'); |
1229 |
|
|
} |
1230 |
|
|
}, |
1231 |
|
|
checker => $HTMLEmptyChecker, |
1232 |
|
|
}; |
1233 |
|
|
|
1234 |
|
|
$Element->{$HTML_NS}->{meta} = { |
1235 |
|
|
attrs_checker => sub { |
1236 |
|
|
my ($self, $todo) = @_; |
1237 |
|
|
my $name_attr; |
1238 |
|
|
my $http_equiv_attr; |
1239 |
|
|
my $charset_attr; |
1240 |
|
|
my $content_attr; |
1241 |
|
|
for my $attr (@{$todo->{node}->attributes}) { |
1242 |
|
|
my $attr_ns = $attr->namespace_uri; |
1243 |
|
|
$attr_ns = '' unless defined $attr_ns; |
1244 |
|
|
my $attr_ln = $attr->manakai_local_name; |
1245 |
|
|
my $checker; |
1246 |
|
|
if ($attr_ns eq '') { |
1247 |
|
|
if ($attr_ln eq 'content') { |
1248 |
|
|
$content_attr = $attr; |
1249 |
|
|
$checker = 1; |
1250 |
|
|
} elsif ($attr_ln eq 'name') { |
1251 |
|
|
$name_attr = $attr; |
1252 |
|
|
$checker = 1; |
1253 |
|
|
} elsif ($attr_ln eq 'http-equiv') { |
1254 |
|
|
$http_equiv_attr = $attr; |
1255 |
|
|
$checker = 1; |
1256 |
|
|
} elsif ($attr_ln eq 'charset') { |
1257 |
|
|
$charset_attr = $attr; |
1258 |
|
|
$checker = 1; |
1259 |
|
|
} else { |
1260 |
|
|
$checker = $HTMLAttrChecker->{$attr_ln} |
1261 |
|
|
|| $AttrChecker->{$attr_ns}->{$attr_ln} |
1262 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
1263 |
|
|
} |
1264 |
|
|
} else { |
1265 |
|
|
$checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} |
1266 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
1267 |
|
|
} |
1268 |
|
|
if ($checker) { |
1269 |
|
|
$checker->($self, $attr) if ref $checker; |
1270 |
|
|
} else { |
1271 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
1272 |
|
|
type => 'attribute'); |
1273 |
|
|
## ISSUE: No comformance createria for unknown attributes in the spec |
1274 |
|
|
} |
1275 |
|
|
} |
1276 |
|
|
|
1277 |
|
|
if (defined $name_attr) { |
1278 |
|
|
if (defined $http_equiv_attr) { |
1279 |
|
|
$self->{onerror}->(node => $http_equiv_attr, |
1280 |
|
|
type => 'attribute not allowed'); |
1281 |
|
|
} elsif (defined $charset_attr) { |
1282 |
|
|
$self->{onerror}->(node => $charset_attr, |
1283 |
|
|
type => 'attribute not allowed'); |
1284 |
|
|
} |
1285 |
|
|
my $metadata_name = $name_attr->value; |
1286 |
|
|
my $metadata_value; |
1287 |
|
|
if (defined $content_attr) { |
1288 |
|
|
$metadata_value = $content_attr->value; |
1289 |
|
|
} else { |
1290 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1291 |
|
|
type => 'attribute missing:content'); |
1292 |
|
|
$metadata_value = ''; |
1293 |
|
|
} |
1294 |
|
|
} elsif (defined $http_equiv_attr) { |
1295 |
|
|
if (defined $charset_attr) { |
1296 |
|
|
$self->{onerror}->(node => $charset_attr, |
1297 |
|
|
type => 'attribute not allowed'); |
1298 |
|
|
} |
1299 |
|
|
unless (defined $content_attr) { |
1300 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1301 |
|
|
type => 'attribute missing:content'); |
1302 |
|
|
} |
1303 |
|
|
} elsif (defined $charset_attr) { |
1304 |
|
|
if (defined $content_attr) { |
1305 |
|
|
$self->{onerror}->(node => $content_attr, |
1306 |
|
|
type => 'attribute not allowed'); |
1307 |
|
|
} |
1308 |
|
|
} else { |
1309 |
|
|
if (defined $content_attr) { |
1310 |
|
|
$self->{onerror}->(node => $content_attr, |
1311 |
|
|
type => 'attribute not allowed'); |
1312 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1313 |
|
|
type => 'attribute missing:name|http-equiv'); |
1314 |
|
|
} else { |
1315 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1316 |
|
|
type => 'attribute missing:name|http-equiv|charset'); |
1317 |
|
|
} |
1318 |
|
|
} |
1319 |
|
|
|
1320 |
wakaba |
1.32 |
my $check_charset_decl = sub () { |
1321 |
wakaba |
1.29 |
my $parent = $todo->{node}->manakai_parent_element; |
1322 |
|
|
if ($parent and $parent eq $parent->owner_document->manakai_head) { |
1323 |
|
|
for my $el (@{$parent->child_nodes}) { |
1324 |
|
|
next unless $el->node_type == 1; # ELEMENT_NODE |
1325 |
|
|
unless ($el eq $todo->{node}) { |
1326 |
|
|
## NOTE: Not the first child element. |
1327 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1328 |
wakaba |
1.32 |
type => 'element not allowed:meta charset', |
1329 |
|
|
level => $self->{must_level}); |
1330 |
wakaba |
1.29 |
} |
1331 |
|
|
last; |
1332 |
|
|
## NOTE: Entity references are not supported. |
1333 |
|
|
} |
1334 |
|
|
} else { |
1335 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1336 |
wakaba |
1.32 |
type => 'element not allowed:meta charset', |
1337 |
|
|
level => $self->{must_level}); |
1338 |
wakaba |
1.29 |
} |
1339 |
|
|
|
1340 |
wakaba |
1.1 |
unless ($todo->{node}->owner_document->manakai_is_html) { |
1341 |
wakaba |
1.32 |
$self->{onerror}->(node => $todo->{node}, |
1342 |
|
|
type => 'in XML:charset', |
1343 |
|
|
level => $self->{must_level}); |
1344 |
wakaba |
1.1 |
} |
1345 |
wakaba |
1.32 |
}; # $check_charset_decl |
1346 |
wakaba |
1.21 |
|
1347 |
wakaba |
1.32 |
my $check_charset = sub ($$) { |
1348 |
|
|
my ($attr, $charset_value) = @_; |
1349 |
wakaba |
1.21 |
## NOTE: Though the case-sensitivility of |charset| attribute value |
1350 |
|
|
## is not explicitly spelled in the HTML5 spec, the Character Set |
1351 |
|
|
## registry of IANA, which is referenced from HTML5 spec, says that |
1352 |
|
|
## charset name is case-insensitive. |
1353 |
|
|
$charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive. |
1354 |
|
|
|
1355 |
|
|
require Message::Charset::Info; |
1356 |
|
|
my $charset = $Message::Charset::Info::IANACharset->{$charset_value}; |
1357 |
|
|
my $ic = $todo->{node}->owner_document->input_encoding; |
1358 |
|
|
if (defined $ic) { |
1359 |
|
|
## TODO: Test for this case |
1360 |
|
|
my $ic_charset = $Message::Charset::Info::IANACharset->{$ic}; |
1361 |
|
|
if ($charset ne $ic_charset) { |
1362 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1363 |
wakaba |
1.21 |
type => 'mismatched charset name:'.$ic. |
1364 |
wakaba |
1.32 |
':'.$charset_value, ## TODO: This should be a |value| value. |
1365 |
|
|
level => $self->{must_level}); |
1366 |
wakaba |
1.21 |
} |
1367 |
|
|
} else { |
1368 |
|
|
## NOTE: MUST, but not checkable, since the document is not originally |
1369 |
|
|
## in serialized form (or the parser does not preserve the input |
1370 |
|
|
## encoding information). |
1371 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1372 |
|
|
type => 'mismatched charset name::'.$charset_value, ## TODO: |value| |
1373 |
wakaba |
1.21 |
level => 'unsupported'); |
1374 |
|
|
} |
1375 |
|
|
|
1376 |
|
|
## ISSUE: What is "valid character encoding name"? Syntactically valid? |
1377 |
|
|
## Syntactically valid and registered? What about x-charset names? |
1378 |
|
|
unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name |
1379 |
|
|
($charset_value)) { |
1380 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1381 |
|
|
type => 'charset:syntax error:'.$charset_value, ## TODO |
1382 |
|
|
level => $self->{must_level}); |
1383 |
wakaba |
1.21 |
} |
1384 |
|
|
|
1385 |
|
|
if ($charset) { |
1386 |
|
|
## ISSUE: What is "the preferred name for that encoding" (for a charset |
1387 |
|
|
## with no "preferred MIME name" label)? |
1388 |
|
|
my $charset_status = $charset->{iana_names}->{$charset_value} || 0; |
1389 |
|
|
if (($charset_status & |
1390 |
|
|
Message::Charset::Info::PREFERRED_CHARSET_NAME ()) |
1391 |
|
|
!= Message::Charset::Info::PREFERRED_CHARSET_NAME ()) { |
1392 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1393 |
wakaba |
1.21 |
type => 'charset:not preferred:'. |
1394 |
wakaba |
1.32 |
$charset_value, ## TODO |
1395 |
|
|
level => $self->{must_level}); |
1396 |
wakaba |
1.21 |
} |
1397 |
|
|
if (($charset_status & |
1398 |
|
|
Message::Charset::Info::REGISTERED_CHARSET_NAME ()) |
1399 |
|
|
!= Message::Charset::Info::REGISTERED_CHARSET_NAME ()) { |
1400 |
|
|
if ($charset_value =~ /^x-/) { |
1401 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1402 |
|
|
type => 'charset:private:'.$charset_value, ## TODO |
1403 |
wakaba |
1.21 |
level => $self->{good_level}); |
1404 |
|
|
} else { |
1405 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1406 |
wakaba |
1.21 |
type => 'charset:not registered:'. |
1407 |
wakaba |
1.32 |
$charset_value, ## TODO |
1408 |
wakaba |
1.21 |
level => $self->{good_level}); |
1409 |
|
|
} |
1410 |
|
|
} |
1411 |
|
|
} elsif ($charset_value =~ /^x-/) { |
1412 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1413 |
|
|
type => 'charset:private:'.$charset_value, ## TODO |
1414 |
wakaba |
1.21 |
level => $self->{good_level}); |
1415 |
|
|
} else { |
1416 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1417 |
|
|
type => 'charset:not registered:'.$charset_value, ## TODO |
1418 |
wakaba |
1.21 |
level => $self->{good_level}); |
1419 |
|
|
} |
1420 |
|
|
|
1421 |
wakaba |
1.32 |
if ($attr->get_user_data ('manakai_has_reference')) { |
1422 |
|
|
$self->{onerror}->(node => $attr, |
1423 |
wakaba |
1.22 |
type => 'character reference in charset', |
1424 |
|
|
level => $self->{must_level}); |
1425 |
|
|
} |
1426 |
wakaba |
1.32 |
}; # $check_charset |
1427 |
|
|
|
1428 |
|
|
## TODO: metadata conformance |
1429 |
|
|
|
1430 |
|
|
## TODO: pragma conformance |
1431 |
|
|
if (defined $http_equiv_attr) { ## An enumerated attribute |
1432 |
|
|
my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case? |
1433 |
|
|
if ({ |
1434 |
|
|
'refresh' => 1, |
1435 |
|
|
'default-style' => 1, |
1436 |
|
|
}->{$keyword}) { |
1437 |
|
|
# |
1438 |
wakaba |
1.33 |
|
1439 |
|
|
## TODO: More than one occurence is a MUST-level error (revision 1180). |
1440 |
wakaba |
1.32 |
} elsif ($keyword eq 'content-type') { |
1441 |
wakaba |
1.33 |
## ISSUE: Though it is renamed as "Encoding declaration" state in rev |
1442 |
|
|
## 1221, there are still many occurence of "Content-Type" state in |
1443 |
|
|
## the spec. |
1444 |
|
|
|
1445 |
wakaba |
1.32 |
$check_charset_decl->(); |
1446 |
|
|
if ($content_attr) { |
1447 |
|
|
my $content = $content_attr->value; |
1448 |
|
|
if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) { |
1449 |
|
|
$check_charset->($content_attr, $1); |
1450 |
|
|
} else { |
1451 |
|
|
$self->{onerror}->(node => $content_attr, |
1452 |
|
|
type => 'meta content-type syntax error', |
1453 |
|
|
level => $self->{must_level}); |
1454 |
|
|
} |
1455 |
|
|
} |
1456 |
|
|
} else { |
1457 |
|
|
$self->{onerror}->(node => $http_equiv_attr, |
1458 |
|
|
type => 'enumerated:invalid'); |
1459 |
|
|
} |
1460 |
|
|
} |
1461 |
|
|
|
1462 |
|
|
if (defined $charset_attr) { |
1463 |
|
|
$check_charset_decl->(); |
1464 |
|
|
$check_charset->($charset_attr, $charset_attr->value); |
1465 |
wakaba |
1.1 |
} |
1466 |
|
|
}, |
1467 |
|
|
checker => $HTMLEmptyChecker, |
1468 |
|
|
}; |
1469 |
|
|
|
1470 |
|
|
$Element->{$HTML_NS}->{style} = { |
1471 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
1472 |
|
|
type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language |
1473 |
|
|
media => $HTMLMQAttrChecker, |
1474 |
|
|
scoped => $GetHTMLBooleanAttrChecker->('scoped'), |
1475 |
|
|
## NOTE: |title| has special semantics for |style|s, but is syntactically |
1476 |
|
|
## not different |
1477 |
|
|
}), |
1478 |
|
|
checker => sub { |
1479 |
wakaba |
1.27 |
## NOTE: |html:style| itself has no conformance creteria on content model. |
1480 |
wakaba |
1.1 |
my ($self, $todo) = @_; |
1481 |
|
|
my $type = $todo->{node}->get_attribute_ns (undef, 'type'); |
1482 |
wakaba |
1.27 |
if (not defined $type or |
1483 |
|
|
$type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) { |
1484 |
|
|
my $el = $todo->{node}; |
1485 |
|
|
my $new_todos = []; |
1486 |
|
|
my @nodes = (@{$el->child_nodes}); |
1487 |
|
|
|
1488 |
|
|
my $ss_text = ''; |
1489 |
|
|
while (@nodes) { |
1490 |
|
|
my $node = shift @nodes; |
1491 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
1492 |
|
|
|
1493 |
|
|
my $nt = $node->node_type; |
1494 |
|
|
if ($nt == 1) { |
1495 |
wakaba |
1.29 |
my $node_ns = $node->namespace_uri; |
1496 |
|
|
$node_ns = '' unless defined $node_ns; |
1497 |
|
|
my $node_ln = $node->manakai_local_name; |
1498 |
|
|
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
1499 |
|
|
# |
1500 |
|
|
} else { |
1501 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
1502 |
|
|
} |
1503 |
wakaba |
1.27 |
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
1504 |
|
|
unshift @nodes, @$sib; |
1505 |
|
|
push @$new_todos, @$ch; |
1506 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
1507 |
|
|
$ss_text .= $node->text_content; |
1508 |
|
|
} elsif ($nt == 5) { |
1509 |
|
|
unshift @nodes, @{$node->child_nodes}; |
1510 |
|
|
} |
1511 |
|
|
} |
1512 |
|
|
|
1513 |
wakaba |
1.28 |
$self->{onsubdoc}->({s => $ss_text, container_node => $el, |
1514 |
|
|
media_type => 'text/css', is_char_string => 1}); |
1515 |
wakaba |
1.27 |
return ($new_todos); |
1516 |
|
|
} else { |
1517 |
|
|
$self->{onerror}->(node => $todo->{node}, level => 'unsupported', |
1518 |
|
|
type => 'style:'.$type); ## TODO: $type normalization |
1519 |
|
|
return $AnyChecker->($self, $todo); |
1520 |
|
|
} |
1521 |
wakaba |
1.1 |
}, |
1522 |
|
|
}; |
1523 |
wakaba |
1.25 |
## ISSUE: Relationship to significant content check? |
1524 |
wakaba |
1.1 |
|
1525 |
|
|
$Element->{$HTML_NS}->{body} = { |
1526 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1527 |
wakaba |
1.29 |
checker => $HTMLProseContentChecker, |
1528 |
wakaba |
1.1 |
}; |
1529 |
|
|
|
1530 |
|
|
$Element->{$HTML_NS}->{section} = { |
1531 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1532 |
wakaba |
1.29 |
checker => $HTMLProseContentChecker, |
1533 |
wakaba |
1.1 |
}; |
1534 |
|
|
|
1535 |
|
|
$Element->{$HTML_NS}->{nav} = { |
1536 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1537 |
wakaba |
1.29 |
checker => $HTMLProseContentChecker, |
1538 |
wakaba |
1.1 |
}; |
1539 |
|
|
|
1540 |
|
|
$Element->{$HTML_NS}->{article} = { |
1541 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1542 |
wakaba |
1.29 |
checker => $HTMLProseContentChecker, |
1543 |
wakaba |
1.1 |
}; |
1544 |
|
|
|
1545 |
|
|
$Element->{$HTML_NS}->{blockquote} = { |
1546 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
1547 |
|
|
cite => $HTMLURIAttrChecker, |
1548 |
|
|
}), |
1549 |
wakaba |
1.29 |
checker => $HTMLProseContentChecker, |
1550 |
wakaba |
1.1 |
}; |
1551 |
|
|
|
1552 |
|
|
$Element->{$HTML_NS}->{aside} = { |
1553 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1554 |
wakaba |
1.29 |
checker => $HTMLProseContentChecker, |
1555 |
wakaba |
1.1 |
}; |
1556 |
|
|
|
1557 |
|
|
$Element->{$HTML_NS}->{h1} = { |
1558 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1559 |
|
|
checker => sub { |
1560 |
|
|
my ($self, $todo) = @_; |
1561 |
wakaba |
1.24 |
$todo->{flag}->{has_descendant}->{hn} = 1; |
1562 |
wakaba |
1.29 |
return $HTMLPhrasingContentChecker->($self, $todo); |
1563 |
wakaba |
1.1 |
}, |
1564 |
|
|
}; |
1565 |
|
|
|
1566 |
|
|
$Element->{$HTML_NS}->{h2} = { |
1567 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1568 |
|
|
checker => $Element->{$HTML_NS}->{h1}->{checker}, |
1569 |
|
|
}; |
1570 |
|
|
|
1571 |
|
|
$Element->{$HTML_NS}->{h3} = { |
1572 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1573 |
|
|
checker => $Element->{$HTML_NS}->{h1}->{checker}, |
1574 |
|
|
}; |
1575 |
|
|
|
1576 |
|
|
$Element->{$HTML_NS}->{h4} = { |
1577 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1578 |
|
|
checker => $Element->{$HTML_NS}->{h1}->{checker}, |
1579 |
|
|
}; |
1580 |
|
|
|
1581 |
|
|
$Element->{$HTML_NS}->{h5} = { |
1582 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1583 |
|
|
checker => $Element->{$HTML_NS}->{h1}->{checker}, |
1584 |
|
|
}; |
1585 |
|
|
|
1586 |
|
|
$Element->{$HTML_NS}->{h6} = { |
1587 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1588 |
|
|
checker => $Element->{$HTML_NS}->{h1}->{checker}, |
1589 |
|
|
}; |
1590 |
|
|
|
1591 |
wakaba |
1.29 |
## TODO: Explicit sectioning is "encouraged". |
1592 |
|
|
|
1593 |
wakaba |
1.1 |
$Element->{$HTML_NS}->{header} = { |
1594 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1595 |
|
|
checker => sub { |
1596 |
|
|
my ($self, $todo) = @_; |
1597 |
wakaba |
1.24 |
|
1598 |
|
|
my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}}; |
1599 |
|
|
$todo->{flag}->{has_descendant}->{hn} = 0; |
1600 |
wakaba |
1.1 |
|
1601 |
|
|
my $end = $self->_add_minuses |
1602 |
|
|
({$HTML_NS => {qw/header 1 footer 1/}}, |
1603 |
wakaba |
1.29 |
$HTMLSectioningContent); |
1604 |
|
|
my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo); |
1605 |
wakaba |
1.24 |
push @$new_todos, $end, |
1606 |
|
|
{type => 'descendant', node => $todo->{node}, |
1607 |
|
|
flag => $todo->{flag}, old_values => $old_flags, |
1608 |
|
|
errors => { |
1609 |
|
|
hn => sub { |
1610 |
|
|
my ($self, $todo) = @_; |
1611 |
|
|
$self->{onerror}->(node => $todo->{node}, |
1612 |
|
|
type => 'element missing:hn'); |
1613 |
|
|
}, |
1614 |
wakaba |
1.1 |
}}; |
1615 |
|
|
return ($new_todos, $ch); |
1616 |
wakaba |
1.24 |
|
1617 |
|
|
## ISSUE: <header><del><h1>...</h1></del></header> is conforming? |
1618 |
wakaba |
1.1 |
}, |
1619 |
|
|
}; |
1620 |
|
|
|
1621 |
|
|
$Element->{$HTML_NS}->{footer} = { |
1622 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1623 |
wakaba |
1.29 |
checker => sub { |
1624 |
wakaba |
1.1 |
my ($self, $todo) = @_; |
1625 |
wakaba |
1.25 |
|
1626 |
wakaba |
1.29 |
my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}}; |
1627 |
|
|
$todo->{flag}->{has_descendant}->{hn} = 0; |
1628 |
wakaba |
1.1 |
|
1629 |
|
|
my $end = $self->_add_minuses |
1630 |
wakaba |
1.29 |
({$HTML_NS => {footer => 1}}, |
1631 |
|
|
$HTMLSectioningContent, $HTMLHeadingContent); |
1632 |
|
|
my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo); |
1633 |
wakaba |
1.1 |
push @$new_todos, $end; |
1634 |
|
|
|
1635 |
wakaba |
1.29 |
return ($new_todos, $ch); |
1636 |
wakaba |
1.1 |
}, |
1637 |
|
|
}; |
1638 |
|
|
|
1639 |
|
|
$Element->{$HTML_NS}->{address} = { |
1640 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1641 |
wakaba |
1.29 |
checker => sub { |
1642 |
|
|
my ($self, $todo) = @_; |
1643 |
|
|
|
1644 |
|
|
my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}}; |
1645 |
|
|
$todo->{flag}->{has_descendant}->{hn} = 0; |
1646 |
|
|
|
1647 |
|
|
my $end = $self->_add_minuses |
1648 |
|
|
({$HTML_NS => {footer => 1, address => 1}}, |
1649 |
|
|
$HTMLSectioningContent, $HTMLHeadingContent); |
1650 |
|
|
my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo); |
1651 |
|
|
push @$new_todos, $end; |
1652 |
|
|
|
1653 |
|
|
return ($new_todos, $ch); |
1654 |
|
|
}, |
1655 |
wakaba |
1.1 |
}; |
1656 |
|
|
|
1657 |
|
|
$Element->{$HTML_NS}->{p} = { |
1658 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1659 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
1660 |
wakaba |
1.1 |
}; |
1661 |
|
|
|
1662 |
|
|
$Element->{$HTML_NS}->{hr} = { |
1663 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1664 |
|
|
checker => $HTMLEmptyChecker, |
1665 |
|
|
}; |
1666 |
|
|
|
1667 |
|
|
$Element->{$HTML_NS}->{br} = { |
1668 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1669 |
|
|
checker => $HTMLEmptyChecker, |
1670 |
wakaba |
1.29 |
## NOTE: Blank line MUST NOT be used for presentation purpose. |
1671 |
|
|
## (This requirement is semantic so that we cannot check.) |
1672 |
wakaba |
1.1 |
}; |
1673 |
|
|
|
1674 |
|
|
$Element->{$HTML_NS}->{dialog} = { |
1675 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1676 |
|
|
checker => sub { |
1677 |
|
|
my ($self, $todo) = @_; |
1678 |
|
|
my $el = $todo->{node}; |
1679 |
|
|
my $new_todos = []; |
1680 |
|
|
my @nodes = (@{$el->child_nodes}); |
1681 |
|
|
|
1682 |
|
|
my $phase = 'before dt'; |
1683 |
|
|
while (@nodes) { |
1684 |
|
|
my $node = shift @nodes; |
1685 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
1686 |
|
|
|
1687 |
|
|
my $nt = $node->node_type; |
1688 |
|
|
if ($nt == 1) { |
1689 |
|
|
my $node_ns = $node->namespace_uri; |
1690 |
|
|
$node_ns = '' unless defined $node_ns; |
1691 |
|
|
my $node_ln = $node->manakai_local_name; |
1692 |
|
|
## NOTE: |minuses| list is not checked since redundant |
1693 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
1694 |
|
|
# |
1695 |
|
|
} elsif ($phase eq 'before dt') { |
1696 |
wakaba |
1.1 |
if ($node_ns eq $HTML_NS and $node_ln eq 'dt') { |
1697 |
|
|
$phase = 'before dd'; |
1698 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') { |
1699 |
|
|
$self->{onerror} |
1700 |
|
|
->(node => $node, type => 'ps element missing:dt'); |
1701 |
|
|
$phase = 'before dt'; |
1702 |
|
|
} else { |
1703 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
1704 |
|
|
} |
1705 |
|
|
} else { # before dd |
1706 |
|
|
if ($node_ns eq $HTML_NS and $node_ln eq 'dd') { |
1707 |
|
|
$phase = 'before dt'; |
1708 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') { |
1709 |
|
|
$self->{onerror} |
1710 |
|
|
->(node => $node, type => 'ps element missing:dd'); |
1711 |
|
|
$phase = 'before dd'; |
1712 |
|
|
} else { |
1713 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
1714 |
|
|
} |
1715 |
|
|
} |
1716 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
1717 |
|
|
unshift @nodes, @$sib; |
1718 |
|
|
push @$new_todos, @$ch; |
1719 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
1720 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
1721 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
1722 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
1723 |
wakaba |
1.1 |
} |
1724 |
|
|
} elsif ($nt == 5) { |
1725 |
|
|
unshift @nodes, @{$node->child_nodes}; |
1726 |
|
|
} |
1727 |
|
|
} |
1728 |
|
|
if ($phase eq 'before dd') { |
1729 |
wakaba |
1.8 |
$self->{onerror}->(node => $el, type => 'child element missing:dd'); |
1730 |
wakaba |
1.1 |
} |
1731 |
|
|
return ($new_todos); |
1732 |
|
|
}, |
1733 |
|
|
}; |
1734 |
|
|
|
1735 |
|
|
$Element->{$HTML_NS}->{pre} = { |
1736 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1737 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
1738 |
wakaba |
1.1 |
}; |
1739 |
|
|
|
1740 |
|
|
$Element->{$HTML_NS}->{ol} = { |
1741 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
1742 |
|
|
start => $HTMLIntegerAttrChecker, |
1743 |
|
|
}), |
1744 |
|
|
checker => sub { |
1745 |
|
|
my ($self, $todo) = @_; |
1746 |
|
|
my $el = $todo->{node}; |
1747 |
|
|
my $new_todos = []; |
1748 |
|
|
my @nodes = (@{$el->child_nodes}); |
1749 |
|
|
|
1750 |
|
|
while (@nodes) { |
1751 |
|
|
my $node = shift @nodes; |
1752 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
1753 |
|
|
|
1754 |
|
|
my $nt = $node->node_type; |
1755 |
|
|
if ($nt == 1) { |
1756 |
|
|
my $node_ns = $node->namespace_uri; |
1757 |
|
|
$node_ns = '' unless defined $node_ns; |
1758 |
|
|
my $node_ln = $node->manakai_local_name; |
1759 |
|
|
## NOTE: |minuses| list is not checked since redundant |
1760 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
1761 |
|
|
# |
1762 |
|
|
} elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) { |
1763 |
wakaba |
1.1 |
$self->{onerror}->(node => $node, type => 'element not allowed'); |
1764 |
|
|
} |
1765 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
1766 |
|
|
unshift @nodes, @$sib; |
1767 |
|
|
push @$new_todos, @$ch; |
1768 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
1769 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
1770 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
1771 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
1772 |
wakaba |
1.1 |
} |
1773 |
|
|
} elsif ($nt == 5) { |
1774 |
|
|
unshift @nodes, @{$node->child_nodes}; |
1775 |
|
|
} |
1776 |
|
|
} |
1777 |
|
|
|
1778 |
|
|
if ($todo->{inline}) { |
1779 |
|
|
for (@$new_todos) { |
1780 |
|
|
$_->{inline} = 1; |
1781 |
|
|
} |
1782 |
|
|
} |
1783 |
|
|
return ($new_todos); |
1784 |
|
|
}, |
1785 |
|
|
}; |
1786 |
|
|
|
1787 |
|
|
$Element->{$HTML_NS}->{ul} = { |
1788 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1789 |
|
|
checker => $Element->{$HTML_NS}->{ol}->{checker}, |
1790 |
|
|
}; |
1791 |
|
|
|
1792 |
|
|
$Element->{$HTML_NS}->{li} = { |
1793 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
1794 |
|
|
start => sub { |
1795 |
|
|
my ($self, $attr) = @_; |
1796 |
|
|
my $parent = $attr->owner_element->manakai_parent_element; |
1797 |
|
|
if (defined $parent) { |
1798 |
|
|
my $parent_ns = $parent->namespace_uri; |
1799 |
|
|
$parent_ns = '' unless defined $parent_ns; |
1800 |
|
|
my $parent_ln = $parent->manakai_local_name; |
1801 |
|
|
unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') { |
1802 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
1803 |
|
|
type => 'attribute'); |
1804 |
|
|
} |
1805 |
|
|
} |
1806 |
|
|
$HTMLIntegerAttrChecker->($self, $attr); |
1807 |
|
|
}, |
1808 |
|
|
}), |
1809 |
|
|
checker => sub { |
1810 |
|
|
my ($self, $todo) = @_; |
1811 |
wakaba |
1.29 |
if ($todo->{flag}->{in_menu}) { |
1812 |
|
|
return $HTMLPhrasingContentChecker->($self, $todo); |
1813 |
wakaba |
1.1 |
} else { |
1814 |
wakaba |
1.29 |
return $HTMLProseContentChecker->($self, $todo); |
1815 |
wakaba |
1.1 |
} |
1816 |
|
|
}, |
1817 |
|
|
}; |
1818 |
|
|
|
1819 |
|
|
$Element->{$HTML_NS}->{dl} = { |
1820 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1821 |
|
|
checker => sub { |
1822 |
|
|
my ($self, $todo) = @_; |
1823 |
|
|
my $el = $todo->{node}; |
1824 |
|
|
my $new_todos = []; |
1825 |
|
|
my @nodes = (@{$el->child_nodes}); |
1826 |
|
|
|
1827 |
|
|
my $phase = 'before dt'; |
1828 |
|
|
while (@nodes) { |
1829 |
|
|
my $node = shift @nodes; |
1830 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
1831 |
|
|
|
1832 |
|
|
my $nt = $node->node_type; |
1833 |
|
|
if ($nt == 1) { |
1834 |
|
|
my $node_ns = $node->namespace_uri; |
1835 |
|
|
$node_ns = '' unless defined $node_ns; |
1836 |
|
|
my $node_ln = $node->manakai_local_name; |
1837 |
|
|
## NOTE: |minuses| list is not checked since redundant |
1838 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
1839 |
|
|
# |
1840 |
|
|
} elsif ($phase eq 'in dds') { |
1841 |
wakaba |
1.1 |
if ($node_ns eq $HTML_NS and $node_ln eq 'dd') { |
1842 |
|
|
#$phase = 'in dds'; |
1843 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') { |
1844 |
|
|
$phase = 'in dts'; |
1845 |
|
|
} else { |
1846 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
1847 |
|
|
} |
1848 |
|
|
} elsif ($phase eq 'in dts') { |
1849 |
|
|
if ($node_ns eq $HTML_NS and $node_ln eq 'dt') { |
1850 |
|
|
#$phase = 'in dts'; |
1851 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') { |
1852 |
|
|
$phase = 'in dds'; |
1853 |
|
|
} else { |
1854 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
1855 |
|
|
} |
1856 |
|
|
} else { # before dt |
1857 |
|
|
if ($node_ns eq $HTML_NS and $node_ln eq 'dt') { |
1858 |
|
|
$phase = 'in dts'; |
1859 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') { |
1860 |
|
|
$self->{onerror} |
1861 |
|
|
->(node => $node, type => 'ps element missing:dt'); |
1862 |
|
|
$phase = 'in dds'; |
1863 |
|
|
} else { |
1864 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
1865 |
|
|
} |
1866 |
|
|
} |
1867 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
1868 |
|
|
unshift @nodes, @$sib; |
1869 |
|
|
push @$new_todos, @$ch; |
1870 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
1871 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
1872 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
1873 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
1874 |
wakaba |
1.1 |
} |
1875 |
|
|
} elsif ($nt == 5) { |
1876 |
|
|
unshift @nodes, @{$node->child_nodes}; |
1877 |
|
|
} |
1878 |
|
|
} |
1879 |
|
|
if ($phase eq 'in dts') { |
1880 |
wakaba |
1.8 |
$self->{onerror}->(node => $el, type => 'child element missing:dd'); |
1881 |
wakaba |
1.1 |
} |
1882 |
|
|
|
1883 |
|
|
if ($todo->{inline}) { |
1884 |
|
|
for (@$new_todos) { |
1885 |
|
|
$_->{inline} = 1; |
1886 |
|
|
} |
1887 |
|
|
} |
1888 |
|
|
return ($new_todos); |
1889 |
|
|
}, |
1890 |
|
|
}; |
1891 |
|
|
|
1892 |
|
|
$Element->{$HTML_NS}->{dt} = { |
1893 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1894 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
1895 |
wakaba |
1.1 |
}; |
1896 |
|
|
|
1897 |
|
|
$Element->{$HTML_NS}->{dd} = { |
1898 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1899 |
wakaba |
1.29 |
checker => $HTMLProseContentChecker, |
1900 |
wakaba |
1.1 |
}; |
1901 |
|
|
|
1902 |
|
|
$Element->{$HTML_NS}->{a} = { |
1903 |
|
|
attrs_checker => sub { |
1904 |
|
|
my ($self, $todo) = @_; |
1905 |
|
|
my %attr; |
1906 |
|
|
for my $attr (@{$todo->{node}->attributes}) { |
1907 |
|
|
my $attr_ns = $attr->namespace_uri; |
1908 |
|
|
$attr_ns = '' unless defined $attr_ns; |
1909 |
|
|
my $attr_ln = $attr->manakai_local_name; |
1910 |
|
|
my $checker; |
1911 |
|
|
if ($attr_ns eq '') { |
1912 |
|
|
$checker = { |
1913 |
|
|
target => $HTMLTargetAttrChecker, |
1914 |
|
|
href => $HTMLURIAttrChecker, |
1915 |
|
|
ping => $HTMLSpaceURIsAttrChecker, |
1916 |
wakaba |
1.4 |
rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) }, |
1917 |
wakaba |
1.1 |
media => $HTMLMQAttrChecker, |
1918 |
|
|
hreflang => $HTMLLanguageTagAttrChecker, |
1919 |
|
|
type => $HTMLIMTAttrChecker, |
1920 |
|
|
}->{$attr_ln}; |
1921 |
|
|
if ($checker) { |
1922 |
|
|
$attr{$attr_ln} = $attr; |
1923 |
|
|
} else { |
1924 |
|
|
$checker = $HTMLAttrChecker->{$attr_ln}; |
1925 |
|
|
} |
1926 |
|
|
} |
1927 |
|
|
$checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} |
1928 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
1929 |
|
|
if ($checker) { |
1930 |
|
|
$checker->($self, $attr) if ref $checker; |
1931 |
|
|
} else { |
1932 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
1933 |
|
|
type => 'attribute'); |
1934 |
|
|
## ISSUE: No comformance createria for unknown attributes in the spec |
1935 |
|
|
} |
1936 |
|
|
} |
1937 |
|
|
|
1938 |
wakaba |
1.4 |
if (defined $attr{href}) { |
1939 |
|
|
$self->{has_hyperlink_element} = 1; |
1940 |
|
|
} else { |
1941 |
wakaba |
1.1 |
for (qw/target ping rel media hreflang type/) { |
1942 |
|
|
if (defined $attr{$_}) { |
1943 |
|
|
$self->{onerror}->(node => $attr{$_}, |
1944 |
|
|
type => 'attribute not allowed'); |
1945 |
|
|
} |
1946 |
|
|
} |
1947 |
|
|
} |
1948 |
|
|
}, |
1949 |
|
|
checker => sub { |
1950 |
|
|
my ($self, $todo) = @_; |
1951 |
|
|
|
1952 |
wakaba |
1.36 |
my $end = $self->_add_minuses ($HTMLInteractiveContent); |
1953 |
wakaba |
1.29 |
my ($new_todos, $ch) = $HTMLPhrasingContentChecker->($self, $todo); |
1954 |
wakaba |
1.1 |
push @$new_todos, $end; |
1955 |
|
|
|
1956 |
wakaba |
1.15 |
if ($todo->{node}->has_attribute_ns (undef, 'href')) { |
1957 |
|
|
$_->{flag}->{in_a_href} = 1 for @$new_todos; |
1958 |
|
|
} |
1959 |
wakaba |
1.1 |
|
1960 |
|
|
return ($new_todos, $ch); |
1961 |
|
|
}, |
1962 |
|
|
}; |
1963 |
|
|
|
1964 |
|
|
$Element->{$HTML_NS}->{q} = { |
1965 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
1966 |
|
|
cite => $HTMLURIAttrChecker, |
1967 |
|
|
}), |
1968 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
1969 |
wakaba |
1.1 |
}; |
1970 |
|
|
|
1971 |
|
|
$Element->{$HTML_NS}->{cite} = { |
1972 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1973 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
1974 |
wakaba |
1.1 |
}; |
1975 |
|
|
|
1976 |
|
|
$Element->{$HTML_NS}->{em} = { |
1977 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1978 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
1979 |
wakaba |
1.1 |
}; |
1980 |
|
|
|
1981 |
|
|
$Element->{$HTML_NS}->{strong} = { |
1982 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1983 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
1984 |
wakaba |
1.1 |
}; |
1985 |
|
|
|
1986 |
|
|
$Element->{$HTML_NS}->{small} = { |
1987 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1988 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
1989 |
wakaba |
1.1 |
}; |
1990 |
|
|
|
1991 |
|
|
$Element->{$HTML_NS}->{m} = { |
1992 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1993 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
1994 |
wakaba |
1.1 |
}; |
1995 |
|
|
|
1996 |
|
|
$Element->{$HTML_NS}->{dfn} = { |
1997 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
1998 |
|
|
checker => sub { |
1999 |
|
|
my ($self, $todo) = @_; |
2000 |
|
|
|
2001 |
|
|
my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}}); |
2002 |
wakaba |
1.29 |
my ($sib, $ch) = $HTMLPhrasingContentChecker->($self, $todo); |
2003 |
wakaba |
1.1 |
push @$sib, $end; |
2004 |
|
|
|
2005 |
|
|
my $node = $todo->{node}; |
2006 |
|
|
my $term = $node->get_attribute_ns (undef, 'title'); |
2007 |
|
|
unless (defined $term) { |
2008 |
|
|
for my $child (@{$node->child_nodes}) { |
2009 |
|
|
if ($child->node_type == 1) { # ELEMENT_NODE |
2010 |
|
|
if (defined $term) { |
2011 |
|
|
undef $term; |
2012 |
|
|
last; |
2013 |
|
|
} elsif ($child->manakai_local_name eq 'abbr') { |
2014 |
|
|
my $nsuri = $child->namespace_uri; |
2015 |
|
|
if (defined $nsuri and $nsuri eq $HTML_NS) { |
2016 |
|
|
my $attr = $child->get_attribute_node_ns (undef, 'title'); |
2017 |
|
|
if ($attr) { |
2018 |
|
|
$term = $attr->value; |
2019 |
|
|
} |
2020 |
|
|
} |
2021 |
|
|
} |
2022 |
|
|
} elsif ($child->node_type == 3 or $child->node_type == 4) { |
2023 |
|
|
## TEXT_NODE or CDATA_SECTION_NODE |
2024 |
|
|
if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace |
2025 |
|
|
next; |
2026 |
|
|
} |
2027 |
|
|
undef $term; |
2028 |
|
|
last; |
2029 |
|
|
} |
2030 |
|
|
} |
2031 |
|
|
unless (defined $term) { |
2032 |
|
|
$term = $node->text_content; |
2033 |
|
|
} |
2034 |
|
|
} |
2035 |
|
|
if ($self->{term}->{$term}) { |
2036 |
|
|
$self->{onerror}->(node => $node, type => 'duplicate term'); |
2037 |
|
|
push @{$self->{term}->{$term}}, $node; |
2038 |
|
|
} else { |
2039 |
|
|
$self->{term}->{$term} = [$node]; |
2040 |
|
|
} |
2041 |
|
|
## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn| |
2042 |
|
|
## has |title|. |
2043 |
|
|
|
2044 |
|
|
return ($sib, $ch); |
2045 |
|
|
}, |
2046 |
|
|
}; |
2047 |
|
|
|
2048 |
|
|
$Element->{$HTML_NS}->{abbr} = { |
2049 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2050 |
|
|
## NOTE: |title| has special semantics for |abbr|s, but is syntactically |
2051 |
|
|
## not different. The spec says that the |title| MAY be omitted |
2052 |
|
|
## if there is a |dfn| whose defining term is the abbreviation, |
2053 |
|
|
## but it does not prohibit |abbr| w/o |title| in other cases. |
2054 |
|
|
}), |
2055 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2056 |
wakaba |
1.1 |
}; |
2057 |
|
|
|
2058 |
|
|
$Element->{$HTML_NS}->{time} = { |
2059 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2060 |
|
|
datetime => sub { 1 }, # checked in |checker| |
2061 |
|
|
}), |
2062 |
|
|
## TODO: Write tests |
2063 |
|
|
checker => sub { |
2064 |
|
|
my ($self, $todo) = @_; |
2065 |
|
|
|
2066 |
|
|
my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime'); |
2067 |
|
|
my $input; |
2068 |
|
|
my $reg_sp; |
2069 |
|
|
my $input_node; |
2070 |
|
|
if ($attr) { |
2071 |
|
|
$input = $attr->value; |
2072 |
|
|
$reg_sp = qr/[\x09-\x0D\x20]*/; |
2073 |
|
|
$input_node = $attr; |
2074 |
|
|
} else { |
2075 |
|
|
$input = $todo->{node}->text_content; |
2076 |
|
|
$reg_sp = qr/\p{Zs}*/; |
2077 |
|
|
$input_node = $todo->{node}; |
2078 |
|
|
|
2079 |
|
|
## ISSUE: What is the definition for "successfully extracts a date |
2080 |
|
|
## or time"? If the algorithm says the string is invalid but |
2081 |
|
|
## return some date or time, is it "successfully"? |
2082 |
|
|
} |
2083 |
|
|
|
2084 |
|
|
my $hour; |
2085 |
|
|
my $minute; |
2086 |
|
|
my $second; |
2087 |
|
|
if ($input =~ / |
2088 |
|
|
\A |
2089 |
|
|
[\x09-\x0D\x20]* |
2090 |
|
|
([0-9]+) # 1 |
2091 |
|
|
(?> |
2092 |
|
|
-([0-9]+) # 2 |
2093 |
|
|
-([0-9]+) # 3 |
2094 |
|
|
[\x09-\x0D\x20]* |
2095 |
|
|
(?> |
2096 |
|
|
T |
2097 |
|
|
[\x09-\x0D\x20]* |
2098 |
|
|
)? |
2099 |
|
|
([0-9]+) # 4 |
2100 |
|
|
:([0-9]+) # 5 |
2101 |
|
|
(?> |
2102 |
|
|
:([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6 |
2103 |
|
|
)? |
2104 |
|
|
[\x09-\x0D\x20]* |
2105 |
|
|
(?> |
2106 |
|
|
Z |
2107 |
|
|
[\x09-\x0D\x20]* |
2108 |
|
|
| |
2109 |
|
|
[+-]([0-9]+):([0-9]+) # 7, 8 |
2110 |
|
|
[\x09-\x0D\x20]* |
2111 |
|
|
)? |
2112 |
|
|
\z |
2113 |
|
|
| |
2114 |
|
|
:([0-9]+) # 9 |
2115 |
|
|
(?> |
2116 |
|
|
:([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10 |
2117 |
|
|
)? |
2118 |
|
|
[\x09-\x0D\x20]*\z |
2119 |
|
|
) |
2120 |
|
|
/x) { |
2121 |
|
|
if (defined $2) { ## YYYY-MM-DD T? hh:mm |
2122 |
|
|
if (length $1 != 4 or length $2 != 2 or length $3 != 2 or |
2123 |
|
|
length $4 != 2 or length $5 != 2) { |
2124 |
|
|
$self->{onerror}->(node => $input_node, |
2125 |
|
|
type => 'dateortime:syntax error'); |
2126 |
|
|
} |
2127 |
|
|
|
2128 |
|
|
if (1 <= $2 and $2 <= 12) { |
2129 |
|
|
$self->{onerror}->(node => $input_node, type => 'datetime:bad day') |
2130 |
|
|
if $3 < 1 or |
2131 |
|
|
$3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2]; |
2132 |
|
|
$self->{onerror}->(node => $input_node, type => 'datetime:bad day') |
2133 |
|
|
if $2 == 2 and $3 == 29 and |
2134 |
|
|
not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0)); |
2135 |
|
|
} else { |
2136 |
|
|
$self->{onerror}->(node => $input_node, |
2137 |
|
|
type => 'datetime:bad month'); |
2138 |
|
|
} |
2139 |
|
|
|
2140 |
|
|
($hour, $minute, $second) = ($4, $5, $6); |
2141 |
|
|
|
2142 |
|
|
if (defined $7) { ## [+-]hh:mm |
2143 |
|
|
if (length $7 != 2 or length $8 != 2) { |
2144 |
|
|
$self->{onerror}->(node => $input_node, |
2145 |
|
|
type => 'dateortime:syntax error'); |
2146 |
|
|
} |
2147 |
|
|
|
2148 |
|
|
$self->{onerror}->(node => $input_node, |
2149 |
|
|
type => 'datetime:bad timezone hour') |
2150 |
|
|
if $7 > 23; |
2151 |
|
|
$self->{onerror}->(node => $input_node, |
2152 |
|
|
type => 'datetime:bad timezone minute') |
2153 |
|
|
if $8 > 59; |
2154 |
|
|
} |
2155 |
|
|
} else { ## hh:mm |
2156 |
|
|
if (length $1 != 2 or length $9 != 2) { |
2157 |
|
|
$self->{onerror}->(node => $input_node, |
2158 |
|
|
type => qq'dateortime:syntax error'); |
2159 |
|
|
} |
2160 |
|
|
|
2161 |
|
|
($hour, $minute, $second) = ($1, $9, $10); |
2162 |
|
|
} |
2163 |
|
|
|
2164 |
|
|
$self->{onerror}->(node => $input_node, type => 'datetime:bad hour') |
2165 |
|
|
if $hour > 23; |
2166 |
|
|
$self->{onerror}->(node => $input_node, type => 'datetime:bad minute') |
2167 |
|
|
if $minute > 59; |
2168 |
|
|
|
2169 |
|
|
if (defined $second) { ## s |
2170 |
|
|
## NOTE: Integer part of second don't have to have length of two. |
2171 |
|
|
|
2172 |
|
|
if (substr ($second, 0, 1) eq '.') { |
2173 |
|
|
$self->{onerror}->(node => $input_node, |
2174 |
|
|
type => 'dateortime:syntax error'); |
2175 |
|
|
} |
2176 |
|
|
|
2177 |
|
|
$self->{onerror}->(node => $input_node, type => 'datetime:bad second') |
2178 |
|
|
if $second >= 60; |
2179 |
|
|
} |
2180 |
|
|
} else { |
2181 |
|
|
$self->{onerror}->(node => $input_node, |
2182 |
|
|
type => 'dateortime:syntax error'); |
2183 |
|
|
} |
2184 |
|
|
|
2185 |
wakaba |
1.29 |
return $HTMLPhrasingContentChecker->($self, $todo); |
2186 |
wakaba |
1.1 |
}, |
2187 |
|
|
}; |
2188 |
|
|
|
2189 |
|
|
$Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element" |
2190 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2191 |
|
|
value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
2192 |
|
|
min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
2193 |
|
|
low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
2194 |
|
|
high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
2195 |
|
|
max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
2196 |
|
|
optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
2197 |
|
|
}), |
2198 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2199 |
wakaba |
1.1 |
}; |
2200 |
|
|
|
2201 |
|
|
$Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content |
2202 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2203 |
|
|
value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }), |
2204 |
|
|
max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }), |
2205 |
|
|
}), |
2206 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2207 |
wakaba |
1.1 |
}; |
2208 |
|
|
|
2209 |
|
|
$Element->{$HTML_NS}->{code} = { |
2210 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2211 |
|
|
## NOTE: Though |title| has special semantics, |
2212 |
|
|
## syntatically same as the |title| as global attribute. |
2213 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2214 |
wakaba |
1.1 |
}; |
2215 |
|
|
|
2216 |
|
|
$Element->{$HTML_NS}->{var} = { |
2217 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2218 |
|
|
## NOTE: Though |title| has special semantics, |
2219 |
|
|
## syntatically same as the |title| as global attribute. |
2220 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2221 |
wakaba |
1.1 |
}; |
2222 |
|
|
|
2223 |
|
|
$Element->{$HTML_NS}->{samp} = { |
2224 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2225 |
|
|
## NOTE: Though |title| has special semantics, |
2226 |
|
|
## syntatically same as the |title| as global attribute. |
2227 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2228 |
wakaba |
1.1 |
}; |
2229 |
|
|
|
2230 |
|
|
$Element->{$HTML_NS}->{kbd} = { |
2231 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2232 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2233 |
wakaba |
1.1 |
}; |
2234 |
|
|
|
2235 |
|
|
$Element->{$HTML_NS}->{sub} = { |
2236 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2237 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2238 |
wakaba |
1.1 |
}; |
2239 |
|
|
|
2240 |
|
|
$Element->{$HTML_NS}->{sup} = { |
2241 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2242 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2243 |
wakaba |
1.1 |
}; |
2244 |
|
|
|
2245 |
|
|
$Element->{$HTML_NS}->{span} = { |
2246 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2247 |
|
|
## NOTE: Though |title| has special semantics, |
2248 |
|
|
## syntatically same as the |title| as global attribute. |
2249 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2250 |
wakaba |
1.1 |
}; |
2251 |
|
|
|
2252 |
|
|
$Element->{$HTML_NS}->{i} = { |
2253 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2254 |
|
|
## NOTE: Though |title| has special semantics, |
2255 |
|
|
## syntatically same as the |title| as global attribute. |
2256 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2257 |
wakaba |
1.1 |
}; |
2258 |
|
|
|
2259 |
|
|
$Element->{$HTML_NS}->{b} = { |
2260 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2261 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2262 |
wakaba |
1.1 |
}; |
2263 |
|
|
|
2264 |
|
|
$Element->{$HTML_NS}->{bdo} = { |
2265 |
|
|
attrs_checker => sub { |
2266 |
|
|
my ($self, $todo) = @_; |
2267 |
|
|
$GetHTMLAttrsChecker->({})->($self, $todo); |
2268 |
|
|
unless ($todo->{node}->has_attribute_ns (undef, 'dir')) { |
2269 |
|
|
$self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir'); |
2270 |
|
|
} |
2271 |
|
|
}, |
2272 |
|
|
## ISSUE: The spec does not directly say that |dir| is a enumerated attr. |
2273 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
2274 |
wakaba |
1.1 |
}; |
2275 |
|
|
|
2276 |
wakaba |
1.29 |
=pod |
2277 |
|
|
|
2278 |
|
|
## TODO: |
2279 |
|
|
|
2280 |
|
|
+ |
2281 |
|
|
+ <p>Partly because of the confusion described above, authors are |
2282 |
|
|
+ strongly recommended to always mark up all paragraphs with the |
2283 |
|
|
+ <code>p</code> element, and to not have any <code>ins</code> or |
2284 |
|
|
+ <code>del</code> elements that cross across any <span |
2285 |
|
|
+ title="paragraph">implied paragraphs</span>.</p> |
2286 |
|
|
+ |
2287 |
|
|
(An informative note) |
2288 |
|
|
|
2289 |
|
|
<p><code>ins</code> elements should not cross <span |
2290 |
|
|
+ title="paragraph">implied paragraph</span> boundaries.</p> |
2291 |
|
|
(normative) |
2292 |
|
|
|
2293 |
|
|
+ <p><code>del</code> elements should not cross <span |
2294 |
|
|
+ title="paragraph">implied paragraph</span> boundaries.</p> |
2295 |
|
|
(normative) |
2296 |
|
|
|
2297 |
|
|
=cut |
2298 |
|
|
|
2299 |
wakaba |
1.1 |
$Element->{$HTML_NS}->{ins} = { |
2300 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2301 |
|
|
cite => $HTMLURIAttrChecker, |
2302 |
|
|
datetime => $HTMLDatetimeAttrChecker, |
2303 |
|
|
}), |
2304 |
|
|
checker => $HTMLTransparentChecker, |
2305 |
|
|
}; |
2306 |
|
|
|
2307 |
|
|
$Element->{$HTML_NS}->{del} = { |
2308 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2309 |
|
|
cite => $HTMLURIAttrChecker, |
2310 |
|
|
datetime => $HTMLDatetimeAttrChecker, |
2311 |
|
|
}), |
2312 |
|
|
checker => sub { |
2313 |
|
|
my ($self, $todo) = @_; |
2314 |
wakaba |
1.29 |
my $sig_flag = $todo->{flag}->{has_descendant}->{significant}; |
2315 |
|
|
my ($new_todos) = $HTMLTransparentChecker->($self, $todo); |
2316 |
|
|
push @$new_todos, {type => 'code', code => sub { |
2317 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 0; |
2318 |
|
|
}} if not $sig_flag; |
2319 |
|
|
return $new_todos; |
2320 |
wakaba |
1.1 |
}, |
2321 |
|
|
}; |
2322 |
|
|
|
2323 |
wakaba |
1.35 |
$Element->{$HTML_NS}->{figure} = { |
2324 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2325 |
|
|
checker => sub { |
2326 |
|
|
my ($self, $todo) = @_; |
2327 |
|
|
my $el = $todo->{node}; |
2328 |
|
|
my $new_todos = []; |
2329 |
|
|
my @nodes = (@{$el->child_nodes}); |
2330 |
|
|
|
2331 |
|
|
my $old_values = {significant => |
2332 |
|
|
$todo->{flag}->{has_descendant}->{significant}}; |
2333 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 0; |
2334 |
|
|
|
2335 |
|
|
my $has_legend; |
2336 |
|
|
my $has_non_legend; |
2337 |
|
|
my $has_non_style; |
2338 |
|
|
while (@nodes) { |
2339 |
|
|
my $node = shift @nodes; |
2340 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
2341 |
|
|
|
2342 |
|
|
my $nt = $node->node_type; |
2343 |
|
|
if ($nt == 1) { |
2344 |
|
|
my $node_ns = $node->namespace_uri; |
2345 |
|
|
$node_ns = '' unless defined $node_ns; |
2346 |
|
|
my $node_ln = $node->manakai_local_name; |
2347 |
|
|
if ($self->{minuses}->{$node_ns}->{$node_ln}) { |
2348 |
|
|
$has_non_legend = 1; |
2349 |
|
|
$self->{onerror}->(node => $node, |
2350 |
|
|
type => 'element not allowed:minus', |
2351 |
|
|
level => $self->{must_level}); |
2352 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'legend') { |
2353 |
|
|
if ($has_legend) { |
2354 |
|
|
if (ref $has_legend) { |
2355 |
|
|
$self->{onerror}->(node => $has_legend, |
2356 |
|
|
type => 'element not allowed:figure legend', |
2357 |
|
|
level => $self->{must_level}); |
2358 |
|
|
$has_legend = $node; |
2359 |
|
|
} else { |
2360 |
|
|
## NOTE: The first child element was a |legend|. |
2361 |
|
|
$self->{onerror}->(node => $node, |
2362 |
|
|
type => 'element not allowed:figure legend', |
2363 |
|
|
level => $self->{must_level}); |
2364 |
|
|
} |
2365 |
|
|
} elsif ($has_non_legend) { |
2366 |
|
|
undef $has_non_legend; |
2367 |
|
|
$has_legend = $node; |
2368 |
|
|
} else { |
2369 |
|
|
$has_legend = 1; |
2370 |
|
|
} |
2371 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') { |
2372 |
|
|
$has_non_legend = 1; |
2373 |
|
|
if ($has_non_style or |
2374 |
|
|
not $node->has_attribute_ns (undef, 'scoped')) { |
2375 |
|
|
$self->{onerror}->(node => $node, |
2376 |
|
|
type => 'element not allowed:prose style', |
2377 |
|
|
level => $self->{must_level}); |
2378 |
|
|
} |
2379 |
|
|
} elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) { |
2380 |
|
|
$has_non_style = 1; |
2381 |
|
|
$has_non_legend = 1; |
2382 |
|
|
if ($HTMLEmbeddedContent->{$node_ns}->{$node_ln}) { |
2383 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
2384 |
|
|
} |
2385 |
|
|
} elsif ($self->{pluses}->{$node_ns}->{$node_ln}) { |
2386 |
|
|
# |
2387 |
|
|
} else { |
2388 |
|
|
$has_non_style = 1; |
2389 |
|
|
$has_non_legend = 1; |
2390 |
|
|
$self->{onerror}->(node => $node, |
2391 |
|
|
type => 'element not allowed:prose', |
2392 |
|
|
level => $self->{must_level}) |
2393 |
|
|
} |
2394 |
|
|
|
2395 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
2396 |
|
|
unshift @nodes, @$sib; |
2397 |
|
|
push @$new_todos, @$ch; |
2398 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
2399 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
2400 |
|
|
$has_non_style = 1; |
2401 |
|
|
$has_non_legend = 1; |
2402 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
2403 |
|
|
} |
2404 |
|
|
} elsif ($nt == 5) { |
2405 |
|
|
unshift @nodes, @{$node->child_nodes}; |
2406 |
|
|
} |
2407 |
|
|
} |
2408 |
|
|
|
2409 |
|
|
if ($has_legend) { |
2410 |
|
|
if (ref $has_legend and $has_non_legend) { |
2411 |
|
|
$self->{onerror}->(node => $has_legend, |
2412 |
|
|
type => 'element not allowed:figure legend', |
2413 |
|
|
level => $self->{must_level}); |
2414 |
|
|
} |
2415 |
|
|
} else { |
2416 |
|
|
$self->{onerror}->(node => $todo->{node}, |
2417 |
|
|
type => 'element missing:legend', |
2418 |
|
|
level => $self->{must_level}); |
2419 |
|
|
} |
2420 |
|
|
|
2421 |
|
|
push @$new_todos, { |
2422 |
|
|
type => 'descendant', node => $todo->{node}, flag => $todo->{flag}, |
2423 |
|
|
old_values => $old_values, |
2424 |
|
|
errors => $HTMLSignificantContentErrors, |
2425 |
|
|
}; |
2426 |
|
|
|
2427 |
|
|
return ($new_todos); |
2428 |
|
|
}, |
2429 |
|
|
}; |
2430 |
wakaba |
1.8 |
## TODO: Test for <nest/> in <figure/> |
2431 |
wakaba |
1.1 |
|
2432 |
|
|
$Element->{$HTML_NS}->{img} = { |
2433 |
|
|
attrs_checker => sub { |
2434 |
|
|
my ($self, $todo) = @_; |
2435 |
|
|
$GetHTMLAttrsChecker->({ |
2436 |
|
|
alt => sub { }, ## NOTE: No syntactical requirement |
2437 |
|
|
src => $HTMLURIAttrChecker, |
2438 |
|
|
usemap => $HTMLUsemapAttrChecker, |
2439 |
|
|
ismap => sub { |
2440 |
|
|
my ($self, $attr, $parent_todo) = @_; |
2441 |
wakaba |
1.15 |
if (not $todo->{flag}->{in_a_href}) { |
2442 |
|
|
$self->{onerror}->(node => $attr, |
2443 |
|
|
type => 'attribute not allowed:ismap'); |
2444 |
wakaba |
1.1 |
} |
2445 |
|
|
$GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo); |
2446 |
|
|
}, |
2447 |
|
|
## TODO: height |
2448 |
|
|
## TODO: width |
2449 |
|
|
})->($self, $todo); |
2450 |
|
|
unless ($todo->{node}->has_attribute_ns (undef, 'alt')) { |
2451 |
wakaba |
1.37 |
$self->{onerror}->(node => $todo->{node}, |
2452 |
|
|
type => 'attribute missing:alt', |
2453 |
|
|
level => $self->{should_level}); |
2454 |
wakaba |
1.1 |
} |
2455 |
|
|
unless ($todo->{node}->has_attribute_ns (undef, 'src')) { |
2456 |
|
|
$self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src'); |
2457 |
|
|
} |
2458 |
|
|
}, |
2459 |
wakaba |
1.25 |
checker => sub { |
2460 |
|
|
my ($self, $todo) = @_; |
2461 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
2462 |
|
|
return $HTMLEmptyChecker->($self, $todo); |
2463 |
|
|
}, |
2464 |
wakaba |
1.1 |
}; |
2465 |
|
|
|
2466 |
|
|
$Element->{$HTML_NS}->{iframe} = { |
2467 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2468 |
|
|
src => $HTMLURIAttrChecker, |
2469 |
|
|
}), |
2470 |
wakaba |
1.25 |
checker => sub { |
2471 |
|
|
my ($self, $todo) = @_; |
2472 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
2473 |
|
|
return $HTMLTextChecker->($self, $todo); |
2474 |
|
|
}, |
2475 |
wakaba |
1.1 |
}; |
2476 |
|
|
|
2477 |
|
|
$Element->{$HTML_NS}->{embed} = { |
2478 |
|
|
attrs_checker => sub { |
2479 |
|
|
my ($self, $todo) = @_; |
2480 |
|
|
my $has_src; |
2481 |
|
|
for my $attr (@{$todo->{node}->attributes}) { |
2482 |
|
|
my $attr_ns = $attr->namespace_uri; |
2483 |
|
|
$attr_ns = '' unless defined $attr_ns; |
2484 |
|
|
my $attr_ln = $attr->manakai_local_name; |
2485 |
|
|
my $checker; |
2486 |
|
|
if ($attr_ns eq '') { |
2487 |
|
|
if ($attr_ln eq 'src') { |
2488 |
|
|
$checker = $HTMLURIAttrChecker; |
2489 |
|
|
$has_src = 1; |
2490 |
|
|
} elsif ($attr_ln eq 'type') { |
2491 |
|
|
$checker = $HTMLIMTAttrChecker; |
2492 |
|
|
} else { |
2493 |
|
|
## TODO: height |
2494 |
|
|
## TODO: width |
2495 |
|
|
$checker = $HTMLAttrChecker->{$attr_ln} |
2496 |
|
|
|| sub { }; ## NOTE: Any local attribute is ok. |
2497 |
|
|
} |
2498 |
|
|
} |
2499 |
|
|
$checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} |
2500 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
2501 |
|
|
if ($checker) { |
2502 |
|
|
$checker->($self, $attr); |
2503 |
|
|
} else { |
2504 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
2505 |
|
|
type => 'attribute'); |
2506 |
|
|
## ISSUE: No comformance createria for global attributes in the spec |
2507 |
|
|
} |
2508 |
|
|
} |
2509 |
|
|
|
2510 |
|
|
unless ($has_src) { |
2511 |
|
|
$self->{onerror}->(node => $todo->{node}, |
2512 |
|
|
type => 'attribute missing:src'); |
2513 |
|
|
} |
2514 |
|
|
}, |
2515 |
wakaba |
1.25 |
checker => sub { |
2516 |
|
|
my ($self, $todo) = @_; |
2517 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
2518 |
|
|
return $HTMLEmptyChecker->($self, $todo); |
2519 |
|
|
}, |
2520 |
wakaba |
1.1 |
}; |
2521 |
|
|
|
2522 |
|
|
$Element->{$HTML_NS}->{object} = { |
2523 |
|
|
attrs_checker => sub { |
2524 |
|
|
my ($self, $todo) = @_; |
2525 |
|
|
$GetHTMLAttrsChecker->({ |
2526 |
|
|
data => $HTMLURIAttrChecker, |
2527 |
|
|
type => $HTMLIMTAttrChecker, |
2528 |
|
|
usemap => $HTMLUsemapAttrChecker, |
2529 |
|
|
## TODO: width |
2530 |
|
|
## TODO: height |
2531 |
|
|
})->($self, $todo); |
2532 |
|
|
unless ($todo->{node}->has_attribute_ns (undef, 'data')) { |
2533 |
|
|
unless ($todo->{node}->has_attribute_ns (undef, 'type')) { |
2534 |
|
|
$self->{onerror}->(node => $todo->{node}, |
2535 |
|
|
type => 'attribute missing:data|type'); |
2536 |
|
|
} |
2537 |
|
|
} |
2538 |
|
|
}, |
2539 |
wakaba |
1.29 |
## NOTE: param*, then transparent. |
2540 |
wakaba |
1.25 |
checker => sub { |
2541 |
|
|
my ($self, $todo) = @_; |
2542 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
2543 |
|
|
return $ElementDefault->{checker}->($self, $todo); ## TODO |
2544 |
|
|
}, |
2545 |
wakaba |
1.8 |
## TODO: Tests for <nest/> in <object/> |
2546 |
wakaba |
1.1 |
}; |
2547 |
|
|
|
2548 |
|
|
$Element->{$HTML_NS}->{param} = { |
2549 |
|
|
attrs_checker => sub { |
2550 |
|
|
my ($self, $todo) = @_; |
2551 |
|
|
$GetHTMLAttrsChecker->({ |
2552 |
|
|
name => sub { }, |
2553 |
|
|
value => sub { }, |
2554 |
|
|
})->($self, $todo); |
2555 |
|
|
unless ($todo->{node}->has_attribute_ns (undef, 'name')) { |
2556 |
|
|
$self->{onerror}->(node => $todo->{node}, |
2557 |
|
|
type => 'attribute missing:name'); |
2558 |
|
|
} |
2559 |
|
|
unless ($todo->{node}->has_attribute_ns (undef, 'value')) { |
2560 |
|
|
$self->{onerror}->(node => $todo->{node}, |
2561 |
|
|
type => 'attribute missing:value'); |
2562 |
|
|
} |
2563 |
|
|
}, |
2564 |
|
|
checker => $HTMLEmptyChecker, |
2565 |
|
|
}; |
2566 |
|
|
|
2567 |
|
|
$Element->{$HTML_NS}->{video} = { |
2568 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2569 |
|
|
src => $HTMLURIAttrChecker, |
2570 |
|
|
## TODO: start, loopstart, loopend, end |
2571 |
|
|
## ISSUE: they MUST be "value time offset"s. Value? |
2572 |
wakaba |
1.11 |
## ISSUE: playcount has no conformance creteria |
2573 |
wakaba |
1.1 |
autoplay => $GetHTMLBooleanAttrChecker->('autoplay'), |
2574 |
|
|
controls => $GetHTMLBooleanAttrChecker->('controls'), |
2575 |
wakaba |
1.11 |
poster => $HTMLURIAttrChecker, ## TODO: not for audio! |
2576 |
|
|
## TODO: width, height (not for audio!) |
2577 |
wakaba |
1.1 |
}), |
2578 |
|
|
checker => sub { |
2579 |
|
|
my ($self, $todo) = @_; |
2580 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
2581 |
wakaba |
1.1 |
|
2582 |
wakaba |
1.29 |
## TODO: |
2583 |
wakaba |
1.1 |
if ($todo->{node}->has_attribute_ns (undef, 'src')) { |
2584 |
wakaba |
1.35 |
return $HTMLProseContentChecker->($self, $todo); |
2585 |
wakaba |
1.1 |
} else { |
2586 |
|
|
return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source') |
2587 |
|
|
->($self, $todo); |
2588 |
|
|
} |
2589 |
|
|
}, |
2590 |
|
|
}; |
2591 |
|
|
|
2592 |
|
|
$Element->{$HTML_NS}->{audio} = { |
2593 |
|
|
attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker}, |
2594 |
|
|
checker => $Element->{$HTML_NS}->{video}->{checker}, |
2595 |
|
|
}; |
2596 |
|
|
|
2597 |
|
|
$Element->{$HTML_NS}->{source} = { |
2598 |
|
|
attrs_checker => sub { |
2599 |
|
|
my ($self, $todo) = @_; |
2600 |
|
|
$GetHTMLAttrsChecker->({ |
2601 |
|
|
src => $HTMLURIAttrChecker, |
2602 |
|
|
type => $HTMLIMTAttrChecker, |
2603 |
|
|
media => $HTMLMQAttrChecker, |
2604 |
|
|
})->($self, $todo); |
2605 |
|
|
unless ($todo->{node}->has_attribute_ns (undef, 'src')) { |
2606 |
|
|
$self->{onerror}->(node => $todo->{node}, |
2607 |
|
|
type => 'attribute missing:src'); |
2608 |
|
|
} |
2609 |
|
|
}, |
2610 |
|
|
checker => $HTMLEmptyChecker, |
2611 |
|
|
}; |
2612 |
|
|
|
2613 |
|
|
$Element->{$HTML_NS}->{canvas} = { |
2614 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2615 |
|
|
height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), |
2616 |
|
|
width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), |
2617 |
|
|
}), |
2618 |
wakaba |
1.25 |
checker => sub { |
2619 |
|
|
my ($self, $todo) = @_; |
2620 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
2621 |
wakaba |
1.29 |
return $HTMLTransparentChecker->($self, $todo); |
2622 |
wakaba |
1.25 |
}, |
2623 |
wakaba |
1.1 |
}; |
2624 |
|
|
|
2625 |
|
|
$Element->{$HTML_NS}->{map} = { |
2626 |
wakaba |
1.4 |
attrs_checker => sub { |
2627 |
|
|
my ($self, $todo) = @_; |
2628 |
|
|
my $has_id; |
2629 |
|
|
$GetHTMLAttrsChecker->({ |
2630 |
|
|
id => sub { |
2631 |
|
|
## NOTE: same as global |id=""|, with |$self->{map}| registeration |
2632 |
|
|
my ($self, $attr) = @_; |
2633 |
|
|
my $value = $attr->value; |
2634 |
|
|
if (length $value > 0) { |
2635 |
|
|
if ($self->{id}->{$value}) { |
2636 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate ID'); |
2637 |
|
|
push @{$self->{id}->{$value}}, $attr; |
2638 |
|
|
} else { |
2639 |
|
|
$self->{id}->{$value} = [$attr]; |
2640 |
|
|
} |
2641 |
wakaba |
1.1 |
} else { |
2642 |
wakaba |
1.4 |
## NOTE: MUST contain at least one character |
2643 |
|
|
$self->{onerror}->(node => $attr, type => 'empty attribute value'); |
2644 |
wakaba |
1.1 |
} |
2645 |
wakaba |
1.4 |
if ($value =~ /[\x09-\x0D\x20]/) { |
2646 |
|
|
$self->{onerror}->(node => $attr, type => 'space in ID'); |
2647 |
|
|
} |
2648 |
|
|
$self->{map}->{$value} ||= $attr; |
2649 |
|
|
$has_id = 1; |
2650 |
|
|
}, |
2651 |
|
|
})->($self, $todo); |
2652 |
|
|
$self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id') |
2653 |
|
|
unless $has_id; |
2654 |
|
|
}, |
2655 |
wakaba |
1.29 |
checker => $HTMLProseContentChecker, |
2656 |
wakaba |
1.1 |
}; |
2657 |
|
|
|
2658 |
|
|
$Element->{$HTML_NS}->{area} = { |
2659 |
|
|
attrs_checker => sub { |
2660 |
|
|
my ($self, $todo) = @_; |
2661 |
|
|
my %attr; |
2662 |
|
|
my $coords; |
2663 |
|
|
for my $attr (@{$todo->{node}->attributes}) { |
2664 |
|
|
my $attr_ns = $attr->namespace_uri; |
2665 |
|
|
$attr_ns = '' unless defined $attr_ns; |
2666 |
|
|
my $attr_ln = $attr->manakai_local_name; |
2667 |
|
|
my $checker; |
2668 |
|
|
if ($attr_ns eq '') { |
2669 |
|
|
$checker = { |
2670 |
|
|
alt => sub { }, |
2671 |
|
|
## NOTE: |alt| value has no conformance creteria. |
2672 |
|
|
shape => $GetHTMLEnumeratedAttrChecker->({ |
2673 |
|
|
circ => -1, circle => 1, |
2674 |
|
|
default => 1, |
2675 |
|
|
poly => 1, polygon => -1, |
2676 |
|
|
rect => 1, rectangle => -1, |
2677 |
|
|
}), |
2678 |
|
|
coords => sub { |
2679 |
|
|
my ($self, $attr) = @_; |
2680 |
|
|
my $value = $attr->value; |
2681 |
|
|
if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) { |
2682 |
|
|
$coords = [split /,/, $value]; |
2683 |
|
|
} else { |
2684 |
|
|
$self->{onerror}->(node => $attr, |
2685 |
|
|
type => 'coords:syntax error'); |
2686 |
|
|
} |
2687 |
|
|
}, |
2688 |
|
|
target => $HTMLTargetAttrChecker, |
2689 |
|
|
href => $HTMLURIAttrChecker, |
2690 |
|
|
ping => $HTMLSpaceURIsAttrChecker, |
2691 |
wakaba |
1.4 |
rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) }, |
2692 |
wakaba |
1.1 |
media => $HTMLMQAttrChecker, |
2693 |
|
|
hreflang => $HTMLLanguageTagAttrChecker, |
2694 |
|
|
type => $HTMLIMTAttrChecker, |
2695 |
|
|
}->{$attr_ln}; |
2696 |
|
|
if ($checker) { |
2697 |
|
|
$attr{$attr_ln} = $attr; |
2698 |
|
|
} else { |
2699 |
|
|
$checker = $HTMLAttrChecker->{$attr_ln}; |
2700 |
|
|
} |
2701 |
|
|
} |
2702 |
|
|
$checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} |
2703 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
2704 |
|
|
if ($checker) { |
2705 |
|
|
$checker->($self, $attr) if ref $checker; |
2706 |
|
|
} else { |
2707 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
2708 |
|
|
type => 'attribute'); |
2709 |
|
|
## ISSUE: No comformance createria for unknown attributes in the spec |
2710 |
|
|
} |
2711 |
|
|
} |
2712 |
|
|
|
2713 |
|
|
if (defined $attr{href}) { |
2714 |
wakaba |
1.4 |
$self->{has_hyperlink_element} = 1; |
2715 |
wakaba |
1.1 |
unless (defined $attr{alt}) { |
2716 |
|
|
$self->{onerror}->(node => $todo->{node}, |
2717 |
|
|
type => 'attribute missing:alt'); |
2718 |
|
|
} |
2719 |
|
|
} else { |
2720 |
|
|
for (qw/target ping rel media hreflang type alt/) { |
2721 |
|
|
if (defined $attr{$_}) { |
2722 |
|
|
$self->{onerror}->(node => $attr{$_}, |
2723 |
|
|
type => 'attribute not allowed'); |
2724 |
|
|
} |
2725 |
|
|
} |
2726 |
|
|
} |
2727 |
|
|
|
2728 |
|
|
my $shape = 'rectangle'; |
2729 |
|
|
if (defined $attr{shape}) { |
2730 |
|
|
$shape = { |
2731 |
|
|
circ => 'circle', circle => 'circle', |
2732 |
|
|
default => 'default', |
2733 |
|
|
poly => 'polygon', polygon => 'polygon', |
2734 |
|
|
rect => 'rectangle', rectangle => 'rectangle', |
2735 |
|
|
}->{lc $attr{shape}->value} || 'rectangle'; |
2736 |
|
|
## TODO: ASCII lowercase? |
2737 |
|
|
} |
2738 |
|
|
|
2739 |
|
|
if ($shape eq 'circle') { |
2740 |
|
|
if (defined $attr{coords}) { |
2741 |
|
|
if (defined $coords) { |
2742 |
|
|
if (@$coords == 3) { |
2743 |
|
|
if ($coords->[2] < 0) { |
2744 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2745 |
|
|
type => 'coords:out of range:2'); |
2746 |
|
|
} |
2747 |
|
|
} else { |
2748 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2749 |
|
|
type => 'coords:number:3:'.@$coords); |
2750 |
|
|
} |
2751 |
|
|
} else { |
2752 |
|
|
## NOTE: A syntax error has been reported. |
2753 |
|
|
} |
2754 |
|
|
} else { |
2755 |
|
|
$self->{onerror}->(node => $todo->{node}, |
2756 |
|
|
type => 'attribute missing:coords'); |
2757 |
|
|
} |
2758 |
|
|
} elsif ($shape eq 'default') { |
2759 |
|
|
if (defined $attr{coords}) { |
2760 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2761 |
|
|
type => 'attribute not allowed'); |
2762 |
|
|
} |
2763 |
|
|
} elsif ($shape eq 'polygon') { |
2764 |
|
|
if (defined $attr{coords}) { |
2765 |
|
|
if (defined $coords) { |
2766 |
|
|
if (@$coords >= 6) { |
2767 |
|
|
unless (@$coords % 2 == 0) { |
2768 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2769 |
|
|
type => 'coords:number:even:'.@$coords); |
2770 |
|
|
} |
2771 |
|
|
} else { |
2772 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2773 |
|
|
type => 'coords:number:>=6:'.@$coords); |
2774 |
|
|
} |
2775 |
|
|
} else { |
2776 |
|
|
## NOTE: A syntax error has been reported. |
2777 |
|
|
} |
2778 |
|
|
} else { |
2779 |
|
|
$self->{onerror}->(node => $todo->{node}, |
2780 |
|
|
type => 'attribute missing:coords'); |
2781 |
|
|
} |
2782 |
|
|
} elsif ($shape eq 'rectangle') { |
2783 |
|
|
if (defined $attr{coords}) { |
2784 |
|
|
if (defined $coords) { |
2785 |
|
|
if (@$coords == 4) { |
2786 |
|
|
unless ($coords->[0] < $coords->[2]) { |
2787 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2788 |
|
|
type => 'coords:out of range:0'); |
2789 |
|
|
} |
2790 |
|
|
unless ($coords->[1] < $coords->[3]) { |
2791 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2792 |
|
|
type => 'coords:out of range:1'); |
2793 |
|
|
} |
2794 |
|
|
} else { |
2795 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2796 |
|
|
type => 'coords:number:4:'.@$coords); |
2797 |
|
|
} |
2798 |
|
|
} else { |
2799 |
|
|
## NOTE: A syntax error has been reported. |
2800 |
|
|
} |
2801 |
|
|
} else { |
2802 |
|
|
$self->{onerror}->(node => $todo->{node}, |
2803 |
|
|
type => 'attribute missing:coords'); |
2804 |
|
|
} |
2805 |
|
|
} |
2806 |
|
|
}, |
2807 |
|
|
checker => $HTMLEmptyChecker, |
2808 |
|
|
}; |
2809 |
|
|
## TODO: only in map |
2810 |
|
|
|
2811 |
|
|
$Element->{$HTML_NS}->{table} = { |
2812 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2813 |
|
|
checker => sub { |
2814 |
|
|
my ($self, $todo) = @_; |
2815 |
|
|
my $el = $todo->{node}; |
2816 |
|
|
my $new_todos = []; |
2817 |
|
|
my @nodes = (@{$el->child_nodes}); |
2818 |
|
|
|
2819 |
|
|
my $phase = 'before caption'; |
2820 |
|
|
my $has_tfoot; |
2821 |
|
|
while (@nodes) { |
2822 |
|
|
my $node = shift @nodes; |
2823 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
2824 |
|
|
|
2825 |
|
|
my $nt = $node->node_type; |
2826 |
|
|
if ($nt == 1) { |
2827 |
|
|
my $node_ns = $node->namespace_uri; |
2828 |
|
|
$node_ns = '' unless defined $node_ns; |
2829 |
|
|
my $node_ln = $node->manakai_local_name; |
2830 |
|
|
## NOTE: |minuses| list is not checked since redundant |
2831 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
2832 |
|
|
# |
2833 |
|
|
} elsif ($phase eq 'in tbodys') { |
2834 |
wakaba |
1.1 |
if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') { |
2835 |
|
|
#$phase = 'in tbodys'; |
2836 |
|
|
} elsif (not $has_tfoot and |
2837 |
|
|
$node_ns eq $HTML_NS and $node_ln eq 'tfoot') { |
2838 |
|
|
$phase = 'after tfoot'; |
2839 |
|
|
$has_tfoot = 1; |
2840 |
|
|
} else { |
2841 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
2842 |
|
|
} |
2843 |
|
|
} elsif ($phase eq 'in trs') { |
2844 |
|
|
if ($node_ns eq $HTML_NS and $node_ln eq 'tr') { |
2845 |
|
|
#$phase = 'in trs'; |
2846 |
|
|
} elsif (not $has_tfoot and |
2847 |
|
|
$node_ns eq $HTML_NS and $node_ln eq 'tfoot') { |
2848 |
|
|
$phase = 'after tfoot'; |
2849 |
|
|
$has_tfoot = 1; |
2850 |
|
|
} else { |
2851 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
2852 |
|
|
} |
2853 |
|
|
} elsif ($phase eq 'after thead') { |
2854 |
|
|
if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') { |
2855 |
|
|
$phase = 'in tbodys'; |
2856 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') { |
2857 |
|
|
$phase = 'in trs'; |
2858 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') { |
2859 |
|
|
$phase = 'in tbodys'; |
2860 |
|
|
$has_tfoot = 1; |
2861 |
|
|
} else { |
2862 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
2863 |
|
|
} |
2864 |
|
|
} elsif ($phase eq 'in colgroup') { |
2865 |
|
|
if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') { |
2866 |
|
|
$phase = 'in colgroup'; |
2867 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') { |
2868 |
|
|
$phase = 'after thead'; |
2869 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') { |
2870 |
|
|
$phase = 'in tbodys'; |
2871 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') { |
2872 |
|
|
$phase = 'in trs'; |
2873 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') { |
2874 |
|
|
$phase = 'in tbodys'; |
2875 |
|
|
$has_tfoot = 1; |
2876 |
|
|
} else { |
2877 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
2878 |
|
|
} |
2879 |
|
|
} elsif ($phase eq 'before caption') { |
2880 |
|
|
if ($node_ns eq $HTML_NS and $node_ln eq 'caption') { |
2881 |
|
|
$phase = 'in colgroup'; |
2882 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') { |
2883 |
|
|
$phase = 'in colgroup'; |
2884 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') { |
2885 |
|
|
$phase = 'after thead'; |
2886 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') { |
2887 |
|
|
$phase = 'in tbodys'; |
2888 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') { |
2889 |
|
|
$phase = 'in trs'; |
2890 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') { |
2891 |
|
|
$phase = 'in tbodys'; |
2892 |
|
|
$has_tfoot = 1; |
2893 |
|
|
} else { |
2894 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
2895 |
|
|
} |
2896 |
|
|
} else { # after tfoot |
2897 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
2898 |
|
|
} |
2899 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
2900 |
|
|
unshift @nodes, @$sib; |
2901 |
|
|
push @$new_todos, @$ch; |
2902 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
2903 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
2904 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
2905 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
2906 |
wakaba |
1.1 |
} |
2907 |
|
|
} elsif ($nt == 5) { |
2908 |
|
|
unshift @nodes, @{$node->child_nodes}; |
2909 |
|
|
} |
2910 |
|
|
} |
2911 |
|
|
|
2912 |
|
|
## Table model errors |
2913 |
|
|
require Whatpm::HTMLTable; |
2914 |
|
|
Whatpm::HTMLTable->form_table ($todo->{node}, sub { |
2915 |
|
|
my %opt = @_; |
2916 |
|
|
$self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node}); |
2917 |
|
|
}); |
2918 |
|
|
push @{$self->{return}->{table}}, $todo->{node}; |
2919 |
|
|
|
2920 |
|
|
return ($new_todos); |
2921 |
|
|
}, |
2922 |
|
|
}; |
2923 |
|
|
|
2924 |
|
|
$Element->{$HTML_NS}->{caption} = { |
2925 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2926 |
wakaba |
1.36 |
checker => $HTMLPhrasingContentChecker, |
2927 |
wakaba |
1.1 |
}; |
2928 |
|
|
|
2929 |
|
|
$Element->{$HTML_NS}->{colgroup} = { |
2930 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2931 |
|
|
span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
2932 |
|
|
## NOTE: Defined only if "the |colgroup| element contains no |col| elements" |
2933 |
|
|
## TODO: "attribute not supported" if |col|. |
2934 |
|
|
## ISSUE: MUST NOT if any |col|? |
2935 |
|
|
## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)? |
2936 |
|
|
}), |
2937 |
|
|
checker => sub { |
2938 |
|
|
my ($self, $todo) = @_; |
2939 |
|
|
my $el = $todo->{node}; |
2940 |
|
|
my $new_todos = []; |
2941 |
|
|
my @nodes = (@{$el->child_nodes}); |
2942 |
|
|
|
2943 |
|
|
while (@nodes) { |
2944 |
|
|
my $node = shift @nodes; |
2945 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
2946 |
|
|
|
2947 |
|
|
my $nt = $node->node_type; |
2948 |
|
|
if ($nt == 1) { |
2949 |
|
|
my $node_ns = $node->namespace_uri; |
2950 |
|
|
$node_ns = '' unless defined $node_ns; |
2951 |
|
|
my $node_ln = $node->manakai_local_name; |
2952 |
|
|
## NOTE: |minuses| list is not checked since redundant |
2953 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
2954 |
|
|
# |
2955 |
|
|
} elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) { |
2956 |
wakaba |
1.1 |
$self->{onerror}->(node => $node, type => 'element not allowed'); |
2957 |
|
|
} |
2958 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
2959 |
|
|
unshift @nodes, @$sib; |
2960 |
|
|
push @$new_todos, @$ch; |
2961 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
2962 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
2963 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
2964 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
2965 |
wakaba |
1.1 |
} |
2966 |
|
|
} elsif ($nt == 5) { |
2967 |
|
|
unshift @nodes, @{$node->child_nodes}; |
2968 |
|
|
} |
2969 |
|
|
} |
2970 |
|
|
return ($new_todos); |
2971 |
|
|
}, |
2972 |
|
|
}; |
2973 |
|
|
|
2974 |
|
|
$Element->{$HTML_NS}->{col} = { |
2975 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
2976 |
|
|
span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
2977 |
|
|
}), |
2978 |
|
|
checker => $HTMLEmptyChecker, |
2979 |
|
|
}; |
2980 |
|
|
|
2981 |
|
|
$Element->{$HTML_NS}->{tbody} = { |
2982 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
2983 |
|
|
checker => sub { |
2984 |
|
|
my ($self, $todo) = @_; |
2985 |
|
|
my $el = $todo->{node}; |
2986 |
|
|
my $new_todos = []; |
2987 |
|
|
my @nodes = (@{$el->child_nodes}); |
2988 |
|
|
|
2989 |
|
|
my $has_tr; |
2990 |
|
|
while (@nodes) { |
2991 |
|
|
my $node = shift @nodes; |
2992 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
2993 |
|
|
|
2994 |
|
|
my $nt = $node->node_type; |
2995 |
|
|
if ($nt == 1) { |
2996 |
|
|
my $node_ns = $node->namespace_uri; |
2997 |
|
|
$node_ns = '' unless defined $node_ns; |
2998 |
|
|
my $node_ln = $node->manakai_local_name; |
2999 |
|
|
## NOTE: |minuses| list is not checked since redundant |
3000 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
3001 |
|
|
# |
3002 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') { |
3003 |
wakaba |
1.1 |
$has_tr = 1; |
3004 |
|
|
} else { |
3005 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
3006 |
|
|
} |
3007 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
3008 |
|
|
unshift @nodes, @$sib; |
3009 |
|
|
push @$new_todos, @$ch; |
3010 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
3011 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
3012 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
3013 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
3014 |
wakaba |
1.1 |
} |
3015 |
|
|
} elsif ($nt == 5) { |
3016 |
|
|
unshift @nodes, @{$node->child_nodes}; |
3017 |
|
|
} |
3018 |
|
|
} |
3019 |
|
|
unless ($has_tr) { |
3020 |
|
|
$self->{onerror}->(node => $el, type => 'child element missing:tr'); |
3021 |
|
|
} |
3022 |
|
|
return ($new_todos); |
3023 |
|
|
}, |
3024 |
|
|
}; |
3025 |
|
|
|
3026 |
|
|
$Element->{$HTML_NS}->{thead} = { |
3027 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
3028 |
|
|
checker => $Element->{$HTML_NS}->{tbody}->{checker}, |
3029 |
|
|
}; |
3030 |
|
|
|
3031 |
|
|
$Element->{$HTML_NS}->{tfoot} = { |
3032 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
3033 |
|
|
checker => $Element->{$HTML_NS}->{tbody}->{checker}, |
3034 |
|
|
}; |
3035 |
|
|
|
3036 |
|
|
$Element->{$HTML_NS}->{tr} = { |
3037 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
3038 |
|
|
checker => sub { |
3039 |
|
|
my ($self, $todo) = @_; |
3040 |
|
|
my $el = $todo->{node}; |
3041 |
|
|
my $new_todos = []; |
3042 |
|
|
my @nodes = (@{$el->child_nodes}); |
3043 |
|
|
|
3044 |
|
|
my $has_td; |
3045 |
|
|
while (@nodes) { |
3046 |
|
|
my $node = shift @nodes; |
3047 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
3048 |
|
|
|
3049 |
|
|
my $nt = $node->node_type; |
3050 |
|
|
if ($nt == 1) { |
3051 |
|
|
my $node_ns = $node->namespace_uri; |
3052 |
|
|
$node_ns = '' unless defined $node_ns; |
3053 |
|
|
my $node_ln = $node->manakai_local_name; |
3054 |
|
|
## NOTE: |minuses| list is not checked since redundant |
3055 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
3056 |
|
|
# |
3057 |
|
|
} elsif ($node_ns eq $HTML_NS and |
3058 |
|
|
($node_ln eq 'td' or $node_ln eq 'th')) { |
3059 |
wakaba |
1.1 |
$has_td = 1; |
3060 |
|
|
} else { |
3061 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
3062 |
|
|
} |
3063 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
3064 |
|
|
unshift @nodes, @$sib; |
3065 |
|
|
push @$new_todos, @$ch; |
3066 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
3067 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
3068 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
3069 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
3070 |
wakaba |
1.1 |
} |
3071 |
|
|
} elsif ($nt == 5) { |
3072 |
|
|
unshift @nodes, @{$node->child_nodes}; |
3073 |
|
|
} |
3074 |
|
|
} |
3075 |
|
|
unless ($has_td) { |
3076 |
|
|
$self->{onerror}->(node => $el, type => 'child element missing:td|th'); |
3077 |
|
|
} |
3078 |
|
|
return ($new_todos); |
3079 |
|
|
}, |
3080 |
|
|
}; |
3081 |
|
|
|
3082 |
|
|
$Element->{$HTML_NS}->{td} = { |
3083 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
3084 |
|
|
colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
3085 |
|
|
rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
3086 |
|
|
}), |
3087 |
wakaba |
1.29 |
checker => $HTMLProseContentChecker, |
3088 |
wakaba |
1.1 |
}; |
3089 |
|
|
|
3090 |
|
|
$Element->{$HTML_NS}->{th} = { |
3091 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
3092 |
|
|
colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
3093 |
|
|
rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
3094 |
|
|
scope => $GetHTMLEnumeratedAttrChecker |
3095 |
|
|
->({row => 1, col => 1, rowgroup => 1, colgroup => 1}), |
3096 |
|
|
}), |
3097 |
wakaba |
1.31 |
checker => $HTMLPhrasingContentChecker, |
3098 |
wakaba |
1.1 |
}; |
3099 |
|
|
|
3100 |
|
|
## TODO: forms |
3101 |
wakaba |
1.8 |
## TODO: Tests for <nest/> in form elements |
3102 |
wakaba |
1.1 |
|
3103 |
|
|
$Element->{$HTML_NS}->{script} = { |
3104 |
wakaba |
1.9 |
attrs_checker => $GetHTMLAttrsChecker->({ |
3105 |
wakaba |
1.1 |
src => $HTMLURIAttrChecker, |
3106 |
|
|
defer => $GetHTMLBooleanAttrChecker->('defer'), |
3107 |
|
|
async => $GetHTMLBooleanAttrChecker->('async'), |
3108 |
|
|
type => $HTMLIMTAttrChecker, |
3109 |
wakaba |
1.9 |
}), |
3110 |
wakaba |
1.1 |
checker => sub { |
3111 |
|
|
my ($self, $todo) = @_; |
3112 |
|
|
|
3113 |
|
|
if ($todo->{node}->has_attribute_ns (undef, 'src')) { |
3114 |
|
|
return $HTMLEmptyChecker->($self, $todo); |
3115 |
|
|
} else { |
3116 |
|
|
## NOTE: No content model conformance in HTML5 spec. |
3117 |
|
|
my $type = $todo->{node}->get_attribute_ns (undef, 'type'); |
3118 |
|
|
my $language = $todo->{node}->get_attribute_ns (undef, 'language'); |
3119 |
|
|
if ((defined $type and $type eq '') or |
3120 |
|
|
(defined $language and $language eq '')) { |
3121 |
|
|
$type = 'text/javascript'; |
3122 |
|
|
} elsif (defined $type) { |
3123 |
|
|
# |
3124 |
|
|
} elsif (defined $language) { |
3125 |
|
|
$type = 'text/' . $language; |
3126 |
|
|
} else { |
3127 |
|
|
$type = 'text/javascript'; |
3128 |
|
|
} |
3129 |
|
|
$self->{onerror}->(node => $todo->{node}, level => 'unsupported', |
3130 |
|
|
type => 'script:'.$type); ## TODO: $type normalization |
3131 |
|
|
return $AnyChecker->($self, $todo); |
3132 |
|
|
} |
3133 |
|
|
}, |
3134 |
|
|
}; |
3135 |
wakaba |
1.25 |
## ISSUE: Significant check and text child node |
3136 |
wakaba |
1.1 |
|
3137 |
|
|
## NOTE: When script is disabled. |
3138 |
|
|
$Element->{$HTML_NS}->{noscript} = { |
3139 |
wakaba |
1.3 |
attrs_checker => sub { |
3140 |
|
|
my ($self, $todo) = @_; |
3141 |
|
|
|
3142 |
|
|
## NOTE: This check is inserted in |attrs_checker|, rather than |checker|, |
3143 |
|
|
## since the later is not invoked when the |noscript| is used as a |
3144 |
|
|
## transparent element. |
3145 |
|
|
unless ($todo->{node}->owner_document->manakai_is_html) { |
3146 |
|
|
$self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript'); |
3147 |
|
|
} |
3148 |
|
|
|
3149 |
|
|
$GetHTMLAttrsChecker->({})->($self, $todo); |
3150 |
|
|
}, |
3151 |
wakaba |
1.1 |
checker => sub { |
3152 |
|
|
my ($self, $todo) = @_; |
3153 |
|
|
|
3154 |
wakaba |
1.3 |
if ($todo->{flag}->{in_head}) { |
3155 |
|
|
my $new_todos = []; |
3156 |
|
|
my @nodes = (@{$todo->{node}->child_nodes}); |
3157 |
|
|
|
3158 |
|
|
while (@nodes) { |
3159 |
|
|
my $node = shift @nodes; |
3160 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
3161 |
|
|
|
3162 |
|
|
my $nt = $node->node_type; |
3163 |
|
|
if ($nt == 1) { |
3164 |
|
|
my $node_ns = $node->namespace_uri; |
3165 |
|
|
$node_ns = '' unless defined $node_ns; |
3166 |
|
|
my $node_ln = $node->manakai_local_name; |
3167 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
3168 |
|
|
# |
3169 |
|
|
} elsif ($node_ns eq $HTML_NS) { |
3170 |
wakaba |
1.34 |
if ($node_ln eq 'link') { |
3171 |
wakaba |
1.3 |
# |
3172 |
wakaba |
1.34 |
} elsif ($node_ln eq 'style') { |
3173 |
|
|
if ($node->has_attribute_ns (undef, 'scoped')) { |
3174 |
|
|
$self->{onerror}->(node => $node, |
3175 |
|
|
type => 'element not allowed:head noscript', |
3176 |
|
|
level => $self->{must_level}); |
3177 |
|
|
} |
3178 |
wakaba |
1.3 |
} elsif ($node_ln eq 'meta') { |
3179 |
wakaba |
1.34 |
if ($node->has_attribute_ns (undef, 'charset')) { |
3180 |
|
|
## NOTE: Non-conforming. An error is raised by |
3181 |
|
|
## |meta|'s checker. |
3182 |
wakaba |
1.3 |
} else { |
3183 |
wakaba |
1.34 |
my $http_equiv_attr |
3184 |
|
|
= $node->get_attribute_node_ns (undef, 'http-equiv'); |
3185 |
|
|
if ($http_equiv_attr) { |
3186 |
|
|
## TODO: case |
3187 |
|
|
if (lc $http_equiv_attr->value eq 'content-type') { |
3188 |
|
|
## NOTE: Non-conforming. An error is raised by |
3189 |
|
|
## |meta|'s checker. |
3190 |
|
|
} else { |
3191 |
|
|
# |
3192 |
|
|
} |
3193 |
|
|
} else { |
3194 |
|
|
$self->{onerror}->(node => $node, |
3195 |
|
|
type => 'element not allowed:head noscript', |
3196 |
|
|
level => $self->{must_level}); |
3197 |
|
|
} |
3198 |
wakaba |
1.3 |
} |
3199 |
|
|
} else { |
3200 |
wakaba |
1.34 |
$self->{onerror}->(node => $node, |
3201 |
|
|
type => 'element not allowed:head noscript', |
3202 |
|
|
level => $self->{must_level}); |
3203 |
wakaba |
1.3 |
} |
3204 |
|
|
} else { |
3205 |
wakaba |
1.34 |
$self->{onerror}->(node => $node, |
3206 |
|
|
type => 'element not allowed:head noscript', |
3207 |
|
|
level => $self->{must_level}); |
3208 |
wakaba |
1.3 |
} |
3209 |
|
|
|
3210 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
3211 |
|
|
unshift @nodes, @$sib; |
3212 |
|
|
push @$new_todos, @$ch; |
3213 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
3214 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
3215 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
3216 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
3217 |
wakaba |
1.3 |
} |
3218 |
|
|
} elsif ($nt == 5) { |
3219 |
|
|
unshift @nodes, @{$node->child_nodes}; |
3220 |
|
|
} |
3221 |
|
|
} |
3222 |
|
|
return ($new_todos); |
3223 |
|
|
} else { |
3224 |
|
|
my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}}); |
3225 |
wakaba |
1.29 |
my ($sib, $ch) = $HTMLTransparentChecker->($self, $todo); |
3226 |
wakaba |
1.3 |
push @$sib, $end; |
3227 |
|
|
return ($sib, $ch); |
3228 |
|
|
} |
3229 |
wakaba |
1.1 |
}, |
3230 |
|
|
}; |
3231 |
wakaba |
1.3 |
|
3232 |
|
|
## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head> |
3233 |
wakaba |
1.1 |
|
3234 |
|
|
$Element->{$HTML_NS}->{'event-source'} = { |
3235 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
3236 |
|
|
src => $HTMLURIAttrChecker, |
3237 |
|
|
}), |
3238 |
|
|
checker => $HTMLEmptyChecker, |
3239 |
|
|
}; |
3240 |
|
|
|
3241 |
|
|
$Element->{$HTML_NS}->{details} = { |
3242 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
3243 |
|
|
open => $GetHTMLBooleanAttrChecker->('open'), |
3244 |
|
|
}), |
3245 |
|
|
checker => sub { |
3246 |
|
|
my ($self, $todo) = @_; |
3247 |
|
|
|
3248 |
wakaba |
1.29 |
## TODO: |
3249 |
wakaba |
1.1 |
my ($sib, $ch) |
3250 |
|
|
= $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend') |
3251 |
|
|
->($self, $todo); |
3252 |
|
|
return ($sib, $ch); |
3253 |
|
|
}, |
3254 |
|
|
}; |
3255 |
|
|
|
3256 |
|
|
$Element->{$HTML_NS}->{datagrid} = { |
3257 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
3258 |
|
|
disabled => $GetHTMLBooleanAttrChecker->('disabled'), |
3259 |
|
|
multiple => $GetHTMLBooleanAttrChecker->('multiple'), |
3260 |
|
|
}), |
3261 |
|
|
checker => sub { |
3262 |
|
|
my ($self, $todo) = @_; |
3263 |
|
|
my $el = $todo->{node}; |
3264 |
|
|
my $new_todos = []; |
3265 |
|
|
my @nodes = (@{$el->child_nodes}); |
3266 |
|
|
|
3267 |
wakaba |
1.25 |
my $old_values = {significant => |
3268 |
|
|
$todo->{flag}->{has_descendant}->{significant}}; |
3269 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 0; |
3270 |
|
|
|
3271 |
wakaba |
1.1 |
my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}}); |
3272 |
|
|
|
3273 |
wakaba |
1.29 |
## Prose -(text* table Prose*) | table | select | datalist | Empty |
3274 |
wakaba |
1.1 |
my $mode = 'any'; |
3275 |
|
|
while (@nodes) { |
3276 |
|
|
my $node = shift @nodes; |
3277 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
3278 |
|
|
|
3279 |
|
|
my $nt = $node->node_type; |
3280 |
|
|
if ($nt == 1) { |
3281 |
|
|
my $node_ns = $node->namespace_uri; |
3282 |
|
|
$node_ns = '' unless defined $node_ns; |
3283 |
|
|
my $node_ln = $node->manakai_local_name; |
3284 |
|
|
my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln}; |
3285 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
3286 |
|
|
# |
3287 |
wakaba |
1.29 |
} elsif ($mode eq 'prose') { |
3288 |
wakaba |
1.1 |
$not_allowed = 1 |
3289 |
wakaba |
1.29 |
unless $HTMLProseContent->{$node_ns}->{$node_ln}; |
3290 |
wakaba |
1.1 |
} elsif ($mode eq 'any') { |
3291 |
|
|
if ($node_ns eq $HTML_NS and |
3292 |
|
|
{table => 1, select => 1, datalist => 1}->{$node_ln}) { |
3293 |
|
|
$mode = 'none'; |
3294 |
wakaba |
1.29 |
} elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) { |
3295 |
|
|
$mode = 'prose'; |
3296 |
wakaba |
1.1 |
} else { |
3297 |
|
|
$not_allowed = 1; |
3298 |
|
|
} |
3299 |
|
|
} else { |
3300 |
|
|
$not_allowed = 1; |
3301 |
|
|
} |
3302 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed') |
3303 |
|
|
if $not_allowed; |
3304 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
3305 |
|
|
unshift @nodes, @$sib; |
3306 |
|
|
push @$new_todos, @$ch; |
3307 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
3308 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
3309 |
wakaba |
1.29 |
if ($mode eq 'prose') { |
3310 |
|
|
# |
3311 |
|
|
} elsif ($mode eq 'any') { |
3312 |
|
|
$mode = 'prose'; |
3313 |
|
|
} else { |
3314 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
3315 |
|
|
} |
3316 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
3317 |
wakaba |
1.1 |
} |
3318 |
|
|
} elsif ($nt == 5) { |
3319 |
|
|
unshift @nodes, @{$node->child_nodes}; |
3320 |
|
|
} |
3321 |
|
|
} |
3322 |
|
|
|
3323 |
|
|
push @$new_todos, $end; |
3324 |
wakaba |
1.25 |
|
3325 |
|
|
push @$new_todos, { |
3326 |
|
|
type => 'descendant', node => $todo->{node}, flag => $todo->{flag}, |
3327 |
|
|
old_values => $old_values, |
3328 |
wakaba |
1.26 |
errors => $HTMLSignificantContentErrors, |
3329 |
wakaba |
1.25 |
}; |
3330 |
|
|
|
3331 |
wakaba |
1.1 |
return ($new_todos); |
3332 |
wakaba |
1.29 |
|
3333 |
|
|
## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa" |
3334 |
|
|
## are not disallowed (assuming that form control contents are also |
3335 |
|
|
## prose content). |
3336 |
wakaba |
1.1 |
}, |
3337 |
|
|
}; |
3338 |
|
|
|
3339 |
|
|
$Element->{$HTML_NS}->{command} = { |
3340 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
3341 |
|
|
checked => $GetHTMLBooleanAttrChecker->('checked'), |
3342 |
|
|
default => $GetHTMLBooleanAttrChecker->('default'), |
3343 |
|
|
disabled => $GetHTMLBooleanAttrChecker->('disabled'), |
3344 |
|
|
hidden => $GetHTMLBooleanAttrChecker->('hidden'), |
3345 |
|
|
icon => $HTMLURIAttrChecker, |
3346 |
|
|
label => sub { }, ## NOTE: No conformance creteria |
3347 |
|
|
radiogroup => sub { }, ## NOTE: No conformance creteria |
3348 |
|
|
## NOTE: |title| has special semantics, but no syntactical difference |
3349 |
|
|
type => sub { |
3350 |
|
|
my ($self, $attr) = @_; |
3351 |
|
|
my $value = $attr->value; |
3352 |
|
|
unless ({command => 1, checkbox => 1, radio => 1}->{$value}) { |
3353 |
|
|
$self->{onerror}->(node => $attr, type => 'attribute value not allowed'); |
3354 |
|
|
} |
3355 |
|
|
}, |
3356 |
|
|
}), |
3357 |
|
|
checker => $HTMLEmptyChecker, |
3358 |
|
|
}; |
3359 |
|
|
|
3360 |
|
|
$Element->{$HTML_NS}->{menu} = { |
3361 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
3362 |
|
|
autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'), |
3363 |
|
|
id => sub { |
3364 |
|
|
## NOTE: same as global |id=""|, with |$self->{menu}| registeration |
3365 |
|
|
my ($self, $attr) = @_; |
3366 |
|
|
my $value = $attr->value; |
3367 |
|
|
if (length $value > 0) { |
3368 |
|
|
if ($self->{id}->{$value}) { |
3369 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate ID'); |
3370 |
|
|
push @{$self->{id}->{$value}}, $attr; |
3371 |
|
|
} else { |
3372 |
|
|
$self->{id}->{$value} = [$attr]; |
3373 |
|
|
} |
3374 |
|
|
} else { |
3375 |
|
|
## NOTE: MUST contain at least one character |
3376 |
|
|
$self->{onerror}->(node => $attr, type => 'empty attribute value'); |
3377 |
|
|
} |
3378 |
|
|
if ($value =~ /[\x09-\x0D\x20]/) { |
3379 |
|
|
$self->{onerror}->(node => $attr, type => 'space in ID'); |
3380 |
|
|
} |
3381 |
|
|
$self->{menu}->{$value} ||= $attr; |
3382 |
|
|
## ISSUE: <menu id=""><p contextmenu=""> match? |
3383 |
|
|
}, |
3384 |
|
|
label => sub { }, ## NOTE: No conformance creteria |
3385 |
|
|
type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}), |
3386 |
|
|
}), |
3387 |
|
|
checker => sub { |
3388 |
|
|
my ($self, $todo) = @_; |
3389 |
|
|
my $el = $todo->{node}; |
3390 |
|
|
my $new_todos = []; |
3391 |
|
|
my @nodes = (@{$el->child_nodes}); |
3392 |
wakaba |
1.25 |
|
3393 |
|
|
my $old_values = {significant => |
3394 |
|
|
$todo->{flag}->{has_descendant}->{significant}}; |
3395 |
|
|
$todo->{flag}->{has_descendant}->{significant} = 0; |
3396 |
wakaba |
1.1 |
|
3397 |
wakaba |
1.29 |
my $content = 'li or phrasing'; |
3398 |
wakaba |
1.1 |
while (@nodes) { |
3399 |
|
|
my $node = shift @nodes; |
3400 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
3401 |
|
|
|
3402 |
|
|
my $nt = $node->node_type; |
3403 |
|
|
if ($nt == 1) { |
3404 |
|
|
my $node_ns = $node->namespace_uri; |
3405 |
|
|
$node_ns = '' unless defined $node_ns; |
3406 |
|
|
my $node_ln = $node->manakai_local_name; |
3407 |
|
|
my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln}; |
3408 |
wakaba |
1.8 |
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
3409 |
|
|
# |
3410 |
|
|
} elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') { |
3411 |
wakaba |
1.29 |
if ($content eq 'phrasing') { |
3412 |
wakaba |
1.1 |
$not_allowed = 1; |
3413 |
wakaba |
1.29 |
} elsif ($content eq 'li or phrasing') { |
3414 |
wakaba |
1.1 |
$content = 'li'; |
3415 |
|
|
} |
3416 |
|
|
} else { |
3417 |
wakaba |
1.29 |
if ($HTMLPhrasingContent->{$node_ns}->{$node_ln}) { |
3418 |
|
|
$content = 'phrasing'; |
3419 |
wakaba |
1.1 |
} else { |
3420 |
|
|
$not_allowed = 1; |
3421 |
|
|
} |
3422 |
|
|
} |
3423 |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed') |
3424 |
|
|
if $not_allowed; |
3425 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
3426 |
|
|
unshift @nodes, @$sib; |
3427 |
|
|
push @$new_todos, @$ch; |
3428 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
3429 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
3430 |
|
|
if ($content eq 'li') { |
3431 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
3432 |
wakaba |
1.29 |
} elsif ($content eq 'li or phrasing') { |
3433 |
|
|
$content = 'phrasing'; |
3434 |
wakaba |
1.1 |
} |
3435 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
3436 |
wakaba |
1.1 |
} |
3437 |
|
|
} elsif ($nt == 5) { |
3438 |
|
|
unshift @nodes, @{$node->child_nodes}; |
3439 |
|
|
} |
3440 |
|
|
} |
3441 |
|
|
|
3442 |
|
|
for (@$new_todos) { |
3443 |
wakaba |
1.29 |
$_->{flag}->{in_menu} = 1; |
3444 |
wakaba |
1.1 |
} |
3445 |
wakaba |
1.25 |
|
3446 |
|
|
push @$new_todos, { |
3447 |
|
|
type => 'descendant', node => $todo->{node}, flag => $todo->{flag}, |
3448 |
|
|
old_values => $old_values, |
3449 |
wakaba |
1.26 |
errors => $HTMLSignificantContentErrors, |
3450 |
wakaba |
1.25 |
}; |
3451 |
|
|
|
3452 |
wakaba |
1.1 |
return ($new_todos); |
3453 |
|
|
}, |
3454 |
wakaba |
1.8 |
}; |
3455 |
|
|
|
3456 |
|
|
$Element->{$HTML_NS}->{datatemplate} = { |
3457 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
3458 |
|
|
checker => sub { |
3459 |
|
|
my ($self, $todo) = @_; |
3460 |
|
|
my $el = $todo->{node}; |
3461 |
|
|
my $new_todos = []; |
3462 |
|
|
my @nodes = (@{$el->child_nodes}); |
3463 |
|
|
|
3464 |
|
|
while (@nodes) { |
3465 |
|
|
my $node = shift @nodes; |
3466 |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
3467 |
|
|
|
3468 |
|
|
my $nt = $node->node_type; |
3469 |
|
|
if ($nt == 1) { |
3470 |
|
|
my $node_ns = $node->namespace_uri; |
3471 |
|
|
$node_ns = '' unless defined $node_ns; |
3472 |
|
|
my $node_ln = $node->manakai_local_name; |
3473 |
|
|
## NOTE: |minuses| list is not checked since redundant |
3474 |
|
|
if ($self->{pluses}->{$node_ns}->{$node_ln}) { |
3475 |
|
|
# |
3476 |
|
|
} elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) { |
3477 |
|
|
$self->{onerror}->(node => $node, |
3478 |
|
|
type => 'element not allowed:datatemplate'); |
3479 |
|
|
} |
3480 |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
3481 |
|
|
unshift @nodes, @$sib; |
3482 |
|
|
push @$new_todos, @$ch; |
3483 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
3484 |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
3485 |
|
|
$self->{onerror}->(node => $node, type => 'character not allowed'); |
3486 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
3487 |
wakaba |
1.8 |
} |
3488 |
|
|
} elsif ($nt == 5) { |
3489 |
|
|
unshift @nodes, @{$node->child_nodes}; |
3490 |
|
|
} |
3491 |
|
|
} |
3492 |
|
|
return ($new_todos); |
3493 |
|
|
}, |
3494 |
|
|
is_xml_root => 1, |
3495 |
|
|
}; |
3496 |
|
|
|
3497 |
|
|
$Element->{$HTML_NS}->{rule} = { |
3498 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
3499 |
wakaba |
1.23 |
condition => $HTMLSelectorsAttrChecker, |
3500 |
wakaba |
1.18 |
mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker, |
3501 |
wakaba |
1.8 |
}), |
3502 |
|
|
checker => sub { |
3503 |
|
|
my ($self, $todo) = @_; |
3504 |
|
|
|
3505 |
|
|
my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}}); |
3506 |
wakaba |
1.25 |
my ($sib, $ch) = $HTMLAnyChecker->($self, $todo); |
3507 |
wakaba |
1.8 |
push @$sib, $end; |
3508 |
|
|
return ($sib, $ch); |
3509 |
|
|
}, |
3510 |
|
|
## NOTE: "MAY be anything that, when the parent |datatemplate| |
3511 |
|
|
## is applied to some conforming data, results in a conforming DOM tree.": |
3512 |
|
|
## We don't check against this. |
3513 |
|
|
}; |
3514 |
|
|
|
3515 |
|
|
$Element->{$HTML_NS}->{nest} = { |
3516 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({ |
3517 |
wakaba |
1.23 |
filter => $HTMLSelectorsAttrChecker, |
3518 |
|
|
mode => sub { |
3519 |
|
|
my ($self, $attr) = @_; |
3520 |
|
|
my $value = $attr->value; |
3521 |
|
|
if ($value !~ /\A[^\x09-\x0D\x20]+\z/) { |
3522 |
|
|
$self->{onerror}->(node => $attr, type => 'mode:syntax error'); |
3523 |
|
|
} |
3524 |
|
|
}, |
3525 |
wakaba |
1.8 |
}), |
3526 |
|
|
checker => $HTMLEmptyChecker, |
3527 |
wakaba |
1.1 |
}; |
3528 |
|
|
|
3529 |
|
|
$Element->{$HTML_NS}->{legend} = { |
3530 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
3531 |
wakaba |
1.29 |
checker => $HTMLPhrasingContentChecker, |
3532 |
wakaba |
1.1 |
}; |
3533 |
|
|
|
3534 |
|
|
$Element->{$HTML_NS}->{div} = { |
3535 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), |
3536 |
wakaba |
1.31 |
checker => $HTMLProseContentChecker, |
3537 |
wakaba |
1.1 |
}; |
3538 |
|
|
|
3539 |
|
|
$Element->{$HTML_NS}->{font} = { |
3540 |
|
|
attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO |
3541 |
|
|
checker => $HTMLTransparentChecker, |
3542 |
|
|
}; |
3543 |
|
|
|
3544 |
|
|
$Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1; |
3545 |
|
|
|
3546 |
|
|
1; |