/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.33 - (hide annotations) (download)
Sun Feb 17 12:32:44 2008 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.32: +6 -0 lines
++ whatpm/t/ChangeLog	17 Feb 2008 12:32:40 -0000
	* content-model-1.dat: More test on |<meta http-equiv=Content-Type>|
	are added.

2008-02-17  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7 wakaba 1.29 ## December 2007 HTML5 Classification
8    
9     my $HTMLMetadataContent = {
10     $HTML_NS => {
11     title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1,
12     'event-source' => 1, command => 1, datatemplate => 1,
13     ## NOTE: A |meta| with no |name| element is not allowed as
14     ## a metadata content other than |head| element.
15     meta => 1,
16     },
17     ## NOTE: RDF is mentioned in the HTML5 spec.
18     ## TODO: Other RDF elements?
19     q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
20     };
21    
22     my $HTMLProseContent = {
23     $HTML_NS => {
24     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
25     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
26     footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1,
27     ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1,
28     details => 1, ## ISSUE: "Prose element" in spec.
29     datagrid => 1, ## ISSUE: "Prose element" in spec.
30     datatemplate => 1,
31     div => 1, ## ISSUE: No category in spec.
32     ## NOTE: |style| is only allowed if |scoped| attribute is specified.
33     ## Additionally, it must be before any other element or
34     ## non-inter-element-whitespace text node.
35     style => 1,
36    
37     br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, m => 1,
38     dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
39     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
40     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
41     command => 1, font => 1,
42     a => 1,
43     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
44     ## NOTE: |area| is allowed only as a descendant of |map|.
45     area => 1,
46    
47     ins => 1, del => 1,
48    
49     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
50     menu => 1,
51    
52     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
53     canvas => 1,
54     },
55    
56     ## NOTE: Embedded
57     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
58     q<http://www.w3.org/2000/svg> => {svg => 1},
59     };
60    
61     my $HTMLSectioningContent = {
62     $HTML_NS => {
63     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
64     ## NOTE: |body| is only allowed in |html| element.
65     body => 1,
66     },
67     };
68    
69     my $HTMLHeadingContent = {
70     $HTML_NS => {
71     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
72     },
73     };
74    
75     my $HTMLPhrasingContent = {
76     ## NOTE: All phrasing content is also prose content.
77     $HTML_NS => {
78     br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, m => 1,
79     dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
80     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
81     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
82     command => 1, font => 1,
83     a => 1,
84     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
85     ## NOTE: |area| is allowed only as a descendant of |map|.
86     area => 1,
87    
88     ## NOTE: Transparent.
89     ins => 1, del => 1,
90    
91     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
92     menu => 1,
93    
94     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
95     canvas => 1,
96     },
97    
98     ## NOTE: Embedded
99     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
100     q<http://www.w3.org/2000/svg> => {svg => 1},
101    
102     ## NOTE: And non-inter-element-whitespace text nodes.
103     };
104    
105     my $HTMLEmbeddedContent = {
106     ## NOTE: All embedded content is also phrasing content.
107     $HTML_NS => {
108     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
109     canvas => 1,
110     },
111     ## NOTE: MathML is mentioned in the HTML5 spec.
112     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
113     ## NOTE: SVG is mentioned in the HTML5 spec.
114     q<http://www.w3.org/2000/svg> => {svg => 1},
115     ## NOTE: Foreign elements with content (but no metadata) are
116     ## embedded content.
117     };
118    
119     my $HTMLInteractiveContent = {
120     $HTML_NS => {
121     a => 1,
122     },
123     };
124    
125     ## Old HTML5 categories
126    
127 wakaba 1.1 my $HTMLMetadataElements = {
128     $HTML_NS => {
129     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
130 wakaba 1.8 noscript 1 datatemplate 1
131 wakaba 1.1 /,
132     },
133     };
134    
135     my $HTMLSectioningElements = {
136     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
137     };
138    
139     my $HTMLBlockLevelElements = {
140     $HTML_NS => {
141     qw/
142     section 1 nav 1 article 1 blockquote 1 aside 1
143     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
144     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
145     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
146     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
147 wakaba 1.8 datatemplate 1
148 wakaba 1.1 /,
149     },
150     };
151    
152     my $HTMLStrictlyInlineLevelElements = {
153     $HTML_NS => {
154     qw/
155     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
156     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
157     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
158     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
159     script 1 noscript 1 event-source 1 command 1 font 1
160     /,
161     },
162     };
163    
164     my $HTMLStructuredInlineLevelElements = {
165     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
166     };
167    
168     my $HTMLInteractiveElements = {
169     $HTML_NS => {a => 1, details => 1, datagrid => 1},
170     };
171     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
172     ## of interactive elements
173    
174     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
175    
176     #my $HTMLSemiTransparentElements = {
177     # $HTML_NS => {qw/video 1 audio 1/},
178     #};
179    
180     my $HTMLEmbededElements = {
181     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
182     };
183 wakaba 1.25 ## NOTE: When an element is added to this list, make sure that
184     ## the element's checker set |has_descendant| flag for |significant| content
185     ## as true.
186    
187 wakaba 1.26 my $HTMLSignificantContentErrors = {
188     significant => sub {
189     my ($self, $todo) = @_;
190     $self->{onerror}->(node => $todo->{node},
191     level => $self->{should_level},
192     type => 'no significant content');
193     },
194     }; # $HTMLSignificantContentErrors
195    
196 wakaba 1.25 our $AnyChecker;
197     my $HTMLAnyChecker = sub {
198     my ($self, $todo) = @_;
199    
200     my $old_values = {significant =>
201     $todo->{flag}->{has_descendant}->{significant}};
202     $todo->{flag}->{has_descendant}->{significant} = 0;
203    
204     my ($new_todos) = $AnyChecker->($self, $todo);
205    
206     push @$new_todos, {
207     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
208     old_values => $old_values,
209 wakaba 1.26 errors => $HTMLSignificantContentErrors,
210 wakaba 1.25 };
211    
212     return ($new_todos);
213     }; # $HTMLAnyChecker
214 wakaba 1.1
215     ## Empty
216     my $HTMLEmptyChecker = sub {
217     my ($self, $todo) = @_;
218     my $el = $todo->{node};
219     my $new_todos = [];
220     my @nodes = (@{$el->child_nodes});
221    
222     while (@nodes) {
223     my $node = shift @nodes;
224     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
225    
226     my $nt = $node->node_type;
227     if ($nt == 1) {
228 wakaba 1.8 my $node_ns = $node->namespace_uri;
229     $node_ns = '' unless defined $node_ns;
230     my $node_ln = $node->manakai_local_name;
231 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
232     $self->{onerror}->(node => $node, type => 'element not allowed:minus',
233     level => $self->{must_level});
234     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
235 wakaba 1.8 #
236     } else {
237 wakaba 1.30 $self->{onerror}->(node => $node, type => 'element not allowed:empty',
238     level => $self->{must_level});
239 wakaba 1.8 }
240 wakaba 1.30
241 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
242     unshift @nodes, @$sib;
243     push @$new_todos, @$ch;
244     } elsif ($nt == 3 or $nt == 4) {
245     if ($node->data =~ /[^\x09-\x0D\x20]/) {
246 wakaba 1.30 $self->{onerror}->(node => $node,
247     type => 'character not allowed:empty',
248     level => $self->{must_level});
249 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
250 wakaba 1.1 }
251     } elsif ($nt == 5) {
252     unshift @nodes, @{$node->child_nodes};
253     }
254     }
255     return ($new_todos);
256     };
257    
258     ## Text
259     my $HTMLTextChecker = sub {
260     my ($self, $todo) = @_;
261     my $el = $todo->{node};
262     my $new_todos = [];
263     my @nodes = (@{$el->child_nodes});
264    
265 wakaba 1.25 my $old_values = {significant =>
266     $todo->{flag}->{has_descendant}->{significant}};
267     $todo->{flag}->{has_descendant}->{significant} = 0;
268    
269 wakaba 1.1 while (@nodes) {
270     my $node = shift @nodes;
271     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
272    
273     my $nt = $node->node_type;
274     if ($nt == 1) {
275 wakaba 1.8 my $node_ns = $node->namespace_uri;
276     $node_ns = '' unless defined $node_ns;
277     my $node_ln = $node->manakai_local_name;
278     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
279     #
280     } else {
281     ## NOTE: |minuses| list is not checked since redundant
282     $self->{onerror}->(node => $node, type => 'element not allowed');
283     }
284 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
285     unshift @nodes, @$sib;
286     push @$new_todos, @$ch;
287 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
288     if ($node->data =~ /[^\x09-\x0D\x20]/) {
289     $todo->{flag}->{has_descendant}->{significant} = 1;
290     }
291 wakaba 1.1 } elsif ($nt == 5) {
292     unshift @nodes, @{$node->child_nodes};
293     }
294     }
295 wakaba 1.25
296     push @$new_todos, {
297     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
298     old_values => $old_values,
299 wakaba 1.26 errors => $HTMLSignificantContentErrors,
300 wakaba 1.25 };
301    
302 wakaba 1.1 return ($new_todos);
303     };
304    
305     ## Zero or more |html:style| elements,
306     ## followed by zero or more block-level elements
307     my $HTMLStylableBlockChecker = sub {
308     my ($self, $todo) = @_;
309     my $el = $todo->{node};
310     my $new_todos = [];
311     my @nodes = (@{$el->child_nodes});
312 wakaba 1.25
313     my $old_values = {significant =>
314     $todo->{flag}->{has_descendant}->{significant}};
315     $todo->{flag}->{has_descendant}->{significant} = 0;
316 wakaba 1.1
317     my $has_non_style;
318     while (@nodes) {
319     my $node = shift @nodes;
320     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
321    
322     my $nt = $node->node_type;
323     if ($nt == 1) {
324     my $node_ns = $node->namespace_uri;
325     $node_ns = '' unless defined $node_ns;
326     my $node_ln = $node->manakai_local_name;
327     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
328     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
329     $not_allowed = 1 if $has_non_style or
330     not $node->has_attribute_ns (undef, 'scoped');
331     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
332     $has_non_style = 1;
333 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
334     #
335 wakaba 1.1 } else {
336     $has_non_style = 1;
337     $not_allowed = 1;
338     }
339     $self->{onerror}->(node => $node, type => 'element not allowed')
340     if $not_allowed;
341     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
342     unshift @nodes, @$sib;
343     push @$new_todos, @$ch;
344     } elsif ($nt == 3 or $nt == 4) {
345     if ($node->data =~ /[^\x09-\x0D\x20]/) {
346     $self->{onerror}->(node => $node, type => 'character not allowed');
347 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
348 wakaba 1.1 }
349     } elsif ($nt == 5) {
350     unshift @nodes, @{$node->child_nodes};
351     }
352     }
353 wakaba 1.25
354     push @$new_todos, {
355     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
356     old_values => $old_values,
357 wakaba 1.26 errors => $HTMLSignificantContentErrors,
358 wakaba 1.25 };
359    
360 wakaba 1.1 return ($new_todos);
361     }; # $HTMLStylableBlockChecker
362    
363 wakaba 1.29 my $HTMLProseContentChecker = sub {
364     my ($self, $todo) = @_;
365     my $el = $todo->{node};
366     my $new_todos = [];
367     my @nodes = (@{$el->child_nodes});
368    
369     my $old_values = {significant =>
370     $todo->{flag}->{has_descendant}->{significant}};
371     $todo->{flag}->{has_descendant}->{significant} = 0;
372    
373     my $has_non_style;
374     while (@nodes) {
375     my $node = shift @nodes;
376     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
377    
378     my $nt = $node->node_type;
379     if ($nt == 1) {
380     my $node_ns = $node->namespace_uri;
381     $node_ns = '' unless defined $node_ns;
382     my $node_ln = $node->manakai_local_name;
383 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
384     $self->{onerror}->(node => $node,
385     type => 'element not allowed:minus',
386     level => $self->{must_level});
387     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
388     if ($has_non_style or
389     not $node->has_attribute_ns (undef, 'scoped')) {
390     $self->{onerror}->(node => $node,
391     type => 'element not allowed:prose style',
392     level => $self->{must_level});
393     }
394 wakaba 1.29 } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
395     $has_non_style = 1;
396     if ($HTMLEmbeddedContent->{$node_ns}->{$node_ln}) {
397     $todo->{flag}->{has_descendant}->{significant} = 1;
398     }
399     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
400     #
401     } else {
402     $has_non_style = 1;
403 wakaba 1.30 $self->{onerror}->(node => $node,
404     type => 'element not allowed:prose',
405     level => $self->{must_level})
406 wakaba 1.29 }
407 wakaba 1.30
408 wakaba 1.29 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
409     unshift @nodes, @$sib;
410     push @$new_todos, @$ch;
411     } elsif ($nt == 3 or $nt == 4) {
412     if ($node->data =~ /[^\x09-\x0D\x20]/) {
413     $has_non_style = 1;
414     $todo->{flag}->{has_descendant}->{significant} = 1;
415     }
416     } elsif ($nt == 5) {
417     unshift @nodes, @{$node->child_nodes};
418     }
419     }
420    
421     push @$new_todos, {
422     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
423     old_values => $old_values,
424     errors => $HTMLSignificantContentErrors,
425     };
426    
427     return ($new_todos);
428     }; # $HTMLProseContentChecker
429    
430 wakaba 1.1 ## Zero or more block-level elements
431     my $HTMLBlockChecker = sub {
432     my ($self, $todo) = @_;
433     my $el = $todo->{node};
434     my $new_todos = [];
435     my @nodes = (@{$el->child_nodes});
436    
437 wakaba 1.25 my $old_values = {significant =>
438     $todo->{flag}->{has_descendant}->{significant}};
439     $todo->{flag}->{has_descendant}->{significant} = 0;
440    
441 wakaba 1.1 while (@nodes) {
442     my $node = shift @nodes;
443     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
444    
445     my $nt = $node->node_type;
446     if ($nt == 1) {
447     my $node_ns = $node->namespace_uri;
448     $node_ns = '' unless defined $node_ns;
449     my $node_ln = $node->manakai_local_name;
450     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
451     $not_allowed = 1
452 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
453     $self->{pluses}->{$node_ns}->{$node_ln};
454 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
455     if $not_allowed;
456     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
457     unshift @nodes, @$sib;
458     push @$new_todos, @$ch;
459     } elsif ($nt == 3 or $nt == 4) {
460     if ($node->data =~ /[^\x09-\x0D\x20]/) {
461     $self->{onerror}->(node => $node, type => 'character not allowed');
462 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
463 wakaba 1.1 }
464     } elsif ($nt == 5) {
465     unshift @nodes, @{$node->child_nodes};
466     }
467     }
468 wakaba 1.25
469     push @$new_todos, {
470     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
471     old_values => $old_values,
472 wakaba 1.26 errors => $HTMLSignificantContentErrors,
473 wakaba 1.25 };
474    
475 wakaba 1.1 return ($new_todos);
476     }; # $HTMLBlockChecker
477    
478     ## Inline-level content
479     my $HTMLInlineChecker = sub {
480     my ($self, $todo) = @_;
481     my $el = $todo->{node};
482     my $new_todos = [];
483     my @nodes = (@{$el->child_nodes});
484    
485 wakaba 1.25 my $old_values = {significant =>
486     $todo->{flag}->{has_descendant}->{significant}};
487     $todo->{flag}->{has_descendant}->{significant} = 0;
488    
489 wakaba 1.1 while (@nodes) {
490     my $node = shift @nodes;
491     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
492    
493     my $nt = $node->node_type;
494     if ($nt == 1) {
495     my $node_ns = $node->namespace_uri;
496     $node_ns = '' unless defined $node_ns;
497     my $node_ln = $node->manakai_local_name;
498     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
499     $not_allowed = 1
500     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
501 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
502     $self->{pluses}->{$node_ns}->{$node_ln};
503 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
504     if $not_allowed;
505     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
506     unshift @nodes, @$sib;
507     push @$new_todos, @$ch;
508 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
509     if ($node->data =~ /[^\x09-\x0D\x20]/) {
510     $todo->{flag}->{has_descendant}->{significant} = 1;
511     }
512 wakaba 1.1 } elsif ($nt == 5) {
513     unshift @nodes, @{$node->child_nodes};
514     }
515     }
516    
517     for (@$new_todos) {
518     $_->{inline} = 1;
519     }
520 wakaba 1.25
521     push @$new_todos, {
522     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
523     old_values => $old_values,
524 wakaba 1.26 errors => $HTMLSignificantContentErrors,
525 wakaba 1.25 };
526    
527 wakaba 1.1 return ($new_todos);
528     }; # $HTMLInlineChecker
529    
530     ## Strictly inline-level content
531     my $HTMLStrictlyInlineChecker = sub {
532     my ($self, $todo) = @_;
533     my $el = $todo->{node};
534     my $new_todos = [];
535     my @nodes = (@{$el->child_nodes});
536 wakaba 1.25
537     my $old_values = {significant =>
538     $todo->{flag}->{has_descendant}->{significant}};
539     $todo->{flag}->{has_descendant}->{significant} = 0;
540 wakaba 1.1
541     while (@nodes) {
542     my $node = shift @nodes;
543     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
544    
545     my $nt = $node->node_type;
546     if ($nt == 1) {
547     my $node_ns = $node->namespace_uri;
548     $node_ns = '' unless defined $node_ns;
549     my $node_ln = $node->manakai_local_name;
550     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
551     $not_allowed = 1
552 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
553     $self->{pluses}->{$node_ns}->{$node_ln};
554 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
555     if $not_allowed;
556     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
557     unshift @nodes, @$sib;
558     push @$new_todos, @$ch;
559 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
560     if ($node->data =~ /[^\x09-\x0D\x20]/) {
561     $todo->{flag}->{has_descendant}->{significant} = 1;
562     }
563 wakaba 1.1 } elsif ($nt == 5) {
564     unshift @nodes, @{$node->child_nodes};
565     }
566     }
567    
568     for (@$new_todos) {
569     $_->{inline} = 1;
570     $_->{strictly_inline} = 1;
571     }
572 wakaba 1.25
573     push @$new_todos, {
574     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
575     old_values => $old_values,
576 wakaba 1.26 errors => $HTMLSignificantContentErrors,
577 wakaba 1.25 };
578    
579 wakaba 1.1 return ($new_todos);
580     }; # $HTMLStrictlyInlineChecker
581    
582 wakaba 1.8 ## Inline-level or strictly inline-level content
583 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
584     my ($self, $todo) = @_;
585     my $el = $todo->{node};
586     my $new_todos = [];
587     my @nodes = (@{$el->child_nodes});
588 wakaba 1.25
589     my $old_values = {significant =>
590     $todo->{flag}->{has_descendant}->{significant}};
591     $todo->{flag}->{has_descendant}->{significant} = 0;
592 wakaba 1.1
593     while (@nodes) {
594     my $node = shift @nodes;
595     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
596    
597     my $nt = $node->node_type;
598     if ($nt == 1) {
599     my $node_ns = $node->namespace_uri;
600     $node_ns = '' unless defined $node_ns;
601     my $node_ln = $node->manakai_local_name;
602     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
603     if ($todo->{strictly_inline}) {
604     $not_allowed = 1
605 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
606     $self->{pluses}->{$node_ns}->{$node_ln};
607 wakaba 1.1 } else {
608     $not_allowed = 1
609     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
610 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
611     $self->{pluses}->{$node_ns}->{$node_ln};
612 wakaba 1.1 }
613     $self->{onerror}->(node => $node, type => 'element not allowed')
614     if $not_allowed;
615     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
616     unshift @nodes, @$sib;
617     push @$new_todos, @$ch;
618 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
619     if ($node->data =~ /[^\x09-\x0D\x20]/) {
620     $todo->{flag}->{has_descendant}->{significant} = 1;
621     }
622 wakaba 1.1 } elsif ($nt == 5) {
623     unshift @nodes, @{$node->child_nodes};
624     }
625     }
626    
627     for (@$new_todos) {
628     $_->{inline} = 1;
629     $_->{strictly_inline} = 1;
630     }
631 wakaba 1.25
632     push @$new_todos, {
633     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
634     old_values => $old_values,
635 wakaba 1.26 errors => $HTMLSignificantContentErrors,
636 wakaba 1.25 };
637    
638 wakaba 1.1 return ($new_todos);
639     }; # $HTMLInlineOrStrictlyInlineChecker
640    
641 wakaba 1.29 my $HTMLPhrasingContentChecker = sub {
642     my ($self, $todo) = @_;
643     my $el = $todo->{node};
644     my $new_todos = [];
645     my @nodes = (@{$el->child_nodes});
646    
647     my $old_values = {significant =>
648     $todo->{flag}->{has_descendant}->{significant}};
649     $todo->{flag}->{has_descendant}->{significant} = 0;
650    
651     while (@nodes) {
652     my $node = shift @nodes;
653     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
654    
655     my $nt = $node->node_type;
656     if ($nt == 1) {
657     my $node_ns = $node->namespace_uri;
658     $node_ns = '' unless defined $node_ns;
659     my $node_ln = $node->manakai_local_name;
660 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
661     $self->{onerror}->(node => $node,
662     type => 'element not allowed:minus',
663     level => $self->{must_level});
664     } elsif ($HTMLPhrasingContent->{$node_ns}->{$node_ln} or
665     $self->{pluses}->{$node_ns}->{$node_ln}) {
666     #
667     } else {
668     $self->{onerror}->(node => $node,
669     type => 'element not allowed:phrasing',
670     level => $self->{must_level});
671     }
672    
673 wakaba 1.29 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
674     unshift @nodes, @$sib;
675     push @$new_todos, @$ch;
676     } elsif ($nt == 3 or $nt == 4) {
677     if ($node->data =~ /[^\x09-\x0D\x20]/) {
678     $todo->{flag}->{has_descendant}->{significant} = 1;
679     }
680     } elsif ($nt == 5) {
681     unshift @nodes, @{$node->child_nodes};
682     }
683     }
684    
685     push @$new_todos, {
686     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
687     old_values => $old_values,
688     errors => $HTMLSignificantContentErrors,
689     };
690    
691     return ($new_todos);
692     }; # $HTMLPhrasingContentChecker
693    
694 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
695 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
696     my ($self, $todo) = @_;
697     my $el = $todo->{node};
698     my $new_todos = [];
699     my @nodes = (@{$el->child_nodes});
700 wakaba 1.25
701     my $old_values = {significant =>
702     $todo->{flag}->{has_descendant}->{significant}};
703     $todo->{flag}->{has_descendant}->{significant} = 0;
704 wakaba 1.1
705     my $content = 'block-or-inline'; # or 'block' or 'inline'
706     my @block_not_inline;
707     while (@nodes) {
708     my $node = shift @nodes;
709     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
710    
711 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
712     ## is conforming or not.
713    
714 wakaba 1.1 my $nt = $node->node_type;
715     if ($nt == 1) {
716     my $node_ns = $node->namespace_uri;
717     $node_ns = '' unless defined $node_ns;
718     my $node_ln = $node->manakai_local_name;
719     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
720     if ($content eq 'block') {
721     $not_allowed = 1
722 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
723     $self->{pluses}->{$node_ns}->{$node_ln};
724 wakaba 1.1 } elsif ($content eq 'inline') {
725     $not_allowed = 1
726     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
727 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
728     $self->{pluses}->{$node_ns}->{$node_ln};
729 wakaba 1.1 } else {
730     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
731     my $is_inline
732     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
733     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
734    
735     push @block_not_inline, $node
736     if $is_block and not $is_inline and not $not_allowed;
737 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
738 wakaba 1.1 $content = 'inline';
739     for (@block_not_inline) {
740     $self->{onerror}->(node => $_, type => 'element not allowed');
741     }
742     $not_allowed = 1 unless $is_inline;
743     }
744     }
745     $self->{onerror}->(node => $node, type => 'element not allowed')
746     if $not_allowed;
747     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
748     unshift @nodes, @$sib;
749     push @$new_todos, @$ch;
750     } elsif ($nt == 3 or $nt == 4) {
751     if ($node->data =~ /[^\x09-\x0D\x20]/) {
752     if ($content eq 'block') {
753     $self->{onerror}->(node => $node, type => 'character not allowed');
754     } else {
755     $content = 'inline';
756     for (@block_not_inline) {
757     $self->{onerror}->(node => $_, type => 'element not allowed');
758     }
759     }
760 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
761 wakaba 1.1 }
762     } elsif ($nt == 5) {
763     unshift @nodes, @{$node->child_nodes};
764     }
765     }
766    
767     if ($content eq 'inline') {
768     for (@$new_todos) {
769     $_->{inline} = 1;
770     }
771     }
772 wakaba 1.25
773     push @$new_todos, {
774     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
775     old_values => $old_values,
776 wakaba 1.26 errors => $HTMLSignificantContentErrors,
777 wakaba 1.25 };
778    
779 wakaba 1.1 return ($new_todos);
780     };
781    
782     ## Zero or more XXX element, then either block-level or inline-level
783     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
784     my ($elnsuri, $ellname) = @_;
785     return sub {
786     my ($self, $todo) = @_;
787     my $el = $todo->{node};
788     my $new_todos = [];
789     my @nodes = (@{$el->child_nodes});
790 wakaba 1.25
791     my $old_values = {significant =>
792     $todo->{flag}->{has_descendant}->{significant}};
793     $todo->{flag}->{has_descendant}->{significant} = 0;
794 wakaba 1.1
795     my $has_non_style;
796     my $content = 'block-or-inline'; # or 'block' or 'inline'
797     my @block_not_inline;
798     while (@nodes) {
799     my $node = shift @nodes;
800     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
801    
802     my $nt = $node->node_type;
803     if ($nt == 1) {
804     my $node_ns = $node->namespace_uri;
805     $node_ns = '' unless defined $node_ns;
806     my $node_ln = $node->manakai_local_name;
807     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
808     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
809     $not_allowed = 1 if $has_non_style;
810     if ($ellname eq 'style' and
811     not $node->has_attribute_ns (undef, 'scoped')) {
812     $not_allowed = 1;
813     }
814     } elsif ($content eq 'block') {
815     $has_non_style = 1;
816     $not_allowed = 1
817 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
818     $self->{pluses}->{$node_ns}->{$node_ln};
819 wakaba 1.1 } elsif ($content eq 'inline') {
820     $has_non_style = 1;
821     $not_allowed = 1
822     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
823 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
824     $self->{pluses}->{$node_ns}->{$node_ln};
825 wakaba 1.1 } else {
826 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
827 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
828     my $is_inline
829     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
830     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
831    
832     push @block_not_inline, $node
833     if $is_block and not $is_inline and not $not_allowed;
834 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
835 wakaba 1.1 $content = 'inline';
836     for (@block_not_inline) {
837     $self->{onerror}->(node => $_, type => 'element not allowed');
838     }
839     $not_allowed = 1 unless $is_inline;
840     }
841     }
842     $self->{onerror}->(node => $node, type => 'element not allowed')
843     if $not_allowed;
844     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
845     unshift @nodes, @$sib;
846     push @$new_todos, @$ch;
847     } elsif ($nt == 3 or $nt == 4) {
848     if ($node->data =~ /[^\x09-\x0D\x20]/) {
849     $has_non_style = 1;
850     if ($content eq 'block') {
851     $self->{onerror}->(node => $node, type => 'character not allowed');
852     } else {
853     $content = 'inline';
854     for (@block_not_inline) {
855     $self->{onerror}->(node => $_, type => 'element not allowed');
856     }
857     }
858 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
859 wakaba 1.1 }
860     } elsif ($nt == 5) {
861     unshift @nodes, @{$node->child_nodes};
862     }
863     }
864    
865     if ($content eq 'inline') {
866     for (@$new_todos) {
867     $_->{inline} = 1;
868     }
869     }
870 wakaba 1.25
871     push @$new_todos, {
872     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
873     old_values => $old_values,
874 wakaba 1.26 errors => $HTMLSignificantContentErrors,
875 wakaba 1.25 };
876    
877 wakaba 1.1 return ($new_todos);
878     };
879     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
880    
881 wakaba 1.29 my $HTMLTransparentChecker = $HTMLProseContentChecker;
882 wakaba 1.25 ## ISSUE: Significant content rule should be applied to transparent element
883     ## with parent? Currently, applied to |video| but not to others.
884 wakaba 1.1
885     our $AttrChecker;
886    
887     my $GetHTMLEnumeratedAttrChecker = sub {
888     my $states = shift; # {value => conforming ? 1 : -1}
889     return sub {
890     my ($self, $attr) = @_;
891     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
892     if ($states->{$value} > 0) {
893     #
894     } elsif ($states->{$value}) {
895     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
896     } else {
897     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
898     }
899     };
900     }; # $GetHTMLEnumeratedAttrChecker
901    
902     my $GetHTMLBooleanAttrChecker = sub {
903     my $local_name = shift;
904     return sub {
905     my ($self, $attr) = @_;
906     my $value = $attr->value;
907     unless ($value eq $local_name or $value eq '') {
908     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
909     }
910     };
911     }; # $GetHTMLBooleanAttrChecker
912    
913 wakaba 1.8 ## Unordered set of space-separated tokens
914 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
915 wakaba 1.8 my ($self, $attr) = @_;
916     my %word;
917     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
918     unless ($word{$word}) {
919     $word{$word} = 1;
920     } else {
921     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
922     }
923     }
924 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
925 wakaba 1.8
926 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
927     ## whose allowed values are defined by the section on link types)
928     my $HTMLLinkTypesAttrChecker = sub {
929 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
930 wakaba 1.1 my %word;
931     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
932     unless ($word{$word}) {
933     $word{$word} = 1;
934 wakaba 1.18 } elsif ($word eq 'up') {
935     #
936 wakaba 1.1 } else {
937     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
938     }
939     }
940     ## NOTE: Case sensitive match (since HTML5 spec does not say link
941     ## types are case-insensitive and it says "The value should not
942     ## be confusingly similar to any other defined value (e.g.
943     ## differing only in case).").
944     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
945     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
946     ## values to be used conformingly.
947     require Whatpm::_LinkTypeList;
948     our $LinkType;
949     for my $word (keys %word) {
950     my $def = $LinkType->{$word};
951     if (defined $def) {
952     if ($def->{status} eq 'accepted') {
953     if (defined $def->{effect}->[$a_or_area]) {
954     #
955     } else {
956     $self->{onerror}->(node => $attr,
957     type => 'link type:bad context:'.$word);
958     }
959     } elsif ($def->{status} eq 'proposal') {
960     $self->{onerror}->(node => $attr, level => 's',
961     type => 'link type:proposed:'.$word);
962 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
963     #
964     } else {
965     $self->{onerror}->(node => $attr,
966     type => 'link type:bad context:'.$word);
967     }
968 wakaba 1.1 } else { # rejected or synonym
969     $self->{onerror}->(node => $attr,
970     type => 'link type:non-conforming:'.$word);
971     }
972 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
973     if ($word eq 'alternate') {
974     #
975     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
976     $todo->{has_hyperlink_link_type} = 1;
977     }
978     }
979 wakaba 1.1 if ($def->{unique}) {
980     unless ($self->{has_link_type}->{$word}) {
981     $self->{has_link_type}->{$word} = 1;
982     } else {
983     $self->{onerror}->(node => $attr,
984     type => 'link type:duplicate:'.$word);
985     }
986     }
987     } else {
988     $self->{onerror}->(node => $attr, level => 'unsupported',
989     type => 'link type:'.$word);
990     }
991     }
992 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
993     if $word{alternate} and not $word{stylesheet};
994 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
995     ## says that using both X-Pingback: header field and HTML
996     ## <link rel=pingback> is deprecated and if both appears they
997     ## SHOULD contain exactly the same value.
998     ## ISSUE: Pingback 1.0 specification defines the exact representation
999     ## of its link element, which cannot be tested by the current arch.
1000     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
1001     ## include any string that matches to the pattern for the rel=pingback link,
1002     ## which again inpossible to test.
1003     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
1004 wakaba 1.12
1005     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
1006 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
1007     ## then they SHOULD be described in different paragraphs.".
1008 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
1009 wakaba 1.20
1010     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
1011 wakaba 1.1
1012     ## URI (or IRI)
1013     my $HTMLURIAttrChecker = sub {
1014     my ($self, $attr) = @_;
1015     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
1016     my $value = $attr->value;
1017     Whatpm::URIChecker->check_iri_reference ($value, sub {
1018     my %opt = @_;
1019     $self->{onerror}->(node => $attr, level => $opt{level},
1020     type => 'URI::'.$opt{type}.
1021     (defined $opt{position} ? ':'.$opt{position} : ''));
1022     });
1023 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
1024 wakaba 1.1 }; # $HTMLURIAttrChecker
1025    
1026     ## A space separated list of one or more URIs (or IRIs)
1027     my $HTMLSpaceURIsAttrChecker = sub {
1028     my ($self, $attr) = @_;
1029     my $i = 0;
1030     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
1031     Whatpm::URIChecker->check_iri_reference ($value, sub {
1032     my %opt = @_;
1033     $self->{onerror}->(node => $attr, level => $opt{level},
1034 wakaba 1.2 type => 'URIs:'.':'.
1035     $opt{type}.':'.$i.
1036 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
1037     });
1038     $i++;
1039     }
1040     ## ISSUE: Relative references?
1041     ## ISSUE: Leading or trailing white spaces are conformant?
1042     ## ISSUE: A sequence of white space characters are conformant?
1043     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
1044     ## NOTE: Duplication seems not an error.
1045 wakaba 1.4 $self->{has_uri_attr} = 1;
1046 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
1047    
1048     my $HTMLDatetimeAttrChecker = sub {
1049     my ($self, $attr) = @_;
1050     my $value = $attr->value;
1051     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
1052     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
1053     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
1054     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
1055     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
1056     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
1057     if $d < 1 or
1058     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
1059     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
1060     if $M == 2 and $d == 29 and
1061     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
1062     } else {
1063     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
1064     }
1065     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
1066     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
1067     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
1068     if defined $s and $s > 59;
1069     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
1070     if $zh > 23;
1071     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
1072     if $zm > 59;
1073     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
1074     } else {
1075     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
1076     }
1077     }; # $HTMLDatetimeAttrChecker
1078    
1079     my $HTMLIntegerAttrChecker = sub {
1080     my ($self, $attr) = @_;
1081     my $value = $attr->value;
1082     unless ($value =~ /\A-?[0-9]+\z/) {
1083     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
1084     }
1085     }; # $HTMLIntegerAttrChecker
1086    
1087     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
1088     my $range_check = shift;
1089     return sub {
1090     my ($self, $attr) = @_;
1091     my $value = $attr->value;
1092     if ($value =~ /\A[0-9]+\z/) {
1093     unless ($range_check->($value + 0)) {
1094     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
1095     }
1096     } else {
1097     $self->{onerror}->(node => $attr,
1098     type => 'nninteger:syntax error');
1099     }
1100     };
1101     }; # $GetHTMLNonNegativeIntegerAttrChecker
1102    
1103     my $GetHTMLFloatingPointNumberAttrChecker = sub {
1104     my $range_check = shift;
1105     return sub {
1106     my ($self, $attr) = @_;
1107     my $value = $attr->value;
1108     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
1109     unless ($range_check->($value + 0)) {
1110     $self->{onerror}->(node => $attr, type => 'float:out of range');
1111     }
1112     } else {
1113     $self->{onerror}->(node => $attr,
1114     type => 'float:syntax error');
1115     }
1116     };
1117     }; # $GetHTMLFloatingPointNumberAttrChecker
1118    
1119     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
1120     ## ISSUE: RFC 2046 does not define syntax of media types.
1121     ## ISSUE: The definition of "a valid MIME type" is unknown.
1122     ## Syntactical correctness?
1123     my $HTMLIMTAttrChecker = sub {
1124     my ($self, $attr) = @_;
1125     my $value = $attr->value;
1126     ## ISSUE: RFC 2045 Content-Type header field allows insertion
1127     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
1128     ## ISSUE: RFC 2231 extension? Maybe no.
1129     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
1130     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
1131     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
1132     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
1133     my @type = ($1, $2);
1134     my $param = $3;
1135     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
1136     if (defined $2) {
1137     push @type, $1 => $2;
1138     } else {
1139     my $n = $1;
1140     my $v = $2;
1141     $v =~ s/\\(.)/$1/gs;
1142     push @type, $n => $v;
1143     }
1144     }
1145     require Whatpm::IMTChecker;
1146     Whatpm::IMTChecker->check_imt (sub {
1147     my %opt = @_;
1148     $self->{onerror}->(node => $attr, level => $opt{level},
1149     type => 'IMT:'.$opt{type});
1150     }, @type);
1151     } else {
1152     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1153     }
1154     }; # $HTMLIMTAttrChecker
1155    
1156     my $HTMLLanguageTagAttrChecker = sub {
1157 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
1158    
1159 wakaba 1.1 my ($self, $attr) = @_;
1160 wakaba 1.6 my $value = $attr->value;
1161     require Whatpm::LangTag;
1162     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
1163     my %opt = @_;
1164     my $type = 'LangTag:'.$opt{type};
1165     $type .= ':' . $opt{subtag} if defined $opt{subtag};
1166     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
1167     level => $opt{level});
1168     });
1169 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
1170 wakaba 1.6
1171     ## TODO: testdata
1172 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
1173    
1174     ## "A valid media query [MQ]"
1175     my $HTMLMQAttrChecker = sub {
1176     my ($self, $attr) = @_;
1177     $self->{onerror}->(node => $attr, level => 'unsupported',
1178     type => 'media query');
1179     ## ISSUE: What is "a valid media query"?
1180     }; # $HTMLMQAttrChecker
1181    
1182     my $HTMLEventHandlerAttrChecker = sub {
1183     my ($self, $attr) = @_;
1184     $self->{onerror}->(node => $attr, level => 'unsupported',
1185     type => 'event handler');
1186     ## TODO: MUST contain valid ECMAScript code matching the
1187     ## ECMAScript |FunctionBody| production. [ECMA262]
1188     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
1189     ## ISSUE: Automatic semicolon insertion does not apply?
1190     ## ISSUE: Other script languages?
1191     }; # $HTMLEventHandlerAttrChecker
1192    
1193     my $HTMLUsemapAttrChecker = sub {
1194     my ($self, $attr) = @_;
1195     ## MUST be a valid hashed ID reference to a |map| element
1196     my $value = $attr->value;
1197     if ($value =~ s/^#//) {
1198     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
1199     push @{$self->{usemap}}, [$value => $attr];
1200     } else {
1201     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
1202     }
1203     ## NOTE: Space characters in hashed ID references are conforming.
1204     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
1205     }; # $HTMLUsemapAttrChecker
1206    
1207     my $HTMLTargetAttrChecker = sub {
1208     my ($self, $attr) = @_;
1209     my $value = $attr->value;
1210     if ($value =~ /^_/) {
1211     $value = lc $value; ## ISSUE: ASCII case-insentitive?
1212     unless ({
1213     _self => 1, _parent => 1, _top => 1,
1214     }->{$value}) {
1215     $self->{onerror}->(node => $attr,
1216     type => 'reserved browsing context name');
1217     }
1218     } else {
1219 wakaba 1.29 ## NOTE: An empty string is a valid browsing context name (same as _self).
1220 wakaba 1.1 }
1221     }; # $HTMLTargetAttrChecker
1222    
1223 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
1224     my ($self, $attr) = @_;
1225    
1226     ## ISSUE: Namespace resolution?
1227    
1228     my $value = $attr->value;
1229    
1230     require Whatpm::CSS::SelectorsParser;
1231     my $p = Whatpm::CSS::SelectorsParser->new;
1232     $p->{pseudo_class}->{$_} = 1 for qw/
1233     active checked disabled empty enabled first-child first-of-type
1234     focus hover indeterminate last-child last-of-type link only-child
1235     only-of-type root target visited
1236     lang nth-child nth-last-child nth-of-type nth-last-of-type not
1237     -manakai-contains -manakai-current
1238     /;
1239    
1240     $p->{pseudo_element}->{$_} = 1 for qw/
1241     after before first-letter first-line
1242     /;
1243    
1244     $p->{must_level} = $self->{must_level};
1245     $p->{onerror} = sub {
1246     my %opt = @_;
1247     $opt{type} = 'selectors:'.$opt{type};
1248     $self->{onerror}->(%opt, node => $attr);
1249     };
1250     $p->parse_string ($value);
1251     }; # $HTMLSelectorsAttrChecker
1252    
1253 wakaba 1.1 my $HTMLAttrChecker = {
1254     id => sub {
1255     ## NOTE: |map| has its own variant of |id=""| checker
1256     my ($self, $attr) = @_;
1257     my $value = $attr->value;
1258     if (length $value > 0) {
1259     if ($self->{id}->{$value}) {
1260     $self->{onerror}->(node => $attr, type => 'duplicate ID');
1261     push @{$self->{id}->{$value}}, $attr;
1262     } else {
1263     $self->{id}->{$value} = [$attr];
1264     }
1265     if ($value =~ /[\x09-\x0D\x20]/) {
1266     $self->{onerror}->(node => $attr, type => 'space in ID');
1267     }
1268     } else {
1269     ## NOTE: MUST contain at least one character
1270     $self->{onerror}->(node => $attr, type => 'empty attribute value');
1271     }
1272     },
1273     title => sub {}, ## NOTE: No conformance creteria
1274     lang => sub {
1275     my ($self, $attr) = @_;
1276 wakaba 1.6 my $value = $attr->value;
1277     if ($value eq '') {
1278     #
1279     } else {
1280     require Whatpm::LangTag;
1281     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
1282     my %opt = @_;
1283     my $type = 'LangTag:'.$opt{type};
1284     $type .= ':' . $opt{subtag} if defined $opt{subtag};
1285     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
1286     level => $opt{level});
1287     });
1288     }
1289 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
1290     unless ($attr->owner_document->manakai_is_html) {
1291     $self->{onerror}->(node => $attr, type => 'in XML:lang');
1292     }
1293 wakaba 1.6
1294     ## TODO: test data
1295 wakaba 1.1 },
1296     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
1297     class => sub {
1298     my ($self, $attr) = @_;
1299     my %word;
1300     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
1301     unless ($word{$word}) {
1302     $word{$word} = 1;
1303     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
1304     } else {
1305     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
1306     }
1307     }
1308     },
1309     contextmenu => sub {
1310     my ($self, $attr) = @_;
1311     my $value = $attr->value;
1312     push @{$self->{contextmenu}}, [$value => $attr];
1313     ## ISSUE: "The value must be the ID of a menu element in the DOM."
1314     ## What is "in the DOM"? A menu Element node that is not part
1315     ## of the Document tree is in the DOM? A menu Element node that
1316     ## belong to another Document tree is in the DOM?
1317     },
1318     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
1319 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
1320     ## TODO: ref, template, registrationmark
1321 wakaba 1.1 };
1322    
1323     for (qw/
1324     onabort onbeforeunload onblur onchange onclick oncontextmenu
1325     ondblclick ondrag ondragend ondragenter ondragleave ondragover
1326     ondragstart ondrop onerror onfocus onkeydown onkeypress
1327     onkeyup onload onmessage onmousedown onmousemove onmouseout
1328     onmouseover onmouseup onmousewheel onresize onscroll onselect
1329     onsubmit onunload
1330     /) {
1331     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
1332     }
1333    
1334     my $GetHTMLAttrsChecker = sub {
1335     my $element_specific_checker = shift;
1336     return sub {
1337     my ($self, $todo) = @_;
1338     for my $attr (@{$todo->{node}->attributes}) {
1339     my $attr_ns = $attr->namespace_uri;
1340     $attr_ns = '' unless defined $attr_ns;
1341     my $attr_ln = $attr->manakai_local_name;
1342     my $checker;
1343     if ($attr_ns eq '') {
1344     $checker = $element_specific_checker->{$attr_ln}
1345     || $HTMLAttrChecker->{$attr_ln};
1346     }
1347     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1348     || $AttrChecker->{$attr_ns}->{''};
1349     if ($checker) {
1350     $checker->($self, $attr, $todo);
1351     } else {
1352     $self->{onerror}->(node => $attr, level => 'unsupported',
1353     type => 'attribute');
1354     ## ISSUE: No comformance createria for unknown attributes in the spec
1355     }
1356     }
1357     };
1358     }; # $GetHTMLAttrsChecker
1359    
1360     our $Element;
1361     our $ElementDefault;
1362    
1363     $Element->{$HTML_NS}->{''} = {
1364     attrs_checker => $GetHTMLAttrsChecker->({}),
1365     checker => $ElementDefault->{checker},
1366     };
1367    
1368     $Element->{$HTML_NS}->{html} = {
1369     is_root => 1,
1370     attrs_checker => $GetHTMLAttrsChecker->({
1371 wakaba 1.16 manifest => $HTMLURIAttrChecker,
1372 wakaba 1.1 xmlns => sub {
1373     my ($self, $attr) = @_;
1374     my $value = $attr->value;
1375     unless ($value eq $HTML_NS) {
1376     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
1377     }
1378     unless ($attr->owner_document->manakai_is_html) {
1379     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
1380     ## TODO: Test
1381     }
1382     },
1383     }),
1384     checker => sub {
1385     my ($self, $todo) = @_;
1386     my $el = $todo->{node};
1387     my $new_todos = [];
1388     my @nodes = (@{$el->child_nodes});
1389    
1390 wakaba 1.25 my $old_values = {significant =>
1391     $todo->{flag}->{has_descendant}->{significant}};
1392     $todo->{flag}->{has_descendant}->{significant} = 0;
1393    
1394 wakaba 1.1 my $phase = 'before head';
1395     while (@nodes) {
1396     my $node = shift @nodes;
1397     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1398    
1399     my $nt = $node->node_type;
1400     if ($nt == 1) {
1401     my $node_ns = $node->namespace_uri;
1402     $node_ns = '' unless defined $node_ns;
1403     my $node_ln = $node->manakai_local_name;
1404     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1405 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1406     #
1407     } elsif ($phase eq 'before head') {
1408 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
1409     $phase = 'after head';
1410     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1411     $self->{onerror}->(node => $node, type => 'ps element missing:head');
1412     $phase = 'after body';
1413     } else {
1414     $not_allowed = 1;
1415     # before head
1416     }
1417     } elsif ($phase eq 'after head') {
1418     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1419     $phase = 'after body';
1420     } else {
1421     $not_allowed = 1;
1422     # after head
1423     }
1424     } else { #elsif ($phase eq 'after body') {
1425     $not_allowed = 1;
1426     # after body
1427     }
1428     $self->{onerror}->(node => $node, type => 'element not allowed')
1429     if $not_allowed;
1430     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1431     unshift @nodes, @$sib;
1432     push @$new_todos, @$ch;
1433     } elsif ($nt == 3 or $nt == 4) {
1434     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1435     $self->{onerror}->(node => $node, type => 'character not allowed');
1436 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1437 wakaba 1.1 }
1438     } elsif ($nt == 5) {
1439     unshift @nodes, @{$node->child_nodes};
1440     }
1441     }
1442    
1443     if ($phase eq 'before head') {
1444     $self->{onerror}->(node => $el, type => 'child element missing:head');
1445     $self->{onerror}->(node => $el, type => 'child element missing:body');
1446     } elsif ($phase eq 'after head') {
1447     $self->{onerror}->(node => $el, type => 'child element missing:body');
1448     }
1449    
1450 wakaba 1.25 ## NOTE: Significant content check - this is performed here since
1451     ## |html| content model allows a block-level element - |body|.
1452     push @$new_todos, {
1453     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
1454     old_values => $old_values,
1455 wakaba 1.26 errors => $HTMLSignificantContentErrors,
1456 wakaba 1.25 };
1457    
1458 wakaba 1.1 return ($new_todos);
1459     },
1460     };
1461    
1462     $Element->{$HTML_NS}->{head} = {
1463     attrs_checker => $GetHTMLAttrsChecker->({}),
1464     checker => sub {
1465     my ($self, $todo) = @_;
1466     my $el = $todo->{node};
1467     my $new_todos = [];
1468     my @nodes = (@{$el->child_nodes});
1469    
1470     my $has_title;
1471     while (@nodes) {
1472     my $node = shift @nodes;
1473     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1474    
1475     my $nt = $node->node_type;
1476     if ($nt == 1) {
1477     my $node_ns = $node->namespace_uri;
1478     $node_ns = '' unless defined $node_ns;
1479     my $node_ln = $node->manakai_local_name;
1480 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1481     $self->{onerror}->(node => $node,
1482     type => 'element not allowed:minus',
1483     level => $self->{must_level});
1484     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
1485 wakaba 1.8 #
1486     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1487 wakaba 1.1 unless ($has_title) {
1488     $has_title = 1;
1489     } else {
1490 wakaba 1.30 $self->{onerror}->(node => $node,
1491     type => 'element not allowed:head title',
1492     level => $self->{must_level});
1493     }
1494     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1495     if ($todo->{node}->has_attribute_ns (undef, 'scoped')) {
1496     $self->{onerror}->(node => $node,
1497     type => 'element not allowed:head style',
1498     level => $self->{must_level});
1499 wakaba 1.1 }
1500 wakaba 1.29 } elsif ($HTMLMetadataContent->{$node_ns}->{$node_ln}) {
1501     #
1502    
1503     ## NOTE: |meta| is a metadata content. However, strictly speaking,
1504     ## a |meta| element with none of |charset|, |name|,
1505     ## or |http-equiv| attribute is not allowed. It is non-conforming
1506     ## anyway.
1507 wakaba 1.1 } else {
1508 wakaba 1.30 $self->{onerror}->(node => $node,
1509     type => 'element not allowed:metadata',
1510     level => $self->{must_level});
1511 wakaba 1.1 }
1512 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1513 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1514     unshift @nodes, @$sib;
1515     push @$new_todos, @$ch;
1516     } elsif ($nt == 3 or $nt == 4) {
1517     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1518     $self->{onerror}->(node => $node, type => 'character not allowed');
1519 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1520 wakaba 1.1 }
1521     } elsif ($nt == 5) {
1522     unshift @nodes, @{$node->child_nodes};
1523     }
1524     }
1525     unless ($has_title) {
1526     $self->{onerror}->(node => $el, type => 'child element missing:title');
1527     }
1528     return ($new_todos);
1529     },
1530     };
1531    
1532     $Element->{$HTML_NS}->{title} = {
1533     attrs_checker => $GetHTMLAttrsChecker->({}),
1534     checker => $HTMLTextChecker,
1535     };
1536    
1537     $Element->{$HTML_NS}->{base} = {
1538 wakaba 1.4 attrs_checker => sub {
1539     my ($self, $todo) = @_;
1540    
1541 wakaba 1.29 if ($self->{has_base}) {
1542     $self->{onerror}->(node => $todo->{node},
1543     type => 'element not allowed:base');
1544     } else {
1545     $self->{has_base} = 1;
1546     }
1547    
1548 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1549     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1550    
1551     if ($self->{has_uri_attr} and $has_href) {
1552 wakaba 1.4 ## ISSUE: Are these examples conforming?
1553     ## <head profile="a b c"><base href> (except for |profile|'s
1554     ## non-conformance)
1555     ## <title xml:base="relative"/><base href/> (maybe it should be)
1556     ## <unknown xmlns="relative"/><base href/> (assuming that
1557     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1558     ## <style>@import 'relative';</style><base href>
1559     ## <script>location.href = 'relative';</script><base href>
1560 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1561     ## an exception.
1562 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1563     type => 'basehref after URI attribute');
1564     }
1565 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1566 wakaba 1.4 ## ISSUE: Are these examples conforming?
1567     ## <head><title xlink:href=""/><base target="name"/></head>
1568     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1569     ## (assuming that |xbl:xbl| is allowed before |base|)
1570     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1571     ## <link href=""/><base target="name"/>
1572     ## <link rel=unknown href=""><base target=name>
1573     $self->{onerror}->(node => $todo->{node},
1574     type => 'basetarget after hyperlink');
1575     }
1576    
1577 wakaba 1.14 if (not $has_href and not $has_target) {
1578     $self->{onerror}->(node => $todo->{node},
1579     type => 'attribute missing:href|target');
1580     }
1581    
1582 wakaba 1.4 return $GetHTMLAttrsChecker->({
1583     href => $HTMLURIAttrChecker,
1584     target => $HTMLTargetAttrChecker,
1585     })->($self, $todo);
1586     },
1587 wakaba 1.1 checker => $HTMLEmptyChecker,
1588     };
1589    
1590     $Element->{$HTML_NS}->{link} = {
1591     attrs_checker => sub {
1592     my ($self, $todo) = @_;
1593     $GetHTMLAttrsChecker->({
1594     href => $HTMLURIAttrChecker,
1595 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1596 wakaba 1.1 media => $HTMLMQAttrChecker,
1597     hreflang => $HTMLLanguageTagAttrChecker,
1598     type => $HTMLIMTAttrChecker,
1599     ## NOTE: Though |title| has special semantics,
1600     ## syntactically same as the |title| as global attribute.
1601     })->($self, $todo);
1602 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1603     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1604     } else {
1605 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1606     type => 'attribute missing:href');
1607     }
1608     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1609     $self->{onerror}->(node => $todo->{node},
1610     type => 'attribute missing:rel');
1611     }
1612     },
1613     checker => $HTMLEmptyChecker,
1614     };
1615    
1616     $Element->{$HTML_NS}->{meta} = {
1617     attrs_checker => sub {
1618     my ($self, $todo) = @_;
1619     my $name_attr;
1620     my $http_equiv_attr;
1621     my $charset_attr;
1622     my $content_attr;
1623     for my $attr (@{$todo->{node}->attributes}) {
1624     my $attr_ns = $attr->namespace_uri;
1625     $attr_ns = '' unless defined $attr_ns;
1626     my $attr_ln = $attr->manakai_local_name;
1627     my $checker;
1628     if ($attr_ns eq '') {
1629     if ($attr_ln eq 'content') {
1630     $content_attr = $attr;
1631     $checker = 1;
1632     } elsif ($attr_ln eq 'name') {
1633     $name_attr = $attr;
1634     $checker = 1;
1635     } elsif ($attr_ln eq 'http-equiv') {
1636     $http_equiv_attr = $attr;
1637     $checker = 1;
1638     } elsif ($attr_ln eq 'charset') {
1639     $charset_attr = $attr;
1640     $checker = 1;
1641     } else {
1642     $checker = $HTMLAttrChecker->{$attr_ln}
1643     || $AttrChecker->{$attr_ns}->{$attr_ln}
1644     || $AttrChecker->{$attr_ns}->{''};
1645     }
1646     } else {
1647     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1648     || $AttrChecker->{$attr_ns}->{''};
1649     }
1650     if ($checker) {
1651     $checker->($self, $attr) if ref $checker;
1652     } else {
1653     $self->{onerror}->(node => $attr, level => 'unsupported',
1654     type => 'attribute');
1655     ## ISSUE: No comformance createria for unknown attributes in the spec
1656     }
1657     }
1658    
1659     if (defined $name_attr) {
1660     if (defined $http_equiv_attr) {
1661     $self->{onerror}->(node => $http_equiv_attr,
1662     type => 'attribute not allowed');
1663     } elsif (defined $charset_attr) {
1664     $self->{onerror}->(node => $charset_attr,
1665     type => 'attribute not allowed');
1666     }
1667     my $metadata_name = $name_attr->value;
1668     my $metadata_value;
1669     if (defined $content_attr) {
1670     $metadata_value = $content_attr->value;
1671     } else {
1672     $self->{onerror}->(node => $todo->{node},
1673     type => 'attribute missing:content');
1674     $metadata_value = '';
1675     }
1676     } elsif (defined $http_equiv_attr) {
1677     if (defined $charset_attr) {
1678     $self->{onerror}->(node => $charset_attr,
1679     type => 'attribute not allowed');
1680     }
1681     unless (defined $content_attr) {
1682     $self->{onerror}->(node => $todo->{node},
1683     type => 'attribute missing:content');
1684     }
1685     } elsif (defined $charset_attr) {
1686     if (defined $content_attr) {
1687     $self->{onerror}->(node => $content_attr,
1688     type => 'attribute not allowed');
1689     }
1690     } else {
1691     if (defined $content_attr) {
1692     $self->{onerror}->(node => $content_attr,
1693     type => 'attribute not allowed');
1694     $self->{onerror}->(node => $todo->{node},
1695     type => 'attribute missing:name|http-equiv');
1696     } else {
1697     $self->{onerror}->(node => $todo->{node},
1698     type => 'attribute missing:name|http-equiv|charset');
1699     }
1700     }
1701    
1702 wakaba 1.32 my $check_charset_decl = sub () {
1703 wakaba 1.29 my $parent = $todo->{node}->manakai_parent_element;
1704     if ($parent and $parent eq $parent->owner_document->manakai_head) {
1705     for my $el (@{$parent->child_nodes}) {
1706     next unless $el->node_type == 1; # ELEMENT_NODE
1707     unless ($el eq $todo->{node}) {
1708     ## NOTE: Not the first child element.
1709     $self->{onerror}->(node => $todo->{node},
1710 wakaba 1.32 type => 'element not allowed:meta charset',
1711     level => $self->{must_level});
1712 wakaba 1.29 }
1713     last;
1714     ## NOTE: Entity references are not supported.
1715     }
1716     } else {
1717     $self->{onerror}->(node => $todo->{node},
1718 wakaba 1.32 type => 'element not allowed:meta charset',
1719     level => $self->{must_level});
1720 wakaba 1.29 }
1721    
1722 wakaba 1.1 unless ($todo->{node}->owner_document->manakai_is_html) {
1723 wakaba 1.32 $self->{onerror}->(node => $todo->{node},
1724     type => 'in XML:charset',
1725     level => $self->{must_level});
1726 wakaba 1.1 }
1727 wakaba 1.32 }; # $check_charset_decl
1728 wakaba 1.21
1729 wakaba 1.32 my $check_charset = sub ($$) {
1730     my ($attr, $charset_value) = @_;
1731 wakaba 1.21 ## NOTE: Though the case-sensitivility of |charset| attribute value
1732     ## is not explicitly spelled in the HTML5 spec, the Character Set
1733     ## registry of IANA, which is referenced from HTML5 spec, says that
1734     ## charset name is case-insensitive.
1735     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1736    
1737     require Message::Charset::Info;
1738     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1739     my $ic = $todo->{node}->owner_document->input_encoding;
1740     if (defined $ic) {
1741     ## TODO: Test for this case
1742     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1743     if ($charset ne $ic_charset) {
1744 wakaba 1.32 $self->{onerror}->(node => $attr,
1745 wakaba 1.21 type => 'mismatched charset name:'.$ic.
1746 wakaba 1.32 ':'.$charset_value, ## TODO: This should be a |value| value.
1747     level => $self->{must_level});
1748 wakaba 1.21 }
1749     } else {
1750     ## NOTE: MUST, but not checkable, since the document is not originally
1751     ## in serialized form (or the parser does not preserve the input
1752     ## encoding information).
1753 wakaba 1.32 $self->{onerror}->(node => $attr,
1754     type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
1755 wakaba 1.21 level => 'unsupported');
1756     }
1757    
1758     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1759     ## Syntactically valid and registered? What about x-charset names?
1760     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1761     ($charset_value)) {
1762 wakaba 1.32 $self->{onerror}->(node => $attr,
1763     type => 'charset:syntax error:'.$charset_value, ## TODO
1764     level => $self->{must_level});
1765 wakaba 1.21 }
1766    
1767     if ($charset) {
1768     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1769     ## with no "preferred MIME name" label)?
1770     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1771     if (($charset_status &
1772     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1773     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1774 wakaba 1.32 $self->{onerror}->(node => $attr,
1775 wakaba 1.21 type => 'charset:not preferred:'.
1776 wakaba 1.32 $charset_value, ## TODO
1777     level => $self->{must_level});
1778 wakaba 1.21 }
1779     if (($charset_status &
1780     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1781     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1782     if ($charset_value =~ /^x-/) {
1783 wakaba 1.32 $self->{onerror}->(node => $attr,
1784     type => 'charset:private:'.$charset_value, ## TODO
1785 wakaba 1.21 level => $self->{good_level});
1786     } else {
1787 wakaba 1.32 $self->{onerror}->(node => $attr,
1788 wakaba 1.21 type => 'charset:not registered:'.
1789 wakaba 1.32 $charset_value, ## TODO
1790 wakaba 1.21 level => $self->{good_level});
1791     }
1792     }
1793     } elsif ($charset_value =~ /^x-/) {
1794 wakaba 1.32 $self->{onerror}->(node => $attr,
1795     type => 'charset:private:'.$charset_value, ## TODO
1796 wakaba 1.21 level => $self->{good_level});
1797     } else {
1798 wakaba 1.32 $self->{onerror}->(node => $attr,
1799     type => 'charset:not registered:'.$charset_value, ## TODO
1800 wakaba 1.21 level => $self->{good_level});
1801     }
1802    
1803 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
1804     $self->{onerror}->(node => $attr,
1805 wakaba 1.22 type => 'character reference in charset',
1806     level => $self->{must_level});
1807     }
1808 wakaba 1.32 }; # $check_charset
1809    
1810     ## TODO: metadata conformance
1811    
1812     ## TODO: pragma conformance
1813     if (defined $http_equiv_attr) { ## An enumerated attribute
1814     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1815     if ({
1816     'refresh' => 1,
1817     'default-style' => 1,
1818     }->{$keyword}) {
1819     #
1820 wakaba 1.33
1821     ## TODO: More than one occurence is a MUST-level error (revision 1180).
1822 wakaba 1.32 } elsif ($keyword eq 'content-type') {
1823 wakaba 1.33 ## ISSUE: Though it is renamed as "Encoding declaration" state in rev
1824     ## 1221, there are still many occurence of "Content-Type" state in
1825     ## the spec.
1826    
1827 wakaba 1.32 $check_charset_decl->();
1828     if ($content_attr) {
1829     my $content = $content_attr->value;
1830     if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
1831     $check_charset->($content_attr, $1);
1832     } else {
1833     $self->{onerror}->(node => $content_attr,
1834     type => 'meta content-type syntax error',
1835     level => $self->{must_level});
1836     }
1837     }
1838     } else {
1839     $self->{onerror}->(node => $http_equiv_attr,
1840     type => 'enumerated:invalid');
1841     }
1842     }
1843    
1844     if (defined $charset_attr) {
1845     $check_charset_decl->();
1846     $check_charset->($charset_attr, $charset_attr->value);
1847 wakaba 1.1 }
1848     },
1849     checker => $HTMLEmptyChecker,
1850     };
1851    
1852     $Element->{$HTML_NS}->{style} = {
1853     attrs_checker => $GetHTMLAttrsChecker->({
1854     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1855     media => $HTMLMQAttrChecker,
1856     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1857     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1858     ## not different
1859     }),
1860     checker => sub {
1861 wakaba 1.27 ## NOTE: |html:style| itself has no conformance creteria on content model.
1862 wakaba 1.1 my ($self, $todo) = @_;
1863     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1864 wakaba 1.27 if (not defined $type or
1865     $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
1866     my $el = $todo->{node};
1867     my $new_todos = [];
1868     my @nodes = (@{$el->child_nodes});
1869    
1870     my $ss_text = '';
1871     while (@nodes) {
1872     my $node = shift @nodes;
1873     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1874    
1875     my $nt = $node->node_type;
1876     if ($nt == 1) {
1877 wakaba 1.29 my $node_ns = $node->namespace_uri;
1878     $node_ns = '' unless defined $node_ns;
1879     my $node_ln = $node->manakai_local_name;
1880     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1881     #
1882     } else {
1883     $self->{onerror}->(node => $node, type => 'element not allowed');
1884     }
1885 wakaba 1.27 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1886     unshift @nodes, @$sib;
1887     push @$new_todos, @$ch;
1888     } elsif ($nt == 3 or $nt == 4) {
1889     $ss_text .= $node->text_content;
1890     } elsif ($nt == 5) {
1891     unshift @nodes, @{$node->child_nodes};
1892     }
1893     }
1894    
1895 wakaba 1.28 $self->{onsubdoc}->({s => $ss_text, container_node => $el,
1896     media_type => 'text/css', is_char_string => 1});
1897 wakaba 1.27 return ($new_todos);
1898     } else {
1899     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1900     type => 'style:'.$type); ## TODO: $type normalization
1901     return $AnyChecker->($self, $todo);
1902     }
1903 wakaba 1.1 },
1904     };
1905 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1906 wakaba 1.1
1907     $Element->{$HTML_NS}->{body} = {
1908     attrs_checker => $GetHTMLAttrsChecker->({}),
1909 wakaba 1.29 checker => $HTMLProseContentChecker,
1910 wakaba 1.1 };
1911    
1912     $Element->{$HTML_NS}->{section} = {
1913     attrs_checker => $GetHTMLAttrsChecker->({}),
1914 wakaba 1.29 checker => $HTMLProseContentChecker,
1915 wakaba 1.1 };
1916    
1917     $Element->{$HTML_NS}->{nav} = {
1918     attrs_checker => $GetHTMLAttrsChecker->({}),
1919 wakaba 1.29 checker => $HTMLProseContentChecker,
1920 wakaba 1.1 };
1921    
1922     $Element->{$HTML_NS}->{article} = {
1923     attrs_checker => $GetHTMLAttrsChecker->({}),
1924 wakaba 1.29 checker => $HTMLProseContentChecker,
1925 wakaba 1.1 };
1926    
1927     $Element->{$HTML_NS}->{blockquote} = {
1928     attrs_checker => $GetHTMLAttrsChecker->({
1929     cite => $HTMLURIAttrChecker,
1930     }),
1931 wakaba 1.29 checker => $HTMLProseContentChecker,
1932 wakaba 1.1 };
1933    
1934     $Element->{$HTML_NS}->{aside} = {
1935     attrs_checker => $GetHTMLAttrsChecker->({}),
1936 wakaba 1.29 checker => $HTMLProseContentChecker,
1937 wakaba 1.1 };
1938    
1939     $Element->{$HTML_NS}->{h1} = {
1940     attrs_checker => $GetHTMLAttrsChecker->({}),
1941     checker => sub {
1942     my ($self, $todo) = @_;
1943 wakaba 1.24 $todo->{flag}->{has_descendant}->{hn} = 1;
1944 wakaba 1.29 return $HTMLPhrasingContentChecker->($self, $todo);
1945 wakaba 1.1 },
1946     };
1947    
1948     $Element->{$HTML_NS}->{h2} = {
1949     attrs_checker => $GetHTMLAttrsChecker->({}),
1950     checker => $Element->{$HTML_NS}->{h1}->{checker},
1951     };
1952    
1953     $Element->{$HTML_NS}->{h3} = {
1954     attrs_checker => $GetHTMLAttrsChecker->({}),
1955     checker => $Element->{$HTML_NS}->{h1}->{checker},
1956     };
1957    
1958     $Element->{$HTML_NS}->{h4} = {
1959     attrs_checker => $GetHTMLAttrsChecker->({}),
1960     checker => $Element->{$HTML_NS}->{h1}->{checker},
1961     };
1962    
1963     $Element->{$HTML_NS}->{h5} = {
1964     attrs_checker => $GetHTMLAttrsChecker->({}),
1965     checker => $Element->{$HTML_NS}->{h1}->{checker},
1966     };
1967    
1968     $Element->{$HTML_NS}->{h6} = {
1969     attrs_checker => $GetHTMLAttrsChecker->({}),
1970     checker => $Element->{$HTML_NS}->{h1}->{checker},
1971     };
1972    
1973 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1974    
1975 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
1976     attrs_checker => $GetHTMLAttrsChecker->({}),
1977     checker => sub {
1978     my ($self, $todo) = @_;
1979 wakaba 1.24
1980     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
1981     $todo->{flag}->{has_descendant}->{hn} = 0;
1982 wakaba 1.1
1983     my $end = $self->_add_minuses
1984     ({$HTML_NS => {qw/header 1 footer 1/}},
1985 wakaba 1.29 $HTMLSectioningContent);
1986     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
1987 wakaba 1.24 push @$new_todos, $end,
1988     {type => 'descendant', node => $todo->{node},
1989     flag => $todo->{flag}, old_values => $old_flags,
1990     errors => {
1991     hn => sub {
1992     my ($self, $todo) = @_;
1993     $self->{onerror}->(node => $todo->{node},
1994     type => 'element missing:hn');
1995     },
1996 wakaba 1.1 }};
1997     return ($new_todos, $ch);
1998 wakaba 1.24
1999     ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
2000 wakaba 1.1 },
2001     };
2002    
2003     $Element->{$HTML_NS}->{footer} = {
2004     attrs_checker => $GetHTMLAttrsChecker->({}),
2005 wakaba 1.29 checker => sub {
2006 wakaba 1.1 my ($self, $todo) = @_;
2007 wakaba 1.25
2008 wakaba 1.29 my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
2009     $todo->{flag}->{has_descendant}->{hn} = 0;
2010 wakaba 1.1
2011     my $end = $self->_add_minuses
2012 wakaba 1.29 ({$HTML_NS => {footer => 1}},
2013     $HTMLSectioningContent, $HTMLHeadingContent);
2014     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
2015 wakaba 1.1 push @$new_todos, $end;
2016    
2017 wakaba 1.29 return ($new_todos, $ch);
2018 wakaba 1.1 },
2019     };
2020    
2021     $Element->{$HTML_NS}->{address} = {
2022     attrs_checker => $GetHTMLAttrsChecker->({}),
2023 wakaba 1.29 checker => sub {
2024     my ($self, $todo) = @_;
2025    
2026     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
2027     $todo->{flag}->{has_descendant}->{hn} = 0;
2028    
2029     my $end = $self->_add_minuses
2030     ({$HTML_NS => {footer => 1, address => 1}},
2031     $HTMLSectioningContent, $HTMLHeadingContent);
2032     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
2033     push @$new_todos, $end;
2034    
2035     return ($new_todos, $ch);
2036     },
2037 wakaba 1.1 };
2038    
2039     $Element->{$HTML_NS}->{p} = {
2040     attrs_checker => $GetHTMLAttrsChecker->({}),
2041 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2042 wakaba 1.1 };
2043    
2044     $Element->{$HTML_NS}->{hr} = {
2045     attrs_checker => $GetHTMLAttrsChecker->({}),
2046     checker => $HTMLEmptyChecker,
2047     };
2048    
2049     $Element->{$HTML_NS}->{br} = {
2050     attrs_checker => $GetHTMLAttrsChecker->({}),
2051     checker => $HTMLEmptyChecker,
2052 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
2053     ## (This requirement is semantic so that we cannot check.)
2054 wakaba 1.1 };
2055    
2056     $Element->{$HTML_NS}->{dialog} = {
2057     attrs_checker => $GetHTMLAttrsChecker->({}),
2058     checker => sub {
2059     my ($self, $todo) = @_;
2060     my $el = $todo->{node};
2061     my $new_todos = [];
2062     my @nodes = (@{$el->child_nodes});
2063    
2064     my $phase = 'before dt';
2065     while (@nodes) {
2066     my $node = shift @nodes;
2067     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2068    
2069     my $nt = $node->node_type;
2070     if ($nt == 1) {
2071     my $node_ns = $node->namespace_uri;
2072     $node_ns = '' unless defined $node_ns;
2073     my $node_ln = $node->manakai_local_name;
2074     ## NOTE: |minuses| list is not checked since redundant
2075 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2076     #
2077     } elsif ($phase eq 'before dt') {
2078 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2079     $phase = 'before dd';
2080     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2081     $self->{onerror}
2082     ->(node => $node, type => 'ps element missing:dt');
2083     $phase = 'before dt';
2084     } else {
2085     $self->{onerror}->(node => $node, type => 'element not allowed');
2086     }
2087     } else { # before dd
2088     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2089     $phase = 'before dt';
2090     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2091     $self->{onerror}
2092     ->(node => $node, type => 'ps element missing:dd');
2093     $phase = 'before dd';
2094     } else {
2095     $self->{onerror}->(node => $node, type => 'element not allowed');
2096     }
2097     }
2098     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2099     unshift @nodes, @$sib;
2100     push @$new_todos, @$ch;
2101     } elsif ($nt == 3 or $nt == 4) {
2102     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2103     $self->{onerror}->(node => $node, type => 'character not allowed');
2104 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2105 wakaba 1.1 }
2106     } elsif ($nt == 5) {
2107     unshift @nodes, @{$node->child_nodes};
2108     }
2109     }
2110     if ($phase eq 'before dd') {
2111 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
2112 wakaba 1.1 }
2113     return ($new_todos);
2114     },
2115     };
2116    
2117     $Element->{$HTML_NS}->{pre} = {
2118     attrs_checker => $GetHTMLAttrsChecker->({}),
2119 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2120 wakaba 1.1 };
2121    
2122     $Element->{$HTML_NS}->{ol} = {
2123     attrs_checker => $GetHTMLAttrsChecker->({
2124     start => $HTMLIntegerAttrChecker,
2125     }),
2126     checker => sub {
2127     my ($self, $todo) = @_;
2128     my $el = $todo->{node};
2129     my $new_todos = [];
2130     my @nodes = (@{$el->child_nodes});
2131    
2132     while (@nodes) {
2133     my $node = shift @nodes;
2134     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2135    
2136     my $nt = $node->node_type;
2137     if ($nt == 1) {
2138     my $node_ns = $node->namespace_uri;
2139     $node_ns = '' unless defined $node_ns;
2140     my $node_ln = $node->manakai_local_name;
2141     ## NOTE: |minuses| list is not checked since redundant
2142 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2143     #
2144     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
2145 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2146     }
2147     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2148     unshift @nodes, @$sib;
2149     push @$new_todos, @$ch;
2150     } elsif ($nt == 3 or $nt == 4) {
2151     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2152     $self->{onerror}->(node => $node, type => 'character not allowed');
2153 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2154 wakaba 1.1 }
2155     } elsif ($nt == 5) {
2156     unshift @nodes, @{$node->child_nodes};
2157     }
2158     }
2159    
2160     if ($todo->{inline}) {
2161     for (@$new_todos) {
2162     $_->{inline} = 1;
2163     }
2164     }
2165     return ($new_todos);
2166     },
2167     };
2168    
2169     $Element->{$HTML_NS}->{ul} = {
2170     attrs_checker => $GetHTMLAttrsChecker->({}),
2171     checker => $Element->{$HTML_NS}->{ol}->{checker},
2172     };
2173    
2174     $Element->{$HTML_NS}->{li} = {
2175     attrs_checker => $GetHTMLAttrsChecker->({
2176     start => sub {
2177     my ($self, $attr) = @_;
2178     my $parent = $attr->owner_element->manakai_parent_element;
2179     if (defined $parent) {
2180     my $parent_ns = $parent->namespace_uri;
2181     $parent_ns = '' unless defined $parent_ns;
2182     my $parent_ln = $parent->manakai_local_name;
2183     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
2184     $self->{onerror}->(node => $attr, level => 'unsupported',
2185     type => 'attribute');
2186     }
2187     }
2188     $HTMLIntegerAttrChecker->($self, $attr);
2189     },
2190     }),
2191     checker => sub {
2192     my ($self, $todo) = @_;
2193 wakaba 1.29 if ($todo->{flag}->{in_menu}) {
2194     return $HTMLPhrasingContentChecker->($self, $todo);
2195 wakaba 1.1 } else {
2196 wakaba 1.29 return $HTMLProseContentChecker->($self, $todo);
2197 wakaba 1.1 }
2198     },
2199     };
2200    
2201     $Element->{$HTML_NS}->{dl} = {
2202     attrs_checker => $GetHTMLAttrsChecker->({}),
2203     checker => sub {
2204     my ($self, $todo) = @_;
2205     my $el = $todo->{node};
2206     my $new_todos = [];
2207     my @nodes = (@{$el->child_nodes});
2208    
2209     my $phase = 'before dt';
2210     while (@nodes) {
2211     my $node = shift @nodes;
2212     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2213    
2214     my $nt = $node->node_type;
2215     if ($nt == 1) {
2216     my $node_ns = $node->namespace_uri;
2217     $node_ns = '' unless defined $node_ns;
2218     my $node_ln = $node->manakai_local_name;
2219     ## NOTE: |minuses| list is not checked since redundant
2220 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2221     #
2222     } elsif ($phase eq 'in dds') {
2223 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2224     #$phase = 'in dds';
2225     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2226     $phase = 'in dts';
2227     } else {
2228     $self->{onerror}->(node => $node, type => 'element not allowed');
2229     }
2230     } elsif ($phase eq 'in dts') {
2231     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2232     #$phase = 'in dts';
2233     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2234     $phase = 'in dds';
2235     } else {
2236     $self->{onerror}->(node => $node, type => 'element not allowed');
2237     }
2238     } else { # before dt
2239     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2240     $phase = 'in dts';
2241     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2242     $self->{onerror}
2243     ->(node => $node, type => 'ps element missing:dt');
2244     $phase = 'in dds';
2245     } else {
2246     $self->{onerror}->(node => $node, type => 'element not allowed');
2247     }
2248     }
2249     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2250     unshift @nodes, @$sib;
2251     push @$new_todos, @$ch;
2252     } elsif ($nt == 3 or $nt == 4) {
2253     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2254     $self->{onerror}->(node => $node, type => 'character not allowed');
2255 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2256 wakaba 1.1 }
2257     } elsif ($nt == 5) {
2258     unshift @nodes, @{$node->child_nodes};
2259     }
2260     }
2261     if ($phase eq 'in dts') {
2262 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
2263 wakaba 1.1 }
2264    
2265     if ($todo->{inline}) {
2266     for (@$new_todos) {
2267     $_->{inline} = 1;
2268     }
2269     }
2270     return ($new_todos);
2271     },
2272     };
2273    
2274     $Element->{$HTML_NS}->{dt} = {
2275     attrs_checker => $GetHTMLAttrsChecker->({}),
2276 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2277 wakaba 1.1 };
2278    
2279     $Element->{$HTML_NS}->{dd} = {
2280     attrs_checker => $GetHTMLAttrsChecker->({}),
2281 wakaba 1.29 checker => $HTMLProseContentChecker,
2282 wakaba 1.1 };
2283    
2284     $Element->{$HTML_NS}->{a} = {
2285     attrs_checker => sub {
2286     my ($self, $todo) = @_;
2287     my %attr;
2288     for my $attr (@{$todo->{node}->attributes}) {
2289     my $attr_ns = $attr->namespace_uri;
2290     $attr_ns = '' unless defined $attr_ns;
2291     my $attr_ln = $attr->manakai_local_name;
2292     my $checker;
2293     if ($attr_ns eq '') {
2294     $checker = {
2295     target => $HTMLTargetAttrChecker,
2296     href => $HTMLURIAttrChecker,
2297     ping => $HTMLSpaceURIsAttrChecker,
2298 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2299 wakaba 1.1 media => $HTMLMQAttrChecker,
2300     hreflang => $HTMLLanguageTagAttrChecker,
2301     type => $HTMLIMTAttrChecker,
2302     }->{$attr_ln};
2303     if ($checker) {
2304     $attr{$attr_ln} = $attr;
2305     } else {
2306     $checker = $HTMLAttrChecker->{$attr_ln};
2307     }
2308     }
2309     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2310     || $AttrChecker->{$attr_ns}->{''};
2311     if ($checker) {
2312     $checker->($self, $attr) if ref $checker;
2313     } else {
2314     $self->{onerror}->(node => $attr, level => 'unsupported',
2315     type => 'attribute');
2316     ## ISSUE: No comformance createria for unknown attributes in the spec
2317     }
2318     }
2319    
2320 wakaba 1.4 if (defined $attr{href}) {
2321     $self->{has_hyperlink_element} = 1;
2322     } else {
2323 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
2324     if (defined $attr{$_}) {
2325     $self->{onerror}->(node => $attr{$_},
2326     type => 'attribute not allowed');
2327     }
2328     }
2329     }
2330     },
2331     checker => sub {
2332     my ($self, $todo) = @_;
2333    
2334     my $end = $self->_add_minuses ($HTMLInteractiveElements);
2335 wakaba 1.29 my ($new_todos, $ch) = $HTMLPhrasingContentChecker->($self, $todo);
2336 wakaba 1.1 push @$new_todos, $end;
2337    
2338 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
2339     $_->{flag}->{in_a_href} = 1 for @$new_todos;
2340     }
2341 wakaba 1.1
2342     return ($new_todos, $ch);
2343     },
2344     };
2345    
2346     $Element->{$HTML_NS}->{q} = {
2347     attrs_checker => $GetHTMLAttrsChecker->({
2348     cite => $HTMLURIAttrChecker,
2349     }),
2350 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2351 wakaba 1.1 };
2352    
2353     $Element->{$HTML_NS}->{cite} = {
2354     attrs_checker => $GetHTMLAttrsChecker->({}),
2355 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2356 wakaba 1.1 };
2357    
2358     $Element->{$HTML_NS}->{em} = {
2359     attrs_checker => $GetHTMLAttrsChecker->({}),
2360 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2361 wakaba 1.1 };
2362    
2363     $Element->{$HTML_NS}->{strong} = {
2364     attrs_checker => $GetHTMLAttrsChecker->({}),
2365 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2366 wakaba 1.1 };
2367    
2368     $Element->{$HTML_NS}->{small} = {
2369     attrs_checker => $GetHTMLAttrsChecker->({}),
2370 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2371 wakaba 1.1 };
2372    
2373     $Element->{$HTML_NS}->{m} = {
2374     attrs_checker => $GetHTMLAttrsChecker->({}),
2375 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2376 wakaba 1.1 };
2377    
2378     $Element->{$HTML_NS}->{dfn} = {
2379     attrs_checker => $GetHTMLAttrsChecker->({}),
2380     checker => sub {
2381     my ($self, $todo) = @_;
2382    
2383     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
2384 wakaba 1.29 my ($sib, $ch) = $HTMLPhrasingContentChecker->($self, $todo);
2385 wakaba 1.1 push @$sib, $end;
2386    
2387     my $node = $todo->{node};
2388     my $term = $node->get_attribute_ns (undef, 'title');
2389     unless (defined $term) {
2390     for my $child (@{$node->child_nodes}) {
2391     if ($child->node_type == 1) { # ELEMENT_NODE
2392     if (defined $term) {
2393     undef $term;
2394     last;
2395     } elsif ($child->manakai_local_name eq 'abbr') {
2396     my $nsuri = $child->namespace_uri;
2397     if (defined $nsuri and $nsuri eq $HTML_NS) {
2398     my $attr = $child->get_attribute_node_ns (undef, 'title');
2399     if ($attr) {
2400     $term = $attr->value;
2401     }
2402     }
2403     }
2404     } elsif ($child->node_type == 3 or $child->node_type == 4) {
2405     ## TEXT_NODE or CDATA_SECTION_NODE
2406     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
2407     next;
2408     }
2409     undef $term;
2410     last;
2411     }
2412     }
2413     unless (defined $term) {
2414     $term = $node->text_content;
2415     }
2416     }
2417     if ($self->{term}->{$term}) {
2418     $self->{onerror}->(node => $node, type => 'duplicate term');
2419     push @{$self->{term}->{$term}}, $node;
2420     } else {
2421     $self->{term}->{$term} = [$node];
2422     }
2423     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
2424     ## has |title|.
2425    
2426     return ($sib, $ch);
2427     },
2428     };
2429    
2430     $Element->{$HTML_NS}->{abbr} = {
2431     attrs_checker => $GetHTMLAttrsChecker->({
2432     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
2433     ## not different. The spec says that the |title| MAY be omitted
2434     ## if there is a |dfn| whose defining term is the abbreviation,
2435     ## but it does not prohibit |abbr| w/o |title| in other cases.
2436     }),
2437 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2438 wakaba 1.1 };
2439    
2440     $Element->{$HTML_NS}->{time} = {
2441     attrs_checker => $GetHTMLAttrsChecker->({
2442     datetime => sub { 1 }, # checked in |checker|
2443     }),
2444     ## TODO: Write tests
2445     checker => sub {
2446     my ($self, $todo) = @_;
2447    
2448     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
2449     my $input;
2450     my $reg_sp;
2451     my $input_node;
2452     if ($attr) {
2453     $input = $attr->value;
2454     $reg_sp = qr/[\x09-\x0D\x20]*/;
2455     $input_node = $attr;
2456     } else {
2457     $input = $todo->{node}->text_content;
2458     $reg_sp = qr/\p{Zs}*/;
2459     $input_node = $todo->{node};
2460    
2461     ## ISSUE: What is the definition for "successfully extracts a date
2462     ## or time"? If the algorithm says the string is invalid but
2463     ## return some date or time, is it "successfully"?
2464     }
2465    
2466     my $hour;
2467     my $minute;
2468     my $second;
2469     if ($input =~ /
2470     \A
2471     [\x09-\x0D\x20]*
2472     ([0-9]+) # 1
2473     (?>
2474     -([0-9]+) # 2
2475     -([0-9]+) # 3
2476     [\x09-\x0D\x20]*
2477     (?>
2478     T
2479     [\x09-\x0D\x20]*
2480     )?
2481     ([0-9]+) # 4
2482     :([0-9]+) # 5
2483     (?>
2484     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2485     )?
2486     [\x09-\x0D\x20]*
2487     (?>
2488     Z
2489     [\x09-\x0D\x20]*
2490     |
2491     [+-]([0-9]+):([0-9]+) # 7, 8
2492     [\x09-\x0D\x20]*
2493     )?
2494     \z
2495     |
2496     :([0-9]+) # 9
2497     (?>
2498     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2499     )?
2500     [\x09-\x0D\x20]*\z
2501     )
2502     /x) {
2503     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2504     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2505     length $4 != 2 or length $5 != 2) {
2506     $self->{onerror}->(node => $input_node,
2507     type => 'dateortime:syntax error');
2508     }
2509    
2510     if (1 <= $2 and $2 <= 12) {
2511     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2512     if $3 < 1 or
2513     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
2514     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2515     if $2 == 2 and $3 == 29 and
2516     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2517     } else {
2518     $self->{onerror}->(node => $input_node,
2519     type => 'datetime:bad month');
2520     }
2521    
2522     ($hour, $minute, $second) = ($4, $5, $6);
2523    
2524     if (defined $7) { ## [+-]hh:mm
2525     if (length $7 != 2 or length $8 != 2) {
2526     $self->{onerror}->(node => $input_node,
2527     type => 'dateortime:syntax error');
2528     }
2529    
2530     $self->{onerror}->(node => $input_node,
2531     type => 'datetime:bad timezone hour')
2532     if $7 > 23;
2533     $self->{onerror}->(node => $input_node,
2534     type => 'datetime:bad timezone minute')
2535     if $8 > 59;
2536     }
2537     } else { ## hh:mm
2538     if (length $1 != 2 or length $9 != 2) {
2539     $self->{onerror}->(node => $input_node,
2540     type => qq'dateortime:syntax error');
2541     }
2542    
2543     ($hour, $minute, $second) = ($1, $9, $10);
2544     }
2545    
2546     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2547     if $hour > 23;
2548     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2549     if $minute > 59;
2550    
2551     if (defined $second) { ## s
2552     ## NOTE: Integer part of second don't have to have length of two.
2553    
2554     if (substr ($second, 0, 1) eq '.') {
2555     $self->{onerror}->(node => $input_node,
2556     type => 'dateortime:syntax error');
2557     }
2558    
2559     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2560     if $second >= 60;
2561     }
2562     } else {
2563     $self->{onerror}->(node => $input_node,
2564     type => 'dateortime:syntax error');
2565     }
2566    
2567 wakaba 1.29 return $HTMLPhrasingContentChecker->($self, $todo);
2568 wakaba 1.1 },
2569     };
2570    
2571     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2572     attrs_checker => $GetHTMLAttrsChecker->({
2573     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2574     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2575     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2576     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2577     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2578     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2579     }),
2580 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2581 wakaba 1.1 };
2582    
2583     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2584     attrs_checker => $GetHTMLAttrsChecker->({
2585     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2586     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2587     }),
2588 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2589 wakaba 1.1 };
2590    
2591     $Element->{$HTML_NS}->{code} = {
2592     attrs_checker => $GetHTMLAttrsChecker->({}),
2593     ## NOTE: Though |title| has special semantics,
2594     ## syntatically same as the |title| as global attribute.
2595 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2596 wakaba 1.1 };
2597    
2598     $Element->{$HTML_NS}->{var} = {
2599     attrs_checker => $GetHTMLAttrsChecker->({}),
2600     ## NOTE: Though |title| has special semantics,
2601     ## syntatically same as the |title| as global attribute.
2602 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2603 wakaba 1.1 };
2604    
2605     $Element->{$HTML_NS}->{samp} = {
2606     attrs_checker => $GetHTMLAttrsChecker->({}),
2607     ## NOTE: Though |title| has special semantics,
2608     ## syntatically same as the |title| as global attribute.
2609 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2610 wakaba 1.1 };
2611    
2612     $Element->{$HTML_NS}->{kbd} = {
2613     attrs_checker => $GetHTMLAttrsChecker->({}),
2614 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2615 wakaba 1.1 };
2616    
2617     $Element->{$HTML_NS}->{sub} = {
2618     attrs_checker => $GetHTMLAttrsChecker->({}),
2619 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2620 wakaba 1.1 };
2621    
2622     $Element->{$HTML_NS}->{sup} = {
2623     attrs_checker => $GetHTMLAttrsChecker->({}),
2624 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2625 wakaba 1.1 };
2626    
2627     $Element->{$HTML_NS}->{span} = {
2628     attrs_checker => $GetHTMLAttrsChecker->({}),
2629     ## NOTE: Though |title| has special semantics,
2630     ## syntatically same as the |title| as global attribute.
2631 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2632 wakaba 1.1 };
2633    
2634     $Element->{$HTML_NS}->{i} = {
2635     attrs_checker => $GetHTMLAttrsChecker->({}),
2636     ## NOTE: Though |title| has special semantics,
2637     ## syntatically same as the |title| as global attribute.
2638 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2639 wakaba 1.1 };
2640    
2641     $Element->{$HTML_NS}->{b} = {
2642     attrs_checker => $GetHTMLAttrsChecker->({}),
2643 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2644 wakaba 1.1 };
2645    
2646     $Element->{$HTML_NS}->{bdo} = {
2647     attrs_checker => sub {
2648     my ($self, $todo) = @_;
2649     $GetHTMLAttrsChecker->({})->($self, $todo);
2650     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2651     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2652     }
2653     },
2654     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2655 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2656 wakaba 1.1 };
2657    
2658 wakaba 1.29 =pod
2659    
2660     ## TODO:
2661    
2662     +
2663     + <p>Partly because of the confusion described above, authors are
2664     + strongly recommended to always mark up all paragraphs with the
2665     + <code>p</code> element, and to not have any <code>ins</code> or
2666     + <code>del</code> elements that cross across any <span
2667     + title="paragraph">implied paragraphs</span>.</p>
2668     +
2669     (An informative note)
2670    
2671     <p><code>ins</code> elements should not cross <span
2672     + title="paragraph">implied paragraph</span> boundaries.</p>
2673     (normative)
2674    
2675     + <p><code>del</code> elements should not cross <span
2676     + title="paragraph">implied paragraph</span> boundaries.</p>
2677     (normative)
2678    
2679     =cut
2680    
2681 wakaba 1.1 $Element->{$HTML_NS}->{ins} = {
2682     attrs_checker => $GetHTMLAttrsChecker->({
2683     cite => $HTMLURIAttrChecker,
2684     datetime => $HTMLDatetimeAttrChecker,
2685     }),
2686     checker => $HTMLTransparentChecker,
2687     };
2688    
2689     $Element->{$HTML_NS}->{del} = {
2690     attrs_checker => $GetHTMLAttrsChecker->({
2691     cite => $HTMLURIAttrChecker,
2692     datetime => $HTMLDatetimeAttrChecker,
2693     }),
2694     checker => sub {
2695     my ($self, $todo) = @_;
2696 wakaba 1.29 my $sig_flag = $todo->{flag}->{has_descendant}->{significant};
2697     my ($new_todos) = $HTMLTransparentChecker->($self, $todo);
2698     push @$new_todos, {type => 'code', code => sub {
2699     $todo->{flag}->{has_descendant}->{significant} = 0;
2700     }} if not $sig_flag;
2701     return $new_todos;
2702 wakaba 1.1 },
2703     };
2704    
2705     ## TODO: figure
2706 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2707 wakaba 1.1
2708 wakaba 1.4 ## TODO: |alt|
2709 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2710     attrs_checker => sub {
2711     my ($self, $todo) = @_;
2712     $GetHTMLAttrsChecker->({
2713     alt => sub { }, ## NOTE: No syntactical requirement
2714     src => $HTMLURIAttrChecker,
2715     usemap => $HTMLUsemapAttrChecker,
2716     ismap => sub {
2717     my ($self, $attr, $parent_todo) = @_;
2718 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2719     $self->{onerror}->(node => $attr,
2720     type => 'attribute not allowed:ismap');
2721 wakaba 1.1 }
2722     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2723     },
2724     ## TODO: height
2725     ## TODO: width
2726     })->($self, $todo);
2727     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2728     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2729     }
2730     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2731     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2732     }
2733     },
2734 wakaba 1.25 checker => sub {
2735     my ($self, $todo) = @_;
2736     $todo->{flag}->{has_descendant}->{significant} = 1;
2737     return $HTMLEmptyChecker->($self, $todo);
2738     },
2739 wakaba 1.1 };
2740    
2741     $Element->{$HTML_NS}->{iframe} = {
2742     attrs_checker => $GetHTMLAttrsChecker->({
2743     src => $HTMLURIAttrChecker,
2744     }),
2745 wakaba 1.25 checker => sub {
2746     my ($self, $todo) = @_;
2747     $todo->{flag}->{has_descendant}->{significant} = 1;
2748     return $HTMLTextChecker->($self, $todo);
2749     },
2750 wakaba 1.1 };
2751    
2752     $Element->{$HTML_NS}->{embed} = {
2753     attrs_checker => sub {
2754     my ($self, $todo) = @_;
2755     my $has_src;
2756     for my $attr (@{$todo->{node}->attributes}) {
2757     my $attr_ns = $attr->namespace_uri;
2758     $attr_ns = '' unless defined $attr_ns;
2759     my $attr_ln = $attr->manakai_local_name;
2760     my $checker;
2761     if ($attr_ns eq '') {
2762     if ($attr_ln eq 'src') {
2763     $checker = $HTMLURIAttrChecker;
2764     $has_src = 1;
2765     } elsif ($attr_ln eq 'type') {
2766     $checker = $HTMLIMTAttrChecker;
2767     } else {
2768     ## TODO: height
2769     ## TODO: width
2770     $checker = $HTMLAttrChecker->{$attr_ln}
2771     || sub { }; ## NOTE: Any local attribute is ok.
2772     }
2773     }
2774     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2775     || $AttrChecker->{$attr_ns}->{''};
2776     if ($checker) {
2777     $checker->($self, $attr);
2778     } else {
2779     $self->{onerror}->(node => $attr, level => 'unsupported',
2780     type => 'attribute');
2781     ## ISSUE: No comformance createria for global attributes in the spec
2782     }
2783     }
2784    
2785     unless ($has_src) {
2786     $self->{onerror}->(node => $todo->{node},
2787     type => 'attribute missing:src');
2788     }
2789     },
2790 wakaba 1.25 checker => sub {
2791     my ($self, $todo) = @_;
2792     $todo->{flag}->{has_descendant}->{significant} = 1;
2793     return $HTMLEmptyChecker->($self, $todo);
2794     },
2795 wakaba 1.1 };
2796    
2797     $Element->{$HTML_NS}->{object} = {
2798     attrs_checker => sub {
2799     my ($self, $todo) = @_;
2800     $GetHTMLAttrsChecker->({
2801     data => $HTMLURIAttrChecker,
2802     type => $HTMLIMTAttrChecker,
2803     usemap => $HTMLUsemapAttrChecker,
2804     ## TODO: width
2805     ## TODO: height
2806     })->($self, $todo);
2807     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2808     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2809     $self->{onerror}->(node => $todo->{node},
2810     type => 'attribute missing:data|type');
2811     }
2812     }
2813     },
2814 wakaba 1.29 ## NOTE: param*, then transparent.
2815 wakaba 1.25 checker => sub {
2816     my ($self, $todo) = @_;
2817     $todo->{flag}->{has_descendant}->{significant} = 1;
2818     return $ElementDefault->{checker}->($self, $todo); ## TODO
2819     },
2820 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2821 wakaba 1.1 };
2822    
2823     $Element->{$HTML_NS}->{param} = {
2824     attrs_checker => sub {
2825     my ($self, $todo) = @_;
2826     $GetHTMLAttrsChecker->({
2827     name => sub { },
2828     value => sub { },
2829     })->($self, $todo);
2830     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2831     $self->{onerror}->(node => $todo->{node},
2832     type => 'attribute missing:name');
2833     }
2834     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2835     $self->{onerror}->(node => $todo->{node},
2836     type => 'attribute missing:value');
2837     }
2838     },
2839     checker => $HTMLEmptyChecker,
2840     };
2841    
2842     $Element->{$HTML_NS}->{video} = {
2843     attrs_checker => $GetHTMLAttrsChecker->({
2844     src => $HTMLURIAttrChecker,
2845     ## TODO: start, loopstart, loopend, end
2846     ## ISSUE: they MUST be "value time offset"s. Value?
2847 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2848 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2849     controls => $GetHTMLBooleanAttrChecker->('controls'),
2850 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2851     ## TODO: width, height (not for audio!)
2852 wakaba 1.1 }),
2853     checker => sub {
2854     my ($self, $todo) = @_;
2855 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2856 wakaba 1.1
2857 wakaba 1.29 ## TODO:
2858 wakaba 1.1 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2859     return $HTMLBlockOrInlineChecker->($self, $todo);
2860     } else {
2861     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2862     ->($self, $todo);
2863     }
2864     },
2865     };
2866    
2867     $Element->{$HTML_NS}->{audio} = {
2868     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2869     checker => $Element->{$HTML_NS}->{video}->{checker},
2870     };
2871    
2872     $Element->{$HTML_NS}->{source} = {
2873     attrs_checker => sub {
2874     my ($self, $todo) = @_;
2875     $GetHTMLAttrsChecker->({
2876     src => $HTMLURIAttrChecker,
2877     type => $HTMLIMTAttrChecker,
2878     media => $HTMLMQAttrChecker,
2879     })->($self, $todo);
2880     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2881     $self->{onerror}->(node => $todo->{node},
2882     type => 'attribute missing:src');
2883     }
2884     },
2885     checker => $HTMLEmptyChecker,
2886     };
2887    
2888     $Element->{$HTML_NS}->{canvas} = {
2889     attrs_checker => $GetHTMLAttrsChecker->({
2890     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2891     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2892     }),
2893 wakaba 1.25 checker => sub {
2894     my ($self, $todo) = @_;
2895     $todo->{flag}->{has_descendant}->{significant} = 1;
2896 wakaba 1.29 return $HTMLTransparentChecker->($self, $todo);
2897 wakaba 1.25 },
2898 wakaba 1.1 };
2899    
2900     $Element->{$HTML_NS}->{map} = {
2901 wakaba 1.4 attrs_checker => sub {
2902     my ($self, $todo) = @_;
2903     my $has_id;
2904     $GetHTMLAttrsChecker->({
2905     id => sub {
2906     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2907     my ($self, $attr) = @_;
2908     my $value = $attr->value;
2909     if (length $value > 0) {
2910     if ($self->{id}->{$value}) {
2911     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2912     push @{$self->{id}->{$value}}, $attr;
2913     } else {
2914     $self->{id}->{$value} = [$attr];
2915     }
2916 wakaba 1.1 } else {
2917 wakaba 1.4 ## NOTE: MUST contain at least one character
2918     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2919 wakaba 1.1 }
2920 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2921     $self->{onerror}->(node => $attr, type => 'space in ID');
2922     }
2923     $self->{map}->{$value} ||= $attr;
2924     $has_id = 1;
2925     },
2926     })->($self, $todo);
2927     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2928     unless $has_id;
2929     },
2930 wakaba 1.29 checker => $HTMLProseContentChecker,
2931 wakaba 1.1 };
2932    
2933     $Element->{$HTML_NS}->{area} = {
2934     attrs_checker => sub {
2935     my ($self, $todo) = @_;
2936     my %attr;
2937     my $coords;
2938     for my $attr (@{$todo->{node}->attributes}) {
2939     my $attr_ns = $attr->namespace_uri;
2940     $attr_ns = '' unless defined $attr_ns;
2941     my $attr_ln = $attr->manakai_local_name;
2942     my $checker;
2943     if ($attr_ns eq '') {
2944     $checker = {
2945     alt => sub { },
2946     ## NOTE: |alt| value has no conformance creteria.
2947     shape => $GetHTMLEnumeratedAttrChecker->({
2948     circ => -1, circle => 1,
2949     default => 1,
2950     poly => 1, polygon => -1,
2951     rect => 1, rectangle => -1,
2952     }),
2953     coords => sub {
2954     my ($self, $attr) = @_;
2955     my $value = $attr->value;
2956     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2957     $coords = [split /,/, $value];
2958     } else {
2959     $self->{onerror}->(node => $attr,
2960     type => 'coords:syntax error');
2961     }
2962     },
2963     target => $HTMLTargetAttrChecker,
2964     href => $HTMLURIAttrChecker,
2965     ping => $HTMLSpaceURIsAttrChecker,
2966 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2967 wakaba 1.1 media => $HTMLMQAttrChecker,
2968     hreflang => $HTMLLanguageTagAttrChecker,
2969     type => $HTMLIMTAttrChecker,
2970     }->{$attr_ln};
2971     if ($checker) {
2972     $attr{$attr_ln} = $attr;
2973     } else {
2974     $checker = $HTMLAttrChecker->{$attr_ln};
2975     }
2976     }
2977     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2978     || $AttrChecker->{$attr_ns}->{''};
2979     if ($checker) {
2980     $checker->($self, $attr) if ref $checker;
2981     } else {
2982     $self->{onerror}->(node => $attr, level => 'unsupported',
2983     type => 'attribute');
2984     ## ISSUE: No comformance createria for unknown attributes in the spec
2985     }
2986     }
2987    
2988     if (defined $attr{href}) {
2989 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2990 wakaba 1.1 unless (defined $attr{alt}) {
2991     $self->{onerror}->(node => $todo->{node},
2992     type => 'attribute missing:alt');
2993     }
2994     } else {
2995     for (qw/target ping rel media hreflang type alt/) {
2996     if (defined $attr{$_}) {
2997     $self->{onerror}->(node => $attr{$_},
2998     type => 'attribute not allowed');
2999     }
3000     }
3001     }
3002    
3003     my $shape = 'rectangle';
3004     if (defined $attr{shape}) {
3005     $shape = {
3006     circ => 'circle', circle => 'circle',
3007     default => 'default',
3008     poly => 'polygon', polygon => 'polygon',
3009     rect => 'rectangle', rectangle => 'rectangle',
3010     }->{lc $attr{shape}->value} || 'rectangle';
3011     ## TODO: ASCII lowercase?
3012     }
3013    
3014     if ($shape eq 'circle') {
3015     if (defined $attr{coords}) {
3016     if (defined $coords) {
3017     if (@$coords == 3) {
3018     if ($coords->[2] < 0) {
3019     $self->{onerror}->(node => $attr{coords},
3020     type => 'coords:out of range:2');
3021     }
3022     } else {
3023     $self->{onerror}->(node => $attr{coords},
3024     type => 'coords:number:3:'.@$coords);
3025     }
3026     } else {
3027     ## NOTE: A syntax error has been reported.
3028     }
3029     } else {
3030     $self->{onerror}->(node => $todo->{node},
3031     type => 'attribute missing:coords');
3032     }
3033     } elsif ($shape eq 'default') {
3034     if (defined $attr{coords}) {
3035     $self->{onerror}->(node => $attr{coords},
3036     type => 'attribute not allowed');
3037     }
3038     } elsif ($shape eq 'polygon') {
3039     if (defined $attr{coords}) {
3040     if (defined $coords) {
3041     if (@$coords >= 6) {
3042     unless (@$coords % 2 == 0) {
3043     $self->{onerror}->(node => $attr{coords},
3044     type => 'coords:number:even:'.@$coords);
3045     }
3046     } else {
3047     $self->{onerror}->(node => $attr{coords},
3048     type => 'coords:number:>=6:'.@$coords);
3049     }
3050     } else {
3051     ## NOTE: A syntax error has been reported.
3052     }
3053     } else {
3054     $self->{onerror}->(node => $todo->{node},
3055     type => 'attribute missing:coords');
3056     }
3057     } elsif ($shape eq 'rectangle') {
3058     if (defined $attr{coords}) {
3059     if (defined $coords) {
3060     if (@$coords == 4) {
3061     unless ($coords->[0] < $coords->[2]) {
3062     $self->{onerror}->(node => $attr{coords},
3063     type => 'coords:out of range:0');
3064     }
3065     unless ($coords->[1] < $coords->[3]) {
3066     $self->{onerror}->(node => $attr{coords},
3067     type => 'coords:out of range:1');
3068     }
3069     } else {
3070     $self->{onerror}->(node => $attr{coords},
3071     type => 'coords:number:4:'.@$coords);
3072     }
3073     } else {
3074     ## NOTE: A syntax error has been reported.
3075     }
3076     } else {
3077     $self->{onerror}->(node => $todo->{node},
3078     type => 'attribute missing:coords');
3079     }
3080     }
3081     },
3082     checker => $HTMLEmptyChecker,
3083     };
3084     ## TODO: only in map
3085    
3086     $Element->{$HTML_NS}->{table} = {
3087     attrs_checker => $GetHTMLAttrsChecker->({}),
3088     checker => sub {
3089     my ($self, $todo) = @_;
3090     my $el = $todo->{node};
3091     my $new_todos = [];
3092     my @nodes = (@{$el->child_nodes});
3093    
3094     my $phase = 'before caption';
3095     my $has_tfoot;
3096     while (@nodes) {
3097     my $node = shift @nodes;
3098     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3099    
3100     my $nt = $node->node_type;
3101     if ($nt == 1) {
3102     my $node_ns = $node->namespace_uri;
3103     $node_ns = '' unless defined $node_ns;
3104     my $node_ln = $node->manakai_local_name;
3105     ## NOTE: |minuses| list is not checked since redundant
3106 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3107     #
3108     } elsif ($phase eq 'in tbodys') {
3109 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3110     #$phase = 'in tbodys';
3111     } elsif (not $has_tfoot and
3112     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3113     $phase = 'after tfoot';
3114     $has_tfoot = 1;
3115     } else {
3116     $self->{onerror}->(node => $node, type => 'element not allowed');
3117     }
3118     } elsif ($phase eq 'in trs') {
3119     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3120     #$phase = 'in trs';
3121     } elsif (not $has_tfoot and
3122     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3123     $phase = 'after tfoot';
3124     $has_tfoot = 1;
3125     } else {
3126     $self->{onerror}->(node => $node, type => 'element not allowed');
3127     }
3128     } elsif ($phase eq 'after thead') {
3129     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3130     $phase = 'in tbodys';
3131     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3132     $phase = 'in trs';
3133     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3134     $phase = 'in tbodys';
3135     $has_tfoot = 1;
3136     } else {
3137     $self->{onerror}->(node => $node, type => 'element not allowed');
3138     }
3139     } elsif ($phase eq 'in colgroup') {
3140     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
3141     $phase = 'in colgroup';
3142     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
3143     $phase = 'after thead';
3144     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3145     $phase = 'in tbodys';
3146     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3147     $phase = 'in trs';
3148     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3149     $phase = 'in tbodys';
3150     $has_tfoot = 1;
3151     } else {
3152     $self->{onerror}->(node => $node, type => 'element not allowed');
3153     }
3154     } elsif ($phase eq 'before caption') {
3155     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
3156     $phase = 'in colgroup';
3157     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
3158     $phase = 'in colgroup';
3159     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
3160     $phase = 'after thead';
3161     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3162     $phase = 'in tbodys';
3163     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3164     $phase = 'in trs';
3165     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3166     $phase = 'in tbodys';
3167     $has_tfoot = 1;
3168     } else {
3169     $self->{onerror}->(node => $node, type => 'element not allowed');
3170     }
3171     } else { # after tfoot
3172     $self->{onerror}->(node => $node, type => 'element not allowed');
3173     }
3174     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3175     unshift @nodes, @$sib;
3176     push @$new_todos, @$ch;
3177     } elsif ($nt == 3 or $nt == 4) {
3178     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3179     $self->{onerror}->(node => $node, type => 'character not allowed');
3180 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3181 wakaba 1.1 }
3182     } elsif ($nt == 5) {
3183     unshift @nodes, @{$node->child_nodes};
3184     }
3185     }
3186    
3187     ## Table model errors
3188     require Whatpm::HTMLTable;
3189     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
3190     my %opt = @_;
3191     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
3192     });
3193     push @{$self->{return}->{table}}, $todo->{node};
3194    
3195     return ($new_todos);
3196     },
3197     };
3198    
3199     $Element->{$HTML_NS}->{caption} = {
3200     attrs_checker => $GetHTMLAttrsChecker->({}),
3201 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
3202 wakaba 1.1 };
3203    
3204     $Element->{$HTML_NS}->{colgroup} = {
3205     attrs_checker => $GetHTMLAttrsChecker->({
3206     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3207     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
3208     ## TODO: "attribute not supported" if |col|.
3209     ## ISSUE: MUST NOT if any |col|?
3210     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
3211     }),
3212     checker => sub {
3213     my ($self, $todo) = @_;
3214     my $el = $todo->{node};
3215     my $new_todos = [];
3216     my @nodes = (@{$el->child_nodes});
3217    
3218     while (@nodes) {
3219     my $node = shift @nodes;
3220     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3221    
3222     my $nt = $node->node_type;
3223     if ($nt == 1) {
3224     my $node_ns = $node->namespace_uri;
3225     $node_ns = '' unless defined $node_ns;
3226     my $node_ln = $node->manakai_local_name;
3227     ## NOTE: |minuses| list is not checked since redundant
3228 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3229     #
3230     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
3231 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
3232     }
3233     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3234     unshift @nodes, @$sib;
3235     push @$new_todos, @$ch;
3236     } elsif ($nt == 3 or $nt == 4) {
3237     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3238     $self->{onerror}->(node => $node, type => 'character not allowed');
3239 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3240 wakaba 1.1 }
3241     } elsif ($nt == 5) {
3242     unshift @nodes, @{$node->child_nodes};
3243     }
3244     }
3245     return ($new_todos);
3246     },
3247     };
3248    
3249     $Element->{$HTML_NS}->{col} = {
3250     attrs_checker => $GetHTMLAttrsChecker->({
3251     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3252     }),
3253     checker => $HTMLEmptyChecker,
3254     };
3255    
3256     $Element->{$HTML_NS}->{tbody} = {
3257     attrs_checker => $GetHTMLAttrsChecker->({}),
3258     checker => sub {
3259     my ($self, $todo) = @_;
3260     my $el = $todo->{node};
3261     my $new_todos = [];
3262     my @nodes = (@{$el->child_nodes});
3263    
3264     my $has_tr;
3265     while (@nodes) {
3266     my $node = shift @nodes;
3267     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3268    
3269     my $nt = $node->node_type;
3270     if ($nt == 1) {
3271     my $node_ns = $node->namespace_uri;
3272     $node_ns = '' unless defined $node_ns;
3273     my $node_ln = $node->manakai_local_name;
3274     ## NOTE: |minuses| list is not checked since redundant
3275 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3276     #
3277     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3278 wakaba 1.1 $has_tr = 1;
3279     } else {
3280     $self->{onerror}->(node => $node, type => 'element not allowed');
3281     }
3282     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3283     unshift @nodes, @$sib;
3284     push @$new_todos, @$ch;
3285     } elsif ($nt == 3 or $nt == 4) {
3286     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3287     $self->{onerror}->(node => $node, type => 'character not allowed');
3288 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3289 wakaba 1.1 }
3290     } elsif ($nt == 5) {
3291     unshift @nodes, @{$node->child_nodes};
3292     }
3293     }
3294     unless ($has_tr) {
3295     $self->{onerror}->(node => $el, type => 'child element missing:tr');
3296     }
3297     return ($new_todos);
3298     },
3299     };
3300    
3301     $Element->{$HTML_NS}->{thead} = {
3302     attrs_checker => $GetHTMLAttrsChecker->({}),
3303     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3304     };
3305    
3306     $Element->{$HTML_NS}->{tfoot} = {
3307     attrs_checker => $GetHTMLAttrsChecker->({}),
3308     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3309     };
3310    
3311     $Element->{$HTML_NS}->{tr} = {
3312     attrs_checker => $GetHTMLAttrsChecker->({}),
3313     checker => sub {
3314     my ($self, $todo) = @_;
3315     my $el = $todo->{node};
3316     my $new_todos = [];
3317     my @nodes = (@{$el->child_nodes});
3318    
3319     my $has_td;
3320     while (@nodes) {
3321     my $node = shift @nodes;
3322     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3323    
3324     my $nt = $node->node_type;
3325     if ($nt == 1) {
3326     my $node_ns = $node->namespace_uri;
3327     $node_ns = '' unless defined $node_ns;
3328     my $node_ln = $node->manakai_local_name;
3329     ## NOTE: |minuses| list is not checked since redundant
3330 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3331     #
3332     } elsif ($node_ns eq $HTML_NS and
3333     ($node_ln eq 'td' or $node_ln eq 'th')) {
3334 wakaba 1.1 $has_td = 1;
3335     } else {
3336     $self->{onerror}->(node => $node, type => 'element not allowed');
3337     }
3338     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3339     unshift @nodes, @$sib;
3340     push @$new_todos, @$ch;
3341     } elsif ($nt == 3 or $nt == 4) {
3342     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3343     $self->{onerror}->(node => $node, type => 'character not allowed');
3344 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3345 wakaba 1.1 }
3346     } elsif ($nt == 5) {
3347     unshift @nodes, @{$node->child_nodes};
3348     }
3349     }
3350     unless ($has_td) {
3351     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
3352     }
3353     return ($new_todos);
3354     },
3355     };
3356    
3357     $Element->{$HTML_NS}->{td} = {
3358     attrs_checker => $GetHTMLAttrsChecker->({
3359     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3360     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3361     }),
3362 wakaba 1.29 checker => $HTMLProseContentChecker,
3363 wakaba 1.1 };
3364    
3365     $Element->{$HTML_NS}->{th} = {
3366     attrs_checker => $GetHTMLAttrsChecker->({
3367     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3368     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3369     scope => $GetHTMLEnumeratedAttrChecker
3370     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
3371     }),
3372 wakaba 1.31 checker => $HTMLPhrasingContentChecker,
3373 wakaba 1.1 };
3374    
3375     ## TODO: forms
3376 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
3377 wakaba 1.1
3378     $Element->{$HTML_NS}->{script} = {
3379 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
3380 wakaba 1.1 src => $HTMLURIAttrChecker,
3381     defer => $GetHTMLBooleanAttrChecker->('defer'),
3382     async => $GetHTMLBooleanAttrChecker->('async'),
3383     type => $HTMLIMTAttrChecker,
3384 wakaba 1.9 }),
3385 wakaba 1.1 checker => sub {
3386     my ($self, $todo) = @_;
3387    
3388     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
3389     return $HTMLEmptyChecker->($self, $todo);
3390     } else {
3391     ## NOTE: No content model conformance in HTML5 spec.
3392     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
3393     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
3394     if ((defined $type and $type eq '') or
3395     (defined $language and $language eq '')) {
3396     $type = 'text/javascript';
3397     } elsif (defined $type) {
3398     #
3399     } elsif (defined $language) {
3400     $type = 'text/' . $language;
3401     } else {
3402     $type = 'text/javascript';
3403     }
3404     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
3405     type => 'script:'.$type); ## TODO: $type normalization
3406     return $AnyChecker->($self, $todo);
3407     }
3408     },
3409     };
3410 wakaba 1.25 ## ISSUE: Significant check and text child node
3411 wakaba 1.1
3412     ## NOTE: When script is disabled.
3413     $Element->{$HTML_NS}->{noscript} = {
3414 wakaba 1.3 attrs_checker => sub {
3415     my ($self, $todo) = @_;
3416    
3417     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
3418     ## since the later is not invoked when the |noscript| is used as a
3419     ## transparent element.
3420     unless ($todo->{node}->owner_document->manakai_is_html) {
3421     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
3422     }
3423    
3424     $GetHTMLAttrsChecker->({})->($self, $todo);
3425     },
3426 wakaba 1.1 checker => sub {
3427     my ($self, $todo) = @_;
3428    
3429 wakaba 1.3 if ($todo->{flag}->{in_head}) {
3430     my $new_todos = [];
3431     my @nodes = (@{$todo->{node}->child_nodes});
3432    
3433     while (@nodes) {
3434     my $node = shift @nodes;
3435     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3436    
3437     my $nt = $node->node_type;
3438     if ($nt == 1) {
3439     my $node_ns = $node->namespace_uri;
3440     $node_ns = '' unless defined $node_ns;
3441     my $node_ln = $node->manakai_local_name;
3442 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3443     #
3444     } elsif ($node_ns eq $HTML_NS) {
3445 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
3446     #
3447     } elsif ($node_ln eq 'meta') {
3448 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
3449     #
3450 wakaba 1.3 } else {
3451 wakaba 1.5 $self->{onerror}->(node => $node,
3452     type => 'element not allowed');
3453 wakaba 1.3 }
3454     } else {
3455     $self->{onerror}->(node => $node, type => 'element not allowed');
3456     }
3457     } else {
3458     $self->{onerror}->(node => $node, type => 'element not allowed');
3459     }
3460    
3461     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3462     unshift @nodes, @$sib;
3463     push @$new_todos, @$ch;
3464     } elsif ($nt == 3 or $nt == 4) {
3465     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3466     $self->{onerror}->(node => $node, type => 'character not allowed');
3467 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3468 wakaba 1.3 }
3469     } elsif ($nt == 5) {
3470     unshift @nodes, @{$node->child_nodes};
3471     }
3472     }
3473     return ($new_todos);
3474     } else {
3475     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
3476 wakaba 1.29 my ($sib, $ch) = $HTMLTransparentChecker->($self, $todo);
3477 wakaba 1.3 push @$sib, $end;
3478     return ($sib, $ch);
3479     }
3480 wakaba 1.1 },
3481     };
3482 wakaba 1.3
3483     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
3484 wakaba 1.1
3485     $Element->{$HTML_NS}->{'event-source'} = {
3486     attrs_checker => $GetHTMLAttrsChecker->({
3487     src => $HTMLURIAttrChecker,
3488     }),
3489     checker => $HTMLEmptyChecker,
3490     };
3491    
3492     $Element->{$HTML_NS}->{details} = {
3493     attrs_checker => $GetHTMLAttrsChecker->({
3494     open => $GetHTMLBooleanAttrChecker->('open'),
3495     }),
3496     checker => sub {
3497     my ($self, $todo) = @_;
3498    
3499 wakaba 1.29 ## TODO:
3500 wakaba 1.1 my ($sib, $ch)
3501     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
3502     ->($self, $todo);
3503     return ($sib, $ch);
3504     },
3505     };
3506    
3507     $Element->{$HTML_NS}->{datagrid} = {
3508     attrs_checker => $GetHTMLAttrsChecker->({
3509     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3510     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3511     }),
3512     checker => sub {
3513     my ($self, $todo) = @_;
3514     my $el = $todo->{node};
3515     my $new_todos = [];
3516     my @nodes = (@{$el->child_nodes});
3517    
3518 wakaba 1.25 my $old_values = {significant =>
3519     $todo->{flag}->{has_descendant}->{significant}};
3520     $todo->{flag}->{has_descendant}->{significant} = 0;
3521    
3522 wakaba 1.1 my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3523    
3524 wakaba 1.29 ## Prose -(text* table Prose*) | table | select | datalist | Empty
3525 wakaba 1.1 my $mode = 'any';
3526     while (@nodes) {
3527     my $node = shift @nodes;
3528     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3529    
3530     my $nt = $node->node_type;
3531     if ($nt == 1) {
3532     my $node_ns = $node->namespace_uri;
3533     $node_ns = '' unless defined $node_ns;
3534     my $node_ln = $node->manakai_local_name;
3535     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3536 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3537     #
3538 wakaba 1.29 } elsif ($mode eq 'prose') {
3539 wakaba 1.1 $not_allowed = 1
3540 wakaba 1.29 unless $HTMLProseContent->{$node_ns}->{$node_ln};
3541 wakaba 1.1 } elsif ($mode eq 'any') {
3542     if ($node_ns eq $HTML_NS and
3543     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
3544     $mode = 'none';
3545 wakaba 1.29 } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
3546     $mode = 'prose';
3547 wakaba 1.1 } else {
3548     $not_allowed = 1;
3549     }
3550     } else {
3551     $not_allowed = 1;
3552     }
3553     $self->{onerror}->(node => $node, type => 'element not allowed')
3554     if $not_allowed;
3555     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3556     unshift @nodes, @$sib;
3557     push @$new_todos, @$ch;
3558     } elsif ($nt == 3 or $nt == 4) {
3559     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3560 wakaba 1.29 if ($mode eq 'prose') {
3561     #
3562     } elsif ($mode eq 'any') {
3563     $mode = 'prose';
3564     } else {
3565     $self->{onerror}->(node => $node, type => 'character not allowed');
3566     }
3567 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3568 wakaba 1.1 }
3569     } elsif ($nt == 5) {
3570     unshift @nodes, @{$node->child_nodes};
3571     }
3572     }
3573    
3574     push @$new_todos, $end;
3575 wakaba 1.25
3576     push @$new_todos, {
3577     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3578     old_values => $old_values,
3579 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3580 wakaba 1.25 };
3581    
3582 wakaba 1.1 return ($new_todos);
3583 wakaba 1.29
3584     ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
3585     ## are not disallowed (assuming that form control contents are also
3586     ## prose content).
3587 wakaba 1.1 },
3588     };
3589    
3590     $Element->{$HTML_NS}->{command} = {
3591     attrs_checker => $GetHTMLAttrsChecker->({
3592     checked => $GetHTMLBooleanAttrChecker->('checked'),
3593     default => $GetHTMLBooleanAttrChecker->('default'),
3594     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3595     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3596     icon => $HTMLURIAttrChecker,
3597     label => sub { }, ## NOTE: No conformance creteria
3598     radiogroup => sub { }, ## NOTE: No conformance creteria
3599     ## NOTE: |title| has special semantics, but no syntactical difference
3600     type => sub {
3601     my ($self, $attr) = @_;
3602     my $value = $attr->value;
3603     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3604     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3605     }
3606     },
3607     }),
3608     checker => $HTMLEmptyChecker,
3609     };
3610    
3611     $Element->{$HTML_NS}->{menu} = {
3612     attrs_checker => $GetHTMLAttrsChecker->({
3613     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3614     id => sub {
3615     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3616     my ($self, $attr) = @_;
3617     my $value = $attr->value;
3618     if (length $value > 0) {
3619     if ($self->{id}->{$value}) {
3620     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3621     push @{$self->{id}->{$value}}, $attr;
3622     } else {
3623     $self->{id}->{$value} = [$attr];
3624     }
3625     } else {
3626     ## NOTE: MUST contain at least one character
3627     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3628     }
3629     if ($value =~ /[\x09-\x0D\x20]/) {
3630     $self->{onerror}->(node => $attr, type => 'space in ID');
3631     }
3632     $self->{menu}->{$value} ||= $attr;
3633     ## ISSUE: <menu id=""><p contextmenu=""> match?
3634     },
3635     label => sub { }, ## NOTE: No conformance creteria
3636     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3637     }),
3638     checker => sub {
3639     my ($self, $todo) = @_;
3640     my $el = $todo->{node};
3641     my $new_todos = [];
3642     my @nodes = (@{$el->child_nodes});
3643 wakaba 1.25
3644     my $old_values = {significant =>
3645     $todo->{flag}->{has_descendant}->{significant}};
3646     $todo->{flag}->{has_descendant}->{significant} = 0;
3647 wakaba 1.1
3648 wakaba 1.29 my $content = 'li or phrasing';
3649 wakaba 1.1 while (@nodes) {
3650     my $node = shift @nodes;
3651     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3652    
3653     my $nt = $node->node_type;
3654     if ($nt == 1) {
3655     my $node_ns = $node->namespace_uri;
3656     $node_ns = '' unless defined $node_ns;
3657     my $node_ln = $node->manakai_local_name;
3658     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3659 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3660     #
3661     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3662 wakaba 1.29 if ($content eq 'phrasing') {
3663 wakaba 1.1 $not_allowed = 1;
3664 wakaba 1.29 } elsif ($content eq 'li or phrasing') {
3665 wakaba 1.1 $content = 'li';
3666     }
3667     } else {
3668 wakaba 1.29 if ($HTMLPhrasingContent->{$node_ns}->{$node_ln}) {
3669     $content = 'phrasing';
3670 wakaba 1.1 } else {
3671     $not_allowed = 1;
3672     }
3673     }
3674     $self->{onerror}->(node => $node, type => 'element not allowed')
3675     if $not_allowed;
3676     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3677     unshift @nodes, @$sib;
3678     push @$new_todos, @$ch;
3679     } elsif ($nt == 3 or $nt == 4) {
3680     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3681     if ($content eq 'li') {
3682     $self->{onerror}->(node => $node, type => 'character not allowed');
3683 wakaba 1.29 } elsif ($content eq 'li or phrasing') {
3684     $content = 'phrasing';
3685 wakaba 1.1 }
3686 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3687 wakaba 1.1 }
3688     } elsif ($nt == 5) {
3689     unshift @nodes, @{$node->child_nodes};
3690     }
3691     }
3692    
3693     for (@$new_todos) {
3694 wakaba 1.29 $_->{flag}->{in_menu} = 1;
3695 wakaba 1.1 }
3696 wakaba 1.25
3697     push @$new_todos, {
3698     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3699     old_values => $old_values,
3700 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3701 wakaba 1.25 };
3702    
3703 wakaba 1.1 return ($new_todos);
3704     },
3705 wakaba 1.8 };
3706    
3707     $Element->{$HTML_NS}->{datatemplate} = {
3708     attrs_checker => $GetHTMLAttrsChecker->({}),
3709     checker => sub {
3710     my ($self, $todo) = @_;
3711     my $el = $todo->{node};
3712     my $new_todos = [];
3713     my @nodes = (@{$el->child_nodes});
3714    
3715     while (@nodes) {
3716     my $node = shift @nodes;
3717     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3718    
3719     my $nt = $node->node_type;
3720     if ($nt == 1) {
3721     my $node_ns = $node->namespace_uri;
3722     $node_ns = '' unless defined $node_ns;
3723     my $node_ln = $node->manakai_local_name;
3724     ## NOTE: |minuses| list is not checked since redundant
3725     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3726     #
3727     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3728     $self->{onerror}->(node => $node,
3729     type => 'element not allowed:datatemplate');
3730     }
3731     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3732     unshift @nodes, @$sib;
3733     push @$new_todos, @$ch;
3734     } elsif ($nt == 3 or $nt == 4) {
3735     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3736     $self->{onerror}->(node => $node, type => 'character not allowed');
3737 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3738 wakaba 1.8 }
3739     } elsif ($nt == 5) {
3740     unshift @nodes, @{$node->child_nodes};
3741     }
3742     }
3743     return ($new_todos);
3744     },
3745     is_xml_root => 1,
3746     };
3747    
3748     $Element->{$HTML_NS}->{rule} = {
3749     attrs_checker => $GetHTMLAttrsChecker->({
3750 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3751 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3752 wakaba 1.8 }),
3753     checker => sub {
3754     my ($self, $todo) = @_;
3755    
3756     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3757 wakaba 1.25 my ($sib, $ch) = $HTMLAnyChecker->($self, $todo);
3758 wakaba 1.8 push @$sib, $end;
3759     return ($sib, $ch);
3760     },
3761     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3762     ## is applied to some conforming data, results in a conforming DOM tree.":
3763     ## We don't check against this.
3764     };
3765    
3766     $Element->{$HTML_NS}->{nest} = {
3767     attrs_checker => $GetHTMLAttrsChecker->({
3768 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3769     mode => sub {
3770     my ($self, $attr) = @_;
3771     my $value = $attr->value;
3772     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3773     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3774     }
3775     },
3776 wakaba 1.8 }),
3777     checker => $HTMLEmptyChecker,
3778 wakaba 1.1 };
3779    
3780     $Element->{$HTML_NS}->{legend} = {
3781     attrs_checker => $GetHTMLAttrsChecker->({}),
3782 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
3783 wakaba 1.1 };
3784    
3785     $Element->{$HTML_NS}->{div} = {
3786     attrs_checker => $GetHTMLAttrsChecker->({}),
3787 wakaba 1.31 checker => $HTMLProseContentChecker,
3788 wakaba 1.1 };
3789    
3790     $Element->{$HTML_NS}->{font} = {
3791     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3792     checker => $HTMLTransparentChecker,
3793     };
3794    
3795     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3796    
3797     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24