/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.32 - (hide annotations) (download)
Sun Feb 17 12:18:06 2008 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.31: +65 -47 lines
++ whatpm/t/ChangeLog	17 Feb 2008 12:18:01 -0000
	* content-model-1.dat, content-model-2.dat: Updated.

2008-02-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	17 Feb 2008 12:12:56 -0000
	* ContentChecker.pm ({unsupported_level}): New value.

	* HTML.pm.src: Save whether |meta| |content| attribute
	contains character references or not.

2008-02-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	17 Feb 2008 12:17:33 -0000
	* HTML.pm: |<meta http-equiv=Content-Type| support (HTML5 revision
	1180).

2008-02-17  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7 wakaba 1.29 ## December 2007 HTML5 Classification
8    
9     my $HTMLMetadataContent = {
10     $HTML_NS => {
11     title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1,
12     'event-source' => 1, command => 1, datatemplate => 1,
13     ## NOTE: A |meta| with no |name| element is not allowed as
14     ## a metadata content other than |head| element.
15     meta => 1,
16     },
17     ## NOTE: RDF is mentioned in the HTML5 spec.
18     ## TODO: Other RDF elements?
19     q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
20     };
21    
22     my $HTMLProseContent = {
23     $HTML_NS => {
24     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
25     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
26     footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1,
27     ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1,
28     details => 1, ## ISSUE: "Prose element" in spec.
29     datagrid => 1, ## ISSUE: "Prose element" in spec.
30     datatemplate => 1,
31     div => 1, ## ISSUE: No category in spec.
32     ## NOTE: |style| is only allowed if |scoped| attribute is specified.
33     ## Additionally, it must be before any other element or
34     ## non-inter-element-whitespace text node.
35     style => 1,
36    
37     br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, m => 1,
38     dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
39     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
40     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
41     command => 1, font => 1,
42     a => 1,
43     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
44     ## NOTE: |area| is allowed only as a descendant of |map|.
45     area => 1,
46    
47     ins => 1, del => 1,
48    
49     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
50     menu => 1,
51    
52     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
53     canvas => 1,
54     },
55    
56     ## NOTE: Embedded
57     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
58     q<http://www.w3.org/2000/svg> => {svg => 1},
59     };
60    
61     my $HTMLSectioningContent = {
62     $HTML_NS => {
63     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
64     ## NOTE: |body| is only allowed in |html| element.
65     body => 1,
66     },
67     };
68    
69     my $HTMLHeadingContent = {
70     $HTML_NS => {
71     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
72     },
73     };
74    
75     my $HTMLPhrasingContent = {
76     ## NOTE: All phrasing content is also prose content.
77     $HTML_NS => {
78     br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, m => 1,
79     dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
80     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
81     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
82     command => 1, font => 1,
83     a => 1,
84     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
85     ## NOTE: |area| is allowed only as a descendant of |map|.
86     area => 1,
87    
88     ## NOTE: Transparent.
89     ins => 1, del => 1,
90    
91     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
92     menu => 1,
93    
94     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
95     canvas => 1,
96     },
97    
98     ## NOTE: Embedded
99     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
100     q<http://www.w3.org/2000/svg> => {svg => 1},
101    
102     ## NOTE: And non-inter-element-whitespace text nodes.
103     };
104    
105     my $HTMLEmbeddedContent = {
106     ## NOTE: All embedded content is also phrasing content.
107     $HTML_NS => {
108     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
109     canvas => 1,
110     },
111     ## NOTE: MathML is mentioned in the HTML5 spec.
112     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
113     ## NOTE: SVG is mentioned in the HTML5 spec.
114     q<http://www.w3.org/2000/svg> => {svg => 1},
115     ## NOTE: Foreign elements with content (but no metadata) are
116     ## embedded content.
117     };
118    
119     my $HTMLInteractiveContent = {
120     $HTML_NS => {
121     a => 1,
122     },
123     };
124    
125     ## Old HTML5 categories
126    
127 wakaba 1.1 my $HTMLMetadataElements = {
128     $HTML_NS => {
129     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
130 wakaba 1.8 noscript 1 datatemplate 1
131 wakaba 1.1 /,
132     },
133     };
134    
135     my $HTMLSectioningElements = {
136     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
137     };
138    
139     my $HTMLBlockLevelElements = {
140     $HTML_NS => {
141     qw/
142     section 1 nav 1 article 1 blockquote 1 aside 1
143     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
144     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
145     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
146     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
147 wakaba 1.8 datatemplate 1
148 wakaba 1.1 /,
149     },
150     };
151    
152     my $HTMLStrictlyInlineLevelElements = {
153     $HTML_NS => {
154     qw/
155     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
156     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
157     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
158     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
159     script 1 noscript 1 event-source 1 command 1 font 1
160     /,
161     },
162     };
163    
164     my $HTMLStructuredInlineLevelElements = {
165     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
166     };
167    
168     my $HTMLInteractiveElements = {
169     $HTML_NS => {a => 1, details => 1, datagrid => 1},
170     };
171     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
172     ## of interactive elements
173    
174     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
175    
176     #my $HTMLSemiTransparentElements = {
177     # $HTML_NS => {qw/video 1 audio 1/},
178     #};
179    
180     my $HTMLEmbededElements = {
181     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
182     };
183 wakaba 1.25 ## NOTE: When an element is added to this list, make sure that
184     ## the element's checker set |has_descendant| flag for |significant| content
185     ## as true.
186    
187 wakaba 1.26 my $HTMLSignificantContentErrors = {
188     significant => sub {
189     my ($self, $todo) = @_;
190     $self->{onerror}->(node => $todo->{node},
191     level => $self->{should_level},
192     type => 'no significant content');
193     },
194     }; # $HTMLSignificantContentErrors
195    
196 wakaba 1.25 our $AnyChecker;
197     my $HTMLAnyChecker = sub {
198     my ($self, $todo) = @_;
199    
200     my $old_values = {significant =>
201     $todo->{flag}->{has_descendant}->{significant}};
202     $todo->{flag}->{has_descendant}->{significant} = 0;
203    
204     my ($new_todos) = $AnyChecker->($self, $todo);
205    
206     push @$new_todos, {
207     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
208     old_values => $old_values,
209 wakaba 1.26 errors => $HTMLSignificantContentErrors,
210 wakaba 1.25 };
211    
212     return ($new_todos);
213     }; # $HTMLAnyChecker
214 wakaba 1.1
215     ## Empty
216     my $HTMLEmptyChecker = sub {
217     my ($self, $todo) = @_;
218     my $el = $todo->{node};
219     my $new_todos = [];
220     my @nodes = (@{$el->child_nodes});
221    
222     while (@nodes) {
223     my $node = shift @nodes;
224     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
225    
226     my $nt = $node->node_type;
227     if ($nt == 1) {
228 wakaba 1.8 my $node_ns = $node->namespace_uri;
229     $node_ns = '' unless defined $node_ns;
230     my $node_ln = $node->manakai_local_name;
231 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
232     $self->{onerror}->(node => $node, type => 'element not allowed:minus',
233     level => $self->{must_level});
234     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
235 wakaba 1.8 #
236     } else {
237 wakaba 1.30 $self->{onerror}->(node => $node, type => 'element not allowed:empty',
238     level => $self->{must_level});
239 wakaba 1.8 }
240 wakaba 1.30
241 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
242     unshift @nodes, @$sib;
243     push @$new_todos, @$ch;
244     } elsif ($nt == 3 or $nt == 4) {
245     if ($node->data =~ /[^\x09-\x0D\x20]/) {
246 wakaba 1.30 $self->{onerror}->(node => $node,
247     type => 'character not allowed:empty',
248     level => $self->{must_level});
249 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
250 wakaba 1.1 }
251     } elsif ($nt == 5) {
252     unshift @nodes, @{$node->child_nodes};
253     }
254     }
255     return ($new_todos);
256     };
257    
258     ## Text
259     my $HTMLTextChecker = sub {
260     my ($self, $todo) = @_;
261     my $el = $todo->{node};
262     my $new_todos = [];
263     my @nodes = (@{$el->child_nodes});
264    
265 wakaba 1.25 my $old_values = {significant =>
266     $todo->{flag}->{has_descendant}->{significant}};
267     $todo->{flag}->{has_descendant}->{significant} = 0;
268    
269 wakaba 1.1 while (@nodes) {
270     my $node = shift @nodes;
271     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
272    
273     my $nt = $node->node_type;
274     if ($nt == 1) {
275 wakaba 1.8 my $node_ns = $node->namespace_uri;
276     $node_ns = '' unless defined $node_ns;
277     my $node_ln = $node->manakai_local_name;
278     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
279     #
280     } else {
281     ## NOTE: |minuses| list is not checked since redundant
282     $self->{onerror}->(node => $node, type => 'element not allowed');
283     }
284 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
285     unshift @nodes, @$sib;
286     push @$new_todos, @$ch;
287 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
288     if ($node->data =~ /[^\x09-\x0D\x20]/) {
289     $todo->{flag}->{has_descendant}->{significant} = 1;
290     }
291 wakaba 1.1 } elsif ($nt == 5) {
292     unshift @nodes, @{$node->child_nodes};
293     }
294     }
295 wakaba 1.25
296     push @$new_todos, {
297     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
298     old_values => $old_values,
299 wakaba 1.26 errors => $HTMLSignificantContentErrors,
300 wakaba 1.25 };
301    
302 wakaba 1.1 return ($new_todos);
303     };
304    
305     ## Zero or more |html:style| elements,
306     ## followed by zero or more block-level elements
307     my $HTMLStylableBlockChecker = sub {
308     my ($self, $todo) = @_;
309     my $el = $todo->{node};
310     my $new_todos = [];
311     my @nodes = (@{$el->child_nodes});
312 wakaba 1.25
313     my $old_values = {significant =>
314     $todo->{flag}->{has_descendant}->{significant}};
315     $todo->{flag}->{has_descendant}->{significant} = 0;
316 wakaba 1.1
317     my $has_non_style;
318     while (@nodes) {
319     my $node = shift @nodes;
320     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
321    
322     my $nt = $node->node_type;
323     if ($nt == 1) {
324     my $node_ns = $node->namespace_uri;
325     $node_ns = '' unless defined $node_ns;
326     my $node_ln = $node->manakai_local_name;
327     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
328     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
329     $not_allowed = 1 if $has_non_style or
330     not $node->has_attribute_ns (undef, 'scoped');
331     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
332     $has_non_style = 1;
333 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
334     #
335 wakaba 1.1 } else {
336     $has_non_style = 1;
337     $not_allowed = 1;
338     }
339     $self->{onerror}->(node => $node, type => 'element not allowed')
340     if $not_allowed;
341     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
342     unshift @nodes, @$sib;
343     push @$new_todos, @$ch;
344     } elsif ($nt == 3 or $nt == 4) {
345     if ($node->data =~ /[^\x09-\x0D\x20]/) {
346     $self->{onerror}->(node => $node, type => 'character not allowed');
347 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
348 wakaba 1.1 }
349     } elsif ($nt == 5) {
350     unshift @nodes, @{$node->child_nodes};
351     }
352     }
353 wakaba 1.25
354     push @$new_todos, {
355     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
356     old_values => $old_values,
357 wakaba 1.26 errors => $HTMLSignificantContentErrors,
358 wakaba 1.25 };
359    
360 wakaba 1.1 return ($new_todos);
361     }; # $HTMLStylableBlockChecker
362    
363 wakaba 1.29 my $HTMLProseContentChecker = sub {
364     my ($self, $todo) = @_;
365     my $el = $todo->{node};
366     my $new_todos = [];
367     my @nodes = (@{$el->child_nodes});
368    
369     my $old_values = {significant =>
370     $todo->{flag}->{has_descendant}->{significant}};
371     $todo->{flag}->{has_descendant}->{significant} = 0;
372    
373     my $has_non_style;
374     while (@nodes) {
375     my $node = shift @nodes;
376     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
377    
378     my $nt = $node->node_type;
379     if ($nt == 1) {
380     my $node_ns = $node->namespace_uri;
381     $node_ns = '' unless defined $node_ns;
382     my $node_ln = $node->manakai_local_name;
383 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
384     $self->{onerror}->(node => $node,
385     type => 'element not allowed:minus',
386     level => $self->{must_level});
387     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
388     if ($has_non_style or
389     not $node->has_attribute_ns (undef, 'scoped')) {
390     $self->{onerror}->(node => $node,
391     type => 'element not allowed:prose style',
392     level => $self->{must_level});
393     }
394 wakaba 1.29 } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
395     $has_non_style = 1;
396     if ($HTMLEmbeddedContent->{$node_ns}->{$node_ln}) {
397     $todo->{flag}->{has_descendant}->{significant} = 1;
398     }
399     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
400     #
401     } else {
402     $has_non_style = 1;
403 wakaba 1.30 $self->{onerror}->(node => $node,
404     type => 'element not allowed:prose',
405     level => $self->{must_level})
406 wakaba 1.29 }
407 wakaba 1.30
408 wakaba 1.29 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
409     unshift @nodes, @$sib;
410     push @$new_todos, @$ch;
411     } elsif ($nt == 3 or $nt == 4) {
412     if ($node->data =~ /[^\x09-\x0D\x20]/) {
413     $has_non_style = 1;
414     $todo->{flag}->{has_descendant}->{significant} = 1;
415     }
416     } elsif ($nt == 5) {
417     unshift @nodes, @{$node->child_nodes};
418     }
419     }
420    
421     push @$new_todos, {
422     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
423     old_values => $old_values,
424     errors => $HTMLSignificantContentErrors,
425     };
426    
427     return ($new_todos);
428     }; # $HTMLProseContentChecker
429    
430 wakaba 1.1 ## Zero or more block-level elements
431     my $HTMLBlockChecker = sub {
432     my ($self, $todo) = @_;
433     my $el = $todo->{node};
434     my $new_todos = [];
435     my @nodes = (@{$el->child_nodes});
436    
437 wakaba 1.25 my $old_values = {significant =>
438     $todo->{flag}->{has_descendant}->{significant}};
439     $todo->{flag}->{has_descendant}->{significant} = 0;
440    
441 wakaba 1.1 while (@nodes) {
442     my $node = shift @nodes;
443     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
444    
445     my $nt = $node->node_type;
446     if ($nt == 1) {
447     my $node_ns = $node->namespace_uri;
448     $node_ns = '' unless defined $node_ns;
449     my $node_ln = $node->manakai_local_name;
450     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
451     $not_allowed = 1
452 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
453     $self->{pluses}->{$node_ns}->{$node_ln};
454 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
455     if $not_allowed;
456     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
457     unshift @nodes, @$sib;
458     push @$new_todos, @$ch;
459     } elsif ($nt == 3 or $nt == 4) {
460     if ($node->data =~ /[^\x09-\x0D\x20]/) {
461     $self->{onerror}->(node => $node, type => 'character not allowed');
462 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
463 wakaba 1.1 }
464     } elsif ($nt == 5) {
465     unshift @nodes, @{$node->child_nodes};
466     }
467     }
468 wakaba 1.25
469     push @$new_todos, {
470     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
471     old_values => $old_values,
472 wakaba 1.26 errors => $HTMLSignificantContentErrors,
473 wakaba 1.25 };
474    
475 wakaba 1.1 return ($new_todos);
476     }; # $HTMLBlockChecker
477    
478     ## Inline-level content
479     my $HTMLInlineChecker = sub {
480     my ($self, $todo) = @_;
481     my $el = $todo->{node};
482     my $new_todos = [];
483     my @nodes = (@{$el->child_nodes});
484    
485 wakaba 1.25 my $old_values = {significant =>
486     $todo->{flag}->{has_descendant}->{significant}};
487     $todo->{flag}->{has_descendant}->{significant} = 0;
488    
489 wakaba 1.1 while (@nodes) {
490     my $node = shift @nodes;
491     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
492    
493     my $nt = $node->node_type;
494     if ($nt == 1) {
495     my $node_ns = $node->namespace_uri;
496     $node_ns = '' unless defined $node_ns;
497     my $node_ln = $node->manakai_local_name;
498     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
499     $not_allowed = 1
500     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
501 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
502     $self->{pluses}->{$node_ns}->{$node_ln};
503 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
504     if $not_allowed;
505     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
506     unshift @nodes, @$sib;
507     push @$new_todos, @$ch;
508 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
509     if ($node->data =~ /[^\x09-\x0D\x20]/) {
510     $todo->{flag}->{has_descendant}->{significant} = 1;
511     }
512 wakaba 1.1 } elsif ($nt == 5) {
513     unshift @nodes, @{$node->child_nodes};
514     }
515     }
516    
517     for (@$new_todos) {
518     $_->{inline} = 1;
519     }
520 wakaba 1.25
521     push @$new_todos, {
522     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
523     old_values => $old_values,
524 wakaba 1.26 errors => $HTMLSignificantContentErrors,
525 wakaba 1.25 };
526    
527 wakaba 1.1 return ($new_todos);
528     }; # $HTMLInlineChecker
529    
530     ## Strictly inline-level content
531     my $HTMLStrictlyInlineChecker = sub {
532     my ($self, $todo) = @_;
533     my $el = $todo->{node};
534     my $new_todos = [];
535     my @nodes = (@{$el->child_nodes});
536 wakaba 1.25
537     my $old_values = {significant =>
538     $todo->{flag}->{has_descendant}->{significant}};
539     $todo->{flag}->{has_descendant}->{significant} = 0;
540 wakaba 1.1
541     while (@nodes) {
542     my $node = shift @nodes;
543     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
544    
545     my $nt = $node->node_type;
546     if ($nt == 1) {
547     my $node_ns = $node->namespace_uri;
548     $node_ns = '' unless defined $node_ns;
549     my $node_ln = $node->manakai_local_name;
550     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
551     $not_allowed = 1
552 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
553     $self->{pluses}->{$node_ns}->{$node_ln};
554 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
555     if $not_allowed;
556     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
557     unshift @nodes, @$sib;
558     push @$new_todos, @$ch;
559 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
560     if ($node->data =~ /[^\x09-\x0D\x20]/) {
561     $todo->{flag}->{has_descendant}->{significant} = 1;
562     }
563 wakaba 1.1 } elsif ($nt == 5) {
564     unshift @nodes, @{$node->child_nodes};
565     }
566     }
567    
568     for (@$new_todos) {
569     $_->{inline} = 1;
570     $_->{strictly_inline} = 1;
571     }
572 wakaba 1.25
573     push @$new_todos, {
574     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
575     old_values => $old_values,
576 wakaba 1.26 errors => $HTMLSignificantContentErrors,
577 wakaba 1.25 };
578    
579 wakaba 1.1 return ($new_todos);
580     }; # $HTMLStrictlyInlineChecker
581    
582 wakaba 1.8 ## Inline-level or strictly inline-level content
583 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
584     my ($self, $todo) = @_;
585     my $el = $todo->{node};
586     my $new_todos = [];
587     my @nodes = (@{$el->child_nodes});
588 wakaba 1.25
589     my $old_values = {significant =>
590     $todo->{flag}->{has_descendant}->{significant}};
591     $todo->{flag}->{has_descendant}->{significant} = 0;
592 wakaba 1.1
593     while (@nodes) {
594     my $node = shift @nodes;
595     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
596    
597     my $nt = $node->node_type;
598     if ($nt == 1) {
599     my $node_ns = $node->namespace_uri;
600     $node_ns = '' unless defined $node_ns;
601     my $node_ln = $node->manakai_local_name;
602     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
603     if ($todo->{strictly_inline}) {
604     $not_allowed = 1
605 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
606     $self->{pluses}->{$node_ns}->{$node_ln};
607 wakaba 1.1 } else {
608     $not_allowed = 1
609     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
610 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
611     $self->{pluses}->{$node_ns}->{$node_ln};
612 wakaba 1.1 }
613     $self->{onerror}->(node => $node, type => 'element not allowed')
614     if $not_allowed;
615     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
616     unshift @nodes, @$sib;
617     push @$new_todos, @$ch;
618 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
619     if ($node->data =~ /[^\x09-\x0D\x20]/) {
620     $todo->{flag}->{has_descendant}->{significant} = 1;
621     }
622 wakaba 1.1 } elsif ($nt == 5) {
623     unshift @nodes, @{$node->child_nodes};
624     }
625     }
626    
627     for (@$new_todos) {
628     $_->{inline} = 1;
629     $_->{strictly_inline} = 1;
630     }
631 wakaba 1.25
632     push @$new_todos, {
633     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
634     old_values => $old_values,
635 wakaba 1.26 errors => $HTMLSignificantContentErrors,
636 wakaba 1.25 };
637    
638 wakaba 1.1 return ($new_todos);
639     }; # $HTMLInlineOrStrictlyInlineChecker
640    
641 wakaba 1.29 my $HTMLPhrasingContentChecker = sub {
642     my ($self, $todo) = @_;
643     my $el = $todo->{node};
644     my $new_todos = [];
645     my @nodes = (@{$el->child_nodes});
646    
647     my $old_values = {significant =>
648     $todo->{flag}->{has_descendant}->{significant}};
649     $todo->{flag}->{has_descendant}->{significant} = 0;
650    
651     while (@nodes) {
652     my $node = shift @nodes;
653     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
654    
655     my $nt = $node->node_type;
656     if ($nt == 1) {
657     my $node_ns = $node->namespace_uri;
658     $node_ns = '' unless defined $node_ns;
659     my $node_ln = $node->manakai_local_name;
660 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
661     $self->{onerror}->(node => $node,
662     type => 'element not allowed:minus',
663     level => $self->{must_level});
664     } elsif ($HTMLPhrasingContent->{$node_ns}->{$node_ln} or
665     $self->{pluses}->{$node_ns}->{$node_ln}) {
666     #
667     } else {
668     $self->{onerror}->(node => $node,
669     type => 'element not allowed:phrasing',
670     level => $self->{must_level});
671     }
672    
673 wakaba 1.29 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
674     unshift @nodes, @$sib;
675     push @$new_todos, @$ch;
676     } elsif ($nt == 3 or $nt == 4) {
677     if ($node->data =~ /[^\x09-\x0D\x20]/) {
678     $todo->{flag}->{has_descendant}->{significant} = 1;
679     }
680     } elsif ($nt == 5) {
681     unshift @nodes, @{$node->child_nodes};
682     }
683     }
684    
685     push @$new_todos, {
686     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
687     old_values => $old_values,
688     errors => $HTMLSignificantContentErrors,
689     };
690    
691     return ($new_todos);
692     }; # $HTMLPhrasingContentChecker
693    
694 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
695 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
696     my ($self, $todo) = @_;
697     my $el = $todo->{node};
698     my $new_todos = [];
699     my @nodes = (@{$el->child_nodes});
700 wakaba 1.25
701     my $old_values = {significant =>
702     $todo->{flag}->{has_descendant}->{significant}};
703     $todo->{flag}->{has_descendant}->{significant} = 0;
704 wakaba 1.1
705     my $content = 'block-or-inline'; # or 'block' or 'inline'
706     my @block_not_inline;
707     while (@nodes) {
708     my $node = shift @nodes;
709     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
710    
711 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
712     ## is conforming or not.
713    
714 wakaba 1.1 my $nt = $node->node_type;
715     if ($nt == 1) {
716     my $node_ns = $node->namespace_uri;
717     $node_ns = '' unless defined $node_ns;
718     my $node_ln = $node->manakai_local_name;
719     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
720     if ($content eq 'block') {
721     $not_allowed = 1
722 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
723     $self->{pluses}->{$node_ns}->{$node_ln};
724 wakaba 1.1 } elsif ($content eq 'inline') {
725     $not_allowed = 1
726     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
727 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
728     $self->{pluses}->{$node_ns}->{$node_ln};
729 wakaba 1.1 } else {
730     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
731     my $is_inline
732     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
733     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
734    
735     push @block_not_inline, $node
736     if $is_block and not $is_inline and not $not_allowed;
737 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
738 wakaba 1.1 $content = 'inline';
739     for (@block_not_inline) {
740     $self->{onerror}->(node => $_, type => 'element not allowed');
741     }
742     $not_allowed = 1 unless $is_inline;
743     }
744     }
745     $self->{onerror}->(node => $node, type => 'element not allowed')
746     if $not_allowed;
747     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
748     unshift @nodes, @$sib;
749     push @$new_todos, @$ch;
750     } elsif ($nt == 3 or $nt == 4) {
751     if ($node->data =~ /[^\x09-\x0D\x20]/) {
752     if ($content eq 'block') {
753     $self->{onerror}->(node => $node, type => 'character not allowed');
754     } else {
755     $content = 'inline';
756     for (@block_not_inline) {
757     $self->{onerror}->(node => $_, type => 'element not allowed');
758     }
759     }
760 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
761 wakaba 1.1 }
762     } elsif ($nt == 5) {
763     unshift @nodes, @{$node->child_nodes};
764     }
765     }
766    
767     if ($content eq 'inline') {
768     for (@$new_todos) {
769     $_->{inline} = 1;
770     }
771     }
772 wakaba 1.25
773     push @$new_todos, {
774     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
775     old_values => $old_values,
776 wakaba 1.26 errors => $HTMLSignificantContentErrors,
777 wakaba 1.25 };
778    
779 wakaba 1.1 return ($new_todos);
780     };
781    
782     ## Zero or more XXX element, then either block-level or inline-level
783     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
784     my ($elnsuri, $ellname) = @_;
785     return sub {
786     my ($self, $todo) = @_;
787     my $el = $todo->{node};
788     my $new_todos = [];
789     my @nodes = (@{$el->child_nodes});
790 wakaba 1.25
791     my $old_values = {significant =>
792     $todo->{flag}->{has_descendant}->{significant}};
793     $todo->{flag}->{has_descendant}->{significant} = 0;
794 wakaba 1.1
795     my $has_non_style;
796     my $content = 'block-or-inline'; # or 'block' or 'inline'
797     my @block_not_inline;
798     while (@nodes) {
799     my $node = shift @nodes;
800     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
801    
802     my $nt = $node->node_type;
803     if ($nt == 1) {
804     my $node_ns = $node->namespace_uri;
805     $node_ns = '' unless defined $node_ns;
806     my $node_ln = $node->manakai_local_name;
807     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
808     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
809     $not_allowed = 1 if $has_non_style;
810     if ($ellname eq 'style' and
811     not $node->has_attribute_ns (undef, 'scoped')) {
812     $not_allowed = 1;
813     }
814     } elsif ($content eq 'block') {
815     $has_non_style = 1;
816     $not_allowed = 1
817 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
818     $self->{pluses}->{$node_ns}->{$node_ln};
819 wakaba 1.1 } elsif ($content eq 'inline') {
820     $has_non_style = 1;
821     $not_allowed = 1
822     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
823 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
824     $self->{pluses}->{$node_ns}->{$node_ln};
825 wakaba 1.1 } else {
826 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
827 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
828     my $is_inline
829     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
830     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
831    
832     push @block_not_inline, $node
833     if $is_block and not $is_inline and not $not_allowed;
834 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
835 wakaba 1.1 $content = 'inline';
836     for (@block_not_inline) {
837     $self->{onerror}->(node => $_, type => 'element not allowed');
838     }
839     $not_allowed = 1 unless $is_inline;
840     }
841     }
842     $self->{onerror}->(node => $node, type => 'element not allowed')
843     if $not_allowed;
844     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
845     unshift @nodes, @$sib;
846     push @$new_todos, @$ch;
847     } elsif ($nt == 3 or $nt == 4) {
848     if ($node->data =~ /[^\x09-\x0D\x20]/) {
849     $has_non_style = 1;
850     if ($content eq 'block') {
851     $self->{onerror}->(node => $node, type => 'character not allowed');
852     } else {
853     $content = 'inline';
854     for (@block_not_inline) {
855     $self->{onerror}->(node => $_, type => 'element not allowed');
856     }
857     }
858 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
859 wakaba 1.1 }
860     } elsif ($nt == 5) {
861     unshift @nodes, @{$node->child_nodes};
862     }
863     }
864    
865     if ($content eq 'inline') {
866     for (@$new_todos) {
867     $_->{inline} = 1;
868     }
869     }
870 wakaba 1.25
871     push @$new_todos, {
872     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
873     old_values => $old_values,
874 wakaba 1.26 errors => $HTMLSignificantContentErrors,
875 wakaba 1.25 };
876    
877 wakaba 1.1 return ($new_todos);
878     };
879     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
880    
881 wakaba 1.29 my $HTMLTransparentChecker = $HTMLProseContentChecker;
882 wakaba 1.25 ## ISSUE: Significant content rule should be applied to transparent element
883     ## with parent? Currently, applied to |video| but not to others.
884 wakaba 1.1
885     our $AttrChecker;
886    
887     my $GetHTMLEnumeratedAttrChecker = sub {
888     my $states = shift; # {value => conforming ? 1 : -1}
889     return sub {
890     my ($self, $attr) = @_;
891     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
892     if ($states->{$value} > 0) {
893     #
894     } elsif ($states->{$value}) {
895     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
896     } else {
897     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
898     }
899     };
900     }; # $GetHTMLEnumeratedAttrChecker
901    
902     my $GetHTMLBooleanAttrChecker = sub {
903     my $local_name = shift;
904     return sub {
905     my ($self, $attr) = @_;
906     my $value = $attr->value;
907     unless ($value eq $local_name or $value eq '') {
908     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
909     }
910     };
911     }; # $GetHTMLBooleanAttrChecker
912    
913 wakaba 1.8 ## Unordered set of space-separated tokens
914 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
915 wakaba 1.8 my ($self, $attr) = @_;
916     my %word;
917     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
918     unless ($word{$word}) {
919     $word{$word} = 1;
920     } else {
921     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
922     }
923     }
924 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
925 wakaba 1.8
926 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
927     ## whose allowed values are defined by the section on link types)
928     my $HTMLLinkTypesAttrChecker = sub {
929 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
930 wakaba 1.1 my %word;
931     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
932     unless ($word{$word}) {
933     $word{$word} = 1;
934 wakaba 1.18 } elsif ($word eq 'up') {
935     #
936 wakaba 1.1 } else {
937     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
938     }
939     }
940     ## NOTE: Case sensitive match (since HTML5 spec does not say link
941     ## types are case-insensitive and it says "The value should not
942     ## be confusingly similar to any other defined value (e.g.
943     ## differing only in case).").
944     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
945     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
946     ## values to be used conformingly.
947     require Whatpm::_LinkTypeList;
948     our $LinkType;
949     for my $word (keys %word) {
950     my $def = $LinkType->{$word};
951     if (defined $def) {
952     if ($def->{status} eq 'accepted') {
953     if (defined $def->{effect}->[$a_or_area]) {
954     #
955     } else {
956     $self->{onerror}->(node => $attr,
957     type => 'link type:bad context:'.$word);
958     }
959     } elsif ($def->{status} eq 'proposal') {
960     $self->{onerror}->(node => $attr, level => 's',
961     type => 'link type:proposed:'.$word);
962 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
963     #
964     } else {
965     $self->{onerror}->(node => $attr,
966     type => 'link type:bad context:'.$word);
967     }
968 wakaba 1.1 } else { # rejected or synonym
969     $self->{onerror}->(node => $attr,
970     type => 'link type:non-conforming:'.$word);
971     }
972 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
973     if ($word eq 'alternate') {
974     #
975     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
976     $todo->{has_hyperlink_link_type} = 1;
977     }
978     }
979 wakaba 1.1 if ($def->{unique}) {
980     unless ($self->{has_link_type}->{$word}) {
981     $self->{has_link_type}->{$word} = 1;
982     } else {
983     $self->{onerror}->(node => $attr,
984     type => 'link type:duplicate:'.$word);
985     }
986     }
987     } else {
988     $self->{onerror}->(node => $attr, level => 'unsupported',
989     type => 'link type:'.$word);
990     }
991     }
992 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
993     if $word{alternate} and not $word{stylesheet};
994 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
995     ## says that using both X-Pingback: header field and HTML
996     ## <link rel=pingback> is deprecated and if both appears they
997     ## SHOULD contain exactly the same value.
998     ## ISSUE: Pingback 1.0 specification defines the exact representation
999     ## of its link element, which cannot be tested by the current arch.
1000     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
1001     ## include any string that matches to the pattern for the rel=pingback link,
1002     ## which again inpossible to test.
1003     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
1004 wakaba 1.12
1005     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
1006 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
1007     ## then they SHOULD be described in different paragraphs.".
1008 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
1009 wakaba 1.20
1010     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
1011 wakaba 1.1
1012     ## URI (or IRI)
1013     my $HTMLURIAttrChecker = sub {
1014     my ($self, $attr) = @_;
1015     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
1016     my $value = $attr->value;
1017     Whatpm::URIChecker->check_iri_reference ($value, sub {
1018     my %opt = @_;
1019     $self->{onerror}->(node => $attr, level => $opt{level},
1020     type => 'URI::'.$opt{type}.
1021     (defined $opt{position} ? ':'.$opt{position} : ''));
1022     });
1023 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
1024 wakaba 1.1 }; # $HTMLURIAttrChecker
1025    
1026     ## A space separated list of one or more URIs (or IRIs)
1027     my $HTMLSpaceURIsAttrChecker = sub {
1028     my ($self, $attr) = @_;
1029     my $i = 0;
1030     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
1031     Whatpm::URIChecker->check_iri_reference ($value, sub {
1032     my %opt = @_;
1033     $self->{onerror}->(node => $attr, level => $opt{level},
1034 wakaba 1.2 type => 'URIs:'.':'.
1035     $opt{type}.':'.$i.
1036 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
1037     });
1038     $i++;
1039     }
1040     ## ISSUE: Relative references?
1041     ## ISSUE: Leading or trailing white spaces are conformant?
1042     ## ISSUE: A sequence of white space characters are conformant?
1043     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
1044     ## NOTE: Duplication seems not an error.
1045 wakaba 1.4 $self->{has_uri_attr} = 1;
1046 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
1047    
1048     my $HTMLDatetimeAttrChecker = sub {
1049     my ($self, $attr) = @_;
1050     my $value = $attr->value;
1051     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
1052     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
1053     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
1054     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
1055     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
1056     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
1057     if $d < 1 or
1058     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
1059     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
1060     if $M == 2 and $d == 29 and
1061     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
1062     } else {
1063     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
1064     }
1065     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
1066     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
1067     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
1068     if defined $s and $s > 59;
1069     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
1070     if $zh > 23;
1071     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
1072     if $zm > 59;
1073     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
1074     } else {
1075     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
1076     }
1077     }; # $HTMLDatetimeAttrChecker
1078    
1079     my $HTMLIntegerAttrChecker = sub {
1080     my ($self, $attr) = @_;
1081     my $value = $attr->value;
1082     unless ($value =~ /\A-?[0-9]+\z/) {
1083     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
1084     }
1085     }; # $HTMLIntegerAttrChecker
1086    
1087     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
1088     my $range_check = shift;
1089     return sub {
1090     my ($self, $attr) = @_;
1091     my $value = $attr->value;
1092     if ($value =~ /\A[0-9]+\z/) {
1093     unless ($range_check->($value + 0)) {
1094     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
1095     }
1096     } else {
1097     $self->{onerror}->(node => $attr,
1098     type => 'nninteger:syntax error');
1099     }
1100     };
1101     }; # $GetHTMLNonNegativeIntegerAttrChecker
1102    
1103     my $GetHTMLFloatingPointNumberAttrChecker = sub {
1104     my $range_check = shift;
1105     return sub {
1106     my ($self, $attr) = @_;
1107     my $value = $attr->value;
1108     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
1109     unless ($range_check->($value + 0)) {
1110     $self->{onerror}->(node => $attr, type => 'float:out of range');
1111     }
1112     } else {
1113     $self->{onerror}->(node => $attr,
1114     type => 'float:syntax error');
1115     }
1116     };
1117     }; # $GetHTMLFloatingPointNumberAttrChecker
1118    
1119     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
1120     ## ISSUE: RFC 2046 does not define syntax of media types.
1121     ## ISSUE: The definition of "a valid MIME type" is unknown.
1122     ## Syntactical correctness?
1123     my $HTMLIMTAttrChecker = sub {
1124     my ($self, $attr) = @_;
1125     my $value = $attr->value;
1126     ## ISSUE: RFC 2045 Content-Type header field allows insertion
1127     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
1128     ## ISSUE: RFC 2231 extension? Maybe no.
1129     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
1130     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
1131     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
1132     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
1133     my @type = ($1, $2);
1134     my $param = $3;
1135     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
1136     if (defined $2) {
1137     push @type, $1 => $2;
1138     } else {
1139     my $n = $1;
1140     my $v = $2;
1141     $v =~ s/\\(.)/$1/gs;
1142     push @type, $n => $v;
1143     }
1144     }
1145     require Whatpm::IMTChecker;
1146     Whatpm::IMTChecker->check_imt (sub {
1147     my %opt = @_;
1148     $self->{onerror}->(node => $attr, level => $opt{level},
1149     type => 'IMT:'.$opt{type});
1150     }, @type);
1151     } else {
1152     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1153     }
1154     }; # $HTMLIMTAttrChecker
1155    
1156     my $HTMLLanguageTagAttrChecker = sub {
1157 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
1158    
1159 wakaba 1.1 my ($self, $attr) = @_;
1160 wakaba 1.6 my $value = $attr->value;
1161     require Whatpm::LangTag;
1162     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
1163     my %opt = @_;
1164     my $type = 'LangTag:'.$opt{type};
1165     $type .= ':' . $opt{subtag} if defined $opt{subtag};
1166     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
1167     level => $opt{level});
1168     });
1169 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
1170 wakaba 1.6
1171     ## TODO: testdata
1172 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
1173    
1174     ## "A valid media query [MQ]"
1175     my $HTMLMQAttrChecker = sub {
1176     my ($self, $attr) = @_;
1177     $self->{onerror}->(node => $attr, level => 'unsupported',
1178     type => 'media query');
1179     ## ISSUE: What is "a valid media query"?
1180     }; # $HTMLMQAttrChecker
1181    
1182     my $HTMLEventHandlerAttrChecker = sub {
1183     my ($self, $attr) = @_;
1184     $self->{onerror}->(node => $attr, level => 'unsupported',
1185     type => 'event handler');
1186     ## TODO: MUST contain valid ECMAScript code matching the
1187     ## ECMAScript |FunctionBody| production. [ECMA262]
1188     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
1189     ## ISSUE: Automatic semicolon insertion does not apply?
1190     ## ISSUE: Other script languages?
1191     }; # $HTMLEventHandlerAttrChecker
1192    
1193     my $HTMLUsemapAttrChecker = sub {
1194     my ($self, $attr) = @_;
1195     ## MUST be a valid hashed ID reference to a |map| element
1196     my $value = $attr->value;
1197     if ($value =~ s/^#//) {
1198     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
1199     push @{$self->{usemap}}, [$value => $attr];
1200     } else {
1201     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
1202     }
1203     ## NOTE: Space characters in hashed ID references are conforming.
1204     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
1205     }; # $HTMLUsemapAttrChecker
1206    
1207     my $HTMLTargetAttrChecker = sub {
1208     my ($self, $attr) = @_;
1209     my $value = $attr->value;
1210     if ($value =~ /^_/) {
1211     $value = lc $value; ## ISSUE: ASCII case-insentitive?
1212     unless ({
1213     _self => 1, _parent => 1, _top => 1,
1214     }->{$value}) {
1215     $self->{onerror}->(node => $attr,
1216     type => 'reserved browsing context name');
1217     }
1218     } else {
1219 wakaba 1.29 ## NOTE: An empty string is a valid browsing context name (same as _self).
1220 wakaba 1.1 }
1221     }; # $HTMLTargetAttrChecker
1222    
1223 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
1224     my ($self, $attr) = @_;
1225    
1226     ## ISSUE: Namespace resolution?
1227    
1228     my $value = $attr->value;
1229    
1230     require Whatpm::CSS::SelectorsParser;
1231     my $p = Whatpm::CSS::SelectorsParser->new;
1232     $p->{pseudo_class}->{$_} = 1 for qw/
1233     active checked disabled empty enabled first-child first-of-type
1234     focus hover indeterminate last-child last-of-type link only-child
1235     only-of-type root target visited
1236     lang nth-child nth-last-child nth-of-type nth-last-of-type not
1237     -manakai-contains -manakai-current
1238     /;
1239    
1240     $p->{pseudo_element}->{$_} = 1 for qw/
1241     after before first-letter first-line
1242     /;
1243    
1244     $p->{must_level} = $self->{must_level};
1245     $p->{onerror} = sub {
1246     my %opt = @_;
1247     $opt{type} = 'selectors:'.$opt{type};
1248     $self->{onerror}->(%opt, node => $attr);
1249     };
1250     $p->parse_string ($value);
1251     }; # $HTMLSelectorsAttrChecker
1252    
1253 wakaba 1.1 my $HTMLAttrChecker = {
1254     id => sub {
1255     ## NOTE: |map| has its own variant of |id=""| checker
1256     my ($self, $attr) = @_;
1257     my $value = $attr->value;
1258     if (length $value > 0) {
1259     if ($self->{id}->{$value}) {
1260     $self->{onerror}->(node => $attr, type => 'duplicate ID');
1261     push @{$self->{id}->{$value}}, $attr;
1262     } else {
1263     $self->{id}->{$value} = [$attr];
1264     }
1265     if ($value =~ /[\x09-\x0D\x20]/) {
1266     $self->{onerror}->(node => $attr, type => 'space in ID');
1267     }
1268     } else {
1269     ## NOTE: MUST contain at least one character
1270     $self->{onerror}->(node => $attr, type => 'empty attribute value');
1271     }
1272     },
1273     title => sub {}, ## NOTE: No conformance creteria
1274     lang => sub {
1275     my ($self, $attr) = @_;
1276 wakaba 1.6 my $value = $attr->value;
1277     if ($value eq '') {
1278     #
1279     } else {
1280     require Whatpm::LangTag;
1281     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
1282     my %opt = @_;
1283     my $type = 'LangTag:'.$opt{type};
1284     $type .= ':' . $opt{subtag} if defined $opt{subtag};
1285     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
1286     level => $opt{level});
1287     });
1288     }
1289 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
1290     unless ($attr->owner_document->manakai_is_html) {
1291     $self->{onerror}->(node => $attr, type => 'in XML:lang');
1292     }
1293 wakaba 1.6
1294     ## TODO: test data
1295 wakaba 1.1 },
1296     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
1297     class => sub {
1298     my ($self, $attr) = @_;
1299     my %word;
1300     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
1301     unless ($word{$word}) {
1302     $word{$word} = 1;
1303     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
1304     } else {
1305     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
1306     }
1307     }
1308     },
1309     contextmenu => sub {
1310     my ($self, $attr) = @_;
1311     my $value = $attr->value;
1312     push @{$self->{contextmenu}}, [$value => $attr];
1313     ## ISSUE: "The value must be the ID of a menu element in the DOM."
1314     ## What is "in the DOM"? A menu Element node that is not part
1315     ## of the Document tree is in the DOM? A menu Element node that
1316     ## belong to another Document tree is in the DOM?
1317     },
1318     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
1319 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
1320     ## TODO: ref, template, registrationmark
1321 wakaba 1.1 };
1322    
1323     for (qw/
1324     onabort onbeforeunload onblur onchange onclick oncontextmenu
1325     ondblclick ondrag ondragend ondragenter ondragleave ondragover
1326     ondragstart ondrop onerror onfocus onkeydown onkeypress
1327     onkeyup onload onmessage onmousedown onmousemove onmouseout
1328     onmouseover onmouseup onmousewheel onresize onscroll onselect
1329     onsubmit onunload
1330     /) {
1331     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
1332     }
1333    
1334     my $GetHTMLAttrsChecker = sub {
1335     my $element_specific_checker = shift;
1336     return sub {
1337     my ($self, $todo) = @_;
1338     for my $attr (@{$todo->{node}->attributes}) {
1339     my $attr_ns = $attr->namespace_uri;
1340     $attr_ns = '' unless defined $attr_ns;
1341     my $attr_ln = $attr->manakai_local_name;
1342     my $checker;
1343     if ($attr_ns eq '') {
1344     $checker = $element_specific_checker->{$attr_ln}
1345     || $HTMLAttrChecker->{$attr_ln};
1346     }
1347     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1348     || $AttrChecker->{$attr_ns}->{''};
1349     if ($checker) {
1350     $checker->($self, $attr, $todo);
1351     } else {
1352     $self->{onerror}->(node => $attr, level => 'unsupported',
1353     type => 'attribute');
1354     ## ISSUE: No comformance createria for unknown attributes in the spec
1355     }
1356     }
1357     };
1358     }; # $GetHTMLAttrsChecker
1359    
1360     our $Element;
1361     our $ElementDefault;
1362    
1363     $Element->{$HTML_NS}->{''} = {
1364     attrs_checker => $GetHTMLAttrsChecker->({}),
1365     checker => $ElementDefault->{checker},
1366     };
1367    
1368     $Element->{$HTML_NS}->{html} = {
1369     is_root => 1,
1370     attrs_checker => $GetHTMLAttrsChecker->({
1371 wakaba 1.16 manifest => $HTMLURIAttrChecker,
1372 wakaba 1.1 xmlns => sub {
1373     my ($self, $attr) = @_;
1374     my $value = $attr->value;
1375     unless ($value eq $HTML_NS) {
1376     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
1377     }
1378     unless ($attr->owner_document->manakai_is_html) {
1379     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
1380     ## TODO: Test
1381     }
1382     },
1383     }),
1384     checker => sub {
1385     my ($self, $todo) = @_;
1386     my $el = $todo->{node};
1387     my $new_todos = [];
1388     my @nodes = (@{$el->child_nodes});
1389    
1390 wakaba 1.25 my $old_values = {significant =>
1391     $todo->{flag}->{has_descendant}->{significant}};
1392     $todo->{flag}->{has_descendant}->{significant} = 0;
1393    
1394 wakaba 1.1 my $phase = 'before head';
1395     while (@nodes) {
1396     my $node = shift @nodes;
1397     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1398    
1399     my $nt = $node->node_type;
1400     if ($nt == 1) {
1401     my $node_ns = $node->namespace_uri;
1402     $node_ns = '' unless defined $node_ns;
1403     my $node_ln = $node->manakai_local_name;
1404     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1405 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1406     #
1407     } elsif ($phase eq 'before head') {
1408 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
1409     $phase = 'after head';
1410     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1411     $self->{onerror}->(node => $node, type => 'ps element missing:head');
1412     $phase = 'after body';
1413     } else {
1414     $not_allowed = 1;
1415     # before head
1416     }
1417     } elsif ($phase eq 'after head') {
1418     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1419     $phase = 'after body';
1420     } else {
1421     $not_allowed = 1;
1422     # after head
1423     }
1424     } else { #elsif ($phase eq 'after body') {
1425     $not_allowed = 1;
1426     # after body
1427     }
1428     $self->{onerror}->(node => $node, type => 'element not allowed')
1429     if $not_allowed;
1430     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1431     unshift @nodes, @$sib;
1432     push @$new_todos, @$ch;
1433     } elsif ($nt == 3 or $nt == 4) {
1434     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1435     $self->{onerror}->(node => $node, type => 'character not allowed');
1436 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1437 wakaba 1.1 }
1438     } elsif ($nt == 5) {
1439     unshift @nodes, @{$node->child_nodes};
1440     }
1441     }
1442    
1443     if ($phase eq 'before head') {
1444     $self->{onerror}->(node => $el, type => 'child element missing:head');
1445     $self->{onerror}->(node => $el, type => 'child element missing:body');
1446     } elsif ($phase eq 'after head') {
1447     $self->{onerror}->(node => $el, type => 'child element missing:body');
1448     }
1449    
1450 wakaba 1.25 ## NOTE: Significant content check - this is performed here since
1451     ## |html| content model allows a block-level element - |body|.
1452     push @$new_todos, {
1453     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
1454     old_values => $old_values,
1455 wakaba 1.26 errors => $HTMLSignificantContentErrors,
1456 wakaba 1.25 };
1457    
1458 wakaba 1.1 return ($new_todos);
1459     },
1460     };
1461    
1462     $Element->{$HTML_NS}->{head} = {
1463     attrs_checker => $GetHTMLAttrsChecker->({}),
1464     checker => sub {
1465     my ($self, $todo) = @_;
1466     my $el = $todo->{node};
1467     my $new_todos = [];
1468     my @nodes = (@{$el->child_nodes});
1469    
1470     my $has_title;
1471     while (@nodes) {
1472     my $node = shift @nodes;
1473     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1474    
1475     my $nt = $node->node_type;
1476     if ($nt == 1) {
1477     my $node_ns = $node->namespace_uri;
1478     $node_ns = '' unless defined $node_ns;
1479     my $node_ln = $node->manakai_local_name;
1480 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1481     $self->{onerror}->(node => $node,
1482     type => 'element not allowed:minus',
1483     level => $self->{must_level});
1484     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
1485 wakaba 1.8 #
1486     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1487 wakaba 1.1 unless ($has_title) {
1488     $has_title = 1;
1489     } else {
1490 wakaba 1.30 $self->{onerror}->(node => $node,
1491     type => 'element not allowed:head title',
1492     level => $self->{must_level});
1493     }
1494     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1495     if ($todo->{node}->has_attribute_ns (undef, 'scoped')) {
1496     $self->{onerror}->(node => $node,
1497     type => 'element not allowed:head style',
1498     level => $self->{must_level});
1499 wakaba 1.1 }
1500 wakaba 1.29 } elsif ($HTMLMetadataContent->{$node_ns}->{$node_ln}) {
1501     #
1502    
1503     ## NOTE: |meta| is a metadata content. However, strictly speaking,
1504     ## a |meta| element with none of |charset|, |name|,
1505     ## or |http-equiv| attribute is not allowed. It is non-conforming
1506     ## anyway.
1507 wakaba 1.1 } else {
1508 wakaba 1.30 $self->{onerror}->(node => $node,
1509     type => 'element not allowed:metadata',
1510     level => $self->{must_level});
1511 wakaba 1.1 }
1512 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1513 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1514     unshift @nodes, @$sib;
1515     push @$new_todos, @$ch;
1516     } elsif ($nt == 3 or $nt == 4) {
1517     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1518     $self->{onerror}->(node => $node, type => 'character not allowed');
1519 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1520 wakaba 1.1 }
1521     } elsif ($nt == 5) {
1522     unshift @nodes, @{$node->child_nodes};
1523     }
1524     }
1525     unless ($has_title) {
1526     $self->{onerror}->(node => $el, type => 'child element missing:title');
1527     }
1528     return ($new_todos);
1529     },
1530     };
1531    
1532     $Element->{$HTML_NS}->{title} = {
1533     attrs_checker => $GetHTMLAttrsChecker->({}),
1534     checker => $HTMLTextChecker,
1535     };
1536    
1537     $Element->{$HTML_NS}->{base} = {
1538 wakaba 1.4 attrs_checker => sub {
1539     my ($self, $todo) = @_;
1540    
1541 wakaba 1.29 if ($self->{has_base}) {
1542     $self->{onerror}->(node => $todo->{node},
1543     type => 'element not allowed:base');
1544     } else {
1545     $self->{has_base} = 1;
1546     }
1547    
1548 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1549     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1550    
1551     if ($self->{has_uri_attr} and $has_href) {
1552 wakaba 1.4 ## ISSUE: Are these examples conforming?
1553     ## <head profile="a b c"><base href> (except for |profile|'s
1554     ## non-conformance)
1555     ## <title xml:base="relative"/><base href/> (maybe it should be)
1556     ## <unknown xmlns="relative"/><base href/> (assuming that
1557     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1558     ## <style>@import 'relative';</style><base href>
1559     ## <script>location.href = 'relative';</script><base href>
1560 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1561     ## an exception.
1562 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1563     type => 'basehref after URI attribute');
1564     }
1565 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1566 wakaba 1.4 ## ISSUE: Are these examples conforming?
1567     ## <head><title xlink:href=""/><base target="name"/></head>
1568     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1569     ## (assuming that |xbl:xbl| is allowed before |base|)
1570     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1571     ## <link href=""/><base target="name"/>
1572     ## <link rel=unknown href=""><base target=name>
1573     $self->{onerror}->(node => $todo->{node},
1574     type => 'basetarget after hyperlink');
1575     }
1576    
1577 wakaba 1.14 if (not $has_href and not $has_target) {
1578     $self->{onerror}->(node => $todo->{node},
1579     type => 'attribute missing:href|target');
1580     }
1581    
1582 wakaba 1.4 return $GetHTMLAttrsChecker->({
1583     href => $HTMLURIAttrChecker,
1584     target => $HTMLTargetAttrChecker,
1585     })->($self, $todo);
1586     },
1587 wakaba 1.1 checker => $HTMLEmptyChecker,
1588     };
1589    
1590     $Element->{$HTML_NS}->{link} = {
1591     attrs_checker => sub {
1592     my ($self, $todo) = @_;
1593     $GetHTMLAttrsChecker->({
1594     href => $HTMLURIAttrChecker,
1595 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1596 wakaba 1.1 media => $HTMLMQAttrChecker,
1597     hreflang => $HTMLLanguageTagAttrChecker,
1598     type => $HTMLIMTAttrChecker,
1599     ## NOTE: Though |title| has special semantics,
1600     ## syntactically same as the |title| as global attribute.
1601     })->($self, $todo);
1602 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1603     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1604     } else {
1605 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1606     type => 'attribute missing:href');
1607     }
1608     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1609     $self->{onerror}->(node => $todo->{node},
1610     type => 'attribute missing:rel');
1611     }
1612     },
1613     checker => $HTMLEmptyChecker,
1614     };
1615    
1616     $Element->{$HTML_NS}->{meta} = {
1617     attrs_checker => sub {
1618     my ($self, $todo) = @_;
1619     my $name_attr;
1620     my $http_equiv_attr;
1621     my $charset_attr;
1622     my $content_attr;
1623     for my $attr (@{$todo->{node}->attributes}) {
1624     my $attr_ns = $attr->namespace_uri;
1625     $attr_ns = '' unless defined $attr_ns;
1626     my $attr_ln = $attr->manakai_local_name;
1627     my $checker;
1628     if ($attr_ns eq '') {
1629     if ($attr_ln eq 'content') {
1630     $content_attr = $attr;
1631     $checker = 1;
1632     } elsif ($attr_ln eq 'name') {
1633     $name_attr = $attr;
1634     $checker = 1;
1635     } elsif ($attr_ln eq 'http-equiv') {
1636     $http_equiv_attr = $attr;
1637     $checker = 1;
1638     } elsif ($attr_ln eq 'charset') {
1639     $charset_attr = $attr;
1640     $checker = 1;
1641     } else {
1642     $checker = $HTMLAttrChecker->{$attr_ln}
1643     || $AttrChecker->{$attr_ns}->{$attr_ln}
1644     || $AttrChecker->{$attr_ns}->{''};
1645     }
1646     } else {
1647     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1648     || $AttrChecker->{$attr_ns}->{''};
1649     }
1650     if ($checker) {
1651     $checker->($self, $attr) if ref $checker;
1652     } else {
1653     $self->{onerror}->(node => $attr, level => 'unsupported',
1654     type => 'attribute');
1655     ## ISSUE: No comformance createria for unknown attributes in the spec
1656     }
1657     }
1658    
1659     if (defined $name_attr) {
1660     if (defined $http_equiv_attr) {
1661     $self->{onerror}->(node => $http_equiv_attr,
1662     type => 'attribute not allowed');
1663     } elsif (defined $charset_attr) {
1664     $self->{onerror}->(node => $charset_attr,
1665     type => 'attribute not allowed');
1666     }
1667     my $metadata_name = $name_attr->value;
1668     my $metadata_value;
1669     if (defined $content_attr) {
1670     $metadata_value = $content_attr->value;
1671     } else {
1672     $self->{onerror}->(node => $todo->{node},
1673     type => 'attribute missing:content');
1674     $metadata_value = '';
1675     }
1676     } elsif (defined $http_equiv_attr) {
1677     if (defined $charset_attr) {
1678     $self->{onerror}->(node => $charset_attr,
1679     type => 'attribute not allowed');
1680     }
1681     unless (defined $content_attr) {
1682     $self->{onerror}->(node => $todo->{node},
1683     type => 'attribute missing:content');
1684     }
1685     } elsif (defined $charset_attr) {
1686     if (defined $content_attr) {
1687     $self->{onerror}->(node => $content_attr,
1688     type => 'attribute not allowed');
1689     }
1690     } else {
1691     if (defined $content_attr) {
1692     $self->{onerror}->(node => $content_attr,
1693     type => 'attribute not allowed');
1694     $self->{onerror}->(node => $todo->{node},
1695     type => 'attribute missing:name|http-equiv');
1696     } else {
1697     $self->{onerror}->(node => $todo->{node},
1698     type => 'attribute missing:name|http-equiv|charset');
1699     }
1700     }
1701    
1702 wakaba 1.32 my $check_charset_decl = sub () {
1703 wakaba 1.29 my $parent = $todo->{node}->manakai_parent_element;
1704     if ($parent and $parent eq $parent->owner_document->manakai_head) {
1705     for my $el (@{$parent->child_nodes}) {
1706     next unless $el->node_type == 1; # ELEMENT_NODE
1707     unless ($el eq $todo->{node}) {
1708     ## NOTE: Not the first child element.
1709     $self->{onerror}->(node => $todo->{node},
1710 wakaba 1.32 type => 'element not allowed:meta charset',
1711     level => $self->{must_level});
1712 wakaba 1.29 }
1713     last;
1714     ## NOTE: Entity references are not supported.
1715     }
1716     } else {
1717     $self->{onerror}->(node => $todo->{node},
1718 wakaba 1.32 type => 'element not allowed:meta charset',
1719     level => $self->{must_level});
1720 wakaba 1.29 }
1721    
1722 wakaba 1.1 unless ($todo->{node}->owner_document->manakai_is_html) {
1723 wakaba 1.32 $self->{onerror}->(node => $todo->{node},
1724     type => 'in XML:charset',
1725     level => $self->{must_level});
1726 wakaba 1.1 }
1727 wakaba 1.32 }; # $check_charset_decl
1728 wakaba 1.21
1729 wakaba 1.32 my $check_charset = sub ($$) {
1730     my ($attr, $charset_value) = @_;
1731 wakaba 1.21 ## NOTE: Though the case-sensitivility of |charset| attribute value
1732     ## is not explicitly spelled in the HTML5 spec, the Character Set
1733     ## registry of IANA, which is referenced from HTML5 spec, says that
1734     ## charset name is case-insensitive.
1735     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1736    
1737     require Message::Charset::Info;
1738     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1739     my $ic = $todo->{node}->owner_document->input_encoding;
1740     if (defined $ic) {
1741     ## TODO: Test for this case
1742     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1743     if ($charset ne $ic_charset) {
1744 wakaba 1.32 $self->{onerror}->(node => $attr,
1745 wakaba 1.21 type => 'mismatched charset name:'.$ic.
1746 wakaba 1.32 ':'.$charset_value, ## TODO: This should be a |value| value.
1747     level => $self->{must_level});
1748 wakaba 1.21 }
1749     } else {
1750     ## NOTE: MUST, but not checkable, since the document is not originally
1751     ## in serialized form (or the parser does not preserve the input
1752     ## encoding information).
1753 wakaba 1.32 $self->{onerror}->(node => $attr,
1754     type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
1755 wakaba 1.21 level => 'unsupported');
1756     }
1757    
1758     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1759     ## Syntactically valid and registered? What about x-charset names?
1760     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1761     ($charset_value)) {
1762 wakaba 1.32 $self->{onerror}->(node => $attr,
1763     type => 'charset:syntax error:'.$charset_value, ## TODO
1764     level => $self->{must_level});
1765 wakaba 1.21 }
1766    
1767     if ($charset) {
1768     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1769     ## with no "preferred MIME name" label)?
1770     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1771     if (($charset_status &
1772     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1773     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1774 wakaba 1.32 $self->{onerror}->(node => $attr,
1775 wakaba 1.21 type => 'charset:not preferred:'.
1776 wakaba 1.32 $charset_value, ## TODO
1777     level => $self->{must_level});
1778 wakaba 1.21 }
1779     if (($charset_status &
1780     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1781     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1782     if ($charset_value =~ /^x-/) {
1783 wakaba 1.32 $self->{onerror}->(node => $attr,
1784     type => 'charset:private:'.$charset_value, ## TODO
1785 wakaba 1.21 level => $self->{good_level});
1786     } else {
1787 wakaba 1.32 $self->{onerror}->(node => $attr,
1788 wakaba 1.21 type => 'charset:not registered:'.
1789 wakaba 1.32 $charset_value, ## TODO
1790 wakaba 1.21 level => $self->{good_level});
1791     }
1792     }
1793     } elsif ($charset_value =~ /^x-/) {
1794 wakaba 1.32 $self->{onerror}->(node => $attr,
1795     type => 'charset:private:'.$charset_value, ## TODO
1796 wakaba 1.21 level => $self->{good_level});
1797     } else {
1798 wakaba 1.32 $self->{onerror}->(node => $attr,
1799     type => 'charset:not registered:'.$charset_value, ## TODO
1800 wakaba 1.21 level => $self->{good_level});
1801     }
1802    
1803 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
1804     $self->{onerror}->(node => $attr,
1805 wakaba 1.22 type => 'character reference in charset',
1806     level => $self->{must_level});
1807     }
1808 wakaba 1.32 }; # $check_charset
1809    
1810     ## TODO: metadata conformance
1811    
1812     ## TODO: pragma conformance
1813     if (defined $http_equiv_attr) { ## An enumerated attribute
1814     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1815     if ({
1816     'refresh' => 1,
1817     'default-style' => 1,
1818     }->{$keyword}) {
1819     #
1820     } elsif ($keyword eq 'content-type') {
1821     $check_charset_decl->();
1822     if ($content_attr) {
1823     my $content = $content_attr->value;
1824     if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
1825     $check_charset->($content_attr, $1);
1826     } else {
1827     $self->{onerror}->(node => $content_attr,
1828     type => 'meta content-type syntax error',
1829     level => $self->{must_level});
1830     }
1831     }
1832     } else {
1833     $self->{onerror}->(node => $http_equiv_attr,
1834     type => 'enumerated:invalid');
1835     }
1836     }
1837    
1838     if (defined $charset_attr) {
1839     $check_charset_decl->();
1840     $check_charset->($charset_attr, $charset_attr->value);
1841 wakaba 1.1 }
1842     },
1843     checker => $HTMLEmptyChecker,
1844     };
1845    
1846     $Element->{$HTML_NS}->{style} = {
1847     attrs_checker => $GetHTMLAttrsChecker->({
1848     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1849     media => $HTMLMQAttrChecker,
1850     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1851     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1852     ## not different
1853     }),
1854     checker => sub {
1855 wakaba 1.27 ## NOTE: |html:style| itself has no conformance creteria on content model.
1856 wakaba 1.1 my ($self, $todo) = @_;
1857     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1858 wakaba 1.27 if (not defined $type or
1859     $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
1860     my $el = $todo->{node};
1861     my $new_todos = [];
1862     my @nodes = (@{$el->child_nodes});
1863    
1864     my $ss_text = '';
1865     while (@nodes) {
1866     my $node = shift @nodes;
1867     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1868    
1869     my $nt = $node->node_type;
1870     if ($nt == 1) {
1871 wakaba 1.29 my $node_ns = $node->namespace_uri;
1872     $node_ns = '' unless defined $node_ns;
1873     my $node_ln = $node->manakai_local_name;
1874     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1875     #
1876     } else {
1877     $self->{onerror}->(node => $node, type => 'element not allowed');
1878     }
1879 wakaba 1.27 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1880     unshift @nodes, @$sib;
1881     push @$new_todos, @$ch;
1882     } elsif ($nt == 3 or $nt == 4) {
1883     $ss_text .= $node->text_content;
1884     } elsif ($nt == 5) {
1885     unshift @nodes, @{$node->child_nodes};
1886     }
1887     }
1888    
1889 wakaba 1.28 $self->{onsubdoc}->({s => $ss_text, container_node => $el,
1890     media_type => 'text/css', is_char_string => 1});
1891 wakaba 1.27 return ($new_todos);
1892     } else {
1893     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1894     type => 'style:'.$type); ## TODO: $type normalization
1895     return $AnyChecker->($self, $todo);
1896     }
1897 wakaba 1.1 },
1898     };
1899 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1900 wakaba 1.1
1901     $Element->{$HTML_NS}->{body} = {
1902     attrs_checker => $GetHTMLAttrsChecker->({}),
1903 wakaba 1.29 checker => $HTMLProseContentChecker,
1904 wakaba 1.1 };
1905    
1906     $Element->{$HTML_NS}->{section} = {
1907     attrs_checker => $GetHTMLAttrsChecker->({}),
1908 wakaba 1.29 checker => $HTMLProseContentChecker,
1909 wakaba 1.1 };
1910    
1911     $Element->{$HTML_NS}->{nav} = {
1912     attrs_checker => $GetHTMLAttrsChecker->({}),
1913 wakaba 1.29 checker => $HTMLProseContentChecker,
1914 wakaba 1.1 };
1915    
1916     $Element->{$HTML_NS}->{article} = {
1917     attrs_checker => $GetHTMLAttrsChecker->({}),
1918 wakaba 1.29 checker => $HTMLProseContentChecker,
1919 wakaba 1.1 };
1920    
1921     $Element->{$HTML_NS}->{blockquote} = {
1922     attrs_checker => $GetHTMLAttrsChecker->({
1923     cite => $HTMLURIAttrChecker,
1924     }),
1925 wakaba 1.29 checker => $HTMLProseContentChecker,
1926 wakaba 1.1 };
1927    
1928     $Element->{$HTML_NS}->{aside} = {
1929     attrs_checker => $GetHTMLAttrsChecker->({}),
1930 wakaba 1.29 checker => $HTMLProseContentChecker,
1931 wakaba 1.1 };
1932    
1933     $Element->{$HTML_NS}->{h1} = {
1934     attrs_checker => $GetHTMLAttrsChecker->({}),
1935     checker => sub {
1936     my ($self, $todo) = @_;
1937 wakaba 1.24 $todo->{flag}->{has_descendant}->{hn} = 1;
1938 wakaba 1.29 return $HTMLPhrasingContentChecker->($self, $todo);
1939 wakaba 1.1 },
1940     };
1941    
1942     $Element->{$HTML_NS}->{h2} = {
1943     attrs_checker => $GetHTMLAttrsChecker->({}),
1944     checker => $Element->{$HTML_NS}->{h1}->{checker},
1945     };
1946    
1947     $Element->{$HTML_NS}->{h3} = {
1948     attrs_checker => $GetHTMLAttrsChecker->({}),
1949     checker => $Element->{$HTML_NS}->{h1}->{checker},
1950     };
1951    
1952     $Element->{$HTML_NS}->{h4} = {
1953     attrs_checker => $GetHTMLAttrsChecker->({}),
1954     checker => $Element->{$HTML_NS}->{h1}->{checker},
1955     };
1956    
1957     $Element->{$HTML_NS}->{h5} = {
1958     attrs_checker => $GetHTMLAttrsChecker->({}),
1959     checker => $Element->{$HTML_NS}->{h1}->{checker},
1960     };
1961    
1962     $Element->{$HTML_NS}->{h6} = {
1963     attrs_checker => $GetHTMLAttrsChecker->({}),
1964     checker => $Element->{$HTML_NS}->{h1}->{checker},
1965     };
1966    
1967 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1968    
1969 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
1970     attrs_checker => $GetHTMLAttrsChecker->({}),
1971     checker => sub {
1972     my ($self, $todo) = @_;
1973 wakaba 1.24
1974     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
1975     $todo->{flag}->{has_descendant}->{hn} = 0;
1976 wakaba 1.1
1977     my $end = $self->_add_minuses
1978     ({$HTML_NS => {qw/header 1 footer 1/}},
1979 wakaba 1.29 $HTMLSectioningContent);
1980     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
1981 wakaba 1.24 push @$new_todos, $end,
1982     {type => 'descendant', node => $todo->{node},
1983     flag => $todo->{flag}, old_values => $old_flags,
1984     errors => {
1985     hn => sub {
1986     my ($self, $todo) = @_;
1987     $self->{onerror}->(node => $todo->{node},
1988     type => 'element missing:hn');
1989     },
1990 wakaba 1.1 }};
1991     return ($new_todos, $ch);
1992 wakaba 1.24
1993     ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
1994 wakaba 1.1 },
1995     };
1996    
1997     $Element->{$HTML_NS}->{footer} = {
1998     attrs_checker => $GetHTMLAttrsChecker->({}),
1999 wakaba 1.29 checker => sub {
2000 wakaba 1.1 my ($self, $todo) = @_;
2001 wakaba 1.25
2002 wakaba 1.29 my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
2003     $todo->{flag}->{has_descendant}->{hn} = 0;
2004 wakaba 1.1
2005     my $end = $self->_add_minuses
2006 wakaba 1.29 ({$HTML_NS => {footer => 1}},
2007     $HTMLSectioningContent, $HTMLHeadingContent);
2008     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
2009 wakaba 1.1 push @$new_todos, $end;
2010    
2011 wakaba 1.29 return ($new_todos, $ch);
2012 wakaba 1.1 },
2013     };
2014    
2015     $Element->{$HTML_NS}->{address} = {
2016     attrs_checker => $GetHTMLAttrsChecker->({}),
2017 wakaba 1.29 checker => sub {
2018     my ($self, $todo) = @_;
2019    
2020     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
2021     $todo->{flag}->{has_descendant}->{hn} = 0;
2022    
2023     my $end = $self->_add_minuses
2024     ({$HTML_NS => {footer => 1, address => 1}},
2025     $HTMLSectioningContent, $HTMLHeadingContent);
2026     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
2027     push @$new_todos, $end;
2028    
2029     return ($new_todos, $ch);
2030     },
2031 wakaba 1.1 };
2032    
2033     $Element->{$HTML_NS}->{p} = {
2034     attrs_checker => $GetHTMLAttrsChecker->({}),
2035 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2036 wakaba 1.1 };
2037    
2038     $Element->{$HTML_NS}->{hr} = {
2039     attrs_checker => $GetHTMLAttrsChecker->({}),
2040     checker => $HTMLEmptyChecker,
2041     };
2042    
2043     $Element->{$HTML_NS}->{br} = {
2044     attrs_checker => $GetHTMLAttrsChecker->({}),
2045     checker => $HTMLEmptyChecker,
2046 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
2047     ## (This requirement is semantic so that we cannot check.)
2048 wakaba 1.1 };
2049    
2050     $Element->{$HTML_NS}->{dialog} = {
2051     attrs_checker => $GetHTMLAttrsChecker->({}),
2052     checker => sub {
2053     my ($self, $todo) = @_;
2054     my $el = $todo->{node};
2055     my $new_todos = [];
2056     my @nodes = (@{$el->child_nodes});
2057    
2058     my $phase = 'before dt';
2059     while (@nodes) {
2060     my $node = shift @nodes;
2061     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2062    
2063     my $nt = $node->node_type;
2064     if ($nt == 1) {
2065     my $node_ns = $node->namespace_uri;
2066     $node_ns = '' unless defined $node_ns;
2067     my $node_ln = $node->manakai_local_name;
2068     ## NOTE: |minuses| list is not checked since redundant
2069 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2070     #
2071     } elsif ($phase eq 'before dt') {
2072 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2073     $phase = 'before dd';
2074     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2075     $self->{onerror}
2076     ->(node => $node, type => 'ps element missing:dt');
2077     $phase = 'before dt';
2078     } else {
2079     $self->{onerror}->(node => $node, type => 'element not allowed');
2080     }
2081     } else { # before dd
2082     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2083     $phase = 'before dt';
2084     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2085     $self->{onerror}
2086     ->(node => $node, type => 'ps element missing:dd');
2087     $phase = 'before dd';
2088     } else {
2089     $self->{onerror}->(node => $node, type => 'element not allowed');
2090     }
2091     }
2092     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2093     unshift @nodes, @$sib;
2094     push @$new_todos, @$ch;
2095     } elsif ($nt == 3 or $nt == 4) {
2096     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2097     $self->{onerror}->(node => $node, type => 'character not allowed');
2098 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2099 wakaba 1.1 }
2100     } elsif ($nt == 5) {
2101     unshift @nodes, @{$node->child_nodes};
2102     }
2103     }
2104     if ($phase eq 'before dd') {
2105 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
2106 wakaba 1.1 }
2107     return ($new_todos);
2108     },
2109     };
2110    
2111     $Element->{$HTML_NS}->{pre} = {
2112     attrs_checker => $GetHTMLAttrsChecker->({}),
2113 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2114 wakaba 1.1 };
2115    
2116     $Element->{$HTML_NS}->{ol} = {
2117     attrs_checker => $GetHTMLAttrsChecker->({
2118     start => $HTMLIntegerAttrChecker,
2119     }),
2120     checker => sub {
2121     my ($self, $todo) = @_;
2122     my $el = $todo->{node};
2123     my $new_todos = [];
2124     my @nodes = (@{$el->child_nodes});
2125    
2126     while (@nodes) {
2127     my $node = shift @nodes;
2128     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2129    
2130     my $nt = $node->node_type;
2131     if ($nt == 1) {
2132     my $node_ns = $node->namespace_uri;
2133     $node_ns = '' unless defined $node_ns;
2134     my $node_ln = $node->manakai_local_name;
2135     ## NOTE: |minuses| list is not checked since redundant
2136 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2137     #
2138     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
2139 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2140     }
2141     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2142     unshift @nodes, @$sib;
2143     push @$new_todos, @$ch;
2144     } elsif ($nt == 3 or $nt == 4) {
2145     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2146     $self->{onerror}->(node => $node, type => 'character not allowed');
2147 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2148 wakaba 1.1 }
2149     } elsif ($nt == 5) {
2150     unshift @nodes, @{$node->child_nodes};
2151     }
2152     }
2153    
2154     if ($todo->{inline}) {
2155     for (@$new_todos) {
2156     $_->{inline} = 1;
2157     }
2158     }
2159     return ($new_todos);
2160     },
2161     };
2162    
2163     $Element->{$HTML_NS}->{ul} = {
2164     attrs_checker => $GetHTMLAttrsChecker->({}),
2165     checker => $Element->{$HTML_NS}->{ol}->{checker},
2166     };
2167    
2168     $Element->{$HTML_NS}->{li} = {
2169     attrs_checker => $GetHTMLAttrsChecker->({
2170     start => sub {
2171     my ($self, $attr) = @_;
2172     my $parent = $attr->owner_element->manakai_parent_element;
2173     if (defined $parent) {
2174     my $parent_ns = $parent->namespace_uri;
2175     $parent_ns = '' unless defined $parent_ns;
2176     my $parent_ln = $parent->manakai_local_name;
2177     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
2178     $self->{onerror}->(node => $attr, level => 'unsupported',
2179     type => 'attribute');
2180     }
2181     }
2182     $HTMLIntegerAttrChecker->($self, $attr);
2183     },
2184     }),
2185     checker => sub {
2186     my ($self, $todo) = @_;
2187 wakaba 1.29 if ($todo->{flag}->{in_menu}) {
2188     return $HTMLPhrasingContentChecker->($self, $todo);
2189 wakaba 1.1 } else {
2190 wakaba 1.29 return $HTMLProseContentChecker->($self, $todo);
2191 wakaba 1.1 }
2192     },
2193     };
2194    
2195     $Element->{$HTML_NS}->{dl} = {
2196     attrs_checker => $GetHTMLAttrsChecker->({}),
2197     checker => sub {
2198     my ($self, $todo) = @_;
2199     my $el = $todo->{node};
2200     my $new_todos = [];
2201     my @nodes = (@{$el->child_nodes});
2202    
2203     my $phase = 'before dt';
2204     while (@nodes) {
2205     my $node = shift @nodes;
2206     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2207    
2208     my $nt = $node->node_type;
2209     if ($nt == 1) {
2210     my $node_ns = $node->namespace_uri;
2211     $node_ns = '' unless defined $node_ns;
2212     my $node_ln = $node->manakai_local_name;
2213     ## NOTE: |minuses| list is not checked since redundant
2214 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2215     #
2216     } elsif ($phase eq 'in dds') {
2217 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2218     #$phase = 'in dds';
2219     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2220     $phase = 'in dts';
2221     } else {
2222     $self->{onerror}->(node => $node, type => 'element not allowed');
2223     }
2224     } elsif ($phase eq 'in dts') {
2225     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2226     #$phase = 'in dts';
2227     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2228     $phase = 'in dds';
2229     } else {
2230     $self->{onerror}->(node => $node, type => 'element not allowed');
2231     }
2232     } else { # before dt
2233     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2234     $phase = 'in dts';
2235     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2236     $self->{onerror}
2237     ->(node => $node, type => 'ps element missing:dt');
2238     $phase = 'in dds';
2239     } else {
2240     $self->{onerror}->(node => $node, type => 'element not allowed');
2241     }
2242     }
2243     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2244     unshift @nodes, @$sib;
2245     push @$new_todos, @$ch;
2246     } elsif ($nt == 3 or $nt == 4) {
2247     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2248     $self->{onerror}->(node => $node, type => 'character not allowed');
2249 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2250 wakaba 1.1 }
2251     } elsif ($nt == 5) {
2252     unshift @nodes, @{$node->child_nodes};
2253     }
2254     }
2255     if ($phase eq 'in dts') {
2256 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
2257 wakaba 1.1 }
2258    
2259     if ($todo->{inline}) {
2260     for (@$new_todos) {
2261     $_->{inline} = 1;
2262     }
2263     }
2264     return ($new_todos);
2265     },
2266     };
2267    
2268     $Element->{$HTML_NS}->{dt} = {
2269     attrs_checker => $GetHTMLAttrsChecker->({}),
2270 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2271 wakaba 1.1 };
2272    
2273     $Element->{$HTML_NS}->{dd} = {
2274     attrs_checker => $GetHTMLAttrsChecker->({}),
2275 wakaba 1.29 checker => $HTMLProseContentChecker,
2276 wakaba 1.1 };
2277    
2278     $Element->{$HTML_NS}->{a} = {
2279     attrs_checker => sub {
2280     my ($self, $todo) = @_;
2281     my %attr;
2282     for my $attr (@{$todo->{node}->attributes}) {
2283     my $attr_ns = $attr->namespace_uri;
2284     $attr_ns = '' unless defined $attr_ns;
2285     my $attr_ln = $attr->manakai_local_name;
2286     my $checker;
2287     if ($attr_ns eq '') {
2288     $checker = {
2289     target => $HTMLTargetAttrChecker,
2290     href => $HTMLURIAttrChecker,
2291     ping => $HTMLSpaceURIsAttrChecker,
2292 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2293 wakaba 1.1 media => $HTMLMQAttrChecker,
2294     hreflang => $HTMLLanguageTagAttrChecker,
2295     type => $HTMLIMTAttrChecker,
2296     }->{$attr_ln};
2297     if ($checker) {
2298     $attr{$attr_ln} = $attr;
2299     } else {
2300     $checker = $HTMLAttrChecker->{$attr_ln};
2301     }
2302     }
2303     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2304     || $AttrChecker->{$attr_ns}->{''};
2305     if ($checker) {
2306     $checker->($self, $attr) if ref $checker;
2307     } else {
2308     $self->{onerror}->(node => $attr, level => 'unsupported',
2309     type => 'attribute');
2310     ## ISSUE: No comformance createria for unknown attributes in the spec
2311     }
2312     }
2313    
2314 wakaba 1.4 if (defined $attr{href}) {
2315     $self->{has_hyperlink_element} = 1;
2316     } else {
2317 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
2318     if (defined $attr{$_}) {
2319     $self->{onerror}->(node => $attr{$_},
2320     type => 'attribute not allowed');
2321     }
2322     }
2323     }
2324     },
2325     checker => sub {
2326     my ($self, $todo) = @_;
2327    
2328     my $end = $self->_add_minuses ($HTMLInteractiveElements);
2329 wakaba 1.29 my ($new_todos, $ch) = $HTMLPhrasingContentChecker->($self, $todo);
2330 wakaba 1.1 push @$new_todos, $end;
2331    
2332 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
2333     $_->{flag}->{in_a_href} = 1 for @$new_todos;
2334     }
2335 wakaba 1.1
2336     return ($new_todos, $ch);
2337     },
2338     };
2339    
2340     $Element->{$HTML_NS}->{q} = {
2341     attrs_checker => $GetHTMLAttrsChecker->({
2342     cite => $HTMLURIAttrChecker,
2343     }),
2344 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2345 wakaba 1.1 };
2346    
2347     $Element->{$HTML_NS}->{cite} = {
2348     attrs_checker => $GetHTMLAttrsChecker->({}),
2349 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2350 wakaba 1.1 };
2351    
2352     $Element->{$HTML_NS}->{em} = {
2353     attrs_checker => $GetHTMLAttrsChecker->({}),
2354 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2355 wakaba 1.1 };
2356    
2357     $Element->{$HTML_NS}->{strong} = {
2358     attrs_checker => $GetHTMLAttrsChecker->({}),
2359 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2360 wakaba 1.1 };
2361    
2362     $Element->{$HTML_NS}->{small} = {
2363     attrs_checker => $GetHTMLAttrsChecker->({}),
2364 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2365 wakaba 1.1 };
2366    
2367     $Element->{$HTML_NS}->{m} = {
2368     attrs_checker => $GetHTMLAttrsChecker->({}),
2369 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2370 wakaba 1.1 };
2371    
2372     $Element->{$HTML_NS}->{dfn} = {
2373     attrs_checker => $GetHTMLAttrsChecker->({}),
2374     checker => sub {
2375     my ($self, $todo) = @_;
2376    
2377     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
2378 wakaba 1.29 my ($sib, $ch) = $HTMLPhrasingContentChecker->($self, $todo);
2379 wakaba 1.1 push @$sib, $end;
2380    
2381     my $node = $todo->{node};
2382     my $term = $node->get_attribute_ns (undef, 'title');
2383     unless (defined $term) {
2384     for my $child (@{$node->child_nodes}) {
2385     if ($child->node_type == 1) { # ELEMENT_NODE
2386     if (defined $term) {
2387     undef $term;
2388     last;
2389     } elsif ($child->manakai_local_name eq 'abbr') {
2390     my $nsuri = $child->namespace_uri;
2391     if (defined $nsuri and $nsuri eq $HTML_NS) {
2392     my $attr = $child->get_attribute_node_ns (undef, 'title');
2393     if ($attr) {
2394     $term = $attr->value;
2395     }
2396     }
2397     }
2398     } elsif ($child->node_type == 3 or $child->node_type == 4) {
2399     ## TEXT_NODE or CDATA_SECTION_NODE
2400     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
2401     next;
2402     }
2403     undef $term;
2404     last;
2405     }
2406     }
2407     unless (defined $term) {
2408     $term = $node->text_content;
2409     }
2410     }
2411     if ($self->{term}->{$term}) {
2412     $self->{onerror}->(node => $node, type => 'duplicate term');
2413     push @{$self->{term}->{$term}}, $node;
2414     } else {
2415     $self->{term}->{$term} = [$node];
2416     }
2417     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
2418     ## has |title|.
2419    
2420     return ($sib, $ch);
2421     },
2422     };
2423    
2424     $Element->{$HTML_NS}->{abbr} = {
2425     attrs_checker => $GetHTMLAttrsChecker->({
2426     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
2427     ## not different. The spec says that the |title| MAY be omitted
2428     ## if there is a |dfn| whose defining term is the abbreviation,
2429     ## but it does not prohibit |abbr| w/o |title| in other cases.
2430     }),
2431 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2432 wakaba 1.1 };
2433    
2434     $Element->{$HTML_NS}->{time} = {
2435     attrs_checker => $GetHTMLAttrsChecker->({
2436     datetime => sub { 1 }, # checked in |checker|
2437     }),
2438     ## TODO: Write tests
2439     checker => sub {
2440     my ($self, $todo) = @_;
2441    
2442     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
2443     my $input;
2444     my $reg_sp;
2445     my $input_node;
2446     if ($attr) {
2447     $input = $attr->value;
2448     $reg_sp = qr/[\x09-\x0D\x20]*/;
2449     $input_node = $attr;
2450     } else {
2451     $input = $todo->{node}->text_content;
2452     $reg_sp = qr/\p{Zs}*/;
2453     $input_node = $todo->{node};
2454    
2455     ## ISSUE: What is the definition for "successfully extracts a date
2456     ## or time"? If the algorithm says the string is invalid but
2457     ## return some date or time, is it "successfully"?
2458     }
2459    
2460     my $hour;
2461     my $minute;
2462     my $second;
2463     if ($input =~ /
2464     \A
2465     [\x09-\x0D\x20]*
2466     ([0-9]+) # 1
2467     (?>
2468     -([0-9]+) # 2
2469     -([0-9]+) # 3
2470     [\x09-\x0D\x20]*
2471     (?>
2472     T
2473     [\x09-\x0D\x20]*
2474     )?
2475     ([0-9]+) # 4
2476     :([0-9]+) # 5
2477     (?>
2478     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2479     )?
2480     [\x09-\x0D\x20]*
2481     (?>
2482     Z
2483     [\x09-\x0D\x20]*
2484     |
2485     [+-]([0-9]+):([0-9]+) # 7, 8
2486     [\x09-\x0D\x20]*
2487     )?
2488     \z
2489     |
2490     :([0-9]+) # 9
2491     (?>
2492     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2493     )?
2494     [\x09-\x0D\x20]*\z
2495     )
2496     /x) {
2497     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2498     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2499     length $4 != 2 or length $5 != 2) {
2500     $self->{onerror}->(node => $input_node,
2501     type => 'dateortime:syntax error');
2502     }
2503    
2504     if (1 <= $2 and $2 <= 12) {
2505     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2506     if $3 < 1 or
2507     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
2508     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2509     if $2 == 2 and $3 == 29 and
2510     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2511     } else {
2512     $self->{onerror}->(node => $input_node,
2513     type => 'datetime:bad month');
2514     }
2515    
2516     ($hour, $minute, $second) = ($4, $5, $6);
2517    
2518     if (defined $7) { ## [+-]hh:mm
2519     if (length $7 != 2 or length $8 != 2) {
2520     $self->{onerror}->(node => $input_node,
2521     type => 'dateortime:syntax error');
2522     }
2523    
2524     $self->{onerror}->(node => $input_node,
2525     type => 'datetime:bad timezone hour')
2526     if $7 > 23;
2527     $self->{onerror}->(node => $input_node,
2528     type => 'datetime:bad timezone minute')
2529     if $8 > 59;
2530     }
2531     } else { ## hh:mm
2532     if (length $1 != 2 or length $9 != 2) {
2533     $self->{onerror}->(node => $input_node,
2534     type => qq'dateortime:syntax error');
2535     }
2536    
2537     ($hour, $minute, $second) = ($1, $9, $10);
2538     }
2539    
2540     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2541     if $hour > 23;
2542     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2543     if $minute > 59;
2544    
2545     if (defined $second) { ## s
2546     ## NOTE: Integer part of second don't have to have length of two.
2547    
2548     if (substr ($second, 0, 1) eq '.') {
2549     $self->{onerror}->(node => $input_node,
2550     type => 'dateortime:syntax error');
2551     }
2552    
2553     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2554     if $second >= 60;
2555     }
2556     } else {
2557     $self->{onerror}->(node => $input_node,
2558     type => 'dateortime:syntax error');
2559     }
2560    
2561 wakaba 1.29 return $HTMLPhrasingContentChecker->($self, $todo);
2562 wakaba 1.1 },
2563     };
2564    
2565     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2566     attrs_checker => $GetHTMLAttrsChecker->({
2567     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2568     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2569     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2570     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2571     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2572     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2573     }),
2574 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2575 wakaba 1.1 };
2576    
2577     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2578     attrs_checker => $GetHTMLAttrsChecker->({
2579     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2580     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2581     }),
2582 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2583 wakaba 1.1 };
2584    
2585     $Element->{$HTML_NS}->{code} = {
2586     attrs_checker => $GetHTMLAttrsChecker->({}),
2587     ## NOTE: Though |title| has special semantics,
2588     ## syntatically same as the |title| as global attribute.
2589 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2590 wakaba 1.1 };
2591    
2592     $Element->{$HTML_NS}->{var} = {
2593     attrs_checker => $GetHTMLAttrsChecker->({}),
2594     ## NOTE: Though |title| has special semantics,
2595     ## syntatically same as the |title| as global attribute.
2596 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2597 wakaba 1.1 };
2598    
2599     $Element->{$HTML_NS}->{samp} = {
2600     attrs_checker => $GetHTMLAttrsChecker->({}),
2601     ## NOTE: Though |title| has special semantics,
2602     ## syntatically same as the |title| as global attribute.
2603 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2604 wakaba 1.1 };
2605    
2606     $Element->{$HTML_NS}->{kbd} = {
2607     attrs_checker => $GetHTMLAttrsChecker->({}),
2608 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2609 wakaba 1.1 };
2610    
2611     $Element->{$HTML_NS}->{sub} = {
2612     attrs_checker => $GetHTMLAttrsChecker->({}),
2613 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2614 wakaba 1.1 };
2615    
2616     $Element->{$HTML_NS}->{sup} = {
2617     attrs_checker => $GetHTMLAttrsChecker->({}),
2618 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2619 wakaba 1.1 };
2620    
2621     $Element->{$HTML_NS}->{span} = {
2622     attrs_checker => $GetHTMLAttrsChecker->({}),
2623     ## NOTE: Though |title| has special semantics,
2624     ## syntatically same as the |title| as global attribute.
2625 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2626 wakaba 1.1 };
2627    
2628     $Element->{$HTML_NS}->{i} = {
2629     attrs_checker => $GetHTMLAttrsChecker->({}),
2630     ## NOTE: Though |title| has special semantics,
2631     ## syntatically same as the |title| as global attribute.
2632 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2633 wakaba 1.1 };
2634    
2635     $Element->{$HTML_NS}->{b} = {
2636     attrs_checker => $GetHTMLAttrsChecker->({}),
2637 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2638 wakaba 1.1 };
2639    
2640     $Element->{$HTML_NS}->{bdo} = {
2641     attrs_checker => sub {
2642     my ($self, $todo) = @_;
2643     $GetHTMLAttrsChecker->({})->($self, $todo);
2644     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2645     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2646     }
2647     },
2648     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2649 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2650 wakaba 1.1 };
2651    
2652 wakaba 1.29 =pod
2653    
2654     ## TODO:
2655    
2656     +
2657     + <p>Partly because of the confusion described above, authors are
2658     + strongly recommended to always mark up all paragraphs with the
2659     + <code>p</code> element, and to not have any <code>ins</code> or
2660     + <code>del</code> elements that cross across any <span
2661     + title="paragraph">implied paragraphs</span>.</p>
2662     +
2663     (An informative note)
2664    
2665     <p><code>ins</code> elements should not cross <span
2666     + title="paragraph">implied paragraph</span> boundaries.</p>
2667     (normative)
2668    
2669     + <p><code>del</code> elements should not cross <span
2670     + title="paragraph">implied paragraph</span> boundaries.</p>
2671     (normative)
2672    
2673     =cut
2674    
2675 wakaba 1.1 $Element->{$HTML_NS}->{ins} = {
2676     attrs_checker => $GetHTMLAttrsChecker->({
2677     cite => $HTMLURIAttrChecker,
2678     datetime => $HTMLDatetimeAttrChecker,
2679     }),
2680     checker => $HTMLTransparentChecker,
2681     };
2682    
2683     $Element->{$HTML_NS}->{del} = {
2684     attrs_checker => $GetHTMLAttrsChecker->({
2685     cite => $HTMLURIAttrChecker,
2686     datetime => $HTMLDatetimeAttrChecker,
2687     }),
2688     checker => sub {
2689     my ($self, $todo) = @_;
2690 wakaba 1.29 my $sig_flag = $todo->{flag}->{has_descendant}->{significant};
2691     my ($new_todos) = $HTMLTransparentChecker->($self, $todo);
2692     push @$new_todos, {type => 'code', code => sub {
2693     $todo->{flag}->{has_descendant}->{significant} = 0;
2694     }} if not $sig_flag;
2695     return $new_todos;
2696 wakaba 1.1 },
2697     };
2698    
2699     ## TODO: figure
2700 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2701 wakaba 1.1
2702 wakaba 1.4 ## TODO: |alt|
2703 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2704     attrs_checker => sub {
2705     my ($self, $todo) = @_;
2706     $GetHTMLAttrsChecker->({
2707     alt => sub { }, ## NOTE: No syntactical requirement
2708     src => $HTMLURIAttrChecker,
2709     usemap => $HTMLUsemapAttrChecker,
2710     ismap => sub {
2711     my ($self, $attr, $parent_todo) = @_;
2712 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2713     $self->{onerror}->(node => $attr,
2714     type => 'attribute not allowed:ismap');
2715 wakaba 1.1 }
2716     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2717     },
2718     ## TODO: height
2719     ## TODO: width
2720     })->($self, $todo);
2721     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2722     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2723     }
2724     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2725     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2726     }
2727     },
2728 wakaba 1.25 checker => sub {
2729     my ($self, $todo) = @_;
2730     $todo->{flag}->{has_descendant}->{significant} = 1;
2731     return $HTMLEmptyChecker->($self, $todo);
2732     },
2733 wakaba 1.1 };
2734    
2735     $Element->{$HTML_NS}->{iframe} = {
2736     attrs_checker => $GetHTMLAttrsChecker->({
2737     src => $HTMLURIAttrChecker,
2738     }),
2739 wakaba 1.25 checker => sub {
2740     my ($self, $todo) = @_;
2741     $todo->{flag}->{has_descendant}->{significant} = 1;
2742     return $HTMLTextChecker->($self, $todo);
2743     },
2744 wakaba 1.1 };
2745    
2746     $Element->{$HTML_NS}->{embed} = {
2747     attrs_checker => sub {
2748     my ($self, $todo) = @_;
2749     my $has_src;
2750     for my $attr (@{$todo->{node}->attributes}) {
2751     my $attr_ns = $attr->namespace_uri;
2752     $attr_ns = '' unless defined $attr_ns;
2753     my $attr_ln = $attr->manakai_local_name;
2754     my $checker;
2755     if ($attr_ns eq '') {
2756     if ($attr_ln eq 'src') {
2757     $checker = $HTMLURIAttrChecker;
2758     $has_src = 1;
2759     } elsif ($attr_ln eq 'type') {
2760     $checker = $HTMLIMTAttrChecker;
2761     } else {
2762     ## TODO: height
2763     ## TODO: width
2764     $checker = $HTMLAttrChecker->{$attr_ln}
2765     || sub { }; ## NOTE: Any local attribute is ok.
2766     }
2767     }
2768     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2769     || $AttrChecker->{$attr_ns}->{''};
2770     if ($checker) {
2771     $checker->($self, $attr);
2772     } else {
2773     $self->{onerror}->(node => $attr, level => 'unsupported',
2774     type => 'attribute');
2775     ## ISSUE: No comformance createria for global attributes in the spec
2776     }
2777     }
2778    
2779     unless ($has_src) {
2780     $self->{onerror}->(node => $todo->{node},
2781     type => 'attribute missing:src');
2782     }
2783     },
2784 wakaba 1.25 checker => sub {
2785     my ($self, $todo) = @_;
2786     $todo->{flag}->{has_descendant}->{significant} = 1;
2787     return $HTMLEmptyChecker->($self, $todo);
2788     },
2789 wakaba 1.1 };
2790    
2791     $Element->{$HTML_NS}->{object} = {
2792     attrs_checker => sub {
2793     my ($self, $todo) = @_;
2794     $GetHTMLAttrsChecker->({
2795     data => $HTMLURIAttrChecker,
2796     type => $HTMLIMTAttrChecker,
2797     usemap => $HTMLUsemapAttrChecker,
2798     ## TODO: width
2799     ## TODO: height
2800     })->($self, $todo);
2801     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2802     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2803     $self->{onerror}->(node => $todo->{node},
2804     type => 'attribute missing:data|type');
2805     }
2806     }
2807     },
2808 wakaba 1.29 ## NOTE: param*, then transparent.
2809 wakaba 1.25 checker => sub {
2810     my ($self, $todo) = @_;
2811     $todo->{flag}->{has_descendant}->{significant} = 1;
2812     return $ElementDefault->{checker}->($self, $todo); ## TODO
2813     },
2814 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2815 wakaba 1.1 };
2816    
2817     $Element->{$HTML_NS}->{param} = {
2818     attrs_checker => sub {
2819     my ($self, $todo) = @_;
2820     $GetHTMLAttrsChecker->({
2821     name => sub { },
2822     value => sub { },
2823     })->($self, $todo);
2824     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2825     $self->{onerror}->(node => $todo->{node},
2826     type => 'attribute missing:name');
2827     }
2828     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2829     $self->{onerror}->(node => $todo->{node},
2830     type => 'attribute missing:value');
2831     }
2832     },
2833     checker => $HTMLEmptyChecker,
2834     };
2835    
2836     $Element->{$HTML_NS}->{video} = {
2837     attrs_checker => $GetHTMLAttrsChecker->({
2838     src => $HTMLURIAttrChecker,
2839     ## TODO: start, loopstart, loopend, end
2840     ## ISSUE: they MUST be "value time offset"s. Value?
2841 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2842 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2843     controls => $GetHTMLBooleanAttrChecker->('controls'),
2844 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2845     ## TODO: width, height (not for audio!)
2846 wakaba 1.1 }),
2847     checker => sub {
2848     my ($self, $todo) = @_;
2849 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2850 wakaba 1.1
2851 wakaba 1.29 ## TODO:
2852 wakaba 1.1 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2853     return $HTMLBlockOrInlineChecker->($self, $todo);
2854     } else {
2855     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2856     ->($self, $todo);
2857     }
2858     },
2859     };
2860    
2861     $Element->{$HTML_NS}->{audio} = {
2862     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2863     checker => $Element->{$HTML_NS}->{video}->{checker},
2864     };
2865    
2866     $Element->{$HTML_NS}->{source} = {
2867     attrs_checker => sub {
2868     my ($self, $todo) = @_;
2869     $GetHTMLAttrsChecker->({
2870     src => $HTMLURIAttrChecker,
2871     type => $HTMLIMTAttrChecker,
2872     media => $HTMLMQAttrChecker,
2873     })->($self, $todo);
2874     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2875     $self->{onerror}->(node => $todo->{node},
2876     type => 'attribute missing:src');
2877     }
2878     },
2879     checker => $HTMLEmptyChecker,
2880     };
2881    
2882     $Element->{$HTML_NS}->{canvas} = {
2883     attrs_checker => $GetHTMLAttrsChecker->({
2884     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2885     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2886     }),
2887 wakaba 1.25 checker => sub {
2888     my ($self, $todo) = @_;
2889     $todo->{flag}->{has_descendant}->{significant} = 1;
2890 wakaba 1.29 return $HTMLTransparentChecker->($self, $todo);
2891 wakaba 1.25 },
2892 wakaba 1.1 };
2893    
2894     $Element->{$HTML_NS}->{map} = {
2895 wakaba 1.4 attrs_checker => sub {
2896     my ($self, $todo) = @_;
2897     my $has_id;
2898     $GetHTMLAttrsChecker->({
2899     id => sub {
2900     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2901     my ($self, $attr) = @_;
2902     my $value = $attr->value;
2903     if (length $value > 0) {
2904     if ($self->{id}->{$value}) {
2905     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2906     push @{$self->{id}->{$value}}, $attr;
2907     } else {
2908     $self->{id}->{$value} = [$attr];
2909     }
2910 wakaba 1.1 } else {
2911 wakaba 1.4 ## NOTE: MUST contain at least one character
2912     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2913 wakaba 1.1 }
2914 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2915     $self->{onerror}->(node => $attr, type => 'space in ID');
2916     }
2917     $self->{map}->{$value} ||= $attr;
2918     $has_id = 1;
2919     },
2920     })->($self, $todo);
2921     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2922     unless $has_id;
2923     },
2924 wakaba 1.29 checker => $HTMLProseContentChecker,
2925 wakaba 1.1 };
2926    
2927     $Element->{$HTML_NS}->{area} = {
2928     attrs_checker => sub {
2929     my ($self, $todo) = @_;
2930     my %attr;
2931     my $coords;
2932     for my $attr (@{$todo->{node}->attributes}) {
2933     my $attr_ns = $attr->namespace_uri;
2934     $attr_ns = '' unless defined $attr_ns;
2935     my $attr_ln = $attr->manakai_local_name;
2936     my $checker;
2937     if ($attr_ns eq '') {
2938     $checker = {
2939     alt => sub { },
2940     ## NOTE: |alt| value has no conformance creteria.
2941     shape => $GetHTMLEnumeratedAttrChecker->({
2942     circ => -1, circle => 1,
2943     default => 1,
2944     poly => 1, polygon => -1,
2945     rect => 1, rectangle => -1,
2946     }),
2947     coords => sub {
2948     my ($self, $attr) = @_;
2949     my $value = $attr->value;
2950     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2951     $coords = [split /,/, $value];
2952     } else {
2953     $self->{onerror}->(node => $attr,
2954     type => 'coords:syntax error');
2955     }
2956     },
2957     target => $HTMLTargetAttrChecker,
2958     href => $HTMLURIAttrChecker,
2959     ping => $HTMLSpaceURIsAttrChecker,
2960 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2961 wakaba 1.1 media => $HTMLMQAttrChecker,
2962     hreflang => $HTMLLanguageTagAttrChecker,
2963     type => $HTMLIMTAttrChecker,
2964     }->{$attr_ln};
2965     if ($checker) {
2966     $attr{$attr_ln} = $attr;
2967     } else {
2968     $checker = $HTMLAttrChecker->{$attr_ln};
2969     }
2970     }
2971     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2972     || $AttrChecker->{$attr_ns}->{''};
2973     if ($checker) {
2974     $checker->($self, $attr) if ref $checker;
2975     } else {
2976     $self->{onerror}->(node => $attr, level => 'unsupported',
2977     type => 'attribute');
2978     ## ISSUE: No comformance createria for unknown attributes in the spec
2979     }
2980     }
2981    
2982     if (defined $attr{href}) {
2983 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2984 wakaba 1.1 unless (defined $attr{alt}) {
2985     $self->{onerror}->(node => $todo->{node},
2986     type => 'attribute missing:alt');
2987     }
2988     } else {
2989     for (qw/target ping rel media hreflang type alt/) {
2990     if (defined $attr{$_}) {
2991     $self->{onerror}->(node => $attr{$_},
2992     type => 'attribute not allowed');
2993     }
2994     }
2995     }
2996    
2997     my $shape = 'rectangle';
2998     if (defined $attr{shape}) {
2999     $shape = {
3000     circ => 'circle', circle => 'circle',
3001     default => 'default',
3002     poly => 'polygon', polygon => 'polygon',
3003     rect => 'rectangle', rectangle => 'rectangle',
3004     }->{lc $attr{shape}->value} || 'rectangle';
3005     ## TODO: ASCII lowercase?
3006     }
3007    
3008     if ($shape eq 'circle') {
3009     if (defined $attr{coords}) {
3010     if (defined $coords) {
3011     if (@$coords == 3) {
3012     if ($coords->[2] < 0) {
3013     $self->{onerror}->(node => $attr{coords},
3014     type => 'coords:out of range:2');
3015     }
3016     } else {
3017     $self->{onerror}->(node => $attr{coords},
3018     type => 'coords:number:3:'.@$coords);
3019     }
3020     } else {
3021     ## NOTE: A syntax error has been reported.
3022     }
3023     } else {
3024     $self->{onerror}->(node => $todo->{node},
3025     type => 'attribute missing:coords');
3026     }
3027     } elsif ($shape eq 'default') {
3028     if (defined $attr{coords}) {
3029     $self->{onerror}->(node => $attr{coords},
3030     type => 'attribute not allowed');
3031     }
3032     } elsif ($shape eq 'polygon') {
3033     if (defined $attr{coords}) {
3034     if (defined $coords) {
3035     if (@$coords >= 6) {
3036     unless (@$coords % 2 == 0) {
3037     $self->{onerror}->(node => $attr{coords},
3038     type => 'coords:number:even:'.@$coords);
3039     }
3040     } else {
3041     $self->{onerror}->(node => $attr{coords},
3042     type => 'coords:number:>=6:'.@$coords);
3043     }
3044     } else {
3045     ## NOTE: A syntax error has been reported.
3046     }
3047     } else {
3048     $self->{onerror}->(node => $todo->{node},
3049     type => 'attribute missing:coords');
3050     }
3051     } elsif ($shape eq 'rectangle') {
3052     if (defined $attr{coords}) {
3053     if (defined $coords) {
3054     if (@$coords == 4) {
3055     unless ($coords->[0] < $coords->[2]) {
3056     $self->{onerror}->(node => $attr{coords},
3057     type => 'coords:out of range:0');
3058     }
3059     unless ($coords->[1] < $coords->[3]) {
3060     $self->{onerror}->(node => $attr{coords},
3061     type => 'coords:out of range:1');
3062     }
3063     } else {
3064     $self->{onerror}->(node => $attr{coords},
3065     type => 'coords:number:4:'.@$coords);
3066     }
3067     } else {
3068     ## NOTE: A syntax error has been reported.
3069     }
3070     } else {
3071     $self->{onerror}->(node => $todo->{node},
3072     type => 'attribute missing:coords');
3073     }
3074     }
3075     },
3076     checker => $HTMLEmptyChecker,
3077     };
3078     ## TODO: only in map
3079    
3080     $Element->{$HTML_NS}->{table} = {
3081     attrs_checker => $GetHTMLAttrsChecker->({}),
3082     checker => sub {
3083     my ($self, $todo) = @_;
3084     my $el = $todo->{node};
3085     my $new_todos = [];
3086     my @nodes = (@{$el->child_nodes});
3087    
3088     my $phase = 'before caption';
3089     my $has_tfoot;
3090     while (@nodes) {
3091     my $node = shift @nodes;
3092     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3093    
3094     my $nt = $node->node_type;
3095     if ($nt == 1) {
3096     my $node_ns = $node->namespace_uri;
3097     $node_ns = '' unless defined $node_ns;
3098     my $node_ln = $node->manakai_local_name;
3099     ## NOTE: |minuses| list is not checked since redundant
3100 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3101     #
3102     } elsif ($phase eq 'in tbodys') {
3103 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3104     #$phase = 'in tbodys';
3105     } elsif (not $has_tfoot and
3106     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3107     $phase = 'after tfoot';
3108     $has_tfoot = 1;
3109     } else {
3110     $self->{onerror}->(node => $node, type => 'element not allowed');
3111     }
3112     } elsif ($phase eq 'in trs') {
3113     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3114     #$phase = 'in trs';
3115     } elsif (not $has_tfoot and
3116     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3117     $phase = 'after tfoot';
3118     $has_tfoot = 1;
3119     } else {
3120     $self->{onerror}->(node => $node, type => 'element not allowed');
3121     }
3122     } elsif ($phase eq 'after thead') {
3123     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3124     $phase = 'in tbodys';
3125     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3126     $phase = 'in trs';
3127     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3128     $phase = 'in tbodys';
3129     $has_tfoot = 1;
3130     } else {
3131     $self->{onerror}->(node => $node, type => 'element not allowed');
3132     }
3133     } elsif ($phase eq 'in colgroup') {
3134     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
3135     $phase = 'in colgroup';
3136     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
3137     $phase = 'after thead';
3138     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3139     $phase = 'in tbodys';
3140     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3141     $phase = 'in trs';
3142     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3143     $phase = 'in tbodys';
3144     $has_tfoot = 1;
3145     } else {
3146     $self->{onerror}->(node => $node, type => 'element not allowed');
3147     }
3148     } elsif ($phase eq 'before caption') {
3149     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
3150     $phase = 'in colgroup';
3151     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
3152     $phase = 'in colgroup';
3153     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
3154     $phase = 'after thead';
3155     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3156     $phase = 'in tbodys';
3157     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3158     $phase = 'in trs';
3159     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3160     $phase = 'in tbodys';
3161     $has_tfoot = 1;
3162     } else {
3163     $self->{onerror}->(node => $node, type => 'element not allowed');
3164     }
3165     } else { # after tfoot
3166     $self->{onerror}->(node => $node, type => 'element not allowed');
3167     }
3168     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3169     unshift @nodes, @$sib;
3170     push @$new_todos, @$ch;
3171     } elsif ($nt == 3 or $nt == 4) {
3172     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3173     $self->{onerror}->(node => $node, type => 'character not allowed');
3174 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3175 wakaba 1.1 }
3176     } elsif ($nt == 5) {
3177     unshift @nodes, @{$node->child_nodes};
3178     }
3179     }
3180    
3181     ## Table model errors
3182     require Whatpm::HTMLTable;
3183     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
3184     my %opt = @_;
3185     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
3186     });
3187     push @{$self->{return}->{table}}, $todo->{node};
3188    
3189     return ($new_todos);
3190     },
3191     };
3192    
3193     $Element->{$HTML_NS}->{caption} = {
3194     attrs_checker => $GetHTMLAttrsChecker->({}),
3195 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
3196 wakaba 1.1 };
3197    
3198     $Element->{$HTML_NS}->{colgroup} = {
3199     attrs_checker => $GetHTMLAttrsChecker->({
3200     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3201     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
3202     ## TODO: "attribute not supported" if |col|.
3203     ## ISSUE: MUST NOT if any |col|?
3204     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
3205     }),
3206     checker => sub {
3207     my ($self, $todo) = @_;
3208     my $el = $todo->{node};
3209     my $new_todos = [];
3210     my @nodes = (@{$el->child_nodes});
3211    
3212     while (@nodes) {
3213     my $node = shift @nodes;
3214     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3215    
3216     my $nt = $node->node_type;
3217     if ($nt == 1) {
3218     my $node_ns = $node->namespace_uri;
3219     $node_ns = '' unless defined $node_ns;
3220     my $node_ln = $node->manakai_local_name;
3221     ## NOTE: |minuses| list is not checked since redundant
3222 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3223     #
3224     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
3225 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
3226     }
3227     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3228     unshift @nodes, @$sib;
3229     push @$new_todos, @$ch;
3230     } elsif ($nt == 3 or $nt == 4) {
3231     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3232     $self->{onerror}->(node => $node, type => 'character not allowed');
3233 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3234 wakaba 1.1 }
3235     } elsif ($nt == 5) {
3236     unshift @nodes, @{$node->child_nodes};
3237     }
3238     }
3239     return ($new_todos);
3240     },
3241     };
3242    
3243     $Element->{$HTML_NS}->{col} = {
3244     attrs_checker => $GetHTMLAttrsChecker->({
3245     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3246     }),
3247     checker => $HTMLEmptyChecker,
3248     };
3249    
3250     $Element->{$HTML_NS}->{tbody} = {
3251     attrs_checker => $GetHTMLAttrsChecker->({}),
3252     checker => sub {
3253     my ($self, $todo) = @_;
3254     my $el = $todo->{node};
3255     my $new_todos = [];
3256     my @nodes = (@{$el->child_nodes});
3257    
3258     my $has_tr;
3259     while (@nodes) {
3260     my $node = shift @nodes;
3261     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3262    
3263     my $nt = $node->node_type;
3264     if ($nt == 1) {
3265     my $node_ns = $node->namespace_uri;
3266     $node_ns = '' unless defined $node_ns;
3267     my $node_ln = $node->manakai_local_name;
3268     ## NOTE: |minuses| list is not checked since redundant
3269 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3270     #
3271     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3272 wakaba 1.1 $has_tr = 1;
3273     } else {
3274     $self->{onerror}->(node => $node, type => 'element not allowed');
3275     }
3276     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3277     unshift @nodes, @$sib;
3278     push @$new_todos, @$ch;
3279     } elsif ($nt == 3 or $nt == 4) {
3280     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3281     $self->{onerror}->(node => $node, type => 'character not allowed');
3282 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3283 wakaba 1.1 }
3284     } elsif ($nt == 5) {
3285     unshift @nodes, @{$node->child_nodes};
3286     }
3287     }
3288     unless ($has_tr) {
3289     $self->{onerror}->(node => $el, type => 'child element missing:tr');
3290     }
3291     return ($new_todos);
3292     },
3293     };
3294    
3295     $Element->{$HTML_NS}->{thead} = {
3296     attrs_checker => $GetHTMLAttrsChecker->({}),
3297     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3298     };
3299    
3300     $Element->{$HTML_NS}->{tfoot} = {
3301     attrs_checker => $GetHTMLAttrsChecker->({}),
3302     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3303     };
3304    
3305     $Element->{$HTML_NS}->{tr} = {
3306     attrs_checker => $GetHTMLAttrsChecker->({}),
3307     checker => sub {
3308     my ($self, $todo) = @_;
3309     my $el = $todo->{node};
3310     my $new_todos = [];
3311     my @nodes = (@{$el->child_nodes});
3312    
3313     my $has_td;
3314     while (@nodes) {
3315     my $node = shift @nodes;
3316     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3317    
3318     my $nt = $node->node_type;
3319     if ($nt == 1) {
3320     my $node_ns = $node->namespace_uri;
3321     $node_ns = '' unless defined $node_ns;
3322     my $node_ln = $node->manakai_local_name;
3323     ## NOTE: |minuses| list is not checked since redundant
3324 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3325     #
3326     } elsif ($node_ns eq $HTML_NS and
3327     ($node_ln eq 'td' or $node_ln eq 'th')) {
3328 wakaba 1.1 $has_td = 1;
3329     } else {
3330     $self->{onerror}->(node => $node, type => 'element not allowed');
3331     }
3332     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3333     unshift @nodes, @$sib;
3334     push @$new_todos, @$ch;
3335     } elsif ($nt == 3 or $nt == 4) {
3336     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3337     $self->{onerror}->(node => $node, type => 'character not allowed');
3338 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3339 wakaba 1.1 }
3340     } elsif ($nt == 5) {
3341     unshift @nodes, @{$node->child_nodes};
3342     }
3343     }
3344     unless ($has_td) {
3345     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
3346     }
3347     return ($new_todos);
3348     },
3349     };
3350    
3351     $Element->{$HTML_NS}->{td} = {
3352     attrs_checker => $GetHTMLAttrsChecker->({
3353     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3354     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3355     }),
3356 wakaba 1.29 checker => $HTMLProseContentChecker,
3357 wakaba 1.1 };
3358    
3359     $Element->{$HTML_NS}->{th} = {
3360     attrs_checker => $GetHTMLAttrsChecker->({
3361     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3362     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3363     scope => $GetHTMLEnumeratedAttrChecker
3364     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
3365     }),
3366 wakaba 1.31 checker => $HTMLPhrasingContentChecker,
3367 wakaba 1.1 };
3368    
3369     ## TODO: forms
3370 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
3371 wakaba 1.1
3372     $Element->{$HTML_NS}->{script} = {
3373 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
3374 wakaba 1.1 src => $HTMLURIAttrChecker,
3375     defer => $GetHTMLBooleanAttrChecker->('defer'),
3376     async => $GetHTMLBooleanAttrChecker->('async'),
3377     type => $HTMLIMTAttrChecker,
3378 wakaba 1.9 }),
3379 wakaba 1.1 checker => sub {
3380     my ($self, $todo) = @_;
3381    
3382     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
3383     return $HTMLEmptyChecker->($self, $todo);
3384     } else {
3385     ## NOTE: No content model conformance in HTML5 spec.
3386     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
3387     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
3388     if ((defined $type and $type eq '') or
3389     (defined $language and $language eq '')) {
3390     $type = 'text/javascript';
3391     } elsif (defined $type) {
3392     #
3393     } elsif (defined $language) {
3394     $type = 'text/' . $language;
3395     } else {
3396     $type = 'text/javascript';
3397     }
3398     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
3399     type => 'script:'.$type); ## TODO: $type normalization
3400     return $AnyChecker->($self, $todo);
3401     }
3402     },
3403     };
3404 wakaba 1.25 ## ISSUE: Significant check and text child node
3405 wakaba 1.1
3406     ## NOTE: When script is disabled.
3407     $Element->{$HTML_NS}->{noscript} = {
3408 wakaba 1.3 attrs_checker => sub {
3409     my ($self, $todo) = @_;
3410    
3411     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
3412     ## since the later is not invoked when the |noscript| is used as a
3413     ## transparent element.
3414     unless ($todo->{node}->owner_document->manakai_is_html) {
3415     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
3416     }
3417    
3418     $GetHTMLAttrsChecker->({})->($self, $todo);
3419     },
3420 wakaba 1.1 checker => sub {
3421     my ($self, $todo) = @_;
3422    
3423 wakaba 1.3 if ($todo->{flag}->{in_head}) {
3424     my $new_todos = [];
3425     my @nodes = (@{$todo->{node}->child_nodes});
3426    
3427     while (@nodes) {
3428     my $node = shift @nodes;
3429     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3430    
3431     my $nt = $node->node_type;
3432     if ($nt == 1) {
3433     my $node_ns = $node->namespace_uri;
3434     $node_ns = '' unless defined $node_ns;
3435     my $node_ln = $node->manakai_local_name;
3436 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3437     #
3438     } elsif ($node_ns eq $HTML_NS) {
3439 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
3440     #
3441     } elsif ($node_ln eq 'meta') {
3442 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
3443     #
3444 wakaba 1.3 } else {
3445 wakaba 1.5 $self->{onerror}->(node => $node,
3446     type => 'element not allowed');
3447 wakaba 1.3 }
3448     } else {
3449     $self->{onerror}->(node => $node, type => 'element not allowed');
3450     }
3451     } else {
3452     $self->{onerror}->(node => $node, type => 'element not allowed');
3453     }
3454    
3455     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3456     unshift @nodes, @$sib;
3457     push @$new_todos, @$ch;
3458     } elsif ($nt == 3 or $nt == 4) {
3459     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3460     $self->{onerror}->(node => $node, type => 'character not allowed');
3461 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3462 wakaba 1.3 }
3463     } elsif ($nt == 5) {
3464     unshift @nodes, @{$node->child_nodes};
3465     }
3466     }
3467     return ($new_todos);
3468     } else {
3469     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
3470 wakaba 1.29 my ($sib, $ch) = $HTMLTransparentChecker->($self, $todo);
3471 wakaba 1.3 push @$sib, $end;
3472     return ($sib, $ch);
3473     }
3474 wakaba 1.1 },
3475     };
3476 wakaba 1.3
3477     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
3478 wakaba 1.1
3479     $Element->{$HTML_NS}->{'event-source'} = {
3480     attrs_checker => $GetHTMLAttrsChecker->({
3481     src => $HTMLURIAttrChecker,
3482     }),
3483     checker => $HTMLEmptyChecker,
3484     };
3485    
3486     $Element->{$HTML_NS}->{details} = {
3487     attrs_checker => $GetHTMLAttrsChecker->({
3488     open => $GetHTMLBooleanAttrChecker->('open'),
3489     }),
3490     checker => sub {
3491     my ($self, $todo) = @_;
3492    
3493 wakaba 1.29 ## TODO:
3494 wakaba 1.1 my ($sib, $ch)
3495     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
3496     ->($self, $todo);
3497     return ($sib, $ch);
3498     },
3499     };
3500    
3501     $Element->{$HTML_NS}->{datagrid} = {
3502     attrs_checker => $GetHTMLAttrsChecker->({
3503     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3504     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3505     }),
3506     checker => sub {
3507     my ($self, $todo) = @_;
3508     my $el = $todo->{node};
3509     my $new_todos = [];
3510     my @nodes = (@{$el->child_nodes});
3511    
3512 wakaba 1.25 my $old_values = {significant =>
3513     $todo->{flag}->{has_descendant}->{significant}};
3514     $todo->{flag}->{has_descendant}->{significant} = 0;
3515    
3516 wakaba 1.1 my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3517    
3518 wakaba 1.29 ## Prose -(text* table Prose*) | table | select | datalist | Empty
3519 wakaba 1.1 my $mode = 'any';
3520     while (@nodes) {
3521     my $node = shift @nodes;
3522     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3523    
3524     my $nt = $node->node_type;
3525     if ($nt == 1) {
3526     my $node_ns = $node->namespace_uri;
3527     $node_ns = '' unless defined $node_ns;
3528     my $node_ln = $node->manakai_local_name;
3529     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3530 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3531     #
3532 wakaba 1.29 } elsif ($mode eq 'prose') {
3533 wakaba 1.1 $not_allowed = 1
3534 wakaba 1.29 unless $HTMLProseContent->{$node_ns}->{$node_ln};
3535 wakaba 1.1 } elsif ($mode eq 'any') {
3536     if ($node_ns eq $HTML_NS and
3537     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
3538     $mode = 'none';
3539 wakaba 1.29 } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
3540     $mode = 'prose';
3541 wakaba 1.1 } else {
3542     $not_allowed = 1;
3543     }
3544     } else {
3545     $not_allowed = 1;
3546     }
3547     $self->{onerror}->(node => $node, type => 'element not allowed')
3548     if $not_allowed;
3549     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3550     unshift @nodes, @$sib;
3551     push @$new_todos, @$ch;
3552     } elsif ($nt == 3 or $nt == 4) {
3553     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3554 wakaba 1.29 if ($mode eq 'prose') {
3555     #
3556     } elsif ($mode eq 'any') {
3557     $mode = 'prose';
3558     } else {
3559     $self->{onerror}->(node => $node, type => 'character not allowed');
3560     }
3561 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3562 wakaba 1.1 }
3563     } elsif ($nt == 5) {
3564     unshift @nodes, @{$node->child_nodes};
3565     }
3566     }
3567    
3568     push @$new_todos, $end;
3569 wakaba 1.25
3570     push @$new_todos, {
3571     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3572     old_values => $old_values,
3573 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3574 wakaba 1.25 };
3575    
3576 wakaba 1.1 return ($new_todos);
3577 wakaba 1.29
3578     ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
3579     ## are not disallowed (assuming that form control contents are also
3580     ## prose content).
3581 wakaba 1.1 },
3582     };
3583    
3584     $Element->{$HTML_NS}->{command} = {
3585     attrs_checker => $GetHTMLAttrsChecker->({
3586     checked => $GetHTMLBooleanAttrChecker->('checked'),
3587     default => $GetHTMLBooleanAttrChecker->('default'),
3588     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3589     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3590     icon => $HTMLURIAttrChecker,
3591     label => sub { }, ## NOTE: No conformance creteria
3592     radiogroup => sub { }, ## NOTE: No conformance creteria
3593     ## NOTE: |title| has special semantics, but no syntactical difference
3594     type => sub {
3595     my ($self, $attr) = @_;
3596     my $value = $attr->value;
3597     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3598     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3599     }
3600     },
3601     }),
3602     checker => $HTMLEmptyChecker,
3603     };
3604    
3605     $Element->{$HTML_NS}->{menu} = {
3606     attrs_checker => $GetHTMLAttrsChecker->({
3607     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3608     id => sub {
3609     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3610     my ($self, $attr) = @_;
3611     my $value = $attr->value;
3612     if (length $value > 0) {
3613     if ($self->{id}->{$value}) {
3614     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3615     push @{$self->{id}->{$value}}, $attr;
3616     } else {
3617     $self->{id}->{$value} = [$attr];
3618     }
3619     } else {
3620     ## NOTE: MUST contain at least one character
3621     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3622     }
3623     if ($value =~ /[\x09-\x0D\x20]/) {
3624     $self->{onerror}->(node => $attr, type => 'space in ID');
3625     }
3626     $self->{menu}->{$value} ||= $attr;
3627     ## ISSUE: <menu id=""><p contextmenu=""> match?
3628     },
3629     label => sub { }, ## NOTE: No conformance creteria
3630     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3631     }),
3632     checker => sub {
3633     my ($self, $todo) = @_;
3634     my $el = $todo->{node};
3635     my $new_todos = [];
3636     my @nodes = (@{$el->child_nodes});
3637 wakaba 1.25
3638     my $old_values = {significant =>
3639     $todo->{flag}->{has_descendant}->{significant}};
3640     $todo->{flag}->{has_descendant}->{significant} = 0;
3641 wakaba 1.1
3642 wakaba 1.29 my $content = 'li or phrasing';
3643 wakaba 1.1 while (@nodes) {
3644     my $node = shift @nodes;
3645     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3646    
3647     my $nt = $node->node_type;
3648     if ($nt == 1) {
3649     my $node_ns = $node->namespace_uri;
3650     $node_ns = '' unless defined $node_ns;
3651     my $node_ln = $node->manakai_local_name;
3652     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3653 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3654     #
3655     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3656 wakaba 1.29 if ($content eq 'phrasing') {
3657 wakaba 1.1 $not_allowed = 1;
3658 wakaba 1.29 } elsif ($content eq 'li or phrasing') {
3659 wakaba 1.1 $content = 'li';
3660     }
3661     } else {
3662 wakaba 1.29 if ($HTMLPhrasingContent->{$node_ns}->{$node_ln}) {
3663     $content = 'phrasing';
3664 wakaba 1.1 } else {
3665     $not_allowed = 1;
3666     }
3667     }
3668     $self->{onerror}->(node => $node, type => 'element not allowed')
3669     if $not_allowed;
3670     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3671     unshift @nodes, @$sib;
3672     push @$new_todos, @$ch;
3673     } elsif ($nt == 3 or $nt == 4) {
3674     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3675     if ($content eq 'li') {
3676     $self->{onerror}->(node => $node, type => 'character not allowed');
3677 wakaba 1.29 } elsif ($content eq 'li or phrasing') {
3678     $content = 'phrasing';
3679 wakaba 1.1 }
3680 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3681 wakaba 1.1 }
3682     } elsif ($nt == 5) {
3683     unshift @nodes, @{$node->child_nodes};
3684     }
3685     }
3686    
3687     for (@$new_todos) {
3688 wakaba 1.29 $_->{flag}->{in_menu} = 1;
3689 wakaba 1.1 }
3690 wakaba 1.25
3691     push @$new_todos, {
3692     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3693     old_values => $old_values,
3694 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3695 wakaba 1.25 };
3696    
3697 wakaba 1.1 return ($new_todos);
3698     },
3699 wakaba 1.8 };
3700    
3701     $Element->{$HTML_NS}->{datatemplate} = {
3702     attrs_checker => $GetHTMLAttrsChecker->({}),
3703     checker => sub {
3704     my ($self, $todo) = @_;
3705     my $el = $todo->{node};
3706     my $new_todos = [];
3707     my @nodes = (@{$el->child_nodes});
3708    
3709     while (@nodes) {
3710     my $node = shift @nodes;
3711     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3712    
3713     my $nt = $node->node_type;
3714     if ($nt == 1) {
3715     my $node_ns = $node->namespace_uri;
3716     $node_ns = '' unless defined $node_ns;
3717     my $node_ln = $node->manakai_local_name;
3718     ## NOTE: |minuses| list is not checked since redundant
3719     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3720     #
3721     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3722     $self->{onerror}->(node => $node,
3723     type => 'element not allowed:datatemplate');
3724     }
3725     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3726     unshift @nodes, @$sib;
3727     push @$new_todos, @$ch;
3728     } elsif ($nt == 3 or $nt == 4) {
3729     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3730     $self->{onerror}->(node => $node, type => 'character not allowed');
3731 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3732 wakaba 1.8 }
3733     } elsif ($nt == 5) {
3734     unshift @nodes, @{$node->child_nodes};
3735     }
3736     }
3737     return ($new_todos);
3738     },
3739     is_xml_root => 1,
3740     };
3741    
3742     $Element->{$HTML_NS}->{rule} = {
3743     attrs_checker => $GetHTMLAttrsChecker->({
3744 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3745 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3746 wakaba 1.8 }),
3747     checker => sub {
3748     my ($self, $todo) = @_;
3749    
3750     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3751 wakaba 1.25 my ($sib, $ch) = $HTMLAnyChecker->($self, $todo);
3752 wakaba 1.8 push @$sib, $end;
3753     return ($sib, $ch);
3754     },
3755     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3756     ## is applied to some conforming data, results in a conforming DOM tree.":
3757     ## We don't check against this.
3758     };
3759    
3760     $Element->{$HTML_NS}->{nest} = {
3761     attrs_checker => $GetHTMLAttrsChecker->({
3762 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3763     mode => sub {
3764     my ($self, $attr) = @_;
3765     my $value = $attr->value;
3766     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3767     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3768     }
3769     },
3770 wakaba 1.8 }),
3771     checker => $HTMLEmptyChecker,
3772 wakaba 1.1 };
3773    
3774     $Element->{$HTML_NS}->{legend} = {
3775     attrs_checker => $GetHTMLAttrsChecker->({}),
3776 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
3777 wakaba 1.1 };
3778    
3779     $Element->{$HTML_NS}->{div} = {
3780     attrs_checker => $GetHTMLAttrsChecker->({}),
3781 wakaba 1.31 checker => $HTMLProseContentChecker,
3782 wakaba 1.1 };
3783    
3784     $Element->{$HTML_NS}->{font} = {
3785     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3786     checker => $HTMLTransparentChecker,
3787     };
3788    
3789     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3790    
3791     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24