/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.37 - (hide annotations) (download)
Mon Feb 18 00:03:32 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.36: +3 -2 lines
++ whatpm/t/ChangeLog	18 Feb 2008 00:02:40 -0000
2008-02-18  Wakaba  <wakaba@suika.fam.cx>

	* content-model-2.dat: |alt=""| test results revised.

	* content-model-1.dat: |<img/>|s in tests are revised so
	that they are conforming.

++ whatpm/Whatpm/ContentChecker/ChangeLog	18 Feb 2008 00:03:25 -0000
2008-02-18  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm (img): |alt| is a SHOULD-level requirement now (since
	the conformance checker cannot examine the semantics of the
	image).

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7 wakaba 1.29 ## December 2007 HTML5 Classification
8    
9     my $HTMLMetadataContent = {
10     $HTML_NS => {
11     title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1,
12     'event-source' => 1, command => 1, datatemplate => 1,
13     ## NOTE: A |meta| with no |name| element is not allowed as
14     ## a metadata content other than |head| element.
15     meta => 1,
16     },
17     ## NOTE: RDF is mentioned in the HTML5 spec.
18     ## TODO: Other RDF elements?
19     q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
20     };
21    
22     my $HTMLProseContent = {
23     $HTML_NS => {
24     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
25     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
26     footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1,
27     ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1,
28     details => 1, ## ISSUE: "Prose element" in spec.
29     datagrid => 1, ## ISSUE: "Prose element" in spec.
30     datatemplate => 1,
31     div => 1, ## ISSUE: No category in spec.
32     ## NOTE: |style| is only allowed if |scoped| attribute is specified.
33     ## Additionally, it must be before any other element or
34     ## non-inter-element-whitespace text node.
35     style => 1,
36    
37     br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, m => 1,
38     dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
39     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
40     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
41     command => 1, font => 1,
42     a => 1,
43     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
44     ## NOTE: |area| is allowed only as a descendant of |map|.
45     area => 1,
46    
47     ins => 1, del => 1,
48    
49     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
50     menu => 1,
51    
52     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
53     canvas => 1,
54     },
55    
56     ## NOTE: Embedded
57     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
58     q<http://www.w3.org/2000/svg> => {svg => 1},
59     };
60    
61     my $HTMLSectioningContent = {
62     $HTML_NS => {
63     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
64     ## NOTE: |body| is only allowed in |html| element.
65     body => 1,
66     },
67     };
68    
69     my $HTMLHeadingContent = {
70     $HTML_NS => {
71     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
72     },
73     };
74    
75     my $HTMLPhrasingContent = {
76     ## NOTE: All phrasing content is also prose content.
77     $HTML_NS => {
78     br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, m => 1,
79     dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
80     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
81     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
82     command => 1, font => 1,
83     a => 1,
84     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
85     ## NOTE: |area| is allowed only as a descendant of |map|.
86     area => 1,
87    
88     ## NOTE: Transparent.
89     ins => 1, del => 1,
90    
91     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
92     menu => 1,
93    
94     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
95     canvas => 1,
96     },
97    
98     ## NOTE: Embedded
99     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
100     q<http://www.w3.org/2000/svg> => {svg => 1},
101    
102     ## NOTE: And non-inter-element-whitespace text nodes.
103     };
104    
105     my $HTMLEmbeddedContent = {
106     ## NOTE: All embedded content is also phrasing content.
107     $HTML_NS => {
108     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
109     canvas => 1,
110     },
111     ## NOTE: MathML is mentioned in the HTML5 spec.
112     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
113     ## NOTE: SVG is mentioned in the HTML5 spec.
114     q<http://www.w3.org/2000/svg> => {svg => 1},
115     ## NOTE: Foreign elements with content (but no metadata) are
116     ## embedded content.
117     };
118    
119     my $HTMLInteractiveContent = {
120     $HTML_NS => {
121     a => 1,
122 wakaba 1.36 datagrid => 1, ## ISSUE: Categorized as "Inetractive element"
123 wakaba 1.29 },
124     };
125    
126 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
127     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
128    
129 wakaba 1.29 ## Old HTML5 categories
130    
131 wakaba 1.1 my $HTMLBlockLevelElements = {
132     $HTML_NS => {
133     qw/
134     section 1 nav 1 article 1 blockquote 1 aside 1
135     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
136     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
137     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
138     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
139 wakaba 1.8 datatemplate 1
140 wakaba 1.1 /,
141     },
142     };
143    
144     my $HTMLStrictlyInlineLevelElements = {
145     $HTML_NS => {
146     qw/
147     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
148     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
149     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
150     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
151     script 1 noscript 1 event-source 1 command 1 font 1
152     /,
153     },
154     };
155    
156     my $HTMLStructuredInlineLevelElements = {
157     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
158     };
159    
160 wakaba 1.26 my $HTMLSignificantContentErrors = {
161     significant => sub {
162     my ($self, $todo) = @_;
163     $self->{onerror}->(node => $todo->{node},
164     level => $self->{should_level},
165     type => 'no significant content');
166     },
167     }; # $HTMLSignificantContentErrors
168    
169 wakaba 1.25 our $AnyChecker;
170     my $HTMLAnyChecker = sub {
171     my ($self, $todo) = @_;
172    
173     my $old_values = {significant =>
174     $todo->{flag}->{has_descendant}->{significant}};
175     $todo->{flag}->{has_descendant}->{significant} = 0;
176    
177     my ($new_todos) = $AnyChecker->($self, $todo);
178    
179     push @$new_todos, {
180     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
181     old_values => $old_values,
182 wakaba 1.26 errors => $HTMLSignificantContentErrors,
183 wakaba 1.25 };
184    
185     return ($new_todos);
186     }; # $HTMLAnyChecker
187 wakaba 1.1
188     ## Empty
189     my $HTMLEmptyChecker = sub {
190     my ($self, $todo) = @_;
191     my $el = $todo->{node};
192     my $new_todos = [];
193     my @nodes = (@{$el->child_nodes});
194    
195     while (@nodes) {
196     my $node = shift @nodes;
197     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
198    
199     my $nt = $node->node_type;
200     if ($nt == 1) {
201 wakaba 1.8 my $node_ns = $node->namespace_uri;
202     $node_ns = '' unless defined $node_ns;
203     my $node_ln = $node->manakai_local_name;
204 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
205     $self->{onerror}->(node => $node, type => 'element not allowed:minus',
206     level => $self->{must_level});
207     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
208 wakaba 1.8 #
209     } else {
210 wakaba 1.30 $self->{onerror}->(node => $node, type => 'element not allowed:empty',
211     level => $self->{must_level});
212 wakaba 1.8 }
213 wakaba 1.30
214 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
215     unshift @nodes, @$sib;
216     push @$new_todos, @$ch;
217     } elsif ($nt == 3 or $nt == 4) {
218     if ($node->data =~ /[^\x09-\x0D\x20]/) {
219 wakaba 1.30 $self->{onerror}->(node => $node,
220     type => 'character not allowed:empty',
221     level => $self->{must_level});
222 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
223 wakaba 1.1 }
224     } elsif ($nt == 5) {
225     unshift @nodes, @{$node->child_nodes};
226     }
227     }
228     return ($new_todos);
229     };
230    
231     ## Text
232     my $HTMLTextChecker = sub {
233     my ($self, $todo) = @_;
234     my $el = $todo->{node};
235     my $new_todos = [];
236     my @nodes = (@{$el->child_nodes});
237    
238 wakaba 1.25 my $old_values = {significant =>
239     $todo->{flag}->{has_descendant}->{significant}};
240     $todo->{flag}->{has_descendant}->{significant} = 0;
241    
242 wakaba 1.1 while (@nodes) {
243     my $node = shift @nodes;
244     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
245    
246     my $nt = $node->node_type;
247     if ($nt == 1) {
248 wakaba 1.8 my $node_ns = $node->namespace_uri;
249     $node_ns = '' unless defined $node_ns;
250     my $node_ln = $node->manakai_local_name;
251     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
252     #
253     } else {
254     ## NOTE: |minuses| list is not checked since redundant
255     $self->{onerror}->(node => $node, type => 'element not allowed');
256     }
257 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
258     unshift @nodes, @$sib;
259     push @$new_todos, @$ch;
260 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
261     if ($node->data =~ /[^\x09-\x0D\x20]/) {
262     $todo->{flag}->{has_descendant}->{significant} = 1;
263     }
264 wakaba 1.1 } elsif ($nt == 5) {
265     unshift @nodes, @{$node->child_nodes};
266     }
267     }
268 wakaba 1.25
269     push @$new_todos, {
270     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
271     old_values => $old_values,
272 wakaba 1.26 errors => $HTMLSignificantContentErrors,
273 wakaba 1.25 };
274    
275 wakaba 1.1 return ($new_todos);
276     };
277    
278 wakaba 1.29 my $HTMLProseContentChecker = sub {
279     my ($self, $todo) = @_;
280     my $el = $todo->{node};
281     my $new_todos = [];
282     my @nodes = (@{$el->child_nodes});
283    
284     my $old_values = {significant =>
285     $todo->{flag}->{has_descendant}->{significant}};
286     $todo->{flag}->{has_descendant}->{significant} = 0;
287    
288     my $has_non_style;
289     while (@nodes) {
290     my $node = shift @nodes;
291     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
292    
293     my $nt = $node->node_type;
294     if ($nt == 1) {
295     my $node_ns = $node->namespace_uri;
296     $node_ns = '' unless defined $node_ns;
297     my $node_ln = $node->manakai_local_name;
298 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
299     $self->{onerror}->(node => $node,
300     type => 'element not allowed:minus',
301     level => $self->{must_level});
302     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
303     if ($has_non_style or
304     not $node->has_attribute_ns (undef, 'scoped')) {
305     $self->{onerror}->(node => $node,
306     type => 'element not allowed:prose style',
307     level => $self->{must_level});
308     }
309 wakaba 1.29 } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
310     $has_non_style = 1;
311     if ($HTMLEmbeddedContent->{$node_ns}->{$node_ln}) {
312     $todo->{flag}->{has_descendant}->{significant} = 1;
313     }
314     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
315     #
316     } else {
317     $has_non_style = 1;
318 wakaba 1.30 $self->{onerror}->(node => $node,
319     type => 'element not allowed:prose',
320     level => $self->{must_level})
321 wakaba 1.29 }
322 wakaba 1.30
323 wakaba 1.29 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
324     unshift @nodes, @$sib;
325     push @$new_todos, @$ch;
326     } elsif ($nt == 3 or $nt == 4) {
327     if ($node->data =~ /[^\x09-\x0D\x20]/) {
328     $has_non_style = 1;
329     $todo->{flag}->{has_descendant}->{significant} = 1;
330     }
331     } elsif ($nt == 5) {
332     unshift @nodes, @{$node->child_nodes};
333     }
334     }
335    
336     push @$new_todos, {
337     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
338     old_values => $old_values,
339     errors => $HTMLSignificantContentErrors,
340     };
341    
342     return ($new_todos);
343     }; # $HTMLProseContentChecker
344    
345     my $HTMLPhrasingContentChecker = sub {
346     my ($self, $todo) = @_;
347     my $el = $todo->{node};
348     my $new_todos = [];
349     my @nodes = (@{$el->child_nodes});
350    
351     my $old_values = {significant =>
352     $todo->{flag}->{has_descendant}->{significant}};
353     $todo->{flag}->{has_descendant}->{significant} = 0;
354    
355     while (@nodes) {
356     my $node = shift @nodes;
357     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
358    
359     my $nt = $node->node_type;
360     if ($nt == 1) {
361     my $node_ns = $node->namespace_uri;
362     $node_ns = '' unless defined $node_ns;
363     my $node_ln = $node->manakai_local_name;
364 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
365     $self->{onerror}->(node => $node,
366     type => 'element not allowed:minus',
367     level => $self->{must_level});
368     } elsif ($HTMLPhrasingContent->{$node_ns}->{$node_ln} or
369     $self->{pluses}->{$node_ns}->{$node_ln}) {
370     #
371     } else {
372     $self->{onerror}->(node => $node,
373     type => 'element not allowed:phrasing',
374     level => $self->{must_level});
375     }
376    
377 wakaba 1.29 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
378     unshift @nodes, @$sib;
379     push @$new_todos, @$ch;
380     } elsif ($nt == 3 or $nt == 4) {
381     if ($node->data =~ /[^\x09-\x0D\x20]/) {
382     $todo->{flag}->{has_descendant}->{significant} = 1;
383     }
384     } elsif ($nt == 5) {
385     unshift @nodes, @{$node->child_nodes};
386     }
387     }
388    
389     push @$new_todos, {
390     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
391     old_values => $old_values,
392     errors => $HTMLSignificantContentErrors,
393     };
394    
395     return ($new_todos);
396     }; # $HTMLPhrasingContentChecker
397    
398 wakaba 1.1 ## Zero or more XXX element, then either block-level or inline-level
399     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
400     my ($elnsuri, $ellname) = @_;
401     return sub {
402     my ($self, $todo) = @_;
403     my $el = $todo->{node};
404     my $new_todos = [];
405     my @nodes = (@{$el->child_nodes});
406 wakaba 1.25
407     my $old_values = {significant =>
408     $todo->{flag}->{has_descendant}->{significant}};
409     $todo->{flag}->{has_descendant}->{significant} = 0;
410 wakaba 1.1
411     my $has_non_style;
412     my $content = 'block-or-inline'; # or 'block' or 'inline'
413     my @block_not_inline;
414     while (@nodes) {
415     my $node = shift @nodes;
416     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
417    
418     my $nt = $node->node_type;
419     if ($nt == 1) {
420     my $node_ns = $node->namespace_uri;
421     $node_ns = '' unless defined $node_ns;
422     my $node_ln = $node->manakai_local_name;
423     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
424     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
425     $not_allowed = 1 if $has_non_style;
426     if ($ellname eq 'style' and
427     not $node->has_attribute_ns (undef, 'scoped')) {
428     $not_allowed = 1;
429     }
430     } elsif ($content eq 'block') {
431     $has_non_style = 1;
432     $not_allowed = 1
433 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
434     $self->{pluses}->{$node_ns}->{$node_ln};
435 wakaba 1.1 } elsif ($content eq 'inline') {
436     $has_non_style = 1;
437     $not_allowed = 1
438     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
439 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
440     $self->{pluses}->{$node_ns}->{$node_ln};
441 wakaba 1.1 } else {
442 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
444     my $is_inline
445     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
446     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
447    
448     push @block_not_inline, $node
449     if $is_block and not $is_inline and not $not_allowed;
450 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
451 wakaba 1.1 $content = 'inline';
452     for (@block_not_inline) {
453     $self->{onerror}->(node => $_, type => 'element not allowed');
454     }
455     $not_allowed = 1 unless $is_inline;
456     }
457     }
458     $self->{onerror}->(node => $node, type => 'element not allowed')
459     if $not_allowed;
460     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
461     unshift @nodes, @$sib;
462     push @$new_todos, @$ch;
463     } elsif ($nt == 3 or $nt == 4) {
464     if ($node->data =~ /[^\x09-\x0D\x20]/) {
465     $has_non_style = 1;
466     if ($content eq 'block') {
467     $self->{onerror}->(node => $node, type => 'character not allowed');
468     } else {
469     $content = 'inline';
470     for (@block_not_inline) {
471     $self->{onerror}->(node => $_, type => 'element not allowed');
472     }
473     }
474 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
475 wakaba 1.1 }
476     } elsif ($nt == 5) {
477     unshift @nodes, @{$node->child_nodes};
478     }
479     }
480    
481     if ($content eq 'inline') {
482     for (@$new_todos) {
483     $_->{inline} = 1;
484     }
485     }
486 wakaba 1.25
487     push @$new_todos, {
488     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
489     old_values => $old_values,
490 wakaba 1.26 errors => $HTMLSignificantContentErrors,
491 wakaba 1.25 };
492    
493 wakaba 1.1 return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497 wakaba 1.29 my $HTMLTransparentChecker = $HTMLProseContentChecker;
498 wakaba 1.25 ## ISSUE: Significant content rule should be applied to transparent element
499     ## with parent? Currently, applied to |video| but not to others.
500 wakaba 1.1
501 wakaba 1.36 ## -- Common attribute syntacx checkers
502    
503 wakaba 1.1 our $AttrChecker;
504    
505     my $GetHTMLEnumeratedAttrChecker = sub {
506     my $states = shift; # {value => conforming ? 1 : -1}
507     return sub {
508     my ($self, $attr) = @_;
509     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
510     if ($states->{$value} > 0) {
511     #
512     } elsif ($states->{$value}) {
513     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
514     } else {
515     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
516     }
517     };
518     }; # $GetHTMLEnumeratedAttrChecker
519    
520     my $GetHTMLBooleanAttrChecker = sub {
521     my $local_name = shift;
522     return sub {
523     my ($self, $attr) = @_;
524     my $value = $attr->value;
525     unless ($value eq $local_name or $value eq '') {
526     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
527     }
528     };
529     }; # $GetHTMLBooleanAttrChecker
530    
531 wakaba 1.8 ## Unordered set of space-separated tokens
532 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
533 wakaba 1.8 my ($self, $attr) = @_;
534     my %word;
535     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
536     unless ($word{$word}) {
537     $word{$word} = 1;
538     } else {
539     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
540     }
541     }
542 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
543 wakaba 1.8
544 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
545     ## whose allowed values are defined by the section on link types)
546     my $HTMLLinkTypesAttrChecker = sub {
547 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
548 wakaba 1.1 my %word;
549     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
550     unless ($word{$word}) {
551     $word{$word} = 1;
552 wakaba 1.18 } elsif ($word eq 'up') {
553     #
554 wakaba 1.1 } else {
555     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
556     }
557     }
558     ## NOTE: Case sensitive match (since HTML5 spec does not say link
559     ## types are case-insensitive and it says "The value should not
560     ## be confusingly similar to any other defined value (e.g.
561     ## differing only in case).").
562     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
563     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
564     ## values to be used conformingly.
565     require Whatpm::_LinkTypeList;
566     our $LinkType;
567     for my $word (keys %word) {
568     my $def = $LinkType->{$word};
569     if (defined $def) {
570     if ($def->{status} eq 'accepted') {
571     if (defined $def->{effect}->[$a_or_area]) {
572     #
573     } else {
574     $self->{onerror}->(node => $attr,
575     type => 'link type:bad context:'.$word);
576     }
577     } elsif ($def->{status} eq 'proposal') {
578     $self->{onerror}->(node => $attr, level => 's',
579     type => 'link type:proposed:'.$word);
580 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
581     #
582     } else {
583     $self->{onerror}->(node => $attr,
584     type => 'link type:bad context:'.$word);
585     }
586 wakaba 1.1 } else { # rejected or synonym
587     $self->{onerror}->(node => $attr,
588     type => 'link type:non-conforming:'.$word);
589     }
590 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
591     if ($word eq 'alternate') {
592     #
593     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
594     $todo->{has_hyperlink_link_type} = 1;
595     }
596     }
597 wakaba 1.1 if ($def->{unique}) {
598     unless ($self->{has_link_type}->{$word}) {
599     $self->{has_link_type}->{$word} = 1;
600     } else {
601     $self->{onerror}->(node => $attr,
602     type => 'link type:duplicate:'.$word);
603     }
604     }
605     } else {
606     $self->{onerror}->(node => $attr, level => 'unsupported',
607     type => 'link type:'.$word);
608     }
609     }
610 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
611     if $word{alternate} and not $word{stylesheet};
612 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
613     ## says that using both X-Pingback: header field and HTML
614     ## <link rel=pingback> is deprecated and if both appears they
615     ## SHOULD contain exactly the same value.
616     ## ISSUE: Pingback 1.0 specification defines the exact representation
617     ## of its link element, which cannot be tested by the current arch.
618     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
619     ## include any string that matches to the pattern for the rel=pingback link,
620     ## which again inpossible to test.
621     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
622 wakaba 1.12
623     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
624 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
625     ## then they SHOULD be described in different paragraphs.".
626 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
627 wakaba 1.20
628     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
629 wakaba 1.1
630     ## URI (or IRI)
631     my $HTMLURIAttrChecker = sub {
632     my ($self, $attr) = @_;
633     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
634     my $value = $attr->value;
635     Whatpm::URIChecker->check_iri_reference ($value, sub {
636     my %opt = @_;
637     $self->{onerror}->(node => $attr, level => $opt{level},
638     type => 'URI::'.$opt{type}.
639     (defined $opt{position} ? ':'.$opt{position} : ''));
640     });
641 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
642 wakaba 1.1 }; # $HTMLURIAttrChecker
643    
644     ## A space separated list of one or more URIs (or IRIs)
645     my $HTMLSpaceURIsAttrChecker = sub {
646     my ($self, $attr) = @_;
647     my $i = 0;
648     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
649     Whatpm::URIChecker->check_iri_reference ($value, sub {
650     my %opt = @_;
651     $self->{onerror}->(node => $attr, level => $opt{level},
652 wakaba 1.2 type => 'URIs:'.':'.
653     $opt{type}.':'.$i.
654 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
655     });
656     $i++;
657     }
658     ## ISSUE: Relative references?
659     ## ISSUE: Leading or trailing white spaces are conformant?
660     ## ISSUE: A sequence of white space characters are conformant?
661     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
662     ## NOTE: Duplication seems not an error.
663 wakaba 1.4 $self->{has_uri_attr} = 1;
664 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
665    
666     my $HTMLDatetimeAttrChecker = sub {
667     my ($self, $attr) = @_;
668     my $value = $attr->value;
669     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
670     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
671     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
672     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
673     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
674     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
675     if $d < 1 or
676     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
677     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
678     if $M == 2 and $d == 29 and
679     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
680     } else {
681     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
682     }
683     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
684     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
685     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
686     if defined $s and $s > 59;
687     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
688     if $zh > 23;
689     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
690     if $zm > 59;
691     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
692     } else {
693     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
694     }
695     }; # $HTMLDatetimeAttrChecker
696    
697     my $HTMLIntegerAttrChecker = sub {
698     my ($self, $attr) = @_;
699     my $value = $attr->value;
700     unless ($value =~ /\A-?[0-9]+\z/) {
701     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
702     }
703     }; # $HTMLIntegerAttrChecker
704    
705     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
706     my $range_check = shift;
707     return sub {
708     my ($self, $attr) = @_;
709     my $value = $attr->value;
710     if ($value =~ /\A[0-9]+\z/) {
711     unless ($range_check->($value + 0)) {
712     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
713     }
714     } else {
715     $self->{onerror}->(node => $attr,
716     type => 'nninteger:syntax error');
717     }
718     };
719     }; # $GetHTMLNonNegativeIntegerAttrChecker
720    
721     my $GetHTMLFloatingPointNumberAttrChecker = sub {
722     my $range_check = shift;
723     return sub {
724     my ($self, $attr) = @_;
725     my $value = $attr->value;
726     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
727     unless ($range_check->($value + 0)) {
728     $self->{onerror}->(node => $attr, type => 'float:out of range');
729     }
730     } else {
731     $self->{onerror}->(node => $attr,
732     type => 'float:syntax error');
733     }
734     };
735     }; # $GetHTMLFloatingPointNumberAttrChecker
736    
737     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
738     ## ISSUE: RFC 2046 does not define syntax of media types.
739     ## ISSUE: The definition of "a valid MIME type" is unknown.
740     ## Syntactical correctness?
741     my $HTMLIMTAttrChecker = sub {
742     my ($self, $attr) = @_;
743     my $value = $attr->value;
744     ## ISSUE: RFC 2045 Content-Type header field allows insertion
745     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
746     ## ISSUE: RFC 2231 extension? Maybe no.
747     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
748     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
749     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
750     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
751     my @type = ($1, $2);
752     my $param = $3;
753     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
754     if (defined $2) {
755     push @type, $1 => $2;
756     } else {
757     my $n = $1;
758     my $v = $2;
759     $v =~ s/\\(.)/$1/gs;
760     push @type, $n => $v;
761     }
762     }
763     require Whatpm::IMTChecker;
764     Whatpm::IMTChecker->check_imt (sub {
765     my %opt = @_;
766     $self->{onerror}->(node => $attr, level => $opt{level},
767     type => 'IMT:'.$opt{type});
768     }, @type);
769     } else {
770     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
771     }
772     }; # $HTMLIMTAttrChecker
773    
774     my $HTMLLanguageTagAttrChecker = sub {
775 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
776    
777 wakaba 1.1 my ($self, $attr) = @_;
778 wakaba 1.6 my $value = $attr->value;
779     require Whatpm::LangTag;
780     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
781     my %opt = @_;
782     my $type = 'LangTag:'.$opt{type};
783     $type .= ':' . $opt{subtag} if defined $opt{subtag};
784     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
785     level => $opt{level});
786     });
787 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
788 wakaba 1.6
789     ## TODO: testdata
790 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
791    
792     ## "A valid media query [MQ]"
793     my $HTMLMQAttrChecker = sub {
794     my ($self, $attr) = @_;
795     $self->{onerror}->(node => $attr, level => 'unsupported',
796     type => 'media query');
797     ## ISSUE: What is "a valid media query"?
798     }; # $HTMLMQAttrChecker
799    
800     my $HTMLEventHandlerAttrChecker = sub {
801     my ($self, $attr) = @_;
802     $self->{onerror}->(node => $attr, level => 'unsupported',
803     type => 'event handler');
804     ## TODO: MUST contain valid ECMAScript code matching the
805     ## ECMAScript |FunctionBody| production. [ECMA262]
806     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
807     ## ISSUE: Automatic semicolon insertion does not apply?
808     ## ISSUE: Other script languages?
809     }; # $HTMLEventHandlerAttrChecker
810    
811     my $HTMLUsemapAttrChecker = sub {
812     my ($self, $attr) = @_;
813     ## MUST be a valid hashed ID reference to a |map| element
814     my $value = $attr->value;
815     if ($value =~ s/^#//) {
816     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
817     push @{$self->{usemap}}, [$value => $attr];
818     } else {
819     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
820     }
821     ## NOTE: Space characters in hashed ID references are conforming.
822     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
823     }; # $HTMLUsemapAttrChecker
824    
825     my $HTMLTargetAttrChecker = sub {
826     my ($self, $attr) = @_;
827     my $value = $attr->value;
828     if ($value =~ /^_/) {
829     $value = lc $value; ## ISSUE: ASCII case-insentitive?
830     unless ({
831     _self => 1, _parent => 1, _top => 1,
832     }->{$value}) {
833     $self->{onerror}->(node => $attr,
834     type => 'reserved browsing context name');
835     }
836     } else {
837 wakaba 1.29 ## NOTE: An empty string is a valid browsing context name (same as _self).
838 wakaba 1.1 }
839     }; # $HTMLTargetAttrChecker
840    
841 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
842     my ($self, $attr) = @_;
843    
844     ## ISSUE: Namespace resolution?
845    
846     my $value = $attr->value;
847    
848     require Whatpm::CSS::SelectorsParser;
849     my $p = Whatpm::CSS::SelectorsParser->new;
850     $p->{pseudo_class}->{$_} = 1 for qw/
851     active checked disabled empty enabled first-child first-of-type
852     focus hover indeterminate last-child last-of-type link only-child
853     only-of-type root target visited
854     lang nth-child nth-last-child nth-of-type nth-last-of-type not
855     -manakai-contains -manakai-current
856     /;
857    
858     $p->{pseudo_element}->{$_} = 1 for qw/
859     after before first-letter first-line
860     /;
861    
862     $p->{must_level} = $self->{must_level};
863     $p->{onerror} = sub {
864     my %opt = @_;
865     $opt{type} = 'selectors:'.$opt{type};
866     $self->{onerror}->(%opt, node => $attr);
867     };
868     $p->parse_string ($value);
869     }; # $HTMLSelectorsAttrChecker
870    
871 wakaba 1.1 my $HTMLAttrChecker = {
872     id => sub {
873     ## NOTE: |map| has its own variant of |id=""| checker
874     my ($self, $attr) = @_;
875     my $value = $attr->value;
876     if (length $value > 0) {
877     if ($self->{id}->{$value}) {
878     $self->{onerror}->(node => $attr, type => 'duplicate ID');
879     push @{$self->{id}->{$value}}, $attr;
880     } else {
881     $self->{id}->{$value} = [$attr];
882     }
883     if ($value =~ /[\x09-\x0D\x20]/) {
884     $self->{onerror}->(node => $attr, type => 'space in ID');
885     }
886     } else {
887     ## NOTE: MUST contain at least one character
888     $self->{onerror}->(node => $attr, type => 'empty attribute value');
889     }
890     },
891     title => sub {}, ## NOTE: No conformance creteria
892     lang => sub {
893     my ($self, $attr) = @_;
894 wakaba 1.6 my $value = $attr->value;
895     if ($value eq '') {
896     #
897     } else {
898     require Whatpm::LangTag;
899     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
900     my %opt = @_;
901     my $type = 'LangTag:'.$opt{type};
902     $type .= ':' . $opt{subtag} if defined $opt{subtag};
903     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
904     level => $opt{level});
905     });
906     }
907 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
908     unless ($attr->owner_document->manakai_is_html) {
909     $self->{onerror}->(node => $attr, type => 'in XML:lang');
910     }
911 wakaba 1.6
912     ## TODO: test data
913 wakaba 1.1 },
914     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
915     class => sub {
916     my ($self, $attr) = @_;
917     my %word;
918     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
919     unless ($word{$word}) {
920     $word{$word} = 1;
921     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
922     } else {
923     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
924     }
925     }
926     },
927     contextmenu => sub {
928     my ($self, $attr) = @_;
929     my $value = $attr->value;
930     push @{$self->{contextmenu}}, [$value => $attr];
931     ## ISSUE: "The value must be the ID of a menu element in the DOM."
932     ## What is "in the DOM"? A menu Element node that is not part
933     ## of the Document tree is in the DOM? A menu Element node that
934     ## belong to another Document tree is in the DOM?
935     },
936     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
937 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
938     ## TODO: ref, template, registrationmark
939 wakaba 1.1 };
940    
941     for (qw/
942     onabort onbeforeunload onblur onchange onclick oncontextmenu
943     ondblclick ondrag ondragend ondragenter ondragleave ondragover
944     ondragstart ondrop onerror onfocus onkeydown onkeypress
945     onkeyup onload onmessage onmousedown onmousemove onmouseout
946     onmouseover onmouseup onmousewheel onresize onscroll onselect
947     onsubmit onunload
948     /) {
949     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
950     }
951    
952     my $GetHTMLAttrsChecker = sub {
953     my $element_specific_checker = shift;
954     return sub {
955     my ($self, $todo) = @_;
956     for my $attr (@{$todo->{node}->attributes}) {
957     my $attr_ns = $attr->namespace_uri;
958     $attr_ns = '' unless defined $attr_ns;
959     my $attr_ln = $attr->manakai_local_name;
960     my $checker;
961     if ($attr_ns eq '') {
962     $checker = $element_specific_checker->{$attr_ln}
963     || $HTMLAttrChecker->{$attr_ln};
964     }
965     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
966     || $AttrChecker->{$attr_ns}->{''};
967     if ($checker) {
968     $checker->($self, $attr, $todo);
969     } else {
970     $self->{onerror}->(node => $attr, level => 'unsupported',
971     type => 'attribute');
972     ## ISSUE: No comformance createria for unknown attributes in the spec
973     }
974     }
975     };
976     }; # $GetHTMLAttrsChecker
977    
978     our $Element;
979     our $ElementDefault;
980    
981     $Element->{$HTML_NS}->{''} = {
982     attrs_checker => $GetHTMLAttrsChecker->({}),
983     checker => $ElementDefault->{checker},
984     };
985    
986     $Element->{$HTML_NS}->{html} = {
987     is_root => 1,
988     attrs_checker => $GetHTMLAttrsChecker->({
989 wakaba 1.16 manifest => $HTMLURIAttrChecker,
990 wakaba 1.1 xmlns => sub {
991     my ($self, $attr) = @_;
992     my $value = $attr->value;
993     unless ($value eq $HTML_NS) {
994     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
995     }
996     unless ($attr->owner_document->manakai_is_html) {
997     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
998     ## TODO: Test
999     }
1000     },
1001     }),
1002     checker => sub {
1003     my ($self, $todo) = @_;
1004     my $el = $todo->{node};
1005     my $new_todos = [];
1006     my @nodes = (@{$el->child_nodes});
1007    
1008 wakaba 1.25 my $old_values = {significant =>
1009     $todo->{flag}->{has_descendant}->{significant}};
1010     $todo->{flag}->{has_descendant}->{significant} = 0;
1011    
1012 wakaba 1.1 my $phase = 'before head';
1013     while (@nodes) {
1014     my $node = shift @nodes;
1015     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1016    
1017     my $nt = $node->node_type;
1018     if ($nt == 1) {
1019     my $node_ns = $node->namespace_uri;
1020     $node_ns = '' unless defined $node_ns;
1021     my $node_ln = $node->manakai_local_name;
1022     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1023 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1024     #
1025     } elsif ($phase eq 'before head') {
1026 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
1027     $phase = 'after head';
1028     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1029     $self->{onerror}->(node => $node, type => 'ps element missing:head');
1030     $phase = 'after body';
1031     } else {
1032     $not_allowed = 1;
1033     # before head
1034     }
1035     } elsif ($phase eq 'after head') {
1036     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1037     $phase = 'after body';
1038     } else {
1039     $not_allowed = 1;
1040     # after head
1041     }
1042     } else { #elsif ($phase eq 'after body') {
1043     $not_allowed = 1;
1044     # after body
1045     }
1046     $self->{onerror}->(node => $node, type => 'element not allowed')
1047     if $not_allowed;
1048     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1049     unshift @nodes, @$sib;
1050     push @$new_todos, @$ch;
1051     } elsif ($nt == 3 or $nt == 4) {
1052     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1053     $self->{onerror}->(node => $node, type => 'character not allowed');
1054 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1055 wakaba 1.1 }
1056     } elsif ($nt == 5) {
1057     unshift @nodes, @{$node->child_nodes};
1058     }
1059     }
1060    
1061     if ($phase eq 'before head') {
1062     $self->{onerror}->(node => $el, type => 'child element missing:head');
1063     $self->{onerror}->(node => $el, type => 'child element missing:body');
1064     } elsif ($phase eq 'after head') {
1065     $self->{onerror}->(node => $el, type => 'child element missing:body');
1066     }
1067    
1068 wakaba 1.25 ## NOTE: Significant content check - this is performed here since
1069     ## |html| content model allows a block-level element - |body|.
1070     push @$new_todos, {
1071     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
1072     old_values => $old_values,
1073 wakaba 1.26 errors => $HTMLSignificantContentErrors,
1074 wakaba 1.25 };
1075    
1076 wakaba 1.1 return ($new_todos);
1077     },
1078     };
1079    
1080     $Element->{$HTML_NS}->{head} = {
1081     attrs_checker => $GetHTMLAttrsChecker->({}),
1082     checker => sub {
1083     my ($self, $todo) = @_;
1084     my $el = $todo->{node};
1085     my $new_todos = [];
1086     my @nodes = (@{$el->child_nodes});
1087    
1088     my $has_title;
1089     while (@nodes) {
1090     my $node = shift @nodes;
1091     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1092    
1093     my $nt = $node->node_type;
1094     if ($nt == 1) {
1095     my $node_ns = $node->namespace_uri;
1096     $node_ns = '' unless defined $node_ns;
1097     my $node_ln = $node->manakai_local_name;
1098 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1099     $self->{onerror}->(node => $node,
1100     type => 'element not allowed:minus',
1101     level => $self->{must_level});
1102     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
1103 wakaba 1.8 #
1104     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1105 wakaba 1.1 unless ($has_title) {
1106     $has_title = 1;
1107     } else {
1108 wakaba 1.30 $self->{onerror}->(node => $node,
1109     type => 'element not allowed:head title',
1110     level => $self->{must_level});
1111     }
1112     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1113     if ($todo->{node}->has_attribute_ns (undef, 'scoped')) {
1114     $self->{onerror}->(node => $node,
1115     type => 'element not allowed:head style',
1116     level => $self->{must_level});
1117 wakaba 1.1 }
1118 wakaba 1.29 } elsif ($HTMLMetadataContent->{$node_ns}->{$node_ln}) {
1119     #
1120    
1121     ## NOTE: |meta| is a metadata content. However, strictly speaking,
1122     ## a |meta| element with none of |charset|, |name|,
1123     ## or |http-equiv| attribute is not allowed. It is non-conforming
1124     ## anyway.
1125 wakaba 1.1 } else {
1126 wakaba 1.30 $self->{onerror}->(node => $node,
1127     type => 'element not allowed:metadata',
1128     level => $self->{must_level});
1129 wakaba 1.1 }
1130 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1131 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1132     unshift @nodes, @$sib;
1133     push @$new_todos, @$ch;
1134     } elsif ($nt == 3 or $nt == 4) {
1135     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1136     $self->{onerror}->(node => $node, type => 'character not allowed');
1137 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1138 wakaba 1.1 }
1139     } elsif ($nt == 5) {
1140     unshift @nodes, @{$node->child_nodes};
1141     }
1142     }
1143     unless ($has_title) {
1144     $self->{onerror}->(node => $el, type => 'child element missing:title');
1145     }
1146     return ($new_todos);
1147     },
1148     };
1149    
1150     $Element->{$HTML_NS}->{title} = {
1151     attrs_checker => $GetHTMLAttrsChecker->({}),
1152     checker => $HTMLTextChecker,
1153     };
1154    
1155     $Element->{$HTML_NS}->{base} = {
1156 wakaba 1.4 attrs_checker => sub {
1157     my ($self, $todo) = @_;
1158    
1159 wakaba 1.29 if ($self->{has_base}) {
1160     $self->{onerror}->(node => $todo->{node},
1161     type => 'element not allowed:base');
1162     } else {
1163     $self->{has_base} = 1;
1164     }
1165    
1166 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1167     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1168    
1169     if ($self->{has_uri_attr} and $has_href) {
1170 wakaba 1.4 ## ISSUE: Are these examples conforming?
1171     ## <head profile="a b c"><base href> (except for |profile|'s
1172     ## non-conformance)
1173     ## <title xml:base="relative"/><base href/> (maybe it should be)
1174     ## <unknown xmlns="relative"/><base href/> (assuming that
1175     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1176     ## <style>@import 'relative';</style><base href>
1177     ## <script>location.href = 'relative';</script><base href>
1178 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1179     ## an exception.
1180 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1181     type => 'basehref after URI attribute');
1182     }
1183 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1184 wakaba 1.4 ## ISSUE: Are these examples conforming?
1185     ## <head><title xlink:href=""/><base target="name"/></head>
1186     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1187     ## (assuming that |xbl:xbl| is allowed before |base|)
1188     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1189     ## <link href=""/><base target="name"/>
1190     ## <link rel=unknown href=""><base target=name>
1191     $self->{onerror}->(node => $todo->{node},
1192     type => 'basetarget after hyperlink');
1193     }
1194    
1195 wakaba 1.14 if (not $has_href and not $has_target) {
1196     $self->{onerror}->(node => $todo->{node},
1197     type => 'attribute missing:href|target');
1198     }
1199    
1200 wakaba 1.4 return $GetHTMLAttrsChecker->({
1201     href => $HTMLURIAttrChecker,
1202     target => $HTMLTargetAttrChecker,
1203     })->($self, $todo);
1204     },
1205 wakaba 1.1 checker => $HTMLEmptyChecker,
1206     };
1207    
1208     $Element->{$HTML_NS}->{link} = {
1209     attrs_checker => sub {
1210     my ($self, $todo) = @_;
1211     $GetHTMLAttrsChecker->({
1212     href => $HTMLURIAttrChecker,
1213 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1214 wakaba 1.1 media => $HTMLMQAttrChecker,
1215     hreflang => $HTMLLanguageTagAttrChecker,
1216     type => $HTMLIMTAttrChecker,
1217     ## NOTE: Though |title| has special semantics,
1218     ## syntactically same as the |title| as global attribute.
1219     })->($self, $todo);
1220 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1221     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1222     } else {
1223 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1224     type => 'attribute missing:href');
1225     }
1226     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1227     $self->{onerror}->(node => $todo->{node},
1228     type => 'attribute missing:rel');
1229     }
1230     },
1231     checker => $HTMLEmptyChecker,
1232     };
1233    
1234     $Element->{$HTML_NS}->{meta} = {
1235     attrs_checker => sub {
1236     my ($self, $todo) = @_;
1237     my $name_attr;
1238     my $http_equiv_attr;
1239     my $charset_attr;
1240     my $content_attr;
1241     for my $attr (@{$todo->{node}->attributes}) {
1242     my $attr_ns = $attr->namespace_uri;
1243     $attr_ns = '' unless defined $attr_ns;
1244     my $attr_ln = $attr->manakai_local_name;
1245     my $checker;
1246     if ($attr_ns eq '') {
1247     if ($attr_ln eq 'content') {
1248     $content_attr = $attr;
1249     $checker = 1;
1250     } elsif ($attr_ln eq 'name') {
1251     $name_attr = $attr;
1252     $checker = 1;
1253     } elsif ($attr_ln eq 'http-equiv') {
1254     $http_equiv_attr = $attr;
1255     $checker = 1;
1256     } elsif ($attr_ln eq 'charset') {
1257     $charset_attr = $attr;
1258     $checker = 1;
1259     } else {
1260     $checker = $HTMLAttrChecker->{$attr_ln}
1261     || $AttrChecker->{$attr_ns}->{$attr_ln}
1262     || $AttrChecker->{$attr_ns}->{''};
1263     }
1264     } else {
1265     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1266     || $AttrChecker->{$attr_ns}->{''};
1267     }
1268     if ($checker) {
1269     $checker->($self, $attr) if ref $checker;
1270     } else {
1271     $self->{onerror}->(node => $attr, level => 'unsupported',
1272     type => 'attribute');
1273     ## ISSUE: No comformance createria for unknown attributes in the spec
1274     }
1275     }
1276    
1277     if (defined $name_attr) {
1278     if (defined $http_equiv_attr) {
1279     $self->{onerror}->(node => $http_equiv_attr,
1280     type => 'attribute not allowed');
1281     } elsif (defined $charset_attr) {
1282     $self->{onerror}->(node => $charset_attr,
1283     type => 'attribute not allowed');
1284     }
1285     my $metadata_name = $name_attr->value;
1286     my $metadata_value;
1287     if (defined $content_attr) {
1288     $metadata_value = $content_attr->value;
1289     } else {
1290     $self->{onerror}->(node => $todo->{node},
1291     type => 'attribute missing:content');
1292     $metadata_value = '';
1293     }
1294     } elsif (defined $http_equiv_attr) {
1295     if (defined $charset_attr) {
1296     $self->{onerror}->(node => $charset_attr,
1297     type => 'attribute not allowed');
1298     }
1299     unless (defined $content_attr) {
1300     $self->{onerror}->(node => $todo->{node},
1301     type => 'attribute missing:content');
1302     }
1303     } elsif (defined $charset_attr) {
1304     if (defined $content_attr) {
1305     $self->{onerror}->(node => $content_attr,
1306     type => 'attribute not allowed');
1307     }
1308     } else {
1309     if (defined $content_attr) {
1310     $self->{onerror}->(node => $content_attr,
1311     type => 'attribute not allowed');
1312     $self->{onerror}->(node => $todo->{node},
1313     type => 'attribute missing:name|http-equiv');
1314     } else {
1315     $self->{onerror}->(node => $todo->{node},
1316     type => 'attribute missing:name|http-equiv|charset');
1317     }
1318     }
1319    
1320 wakaba 1.32 my $check_charset_decl = sub () {
1321 wakaba 1.29 my $parent = $todo->{node}->manakai_parent_element;
1322     if ($parent and $parent eq $parent->owner_document->manakai_head) {
1323     for my $el (@{$parent->child_nodes}) {
1324     next unless $el->node_type == 1; # ELEMENT_NODE
1325     unless ($el eq $todo->{node}) {
1326     ## NOTE: Not the first child element.
1327     $self->{onerror}->(node => $todo->{node},
1328 wakaba 1.32 type => 'element not allowed:meta charset',
1329     level => $self->{must_level});
1330 wakaba 1.29 }
1331     last;
1332     ## NOTE: Entity references are not supported.
1333     }
1334     } else {
1335     $self->{onerror}->(node => $todo->{node},
1336 wakaba 1.32 type => 'element not allowed:meta charset',
1337     level => $self->{must_level});
1338 wakaba 1.29 }
1339    
1340 wakaba 1.1 unless ($todo->{node}->owner_document->manakai_is_html) {
1341 wakaba 1.32 $self->{onerror}->(node => $todo->{node},
1342     type => 'in XML:charset',
1343     level => $self->{must_level});
1344 wakaba 1.1 }
1345 wakaba 1.32 }; # $check_charset_decl
1346 wakaba 1.21
1347 wakaba 1.32 my $check_charset = sub ($$) {
1348     my ($attr, $charset_value) = @_;
1349 wakaba 1.21 ## NOTE: Though the case-sensitivility of |charset| attribute value
1350     ## is not explicitly spelled in the HTML5 spec, the Character Set
1351     ## registry of IANA, which is referenced from HTML5 spec, says that
1352     ## charset name is case-insensitive.
1353     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1354    
1355     require Message::Charset::Info;
1356     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1357     my $ic = $todo->{node}->owner_document->input_encoding;
1358     if (defined $ic) {
1359     ## TODO: Test for this case
1360     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1361     if ($charset ne $ic_charset) {
1362 wakaba 1.32 $self->{onerror}->(node => $attr,
1363 wakaba 1.21 type => 'mismatched charset name:'.$ic.
1364 wakaba 1.32 ':'.$charset_value, ## TODO: This should be a |value| value.
1365     level => $self->{must_level});
1366 wakaba 1.21 }
1367     } else {
1368     ## NOTE: MUST, but not checkable, since the document is not originally
1369     ## in serialized form (or the parser does not preserve the input
1370     ## encoding information).
1371 wakaba 1.32 $self->{onerror}->(node => $attr,
1372     type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
1373 wakaba 1.21 level => 'unsupported');
1374     }
1375    
1376     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1377     ## Syntactically valid and registered? What about x-charset names?
1378     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1379     ($charset_value)) {
1380 wakaba 1.32 $self->{onerror}->(node => $attr,
1381     type => 'charset:syntax error:'.$charset_value, ## TODO
1382     level => $self->{must_level});
1383 wakaba 1.21 }
1384    
1385     if ($charset) {
1386     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1387     ## with no "preferred MIME name" label)?
1388     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1389     if (($charset_status &
1390     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1391     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1392 wakaba 1.32 $self->{onerror}->(node => $attr,
1393 wakaba 1.21 type => 'charset:not preferred:'.
1394 wakaba 1.32 $charset_value, ## TODO
1395     level => $self->{must_level});
1396 wakaba 1.21 }
1397     if (($charset_status &
1398     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1399     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1400     if ($charset_value =~ /^x-/) {
1401 wakaba 1.32 $self->{onerror}->(node => $attr,
1402     type => 'charset:private:'.$charset_value, ## TODO
1403 wakaba 1.21 level => $self->{good_level});
1404     } else {
1405 wakaba 1.32 $self->{onerror}->(node => $attr,
1406 wakaba 1.21 type => 'charset:not registered:'.
1407 wakaba 1.32 $charset_value, ## TODO
1408 wakaba 1.21 level => $self->{good_level});
1409     }
1410     }
1411     } elsif ($charset_value =~ /^x-/) {
1412 wakaba 1.32 $self->{onerror}->(node => $attr,
1413     type => 'charset:private:'.$charset_value, ## TODO
1414 wakaba 1.21 level => $self->{good_level});
1415     } else {
1416 wakaba 1.32 $self->{onerror}->(node => $attr,
1417     type => 'charset:not registered:'.$charset_value, ## TODO
1418 wakaba 1.21 level => $self->{good_level});
1419     }
1420    
1421 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
1422     $self->{onerror}->(node => $attr,
1423 wakaba 1.22 type => 'character reference in charset',
1424     level => $self->{must_level});
1425     }
1426 wakaba 1.32 }; # $check_charset
1427    
1428     ## TODO: metadata conformance
1429    
1430     ## TODO: pragma conformance
1431     if (defined $http_equiv_attr) { ## An enumerated attribute
1432     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1433     if ({
1434     'refresh' => 1,
1435     'default-style' => 1,
1436     }->{$keyword}) {
1437     #
1438 wakaba 1.33
1439     ## TODO: More than one occurence is a MUST-level error (revision 1180).
1440 wakaba 1.32 } elsif ($keyword eq 'content-type') {
1441 wakaba 1.33 ## ISSUE: Though it is renamed as "Encoding declaration" state in rev
1442     ## 1221, there are still many occurence of "Content-Type" state in
1443     ## the spec.
1444    
1445 wakaba 1.32 $check_charset_decl->();
1446     if ($content_attr) {
1447     my $content = $content_attr->value;
1448     if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
1449     $check_charset->($content_attr, $1);
1450     } else {
1451     $self->{onerror}->(node => $content_attr,
1452     type => 'meta content-type syntax error',
1453     level => $self->{must_level});
1454     }
1455     }
1456     } else {
1457     $self->{onerror}->(node => $http_equiv_attr,
1458     type => 'enumerated:invalid');
1459     }
1460     }
1461    
1462     if (defined $charset_attr) {
1463     $check_charset_decl->();
1464     $check_charset->($charset_attr, $charset_attr->value);
1465 wakaba 1.1 }
1466     },
1467     checker => $HTMLEmptyChecker,
1468     };
1469    
1470     $Element->{$HTML_NS}->{style} = {
1471     attrs_checker => $GetHTMLAttrsChecker->({
1472     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1473     media => $HTMLMQAttrChecker,
1474     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1475     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1476     ## not different
1477     }),
1478     checker => sub {
1479 wakaba 1.27 ## NOTE: |html:style| itself has no conformance creteria on content model.
1480 wakaba 1.1 my ($self, $todo) = @_;
1481     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1482 wakaba 1.27 if (not defined $type or
1483     $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
1484     my $el = $todo->{node};
1485     my $new_todos = [];
1486     my @nodes = (@{$el->child_nodes});
1487    
1488     my $ss_text = '';
1489     while (@nodes) {
1490     my $node = shift @nodes;
1491     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1492    
1493     my $nt = $node->node_type;
1494     if ($nt == 1) {
1495 wakaba 1.29 my $node_ns = $node->namespace_uri;
1496     $node_ns = '' unless defined $node_ns;
1497     my $node_ln = $node->manakai_local_name;
1498     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1499     #
1500     } else {
1501     $self->{onerror}->(node => $node, type => 'element not allowed');
1502     }
1503 wakaba 1.27 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1504     unshift @nodes, @$sib;
1505     push @$new_todos, @$ch;
1506     } elsif ($nt == 3 or $nt == 4) {
1507     $ss_text .= $node->text_content;
1508     } elsif ($nt == 5) {
1509     unshift @nodes, @{$node->child_nodes};
1510     }
1511     }
1512    
1513 wakaba 1.28 $self->{onsubdoc}->({s => $ss_text, container_node => $el,
1514     media_type => 'text/css', is_char_string => 1});
1515 wakaba 1.27 return ($new_todos);
1516     } else {
1517     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1518     type => 'style:'.$type); ## TODO: $type normalization
1519     return $AnyChecker->($self, $todo);
1520     }
1521 wakaba 1.1 },
1522     };
1523 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1524 wakaba 1.1
1525     $Element->{$HTML_NS}->{body} = {
1526     attrs_checker => $GetHTMLAttrsChecker->({}),
1527 wakaba 1.29 checker => $HTMLProseContentChecker,
1528 wakaba 1.1 };
1529    
1530     $Element->{$HTML_NS}->{section} = {
1531     attrs_checker => $GetHTMLAttrsChecker->({}),
1532 wakaba 1.29 checker => $HTMLProseContentChecker,
1533 wakaba 1.1 };
1534    
1535     $Element->{$HTML_NS}->{nav} = {
1536     attrs_checker => $GetHTMLAttrsChecker->({}),
1537 wakaba 1.29 checker => $HTMLProseContentChecker,
1538 wakaba 1.1 };
1539    
1540     $Element->{$HTML_NS}->{article} = {
1541     attrs_checker => $GetHTMLAttrsChecker->({}),
1542 wakaba 1.29 checker => $HTMLProseContentChecker,
1543 wakaba 1.1 };
1544    
1545     $Element->{$HTML_NS}->{blockquote} = {
1546     attrs_checker => $GetHTMLAttrsChecker->({
1547     cite => $HTMLURIAttrChecker,
1548     }),
1549 wakaba 1.29 checker => $HTMLProseContentChecker,
1550 wakaba 1.1 };
1551    
1552     $Element->{$HTML_NS}->{aside} = {
1553     attrs_checker => $GetHTMLAttrsChecker->({}),
1554 wakaba 1.29 checker => $HTMLProseContentChecker,
1555 wakaba 1.1 };
1556    
1557     $Element->{$HTML_NS}->{h1} = {
1558     attrs_checker => $GetHTMLAttrsChecker->({}),
1559     checker => sub {
1560     my ($self, $todo) = @_;
1561 wakaba 1.24 $todo->{flag}->{has_descendant}->{hn} = 1;
1562 wakaba 1.29 return $HTMLPhrasingContentChecker->($self, $todo);
1563 wakaba 1.1 },
1564     };
1565    
1566     $Element->{$HTML_NS}->{h2} = {
1567     attrs_checker => $GetHTMLAttrsChecker->({}),
1568     checker => $Element->{$HTML_NS}->{h1}->{checker},
1569     };
1570    
1571     $Element->{$HTML_NS}->{h3} = {
1572     attrs_checker => $GetHTMLAttrsChecker->({}),
1573     checker => $Element->{$HTML_NS}->{h1}->{checker},
1574     };
1575    
1576     $Element->{$HTML_NS}->{h4} = {
1577     attrs_checker => $GetHTMLAttrsChecker->({}),
1578     checker => $Element->{$HTML_NS}->{h1}->{checker},
1579     };
1580    
1581     $Element->{$HTML_NS}->{h5} = {
1582     attrs_checker => $GetHTMLAttrsChecker->({}),
1583     checker => $Element->{$HTML_NS}->{h1}->{checker},
1584     };
1585    
1586     $Element->{$HTML_NS}->{h6} = {
1587     attrs_checker => $GetHTMLAttrsChecker->({}),
1588     checker => $Element->{$HTML_NS}->{h1}->{checker},
1589     };
1590    
1591 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1592    
1593 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
1594     attrs_checker => $GetHTMLAttrsChecker->({}),
1595     checker => sub {
1596     my ($self, $todo) = @_;
1597 wakaba 1.24
1598     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
1599     $todo->{flag}->{has_descendant}->{hn} = 0;
1600 wakaba 1.1
1601     my $end = $self->_add_minuses
1602     ({$HTML_NS => {qw/header 1 footer 1/}},
1603 wakaba 1.29 $HTMLSectioningContent);
1604     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
1605 wakaba 1.24 push @$new_todos, $end,
1606     {type => 'descendant', node => $todo->{node},
1607     flag => $todo->{flag}, old_values => $old_flags,
1608     errors => {
1609     hn => sub {
1610     my ($self, $todo) = @_;
1611     $self->{onerror}->(node => $todo->{node},
1612     type => 'element missing:hn');
1613     },
1614 wakaba 1.1 }};
1615     return ($new_todos, $ch);
1616 wakaba 1.24
1617     ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
1618 wakaba 1.1 },
1619     };
1620    
1621     $Element->{$HTML_NS}->{footer} = {
1622     attrs_checker => $GetHTMLAttrsChecker->({}),
1623 wakaba 1.29 checker => sub {
1624 wakaba 1.1 my ($self, $todo) = @_;
1625 wakaba 1.25
1626 wakaba 1.29 my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
1627     $todo->{flag}->{has_descendant}->{hn} = 0;
1628 wakaba 1.1
1629     my $end = $self->_add_minuses
1630 wakaba 1.29 ({$HTML_NS => {footer => 1}},
1631     $HTMLSectioningContent, $HTMLHeadingContent);
1632     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
1633 wakaba 1.1 push @$new_todos, $end;
1634    
1635 wakaba 1.29 return ($new_todos, $ch);
1636 wakaba 1.1 },
1637     };
1638    
1639     $Element->{$HTML_NS}->{address} = {
1640     attrs_checker => $GetHTMLAttrsChecker->({}),
1641 wakaba 1.29 checker => sub {
1642     my ($self, $todo) = @_;
1643    
1644     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
1645     $todo->{flag}->{has_descendant}->{hn} = 0;
1646    
1647     my $end = $self->_add_minuses
1648     ({$HTML_NS => {footer => 1, address => 1}},
1649     $HTMLSectioningContent, $HTMLHeadingContent);
1650     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
1651     push @$new_todos, $end;
1652    
1653     return ($new_todos, $ch);
1654     },
1655 wakaba 1.1 };
1656    
1657     $Element->{$HTML_NS}->{p} = {
1658     attrs_checker => $GetHTMLAttrsChecker->({}),
1659 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
1660 wakaba 1.1 };
1661    
1662     $Element->{$HTML_NS}->{hr} = {
1663     attrs_checker => $GetHTMLAttrsChecker->({}),
1664     checker => $HTMLEmptyChecker,
1665     };
1666    
1667     $Element->{$HTML_NS}->{br} = {
1668     attrs_checker => $GetHTMLAttrsChecker->({}),
1669     checker => $HTMLEmptyChecker,
1670 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
1671     ## (This requirement is semantic so that we cannot check.)
1672 wakaba 1.1 };
1673    
1674     $Element->{$HTML_NS}->{dialog} = {
1675     attrs_checker => $GetHTMLAttrsChecker->({}),
1676     checker => sub {
1677     my ($self, $todo) = @_;
1678     my $el = $todo->{node};
1679     my $new_todos = [];
1680     my @nodes = (@{$el->child_nodes});
1681    
1682     my $phase = 'before dt';
1683     while (@nodes) {
1684     my $node = shift @nodes;
1685     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1686    
1687     my $nt = $node->node_type;
1688     if ($nt == 1) {
1689     my $node_ns = $node->namespace_uri;
1690     $node_ns = '' unless defined $node_ns;
1691     my $node_ln = $node->manakai_local_name;
1692     ## NOTE: |minuses| list is not checked since redundant
1693 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1694     #
1695     } elsif ($phase eq 'before dt') {
1696 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1697     $phase = 'before dd';
1698     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1699     $self->{onerror}
1700     ->(node => $node, type => 'ps element missing:dt');
1701     $phase = 'before dt';
1702     } else {
1703     $self->{onerror}->(node => $node, type => 'element not allowed');
1704     }
1705     } else { # before dd
1706     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1707     $phase = 'before dt';
1708     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1709     $self->{onerror}
1710     ->(node => $node, type => 'ps element missing:dd');
1711     $phase = 'before dd';
1712     } else {
1713     $self->{onerror}->(node => $node, type => 'element not allowed');
1714     }
1715     }
1716     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1717     unshift @nodes, @$sib;
1718     push @$new_todos, @$ch;
1719     } elsif ($nt == 3 or $nt == 4) {
1720     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1721     $self->{onerror}->(node => $node, type => 'character not allowed');
1722 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1723 wakaba 1.1 }
1724     } elsif ($nt == 5) {
1725     unshift @nodes, @{$node->child_nodes};
1726     }
1727     }
1728     if ($phase eq 'before dd') {
1729 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1730 wakaba 1.1 }
1731     return ($new_todos);
1732     },
1733     };
1734    
1735     $Element->{$HTML_NS}->{pre} = {
1736     attrs_checker => $GetHTMLAttrsChecker->({}),
1737 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
1738 wakaba 1.1 };
1739    
1740     $Element->{$HTML_NS}->{ol} = {
1741     attrs_checker => $GetHTMLAttrsChecker->({
1742     start => $HTMLIntegerAttrChecker,
1743     }),
1744     checker => sub {
1745     my ($self, $todo) = @_;
1746     my $el = $todo->{node};
1747     my $new_todos = [];
1748     my @nodes = (@{$el->child_nodes});
1749    
1750     while (@nodes) {
1751     my $node = shift @nodes;
1752     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1753    
1754     my $nt = $node->node_type;
1755     if ($nt == 1) {
1756     my $node_ns = $node->namespace_uri;
1757     $node_ns = '' unless defined $node_ns;
1758     my $node_ln = $node->manakai_local_name;
1759     ## NOTE: |minuses| list is not checked since redundant
1760 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1761     #
1762     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1763 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1764     }
1765     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1766     unshift @nodes, @$sib;
1767     push @$new_todos, @$ch;
1768     } elsif ($nt == 3 or $nt == 4) {
1769     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1770     $self->{onerror}->(node => $node, type => 'character not allowed');
1771 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1772 wakaba 1.1 }
1773     } elsif ($nt == 5) {
1774     unshift @nodes, @{$node->child_nodes};
1775     }
1776     }
1777    
1778     if ($todo->{inline}) {
1779     for (@$new_todos) {
1780     $_->{inline} = 1;
1781     }
1782     }
1783     return ($new_todos);
1784     },
1785     };
1786    
1787     $Element->{$HTML_NS}->{ul} = {
1788     attrs_checker => $GetHTMLAttrsChecker->({}),
1789     checker => $Element->{$HTML_NS}->{ol}->{checker},
1790     };
1791    
1792     $Element->{$HTML_NS}->{li} = {
1793     attrs_checker => $GetHTMLAttrsChecker->({
1794     start => sub {
1795     my ($self, $attr) = @_;
1796     my $parent = $attr->owner_element->manakai_parent_element;
1797     if (defined $parent) {
1798     my $parent_ns = $parent->namespace_uri;
1799     $parent_ns = '' unless defined $parent_ns;
1800     my $parent_ln = $parent->manakai_local_name;
1801     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1802     $self->{onerror}->(node => $attr, level => 'unsupported',
1803     type => 'attribute');
1804     }
1805     }
1806     $HTMLIntegerAttrChecker->($self, $attr);
1807     },
1808     }),
1809     checker => sub {
1810     my ($self, $todo) = @_;
1811 wakaba 1.29 if ($todo->{flag}->{in_menu}) {
1812     return $HTMLPhrasingContentChecker->($self, $todo);
1813 wakaba 1.1 } else {
1814 wakaba 1.29 return $HTMLProseContentChecker->($self, $todo);
1815 wakaba 1.1 }
1816     },
1817     };
1818    
1819     $Element->{$HTML_NS}->{dl} = {
1820     attrs_checker => $GetHTMLAttrsChecker->({}),
1821     checker => sub {
1822     my ($self, $todo) = @_;
1823     my $el = $todo->{node};
1824     my $new_todos = [];
1825     my @nodes = (@{$el->child_nodes});
1826    
1827     my $phase = 'before dt';
1828     while (@nodes) {
1829     my $node = shift @nodes;
1830     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1831    
1832     my $nt = $node->node_type;
1833     if ($nt == 1) {
1834     my $node_ns = $node->namespace_uri;
1835     $node_ns = '' unless defined $node_ns;
1836     my $node_ln = $node->manakai_local_name;
1837     ## NOTE: |minuses| list is not checked since redundant
1838 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1839     #
1840     } elsif ($phase eq 'in dds') {
1841 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1842     #$phase = 'in dds';
1843     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1844     $phase = 'in dts';
1845     } else {
1846     $self->{onerror}->(node => $node, type => 'element not allowed');
1847     }
1848     } elsif ($phase eq 'in dts') {
1849     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1850     #$phase = 'in dts';
1851     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1852     $phase = 'in dds';
1853     } else {
1854     $self->{onerror}->(node => $node, type => 'element not allowed');
1855     }
1856     } else { # before dt
1857     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1858     $phase = 'in dts';
1859     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1860     $self->{onerror}
1861     ->(node => $node, type => 'ps element missing:dt');
1862     $phase = 'in dds';
1863     } else {
1864     $self->{onerror}->(node => $node, type => 'element not allowed');
1865     }
1866     }
1867     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1868     unshift @nodes, @$sib;
1869     push @$new_todos, @$ch;
1870     } elsif ($nt == 3 or $nt == 4) {
1871     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1872     $self->{onerror}->(node => $node, type => 'character not allowed');
1873 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1874 wakaba 1.1 }
1875     } elsif ($nt == 5) {
1876     unshift @nodes, @{$node->child_nodes};
1877     }
1878     }
1879     if ($phase eq 'in dts') {
1880 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1881 wakaba 1.1 }
1882    
1883     if ($todo->{inline}) {
1884     for (@$new_todos) {
1885     $_->{inline} = 1;
1886     }
1887     }
1888     return ($new_todos);
1889     },
1890     };
1891    
1892     $Element->{$HTML_NS}->{dt} = {
1893     attrs_checker => $GetHTMLAttrsChecker->({}),
1894 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
1895 wakaba 1.1 };
1896    
1897     $Element->{$HTML_NS}->{dd} = {
1898     attrs_checker => $GetHTMLAttrsChecker->({}),
1899 wakaba 1.29 checker => $HTMLProseContentChecker,
1900 wakaba 1.1 };
1901    
1902     $Element->{$HTML_NS}->{a} = {
1903     attrs_checker => sub {
1904     my ($self, $todo) = @_;
1905     my %attr;
1906     for my $attr (@{$todo->{node}->attributes}) {
1907     my $attr_ns = $attr->namespace_uri;
1908     $attr_ns = '' unless defined $attr_ns;
1909     my $attr_ln = $attr->manakai_local_name;
1910     my $checker;
1911     if ($attr_ns eq '') {
1912     $checker = {
1913     target => $HTMLTargetAttrChecker,
1914     href => $HTMLURIAttrChecker,
1915     ping => $HTMLSpaceURIsAttrChecker,
1916 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1917 wakaba 1.1 media => $HTMLMQAttrChecker,
1918     hreflang => $HTMLLanguageTagAttrChecker,
1919     type => $HTMLIMTAttrChecker,
1920     }->{$attr_ln};
1921     if ($checker) {
1922     $attr{$attr_ln} = $attr;
1923     } else {
1924     $checker = $HTMLAttrChecker->{$attr_ln};
1925     }
1926     }
1927     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1928     || $AttrChecker->{$attr_ns}->{''};
1929     if ($checker) {
1930     $checker->($self, $attr) if ref $checker;
1931     } else {
1932     $self->{onerror}->(node => $attr, level => 'unsupported',
1933     type => 'attribute');
1934     ## ISSUE: No comformance createria for unknown attributes in the spec
1935     }
1936     }
1937    
1938 wakaba 1.4 if (defined $attr{href}) {
1939     $self->{has_hyperlink_element} = 1;
1940     } else {
1941 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1942     if (defined $attr{$_}) {
1943     $self->{onerror}->(node => $attr{$_},
1944     type => 'attribute not allowed');
1945     }
1946     }
1947     }
1948     },
1949     checker => sub {
1950     my ($self, $todo) = @_;
1951    
1952 wakaba 1.36 my $end = $self->_add_minuses ($HTMLInteractiveContent);
1953 wakaba 1.29 my ($new_todos, $ch) = $HTMLPhrasingContentChecker->($self, $todo);
1954 wakaba 1.1 push @$new_todos, $end;
1955    
1956 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1957     $_->{flag}->{in_a_href} = 1 for @$new_todos;
1958     }
1959 wakaba 1.1
1960     return ($new_todos, $ch);
1961     },
1962     };
1963    
1964     $Element->{$HTML_NS}->{q} = {
1965     attrs_checker => $GetHTMLAttrsChecker->({
1966     cite => $HTMLURIAttrChecker,
1967     }),
1968 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
1969 wakaba 1.1 };
1970    
1971     $Element->{$HTML_NS}->{cite} = {
1972     attrs_checker => $GetHTMLAttrsChecker->({}),
1973 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
1974 wakaba 1.1 };
1975    
1976     $Element->{$HTML_NS}->{em} = {
1977     attrs_checker => $GetHTMLAttrsChecker->({}),
1978 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
1979 wakaba 1.1 };
1980    
1981     $Element->{$HTML_NS}->{strong} = {
1982     attrs_checker => $GetHTMLAttrsChecker->({}),
1983 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
1984 wakaba 1.1 };
1985    
1986     $Element->{$HTML_NS}->{small} = {
1987     attrs_checker => $GetHTMLAttrsChecker->({}),
1988 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
1989 wakaba 1.1 };
1990    
1991     $Element->{$HTML_NS}->{m} = {
1992     attrs_checker => $GetHTMLAttrsChecker->({}),
1993 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
1994 wakaba 1.1 };
1995    
1996     $Element->{$HTML_NS}->{dfn} = {
1997     attrs_checker => $GetHTMLAttrsChecker->({}),
1998     checker => sub {
1999     my ($self, $todo) = @_;
2000    
2001     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
2002 wakaba 1.29 my ($sib, $ch) = $HTMLPhrasingContentChecker->($self, $todo);
2003 wakaba 1.1 push @$sib, $end;
2004    
2005     my $node = $todo->{node};
2006     my $term = $node->get_attribute_ns (undef, 'title');
2007     unless (defined $term) {
2008     for my $child (@{$node->child_nodes}) {
2009     if ($child->node_type == 1) { # ELEMENT_NODE
2010     if (defined $term) {
2011     undef $term;
2012     last;
2013     } elsif ($child->manakai_local_name eq 'abbr') {
2014     my $nsuri = $child->namespace_uri;
2015     if (defined $nsuri and $nsuri eq $HTML_NS) {
2016     my $attr = $child->get_attribute_node_ns (undef, 'title');
2017     if ($attr) {
2018     $term = $attr->value;
2019     }
2020     }
2021     }
2022     } elsif ($child->node_type == 3 or $child->node_type == 4) {
2023     ## TEXT_NODE or CDATA_SECTION_NODE
2024     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
2025     next;
2026     }
2027     undef $term;
2028     last;
2029     }
2030     }
2031     unless (defined $term) {
2032     $term = $node->text_content;
2033     }
2034     }
2035     if ($self->{term}->{$term}) {
2036     $self->{onerror}->(node => $node, type => 'duplicate term');
2037     push @{$self->{term}->{$term}}, $node;
2038     } else {
2039     $self->{term}->{$term} = [$node];
2040     }
2041     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
2042     ## has |title|.
2043    
2044     return ($sib, $ch);
2045     },
2046     };
2047    
2048     $Element->{$HTML_NS}->{abbr} = {
2049     attrs_checker => $GetHTMLAttrsChecker->({
2050     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
2051     ## not different. The spec says that the |title| MAY be omitted
2052     ## if there is a |dfn| whose defining term is the abbreviation,
2053     ## but it does not prohibit |abbr| w/o |title| in other cases.
2054     }),
2055 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2056 wakaba 1.1 };
2057    
2058     $Element->{$HTML_NS}->{time} = {
2059     attrs_checker => $GetHTMLAttrsChecker->({
2060     datetime => sub { 1 }, # checked in |checker|
2061     }),
2062     ## TODO: Write tests
2063     checker => sub {
2064     my ($self, $todo) = @_;
2065    
2066     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
2067     my $input;
2068     my $reg_sp;
2069     my $input_node;
2070     if ($attr) {
2071     $input = $attr->value;
2072     $reg_sp = qr/[\x09-\x0D\x20]*/;
2073     $input_node = $attr;
2074     } else {
2075     $input = $todo->{node}->text_content;
2076     $reg_sp = qr/\p{Zs}*/;
2077     $input_node = $todo->{node};
2078    
2079     ## ISSUE: What is the definition for "successfully extracts a date
2080     ## or time"? If the algorithm says the string is invalid but
2081     ## return some date or time, is it "successfully"?
2082     }
2083    
2084     my $hour;
2085     my $minute;
2086     my $second;
2087     if ($input =~ /
2088     \A
2089     [\x09-\x0D\x20]*
2090     ([0-9]+) # 1
2091     (?>
2092     -([0-9]+) # 2
2093     -([0-9]+) # 3
2094     [\x09-\x0D\x20]*
2095     (?>
2096     T
2097     [\x09-\x0D\x20]*
2098     )?
2099     ([0-9]+) # 4
2100     :([0-9]+) # 5
2101     (?>
2102     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2103     )?
2104     [\x09-\x0D\x20]*
2105     (?>
2106     Z
2107     [\x09-\x0D\x20]*
2108     |
2109     [+-]([0-9]+):([0-9]+) # 7, 8
2110     [\x09-\x0D\x20]*
2111     )?
2112     \z
2113     |
2114     :([0-9]+) # 9
2115     (?>
2116     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2117     )?
2118     [\x09-\x0D\x20]*\z
2119     )
2120     /x) {
2121     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2122     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2123     length $4 != 2 or length $5 != 2) {
2124     $self->{onerror}->(node => $input_node,
2125     type => 'dateortime:syntax error');
2126     }
2127    
2128     if (1 <= $2 and $2 <= 12) {
2129     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2130     if $3 < 1 or
2131     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
2132     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2133     if $2 == 2 and $3 == 29 and
2134     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2135     } else {
2136     $self->{onerror}->(node => $input_node,
2137     type => 'datetime:bad month');
2138     }
2139    
2140     ($hour, $minute, $second) = ($4, $5, $6);
2141    
2142     if (defined $7) { ## [+-]hh:mm
2143     if (length $7 != 2 or length $8 != 2) {
2144     $self->{onerror}->(node => $input_node,
2145     type => 'dateortime:syntax error');
2146     }
2147    
2148     $self->{onerror}->(node => $input_node,
2149     type => 'datetime:bad timezone hour')
2150     if $7 > 23;
2151     $self->{onerror}->(node => $input_node,
2152     type => 'datetime:bad timezone minute')
2153     if $8 > 59;
2154     }
2155     } else { ## hh:mm
2156     if (length $1 != 2 or length $9 != 2) {
2157     $self->{onerror}->(node => $input_node,
2158     type => qq'dateortime:syntax error');
2159     }
2160    
2161     ($hour, $minute, $second) = ($1, $9, $10);
2162     }
2163    
2164     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2165     if $hour > 23;
2166     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2167     if $minute > 59;
2168    
2169     if (defined $second) { ## s
2170     ## NOTE: Integer part of second don't have to have length of two.
2171    
2172     if (substr ($second, 0, 1) eq '.') {
2173     $self->{onerror}->(node => $input_node,
2174     type => 'dateortime:syntax error');
2175     }
2176    
2177     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2178     if $second >= 60;
2179     }
2180     } else {
2181     $self->{onerror}->(node => $input_node,
2182     type => 'dateortime:syntax error');
2183     }
2184    
2185 wakaba 1.29 return $HTMLPhrasingContentChecker->($self, $todo);
2186 wakaba 1.1 },
2187     };
2188    
2189     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2190     attrs_checker => $GetHTMLAttrsChecker->({
2191     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2192     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2193     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2194     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2195     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2196     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2197     }),
2198 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2199 wakaba 1.1 };
2200    
2201     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2202     attrs_checker => $GetHTMLAttrsChecker->({
2203     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2204     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2205     }),
2206 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2207 wakaba 1.1 };
2208    
2209     $Element->{$HTML_NS}->{code} = {
2210     attrs_checker => $GetHTMLAttrsChecker->({}),
2211     ## NOTE: Though |title| has special semantics,
2212     ## syntatically same as the |title| as global attribute.
2213 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2214 wakaba 1.1 };
2215    
2216     $Element->{$HTML_NS}->{var} = {
2217     attrs_checker => $GetHTMLAttrsChecker->({}),
2218     ## NOTE: Though |title| has special semantics,
2219     ## syntatically same as the |title| as global attribute.
2220 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2221 wakaba 1.1 };
2222    
2223     $Element->{$HTML_NS}->{samp} = {
2224     attrs_checker => $GetHTMLAttrsChecker->({}),
2225     ## NOTE: Though |title| has special semantics,
2226     ## syntatically same as the |title| as global attribute.
2227 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2228 wakaba 1.1 };
2229    
2230     $Element->{$HTML_NS}->{kbd} = {
2231     attrs_checker => $GetHTMLAttrsChecker->({}),
2232 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2233 wakaba 1.1 };
2234    
2235     $Element->{$HTML_NS}->{sub} = {
2236     attrs_checker => $GetHTMLAttrsChecker->({}),
2237 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2238 wakaba 1.1 };
2239    
2240     $Element->{$HTML_NS}->{sup} = {
2241     attrs_checker => $GetHTMLAttrsChecker->({}),
2242 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2243 wakaba 1.1 };
2244    
2245     $Element->{$HTML_NS}->{span} = {
2246     attrs_checker => $GetHTMLAttrsChecker->({}),
2247     ## NOTE: Though |title| has special semantics,
2248     ## syntatically same as the |title| as global attribute.
2249 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2250 wakaba 1.1 };
2251    
2252     $Element->{$HTML_NS}->{i} = {
2253     attrs_checker => $GetHTMLAttrsChecker->({}),
2254     ## NOTE: Though |title| has special semantics,
2255     ## syntatically same as the |title| as global attribute.
2256 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2257 wakaba 1.1 };
2258    
2259     $Element->{$HTML_NS}->{b} = {
2260     attrs_checker => $GetHTMLAttrsChecker->({}),
2261 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2262 wakaba 1.1 };
2263    
2264     $Element->{$HTML_NS}->{bdo} = {
2265     attrs_checker => sub {
2266     my ($self, $todo) = @_;
2267     $GetHTMLAttrsChecker->({})->($self, $todo);
2268     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2269     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2270     }
2271     },
2272     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2273 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2274 wakaba 1.1 };
2275    
2276 wakaba 1.29 =pod
2277    
2278     ## TODO:
2279    
2280     +
2281     + <p>Partly because of the confusion described above, authors are
2282     + strongly recommended to always mark up all paragraphs with the
2283     + <code>p</code> element, and to not have any <code>ins</code> or
2284     + <code>del</code> elements that cross across any <span
2285     + title="paragraph">implied paragraphs</span>.</p>
2286     +
2287     (An informative note)
2288    
2289     <p><code>ins</code> elements should not cross <span
2290     + title="paragraph">implied paragraph</span> boundaries.</p>
2291     (normative)
2292    
2293     + <p><code>del</code> elements should not cross <span
2294     + title="paragraph">implied paragraph</span> boundaries.</p>
2295     (normative)
2296    
2297     =cut
2298    
2299 wakaba 1.1 $Element->{$HTML_NS}->{ins} = {
2300     attrs_checker => $GetHTMLAttrsChecker->({
2301     cite => $HTMLURIAttrChecker,
2302     datetime => $HTMLDatetimeAttrChecker,
2303     }),
2304     checker => $HTMLTransparentChecker,
2305     };
2306    
2307     $Element->{$HTML_NS}->{del} = {
2308     attrs_checker => $GetHTMLAttrsChecker->({
2309     cite => $HTMLURIAttrChecker,
2310     datetime => $HTMLDatetimeAttrChecker,
2311     }),
2312     checker => sub {
2313     my ($self, $todo) = @_;
2314 wakaba 1.29 my $sig_flag = $todo->{flag}->{has_descendant}->{significant};
2315     my ($new_todos) = $HTMLTransparentChecker->($self, $todo);
2316     push @$new_todos, {type => 'code', code => sub {
2317     $todo->{flag}->{has_descendant}->{significant} = 0;
2318     }} if not $sig_flag;
2319     return $new_todos;
2320 wakaba 1.1 },
2321     };
2322    
2323 wakaba 1.35 $Element->{$HTML_NS}->{figure} = {
2324     attrs_checker => $GetHTMLAttrsChecker->({}),
2325     checker => sub {
2326     my ($self, $todo) = @_;
2327     my $el = $todo->{node};
2328     my $new_todos = [];
2329     my @nodes = (@{$el->child_nodes});
2330    
2331     my $old_values = {significant =>
2332     $todo->{flag}->{has_descendant}->{significant}};
2333     $todo->{flag}->{has_descendant}->{significant} = 0;
2334    
2335     my $has_legend;
2336     my $has_non_legend;
2337     my $has_non_style;
2338     while (@nodes) {
2339     my $node = shift @nodes;
2340     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2341    
2342     my $nt = $node->node_type;
2343     if ($nt == 1) {
2344     my $node_ns = $node->namespace_uri;
2345     $node_ns = '' unless defined $node_ns;
2346     my $node_ln = $node->manakai_local_name;
2347     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
2348     $has_non_legend = 1;
2349     $self->{onerror}->(node => $node,
2350     type => 'element not allowed:minus',
2351     level => $self->{must_level});
2352     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'legend') {
2353     if ($has_legend) {
2354     if (ref $has_legend) {
2355     $self->{onerror}->(node => $has_legend,
2356     type => 'element not allowed:figure legend',
2357     level => $self->{must_level});
2358     $has_legend = $node;
2359     } else {
2360     ## NOTE: The first child element was a |legend|.
2361     $self->{onerror}->(node => $node,
2362     type => 'element not allowed:figure legend',
2363     level => $self->{must_level});
2364     }
2365     } elsif ($has_non_legend) {
2366     undef $has_non_legend;
2367     $has_legend = $node;
2368     } else {
2369     $has_legend = 1;
2370     }
2371     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
2372     $has_non_legend = 1;
2373     if ($has_non_style or
2374     not $node->has_attribute_ns (undef, 'scoped')) {
2375     $self->{onerror}->(node => $node,
2376     type => 'element not allowed:prose style',
2377     level => $self->{must_level});
2378     }
2379     } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
2380     $has_non_style = 1;
2381     $has_non_legend = 1;
2382     if ($HTMLEmbeddedContent->{$node_ns}->{$node_ln}) {
2383     $todo->{flag}->{has_descendant}->{significant} = 1;
2384     }
2385     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
2386     #
2387     } else {
2388     $has_non_style = 1;
2389     $has_non_legend = 1;
2390     $self->{onerror}->(node => $node,
2391     type => 'element not allowed:prose',
2392     level => $self->{must_level})
2393     }
2394    
2395     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2396     unshift @nodes, @$sib;
2397     push @$new_todos, @$ch;
2398     } elsif ($nt == 3 or $nt == 4) {
2399     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2400     $has_non_style = 1;
2401     $has_non_legend = 1;
2402     $todo->{flag}->{has_descendant}->{significant} = 1;
2403     }
2404     } elsif ($nt == 5) {
2405     unshift @nodes, @{$node->child_nodes};
2406     }
2407     }
2408    
2409     if ($has_legend) {
2410     if (ref $has_legend and $has_non_legend) {
2411     $self->{onerror}->(node => $has_legend,
2412     type => 'element not allowed:figure legend',
2413     level => $self->{must_level});
2414     }
2415     } else {
2416     $self->{onerror}->(node => $todo->{node},
2417     type => 'element missing:legend',
2418     level => $self->{must_level});
2419     }
2420    
2421     push @$new_todos, {
2422     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
2423     old_values => $old_values,
2424     errors => $HTMLSignificantContentErrors,
2425     };
2426    
2427     return ($new_todos);
2428     },
2429     };
2430 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2431 wakaba 1.1
2432     $Element->{$HTML_NS}->{img} = {
2433     attrs_checker => sub {
2434     my ($self, $todo) = @_;
2435     $GetHTMLAttrsChecker->({
2436     alt => sub { }, ## NOTE: No syntactical requirement
2437     src => $HTMLURIAttrChecker,
2438     usemap => $HTMLUsemapAttrChecker,
2439     ismap => sub {
2440     my ($self, $attr, $parent_todo) = @_;
2441 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2442     $self->{onerror}->(node => $attr,
2443     type => 'attribute not allowed:ismap');
2444 wakaba 1.1 }
2445     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2446     },
2447     ## TODO: height
2448     ## TODO: width
2449     })->($self, $todo);
2450     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2451 wakaba 1.37 $self->{onerror}->(node => $todo->{node},
2452     type => 'attribute missing:alt',
2453     level => $self->{should_level});
2454 wakaba 1.1 }
2455     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2456     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2457     }
2458     },
2459 wakaba 1.25 checker => sub {
2460     my ($self, $todo) = @_;
2461     $todo->{flag}->{has_descendant}->{significant} = 1;
2462     return $HTMLEmptyChecker->($self, $todo);
2463     },
2464 wakaba 1.1 };
2465    
2466     $Element->{$HTML_NS}->{iframe} = {
2467     attrs_checker => $GetHTMLAttrsChecker->({
2468     src => $HTMLURIAttrChecker,
2469     }),
2470 wakaba 1.25 checker => sub {
2471     my ($self, $todo) = @_;
2472     $todo->{flag}->{has_descendant}->{significant} = 1;
2473     return $HTMLTextChecker->($self, $todo);
2474     },
2475 wakaba 1.1 };
2476    
2477     $Element->{$HTML_NS}->{embed} = {
2478     attrs_checker => sub {
2479     my ($self, $todo) = @_;
2480     my $has_src;
2481     for my $attr (@{$todo->{node}->attributes}) {
2482     my $attr_ns = $attr->namespace_uri;
2483     $attr_ns = '' unless defined $attr_ns;
2484     my $attr_ln = $attr->manakai_local_name;
2485     my $checker;
2486     if ($attr_ns eq '') {
2487     if ($attr_ln eq 'src') {
2488     $checker = $HTMLURIAttrChecker;
2489     $has_src = 1;
2490     } elsif ($attr_ln eq 'type') {
2491     $checker = $HTMLIMTAttrChecker;
2492     } else {
2493     ## TODO: height
2494     ## TODO: width
2495     $checker = $HTMLAttrChecker->{$attr_ln}
2496     || sub { }; ## NOTE: Any local attribute is ok.
2497     }
2498     }
2499     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2500     || $AttrChecker->{$attr_ns}->{''};
2501     if ($checker) {
2502     $checker->($self, $attr);
2503     } else {
2504     $self->{onerror}->(node => $attr, level => 'unsupported',
2505     type => 'attribute');
2506     ## ISSUE: No comformance createria for global attributes in the spec
2507     }
2508     }
2509    
2510     unless ($has_src) {
2511     $self->{onerror}->(node => $todo->{node},
2512     type => 'attribute missing:src');
2513     }
2514     },
2515 wakaba 1.25 checker => sub {
2516     my ($self, $todo) = @_;
2517     $todo->{flag}->{has_descendant}->{significant} = 1;
2518     return $HTMLEmptyChecker->($self, $todo);
2519     },
2520 wakaba 1.1 };
2521    
2522     $Element->{$HTML_NS}->{object} = {
2523     attrs_checker => sub {
2524     my ($self, $todo) = @_;
2525     $GetHTMLAttrsChecker->({
2526     data => $HTMLURIAttrChecker,
2527     type => $HTMLIMTAttrChecker,
2528     usemap => $HTMLUsemapAttrChecker,
2529     ## TODO: width
2530     ## TODO: height
2531     })->($self, $todo);
2532     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2533     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2534     $self->{onerror}->(node => $todo->{node},
2535     type => 'attribute missing:data|type');
2536     }
2537     }
2538     },
2539 wakaba 1.29 ## NOTE: param*, then transparent.
2540 wakaba 1.25 checker => sub {
2541     my ($self, $todo) = @_;
2542     $todo->{flag}->{has_descendant}->{significant} = 1;
2543     return $ElementDefault->{checker}->($self, $todo); ## TODO
2544     },
2545 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2546 wakaba 1.1 };
2547    
2548     $Element->{$HTML_NS}->{param} = {
2549     attrs_checker => sub {
2550     my ($self, $todo) = @_;
2551     $GetHTMLAttrsChecker->({
2552     name => sub { },
2553     value => sub { },
2554     })->($self, $todo);
2555     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2556     $self->{onerror}->(node => $todo->{node},
2557     type => 'attribute missing:name');
2558     }
2559     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2560     $self->{onerror}->(node => $todo->{node},
2561     type => 'attribute missing:value');
2562     }
2563     },
2564     checker => $HTMLEmptyChecker,
2565     };
2566    
2567     $Element->{$HTML_NS}->{video} = {
2568     attrs_checker => $GetHTMLAttrsChecker->({
2569     src => $HTMLURIAttrChecker,
2570     ## TODO: start, loopstart, loopend, end
2571     ## ISSUE: they MUST be "value time offset"s. Value?
2572 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2573 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2574     controls => $GetHTMLBooleanAttrChecker->('controls'),
2575 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2576     ## TODO: width, height (not for audio!)
2577 wakaba 1.1 }),
2578     checker => sub {
2579     my ($self, $todo) = @_;
2580 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2581 wakaba 1.1
2582 wakaba 1.29 ## TODO:
2583 wakaba 1.1 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2584 wakaba 1.35 return $HTMLProseContentChecker->($self, $todo);
2585 wakaba 1.1 } else {
2586     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2587     ->($self, $todo);
2588     }
2589     },
2590     };
2591    
2592     $Element->{$HTML_NS}->{audio} = {
2593     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2594     checker => $Element->{$HTML_NS}->{video}->{checker},
2595     };
2596    
2597     $Element->{$HTML_NS}->{source} = {
2598     attrs_checker => sub {
2599     my ($self, $todo) = @_;
2600     $GetHTMLAttrsChecker->({
2601     src => $HTMLURIAttrChecker,
2602     type => $HTMLIMTAttrChecker,
2603     media => $HTMLMQAttrChecker,
2604     })->($self, $todo);
2605     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2606     $self->{onerror}->(node => $todo->{node},
2607     type => 'attribute missing:src');
2608     }
2609     },
2610     checker => $HTMLEmptyChecker,
2611     };
2612    
2613     $Element->{$HTML_NS}->{canvas} = {
2614     attrs_checker => $GetHTMLAttrsChecker->({
2615     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2616     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2617     }),
2618 wakaba 1.25 checker => sub {
2619     my ($self, $todo) = @_;
2620     $todo->{flag}->{has_descendant}->{significant} = 1;
2621 wakaba 1.29 return $HTMLTransparentChecker->($self, $todo);
2622 wakaba 1.25 },
2623 wakaba 1.1 };
2624    
2625     $Element->{$HTML_NS}->{map} = {
2626 wakaba 1.4 attrs_checker => sub {
2627     my ($self, $todo) = @_;
2628     my $has_id;
2629     $GetHTMLAttrsChecker->({
2630     id => sub {
2631     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2632     my ($self, $attr) = @_;
2633     my $value = $attr->value;
2634     if (length $value > 0) {
2635     if ($self->{id}->{$value}) {
2636     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2637     push @{$self->{id}->{$value}}, $attr;
2638     } else {
2639     $self->{id}->{$value} = [$attr];
2640     }
2641 wakaba 1.1 } else {
2642 wakaba 1.4 ## NOTE: MUST contain at least one character
2643     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2644 wakaba 1.1 }
2645 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2646     $self->{onerror}->(node => $attr, type => 'space in ID');
2647     }
2648     $self->{map}->{$value} ||= $attr;
2649     $has_id = 1;
2650     },
2651     })->($self, $todo);
2652     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2653     unless $has_id;
2654     },
2655 wakaba 1.29 checker => $HTMLProseContentChecker,
2656 wakaba 1.1 };
2657    
2658     $Element->{$HTML_NS}->{area} = {
2659     attrs_checker => sub {
2660     my ($self, $todo) = @_;
2661     my %attr;
2662     my $coords;
2663     for my $attr (@{$todo->{node}->attributes}) {
2664     my $attr_ns = $attr->namespace_uri;
2665     $attr_ns = '' unless defined $attr_ns;
2666     my $attr_ln = $attr->manakai_local_name;
2667     my $checker;
2668     if ($attr_ns eq '') {
2669     $checker = {
2670     alt => sub { },
2671     ## NOTE: |alt| value has no conformance creteria.
2672     shape => $GetHTMLEnumeratedAttrChecker->({
2673     circ => -1, circle => 1,
2674     default => 1,
2675     poly => 1, polygon => -1,
2676     rect => 1, rectangle => -1,
2677     }),
2678     coords => sub {
2679     my ($self, $attr) = @_;
2680     my $value = $attr->value;
2681     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2682     $coords = [split /,/, $value];
2683     } else {
2684     $self->{onerror}->(node => $attr,
2685     type => 'coords:syntax error');
2686     }
2687     },
2688     target => $HTMLTargetAttrChecker,
2689     href => $HTMLURIAttrChecker,
2690     ping => $HTMLSpaceURIsAttrChecker,
2691 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2692 wakaba 1.1 media => $HTMLMQAttrChecker,
2693     hreflang => $HTMLLanguageTagAttrChecker,
2694     type => $HTMLIMTAttrChecker,
2695     }->{$attr_ln};
2696     if ($checker) {
2697     $attr{$attr_ln} = $attr;
2698     } else {
2699     $checker = $HTMLAttrChecker->{$attr_ln};
2700     }
2701     }
2702     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2703     || $AttrChecker->{$attr_ns}->{''};
2704     if ($checker) {
2705     $checker->($self, $attr) if ref $checker;
2706     } else {
2707     $self->{onerror}->(node => $attr, level => 'unsupported',
2708     type => 'attribute');
2709     ## ISSUE: No comformance createria for unknown attributes in the spec
2710     }
2711     }
2712    
2713     if (defined $attr{href}) {
2714 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2715 wakaba 1.1 unless (defined $attr{alt}) {
2716     $self->{onerror}->(node => $todo->{node},
2717     type => 'attribute missing:alt');
2718     }
2719     } else {
2720     for (qw/target ping rel media hreflang type alt/) {
2721     if (defined $attr{$_}) {
2722     $self->{onerror}->(node => $attr{$_},
2723     type => 'attribute not allowed');
2724     }
2725     }
2726     }
2727    
2728     my $shape = 'rectangle';
2729     if (defined $attr{shape}) {
2730     $shape = {
2731     circ => 'circle', circle => 'circle',
2732     default => 'default',
2733     poly => 'polygon', polygon => 'polygon',
2734     rect => 'rectangle', rectangle => 'rectangle',
2735     }->{lc $attr{shape}->value} || 'rectangle';
2736     ## TODO: ASCII lowercase?
2737     }
2738    
2739     if ($shape eq 'circle') {
2740     if (defined $attr{coords}) {
2741     if (defined $coords) {
2742     if (@$coords == 3) {
2743     if ($coords->[2] < 0) {
2744     $self->{onerror}->(node => $attr{coords},
2745     type => 'coords:out of range:2');
2746     }
2747     } else {
2748     $self->{onerror}->(node => $attr{coords},
2749     type => 'coords:number:3:'.@$coords);
2750     }
2751     } else {
2752     ## NOTE: A syntax error has been reported.
2753     }
2754     } else {
2755     $self->{onerror}->(node => $todo->{node},
2756     type => 'attribute missing:coords');
2757     }
2758     } elsif ($shape eq 'default') {
2759     if (defined $attr{coords}) {
2760     $self->{onerror}->(node => $attr{coords},
2761     type => 'attribute not allowed');
2762     }
2763     } elsif ($shape eq 'polygon') {
2764     if (defined $attr{coords}) {
2765     if (defined $coords) {
2766     if (@$coords >= 6) {
2767     unless (@$coords % 2 == 0) {
2768     $self->{onerror}->(node => $attr{coords},
2769     type => 'coords:number:even:'.@$coords);
2770     }
2771     } else {
2772     $self->{onerror}->(node => $attr{coords},
2773     type => 'coords:number:>=6:'.@$coords);
2774     }
2775     } else {
2776     ## NOTE: A syntax error has been reported.
2777     }
2778     } else {
2779     $self->{onerror}->(node => $todo->{node},
2780     type => 'attribute missing:coords');
2781     }
2782     } elsif ($shape eq 'rectangle') {
2783     if (defined $attr{coords}) {
2784     if (defined $coords) {
2785     if (@$coords == 4) {
2786     unless ($coords->[0] < $coords->[2]) {
2787     $self->{onerror}->(node => $attr{coords},
2788     type => 'coords:out of range:0');
2789     }
2790     unless ($coords->[1] < $coords->[3]) {
2791     $self->{onerror}->(node => $attr{coords},
2792     type => 'coords:out of range:1');
2793     }
2794     } else {
2795     $self->{onerror}->(node => $attr{coords},
2796     type => 'coords:number:4:'.@$coords);
2797     }
2798     } else {
2799     ## NOTE: A syntax error has been reported.
2800     }
2801     } else {
2802     $self->{onerror}->(node => $todo->{node},
2803     type => 'attribute missing:coords');
2804     }
2805     }
2806     },
2807     checker => $HTMLEmptyChecker,
2808     };
2809     ## TODO: only in map
2810    
2811     $Element->{$HTML_NS}->{table} = {
2812     attrs_checker => $GetHTMLAttrsChecker->({}),
2813     checker => sub {
2814     my ($self, $todo) = @_;
2815     my $el = $todo->{node};
2816     my $new_todos = [];
2817     my @nodes = (@{$el->child_nodes});
2818    
2819     my $phase = 'before caption';
2820     my $has_tfoot;
2821     while (@nodes) {
2822     my $node = shift @nodes;
2823     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2824    
2825     my $nt = $node->node_type;
2826     if ($nt == 1) {
2827     my $node_ns = $node->namespace_uri;
2828     $node_ns = '' unless defined $node_ns;
2829     my $node_ln = $node->manakai_local_name;
2830     ## NOTE: |minuses| list is not checked since redundant
2831 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2832     #
2833     } elsif ($phase eq 'in tbodys') {
2834 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2835     #$phase = 'in tbodys';
2836     } elsif (not $has_tfoot and
2837     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2838     $phase = 'after tfoot';
2839     $has_tfoot = 1;
2840     } else {
2841     $self->{onerror}->(node => $node, type => 'element not allowed');
2842     }
2843     } elsif ($phase eq 'in trs') {
2844     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2845     #$phase = 'in trs';
2846     } elsif (not $has_tfoot and
2847     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2848     $phase = 'after tfoot';
2849     $has_tfoot = 1;
2850     } else {
2851     $self->{onerror}->(node => $node, type => 'element not allowed');
2852     }
2853     } elsif ($phase eq 'after thead') {
2854     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2855     $phase = 'in tbodys';
2856     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2857     $phase = 'in trs';
2858     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2859     $phase = 'in tbodys';
2860     $has_tfoot = 1;
2861     } else {
2862     $self->{onerror}->(node => $node, type => 'element not allowed');
2863     }
2864     } elsif ($phase eq 'in colgroup') {
2865     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2866     $phase = 'in colgroup';
2867     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2868     $phase = 'after thead';
2869     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2870     $phase = 'in tbodys';
2871     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2872     $phase = 'in trs';
2873     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2874     $phase = 'in tbodys';
2875     $has_tfoot = 1;
2876     } else {
2877     $self->{onerror}->(node => $node, type => 'element not allowed');
2878     }
2879     } elsif ($phase eq 'before caption') {
2880     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2881     $phase = 'in colgroup';
2882     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2883     $phase = 'in colgroup';
2884     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2885     $phase = 'after thead';
2886     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2887     $phase = 'in tbodys';
2888     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2889     $phase = 'in trs';
2890     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2891     $phase = 'in tbodys';
2892     $has_tfoot = 1;
2893     } else {
2894     $self->{onerror}->(node => $node, type => 'element not allowed');
2895     }
2896     } else { # after tfoot
2897     $self->{onerror}->(node => $node, type => 'element not allowed');
2898     }
2899     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2900     unshift @nodes, @$sib;
2901     push @$new_todos, @$ch;
2902     } elsif ($nt == 3 or $nt == 4) {
2903     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2904     $self->{onerror}->(node => $node, type => 'character not allowed');
2905 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2906 wakaba 1.1 }
2907     } elsif ($nt == 5) {
2908     unshift @nodes, @{$node->child_nodes};
2909     }
2910     }
2911    
2912     ## Table model errors
2913     require Whatpm::HTMLTable;
2914     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2915     my %opt = @_;
2916     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2917     });
2918     push @{$self->{return}->{table}}, $todo->{node};
2919    
2920     return ($new_todos);
2921     },
2922     };
2923    
2924     $Element->{$HTML_NS}->{caption} = {
2925     attrs_checker => $GetHTMLAttrsChecker->({}),
2926 wakaba 1.36 checker => $HTMLPhrasingContentChecker,
2927 wakaba 1.1 };
2928    
2929     $Element->{$HTML_NS}->{colgroup} = {
2930     attrs_checker => $GetHTMLAttrsChecker->({
2931     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2932     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2933     ## TODO: "attribute not supported" if |col|.
2934     ## ISSUE: MUST NOT if any |col|?
2935     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2936     }),
2937     checker => sub {
2938     my ($self, $todo) = @_;
2939     my $el = $todo->{node};
2940     my $new_todos = [];
2941     my @nodes = (@{$el->child_nodes});
2942    
2943     while (@nodes) {
2944     my $node = shift @nodes;
2945     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2946    
2947     my $nt = $node->node_type;
2948     if ($nt == 1) {
2949     my $node_ns = $node->namespace_uri;
2950     $node_ns = '' unless defined $node_ns;
2951     my $node_ln = $node->manakai_local_name;
2952     ## NOTE: |minuses| list is not checked since redundant
2953 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2954     #
2955     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2956 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2957     }
2958     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2959     unshift @nodes, @$sib;
2960     push @$new_todos, @$ch;
2961     } elsif ($nt == 3 or $nt == 4) {
2962     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2963     $self->{onerror}->(node => $node, type => 'character not allowed');
2964 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2965 wakaba 1.1 }
2966     } elsif ($nt == 5) {
2967     unshift @nodes, @{$node->child_nodes};
2968     }
2969     }
2970     return ($new_todos);
2971     },
2972     };
2973    
2974     $Element->{$HTML_NS}->{col} = {
2975     attrs_checker => $GetHTMLAttrsChecker->({
2976     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2977     }),
2978     checker => $HTMLEmptyChecker,
2979     };
2980    
2981     $Element->{$HTML_NS}->{tbody} = {
2982     attrs_checker => $GetHTMLAttrsChecker->({}),
2983     checker => sub {
2984     my ($self, $todo) = @_;
2985     my $el = $todo->{node};
2986     my $new_todos = [];
2987     my @nodes = (@{$el->child_nodes});
2988    
2989     my $has_tr;
2990     while (@nodes) {
2991     my $node = shift @nodes;
2992     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2993    
2994     my $nt = $node->node_type;
2995     if ($nt == 1) {
2996     my $node_ns = $node->namespace_uri;
2997     $node_ns = '' unless defined $node_ns;
2998     my $node_ln = $node->manakai_local_name;
2999     ## NOTE: |minuses| list is not checked since redundant
3000 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3001     #
3002     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3003 wakaba 1.1 $has_tr = 1;
3004     } else {
3005     $self->{onerror}->(node => $node, type => 'element not allowed');
3006     }
3007     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3008     unshift @nodes, @$sib;
3009     push @$new_todos, @$ch;
3010     } elsif ($nt == 3 or $nt == 4) {
3011     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3012     $self->{onerror}->(node => $node, type => 'character not allowed');
3013 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3014 wakaba 1.1 }
3015     } elsif ($nt == 5) {
3016     unshift @nodes, @{$node->child_nodes};
3017     }
3018     }
3019     unless ($has_tr) {
3020     $self->{onerror}->(node => $el, type => 'child element missing:tr');
3021     }
3022     return ($new_todos);
3023     },
3024     };
3025    
3026     $Element->{$HTML_NS}->{thead} = {
3027     attrs_checker => $GetHTMLAttrsChecker->({}),
3028     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3029     };
3030    
3031     $Element->{$HTML_NS}->{tfoot} = {
3032     attrs_checker => $GetHTMLAttrsChecker->({}),
3033     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3034     };
3035    
3036     $Element->{$HTML_NS}->{tr} = {
3037     attrs_checker => $GetHTMLAttrsChecker->({}),
3038     checker => sub {
3039     my ($self, $todo) = @_;
3040     my $el = $todo->{node};
3041     my $new_todos = [];
3042     my @nodes = (@{$el->child_nodes});
3043    
3044     my $has_td;
3045     while (@nodes) {
3046     my $node = shift @nodes;
3047     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3048    
3049     my $nt = $node->node_type;
3050     if ($nt == 1) {
3051     my $node_ns = $node->namespace_uri;
3052     $node_ns = '' unless defined $node_ns;
3053     my $node_ln = $node->manakai_local_name;
3054     ## NOTE: |minuses| list is not checked since redundant
3055 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3056     #
3057     } elsif ($node_ns eq $HTML_NS and
3058     ($node_ln eq 'td' or $node_ln eq 'th')) {
3059 wakaba 1.1 $has_td = 1;
3060     } else {
3061     $self->{onerror}->(node => $node, type => 'element not allowed');
3062     }
3063     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3064     unshift @nodes, @$sib;
3065     push @$new_todos, @$ch;
3066     } elsif ($nt == 3 or $nt == 4) {
3067     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3068     $self->{onerror}->(node => $node, type => 'character not allowed');
3069 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3070 wakaba 1.1 }
3071     } elsif ($nt == 5) {
3072     unshift @nodes, @{$node->child_nodes};
3073     }
3074     }
3075     unless ($has_td) {
3076     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
3077     }
3078     return ($new_todos);
3079     },
3080     };
3081    
3082     $Element->{$HTML_NS}->{td} = {
3083     attrs_checker => $GetHTMLAttrsChecker->({
3084     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3085     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3086     }),
3087 wakaba 1.29 checker => $HTMLProseContentChecker,
3088 wakaba 1.1 };
3089    
3090     $Element->{$HTML_NS}->{th} = {
3091     attrs_checker => $GetHTMLAttrsChecker->({
3092     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3093     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3094     scope => $GetHTMLEnumeratedAttrChecker
3095     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
3096     }),
3097 wakaba 1.31 checker => $HTMLPhrasingContentChecker,
3098 wakaba 1.1 };
3099    
3100     ## TODO: forms
3101 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
3102 wakaba 1.1
3103     $Element->{$HTML_NS}->{script} = {
3104 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
3105 wakaba 1.1 src => $HTMLURIAttrChecker,
3106     defer => $GetHTMLBooleanAttrChecker->('defer'),
3107     async => $GetHTMLBooleanAttrChecker->('async'),
3108     type => $HTMLIMTAttrChecker,
3109 wakaba 1.9 }),
3110 wakaba 1.1 checker => sub {
3111     my ($self, $todo) = @_;
3112    
3113     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
3114     return $HTMLEmptyChecker->($self, $todo);
3115     } else {
3116     ## NOTE: No content model conformance in HTML5 spec.
3117     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
3118     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
3119     if ((defined $type and $type eq '') or
3120     (defined $language and $language eq '')) {
3121     $type = 'text/javascript';
3122     } elsif (defined $type) {
3123     #
3124     } elsif (defined $language) {
3125     $type = 'text/' . $language;
3126     } else {
3127     $type = 'text/javascript';
3128     }
3129     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
3130     type => 'script:'.$type); ## TODO: $type normalization
3131     return $AnyChecker->($self, $todo);
3132     }
3133     },
3134     };
3135 wakaba 1.25 ## ISSUE: Significant check and text child node
3136 wakaba 1.1
3137     ## NOTE: When script is disabled.
3138     $Element->{$HTML_NS}->{noscript} = {
3139 wakaba 1.3 attrs_checker => sub {
3140     my ($self, $todo) = @_;
3141    
3142     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
3143     ## since the later is not invoked when the |noscript| is used as a
3144     ## transparent element.
3145     unless ($todo->{node}->owner_document->manakai_is_html) {
3146     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
3147     }
3148    
3149     $GetHTMLAttrsChecker->({})->($self, $todo);
3150     },
3151 wakaba 1.1 checker => sub {
3152     my ($self, $todo) = @_;
3153    
3154 wakaba 1.3 if ($todo->{flag}->{in_head}) {
3155     my $new_todos = [];
3156     my @nodes = (@{$todo->{node}->child_nodes});
3157    
3158     while (@nodes) {
3159     my $node = shift @nodes;
3160     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3161    
3162     my $nt = $node->node_type;
3163     if ($nt == 1) {
3164     my $node_ns = $node->namespace_uri;
3165     $node_ns = '' unless defined $node_ns;
3166     my $node_ln = $node->manakai_local_name;
3167 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3168     #
3169     } elsif ($node_ns eq $HTML_NS) {
3170 wakaba 1.34 if ($node_ln eq 'link') {
3171 wakaba 1.3 #
3172 wakaba 1.34 } elsif ($node_ln eq 'style') {
3173     if ($node->has_attribute_ns (undef, 'scoped')) {
3174     $self->{onerror}->(node => $node,
3175     type => 'element not allowed:head noscript',
3176     level => $self->{must_level});
3177     }
3178 wakaba 1.3 } elsif ($node_ln eq 'meta') {
3179 wakaba 1.34 if ($node->has_attribute_ns (undef, 'charset')) {
3180     ## NOTE: Non-conforming. An error is raised by
3181     ## |meta|'s checker.
3182 wakaba 1.3 } else {
3183 wakaba 1.34 my $http_equiv_attr
3184     = $node->get_attribute_node_ns (undef, 'http-equiv');
3185     if ($http_equiv_attr) {
3186     ## TODO: case
3187     if (lc $http_equiv_attr->value eq 'content-type') {
3188     ## NOTE: Non-conforming. An error is raised by
3189     ## |meta|'s checker.
3190     } else {
3191     #
3192     }
3193     } else {
3194     $self->{onerror}->(node => $node,
3195     type => 'element not allowed:head noscript',
3196     level => $self->{must_level});
3197     }
3198 wakaba 1.3 }
3199     } else {
3200 wakaba 1.34 $self->{onerror}->(node => $node,
3201     type => 'element not allowed:head noscript',
3202     level => $self->{must_level});
3203 wakaba 1.3 }
3204     } else {
3205 wakaba 1.34 $self->{onerror}->(node => $node,
3206     type => 'element not allowed:head noscript',
3207     level => $self->{must_level});
3208 wakaba 1.3 }
3209    
3210     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3211     unshift @nodes, @$sib;
3212     push @$new_todos, @$ch;
3213     } elsif ($nt == 3 or $nt == 4) {
3214     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3215     $self->{onerror}->(node => $node, type => 'character not allowed');
3216 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3217 wakaba 1.3 }
3218     } elsif ($nt == 5) {
3219     unshift @nodes, @{$node->child_nodes};
3220     }
3221     }
3222     return ($new_todos);
3223     } else {
3224     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
3225 wakaba 1.29 my ($sib, $ch) = $HTMLTransparentChecker->($self, $todo);
3226 wakaba 1.3 push @$sib, $end;
3227     return ($sib, $ch);
3228     }
3229 wakaba 1.1 },
3230     };
3231 wakaba 1.3
3232     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
3233 wakaba 1.1
3234     $Element->{$HTML_NS}->{'event-source'} = {
3235     attrs_checker => $GetHTMLAttrsChecker->({
3236     src => $HTMLURIAttrChecker,
3237     }),
3238     checker => $HTMLEmptyChecker,
3239     };
3240    
3241     $Element->{$HTML_NS}->{details} = {
3242     attrs_checker => $GetHTMLAttrsChecker->({
3243     open => $GetHTMLBooleanAttrChecker->('open'),
3244     }),
3245     checker => sub {
3246     my ($self, $todo) = @_;
3247    
3248 wakaba 1.29 ## TODO:
3249 wakaba 1.1 my ($sib, $ch)
3250     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
3251     ->($self, $todo);
3252     return ($sib, $ch);
3253     },
3254     };
3255    
3256     $Element->{$HTML_NS}->{datagrid} = {
3257     attrs_checker => $GetHTMLAttrsChecker->({
3258     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3259     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3260     }),
3261     checker => sub {
3262     my ($self, $todo) = @_;
3263     my $el = $todo->{node};
3264     my $new_todos = [];
3265     my @nodes = (@{$el->child_nodes});
3266    
3267 wakaba 1.25 my $old_values = {significant =>
3268     $todo->{flag}->{has_descendant}->{significant}};
3269     $todo->{flag}->{has_descendant}->{significant} = 0;
3270    
3271 wakaba 1.1 my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3272    
3273 wakaba 1.29 ## Prose -(text* table Prose*) | table | select | datalist | Empty
3274 wakaba 1.1 my $mode = 'any';
3275     while (@nodes) {
3276     my $node = shift @nodes;
3277     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3278    
3279     my $nt = $node->node_type;
3280     if ($nt == 1) {
3281     my $node_ns = $node->namespace_uri;
3282     $node_ns = '' unless defined $node_ns;
3283     my $node_ln = $node->manakai_local_name;
3284     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3285 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3286     #
3287 wakaba 1.29 } elsif ($mode eq 'prose') {
3288 wakaba 1.1 $not_allowed = 1
3289 wakaba 1.29 unless $HTMLProseContent->{$node_ns}->{$node_ln};
3290 wakaba 1.1 } elsif ($mode eq 'any') {
3291     if ($node_ns eq $HTML_NS and
3292     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
3293     $mode = 'none';
3294 wakaba 1.29 } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
3295     $mode = 'prose';
3296 wakaba 1.1 } else {
3297     $not_allowed = 1;
3298     }
3299     } else {
3300     $not_allowed = 1;
3301     }
3302     $self->{onerror}->(node => $node, type => 'element not allowed')
3303     if $not_allowed;
3304     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3305     unshift @nodes, @$sib;
3306     push @$new_todos, @$ch;
3307     } elsif ($nt == 3 or $nt == 4) {
3308     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3309 wakaba 1.29 if ($mode eq 'prose') {
3310     #
3311     } elsif ($mode eq 'any') {
3312     $mode = 'prose';
3313     } else {
3314     $self->{onerror}->(node => $node, type => 'character not allowed');
3315     }
3316 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3317 wakaba 1.1 }
3318     } elsif ($nt == 5) {
3319     unshift @nodes, @{$node->child_nodes};
3320     }
3321     }
3322    
3323     push @$new_todos, $end;
3324 wakaba 1.25
3325     push @$new_todos, {
3326     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3327     old_values => $old_values,
3328 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3329 wakaba 1.25 };
3330    
3331 wakaba 1.1 return ($new_todos);
3332 wakaba 1.29
3333     ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
3334     ## are not disallowed (assuming that form control contents are also
3335     ## prose content).
3336 wakaba 1.1 },
3337     };
3338    
3339     $Element->{$HTML_NS}->{command} = {
3340     attrs_checker => $GetHTMLAttrsChecker->({
3341     checked => $GetHTMLBooleanAttrChecker->('checked'),
3342     default => $GetHTMLBooleanAttrChecker->('default'),
3343     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3344     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3345     icon => $HTMLURIAttrChecker,
3346     label => sub { }, ## NOTE: No conformance creteria
3347     radiogroup => sub { }, ## NOTE: No conformance creteria
3348     ## NOTE: |title| has special semantics, but no syntactical difference
3349     type => sub {
3350     my ($self, $attr) = @_;
3351     my $value = $attr->value;
3352     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3353     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3354     }
3355     },
3356     }),
3357     checker => $HTMLEmptyChecker,
3358     };
3359    
3360     $Element->{$HTML_NS}->{menu} = {
3361     attrs_checker => $GetHTMLAttrsChecker->({
3362     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3363     id => sub {
3364     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3365     my ($self, $attr) = @_;
3366     my $value = $attr->value;
3367     if (length $value > 0) {
3368     if ($self->{id}->{$value}) {
3369     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3370     push @{$self->{id}->{$value}}, $attr;
3371     } else {
3372     $self->{id}->{$value} = [$attr];
3373     }
3374     } else {
3375     ## NOTE: MUST contain at least one character
3376     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3377     }
3378     if ($value =~ /[\x09-\x0D\x20]/) {
3379     $self->{onerror}->(node => $attr, type => 'space in ID');
3380     }
3381     $self->{menu}->{$value} ||= $attr;
3382     ## ISSUE: <menu id=""><p contextmenu=""> match?
3383     },
3384     label => sub { }, ## NOTE: No conformance creteria
3385     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3386     }),
3387     checker => sub {
3388     my ($self, $todo) = @_;
3389     my $el = $todo->{node};
3390     my $new_todos = [];
3391     my @nodes = (@{$el->child_nodes});
3392 wakaba 1.25
3393     my $old_values = {significant =>
3394     $todo->{flag}->{has_descendant}->{significant}};
3395     $todo->{flag}->{has_descendant}->{significant} = 0;
3396 wakaba 1.1
3397 wakaba 1.29 my $content = 'li or phrasing';
3398 wakaba 1.1 while (@nodes) {
3399     my $node = shift @nodes;
3400     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3401    
3402     my $nt = $node->node_type;
3403     if ($nt == 1) {
3404     my $node_ns = $node->namespace_uri;
3405     $node_ns = '' unless defined $node_ns;
3406     my $node_ln = $node->manakai_local_name;
3407     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3408 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3409     #
3410     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3411 wakaba 1.29 if ($content eq 'phrasing') {
3412 wakaba 1.1 $not_allowed = 1;
3413 wakaba 1.29 } elsif ($content eq 'li or phrasing') {
3414 wakaba 1.1 $content = 'li';
3415     }
3416     } else {
3417 wakaba 1.29 if ($HTMLPhrasingContent->{$node_ns}->{$node_ln}) {
3418     $content = 'phrasing';
3419 wakaba 1.1 } else {
3420     $not_allowed = 1;
3421     }
3422     }
3423     $self->{onerror}->(node => $node, type => 'element not allowed')
3424     if $not_allowed;
3425     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3426     unshift @nodes, @$sib;
3427     push @$new_todos, @$ch;
3428     } elsif ($nt == 3 or $nt == 4) {
3429     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3430     if ($content eq 'li') {
3431     $self->{onerror}->(node => $node, type => 'character not allowed');
3432 wakaba 1.29 } elsif ($content eq 'li or phrasing') {
3433     $content = 'phrasing';
3434 wakaba 1.1 }
3435 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3436 wakaba 1.1 }
3437     } elsif ($nt == 5) {
3438     unshift @nodes, @{$node->child_nodes};
3439     }
3440     }
3441    
3442     for (@$new_todos) {
3443 wakaba 1.29 $_->{flag}->{in_menu} = 1;
3444 wakaba 1.1 }
3445 wakaba 1.25
3446     push @$new_todos, {
3447     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3448     old_values => $old_values,
3449 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3450 wakaba 1.25 };
3451    
3452 wakaba 1.1 return ($new_todos);
3453     },
3454 wakaba 1.8 };
3455    
3456     $Element->{$HTML_NS}->{datatemplate} = {
3457     attrs_checker => $GetHTMLAttrsChecker->({}),
3458     checker => sub {
3459     my ($self, $todo) = @_;
3460     my $el = $todo->{node};
3461     my $new_todos = [];
3462     my @nodes = (@{$el->child_nodes});
3463    
3464     while (@nodes) {
3465     my $node = shift @nodes;
3466     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3467    
3468     my $nt = $node->node_type;
3469     if ($nt == 1) {
3470     my $node_ns = $node->namespace_uri;
3471     $node_ns = '' unless defined $node_ns;
3472     my $node_ln = $node->manakai_local_name;
3473     ## NOTE: |minuses| list is not checked since redundant
3474     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3475     #
3476     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3477     $self->{onerror}->(node => $node,
3478     type => 'element not allowed:datatemplate');
3479     }
3480     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3481     unshift @nodes, @$sib;
3482     push @$new_todos, @$ch;
3483     } elsif ($nt == 3 or $nt == 4) {
3484     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3485     $self->{onerror}->(node => $node, type => 'character not allowed');
3486 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3487 wakaba 1.8 }
3488     } elsif ($nt == 5) {
3489     unshift @nodes, @{$node->child_nodes};
3490     }
3491     }
3492     return ($new_todos);
3493     },
3494     is_xml_root => 1,
3495     };
3496    
3497     $Element->{$HTML_NS}->{rule} = {
3498     attrs_checker => $GetHTMLAttrsChecker->({
3499 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3500 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3501 wakaba 1.8 }),
3502     checker => sub {
3503     my ($self, $todo) = @_;
3504    
3505     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3506 wakaba 1.25 my ($sib, $ch) = $HTMLAnyChecker->($self, $todo);
3507 wakaba 1.8 push @$sib, $end;
3508     return ($sib, $ch);
3509     },
3510     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3511     ## is applied to some conforming data, results in a conforming DOM tree.":
3512     ## We don't check against this.
3513     };
3514    
3515     $Element->{$HTML_NS}->{nest} = {
3516     attrs_checker => $GetHTMLAttrsChecker->({
3517 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3518     mode => sub {
3519     my ($self, $attr) = @_;
3520     my $value = $attr->value;
3521     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3522     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3523     }
3524     },
3525 wakaba 1.8 }),
3526     checker => $HTMLEmptyChecker,
3527 wakaba 1.1 };
3528    
3529     $Element->{$HTML_NS}->{legend} = {
3530     attrs_checker => $GetHTMLAttrsChecker->({}),
3531 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
3532 wakaba 1.1 };
3533    
3534     $Element->{$HTML_NS}->{div} = {
3535     attrs_checker => $GetHTMLAttrsChecker->({}),
3536 wakaba 1.31 checker => $HTMLProseContentChecker,
3537 wakaba 1.1 };
3538    
3539     $Element->{$HTML_NS}->{font} = {
3540     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3541     checker => $HTMLTransparentChecker,
3542     };
3543    
3544     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3545    
3546     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24