/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (hide annotations) (download)
Sun Nov 11 02:25:51 2007 UTC (16 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.19: +8 -0 lines
++ whatpm/t/ChangeLog	11 Nov 2007 02:25:46 -0000
2007-11-11  Wakaba  <wakaba@suika.fam.cx>

	* content-model-4.dat: New tests for proposed link types.

++ whatpm/Whatpm/ContentChecker/ChangeLog	11 Nov 2007 02:19:38 -0000
2007-11-11  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Raise bad-context error for proposed link types.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     ## Strictly inline-level content
252     my $HTMLStrictlyInlineChecker = sub {
253     my ($self, $todo) = @_;
254     my $el = $todo->{node};
255     my $new_todos = [];
256     my @nodes = (@{$el->child_nodes});
257    
258     while (@nodes) {
259     my $node = shift @nodes;
260     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
261    
262     my $nt = $node->node_type;
263     if ($nt == 1) {
264     my $node_ns = $node->namespace_uri;
265     $node_ns = '' unless defined $node_ns;
266     my $node_ln = $node->manakai_local_name;
267     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
268     $not_allowed = 1
269 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
270     $self->{pluses}->{$node_ns}->{$node_ln};
271 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
272     if $not_allowed;
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 5) {
277     unshift @nodes, @{$node->child_nodes};
278     }
279     }
280    
281     for (@$new_todos) {
282     $_->{inline} = 1;
283     $_->{strictly_inline} = 1;
284     }
285     return ($new_todos);
286     }; # $HTMLStrictlyInlineChecker
287    
288 wakaba 1.8 ## Inline-level or strictly inline-level content
289 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
290     my ($self, $todo) = @_;
291     my $el = $todo->{node};
292     my $new_todos = [];
293     my @nodes = (@{$el->child_nodes});
294    
295     while (@nodes) {
296     my $node = shift @nodes;
297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
298    
299     my $nt = $node->node_type;
300     if ($nt == 1) {
301     my $node_ns = $node->namespace_uri;
302     $node_ns = '' unless defined $node_ns;
303     my $node_ln = $node->manakai_local_name;
304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
305     if ($todo->{strictly_inline}) {
306     $not_allowed = 1
307 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308     $self->{pluses}->{$node_ns}->{$node_ln};
309 wakaba 1.1 } else {
310     $not_allowed = 1
311     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
312 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
313     $self->{pluses}->{$node_ns}->{$node_ln};
314 wakaba 1.1 }
315     $self->{onerror}->(node => $node, type => 'element not allowed')
316     if $not_allowed;
317     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
318     unshift @nodes, @$sib;
319     push @$new_todos, @$ch;
320     } elsif ($nt == 5) {
321     unshift @nodes, @{$node->child_nodes};
322     }
323     }
324    
325     for (@$new_todos) {
326     $_->{inline} = 1;
327     $_->{strictly_inline} = 1;
328     }
329     return ($new_todos);
330     }; # $HTMLInlineOrStrictlyInlineChecker
331    
332 wakaba 1.13 ## TODO: New "significant" SHOULD-level requirement (revision 1114).
333 wakaba 1.1
334 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
335 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
336     my ($self, $todo) = @_;
337     my $el = $todo->{node};
338     my $new_todos = [];
339     my @nodes = (@{$el->child_nodes});
340    
341     my $content = 'block-or-inline'; # or 'block' or 'inline'
342     my @block_not_inline;
343     while (@nodes) {
344     my $node = shift @nodes;
345     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
346    
347 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
348     ## is conforming or not.
349    
350 wakaba 1.1 my $nt = $node->node_type;
351     if ($nt == 1) {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
356     if ($content eq 'block') {
357     $not_allowed = 1
358 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
359     $self->{pluses}->{$node_ns}->{$node_ln};
360 wakaba 1.1 } elsif ($content eq 'inline') {
361     $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
364     $self->{pluses}->{$node_ns}->{$node_ln};
365 wakaba 1.1 } else {
366     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
367     my $is_inline
368     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
369     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
370    
371     push @block_not_inline, $node
372     if $is_block and not $is_inline and not $not_allowed;
373 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
374 wakaba 1.1 $content = 'inline';
375     for (@block_not_inline) {
376     $self->{onerror}->(node => $_, type => 'element not allowed');
377     }
378     $not_allowed = 1 unless $is_inline;
379     }
380     }
381     $self->{onerror}->(node => $node, type => 'element not allowed')
382     if $not_allowed;
383     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
384     unshift @nodes, @$sib;
385     push @$new_todos, @$ch;
386     } elsif ($nt == 3 or $nt == 4) {
387     if ($node->data =~ /[^\x09-\x0D\x20]/) {
388     if ($content eq 'block') {
389     $self->{onerror}->(node => $node, type => 'character not allowed');
390     } else {
391     $content = 'inline';
392     for (@block_not_inline) {
393     $self->{onerror}->(node => $_, type => 'element not allowed');
394     }
395     }
396     }
397     } elsif ($nt == 5) {
398     unshift @nodes, @{$node->child_nodes};
399     }
400     }
401    
402     if ($content eq 'inline') {
403     for (@$new_todos) {
404     $_->{inline} = 1;
405     }
406     }
407     return ($new_todos);
408     };
409    
410     ## Zero or more XXX element, then either block-level or inline-level
411     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
412     my ($elnsuri, $ellname) = @_;
413     return sub {
414     my ($self, $todo) = @_;
415     my $el = $todo->{node};
416     my $new_todos = [];
417     my @nodes = (@{$el->child_nodes});
418    
419     my $has_non_style;
420     my $content = 'block-or-inline'; # or 'block' or 'inline'
421     my @block_not_inline;
422     while (@nodes) {
423     my $node = shift @nodes;
424     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
425    
426     my $nt = $node->node_type;
427     if ($nt == 1) {
428     my $node_ns = $node->namespace_uri;
429     $node_ns = '' unless defined $node_ns;
430     my $node_ln = $node->manakai_local_name;
431     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
432     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
433     $not_allowed = 1 if $has_non_style;
434     if ($ellname eq 'style' and
435     not $node->has_attribute_ns (undef, 'scoped')) {
436     $not_allowed = 1;
437     }
438     } elsif ($content eq 'block') {
439     $has_non_style = 1;
440     $not_allowed = 1
441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
442     $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 } elsif ($content eq 'inline') {
444     $has_non_style = 1;
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
447 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
448     $self->{pluses}->{$node_ns}->{$node_ln};
449 wakaba 1.1 } else {
450 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
452     my $is_inline
453     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455    
456     push @block_not_inline, $node
457     if $is_block and not $is_inline and not $not_allowed;
458 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
459 wakaba 1.1 $content = 'inline';
460     for (@block_not_inline) {
461     $self->{onerror}->(node => $_, type => 'element not allowed');
462     }
463     $not_allowed = 1 unless $is_inline;
464     }
465     }
466     $self->{onerror}->(node => $node, type => 'element not allowed')
467     if $not_allowed;
468     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
469     unshift @nodes, @$sib;
470     push @$new_todos, @$ch;
471     } elsif ($nt == 3 or $nt == 4) {
472     if ($node->data =~ /[^\x09-\x0D\x20]/) {
473     $has_non_style = 1;
474     if ($content eq 'block') {
475     $self->{onerror}->(node => $node, type => 'character not allowed');
476     } else {
477     $content = 'inline';
478     for (@block_not_inline) {
479     $self->{onerror}->(node => $_, type => 'element not allowed');
480     }
481     }
482     }
483     } elsif ($nt == 5) {
484     unshift @nodes, @{$node->child_nodes};
485     }
486     }
487    
488     if ($content eq 'inline') {
489     for (@$new_todos) {
490     $_->{inline} = 1;
491     }
492     }
493     return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
498    
499     our $AttrChecker;
500    
501     my $GetHTMLEnumeratedAttrChecker = sub {
502     my $states = shift; # {value => conforming ? 1 : -1}
503     return sub {
504     my ($self, $attr) = @_;
505     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
506     if ($states->{$value} > 0) {
507     #
508     } elsif ($states->{$value}) {
509     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
510     } else {
511     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
512     }
513     };
514     }; # $GetHTMLEnumeratedAttrChecker
515    
516     my $GetHTMLBooleanAttrChecker = sub {
517     my $local_name = shift;
518     return sub {
519     my ($self, $attr) = @_;
520     my $value = $attr->value;
521     unless ($value eq $local_name or $value eq '') {
522     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
523     }
524     };
525     }; # $GetHTMLBooleanAttrChecker
526    
527 wakaba 1.8 ## Unordered set of space-separated tokens
528 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
529 wakaba 1.8 my ($self, $attr) = @_;
530     my %word;
531     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
532     unless ($word{$word}) {
533     $word{$word} = 1;
534     } else {
535     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
536     }
537     }
538 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
539 wakaba 1.8
540 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
541     ## whose allowed values are defined by the section on link types)
542     my $HTMLLinkTypesAttrChecker = sub {
543 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
544 wakaba 1.1 my %word;
545     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
546     unless ($word{$word}) {
547     $word{$word} = 1;
548 wakaba 1.18 } elsif ($word eq 'up') {
549     #
550 wakaba 1.1 } else {
551     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
552     }
553     }
554     ## NOTE: Case sensitive match (since HTML5 spec does not say link
555     ## types are case-insensitive and it says "The value should not
556     ## be confusingly similar to any other defined value (e.g.
557     ## differing only in case).").
558     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
559     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
560     ## values to be used conformingly.
561     require Whatpm::_LinkTypeList;
562     our $LinkType;
563     for my $word (keys %word) {
564     my $def = $LinkType->{$word};
565     if (defined $def) {
566     if ($def->{status} eq 'accepted') {
567     if (defined $def->{effect}->[$a_or_area]) {
568     #
569     } else {
570     $self->{onerror}->(node => $attr,
571     type => 'link type:bad context:'.$word);
572     }
573     } elsif ($def->{status} eq 'proposal') {
574     $self->{onerror}->(node => $attr, level => 's',
575     type => 'link type:proposed:'.$word);
576 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
577     #
578     } else {
579     $self->{onerror}->(node => $attr,
580     type => 'link type:bad context:'.$word);
581     }
582 wakaba 1.1 } else { # rejected or synonym
583     $self->{onerror}->(node => $attr,
584     type => 'link type:non-conforming:'.$word);
585     }
586 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
587     if ($word eq 'alternate') {
588     #
589     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
590     $todo->{has_hyperlink_link_type} = 1;
591     }
592     }
593 wakaba 1.1 if ($def->{unique}) {
594     unless ($self->{has_link_type}->{$word}) {
595     $self->{has_link_type}->{$word} = 1;
596     } else {
597     $self->{onerror}->(node => $attr,
598     type => 'link type:duplicate:'.$word);
599     }
600     }
601     } else {
602     $self->{onerror}->(node => $attr, level => 'unsupported',
603     type => 'link type:'.$word);
604     }
605     }
606 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
607     if $word{alternate} and not $word{stylesheet};
608 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
609     ## says that using both X-Pingback: header field and HTML
610     ## <link rel=pingback> is deprecated and if both appears they
611     ## SHOULD contain exactly the same value.
612     ## ISSUE: Pingback 1.0 specification defines the exact representation
613     ## of its link element, which cannot be tested by the current arch.
614     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
615     ## include any string that matches to the pattern for the rel=pingback link,
616     ## which again inpossible to test.
617     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
618 wakaba 1.12
619     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
620 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
621     ## then they SHOULD be described in different paragraphs.".
622 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
623 wakaba 1.20
624     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
625 wakaba 1.1
626     ## URI (or IRI)
627     my $HTMLURIAttrChecker = sub {
628     my ($self, $attr) = @_;
629     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
630     my $value = $attr->value;
631     Whatpm::URIChecker->check_iri_reference ($value, sub {
632     my %opt = @_;
633     $self->{onerror}->(node => $attr, level => $opt{level},
634     type => 'URI::'.$opt{type}.
635     (defined $opt{position} ? ':'.$opt{position} : ''));
636     });
637 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
638 wakaba 1.1 }; # $HTMLURIAttrChecker
639    
640     ## A space separated list of one or more URIs (or IRIs)
641     my $HTMLSpaceURIsAttrChecker = sub {
642     my ($self, $attr) = @_;
643     my $i = 0;
644     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
645     Whatpm::URIChecker->check_iri_reference ($value, sub {
646     my %opt = @_;
647     $self->{onerror}->(node => $attr, level => $opt{level},
648 wakaba 1.2 type => 'URIs:'.':'.
649     $opt{type}.':'.$i.
650 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
651     });
652     $i++;
653     }
654     ## ISSUE: Relative references?
655     ## ISSUE: Leading or trailing white spaces are conformant?
656     ## ISSUE: A sequence of white space characters are conformant?
657     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
658     ## NOTE: Duplication seems not an error.
659 wakaba 1.4 $self->{has_uri_attr} = 1;
660 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
661    
662     my $HTMLDatetimeAttrChecker = sub {
663     my ($self, $attr) = @_;
664     my $value = $attr->value;
665     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
666     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
667     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
668     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
669     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
670     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
671     if $d < 1 or
672     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
673     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
674     if $M == 2 and $d == 29 and
675     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
676     } else {
677     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
678     }
679     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
680     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
681     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
682     if defined $s and $s > 59;
683     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
684     if $zh > 23;
685     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
686     if $zm > 59;
687     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
688     } else {
689     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
690     }
691     }; # $HTMLDatetimeAttrChecker
692    
693     my $HTMLIntegerAttrChecker = sub {
694     my ($self, $attr) = @_;
695     my $value = $attr->value;
696     unless ($value =~ /\A-?[0-9]+\z/) {
697     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
698     }
699     }; # $HTMLIntegerAttrChecker
700    
701     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
702     my $range_check = shift;
703     return sub {
704     my ($self, $attr) = @_;
705     my $value = $attr->value;
706     if ($value =~ /\A[0-9]+\z/) {
707     unless ($range_check->($value + 0)) {
708     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
709     }
710     } else {
711     $self->{onerror}->(node => $attr,
712     type => 'nninteger:syntax error');
713     }
714     };
715     }; # $GetHTMLNonNegativeIntegerAttrChecker
716    
717     my $GetHTMLFloatingPointNumberAttrChecker = sub {
718     my $range_check = shift;
719     return sub {
720     my ($self, $attr) = @_;
721     my $value = $attr->value;
722     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
723     unless ($range_check->($value + 0)) {
724     $self->{onerror}->(node => $attr, type => 'float:out of range');
725     }
726     } else {
727     $self->{onerror}->(node => $attr,
728     type => 'float:syntax error');
729     }
730     };
731     }; # $GetHTMLFloatingPointNumberAttrChecker
732    
733     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
734     ## ISSUE: RFC 2046 does not define syntax of media types.
735     ## ISSUE: The definition of "a valid MIME type" is unknown.
736     ## Syntactical correctness?
737     my $HTMLIMTAttrChecker = sub {
738     my ($self, $attr) = @_;
739     my $value = $attr->value;
740     ## ISSUE: RFC 2045 Content-Type header field allows insertion
741     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
742     ## ISSUE: RFC 2231 extension? Maybe no.
743     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
744     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
745     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
746     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
747     my @type = ($1, $2);
748     my $param = $3;
749     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
750     if (defined $2) {
751     push @type, $1 => $2;
752     } else {
753     my $n = $1;
754     my $v = $2;
755     $v =~ s/\\(.)/$1/gs;
756     push @type, $n => $v;
757     }
758     }
759     require Whatpm::IMTChecker;
760     Whatpm::IMTChecker->check_imt (sub {
761     my %opt = @_;
762     $self->{onerror}->(node => $attr, level => $opt{level},
763     type => 'IMT:'.$opt{type});
764     }, @type);
765     } else {
766     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
767     }
768     }; # $HTMLIMTAttrChecker
769    
770     my $HTMLLanguageTagAttrChecker = sub {
771 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
772    
773 wakaba 1.1 my ($self, $attr) = @_;
774 wakaba 1.6 my $value = $attr->value;
775     require Whatpm::LangTag;
776     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
777     my %opt = @_;
778     my $type = 'LangTag:'.$opt{type};
779     $type .= ':' . $opt{subtag} if defined $opt{subtag};
780     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
781     level => $opt{level});
782     });
783 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
784 wakaba 1.6
785     ## TODO: testdata
786 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
787    
788     ## "A valid media query [MQ]"
789     my $HTMLMQAttrChecker = sub {
790     my ($self, $attr) = @_;
791     $self->{onerror}->(node => $attr, level => 'unsupported',
792     type => 'media query');
793     ## ISSUE: What is "a valid media query"?
794     }; # $HTMLMQAttrChecker
795    
796     my $HTMLEventHandlerAttrChecker = sub {
797     my ($self, $attr) = @_;
798     $self->{onerror}->(node => $attr, level => 'unsupported',
799     type => 'event handler');
800     ## TODO: MUST contain valid ECMAScript code matching the
801     ## ECMAScript |FunctionBody| production. [ECMA262]
802     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
803     ## ISSUE: Automatic semicolon insertion does not apply?
804     ## ISSUE: Other script languages?
805     }; # $HTMLEventHandlerAttrChecker
806    
807     my $HTMLUsemapAttrChecker = sub {
808     my ($self, $attr) = @_;
809     ## MUST be a valid hashed ID reference to a |map| element
810     my $value = $attr->value;
811     if ($value =~ s/^#//) {
812     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
813     push @{$self->{usemap}}, [$value => $attr];
814     } else {
815     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
816     }
817     ## NOTE: Space characters in hashed ID references are conforming.
818     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
819     }; # $HTMLUsemapAttrChecker
820    
821     my $HTMLTargetAttrChecker = sub {
822     my ($self, $attr) = @_;
823     my $value = $attr->value;
824     if ($value =~ /^_/) {
825     $value = lc $value; ## ISSUE: ASCII case-insentitive?
826     unless ({
827     _self => 1, _parent => 1, _top => 1,
828     }->{$value}) {
829     $self->{onerror}->(node => $attr,
830     type => 'reserved browsing context name');
831     }
832     } else {
833     #$ ISSUE: An empty string is conforming?
834     }
835     }; # $HTMLTargetAttrChecker
836    
837     my $HTMLAttrChecker = {
838     id => sub {
839     ## NOTE: |map| has its own variant of |id=""| checker
840     my ($self, $attr) = @_;
841     my $value = $attr->value;
842     if (length $value > 0) {
843     if ($self->{id}->{$value}) {
844     $self->{onerror}->(node => $attr, type => 'duplicate ID');
845     push @{$self->{id}->{$value}}, $attr;
846     } else {
847     $self->{id}->{$value} = [$attr];
848     }
849     if ($value =~ /[\x09-\x0D\x20]/) {
850     $self->{onerror}->(node => $attr, type => 'space in ID');
851     }
852     } else {
853     ## NOTE: MUST contain at least one character
854     $self->{onerror}->(node => $attr, type => 'empty attribute value');
855     }
856     },
857     title => sub {}, ## NOTE: No conformance creteria
858     lang => sub {
859     my ($self, $attr) = @_;
860 wakaba 1.6 my $value = $attr->value;
861     if ($value eq '') {
862     #
863     } else {
864     require Whatpm::LangTag;
865     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
866     my %opt = @_;
867     my $type = 'LangTag:'.$opt{type};
868     $type .= ':' . $opt{subtag} if defined $opt{subtag};
869     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
870     level => $opt{level});
871     });
872     }
873 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
874     unless ($attr->owner_document->manakai_is_html) {
875     $self->{onerror}->(node => $attr, type => 'in XML:lang');
876     }
877 wakaba 1.6
878     ## TODO: test data
879 wakaba 1.1 },
880     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
881     class => sub {
882     my ($self, $attr) = @_;
883     my %word;
884     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
885     unless ($word{$word}) {
886     $word{$word} = 1;
887     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
888     } else {
889     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
890     }
891     }
892     },
893     contextmenu => sub {
894     my ($self, $attr) = @_;
895     my $value = $attr->value;
896     push @{$self->{contextmenu}}, [$value => $attr];
897     ## ISSUE: "The value must be the ID of a menu element in the DOM."
898     ## What is "in the DOM"? A menu Element node that is not part
899     ## of the Document tree is in the DOM? A menu Element node that
900     ## belong to another Document tree is in the DOM?
901     },
902     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
903 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
904     ## TODO: ref, template, registrationmark
905 wakaba 1.1 };
906    
907     for (qw/
908     onabort onbeforeunload onblur onchange onclick oncontextmenu
909     ondblclick ondrag ondragend ondragenter ondragleave ondragover
910     ondragstart ondrop onerror onfocus onkeydown onkeypress
911     onkeyup onload onmessage onmousedown onmousemove onmouseout
912     onmouseover onmouseup onmousewheel onresize onscroll onselect
913     onsubmit onunload
914     /) {
915     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
916     }
917    
918     my $GetHTMLAttrsChecker = sub {
919     my $element_specific_checker = shift;
920     return sub {
921     my ($self, $todo) = @_;
922     for my $attr (@{$todo->{node}->attributes}) {
923     my $attr_ns = $attr->namespace_uri;
924     $attr_ns = '' unless defined $attr_ns;
925     my $attr_ln = $attr->manakai_local_name;
926     my $checker;
927     if ($attr_ns eq '') {
928     $checker = $element_specific_checker->{$attr_ln}
929     || $HTMLAttrChecker->{$attr_ln};
930     }
931     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
932     || $AttrChecker->{$attr_ns}->{''};
933     if ($checker) {
934     $checker->($self, $attr, $todo);
935     } else {
936     $self->{onerror}->(node => $attr, level => 'unsupported',
937     type => 'attribute');
938     ## ISSUE: No comformance createria for unknown attributes in the spec
939     }
940     }
941     };
942     }; # $GetHTMLAttrsChecker
943    
944     our $Element;
945     our $ElementDefault;
946     our $AnyChecker;
947    
948     $Element->{$HTML_NS}->{''} = {
949     attrs_checker => $GetHTMLAttrsChecker->({}),
950     checker => $ElementDefault->{checker},
951     };
952    
953     $Element->{$HTML_NS}->{html} = {
954     is_root => 1,
955     attrs_checker => $GetHTMLAttrsChecker->({
956 wakaba 1.16 manifest => $HTMLURIAttrChecker,
957 wakaba 1.1 xmlns => sub {
958     my ($self, $attr) = @_;
959     my $value = $attr->value;
960     unless ($value eq $HTML_NS) {
961     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
962     }
963     unless ($attr->owner_document->manakai_is_html) {
964     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
965     ## TODO: Test
966     }
967     },
968     }),
969     checker => sub {
970     my ($self, $todo) = @_;
971     my $el = $todo->{node};
972     my $new_todos = [];
973     my @nodes = (@{$el->child_nodes});
974    
975     my $phase = 'before head';
976     while (@nodes) {
977     my $node = shift @nodes;
978     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
979    
980     my $nt = $node->node_type;
981     if ($nt == 1) {
982     my $node_ns = $node->namespace_uri;
983     $node_ns = '' unless defined $node_ns;
984     my $node_ln = $node->manakai_local_name;
985     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
986 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
987     #
988     } elsif ($phase eq 'before head') {
989 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
990     $phase = 'after head';
991     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
992     $self->{onerror}->(node => $node, type => 'ps element missing:head');
993     $phase = 'after body';
994     } else {
995     $not_allowed = 1;
996     # before head
997     }
998     } elsif ($phase eq 'after head') {
999     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1000     $phase = 'after body';
1001     } else {
1002     $not_allowed = 1;
1003     # after head
1004     }
1005     } else { #elsif ($phase eq 'after body') {
1006     $not_allowed = 1;
1007     # after body
1008     }
1009     $self->{onerror}->(node => $node, type => 'element not allowed')
1010     if $not_allowed;
1011     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1012     unshift @nodes, @$sib;
1013     push @$new_todos, @$ch;
1014     } elsif ($nt == 3 or $nt == 4) {
1015     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1016     $self->{onerror}->(node => $node, type => 'character not allowed');
1017     }
1018     } elsif ($nt == 5) {
1019     unshift @nodes, @{$node->child_nodes};
1020     }
1021     }
1022    
1023     if ($phase eq 'before head') {
1024     $self->{onerror}->(node => $el, type => 'child element missing:head');
1025     $self->{onerror}->(node => $el, type => 'child element missing:body');
1026     } elsif ($phase eq 'after head') {
1027     $self->{onerror}->(node => $el, type => 'child element missing:body');
1028     }
1029    
1030     return ($new_todos);
1031     },
1032     };
1033    
1034     $Element->{$HTML_NS}->{head} = {
1035     attrs_checker => $GetHTMLAttrsChecker->({}),
1036     checker => sub {
1037     my ($self, $todo) = @_;
1038     my $el = $todo->{node};
1039     my $new_todos = [];
1040     my @nodes = (@{$el->child_nodes});
1041    
1042     my $has_title;
1043     my $phase = 'initial'; # 'after charset', 'after base'
1044     while (@nodes) {
1045     my $node = shift @nodes;
1046     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1047    
1048     my $nt = $node->node_type;
1049     if ($nt == 1) {
1050     my $node_ns = $node->namespace_uri;
1051     $node_ns = '' unless defined $node_ns;
1052     my $node_ln = $node->manakai_local_name;
1053     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1054 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1055     #
1056     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1057 wakaba 1.1 $phase = 'after base';
1058     unless ($has_title) {
1059     $has_title = 1;
1060     } else {
1061     $not_allowed = 1;
1062     }
1063     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1064     if ($node->has_attribute_ns (undef, 'charset')) {
1065     if ($phase eq 'initial') {
1066     $phase = 'after charset';
1067     } else {
1068     $not_allowed = 1;
1069     ## NOTE: See also |base|'s "contexts" field in the spec
1070     }
1071 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1072     $node->has_attribute_ns (undef, 'http-equiv')) {
1073     $phase = 'after base';
1074 wakaba 1.1 } else {
1075     $phase = 'after base';
1076 wakaba 1.5 $not_allowed = 1;
1077 wakaba 1.1 }
1078     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1079     if ($phase eq 'initial' or $phase eq 'after charset') {
1080     $phase = 'after base';
1081     } else {
1082     $not_allowed = 1;
1083     }
1084     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1085     $phase = 'after base';
1086     if ($node->has_attribute_ns (undef, 'scoped')) {
1087     $not_allowed = 1;
1088     }
1089     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1090     $phase = 'after base';
1091     } else {
1092     $not_allowed = 1;
1093     }
1094     $self->{onerror}->(node => $node, type => 'element not allowed')
1095     if $not_allowed;
1096 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1097 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1098     unshift @nodes, @$sib;
1099     push @$new_todos, @$ch;
1100     } elsif ($nt == 3 or $nt == 4) {
1101     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1102     $self->{onerror}->(node => $node, type => 'character not allowed');
1103     }
1104     } elsif ($nt == 5) {
1105     unshift @nodes, @{$node->child_nodes};
1106     }
1107     }
1108     unless ($has_title) {
1109     $self->{onerror}->(node => $el, type => 'child element missing:title');
1110     }
1111     return ($new_todos);
1112     },
1113     };
1114    
1115     $Element->{$HTML_NS}->{title} = {
1116     attrs_checker => $GetHTMLAttrsChecker->({}),
1117     checker => $HTMLTextChecker,
1118     };
1119    
1120     $Element->{$HTML_NS}->{base} = {
1121 wakaba 1.4 attrs_checker => sub {
1122     my ($self, $todo) = @_;
1123    
1124 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1125     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1126    
1127     if ($self->{has_uri_attr} and $has_href) {
1128 wakaba 1.4 ## ISSUE: Are these examples conforming?
1129     ## <head profile="a b c"><base href> (except for |profile|'s
1130     ## non-conformance)
1131     ## <title xml:base="relative"/><base href/> (maybe it should be)
1132     ## <unknown xmlns="relative"/><base href/> (assuming that
1133     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1134     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1135     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1136     ## <style>@import 'relative';</style><base href>
1137     ## <script>location.href = 'relative';</script><base href>
1138 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1139     ## an exception.
1140 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1141     type => 'basehref after URI attribute');
1142     }
1143 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1144 wakaba 1.4 ## ISSUE: Are these examples conforming?
1145     ## <head><title xlink:href=""/><base target="name"/></head>
1146     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1147     ## (assuming that |xbl:xbl| is allowed before |base|)
1148     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1149     ## <link href=""/><base target="name"/>
1150     ## <link rel=unknown href=""><base target=name>
1151     $self->{onerror}->(node => $todo->{node},
1152     type => 'basetarget after hyperlink');
1153     }
1154    
1155 wakaba 1.14 if (not $has_href and not $has_target) {
1156     $self->{onerror}->(node => $todo->{node},
1157     type => 'attribute missing:href|target');
1158     }
1159    
1160 wakaba 1.4 return $GetHTMLAttrsChecker->({
1161     href => $HTMLURIAttrChecker,
1162     target => $HTMLTargetAttrChecker,
1163     })->($self, $todo);
1164     },
1165 wakaba 1.1 checker => $HTMLEmptyChecker,
1166     };
1167    
1168     $Element->{$HTML_NS}->{link} = {
1169     attrs_checker => sub {
1170     my ($self, $todo) = @_;
1171     $GetHTMLAttrsChecker->({
1172     href => $HTMLURIAttrChecker,
1173 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1174 wakaba 1.1 media => $HTMLMQAttrChecker,
1175     hreflang => $HTMLLanguageTagAttrChecker,
1176     type => $HTMLIMTAttrChecker,
1177     ## NOTE: Though |title| has special semantics,
1178     ## syntactically same as the |title| as global attribute.
1179     })->($self, $todo);
1180 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1181     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1182     } else {
1183 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1184     type => 'attribute missing:href');
1185     }
1186     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1187     $self->{onerror}->(node => $todo->{node},
1188     type => 'attribute missing:rel');
1189     }
1190     },
1191     checker => $HTMLEmptyChecker,
1192     };
1193    
1194     $Element->{$HTML_NS}->{meta} = {
1195     attrs_checker => sub {
1196     my ($self, $todo) = @_;
1197     my $name_attr;
1198     my $http_equiv_attr;
1199     my $charset_attr;
1200     my $content_attr;
1201     for my $attr (@{$todo->{node}->attributes}) {
1202     my $attr_ns = $attr->namespace_uri;
1203     $attr_ns = '' unless defined $attr_ns;
1204     my $attr_ln = $attr->manakai_local_name;
1205     my $checker;
1206     if ($attr_ns eq '') {
1207     if ($attr_ln eq 'content') {
1208     $content_attr = $attr;
1209     $checker = 1;
1210     } elsif ($attr_ln eq 'name') {
1211     $name_attr = $attr;
1212     $checker = 1;
1213     } elsif ($attr_ln eq 'http-equiv') {
1214     $http_equiv_attr = $attr;
1215     $checker = 1;
1216     } elsif ($attr_ln eq 'charset') {
1217     $charset_attr = $attr;
1218     $checker = 1;
1219     } else {
1220     $checker = $HTMLAttrChecker->{$attr_ln}
1221     || $AttrChecker->{$attr_ns}->{$attr_ln}
1222     || $AttrChecker->{$attr_ns}->{''};
1223     }
1224     } else {
1225     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1226     || $AttrChecker->{$attr_ns}->{''};
1227     }
1228     if ($checker) {
1229     $checker->($self, $attr) if ref $checker;
1230     } else {
1231     $self->{onerror}->(node => $attr, level => 'unsupported',
1232     type => 'attribute');
1233     ## ISSUE: No comformance createria for unknown attributes in the spec
1234     }
1235     }
1236    
1237     if (defined $name_attr) {
1238     if (defined $http_equiv_attr) {
1239     $self->{onerror}->(node => $http_equiv_attr,
1240     type => 'attribute not allowed');
1241     } elsif (defined $charset_attr) {
1242     $self->{onerror}->(node => $charset_attr,
1243     type => 'attribute not allowed');
1244     }
1245     my $metadata_name = $name_attr->value;
1246     my $metadata_value;
1247     if (defined $content_attr) {
1248     $metadata_value = $content_attr->value;
1249     } else {
1250     $self->{onerror}->(node => $todo->{node},
1251     type => 'attribute missing:content');
1252     $metadata_value = '';
1253     }
1254     } elsif (defined $http_equiv_attr) {
1255     if (defined $charset_attr) {
1256     $self->{onerror}->(node => $charset_attr,
1257     type => 'attribute not allowed');
1258     }
1259     unless (defined $content_attr) {
1260     $self->{onerror}->(node => $todo->{node},
1261     type => 'attribute missing:content');
1262     }
1263     } elsif (defined $charset_attr) {
1264     if (defined $content_attr) {
1265     $self->{onerror}->(node => $content_attr,
1266     type => 'attribute not allowed');
1267     }
1268     } else {
1269     if (defined $content_attr) {
1270     $self->{onerror}->(node => $content_attr,
1271     type => 'attribute not allowed');
1272     $self->{onerror}->(node => $todo->{node},
1273     type => 'attribute missing:name|http-equiv');
1274     } else {
1275     $self->{onerror}->(node => $todo->{node},
1276     type => 'attribute missing:name|http-equiv|charset');
1277     }
1278     }
1279    
1280     ## TODO: metadata conformance
1281    
1282     ## TODO: pragma conformance
1283     if (defined $http_equiv_attr) { ## An enumerated attribute
1284     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1285     if ({
1286     'refresh' => 1,
1287     'default-style' => 1,
1288     }->{$keyword}) {
1289     #
1290 wakaba 1.19 } elsif ($keyword eq 'content-type') {
1291     $self->{onerror}
1292     ->(node => $http_equiv_attr,
1293     type => 'enumerated:invalid:http-equiv:content-type');
1294 wakaba 1.1 } else {
1295     $self->{onerror}->(node => $http_equiv_attr,
1296     type => 'enumerated:invalid');
1297     }
1298     }
1299    
1300     if (defined $charset_attr) {
1301     unless ($todo->{node}->owner_document->manakai_is_html) {
1302     $self->{onerror}->(node => $charset_attr,
1303     type => 'in XML:charset');
1304     }
1305     ## TODO: charset
1306     }
1307     },
1308     checker => $HTMLEmptyChecker,
1309     };
1310    
1311     $Element->{$HTML_NS}->{style} = {
1312     attrs_checker => $GetHTMLAttrsChecker->({
1313     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1314     media => $HTMLMQAttrChecker,
1315     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1316     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1317     ## not different
1318     }),
1319     checker => sub {
1320     ## NOTE: |html:style| has no conformance creteria on content model
1321     my ($self, $todo) = @_;
1322     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1323     $type = 'text/css' unless defined $type;
1324     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1325     type => 'style:'.$type); ## TODO: $type normalization
1326     return $AnyChecker->($self, $todo);
1327     },
1328     };
1329    
1330     $Element->{$HTML_NS}->{body} = {
1331     attrs_checker => $GetHTMLAttrsChecker->({}),
1332     checker => $HTMLBlockChecker,
1333     };
1334    
1335     $Element->{$HTML_NS}->{section} = {
1336     attrs_checker => $GetHTMLAttrsChecker->({}),
1337     checker => $HTMLStylableBlockChecker,
1338     };
1339    
1340     $Element->{$HTML_NS}->{nav} = {
1341     attrs_checker => $GetHTMLAttrsChecker->({}),
1342     checker => $HTMLBlockOrInlineChecker,
1343     };
1344    
1345     $Element->{$HTML_NS}->{article} = {
1346     attrs_checker => $GetHTMLAttrsChecker->({}),
1347     checker => $HTMLStylableBlockChecker,
1348     };
1349    
1350     $Element->{$HTML_NS}->{blockquote} = {
1351     attrs_checker => $GetHTMLAttrsChecker->({
1352     cite => $HTMLURIAttrChecker,
1353     }),
1354     checker => $HTMLBlockChecker,
1355     };
1356    
1357     $Element->{$HTML_NS}->{aside} = {
1358     attrs_checker => $GetHTMLAttrsChecker->({}),
1359     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1360     };
1361    
1362     $Element->{$HTML_NS}->{h1} = {
1363     attrs_checker => $GetHTMLAttrsChecker->({}),
1364     checker => sub {
1365     my ($self, $todo) = @_;
1366     $todo->{flag}->{has_heading}->[0] = 1;
1367 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1368 wakaba 1.1 },
1369     };
1370    
1371     $Element->{$HTML_NS}->{h2} = {
1372     attrs_checker => $GetHTMLAttrsChecker->({}),
1373     checker => $Element->{$HTML_NS}->{h1}->{checker},
1374     };
1375    
1376     $Element->{$HTML_NS}->{h3} = {
1377     attrs_checker => $GetHTMLAttrsChecker->({}),
1378     checker => $Element->{$HTML_NS}->{h1}->{checker},
1379     };
1380    
1381     $Element->{$HTML_NS}->{h4} = {
1382     attrs_checker => $GetHTMLAttrsChecker->({}),
1383     checker => $Element->{$HTML_NS}->{h1}->{checker},
1384     };
1385    
1386     $Element->{$HTML_NS}->{h5} = {
1387     attrs_checker => $GetHTMLAttrsChecker->({}),
1388     checker => $Element->{$HTML_NS}->{h1}->{checker},
1389     };
1390    
1391     $Element->{$HTML_NS}->{h6} = {
1392     attrs_checker => $GetHTMLAttrsChecker->({}),
1393     checker => $Element->{$HTML_NS}->{h1}->{checker},
1394     };
1395    
1396     $Element->{$HTML_NS}->{header} = {
1397     attrs_checker => $GetHTMLAttrsChecker->({}),
1398     checker => sub {
1399     my ($self, $todo) = @_;
1400     my $old_flag = $todo->{flag}->{has_heading} || [];
1401     my $new_flag = [];
1402     local $todo->{flag}->{has_heading} = $new_flag;
1403     my $node = $todo->{node};
1404    
1405     my $end = $self->_add_minuses
1406     ({$HTML_NS => {qw/header 1 footer 1/}},
1407     $HTMLSectioningElements);
1408     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1409     push @$new_todos, $end,
1410     {type => 'code', code => sub {
1411     if ($new_flag->[0]) {
1412     $old_flag->[0] = 1;
1413     } else {
1414     $self->{onerror}->(node => $node, type => 'element missing:hn');
1415     }
1416     }};
1417     return ($new_todos, $ch);
1418     },
1419     };
1420    
1421     $Element->{$HTML_NS}->{footer} = {
1422     attrs_checker => $GetHTMLAttrsChecker->({}),
1423     checker => sub { ## block -hn -header -footer -sectioning or inline
1424     my ($self, $todo) = @_;
1425     my $el = $todo->{node};
1426     my $new_todos = [];
1427     my @nodes = (@{$el->child_nodes});
1428    
1429     my $content = 'block-or-inline'; # or 'block' or 'inline'
1430     my @block_not_inline;
1431     while (@nodes) {
1432     my $node = shift @nodes;
1433     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1434    
1435     my $nt = $node->node_type;
1436     if ($nt == 1) {
1437     my $node_ns = $node->namespace_uri;
1438     $node_ns = '' unless defined $node_ns;
1439     my $node_ln = $node->manakai_local_name;
1440     my $not_allowed;
1441     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1442     $not_allowed = 1;
1443     } elsif ($node_ns eq $HTML_NS and
1444     {
1445     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1446     }->{$node_ln}) {
1447     $not_allowed = 1;
1448     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1449     $not_allowed = 1;
1450     }
1451     if ($content eq 'block') {
1452     $not_allowed = 1
1453 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1454     $self->{pluses}->{$node_ns}->{$node_ln};
1455 wakaba 1.1 } elsif ($content eq 'inline') {
1456     $not_allowed = 1
1457     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1458 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1459     $self->{pluses}->{$node_ns}->{$node_ln};
1460 wakaba 1.1 } else {
1461     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1462     my $is_inline
1463     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1464     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1465    
1466     push @block_not_inline, $node
1467     if $is_block and not $is_inline and not $not_allowed;
1468 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1469 wakaba 1.1 $content = 'inline';
1470     for (@block_not_inline) {
1471     $self->{onerror}->(node => $_, type => 'element not allowed');
1472     }
1473     $not_allowed = 1 unless $is_inline;
1474     }
1475     }
1476     $self->{onerror}->(node => $node, type => 'element not allowed')
1477     if $not_allowed;
1478     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1479     unshift @nodes, @$sib;
1480     push @$new_todos, @$ch;
1481     } elsif ($nt == 3 or $nt == 4) {
1482     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1483     if ($content eq 'block') {
1484     $self->{onerror}->(node => $node, type => 'character not allowed');
1485     } else {
1486     $content = 'inline';
1487     for (@block_not_inline) {
1488     $self->{onerror}->(node => $_, type => 'element not allowed');
1489     }
1490     }
1491     }
1492     } elsif ($nt == 5) {
1493     unshift @nodes, @{$node->child_nodes};
1494     }
1495     }
1496    
1497     my $end = $self->_add_minuses
1498     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1499     $HTMLSectioningElements);
1500     push @$new_todos, $end;
1501    
1502     if ($content eq 'inline') {
1503     for (@$new_todos) {
1504     $_->{inline} = 1;
1505     }
1506     }
1507    
1508     return ($new_todos);
1509     },
1510     };
1511    
1512     $Element->{$HTML_NS}->{address} = {
1513     attrs_checker => $GetHTMLAttrsChecker->({}),
1514     checker => $HTMLInlineChecker,
1515     };
1516    
1517     $Element->{$HTML_NS}->{p} = {
1518     attrs_checker => $GetHTMLAttrsChecker->({}),
1519 wakaba 1.13 checker => $HTMLInlineChecker,
1520 wakaba 1.1 };
1521    
1522     $Element->{$HTML_NS}->{hr} = {
1523     attrs_checker => $GetHTMLAttrsChecker->({}),
1524     checker => $HTMLEmptyChecker,
1525     };
1526    
1527     $Element->{$HTML_NS}->{br} = {
1528     attrs_checker => $GetHTMLAttrsChecker->({}),
1529     checker => $HTMLEmptyChecker,
1530     };
1531    
1532     $Element->{$HTML_NS}->{dialog} = {
1533     attrs_checker => $GetHTMLAttrsChecker->({}),
1534     checker => sub {
1535     my ($self, $todo) = @_;
1536     my $el = $todo->{node};
1537     my $new_todos = [];
1538     my @nodes = (@{$el->child_nodes});
1539    
1540     my $phase = 'before dt';
1541     while (@nodes) {
1542     my $node = shift @nodes;
1543     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1544    
1545     my $nt = $node->node_type;
1546     if ($nt == 1) {
1547     my $node_ns = $node->namespace_uri;
1548     $node_ns = '' unless defined $node_ns;
1549     my $node_ln = $node->manakai_local_name;
1550     ## NOTE: |minuses| list is not checked since redundant
1551 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1552     #
1553     } elsif ($phase eq 'before dt') {
1554 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1555     $phase = 'before dd';
1556     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1557     $self->{onerror}
1558     ->(node => $node, type => 'ps element missing:dt');
1559     $phase = 'before dt';
1560     } else {
1561     $self->{onerror}->(node => $node, type => 'element not allowed');
1562     }
1563     } else { # before dd
1564     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1565     $phase = 'before dt';
1566     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1567     $self->{onerror}
1568     ->(node => $node, type => 'ps element missing:dd');
1569     $phase = 'before dd';
1570     } else {
1571     $self->{onerror}->(node => $node, type => 'element not allowed');
1572     }
1573     }
1574     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1575     unshift @nodes, @$sib;
1576     push @$new_todos, @$ch;
1577     } elsif ($nt == 3 or $nt == 4) {
1578     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1579     $self->{onerror}->(node => $node, type => 'character not allowed');
1580     }
1581     } elsif ($nt == 5) {
1582     unshift @nodes, @{$node->child_nodes};
1583     }
1584     }
1585     if ($phase eq 'before dd') {
1586 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1587 wakaba 1.1 }
1588     return ($new_todos);
1589     },
1590     };
1591    
1592     $Element->{$HTML_NS}->{pre} = {
1593     attrs_checker => $GetHTMLAttrsChecker->({}),
1594     checker => $HTMLStrictlyInlineChecker,
1595     };
1596    
1597     $Element->{$HTML_NS}->{ol} = {
1598     attrs_checker => $GetHTMLAttrsChecker->({
1599     start => $HTMLIntegerAttrChecker,
1600     }),
1601     checker => sub {
1602     my ($self, $todo) = @_;
1603     my $el = $todo->{node};
1604     my $new_todos = [];
1605     my @nodes = (@{$el->child_nodes});
1606    
1607     while (@nodes) {
1608     my $node = shift @nodes;
1609     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1610    
1611     my $nt = $node->node_type;
1612     if ($nt == 1) {
1613     my $node_ns = $node->namespace_uri;
1614     $node_ns = '' unless defined $node_ns;
1615     my $node_ln = $node->manakai_local_name;
1616     ## NOTE: |minuses| list is not checked since redundant
1617 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1618     #
1619     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1620 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1621     }
1622     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1623     unshift @nodes, @$sib;
1624     push @$new_todos, @$ch;
1625     } elsif ($nt == 3 or $nt == 4) {
1626     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1627     $self->{onerror}->(node => $node, type => 'character not allowed');
1628     }
1629     } elsif ($nt == 5) {
1630     unshift @nodes, @{$node->child_nodes};
1631     }
1632     }
1633    
1634     if ($todo->{inline}) {
1635     for (@$new_todos) {
1636     $_->{inline} = 1;
1637     }
1638     }
1639     return ($new_todos);
1640     },
1641     };
1642    
1643     $Element->{$HTML_NS}->{ul} = {
1644     attrs_checker => $GetHTMLAttrsChecker->({}),
1645     checker => $Element->{$HTML_NS}->{ol}->{checker},
1646     };
1647    
1648     $Element->{$HTML_NS}->{li} = {
1649     attrs_checker => $GetHTMLAttrsChecker->({
1650     start => sub {
1651     my ($self, $attr) = @_;
1652     my $parent = $attr->owner_element->manakai_parent_element;
1653     if (defined $parent) {
1654     my $parent_ns = $parent->namespace_uri;
1655     $parent_ns = '' unless defined $parent_ns;
1656     my $parent_ln = $parent->manakai_local_name;
1657     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1658     $self->{onerror}->(node => $attr, level => 'unsupported',
1659     type => 'attribute');
1660     }
1661     }
1662     $HTMLIntegerAttrChecker->($self, $attr);
1663     },
1664     }),
1665     checker => sub {
1666     my ($self, $todo) = @_;
1667     if ($todo->{inline}) {
1668     return $HTMLInlineChecker->($self, $todo);
1669     } else {
1670     return $HTMLBlockOrInlineChecker->($self, $todo);
1671     }
1672     },
1673     };
1674    
1675     $Element->{$HTML_NS}->{dl} = {
1676     attrs_checker => $GetHTMLAttrsChecker->({}),
1677     checker => sub {
1678     my ($self, $todo) = @_;
1679     my $el = $todo->{node};
1680     my $new_todos = [];
1681     my @nodes = (@{$el->child_nodes});
1682    
1683     my $phase = 'before dt';
1684     while (@nodes) {
1685     my $node = shift @nodes;
1686     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1687    
1688     my $nt = $node->node_type;
1689     if ($nt == 1) {
1690     my $node_ns = $node->namespace_uri;
1691     $node_ns = '' unless defined $node_ns;
1692     my $node_ln = $node->manakai_local_name;
1693     ## NOTE: |minuses| list is not checked since redundant
1694 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1695     #
1696     } elsif ($phase eq 'in dds') {
1697 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1698     #$phase = 'in dds';
1699     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1700     $phase = 'in dts';
1701     } else {
1702     $self->{onerror}->(node => $node, type => 'element not allowed');
1703     }
1704     } elsif ($phase eq 'in dts') {
1705     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1706     #$phase = 'in dts';
1707     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1708     $phase = 'in dds';
1709     } else {
1710     $self->{onerror}->(node => $node, type => 'element not allowed');
1711     }
1712     } else { # before dt
1713     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1714     $phase = 'in dts';
1715     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1716     $self->{onerror}
1717     ->(node => $node, type => 'ps element missing:dt');
1718     $phase = 'in dds';
1719     } else {
1720     $self->{onerror}->(node => $node, type => 'element not allowed');
1721     }
1722     }
1723     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1724     unshift @nodes, @$sib;
1725     push @$new_todos, @$ch;
1726     } elsif ($nt == 3 or $nt == 4) {
1727     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1728     $self->{onerror}->(node => $node, type => 'character not allowed');
1729     }
1730     } elsif ($nt == 5) {
1731     unshift @nodes, @{$node->child_nodes};
1732     }
1733     }
1734     if ($phase eq 'in dts') {
1735 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1736 wakaba 1.1 }
1737    
1738     if ($todo->{inline}) {
1739     for (@$new_todos) {
1740     $_->{inline} = 1;
1741     }
1742     }
1743     return ($new_todos);
1744     },
1745     };
1746    
1747     $Element->{$HTML_NS}->{dt} = {
1748     attrs_checker => $GetHTMLAttrsChecker->({}),
1749     checker => $HTMLStrictlyInlineChecker,
1750     };
1751    
1752     $Element->{$HTML_NS}->{dd} = {
1753     attrs_checker => $GetHTMLAttrsChecker->({}),
1754     checker => $Element->{$HTML_NS}->{li}->{checker},
1755     };
1756    
1757     $Element->{$HTML_NS}->{a} = {
1758     attrs_checker => sub {
1759     my ($self, $todo) = @_;
1760     my %attr;
1761     for my $attr (@{$todo->{node}->attributes}) {
1762     my $attr_ns = $attr->namespace_uri;
1763     $attr_ns = '' unless defined $attr_ns;
1764     my $attr_ln = $attr->manakai_local_name;
1765     my $checker;
1766     if ($attr_ns eq '') {
1767     $checker = {
1768     target => $HTMLTargetAttrChecker,
1769     href => $HTMLURIAttrChecker,
1770     ping => $HTMLSpaceURIsAttrChecker,
1771 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1772 wakaba 1.1 media => $HTMLMQAttrChecker,
1773     hreflang => $HTMLLanguageTagAttrChecker,
1774     type => $HTMLIMTAttrChecker,
1775     }->{$attr_ln};
1776     if ($checker) {
1777     $attr{$attr_ln} = $attr;
1778     } else {
1779     $checker = $HTMLAttrChecker->{$attr_ln};
1780     }
1781     }
1782     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1783     || $AttrChecker->{$attr_ns}->{''};
1784     if ($checker) {
1785     $checker->($self, $attr) if ref $checker;
1786     } else {
1787     $self->{onerror}->(node => $attr, level => 'unsupported',
1788     type => 'attribute');
1789     ## ISSUE: No comformance createria for unknown attributes in the spec
1790     }
1791     }
1792    
1793 wakaba 1.4 if (defined $attr{href}) {
1794     $self->{has_hyperlink_element} = 1;
1795     } else {
1796 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1797     if (defined $attr{$_}) {
1798     $self->{onerror}->(node => $attr{$_},
1799     type => 'attribute not allowed');
1800     }
1801     }
1802     }
1803     },
1804     checker => sub {
1805     my ($self, $todo) = @_;
1806    
1807     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1808     my ($new_todos, $ch)
1809 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
1810 wakaba 1.1 push @$new_todos, $end;
1811    
1812 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1813     $_->{flag}->{in_a_href} = 1 for @$new_todos;
1814     }
1815 wakaba 1.1
1816     return ($new_todos, $ch);
1817     },
1818     };
1819    
1820     $Element->{$HTML_NS}->{q} = {
1821     attrs_checker => $GetHTMLAttrsChecker->({
1822     cite => $HTMLURIAttrChecker,
1823     }),
1824     checker => $HTMLInlineOrStrictlyInlineChecker,
1825     };
1826    
1827     $Element->{$HTML_NS}->{cite} = {
1828     attrs_checker => $GetHTMLAttrsChecker->({}),
1829     checker => $HTMLStrictlyInlineChecker,
1830     };
1831    
1832     $Element->{$HTML_NS}->{em} = {
1833     attrs_checker => $GetHTMLAttrsChecker->({}),
1834     checker => $HTMLInlineOrStrictlyInlineChecker,
1835     };
1836    
1837     $Element->{$HTML_NS}->{strong} = {
1838     attrs_checker => $GetHTMLAttrsChecker->({}),
1839     checker => $HTMLInlineOrStrictlyInlineChecker,
1840     };
1841    
1842     $Element->{$HTML_NS}->{small} = {
1843     attrs_checker => $GetHTMLAttrsChecker->({}),
1844     checker => $HTMLInlineOrStrictlyInlineChecker,
1845     };
1846    
1847     $Element->{$HTML_NS}->{m} = {
1848     attrs_checker => $GetHTMLAttrsChecker->({}),
1849     checker => $HTMLInlineOrStrictlyInlineChecker,
1850     };
1851    
1852     $Element->{$HTML_NS}->{dfn} = {
1853     attrs_checker => $GetHTMLAttrsChecker->({}),
1854     checker => sub {
1855     my ($self, $todo) = @_;
1856    
1857     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1858     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1859     push @$sib, $end;
1860    
1861     my $node = $todo->{node};
1862     my $term = $node->get_attribute_ns (undef, 'title');
1863     unless (defined $term) {
1864     for my $child (@{$node->child_nodes}) {
1865     if ($child->node_type == 1) { # ELEMENT_NODE
1866     if (defined $term) {
1867     undef $term;
1868     last;
1869     } elsif ($child->manakai_local_name eq 'abbr') {
1870     my $nsuri = $child->namespace_uri;
1871     if (defined $nsuri and $nsuri eq $HTML_NS) {
1872     my $attr = $child->get_attribute_node_ns (undef, 'title');
1873     if ($attr) {
1874     $term = $attr->value;
1875     }
1876     }
1877     }
1878     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1879     ## TEXT_NODE or CDATA_SECTION_NODE
1880     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1881     next;
1882     }
1883     undef $term;
1884     last;
1885     }
1886     }
1887     unless (defined $term) {
1888     $term = $node->text_content;
1889     }
1890     }
1891     if ($self->{term}->{$term}) {
1892     $self->{onerror}->(node => $node, type => 'duplicate term');
1893     push @{$self->{term}->{$term}}, $node;
1894     } else {
1895     $self->{term}->{$term} = [$node];
1896     }
1897     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1898     ## has |title|.
1899    
1900     return ($sib, $ch);
1901     },
1902     };
1903    
1904     $Element->{$HTML_NS}->{abbr} = {
1905     attrs_checker => $GetHTMLAttrsChecker->({
1906     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1907     ## not different. The spec says that the |title| MAY be omitted
1908     ## if there is a |dfn| whose defining term is the abbreviation,
1909     ## but it does not prohibit |abbr| w/o |title| in other cases.
1910     }),
1911     checker => $HTMLStrictlyInlineChecker,
1912     };
1913    
1914     $Element->{$HTML_NS}->{time} = {
1915     attrs_checker => $GetHTMLAttrsChecker->({
1916     datetime => sub { 1 }, # checked in |checker|
1917     }),
1918     ## TODO: Write tests
1919     checker => sub {
1920     my ($self, $todo) = @_;
1921    
1922     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1923     my $input;
1924     my $reg_sp;
1925     my $input_node;
1926     if ($attr) {
1927     $input = $attr->value;
1928     $reg_sp = qr/[\x09-\x0D\x20]*/;
1929     $input_node = $attr;
1930     } else {
1931     $input = $todo->{node}->text_content;
1932     $reg_sp = qr/\p{Zs}*/;
1933     $input_node = $todo->{node};
1934    
1935     ## ISSUE: What is the definition for "successfully extracts a date
1936     ## or time"? If the algorithm says the string is invalid but
1937     ## return some date or time, is it "successfully"?
1938     }
1939    
1940     my $hour;
1941     my $minute;
1942     my $second;
1943     if ($input =~ /
1944     \A
1945     [\x09-\x0D\x20]*
1946     ([0-9]+) # 1
1947     (?>
1948     -([0-9]+) # 2
1949     -([0-9]+) # 3
1950     [\x09-\x0D\x20]*
1951     (?>
1952     T
1953     [\x09-\x0D\x20]*
1954     )?
1955     ([0-9]+) # 4
1956     :([0-9]+) # 5
1957     (?>
1958     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1959     )?
1960     [\x09-\x0D\x20]*
1961     (?>
1962     Z
1963     [\x09-\x0D\x20]*
1964     |
1965     [+-]([0-9]+):([0-9]+) # 7, 8
1966     [\x09-\x0D\x20]*
1967     )?
1968     \z
1969     |
1970     :([0-9]+) # 9
1971     (?>
1972     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1973     )?
1974     [\x09-\x0D\x20]*\z
1975     )
1976     /x) {
1977     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1978     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1979     length $4 != 2 or length $5 != 2) {
1980     $self->{onerror}->(node => $input_node,
1981     type => 'dateortime:syntax error');
1982     }
1983    
1984     if (1 <= $2 and $2 <= 12) {
1985     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1986     if $3 < 1 or
1987     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1988     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1989     if $2 == 2 and $3 == 29 and
1990     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1991     } else {
1992     $self->{onerror}->(node => $input_node,
1993     type => 'datetime:bad month');
1994     }
1995    
1996     ($hour, $minute, $second) = ($4, $5, $6);
1997    
1998     if (defined $7) { ## [+-]hh:mm
1999     if (length $7 != 2 or length $8 != 2) {
2000     $self->{onerror}->(node => $input_node,
2001     type => 'dateortime:syntax error');
2002     }
2003    
2004     $self->{onerror}->(node => $input_node,
2005     type => 'datetime:bad timezone hour')
2006     if $7 > 23;
2007     $self->{onerror}->(node => $input_node,
2008     type => 'datetime:bad timezone minute')
2009     if $8 > 59;
2010     }
2011     } else { ## hh:mm
2012     if (length $1 != 2 or length $9 != 2) {
2013     $self->{onerror}->(node => $input_node,
2014     type => qq'dateortime:syntax error');
2015     }
2016    
2017     ($hour, $minute, $second) = ($1, $9, $10);
2018     }
2019    
2020     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2021     if $hour > 23;
2022     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2023     if $minute > 59;
2024    
2025     if (defined $second) { ## s
2026     ## NOTE: Integer part of second don't have to have length of two.
2027    
2028     if (substr ($second, 0, 1) eq '.') {
2029     $self->{onerror}->(node => $input_node,
2030     type => 'dateortime:syntax error');
2031     }
2032    
2033     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2034     if $second >= 60;
2035     }
2036     } else {
2037     $self->{onerror}->(node => $input_node,
2038     type => 'dateortime:syntax error');
2039     }
2040    
2041     return $HTMLStrictlyInlineChecker->($self, $todo);
2042     },
2043     };
2044    
2045     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2046     attrs_checker => $GetHTMLAttrsChecker->({
2047     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2048     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2049     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2050     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2051     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2052     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2053     }),
2054     checker => $HTMLStrictlyInlineChecker,
2055     };
2056    
2057     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2058     attrs_checker => $GetHTMLAttrsChecker->({
2059     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2060     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2061     }),
2062     checker => $HTMLStrictlyInlineChecker,
2063     };
2064    
2065     $Element->{$HTML_NS}->{code} = {
2066     attrs_checker => $GetHTMLAttrsChecker->({}),
2067     ## NOTE: Though |title| has special semantics,
2068     ## syntatically same as the |title| as global attribute.
2069     checker => $HTMLInlineOrStrictlyInlineChecker,
2070     };
2071    
2072     $Element->{$HTML_NS}->{var} = {
2073     attrs_checker => $GetHTMLAttrsChecker->({}),
2074     ## NOTE: Though |title| has special semantics,
2075     ## syntatically same as the |title| as global attribute.
2076     checker => $HTMLStrictlyInlineChecker,
2077     };
2078    
2079     $Element->{$HTML_NS}->{samp} = {
2080     attrs_checker => $GetHTMLAttrsChecker->({}),
2081     ## NOTE: Though |title| has special semantics,
2082     ## syntatically same as the |title| as global attribute.
2083     checker => $HTMLInlineOrStrictlyInlineChecker,
2084     };
2085    
2086     $Element->{$HTML_NS}->{kbd} = {
2087     attrs_checker => $GetHTMLAttrsChecker->({}),
2088     checker => $HTMLStrictlyInlineChecker,
2089     };
2090    
2091     $Element->{$HTML_NS}->{sub} = {
2092     attrs_checker => $GetHTMLAttrsChecker->({}),
2093     checker => $HTMLStrictlyInlineChecker,
2094     };
2095    
2096     $Element->{$HTML_NS}->{sup} = {
2097     attrs_checker => $GetHTMLAttrsChecker->({}),
2098     checker => $HTMLStrictlyInlineChecker,
2099     };
2100    
2101     $Element->{$HTML_NS}->{span} = {
2102     attrs_checker => $GetHTMLAttrsChecker->({}),
2103     ## NOTE: Though |title| has special semantics,
2104     ## syntatically same as the |title| as global attribute.
2105     checker => $HTMLInlineOrStrictlyInlineChecker,
2106     };
2107    
2108     $Element->{$HTML_NS}->{i} = {
2109     attrs_checker => $GetHTMLAttrsChecker->({}),
2110     ## NOTE: Though |title| has special semantics,
2111     ## syntatically same as the |title| as global attribute.
2112     checker => $HTMLStrictlyInlineChecker,
2113     };
2114    
2115     $Element->{$HTML_NS}->{b} = {
2116     attrs_checker => $GetHTMLAttrsChecker->({}),
2117     checker => $HTMLStrictlyInlineChecker,
2118     };
2119    
2120     $Element->{$HTML_NS}->{bdo} = {
2121     attrs_checker => sub {
2122     my ($self, $todo) = @_;
2123     $GetHTMLAttrsChecker->({})->($self, $todo);
2124     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2125     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2126     }
2127     },
2128     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2129     checker => $HTMLStrictlyInlineChecker,
2130     };
2131    
2132     $Element->{$HTML_NS}->{ins} = {
2133     attrs_checker => $GetHTMLAttrsChecker->({
2134     cite => $HTMLURIAttrChecker,
2135     datetime => $HTMLDatetimeAttrChecker,
2136     }),
2137     checker => $HTMLTransparentChecker,
2138     };
2139    
2140     $Element->{$HTML_NS}->{del} = {
2141     attrs_checker => $GetHTMLAttrsChecker->({
2142     cite => $HTMLURIAttrChecker,
2143     datetime => $HTMLDatetimeAttrChecker,
2144     }),
2145     checker => sub {
2146     my ($self, $todo) = @_;
2147    
2148     my $parent = $todo->{node}->manakai_parent_element;
2149     if (defined $parent) {
2150     my $nsuri = $parent->namespace_uri;
2151     $nsuri = '' unless defined $nsuri;
2152     my $ln = $parent->manakai_local_name;
2153     my $eldef = $Element->{$nsuri}->{$ln} ||
2154     $Element->{$nsuri}->{''} ||
2155     $ElementDefault;
2156     return $eldef->{checker}->($self, $todo);
2157     } else {
2158     return $HTMLBlockOrInlineChecker->($self, $todo);
2159     }
2160     },
2161     };
2162    
2163     ## TODO: figure
2164 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2165 wakaba 1.1
2166 wakaba 1.4 ## TODO: |alt|
2167 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2168     attrs_checker => sub {
2169     my ($self, $todo) = @_;
2170     $GetHTMLAttrsChecker->({
2171     alt => sub { }, ## NOTE: No syntactical requirement
2172     src => $HTMLURIAttrChecker,
2173     usemap => $HTMLUsemapAttrChecker,
2174     ismap => sub {
2175     my ($self, $attr, $parent_todo) = @_;
2176 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2177     $self->{onerror}->(node => $attr,
2178     type => 'attribute not allowed:ismap');
2179 wakaba 1.1 }
2180     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2181     },
2182     ## TODO: height
2183     ## TODO: width
2184     })->($self, $todo);
2185     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2186     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2187     }
2188     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2189     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2190     }
2191     },
2192     checker => $HTMLEmptyChecker,
2193     };
2194    
2195     $Element->{$HTML_NS}->{iframe} = {
2196     attrs_checker => $GetHTMLAttrsChecker->({
2197     src => $HTMLURIAttrChecker,
2198     }),
2199     checker => $HTMLTextChecker,
2200     };
2201    
2202     $Element->{$HTML_NS}->{embed} = {
2203     attrs_checker => sub {
2204     my ($self, $todo) = @_;
2205     my $has_src;
2206     for my $attr (@{$todo->{node}->attributes}) {
2207     my $attr_ns = $attr->namespace_uri;
2208     $attr_ns = '' unless defined $attr_ns;
2209     my $attr_ln = $attr->manakai_local_name;
2210     my $checker;
2211     if ($attr_ns eq '') {
2212     if ($attr_ln eq 'src') {
2213     $checker = $HTMLURIAttrChecker;
2214     $has_src = 1;
2215     } elsif ($attr_ln eq 'type') {
2216     $checker = $HTMLIMTAttrChecker;
2217     } else {
2218     ## TODO: height
2219     ## TODO: width
2220     $checker = $HTMLAttrChecker->{$attr_ln}
2221     || sub { }; ## NOTE: Any local attribute is ok.
2222     }
2223     }
2224     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2225     || $AttrChecker->{$attr_ns}->{''};
2226     if ($checker) {
2227     $checker->($self, $attr);
2228     } else {
2229     $self->{onerror}->(node => $attr, level => 'unsupported',
2230     type => 'attribute');
2231     ## ISSUE: No comformance createria for global attributes in the spec
2232     }
2233     }
2234    
2235     unless ($has_src) {
2236     $self->{onerror}->(node => $todo->{node},
2237     type => 'attribute missing:src');
2238     }
2239     },
2240     checker => $HTMLEmptyChecker,
2241     };
2242    
2243     $Element->{$HTML_NS}->{object} = {
2244     attrs_checker => sub {
2245     my ($self, $todo) = @_;
2246     $GetHTMLAttrsChecker->({
2247     data => $HTMLURIAttrChecker,
2248     type => $HTMLIMTAttrChecker,
2249     usemap => $HTMLUsemapAttrChecker,
2250     ## TODO: width
2251     ## TODO: height
2252     })->($self, $todo);
2253     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2254     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2255     $self->{onerror}->(node => $todo->{node},
2256     type => 'attribute missing:data|type');
2257     }
2258     }
2259     },
2260     checker => $ElementDefault->{checker}, ## TODO
2261 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2262 wakaba 1.1 };
2263    
2264     $Element->{$HTML_NS}->{param} = {
2265     attrs_checker => sub {
2266     my ($self, $todo) = @_;
2267     $GetHTMLAttrsChecker->({
2268     name => sub { },
2269     value => sub { },
2270     })->($self, $todo);
2271     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2272     $self->{onerror}->(node => $todo->{node},
2273     type => 'attribute missing:name');
2274     }
2275     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2276     $self->{onerror}->(node => $todo->{node},
2277     type => 'attribute missing:value');
2278     }
2279     },
2280     checker => $HTMLEmptyChecker,
2281     };
2282    
2283     $Element->{$HTML_NS}->{video} = {
2284     attrs_checker => $GetHTMLAttrsChecker->({
2285     src => $HTMLURIAttrChecker,
2286     ## TODO: start, loopstart, loopend, end
2287     ## ISSUE: they MUST be "value time offset"s. Value?
2288 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2289 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2290     controls => $GetHTMLBooleanAttrChecker->('controls'),
2291 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2292     ## TODO: width, height (not for audio!)
2293 wakaba 1.1 }),
2294     checker => sub {
2295     my ($self, $todo) = @_;
2296    
2297     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2298     return $HTMLBlockOrInlineChecker->($self, $todo);
2299     } else {
2300     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2301     ->($self, $todo);
2302     }
2303     },
2304     };
2305    
2306     $Element->{$HTML_NS}->{audio} = {
2307     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2308     checker => $Element->{$HTML_NS}->{video}->{checker},
2309     };
2310    
2311     $Element->{$HTML_NS}->{source} = {
2312     attrs_checker => sub {
2313     my ($self, $todo) = @_;
2314     $GetHTMLAttrsChecker->({
2315     src => $HTMLURIAttrChecker,
2316     type => $HTMLIMTAttrChecker,
2317     media => $HTMLMQAttrChecker,
2318     })->($self, $todo);
2319     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2320     $self->{onerror}->(node => $todo->{node},
2321     type => 'attribute missing:src');
2322     }
2323     },
2324     checker => $HTMLEmptyChecker,
2325     };
2326    
2327     $Element->{$HTML_NS}->{canvas} = {
2328     attrs_checker => $GetHTMLAttrsChecker->({
2329     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2330     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2331     }),
2332     checker => $HTMLInlineChecker,
2333     };
2334    
2335     $Element->{$HTML_NS}->{map} = {
2336 wakaba 1.4 attrs_checker => sub {
2337     my ($self, $todo) = @_;
2338     my $has_id;
2339     $GetHTMLAttrsChecker->({
2340     id => sub {
2341     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2342     my ($self, $attr) = @_;
2343     my $value = $attr->value;
2344     if (length $value > 0) {
2345     if ($self->{id}->{$value}) {
2346     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2347     push @{$self->{id}->{$value}}, $attr;
2348     } else {
2349     $self->{id}->{$value} = [$attr];
2350     }
2351 wakaba 1.1 } else {
2352 wakaba 1.4 ## NOTE: MUST contain at least one character
2353     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2354 wakaba 1.1 }
2355 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2356     $self->{onerror}->(node => $attr, type => 'space in ID');
2357     }
2358     $self->{map}->{$value} ||= $attr;
2359     $has_id = 1;
2360     },
2361     })->($self, $todo);
2362     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2363     unless $has_id;
2364     },
2365 wakaba 1.1 checker => $HTMLBlockChecker,
2366     };
2367    
2368     $Element->{$HTML_NS}->{area} = {
2369     attrs_checker => sub {
2370     my ($self, $todo) = @_;
2371     my %attr;
2372     my $coords;
2373     for my $attr (@{$todo->{node}->attributes}) {
2374     my $attr_ns = $attr->namespace_uri;
2375     $attr_ns = '' unless defined $attr_ns;
2376     my $attr_ln = $attr->manakai_local_name;
2377     my $checker;
2378     if ($attr_ns eq '') {
2379     $checker = {
2380     alt => sub { },
2381     ## NOTE: |alt| value has no conformance creteria.
2382     shape => $GetHTMLEnumeratedAttrChecker->({
2383     circ => -1, circle => 1,
2384     default => 1,
2385     poly => 1, polygon => -1,
2386     rect => 1, rectangle => -1,
2387     }),
2388     coords => sub {
2389     my ($self, $attr) = @_;
2390     my $value = $attr->value;
2391     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2392     $coords = [split /,/, $value];
2393     } else {
2394     $self->{onerror}->(node => $attr,
2395     type => 'coords:syntax error');
2396     }
2397     },
2398     target => $HTMLTargetAttrChecker,
2399     href => $HTMLURIAttrChecker,
2400     ping => $HTMLSpaceURIsAttrChecker,
2401 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2402 wakaba 1.1 media => $HTMLMQAttrChecker,
2403     hreflang => $HTMLLanguageTagAttrChecker,
2404     type => $HTMLIMTAttrChecker,
2405     }->{$attr_ln};
2406     if ($checker) {
2407     $attr{$attr_ln} = $attr;
2408     } else {
2409     $checker = $HTMLAttrChecker->{$attr_ln};
2410     }
2411     }
2412     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2413     || $AttrChecker->{$attr_ns}->{''};
2414     if ($checker) {
2415     $checker->($self, $attr) if ref $checker;
2416     } else {
2417     $self->{onerror}->(node => $attr, level => 'unsupported',
2418     type => 'attribute');
2419     ## ISSUE: No comformance createria for unknown attributes in the spec
2420     }
2421     }
2422    
2423     if (defined $attr{href}) {
2424 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2425 wakaba 1.1 unless (defined $attr{alt}) {
2426     $self->{onerror}->(node => $todo->{node},
2427     type => 'attribute missing:alt');
2428     }
2429     } else {
2430     for (qw/target ping rel media hreflang type alt/) {
2431     if (defined $attr{$_}) {
2432     $self->{onerror}->(node => $attr{$_},
2433     type => 'attribute not allowed');
2434     }
2435     }
2436     }
2437    
2438     my $shape = 'rectangle';
2439     if (defined $attr{shape}) {
2440     $shape = {
2441     circ => 'circle', circle => 'circle',
2442     default => 'default',
2443     poly => 'polygon', polygon => 'polygon',
2444     rect => 'rectangle', rectangle => 'rectangle',
2445     }->{lc $attr{shape}->value} || 'rectangle';
2446     ## TODO: ASCII lowercase?
2447     }
2448    
2449     if ($shape eq 'circle') {
2450     if (defined $attr{coords}) {
2451     if (defined $coords) {
2452     if (@$coords == 3) {
2453     if ($coords->[2] < 0) {
2454     $self->{onerror}->(node => $attr{coords},
2455     type => 'coords:out of range:2');
2456     }
2457     } else {
2458     $self->{onerror}->(node => $attr{coords},
2459     type => 'coords:number:3:'.@$coords);
2460     }
2461     } else {
2462     ## NOTE: A syntax error has been reported.
2463     }
2464     } else {
2465     $self->{onerror}->(node => $todo->{node},
2466     type => 'attribute missing:coords');
2467     }
2468     } elsif ($shape eq 'default') {
2469     if (defined $attr{coords}) {
2470     $self->{onerror}->(node => $attr{coords},
2471     type => 'attribute not allowed');
2472     }
2473     } elsif ($shape eq 'polygon') {
2474     if (defined $attr{coords}) {
2475     if (defined $coords) {
2476     if (@$coords >= 6) {
2477     unless (@$coords % 2 == 0) {
2478     $self->{onerror}->(node => $attr{coords},
2479     type => 'coords:number:even:'.@$coords);
2480     }
2481     } else {
2482     $self->{onerror}->(node => $attr{coords},
2483     type => 'coords:number:>=6:'.@$coords);
2484     }
2485     } else {
2486     ## NOTE: A syntax error has been reported.
2487     }
2488     } else {
2489     $self->{onerror}->(node => $todo->{node},
2490     type => 'attribute missing:coords');
2491     }
2492     } elsif ($shape eq 'rectangle') {
2493     if (defined $attr{coords}) {
2494     if (defined $coords) {
2495     if (@$coords == 4) {
2496     unless ($coords->[0] < $coords->[2]) {
2497     $self->{onerror}->(node => $attr{coords},
2498     type => 'coords:out of range:0');
2499     }
2500     unless ($coords->[1] < $coords->[3]) {
2501     $self->{onerror}->(node => $attr{coords},
2502     type => 'coords:out of range:1');
2503     }
2504     } else {
2505     $self->{onerror}->(node => $attr{coords},
2506     type => 'coords:number:4:'.@$coords);
2507     }
2508     } else {
2509     ## NOTE: A syntax error has been reported.
2510     }
2511     } else {
2512     $self->{onerror}->(node => $todo->{node},
2513     type => 'attribute missing:coords');
2514     }
2515     }
2516     },
2517     checker => $HTMLEmptyChecker,
2518     };
2519     ## TODO: only in map
2520    
2521     $Element->{$HTML_NS}->{table} = {
2522     attrs_checker => $GetHTMLAttrsChecker->({}),
2523     checker => sub {
2524     my ($self, $todo) = @_;
2525     my $el = $todo->{node};
2526     my $new_todos = [];
2527     my @nodes = (@{$el->child_nodes});
2528    
2529     my $phase = 'before caption';
2530     my $has_tfoot;
2531     while (@nodes) {
2532     my $node = shift @nodes;
2533     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2534    
2535     my $nt = $node->node_type;
2536     if ($nt == 1) {
2537     my $node_ns = $node->namespace_uri;
2538     $node_ns = '' unless defined $node_ns;
2539     my $node_ln = $node->manakai_local_name;
2540     ## NOTE: |minuses| list is not checked since redundant
2541 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2542     #
2543     } elsif ($phase eq 'in tbodys') {
2544 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2545     #$phase = 'in tbodys';
2546     } elsif (not $has_tfoot and
2547     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2548     $phase = 'after tfoot';
2549     $has_tfoot = 1;
2550     } else {
2551     $self->{onerror}->(node => $node, type => 'element not allowed');
2552     }
2553     } elsif ($phase eq 'in trs') {
2554     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2555     #$phase = 'in trs';
2556     } elsif (not $has_tfoot and
2557     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2558     $phase = 'after tfoot';
2559     $has_tfoot = 1;
2560     } else {
2561     $self->{onerror}->(node => $node, type => 'element not allowed');
2562     }
2563     } elsif ($phase eq 'after thead') {
2564     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2565     $phase = 'in tbodys';
2566     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2567     $phase = 'in trs';
2568     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2569     $phase = 'in tbodys';
2570     $has_tfoot = 1;
2571     } else {
2572     $self->{onerror}->(node => $node, type => 'element not allowed');
2573     }
2574     } elsif ($phase eq 'in colgroup') {
2575     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2576     $phase = 'in colgroup';
2577     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2578     $phase = 'after thead';
2579     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2580     $phase = 'in tbodys';
2581     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2582     $phase = 'in trs';
2583     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2584     $phase = 'in tbodys';
2585     $has_tfoot = 1;
2586     } else {
2587     $self->{onerror}->(node => $node, type => 'element not allowed');
2588     }
2589     } elsif ($phase eq 'before caption') {
2590     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2591     $phase = 'in colgroup';
2592     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2593     $phase = 'in colgroup';
2594     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2595     $phase = 'after thead';
2596     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2597     $phase = 'in tbodys';
2598     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2599     $phase = 'in trs';
2600     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2601     $phase = 'in tbodys';
2602     $has_tfoot = 1;
2603     } else {
2604     $self->{onerror}->(node => $node, type => 'element not allowed');
2605     }
2606     } else { # after tfoot
2607     $self->{onerror}->(node => $node, type => 'element not allowed');
2608     }
2609     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2610     unshift @nodes, @$sib;
2611     push @$new_todos, @$ch;
2612     } elsif ($nt == 3 or $nt == 4) {
2613     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2614     $self->{onerror}->(node => $node, type => 'character not allowed');
2615     }
2616     } elsif ($nt == 5) {
2617     unshift @nodes, @{$node->child_nodes};
2618     }
2619     }
2620    
2621     ## Table model errors
2622     require Whatpm::HTMLTable;
2623     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2624     my %opt = @_;
2625     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2626     });
2627     push @{$self->{return}->{table}}, $todo->{node};
2628    
2629     return ($new_todos);
2630     },
2631     };
2632    
2633     $Element->{$HTML_NS}->{caption} = {
2634     attrs_checker => $GetHTMLAttrsChecker->({}),
2635 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2636 wakaba 1.1 };
2637    
2638     $Element->{$HTML_NS}->{colgroup} = {
2639     attrs_checker => $GetHTMLAttrsChecker->({
2640     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2641     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2642     ## TODO: "attribute not supported" if |col|.
2643     ## ISSUE: MUST NOT if any |col|?
2644     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2645     }),
2646     checker => sub {
2647     my ($self, $todo) = @_;
2648     my $el = $todo->{node};
2649     my $new_todos = [];
2650     my @nodes = (@{$el->child_nodes});
2651    
2652     while (@nodes) {
2653     my $node = shift @nodes;
2654     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2655    
2656     my $nt = $node->node_type;
2657     if ($nt == 1) {
2658     my $node_ns = $node->namespace_uri;
2659     $node_ns = '' unless defined $node_ns;
2660     my $node_ln = $node->manakai_local_name;
2661     ## NOTE: |minuses| list is not checked since redundant
2662 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2663     #
2664     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2665 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2666     }
2667     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2668     unshift @nodes, @$sib;
2669     push @$new_todos, @$ch;
2670     } elsif ($nt == 3 or $nt == 4) {
2671     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2672     $self->{onerror}->(node => $node, type => 'character not allowed');
2673     }
2674     } elsif ($nt == 5) {
2675     unshift @nodes, @{$node->child_nodes};
2676     }
2677     }
2678     return ($new_todos);
2679     },
2680     };
2681    
2682     $Element->{$HTML_NS}->{col} = {
2683     attrs_checker => $GetHTMLAttrsChecker->({
2684     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2685     }),
2686     checker => $HTMLEmptyChecker,
2687     };
2688    
2689     $Element->{$HTML_NS}->{tbody} = {
2690     attrs_checker => $GetHTMLAttrsChecker->({}),
2691     checker => sub {
2692     my ($self, $todo) = @_;
2693     my $el = $todo->{node};
2694     my $new_todos = [];
2695     my @nodes = (@{$el->child_nodes});
2696    
2697     my $has_tr;
2698     while (@nodes) {
2699     my $node = shift @nodes;
2700     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2701    
2702     my $nt = $node->node_type;
2703     if ($nt == 1) {
2704     my $node_ns = $node->namespace_uri;
2705     $node_ns = '' unless defined $node_ns;
2706     my $node_ln = $node->manakai_local_name;
2707     ## NOTE: |minuses| list is not checked since redundant
2708 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2709     #
2710     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2711 wakaba 1.1 $has_tr = 1;
2712     } else {
2713     $self->{onerror}->(node => $node, type => 'element not allowed');
2714     }
2715     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2716     unshift @nodes, @$sib;
2717     push @$new_todos, @$ch;
2718     } elsif ($nt == 3 or $nt == 4) {
2719     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2720     $self->{onerror}->(node => $node, type => 'character not allowed');
2721     }
2722     } elsif ($nt == 5) {
2723     unshift @nodes, @{$node->child_nodes};
2724     }
2725     }
2726     unless ($has_tr) {
2727     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2728     }
2729     return ($new_todos);
2730     },
2731     };
2732    
2733     $Element->{$HTML_NS}->{thead} = {
2734     attrs_checker => $GetHTMLAttrsChecker->({}),
2735     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2736     };
2737    
2738     $Element->{$HTML_NS}->{tfoot} = {
2739     attrs_checker => $GetHTMLAttrsChecker->({}),
2740     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2741     };
2742    
2743     $Element->{$HTML_NS}->{tr} = {
2744     attrs_checker => $GetHTMLAttrsChecker->({}),
2745     checker => sub {
2746     my ($self, $todo) = @_;
2747     my $el = $todo->{node};
2748     my $new_todos = [];
2749     my @nodes = (@{$el->child_nodes});
2750    
2751     my $has_td;
2752     while (@nodes) {
2753     my $node = shift @nodes;
2754     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2755    
2756     my $nt = $node->node_type;
2757     if ($nt == 1) {
2758     my $node_ns = $node->namespace_uri;
2759     $node_ns = '' unless defined $node_ns;
2760     my $node_ln = $node->manakai_local_name;
2761     ## NOTE: |minuses| list is not checked since redundant
2762 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2763     #
2764     } elsif ($node_ns eq $HTML_NS and
2765     ($node_ln eq 'td' or $node_ln eq 'th')) {
2766 wakaba 1.1 $has_td = 1;
2767     } else {
2768     $self->{onerror}->(node => $node, type => 'element not allowed');
2769     }
2770     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2771     unshift @nodes, @$sib;
2772     push @$new_todos, @$ch;
2773     } elsif ($nt == 3 or $nt == 4) {
2774     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2775     $self->{onerror}->(node => $node, type => 'character not allowed');
2776     }
2777     } elsif ($nt == 5) {
2778     unshift @nodes, @{$node->child_nodes};
2779     }
2780     }
2781     unless ($has_td) {
2782     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2783     }
2784     return ($new_todos);
2785     },
2786     };
2787    
2788     $Element->{$HTML_NS}->{td} = {
2789     attrs_checker => $GetHTMLAttrsChecker->({
2790     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2791     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2792     }),
2793     checker => $HTMLBlockOrInlineChecker,
2794     };
2795    
2796     $Element->{$HTML_NS}->{th} = {
2797     attrs_checker => $GetHTMLAttrsChecker->({
2798     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2799     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2800     scope => $GetHTMLEnumeratedAttrChecker
2801     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2802     }),
2803     checker => $HTMLBlockOrInlineChecker,
2804     };
2805    
2806     ## TODO: forms
2807 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2808 wakaba 1.1
2809     $Element->{$HTML_NS}->{script} = {
2810 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2811 wakaba 1.1 src => $HTMLURIAttrChecker,
2812     defer => $GetHTMLBooleanAttrChecker->('defer'),
2813     async => $GetHTMLBooleanAttrChecker->('async'),
2814     type => $HTMLIMTAttrChecker,
2815 wakaba 1.9 }),
2816 wakaba 1.1 checker => sub {
2817     my ($self, $todo) = @_;
2818    
2819     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2820     return $HTMLEmptyChecker->($self, $todo);
2821     } else {
2822     ## NOTE: No content model conformance in HTML5 spec.
2823     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2824     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2825     if ((defined $type and $type eq '') or
2826     (defined $language and $language eq '')) {
2827     $type = 'text/javascript';
2828     } elsif (defined $type) {
2829     #
2830     } elsif (defined $language) {
2831     $type = 'text/' . $language;
2832     } else {
2833     $type = 'text/javascript';
2834     }
2835     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2836     type => 'script:'.$type); ## TODO: $type normalization
2837     return $AnyChecker->($self, $todo);
2838     }
2839     },
2840     };
2841    
2842     ## NOTE: When script is disabled.
2843     $Element->{$HTML_NS}->{noscript} = {
2844 wakaba 1.3 attrs_checker => sub {
2845     my ($self, $todo) = @_;
2846    
2847     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2848     ## since the later is not invoked when the |noscript| is used as a
2849     ## transparent element.
2850     unless ($todo->{node}->owner_document->manakai_is_html) {
2851     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2852     }
2853    
2854     $GetHTMLAttrsChecker->({})->($self, $todo);
2855     },
2856 wakaba 1.1 checker => sub {
2857     my ($self, $todo) = @_;
2858    
2859 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2860     my $new_todos = [];
2861     my @nodes = (@{$todo->{node}->child_nodes});
2862    
2863     while (@nodes) {
2864     my $node = shift @nodes;
2865     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2866    
2867     my $nt = $node->node_type;
2868     if ($nt == 1) {
2869     my $node_ns = $node->namespace_uri;
2870     $node_ns = '' unless defined $node_ns;
2871     my $node_ln = $node->manakai_local_name;
2872 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2873     #
2874     } elsif ($node_ns eq $HTML_NS) {
2875 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2876     #
2877     } elsif ($node_ln eq 'meta') {
2878 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2879     #
2880 wakaba 1.3 } else {
2881 wakaba 1.5 $self->{onerror}->(node => $node,
2882     type => 'element not allowed');
2883 wakaba 1.3 }
2884     } else {
2885     $self->{onerror}->(node => $node, type => 'element not allowed');
2886     }
2887     } else {
2888     $self->{onerror}->(node => $node, type => 'element not allowed');
2889     }
2890    
2891     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2892     unshift @nodes, @$sib;
2893     push @$new_todos, @$ch;
2894     } elsif ($nt == 3 or $nt == 4) {
2895     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2896     $self->{onerror}->(node => $node, type => 'character not allowed');
2897     }
2898     } elsif ($nt == 5) {
2899     unshift @nodes, @{$node->child_nodes};
2900     }
2901     }
2902     return ($new_todos);
2903     } else {
2904     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2905     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2906     push @$sib, $end;
2907     return ($sib, $ch);
2908     }
2909 wakaba 1.1 },
2910     };
2911 wakaba 1.3
2912     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2913 wakaba 1.1
2914     $Element->{$HTML_NS}->{'event-source'} = {
2915     attrs_checker => $GetHTMLAttrsChecker->({
2916     src => $HTMLURIAttrChecker,
2917     }),
2918     checker => $HTMLEmptyChecker,
2919     };
2920    
2921     $Element->{$HTML_NS}->{details} = {
2922     attrs_checker => $GetHTMLAttrsChecker->({
2923     open => $GetHTMLBooleanAttrChecker->('open'),
2924     }),
2925     checker => sub {
2926     my ($self, $todo) = @_;
2927    
2928     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2929     my ($sib, $ch)
2930     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2931     ->($self, $todo);
2932     push @$sib, $end;
2933     return ($sib, $ch);
2934     },
2935     };
2936    
2937     $Element->{$HTML_NS}->{datagrid} = {
2938     attrs_checker => $GetHTMLAttrsChecker->({
2939     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2940     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2941     }),
2942     checker => sub {
2943     my ($self, $todo) = @_;
2944     my $el = $todo->{node};
2945     my $new_todos = [];
2946     my @nodes = (@{$el->child_nodes});
2947    
2948     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2949    
2950     ## Block-table Block* | table | select | datalist | Empty
2951     my $mode = 'any';
2952     while (@nodes) {
2953     my $node = shift @nodes;
2954     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2955    
2956     my $nt = $node->node_type;
2957     if ($nt == 1) {
2958     my $node_ns = $node->namespace_uri;
2959     $node_ns = '' unless defined $node_ns;
2960     my $node_ln = $node->manakai_local_name;
2961     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2962 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2963     #
2964     } elsif ($mode eq 'block') {
2965 wakaba 1.1 $not_allowed = 1
2966     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2967     } elsif ($mode eq 'any') {
2968     if ($node_ns eq $HTML_NS and
2969     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2970     $mode = 'none';
2971     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2972     $mode = 'block';
2973     } else {
2974     $not_allowed = 1;
2975     }
2976     } else {
2977     $not_allowed = 1;
2978     }
2979     $self->{onerror}->(node => $node, type => 'element not allowed')
2980     if $not_allowed;
2981     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2982     unshift @nodes, @$sib;
2983     push @$new_todos, @$ch;
2984     } elsif ($nt == 3 or $nt == 4) {
2985     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2986     $self->{onerror}->(node => $node, type => 'character not allowed');
2987     }
2988     } elsif ($nt == 5) {
2989     unshift @nodes, @{$node->child_nodes};
2990     }
2991     }
2992    
2993     push @$new_todos, $end;
2994     return ($new_todos);
2995     },
2996     };
2997    
2998     $Element->{$HTML_NS}->{command} = {
2999     attrs_checker => $GetHTMLAttrsChecker->({
3000     checked => $GetHTMLBooleanAttrChecker->('checked'),
3001     default => $GetHTMLBooleanAttrChecker->('default'),
3002     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3003     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3004     icon => $HTMLURIAttrChecker,
3005     label => sub { }, ## NOTE: No conformance creteria
3006     radiogroup => sub { }, ## NOTE: No conformance creteria
3007     ## NOTE: |title| has special semantics, but no syntactical difference
3008     type => sub {
3009     my ($self, $attr) = @_;
3010     my $value = $attr->value;
3011     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3012     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3013     }
3014     },
3015     }),
3016     checker => $HTMLEmptyChecker,
3017     };
3018    
3019     $Element->{$HTML_NS}->{menu} = {
3020     attrs_checker => $GetHTMLAttrsChecker->({
3021     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3022     id => sub {
3023     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3024     my ($self, $attr) = @_;
3025     my $value = $attr->value;
3026     if (length $value > 0) {
3027     if ($self->{id}->{$value}) {
3028     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3029     push @{$self->{id}->{$value}}, $attr;
3030     } else {
3031     $self->{id}->{$value} = [$attr];
3032     }
3033     } else {
3034     ## NOTE: MUST contain at least one character
3035     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3036     }
3037     if ($value =~ /[\x09-\x0D\x20]/) {
3038     $self->{onerror}->(node => $attr, type => 'space in ID');
3039     }
3040     $self->{menu}->{$value} ||= $attr;
3041     ## ISSUE: <menu id=""><p contextmenu=""> match?
3042     },
3043     label => sub { }, ## NOTE: No conformance creteria
3044     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3045     }),
3046     checker => sub {
3047     my ($self, $todo) = @_;
3048     my $el = $todo->{node};
3049     my $new_todos = [];
3050     my @nodes = (@{$el->child_nodes});
3051    
3052     my $content = 'li or inline';
3053     while (@nodes) {
3054     my $node = shift @nodes;
3055     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3056    
3057     my $nt = $node->node_type;
3058     if ($nt == 1) {
3059     my $node_ns = $node->namespace_uri;
3060     $node_ns = '' unless defined $node_ns;
3061     my $node_ln = $node->manakai_local_name;
3062     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3063 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3064     #
3065     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3066 wakaba 1.1 if ($content eq 'inline') {
3067     $not_allowed = 1;
3068     } elsif ($content eq 'li or inline') {
3069     $content = 'li';
3070     }
3071     } else {
3072     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3073     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3074     $content = 'inline';
3075     } else {
3076     $not_allowed = 1;
3077     }
3078     }
3079     $self->{onerror}->(node => $node, type => 'element not allowed')
3080     if $not_allowed;
3081     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3082     unshift @nodes, @$sib;
3083     push @$new_todos, @$ch;
3084     } elsif ($nt == 3 or $nt == 4) {
3085     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3086     if ($content eq 'li') {
3087     $self->{onerror}->(node => $node, type => 'character not allowed');
3088     } elsif ($content eq 'li or inline') {
3089     $content = 'inline';
3090     }
3091     }
3092     } elsif ($nt == 5) {
3093     unshift @nodes, @{$node->child_nodes};
3094     }
3095     }
3096    
3097     for (@$new_todos) {
3098     $_->{inline} = 1;
3099     }
3100     return ($new_todos);
3101     },
3102 wakaba 1.8 };
3103    
3104     $Element->{$HTML_NS}->{datatemplate} = {
3105     attrs_checker => $GetHTMLAttrsChecker->({}),
3106     checker => sub {
3107     my ($self, $todo) = @_;
3108     my $el = $todo->{node};
3109     my $new_todos = [];
3110     my @nodes = (@{$el->child_nodes});
3111    
3112     while (@nodes) {
3113     my $node = shift @nodes;
3114     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3115    
3116     my $nt = $node->node_type;
3117     if ($nt == 1) {
3118     my $node_ns = $node->namespace_uri;
3119     $node_ns = '' unless defined $node_ns;
3120     my $node_ln = $node->manakai_local_name;
3121     ## NOTE: |minuses| list is not checked since redundant
3122     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3123     #
3124     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3125     $self->{onerror}->(node => $node,
3126     type => 'element not allowed:datatemplate');
3127     }
3128     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3129     unshift @nodes, @$sib;
3130     push @$new_todos, @$ch;
3131     } elsif ($nt == 3 or $nt == 4) {
3132     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3133     $self->{onerror}->(node => $node, type => 'character not allowed');
3134     }
3135     } elsif ($nt == 5) {
3136     unshift @nodes, @{$node->child_nodes};
3137     }
3138     }
3139     return ($new_todos);
3140     },
3141     is_xml_root => 1,
3142     };
3143    
3144     $Element->{$HTML_NS}->{rule} = {
3145     attrs_checker => $GetHTMLAttrsChecker->({
3146     ## TODO: |condition| attribute
3147 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3148 wakaba 1.8 }),
3149     checker => sub {
3150     my ($self, $todo) = @_;
3151    
3152     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3153     my ($sib, $ch) = $AnyChecker->($self, $todo);
3154     push @$sib, $end;
3155     return ($sib, $ch);
3156     },
3157     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3158     ## is applied to some conforming data, results in a conforming DOM tree.":
3159     ## We don't check against this.
3160     };
3161    
3162     $Element->{$HTML_NS}->{nest} = {
3163     attrs_checker => $GetHTMLAttrsChecker->({
3164     ## TODO: |filter| attribute
3165 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3166 wakaba 1.8 }),
3167     checker => $HTMLEmptyChecker,
3168 wakaba 1.1 };
3169    
3170     $Element->{$HTML_NS}->{legend} = {
3171     attrs_checker => $GetHTMLAttrsChecker->({}),
3172     checker => sub {
3173     my ($self, $todo) = @_;
3174    
3175     my $parent = $todo->{node}->manakai_parent_element;
3176     if (defined $parent) {
3177     my $nsuri = $parent->namespace_uri;
3178     $nsuri = '' unless defined $nsuri;
3179     my $ln = $parent->manakai_local_name;
3180     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3181     return $HTMLInlineChecker->($self, $todo);
3182     } else {
3183 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3184 wakaba 1.1 }
3185     } else {
3186     return $HTMLInlineChecker->($self, $todo);
3187     }
3188    
3189     ## ISSUE: Content model is defined only for fieldset/legend,
3190     ## details/legend, and figure/legend.
3191     },
3192     };
3193    
3194     $Element->{$HTML_NS}->{div} = {
3195     attrs_checker => $GetHTMLAttrsChecker->({}),
3196     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3197     };
3198    
3199     $Element->{$HTML_NS}->{font} = {
3200     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3201     checker => $HTMLTransparentChecker,
3202     };
3203    
3204     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3205    
3206     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24