/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (hide annotations) (download)
Sun Nov 4 03:20:34 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.11: +8 -1 lines
++ whatpm/t/ChangeLog	4 Nov 2007 03:11:55 -0000
2007-11-04  Wakaba  <wakaba@suika.fam.cx>

	* content-model-4.dat: New tests for rel=up (HTML5 revision 1112)
	and rel=noreferer (HTML5 revision 1118).

++ whatpm/Whatpm/ChangeLog	4 Nov 2007 03:11:13 -0000
2007-11-04  Wakaba  <wakaba@suika.fam.cx>

	* mklinktypelist.pl: Support for rel=noreferer (HTML5 revision 1118).

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
252     ## TODO: check significant content
253    
254     ## Strictly inline-level content
255     my $HTMLStrictlyInlineChecker = sub {
256     my ($self, $todo) = @_;
257     my $el = $todo->{node};
258     my $new_todos = [];
259     my @nodes = (@{$el->child_nodes});
260    
261     while (@nodes) {
262     my $node = shift @nodes;
263     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
264    
265     my $nt = $node->node_type;
266     if ($nt == 1) {
267     my $node_ns = $node->namespace_uri;
268     $node_ns = '' unless defined $node_ns;
269     my $node_ln = $node->manakai_local_name;
270     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
271     $not_allowed = 1
272 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
273     $self->{pluses}->{$node_ns}->{$node_ln};
274 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
275     if $not_allowed;
276     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
277     unshift @nodes, @$sib;
278     push @$new_todos, @$ch;
279     } elsif ($nt == 5) {
280     unshift @nodes, @{$node->child_nodes};
281     }
282     }
283    
284     for (@$new_todos) {
285     $_->{inline} = 1;
286     $_->{strictly_inline} = 1;
287     }
288     return ($new_todos);
289     }; # $HTMLStrictlyInlineChecker
290    
291     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
292     ## TODO: check significant content
293    
294 wakaba 1.8 ## Inline-level or strictly inline-level content
295 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
296     my ($self, $todo) = @_;
297     my $el = $todo->{node};
298     my $new_todos = [];
299     my @nodes = (@{$el->child_nodes});
300    
301     while (@nodes) {
302     my $node = shift @nodes;
303     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
304    
305     my $nt = $node->node_type;
306     if ($nt == 1) {
307     my $node_ns = $node->namespace_uri;
308     $node_ns = '' unless defined $node_ns;
309     my $node_ln = $node->manakai_local_name;
310     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
311     if ($todo->{strictly_inline}) {
312     $not_allowed = 1
313 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
314     $self->{pluses}->{$node_ns}->{$node_ln};
315 wakaba 1.1 } else {
316     $not_allowed = 1
317     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
318 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
319     $self->{pluses}->{$node_ns}->{$node_ln};
320 wakaba 1.1 }
321     $self->{onerror}->(node => $node, type => 'element not allowed')
322     if $not_allowed;
323     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
324     unshift @nodes, @$sib;
325     push @$new_todos, @$ch;
326     } elsif ($nt == 5) {
327     unshift @nodes, @{$node->child_nodes};
328     }
329     }
330    
331     for (@$new_todos) {
332     $_->{inline} = 1;
333     $_->{strictly_inline} = 1;
334     }
335     return ($new_todos);
336     }; # $HTMLInlineOrStrictlyInlineChecker
337    
338     my $HTMLSignificantInlineOrStrictlyInlineChecker
339     = $HTMLInlineOrStrictlyInlineChecker;
340 wakaba 1.12 ## TODO: "siginificant" concept has been gone.
341     ## Instead, we have another SHOULD-level requirement (revision 1114).
342 wakaba 1.1
343 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
344 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
345     my ($self, $todo) = @_;
346     my $el = $todo->{node};
347     my $new_todos = [];
348     my @nodes = (@{$el->child_nodes});
349    
350     my $content = 'block-or-inline'; # or 'block' or 'inline'
351     my @block_not_inline;
352     while (@nodes) {
353     my $node = shift @nodes;
354     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
355    
356 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
357     ## is conforming or not.
358    
359 wakaba 1.1 my $nt = $node->node_type;
360     if ($nt == 1) {
361     my $node_ns = $node->namespace_uri;
362     $node_ns = '' unless defined $node_ns;
363     my $node_ln = $node->manakai_local_name;
364     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
365     if ($content eq 'block') {
366     $not_allowed = 1
367 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
368     $self->{pluses}->{$node_ns}->{$node_ln};
369 wakaba 1.1 } elsif ($content eq 'inline') {
370     $not_allowed = 1
371     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
372 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
373     $self->{pluses}->{$node_ns}->{$node_ln};
374 wakaba 1.1 } else {
375     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
376     my $is_inline
377     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
378     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
379    
380     push @block_not_inline, $node
381     if $is_block and not $is_inline and not $not_allowed;
382 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
383 wakaba 1.1 $content = 'inline';
384     for (@block_not_inline) {
385     $self->{onerror}->(node => $_, type => 'element not allowed');
386     }
387     $not_allowed = 1 unless $is_inline;
388     }
389     }
390     $self->{onerror}->(node => $node, type => 'element not allowed')
391     if $not_allowed;
392     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
393     unshift @nodes, @$sib;
394     push @$new_todos, @$ch;
395     } elsif ($nt == 3 or $nt == 4) {
396     if ($node->data =~ /[^\x09-\x0D\x20]/) {
397     if ($content eq 'block') {
398     $self->{onerror}->(node => $node, type => 'character not allowed');
399     } else {
400     $content = 'inline';
401     for (@block_not_inline) {
402     $self->{onerror}->(node => $_, type => 'element not allowed');
403     }
404     }
405     }
406     } elsif ($nt == 5) {
407     unshift @nodes, @{$node->child_nodes};
408     }
409     }
410    
411     if ($content eq 'inline') {
412     for (@$new_todos) {
413     $_->{inline} = 1;
414     }
415     }
416     return ($new_todos);
417     };
418    
419     ## Zero or more XXX element, then either block-level or inline-level
420     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
421     my ($elnsuri, $ellname) = @_;
422     return sub {
423     my ($self, $todo) = @_;
424     my $el = $todo->{node};
425     my $new_todos = [];
426     my @nodes = (@{$el->child_nodes});
427    
428     my $has_non_style;
429     my $content = 'block-or-inline'; # or 'block' or 'inline'
430     my @block_not_inline;
431     while (@nodes) {
432     my $node = shift @nodes;
433     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
434    
435     my $nt = $node->node_type;
436     if ($nt == 1) {
437     my $node_ns = $node->namespace_uri;
438     $node_ns = '' unless defined $node_ns;
439     my $node_ln = $node->manakai_local_name;
440     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
441     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
442     $not_allowed = 1 if $has_non_style;
443     if ($ellname eq 'style' and
444     not $node->has_attribute_ns (undef, 'scoped')) {
445     $not_allowed = 1;
446     }
447     } elsif ($content eq 'block') {
448     $has_non_style = 1;
449     $not_allowed = 1
450 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
451     $self->{pluses}->{$node_ns}->{$node_ln};
452 wakaba 1.1 } elsif ($content eq 'inline') {
453     $has_non_style = 1;
454     $not_allowed = 1
455     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
456 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
457     $self->{pluses}->{$node_ns}->{$node_ln};
458 wakaba 1.1 } else {
459 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
460 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
461     my $is_inline
462     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
463     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
464    
465     push @block_not_inline, $node
466     if $is_block and not $is_inline and not $not_allowed;
467 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
468 wakaba 1.1 $content = 'inline';
469     for (@block_not_inline) {
470     $self->{onerror}->(node => $_, type => 'element not allowed');
471     }
472     $not_allowed = 1 unless $is_inline;
473     }
474     }
475     $self->{onerror}->(node => $node, type => 'element not allowed')
476     if $not_allowed;
477     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
478     unshift @nodes, @$sib;
479     push @$new_todos, @$ch;
480     } elsif ($nt == 3 or $nt == 4) {
481     if ($node->data =~ /[^\x09-\x0D\x20]/) {
482     $has_non_style = 1;
483     if ($content eq 'block') {
484     $self->{onerror}->(node => $node, type => 'character not allowed');
485     } else {
486     $content = 'inline';
487     for (@block_not_inline) {
488     $self->{onerror}->(node => $_, type => 'element not allowed');
489     }
490     }
491     }
492     } elsif ($nt == 5) {
493     unshift @nodes, @{$node->child_nodes};
494     }
495     }
496    
497     if ($content eq 'inline') {
498     for (@$new_todos) {
499     $_->{inline} = 1;
500     }
501     }
502     return ($new_todos);
503     };
504     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
505    
506     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
507    
508     our $AttrChecker;
509    
510     my $GetHTMLEnumeratedAttrChecker = sub {
511     my $states = shift; # {value => conforming ? 1 : -1}
512     return sub {
513     my ($self, $attr) = @_;
514     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
515     if ($states->{$value} > 0) {
516     #
517     } elsif ($states->{$value}) {
518     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
519     } else {
520     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
521     }
522     };
523     }; # $GetHTMLEnumeratedAttrChecker
524    
525     my $GetHTMLBooleanAttrChecker = sub {
526     my $local_name = shift;
527     return sub {
528     my ($self, $attr) = @_;
529     my $value = $attr->value;
530     unless ($value eq $local_name or $value eq '') {
531     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
532     }
533     };
534     }; # $GetHTMLBooleanAttrChecker
535    
536 wakaba 1.8 ## Unordered set of space-separated tokens
537     my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
538     my ($self, $attr) = @_;
539     my %word;
540     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
541     unless ($word{$word}) {
542     $word{$word} = 1;
543     } else {
544     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
545     }
546     }
547     }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
548    
549 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
550     ## whose allowed values are defined by the section on link types)
551     my $HTMLLinkTypesAttrChecker = sub {
552 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
553 wakaba 1.1 my %word;
554     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
555     unless ($word{$word}) {
556     $word{$word} = 1;
557     } else {
558     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
559     }
560     }
561     ## NOTE: Case sensitive match (since HTML5 spec does not say link
562     ## types are case-insensitive and it says "The value should not
563     ## be confusingly similar to any other defined value (e.g.
564     ## differing only in case).").
565     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
566     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
567     ## values to be used conformingly.
568     require Whatpm::_LinkTypeList;
569     our $LinkType;
570     for my $word (keys %word) {
571     my $def = $LinkType->{$word};
572     if (defined $def) {
573     if ($def->{status} eq 'accepted') {
574     if (defined $def->{effect}->[$a_or_area]) {
575     #
576     } else {
577     $self->{onerror}->(node => $attr,
578     type => 'link type:bad context:'.$word);
579     }
580     } elsif ($def->{status} eq 'proposal') {
581     $self->{onerror}->(node => $attr, level => 's',
582     type => 'link type:proposed:'.$word);
583     } else { # rejected or synonym
584     $self->{onerror}->(node => $attr,
585     type => 'link type:non-conforming:'.$word);
586     }
587 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
588     if ($word eq 'alternate') {
589     #
590     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
591     $todo->{has_hyperlink_link_type} = 1;
592     }
593     }
594 wakaba 1.1 if ($def->{unique}) {
595     unless ($self->{has_link_type}->{$word}) {
596     $self->{has_link_type}->{$word} = 1;
597     } else {
598     $self->{onerror}->(node => $attr,
599     type => 'link type:duplicate:'.$word);
600     }
601     }
602     } else {
603     $self->{onerror}->(node => $attr, level => 'unsupported',
604     type => 'link type:'.$word);
605     }
606     }
607 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
608     if $word{alternate} and not $word{stylesheet};
609 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
610     ## says that using both X-Pingback: header field and HTML
611     ## <link rel=pingback> is deprecated and if both appears they
612     ## SHOULD contain exactly the same value.
613     ## ISSUE: Pingback 1.0 specification defines the exact representation
614     ## of its link element, which cannot be tested by the current arch.
615     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
616     ## include any string that matches to the pattern for the rel=pingback link,
617     ## which again inpossible to test.
618     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
619 wakaba 1.12
620     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
621     ## ISSUE: <link rel="up up"> is non-conforming, since rel="" is unordered.
622 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
623    
624     ## URI (or IRI)
625     my $HTMLURIAttrChecker = sub {
626     my ($self, $attr) = @_;
627     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
628     my $value = $attr->value;
629     Whatpm::URIChecker->check_iri_reference ($value, sub {
630     my %opt = @_;
631     $self->{onerror}->(node => $attr, level => $opt{level},
632     type => 'URI::'.$opt{type}.
633     (defined $opt{position} ? ':'.$opt{position} : ''));
634     });
635 wakaba 1.4 $self->{has_uri_attr} = 1;
636 wakaba 1.1 }; # $HTMLURIAttrChecker
637    
638     ## A space separated list of one or more URIs (or IRIs)
639     my $HTMLSpaceURIsAttrChecker = sub {
640     my ($self, $attr) = @_;
641     my $i = 0;
642     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
643     Whatpm::URIChecker->check_iri_reference ($value, sub {
644     my %opt = @_;
645     $self->{onerror}->(node => $attr, level => $opt{level},
646 wakaba 1.2 type => 'URIs:'.':'.
647     $opt{type}.':'.$i.
648 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
649     });
650     $i++;
651     }
652     ## ISSUE: Relative references?
653     ## ISSUE: Leading or trailing white spaces are conformant?
654     ## ISSUE: A sequence of white space characters are conformant?
655     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
656     ## NOTE: Duplication seems not an error.
657 wakaba 1.4 $self->{has_uri_attr} = 1;
658 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
659    
660     my $HTMLDatetimeAttrChecker = sub {
661     my ($self, $attr) = @_;
662     my $value = $attr->value;
663     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
664     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
665     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
666     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
667     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
668     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
669     if $d < 1 or
670     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
671     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
672     if $M == 2 and $d == 29 and
673     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
674     } else {
675     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
676     }
677     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
678     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
679     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
680     if defined $s and $s > 59;
681     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
682     if $zh > 23;
683     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
684     if $zm > 59;
685     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
686     } else {
687     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
688     }
689     }; # $HTMLDatetimeAttrChecker
690    
691     my $HTMLIntegerAttrChecker = sub {
692     my ($self, $attr) = @_;
693     my $value = $attr->value;
694     unless ($value =~ /\A-?[0-9]+\z/) {
695     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
696     }
697     }; # $HTMLIntegerAttrChecker
698    
699     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
700     my $range_check = shift;
701     return sub {
702     my ($self, $attr) = @_;
703     my $value = $attr->value;
704     if ($value =~ /\A[0-9]+\z/) {
705     unless ($range_check->($value + 0)) {
706     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
707     }
708     } else {
709     $self->{onerror}->(node => $attr,
710     type => 'nninteger:syntax error');
711     }
712     };
713     }; # $GetHTMLNonNegativeIntegerAttrChecker
714    
715     my $GetHTMLFloatingPointNumberAttrChecker = sub {
716     my $range_check = shift;
717     return sub {
718     my ($self, $attr) = @_;
719     my $value = $attr->value;
720     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
721     unless ($range_check->($value + 0)) {
722     $self->{onerror}->(node => $attr, type => 'float:out of range');
723     }
724     } else {
725     $self->{onerror}->(node => $attr,
726     type => 'float:syntax error');
727     }
728     };
729     }; # $GetHTMLFloatingPointNumberAttrChecker
730    
731     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
732     ## ISSUE: RFC 2046 does not define syntax of media types.
733     ## ISSUE: The definition of "a valid MIME type" is unknown.
734     ## Syntactical correctness?
735     my $HTMLIMTAttrChecker = sub {
736     my ($self, $attr) = @_;
737     my $value = $attr->value;
738     ## ISSUE: RFC 2045 Content-Type header field allows insertion
739     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
740     ## ISSUE: RFC 2231 extension? Maybe no.
741     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
742     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
743     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
744     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
745     my @type = ($1, $2);
746     my $param = $3;
747     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
748     if (defined $2) {
749     push @type, $1 => $2;
750     } else {
751     my $n = $1;
752     my $v = $2;
753     $v =~ s/\\(.)/$1/gs;
754     push @type, $n => $v;
755     }
756     }
757     require Whatpm::IMTChecker;
758     Whatpm::IMTChecker->check_imt (sub {
759     my %opt = @_;
760     $self->{onerror}->(node => $attr, level => $opt{level},
761     type => 'IMT:'.$opt{type});
762     }, @type);
763     } else {
764     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
765     }
766     }; # $HTMLIMTAttrChecker
767    
768     my $HTMLLanguageTagAttrChecker = sub {
769 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
770    
771 wakaba 1.1 my ($self, $attr) = @_;
772 wakaba 1.6 my $value = $attr->value;
773     require Whatpm::LangTag;
774     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
775     my %opt = @_;
776     my $type = 'LangTag:'.$opt{type};
777     $type .= ':' . $opt{subtag} if defined $opt{subtag};
778     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
779     level => $opt{level});
780     });
781 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
782 wakaba 1.6
783     ## TODO: testdata
784 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
785    
786     ## "A valid media query [MQ]"
787     my $HTMLMQAttrChecker = sub {
788     my ($self, $attr) = @_;
789     $self->{onerror}->(node => $attr, level => 'unsupported',
790     type => 'media query');
791     ## ISSUE: What is "a valid media query"?
792     }; # $HTMLMQAttrChecker
793    
794     my $HTMLEventHandlerAttrChecker = sub {
795     my ($self, $attr) = @_;
796     $self->{onerror}->(node => $attr, level => 'unsupported',
797     type => 'event handler');
798     ## TODO: MUST contain valid ECMAScript code matching the
799     ## ECMAScript |FunctionBody| production. [ECMA262]
800     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
801     ## ISSUE: Automatic semicolon insertion does not apply?
802     ## ISSUE: Other script languages?
803     }; # $HTMLEventHandlerAttrChecker
804    
805     my $HTMLUsemapAttrChecker = sub {
806     my ($self, $attr) = @_;
807     ## MUST be a valid hashed ID reference to a |map| element
808     my $value = $attr->value;
809     if ($value =~ s/^#//) {
810     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
811     push @{$self->{usemap}}, [$value => $attr];
812     } else {
813     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
814     }
815     ## NOTE: Space characters in hashed ID references are conforming.
816     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
817     }; # $HTMLUsemapAttrChecker
818    
819     my $HTMLTargetAttrChecker = sub {
820     my ($self, $attr) = @_;
821     my $value = $attr->value;
822     if ($value =~ /^_/) {
823     $value = lc $value; ## ISSUE: ASCII case-insentitive?
824     unless ({
825     _self => 1, _parent => 1, _top => 1,
826     }->{$value}) {
827     $self->{onerror}->(node => $attr,
828     type => 'reserved browsing context name');
829     }
830     } else {
831     #$ ISSUE: An empty string is conforming?
832     }
833     }; # $HTMLTargetAttrChecker
834    
835     my $HTMLAttrChecker = {
836     id => sub {
837     ## NOTE: |map| has its own variant of |id=""| checker
838     my ($self, $attr) = @_;
839     my $value = $attr->value;
840     if (length $value > 0) {
841     if ($self->{id}->{$value}) {
842     $self->{onerror}->(node => $attr, type => 'duplicate ID');
843     push @{$self->{id}->{$value}}, $attr;
844     } else {
845     $self->{id}->{$value} = [$attr];
846     }
847     if ($value =~ /[\x09-\x0D\x20]/) {
848     $self->{onerror}->(node => $attr, type => 'space in ID');
849     }
850     } else {
851     ## NOTE: MUST contain at least one character
852     $self->{onerror}->(node => $attr, type => 'empty attribute value');
853     }
854     },
855     title => sub {}, ## NOTE: No conformance creteria
856     lang => sub {
857     my ($self, $attr) = @_;
858 wakaba 1.6 my $value = $attr->value;
859     if ($value eq '') {
860     #
861     } else {
862     require Whatpm::LangTag;
863     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
864     my %opt = @_;
865     my $type = 'LangTag:'.$opt{type};
866     $type .= ':' . $opt{subtag} if defined $opt{subtag};
867     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
868     level => $opt{level});
869     });
870     }
871 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
872     unless ($attr->owner_document->manakai_is_html) {
873     $self->{onerror}->(node => $attr, type => 'in XML:lang');
874     }
875 wakaba 1.6
876     ## TODO: test data
877 wakaba 1.1 },
878     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
879     class => sub {
880     my ($self, $attr) = @_;
881     my %word;
882     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
883     unless ($word{$word}) {
884     $word{$word} = 1;
885     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
886     } else {
887     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
888     }
889     }
890     },
891     contextmenu => sub {
892     my ($self, $attr) = @_;
893     my $value = $attr->value;
894     push @{$self->{contextmenu}}, [$value => $attr];
895     ## ISSUE: "The value must be the ID of a menu element in the DOM."
896     ## What is "in the DOM"? A menu Element node that is not part
897     ## of the Document tree is in the DOM? A menu Element node that
898     ## belong to another Document tree is in the DOM?
899     },
900     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
901 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
902     ## TODO: ref, template, registrationmark
903 wakaba 1.1 };
904    
905     for (qw/
906     onabort onbeforeunload onblur onchange onclick oncontextmenu
907     ondblclick ondrag ondragend ondragenter ondragleave ondragover
908     ondragstart ondrop onerror onfocus onkeydown onkeypress
909     onkeyup onload onmessage onmousedown onmousemove onmouseout
910     onmouseover onmouseup onmousewheel onresize onscroll onselect
911     onsubmit onunload
912     /) {
913     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
914     }
915    
916     my $GetHTMLAttrsChecker = sub {
917     my $element_specific_checker = shift;
918     return sub {
919     my ($self, $todo) = @_;
920     for my $attr (@{$todo->{node}->attributes}) {
921     my $attr_ns = $attr->namespace_uri;
922     $attr_ns = '' unless defined $attr_ns;
923     my $attr_ln = $attr->manakai_local_name;
924     my $checker;
925     if ($attr_ns eq '') {
926     $checker = $element_specific_checker->{$attr_ln}
927     || $HTMLAttrChecker->{$attr_ln};
928     }
929     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
930     || $AttrChecker->{$attr_ns}->{''};
931     if ($checker) {
932     $checker->($self, $attr, $todo);
933     } else {
934     $self->{onerror}->(node => $attr, level => 'unsupported',
935     type => 'attribute');
936     ## ISSUE: No comformance createria for unknown attributes in the spec
937     }
938     }
939     };
940     }; # $GetHTMLAttrsChecker
941    
942     our $Element;
943     our $ElementDefault;
944     our $AnyChecker;
945    
946     $Element->{$HTML_NS}->{''} = {
947     attrs_checker => $GetHTMLAttrsChecker->({}),
948     checker => $ElementDefault->{checker},
949     };
950    
951     $Element->{$HTML_NS}->{html} = {
952     is_root => 1,
953     attrs_checker => $GetHTMLAttrsChecker->({
954     xmlns => sub {
955     my ($self, $attr) = @_;
956     my $value = $attr->value;
957     unless ($value eq $HTML_NS) {
958     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
959     }
960     unless ($attr->owner_document->manakai_is_html) {
961     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
962     ## TODO: Test
963     }
964     },
965     }),
966     checker => sub {
967     my ($self, $todo) = @_;
968     my $el = $todo->{node};
969     my $new_todos = [];
970     my @nodes = (@{$el->child_nodes});
971    
972     my $phase = 'before head';
973     while (@nodes) {
974     my $node = shift @nodes;
975     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
976    
977     my $nt = $node->node_type;
978     if ($nt == 1) {
979     my $node_ns = $node->namespace_uri;
980     $node_ns = '' unless defined $node_ns;
981     my $node_ln = $node->manakai_local_name;
982     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
983 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
984     #
985     } elsif ($phase eq 'before head') {
986 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
987     $phase = 'after head';
988     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
989     $self->{onerror}->(node => $node, type => 'ps element missing:head');
990     $phase = 'after body';
991     } else {
992     $not_allowed = 1;
993     # before head
994     }
995     } elsif ($phase eq 'after head') {
996     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
997     $phase = 'after body';
998     } else {
999     $not_allowed = 1;
1000     # after head
1001     }
1002     } else { #elsif ($phase eq 'after body') {
1003     $not_allowed = 1;
1004     # after body
1005     }
1006     $self->{onerror}->(node => $node, type => 'element not allowed')
1007     if $not_allowed;
1008     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1009     unshift @nodes, @$sib;
1010     push @$new_todos, @$ch;
1011     } elsif ($nt == 3 or $nt == 4) {
1012     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1013     $self->{onerror}->(node => $node, type => 'character not allowed');
1014     }
1015     } elsif ($nt == 5) {
1016     unshift @nodes, @{$node->child_nodes};
1017     }
1018     }
1019    
1020     if ($phase eq 'before head') {
1021     $self->{onerror}->(node => $el, type => 'child element missing:head');
1022     $self->{onerror}->(node => $el, type => 'child element missing:body');
1023     } elsif ($phase eq 'after head') {
1024     $self->{onerror}->(node => $el, type => 'child element missing:body');
1025     }
1026    
1027     return ($new_todos);
1028     },
1029     };
1030    
1031     $Element->{$HTML_NS}->{head} = {
1032     attrs_checker => $GetHTMLAttrsChecker->({}),
1033     checker => sub {
1034     my ($self, $todo) = @_;
1035     my $el = $todo->{node};
1036     my $new_todos = [];
1037     my @nodes = (@{$el->child_nodes});
1038    
1039     my $has_title;
1040     my $phase = 'initial'; # 'after charset', 'after base'
1041     while (@nodes) {
1042     my $node = shift @nodes;
1043     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1044    
1045     my $nt = $node->node_type;
1046     if ($nt == 1) {
1047     my $node_ns = $node->namespace_uri;
1048     $node_ns = '' unless defined $node_ns;
1049     my $node_ln = $node->manakai_local_name;
1050     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1051 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1052     #
1053     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1054 wakaba 1.1 $phase = 'after base';
1055     unless ($has_title) {
1056     $has_title = 1;
1057     } else {
1058     $not_allowed = 1;
1059     }
1060     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1061     if ($node->has_attribute_ns (undef, 'charset')) {
1062     if ($phase eq 'initial') {
1063     $phase = 'after charset';
1064     } else {
1065     $not_allowed = 1;
1066     ## NOTE: See also |base|'s "contexts" field in the spec
1067     }
1068 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1069     $node->has_attribute_ns (undef, 'http-equiv')) {
1070     $phase = 'after base';
1071 wakaba 1.1 } else {
1072     $phase = 'after base';
1073 wakaba 1.5 $not_allowed = 1;
1074 wakaba 1.1 }
1075     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1076     if ($phase eq 'initial' or $phase eq 'after charset') {
1077     $phase = 'after base';
1078     } else {
1079     $not_allowed = 1;
1080     }
1081     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1082     $phase = 'after base';
1083     if ($node->has_attribute_ns (undef, 'scoped')) {
1084     $not_allowed = 1;
1085     }
1086     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1087     $phase = 'after base';
1088     } else {
1089     $not_allowed = 1;
1090     }
1091     $self->{onerror}->(node => $node, type => 'element not allowed')
1092     if $not_allowed;
1093 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1094 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1095     unshift @nodes, @$sib;
1096     push @$new_todos, @$ch;
1097     } elsif ($nt == 3 or $nt == 4) {
1098     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1099     $self->{onerror}->(node => $node, type => 'character not allowed');
1100     }
1101     } elsif ($nt == 5) {
1102     unshift @nodes, @{$node->child_nodes};
1103     }
1104     }
1105     unless ($has_title) {
1106     $self->{onerror}->(node => $el, type => 'child element missing:title');
1107     }
1108     return ($new_todos);
1109     },
1110     };
1111    
1112     $Element->{$HTML_NS}->{title} = {
1113     attrs_checker => $GetHTMLAttrsChecker->({}),
1114     checker => $HTMLTextChecker,
1115     };
1116    
1117     $Element->{$HTML_NS}->{base} = {
1118 wakaba 1.4 attrs_checker => sub {
1119     my ($self, $todo) = @_;
1120    
1121     if ($self->{has_uri_attr} and
1122     $todo->{node}->has_attribute_ns (undef, 'href')) {
1123     ## ISSUE: Are these examples conforming?
1124     ## <head profile="a b c"><base href> (except for |profile|'s
1125     ## non-conformance)
1126     ## <title xml:base="relative"/><base href/> (maybe it should be)
1127     ## <unknown xmlns="relative"/><base href/> (assuming that
1128     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1129     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1130     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1131     ## <style>@import 'relative';</style><base href>
1132     ## <script>location.href = 'relative';</script><base href>
1133     $self->{onerror}->(node => $todo->{node},
1134     type => 'basehref after URI attribute');
1135     }
1136     if ($self->{has_hyperlink_element} and
1137     $todo->{node}->has_attribute_ns (undef, 'target')) {
1138     ## ISSUE: Are these examples conforming?
1139     ## <head><title xlink:href=""/><base target="name"/></head>
1140     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1141     ## (assuming that |xbl:xbl| is allowed before |base|)
1142     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1143     ## <link href=""/><base target="name"/>
1144     ## <link rel=unknown href=""><base target=name>
1145     $self->{onerror}->(node => $todo->{node},
1146     type => 'basetarget after hyperlink');
1147     }
1148    
1149     return $GetHTMLAttrsChecker->({
1150     href => $HTMLURIAttrChecker,
1151     target => $HTMLTargetAttrChecker,
1152     })->($self, $todo);
1153 wakaba 1.12
1154     ## TOOD: <base/> is non-conforming (revision 1115)
1155 wakaba 1.4 },
1156 wakaba 1.1 checker => $HTMLEmptyChecker,
1157     };
1158    
1159     $Element->{$HTML_NS}->{link} = {
1160     attrs_checker => sub {
1161     my ($self, $todo) = @_;
1162     $GetHTMLAttrsChecker->({
1163     href => $HTMLURIAttrChecker,
1164 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1165 wakaba 1.1 media => $HTMLMQAttrChecker,
1166     hreflang => $HTMLLanguageTagAttrChecker,
1167     type => $HTMLIMTAttrChecker,
1168     ## NOTE: Though |title| has special semantics,
1169     ## syntactically same as the |title| as global attribute.
1170     })->($self, $todo);
1171 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1172     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1173     } else {
1174 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1175     type => 'attribute missing:href');
1176     }
1177     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1178     $self->{onerror}->(node => $todo->{node},
1179     type => 'attribute missing:rel');
1180     }
1181     },
1182     checker => $HTMLEmptyChecker,
1183     };
1184    
1185     $Element->{$HTML_NS}->{meta} = {
1186     attrs_checker => sub {
1187     my ($self, $todo) = @_;
1188     my $name_attr;
1189     my $http_equiv_attr;
1190     my $charset_attr;
1191     my $content_attr;
1192     for my $attr (@{$todo->{node}->attributes}) {
1193     my $attr_ns = $attr->namespace_uri;
1194     $attr_ns = '' unless defined $attr_ns;
1195     my $attr_ln = $attr->manakai_local_name;
1196     my $checker;
1197     if ($attr_ns eq '') {
1198     if ($attr_ln eq 'content') {
1199     $content_attr = $attr;
1200     $checker = 1;
1201     } elsif ($attr_ln eq 'name') {
1202     $name_attr = $attr;
1203     $checker = 1;
1204     } elsif ($attr_ln eq 'http-equiv') {
1205     $http_equiv_attr = $attr;
1206     $checker = 1;
1207     } elsif ($attr_ln eq 'charset') {
1208     $charset_attr = $attr;
1209     $checker = 1;
1210     } else {
1211     $checker = $HTMLAttrChecker->{$attr_ln}
1212     || $AttrChecker->{$attr_ns}->{$attr_ln}
1213     || $AttrChecker->{$attr_ns}->{''};
1214     }
1215     } else {
1216     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1217     || $AttrChecker->{$attr_ns}->{''};
1218     }
1219     if ($checker) {
1220     $checker->($self, $attr) if ref $checker;
1221     } else {
1222     $self->{onerror}->(node => $attr, level => 'unsupported',
1223     type => 'attribute');
1224     ## ISSUE: No comformance createria for unknown attributes in the spec
1225     }
1226     }
1227    
1228     if (defined $name_attr) {
1229     if (defined $http_equiv_attr) {
1230     $self->{onerror}->(node => $http_equiv_attr,
1231     type => 'attribute not allowed');
1232     } elsif (defined $charset_attr) {
1233     $self->{onerror}->(node => $charset_attr,
1234     type => 'attribute not allowed');
1235     }
1236     my $metadata_name = $name_attr->value;
1237     my $metadata_value;
1238     if (defined $content_attr) {
1239     $metadata_value = $content_attr->value;
1240     } else {
1241     $self->{onerror}->(node => $todo->{node},
1242     type => 'attribute missing:content');
1243     $metadata_value = '';
1244     }
1245     } elsif (defined $http_equiv_attr) {
1246     if (defined $charset_attr) {
1247     $self->{onerror}->(node => $charset_attr,
1248     type => 'attribute not allowed');
1249     }
1250     unless (defined $content_attr) {
1251     $self->{onerror}->(node => $todo->{node},
1252     type => 'attribute missing:content');
1253     }
1254     } elsif (defined $charset_attr) {
1255     if (defined $content_attr) {
1256     $self->{onerror}->(node => $content_attr,
1257     type => 'attribute not allowed');
1258     }
1259     } else {
1260     if (defined $content_attr) {
1261     $self->{onerror}->(node => $content_attr,
1262     type => 'attribute not allowed');
1263     $self->{onerror}->(node => $todo->{node},
1264     type => 'attribute missing:name|http-equiv');
1265     } else {
1266     $self->{onerror}->(node => $todo->{node},
1267     type => 'attribute missing:name|http-equiv|charset');
1268     }
1269     }
1270    
1271     ## TODO: metadata conformance
1272    
1273     ## TODO: pragma conformance
1274     if (defined $http_equiv_attr) { ## An enumerated attribute
1275     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1276     if ({
1277     'refresh' => 1,
1278     'default-style' => 1,
1279     }->{$keyword}) {
1280     #
1281     } else {
1282     $self->{onerror}->(node => $http_equiv_attr,
1283     type => 'enumerated:invalid');
1284     }
1285     }
1286    
1287     if (defined $charset_attr) {
1288     unless ($todo->{node}->owner_document->manakai_is_html) {
1289     $self->{onerror}->(node => $charset_attr,
1290     type => 'in XML:charset');
1291     }
1292     ## TODO: charset
1293     }
1294     },
1295     checker => $HTMLEmptyChecker,
1296     };
1297    
1298     $Element->{$HTML_NS}->{style} = {
1299     attrs_checker => $GetHTMLAttrsChecker->({
1300     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1301     media => $HTMLMQAttrChecker,
1302     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1303     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1304     ## not different
1305     }),
1306     checker => sub {
1307     ## NOTE: |html:style| has no conformance creteria on content model
1308     my ($self, $todo) = @_;
1309     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1310     $type = 'text/css' unless defined $type;
1311     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1312     type => 'style:'.$type); ## TODO: $type normalization
1313     return $AnyChecker->($self, $todo);
1314     },
1315     };
1316    
1317     $Element->{$HTML_NS}->{body} = {
1318     attrs_checker => $GetHTMLAttrsChecker->({}),
1319     checker => $HTMLBlockChecker,
1320     };
1321    
1322     $Element->{$HTML_NS}->{section} = {
1323     attrs_checker => $GetHTMLAttrsChecker->({}),
1324     checker => $HTMLStylableBlockChecker,
1325     };
1326    
1327     $Element->{$HTML_NS}->{nav} = {
1328     attrs_checker => $GetHTMLAttrsChecker->({}),
1329     checker => $HTMLBlockOrInlineChecker,
1330     };
1331    
1332     $Element->{$HTML_NS}->{article} = {
1333     attrs_checker => $GetHTMLAttrsChecker->({}),
1334     checker => $HTMLStylableBlockChecker,
1335     };
1336    
1337     $Element->{$HTML_NS}->{blockquote} = {
1338     attrs_checker => $GetHTMLAttrsChecker->({
1339     cite => $HTMLURIAttrChecker,
1340     }),
1341     checker => $HTMLBlockChecker,
1342     };
1343    
1344     $Element->{$HTML_NS}->{aside} = {
1345     attrs_checker => $GetHTMLAttrsChecker->({}),
1346     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1347     };
1348    
1349     $Element->{$HTML_NS}->{h1} = {
1350     attrs_checker => $GetHTMLAttrsChecker->({}),
1351     checker => sub {
1352     my ($self, $todo) = @_;
1353     $todo->{flag}->{has_heading}->[0] = 1;
1354     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1355     },
1356     };
1357    
1358     $Element->{$HTML_NS}->{h2} = {
1359     attrs_checker => $GetHTMLAttrsChecker->({}),
1360     checker => $Element->{$HTML_NS}->{h1}->{checker},
1361     };
1362    
1363     $Element->{$HTML_NS}->{h3} = {
1364     attrs_checker => $GetHTMLAttrsChecker->({}),
1365     checker => $Element->{$HTML_NS}->{h1}->{checker},
1366     };
1367    
1368     $Element->{$HTML_NS}->{h4} = {
1369     attrs_checker => $GetHTMLAttrsChecker->({}),
1370     checker => $Element->{$HTML_NS}->{h1}->{checker},
1371     };
1372    
1373     $Element->{$HTML_NS}->{h5} = {
1374     attrs_checker => $GetHTMLAttrsChecker->({}),
1375     checker => $Element->{$HTML_NS}->{h1}->{checker},
1376     };
1377    
1378     $Element->{$HTML_NS}->{h6} = {
1379     attrs_checker => $GetHTMLAttrsChecker->({}),
1380     checker => $Element->{$HTML_NS}->{h1}->{checker},
1381     };
1382    
1383     $Element->{$HTML_NS}->{header} = {
1384     attrs_checker => $GetHTMLAttrsChecker->({}),
1385     checker => sub {
1386     my ($self, $todo) = @_;
1387     my $old_flag = $todo->{flag}->{has_heading} || [];
1388     my $new_flag = [];
1389     local $todo->{flag}->{has_heading} = $new_flag;
1390     my $node = $todo->{node};
1391    
1392     my $end = $self->_add_minuses
1393     ({$HTML_NS => {qw/header 1 footer 1/}},
1394     $HTMLSectioningElements);
1395     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1396     push @$new_todos, $end,
1397     {type => 'code', code => sub {
1398     if ($new_flag->[0]) {
1399     $old_flag->[0] = 1;
1400     } else {
1401     $self->{onerror}->(node => $node, type => 'element missing:hn');
1402     }
1403     }};
1404     return ($new_todos, $ch);
1405     },
1406     };
1407    
1408     $Element->{$HTML_NS}->{footer} = {
1409     attrs_checker => $GetHTMLAttrsChecker->({}),
1410     checker => sub { ## block -hn -header -footer -sectioning or inline
1411     my ($self, $todo) = @_;
1412     my $el = $todo->{node};
1413     my $new_todos = [];
1414     my @nodes = (@{$el->child_nodes});
1415    
1416     my $content = 'block-or-inline'; # or 'block' or 'inline'
1417     my @block_not_inline;
1418     while (@nodes) {
1419     my $node = shift @nodes;
1420     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1421    
1422     my $nt = $node->node_type;
1423     if ($nt == 1) {
1424     my $node_ns = $node->namespace_uri;
1425     $node_ns = '' unless defined $node_ns;
1426     my $node_ln = $node->manakai_local_name;
1427     my $not_allowed;
1428     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1429     $not_allowed = 1;
1430     } elsif ($node_ns eq $HTML_NS and
1431     {
1432     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1433     }->{$node_ln}) {
1434     $not_allowed = 1;
1435     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1436     $not_allowed = 1;
1437     }
1438     if ($content eq 'block') {
1439     $not_allowed = 1
1440 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1441     $self->{pluses}->{$node_ns}->{$node_ln};
1442 wakaba 1.1 } elsif ($content eq 'inline') {
1443     $not_allowed = 1
1444     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1445 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1446     $self->{pluses}->{$node_ns}->{$node_ln};
1447 wakaba 1.1 } else {
1448     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1449     my $is_inline
1450     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1451     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1452    
1453     push @block_not_inline, $node
1454     if $is_block and not $is_inline and not $not_allowed;
1455 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1456 wakaba 1.1 $content = 'inline';
1457     for (@block_not_inline) {
1458     $self->{onerror}->(node => $_, type => 'element not allowed');
1459     }
1460     $not_allowed = 1 unless $is_inline;
1461     }
1462     }
1463     $self->{onerror}->(node => $node, type => 'element not allowed')
1464     if $not_allowed;
1465     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1466     unshift @nodes, @$sib;
1467     push @$new_todos, @$ch;
1468     } elsif ($nt == 3 or $nt == 4) {
1469     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1470     if ($content eq 'block') {
1471     $self->{onerror}->(node => $node, type => 'character not allowed');
1472     } else {
1473     $content = 'inline';
1474     for (@block_not_inline) {
1475     $self->{onerror}->(node => $_, type => 'element not allowed');
1476     }
1477     }
1478     }
1479     } elsif ($nt == 5) {
1480     unshift @nodes, @{$node->child_nodes};
1481     }
1482     }
1483    
1484     my $end = $self->_add_minuses
1485     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1486     $HTMLSectioningElements);
1487     push @$new_todos, $end;
1488    
1489     if ($content eq 'inline') {
1490     for (@$new_todos) {
1491     $_->{inline} = 1;
1492     }
1493     }
1494    
1495     return ($new_todos);
1496     },
1497     };
1498    
1499     $Element->{$HTML_NS}->{address} = {
1500     attrs_checker => $GetHTMLAttrsChecker->({}),
1501     checker => $HTMLInlineChecker,
1502     };
1503    
1504     $Element->{$HTML_NS}->{p} = {
1505     attrs_checker => $GetHTMLAttrsChecker->({}),
1506     checker => $HTMLSignificantInlineChecker,
1507     };
1508    
1509     $Element->{$HTML_NS}->{hr} = {
1510     attrs_checker => $GetHTMLAttrsChecker->({}),
1511     checker => $HTMLEmptyChecker,
1512     };
1513    
1514     $Element->{$HTML_NS}->{br} = {
1515     attrs_checker => $GetHTMLAttrsChecker->({}),
1516     checker => $HTMLEmptyChecker,
1517     };
1518    
1519     $Element->{$HTML_NS}->{dialog} = {
1520     attrs_checker => $GetHTMLAttrsChecker->({}),
1521     checker => sub {
1522     my ($self, $todo) = @_;
1523     my $el = $todo->{node};
1524     my $new_todos = [];
1525     my @nodes = (@{$el->child_nodes});
1526    
1527     my $phase = 'before dt';
1528     while (@nodes) {
1529     my $node = shift @nodes;
1530     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1531    
1532     my $nt = $node->node_type;
1533     if ($nt == 1) {
1534     my $node_ns = $node->namespace_uri;
1535     $node_ns = '' unless defined $node_ns;
1536     my $node_ln = $node->manakai_local_name;
1537     ## NOTE: |minuses| list is not checked since redundant
1538 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1539     #
1540     } elsif ($phase eq 'before dt') {
1541 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1542     $phase = 'before dd';
1543     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1544     $self->{onerror}
1545     ->(node => $node, type => 'ps element missing:dt');
1546     $phase = 'before dt';
1547     } else {
1548     $self->{onerror}->(node => $node, type => 'element not allowed');
1549     }
1550     } else { # before dd
1551     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1552     $phase = 'before dt';
1553     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1554     $self->{onerror}
1555     ->(node => $node, type => 'ps element missing:dd');
1556     $phase = 'before dd';
1557     } else {
1558     $self->{onerror}->(node => $node, type => 'element not allowed');
1559     }
1560     }
1561     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1562     unshift @nodes, @$sib;
1563     push @$new_todos, @$ch;
1564     } elsif ($nt == 3 or $nt == 4) {
1565     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1566     $self->{onerror}->(node => $node, type => 'character not allowed');
1567     }
1568     } elsif ($nt == 5) {
1569     unshift @nodes, @{$node->child_nodes};
1570     }
1571     }
1572     if ($phase eq 'before dd') {
1573 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1574 wakaba 1.1 }
1575     return ($new_todos);
1576     },
1577     };
1578    
1579     $Element->{$HTML_NS}->{pre} = {
1580     attrs_checker => $GetHTMLAttrsChecker->({}),
1581     checker => $HTMLStrictlyInlineChecker,
1582     };
1583    
1584     $Element->{$HTML_NS}->{ol} = {
1585     attrs_checker => $GetHTMLAttrsChecker->({
1586     start => $HTMLIntegerAttrChecker,
1587     }),
1588     checker => sub {
1589     my ($self, $todo) = @_;
1590     my $el = $todo->{node};
1591     my $new_todos = [];
1592     my @nodes = (@{$el->child_nodes});
1593    
1594     while (@nodes) {
1595     my $node = shift @nodes;
1596     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1597    
1598     my $nt = $node->node_type;
1599     if ($nt == 1) {
1600     my $node_ns = $node->namespace_uri;
1601     $node_ns = '' unless defined $node_ns;
1602     my $node_ln = $node->manakai_local_name;
1603     ## NOTE: |minuses| list is not checked since redundant
1604 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1605     #
1606     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1607 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1608     }
1609     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1610     unshift @nodes, @$sib;
1611     push @$new_todos, @$ch;
1612     } elsif ($nt == 3 or $nt == 4) {
1613     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1614     $self->{onerror}->(node => $node, type => 'character not allowed');
1615     }
1616     } elsif ($nt == 5) {
1617     unshift @nodes, @{$node->child_nodes};
1618     }
1619     }
1620    
1621     if ($todo->{inline}) {
1622     for (@$new_todos) {
1623     $_->{inline} = 1;
1624     }
1625     }
1626     return ($new_todos);
1627     },
1628     };
1629    
1630     $Element->{$HTML_NS}->{ul} = {
1631     attrs_checker => $GetHTMLAttrsChecker->({}),
1632     checker => $Element->{$HTML_NS}->{ol}->{checker},
1633     };
1634    
1635     $Element->{$HTML_NS}->{li} = {
1636     attrs_checker => $GetHTMLAttrsChecker->({
1637     start => sub {
1638     my ($self, $attr) = @_;
1639     my $parent = $attr->owner_element->manakai_parent_element;
1640     if (defined $parent) {
1641     my $parent_ns = $parent->namespace_uri;
1642     $parent_ns = '' unless defined $parent_ns;
1643     my $parent_ln = $parent->manakai_local_name;
1644     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1645     $self->{onerror}->(node => $attr, level => 'unsupported',
1646     type => 'attribute');
1647     }
1648     }
1649     $HTMLIntegerAttrChecker->($self, $attr);
1650     },
1651     }),
1652     checker => sub {
1653     my ($self, $todo) = @_;
1654     if ($todo->{inline}) {
1655     return $HTMLInlineChecker->($self, $todo);
1656     } else {
1657     return $HTMLBlockOrInlineChecker->($self, $todo);
1658     }
1659     },
1660     };
1661    
1662     $Element->{$HTML_NS}->{dl} = {
1663     attrs_checker => $GetHTMLAttrsChecker->({}),
1664     checker => sub {
1665     my ($self, $todo) = @_;
1666     my $el = $todo->{node};
1667     my $new_todos = [];
1668     my @nodes = (@{$el->child_nodes});
1669    
1670     my $phase = 'before dt';
1671     while (@nodes) {
1672     my $node = shift @nodes;
1673     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1674    
1675     my $nt = $node->node_type;
1676     if ($nt == 1) {
1677     my $node_ns = $node->namespace_uri;
1678     $node_ns = '' unless defined $node_ns;
1679     my $node_ln = $node->manakai_local_name;
1680     ## NOTE: |minuses| list is not checked since redundant
1681 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1682     #
1683     } elsif ($phase eq 'in dds') {
1684 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1685     #$phase = 'in dds';
1686     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1687     $phase = 'in dts';
1688     } else {
1689     $self->{onerror}->(node => $node, type => 'element not allowed');
1690     }
1691     } elsif ($phase eq 'in dts') {
1692     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1693     #$phase = 'in dts';
1694     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1695     $phase = 'in dds';
1696     } else {
1697     $self->{onerror}->(node => $node, type => 'element not allowed');
1698     }
1699     } else { # before dt
1700     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1701     $phase = 'in dts';
1702     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1703     $self->{onerror}
1704     ->(node => $node, type => 'ps element missing:dt');
1705     $phase = 'in dds';
1706     } else {
1707     $self->{onerror}->(node => $node, type => 'element not allowed');
1708     }
1709     }
1710     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1711     unshift @nodes, @$sib;
1712     push @$new_todos, @$ch;
1713     } elsif ($nt == 3 or $nt == 4) {
1714     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1715     $self->{onerror}->(node => $node, type => 'character not allowed');
1716     }
1717     } elsif ($nt == 5) {
1718     unshift @nodes, @{$node->child_nodes};
1719     }
1720     }
1721     if ($phase eq 'in dts') {
1722 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1723 wakaba 1.1 }
1724    
1725     if ($todo->{inline}) {
1726     for (@$new_todos) {
1727     $_->{inline} = 1;
1728     }
1729     }
1730     return ($new_todos);
1731     },
1732     };
1733    
1734     $Element->{$HTML_NS}->{dt} = {
1735     attrs_checker => $GetHTMLAttrsChecker->({}),
1736     checker => $HTMLStrictlyInlineChecker,
1737     };
1738    
1739     $Element->{$HTML_NS}->{dd} = {
1740     attrs_checker => $GetHTMLAttrsChecker->({}),
1741     checker => $Element->{$HTML_NS}->{li}->{checker},
1742     };
1743    
1744     $Element->{$HTML_NS}->{a} = {
1745     attrs_checker => sub {
1746     my ($self, $todo) = @_;
1747     my %attr;
1748     for my $attr (@{$todo->{node}->attributes}) {
1749     my $attr_ns = $attr->namespace_uri;
1750     $attr_ns = '' unless defined $attr_ns;
1751     my $attr_ln = $attr->manakai_local_name;
1752     my $checker;
1753     if ($attr_ns eq '') {
1754     $checker = {
1755     target => $HTMLTargetAttrChecker,
1756     href => $HTMLURIAttrChecker,
1757     ping => $HTMLSpaceURIsAttrChecker,
1758 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1759 wakaba 1.1 media => $HTMLMQAttrChecker,
1760     hreflang => $HTMLLanguageTagAttrChecker,
1761     type => $HTMLIMTAttrChecker,
1762     }->{$attr_ln};
1763     if ($checker) {
1764     $attr{$attr_ln} = $attr;
1765     } else {
1766     $checker = $HTMLAttrChecker->{$attr_ln};
1767     }
1768     }
1769     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1770     || $AttrChecker->{$attr_ns}->{''};
1771     if ($checker) {
1772     $checker->($self, $attr) if ref $checker;
1773     } else {
1774     $self->{onerror}->(node => $attr, level => 'unsupported',
1775     type => 'attribute');
1776     ## ISSUE: No comformance createria for unknown attributes in the spec
1777     }
1778     }
1779    
1780 wakaba 1.4 if (defined $attr{href}) {
1781     $self->{has_hyperlink_element} = 1;
1782     } else {
1783 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1784     if (defined $attr{$_}) {
1785     $self->{onerror}->(node => $attr{$_},
1786     type => 'attribute not allowed');
1787     }
1788     }
1789     }
1790     },
1791     checker => sub {
1792     my ($self, $todo) = @_;
1793    
1794     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1795     my ($new_todos, $ch)
1796     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1797     push @$new_todos, $end;
1798    
1799     $_->{flag}->{has_a} = 1 for @$new_todos;
1800 wakaba 1.12 ## TODO: <a> -> <a href> (revision 1115)
1801 wakaba 1.1
1802     return ($new_todos, $ch);
1803     },
1804     };
1805    
1806     $Element->{$HTML_NS}->{q} = {
1807     attrs_checker => $GetHTMLAttrsChecker->({
1808     cite => $HTMLURIAttrChecker,
1809     }),
1810     checker => $HTMLInlineOrStrictlyInlineChecker,
1811     };
1812    
1813     $Element->{$HTML_NS}->{cite} = {
1814     attrs_checker => $GetHTMLAttrsChecker->({}),
1815     checker => $HTMLStrictlyInlineChecker,
1816     };
1817    
1818     $Element->{$HTML_NS}->{em} = {
1819     attrs_checker => $GetHTMLAttrsChecker->({}),
1820     checker => $HTMLInlineOrStrictlyInlineChecker,
1821     };
1822    
1823     $Element->{$HTML_NS}->{strong} = {
1824     attrs_checker => $GetHTMLAttrsChecker->({}),
1825     checker => $HTMLInlineOrStrictlyInlineChecker,
1826     };
1827    
1828     $Element->{$HTML_NS}->{small} = {
1829     attrs_checker => $GetHTMLAttrsChecker->({}),
1830     checker => $HTMLInlineOrStrictlyInlineChecker,
1831     };
1832    
1833     $Element->{$HTML_NS}->{m} = {
1834     attrs_checker => $GetHTMLAttrsChecker->({}),
1835     checker => $HTMLInlineOrStrictlyInlineChecker,
1836     };
1837    
1838     $Element->{$HTML_NS}->{dfn} = {
1839     attrs_checker => $GetHTMLAttrsChecker->({}),
1840     checker => sub {
1841     my ($self, $todo) = @_;
1842    
1843     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1844     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1845     push @$sib, $end;
1846    
1847     my $node = $todo->{node};
1848     my $term = $node->get_attribute_ns (undef, 'title');
1849     unless (defined $term) {
1850     for my $child (@{$node->child_nodes}) {
1851     if ($child->node_type == 1) { # ELEMENT_NODE
1852     if (defined $term) {
1853     undef $term;
1854     last;
1855     } elsif ($child->manakai_local_name eq 'abbr') {
1856     my $nsuri = $child->namespace_uri;
1857     if (defined $nsuri and $nsuri eq $HTML_NS) {
1858     my $attr = $child->get_attribute_node_ns (undef, 'title');
1859     if ($attr) {
1860     $term = $attr->value;
1861     }
1862     }
1863     }
1864     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1865     ## TEXT_NODE or CDATA_SECTION_NODE
1866     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1867     next;
1868     }
1869     undef $term;
1870     last;
1871     }
1872     }
1873     unless (defined $term) {
1874     $term = $node->text_content;
1875     }
1876     }
1877     if ($self->{term}->{$term}) {
1878     $self->{onerror}->(node => $node, type => 'duplicate term');
1879     push @{$self->{term}->{$term}}, $node;
1880     } else {
1881     $self->{term}->{$term} = [$node];
1882     }
1883     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1884     ## has |title|.
1885    
1886     return ($sib, $ch);
1887     },
1888     };
1889    
1890     $Element->{$HTML_NS}->{abbr} = {
1891     attrs_checker => $GetHTMLAttrsChecker->({
1892     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1893     ## not different. The spec says that the |title| MAY be omitted
1894     ## if there is a |dfn| whose defining term is the abbreviation,
1895     ## but it does not prohibit |abbr| w/o |title| in other cases.
1896     }),
1897     checker => $HTMLStrictlyInlineChecker,
1898     };
1899    
1900     $Element->{$HTML_NS}->{time} = {
1901     attrs_checker => $GetHTMLAttrsChecker->({
1902     datetime => sub { 1 }, # checked in |checker|
1903     }),
1904     ## TODO: Write tests
1905     checker => sub {
1906     my ($self, $todo) = @_;
1907    
1908     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1909     my $input;
1910     my $reg_sp;
1911     my $input_node;
1912     if ($attr) {
1913     $input = $attr->value;
1914     $reg_sp = qr/[\x09-\x0D\x20]*/;
1915     $input_node = $attr;
1916     } else {
1917     $input = $todo->{node}->text_content;
1918     $reg_sp = qr/\p{Zs}*/;
1919     $input_node = $todo->{node};
1920    
1921     ## ISSUE: What is the definition for "successfully extracts a date
1922     ## or time"? If the algorithm says the string is invalid but
1923     ## return some date or time, is it "successfully"?
1924     }
1925    
1926     my $hour;
1927     my $minute;
1928     my $second;
1929     if ($input =~ /
1930     \A
1931     [\x09-\x0D\x20]*
1932     ([0-9]+) # 1
1933     (?>
1934     -([0-9]+) # 2
1935     -([0-9]+) # 3
1936     [\x09-\x0D\x20]*
1937     (?>
1938     T
1939     [\x09-\x0D\x20]*
1940     )?
1941     ([0-9]+) # 4
1942     :([0-9]+) # 5
1943     (?>
1944     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1945     )?
1946     [\x09-\x0D\x20]*
1947     (?>
1948     Z
1949     [\x09-\x0D\x20]*
1950     |
1951     [+-]([0-9]+):([0-9]+) # 7, 8
1952     [\x09-\x0D\x20]*
1953     )?
1954     \z
1955     |
1956     :([0-9]+) # 9
1957     (?>
1958     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1959     )?
1960     [\x09-\x0D\x20]*\z
1961     )
1962     /x) {
1963     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1964     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1965     length $4 != 2 or length $5 != 2) {
1966     $self->{onerror}->(node => $input_node,
1967     type => 'dateortime:syntax error');
1968     }
1969    
1970     if (1 <= $2 and $2 <= 12) {
1971     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1972     if $3 < 1 or
1973     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1974     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1975     if $2 == 2 and $3 == 29 and
1976     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1977     } else {
1978     $self->{onerror}->(node => $input_node,
1979     type => 'datetime:bad month');
1980     }
1981    
1982     ($hour, $minute, $second) = ($4, $5, $6);
1983    
1984     if (defined $7) { ## [+-]hh:mm
1985     if (length $7 != 2 or length $8 != 2) {
1986     $self->{onerror}->(node => $input_node,
1987     type => 'dateortime:syntax error');
1988     }
1989    
1990     $self->{onerror}->(node => $input_node,
1991     type => 'datetime:bad timezone hour')
1992     if $7 > 23;
1993     $self->{onerror}->(node => $input_node,
1994     type => 'datetime:bad timezone minute')
1995     if $8 > 59;
1996     }
1997     } else { ## hh:mm
1998     if (length $1 != 2 or length $9 != 2) {
1999     $self->{onerror}->(node => $input_node,
2000     type => qq'dateortime:syntax error');
2001     }
2002    
2003     ($hour, $minute, $second) = ($1, $9, $10);
2004     }
2005    
2006     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2007     if $hour > 23;
2008     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2009     if $minute > 59;
2010    
2011     if (defined $second) { ## s
2012     ## NOTE: Integer part of second don't have to have length of two.
2013    
2014     if (substr ($second, 0, 1) eq '.') {
2015     $self->{onerror}->(node => $input_node,
2016     type => 'dateortime:syntax error');
2017     }
2018    
2019     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2020     if $second >= 60;
2021     }
2022     } else {
2023     $self->{onerror}->(node => $input_node,
2024     type => 'dateortime:syntax error');
2025     }
2026    
2027     return $HTMLStrictlyInlineChecker->($self, $todo);
2028     },
2029     };
2030    
2031     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2032     attrs_checker => $GetHTMLAttrsChecker->({
2033     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2034     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2035     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2036     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2037     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2038     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2039     }),
2040     checker => $HTMLStrictlyInlineChecker,
2041     };
2042    
2043     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2044     attrs_checker => $GetHTMLAttrsChecker->({
2045     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2046     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2047     }),
2048     checker => $HTMLStrictlyInlineChecker,
2049     };
2050    
2051     $Element->{$HTML_NS}->{code} = {
2052     attrs_checker => $GetHTMLAttrsChecker->({}),
2053     ## NOTE: Though |title| has special semantics,
2054     ## syntatically same as the |title| as global attribute.
2055     checker => $HTMLInlineOrStrictlyInlineChecker,
2056     };
2057    
2058     $Element->{$HTML_NS}->{var} = {
2059     attrs_checker => $GetHTMLAttrsChecker->({}),
2060     ## NOTE: Though |title| has special semantics,
2061     ## syntatically same as the |title| as global attribute.
2062     checker => $HTMLStrictlyInlineChecker,
2063     };
2064    
2065     $Element->{$HTML_NS}->{samp} = {
2066     attrs_checker => $GetHTMLAttrsChecker->({}),
2067     ## NOTE: Though |title| has special semantics,
2068     ## syntatically same as the |title| as global attribute.
2069     checker => $HTMLInlineOrStrictlyInlineChecker,
2070     };
2071    
2072     $Element->{$HTML_NS}->{kbd} = {
2073     attrs_checker => $GetHTMLAttrsChecker->({}),
2074     checker => $HTMLStrictlyInlineChecker,
2075     };
2076    
2077     $Element->{$HTML_NS}->{sub} = {
2078     attrs_checker => $GetHTMLAttrsChecker->({}),
2079     checker => $HTMLStrictlyInlineChecker,
2080     };
2081    
2082     $Element->{$HTML_NS}->{sup} = {
2083     attrs_checker => $GetHTMLAttrsChecker->({}),
2084     checker => $HTMLStrictlyInlineChecker,
2085     };
2086    
2087     $Element->{$HTML_NS}->{span} = {
2088     attrs_checker => $GetHTMLAttrsChecker->({}),
2089     ## NOTE: Though |title| has special semantics,
2090     ## syntatically same as the |title| as global attribute.
2091     checker => $HTMLInlineOrStrictlyInlineChecker,
2092     };
2093    
2094     $Element->{$HTML_NS}->{i} = {
2095     attrs_checker => $GetHTMLAttrsChecker->({}),
2096     ## NOTE: Though |title| has special semantics,
2097     ## syntatically same as the |title| as global attribute.
2098     checker => $HTMLStrictlyInlineChecker,
2099     };
2100    
2101     $Element->{$HTML_NS}->{b} = {
2102     attrs_checker => $GetHTMLAttrsChecker->({}),
2103     checker => $HTMLStrictlyInlineChecker,
2104     };
2105    
2106     $Element->{$HTML_NS}->{bdo} = {
2107     attrs_checker => sub {
2108     my ($self, $todo) = @_;
2109     $GetHTMLAttrsChecker->({})->($self, $todo);
2110     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2111     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2112     }
2113     },
2114     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2115     checker => $HTMLStrictlyInlineChecker,
2116     };
2117    
2118     $Element->{$HTML_NS}->{ins} = {
2119     attrs_checker => $GetHTMLAttrsChecker->({
2120     cite => $HTMLURIAttrChecker,
2121     datetime => $HTMLDatetimeAttrChecker,
2122     }),
2123     checker => $HTMLTransparentChecker,
2124     };
2125    
2126     $Element->{$HTML_NS}->{del} = {
2127     attrs_checker => $GetHTMLAttrsChecker->({
2128     cite => $HTMLURIAttrChecker,
2129     datetime => $HTMLDatetimeAttrChecker,
2130     }),
2131     checker => sub {
2132     my ($self, $todo) = @_;
2133    
2134     my $parent = $todo->{node}->manakai_parent_element;
2135     if (defined $parent) {
2136     my $nsuri = $parent->namespace_uri;
2137     $nsuri = '' unless defined $nsuri;
2138     my $ln = $parent->manakai_local_name;
2139     my $eldef = $Element->{$nsuri}->{$ln} ||
2140     $Element->{$nsuri}->{''} ||
2141     $ElementDefault;
2142     return $eldef->{checker}->($self, $todo);
2143     } else {
2144     return $HTMLBlockOrInlineChecker->($self, $todo);
2145     }
2146     },
2147     };
2148    
2149     ## TODO: figure
2150 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2151 wakaba 1.1
2152 wakaba 1.4 ## TODO: |alt|
2153 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2154     attrs_checker => sub {
2155     my ($self, $todo) = @_;
2156     $GetHTMLAttrsChecker->({
2157     alt => sub { }, ## NOTE: No syntactical requirement
2158     src => $HTMLURIAttrChecker,
2159     usemap => $HTMLUsemapAttrChecker,
2160     ismap => sub {
2161     my ($self, $attr, $parent_todo) = @_;
2162     if (not $todo->{flag}->{has_a}) {
2163     $self->{onerror}->(node => $attr, type => 'attribute not allowed');
2164     }
2165     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2166     },
2167     ## TODO: height
2168     ## TODO: width
2169     })->($self, $todo);
2170     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2171     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2172     }
2173     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2174     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2175     }
2176     },
2177     checker => $HTMLEmptyChecker,
2178     };
2179    
2180     $Element->{$HTML_NS}->{iframe} = {
2181     attrs_checker => $GetHTMLAttrsChecker->({
2182     src => $HTMLURIAttrChecker,
2183     }),
2184     checker => $HTMLTextChecker,
2185     };
2186    
2187     $Element->{$HTML_NS}->{embed} = {
2188     attrs_checker => sub {
2189     my ($self, $todo) = @_;
2190     my $has_src;
2191     for my $attr (@{$todo->{node}->attributes}) {
2192     my $attr_ns = $attr->namespace_uri;
2193     $attr_ns = '' unless defined $attr_ns;
2194     my $attr_ln = $attr->manakai_local_name;
2195     my $checker;
2196     if ($attr_ns eq '') {
2197     if ($attr_ln eq 'src') {
2198     $checker = $HTMLURIAttrChecker;
2199     $has_src = 1;
2200     } elsif ($attr_ln eq 'type') {
2201     $checker = $HTMLIMTAttrChecker;
2202     } else {
2203     ## TODO: height
2204     ## TODO: width
2205     $checker = $HTMLAttrChecker->{$attr_ln}
2206     || sub { }; ## NOTE: Any local attribute is ok.
2207     }
2208     }
2209     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2210     || $AttrChecker->{$attr_ns}->{''};
2211     if ($checker) {
2212     $checker->($self, $attr);
2213     } else {
2214     $self->{onerror}->(node => $attr, level => 'unsupported',
2215     type => 'attribute');
2216     ## ISSUE: No comformance createria for global attributes in the spec
2217     }
2218     }
2219    
2220     unless ($has_src) {
2221     $self->{onerror}->(node => $todo->{node},
2222     type => 'attribute missing:src');
2223     }
2224     },
2225     checker => $HTMLEmptyChecker,
2226     };
2227    
2228     $Element->{$HTML_NS}->{object} = {
2229     attrs_checker => sub {
2230     my ($self, $todo) = @_;
2231     $GetHTMLAttrsChecker->({
2232     data => $HTMLURIAttrChecker,
2233     type => $HTMLIMTAttrChecker,
2234     usemap => $HTMLUsemapAttrChecker,
2235     ## TODO: width
2236     ## TODO: height
2237     })->($self, $todo);
2238     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2239     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2240     $self->{onerror}->(node => $todo->{node},
2241     type => 'attribute missing:data|type');
2242     }
2243     }
2244     },
2245     checker => $ElementDefault->{checker}, ## TODO
2246 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2247 wakaba 1.1 };
2248    
2249     $Element->{$HTML_NS}->{param} = {
2250     attrs_checker => sub {
2251     my ($self, $todo) = @_;
2252     $GetHTMLAttrsChecker->({
2253     name => sub { },
2254     value => sub { },
2255     })->($self, $todo);
2256     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2257     $self->{onerror}->(node => $todo->{node},
2258     type => 'attribute missing:name');
2259     }
2260     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2261     $self->{onerror}->(node => $todo->{node},
2262     type => 'attribute missing:value');
2263     }
2264     },
2265     checker => $HTMLEmptyChecker,
2266     };
2267    
2268     $Element->{$HTML_NS}->{video} = {
2269     attrs_checker => $GetHTMLAttrsChecker->({
2270     src => $HTMLURIAttrChecker,
2271     ## TODO: start, loopstart, loopend, end
2272     ## ISSUE: they MUST be "value time offset"s. Value?
2273 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2274 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2275     controls => $GetHTMLBooleanAttrChecker->('controls'),
2276 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2277     ## TODO: width, height (not for audio!)
2278 wakaba 1.1 }),
2279     checker => sub {
2280     my ($self, $todo) = @_;
2281    
2282     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2283     return $HTMLBlockOrInlineChecker->($self, $todo);
2284     } else {
2285     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2286     ->($self, $todo);
2287     }
2288     },
2289     };
2290    
2291     $Element->{$HTML_NS}->{audio} = {
2292     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2293     checker => $Element->{$HTML_NS}->{video}->{checker},
2294     };
2295    
2296     $Element->{$HTML_NS}->{source} = {
2297     attrs_checker => sub {
2298     my ($self, $todo) = @_;
2299     $GetHTMLAttrsChecker->({
2300     src => $HTMLURIAttrChecker,
2301     type => $HTMLIMTAttrChecker,
2302     media => $HTMLMQAttrChecker,
2303     })->($self, $todo);
2304     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2305     $self->{onerror}->(node => $todo->{node},
2306     type => 'attribute missing:src');
2307     }
2308     },
2309     checker => $HTMLEmptyChecker,
2310     };
2311    
2312     $Element->{$HTML_NS}->{canvas} = {
2313     attrs_checker => $GetHTMLAttrsChecker->({
2314     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2315     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2316     }),
2317     checker => $HTMLInlineChecker,
2318     };
2319    
2320     $Element->{$HTML_NS}->{map} = {
2321 wakaba 1.4 attrs_checker => sub {
2322     my ($self, $todo) = @_;
2323     my $has_id;
2324     $GetHTMLAttrsChecker->({
2325     id => sub {
2326     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2327     my ($self, $attr) = @_;
2328     my $value = $attr->value;
2329     if (length $value > 0) {
2330     if ($self->{id}->{$value}) {
2331     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2332     push @{$self->{id}->{$value}}, $attr;
2333     } else {
2334     $self->{id}->{$value} = [$attr];
2335     }
2336 wakaba 1.1 } else {
2337 wakaba 1.4 ## NOTE: MUST contain at least one character
2338     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2339 wakaba 1.1 }
2340 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2341     $self->{onerror}->(node => $attr, type => 'space in ID');
2342     }
2343     $self->{map}->{$value} ||= $attr;
2344     $has_id = 1;
2345     },
2346     })->($self, $todo);
2347     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2348     unless $has_id;
2349     },
2350 wakaba 1.1 checker => $HTMLBlockChecker,
2351     };
2352    
2353     $Element->{$HTML_NS}->{area} = {
2354     attrs_checker => sub {
2355     my ($self, $todo) = @_;
2356     my %attr;
2357     my $coords;
2358     for my $attr (@{$todo->{node}->attributes}) {
2359     my $attr_ns = $attr->namespace_uri;
2360     $attr_ns = '' unless defined $attr_ns;
2361     my $attr_ln = $attr->manakai_local_name;
2362     my $checker;
2363     if ($attr_ns eq '') {
2364     $checker = {
2365     alt => sub { },
2366     ## NOTE: |alt| value has no conformance creteria.
2367     shape => $GetHTMLEnumeratedAttrChecker->({
2368     circ => -1, circle => 1,
2369     default => 1,
2370     poly => 1, polygon => -1,
2371     rect => 1, rectangle => -1,
2372     }),
2373     coords => sub {
2374     my ($self, $attr) = @_;
2375     my $value = $attr->value;
2376     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2377     $coords = [split /,/, $value];
2378     } else {
2379     $self->{onerror}->(node => $attr,
2380     type => 'coords:syntax error');
2381     }
2382     },
2383     target => $HTMLTargetAttrChecker,
2384     href => $HTMLURIAttrChecker,
2385     ping => $HTMLSpaceURIsAttrChecker,
2386 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2387 wakaba 1.1 media => $HTMLMQAttrChecker,
2388     hreflang => $HTMLLanguageTagAttrChecker,
2389     type => $HTMLIMTAttrChecker,
2390     }->{$attr_ln};
2391     if ($checker) {
2392     $attr{$attr_ln} = $attr;
2393     } else {
2394     $checker = $HTMLAttrChecker->{$attr_ln};
2395     }
2396     }
2397     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2398     || $AttrChecker->{$attr_ns}->{''};
2399     if ($checker) {
2400     $checker->($self, $attr) if ref $checker;
2401     } else {
2402     $self->{onerror}->(node => $attr, level => 'unsupported',
2403     type => 'attribute');
2404     ## ISSUE: No comformance createria for unknown attributes in the spec
2405     }
2406     }
2407    
2408     if (defined $attr{href}) {
2409 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2410 wakaba 1.1 unless (defined $attr{alt}) {
2411     $self->{onerror}->(node => $todo->{node},
2412     type => 'attribute missing:alt');
2413     }
2414     } else {
2415     for (qw/target ping rel media hreflang type alt/) {
2416     if (defined $attr{$_}) {
2417     $self->{onerror}->(node => $attr{$_},
2418     type => 'attribute not allowed');
2419     }
2420     }
2421     }
2422    
2423     my $shape = 'rectangle';
2424     if (defined $attr{shape}) {
2425     $shape = {
2426     circ => 'circle', circle => 'circle',
2427     default => 'default',
2428     poly => 'polygon', polygon => 'polygon',
2429     rect => 'rectangle', rectangle => 'rectangle',
2430     }->{lc $attr{shape}->value} || 'rectangle';
2431     ## TODO: ASCII lowercase?
2432     }
2433    
2434     if ($shape eq 'circle') {
2435     if (defined $attr{coords}) {
2436     if (defined $coords) {
2437     if (@$coords == 3) {
2438     if ($coords->[2] < 0) {
2439     $self->{onerror}->(node => $attr{coords},
2440     type => 'coords:out of range:2');
2441     }
2442     } else {
2443     $self->{onerror}->(node => $attr{coords},
2444     type => 'coords:number:3:'.@$coords);
2445     }
2446     } else {
2447     ## NOTE: A syntax error has been reported.
2448     }
2449     } else {
2450     $self->{onerror}->(node => $todo->{node},
2451     type => 'attribute missing:coords');
2452     }
2453     } elsif ($shape eq 'default') {
2454     if (defined $attr{coords}) {
2455     $self->{onerror}->(node => $attr{coords},
2456     type => 'attribute not allowed');
2457     }
2458     } elsif ($shape eq 'polygon') {
2459     if (defined $attr{coords}) {
2460     if (defined $coords) {
2461     if (@$coords >= 6) {
2462     unless (@$coords % 2 == 0) {
2463     $self->{onerror}->(node => $attr{coords},
2464     type => 'coords:number:even:'.@$coords);
2465     }
2466     } else {
2467     $self->{onerror}->(node => $attr{coords},
2468     type => 'coords:number:>=6:'.@$coords);
2469     }
2470     } else {
2471     ## NOTE: A syntax error has been reported.
2472     }
2473     } else {
2474     $self->{onerror}->(node => $todo->{node},
2475     type => 'attribute missing:coords');
2476     }
2477     } elsif ($shape eq 'rectangle') {
2478     if (defined $attr{coords}) {
2479     if (defined $coords) {
2480     if (@$coords == 4) {
2481     unless ($coords->[0] < $coords->[2]) {
2482     $self->{onerror}->(node => $attr{coords},
2483     type => 'coords:out of range:0');
2484     }
2485     unless ($coords->[1] < $coords->[3]) {
2486     $self->{onerror}->(node => $attr{coords},
2487     type => 'coords:out of range:1');
2488     }
2489     } else {
2490     $self->{onerror}->(node => $attr{coords},
2491     type => 'coords:number:4:'.@$coords);
2492     }
2493     } else {
2494     ## NOTE: A syntax error has been reported.
2495     }
2496     } else {
2497     $self->{onerror}->(node => $todo->{node},
2498     type => 'attribute missing:coords');
2499     }
2500     }
2501     },
2502     checker => $HTMLEmptyChecker,
2503     };
2504     ## TODO: only in map
2505    
2506     $Element->{$HTML_NS}->{table} = {
2507     attrs_checker => $GetHTMLAttrsChecker->({}),
2508     checker => sub {
2509     my ($self, $todo) = @_;
2510     my $el = $todo->{node};
2511     my $new_todos = [];
2512     my @nodes = (@{$el->child_nodes});
2513    
2514     my $phase = 'before caption';
2515     my $has_tfoot;
2516     while (@nodes) {
2517     my $node = shift @nodes;
2518     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2519    
2520     my $nt = $node->node_type;
2521     if ($nt == 1) {
2522     my $node_ns = $node->namespace_uri;
2523     $node_ns = '' unless defined $node_ns;
2524     my $node_ln = $node->manakai_local_name;
2525     ## NOTE: |minuses| list is not checked since redundant
2526 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2527     #
2528     } elsif ($phase eq 'in tbodys') {
2529 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2530     #$phase = 'in tbodys';
2531     } elsif (not $has_tfoot and
2532     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2533     $phase = 'after tfoot';
2534     $has_tfoot = 1;
2535     } else {
2536     $self->{onerror}->(node => $node, type => 'element not allowed');
2537     }
2538     } elsif ($phase eq 'in trs') {
2539     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2540     #$phase = 'in trs';
2541     } elsif (not $has_tfoot and
2542     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2543     $phase = 'after tfoot';
2544     $has_tfoot = 1;
2545     } else {
2546     $self->{onerror}->(node => $node, type => 'element not allowed');
2547     }
2548     } elsif ($phase eq 'after thead') {
2549     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2550     $phase = 'in tbodys';
2551     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2552     $phase = 'in trs';
2553     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2554     $phase = 'in tbodys';
2555     $has_tfoot = 1;
2556     } else {
2557     $self->{onerror}->(node => $node, type => 'element not allowed');
2558     }
2559     } elsif ($phase eq 'in colgroup') {
2560     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2561     $phase = 'in colgroup';
2562     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2563     $phase = 'after thead';
2564     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2565     $phase = 'in tbodys';
2566     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2567     $phase = 'in trs';
2568     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2569     $phase = 'in tbodys';
2570     $has_tfoot = 1;
2571     } else {
2572     $self->{onerror}->(node => $node, type => 'element not allowed');
2573     }
2574     } elsif ($phase eq 'before caption') {
2575     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2576     $phase = 'in colgroup';
2577     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2578     $phase = 'in colgroup';
2579     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2580     $phase = 'after thead';
2581     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2582     $phase = 'in tbodys';
2583     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2584     $phase = 'in trs';
2585     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2586     $phase = 'in tbodys';
2587     $has_tfoot = 1;
2588     } else {
2589     $self->{onerror}->(node => $node, type => 'element not allowed');
2590     }
2591     } else { # after tfoot
2592     $self->{onerror}->(node => $node, type => 'element not allowed');
2593     }
2594     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2595     unshift @nodes, @$sib;
2596     push @$new_todos, @$ch;
2597     } elsif ($nt == 3 or $nt == 4) {
2598     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2599     $self->{onerror}->(node => $node, type => 'character not allowed');
2600     }
2601     } elsif ($nt == 5) {
2602     unshift @nodes, @{$node->child_nodes};
2603     }
2604     }
2605    
2606     ## Table model errors
2607     require Whatpm::HTMLTable;
2608     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2609     my %opt = @_;
2610     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2611     });
2612     push @{$self->{return}->{table}}, $todo->{node};
2613    
2614     return ($new_todos);
2615     },
2616     };
2617    
2618     $Element->{$HTML_NS}->{caption} = {
2619     attrs_checker => $GetHTMLAttrsChecker->({}),
2620     checker => $HTMLSignificantStrictlyInlineChecker,
2621     };
2622    
2623     $Element->{$HTML_NS}->{colgroup} = {
2624     attrs_checker => $GetHTMLAttrsChecker->({
2625     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2626     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2627     ## TODO: "attribute not supported" if |col|.
2628     ## ISSUE: MUST NOT if any |col|?
2629     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2630     }),
2631     checker => sub {
2632     my ($self, $todo) = @_;
2633     my $el = $todo->{node};
2634     my $new_todos = [];
2635     my @nodes = (@{$el->child_nodes});
2636    
2637     while (@nodes) {
2638     my $node = shift @nodes;
2639     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2640    
2641     my $nt = $node->node_type;
2642     if ($nt == 1) {
2643     my $node_ns = $node->namespace_uri;
2644     $node_ns = '' unless defined $node_ns;
2645     my $node_ln = $node->manakai_local_name;
2646     ## NOTE: |minuses| list is not checked since redundant
2647 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2648     #
2649     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2650 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2651     }
2652     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2653     unshift @nodes, @$sib;
2654     push @$new_todos, @$ch;
2655     } elsif ($nt == 3 or $nt == 4) {
2656     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2657     $self->{onerror}->(node => $node, type => 'character not allowed');
2658     }
2659     } elsif ($nt == 5) {
2660     unshift @nodes, @{$node->child_nodes};
2661     }
2662     }
2663     return ($new_todos);
2664     },
2665     };
2666    
2667     $Element->{$HTML_NS}->{col} = {
2668     attrs_checker => $GetHTMLAttrsChecker->({
2669     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2670     }),
2671     checker => $HTMLEmptyChecker,
2672     };
2673    
2674     $Element->{$HTML_NS}->{tbody} = {
2675     attrs_checker => $GetHTMLAttrsChecker->({}),
2676     checker => sub {
2677     my ($self, $todo) = @_;
2678     my $el = $todo->{node};
2679     my $new_todos = [];
2680     my @nodes = (@{$el->child_nodes});
2681    
2682     my $has_tr;
2683     while (@nodes) {
2684     my $node = shift @nodes;
2685     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2686    
2687     my $nt = $node->node_type;
2688     if ($nt == 1) {
2689     my $node_ns = $node->namespace_uri;
2690     $node_ns = '' unless defined $node_ns;
2691     my $node_ln = $node->manakai_local_name;
2692     ## NOTE: |minuses| list is not checked since redundant
2693 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2694     #
2695     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2696 wakaba 1.1 $has_tr = 1;
2697     } else {
2698     $self->{onerror}->(node => $node, type => 'element not allowed');
2699     }
2700     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2701     unshift @nodes, @$sib;
2702     push @$new_todos, @$ch;
2703     } elsif ($nt == 3 or $nt == 4) {
2704     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2705     $self->{onerror}->(node => $node, type => 'character not allowed');
2706     }
2707     } elsif ($nt == 5) {
2708     unshift @nodes, @{$node->child_nodes};
2709     }
2710     }
2711     unless ($has_tr) {
2712     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2713     }
2714     return ($new_todos);
2715     },
2716     };
2717    
2718     $Element->{$HTML_NS}->{thead} = {
2719     attrs_checker => $GetHTMLAttrsChecker->({}),
2720     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2721     };
2722    
2723     $Element->{$HTML_NS}->{tfoot} = {
2724     attrs_checker => $GetHTMLAttrsChecker->({}),
2725     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2726     };
2727    
2728     $Element->{$HTML_NS}->{tr} = {
2729     attrs_checker => $GetHTMLAttrsChecker->({}),
2730     checker => sub {
2731     my ($self, $todo) = @_;
2732     my $el = $todo->{node};
2733     my $new_todos = [];
2734     my @nodes = (@{$el->child_nodes});
2735    
2736     my $has_td;
2737     while (@nodes) {
2738     my $node = shift @nodes;
2739     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2740    
2741     my $nt = $node->node_type;
2742     if ($nt == 1) {
2743     my $node_ns = $node->namespace_uri;
2744     $node_ns = '' unless defined $node_ns;
2745     my $node_ln = $node->manakai_local_name;
2746     ## NOTE: |minuses| list is not checked since redundant
2747 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2748     #
2749     } elsif ($node_ns eq $HTML_NS and
2750     ($node_ln eq 'td' or $node_ln eq 'th')) {
2751 wakaba 1.1 $has_td = 1;
2752     } else {
2753     $self->{onerror}->(node => $node, type => 'element not allowed');
2754     }
2755     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2756     unshift @nodes, @$sib;
2757     push @$new_todos, @$ch;
2758     } elsif ($nt == 3 or $nt == 4) {
2759     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2760     $self->{onerror}->(node => $node, type => 'character not allowed');
2761     }
2762     } elsif ($nt == 5) {
2763     unshift @nodes, @{$node->child_nodes};
2764     }
2765     }
2766     unless ($has_td) {
2767     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2768     }
2769     return ($new_todos);
2770     },
2771     };
2772    
2773     $Element->{$HTML_NS}->{td} = {
2774     attrs_checker => $GetHTMLAttrsChecker->({
2775     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2776     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2777     }),
2778     checker => $HTMLBlockOrInlineChecker,
2779     };
2780    
2781     $Element->{$HTML_NS}->{th} = {
2782     attrs_checker => $GetHTMLAttrsChecker->({
2783     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2784     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2785     scope => $GetHTMLEnumeratedAttrChecker
2786     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2787     }),
2788     checker => $HTMLBlockOrInlineChecker,
2789     };
2790    
2791     ## TODO: forms
2792 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2793 wakaba 1.1
2794     $Element->{$HTML_NS}->{script} = {
2795 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2796 wakaba 1.1 src => $HTMLURIAttrChecker,
2797     defer => $GetHTMLBooleanAttrChecker->('defer'),
2798     async => $GetHTMLBooleanAttrChecker->('async'),
2799     type => $HTMLIMTAttrChecker,
2800 wakaba 1.9 }),
2801 wakaba 1.1 checker => sub {
2802     my ($self, $todo) = @_;
2803    
2804     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2805     return $HTMLEmptyChecker->($self, $todo);
2806     } else {
2807     ## NOTE: No content model conformance in HTML5 spec.
2808     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2809     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2810     if ((defined $type and $type eq '') or
2811     (defined $language and $language eq '')) {
2812     $type = 'text/javascript';
2813     } elsif (defined $type) {
2814     #
2815     } elsif (defined $language) {
2816     $type = 'text/' . $language;
2817     } else {
2818     $type = 'text/javascript';
2819     }
2820     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2821     type => 'script:'.$type); ## TODO: $type normalization
2822     return $AnyChecker->($self, $todo);
2823     }
2824     },
2825     };
2826    
2827     ## NOTE: When script is disabled.
2828     $Element->{$HTML_NS}->{noscript} = {
2829 wakaba 1.3 attrs_checker => sub {
2830     my ($self, $todo) = @_;
2831    
2832     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2833     ## since the later is not invoked when the |noscript| is used as a
2834     ## transparent element.
2835     unless ($todo->{node}->owner_document->manakai_is_html) {
2836     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2837     }
2838    
2839     $GetHTMLAttrsChecker->({})->($self, $todo);
2840     },
2841 wakaba 1.1 checker => sub {
2842     my ($self, $todo) = @_;
2843    
2844 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2845     my $new_todos = [];
2846     my @nodes = (@{$todo->{node}->child_nodes});
2847    
2848     while (@nodes) {
2849     my $node = shift @nodes;
2850     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2851    
2852     my $nt = $node->node_type;
2853     if ($nt == 1) {
2854     my $node_ns = $node->namespace_uri;
2855     $node_ns = '' unless defined $node_ns;
2856     my $node_ln = $node->manakai_local_name;
2857 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2858     #
2859     } elsif ($node_ns eq $HTML_NS) {
2860 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2861     #
2862     } elsif ($node_ln eq 'meta') {
2863 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2864     #
2865 wakaba 1.3 } else {
2866 wakaba 1.5 $self->{onerror}->(node => $node,
2867     type => 'element not allowed');
2868 wakaba 1.3 }
2869     } else {
2870     $self->{onerror}->(node => $node, type => 'element not allowed');
2871     }
2872     } else {
2873     $self->{onerror}->(node => $node, type => 'element not allowed');
2874     }
2875    
2876     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2877     unshift @nodes, @$sib;
2878     push @$new_todos, @$ch;
2879     } elsif ($nt == 3 or $nt == 4) {
2880     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2881     $self->{onerror}->(node => $node, type => 'character not allowed');
2882     }
2883     } elsif ($nt == 5) {
2884     unshift @nodes, @{$node->child_nodes};
2885     }
2886     }
2887     return ($new_todos);
2888     } else {
2889     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2890     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2891     push @$sib, $end;
2892     return ($sib, $ch);
2893     }
2894 wakaba 1.1 },
2895     };
2896 wakaba 1.3
2897     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2898 wakaba 1.1
2899     $Element->{$HTML_NS}->{'event-source'} = {
2900     attrs_checker => $GetHTMLAttrsChecker->({
2901     src => $HTMLURIAttrChecker,
2902     }),
2903     checker => $HTMLEmptyChecker,
2904     };
2905    
2906     $Element->{$HTML_NS}->{details} = {
2907     attrs_checker => $GetHTMLAttrsChecker->({
2908     open => $GetHTMLBooleanAttrChecker->('open'),
2909     }),
2910     checker => sub {
2911     my ($self, $todo) = @_;
2912    
2913     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2914     my ($sib, $ch)
2915     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2916     ->($self, $todo);
2917     push @$sib, $end;
2918     return ($sib, $ch);
2919     },
2920     };
2921    
2922     $Element->{$HTML_NS}->{datagrid} = {
2923     attrs_checker => $GetHTMLAttrsChecker->({
2924     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2925     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2926     }),
2927     checker => sub {
2928     my ($self, $todo) = @_;
2929     my $el = $todo->{node};
2930     my $new_todos = [];
2931     my @nodes = (@{$el->child_nodes});
2932    
2933     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2934    
2935     ## Block-table Block* | table | select | datalist | Empty
2936     my $mode = 'any';
2937     while (@nodes) {
2938     my $node = shift @nodes;
2939     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2940    
2941     my $nt = $node->node_type;
2942     if ($nt == 1) {
2943     my $node_ns = $node->namespace_uri;
2944     $node_ns = '' unless defined $node_ns;
2945     my $node_ln = $node->manakai_local_name;
2946     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2947 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2948     #
2949     } elsif ($mode eq 'block') {
2950 wakaba 1.1 $not_allowed = 1
2951     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2952     } elsif ($mode eq 'any') {
2953     if ($node_ns eq $HTML_NS and
2954     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2955     $mode = 'none';
2956     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2957     $mode = 'block';
2958     } else {
2959     $not_allowed = 1;
2960     }
2961     } else {
2962     $not_allowed = 1;
2963     }
2964     $self->{onerror}->(node => $node, type => 'element not allowed')
2965     if $not_allowed;
2966     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2967     unshift @nodes, @$sib;
2968     push @$new_todos, @$ch;
2969     } elsif ($nt == 3 or $nt == 4) {
2970     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2971     $self->{onerror}->(node => $node, type => 'character not allowed');
2972     }
2973     } elsif ($nt == 5) {
2974     unshift @nodes, @{$node->child_nodes};
2975     }
2976     }
2977    
2978     push @$new_todos, $end;
2979     return ($new_todos);
2980     },
2981     };
2982    
2983     $Element->{$HTML_NS}->{command} = {
2984     attrs_checker => $GetHTMLAttrsChecker->({
2985     checked => $GetHTMLBooleanAttrChecker->('checked'),
2986     default => $GetHTMLBooleanAttrChecker->('default'),
2987     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2988     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2989     icon => $HTMLURIAttrChecker,
2990     label => sub { }, ## NOTE: No conformance creteria
2991     radiogroup => sub { }, ## NOTE: No conformance creteria
2992     ## NOTE: |title| has special semantics, but no syntactical difference
2993     type => sub {
2994     my ($self, $attr) = @_;
2995     my $value = $attr->value;
2996     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2997     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2998     }
2999     },
3000     }),
3001     checker => $HTMLEmptyChecker,
3002     };
3003    
3004     $Element->{$HTML_NS}->{menu} = {
3005     attrs_checker => $GetHTMLAttrsChecker->({
3006     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3007     id => sub {
3008     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3009     my ($self, $attr) = @_;
3010     my $value = $attr->value;
3011     if (length $value > 0) {
3012     if ($self->{id}->{$value}) {
3013     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3014     push @{$self->{id}->{$value}}, $attr;
3015     } else {
3016     $self->{id}->{$value} = [$attr];
3017     }
3018     } else {
3019     ## NOTE: MUST contain at least one character
3020     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3021     }
3022     if ($value =~ /[\x09-\x0D\x20]/) {
3023     $self->{onerror}->(node => $attr, type => 'space in ID');
3024     }
3025     $self->{menu}->{$value} ||= $attr;
3026     ## ISSUE: <menu id=""><p contextmenu=""> match?
3027     },
3028     label => sub { }, ## NOTE: No conformance creteria
3029     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3030     }),
3031     checker => sub {
3032     my ($self, $todo) = @_;
3033     my $el = $todo->{node};
3034     my $new_todos = [];
3035     my @nodes = (@{$el->child_nodes});
3036    
3037     my $content = 'li or inline';
3038     while (@nodes) {
3039     my $node = shift @nodes;
3040     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3041    
3042     my $nt = $node->node_type;
3043     if ($nt == 1) {
3044     my $node_ns = $node->namespace_uri;
3045     $node_ns = '' unless defined $node_ns;
3046     my $node_ln = $node->manakai_local_name;
3047     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3048 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3049     #
3050     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3051 wakaba 1.1 if ($content eq 'inline') {
3052     $not_allowed = 1;
3053     } elsif ($content eq 'li or inline') {
3054     $content = 'li';
3055     }
3056     } else {
3057     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3058     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3059     $content = 'inline';
3060     } else {
3061     $not_allowed = 1;
3062     }
3063     }
3064     $self->{onerror}->(node => $node, type => 'element not allowed')
3065     if $not_allowed;
3066     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3067     unshift @nodes, @$sib;
3068     push @$new_todos, @$ch;
3069     } elsif ($nt == 3 or $nt == 4) {
3070     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3071     if ($content eq 'li') {
3072     $self->{onerror}->(node => $node, type => 'character not allowed');
3073     } elsif ($content eq 'li or inline') {
3074     $content = 'inline';
3075     }
3076     }
3077     } elsif ($nt == 5) {
3078     unshift @nodes, @{$node->child_nodes};
3079     }
3080     }
3081    
3082     for (@$new_todos) {
3083     $_->{inline} = 1;
3084     }
3085     return ($new_todos);
3086     },
3087 wakaba 1.8 };
3088    
3089     $Element->{$HTML_NS}->{datatemplate} = {
3090     attrs_checker => $GetHTMLAttrsChecker->({}),
3091     checker => sub {
3092     my ($self, $todo) = @_;
3093     my $el = $todo->{node};
3094     my $new_todos = [];
3095     my @nodes = (@{$el->child_nodes});
3096    
3097     while (@nodes) {
3098     my $node = shift @nodes;
3099     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3100    
3101     my $nt = $node->node_type;
3102     if ($nt == 1) {
3103     my $node_ns = $node->namespace_uri;
3104     $node_ns = '' unless defined $node_ns;
3105     my $node_ln = $node->manakai_local_name;
3106     ## NOTE: |minuses| list is not checked since redundant
3107     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3108     #
3109     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3110     $self->{onerror}->(node => $node,
3111     type => 'element not allowed:datatemplate');
3112     }
3113     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3114     unshift @nodes, @$sib;
3115     push @$new_todos, @$ch;
3116     } elsif ($nt == 3 or $nt == 4) {
3117     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3118     $self->{onerror}->(node => $node, type => 'character not allowed');
3119     }
3120     } elsif ($nt == 5) {
3121     unshift @nodes, @{$node->child_nodes};
3122     }
3123     }
3124     return ($new_todos);
3125     },
3126     is_xml_root => 1,
3127     };
3128    
3129     $Element->{$HTML_NS}->{rule} = {
3130     attrs_checker => $GetHTMLAttrsChecker->({
3131     ## TODO: |condition| attribute
3132     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3133     }),
3134     checker => sub {
3135     my ($self, $todo) = @_;
3136    
3137     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3138     my ($sib, $ch) = $AnyChecker->($self, $todo);
3139     push @$sib, $end;
3140     return ($sib, $ch);
3141     },
3142     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3143     ## is applied to some conforming data, results in a conforming DOM tree.":
3144     ## We don't check against this.
3145     };
3146    
3147     $Element->{$HTML_NS}->{nest} = {
3148     attrs_checker => $GetHTMLAttrsChecker->({
3149     ## TODO: |filter| attribute
3150     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3151     }),
3152     checker => $HTMLEmptyChecker,
3153 wakaba 1.1 };
3154    
3155     $Element->{$HTML_NS}->{legend} = {
3156     attrs_checker => $GetHTMLAttrsChecker->({}),
3157     checker => sub {
3158     my ($self, $todo) = @_;
3159    
3160     my $parent = $todo->{node}->manakai_parent_element;
3161     if (defined $parent) {
3162     my $nsuri = $parent->namespace_uri;
3163     $nsuri = '' unless defined $nsuri;
3164     my $ln = $parent->manakai_local_name;
3165     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3166     return $HTMLInlineChecker->($self, $todo);
3167     } else {
3168     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
3169     }
3170     } else {
3171     return $HTMLInlineChecker->($self, $todo);
3172     }
3173    
3174     ## ISSUE: Content model is defined only for fieldset/legend,
3175     ## details/legend, and figure/legend.
3176     },
3177     };
3178    
3179     $Element->{$HTML_NS}->{div} = {
3180     attrs_checker => $GetHTMLAttrsChecker->({}),
3181     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3182     };
3183    
3184     $Element->{$HTML_NS}->{font} = {
3185     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3186     checker => $HTMLTransparentChecker,
3187     };
3188    
3189     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3190    
3191     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24