/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.18 - (hide annotations) (download)
Wed Nov 7 10:55:10 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.17: +6 -5 lines
++ whatpm/Whatpm/ContentChecker/ChangeLog	7 Nov 2007 10:52:48 -0000
2007-11-07  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Make |rel="up up"| conforming (HTML5 revision 1130).

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     ## Strictly inline-level content
252     my $HTMLStrictlyInlineChecker = sub {
253     my ($self, $todo) = @_;
254     my $el = $todo->{node};
255     my $new_todos = [];
256     my @nodes = (@{$el->child_nodes});
257    
258     while (@nodes) {
259     my $node = shift @nodes;
260     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
261    
262     my $nt = $node->node_type;
263     if ($nt == 1) {
264     my $node_ns = $node->namespace_uri;
265     $node_ns = '' unless defined $node_ns;
266     my $node_ln = $node->manakai_local_name;
267     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
268     $not_allowed = 1
269 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
270     $self->{pluses}->{$node_ns}->{$node_ln};
271 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
272     if $not_allowed;
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 5) {
277     unshift @nodes, @{$node->child_nodes};
278     }
279     }
280    
281     for (@$new_todos) {
282     $_->{inline} = 1;
283     $_->{strictly_inline} = 1;
284     }
285     return ($new_todos);
286     }; # $HTMLStrictlyInlineChecker
287    
288 wakaba 1.8 ## Inline-level or strictly inline-level content
289 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
290     my ($self, $todo) = @_;
291     my $el = $todo->{node};
292     my $new_todos = [];
293     my @nodes = (@{$el->child_nodes});
294    
295     while (@nodes) {
296     my $node = shift @nodes;
297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
298    
299     my $nt = $node->node_type;
300     if ($nt == 1) {
301     my $node_ns = $node->namespace_uri;
302     $node_ns = '' unless defined $node_ns;
303     my $node_ln = $node->manakai_local_name;
304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
305     if ($todo->{strictly_inline}) {
306     $not_allowed = 1
307 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308     $self->{pluses}->{$node_ns}->{$node_ln};
309 wakaba 1.1 } else {
310     $not_allowed = 1
311     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
312 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
313     $self->{pluses}->{$node_ns}->{$node_ln};
314 wakaba 1.1 }
315     $self->{onerror}->(node => $node, type => 'element not allowed')
316     if $not_allowed;
317     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
318     unshift @nodes, @$sib;
319     push @$new_todos, @$ch;
320     } elsif ($nt == 5) {
321     unshift @nodes, @{$node->child_nodes};
322     }
323     }
324    
325     for (@$new_todos) {
326     $_->{inline} = 1;
327     $_->{strictly_inline} = 1;
328     }
329     return ($new_todos);
330     }; # $HTMLInlineOrStrictlyInlineChecker
331    
332 wakaba 1.13 ## TODO: New "significant" SHOULD-level requirement (revision 1114).
333 wakaba 1.1
334 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
335 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
336     my ($self, $todo) = @_;
337     my $el = $todo->{node};
338     my $new_todos = [];
339     my @nodes = (@{$el->child_nodes});
340    
341     my $content = 'block-or-inline'; # or 'block' or 'inline'
342     my @block_not_inline;
343     while (@nodes) {
344     my $node = shift @nodes;
345     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
346    
347 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
348     ## is conforming or not.
349    
350 wakaba 1.1 my $nt = $node->node_type;
351     if ($nt == 1) {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
356     if ($content eq 'block') {
357     $not_allowed = 1
358 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
359     $self->{pluses}->{$node_ns}->{$node_ln};
360 wakaba 1.1 } elsif ($content eq 'inline') {
361     $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
364     $self->{pluses}->{$node_ns}->{$node_ln};
365 wakaba 1.1 } else {
366     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
367     my $is_inline
368     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
369     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
370    
371     push @block_not_inline, $node
372     if $is_block and not $is_inline and not $not_allowed;
373 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
374 wakaba 1.1 $content = 'inline';
375     for (@block_not_inline) {
376     $self->{onerror}->(node => $_, type => 'element not allowed');
377     }
378     $not_allowed = 1 unless $is_inline;
379     }
380     }
381     $self->{onerror}->(node => $node, type => 'element not allowed')
382     if $not_allowed;
383     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
384     unshift @nodes, @$sib;
385     push @$new_todos, @$ch;
386     } elsif ($nt == 3 or $nt == 4) {
387     if ($node->data =~ /[^\x09-\x0D\x20]/) {
388     if ($content eq 'block') {
389     $self->{onerror}->(node => $node, type => 'character not allowed');
390     } else {
391     $content = 'inline';
392     for (@block_not_inline) {
393     $self->{onerror}->(node => $_, type => 'element not allowed');
394     }
395     }
396     }
397     } elsif ($nt == 5) {
398     unshift @nodes, @{$node->child_nodes};
399     }
400     }
401    
402     if ($content eq 'inline') {
403     for (@$new_todos) {
404     $_->{inline} = 1;
405     }
406     }
407     return ($new_todos);
408     };
409    
410     ## Zero or more XXX element, then either block-level or inline-level
411     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
412     my ($elnsuri, $ellname) = @_;
413     return sub {
414     my ($self, $todo) = @_;
415     my $el = $todo->{node};
416     my $new_todos = [];
417     my @nodes = (@{$el->child_nodes});
418    
419     my $has_non_style;
420     my $content = 'block-or-inline'; # or 'block' or 'inline'
421     my @block_not_inline;
422     while (@nodes) {
423     my $node = shift @nodes;
424     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
425    
426     my $nt = $node->node_type;
427     if ($nt == 1) {
428     my $node_ns = $node->namespace_uri;
429     $node_ns = '' unless defined $node_ns;
430     my $node_ln = $node->manakai_local_name;
431     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
432     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
433     $not_allowed = 1 if $has_non_style;
434     if ($ellname eq 'style' and
435     not $node->has_attribute_ns (undef, 'scoped')) {
436     $not_allowed = 1;
437     }
438     } elsif ($content eq 'block') {
439     $has_non_style = 1;
440     $not_allowed = 1
441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
442     $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 } elsif ($content eq 'inline') {
444     $has_non_style = 1;
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
447 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
448     $self->{pluses}->{$node_ns}->{$node_ln};
449 wakaba 1.1 } else {
450 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
452     my $is_inline
453     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455    
456     push @block_not_inline, $node
457     if $is_block and not $is_inline and not $not_allowed;
458 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
459 wakaba 1.1 $content = 'inline';
460     for (@block_not_inline) {
461     $self->{onerror}->(node => $_, type => 'element not allowed');
462     }
463     $not_allowed = 1 unless $is_inline;
464     }
465     }
466     $self->{onerror}->(node => $node, type => 'element not allowed')
467     if $not_allowed;
468     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
469     unshift @nodes, @$sib;
470     push @$new_todos, @$ch;
471     } elsif ($nt == 3 or $nt == 4) {
472     if ($node->data =~ /[^\x09-\x0D\x20]/) {
473     $has_non_style = 1;
474     if ($content eq 'block') {
475     $self->{onerror}->(node => $node, type => 'character not allowed');
476     } else {
477     $content = 'inline';
478     for (@block_not_inline) {
479     $self->{onerror}->(node => $_, type => 'element not allowed');
480     }
481     }
482     }
483     } elsif ($nt == 5) {
484     unshift @nodes, @{$node->child_nodes};
485     }
486     }
487    
488     if ($content eq 'inline') {
489     for (@$new_todos) {
490     $_->{inline} = 1;
491     }
492     }
493     return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
498    
499     our $AttrChecker;
500    
501     my $GetHTMLEnumeratedAttrChecker = sub {
502     my $states = shift; # {value => conforming ? 1 : -1}
503     return sub {
504     my ($self, $attr) = @_;
505     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
506     if ($states->{$value} > 0) {
507     #
508     } elsif ($states->{$value}) {
509     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
510     } else {
511     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
512     }
513     };
514     }; # $GetHTMLEnumeratedAttrChecker
515    
516     my $GetHTMLBooleanAttrChecker = sub {
517     my $local_name = shift;
518     return sub {
519     my ($self, $attr) = @_;
520     my $value = $attr->value;
521     unless ($value eq $local_name or $value eq '') {
522     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
523     }
524     };
525     }; # $GetHTMLBooleanAttrChecker
526    
527 wakaba 1.8 ## Unordered set of space-separated tokens
528 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
529 wakaba 1.8 my ($self, $attr) = @_;
530     my %word;
531     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
532     unless ($word{$word}) {
533     $word{$word} = 1;
534     } else {
535     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
536     }
537     }
538 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
539 wakaba 1.8
540 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
541     ## whose allowed values are defined by the section on link types)
542     my $HTMLLinkTypesAttrChecker = sub {
543 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
544 wakaba 1.1 my %word;
545     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
546     unless ($word{$word}) {
547     $word{$word} = 1;
548 wakaba 1.18 } elsif ($word eq 'up') {
549     #
550 wakaba 1.1 } else {
551     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
552     }
553     }
554     ## NOTE: Case sensitive match (since HTML5 spec does not say link
555     ## types are case-insensitive and it says "The value should not
556     ## be confusingly similar to any other defined value (e.g.
557     ## differing only in case).").
558     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
559     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
560     ## values to be used conformingly.
561     require Whatpm::_LinkTypeList;
562     our $LinkType;
563     for my $word (keys %word) {
564     my $def = $LinkType->{$word};
565     if (defined $def) {
566     if ($def->{status} eq 'accepted') {
567     if (defined $def->{effect}->[$a_or_area]) {
568     #
569     } else {
570     $self->{onerror}->(node => $attr,
571     type => 'link type:bad context:'.$word);
572     }
573     } elsif ($def->{status} eq 'proposal') {
574     $self->{onerror}->(node => $attr, level => 's',
575     type => 'link type:proposed:'.$word);
576     } else { # rejected or synonym
577     $self->{onerror}->(node => $attr,
578     type => 'link type:non-conforming:'.$word);
579     }
580 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
581     if ($word eq 'alternate') {
582     #
583     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
584     $todo->{has_hyperlink_link_type} = 1;
585     }
586     }
587 wakaba 1.1 if ($def->{unique}) {
588     unless ($self->{has_link_type}->{$word}) {
589     $self->{has_link_type}->{$word} = 1;
590     } else {
591     $self->{onerror}->(node => $attr,
592     type => 'link type:duplicate:'.$word);
593     }
594     }
595     } else {
596     $self->{onerror}->(node => $attr, level => 'unsupported',
597     type => 'link type:'.$word);
598     }
599     }
600 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
601     if $word{alternate} and not $word{stylesheet};
602 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
603     ## says that using both X-Pingback: header field and HTML
604     ## <link rel=pingback> is deprecated and if both appears they
605     ## SHOULD contain exactly the same value.
606     ## ISSUE: Pingback 1.0 specification defines the exact representation
607     ## of its link element, which cannot be tested by the current arch.
608     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
609     ## include any string that matches to the pattern for the rel=pingback link,
610     ## which again inpossible to test.
611     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
612 wakaba 1.12
613     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
614 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
615     ## then they SHOULD be described in different paragraphs.".
616 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
617    
618     ## URI (or IRI)
619     my $HTMLURIAttrChecker = sub {
620     my ($self, $attr) = @_;
621     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
622     my $value = $attr->value;
623     Whatpm::URIChecker->check_iri_reference ($value, sub {
624     my %opt = @_;
625     $self->{onerror}->(node => $attr, level => $opt{level},
626     type => 'URI::'.$opt{type}.
627     (defined $opt{position} ? ':'.$opt{position} : ''));
628     });
629 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
630 wakaba 1.1 }; # $HTMLURIAttrChecker
631    
632     ## A space separated list of one or more URIs (or IRIs)
633     my $HTMLSpaceURIsAttrChecker = sub {
634     my ($self, $attr) = @_;
635     my $i = 0;
636     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
637     Whatpm::URIChecker->check_iri_reference ($value, sub {
638     my %opt = @_;
639     $self->{onerror}->(node => $attr, level => $opt{level},
640 wakaba 1.2 type => 'URIs:'.':'.
641     $opt{type}.':'.$i.
642 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
643     });
644     $i++;
645     }
646     ## ISSUE: Relative references?
647     ## ISSUE: Leading or trailing white spaces are conformant?
648     ## ISSUE: A sequence of white space characters are conformant?
649     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
650     ## NOTE: Duplication seems not an error.
651 wakaba 1.4 $self->{has_uri_attr} = 1;
652 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
653    
654     my $HTMLDatetimeAttrChecker = sub {
655     my ($self, $attr) = @_;
656     my $value = $attr->value;
657     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
658     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
659     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
660     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
661     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
662     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
663     if $d < 1 or
664     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
665     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
666     if $M == 2 and $d == 29 and
667     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
668     } else {
669     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
670     }
671     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
672     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
673     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
674     if defined $s and $s > 59;
675     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
676     if $zh > 23;
677     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
678     if $zm > 59;
679     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
680     } else {
681     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
682     }
683     }; # $HTMLDatetimeAttrChecker
684    
685     my $HTMLIntegerAttrChecker = sub {
686     my ($self, $attr) = @_;
687     my $value = $attr->value;
688     unless ($value =~ /\A-?[0-9]+\z/) {
689     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
690     }
691     }; # $HTMLIntegerAttrChecker
692    
693     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
694     my $range_check = shift;
695     return sub {
696     my ($self, $attr) = @_;
697     my $value = $attr->value;
698     if ($value =~ /\A[0-9]+\z/) {
699     unless ($range_check->($value + 0)) {
700     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
701     }
702     } else {
703     $self->{onerror}->(node => $attr,
704     type => 'nninteger:syntax error');
705     }
706     };
707     }; # $GetHTMLNonNegativeIntegerAttrChecker
708    
709     my $GetHTMLFloatingPointNumberAttrChecker = sub {
710     my $range_check = shift;
711     return sub {
712     my ($self, $attr) = @_;
713     my $value = $attr->value;
714     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
715     unless ($range_check->($value + 0)) {
716     $self->{onerror}->(node => $attr, type => 'float:out of range');
717     }
718     } else {
719     $self->{onerror}->(node => $attr,
720     type => 'float:syntax error');
721     }
722     };
723     }; # $GetHTMLFloatingPointNumberAttrChecker
724    
725     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
726     ## ISSUE: RFC 2046 does not define syntax of media types.
727     ## ISSUE: The definition of "a valid MIME type" is unknown.
728     ## Syntactical correctness?
729     my $HTMLIMTAttrChecker = sub {
730     my ($self, $attr) = @_;
731     my $value = $attr->value;
732     ## ISSUE: RFC 2045 Content-Type header field allows insertion
733     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
734     ## ISSUE: RFC 2231 extension? Maybe no.
735     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
736     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
737     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
738     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
739     my @type = ($1, $2);
740     my $param = $3;
741     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
742     if (defined $2) {
743     push @type, $1 => $2;
744     } else {
745     my $n = $1;
746     my $v = $2;
747     $v =~ s/\\(.)/$1/gs;
748     push @type, $n => $v;
749     }
750     }
751     require Whatpm::IMTChecker;
752     Whatpm::IMTChecker->check_imt (sub {
753     my %opt = @_;
754     $self->{onerror}->(node => $attr, level => $opt{level},
755     type => 'IMT:'.$opt{type});
756     }, @type);
757     } else {
758     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
759     }
760     }; # $HTMLIMTAttrChecker
761    
762     my $HTMLLanguageTagAttrChecker = sub {
763 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
764    
765 wakaba 1.1 my ($self, $attr) = @_;
766 wakaba 1.6 my $value = $attr->value;
767     require Whatpm::LangTag;
768     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
769     my %opt = @_;
770     my $type = 'LangTag:'.$opt{type};
771     $type .= ':' . $opt{subtag} if defined $opt{subtag};
772     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
773     level => $opt{level});
774     });
775 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
776 wakaba 1.6
777     ## TODO: testdata
778 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
779    
780     ## "A valid media query [MQ]"
781     my $HTMLMQAttrChecker = sub {
782     my ($self, $attr) = @_;
783     $self->{onerror}->(node => $attr, level => 'unsupported',
784     type => 'media query');
785     ## ISSUE: What is "a valid media query"?
786     }; # $HTMLMQAttrChecker
787    
788     my $HTMLEventHandlerAttrChecker = sub {
789     my ($self, $attr) = @_;
790     $self->{onerror}->(node => $attr, level => 'unsupported',
791     type => 'event handler');
792     ## TODO: MUST contain valid ECMAScript code matching the
793     ## ECMAScript |FunctionBody| production. [ECMA262]
794     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
795     ## ISSUE: Automatic semicolon insertion does not apply?
796     ## ISSUE: Other script languages?
797     }; # $HTMLEventHandlerAttrChecker
798    
799     my $HTMLUsemapAttrChecker = sub {
800     my ($self, $attr) = @_;
801     ## MUST be a valid hashed ID reference to a |map| element
802     my $value = $attr->value;
803     if ($value =~ s/^#//) {
804     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
805     push @{$self->{usemap}}, [$value => $attr];
806     } else {
807     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
808     }
809     ## NOTE: Space characters in hashed ID references are conforming.
810     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
811     }; # $HTMLUsemapAttrChecker
812    
813     my $HTMLTargetAttrChecker = sub {
814     my ($self, $attr) = @_;
815     my $value = $attr->value;
816     if ($value =~ /^_/) {
817     $value = lc $value; ## ISSUE: ASCII case-insentitive?
818     unless ({
819     _self => 1, _parent => 1, _top => 1,
820     }->{$value}) {
821     $self->{onerror}->(node => $attr,
822     type => 'reserved browsing context name');
823     }
824     } else {
825     #$ ISSUE: An empty string is conforming?
826     }
827     }; # $HTMLTargetAttrChecker
828    
829     my $HTMLAttrChecker = {
830     id => sub {
831     ## NOTE: |map| has its own variant of |id=""| checker
832     my ($self, $attr) = @_;
833     my $value = $attr->value;
834     if (length $value > 0) {
835     if ($self->{id}->{$value}) {
836     $self->{onerror}->(node => $attr, type => 'duplicate ID');
837     push @{$self->{id}->{$value}}, $attr;
838     } else {
839     $self->{id}->{$value} = [$attr];
840     }
841     if ($value =~ /[\x09-\x0D\x20]/) {
842     $self->{onerror}->(node => $attr, type => 'space in ID');
843     }
844     } else {
845     ## NOTE: MUST contain at least one character
846     $self->{onerror}->(node => $attr, type => 'empty attribute value');
847     }
848     },
849     title => sub {}, ## NOTE: No conformance creteria
850     lang => sub {
851     my ($self, $attr) = @_;
852 wakaba 1.6 my $value = $attr->value;
853     if ($value eq '') {
854     #
855     } else {
856     require Whatpm::LangTag;
857     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
858     my %opt = @_;
859     my $type = 'LangTag:'.$opt{type};
860     $type .= ':' . $opt{subtag} if defined $opt{subtag};
861     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
862     level => $opt{level});
863     });
864     }
865 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
866     unless ($attr->owner_document->manakai_is_html) {
867     $self->{onerror}->(node => $attr, type => 'in XML:lang');
868     }
869 wakaba 1.6
870     ## TODO: test data
871 wakaba 1.1 },
872     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
873     class => sub {
874     my ($self, $attr) = @_;
875     my %word;
876     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
877     unless ($word{$word}) {
878     $word{$word} = 1;
879     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
880     } else {
881     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
882     }
883     }
884     },
885     contextmenu => sub {
886     my ($self, $attr) = @_;
887     my $value = $attr->value;
888     push @{$self->{contextmenu}}, [$value => $attr];
889     ## ISSUE: "The value must be the ID of a menu element in the DOM."
890     ## What is "in the DOM"? A menu Element node that is not part
891     ## of the Document tree is in the DOM? A menu Element node that
892     ## belong to another Document tree is in the DOM?
893     },
894     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
895 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
896     ## TODO: ref, template, registrationmark
897 wakaba 1.1 };
898    
899     for (qw/
900     onabort onbeforeunload onblur onchange onclick oncontextmenu
901     ondblclick ondrag ondragend ondragenter ondragleave ondragover
902     ondragstart ondrop onerror onfocus onkeydown onkeypress
903     onkeyup onload onmessage onmousedown onmousemove onmouseout
904     onmouseover onmouseup onmousewheel onresize onscroll onselect
905     onsubmit onunload
906     /) {
907     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
908     }
909    
910     my $GetHTMLAttrsChecker = sub {
911     my $element_specific_checker = shift;
912     return sub {
913     my ($self, $todo) = @_;
914     for my $attr (@{$todo->{node}->attributes}) {
915     my $attr_ns = $attr->namespace_uri;
916     $attr_ns = '' unless defined $attr_ns;
917     my $attr_ln = $attr->manakai_local_name;
918     my $checker;
919     if ($attr_ns eq '') {
920     $checker = $element_specific_checker->{$attr_ln}
921     || $HTMLAttrChecker->{$attr_ln};
922     }
923     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
924     || $AttrChecker->{$attr_ns}->{''};
925     if ($checker) {
926     $checker->($self, $attr, $todo);
927     } else {
928     $self->{onerror}->(node => $attr, level => 'unsupported',
929     type => 'attribute');
930     ## ISSUE: No comformance createria for unknown attributes in the spec
931     }
932     }
933     };
934     }; # $GetHTMLAttrsChecker
935    
936     our $Element;
937     our $ElementDefault;
938     our $AnyChecker;
939    
940     $Element->{$HTML_NS}->{''} = {
941     attrs_checker => $GetHTMLAttrsChecker->({}),
942     checker => $ElementDefault->{checker},
943     };
944    
945     $Element->{$HTML_NS}->{html} = {
946     is_root => 1,
947     attrs_checker => $GetHTMLAttrsChecker->({
948 wakaba 1.16 manifest => $HTMLURIAttrChecker,
949 wakaba 1.1 xmlns => sub {
950     my ($self, $attr) = @_;
951     my $value = $attr->value;
952     unless ($value eq $HTML_NS) {
953     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
954     }
955     unless ($attr->owner_document->manakai_is_html) {
956     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
957     ## TODO: Test
958     }
959     },
960     }),
961     checker => sub {
962     my ($self, $todo) = @_;
963     my $el = $todo->{node};
964     my $new_todos = [];
965     my @nodes = (@{$el->child_nodes});
966    
967     my $phase = 'before head';
968     while (@nodes) {
969     my $node = shift @nodes;
970     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
971    
972     my $nt = $node->node_type;
973     if ($nt == 1) {
974     my $node_ns = $node->namespace_uri;
975     $node_ns = '' unless defined $node_ns;
976     my $node_ln = $node->manakai_local_name;
977     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
978 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
979     #
980     } elsif ($phase eq 'before head') {
981 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
982     $phase = 'after head';
983     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
984     $self->{onerror}->(node => $node, type => 'ps element missing:head');
985     $phase = 'after body';
986     } else {
987     $not_allowed = 1;
988     # before head
989     }
990     } elsif ($phase eq 'after head') {
991     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
992     $phase = 'after body';
993     } else {
994     $not_allowed = 1;
995     # after head
996     }
997     } else { #elsif ($phase eq 'after body') {
998     $not_allowed = 1;
999     # after body
1000     }
1001     $self->{onerror}->(node => $node, type => 'element not allowed')
1002     if $not_allowed;
1003     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1004     unshift @nodes, @$sib;
1005     push @$new_todos, @$ch;
1006     } elsif ($nt == 3 or $nt == 4) {
1007     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1008     $self->{onerror}->(node => $node, type => 'character not allowed');
1009     }
1010     } elsif ($nt == 5) {
1011     unshift @nodes, @{$node->child_nodes};
1012     }
1013     }
1014    
1015     if ($phase eq 'before head') {
1016     $self->{onerror}->(node => $el, type => 'child element missing:head');
1017     $self->{onerror}->(node => $el, type => 'child element missing:body');
1018     } elsif ($phase eq 'after head') {
1019     $self->{onerror}->(node => $el, type => 'child element missing:body');
1020     }
1021    
1022     return ($new_todos);
1023     },
1024     };
1025    
1026     $Element->{$HTML_NS}->{head} = {
1027     attrs_checker => $GetHTMLAttrsChecker->({}),
1028     checker => sub {
1029     my ($self, $todo) = @_;
1030     my $el = $todo->{node};
1031     my $new_todos = [];
1032     my @nodes = (@{$el->child_nodes});
1033    
1034     my $has_title;
1035     my $phase = 'initial'; # 'after charset', 'after base'
1036     while (@nodes) {
1037     my $node = shift @nodes;
1038     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1039    
1040     my $nt = $node->node_type;
1041     if ($nt == 1) {
1042     my $node_ns = $node->namespace_uri;
1043     $node_ns = '' unless defined $node_ns;
1044     my $node_ln = $node->manakai_local_name;
1045     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1046 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1047     #
1048     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1049 wakaba 1.1 $phase = 'after base';
1050     unless ($has_title) {
1051     $has_title = 1;
1052     } else {
1053     $not_allowed = 1;
1054     }
1055     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1056     if ($node->has_attribute_ns (undef, 'charset')) {
1057     if ($phase eq 'initial') {
1058     $phase = 'after charset';
1059     } else {
1060     $not_allowed = 1;
1061     ## NOTE: See also |base|'s "contexts" field in the spec
1062     }
1063 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1064     $node->has_attribute_ns (undef, 'http-equiv')) {
1065     $phase = 'after base';
1066 wakaba 1.1 } else {
1067     $phase = 'after base';
1068 wakaba 1.5 $not_allowed = 1;
1069 wakaba 1.1 }
1070     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1071     if ($phase eq 'initial' or $phase eq 'after charset') {
1072     $phase = 'after base';
1073     } else {
1074     $not_allowed = 1;
1075     }
1076     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1077     $phase = 'after base';
1078     if ($node->has_attribute_ns (undef, 'scoped')) {
1079     $not_allowed = 1;
1080     }
1081     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1082     $phase = 'after base';
1083     } else {
1084     $not_allowed = 1;
1085     }
1086     $self->{onerror}->(node => $node, type => 'element not allowed')
1087     if $not_allowed;
1088 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1089 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1090     unshift @nodes, @$sib;
1091     push @$new_todos, @$ch;
1092     } elsif ($nt == 3 or $nt == 4) {
1093     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1094     $self->{onerror}->(node => $node, type => 'character not allowed');
1095     }
1096     } elsif ($nt == 5) {
1097     unshift @nodes, @{$node->child_nodes};
1098     }
1099     }
1100     unless ($has_title) {
1101     $self->{onerror}->(node => $el, type => 'child element missing:title');
1102     }
1103     return ($new_todos);
1104     },
1105     };
1106    
1107     $Element->{$HTML_NS}->{title} = {
1108     attrs_checker => $GetHTMLAttrsChecker->({}),
1109     checker => $HTMLTextChecker,
1110     };
1111    
1112     $Element->{$HTML_NS}->{base} = {
1113 wakaba 1.4 attrs_checker => sub {
1114     my ($self, $todo) = @_;
1115    
1116 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1117     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1118    
1119     if ($self->{has_uri_attr} and $has_href) {
1120 wakaba 1.4 ## ISSUE: Are these examples conforming?
1121     ## <head profile="a b c"><base href> (except for |profile|'s
1122     ## non-conformance)
1123     ## <title xml:base="relative"/><base href/> (maybe it should be)
1124     ## <unknown xmlns="relative"/><base href/> (assuming that
1125     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1126     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1127     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1128     ## <style>@import 'relative';</style><base href>
1129     ## <script>location.href = 'relative';</script><base href>
1130 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1131     ## an exception.
1132 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1133     type => 'basehref after URI attribute');
1134     }
1135 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1136 wakaba 1.4 ## ISSUE: Are these examples conforming?
1137     ## <head><title xlink:href=""/><base target="name"/></head>
1138     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1139     ## (assuming that |xbl:xbl| is allowed before |base|)
1140     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1141     ## <link href=""/><base target="name"/>
1142     ## <link rel=unknown href=""><base target=name>
1143     $self->{onerror}->(node => $todo->{node},
1144     type => 'basetarget after hyperlink');
1145     }
1146    
1147 wakaba 1.14 if (not $has_href and not $has_target) {
1148     $self->{onerror}->(node => $todo->{node},
1149     type => 'attribute missing:href|target');
1150     }
1151    
1152 wakaba 1.4 return $GetHTMLAttrsChecker->({
1153     href => $HTMLURIAttrChecker,
1154     target => $HTMLTargetAttrChecker,
1155     })->($self, $todo);
1156     },
1157 wakaba 1.1 checker => $HTMLEmptyChecker,
1158     };
1159    
1160     $Element->{$HTML_NS}->{link} = {
1161     attrs_checker => sub {
1162     my ($self, $todo) = @_;
1163     $GetHTMLAttrsChecker->({
1164     href => $HTMLURIAttrChecker,
1165 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1166 wakaba 1.1 media => $HTMLMQAttrChecker,
1167     hreflang => $HTMLLanguageTagAttrChecker,
1168     type => $HTMLIMTAttrChecker,
1169     ## NOTE: Though |title| has special semantics,
1170     ## syntactically same as the |title| as global attribute.
1171     })->($self, $todo);
1172 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1173     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1174     } else {
1175 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1176     type => 'attribute missing:href');
1177     }
1178     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1179     $self->{onerror}->(node => $todo->{node},
1180     type => 'attribute missing:rel');
1181     }
1182     },
1183     checker => $HTMLEmptyChecker,
1184     };
1185    
1186     $Element->{$HTML_NS}->{meta} = {
1187     attrs_checker => sub {
1188     my ($self, $todo) = @_;
1189     my $name_attr;
1190     my $http_equiv_attr;
1191     my $charset_attr;
1192     my $content_attr;
1193     for my $attr (@{$todo->{node}->attributes}) {
1194     my $attr_ns = $attr->namespace_uri;
1195     $attr_ns = '' unless defined $attr_ns;
1196     my $attr_ln = $attr->manakai_local_name;
1197     my $checker;
1198     if ($attr_ns eq '') {
1199     if ($attr_ln eq 'content') {
1200     $content_attr = $attr;
1201     $checker = 1;
1202     } elsif ($attr_ln eq 'name') {
1203     $name_attr = $attr;
1204     $checker = 1;
1205     } elsif ($attr_ln eq 'http-equiv') {
1206     $http_equiv_attr = $attr;
1207     $checker = 1;
1208     } elsif ($attr_ln eq 'charset') {
1209     $charset_attr = $attr;
1210     $checker = 1;
1211     } else {
1212     $checker = $HTMLAttrChecker->{$attr_ln}
1213     || $AttrChecker->{$attr_ns}->{$attr_ln}
1214     || $AttrChecker->{$attr_ns}->{''};
1215     }
1216     } else {
1217     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1218     || $AttrChecker->{$attr_ns}->{''};
1219     }
1220     if ($checker) {
1221     $checker->($self, $attr) if ref $checker;
1222     } else {
1223     $self->{onerror}->(node => $attr, level => 'unsupported',
1224     type => 'attribute');
1225     ## ISSUE: No comformance createria for unknown attributes in the spec
1226     }
1227     }
1228    
1229     if (defined $name_attr) {
1230     if (defined $http_equiv_attr) {
1231     $self->{onerror}->(node => $http_equiv_attr,
1232     type => 'attribute not allowed');
1233     } elsif (defined $charset_attr) {
1234     $self->{onerror}->(node => $charset_attr,
1235     type => 'attribute not allowed');
1236     }
1237     my $metadata_name = $name_attr->value;
1238     my $metadata_value;
1239     if (defined $content_attr) {
1240     $metadata_value = $content_attr->value;
1241     } else {
1242     $self->{onerror}->(node => $todo->{node},
1243     type => 'attribute missing:content');
1244     $metadata_value = '';
1245     }
1246     } elsif (defined $http_equiv_attr) {
1247     if (defined $charset_attr) {
1248     $self->{onerror}->(node => $charset_attr,
1249     type => 'attribute not allowed');
1250     }
1251     unless (defined $content_attr) {
1252     $self->{onerror}->(node => $todo->{node},
1253     type => 'attribute missing:content');
1254     }
1255     } elsif (defined $charset_attr) {
1256     if (defined $content_attr) {
1257     $self->{onerror}->(node => $content_attr,
1258     type => 'attribute not allowed');
1259     }
1260     } else {
1261     if (defined $content_attr) {
1262     $self->{onerror}->(node => $content_attr,
1263     type => 'attribute not allowed');
1264     $self->{onerror}->(node => $todo->{node},
1265     type => 'attribute missing:name|http-equiv');
1266     } else {
1267     $self->{onerror}->(node => $todo->{node},
1268     type => 'attribute missing:name|http-equiv|charset');
1269     }
1270     }
1271    
1272     ## TODO: metadata conformance
1273    
1274     ## TODO: pragma conformance
1275     if (defined $http_equiv_attr) { ## An enumerated attribute
1276     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1277     if ({
1278     'refresh' => 1,
1279     'default-style' => 1,
1280     }->{$keyword}) {
1281     #
1282     } else {
1283     $self->{onerror}->(node => $http_equiv_attr,
1284     type => 'enumerated:invalid');
1285     }
1286     }
1287    
1288     if (defined $charset_attr) {
1289     unless ($todo->{node}->owner_document->manakai_is_html) {
1290     $self->{onerror}->(node => $charset_attr,
1291     type => 'in XML:charset');
1292     }
1293     ## TODO: charset
1294     }
1295     },
1296     checker => $HTMLEmptyChecker,
1297     };
1298    
1299     $Element->{$HTML_NS}->{style} = {
1300     attrs_checker => $GetHTMLAttrsChecker->({
1301     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1302     media => $HTMLMQAttrChecker,
1303     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1304     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1305     ## not different
1306     }),
1307     checker => sub {
1308     ## NOTE: |html:style| has no conformance creteria on content model
1309     my ($self, $todo) = @_;
1310     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1311     $type = 'text/css' unless defined $type;
1312     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1313     type => 'style:'.$type); ## TODO: $type normalization
1314     return $AnyChecker->($self, $todo);
1315     },
1316     };
1317    
1318     $Element->{$HTML_NS}->{body} = {
1319     attrs_checker => $GetHTMLAttrsChecker->({}),
1320     checker => $HTMLBlockChecker,
1321     };
1322    
1323     $Element->{$HTML_NS}->{section} = {
1324     attrs_checker => $GetHTMLAttrsChecker->({}),
1325     checker => $HTMLStylableBlockChecker,
1326     };
1327    
1328     $Element->{$HTML_NS}->{nav} = {
1329     attrs_checker => $GetHTMLAttrsChecker->({}),
1330     checker => $HTMLBlockOrInlineChecker,
1331     };
1332    
1333     $Element->{$HTML_NS}->{article} = {
1334     attrs_checker => $GetHTMLAttrsChecker->({}),
1335     checker => $HTMLStylableBlockChecker,
1336     };
1337    
1338     $Element->{$HTML_NS}->{blockquote} = {
1339     attrs_checker => $GetHTMLAttrsChecker->({
1340     cite => $HTMLURIAttrChecker,
1341     }),
1342     checker => $HTMLBlockChecker,
1343     };
1344    
1345     $Element->{$HTML_NS}->{aside} = {
1346     attrs_checker => $GetHTMLAttrsChecker->({}),
1347     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1348     };
1349    
1350     $Element->{$HTML_NS}->{h1} = {
1351     attrs_checker => $GetHTMLAttrsChecker->({}),
1352     checker => sub {
1353     my ($self, $todo) = @_;
1354     $todo->{flag}->{has_heading}->[0] = 1;
1355 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1356 wakaba 1.1 },
1357     };
1358    
1359     $Element->{$HTML_NS}->{h2} = {
1360     attrs_checker => $GetHTMLAttrsChecker->({}),
1361     checker => $Element->{$HTML_NS}->{h1}->{checker},
1362     };
1363    
1364     $Element->{$HTML_NS}->{h3} = {
1365     attrs_checker => $GetHTMLAttrsChecker->({}),
1366     checker => $Element->{$HTML_NS}->{h1}->{checker},
1367     };
1368    
1369     $Element->{$HTML_NS}->{h4} = {
1370     attrs_checker => $GetHTMLAttrsChecker->({}),
1371     checker => $Element->{$HTML_NS}->{h1}->{checker},
1372     };
1373    
1374     $Element->{$HTML_NS}->{h5} = {
1375     attrs_checker => $GetHTMLAttrsChecker->({}),
1376     checker => $Element->{$HTML_NS}->{h1}->{checker},
1377     };
1378    
1379     $Element->{$HTML_NS}->{h6} = {
1380     attrs_checker => $GetHTMLAttrsChecker->({}),
1381     checker => $Element->{$HTML_NS}->{h1}->{checker},
1382     };
1383    
1384     $Element->{$HTML_NS}->{header} = {
1385     attrs_checker => $GetHTMLAttrsChecker->({}),
1386     checker => sub {
1387     my ($self, $todo) = @_;
1388     my $old_flag = $todo->{flag}->{has_heading} || [];
1389     my $new_flag = [];
1390     local $todo->{flag}->{has_heading} = $new_flag;
1391     my $node = $todo->{node};
1392    
1393     my $end = $self->_add_minuses
1394     ({$HTML_NS => {qw/header 1 footer 1/}},
1395     $HTMLSectioningElements);
1396     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1397     push @$new_todos, $end,
1398     {type => 'code', code => sub {
1399     if ($new_flag->[0]) {
1400     $old_flag->[0] = 1;
1401     } else {
1402     $self->{onerror}->(node => $node, type => 'element missing:hn');
1403     }
1404     }};
1405     return ($new_todos, $ch);
1406     },
1407     };
1408    
1409     $Element->{$HTML_NS}->{footer} = {
1410     attrs_checker => $GetHTMLAttrsChecker->({}),
1411     checker => sub { ## block -hn -header -footer -sectioning or inline
1412     my ($self, $todo) = @_;
1413     my $el = $todo->{node};
1414     my $new_todos = [];
1415     my @nodes = (@{$el->child_nodes});
1416    
1417     my $content = 'block-or-inline'; # or 'block' or 'inline'
1418     my @block_not_inline;
1419     while (@nodes) {
1420     my $node = shift @nodes;
1421     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1422    
1423     my $nt = $node->node_type;
1424     if ($nt == 1) {
1425     my $node_ns = $node->namespace_uri;
1426     $node_ns = '' unless defined $node_ns;
1427     my $node_ln = $node->manakai_local_name;
1428     my $not_allowed;
1429     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1430     $not_allowed = 1;
1431     } elsif ($node_ns eq $HTML_NS and
1432     {
1433     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1434     }->{$node_ln}) {
1435     $not_allowed = 1;
1436     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1437     $not_allowed = 1;
1438     }
1439     if ($content eq 'block') {
1440     $not_allowed = 1
1441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1442     $self->{pluses}->{$node_ns}->{$node_ln};
1443 wakaba 1.1 } elsif ($content eq 'inline') {
1444     $not_allowed = 1
1445     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1446 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1447     $self->{pluses}->{$node_ns}->{$node_ln};
1448 wakaba 1.1 } else {
1449     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1450     my $is_inline
1451     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1452     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1453    
1454     push @block_not_inline, $node
1455     if $is_block and not $is_inline and not $not_allowed;
1456 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1457 wakaba 1.1 $content = 'inline';
1458     for (@block_not_inline) {
1459     $self->{onerror}->(node => $_, type => 'element not allowed');
1460     }
1461     $not_allowed = 1 unless $is_inline;
1462     }
1463     }
1464     $self->{onerror}->(node => $node, type => 'element not allowed')
1465     if $not_allowed;
1466     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1467     unshift @nodes, @$sib;
1468     push @$new_todos, @$ch;
1469     } elsif ($nt == 3 or $nt == 4) {
1470     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1471     if ($content eq 'block') {
1472     $self->{onerror}->(node => $node, type => 'character not allowed');
1473     } else {
1474     $content = 'inline';
1475     for (@block_not_inline) {
1476     $self->{onerror}->(node => $_, type => 'element not allowed');
1477     }
1478     }
1479     }
1480     } elsif ($nt == 5) {
1481     unshift @nodes, @{$node->child_nodes};
1482     }
1483     }
1484    
1485     my $end = $self->_add_minuses
1486     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1487     $HTMLSectioningElements);
1488     push @$new_todos, $end;
1489    
1490     if ($content eq 'inline') {
1491     for (@$new_todos) {
1492     $_->{inline} = 1;
1493     }
1494     }
1495    
1496     return ($new_todos);
1497     },
1498     };
1499    
1500     $Element->{$HTML_NS}->{address} = {
1501     attrs_checker => $GetHTMLAttrsChecker->({}),
1502     checker => $HTMLInlineChecker,
1503     };
1504    
1505     $Element->{$HTML_NS}->{p} = {
1506     attrs_checker => $GetHTMLAttrsChecker->({}),
1507 wakaba 1.13 checker => $HTMLInlineChecker,
1508 wakaba 1.1 };
1509    
1510     $Element->{$HTML_NS}->{hr} = {
1511     attrs_checker => $GetHTMLAttrsChecker->({}),
1512     checker => $HTMLEmptyChecker,
1513     };
1514    
1515     $Element->{$HTML_NS}->{br} = {
1516     attrs_checker => $GetHTMLAttrsChecker->({}),
1517     checker => $HTMLEmptyChecker,
1518     };
1519    
1520     $Element->{$HTML_NS}->{dialog} = {
1521     attrs_checker => $GetHTMLAttrsChecker->({}),
1522     checker => sub {
1523     my ($self, $todo) = @_;
1524     my $el = $todo->{node};
1525     my $new_todos = [];
1526     my @nodes = (@{$el->child_nodes});
1527    
1528     my $phase = 'before dt';
1529     while (@nodes) {
1530     my $node = shift @nodes;
1531     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1532    
1533     my $nt = $node->node_type;
1534     if ($nt == 1) {
1535     my $node_ns = $node->namespace_uri;
1536     $node_ns = '' unless defined $node_ns;
1537     my $node_ln = $node->manakai_local_name;
1538     ## NOTE: |minuses| list is not checked since redundant
1539 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1540     #
1541     } elsif ($phase eq 'before dt') {
1542 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1543     $phase = 'before dd';
1544     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1545     $self->{onerror}
1546     ->(node => $node, type => 'ps element missing:dt');
1547     $phase = 'before dt';
1548     } else {
1549     $self->{onerror}->(node => $node, type => 'element not allowed');
1550     }
1551     } else { # before dd
1552     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1553     $phase = 'before dt';
1554     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1555     $self->{onerror}
1556     ->(node => $node, type => 'ps element missing:dd');
1557     $phase = 'before dd';
1558     } else {
1559     $self->{onerror}->(node => $node, type => 'element not allowed');
1560     }
1561     }
1562     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1563     unshift @nodes, @$sib;
1564     push @$new_todos, @$ch;
1565     } elsif ($nt == 3 or $nt == 4) {
1566     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1567     $self->{onerror}->(node => $node, type => 'character not allowed');
1568     }
1569     } elsif ($nt == 5) {
1570     unshift @nodes, @{$node->child_nodes};
1571     }
1572     }
1573     if ($phase eq 'before dd') {
1574 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1575 wakaba 1.1 }
1576     return ($new_todos);
1577     },
1578     };
1579    
1580     $Element->{$HTML_NS}->{pre} = {
1581     attrs_checker => $GetHTMLAttrsChecker->({}),
1582     checker => $HTMLStrictlyInlineChecker,
1583     };
1584    
1585     $Element->{$HTML_NS}->{ol} = {
1586     attrs_checker => $GetHTMLAttrsChecker->({
1587     start => $HTMLIntegerAttrChecker,
1588     }),
1589     checker => sub {
1590     my ($self, $todo) = @_;
1591     my $el = $todo->{node};
1592     my $new_todos = [];
1593     my @nodes = (@{$el->child_nodes});
1594    
1595     while (@nodes) {
1596     my $node = shift @nodes;
1597     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1598    
1599     my $nt = $node->node_type;
1600     if ($nt == 1) {
1601     my $node_ns = $node->namespace_uri;
1602     $node_ns = '' unless defined $node_ns;
1603     my $node_ln = $node->manakai_local_name;
1604     ## NOTE: |minuses| list is not checked since redundant
1605 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1606     #
1607     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1608 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1609     }
1610     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1611     unshift @nodes, @$sib;
1612     push @$new_todos, @$ch;
1613     } elsif ($nt == 3 or $nt == 4) {
1614     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1615     $self->{onerror}->(node => $node, type => 'character not allowed');
1616     }
1617     } elsif ($nt == 5) {
1618     unshift @nodes, @{$node->child_nodes};
1619     }
1620     }
1621    
1622     if ($todo->{inline}) {
1623     for (@$new_todos) {
1624     $_->{inline} = 1;
1625     }
1626     }
1627     return ($new_todos);
1628     },
1629     };
1630    
1631     $Element->{$HTML_NS}->{ul} = {
1632     attrs_checker => $GetHTMLAttrsChecker->({}),
1633     checker => $Element->{$HTML_NS}->{ol}->{checker},
1634     };
1635    
1636     $Element->{$HTML_NS}->{li} = {
1637     attrs_checker => $GetHTMLAttrsChecker->({
1638     start => sub {
1639     my ($self, $attr) = @_;
1640     my $parent = $attr->owner_element->manakai_parent_element;
1641     if (defined $parent) {
1642     my $parent_ns = $parent->namespace_uri;
1643     $parent_ns = '' unless defined $parent_ns;
1644     my $parent_ln = $parent->manakai_local_name;
1645     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1646     $self->{onerror}->(node => $attr, level => 'unsupported',
1647     type => 'attribute');
1648     }
1649     }
1650     $HTMLIntegerAttrChecker->($self, $attr);
1651     },
1652     }),
1653     checker => sub {
1654     my ($self, $todo) = @_;
1655     if ($todo->{inline}) {
1656     return $HTMLInlineChecker->($self, $todo);
1657     } else {
1658     return $HTMLBlockOrInlineChecker->($self, $todo);
1659     }
1660     },
1661     };
1662    
1663     $Element->{$HTML_NS}->{dl} = {
1664     attrs_checker => $GetHTMLAttrsChecker->({}),
1665     checker => sub {
1666     my ($self, $todo) = @_;
1667     my $el = $todo->{node};
1668     my $new_todos = [];
1669     my @nodes = (@{$el->child_nodes});
1670    
1671     my $phase = 'before dt';
1672     while (@nodes) {
1673     my $node = shift @nodes;
1674     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1675    
1676     my $nt = $node->node_type;
1677     if ($nt == 1) {
1678     my $node_ns = $node->namespace_uri;
1679     $node_ns = '' unless defined $node_ns;
1680     my $node_ln = $node->manakai_local_name;
1681     ## NOTE: |minuses| list is not checked since redundant
1682 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1683     #
1684     } elsif ($phase eq 'in dds') {
1685 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1686     #$phase = 'in dds';
1687     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1688     $phase = 'in dts';
1689     } else {
1690     $self->{onerror}->(node => $node, type => 'element not allowed');
1691     }
1692     } elsif ($phase eq 'in dts') {
1693     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1694     #$phase = 'in dts';
1695     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1696     $phase = 'in dds';
1697     } else {
1698     $self->{onerror}->(node => $node, type => 'element not allowed');
1699     }
1700     } else { # before dt
1701     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1702     $phase = 'in dts';
1703     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1704     $self->{onerror}
1705     ->(node => $node, type => 'ps element missing:dt');
1706     $phase = 'in dds';
1707     } else {
1708     $self->{onerror}->(node => $node, type => 'element not allowed');
1709     }
1710     }
1711     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1712     unshift @nodes, @$sib;
1713     push @$new_todos, @$ch;
1714     } elsif ($nt == 3 or $nt == 4) {
1715     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1716     $self->{onerror}->(node => $node, type => 'character not allowed');
1717     }
1718     } elsif ($nt == 5) {
1719     unshift @nodes, @{$node->child_nodes};
1720     }
1721     }
1722     if ($phase eq 'in dts') {
1723 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1724 wakaba 1.1 }
1725    
1726     if ($todo->{inline}) {
1727     for (@$new_todos) {
1728     $_->{inline} = 1;
1729     }
1730     }
1731     return ($new_todos);
1732     },
1733     };
1734    
1735     $Element->{$HTML_NS}->{dt} = {
1736     attrs_checker => $GetHTMLAttrsChecker->({}),
1737     checker => $HTMLStrictlyInlineChecker,
1738     };
1739    
1740     $Element->{$HTML_NS}->{dd} = {
1741     attrs_checker => $GetHTMLAttrsChecker->({}),
1742     checker => $Element->{$HTML_NS}->{li}->{checker},
1743     };
1744    
1745     $Element->{$HTML_NS}->{a} = {
1746     attrs_checker => sub {
1747     my ($self, $todo) = @_;
1748     my %attr;
1749     for my $attr (@{$todo->{node}->attributes}) {
1750     my $attr_ns = $attr->namespace_uri;
1751     $attr_ns = '' unless defined $attr_ns;
1752     my $attr_ln = $attr->manakai_local_name;
1753     my $checker;
1754     if ($attr_ns eq '') {
1755     $checker = {
1756     target => $HTMLTargetAttrChecker,
1757     href => $HTMLURIAttrChecker,
1758     ping => $HTMLSpaceURIsAttrChecker,
1759 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1760 wakaba 1.1 media => $HTMLMQAttrChecker,
1761     hreflang => $HTMLLanguageTagAttrChecker,
1762     type => $HTMLIMTAttrChecker,
1763     }->{$attr_ln};
1764     if ($checker) {
1765     $attr{$attr_ln} = $attr;
1766     } else {
1767     $checker = $HTMLAttrChecker->{$attr_ln};
1768     }
1769     }
1770     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1771     || $AttrChecker->{$attr_ns}->{''};
1772     if ($checker) {
1773     $checker->($self, $attr) if ref $checker;
1774     } else {
1775     $self->{onerror}->(node => $attr, level => 'unsupported',
1776     type => 'attribute');
1777     ## ISSUE: No comformance createria for unknown attributes in the spec
1778     }
1779     }
1780    
1781 wakaba 1.4 if (defined $attr{href}) {
1782     $self->{has_hyperlink_element} = 1;
1783     } else {
1784 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1785     if (defined $attr{$_}) {
1786     $self->{onerror}->(node => $attr{$_},
1787     type => 'attribute not allowed');
1788     }
1789     }
1790     }
1791     },
1792     checker => sub {
1793     my ($self, $todo) = @_;
1794    
1795     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1796     my ($new_todos, $ch)
1797 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
1798 wakaba 1.1 push @$new_todos, $end;
1799    
1800 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1801     $_->{flag}->{in_a_href} = 1 for @$new_todos;
1802     }
1803 wakaba 1.1
1804     return ($new_todos, $ch);
1805     },
1806     };
1807    
1808     $Element->{$HTML_NS}->{q} = {
1809     attrs_checker => $GetHTMLAttrsChecker->({
1810     cite => $HTMLURIAttrChecker,
1811     }),
1812     checker => $HTMLInlineOrStrictlyInlineChecker,
1813     };
1814    
1815     $Element->{$HTML_NS}->{cite} = {
1816     attrs_checker => $GetHTMLAttrsChecker->({}),
1817     checker => $HTMLStrictlyInlineChecker,
1818     };
1819    
1820     $Element->{$HTML_NS}->{em} = {
1821     attrs_checker => $GetHTMLAttrsChecker->({}),
1822     checker => $HTMLInlineOrStrictlyInlineChecker,
1823     };
1824    
1825     $Element->{$HTML_NS}->{strong} = {
1826     attrs_checker => $GetHTMLAttrsChecker->({}),
1827     checker => $HTMLInlineOrStrictlyInlineChecker,
1828     };
1829    
1830     $Element->{$HTML_NS}->{small} = {
1831     attrs_checker => $GetHTMLAttrsChecker->({}),
1832     checker => $HTMLInlineOrStrictlyInlineChecker,
1833     };
1834    
1835     $Element->{$HTML_NS}->{m} = {
1836     attrs_checker => $GetHTMLAttrsChecker->({}),
1837     checker => $HTMLInlineOrStrictlyInlineChecker,
1838     };
1839    
1840     $Element->{$HTML_NS}->{dfn} = {
1841     attrs_checker => $GetHTMLAttrsChecker->({}),
1842     checker => sub {
1843     my ($self, $todo) = @_;
1844    
1845     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1846     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1847     push @$sib, $end;
1848    
1849     my $node = $todo->{node};
1850     my $term = $node->get_attribute_ns (undef, 'title');
1851     unless (defined $term) {
1852     for my $child (@{$node->child_nodes}) {
1853     if ($child->node_type == 1) { # ELEMENT_NODE
1854     if (defined $term) {
1855     undef $term;
1856     last;
1857     } elsif ($child->manakai_local_name eq 'abbr') {
1858     my $nsuri = $child->namespace_uri;
1859     if (defined $nsuri and $nsuri eq $HTML_NS) {
1860     my $attr = $child->get_attribute_node_ns (undef, 'title');
1861     if ($attr) {
1862     $term = $attr->value;
1863     }
1864     }
1865     }
1866     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1867     ## TEXT_NODE or CDATA_SECTION_NODE
1868     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1869     next;
1870     }
1871     undef $term;
1872     last;
1873     }
1874     }
1875     unless (defined $term) {
1876     $term = $node->text_content;
1877     }
1878     }
1879     if ($self->{term}->{$term}) {
1880     $self->{onerror}->(node => $node, type => 'duplicate term');
1881     push @{$self->{term}->{$term}}, $node;
1882     } else {
1883     $self->{term}->{$term} = [$node];
1884     }
1885     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1886     ## has |title|.
1887    
1888     return ($sib, $ch);
1889     },
1890     };
1891    
1892     $Element->{$HTML_NS}->{abbr} = {
1893     attrs_checker => $GetHTMLAttrsChecker->({
1894     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1895     ## not different. The spec says that the |title| MAY be omitted
1896     ## if there is a |dfn| whose defining term is the abbreviation,
1897     ## but it does not prohibit |abbr| w/o |title| in other cases.
1898     }),
1899     checker => $HTMLStrictlyInlineChecker,
1900     };
1901    
1902     $Element->{$HTML_NS}->{time} = {
1903     attrs_checker => $GetHTMLAttrsChecker->({
1904     datetime => sub { 1 }, # checked in |checker|
1905     }),
1906     ## TODO: Write tests
1907     checker => sub {
1908     my ($self, $todo) = @_;
1909    
1910     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1911     my $input;
1912     my $reg_sp;
1913     my $input_node;
1914     if ($attr) {
1915     $input = $attr->value;
1916     $reg_sp = qr/[\x09-\x0D\x20]*/;
1917     $input_node = $attr;
1918     } else {
1919     $input = $todo->{node}->text_content;
1920     $reg_sp = qr/\p{Zs}*/;
1921     $input_node = $todo->{node};
1922    
1923     ## ISSUE: What is the definition for "successfully extracts a date
1924     ## or time"? If the algorithm says the string is invalid but
1925     ## return some date or time, is it "successfully"?
1926     }
1927    
1928     my $hour;
1929     my $minute;
1930     my $second;
1931     if ($input =~ /
1932     \A
1933     [\x09-\x0D\x20]*
1934     ([0-9]+) # 1
1935     (?>
1936     -([0-9]+) # 2
1937     -([0-9]+) # 3
1938     [\x09-\x0D\x20]*
1939     (?>
1940     T
1941     [\x09-\x0D\x20]*
1942     )?
1943     ([0-9]+) # 4
1944     :([0-9]+) # 5
1945     (?>
1946     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1947     )?
1948     [\x09-\x0D\x20]*
1949     (?>
1950     Z
1951     [\x09-\x0D\x20]*
1952     |
1953     [+-]([0-9]+):([0-9]+) # 7, 8
1954     [\x09-\x0D\x20]*
1955     )?
1956     \z
1957     |
1958     :([0-9]+) # 9
1959     (?>
1960     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1961     )?
1962     [\x09-\x0D\x20]*\z
1963     )
1964     /x) {
1965     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1966     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1967     length $4 != 2 or length $5 != 2) {
1968     $self->{onerror}->(node => $input_node,
1969     type => 'dateortime:syntax error');
1970     }
1971    
1972     if (1 <= $2 and $2 <= 12) {
1973     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1974     if $3 < 1 or
1975     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1976     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1977     if $2 == 2 and $3 == 29 and
1978     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1979     } else {
1980     $self->{onerror}->(node => $input_node,
1981     type => 'datetime:bad month');
1982     }
1983    
1984     ($hour, $minute, $second) = ($4, $5, $6);
1985    
1986     if (defined $7) { ## [+-]hh:mm
1987     if (length $7 != 2 or length $8 != 2) {
1988     $self->{onerror}->(node => $input_node,
1989     type => 'dateortime:syntax error');
1990     }
1991    
1992     $self->{onerror}->(node => $input_node,
1993     type => 'datetime:bad timezone hour')
1994     if $7 > 23;
1995     $self->{onerror}->(node => $input_node,
1996     type => 'datetime:bad timezone minute')
1997     if $8 > 59;
1998     }
1999     } else { ## hh:mm
2000     if (length $1 != 2 or length $9 != 2) {
2001     $self->{onerror}->(node => $input_node,
2002     type => qq'dateortime:syntax error');
2003     }
2004    
2005     ($hour, $minute, $second) = ($1, $9, $10);
2006     }
2007    
2008     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2009     if $hour > 23;
2010     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2011     if $minute > 59;
2012    
2013     if (defined $second) { ## s
2014     ## NOTE: Integer part of second don't have to have length of two.
2015    
2016     if (substr ($second, 0, 1) eq '.') {
2017     $self->{onerror}->(node => $input_node,
2018     type => 'dateortime:syntax error');
2019     }
2020    
2021     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2022     if $second >= 60;
2023     }
2024     } else {
2025     $self->{onerror}->(node => $input_node,
2026     type => 'dateortime:syntax error');
2027     }
2028    
2029     return $HTMLStrictlyInlineChecker->($self, $todo);
2030     },
2031     };
2032    
2033     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2034     attrs_checker => $GetHTMLAttrsChecker->({
2035     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2036     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2037     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2038     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2039     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2040     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2041     }),
2042     checker => $HTMLStrictlyInlineChecker,
2043     };
2044    
2045     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2046     attrs_checker => $GetHTMLAttrsChecker->({
2047     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2048     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2049     }),
2050     checker => $HTMLStrictlyInlineChecker,
2051     };
2052    
2053     $Element->{$HTML_NS}->{code} = {
2054     attrs_checker => $GetHTMLAttrsChecker->({}),
2055     ## NOTE: Though |title| has special semantics,
2056     ## syntatically same as the |title| as global attribute.
2057     checker => $HTMLInlineOrStrictlyInlineChecker,
2058     };
2059    
2060     $Element->{$HTML_NS}->{var} = {
2061     attrs_checker => $GetHTMLAttrsChecker->({}),
2062     ## NOTE: Though |title| has special semantics,
2063     ## syntatically same as the |title| as global attribute.
2064     checker => $HTMLStrictlyInlineChecker,
2065     };
2066    
2067     $Element->{$HTML_NS}->{samp} = {
2068     attrs_checker => $GetHTMLAttrsChecker->({}),
2069     ## NOTE: Though |title| has special semantics,
2070     ## syntatically same as the |title| as global attribute.
2071     checker => $HTMLInlineOrStrictlyInlineChecker,
2072     };
2073    
2074     $Element->{$HTML_NS}->{kbd} = {
2075     attrs_checker => $GetHTMLAttrsChecker->({}),
2076     checker => $HTMLStrictlyInlineChecker,
2077     };
2078    
2079     $Element->{$HTML_NS}->{sub} = {
2080     attrs_checker => $GetHTMLAttrsChecker->({}),
2081     checker => $HTMLStrictlyInlineChecker,
2082     };
2083    
2084     $Element->{$HTML_NS}->{sup} = {
2085     attrs_checker => $GetHTMLAttrsChecker->({}),
2086     checker => $HTMLStrictlyInlineChecker,
2087     };
2088    
2089     $Element->{$HTML_NS}->{span} = {
2090     attrs_checker => $GetHTMLAttrsChecker->({}),
2091     ## NOTE: Though |title| has special semantics,
2092     ## syntatically same as the |title| as global attribute.
2093     checker => $HTMLInlineOrStrictlyInlineChecker,
2094     };
2095    
2096     $Element->{$HTML_NS}->{i} = {
2097     attrs_checker => $GetHTMLAttrsChecker->({}),
2098     ## NOTE: Though |title| has special semantics,
2099     ## syntatically same as the |title| as global attribute.
2100     checker => $HTMLStrictlyInlineChecker,
2101     };
2102    
2103     $Element->{$HTML_NS}->{b} = {
2104     attrs_checker => $GetHTMLAttrsChecker->({}),
2105     checker => $HTMLStrictlyInlineChecker,
2106     };
2107    
2108     $Element->{$HTML_NS}->{bdo} = {
2109     attrs_checker => sub {
2110     my ($self, $todo) = @_;
2111     $GetHTMLAttrsChecker->({})->($self, $todo);
2112     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2113     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2114     }
2115     },
2116     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2117     checker => $HTMLStrictlyInlineChecker,
2118     };
2119    
2120     $Element->{$HTML_NS}->{ins} = {
2121     attrs_checker => $GetHTMLAttrsChecker->({
2122     cite => $HTMLURIAttrChecker,
2123     datetime => $HTMLDatetimeAttrChecker,
2124     }),
2125     checker => $HTMLTransparentChecker,
2126     };
2127    
2128     $Element->{$HTML_NS}->{del} = {
2129     attrs_checker => $GetHTMLAttrsChecker->({
2130     cite => $HTMLURIAttrChecker,
2131     datetime => $HTMLDatetimeAttrChecker,
2132     }),
2133     checker => sub {
2134     my ($self, $todo) = @_;
2135    
2136     my $parent = $todo->{node}->manakai_parent_element;
2137     if (defined $parent) {
2138     my $nsuri = $parent->namespace_uri;
2139     $nsuri = '' unless defined $nsuri;
2140     my $ln = $parent->manakai_local_name;
2141     my $eldef = $Element->{$nsuri}->{$ln} ||
2142     $Element->{$nsuri}->{''} ||
2143     $ElementDefault;
2144     return $eldef->{checker}->($self, $todo);
2145     } else {
2146     return $HTMLBlockOrInlineChecker->($self, $todo);
2147     }
2148     },
2149     };
2150    
2151     ## TODO: figure
2152 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2153 wakaba 1.1
2154 wakaba 1.4 ## TODO: |alt|
2155 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2156     attrs_checker => sub {
2157     my ($self, $todo) = @_;
2158     $GetHTMLAttrsChecker->({
2159     alt => sub { }, ## NOTE: No syntactical requirement
2160     src => $HTMLURIAttrChecker,
2161     usemap => $HTMLUsemapAttrChecker,
2162     ismap => sub {
2163     my ($self, $attr, $parent_todo) = @_;
2164 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2165     $self->{onerror}->(node => $attr,
2166     type => 'attribute not allowed:ismap');
2167 wakaba 1.1 }
2168     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2169     },
2170     ## TODO: height
2171     ## TODO: width
2172     })->($self, $todo);
2173     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2174     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2175     }
2176     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2177     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2178     }
2179     },
2180     checker => $HTMLEmptyChecker,
2181     };
2182    
2183     $Element->{$HTML_NS}->{iframe} = {
2184     attrs_checker => $GetHTMLAttrsChecker->({
2185     src => $HTMLURIAttrChecker,
2186     }),
2187     checker => $HTMLTextChecker,
2188     };
2189    
2190     $Element->{$HTML_NS}->{embed} = {
2191     attrs_checker => sub {
2192     my ($self, $todo) = @_;
2193     my $has_src;
2194     for my $attr (@{$todo->{node}->attributes}) {
2195     my $attr_ns = $attr->namespace_uri;
2196     $attr_ns = '' unless defined $attr_ns;
2197     my $attr_ln = $attr->manakai_local_name;
2198     my $checker;
2199     if ($attr_ns eq '') {
2200     if ($attr_ln eq 'src') {
2201     $checker = $HTMLURIAttrChecker;
2202     $has_src = 1;
2203     } elsif ($attr_ln eq 'type') {
2204     $checker = $HTMLIMTAttrChecker;
2205     } else {
2206     ## TODO: height
2207     ## TODO: width
2208     $checker = $HTMLAttrChecker->{$attr_ln}
2209     || sub { }; ## NOTE: Any local attribute is ok.
2210     }
2211     }
2212     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2213     || $AttrChecker->{$attr_ns}->{''};
2214     if ($checker) {
2215     $checker->($self, $attr);
2216     } else {
2217     $self->{onerror}->(node => $attr, level => 'unsupported',
2218     type => 'attribute');
2219     ## ISSUE: No comformance createria for global attributes in the spec
2220     }
2221     }
2222    
2223     unless ($has_src) {
2224     $self->{onerror}->(node => $todo->{node},
2225     type => 'attribute missing:src');
2226     }
2227     },
2228     checker => $HTMLEmptyChecker,
2229     };
2230    
2231     $Element->{$HTML_NS}->{object} = {
2232     attrs_checker => sub {
2233     my ($self, $todo) = @_;
2234     $GetHTMLAttrsChecker->({
2235     data => $HTMLURIAttrChecker,
2236     type => $HTMLIMTAttrChecker,
2237     usemap => $HTMLUsemapAttrChecker,
2238     ## TODO: width
2239     ## TODO: height
2240     })->($self, $todo);
2241     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2242     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2243     $self->{onerror}->(node => $todo->{node},
2244     type => 'attribute missing:data|type');
2245     }
2246     }
2247     },
2248     checker => $ElementDefault->{checker}, ## TODO
2249 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2250 wakaba 1.1 };
2251    
2252     $Element->{$HTML_NS}->{param} = {
2253     attrs_checker => sub {
2254     my ($self, $todo) = @_;
2255     $GetHTMLAttrsChecker->({
2256     name => sub { },
2257     value => sub { },
2258     })->($self, $todo);
2259     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2260     $self->{onerror}->(node => $todo->{node},
2261     type => 'attribute missing:name');
2262     }
2263     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2264     $self->{onerror}->(node => $todo->{node},
2265     type => 'attribute missing:value');
2266     }
2267     },
2268     checker => $HTMLEmptyChecker,
2269     };
2270    
2271     $Element->{$HTML_NS}->{video} = {
2272     attrs_checker => $GetHTMLAttrsChecker->({
2273     src => $HTMLURIAttrChecker,
2274     ## TODO: start, loopstart, loopend, end
2275     ## ISSUE: they MUST be "value time offset"s. Value?
2276 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2277 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2278     controls => $GetHTMLBooleanAttrChecker->('controls'),
2279 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2280     ## TODO: width, height (not for audio!)
2281 wakaba 1.1 }),
2282     checker => sub {
2283     my ($self, $todo) = @_;
2284    
2285     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2286     return $HTMLBlockOrInlineChecker->($self, $todo);
2287     } else {
2288     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2289     ->($self, $todo);
2290     }
2291     },
2292     };
2293    
2294     $Element->{$HTML_NS}->{audio} = {
2295     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2296     checker => $Element->{$HTML_NS}->{video}->{checker},
2297     };
2298    
2299     $Element->{$HTML_NS}->{source} = {
2300     attrs_checker => sub {
2301     my ($self, $todo) = @_;
2302     $GetHTMLAttrsChecker->({
2303     src => $HTMLURIAttrChecker,
2304     type => $HTMLIMTAttrChecker,
2305     media => $HTMLMQAttrChecker,
2306     })->($self, $todo);
2307     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2308     $self->{onerror}->(node => $todo->{node},
2309     type => 'attribute missing:src');
2310     }
2311     },
2312     checker => $HTMLEmptyChecker,
2313     };
2314    
2315     $Element->{$HTML_NS}->{canvas} = {
2316     attrs_checker => $GetHTMLAttrsChecker->({
2317     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2318     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2319     }),
2320     checker => $HTMLInlineChecker,
2321     };
2322    
2323     $Element->{$HTML_NS}->{map} = {
2324 wakaba 1.4 attrs_checker => sub {
2325     my ($self, $todo) = @_;
2326     my $has_id;
2327     $GetHTMLAttrsChecker->({
2328     id => sub {
2329     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2330     my ($self, $attr) = @_;
2331     my $value = $attr->value;
2332     if (length $value > 0) {
2333     if ($self->{id}->{$value}) {
2334     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2335     push @{$self->{id}->{$value}}, $attr;
2336     } else {
2337     $self->{id}->{$value} = [$attr];
2338     }
2339 wakaba 1.1 } else {
2340 wakaba 1.4 ## NOTE: MUST contain at least one character
2341     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2342 wakaba 1.1 }
2343 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2344     $self->{onerror}->(node => $attr, type => 'space in ID');
2345     }
2346     $self->{map}->{$value} ||= $attr;
2347     $has_id = 1;
2348     },
2349     })->($self, $todo);
2350     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2351     unless $has_id;
2352     },
2353 wakaba 1.1 checker => $HTMLBlockChecker,
2354     };
2355    
2356     $Element->{$HTML_NS}->{area} = {
2357     attrs_checker => sub {
2358     my ($self, $todo) = @_;
2359     my %attr;
2360     my $coords;
2361     for my $attr (@{$todo->{node}->attributes}) {
2362     my $attr_ns = $attr->namespace_uri;
2363     $attr_ns = '' unless defined $attr_ns;
2364     my $attr_ln = $attr->manakai_local_name;
2365     my $checker;
2366     if ($attr_ns eq '') {
2367     $checker = {
2368     alt => sub { },
2369     ## NOTE: |alt| value has no conformance creteria.
2370     shape => $GetHTMLEnumeratedAttrChecker->({
2371     circ => -1, circle => 1,
2372     default => 1,
2373     poly => 1, polygon => -1,
2374     rect => 1, rectangle => -1,
2375     }),
2376     coords => sub {
2377     my ($self, $attr) = @_;
2378     my $value = $attr->value;
2379     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2380     $coords = [split /,/, $value];
2381     } else {
2382     $self->{onerror}->(node => $attr,
2383     type => 'coords:syntax error');
2384     }
2385     },
2386     target => $HTMLTargetAttrChecker,
2387     href => $HTMLURIAttrChecker,
2388     ping => $HTMLSpaceURIsAttrChecker,
2389 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2390 wakaba 1.1 media => $HTMLMQAttrChecker,
2391     hreflang => $HTMLLanguageTagAttrChecker,
2392     type => $HTMLIMTAttrChecker,
2393     }->{$attr_ln};
2394     if ($checker) {
2395     $attr{$attr_ln} = $attr;
2396     } else {
2397     $checker = $HTMLAttrChecker->{$attr_ln};
2398     }
2399     }
2400     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2401     || $AttrChecker->{$attr_ns}->{''};
2402     if ($checker) {
2403     $checker->($self, $attr) if ref $checker;
2404     } else {
2405     $self->{onerror}->(node => $attr, level => 'unsupported',
2406     type => 'attribute');
2407     ## ISSUE: No comformance createria for unknown attributes in the spec
2408     }
2409     }
2410    
2411     if (defined $attr{href}) {
2412 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2413 wakaba 1.1 unless (defined $attr{alt}) {
2414     $self->{onerror}->(node => $todo->{node},
2415     type => 'attribute missing:alt');
2416     }
2417     } else {
2418     for (qw/target ping rel media hreflang type alt/) {
2419     if (defined $attr{$_}) {
2420     $self->{onerror}->(node => $attr{$_},
2421     type => 'attribute not allowed');
2422     }
2423     }
2424     }
2425    
2426     my $shape = 'rectangle';
2427     if (defined $attr{shape}) {
2428     $shape = {
2429     circ => 'circle', circle => 'circle',
2430     default => 'default',
2431     poly => 'polygon', polygon => 'polygon',
2432     rect => 'rectangle', rectangle => 'rectangle',
2433     }->{lc $attr{shape}->value} || 'rectangle';
2434     ## TODO: ASCII lowercase?
2435     }
2436    
2437     if ($shape eq 'circle') {
2438     if (defined $attr{coords}) {
2439     if (defined $coords) {
2440     if (@$coords == 3) {
2441     if ($coords->[2] < 0) {
2442     $self->{onerror}->(node => $attr{coords},
2443     type => 'coords:out of range:2');
2444     }
2445     } else {
2446     $self->{onerror}->(node => $attr{coords},
2447     type => 'coords:number:3:'.@$coords);
2448     }
2449     } else {
2450     ## NOTE: A syntax error has been reported.
2451     }
2452     } else {
2453     $self->{onerror}->(node => $todo->{node},
2454     type => 'attribute missing:coords');
2455     }
2456     } elsif ($shape eq 'default') {
2457     if (defined $attr{coords}) {
2458     $self->{onerror}->(node => $attr{coords},
2459     type => 'attribute not allowed');
2460     }
2461     } elsif ($shape eq 'polygon') {
2462     if (defined $attr{coords}) {
2463     if (defined $coords) {
2464     if (@$coords >= 6) {
2465     unless (@$coords % 2 == 0) {
2466     $self->{onerror}->(node => $attr{coords},
2467     type => 'coords:number:even:'.@$coords);
2468     }
2469     } else {
2470     $self->{onerror}->(node => $attr{coords},
2471     type => 'coords:number:>=6:'.@$coords);
2472     }
2473     } else {
2474     ## NOTE: A syntax error has been reported.
2475     }
2476     } else {
2477     $self->{onerror}->(node => $todo->{node},
2478     type => 'attribute missing:coords');
2479     }
2480     } elsif ($shape eq 'rectangle') {
2481     if (defined $attr{coords}) {
2482     if (defined $coords) {
2483     if (@$coords == 4) {
2484     unless ($coords->[0] < $coords->[2]) {
2485     $self->{onerror}->(node => $attr{coords},
2486     type => 'coords:out of range:0');
2487     }
2488     unless ($coords->[1] < $coords->[3]) {
2489     $self->{onerror}->(node => $attr{coords},
2490     type => 'coords:out of range:1');
2491     }
2492     } else {
2493     $self->{onerror}->(node => $attr{coords},
2494     type => 'coords:number:4:'.@$coords);
2495     }
2496     } else {
2497     ## NOTE: A syntax error has been reported.
2498     }
2499     } else {
2500     $self->{onerror}->(node => $todo->{node},
2501     type => 'attribute missing:coords');
2502     }
2503     }
2504     },
2505     checker => $HTMLEmptyChecker,
2506     };
2507     ## TODO: only in map
2508    
2509     $Element->{$HTML_NS}->{table} = {
2510     attrs_checker => $GetHTMLAttrsChecker->({}),
2511     checker => sub {
2512     my ($self, $todo) = @_;
2513     my $el = $todo->{node};
2514     my $new_todos = [];
2515     my @nodes = (@{$el->child_nodes});
2516    
2517     my $phase = 'before caption';
2518     my $has_tfoot;
2519     while (@nodes) {
2520     my $node = shift @nodes;
2521     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2522    
2523     my $nt = $node->node_type;
2524     if ($nt == 1) {
2525     my $node_ns = $node->namespace_uri;
2526     $node_ns = '' unless defined $node_ns;
2527     my $node_ln = $node->manakai_local_name;
2528     ## NOTE: |minuses| list is not checked since redundant
2529 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2530     #
2531     } elsif ($phase eq 'in tbodys') {
2532 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2533     #$phase = 'in tbodys';
2534     } elsif (not $has_tfoot and
2535     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2536     $phase = 'after tfoot';
2537     $has_tfoot = 1;
2538     } else {
2539     $self->{onerror}->(node => $node, type => 'element not allowed');
2540     }
2541     } elsif ($phase eq 'in trs') {
2542     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2543     #$phase = 'in trs';
2544     } elsif (not $has_tfoot and
2545     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2546     $phase = 'after tfoot';
2547     $has_tfoot = 1;
2548     } else {
2549     $self->{onerror}->(node => $node, type => 'element not allowed');
2550     }
2551     } elsif ($phase eq 'after thead') {
2552     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2553     $phase = 'in tbodys';
2554     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2555     $phase = 'in trs';
2556     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2557     $phase = 'in tbodys';
2558     $has_tfoot = 1;
2559     } else {
2560     $self->{onerror}->(node => $node, type => 'element not allowed');
2561     }
2562     } elsif ($phase eq 'in colgroup') {
2563     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2564     $phase = 'in colgroup';
2565     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2566     $phase = 'after thead';
2567     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2568     $phase = 'in tbodys';
2569     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2570     $phase = 'in trs';
2571     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2572     $phase = 'in tbodys';
2573     $has_tfoot = 1;
2574     } else {
2575     $self->{onerror}->(node => $node, type => 'element not allowed');
2576     }
2577     } elsif ($phase eq 'before caption') {
2578     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2579     $phase = 'in colgroup';
2580     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2581     $phase = 'in colgroup';
2582     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2583     $phase = 'after thead';
2584     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2585     $phase = 'in tbodys';
2586     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2587     $phase = 'in trs';
2588     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2589     $phase = 'in tbodys';
2590     $has_tfoot = 1;
2591     } else {
2592     $self->{onerror}->(node => $node, type => 'element not allowed');
2593     }
2594     } else { # after tfoot
2595     $self->{onerror}->(node => $node, type => 'element not allowed');
2596     }
2597     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2598     unshift @nodes, @$sib;
2599     push @$new_todos, @$ch;
2600     } elsif ($nt == 3 or $nt == 4) {
2601     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2602     $self->{onerror}->(node => $node, type => 'character not allowed');
2603     }
2604     } elsif ($nt == 5) {
2605     unshift @nodes, @{$node->child_nodes};
2606     }
2607     }
2608    
2609     ## Table model errors
2610     require Whatpm::HTMLTable;
2611     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2612     my %opt = @_;
2613     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2614     });
2615     push @{$self->{return}->{table}}, $todo->{node};
2616    
2617     return ($new_todos);
2618     },
2619     };
2620    
2621     $Element->{$HTML_NS}->{caption} = {
2622     attrs_checker => $GetHTMLAttrsChecker->({}),
2623 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2624 wakaba 1.1 };
2625    
2626     $Element->{$HTML_NS}->{colgroup} = {
2627     attrs_checker => $GetHTMLAttrsChecker->({
2628     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2629     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2630     ## TODO: "attribute not supported" if |col|.
2631     ## ISSUE: MUST NOT if any |col|?
2632     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2633     }),
2634     checker => sub {
2635     my ($self, $todo) = @_;
2636     my $el = $todo->{node};
2637     my $new_todos = [];
2638     my @nodes = (@{$el->child_nodes});
2639    
2640     while (@nodes) {
2641     my $node = shift @nodes;
2642     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2643    
2644     my $nt = $node->node_type;
2645     if ($nt == 1) {
2646     my $node_ns = $node->namespace_uri;
2647     $node_ns = '' unless defined $node_ns;
2648     my $node_ln = $node->manakai_local_name;
2649     ## NOTE: |minuses| list is not checked since redundant
2650 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2651     #
2652     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2653 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2654     }
2655     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2656     unshift @nodes, @$sib;
2657     push @$new_todos, @$ch;
2658     } elsif ($nt == 3 or $nt == 4) {
2659     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2660     $self->{onerror}->(node => $node, type => 'character not allowed');
2661     }
2662     } elsif ($nt == 5) {
2663     unshift @nodes, @{$node->child_nodes};
2664     }
2665     }
2666     return ($new_todos);
2667     },
2668     };
2669    
2670     $Element->{$HTML_NS}->{col} = {
2671     attrs_checker => $GetHTMLAttrsChecker->({
2672     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2673     }),
2674     checker => $HTMLEmptyChecker,
2675     };
2676    
2677     $Element->{$HTML_NS}->{tbody} = {
2678     attrs_checker => $GetHTMLAttrsChecker->({}),
2679     checker => sub {
2680     my ($self, $todo) = @_;
2681     my $el = $todo->{node};
2682     my $new_todos = [];
2683     my @nodes = (@{$el->child_nodes});
2684    
2685     my $has_tr;
2686     while (@nodes) {
2687     my $node = shift @nodes;
2688     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2689    
2690     my $nt = $node->node_type;
2691     if ($nt == 1) {
2692     my $node_ns = $node->namespace_uri;
2693     $node_ns = '' unless defined $node_ns;
2694     my $node_ln = $node->manakai_local_name;
2695     ## NOTE: |minuses| list is not checked since redundant
2696 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2697     #
2698     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2699 wakaba 1.1 $has_tr = 1;
2700     } else {
2701     $self->{onerror}->(node => $node, type => 'element not allowed');
2702     }
2703     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2704     unshift @nodes, @$sib;
2705     push @$new_todos, @$ch;
2706     } elsif ($nt == 3 or $nt == 4) {
2707     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2708     $self->{onerror}->(node => $node, type => 'character not allowed');
2709     }
2710     } elsif ($nt == 5) {
2711     unshift @nodes, @{$node->child_nodes};
2712     }
2713     }
2714     unless ($has_tr) {
2715     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2716     }
2717     return ($new_todos);
2718     },
2719     };
2720    
2721     $Element->{$HTML_NS}->{thead} = {
2722     attrs_checker => $GetHTMLAttrsChecker->({}),
2723     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2724     };
2725    
2726     $Element->{$HTML_NS}->{tfoot} = {
2727     attrs_checker => $GetHTMLAttrsChecker->({}),
2728     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2729     };
2730    
2731     $Element->{$HTML_NS}->{tr} = {
2732     attrs_checker => $GetHTMLAttrsChecker->({}),
2733     checker => sub {
2734     my ($self, $todo) = @_;
2735     my $el = $todo->{node};
2736     my $new_todos = [];
2737     my @nodes = (@{$el->child_nodes});
2738    
2739     my $has_td;
2740     while (@nodes) {
2741     my $node = shift @nodes;
2742     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2743    
2744     my $nt = $node->node_type;
2745     if ($nt == 1) {
2746     my $node_ns = $node->namespace_uri;
2747     $node_ns = '' unless defined $node_ns;
2748     my $node_ln = $node->manakai_local_name;
2749     ## NOTE: |minuses| list is not checked since redundant
2750 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2751     #
2752     } elsif ($node_ns eq $HTML_NS and
2753     ($node_ln eq 'td' or $node_ln eq 'th')) {
2754 wakaba 1.1 $has_td = 1;
2755     } else {
2756     $self->{onerror}->(node => $node, type => 'element not allowed');
2757     }
2758     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2759     unshift @nodes, @$sib;
2760     push @$new_todos, @$ch;
2761     } elsif ($nt == 3 or $nt == 4) {
2762     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2763     $self->{onerror}->(node => $node, type => 'character not allowed');
2764     }
2765     } elsif ($nt == 5) {
2766     unshift @nodes, @{$node->child_nodes};
2767     }
2768     }
2769     unless ($has_td) {
2770     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2771     }
2772     return ($new_todos);
2773     },
2774     };
2775    
2776     $Element->{$HTML_NS}->{td} = {
2777     attrs_checker => $GetHTMLAttrsChecker->({
2778     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2779     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2780     }),
2781     checker => $HTMLBlockOrInlineChecker,
2782     };
2783    
2784     $Element->{$HTML_NS}->{th} = {
2785     attrs_checker => $GetHTMLAttrsChecker->({
2786     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2787     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2788     scope => $GetHTMLEnumeratedAttrChecker
2789     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2790     }),
2791     checker => $HTMLBlockOrInlineChecker,
2792     };
2793    
2794     ## TODO: forms
2795 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2796 wakaba 1.1
2797     $Element->{$HTML_NS}->{script} = {
2798 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2799 wakaba 1.1 src => $HTMLURIAttrChecker,
2800     defer => $GetHTMLBooleanAttrChecker->('defer'),
2801     async => $GetHTMLBooleanAttrChecker->('async'),
2802     type => $HTMLIMTAttrChecker,
2803 wakaba 1.9 }),
2804 wakaba 1.1 checker => sub {
2805     my ($self, $todo) = @_;
2806    
2807     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2808     return $HTMLEmptyChecker->($self, $todo);
2809     } else {
2810     ## NOTE: No content model conformance in HTML5 spec.
2811     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2812     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2813     if ((defined $type and $type eq '') or
2814     (defined $language and $language eq '')) {
2815     $type = 'text/javascript';
2816     } elsif (defined $type) {
2817     #
2818     } elsif (defined $language) {
2819     $type = 'text/' . $language;
2820     } else {
2821     $type = 'text/javascript';
2822     }
2823     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2824     type => 'script:'.$type); ## TODO: $type normalization
2825     return $AnyChecker->($self, $todo);
2826     }
2827     },
2828     };
2829    
2830     ## NOTE: When script is disabled.
2831     $Element->{$HTML_NS}->{noscript} = {
2832 wakaba 1.3 attrs_checker => sub {
2833     my ($self, $todo) = @_;
2834    
2835     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2836     ## since the later is not invoked when the |noscript| is used as a
2837     ## transparent element.
2838     unless ($todo->{node}->owner_document->manakai_is_html) {
2839     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2840     }
2841    
2842     $GetHTMLAttrsChecker->({})->($self, $todo);
2843     },
2844 wakaba 1.1 checker => sub {
2845     my ($self, $todo) = @_;
2846    
2847 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2848     my $new_todos = [];
2849     my @nodes = (@{$todo->{node}->child_nodes});
2850    
2851     while (@nodes) {
2852     my $node = shift @nodes;
2853     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2854    
2855     my $nt = $node->node_type;
2856     if ($nt == 1) {
2857     my $node_ns = $node->namespace_uri;
2858     $node_ns = '' unless defined $node_ns;
2859     my $node_ln = $node->manakai_local_name;
2860 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2861     #
2862     } elsif ($node_ns eq $HTML_NS) {
2863 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2864     #
2865     } elsif ($node_ln eq 'meta') {
2866 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2867     #
2868 wakaba 1.3 } else {
2869 wakaba 1.5 $self->{onerror}->(node => $node,
2870     type => 'element not allowed');
2871 wakaba 1.3 }
2872     } else {
2873     $self->{onerror}->(node => $node, type => 'element not allowed');
2874     }
2875     } else {
2876     $self->{onerror}->(node => $node, type => 'element not allowed');
2877     }
2878    
2879     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2880     unshift @nodes, @$sib;
2881     push @$new_todos, @$ch;
2882     } elsif ($nt == 3 or $nt == 4) {
2883     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2884     $self->{onerror}->(node => $node, type => 'character not allowed');
2885     }
2886     } elsif ($nt == 5) {
2887     unshift @nodes, @{$node->child_nodes};
2888     }
2889     }
2890     return ($new_todos);
2891     } else {
2892     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2893     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2894     push @$sib, $end;
2895     return ($sib, $ch);
2896     }
2897 wakaba 1.1 },
2898     };
2899 wakaba 1.3
2900     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2901 wakaba 1.1
2902     $Element->{$HTML_NS}->{'event-source'} = {
2903     attrs_checker => $GetHTMLAttrsChecker->({
2904     src => $HTMLURIAttrChecker,
2905     }),
2906     checker => $HTMLEmptyChecker,
2907     };
2908    
2909     $Element->{$HTML_NS}->{details} = {
2910     attrs_checker => $GetHTMLAttrsChecker->({
2911     open => $GetHTMLBooleanAttrChecker->('open'),
2912     }),
2913     checker => sub {
2914     my ($self, $todo) = @_;
2915    
2916     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2917     my ($sib, $ch)
2918     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2919     ->($self, $todo);
2920     push @$sib, $end;
2921     return ($sib, $ch);
2922     },
2923     };
2924    
2925     $Element->{$HTML_NS}->{datagrid} = {
2926     attrs_checker => $GetHTMLAttrsChecker->({
2927     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2928     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2929     }),
2930     checker => sub {
2931     my ($self, $todo) = @_;
2932     my $el = $todo->{node};
2933     my $new_todos = [];
2934     my @nodes = (@{$el->child_nodes});
2935    
2936     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2937    
2938     ## Block-table Block* | table | select | datalist | Empty
2939     my $mode = 'any';
2940     while (@nodes) {
2941     my $node = shift @nodes;
2942     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2943    
2944     my $nt = $node->node_type;
2945     if ($nt == 1) {
2946     my $node_ns = $node->namespace_uri;
2947     $node_ns = '' unless defined $node_ns;
2948     my $node_ln = $node->manakai_local_name;
2949     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2950 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2951     #
2952     } elsif ($mode eq 'block') {
2953 wakaba 1.1 $not_allowed = 1
2954     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2955     } elsif ($mode eq 'any') {
2956     if ($node_ns eq $HTML_NS and
2957     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2958     $mode = 'none';
2959     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2960     $mode = 'block';
2961     } else {
2962     $not_allowed = 1;
2963     }
2964     } else {
2965     $not_allowed = 1;
2966     }
2967     $self->{onerror}->(node => $node, type => 'element not allowed')
2968     if $not_allowed;
2969     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2970     unshift @nodes, @$sib;
2971     push @$new_todos, @$ch;
2972     } elsif ($nt == 3 or $nt == 4) {
2973     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2974     $self->{onerror}->(node => $node, type => 'character not allowed');
2975     }
2976     } elsif ($nt == 5) {
2977     unshift @nodes, @{$node->child_nodes};
2978     }
2979     }
2980    
2981     push @$new_todos, $end;
2982     return ($new_todos);
2983     },
2984     };
2985    
2986     $Element->{$HTML_NS}->{command} = {
2987     attrs_checker => $GetHTMLAttrsChecker->({
2988     checked => $GetHTMLBooleanAttrChecker->('checked'),
2989     default => $GetHTMLBooleanAttrChecker->('default'),
2990     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2991     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2992     icon => $HTMLURIAttrChecker,
2993     label => sub { }, ## NOTE: No conformance creteria
2994     radiogroup => sub { }, ## NOTE: No conformance creteria
2995     ## NOTE: |title| has special semantics, but no syntactical difference
2996     type => sub {
2997     my ($self, $attr) = @_;
2998     my $value = $attr->value;
2999     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3000     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3001     }
3002     },
3003     }),
3004     checker => $HTMLEmptyChecker,
3005     };
3006    
3007     $Element->{$HTML_NS}->{menu} = {
3008     attrs_checker => $GetHTMLAttrsChecker->({
3009     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3010     id => sub {
3011     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3012     my ($self, $attr) = @_;
3013     my $value = $attr->value;
3014     if (length $value > 0) {
3015     if ($self->{id}->{$value}) {
3016     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3017     push @{$self->{id}->{$value}}, $attr;
3018     } else {
3019     $self->{id}->{$value} = [$attr];
3020     }
3021     } else {
3022     ## NOTE: MUST contain at least one character
3023     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3024     }
3025     if ($value =~ /[\x09-\x0D\x20]/) {
3026     $self->{onerror}->(node => $attr, type => 'space in ID');
3027     }
3028     $self->{menu}->{$value} ||= $attr;
3029     ## ISSUE: <menu id=""><p contextmenu=""> match?
3030     },
3031     label => sub { }, ## NOTE: No conformance creteria
3032     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3033     }),
3034     checker => sub {
3035     my ($self, $todo) = @_;
3036     my $el = $todo->{node};
3037     my $new_todos = [];
3038     my @nodes = (@{$el->child_nodes});
3039    
3040     my $content = 'li or inline';
3041     while (@nodes) {
3042     my $node = shift @nodes;
3043     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3044    
3045     my $nt = $node->node_type;
3046     if ($nt == 1) {
3047     my $node_ns = $node->namespace_uri;
3048     $node_ns = '' unless defined $node_ns;
3049     my $node_ln = $node->manakai_local_name;
3050     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3051 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3052     #
3053     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3054 wakaba 1.1 if ($content eq 'inline') {
3055     $not_allowed = 1;
3056     } elsif ($content eq 'li or inline') {
3057     $content = 'li';
3058     }
3059     } else {
3060     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3061     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3062     $content = 'inline';
3063     } else {
3064     $not_allowed = 1;
3065     }
3066     }
3067     $self->{onerror}->(node => $node, type => 'element not allowed')
3068     if $not_allowed;
3069     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3070     unshift @nodes, @$sib;
3071     push @$new_todos, @$ch;
3072     } elsif ($nt == 3 or $nt == 4) {
3073     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3074     if ($content eq 'li') {
3075     $self->{onerror}->(node => $node, type => 'character not allowed');
3076     } elsif ($content eq 'li or inline') {
3077     $content = 'inline';
3078     }
3079     }
3080     } elsif ($nt == 5) {
3081     unshift @nodes, @{$node->child_nodes};
3082     }
3083     }
3084    
3085     for (@$new_todos) {
3086     $_->{inline} = 1;
3087     }
3088     return ($new_todos);
3089     },
3090 wakaba 1.8 };
3091    
3092     $Element->{$HTML_NS}->{datatemplate} = {
3093     attrs_checker => $GetHTMLAttrsChecker->({}),
3094     checker => sub {
3095     my ($self, $todo) = @_;
3096     my $el = $todo->{node};
3097     my $new_todos = [];
3098     my @nodes = (@{$el->child_nodes});
3099    
3100     while (@nodes) {
3101     my $node = shift @nodes;
3102     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3103    
3104     my $nt = $node->node_type;
3105     if ($nt == 1) {
3106     my $node_ns = $node->namespace_uri;
3107     $node_ns = '' unless defined $node_ns;
3108     my $node_ln = $node->manakai_local_name;
3109     ## NOTE: |minuses| list is not checked since redundant
3110     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3111     #
3112     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3113     $self->{onerror}->(node => $node,
3114     type => 'element not allowed:datatemplate');
3115     }
3116     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3117     unshift @nodes, @$sib;
3118     push @$new_todos, @$ch;
3119     } elsif ($nt == 3 or $nt == 4) {
3120     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3121     $self->{onerror}->(node => $node, type => 'character not allowed');
3122     }
3123     } elsif ($nt == 5) {
3124     unshift @nodes, @{$node->child_nodes};
3125     }
3126     }
3127     return ($new_todos);
3128     },
3129     is_xml_root => 1,
3130     };
3131    
3132     $Element->{$HTML_NS}->{rule} = {
3133     attrs_checker => $GetHTMLAttrsChecker->({
3134     ## TODO: |condition| attribute
3135 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3136 wakaba 1.8 }),
3137     checker => sub {
3138     my ($self, $todo) = @_;
3139    
3140     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3141     my ($sib, $ch) = $AnyChecker->($self, $todo);
3142     push @$sib, $end;
3143     return ($sib, $ch);
3144     },
3145     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3146     ## is applied to some conforming data, results in a conforming DOM tree.":
3147     ## We don't check against this.
3148     };
3149    
3150     $Element->{$HTML_NS}->{nest} = {
3151     attrs_checker => $GetHTMLAttrsChecker->({
3152     ## TODO: |filter| attribute
3153 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3154 wakaba 1.8 }),
3155     checker => $HTMLEmptyChecker,
3156 wakaba 1.1 };
3157    
3158     $Element->{$HTML_NS}->{legend} = {
3159     attrs_checker => $GetHTMLAttrsChecker->({}),
3160     checker => sub {
3161     my ($self, $todo) = @_;
3162    
3163     my $parent = $todo->{node}->manakai_parent_element;
3164     if (defined $parent) {
3165     my $nsuri = $parent->namespace_uri;
3166     $nsuri = '' unless defined $nsuri;
3167     my $ln = $parent->manakai_local_name;
3168     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3169     return $HTMLInlineChecker->($self, $todo);
3170     } else {
3171 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3172 wakaba 1.1 }
3173     } else {
3174     return $HTMLInlineChecker->($self, $todo);
3175     }
3176    
3177     ## ISSUE: Content model is defined only for fieldset/legend,
3178     ## details/legend, and figure/legend.
3179     },
3180     };
3181    
3182     $Element->{$HTML_NS}->{div} = {
3183     attrs_checker => $GetHTMLAttrsChecker->({}),
3184     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3185     };
3186    
3187     $Element->{$HTML_NS}->{font} = {
3188     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3189     checker => $HTMLTransparentChecker,
3190     };
3191    
3192     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3193    
3194     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24