/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.17 - (hide annotations) (download)
Sun Nov 4 09:11:14 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.16: +3 -1 lines
++ whatpm/Whatpm/ChangeLog	4 Nov 2007 09:10:40 -0000
	* CacheManifest.pm: New module.

2007-11-04  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     ## Strictly inline-level content
252     my $HTMLStrictlyInlineChecker = sub {
253     my ($self, $todo) = @_;
254     my $el = $todo->{node};
255     my $new_todos = [];
256     my @nodes = (@{$el->child_nodes});
257    
258     while (@nodes) {
259     my $node = shift @nodes;
260     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
261    
262     my $nt = $node->node_type;
263     if ($nt == 1) {
264     my $node_ns = $node->namespace_uri;
265     $node_ns = '' unless defined $node_ns;
266     my $node_ln = $node->manakai_local_name;
267     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
268     $not_allowed = 1
269 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
270     $self->{pluses}->{$node_ns}->{$node_ln};
271 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
272     if $not_allowed;
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 5) {
277     unshift @nodes, @{$node->child_nodes};
278     }
279     }
280    
281     for (@$new_todos) {
282     $_->{inline} = 1;
283     $_->{strictly_inline} = 1;
284     }
285     return ($new_todos);
286     }; # $HTMLStrictlyInlineChecker
287    
288 wakaba 1.8 ## Inline-level or strictly inline-level content
289 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
290     my ($self, $todo) = @_;
291     my $el = $todo->{node};
292     my $new_todos = [];
293     my @nodes = (@{$el->child_nodes});
294    
295     while (@nodes) {
296     my $node = shift @nodes;
297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
298    
299     my $nt = $node->node_type;
300     if ($nt == 1) {
301     my $node_ns = $node->namespace_uri;
302     $node_ns = '' unless defined $node_ns;
303     my $node_ln = $node->manakai_local_name;
304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
305     if ($todo->{strictly_inline}) {
306     $not_allowed = 1
307 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308     $self->{pluses}->{$node_ns}->{$node_ln};
309 wakaba 1.1 } else {
310     $not_allowed = 1
311     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
312 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
313     $self->{pluses}->{$node_ns}->{$node_ln};
314 wakaba 1.1 }
315     $self->{onerror}->(node => $node, type => 'element not allowed')
316     if $not_allowed;
317     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
318     unshift @nodes, @$sib;
319     push @$new_todos, @$ch;
320     } elsif ($nt == 5) {
321     unshift @nodes, @{$node->child_nodes};
322     }
323     }
324    
325     for (@$new_todos) {
326     $_->{inline} = 1;
327     $_->{strictly_inline} = 1;
328     }
329     return ($new_todos);
330     }; # $HTMLInlineOrStrictlyInlineChecker
331    
332 wakaba 1.13 ## TODO: New "significant" SHOULD-level requirement (revision 1114).
333 wakaba 1.1
334 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
335 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
336     my ($self, $todo) = @_;
337     my $el = $todo->{node};
338     my $new_todos = [];
339     my @nodes = (@{$el->child_nodes});
340    
341     my $content = 'block-or-inline'; # or 'block' or 'inline'
342     my @block_not_inline;
343     while (@nodes) {
344     my $node = shift @nodes;
345     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
346    
347 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
348     ## is conforming or not.
349    
350 wakaba 1.1 my $nt = $node->node_type;
351     if ($nt == 1) {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
356     if ($content eq 'block') {
357     $not_allowed = 1
358 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
359     $self->{pluses}->{$node_ns}->{$node_ln};
360 wakaba 1.1 } elsif ($content eq 'inline') {
361     $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
364     $self->{pluses}->{$node_ns}->{$node_ln};
365 wakaba 1.1 } else {
366     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
367     my $is_inline
368     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
369     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
370    
371     push @block_not_inline, $node
372     if $is_block and not $is_inline and not $not_allowed;
373 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
374 wakaba 1.1 $content = 'inline';
375     for (@block_not_inline) {
376     $self->{onerror}->(node => $_, type => 'element not allowed');
377     }
378     $not_allowed = 1 unless $is_inline;
379     }
380     }
381     $self->{onerror}->(node => $node, type => 'element not allowed')
382     if $not_allowed;
383     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
384     unshift @nodes, @$sib;
385     push @$new_todos, @$ch;
386     } elsif ($nt == 3 or $nt == 4) {
387     if ($node->data =~ /[^\x09-\x0D\x20]/) {
388     if ($content eq 'block') {
389     $self->{onerror}->(node => $node, type => 'character not allowed');
390     } else {
391     $content = 'inline';
392     for (@block_not_inline) {
393     $self->{onerror}->(node => $_, type => 'element not allowed');
394     }
395     }
396     }
397     } elsif ($nt == 5) {
398     unshift @nodes, @{$node->child_nodes};
399     }
400     }
401    
402     if ($content eq 'inline') {
403     for (@$new_todos) {
404     $_->{inline} = 1;
405     }
406     }
407     return ($new_todos);
408     };
409    
410     ## Zero or more XXX element, then either block-level or inline-level
411     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
412     my ($elnsuri, $ellname) = @_;
413     return sub {
414     my ($self, $todo) = @_;
415     my $el = $todo->{node};
416     my $new_todos = [];
417     my @nodes = (@{$el->child_nodes});
418    
419     my $has_non_style;
420     my $content = 'block-or-inline'; # or 'block' or 'inline'
421     my @block_not_inline;
422     while (@nodes) {
423     my $node = shift @nodes;
424     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
425    
426     my $nt = $node->node_type;
427     if ($nt == 1) {
428     my $node_ns = $node->namespace_uri;
429     $node_ns = '' unless defined $node_ns;
430     my $node_ln = $node->manakai_local_name;
431     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
432     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
433     $not_allowed = 1 if $has_non_style;
434     if ($ellname eq 'style' and
435     not $node->has_attribute_ns (undef, 'scoped')) {
436     $not_allowed = 1;
437     }
438     } elsif ($content eq 'block') {
439     $has_non_style = 1;
440     $not_allowed = 1
441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
442     $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 } elsif ($content eq 'inline') {
444     $has_non_style = 1;
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
447 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
448     $self->{pluses}->{$node_ns}->{$node_ln};
449 wakaba 1.1 } else {
450 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
452     my $is_inline
453     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455    
456     push @block_not_inline, $node
457     if $is_block and not $is_inline and not $not_allowed;
458 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
459 wakaba 1.1 $content = 'inline';
460     for (@block_not_inline) {
461     $self->{onerror}->(node => $_, type => 'element not allowed');
462     }
463     $not_allowed = 1 unless $is_inline;
464     }
465     }
466     $self->{onerror}->(node => $node, type => 'element not allowed')
467     if $not_allowed;
468     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
469     unshift @nodes, @$sib;
470     push @$new_todos, @$ch;
471     } elsif ($nt == 3 or $nt == 4) {
472     if ($node->data =~ /[^\x09-\x0D\x20]/) {
473     $has_non_style = 1;
474     if ($content eq 'block') {
475     $self->{onerror}->(node => $node, type => 'character not allowed');
476     } else {
477     $content = 'inline';
478     for (@block_not_inline) {
479     $self->{onerror}->(node => $_, type => 'element not allowed');
480     }
481     }
482     }
483     } elsif ($nt == 5) {
484     unshift @nodes, @{$node->child_nodes};
485     }
486     }
487    
488     if ($content eq 'inline') {
489     for (@$new_todos) {
490     $_->{inline} = 1;
491     }
492     }
493     return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
498    
499     our $AttrChecker;
500    
501     my $GetHTMLEnumeratedAttrChecker = sub {
502     my $states = shift; # {value => conforming ? 1 : -1}
503     return sub {
504     my ($self, $attr) = @_;
505     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
506     if ($states->{$value} > 0) {
507     #
508     } elsif ($states->{$value}) {
509     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
510     } else {
511     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
512     }
513     };
514     }; # $GetHTMLEnumeratedAttrChecker
515    
516     my $GetHTMLBooleanAttrChecker = sub {
517     my $local_name = shift;
518     return sub {
519     my ($self, $attr) = @_;
520     my $value = $attr->value;
521     unless ($value eq $local_name or $value eq '') {
522     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
523     }
524     };
525     }; # $GetHTMLBooleanAttrChecker
526    
527 wakaba 1.8 ## Unordered set of space-separated tokens
528     my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
529     my ($self, $attr) = @_;
530     my %word;
531     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
532     unless ($word{$word}) {
533     $word{$word} = 1;
534     } else {
535     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
536     }
537     }
538     }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
539    
540 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
541     ## whose allowed values are defined by the section on link types)
542     my $HTMLLinkTypesAttrChecker = sub {
543 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
544 wakaba 1.1 my %word;
545     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
546     unless ($word{$word}) {
547     $word{$word} = 1;
548     } else {
549     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
550     }
551     }
552     ## NOTE: Case sensitive match (since HTML5 spec does not say link
553     ## types are case-insensitive and it says "The value should not
554     ## be confusingly similar to any other defined value (e.g.
555     ## differing only in case).").
556     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
557     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
558     ## values to be used conformingly.
559     require Whatpm::_LinkTypeList;
560     our $LinkType;
561     for my $word (keys %word) {
562     my $def = $LinkType->{$word};
563     if (defined $def) {
564     if ($def->{status} eq 'accepted') {
565     if (defined $def->{effect}->[$a_or_area]) {
566     #
567     } else {
568     $self->{onerror}->(node => $attr,
569     type => 'link type:bad context:'.$word);
570     }
571     } elsif ($def->{status} eq 'proposal') {
572     $self->{onerror}->(node => $attr, level => 's',
573     type => 'link type:proposed:'.$word);
574     } else { # rejected or synonym
575     $self->{onerror}->(node => $attr,
576     type => 'link type:non-conforming:'.$word);
577     }
578 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
579     if ($word eq 'alternate') {
580     #
581     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
582     $todo->{has_hyperlink_link_type} = 1;
583     }
584     }
585 wakaba 1.1 if ($def->{unique}) {
586     unless ($self->{has_link_type}->{$word}) {
587     $self->{has_link_type}->{$word} = 1;
588     } else {
589     $self->{onerror}->(node => $attr,
590     type => 'link type:duplicate:'.$word);
591     }
592     }
593     } else {
594     $self->{onerror}->(node => $attr, level => 'unsupported',
595     type => 'link type:'.$word);
596     }
597     }
598 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
599     if $word{alternate} and not $word{stylesheet};
600 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
601     ## says that using both X-Pingback: header field and HTML
602     ## <link rel=pingback> is deprecated and if both appears they
603     ## SHOULD contain exactly the same value.
604     ## ISSUE: Pingback 1.0 specification defines the exact representation
605     ## of its link element, which cannot be tested by the current arch.
606     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
607     ## include any string that matches to the pattern for the rel=pingback link,
608     ## which again inpossible to test.
609     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
610 wakaba 1.12
611     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
612     ## ISSUE: <link rel="up up"> is non-conforming, since rel="" is unordered.
613 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
614     ## then they SHOULD be described in different paragraphs.".
615 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
616    
617     ## URI (or IRI)
618     my $HTMLURIAttrChecker = sub {
619     my ($self, $attr) = @_;
620     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
621     my $value = $attr->value;
622     Whatpm::URIChecker->check_iri_reference ($value, sub {
623     my %opt = @_;
624     $self->{onerror}->(node => $attr, level => $opt{level},
625     type => 'URI::'.$opt{type}.
626     (defined $opt{position} ? ':'.$opt{position} : ''));
627     });
628 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
629 wakaba 1.1 }; # $HTMLURIAttrChecker
630    
631     ## A space separated list of one or more URIs (or IRIs)
632     my $HTMLSpaceURIsAttrChecker = sub {
633     my ($self, $attr) = @_;
634     my $i = 0;
635     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
636     Whatpm::URIChecker->check_iri_reference ($value, sub {
637     my %opt = @_;
638     $self->{onerror}->(node => $attr, level => $opt{level},
639 wakaba 1.2 type => 'URIs:'.':'.
640     $opt{type}.':'.$i.
641 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
642     });
643     $i++;
644     }
645     ## ISSUE: Relative references?
646     ## ISSUE: Leading or trailing white spaces are conformant?
647     ## ISSUE: A sequence of white space characters are conformant?
648     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
649     ## NOTE: Duplication seems not an error.
650 wakaba 1.4 $self->{has_uri_attr} = 1;
651 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
652    
653     my $HTMLDatetimeAttrChecker = sub {
654     my ($self, $attr) = @_;
655     my $value = $attr->value;
656     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
657     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
658     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
659     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
660     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
661     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
662     if $d < 1 or
663     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
664     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
665     if $M == 2 and $d == 29 and
666     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
667     } else {
668     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
669     }
670     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
671     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
672     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
673     if defined $s and $s > 59;
674     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
675     if $zh > 23;
676     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
677     if $zm > 59;
678     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
679     } else {
680     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
681     }
682     }; # $HTMLDatetimeAttrChecker
683    
684     my $HTMLIntegerAttrChecker = sub {
685     my ($self, $attr) = @_;
686     my $value = $attr->value;
687     unless ($value =~ /\A-?[0-9]+\z/) {
688     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
689     }
690     }; # $HTMLIntegerAttrChecker
691    
692     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
693     my $range_check = shift;
694     return sub {
695     my ($self, $attr) = @_;
696     my $value = $attr->value;
697     if ($value =~ /\A[0-9]+\z/) {
698     unless ($range_check->($value + 0)) {
699     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
700     }
701     } else {
702     $self->{onerror}->(node => $attr,
703     type => 'nninteger:syntax error');
704     }
705     };
706     }; # $GetHTMLNonNegativeIntegerAttrChecker
707    
708     my $GetHTMLFloatingPointNumberAttrChecker = sub {
709     my $range_check = shift;
710     return sub {
711     my ($self, $attr) = @_;
712     my $value = $attr->value;
713     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
714     unless ($range_check->($value + 0)) {
715     $self->{onerror}->(node => $attr, type => 'float:out of range');
716     }
717     } else {
718     $self->{onerror}->(node => $attr,
719     type => 'float:syntax error');
720     }
721     };
722     }; # $GetHTMLFloatingPointNumberAttrChecker
723    
724     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
725     ## ISSUE: RFC 2046 does not define syntax of media types.
726     ## ISSUE: The definition of "a valid MIME type" is unknown.
727     ## Syntactical correctness?
728     my $HTMLIMTAttrChecker = sub {
729     my ($self, $attr) = @_;
730     my $value = $attr->value;
731     ## ISSUE: RFC 2045 Content-Type header field allows insertion
732     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
733     ## ISSUE: RFC 2231 extension? Maybe no.
734     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
735     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
736     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
737     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
738     my @type = ($1, $2);
739     my $param = $3;
740     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
741     if (defined $2) {
742     push @type, $1 => $2;
743     } else {
744     my $n = $1;
745     my $v = $2;
746     $v =~ s/\\(.)/$1/gs;
747     push @type, $n => $v;
748     }
749     }
750     require Whatpm::IMTChecker;
751     Whatpm::IMTChecker->check_imt (sub {
752     my %opt = @_;
753     $self->{onerror}->(node => $attr, level => $opt{level},
754     type => 'IMT:'.$opt{type});
755     }, @type);
756     } else {
757     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
758     }
759     }; # $HTMLIMTAttrChecker
760    
761     my $HTMLLanguageTagAttrChecker = sub {
762 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
763    
764 wakaba 1.1 my ($self, $attr) = @_;
765 wakaba 1.6 my $value = $attr->value;
766     require Whatpm::LangTag;
767     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
768     my %opt = @_;
769     my $type = 'LangTag:'.$opt{type};
770     $type .= ':' . $opt{subtag} if defined $opt{subtag};
771     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
772     level => $opt{level});
773     });
774 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
775 wakaba 1.6
776     ## TODO: testdata
777 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
778    
779     ## "A valid media query [MQ]"
780     my $HTMLMQAttrChecker = sub {
781     my ($self, $attr) = @_;
782     $self->{onerror}->(node => $attr, level => 'unsupported',
783     type => 'media query');
784     ## ISSUE: What is "a valid media query"?
785     }; # $HTMLMQAttrChecker
786    
787     my $HTMLEventHandlerAttrChecker = sub {
788     my ($self, $attr) = @_;
789     $self->{onerror}->(node => $attr, level => 'unsupported',
790     type => 'event handler');
791     ## TODO: MUST contain valid ECMAScript code matching the
792     ## ECMAScript |FunctionBody| production. [ECMA262]
793     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
794     ## ISSUE: Automatic semicolon insertion does not apply?
795     ## ISSUE: Other script languages?
796     }; # $HTMLEventHandlerAttrChecker
797    
798     my $HTMLUsemapAttrChecker = sub {
799     my ($self, $attr) = @_;
800     ## MUST be a valid hashed ID reference to a |map| element
801     my $value = $attr->value;
802     if ($value =~ s/^#//) {
803     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
804     push @{$self->{usemap}}, [$value => $attr];
805     } else {
806     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
807     }
808     ## NOTE: Space characters in hashed ID references are conforming.
809     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
810     }; # $HTMLUsemapAttrChecker
811    
812     my $HTMLTargetAttrChecker = sub {
813     my ($self, $attr) = @_;
814     my $value = $attr->value;
815     if ($value =~ /^_/) {
816     $value = lc $value; ## ISSUE: ASCII case-insentitive?
817     unless ({
818     _self => 1, _parent => 1, _top => 1,
819     }->{$value}) {
820     $self->{onerror}->(node => $attr,
821     type => 'reserved browsing context name');
822     }
823     } else {
824     #$ ISSUE: An empty string is conforming?
825     }
826     }; # $HTMLTargetAttrChecker
827    
828     my $HTMLAttrChecker = {
829     id => sub {
830     ## NOTE: |map| has its own variant of |id=""| checker
831     my ($self, $attr) = @_;
832     my $value = $attr->value;
833     if (length $value > 0) {
834     if ($self->{id}->{$value}) {
835     $self->{onerror}->(node => $attr, type => 'duplicate ID');
836     push @{$self->{id}->{$value}}, $attr;
837     } else {
838     $self->{id}->{$value} = [$attr];
839     }
840     if ($value =~ /[\x09-\x0D\x20]/) {
841     $self->{onerror}->(node => $attr, type => 'space in ID');
842     }
843     } else {
844     ## NOTE: MUST contain at least one character
845     $self->{onerror}->(node => $attr, type => 'empty attribute value');
846     }
847     },
848     title => sub {}, ## NOTE: No conformance creteria
849     lang => sub {
850     my ($self, $attr) = @_;
851 wakaba 1.6 my $value = $attr->value;
852     if ($value eq '') {
853     #
854     } else {
855     require Whatpm::LangTag;
856     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
857     my %opt = @_;
858     my $type = 'LangTag:'.$opt{type};
859     $type .= ':' . $opt{subtag} if defined $opt{subtag};
860     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
861     level => $opt{level});
862     });
863     }
864 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
865     unless ($attr->owner_document->manakai_is_html) {
866     $self->{onerror}->(node => $attr, type => 'in XML:lang');
867     }
868 wakaba 1.6
869     ## TODO: test data
870 wakaba 1.1 },
871     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
872     class => sub {
873     my ($self, $attr) = @_;
874     my %word;
875     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
876     unless ($word{$word}) {
877     $word{$word} = 1;
878     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
879     } else {
880     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
881     }
882     }
883     },
884     contextmenu => sub {
885     my ($self, $attr) = @_;
886     my $value = $attr->value;
887     push @{$self->{contextmenu}}, [$value => $attr];
888     ## ISSUE: "The value must be the ID of a menu element in the DOM."
889     ## What is "in the DOM"? A menu Element node that is not part
890     ## of the Document tree is in the DOM? A menu Element node that
891     ## belong to another Document tree is in the DOM?
892     },
893     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
894 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
895     ## TODO: ref, template, registrationmark
896 wakaba 1.1 };
897    
898     for (qw/
899     onabort onbeforeunload onblur onchange onclick oncontextmenu
900     ondblclick ondrag ondragend ondragenter ondragleave ondragover
901     ondragstart ondrop onerror onfocus onkeydown onkeypress
902     onkeyup onload onmessage onmousedown onmousemove onmouseout
903     onmouseover onmouseup onmousewheel onresize onscroll onselect
904     onsubmit onunload
905     /) {
906     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
907     }
908    
909     my $GetHTMLAttrsChecker = sub {
910     my $element_specific_checker = shift;
911     return sub {
912     my ($self, $todo) = @_;
913     for my $attr (@{$todo->{node}->attributes}) {
914     my $attr_ns = $attr->namespace_uri;
915     $attr_ns = '' unless defined $attr_ns;
916     my $attr_ln = $attr->manakai_local_name;
917     my $checker;
918     if ($attr_ns eq '') {
919     $checker = $element_specific_checker->{$attr_ln}
920     || $HTMLAttrChecker->{$attr_ln};
921     }
922     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
923     || $AttrChecker->{$attr_ns}->{''};
924     if ($checker) {
925     $checker->($self, $attr, $todo);
926     } else {
927     $self->{onerror}->(node => $attr, level => 'unsupported',
928     type => 'attribute');
929     ## ISSUE: No comformance createria for unknown attributes in the spec
930     }
931     }
932     };
933     }; # $GetHTMLAttrsChecker
934    
935     our $Element;
936     our $ElementDefault;
937     our $AnyChecker;
938    
939     $Element->{$HTML_NS}->{''} = {
940     attrs_checker => $GetHTMLAttrsChecker->({}),
941     checker => $ElementDefault->{checker},
942     };
943    
944     $Element->{$HTML_NS}->{html} = {
945     is_root => 1,
946     attrs_checker => $GetHTMLAttrsChecker->({
947 wakaba 1.16 manifest => $HTMLURIAttrChecker,
948 wakaba 1.1 xmlns => sub {
949     my ($self, $attr) = @_;
950     my $value = $attr->value;
951     unless ($value eq $HTML_NS) {
952     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
953     }
954     unless ($attr->owner_document->manakai_is_html) {
955     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
956     ## TODO: Test
957     }
958     },
959     }),
960     checker => sub {
961     my ($self, $todo) = @_;
962     my $el = $todo->{node};
963     my $new_todos = [];
964     my @nodes = (@{$el->child_nodes});
965    
966     my $phase = 'before head';
967     while (@nodes) {
968     my $node = shift @nodes;
969     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
970    
971     my $nt = $node->node_type;
972     if ($nt == 1) {
973     my $node_ns = $node->namespace_uri;
974     $node_ns = '' unless defined $node_ns;
975     my $node_ln = $node->manakai_local_name;
976     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
977 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
978     #
979     } elsif ($phase eq 'before head') {
980 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
981     $phase = 'after head';
982     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
983     $self->{onerror}->(node => $node, type => 'ps element missing:head');
984     $phase = 'after body';
985     } else {
986     $not_allowed = 1;
987     # before head
988     }
989     } elsif ($phase eq 'after head') {
990     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
991     $phase = 'after body';
992     } else {
993     $not_allowed = 1;
994     # after head
995     }
996     } else { #elsif ($phase eq 'after body') {
997     $not_allowed = 1;
998     # after body
999     }
1000     $self->{onerror}->(node => $node, type => 'element not allowed')
1001     if $not_allowed;
1002     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1003     unshift @nodes, @$sib;
1004     push @$new_todos, @$ch;
1005     } elsif ($nt == 3 or $nt == 4) {
1006     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1007     $self->{onerror}->(node => $node, type => 'character not allowed');
1008     }
1009     } elsif ($nt == 5) {
1010     unshift @nodes, @{$node->child_nodes};
1011     }
1012     }
1013    
1014     if ($phase eq 'before head') {
1015     $self->{onerror}->(node => $el, type => 'child element missing:head');
1016     $self->{onerror}->(node => $el, type => 'child element missing:body');
1017     } elsif ($phase eq 'after head') {
1018     $self->{onerror}->(node => $el, type => 'child element missing:body');
1019     }
1020    
1021     return ($new_todos);
1022     },
1023     };
1024    
1025     $Element->{$HTML_NS}->{head} = {
1026     attrs_checker => $GetHTMLAttrsChecker->({}),
1027     checker => sub {
1028     my ($self, $todo) = @_;
1029     my $el = $todo->{node};
1030     my $new_todos = [];
1031     my @nodes = (@{$el->child_nodes});
1032    
1033     my $has_title;
1034     my $phase = 'initial'; # 'after charset', 'after base'
1035     while (@nodes) {
1036     my $node = shift @nodes;
1037     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1038    
1039     my $nt = $node->node_type;
1040     if ($nt == 1) {
1041     my $node_ns = $node->namespace_uri;
1042     $node_ns = '' unless defined $node_ns;
1043     my $node_ln = $node->manakai_local_name;
1044     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1045 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1046     #
1047     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1048 wakaba 1.1 $phase = 'after base';
1049     unless ($has_title) {
1050     $has_title = 1;
1051     } else {
1052     $not_allowed = 1;
1053     }
1054     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1055     if ($node->has_attribute_ns (undef, 'charset')) {
1056     if ($phase eq 'initial') {
1057     $phase = 'after charset';
1058     } else {
1059     $not_allowed = 1;
1060     ## NOTE: See also |base|'s "contexts" field in the spec
1061     }
1062 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1063     $node->has_attribute_ns (undef, 'http-equiv')) {
1064     $phase = 'after base';
1065 wakaba 1.1 } else {
1066     $phase = 'after base';
1067 wakaba 1.5 $not_allowed = 1;
1068 wakaba 1.1 }
1069     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1070     if ($phase eq 'initial' or $phase eq 'after charset') {
1071     $phase = 'after base';
1072     } else {
1073     $not_allowed = 1;
1074     }
1075     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1076     $phase = 'after base';
1077     if ($node->has_attribute_ns (undef, 'scoped')) {
1078     $not_allowed = 1;
1079     }
1080     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1081     $phase = 'after base';
1082     } else {
1083     $not_allowed = 1;
1084     }
1085     $self->{onerror}->(node => $node, type => 'element not allowed')
1086     if $not_allowed;
1087 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1088 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1089     unshift @nodes, @$sib;
1090     push @$new_todos, @$ch;
1091     } elsif ($nt == 3 or $nt == 4) {
1092     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1093     $self->{onerror}->(node => $node, type => 'character not allowed');
1094     }
1095     } elsif ($nt == 5) {
1096     unshift @nodes, @{$node->child_nodes};
1097     }
1098     }
1099     unless ($has_title) {
1100     $self->{onerror}->(node => $el, type => 'child element missing:title');
1101     }
1102     return ($new_todos);
1103     },
1104     };
1105    
1106     $Element->{$HTML_NS}->{title} = {
1107     attrs_checker => $GetHTMLAttrsChecker->({}),
1108     checker => $HTMLTextChecker,
1109     };
1110    
1111     $Element->{$HTML_NS}->{base} = {
1112 wakaba 1.4 attrs_checker => sub {
1113     my ($self, $todo) = @_;
1114    
1115 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1116     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1117    
1118     if ($self->{has_uri_attr} and $has_href) {
1119 wakaba 1.4 ## ISSUE: Are these examples conforming?
1120     ## <head profile="a b c"><base href> (except for |profile|'s
1121     ## non-conformance)
1122     ## <title xml:base="relative"/><base href/> (maybe it should be)
1123     ## <unknown xmlns="relative"/><base href/> (assuming that
1124     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1125     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1126     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1127     ## <style>@import 'relative';</style><base href>
1128     ## <script>location.href = 'relative';</script><base href>
1129 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1130     ## an exception.
1131 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1132     type => 'basehref after URI attribute');
1133     }
1134 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1135 wakaba 1.4 ## ISSUE: Are these examples conforming?
1136     ## <head><title xlink:href=""/><base target="name"/></head>
1137     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1138     ## (assuming that |xbl:xbl| is allowed before |base|)
1139     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1140     ## <link href=""/><base target="name"/>
1141     ## <link rel=unknown href=""><base target=name>
1142     $self->{onerror}->(node => $todo->{node},
1143     type => 'basetarget after hyperlink');
1144     }
1145    
1146 wakaba 1.14 if (not $has_href and not $has_target) {
1147     $self->{onerror}->(node => $todo->{node},
1148     type => 'attribute missing:href|target');
1149     }
1150    
1151 wakaba 1.4 return $GetHTMLAttrsChecker->({
1152     href => $HTMLURIAttrChecker,
1153     target => $HTMLTargetAttrChecker,
1154     })->($self, $todo);
1155     },
1156 wakaba 1.1 checker => $HTMLEmptyChecker,
1157     };
1158    
1159     $Element->{$HTML_NS}->{link} = {
1160     attrs_checker => sub {
1161     my ($self, $todo) = @_;
1162     $GetHTMLAttrsChecker->({
1163     href => $HTMLURIAttrChecker,
1164 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1165 wakaba 1.1 media => $HTMLMQAttrChecker,
1166     hreflang => $HTMLLanguageTagAttrChecker,
1167     type => $HTMLIMTAttrChecker,
1168     ## NOTE: Though |title| has special semantics,
1169     ## syntactically same as the |title| as global attribute.
1170     })->($self, $todo);
1171 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1172     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1173     } else {
1174 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1175     type => 'attribute missing:href');
1176     }
1177     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1178     $self->{onerror}->(node => $todo->{node},
1179     type => 'attribute missing:rel');
1180     }
1181     },
1182     checker => $HTMLEmptyChecker,
1183     };
1184    
1185     $Element->{$HTML_NS}->{meta} = {
1186     attrs_checker => sub {
1187     my ($self, $todo) = @_;
1188     my $name_attr;
1189     my $http_equiv_attr;
1190     my $charset_attr;
1191     my $content_attr;
1192     for my $attr (@{$todo->{node}->attributes}) {
1193     my $attr_ns = $attr->namespace_uri;
1194     $attr_ns = '' unless defined $attr_ns;
1195     my $attr_ln = $attr->manakai_local_name;
1196     my $checker;
1197     if ($attr_ns eq '') {
1198     if ($attr_ln eq 'content') {
1199     $content_attr = $attr;
1200     $checker = 1;
1201     } elsif ($attr_ln eq 'name') {
1202     $name_attr = $attr;
1203     $checker = 1;
1204     } elsif ($attr_ln eq 'http-equiv') {
1205     $http_equiv_attr = $attr;
1206     $checker = 1;
1207     } elsif ($attr_ln eq 'charset') {
1208     $charset_attr = $attr;
1209     $checker = 1;
1210     } else {
1211     $checker = $HTMLAttrChecker->{$attr_ln}
1212     || $AttrChecker->{$attr_ns}->{$attr_ln}
1213     || $AttrChecker->{$attr_ns}->{''};
1214     }
1215     } else {
1216     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1217     || $AttrChecker->{$attr_ns}->{''};
1218     }
1219     if ($checker) {
1220     $checker->($self, $attr) if ref $checker;
1221     } else {
1222     $self->{onerror}->(node => $attr, level => 'unsupported',
1223     type => 'attribute');
1224     ## ISSUE: No comformance createria for unknown attributes in the spec
1225     }
1226     }
1227    
1228     if (defined $name_attr) {
1229     if (defined $http_equiv_attr) {
1230     $self->{onerror}->(node => $http_equiv_attr,
1231     type => 'attribute not allowed');
1232     } elsif (defined $charset_attr) {
1233     $self->{onerror}->(node => $charset_attr,
1234     type => 'attribute not allowed');
1235     }
1236     my $metadata_name = $name_attr->value;
1237     my $metadata_value;
1238     if (defined $content_attr) {
1239     $metadata_value = $content_attr->value;
1240     } else {
1241     $self->{onerror}->(node => $todo->{node},
1242     type => 'attribute missing:content');
1243     $metadata_value = '';
1244     }
1245     } elsif (defined $http_equiv_attr) {
1246     if (defined $charset_attr) {
1247     $self->{onerror}->(node => $charset_attr,
1248     type => 'attribute not allowed');
1249     }
1250     unless (defined $content_attr) {
1251     $self->{onerror}->(node => $todo->{node},
1252     type => 'attribute missing:content');
1253     }
1254     } elsif (defined $charset_attr) {
1255     if (defined $content_attr) {
1256     $self->{onerror}->(node => $content_attr,
1257     type => 'attribute not allowed');
1258     }
1259     } else {
1260     if (defined $content_attr) {
1261     $self->{onerror}->(node => $content_attr,
1262     type => 'attribute not allowed');
1263     $self->{onerror}->(node => $todo->{node},
1264     type => 'attribute missing:name|http-equiv');
1265     } else {
1266     $self->{onerror}->(node => $todo->{node},
1267     type => 'attribute missing:name|http-equiv|charset');
1268     }
1269     }
1270    
1271     ## TODO: metadata conformance
1272    
1273     ## TODO: pragma conformance
1274     if (defined $http_equiv_attr) { ## An enumerated attribute
1275     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1276     if ({
1277     'refresh' => 1,
1278     'default-style' => 1,
1279     }->{$keyword}) {
1280     #
1281     } else {
1282     $self->{onerror}->(node => $http_equiv_attr,
1283     type => 'enumerated:invalid');
1284     }
1285     }
1286    
1287     if (defined $charset_attr) {
1288     unless ($todo->{node}->owner_document->manakai_is_html) {
1289     $self->{onerror}->(node => $charset_attr,
1290     type => 'in XML:charset');
1291     }
1292     ## TODO: charset
1293     }
1294     },
1295     checker => $HTMLEmptyChecker,
1296     };
1297    
1298     $Element->{$HTML_NS}->{style} = {
1299     attrs_checker => $GetHTMLAttrsChecker->({
1300     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1301     media => $HTMLMQAttrChecker,
1302     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1303     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1304     ## not different
1305     }),
1306     checker => sub {
1307     ## NOTE: |html:style| has no conformance creteria on content model
1308     my ($self, $todo) = @_;
1309     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1310     $type = 'text/css' unless defined $type;
1311     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1312     type => 'style:'.$type); ## TODO: $type normalization
1313     return $AnyChecker->($self, $todo);
1314     },
1315     };
1316    
1317     $Element->{$HTML_NS}->{body} = {
1318     attrs_checker => $GetHTMLAttrsChecker->({}),
1319     checker => $HTMLBlockChecker,
1320     };
1321    
1322     $Element->{$HTML_NS}->{section} = {
1323     attrs_checker => $GetHTMLAttrsChecker->({}),
1324     checker => $HTMLStylableBlockChecker,
1325     };
1326    
1327     $Element->{$HTML_NS}->{nav} = {
1328     attrs_checker => $GetHTMLAttrsChecker->({}),
1329     checker => $HTMLBlockOrInlineChecker,
1330     };
1331    
1332     $Element->{$HTML_NS}->{article} = {
1333     attrs_checker => $GetHTMLAttrsChecker->({}),
1334     checker => $HTMLStylableBlockChecker,
1335     };
1336    
1337     $Element->{$HTML_NS}->{blockquote} = {
1338     attrs_checker => $GetHTMLAttrsChecker->({
1339     cite => $HTMLURIAttrChecker,
1340     }),
1341     checker => $HTMLBlockChecker,
1342     };
1343    
1344     $Element->{$HTML_NS}->{aside} = {
1345     attrs_checker => $GetHTMLAttrsChecker->({}),
1346     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1347     };
1348    
1349     $Element->{$HTML_NS}->{h1} = {
1350     attrs_checker => $GetHTMLAttrsChecker->({}),
1351     checker => sub {
1352     my ($self, $todo) = @_;
1353     $todo->{flag}->{has_heading}->[0] = 1;
1354 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1355 wakaba 1.1 },
1356     };
1357    
1358     $Element->{$HTML_NS}->{h2} = {
1359     attrs_checker => $GetHTMLAttrsChecker->({}),
1360     checker => $Element->{$HTML_NS}->{h1}->{checker},
1361     };
1362    
1363     $Element->{$HTML_NS}->{h3} = {
1364     attrs_checker => $GetHTMLAttrsChecker->({}),
1365     checker => $Element->{$HTML_NS}->{h1}->{checker},
1366     };
1367    
1368     $Element->{$HTML_NS}->{h4} = {
1369     attrs_checker => $GetHTMLAttrsChecker->({}),
1370     checker => $Element->{$HTML_NS}->{h1}->{checker},
1371     };
1372    
1373     $Element->{$HTML_NS}->{h5} = {
1374     attrs_checker => $GetHTMLAttrsChecker->({}),
1375     checker => $Element->{$HTML_NS}->{h1}->{checker},
1376     };
1377    
1378     $Element->{$HTML_NS}->{h6} = {
1379     attrs_checker => $GetHTMLAttrsChecker->({}),
1380     checker => $Element->{$HTML_NS}->{h1}->{checker},
1381     };
1382    
1383     $Element->{$HTML_NS}->{header} = {
1384     attrs_checker => $GetHTMLAttrsChecker->({}),
1385     checker => sub {
1386     my ($self, $todo) = @_;
1387     my $old_flag = $todo->{flag}->{has_heading} || [];
1388     my $new_flag = [];
1389     local $todo->{flag}->{has_heading} = $new_flag;
1390     my $node = $todo->{node};
1391    
1392     my $end = $self->_add_minuses
1393     ({$HTML_NS => {qw/header 1 footer 1/}},
1394     $HTMLSectioningElements);
1395     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1396     push @$new_todos, $end,
1397     {type => 'code', code => sub {
1398     if ($new_flag->[0]) {
1399     $old_flag->[0] = 1;
1400     } else {
1401     $self->{onerror}->(node => $node, type => 'element missing:hn');
1402     }
1403     }};
1404     return ($new_todos, $ch);
1405     },
1406     };
1407    
1408     $Element->{$HTML_NS}->{footer} = {
1409     attrs_checker => $GetHTMLAttrsChecker->({}),
1410     checker => sub { ## block -hn -header -footer -sectioning or inline
1411     my ($self, $todo) = @_;
1412     my $el = $todo->{node};
1413     my $new_todos = [];
1414     my @nodes = (@{$el->child_nodes});
1415    
1416     my $content = 'block-or-inline'; # or 'block' or 'inline'
1417     my @block_not_inline;
1418     while (@nodes) {
1419     my $node = shift @nodes;
1420     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1421    
1422     my $nt = $node->node_type;
1423     if ($nt == 1) {
1424     my $node_ns = $node->namespace_uri;
1425     $node_ns = '' unless defined $node_ns;
1426     my $node_ln = $node->manakai_local_name;
1427     my $not_allowed;
1428     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1429     $not_allowed = 1;
1430     } elsif ($node_ns eq $HTML_NS and
1431     {
1432     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1433     }->{$node_ln}) {
1434     $not_allowed = 1;
1435     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1436     $not_allowed = 1;
1437     }
1438     if ($content eq 'block') {
1439     $not_allowed = 1
1440 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1441     $self->{pluses}->{$node_ns}->{$node_ln};
1442 wakaba 1.1 } elsif ($content eq 'inline') {
1443     $not_allowed = 1
1444     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1445 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1446     $self->{pluses}->{$node_ns}->{$node_ln};
1447 wakaba 1.1 } else {
1448     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1449     my $is_inline
1450     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1451     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1452    
1453     push @block_not_inline, $node
1454     if $is_block and not $is_inline and not $not_allowed;
1455 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1456 wakaba 1.1 $content = 'inline';
1457     for (@block_not_inline) {
1458     $self->{onerror}->(node => $_, type => 'element not allowed');
1459     }
1460     $not_allowed = 1 unless $is_inline;
1461     }
1462     }
1463     $self->{onerror}->(node => $node, type => 'element not allowed')
1464     if $not_allowed;
1465     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1466     unshift @nodes, @$sib;
1467     push @$new_todos, @$ch;
1468     } elsif ($nt == 3 or $nt == 4) {
1469     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1470     if ($content eq 'block') {
1471     $self->{onerror}->(node => $node, type => 'character not allowed');
1472     } else {
1473     $content = 'inline';
1474     for (@block_not_inline) {
1475     $self->{onerror}->(node => $_, type => 'element not allowed');
1476     }
1477     }
1478     }
1479     } elsif ($nt == 5) {
1480     unshift @nodes, @{$node->child_nodes};
1481     }
1482     }
1483    
1484     my $end = $self->_add_minuses
1485     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1486     $HTMLSectioningElements);
1487     push @$new_todos, $end;
1488    
1489     if ($content eq 'inline') {
1490     for (@$new_todos) {
1491     $_->{inline} = 1;
1492     }
1493     }
1494    
1495     return ($new_todos);
1496     },
1497     };
1498    
1499     $Element->{$HTML_NS}->{address} = {
1500     attrs_checker => $GetHTMLAttrsChecker->({}),
1501     checker => $HTMLInlineChecker,
1502     };
1503    
1504     $Element->{$HTML_NS}->{p} = {
1505     attrs_checker => $GetHTMLAttrsChecker->({}),
1506 wakaba 1.13 checker => $HTMLInlineChecker,
1507 wakaba 1.1 };
1508    
1509     $Element->{$HTML_NS}->{hr} = {
1510     attrs_checker => $GetHTMLAttrsChecker->({}),
1511     checker => $HTMLEmptyChecker,
1512     };
1513    
1514     $Element->{$HTML_NS}->{br} = {
1515     attrs_checker => $GetHTMLAttrsChecker->({}),
1516     checker => $HTMLEmptyChecker,
1517     };
1518    
1519     $Element->{$HTML_NS}->{dialog} = {
1520     attrs_checker => $GetHTMLAttrsChecker->({}),
1521     checker => sub {
1522     my ($self, $todo) = @_;
1523     my $el = $todo->{node};
1524     my $new_todos = [];
1525     my @nodes = (@{$el->child_nodes});
1526    
1527     my $phase = 'before dt';
1528     while (@nodes) {
1529     my $node = shift @nodes;
1530     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1531    
1532     my $nt = $node->node_type;
1533     if ($nt == 1) {
1534     my $node_ns = $node->namespace_uri;
1535     $node_ns = '' unless defined $node_ns;
1536     my $node_ln = $node->manakai_local_name;
1537     ## NOTE: |minuses| list is not checked since redundant
1538 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1539     #
1540     } elsif ($phase eq 'before dt') {
1541 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1542     $phase = 'before dd';
1543     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1544     $self->{onerror}
1545     ->(node => $node, type => 'ps element missing:dt');
1546     $phase = 'before dt';
1547     } else {
1548     $self->{onerror}->(node => $node, type => 'element not allowed');
1549     }
1550     } else { # before dd
1551     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1552     $phase = 'before dt';
1553     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1554     $self->{onerror}
1555     ->(node => $node, type => 'ps element missing:dd');
1556     $phase = 'before dd';
1557     } else {
1558     $self->{onerror}->(node => $node, type => 'element not allowed');
1559     }
1560     }
1561     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1562     unshift @nodes, @$sib;
1563     push @$new_todos, @$ch;
1564     } elsif ($nt == 3 or $nt == 4) {
1565     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1566     $self->{onerror}->(node => $node, type => 'character not allowed');
1567     }
1568     } elsif ($nt == 5) {
1569     unshift @nodes, @{$node->child_nodes};
1570     }
1571     }
1572     if ($phase eq 'before dd') {
1573 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1574 wakaba 1.1 }
1575     return ($new_todos);
1576     },
1577     };
1578    
1579     $Element->{$HTML_NS}->{pre} = {
1580     attrs_checker => $GetHTMLAttrsChecker->({}),
1581     checker => $HTMLStrictlyInlineChecker,
1582     };
1583    
1584     $Element->{$HTML_NS}->{ol} = {
1585     attrs_checker => $GetHTMLAttrsChecker->({
1586     start => $HTMLIntegerAttrChecker,
1587     }),
1588     checker => sub {
1589     my ($self, $todo) = @_;
1590     my $el = $todo->{node};
1591     my $new_todos = [];
1592     my @nodes = (@{$el->child_nodes});
1593    
1594     while (@nodes) {
1595     my $node = shift @nodes;
1596     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1597    
1598     my $nt = $node->node_type;
1599     if ($nt == 1) {
1600     my $node_ns = $node->namespace_uri;
1601     $node_ns = '' unless defined $node_ns;
1602     my $node_ln = $node->manakai_local_name;
1603     ## NOTE: |minuses| list is not checked since redundant
1604 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1605     #
1606     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1607 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1608     }
1609     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1610     unshift @nodes, @$sib;
1611     push @$new_todos, @$ch;
1612     } elsif ($nt == 3 or $nt == 4) {
1613     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1614     $self->{onerror}->(node => $node, type => 'character not allowed');
1615     }
1616     } elsif ($nt == 5) {
1617     unshift @nodes, @{$node->child_nodes};
1618     }
1619     }
1620    
1621     if ($todo->{inline}) {
1622     for (@$new_todos) {
1623     $_->{inline} = 1;
1624     }
1625     }
1626     return ($new_todos);
1627     },
1628     };
1629    
1630     $Element->{$HTML_NS}->{ul} = {
1631     attrs_checker => $GetHTMLAttrsChecker->({}),
1632     checker => $Element->{$HTML_NS}->{ol}->{checker},
1633     };
1634    
1635     $Element->{$HTML_NS}->{li} = {
1636     attrs_checker => $GetHTMLAttrsChecker->({
1637     start => sub {
1638     my ($self, $attr) = @_;
1639     my $parent = $attr->owner_element->manakai_parent_element;
1640     if (defined $parent) {
1641     my $parent_ns = $parent->namespace_uri;
1642     $parent_ns = '' unless defined $parent_ns;
1643     my $parent_ln = $parent->manakai_local_name;
1644     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1645     $self->{onerror}->(node => $attr, level => 'unsupported',
1646     type => 'attribute');
1647     }
1648     }
1649     $HTMLIntegerAttrChecker->($self, $attr);
1650     },
1651     }),
1652     checker => sub {
1653     my ($self, $todo) = @_;
1654     if ($todo->{inline}) {
1655     return $HTMLInlineChecker->($self, $todo);
1656     } else {
1657     return $HTMLBlockOrInlineChecker->($self, $todo);
1658     }
1659     },
1660     };
1661    
1662     $Element->{$HTML_NS}->{dl} = {
1663     attrs_checker => $GetHTMLAttrsChecker->({}),
1664     checker => sub {
1665     my ($self, $todo) = @_;
1666     my $el = $todo->{node};
1667     my $new_todos = [];
1668     my @nodes = (@{$el->child_nodes});
1669    
1670     my $phase = 'before dt';
1671     while (@nodes) {
1672     my $node = shift @nodes;
1673     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1674    
1675     my $nt = $node->node_type;
1676     if ($nt == 1) {
1677     my $node_ns = $node->namespace_uri;
1678     $node_ns = '' unless defined $node_ns;
1679     my $node_ln = $node->manakai_local_name;
1680     ## NOTE: |minuses| list is not checked since redundant
1681 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1682     #
1683     } elsif ($phase eq 'in dds') {
1684 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1685     #$phase = 'in dds';
1686     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1687     $phase = 'in dts';
1688     } else {
1689     $self->{onerror}->(node => $node, type => 'element not allowed');
1690     }
1691     } elsif ($phase eq 'in dts') {
1692     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1693     #$phase = 'in dts';
1694     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1695     $phase = 'in dds';
1696     } else {
1697     $self->{onerror}->(node => $node, type => 'element not allowed');
1698     }
1699     } else { # before dt
1700     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1701     $phase = 'in dts';
1702     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1703     $self->{onerror}
1704     ->(node => $node, type => 'ps element missing:dt');
1705     $phase = 'in dds';
1706     } else {
1707     $self->{onerror}->(node => $node, type => 'element not allowed');
1708     }
1709     }
1710     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1711     unshift @nodes, @$sib;
1712     push @$new_todos, @$ch;
1713     } elsif ($nt == 3 or $nt == 4) {
1714     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1715     $self->{onerror}->(node => $node, type => 'character not allowed');
1716     }
1717     } elsif ($nt == 5) {
1718     unshift @nodes, @{$node->child_nodes};
1719     }
1720     }
1721     if ($phase eq 'in dts') {
1722 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1723 wakaba 1.1 }
1724    
1725     if ($todo->{inline}) {
1726     for (@$new_todos) {
1727     $_->{inline} = 1;
1728     }
1729     }
1730     return ($new_todos);
1731     },
1732     };
1733    
1734     $Element->{$HTML_NS}->{dt} = {
1735     attrs_checker => $GetHTMLAttrsChecker->({}),
1736     checker => $HTMLStrictlyInlineChecker,
1737     };
1738    
1739     $Element->{$HTML_NS}->{dd} = {
1740     attrs_checker => $GetHTMLAttrsChecker->({}),
1741     checker => $Element->{$HTML_NS}->{li}->{checker},
1742     };
1743    
1744     $Element->{$HTML_NS}->{a} = {
1745     attrs_checker => sub {
1746     my ($self, $todo) = @_;
1747     my %attr;
1748     for my $attr (@{$todo->{node}->attributes}) {
1749     my $attr_ns = $attr->namespace_uri;
1750     $attr_ns = '' unless defined $attr_ns;
1751     my $attr_ln = $attr->manakai_local_name;
1752     my $checker;
1753     if ($attr_ns eq '') {
1754     $checker = {
1755     target => $HTMLTargetAttrChecker,
1756     href => $HTMLURIAttrChecker,
1757     ping => $HTMLSpaceURIsAttrChecker,
1758 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1759 wakaba 1.1 media => $HTMLMQAttrChecker,
1760     hreflang => $HTMLLanguageTagAttrChecker,
1761     type => $HTMLIMTAttrChecker,
1762     }->{$attr_ln};
1763     if ($checker) {
1764     $attr{$attr_ln} = $attr;
1765     } else {
1766     $checker = $HTMLAttrChecker->{$attr_ln};
1767     }
1768     }
1769     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1770     || $AttrChecker->{$attr_ns}->{''};
1771     if ($checker) {
1772     $checker->($self, $attr) if ref $checker;
1773     } else {
1774     $self->{onerror}->(node => $attr, level => 'unsupported',
1775     type => 'attribute');
1776     ## ISSUE: No comformance createria for unknown attributes in the spec
1777     }
1778     }
1779    
1780 wakaba 1.4 if (defined $attr{href}) {
1781     $self->{has_hyperlink_element} = 1;
1782     } else {
1783 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1784     if (defined $attr{$_}) {
1785     $self->{onerror}->(node => $attr{$_},
1786     type => 'attribute not allowed');
1787     }
1788     }
1789     }
1790     },
1791     checker => sub {
1792     my ($self, $todo) = @_;
1793    
1794     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1795     my ($new_todos, $ch)
1796 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
1797 wakaba 1.1 push @$new_todos, $end;
1798    
1799 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1800     $_->{flag}->{in_a_href} = 1 for @$new_todos;
1801     }
1802 wakaba 1.1
1803     return ($new_todos, $ch);
1804     },
1805     };
1806    
1807     $Element->{$HTML_NS}->{q} = {
1808     attrs_checker => $GetHTMLAttrsChecker->({
1809     cite => $HTMLURIAttrChecker,
1810     }),
1811     checker => $HTMLInlineOrStrictlyInlineChecker,
1812     };
1813    
1814     $Element->{$HTML_NS}->{cite} = {
1815     attrs_checker => $GetHTMLAttrsChecker->({}),
1816     checker => $HTMLStrictlyInlineChecker,
1817     };
1818    
1819     $Element->{$HTML_NS}->{em} = {
1820     attrs_checker => $GetHTMLAttrsChecker->({}),
1821     checker => $HTMLInlineOrStrictlyInlineChecker,
1822     };
1823    
1824     $Element->{$HTML_NS}->{strong} = {
1825     attrs_checker => $GetHTMLAttrsChecker->({}),
1826     checker => $HTMLInlineOrStrictlyInlineChecker,
1827     };
1828    
1829     $Element->{$HTML_NS}->{small} = {
1830     attrs_checker => $GetHTMLAttrsChecker->({}),
1831     checker => $HTMLInlineOrStrictlyInlineChecker,
1832     };
1833    
1834     $Element->{$HTML_NS}->{m} = {
1835     attrs_checker => $GetHTMLAttrsChecker->({}),
1836     checker => $HTMLInlineOrStrictlyInlineChecker,
1837     };
1838    
1839     $Element->{$HTML_NS}->{dfn} = {
1840     attrs_checker => $GetHTMLAttrsChecker->({}),
1841     checker => sub {
1842     my ($self, $todo) = @_;
1843    
1844     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1845     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1846     push @$sib, $end;
1847    
1848     my $node = $todo->{node};
1849     my $term = $node->get_attribute_ns (undef, 'title');
1850     unless (defined $term) {
1851     for my $child (@{$node->child_nodes}) {
1852     if ($child->node_type == 1) { # ELEMENT_NODE
1853     if (defined $term) {
1854     undef $term;
1855     last;
1856     } elsif ($child->manakai_local_name eq 'abbr') {
1857     my $nsuri = $child->namespace_uri;
1858     if (defined $nsuri and $nsuri eq $HTML_NS) {
1859     my $attr = $child->get_attribute_node_ns (undef, 'title');
1860     if ($attr) {
1861     $term = $attr->value;
1862     }
1863     }
1864     }
1865     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1866     ## TEXT_NODE or CDATA_SECTION_NODE
1867     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1868     next;
1869     }
1870     undef $term;
1871     last;
1872     }
1873     }
1874     unless (defined $term) {
1875     $term = $node->text_content;
1876     }
1877     }
1878     if ($self->{term}->{$term}) {
1879     $self->{onerror}->(node => $node, type => 'duplicate term');
1880     push @{$self->{term}->{$term}}, $node;
1881     } else {
1882     $self->{term}->{$term} = [$node];
1883     }
1884     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1885     ## has |title|.
1886    
1887     return ($sib, $ch);
1888     },
1889     };
1890    
1891     $Element->{$HTML_NS}->{abbr} = {
1892     attrs_checker => $GetHTMLAttrsChecker->({
1893     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1894     ## not different. The spec says that the |title| MAY be omitted
1895     ## if there is a |dfn| whose defining term is the abbreviation,
1896     ## but it does not prohibit |abbr| w/o |title| in other cases.
1897     }),
1898     checker => $HTMLStrictlyInlineChecker,
1899     };
1900    
1901     $Element->{$HTML_NS}->{time} = {
1902     attrs_checker => $GetHTMLAttrsChecker->({
1903     datetime => sub { 1 }, # checked in |checker|
1904     }),
1905     ## TODO: Write tests
1906     checker => sub {
1907     my ($self, $todo) = @_;
1908    
1909     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1910     my $input;
1911     my $reg_sp;
1912     my $input_node;
1913     if ($attr) {
1914     $input = $attr->value;
1915     $reg_sp = qr/[\x09-\x0D\x20]*/;
1916     $input_node = $attr;
1917     } else {
1918     $input = $todo->{node}->text_content;
1919     $reg_sp = qr/\p{Zs}*/;
1920     $input_node = $todo->{node};
1921    
1922     ## ISSUE: What is the definition for "successfully extracts a date
1923     ## or time"? If the algorithm says the string is invalid but
1924     ## return some date or time, is it "successfully"?
1925     }
1926    
1927     my $hour;
1928     my $minute;
1929     my $second;
1930     if ($input =~ /
1931     \A
1932     [\x09-\x0D\x20]*
1933     ([0-9]+) # 1
1934     (?>
1935     -([0-9]+) # 2
1936     -([0-9]+) # 3
1937     [\x09-\x0D\x20]*
1938     (?>
1939     T
1940     [\x09-\x0D\x20]*
1941     )?
1942     ([0-9]+) # 4
1943     :([0-9]+) # 5
1944     (?>
1945     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1946     )?
1947     [\x09-\x0D\x20]*
1948     (?>
1949     Z
1950     [\x09-\x0D\x20]*
1951     |
1952     [+-]([0-9]+):([0-9]+) # 7, 8
1953     [\x09-\x0D\x20]*
1954     )?
1955     \z
1956     |
1957     :([0-9]+) # 9
1958     (?>
1959     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1960     )?
1961     [\x09-\x0D\x20]*\z
1962     )
1963     /x) {
1964     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1965     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1966     length $4 != 2 or length $5 != 2) {
1967     $self->{onerror}->(node => $input_node,
1968     type => 'dateortime:syntax error');
1969     }
1970    
1971     if (1 <= $2 and $2 <= 12) {
1972     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1973     if $3 < 1 or
1974     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1975     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1976     if $2 == 2 and $3 == 29 and
1977     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1978     } else {
1979     $self->{onerror}->(node => $input_node,
1980     type => 'datetime:bad month');
1981     }
1982    
1983     ($hour, $minute, $second) = ($4, $5, $6);
1984    
1985     if (defined $7) { ## [+-]hh:mm
1986     if (length $7 != 2 or length $8 != 2) {
1987     $self->{onerror}->(node => $input_node,
1988     type => 'dateortime:syntax error');
1989     }
1990    
1991     $self->{onerror}->(node => $input_node,
1992     type => 'datetime:bad timezone hour')
1993     if $7 > 23;
1994     $self->{onerror}->(node => $input_node,
1995     type => 'datetime:bad timezone minute')
1996     if $8 > 59;
1997     }
1998     } else { ## hh:mm
1999     if (length $1 != 2 or length $9 != 2) {
2000     $self->{onerror}->(node => $input_node,
2001     type => qq'dateortime:syntax error');
2002     }
2003    
2004     ($hour, $minute, $second) = ($1, $9, $10);
2005     }
2006    
2007     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2008     if $hour > 23;
2009     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2010     if $minute > 59;
2011    
2012     if (defined $second) { ## s
2013     ## NOTE: Integer part of second don't have to have length of two.
2014    
2015     if (substr ($second, 0, 1) eq '.') {
2016     $self->{onerror}->(node => $input_node,
2017     type => 'dateortime:syntax error');
2018     }
2019    
2020     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2021     if $second >= 60;
2022     }
2023     } else {
2024     $self->{onerror}->(node => $input_node,
2025     type => 'dateortime:syntax error');
2026     }
2027    
2028     return $HTMLStrictlyInlineChecker->($self, $todo);
2029     },
2030     };
2031    
2032     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2033     attrs_checker => $GetHTMLAttrsChecker->({
2034     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2035     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2036     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2037     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2038     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2039     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2040     }),
2041     checker => $HTMLStrictlyInlineChecker,
2042     };
2043    
2044     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2045     attrs_checker => $GetHTMLAttrsChecker->({
2046     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2047     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2048     }),
2049     checker => $HTMLStrictlyInlineChecker,
2050     };
2051    
2052     $Element->{$HTML_NS}->{code} = {
2053     attrs_checker => $GetHTMLAttrsChecker->({}),
2054     ## NOTE: Though |title| has special semantics,
2055     ## syntatically same as the |title| as global attribute.
2056     checker => $HTMLInlineOrStrictlyInlineChecker,
2057     };
2058    
2059     $Element->{$HTML_NS}->{var} = {
2060     attrs_checker => $GetHTMLAttrsChecker->({}),
2061     ## NOTE: Though |title| has special semantics,
2062     ## syntatically same as the |title| as global attribute.
2063     checker => $HTMLStrictlyInlineChecker,
2064     };
2065    
2066     $Element->{$HTML_NS}->{samp} = {
2067     attrs_checker => $GetHTMLAttrsChecker->({}),
2068     ## NOTE: Though |title| has special semantics,
2069     ## syntatically same as the |title| as global attribute.
2070     checker => $HTMLInlineOrStrictlyInlineChecker,
2071     };
2072    
2073     $Element->{$HTML_NS}->{kbd} = {
2074     attrs_checker => $GetHTMLAttrsChecker->({}),
2075     checker => $HTMLStrictlyInlineChecker,
2076     };
2077    
2078     $Element->{$HTML_NS}->{sub} = {
2079     attrs_checker => $GetHTMLAttrsChecker->({}),
2080     checker => $HTMLStrictlyInlineChecker,
2081     };
2082    
2083     $Element->{$HTML_NS}->{sup} = {
2084     attrs_checker => $GetHTMLAttrsChecker->({}),
2085     checker => $HTMLStrictlyInlineChecker,
2086     };
2087    
2088     $Element->{$HTML_NS}->{span} = {
2089     attrs_checker => $GetHTMLAttrsChecker->({}),
2090     ## NOTE: Though |title| has special semantics,
2091     ## syntatically same as the |title| as global attribute.
2092     checker => $HTMLInlineOrStrictlyInlineChecker,
2093     };
2094    
2095     $Element->{$HTML_NS}->{i} = {
2096     attrs_checker => $GetHTMLAttrsChecker->({}),
2097     ## NOTE: Though |title| has special semantics,
2098     ## syntatically same as the |title| as global attribute.
2099     checker => $HTMLStrictlyInlineChecker,
2100     };
2101    
2102     $Element->{$HTML_NS}->{b} = {
2103     attrs_checker => $GetHTMLAttrsChecker->({}),
2104     checker => $HTMLStrictlyInlineChecker,
2105     };
2106    
2107     $Element->{$HTML_NS}->{bdo} = {
2108     attrs_checker => sub {
2109     my ($self, $todo) = @_;
2110     $GetHTMLAttrsChecker->({})->($self, $todo);
2111     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2112     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2113     }
2114     },
2115     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2116     checker => $HTMLStrictlyInlineChecker,
2117     };
2118    
2119     $Element->{$HTML_NS}->{ins} = {
2120     attrs_checker => $GetHTMLAttrsChecker->({
2121     cite => $HTMLURIAttrChecker,
2122     datetime => $HTMLDatetimeAttrChecker,
2123     }),
2124     checker => $HTMLTransparentChecker,
2125     };
2126    
2127     $Element->{$HTML_NS}->{del} = {
2128     attrs_checker => $GetHTMLAttrsChecker->({
2129     cite => $HTMLURIAttrChecker,
2130     datetime => $HTMLDatetimeAttrChecker,
2131     }),
2132     checker => sub {
2133     my ($self, $todo) = @_;
2134    
2135     my $parent = $todo->{node}->manakai_parent_element;
2136     if (defined $parent) {
2137     my $nsuri = $parent->namespace_uri;
2138     $nsuri = '' unless defined $nsuri;
2139     my $ln = $parent->manakai_local_name;
2140     my $eldef = $Element->{$nsuri}->{$ln} ||
2141     $Element->{$nsuri}->{''} ||
2142     $ElementDefault;
2143     return $eldef->{checker}->($self, $todo);
2144     } else {
2145     return $HTMLBlockOrInlineChecker->($self, $todo);
2146     }
2147     },
2148     };
2149    
2150     ## TODO: figure
2151 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2152 wakaba 1.1
2153 wakaba 1.4 ## TODO: |alt|
2154 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2155     attrs_checker => sub {
2156     my ($self, $todo) = @_;
2157     $GetHTMLAttrsChecker->({
2158     alt => sub { }, ## NOTE: No syntactical requirement
2159     src => $HTMLURIAttrChecker,
2160     usemap => $HTMLUsemapAttrChecker,
2161     ismap => sub {
2162     my ($self, $attr, $parent_todo) = @_;
2163 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2164     $self->{onerror}->(node => $attr,
2165     type => 'attribute not allowed:ismap');
2166 wakaba 1.1 }
2167     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2168     },
2169     ## TODO: height
2170     ## TODO: width
2171     })->($self, $todo);
2172     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2173     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2174     }
2175     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2176     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2177     }
2178     },
2179     checker => $HTMLEmptyChecker,
2180     };
2181    
2182     $Element->{$HTML_NS}->{iframe} = {
2183     attrs_checker => $GetHTMLAttrsChecker->({
2184     src => $HTMLURIAttrChecker,
2185     }),
2186     checker => $HTMLTextChecker,
2187     };
2188    
2189     $Element->{$HTML_NS}->{embed} = {
2190     attrs_checker => sub {
2191     my ($self, $todo) = @_;
2192     my $has_src;
2193     for my $attr (@{$todo->{node}->attributes}) {
2194     my $attr_ns = $attr->namespace_uri;
2195     $attr_ns = '' unless defined $attr_ns;
2196     my $attr_ln = $attr->manakai_local_name;
2197     my $checker;
2198     if ($attr_ns eq '') {
2199     if ($attr_ln eq 'src') {
2200     $checker = $HTMLURIAttrChecker;
2201     $has_src = 1;
2202     } elsif ($attr_ln eq 'type') {
2203     $checker = $HTMLIMTAttrChecker;
2204     } else {
2205     ## TODO: height
2206     ## TODO: width
2207     $checker = $HTMLAttrChecker->{$attr_ln}
2208     || sub { }; ## NOTE: Any local attribute is ok.
2209     }
2210     }
2211     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2212     || $AttrChecker->{$attr_ns}->{''};
2213     if ($checker) {
2214     $checker->($self, $attr);
2215     } else {
2216     $self->{onerror}->(node => $attr, level => 'unsupported',
2217     type => 'attribute');
2218     ## ISSUE: No comformance createria for global attributes in the spec
2219     }
2220     }
2221    
2222     unless ($has_src) {
2223     $self->{onerror}->(node => $todo->{node},
2224     type => 'attribute missing:src');
2225     }
2226     },
2227     checker => $HTMLEmptyChecker,
2228     };
2229    
2230     $Element->{$HTML_NS}->{object} = {
2231     attrs_checker => sub {
2232     my ($self, $todo) = @_;
2233     $GetHTMLAttrsChecker->({
2234     data => $HTMLURIAttrChecker,
2235     type => $HTMLIMTAttrChecker,
2236     usemap => $HTMLUsemapAttrChecker,
2237     ## TODO: width
2238     ## TODO: height
2239     })->($self, $todo);
2240     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2241     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2242     $self->{onerror}->(node => $todo->{node},
2243     type => 'attribute missing:data|type');
2244     }
2245     }
2246     },
2247     checker => $ElementDefault->{checker}, ## TODO
2248 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2249 wakaba 1.1 };
2250    
2251     $Element->{$HTML_NS}->{param} = {
2252     attrs_checker => sub {
2253     my ($self, $todo) = @_;
2254     $GetHTMLAttrsChecker->({
2255     name => sub { },
2256     value => sub { },
2257     })->($self, $todo);
2258     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2259     $self->{onerror}->(node => $todo->{node},
2260     type => 'attribute missing:name');
2261     }
2262     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2263     $self->{onerror}->(node => $todo->{node},
2264     type => 'attribute missing:value');
2265     }
2266     },
2267     checker => $HTMLEmptyChecker,
2268     };
2269    
2270     $Element->{$HTML_NS}->{video} = {
2271     attrs_checker => $GetHTMLAttrsChecker->({
2272     src => $HTMLURIAttrChecker,
2273     ## TODO: start, loopstart, loopend, end
2274     ## ISSUE: they MUST be "value time offset"s. Value?
2275 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2276 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2277     controls => $GetHTMLBooleanAttrChecker->('controls'),
2278 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2279     ## TODO: width, height (not for audio!)
2280 wakaba 1.1 }),
2281     checker => sub {
2282     my ($self, $todo) = @_;
2283    
2284     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2285     return $HTMLBlockOrInlineChecker->($self, $todo);
2286     } else {
2287     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2288     ->($self, $todo);
2289     }
2290     },
2291     };
2292    
2293     $Element->{$HTML_NS}->{audio} = {
2294     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2295     checker => $Element->{$HTML_NS}->{video}->{checker},
2296     };
2297    
2298     $Element->{$HTML_NS}->{source} = {
2299     attrs_checker => sub {
2300     my ($self, $todo) = @_;
2301     $GetHTMLAttrsChecker->({
2302     src => $HTMLURIAttrChecker,
2303     type => $HTMLIMTAttrChecker,
2304     media => $HTMLMQAttrChecker,
2305     })->($self, $todo);
2306     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2307     $self->{onerror}->(node => $todo->{node},
2308     type => 'attribute missing:src');
2309     }
2310     },
2311     checker => $HTMLEmptyChecker,
2312     };
2313    
2314     $Element->{$HTML_NS}->{canvas} = {
2315     attrs_checker => $GetHTMLAttrsChecker->({
2316     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2317     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2318     }),
2319     checker => $HTMLInlineChecker,
2320     };
2321    
2322     $Element->{$HTML_NS}->{map} = {
2323 wakaba 1.4 attrs_checker => sub {
2324     my ($self, $todo) = @_;
2325     my $has_id;
2326     $GetHTMLAttrsChecker->({
2327     id => sub {
2328     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2329     my ($self, $attr) = @_;
2330     my $value = $attr->value;
2331     if (length $value > 0) {
2332     if ($self->{id}->{$value}) {
2333     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2334     push @{$self->{id}->{$value}}, $attr;
2335     } else {
2336     $self->{id}->{$value} = [$attr];
2337     }
2338 wakaba 1.1 } else {
2339 wakaba 1.4 ## NOTE: MUST contain at least one character
2340     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2341 wakaba 1.1 }
2342 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2343     $self->{onerror}->(node => $attr, type => 'space in ID');
2344     }
2345     $self->{map}->{$value} ||= $attr;
2346     $has_id = 1;
2347     },
2348     })->($self, $todo);
2349     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2350     unless $has_id;
2351     },
2352 wakaba 1.1 checker => $HTMLBlockChecker,
2353     };
2354    
2355     $Element->{$HTML_NS}->{area} = {
2356     attrs_checker => sub {
2357     my ($self, $todo) = @_;
2358     my %attr;
2359     my $coords;
2360     for my $attr (@{$todo->{node}->attributes}) {
2361     my $attr_ns = $attr->namespace_uri;
2362     $attr_ns = '' unless defined $attr_ns;
2363     my $attr_ln = $attr->manakai_local_name;
2364     my $checker;
2365     if ($attr_ns eq '') {
2366     $checker = {
2367     alt => sub { },
2368     ## NOTE: |alt| value has no conformance creteria.
2369     shape => $GetHTMLEnumeratedAttrChecker->({
2370     circ => -1, circle => 1,
2371     default => 1,
2372     poly => 1, polygon => -1,
2373     rect => 1, rectangle => -1,
2374     }),
2375     coords => sub {
2376     my ($self, $attr) = @_;
2377     my $value = $attr->value;
2378     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2379     $coords = [split /,/, $value];
2380     } else {
2381     $self->{onerror}->(node => $attr,
2382     type => 'coords:syntax error');
2383     }
2384     },
2385     target => $HTMLTargetAttrChecker,
2386     href => $HTMLURIAttrChecker,
2387     ping => $HTMLSpaceURIsAttrChecker,
2388 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2389 wakaba 1.1 media => $HTMLMQAttrChecker,
2390     hreflang => $HTMLLanguageTagAttrChecker,
2391     type => $HTMLIMTAttrChecker,
2392     }->{$attr_ln};
2393     if ($checker) {
2394     $attr{$attr_ln} = $attr;
2395     } else {
2396     $checker = $HTMLAttrChecker->{$attr_ln};
2397     }
2398     }
2399     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2400     || $AttrChecker->{$attr_ns}->{''};
2401     if ($checker) {
2402     $checker->($self, $attr) if ref $checker;
2403     } else {
2404     $self->{onerror}->(node => $attr, level => 'unsupported',
2405     type => 'attribute');
2406     ## ISSUE: No comformance createria for unknown attributes in the spec
2407     }
2408     }
2409    
2410     if (defined $attr{href}) {
2411 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2412 wakaba 1.1 unless (defined $attr{alt}) {
2413     $self->{onerror}->(node => $todo->{node},
2414     type => 'attribute missing:alt');
2415     }
2416     } else {
2417     for (qw/target ping rel media hreflang type alt/) {
2418     if (defined $attr{$_}) {
2419     $self->{onerror}->(node => $attr{$_},
2420     type => 'attribute not allowed');
2421     }
2422     }
2423     }
2424    
2425     my $shape = 'rectangle';
2426     if (defined $attr{shape}) {
2427     $shape = {
2428     circ => 'circle', circle => 'circle',
2429     default => 'default',
2430     poly => 'polygon', polygon => 'polygon',
2431     rect => 'rectangle', rectangle => 'rectangle',
2432     }->{lc $attr{shape}->value} || 'rectangle';
2433     ## TODO: ASCII lowercase?
2434     }
2435    
2436     if ($shape eq 'circle') {
2437     if (defined $attr{coords}) {
2438     if (defined $coords) {
2439     if (@$coords == 3) {
2440     if ($coords->[2] < 0) {
2441     $self->{onerror}->(node => $attr{coords},
2442     type => 'coords:out of range:2');
2443     }
2444     } else {
2445     $self->{onerror}->(node => $attr{coords},
2446     type => 'coords:number:3:'.@$coords);
2447     }
2448     } else {
2449     ## NOTE: A syntax error has been reported.
2450     }
2451     } else {
2452     $self->{onerror}->(node => $todo->{node},
2453     type => 'attribute missing:coords');
2454     }
2455     } elsif ($shape eq 'default') {
2456     if (defined $attr{coords}) {
2457     $self->{onerror}->(node => $attr{coords},
2458     type => 'attribute not allowed');
2459     }
2460     } elsif ($shape eq 'polygon') {
2461     if (defined $attr{coords}) {
2462     if (defined $coords) {
2463     if (@$coords >= 6) {
2464     unless (@$coords % 2 == 0) {
2465     $self->{onerror}->(node => $attr{coords},
2466     type => 'coords:number:even:'.@$coords);
2467     }
2468     } else {
2469     $self->{onerror}->(node => $attr{coords},
2470     type => 'coords:number:>=6:'.@$coords);
2471     }
2472     } else {
2473     ## NOTE: A syntax error has been reported.
2474     }
2475     } else {
2476     $self->{onerror}->(node => $todo->{node},
2477     type => 'attribute missing:coords');
2478     }
2479     } elsif ($shape eq 'rectangle') {
2480     if (defined $attr{coords}) {
2481     if (defined $coords) {
2482     if (@$coords == 4) {
2483     unless ($coords->[0] < $coords->[2]) {
2484     $self->{onerror}->(node => $attr{coords},
2485     type => 'coords:out of range:0');
2486     }
2487     unless ($coords->[1] < $coords->[3]) {
2488     $self->{onerror}->(node => $attr{coords},
2489     type => 'coords:out of range:1');
2490     }
2491     } else {
2492     $self->{onerror}->(node => $attr{coords},
2493     type => 'coords:number:4:'.@$coords);
2494     }
2495     } else {
2496     ## NOTE: A syntax error has been reported.
2497     }
2498     } else {
2499     $self->{onerror}->(node => $todo->{node},
2500     type => 'attribute missing:coords');
2501     }
2502     }
2503     },
2504     checker => $HTMLEmptyChecker,
2505     };
2506     ## TODO: only in map
2507    
2508     $Element->{$HTML_NS}->{table} = {
2509     attrs_checker => $GetHTMLAttrsChecker->({}),
2510     checker => sub {
2511     my ($self, $todo) = @_;
2512     my $el = $todo->{node};
2513     my $new_todos = [];
2514     my @nodes = (@{$el->child_nodes});
2515    
2516     my $phase = 'before caption';
2517     my $has_tfoot;
2518     while (@nodes) {
2519     my $node = shift @nodes;
2520     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2521    
2522     my $nt = $node->node_type;
2523     if ($nt == 1) {
2524     my $node_ns = $node->namespace_uri;
2525     $node_ns = '' unless defined $node_ns;
2526     my $node_ln = $node->manakai_local_name;
2527     ## NOTE: |minuses| list is not checked since redundant
2528 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2529     #
2530     } elsif ($phase eq 'in tbodys') {
2531 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2532     #$phase = 'in tbodys';
2533     } elsif (not $has_tfoot and
2534     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2535     $phase = 'after tfoot';
2536     $has_tfoot = 1;
2537     } else {
2538     $self->{onerror}->(node => $node, type => 'element not allowed');
2539     }
2540     } elsif ($phase eq 'in trs') {
2541     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2542     #$phase = 'in trs';
2543     } elsif (not $has_tfoot and
2544     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2545     $phase = 'after tfoot';
2546     $has_tfoot = 1;
2547     } else {
2548     $self->{onerror}->(node => $node, type => 'element not allowed');
2549     }
2550     } elsif ($phase eq 'after thead') {
2551     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2552     $phase = 'in tbodys';
2553     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2554     $phase = 'in trs';
2555     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2556     $phase = 'in tbodys';
2557     $has_tfoot = 1;
2558     } else {
2559     $self->{onerror}->(node => $node, type => 'element not allowed');
2560     }
2561     } elsif ($phase eq 'in colgroup') {
2562     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2563     $phase = 'in colgroup';
2564     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2565     $phase = 'after thead';
2566     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2567     $phase = 'in tbodys';
2568     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2569     $phase = 'in trs';
2570     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2571     $phase = 'in tbodys';
2572     $has_tfoot = 1;
2573     } else {
2574     $self->{onerror}->(node => $node, type => 'element not allowed');
2575     }
2576     } elsif ($phase eq 'before caption') {
2577     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2578     $phase = 'in colgroup';
2579     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2580     $phase = 'in colgroup';
2581     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2582     $phase = 'after thead';
2583     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2584     $phase = 'in tbodys';
2585     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2586     $phase = 'in trs';
2587     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2588     $phase = 'in tbodys';
2589     $has_tfoot = 1;
2590     } else {
2591     $self->{onerror}->(node => $node, type => 'element not allowed');
2592     }
2593     } else { # after tfoot
2594     $self->{onerror}->(node => $node, type => 'element not allowed');
2595     }
2596     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2597     unshift @nodes, @$sib;
2598     push @$new_todos, @$ch;
2599     } elsif ($nt == 3 or $nt == 4) {
2600     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2601     $self->{onerror}->(node => $node, type => 'character not allowed');
2602     }
2603     } elsif ($nt == 5) {
2604     unshift @nodes, @{$node->child_nodes};
2605     }
2606     }
2607    
2608     ## Table model errors
2609     require Whatpm::HTMLTable;
2610     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2611     my %opt = @_;
2612     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2613     });
2614     push @{$self->{return}->{table}}, $todo->{node};
2615    
2616     return ($new_todos);
2617     },
2618     };
2619    
2620     $Element->{$HTML_NS}->{caption} = {
2621     attrs_checker => $GetHTMLAttrsChecker->({}),
2622 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2623 wakaba 1.1 };
2624    
2625     $Element->{$HTML_NS}->{colgroup} = {
2626     attrs_checker => $GetHTMLAttrsChecker->({
2627     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2628     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2629     ## TODO: "attribute not supported" if |col|.
2630     ## ISSUE: MUST NOT if any |col|?
2631     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2632     }),
2633     checker => sub {
2634     my ($self, $todo) = @_;
2635     my $el = $todo->{node};
2636     my $new_todos = [];
2637     my @nodes = (@{$el->child_nodes});
2638    
2639     while (@nodes) {
2640     my $node = shift @nodes;
2641     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2642    
2643     my $nt = $node->node_type;
2644     if ($nt == 1) {
2645     my $node_ns = $node->namespace_uri;
2646     $node_ns = '' unless defined $node_ns;
2647     my $node_ln = $node->manakai_local_name;
2648     ## NOTE: |minuses| list is not checked since redundant
2649 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2650     #
2651     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2652 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2653     }
2654     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2655     unshift @nodes, @$sib;
2656     push @$new_todos, @$ch;
2657     } elsif ($nt == 3 or $nt == 4) {
2658     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2659     $self->{onerror}->(node => $node, type => 'character not allowed');
2660     }
2661     } elsif ($nt == 5) {
2662     unshift @nodes, @{$node->child_nodes};
2663     }
2664     }
2665     return ($new_todos);
2666     },
2667     };
2668    
2669     $Element->{$HTML_NS}->{col} = {
2670     attrs_checker => $GetHTMLAttrsChecker->({
2671     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2672     }),
2673     checker => $HTMLEmptyChecker,
2674     };
2675    
2676     $Element->{$HTML_NS}->{tbody} = {
2677     attrs_checker => $GetHTMLAttrsChecker->({}),
2678     checker => sub {
2679     my ($self, $todo) = @_;
2680     my $el = $todo->{node};
2681     my $new_todos = [];
2682     my @nodes = (@{$el->child_nodes});
2683    
2684     my $has_tr;
2685     while (@nodes) {
2686     my $node = shift @nodes;
2687     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2688    
2689     my $nt = $node->node_type;
2690     if ($nt == 1) {
2691     my $node_ns = $node->namespace_uri;
2692     $node_ns = '' unless defined $node_ns;
2693     my $node_ln = $node->manakai_local_name;
2694     ## NOTE: |minuses| list is not checked since redundant
2695 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2696     #
2697     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2698 wakaba 1.1 $has_tr = 1;
2699     } else {
2700     $self->{onerror}->(node => $node, type => 'element not allowed');
2701     }
2702     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2703     unshift @nodes, @$sib;
2704     push @$new_todos, @$ch;
2705     } elsif ($nt == 3 or $nt == 4) {
2706     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2707     $self->{onerror}->(node => $node, type => 'character not allowed');
2708     }
2709     } elsif ($nt == 5) {
2710     unshift @nodes, @{$node->child_nodes};
2711     }
2712     }
2713     unless ($has_tr) {
2714     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2715     }
2716     return ($new_todos);
2717     },
2718     };
2719    
2720     $Element->{$HTML_NS}->{thead} = {
2721     attrs_checker => $GetHTMLAttrsChecker->({}),
2722     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2723     };
2724    
2725     $Element->{$HTML_NS}->{tfoot} = {
2726     attrs_checker => $GetHTMLAttrsChecker->({}),
2727     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2728     };
2729    
2730     $Element->{$HTML_NS}->{tr} = {
2731     attrs_checker => $GetHTMLAttrsChecker->({}),
2732     checker => sub {
2733     my ($self, $todo) = @_;
2734     my $el = $todo->{node};
2735     my $new_todos = [];
2736     my @nodes = (@{$el->child_nodes});
2737    
2738     my $has_td;
2739     while (@nodes) {
2740     my $node = shift @nodes;
2741     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2742    
2743     my $nt = $node->node_type;
2744     if ($nt == 1) {
2745     my $node_ns = $node->namespace_uri;
2746     $node_ns = '' unless defined $node_ns;
2747     my $node_ln = $node->manakai_local_name;
2748     ## NOTE: |minuses| list is not checked since redundant
2749 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2750     #
2751     } elsif ($node_ns eq $HTML_NS and
2752     ($node_ln eq 'td' or $node_ln eq 'th')) {
2753 wakaba 1.1 $has_td = 1;
2754     } else {
2755     $self->{onerror}->(node => $node, type => 'element not allowed');
2756     }
2757     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2758     unshift @nodes, @$sib;
2759     push @$new_todos, @$ch;
2760     } elsif ($nt == 3 or $nt == 4) {
2761     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2762     $self->{onerror}->(node => $node, type => 'character not allowed');
2763     }
2764     } elsif ($nt == 5) {
2765     unshift @nodes, @{$node->child_nodes};
2766     }
2767     }
2768     unless ($has_td) {
2769     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2770     }
2771     return ($new_todos);
2772     },
2773     };
2774    
2775     $Element->{$HTML_NS}->{td} = {
2776     attrs_checker => $GetHTMLAttrsChecker->({
2777     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2778     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2779     }),
2780     checker => $HTMLBlockOrInlineChecker,
2781     };
2782    
2783     $Element->{$HTML_NS}->{th} = {
2784     attrs_checker => $GetHTMLAttrsChecker->({
2785     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2786     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2787     scope => $GetHTMLEnumeratedAttrChecker
2788     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2789     }),
2790     checker => $HTMLBlockOrInlineChecker,
2791     };
2792    
2793     ## TODO: forms
2794 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2795 wakaba 1.1
2796     $Element->{$HTML_NS}->{script} = {
2797 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2798 wakaba 1.1 src => $HTMLURIAttrChecker,
2799     defer => $GetHTMLBooleanAttrChecker->('defer'),
2800     async => $GetHTMLBooleanAttrChecker->('async'),
2801     type => $HTMLIMTAttrChecker,
2802 wakaba 1.9 }),
2803 wakaba 1.1 checker => sub {
2804     my ($self, $todo) = @_;
2805    
2806     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2807     return $HTMLEmptyChecker->($self, $todo);
2808     } else {
2809     ## NOTE: No content model conformance in HTML5 spec.
2810     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2811     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2812     if ((defined $type and $type eq '') or
2813     (defined $language and $language eq '')) {
2814     $type = 'text/javascript';
2815     } elsif (defined $type) {
2816     #
2817     } elsif (defined $language) {
2818     $type = 'text/' . $language;
2819     } else {
2820     $type = 'text/javascript';
2821     }
2822     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2823     type => 'script:'.$type); ## TODO: $type normalization
2824     return $AnyChecker->($self, $todo);
2825     }
2826     },
2827     };
2828    
2829     ## NOTE: When script is disabled.
2830     $Element->{$HTML_NS}->{noscript} = {
2831 wakaba 1.3 attrs_checker => sub {
2832     my ($self, $todo) = @_;
2833    
2834     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2835     ## since the later is not invoked when the |noscript| is used as a
2836     ## transparent element.
2837     unless ($todo->{node}->owner_document->manakai_is_html) {
2838     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2839     }
2840    
2841     $GetHTMLAttrsChecker->({})->($self, $todo);
2842     },
2843 wakaba 1.1 checker => sub {
2844     my ($self, $todo) = @_;
2845    
2846 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2847     my $new_todos = [];
2848     my @nodes = (@{$todo->{node}->child_nodes});
2849    
2850     while (@nodes) {
2851     my $node = shift @nodes;
2852     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2853    
2854     my $nt = $node->node_type;
2855     if ($nt == 1) {
2856     my $node_ns = $node->namespace_uri;
2857     $node_ns = '' unless defined $node_ns;
2858     my $node_ln = $node->manakai_local_name;
2859 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2860     #
2861     } elsif ($node_ns eq $HTML_NS) {
2862 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2863     #
2864     } elsif ($node_ln eq 'meta') {
2865 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2866     #
2867 wakaba 1.3 } else {
2868 wakaba 1.5 $self->{onerror}->(node => $node,
2869     type => 'element not allowed');
2870 wakaba 1.3 }
2871     } else {
2872     $self->{onerror}->(node => $node, type => 'element not allowed');
2873     }
2874     } else {
2875     $self->{onerror}->(node => $node, type => 'element not allowed');
2876     }
2877    
2878     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2879     unshift @nodes, @$sib;
2880     push @$new_todos, @$ch;
2881     } elsif ($nt == 3 or $nt == 4) {
2882     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2883     $self->{onerror}->(node => $node, type => 'character not allowed');
2884     }
2885     } elsif ($nt == 5) {
2886     unshift @nodes, @{$node->child_nodes};
2887     }
2888     }
2889     return ($new_todos);
2890     } else {
2891     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2892     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2893     push @$sib, $end;
2894     return ($sib, $ch);
2895     }
2896 wakaba 1.1 },
2897     };
2898 wakaba 1.3
2899     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2900 wakaba 1.1
2901     $Element->{$HTML_NS}->{'event-source'} = {
2902     attrs_checker => $GetHTMLAttrsChecker->({
2903     src => $HTMLURIAttrChecker,
2904     }),
2905     checker => $HTMLEmptyChecker,
2906     };
2907    
2908     $Element->{$HTML_NS}->{details} = {
2909     attrs_checker => $GetHTMLAttrsChecker->({
2910     open => $GetHTMLBooleanAttrChecker->('open'),
2911     }),
2912     checker => sub {
2913     my ($self, $todo) = @_;
2914    
2915     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2916     my ($sib, $ch)
2917     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2918     ->($self, $todo);
2919     push @$sib, $end;
2920     return ($sib, $ch);
2921     },
2922     };
2923    
2924     $Element->{$HTML_NS}->{datagrid} = {
2925     attrs_checker => $GetHTMLAttrsChecker->({
2926     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2927     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2928     }),
2929     checker => sub {
2930     my ($self, $todo) = @_;
2931     my $el = $todo->{node};
2932     my $new_todos = [];
2933     my @nodes = (@{$el->child_nodes});
2934    
2935     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2936    
2937     ## Block-table Block* | table | select | datalist | Empty
2938     my $mode = 'any';
2939     while (@nodes) {
2940     my $node = shift @nodes;
2941     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2942    
2943     my $nt = $node->node_type;
2944     if ($nt == 1) {
2945     my $node_ns = $node->namespace_uri;
2946     $node_ns = '' unless defined $node_ns;
2947     my $node_ln = $node->manakai_local_name;
2948     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2949 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2950     #
2951     } elsif ($mode eq 'block') {
2952 wakaba 1.1 $not_allowed = 1
2953     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2954     } elsif ($mode eq 'any') {
2955     if ($node_ns eq $HTML_NS and
2956     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2957     $mode = 'none';
2958     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2959     $mode = 'block';
2960     } else {
2961     $not_allowed = 1;
2962     }
2963     } else {
2964     $not_allowed = 1;
2965     }
2966     $self->{onerror}->(node => $node, type => 'element not allowed')
2967     if $not_allowed;
2968     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2969     unshift @nodes, @$sib;
2970     push @$new_todos, @$ch;
2971     } elsif ($nt == 3 or $nt == 4) {
2972     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2973     $self->{onerror}->(node => $node, type => 'character not allowed');
2974     }
2975     } elsif ($nt == 5) {
2976     unshift @nodes, @{$node->child_nodes};
2977     }
2978     }
2979    
2980     push @$new_todos, $end;
2981     return ($new_todos);
2982     },
2983     };
2984    
2985     $Element->{$HTML_NS}->{command} = {
2986     attrs_checker => $GetHTMLAttrsChecker->({
2987     checked => $GetHTMLBooleanAttrChecker->('checked'),
2988     default => $GetHTMLBooleanAttrChecker->('default'),
2989     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2990     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2991     icon => $HTMLURIAttrChecker,
2992     label => sub { }, ## NOTE: No conformance creteria
2993     radiogroup => sub { }, ## NOTE: No conformance creteria
2994     ## NOTE: |title| has special semantics, but no syntactical difference
2995     type => sub {
2996     my ($self, $attr) = @_;
2997     my $value = $attr->value;
2998     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2999     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3000     }
3001     },
3002     }),
3003     checker => $HTMLEmptyChecker,
3004     };
3005    
3006     $Element->{$HTML_NS}->{menu} = {
3007     attrs_checker => $GetHTMLAttrsChecker->({
3008     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3009     id => sub {
3010     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3011     my ($self, $attr) = @_;
3012     my $value = $attr->value;
3013     if (length $value > 0) {
3014     if ($self->{id}->{$value}) {
3015     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3016     push @{$self->{id}->{$value}}, $attr;
3017     } else {
3018     $self->{id}->{$value} = [$attr];
3019     }
3020     } else {
3021     ## NOTE: MUST contain at least one character
3022     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3023     }
3024     if ($value =~ /[\x09-\x0D\x20]/) {
3025     $self->{onerror}->(node => $attr, type => 'space in ID');
3026     }
3027     $self->{menu}->{$value} ||= $attr;
3028     ## ISSUE: <menu id=""><p contextmenu=""> match?
3029     },
3030     label => sub { }, ## NOTE: No conformance creteria
3031     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3032     }),
3033     checker => sub {
3034     my ($self, $todo) = @_;
3035     my $el = $todo->{node};
3036     my $new_todos = [];
3037     my @nodes = (@{$el->child_nodes});
3038    
3039     my $content = 'li or inline';
3040     while (@nodes) {
3041     my $node = shift @nodes;
3042     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3043    
3044     my $nt = $node->node_type;
3045     if ($nt == 1) {
3046     my $node_ns = $node->namespace_uri;
3047     $node_ns = '' unless defined $node_ns;
3048     my $node_ln = $node->manakai_local_name;
3049     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3050 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3051     #
3052     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3053 wakaba 1.1 if ($content eq 'inline') {
3054     $not_allowed = 1;
3055     } elsif ($content eq 'li or inline') {
3056     $content = 'li';
3057     }
3058     } else {
3059     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3060     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3061     $content = 'inline';
3062     } else {
3063     $not_allowed = 1;
3064     }
3065     }
3066     $self->{onerror}->(node => $node, type => 'element not allowed')
3067     if $not_allowed;
3068     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3069     unshift @nodes, @$sib;
3070     push @$new_todos, @$ch;
3071     } elsif ($nt == 3 or $nt == 4) {
3072     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3073     if ($content eq 'li') {
3074     $self->{onerror}->(node => $node, type => 'character not allowed');
3075     } elsif ($content eq 'li or inline') {
3076     $content = 'inline';
3077     }
3078     }
3079     } elsif ($nt == 5) {
3080     unshift @nodes, @{$node->child_nodes};
3081     }
3082     }
3083    
3084     for (@$new_todos) {
3085     $_->{inline} = 1;
3086     }
3087     return ($new_todos);
3088     },
3089 wakaba 1.8 };
3090    
3091     $Element->{$HTML_NS}->{datatemplate} = {
3092     attrs_checker => $GetHTMLAttrsChecker->({}),
3093     checker => sub {
3094     my ($self, $todo) = @_;
3095     my $el = $todo->{node};
3096     my $new_todos = [];
3097     my @nodes = (@{$el->child_nodes});
3098    
3099     while (@nodes) {
3100     my $node = shift @nodes;
3101     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3102    
3103     my $nt = $node->node_type;
3104     if ($nt == 1) {
3105     my $node_ns = $node->namespace_uri;
3106     $node_ns = '' unless defined $node_ns;
3107     my $node_ln = $node->manakai_local_name;
3108     ## NOTE: |minuses| list is not checked since redundant
3109     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3110     #
3111     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3112     $self->{onerror}->(node => $node,
3113     type => 'element not allowed:datatemplate');
3114     }
3115     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3116     unshift @nodes, @$sib;
3117     push @$new_todos, @$ch;
3118     } elsif ($nt == 3 or $nt == 4) {
3119     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3120     $self->{onerror}->(node => $node, type => 'character not allowed');
3121     }
3122     } elsif ($nt == 5) {
3123     unshift @nodes, @{$node->child_nodes};
3124     }
3125     }
3126     return ($new_todos);
3127     },
3128     is_xml_root => 1,
3129     };
3130    
3131     $Element->{$HTML_NS}->{rule} = {
3132     attrs_checker => $GetHTMLAttrsChecker->({
3133     ## TODO: |condition| attribute
3134     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3135     }),
3136     checker => sub {
3137     my ($self, $todo) = @_;
3138    
3139     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3140     my ($sib, $ch) = $AnyChecker->($self, $todo);
3141     push @$sib, $end;
3142     return ($sib, $ch);
3143     },
3144     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3145     ## is applied to some conforming data, results in a conforming DOM tree.":
3146     ## We don't check against this.
3147     };
3148    
3149     $Element->{$HTML_NS}->{nest} = {
3150     attrs_checker => $GetHTMLAttrsChecker->({
3151     ## TODO: |filter| attribute
3152     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3153     }),
3154     checker => $HTMLEmptyChecker,
3155 wakaba 1.1 };
3156    
3157     $Element->{$HTML_NS}->{legend} = {
3158     attrs_checker => $GetHTMLAttrsChecker->({}),
3159     checker => sub {
3160     my ($self, $todo) = @_;
3161    
3162     my $parent = $todo->{node}->manakai_parent_element;
3163     if (defined $parent) {
3164     my $nsuri = $parent->namespace_uri;
3165     $nsuri = '' unless defined $nsuri;
3166     my $ln = $parent->manakai_local_name;
3167     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3168     return $HTMLInlineChecker->($self, $todo);
3169     } else {
3170 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3171 wakaba 1.1 }
3172     } else {
3173     return $HTMLInlineChecker->($self, $todo);
3174     }
3175    
3176     ## ISSUE: Content model is defined only for fieldset/legend,
3177     ## details/legend, and figure/legend.
3178     },
3179     };
3180    
3181     $Element->{$HTML_NS}->{div} = {
3182     attrs_checker => $GetHTMLAttrsChecker->({}),
3183     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3184     };
3185    
3186     $Element->{$HTML_NS}->{font} = {
3187     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3188     checker => $HTMLTransparentChecker,
3189     };
3190    
3191     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3192    
3193     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24