/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.13 - (hide annotations) (download)
Sun Nov 4 03:36:54 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.12: +6 -15 lines
++ whatpm/Whatpm/ContentChecker/ChangeLog	4 Nov 2007 03:36:52 -0000
2007-11-04  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: "Significant" has been gone (HTML5 revision 1114).

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     ## Strictly inline-level content
252     my $HTMLStrictlyInlineChecker = sub {
253     my ($self, $todo) = @_;
254     my $el = $todo->{node};
255     my $new_todos = [];
256     my @nodes = (@{$el->child_nodes});
257    
258     while (@nodes) {
259     my $node = shift @nodes;
260     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
261    
262     my $nt = $node->node_type;
263     if ($nt == 1) {
264     my $node_ns = $node->namespace_uri;
265     $node_ns = '' unless defined $node_ns;
266     my $node_ln = $node->manakai_local_name;
267     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
268     $not_allowed = 1
269 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
270     $self->{pluses}->{$node_ns}->{$node_ln};
271 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
272     if $not_allowed;
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 5) {
277     unshift @nodes, @{$node->child_nodes};
278     }
279     }
280    
281     for (@$new_todos) {
282     $_->{inline} = 1;
283     $_->{strictly_inline} = 1;
284     }
285     return ($new_todos);
286     }; # $HTMLStrictlyInlineChecker
287    
288 wakaba 1.8 ## Inline-level or strictly inline-level content
289 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
290     my ($self, $todo) = @_;
291     my $el = $todo->{node};
292     my $new_todos = [];
293     my @nodes = (@{$el->child_nodes});
294    
295     while (@nodes) {
296     my $node = shift @nodes;
297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
298    
299     my $nt = $node->node_type;
300     if ($nt == 1) {
301     my $node_ns = $node->namespace_uri;
302     $node_ns = '' unless defined $node_ns;
303     my $node_ln = $node->manakai_local_name;
304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
305     if ($todo->{strictly_inline}) {
306     $not_allowed = 1
307 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308     $self->{pluses}->{$node_ns}->{$node_ln};
309 wakaba 1.1 } else {
310     $not_allowed = 1
311     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
312 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
313     $self->{pluses}->{$node_ns}->{$node_ln};
314 wakaba 1.1 }
315     $self->{onerror}->(node => $node, type => 'element not allowed')
316     if $not_allowed;
317     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
318     unshift @nodes, @$sib;
319     push @$new_todos, @$ch;
320     } elsif ($nt == 5) {
321     unshift @nodes, @{$node->child_nodes};
322     }
323     }
324    
325     for (@$new_todos) {
326     $_->{inline} = 1;
327     $_->{strictly_inline} = 1;
328     }
329     return ($new_todos);
330     }; # $HTMLInlineOrStrictlyInlineChecker
331    
332 wakaba 1.13 ## TODO: New "significant" SHOULD-level requirement (revision 1114).
333 wakaba 1.1
334 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
335 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
336     my ($self, $todo) = @_;
337     my $el = $todo->{node};
338     my $new_todos = [];
339     my @nodes = (@{$el->child_nodes});
340    
341     my $content = 'block-or-inline'; # or 'block' or 'inline'
342     my @block_not_inline;
343     while (@nodes) {
344     my $node = shift @nodes;
345     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
346    
347 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
348     ## is conforming or not.
349    
350 wakaba 1.1 my $nt = $node->node_type;
351     if ($nt == 1) {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
356     if ($content eq 'block') {
357     $not_allowed = 1
358 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
359     $self->{pluses}->{$node_ns}->{$node_ln};
360 wakaba 1.1 } elsif ($content eq 'inline') {
361     $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
364     $self->{pluses}->{$node_ns}->{$node_ln};
365 wakaba 1.1 } else {
366     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
367     my $is_inline
368     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
369     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
370    
371     push @block_not_inline, $node
372     if $is_block and not $is_inline and not $not_allowed;
373 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
374 wakaba 1.1 $content = 'inline';
375     for (@block_not_inline) {
376     $self->{onerror}->(node => $_, type => 'element not allowed');
377     }
378     $not_allowed = 1 unless $is_inline;
379     }
380     }
381     $self->{onerror}->(node => $node, type => 'element not allowed')
382     if $not_allowed;
383     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
384     unshift @nodes, @$sib;
385     push @$new_todos, @$ch;
386     } elsif ($nt == 3 or $nt == 4) {
387     if ($node->data =~ /[^\x09-\x0D\x20]/) {
388     if ($content eq 'block') {
389     $self->{onerror}->(node => $node, type => 'character not allowed');
390     } else {
391     $content = 'inline';
392     for (@block_not_inline) {
393     $self->{onerror}->(node => $_, type => 'element not allowed');
394     }
395     }
396     }
397     } elsif ($nt == 5) {
398     unshift @nodes, @{$node->child_nodes};
399     }
400     }
401    
402     if ($content eq 'inline') {
403     for (@$new_todos) {
404     $_->{inline} = 1;
405     }
406     }
407     return ($new_todos);
408     };
409    
410     ## Zero or more XXX element, then either block-level or inline-level
411     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
412     my ($elnsuri, $ellname) = @_;
413     return sub {
414     my ($self, $todo) = @_;
415     my $el = $todo->{node};
416     my $new_todos = [];
417     my @nodes = (@{$el->child_nodes});
418    
419     my $has_non_style;
420     my $content = 'block-or-inline'; # or 'block' or 'inline'
421     my @block_not_inline;
422     while (@nodes) {
423     my $node = shift @nodes;
424     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
425    
426     my $nt = $node->node_type;
427     if ($nt == 1) {
428     my $node_ns = $node->namespace_uri;
429     $node_ns = '' unless defined $node_ns;
430     my $node_ln = $node->manakai_local_name;
431     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
432     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
433     $not_allowed = 1 if $has_non_style;
434     if ($ellname eq 'style' and
435     not $node->has_attribute_ns (undef, 'scoped')) {
436     $not_allowed = 1;
437     }
438     } elsif ($content eq 'block') {
439     $has_non_style = 1;
440     $not_allowed = 1
441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
442     $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 } elsif ($content eq 'inline') {
444     $has_non_style = 1;
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
447 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
448     $self->{pluses}->{$node_ns}->{$node_ln};
449 wakaba 1.1 } else {
450 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
452     my $is_inline
453     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455    
456     push @block_not_inline, $node
457     if $is_block and not $is_inline and not $not_allowed;
458 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
459 wakaba 1.1 $content = 'inline';
460     for (@block_not_inline) {
461     $self->{onerror}->(node => $_, type => 'element not allowed');
462     }
463     $not_allowed = 1 unless $is_inline;
464     }
465     }
466     $self->{onerror}->(node => $node, type => 'element not allowed')
467     if $not_allowed;
468     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
469     unshift @nodes, @$sib;
470     push @$new_todos, @$ch;
471     } elsif ($nt == 3 or $nt == 4) {
472     if ($node->data =~ /[^\x09-\x0D\x20]/) {
473     $has_non_style = 1;
474     if ($content eq 'block') {
475     $self->{onerror}->(node => $node, type => 'character not allowed');
476     } else {
477     $content = 'inline';
478     for (@block_not_inline) {
479     $self->{onerror}->(node => $_, type => 'element not allowed');
480     }
481     }
482     }
483     } elsif ($nt == 5) {
484     unshift @nodes, @{$node->child_nodes};
485     }
486     }
487    
488     if ($content eq 'inline') {
489     for (@$new_todos) {
490     $_->{inline} = 1;
491     }
492     }
493     return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
498    
499     our $AttrChecker;
500    
501     my $GetHTMLEnumeratedAttrChecker = sub {
502     my $states = shift; # {value => conforming ? 1 : -1}
503     return sub {
504     my ($self, $attr) = @_;
505     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
506     if ($states->{$value} > 0) {
507     #
508     } elsif ($states->{$value}) {
509     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
510     } else {
511     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
512     }
513     };
514     }; # $GetHTMLEnumeratedAttrChecker
515    
516     my $GetHTMLBooleanAttrChecker = sub {
517     my $local_name = shift;
518     return sub {
519     my ($self, $attr) = @_;
520     my $value = $attr->value;
521     unless ($value eq $local_name or $value eq '') {
522     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
523     }
524     };
525     }; # $GetHTMLBooleanAttrChecker
526    
527 wakaba 1.8 ## Unordered set of space-separated tokens
528     my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
529     my ($self, $attr) = @_;
530     my %word;
531     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
532     unless ($word{$word}) {
533     $word{$word} = 1;
534     } else {
535     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
536     }
537     }
538     }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
539    
540 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
541     ## whose allowed values are defined by the section on link types)
542     my $HTMLLinkTypesAttrChecker = sub {
543 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
544 wakaba 1.1 my %word;
545     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
546     unless ($word{$word}) {
547     $word{$word} = 1;
548     } else {
549     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
550     }
551     }
552     ## NOTE: Case sensitive match (since HTML5 spec does not say link
553     ## types are case-insensitive and it says "The value should not
554     ## be confusingly similar to any other defined value (e.g.
555     ## differing only in case).").
556     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
557     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
558     ## values to be used conformingly.
559     require Whatpm::_LinkTypeList;
560     our $LinkType;
561     for my $word (keys %word) {
562     my $def = $LinkType->{$word};
563     if (defined $def) {
564     if ($def->{status} eq 'accepted') {
565     if (defined $def->{effect}->[$a_or_area]) {
566     #
567     } else {
568     $self->{onerror}->(node => $attr,
569     type => 'link type:bad context:'.$word);
570     }
571     } elsif ($def->{status} eq 'proposal') {
572     $self->{onerror}->(node => $attr, level => 's',
573     type => 'link type:proposed:'.$word);
574     } else { # rejected or synonym
575     $self->{onerror}->(node => $attr,
576     type => 'link type:non-conforming:'.$word);
577     }
578 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
579     if ($word eq 'alternate') {
580     #
581     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
582     $todo->{has_hyperlink_link_type} = 1;
583     }
584     }
585 wakaba 1.1 if ($def->{unique}) {
586     unless ($self->{has_link_type}->{$word}) {
587     $self->{has_link_type}->{$word} = 1;
588     } else {
589     $self->{onerror}->(node => $attr,
590     type => 'link type:duplicate:'.$word);
591     }
592     }
593     } else {
594     $self->{onerror}->(node => $attr, level => 'unsupported',
595     type => 'link type:'.$word);
596     }
597     }
598 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
599     if $word{alternate} and not $word{stylesheet};
600 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
601     ## says that using both X-Pingback: header field and HTML
602     ## <link rel=pingback> is deprecated and if both appears they
603     ## SHOULD contain exactly the same value.
604     ## ISSUE: Pingback 1.0 specification defines the exact representation
605     ## of its link element, which cannot be tested by the current arch.
606     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
607     ## include any string that matches to the pattern for the rel=pingback link,
608     ## which again inpossible to test.
609     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
610 wakaba 1.12
611     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
612     ## ISSUE: <link rel="up up"> is non-conforming, since rel="" is unordered.
613 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
614    
615     ## URI (or IRI)
616     my $HTMLURIAttrChecker = sub {
617     my ($self, $attr) = @_;
618     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
619     my $value = $attr->value;
620     Whatpm::URIChecker->check_iri_reference ($value, sub {
621     my %opt = @_;
622     $self->{onerror}->(node => $attr, level => $opt{level},
623     type => 'URI::'.$opt{type}.
624     (defined $opt{position} ? ':'.$opt{position} : ''));
625     });
626 wakaba 1.4 $self->{has_uri_attr} = 1;
627 wakaba 1.1 }; # $HTMLURIAttrChecker
628    
629     ## A space separated list of one or more URIs (or IRIs)
630     my $HTMLSpaceURIsAttrChecker = sub {
631     my ($self, $attr) = @_;
632     my $i = 0;
633     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
634     Whatpm::URIChecker->check_iri_reference ($value, sub {
635     my %opt = @_;
636     $self->{onerror}->(node => $attr, level => $opt{level},
637 wakaba 1.2 type => 'URIs:'.':'.
638     $opt{type}.':'.$i.
639 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
640     });
641     $i++;
642     }
643     ## ISSUE: Relative references?
644     ## ISSUE: Leading or trailing white spaces are conformant?
645     ## ISSUE: A sequence of white space characters are conformant?
646     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
647     ## NOTE: Duplication seems not an error.
648 wakaba 1.4 $self->{has_uri_attr} = 1;
649 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
650    
651     my $HTMLDatetimeAttrChecker = sub {
652     my ($self, $attr) = @_;
653     my $value = $attr->value;
654     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
655     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
656     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
657     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
658     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
659     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
660     if $d < 1 or
661     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
662     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
663     if $M == 2 and $d == 29 and
664     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
665     } else {
666     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
667     }
668     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
669     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
670     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
671     if defined $s and $s > 59;
672     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
673     if $zh > 23;
674     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
675     if $zm > 59;
676     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
677     } else {
678     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
679     }
680     }; # $HTMLDatetimeAttrChecker
681    
682     my $HTMLIntegerAttrChecker = sub {
683     my ($self, $attr) = @_;
684     my $value = $attr->value;
685     unless ($value =~ /\A-?[0-9]+\z/) {
686     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
687     }
688     }; # $HTMLIntegerAttrChecker
689    
690     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
691     my $range_check = shift;
692     return sub {
693     my ($self, $attr) = @_;
694     my $value = $attr->value;
695     if ($value =~ /\A[0-9]+\z/) {
696     unless ($range_check->($value + 0)) {
697     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
698     }
699     } else {
700     $self->{onerror}->(node => $attr,
701     type => 'nninteger:syntax error');
702     }
703     };
704     }; # $GetHTMLNonNegativeIntegerAttrChecker
705    
706     my $GetHTMLFloatingPointNumberAttrChecker = sub {
707     my $range_check = shift;
708     return sub {
709     my ($self, $attr) = @_;
710     my $value = $attr->value;
711     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
712     unless ($range_check->($value + 0)) {
713     $self->{onerror}->(node => $attr, type => 'float:out of range');
714     }
715     } else {
716     $self->{onerror}->(node => $attr,
717     type => 'float:syntax error');
718     }
719     };
720     }; # $GetHTMLFloatingPointNumberAttrChecker
721    
722     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
723     ## ISSUE: RFC 2046 does not define syntax of media types.
724     ## ISSUE: The definition of "a valid MIME type" is unknown.
725     ## Syntactical correctness?
726     my $HTMLIMTAttrChecker = sub {
727     my ($self, $attr) = @_;
728     my $value = $attr->value;
729     ## ISSUE: RFC 2045 Content-Type header field allows insertion
730     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
731     ## ISSUE: RFC 2231 extension? Maybe no.
732     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
733     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
734     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
735     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
736     my @type = ($1, $2);
737     my $param = $3;
738     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
739     if (defined $2) {
740     push @type, $1 => $2;
741     } else {
742     my $n = $1;
743     my $v = $2;
744     $v =~ s/\\(.)/$1/gs;
745     push @type, $n => $v;
746     }
747     }
748     require Whatpm::IMTChecker;
749     Whatpm::IMTChecker->check_imt (sub {
750     my %opt = @_;
751     $self->{onerror}->(node => $attr, level => $opt{level},
752     type => 'IMT:'.$opt{type});
753     }, @type);
754     } else {
755     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
756     }
757     }; # $HTMLIMTAttrChecker
758    
759     my $HTMLLanguageTagAttrChecker = sub {
760 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
761    
762 wakaba 1.1 my ($self, $attr) = @_;
763 wakaba 1.6 my $value = $attr->value;
764     require Whatpm::LangTag;
765     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
766     my %opt = @_;
767     my $type = 'LangTag:'.$opt{type};
768     $type .= ':' . $opt{subtag} if defined $opt{subtag};
769     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
770     level => $opt{level});
771     });
772 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
773 wakaba 1.6
774     ## TODO: testdata
775 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
776    
777     ## "A valid media query [MQ]"
778     my $HTMLMQAttrChecker = sub {
779     my ($self, $attr) = @_;
780     $self->{onerror}->(node => $attr, level => 'unsupported',
781     type => 'media query');
782     ## ISSUE: What is "a valid media query"?
783     }; # $HTMLMQAttrChecker
784    
785     my $HTMLEventHandlerAttrChecker = sub {
786     my ($self, $attr) = @_;
787     $self->{onerror}->(node => $attr, level => 'unsupported',
788     type => 'event handler');
789     ## TODO: MUST contain valid ECMAScript code matching the
790     ## ECMAScript |FunctionBody| production. [ECMA262]
791     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
792     ## ISSUE: Automatic semicolon insertion does not apply?
793     ## ISSUE: Other script languages?
794     }; # $HTMLEventHandlerAttrChecker
795    
796     my $HTMLUsemapAttrChecker = sub {
797     my ($self, $attr) = @_;
798     ## MUST be a valid hashed ID reference to a |map| element
799     my $value = $attr->value;
800     if ($value =~ s/^#//) {
801     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
802     push @{$self->{usemap}}, [$value => $attr];
803     } else {
804     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
805     }
806     ## NOTE: Space characters in hashed ID references are conforming.
807     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
808     }; # $HTMLUsemapAttrChecker
809    
810     my $HTMLTargetAttrChecker = sub {
811     my ($self, $attr) = @_;
812     my $value = $attr->value;
813     if ($value =~ /^_/) {
814     $value = lc $value; ## ISSUE: ASCII case-insentitive?
815     unless ({
816     _self => 1, _parent => 1, _top => 1,
817     }->{$value}) {
818     $self->{onerror}->(node => $attr,
819     type => 'reserved browsing context name');
820     }
821     } else {
822     #$ ISSUE: An empty string is conforming?
823     }
824     }; # $HTMLTargetAttrChecker
825    
826     my $HTMLAttrChecker = {
827     id => sub {
828     ## NOTE: |map| has its own variant of |id=""| checker
829     my ($self, $attr) = @_;
830     my $value = $attr->value;
831     if (length $value > 0) {
832     if ($self->{id}->{$value}) {
833     $self->{onerror}->(node => $attr, type => 'duplicate ID');
834     push @{$self->{id}->{$value}}, $attr;
835     } else {
836     $self->{id}->{$value} = [$attr];
837     }
838     if ($value =~ /[\x09-\x0D\x20]/) {
839     $self->{onerror}->(node => $attr, type => 'space in ID');
840     }
841     } else {
842     ## NOTE: MUST contain at least one character
843     $self->{onerror}->(node => $attr, type => 'empty attribute value');
844     }
845     },
846     title => sub {}, ## NOTE: No conformance creteria
847     lang => sub {
848     my ($self, $attr) = @_;
849 wakaba 1.6 my $value = $attr->value;
850     if ($value eq '') {
851     #
852     } else {
853     require Whatpm::LangTag;
854     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
855     my %opt = @_;
856     my $type = 'LangTag:'.$opt{type};
857     $type .= ':' . $opt{subtag} if defined $opt{subtag};
858     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
859     level => $opt{level});
860     });
861     }
862 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
863     unless ($attr->owner_document->manakai_is_html) {
864     $self->{onerror}->(node => $attr, type => 'in XML:lang');
865     }
866 wakaba 1.6
867     ## TODO: test data
868 wakaba 1.1 },
869     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
870     class => sub {
871     my ($self, $attr) = @_;
872     my %word;
873     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
874     unless ($word{$word}) {
875     $word{$word} = 1;
876     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
877     } else {
878     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
879     }
880     }
881     },
882     contextmenu => sub {
883     my ($self, $attr) = @_;
884     my $value = $attr->value;
885     push @{$self->{contextmenu}}, [$value => $attr];
886     ## ISSUE: "The value must be the ID of a menu element in the DOM."
887     ## What is "in the DOM"? A menu Element node that is not part
888     ## of the Document tree is in the DOM? A menu Element node that
889     ## belong to another Document tree is in the DOM?
890     },
891     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
892 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
893     ## TODO: ref, template, registrationmark
894 wakaba 1.1 };
895    
896     for (qw/
897     onabort onbeforeunload onblur onchange onclick oncontextmenu
898     ondblclick ondrag ondragend ondragenter ondragleave ondragover
899     ondragstart ondrop onerror onfocus onkeydown onkeypress
900     onkeyup onload onmessage onmousedown onmousemove onmouseout
901     onmouseover onmouseup onmousewheel onresize onscroll onselect
902     onsubmit onunload
903     /) {
904     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
905     }
906    
907     my $GetHTMLAttrsChecker = sub {
908     my $element_specific_checker = shift;
909     return sub {
910     my ($self, $todo) = @_;
911     for my $attr (@{$todo->{node}->attributes}) {
912     my $attr_ns = $attr->namespace_uri;
913     $attr_ns = '' unless defined $attr_ns;
914     my $attr_ln = $attr->manakai_local_name;
915     my $checker;
916     if ($attr_ns eq '') {
917     $checker = $element_specific_checker->{$attr_ln}
918     || $HTMLAttrChecker->{$attr_ln};
919     }
920     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
921     || $AttrChecker->{$attr_ns}->{''};
922     if ($checker) {
923     $checker->($self, $attr, $todo);
924     } else {
925     $self->{onerror}->(node => $attr, level => 'unsupported',
926     type => 'attribute');
927     ## ISSUE: No comformance createria for unknown attributes in the spec
928     }
929     }
930     };
931     }; # $GetHTMLAttrsChecker
932    
933     our $Element;
934     our $ElementDefault;
935     our $AnyChecker;
936    
937     $Element->{$HTML_NS}->{''} = {
938     attrs_checker => $GetHTMLAttrsChecker->({}),
939     checker => $ElementDefault->{checker},
940     };
941    
942     $Element->{$HTML_NS}->{html} = {
943     is_root => 1,
944     attrs_checker => $GetHTMLAttrsChecker->({
945     xmlns => sub {
946     my ($self, $attr) = @_;
947     my $value = $attr->value;
948     unless ($value eq $HTML_NS) {
949     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
950     }
951     unless ($attr->owner_document->manakai_is_html) {
952     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
953     ## TODO: Test
954     }
955     },
956     }),
957     checker => sub {
958     my ($self, $todo) = @_;
959     my $el = $todo->{node};
960     my $new_todos = [];
961     my @nodes = (@{$el->child_nodes});
962    
963     my $phase = 'before head';
964     while (@nodes) {
965     my $node = shift @nodes;
966     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
967    
968     my $nt = $node->node_type;
969     if ($nt == 1) {
970     my $node_ns = $node->namespace_uri;
971     $node_ns = '' unless defined $node_ns;
972     my $node_ln = $node->manakai_local_name;
973     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
974 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
975     #
976     } elsif ($phase eq 'before head') {
977 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
978     $phase = 'after head';
979     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
980     $self->{onerror}->(node => $node, type => 'ps element missing:head');
981     $phase = 'after body';
982     } else {
983     $not_allowed = 1;
984     # before head
985     }
986     } elsif ($phase eq 'after head') {
987     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
988     $phase = 'after body';
989     } else {
990     $not_allowed = 1;
991     # after head
992     }
993     } else { #elsif ($phase eq 'after body') {
994     $not_allowed = 1;
995     # after body
996     }
997     $self->{onerror}->(node => $node, type => 'element not allowed')
998     if $not_allowed;
999     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1000     unshift @nodes, @$sib;
1001     push @$new_todos, @$ch;
1002     } elsif ($nt == 3 or $nt == 4) {
1003     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1004     $self->{onerror}->(node => $node, type => 'character not allowed');
1005     }
1006     } elsif ($nt == 5) {
1007     unshift @nodes, @{$node->child_nodes};
1008     }
1009     }
1010    
1011     if ($phase eq 'before head') {
1012     $self->{onerror}->(node => $el, type => 'child element missing:head');
1013     $self->{onerror}->(node => $el, type => 'child element missing:body');
1014     } elsif ($phase eq 'after head') {
1015     $self->{onerror}->(node => $el, type => 'child element missing:body');
1016     }
1017    
1018     return ($new_todos);
1019     },
1020     };
1021    
1022     $Element->{$HTML_NS}->{head} = {
1023     attrs_checker => $GetHTMLAttrsChecker->({}),
1024     checker => sub {
1025     my ($self, $todo) = @_;
1026     my $el = $todo->{node};
1027     my $new_todos = [];
1028     my @nodes = (@{$el->child_nodes});
1029    
1030     my $has_title;
1031     my $phase = 'initial'; # 'after charset', 'after base'
1032     while (@nodes) {
1033     my $node = shift @nodes;
1034     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1035    
1036     my $nt = $node->node_type;
1037     if ($nt == 1) {
1038     my $node_ns = $node->namespace_uri;
1039     $node_ns = '' unless defined $node_ns;
1040     my $node_ln = $node->manakai_local_name;
1041     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1042 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1043     #
1044     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1045 wakaba 1.1 $phase = 'after base';
1046     unless ($has_title) {
1047     $has_title = 1;
1048     } else {
1049     $not_allowed = 1;
1050     }
1051     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1052     if ($node->has_attribute_ns (undef, 'charset')) {
1053     if ($phase eq 'initial') {
1054     $phase = 'after charset';
1055     } else {
1056     $not_allowed = 1;
1057     ## NOTE: See also |base|'s "contexts" field in the spec
1058     }
1059 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1060     $node->has_attribute_ns (undef, 'http-equiv')) {
1061     $phase = 'after base';
1062 wakaba 1.1 } else {
1063     $phase = 'after base';
1064 wakaba 1.5 $not_allowed = 1;
1065 wakaba 1.1 }
1066     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1067     if ($phase eq 'initial' or $phase eq 'after charset') {
1068     $phase = 'after base';
1069     } else {
1070     $not_allowed = 1;
1071     }
1072     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1073     $phase = 'after base';
1074     if ($node->has_attribute_ns (undef, 'scoped')) {
1075     $not_allowed = 1;
1076     }
1077     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1078     $phase = 'after base';
1079     } else {
1080     $not_allowed = 1;
1081     }
1082     $self->{onerror}->(node => $node, type => 'element not allowed')
1083     if $not_allowed;
1084 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1085 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1086     unshift @nodes, @$sib;
1087     push @$new_todos, @$ch;
1088     } elsif ($nt == 3 or $nt == 4) {
1089     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1090     $self->{onerror}->(node => $node, type => 'character not allowed');
1091     }
1092     } elsif ($nt == 5) {
1093     unshift @nodes, @{$node->child_nodes};
1094     }
1095     }
1096     unless ($has_title) {
1097     $self->{onerror}->(node => $el, type => 'child element missing:title');
1098     }
1099     return ($new_todos);
1100     },
1101     };
1102    
1103     $Element->{$HTML_NS}->{title} = {
1104     attrs_checker => $GetHTMLAttrsChecker->({}),
1105     checker => $HTMLTextChecker,
1106     };
1107    
1108     $Element->{$HTML_NS}->{base} = {
1109 wakaba 1.4 attrs_checker => sub {
1110     my ($self, $todo) = @_;
1111    
1112     if ($self->{has_uri_attr} and
1113     $todo->{node}->has_attribute_ns (undef, 'href')) {
1114     ## ISSUE: Are these examples conforming?
1115     ## <head profile="a b c"><base href> (except for |profile|'s
1116     ## non-conformance)
1117     ## <title xml:base="relative"/><base href/> (maybe it should be)
1118     ## <unknown xmlns="relative"/><base href/> (assuming that
1119     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1120     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1121     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1122     ## <style>@import 'relative';</style><base href>
1123     ## <script>location.href = 'relative';</script><base href>
1124     $self->{onerror}->(node => $todo->{node},
1125     type => 'basehref after URI attribute');
1126     }
1127     if ($self->{has_hyperlink_element} and
1128     $todo->{node}->has_attribute_ns (undef, 'target')) {
1129     ## ISSUE: Are these examples conforming?
1130     ## <head><title xlink:href=""/><base target="name"/></head>
1131     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1132     ## (assuming that |xbl:xbl| is allowed before |base|)
1133     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1134     ## <link href=""/><base target="name"/>
1135     ## <link rel=unknown href=""><base target=name>
1136     $self->{onerror}->(node => $todo->{node},
1137     type => 'basetarget after hyperlink');
1138     }
1139    
1140     return $GetHTMLAttrsChecker->({
1141     href => $HTMLURIAttrChecker,
1142     target => $HTMLTargetAttrChecker,
1143     })->($self, $todo);
1144 wakaba 1.12
1145     ## TOOD: <base/> is non-conforming (revision 1115)
1146 wakaba 1.4 },
1147 wakaba 1.1 checker => $HTMLEmptyChecker,
1148     };
1149    
1150     $Element->{$HTML_NS}->{link} = {
1151     attrs_checker => sub {
1152     my ($self, $todo) = @_;
1153     $GetHTMLAttrsChecker->({
1154     href => $HTMLURIAttrChecker,
1155 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1156 wakaba 1.1 media => $HTMLMQAttrChecker,
1157     hreflang => $HTMLLanguageTagAttrChecker,
1158     type => $HTMLIMTAttrChecker,
1159     ## NOTE: Though |title| has special semantics,
1160     ## syntactically same as the |title| as global attribute.
1161     })->($self, $todo);
1162 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1163     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1164     } else {
1165 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1166     type => 'attribute missing:href');
1167     }
1168     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1169     $self->{onerror}->(node => $todo->{node},
1170     type => 'attribute missing:rel');
1171     }
1172     },
1173     checker => $HTMLEmptyChecker,
1174     };
1175    
1176     $Element->{$HTML_NS}->{meta} = {
1177     attrs_checker => sub {
1178     my ($self, $todo) = @_;
1179     my $name_attr;
1180     my $http_equiv_attr;
1181     my $charset_attr;
1182     my $content_attr;
1183     for my $attr (@{$todo->{node}->attributes}) {
1184     my $attr_ns = $attr->namespace_uri;
1185     $attr_ns = '' unless defined $attr_ns;
1186     my $attr_ln = $attr->manakai_local_name;
1187     my $checker;
1188     if ($attr_ns eq '') {
1189     if ($attr_ln eq 'content') {
1190     $content_attr = $attr;
1191     $checker = 1;
1192     } elsif ($attr_ln eq 'name') {
1193     $name_attr = $attr;
1194     $checker = 1;
1195     } elsif ($attr_ln eq 'http-equiv') {
1196     $http_equiv_attr = $attr;
1197     $checker = 1;
1198     } elsif ($attr_ln eq 'charset') {
1199     $charset_attr = $attr;
1200     $checker = 1;
1201     } else {
1202     $checker = $HTMLAttrChecker->{$attr_ln}
1203     || $AttrChecker->{$attr_ns}->{$attr_ln}
1204     || $AttrChecker->{$attr_ns}->{''};
1205     }
1206     } else {
1207     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1208     || $AttrChecker->{$attr_ns}->{''};
1209     }
1210     if ($checker) {
1211     $checker->($self, $attr) if ref $checker;
1212     } else {
1213     $self->{onerror}->(node => $attr, level => 'unsupported',
1214     type => 'attribute');
1215     ## ISSUE: No comformance createria for unknown attributes in the spec
1216     }
1217     }
1218    
1219     if (defined $name_attr) {
1220     if (defined $http_equiv_attr) {
1221     $self->{onerror}->(node => $http_equiv_attr,
1222     type => 'attribute not allowed');
1223     } elsif (defined $charset_attr) {
1224     $self->{onerror}->(node => $charset_attr,
1225     type => 'attribute not allowed');
1226     }
1227     my $metadata_name = $name_attr->value;
1228     my $metadata_value;
1229     if (defined $content_attr) {
1230     $metadata_value = $content_attr->value;
1231     } else {
1232     $self->{onerror}->(node => $todo->{node},
1233     type => 'attribute missing:content');
1234     $metadata_value = '';
1235     }
1236     } elsif (defined $http_equiv_attr) {
1237     if (defined $charset_attr) {
1238     $self->{onerror}->(node => $charset_attr,
1239     type => 'attribute not allowed');
1240     }
1241     unless (defined $content_attr) {
1242     $self->{onerror}->(node => $todo->{node},
1243     type => 'attribute missing:content');
1244     }
1245     } elsif (defined $charset_attr) {
1246     if (defined $content_attr) {
1247     $self->{onerror}->(node => $content_attr,
1248     type => 'attribute not allowed');
1249     }
1250     } else {
1251     if (defined $content_attr) {
1252     $self->{onerror}->(node => $content_attr,
1253     type => 'attribute not allowed');
1254     $self->{onerror}->(node => $todo->{node},
1255     type => 'attribute missing:name|http-equiv');
1256     } else {
1257     $self->{onerror}->(node => $todo->{node},
1258     type => 'attribute missing:name|http-equiv|charset');
1259     }
1260     }
1261    
1262     ## TODO: metadata conformance
1263    
1264     ## TODO: pragma conformance
1265     if (defined $http_equiv_attr) { ## An enumerated attribute
1266     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1267     if ({
1268     'refresh' => 1,
1269     'default-style' => 1,
1270     }->{$keyword}) {
1271     #
1272     } else {
1273     $self->{onerror}->(node => $http_equiv_attr,
1274     type => 'enumerated:invalid');
1275     }
1276     }
1277    
1278     if (defined $charset_attr) {
1279     unless ($todo->{node}->owner_document->manakai_is_html) {
1280     $self->{onerror}->(node => $charset_attr,
1281     type => 'in XML:charset');
1282     }
1283     ## TODO: charset
1284     }
1285     },
1286     checker => $HTMLEmptyChecker,
1287     };
1288    
1289     $Element->{$HTML_NS}->{style} = {
1290     attrs_checker => $GetHTMLAttrsChecker->({
1291     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1292     media => $HTMLMQAttrChecker,
1293     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1294     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1295     ## not different
1296     }),
1297     checker => sub {
1298     ## NOTE: |html:style| has no conformance creteria on content model
1299     my ($self, $todo) = @_;
1300     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1301     $type = 'text/css' unless defined $type;
1302     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1303     type => 'style:'.$type); ## TODO: $type normalization
1304     return $AnyChecker->($self, $todo);
1305     },
1306     };
1307    
1308     $Element->{$HTML_NS}->{body} = {
1309     attrs_checker => $GetHTMLAttrsChecker->({}),
1310     checker => $HTMLBlockChecker,
1311     };
1312    
1313     $Element->{$HTML_NS}->{section} = {
1314     attrs_checker => $GetHTMLAttrsChecker->({}),
1315     checker => $HTMLStylableBlockChecker,
1316     };
1317    
1318     $Element->{$HTML_NS}->{nav} = {
1319     attrs_checker => $GetHTMLAttrsChecker->({}),
1320     checker => $HTMLBlockOrInlineChecker,
1321     };
1322    
1323     $Element->{$HTML_NS}->{article} = {
1324     attrs_checker => $GetHTMLAttrsChecker->({}),
1325     checker => $HTMLStylableBlockChecker,
1326     };
1327    
1328     $Element->{$HTML_NS}->{blockquote} = {
1329     attrs_checker => $GetHTMLAttrsChecker->({
1330     cite => $HTMLURIAttrChecker,
1331     }),
1332     checker => $HTMLBlockChecker,
1333     };
1334    
1335     $Element->{$HTML_NS}->{aside} = {
1336     attrs_checker => $GetHTMLAttrsChecker->({}),
1337     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1338     };
1339    
1340     $Element->{$HTML_NS}->{h1} = {
1341     attrs_checker => $GetHTMLAttrsChecker->({}),
1342     checker => sub {
1343     my ($self, $todo) = @_;
1344     $todo->{flag}->{has_heading}->[0] = 1;
1345 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1346 wakaba 1.1 },
1347     };
1348    
1349     $Element->{$HTML_NS}->{h2} = {
1350     attrs_checker => $GetHTMLAttrsChecker->({}),
1351     checker => $Element->{$HTML_NS}->{h1}->{checker},
1352     };
1353    
1354     $Element->{$HTML_NS}->{h3} = {
1355     attrs_checker => $GetHTMLAttrsChecker->({}),
1356     checker => $Element->{$HTML_NS}->{h1}->{checker},
1357     };
1358    
1359     $Element->{$HTML_NS}->{h4} = {
1360     attrs_checker => $GetHTMLAttrsChecker->({}),
1361     checker => $Element->{$HTML_NS}->{h1}->{checker},
1362     };
1363    
1364     $Element->{$HTML_NS}->{h5} = {
1365     attrs_checker => $GetHTMLAttrsChecker->({}),
1366     checker => $Element->{$HTML_NS}->{h1}->{checker},
1367     };
1368    
1369     $Element->{$HTML_NS}->{h6} = {
1370     attrs_checker => $GetHTMLAttrsChecker->({}),
1371     checker => $Element->{$HTML_NS}->{h1}->{checker},
1372     };
1373    
1374     $Element->{$HTML_NS}->{header} = {
1375     attrs_checker => $GetHTMLAttrsChecker->({}),
1376     checker => sub {
1377     my ($self, $todo) = @_;
1378     my $old_flag = $todo->{flag}->{has_heading} || [];
1379     my $new_flag = [];
1380     local $todo->{flag}->{has_heading} = $new_flag;
1381     my $node = $todo->{node};
1382    
1383     my $end = $self->_add_minuses
1384     ({$HTML_NS => {qw/header 1 footer 1/}},
1385     $HTMLSectioningElements);
1386     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1387     push @$new_todos, $end,
1388     {type => 'code', code => sub {
1389     if ($new_flag->[0]) {
1390     $old_flag->[0] = 1;
1391     } else {
1392     $self->{onerror}->(node => $node, type => 'element missing:hn');
1393     }
1394     }};
1395     return ($new_todos, $ch);
1396     },
1397     };
1398    
1399     $Element->{$HTML_NS}->{footer} = {
1400     attrs_checker => $GetHTMLAttrsChecker->({}),
1401     checker => sub { ## block -hn -header -footer -sectioning or inline
1402     my ($self, $todo) = @_;
1403     my $el = $todo->{node};
1404     my $new_todos = [];
1405     my @nodes = (@{$el->child_nodes});
1406    
1407     my $content = 'block-or-inline'; # or 'block' or 'inline'
1408     my @block_not_inline;
1409     while (@nodes) {
1410     my $node = shift @nodes;
1411     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1412    
1413     my $nt = $node->node_type;
1414     if ($nt == 1) {
1415     my $node_ns = $node->namespace_uri;
1416     $node_ns = '' unless defined $node_ns;
1417     my $node_ln = $node->manakai_local_name;
1418     my $not_allowed;
1419     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1420     $not_allowed = 1;
1421     } elsif ($node_ns eq $HTML_NS and
1422     {
1423     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1424     }->{$node_ln}) {
1425     $not_allowed = 1;
1426     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1427     $not_allowed = 1;
1428     }
1429     if ($content eq 'block') {
1430     $not_allowed = 1
1431 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1432     $self->{pluses}->{$node_ns}->{$node_ln};
1433 wakaba 1.1 } elsif ($content eq 'inline') {
1434     $not_allowed = 1
1435     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1436 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1437     $self->{pluses}->{$node_ns}->{$node_ln};
1438 wakaba 1.1 } else {
1439     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1440     my $is_inline
1441     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1442     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1443    
1444     push @block_not_inline, $node
1445     if $is_block and not $is_inline and not $not_allowed;
1446 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1447 wakaba 1.1 $content = 'inline';
1448     for (@block_not_inline) {
1449     $self->{onerror}->(node => $_, type => 'element not allowed');
1450     }
1451     $not_allowed = 1 unless $is_inline;
1452     }
1453     }
1454     $self->{onerror}->(node => $node, type => 'element not allowed')
1455     if $not_allowed;
1456     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1457     unshift @nodes, @$sib;
1458     push @$new_todos, @$ch;
1459     } elsif ($nt == 3 or $nt == 4) {
1460     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1461     if ($content eq 'block') {
1462     $self->{onerror}->(node => $node, type => 'character not allowed');
1463     } else {
1464     $content = 'inline';
1465     for (@block_not_inline) {
1466     $self->{onerror}->(node => $_, type => 'element not allowed');
1467     }
1468     }
1469     }
1470     } elsif ($nt == 5) {
1471     unshift @nodes, @{$node->child_nodes};
1472     }
1473     }
1474    
1475     my $end = $self->_add_minuses
1476     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1477     $HTMLSectioningElements);
1478     push @$new_todos, $end;
1479    
1480     if ($content eq 'inline') {
1481     for (@$new_todos) {
1482     $_->{inline} = 1;
1483     }
1484     }
1485    
1486     return ($new_todos);
1487     },
1488     };
1489    
1490     $Element->{$HTML_NS}->{address} = {
1491     attrs_checker => $GetHTMLAttrsChecker->({}),
1492     checker => $HTMLInlineChecker,
1493     };
1494    
1495     $Element->{$HTML_NS}->{p} = {
1496     attrs_checker => $GetHTMLAttrsChecker->({}),
1497 wakaba 1.13 checker => $HTMLInlineChecker,
1498 wakaba 1.1 };
1499    
1500     $Element->{$HTML_NS}->{hr} = {
1501     attrs_checker => $GetHTMLAttrsChecker->({}),
1502     checker => $HTMLEmptyChecker,
1503     };
1504    
1505     $Element->{$HTML_NS}->{br} = {
1506     attrs_checker => $GetHTMLAttrsChecker->({}),
1507     checker => $HTMLEmptyChecker,
1508     };
1509    
1510     $Element->{$HTML_NS}->{dialog} = {
1511     attrs_checker => $GetHTMLAttrsChecker->({}),
1512     checker => sub {
1513     my ($self, $todo) = @_;
1514     my $el = $todo->{node};
1515     my $new_todos = [];
1516     my @nodes = (@{$el->child_nodes});
1517    
1518     my $phase = 'before dt';
1519     while (@nodes) {
1520     my $node = shift @nodes;
1521     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1522    
1523     my $nt = $node->node_type;
1524     if ($nt == 1) {
1525     my $node_ns = $node->namespace_uri;
1526     $node_ns = '' unless defined $node_ns;
1527     my $node_ln = $node->manakai_local_name;
1528     ## NOTE: |minuses| list is not checked since redundant
1529 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1530     #
1531     } elsif ($phase eq 'before dt') {
1532 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1533     $phase = 'before dd';
1534     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1535     $self->{onerror}
1536     ->(node => $node, type => 'ps element missing:dt');
1537     $phase = 'before dt';
1538     } else {
1539     $self->{onerror}->(node => $node, type => 'element not allowed');
1540     }
1541     } else { # before dd
1542     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1543     $phase = 'before dt';
1544     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1545     $self->{onerror}
1546     ->(node => $node, type => 'ps element missing:dd');
1547     $phase = 'before dd';
1548     } else {
1549     $self->{onerror}->(node => $node, type => 'element not allowed');
1550     }
1551     }
1552     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1553     unshift @nodes, @$sib;
1554     push @$new_todos, @$ch;
1555     } elsif ($nt == 3 or $nt == 4) {
1556     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1557     $self->{onerror}->(node => $node, type => 'character not allowed');
1558     }
1559     } elsif ($nt == 5) {
1560     unshift @nodes, @{$node->child_nodes};
1561     }
1562     }
1563     if ($phase eq 'before dd') {
1564 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1565 wakaba 1.1 }
1566     return ($new_todos);
1567     },
1568     };
1569    
1570     $Element->{$HTML_NS}->{pre} = {
1571     attrs_checker => $GetHTMLAttrsChecker->({}),
1572     checker => $HTMLStrictlyInlineChecker,
1573     };
1574    
1575     $Element->{$HTML_NS}->{ol} = {
1576     attrs_checker => $GetHTMLAttrsChecker->({
1577     start => $HTMLIntegerAttrChecker,
1578     }),
1579     checker => sub {
1580     my ($self, $todo) = @_;
1581     my $el = $todo->{node};
1582     my $new_todos = [];
1583     my @nodes = (@{$el->child_nodes});
1584    
1585     while (@nodes) {
1586     my $node = shift @nodes;
1587     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1588    
1589     my $nt = $node->node_type;
1590     if ($nt == 1) {
1591     my $node_ns = $node->namespace_uri;
1592     $node_ns = '' unless defined $node_ns;
1593     my $node_ln = $node->manakai_local_name;
1594     ## NOTE: |minuses| list is not checked since redundant
1595 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1596     #
1597     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1598 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1599     }
1600     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1601     unshift @nodes, @$sib;
1602     push @$new_todos, @$ch;
1603     } elsif ($nt == 3 or $nt == 4) {
1604     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1605     $self->{onerror}->(node => $node, type => 'character not allowed');
1606     }
1607     } elsif ($nt == 5) {
1608     unshift @nodes, @{$node->child_nodes};
1609     }
1610     }
1611    
1612     if ($todo->{inline}) {
1613     for (@$new_todos) {
1614     $_->{inline} = 1;
1615     }
1616     }
1617     return ($new_todos);
1618     },
1619     };
1620    
1621     $Element->{$HTML_NS}->{ul} = {
1622     attrs_checker => $GetHTMLAttrsChecker->({}),
1623     checker => $Element->{$HTML_NS}->{ol}->{checker},
1624     };
1625    
1626     $Element->{$HTML_NS}->{li} = {
1627     attrs_checker => $GetHTMLAttrsChecker->({
1628     start => sub {
1629     my ($self, $attr) = @_;
1630     my $parent = $attr->owner_element->manakai_parent_element;
1631     if (defined $parent) {
1632     my $parent_ns = $parent->namespace_uri;
1633     $parent_ns = '' unless defined $parent_ns;
1634     my $parent_ln = $parent->manakai_local_name;
1635     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1636     $self->{onerror}->(node => $attr, level => 'unsupported',
1637     type => 'attribute');
1638     }
1639     }
1640     $HTMLIntegerAttrChecker->($self, $attr);
1641     },
1642     }),
1643     checker => sub {
1644     my ($self, $todo) = @_;
1645     if ($todo->{inline}) {
1646     return $HTMLInlineChecker->($self, $todo);
1647     } else {
1648     return $HTMLBlockOrInlineChecker->($self, $todo);
1649     }
1650     },
1651     };
1652    
1653     $Element->{$HTML_NS}->{dl} = {
1654     attrs_checker => $GetHTMLAttrsChecker->({}),
1655     checker => sub {
1656     my ($self, $todo) = @_;
1657     my $el = $todo->{node};
1658     my $new_todos = [];
1659     my @nodes = (@{$el->child_nodes});
1660    
1661     my $phase = 'before dt';
1662     while (@nodes) {
1663     my $node = shift @nodes;
1664     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1665    
1666     my $nt = $node->node_type;
1667     if ($nt == 1) {
1668     my $node_ns = $node->namespace_uri;
1669     $node_ns = '' unless defined $node_ns;
1670     my $node_ln = $node->manakai_local_name;
1671     ## NOTE: |minuses| list is not checked since redundant
1672 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1673     #
1674     } elsif ($phase eq 'in dds') {
1675 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1676     #$phase = 'in dds';
1677     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1678     $phase = 'in dts';
1679     } else {
1680     $self->{onerror}->(node => $node, type => 'element not allowed');
1681     }
1682     } elsif ($phase eq 'in dts') {
1683     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1684     #$phase = 'in dts';
1685     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1686     $phase = 'in dds';
1687     } else {
1688     $self->{onerror}->(node => $node, type => 'element not allowed');
1689     }
1690     } else { # before dt
1691     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1692     $phase = 'in dts';
1693     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1694     $self->{onerror}
1695     ->(node => $node, type => 'ps element missing:dt');
1696     $phase = 'in dds';
1697     } else {
1698     $self->{onerror}->(node => $node, type => 'element not allowed');
1699     }
1700     }
1701     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1702     unshift @nodes, @$sib;
1703     push @$new_todos, @$ch;
1704     } elsif ($nt == 3 or $nt == 4) {
1705     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1706     $self->{onerror}->(node => $node, type => 'character not allowed');
1707     }
1708     } elsif ($nt == 5) {
1709     unshift @nodes, @{$node->child_nodes};
1710     }
1711     }
1712     if ($phase eq 'in dts') {
1713 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1714 wakaba 1.1 }
1715    
1716     if ($todo->{inline}) {
1717     for (@$new_todos) {
1718     $_->{inline} = 1;
1719     }
1720     }
1721     return ($new_todos);
1722     },
1723     };
1724    
1725     $Element->{$HTML_NS}->{dt} = {
1726     attrs_checker => $GetHTMLAttrsChecker->({}),
1727     checker => $HTMLStrictlyInlineChecker,
1728     };
1729    
1730     $Element->{$HTML_NS}->{dd} = {
1731     attrs_checker => $GetHTMLAttrsChecker->({}),
1732     checker => $Element->{$HTML_NS}->{li}->{checker},
1733     };
1734    
1735     $Element->{$HTML_NS}->{a} = {
1736     attrs_checker => sub {
1737     my ($self, $todo) = @_;
1738     my %attr;
1739     for my $attr (@{$todo->{node}->attributes}) {
1740     my $attr_ns = $attr->namespace_uri;
1741     $attr_ns = '' unless defined $attr_ns;
1742     my $attr_ln = $attr->manakai_local_name;
1743     my $checker;
1744     if ($attr_ns eq '') {
1745     $checker = {
1746     target => $HTMLTargetAttrChecker,
1747     href => $HTMLURIAttrChecker,
1748     ping => $HTMLSpaceURIsAttrChecker,
1749 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1750 wakaba 1.1 media => $HTMLMQAttrChecker,
1751     hreflang => $HTMLLanguageTagAttrChecker,
1752     type => $HTMLIMTAttrChecker,
1753     }->{$attr_ln};
1754     if ($checker) {
1755     $attr{$attr_ln} = $attr;
1756     } else {
1757     $checker = $HTMLAttrChecker->{$attr_ln};
1758     }
1759     }
1760     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1761     || $AttrChecker->{$attr_ns}->{''};
1762     if ($checker) {
1763     $checker->($self, $attr) if ref $checker;
1764     } else {
1765     $self->{onerror}->(node => $attr, level => 'unsupported',
1766     type => 'attribute');
1767     ## ISSUE: No comformance createria for unknown attributes in the spec
1768     }
1769     }
1770    
1771 wakaba 1.4 if (defined $attr{href}) {
1772     $self->{has_hyperlink_element} = 1;
1773     } else {
1774 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1775     if (defined $attr{$_}) {
1776     $self->{onerror}->(node => $attr{$_},
1777     type => 'attribute not allowed');
1778     }
1779     }
1780     }
1781     },
1782     checker => sub {
1783     my ($self, $todo) = @_;
1784    
1785     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1786     my ($new_todos, $ch)
1787 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
1788 wakaba 1.1 push @$new_todos, $end;
1789    
1790     $_->{flag}->{has_a} = 1 for @$new_todos;
1791 wakaba 1.12 ## TODO: <a> -> <a href> (revision 1115)
1792 wakaba 1.1
1793     return ($new_todos, $ch);
1794     },
1795     };
1796    
1797     $Element->{$HTML_NS}->{q} = {
1798     attrs_checker => $GetHTMLAttrsChecker->({
1799     cite => $HTMLURIAttrChecker,
1800     }),
1801     checker => $HTMLInlineOrStrictlyInlineChecker,
1802     };
1803    
1804     $Element->{$HTML_NS}->{cite} = {
1805     attrs_checker => $GetHTMLAttrsChecker->({}),
1806     checker => $HTMLStrictlyInlineChecker,
1807     };
1808    
1809     $Element->{$HTML_NS}->{em} = {
1810     attrs_checker => $GetHTMLAttrsChecker->({}),
1811     checker => $HTMLInlineOrStrictlyInlineChecker,
1812     };
1813    
1814     $Element->{$HTML_NS}->{strong} = {
1815     attrs_checker => $GetHTMLAttrsChecker->({}),
1816     checker => $HTMLInlineOrStrictlyInlineChecker,
1817     };
1818    
1819     $Element->{$HTML_NS}->{small} = {
1820     attrs_checker => $GetHTMLAttrsChecker->({}),
1821     checker => $HTMLInlineOrStrictlyInlineChecker,
1822     };
1823    
1824     $Element->{$HTML_NS}->{m} = {
1825     attrs_checker => $GetHTMLAttrsChecker->({}),
1826     checker => $HTMLInlineOrStrictlyInlineChecker,
1827     };
1828    
1829     $Element->{$HTML_NS}->{dfn} = {
1830     attrs_checker => $GetHTMLAttrsChecker->({}),
1831     checker => sub {
1832     my ($self, $todo) = @_;
1833    
1834     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1835     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1836     push @$sib, $end;
1837    
1838     my $node = $todo->{node};
1839     my $term = $node->get_attribute_ns (undef, 'title');
1840     unless (defined $term) {
1841     for my $child (@{$node->child_nodes}) {
1842     if ($child->node_type == 1) { # ELEMENT_NODE
1843     if (defined $term) {
1844     undef $term;
1845     last;
1846     } elsif ($child->manakai_local_name eq 'abbr') {
1847     my $nsuri = $child->namespace_uri;
1848     if (defined $nsuri and $nsuri eq $HTML_NS) {
1849     my $attr = $child->get_attribute_node_ns (undef, 'title');
1850     if ($attr) {
1851     $term = $attr->value;
1852     }
1853     }
1854     }
1855     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1856     ## TEXT_NODE or CDATA_SECTION_NODE
1857     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1858     next;
1859     }
1860     undef $term;
1861     last;
1862     }
1863     }
1864     unless (defined $term) {
1865     $term = $node->text_content;
1866     }
1867     }
1868     if ($self->{term}->{$term}) {
1869     $self->{onerror}->(node => $node, type => 'duplicate term');
1870     push @{$self->{term}->{$term}}, $node;
1871     } else {
1872     $self->{term}->{$term} = [$node];
1873     }
1874     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1875     ## has |title|.
1876    
1877     return ($sib, $ch);
1878     },
1879     };
1880    
1881     $Element->{$HTML_NS}->{abbr} = {
1882     attrs_checker => $GetHTMLAttrsChecker->({
1883     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1884     ## not different. The spec says that the |title| MAY be omitted
1885     ## if there is a |dfn| whose defining term is the abbreviation,
1886     ## but it does not prohibit |abbr| w/o |title| in other cases.
1887     }),
1888     checker => $HTMLStrictlyInlineChecker,
1889     };
1890    
1891     $Element->{$HTML_NS}->{time} = {
1892     attrs_checker => $GetHTMLAttrsChecker->({
1893     datetime => sub { 1 }, # checked in |checker|
1894     }),
1895     ## TODO: Write tests
1896     checker => sub {
1897     my ($self, $todo) = @_;
1898    
1899     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1900     my $input;
1901     my $reg_sp;
1902     my $input_node;
1903     if ($attr) {
1904     $input = $attr->value;
1905     $reg_sp = qr/[\x09-\x0D\x20]*/;
1906     $input_node = $attr;
1907     } else {
1908     $input = $todo->{node}->text_content;
1909     $reg_sp = qr/\p{Zs}*/;
1910     $input_node = $todo->{node};
1911    
1912     ## ISSUE: What is the definition for "successfully extracts a date
1913     ## or time"? If the algorithm says the string is invalid but
1914     ## return some date or time, is it "successfully"?
1915     }
1916    
1917     my $hour;
1918     my $minute;
1919     my $second;
1920     if ($input =~ /
1921     \A
1922     [\x09-\x0D\x20]*
1923     ([0-9]+) # 1
1924     (?>
1925     -([0-9]+) # 2
1926     -([0-9]+) # 3
1927     [\x09-\x0D\x20]*
1928     (?>
1929     T
1930     [\x09-\x0D\x20]*
1931     )?
1932     ([0-9]+) # 4
1933     :([0-9]+) # 5
1934     (?>
1935     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1936     )?
1937     [\x09-\x0D\x20]*
1938     (?>
1939     Z
1940     [\x09-\x0D\x20]*
1941     |
1942     [+-]([0-9]+):([0-9]+) # 7, 8
1943     [\x09-\x0D\x20]*
1944     )?
1945     \z
1946     |
1947     :([0-9]+) # 9
1948     (?>
1949     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1950     )?
1951     [\x09-\x0D\x20]*\z
1952     )
1953     /x) {
1954     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1955     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1956     length $4 != 2 or length $5 != 2) {
1957     $self->{onerror}->(node => $input_node,
1958     type => 'dateortime:syntax error');
1959     }
1960    
1961     if (1 <= $2 and $2 <= 12) {
1962     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1963     if $3 < 1 or
1964     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1965     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1966     if $2 == 2 and $3 == 29 and
1967     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1968     } else {
1969     $self->{onerror}->(node => $input_node,
1970     type => 'datetime:bad month');
1971     }
1972    
1973     ($hour, $minute, $second) = ($4, $5, $6);
1974    
1975     if (defined $7) { ## [+-]hh:mm
1976     if (length $7 != 2 or length $8 != 2) {
1977     $self->{onerror}->(node => $input_node,
1978     type => 'dateortime:syntax error');
1979     }
1980    
1981     $self->{onerror}->(node => $input_node,
1982     type => 'datetime:bad timezone hour')
1983     if $7 > 23;
1984     $self->{onerror}->(node => $input_node,
1985     type => 'datetime:bad timezone minute')
1986     if $8 > 59;
1987     }
1988     } else { ## hh:mm
1989     if (length $1 != 2 or length $9 != 2) {
1990     $self->{onerror}->(node => $input_node,
1991     type => qq'dateortime:syntax error');
1992     }
1993    
1994     ($hour, $minute, $second) = ($1, $9, $10);
1995     }
1996    
1997     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
1998     if $hour > 23;
1999     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2000     if $minute > 59;
2001    
2002     if (defined $second) { ## s
2003     ## NOTE: Integer part of second don't have to have length of two.
2004    
2005     if (substr ($second, 0, 1) eq '.') {
2006     $self->{onerror}->(node => $input_node,
2007     type => 'dateortime:syntax error');
2008     }
2009    
2010     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2011     if $second >= 60;
2012     }
2013     } else {
2014     $self->{onerror}->(node => $input_node,
2015     type => 'dateortime:syntax error');
2016     }
2017    
2018     return $HTMLStrictlyInlineChecker->($self, $todo);
2019     },
2020     };
2021    
2022     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2023     attrs_checker => $GetHTMLAttrsChecker->({
2024     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2025     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2026     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2027     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2028     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2029     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2030     }),
2031     checker => $HTMLStrictlyInlineChecker,
2032     };
2033    
2034     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2035     attrs_checker => $GetHTMLAttrsChecker->({
2036     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2037     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2038     }),
2039     checker => $HTMLStrictlyInlineChecker,
2040     };
2041    
2042     $Element->{$HTML_NS}->{code} = {
2043     attrs_checker => $GetHTMLAttrsChecker->({}),
2044     ## NOTE: Though |title| has special semantics,
2045     ## syntatically same as the |title| as global attribute.
2046     checker => $HTMLInlineOrStrictlyInlineChecker,
2047     };
2048    
2049     $Element->{$HTML_NS}->{var} = {
2050     attrs_checker => $GetHTMLAttrsChecker->({}),
2051     ## NOTE: Though |title| has special semantics,
2052     ## syntatically same as the |title| as global attribute.
2053     checker => $HTMLStrictlyInlineChecker,
2054     };
2055    
2056     $Element->{$HTML_NS}->{samp} = {
2057     attrs_checker => $GetHTMLAttrsChecker->({}),
2058     ## NOTE: Though |title| has special semantics,
2059     ## syntatically same as the |title| as global attribute.
2060     checker => $HTMLInlineOrStrictlyInlineChecker,
2061     };
2062    
2063     $Element->{$HTML_NS}->{kbd} = {
2064     attrs_checker => $GetHTMLAttrsChecker->({}),
2065     checker => $HTMLStrictlyInlineChecker,
2066     };
2067    
2068     $Element->{$HTML_NS}->{sub} = {
2069     attrs_checker => $GetHTMLAttrsChecker->({}),
2070     checker => $HTMLStrictlyInlineChecker,
2071     };
2072    
2073     $Element->{$HTML_NS}->{sup} = {
2074     attrs_checker => $GetHTMLAttrsChecker->({}),
2075     checker => $HTMLStrictlyInlineChecker,
2076     };
2077    
2078     $Element->{$HTML_NS}->{span} = {
2079     attrs_checker => $GetHTMLAttrsChecker->({}),
2080     ## NOTE: Though |title| has special semantics,
2081     ## syntatically same as the |title| as global attribute.
2082     checker => $HTMLInlineOrStrictlyInlineChecker,
2083     };
2084    
2085     $Element->{$HTML_NS}->{i} = {
2086     attrs_checker => $GetHTMLAttrsChecker->({}),
2087     ## NOTE: Though |title| has special semantics,
2088     ## syntatically same as the |title| as global attribute.
2089     checker => $HTMLStrictlyInlineChecker,
2090     };
2091    
2092     $Element->{$HTML_NS}->{b} = {
2093     attrs_checker => $GetHTMLAttrsChecker->({}),
2094     checker => $HTMLStrictlyInlineChecker,
2095     };
2096    
2097     $Element->{$HTML_NS}->{bdo} = {
2098     attrs_checker => sub {
2099     my ($self, $todo) = @_;
2100     $GetHTMLAttrsChecker->({})->($self, $todo);
2101     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2102     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2103     }
2104     },
2105     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2106     checker => $HTMLStrictlyInlineChecker,
2107     };
2108    
2109     $Element->{$HTML_NS}->{ins} = {
2110     attrs_checker => $GetHTMLAttrsChecker->({
2111     cite => $HTMLURIAttrChecker,
2112     datetime => $HTMLDatetimeAttrChecker,
2113     }),
2114     checker => $HTMLTransparentChecker,
2115     };
2116    
2117     $Element->{$HTML_NS}->{del} = {
2118     attrs_checker => $GetHTMLAttrsChecker->({
2119     cite => $HTMLURIAttrChecker,
2120     datetime => $HTMLDatetimeAttrChecker,
2121     }),
2122     checker => sub {
2123     my ($self, $todo) = @_;
2124    
2125     my $parent = $todo->{node}->manakai_parent_element;
2126     if (defined $parent) {
2127     my $nsuri = $parent->namespace_uri;
2128     $nsuri = '' unless defined $nsuri;
2129     my $ln = $parent->manakai_local_name;
2130     my $eldef = $Element->{$nsuri}->{$ln} ||
2131     $Element->{$nsuri}->{''} ||
2132     $ElementDefault;
2133     return $eldef->{checker}->($self, $todo);
2134     } else {
2135     return $HTMLBlockOrInlineChecker->($self, $todo);
2136     }
2137     },
2138     };
2139    
2140     ## TODO: figure
2141 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2142 wakaba 1.1
2143 wakaba 1.4 ## TODO: |alt|
2144 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2145     attrs_checker => sub {
2146     my ($self, $todo) = @_;
2147     $GetHTMLAttrsChecker->({
2148     alt => sub { }, ## NOTE: No syntactical requirement
2149     src => $HTMLURIAttrChecker,
2150     usemap => $HTMLUsemapAttrChecker,
2151     ismap => sub {
2152     my ($self, $attr, $parent_todo) = @_;
2153     if (not $todo->{flag}->{has_a}) {
2154     $self->{onerror}->(node => $attr, type => 'attribute not allowed');
2155     }
2156     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2157     },
2158     ## TODO: height
2159     ## TODO: width
2160     })->($self, $todo);
2161     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2162     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2163     }
2164     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2165     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2166     }
2167     },
2168     checker => $HTMLEmptyChecker,
2169     };
2170    
2171     $Element->{$HTML_NS}->{iframe} = {
2172     attrs_checker => $GetHTMLAttrsChecker->({
2173     src => $HTMLURIAttrChecker,
2174     }),
2175     checker => $HTMLTextChecker,
2176     };
2177    
2178     $Element->{$HTML_NS}->{embed} = {
2179     attrs_checker => sub {
2180     my ($self, $todo) = @_;
2181     my $has_src;
2182     for my $attr (@{$todo->{node}->attributes}) {
2183     my $attr_ns = $attr->namespace_uri;
2184     $attr_ns = '' unless defined $attr_ns;
2185     my $attr_ln = $attr->manakai_local_name;
2186     my $checker;
2187     if ($attr_ns eq '') {
2188     if ($attr_ln eq 'src') {
2189     $checker = $HTMLURIAttrChecker;
2190     $has_src = 1;
2191     } elsif ($attr_ln eq 'type') {
2192     $checker = $HTMLIMTAttrChecker;
2193     } else {
2194     ## TODO: height
2195     ## TODO: width
2196     $checker = $HTMLAttrChecker->{$attr_ln}
2197     || sub { }; ## NOTE: Any local attribute is ok.
2198     }
2199     }
2200     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2201     || $AttrChecker->{$attr_ns}->{''};
2202     if ($checker) {
2203     $checker->($self, $attr);
2204     } else {
2205     $self->{onerror}->(node => $attr, level => 'unsupported',
2206     type => 'attribute');
2207     ## ISSUE: No comformance createria for global attributes in the spec
2208     }
2209     }
2210    
2211     unless ($has_src) {
2212     $self->{onerror}->(node => $todo->{node},
2213     type => 'attribute missing:src');
2214     }
2215     },
2216     checker => $HTMLEmptyChecker,
2217     };
2218    
2219     $Element->{$HTML_NS}->{object} = {
2220     attrs_checker => sub {
2221     my ($self, $todo) = @_;
2222     $GetHTMLAttrsChecker->({
2223     data => $HTMLURIAttrChecker,
2224     type => $HTMLIMTAttrChecker,
2225     usemap => $HTMLUsemapAttrChecker,
2226     ## TODO: width
2227     ## TODO: height
2228     })->($self, $todo);
2229     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2230     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2231     $self->{onerror}->(node => $todo->{node},
2232     type => 'attribute missing:data|type');
2233     }
2234     }
2235     },
2236     checker => $ElementDefault->{checker}, ## TODO
2237 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2238 wakaba 1.1 };
2239    
2240     $Element->{$HTML_NS}->{param} = {
2241     attrs_checker => sub {
2242     my ($self, $todo) = @_;
2243     $GetHTMLAttrsChecker->({
2244     name => sub { },
2245     value => sub { },
2246     })->($self, $todo);
2247     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2248     $self->{onerror}->(node => $todo->{node},
2249     type => 'attribute missing:name');
2250     }
2251     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2252     $self->{onerror}->(node => $todo->{node},
2253     type => 'attribute missing:value');
2254     }
2255     },
2256     checker => $HTMLEmptyChecker,
2257     };
2258    
2259     $Element->{$HTML_NS}->{video} = {
2260     attrs_checker => $GetHTMLAttrsChecker->({
2261     src => $HTMLURIAttrChecker,
2262     ## TODO: start, loopstart, loopend, end
2263     ## ISSUE: they MUST be "value time offset"s. Value?
2264 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2265 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2266     controls => $GetHTMLBooleanAttrChecker->('controls'),
2267 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2268     ## TODO: width, height (not for audio!)
2269 wakaba 1.1 }),
2270     checker => sub {
2271     my ($self, $todo) = @_;
2272    
2273     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2274     return $HTMLBlockOrInlineChecker->($self, $todo);
2275     } else {
2276     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2277     ->($self, $todo);
2278     }
2279     },
2280     };
2281    
2282     $Element->{$HTML_NS}->{audio} = {
2283     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2284     checker => $Element->{$HTML_NS}->{video}->{checker},
2285     };
2286    
2287     $Element->{$HTML_NS}->{source} = {
2288     attrs_checker => sub {
2289     my ($self, $todo) = @_;
2290     $GetHTMLAttrsChecker->({
2291     src => $HTMLURIAttrChecker,
2292     type => $HTMLIMTAttrChecker,
2293     media => $HTMLMQAttrChecker,
2294     })->($self, $todo);
2295     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2296     $self->{onerror}->(node => $todo->{node},
2297     type => 'attribute missing:src');
2298     }
2299     },
2300     checker => $HTMLEmptyChecker,
2301     };
2302    
2303     $Element->{$HTML_NS}->{canvas} = {
2304     attrs_checker => $GetHTMLAttrsChecker->({
2305     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2306     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2307     }),
2308     checker => $HTMLInlineChecker,
2309     };
2310    
2311     $Element->{$HTML_NS}->{map} = {
2312 wakaba 1.4 attrs_checker => sub {
2313     my ($self, $todo) = @_;
2314     my $has_id;
2315     $GetHTMLAttrsChecker->({
2316     id => sub {
2317     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2318     my ($self, $attr) = @_;
2319     my $value = $attr->value;
2320     if (length $value > 0) {
2321     if ($self->{id}->{$value}) {
2322     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2323     push @{$self->{id}->{$value}}, $attr;
2324     } else {
2325     $self->{id}->{$value} = [$attr];
2326     }
2327 wakaba 1.1 } else {
2328 wakaba 1.4 ## NOTE: MUST contain at least one character
2329     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2330 wakaba 1.1 }
2331 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2332     $self->{onerror}->(node => $attr, type => 'space in ID');
2333     }
2334     $self->{map}->{$value} ||= $attr;
2335     $has_id = 1;
2336     },
2337     })->($self, $todo);
2338     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2339     unless $has_id;
2340     },
2341 wakaba 1.1 checker => $HTMLBlockChecker,
2342     };
2343    
2344     $Element->{$HTML_NS}->{area} = {
2345     attrs_checker => sub {
2346     my ($self, $todo) = @_;
2347     my %attr;
2348     my $coords;
2349     for my $attr (@{$todo->{node}->attributes}) {
2350     my $attr_ns = $attr->namespace_uri;
2351     $attr_ns = '' unless defined $attr_ns;
2352     my $attr_ln = $attr->manakai_local_name;
2353     my $checker;
2354     if ($attr_ns eq '') {
2355     $checker = {
2356     alt => sub { },
2357     ## NOTE: |alt| value has no conformance creteria.
2358     shape => $GetHTMLEnumeratedAttrChecker->({
2359     circ => -1, circle => 1,
2360     default => 1,
2361     poly => 1, polygon => -1,
2362     rect => 1, rectangle => -1,
2363     }),
2364     coords => sub {
2365     my ($self, $attr) = @_;
2366     my $value = $attr->value;
2367     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2368     $coords = [split /,/, $value];
2369     } else {
2370     $self->{onerror}->(node => $attr,
2371     type => 'coords:syntax error');
2372     }
2373     },
2374     target => $HTMLTargetAttrChecker,
2375     href => $HTMLURIAttrChecker,
2376     ping => $HTMLSpaceURIsAttrChecker,
2377 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2378 wakaba 1.1 media => $HTMLMQAttrChecker,
2379     hreflang => $HTMLLanguageTagAttrChecker,
2380     type => $HTMLIMTAttrChecker,
2381     }->{$attr_ln};
2382     if ($checker) {
2383     $attr{$attr_ln} = $attr;
2384     } else {
2385     $checker = $HTMLAttrChecker->{$attr_ln};
2386     }
2387     }
2388     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2389     || $AttrChecker->{$attr_ns}->{''};
2390     if ($checker) {
2391     $checker->($self, $attr) if ref $checker;
2392     } else {
2393     $self->{onerror}->(node => $attr, level => 'unsupported',
2394     type => 'attribute');
2395     ## ISSUE: No comformance createria for unknown attributes in the spec
2396     }
2397     }
2398    
2399     if (defined $attr{href}) {
2400 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2401 wakaba 1.1 unless (defined $attr{alt}) {
2402     $self->{onerror}->(node => $todo->{node},
2403     type => 'attribute missing:alt');
2404     }
2405     } else {
2406     for (qw/target ping rel media hreflang type alt/) {
2407     if (defined $attr{$_}) {
2408     $self->{onerror}->(node => $attr{$_},
2409     type => 'attribute not allowed');
2410     }
2411     }
2412     }
2413    
2414     my $shape = 'rectangle';
2415     if (defined $attr{shape}) {
2416     $shape = {
2417     circ => 'circle', circle => 'circle',
2418     default => 'default',
2419     poly => 'polygon', polygon => 'polygon',
2420     rect => 'rectangle', rectangle => 'rectangle',
2421     }->{lc $attr{shape}->value} || 'rectangle';
2422     ## TODO: ASCII lowercase?
2423     }
2424    
2425     if ($shape eq 'circle') {
2426     if (defined $attr{coords}) {
2427     if (defined $coords) {
2428     if (@$coords == 3) {
2429     if ($coords->[2] < 0) {
2430     $self->{onerror}->(node => $attr{coords},
2431     type => 'coords:out of range:2');
2432     }
2433     } else {
2434     $self->{onerror}->(node => $attr{coords},
2435     type => 'coords:number:3:'.@$coords);
2436     }
2437     } else {
2438     ## NOTE: A syntax error has been reported.
2439     }
2440     } else {
2441     $self->{onerror}->(node => $todo->{node},
2442     type => 'attribute missing:coords');
2443     }
2444     } elsif ($shape eq 'default') {
2445     if (defined $attr{coords}) {
2446     $self->{onerror}->(node => $attr{coords},
2447     type => 'attribute not allowed');
2448     }
2449     } elsif ($shape eq 'polygon') {
2450     if (defined $attr{coords}) {
2451     if (defined $coords) {
2452     if (@$coords >= 6) {
2453     unless (@$coords % 2 == 0) {
2454     $self->{onerror}->(node => $attr{coords},
2455     type => 'coords:number:even:'.@$coords);
2456     }
2457     } else {
2458     $self->{onerror}->(node => $attr{coords},
2459     type => 'coords:number:>=6:'.@$coords);
2460     }
2461     } else {
2462     ## NOTE: A syntax error has been reported.
2463     }
2464     } else {
2465     $self->{onerror}->(node => $todo->{node},
2466     type => 'attribute missing:coords');
2467     }
2468     } elsif ($shape eq 'rectangle') {
2469     if (defined $attr{coords}) {
2470     if (defined $coords) {
2471     if (@$coords == 4) {
2472     unless ($coords->[0] < $coords->[2]) {
2473     $self->{onerror}->(node => $attr{coords},
2474     type => 'coords:out of range:0');
2475     }
2476     unless ($coords->[1] < $coords->[3]) {
2477     $self->{onerror}->(node => $attr{coords},
2478     type => 'coords:out of range:1');
2479     }
2480     } else {
2481     $self->{onerror}->(node => $attr{coords},
2482     type => 'coords:number:4:'.@$coords);
2483     }
2484     } else {
2485     ## NOTE: A syntax error has been reported.
2486     }
2487     } else {
2488     $self->{onerror}->(node => $todo->{node},
2489     type => 'attribute missing:coords');
2490     }
2491     }
2492     },
2493     checker => $HTMLEmptyChecker,
2494     };
2495     ## TODO: only in map
2496    
2497     $Element->{$HTML_NS}->{table} = {
2498     attrs_checker => $GetHTMLAttrsChecker->({}),
2499     checker => sub {
2500     my ($self, $todo) = @_;
2501     my $el = $todo->{node};
2502     my $new_todos = [];
2503     my @nodes = (@{$el->child_nodes});
2504    
2505     my $phase = 'before caption';
2506     my $has_tfoot;
2507     while (@nodes) {
2508     my $node = shift @nodes;
2509     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2510    
2511     my $nt = $node->node_type;
2512     if ($nt == 1) {
2513     my $node_ns = $node->namespace_uri;
2514     $node_ns = '' unless defined $node_ns;
2515     my $node_ln = $node->manakai_local_name;
2516     ## NOTE: |minuses| list is not checked since redundant
2517 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2518     #
2519     } elsif ($phase eq 'in tbodys') {
2520 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2521     #$phase = 'in tbodys';
2522     } elsif (not $has_tfoot and
2523     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2524     $phase = 'after tfoot';
2525     $has_tfoot = 1;
2526     } else {
2527     $self->{onerror}->(node => $node, type => 'element not allowed');
2528     }
2529     } elsif ($phase eq 'in trs') {
2530     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2531     #$phase = 'in trs';
2532     } elsif (not $has_tfoot and
2533     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2534     $phase = 'after tfoot';
2535     $has_tfoot = 1;
2536     } else {
2537     $self->{onerror}->(node => $node, type => 'element not allowed');
2538     }
2539     } elsif ($phase eq 'after thead') {
2540     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2541     $phase = 'in tbodys';
2542     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2543     $phase = 'in trs';
2544     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2545     $phase = 'in tbodys';
2546     $has_tfoot = 1;
2547     } else {
2548     $self->{onerror}->(node => $node, type => 'element not allowed');
2549     }
2550     } elsif ($phase eq 'in colgroup') {
2551     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2552     $phase = 'in colgroup';
2553     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2554     $phase = 'after thead';
2555     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2556     $phase = 'in tbodys';
2557     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2558     $phase = 'in trs';
2559     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2560     $phase = 'in tbodys';
2561     $has_tfoot = 1;
2562     } else {
2563     $self->{onerror}->(node => $node, type => 'element not allowed');
2564     }
2565     } elsif ($phase eq 'before caption') {
2566     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2567     $phase = 'in colgroup';
2568     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2569     $phase = 'in colgroup';
2570     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2571     $phase = 'after thead';
2572     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2573     $phase = 'in tbodys';
2574     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2575     $phase = 'in trs';
2576     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2577     $phase = 'in tbodys';
2578     $has_tfoot = 1;
2579     } else {
2580     $self->{onerror}->(node => $node, type => 'element not allowed');
2581     }
2582     } else { # after tfoot
2583     $self->{onerror}->(node => $node, type => 'element not allowed');
2584     }
2585     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2586     unshift @nodes, @$sib;
2587     push @$new_todos, @$ch;
2588     } elsif ($nt == 3 or $nt == 4) {
2589     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2590     $self->{onerror}->(node => $node, type => 'character not allowed');
2591     }
2592     } elsif ($nt == 5) {
2593     unshift @nodes, @{$node->child_nodes};
2594     }
2595     }
2596    
2597     ## Table model errors
2598     require Whatpm::HTMLTable;
2599     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2600     my %opt = @_;
2601     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2602     });
2603     push @{$self->{return}->{table}}, $todo->{node};
2604    
2605     return ($new_todos);
2606     },
2607     };
2608    
2609     $Element->{$HTML_NS}->{caption} = {
2610     attrs_checker => $GetHTMLAttrsChecker->({}),
2611 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2612 wakaba 1.1 };
2613    
2614     $Element->{$HTML_NS}->{colgroup} = {
2615     attrs_checker => $GetHTMLAttrsChecker->({
2616     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2617     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2618     ## TODO: "attribute not supported" if |col|.
2619     ## ISSUE: MUST NOT if any |col|?
2620     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2621     }),
2622     checker => sub {
2623     my ($self, $todo) = @_;
2624     my $el = $todo->{node};
2625     my $new_todos = [];
2626     my @nodes = (@{$el->child_nodes});
2627    
2628     while (@nodes) {
2629     my $node = shift @nodes;
2630     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2631    
2632     my $nt = $node->node_type;
2633     if ($nt == 1) {
2634     my $node_ns = $node->namespace_uri;
2635     $node_ns = '' unless defined $node_ns;
2636     my $node_ln = $node->manakai_local_name;
2637     ## NOTE: |minuses| list is not checked since redundant
2638 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2639     #
2640     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2641 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2642     }
2643     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2644     unshift @nodes, @$sib;
2645     push @$new_todos, @$ch;
2646     } elsif ($nt == 3 or $nt == 4) {
2647     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2648     $self->{onerror}->(node => $node, type => 'character not allowed');
2649     }
2650     } elsif ($nt == 5) {
2651     unshift @nodes, @{$node->child_nodes};
2652     }
2653     }
2654     return ($new_todos);
2655     },
2656     };
2657    
2658     $Element->{$HTML_NS}->{col} = {
2659     attrs_checker => $GetHTMLAttrsChecker->({
2660     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2661     }),
2662     checker => $HTMLEmptyChecker,
2663     };
2664    
2665     $Element->{$HTML_NS}->{tbody} = {
2666     attrs_checker => $GetHTMLAttrsChecker->({}),
2667     checker => sub {
2668     my ($self, $todo) = @_;
2669     my $el = $todo->{node};
2670     my $new_todos = [];
2671     my @nodes = (@{$el->child_nodes});
2672    
2673     my $has_tr;
2674     while (@nodes) {
2675     my $node = shift @nodes;
2676     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2677    
2678     my $nt = $node->node_type;
2679     if ($nt == 1) {
2680     my $node_ns = $node->namespace_uri;
2681     $node_ns = '' unless defined $node_ns;
2682     my $node_ln = $node->manakai_local_name;
2683     ## NOTE: |minuses| list is not checked since redundant
2684 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2685     #
2686     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2687 wakaba 1.1 $has_tr = 1;
2688     } else {
2689     $self->{onerror}->(node => $node, type => 'element not allowed');
2690     }
2691     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2692     unshift @nodes, @$sib;
2693     push @$new_todos, @$ch;
2694     } elsif ($nt == 3 or $nt == 4) {
2695     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2696     $self->{onerror}->(node => $node, type => 'character not allowed');
2697     }
2698     } elsif ($nt == 5) {
2699     unshift @nodes, @{$node->child_nodes};
2700     }
2701     }
2702     unless ($has_tr) {
2703     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2704     }
2705     return ($new_todos);
2706     },
2707     };
2708    
2709     $Element->{$HTML_NS}->{thead} = {
2710     attrs_checker => $GetHTMLAttrsChecker->({}),
2711     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2712     };
2713    
2714     $Element->{$HTML_NS}->{tfoot} = {
2715     attrs_checker => $GetHTMLAttrsChecker->({}),
2716     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2717     };
2718    
2719     $Element->{$HTML_NS}->{tr} = {
2720     attrs_checker => $GetHTMLAttrsChecker->({}),
2721     checker => sub {
2722     my ($self, $todo) = @_;
2723     my $el = $todo->{node};
2724     my $new_todos = [];
2725     my @nodes = (@{$el->child_nodes});
2726    
2727     my $has_td;
2728     while (@nodes) {
2729     my $node = shift @nodes;
2730     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2731    
2732     my $nt = $node->node_type;
2733     if ($nt == 1) {
2734     my $node_ns = $node->namespace_uri;
2735     $node_ns = '' unless defined $node_ns;
2736     my $node_ln = $node->manakai_local_name;
2737     ## NOTE: |minuses| list is not checked since redundant
2738 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2739     #
2740     } elsif ($node_ns eq $HTML_NS and
2741     ($node_ln eq 'td' or $node_ln eq 'th')) {
2742 wakaba 1.1 $has_td = 1;
2743     } else {
2744     $self->{onerror}->(node => $node, type => 'element not allowed');
2745     }
2746     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2747     unshift @nodes, @$sib;
2748     push @$new_todos, @$ch;
2749     } elsif ($nt == 3 or $nt == 4) {
2750     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2751     $self->{onerror}->(node => $node, type => 'character not allowed');
2752     }
2753     } elsif ($nt == 5) {
2754     unshift @nodes, @{$node->child_nodes};
2755     }
2756     }
2757     unless ($has_td) {
2758     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2759     }
2760     return ($new_todos);
2761     },
2762     };
2763    
2764     $Element->{$HTML_NS}->{td} = {
2765     attrs_checker => $GetHTMLAttrsChecker->({
2766     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2767     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2768     }),
2769     checker => $HTMLBlockOrInlineChecker,
2770     };
2771    
2772     $Element->{$HTML_NS}->{th} = {
2773     attrs_checker => $GetHTMLAttrsChecker->({
2774     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2775     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2776     scope => $GetHTMLEnumeratedAttrChecker
2777     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2778     }),
2779     checker => $HTMLBlockOrInlineChecker,
2780     };
2781    
2782     ## TODO: forms
2783 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2784 wakaba 1.1
2785     $Element->{$HTML_NS}->{script} = {
2786 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2787 wakaba 1.1 src => $HTMLURIAttrChecker,
2788     defer => $GetHTMLBooleanAttrChecker->('defer'),
2789     async => $GetHTMLBooleanAttrChecker->('async'),
2790     type => $HTMLIMTAttrChecker,
2791 wakaba 1.9 }),
2792 wakaba 1.1 checker => sub {
2793     my ($self, $todo) = @_;
2794    
2795     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2796     return $HTMLEmptyChecker->($self, $todo);
2797     } else {
2798     ## NOTE: No content model conformance in HTML5 spec.
2799     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2800     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2801     if ((defined $type and $type eq '') or
2802     (defined $language and $language eq '')) {
2803     $type = 'text/javascript';
2804     } elsif (defined $type) {
2805     #
2806     } elsif (defined $language) {
2807     $type = 'text/' . $language;
2808     } else {
2809     $type = 'text/javascript';
2810     }
2811     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2812     type => 'script:'.$type); ## TODO: $type normalization
2813     return $AnyChecker->($self, $todo);
2814     }
2815     },
2816     };
2817    
2818     ## NOTE: When script is disabled.
2819     $Element->{$HTML_NS}->{noscript} = {
2820 wakaba 1.3 attrs_checker => sub {
2821     my ($self, $todo) = @_;
2822    
2823     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2824     ## since the later is not invoked when the |noscript| is used as a
2825     ## transparent element.
2826     unless ($todo->{node}->owner_document->manakai_is_html) {
2827     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2828     }
2829    
2830     $GetHTMLAttrsChecker->({})->($self, $todo);
2831     },
2832 wakaba 1.1 checker => sub {
2833     my ($self, $todo) = @_;
2834    
2835 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2836     my $new_todos = [];
2837     my @nodes = (@{$todo->{node}->child_nodes});
2838    
2839     while (@nodes) {
2840     my $node = shift @nodes;
2841     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2842    
2843     my $nt = $node->node_type;
2844     if ($nt == 1) {
2845     my $node_ns = $node->namespace_uri;
2846     $node_ns = '' unless defined $node_ns;
2847     my $node_ln = $node->manakai_local_name;
2848 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2849     #
2850     } elsif ($node_ns eq $HTML_NS) {
2851 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2852     #
2853     } elsif ($node_ln eq 'meta') {
2854 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2855     #
2856 wakaba 1.3 } else {
2857 wakaba 1.5 $self->{onerror}->(node => $node,
2858     type => 'element not allowed');
2859 wakaba 1.3 }
2860     } else {
2861     $self->{onerror}->(node => $node, type => 'element not allowed');
2862     }
2863     } else {
2864     $self->{onerror}->(node => $node, type => 'element not allowed');
2865     }
2866    
2867     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2868     unshift @nodes, @$sib;
2869     push @$new_todos, @$ch;
2870     } elsif ($nt == 3 or $nt == 4) {
2871     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2872     $self->{onerror}->(node => $node, type => 'character not allowed');
2873     }
2874     } elsif ($nt == 5) {
2875     unshift @nodes, @{$node->child_nodes};
2876     }
2877     }
2878     return ($new_todos);
2879     } else {
2880     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2881     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2882     push @$sib, $end;
2883     return ($sib, $ch);
2884     }
2885 wakaba 1.1 },
2886     };
2887 wakaba 1.3
2888     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2889 wakaba 1.1
2890     $Element->{$HTML_NS}->{'event-source'} = {
2891     attrs_checker => $GetHTMLAttrsChecker->({
2892     src => $HTMLURIAttrChecker,
2893     }),
2894     checker => $HTMLEmptyChecker,
2895     };
2896    
2897     $Element->{$HTML_NS}->{details} = {
2898     attrs_checker => $GetHTMLAttrsChecker->({
2899     open => $GetHTMLBooleanAttrChecker->('open'),
2900     }),
2901     checker => sub {
2902     my ($self, $todo) = @_;
2903    
2904     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2905     my ($sib, $ch)
2906     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2907     ->($self, $todo);
2908     push @$sib, $end;
2909     return ($sib, $ch);
2910     },
2911     };
2912    
2913     $Element->{$HTML_NS}->{datagrid} = {
2914     attrs_checker => $GetHTMLAttrsChecker->({
2915     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2916     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2917     }),
2918     checker => sub {
2919     my ($self, $todo) = @_;
2920     my $el = $todo->{node};
2921     my $new_todos = [];
2922     my @nodes = (@{$el->child_nodes});
2923    
2924     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2925    
2926     ## Block-table Block* | table | select | datalist | Empty
2927     my $mode = 'any';
2928     while (@nodes) {
2929     my $node = shift @nodes;
2930     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2931    
2932     my $nt = $node->node_type;
2933     if ($nt == 1) {
2934     my $node_ns = $node->namespace_uri;
2935     $node_ns = '' unless defined $node_ns;
2936     my $node_ln = $node->manakai_local_name;
2937     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2938 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2939     #
2940     } elsif ($mode eq 'block') {
2941 wakaba 1.1 $not_allowed = 1
2942     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2943     } elsif ($mode eq 'any') {
2944     if ($node_ns eq $HTML_NS and
2945     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2946     $mode = 'none';
2947     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2948     $mode = 'block';
2949     } else {
2950     $not_allowed = 1;
2951     }
2952     } else {
2953     $not_allowed = 1;
2954     }
2955     $self->{onerror}->(node => $node, type => 'element not allowed')
2956     if $not_allowed;
2957     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2958     unshift @nodes, @$sib;
2959     push @$new_todos, @$ch;
2960     } elsif ($nt == 3 or $nt == 4) {
2961     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2962     $self->{onerror}->(node => $node, type => 'character not allowed');
2963     }
2964     } elsif ($nt == 5) {
2965     unshift @nodes, @{$node->child_nodes};
2966     }
2967     }
2968    
2969     push @$new_todos, $end;
2970     return ($new_todos);
2971     },
2972     };
2973    
2974     $Element->{$HTML_NS}->{command} = {
2975     attrs_checker => $GetHTMLAttrsChecker->({
2976     checked => $GetHTMLBooleanAttrChecker->('checked'),
2977     default => $GetHTMLBooleanAttrChecker->('default'),
2978     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2979     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2980     icon => $HTMLURIAttrChecker,
2981     label => sub { }, ## NOTE: No conformance creteria
2982     radiogroup => sub { }, ## NOTE: No conformance creteria
2983     ## NOTE: |title| has special semantics, but no syntactical difference
2984     type => sub {
2985     my ($self, $attr) = @_;
2986     my $value = $attr->value;
2987     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2988     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2989     }
2990     },
2991     }),
2992     checker => $HTMLEmptyChecker,
2993     };
2994    
2995     $Element->{$HTML_NS}->{menu} = {
2996     attrs_checker => $GetHTMLAttrsChecker->({
2997     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
2998     id => sub {
2999     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3000     my ($self, $attr) = @_;
3001     my $value = $attr->value;
3002     if (length $value > 0) {
3003     if ($self->{id}->{$value}) {
3004     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3005     push @{$self->{id}->{$value}}, $attr;
3006     } else {
3007     $self->{id}->{$value} = [$attr];
3008     }
3009     } else {
3010     ## NOTE: MUST contain at least one character
3011     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3012     }
3013     if ($value =~ /[\x09-\x0D\x20]/) {
3014     $self->{onerror}->(node => $attr, type => 'space in ID');
3015     }
3016     $self->{menu}->{$value} ||= $attr;
3017     ## ISSUE: <menu id=""><p contextmenu=""> match?
3018     },
3019     label => sub { }, ## NOTE: No conformance creteria
3020     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3021     }),
3022     checker => sub {
3023     my ($self, $todo) = @_;
3024     my $el = $todo->{node};
3025     my $new_todos = [];
3026     my @nodes = (@{$el->child_nodes});
3027    
3028     my $content = 'li or inline';
3029     while (@nodes) {
3030     my $node = shift @nodes;
3031     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3032    
3033     my $nt = $node->node_type;
3034     if ($nt == 1) {
3035     my $node_ns = $node->namespace_uri;
3036     $node_ns = '' unless defined $node_ns;
3037     my $node_ln = $node->manakai_local_name;
3038     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3039 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3040     #
3041     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3042 wakaba 1.1 if ($content eq 'inline') {
3043     $not_allowed = 1;
3044     } elsif ($content eq 'li or inline') {
3045     $content = 'li';
3046     }
3047     } else {
3048     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3049     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3050     $content = 'inline';
3051     } else {
3052     $not_allowed = 1;
3053     }
3054     }
3055     $self->{onerror}->(node => $node, type => 'element not allowed')
3056     if $not_allowed;
3057     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3058     unshift @nodes, @$sib;
3059     push @$new_todos, @$ch;
3060     } elsif ($nt == 3 or $nt == 4) {
3061     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3062     if ($content eq 'li') {
3063     $self->{onerror}->(node => $node, type => 'character not allowed');
3064     } elsif ($content eq 'li or inline') {
3065     $content = 'inline';
3066     }
3067     }
3068     } elsif ($nt == 5) {
3069     unshift @nodes, @{$node->child_nodes};
3070     }
3071     }
3072    
3073     for (@$new_todos) {
3074     $_->{inline} = 1;
3075     }
3076     return ($new_todos);
3077     },
3078 wakaba 1.8 };
3079    
3080     $Element->{$HTML_NS}->{datatemplate} = {
3081     attrs_checker => $GetHTMLAttrsChecker->({}),
3082     checker => sub {
3083     my ($self, $todo) = @_;
3084     my $el = $todo->{node};
3085     my $new_todos = [];
3086     my @nodes = (@{$el->child_nodes});
3087    
3088     while (@nodes) {
3089     my $node = shift @nodes;
3090     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3091    
3092     my $nt = $node->node_type;
3093     if ($nt == 1) {
3094     my $node_ns = $node->namespace_uri;
3095     $node_ns = '' unless defined $node_ns;
3096     my $node_ln = $node->manakai_local_name;
3097     ## NOTE: |minuses| list is not checked since redundant
3098     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3099     #
3100     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3101     $self->{onerror}->(node => $node,
3102     type => 'element not allowed:datatemplate');
3103     }
3104     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3105     unshift @nodes, @$sib;
3106     push @$new_todos, @$ch;
3107     } elsif ($nt == 3 or $nt == 4) {
3108     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3109     $self->{onerror}->(node => $node, type => 'character not allowed');
3110     }
3111     } elsif ($nt == 5) {
3112     unshift @nodes, @{$node->child_nodes};
3113     }
3114     }
3115     return ($new_todos);
3116     },
3117     is_xml_root => 1,
3118     };
3119    
3120     $Element->{$HTML_NS}->{rule} = {
3121     attrs_checker => $GetHTMLAttrsChecker->({
3122     ## TODO: |condition| attribute
3123     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3124     }),
3125     checker => sub {
3126     my ($self, $todo) = @_;
3127    
3128     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3129     my ($sib, $ch) = $AnyChecker->($self, $todo);
3130     push @$sib, $end;
3131     return ($sib, $ch);
3132     },
3133     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3134     ## is applied to some conforming data, results in a conforming DOM tree.":
3135     ## We don't check against this.
3136     };
3137    
3138     $Element->{$HTML_NS}->{nest} = {
3139     attrs_checker => $GetHTMLAttrsChecker->({
3140     ## TODO: |filter| attribute
3141     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3142     }),
3143     checker => $HTMLEmptyChecker,
3144 wakaba 1.1 };
3145    
3146     $Element->{$HTML_NS}->{legend} = {
3147     attrs_checker => $GetHTMLAttrsChecker->({}),
3148     checker => sub {
3149     my ($self, $todo) = @_;
3150    
3151     my $parent = $todo->{node}->manakai_parent_element;
3152     if (defined $parent) {
3153     my $nsuri = $parent->namespace_uri;
3154     $nsuri = '' unless defined $nsuri;
3155     my $ln = $parent->manakai_local_name;
3156     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3157     return $HTMLInlineChecker->($self, $todo);
3158     } else {
3159 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3160 wakaba 1.1 }
3161     } else {
3162     return $HTMLInlineChecker->($self, $todo);
3163     }
3164    
3165     ## ISSUE: Content model is defined only for fieldset/legend,
3166     ## details/legend, and figure/legend.
3167     },
3168     };
3169    
3170     $Element->{$HTML_NS}->{div} = {
3171     attrs_checker => $GetHTMLAttrsChecker->({}),
3172     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3173     };
3174    
3175     $Element->{$HTML_NS}->{font} = {
3176     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3177     checker => $HTMLTransparentChecker,
3178     };
3179    
3180     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3181    
3182     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24