/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.24 - (hide annotations) (download)
Sun Nov 25 03:46:07 2007 UTC (17 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.23: +15 -12 lines
++ whatpm/Whatpm/ChangeLog	25 Nov 2007 03:45:37 -0000
2007-11-25  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm (check_element): New todo item type |descendant|.

++ whatpm/Whatpm/ContentChecker/ChangeLog	25 Nov 2007 03:46:01 -0000
2007-11-25  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm (header): Descendant constraint is redefined by
	new todo item type |descendant|.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     ## Strictly inline-level content
252     my $HTMLStrictlyInlineChecker = sub {
253     my ($self, $todo) = @_;
254     my $el = $todo->{node};
255     my $new_todos = [];
256     my @nodes = (@{$el->child_nodes});
257    
258     while (@nodes) {
259     my $node = shift @nodes;
260     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
261    
262     my $nt = $node->node_type;
263     if ($nt == 1) {
264     my $node_ns = $node->namespace_uri;
265     $node_ns = '' unless defined $node_ns;
266     my $node_ln = $node->manakai_local_name;
267     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
268     $not_allowed = 1
269 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
270     $self->{pluses}->{$node_ns}->{$node_ln};
271 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
272     if $not_allowed;
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 5) {
277     unshift @nodes, @{$node->child_nodes};
278     }
279     }
280    
281     for (@$new_todos) {
282     $_->{inline} = 1;
283     $_->{strictly_inline} = 1;
284     }
285     return ($new_todos);
286     }; # $HTMLStrictlyInlineChecker
287    
288 wakaba 1.8 ## Inline-level or strictly inline-level content
289 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
290     my ($self, $todo) = @_;
291     my $el = $todo->{node};
292     my $new_todos = [];
293     my @nodes = (@{$el->child_nodes});
294    
295     while (@nodes) {
296     my $node = shift @nodes;
297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
298    
299     my $nt = $node->node_type;
300     if ($nt == 1) {
301     my $node_ns = $node->namespace_uri;
302     $node_ns = '' unless defined $node_ns;
303     my $node_ln = $node->manakai_local_name;
304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
305     if ($todo->{strictly_inline}) {
306     $not_allowed = 1
307 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308     $self->{pluses}->{$node_ns}->{$node_ln};
309 wakaba 1.1 } else {
310     $not_allowed = 1
311     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
312 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
313     $self->{pluses}->{$node_ns}->{$node_ln};
314 wakaba 1.1 }
315     $self->{onerror}->(node => $node, type => 'element not allowed')
316     if $not_allowed;
317     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
318     unshift @nodes, @$sib;
319     push @$new_todos, @$ch;
320     } elsif ($nt == 5) {
321     unshift @nodes, @{$node->child_nodes};
322     }
323     }
324    
325     for (@$new_todos) {
326     $_->{inline} = 1;
327     $_->{strictly_inline} = 1;
328     }
329     return ($new_todos);
330     }; # $HTMLInlineOrStrictlyInlineChecker
331    
332 wakaba 1.13 ## TODO: New "significant" SHOULD-level requirement (revision 1114).
333 wakaba 1.1
334 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
335 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
336     my ($self, $todo) = @_;
337     my $el = $todo->{node};
338     my $new_todos = [];
339     my @nodes = (@{$el->child_nodes});
340    
341     my $content = 'block-or-inline'; # or 'block' or 'inline'
342     my @block_not_inline;
343     while (@nodes) {
344     my $node = shift @nodes;
345     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
346    
347 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
348     ## is conforming or not.
349    
350 wakaba 1.1 my $nt = $node->node_type;
351     if ($nt == 1) {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
356     if ($content eq 'block') {
357     $not_allowed = 1
358 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
359     $self->{pluses}->{$node_ns}->{$node_ln};
360 wakaba 1.1 } elsif ($content eq 'inline') {
361     $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
364     $self->{pluses}->{$node_ns}->{$node_ln};
365 wakaba 1.1 } else {
366     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
367     my $is_inline
368     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
369     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
370    
371     push @block_not_inline, $node
372     if $is_block and not $is_inline and not $not_allowed;
373 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
374 wakaba 1.1 $content = 'inline';
375     for (@block_not_inline) {
376     $self->{onerror}->(node => $_, type => 'element not allowed');
377     }
378     $not_allowed = 1 unless $is_inline;
379     }
380     }
381     $self->{onerror}->(node => $node, type => 'element not allowed')
382     if $not_allowed;
383     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
384     unshift @nodes, @$sib;
385     push @$new_todos, @$ch;
386     } elsif ($nt == 3 or $nt == 4) {
387     if ($node->data =~ /[^\x09-\x0D\x20]/) {
388     if ($content eq 'block') {
389     $self->{onerror}->(node => $node, type => 'character not allowed');
390     } else {
391     $content = 'inline';
392     for (@block_not_inline) {
393     $self->{onerror}->(node => $_, type => 'element not allowed');
394     }
395     }
396     }
397     } elsif ($nt == 5) {
398     unshift @nodes, @{$node->child_nodes};
399     }
400     }
401    
402     if ($content eq 'inline') {
403     for (@$new_todos) {
404     $_->{inline} = 1;
405     }
406     }
407     return ($new_todos);
408     };
409    
410     ## Zero or more XXX element, then either block-level or inline-level
411     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
412     my ($elnsuri, $ellname) = @_;
413     return sub {
414     my ($self, $todo) = @_;
415     my $el = $todo->{node};
416     my $new_todos = [];
417     my @nodes = (@{$el->child_nodes});
418    
419     my $has_non_style;
420     my $content = 'block-or-inline'; # or 'block' or 'inline'
421     my @block_not_inline;
422     while (@nodes) {
423     my $node = shift @nodes;
424     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
425    
426     my $nt = $node->node_type;
427     if ($nt == 1) {
428     my $node_ns = $node->namespace_uri;
429     $node_ns = '' unless defined $node_ns;
430     my $node_ln = $node->manakai_local_name;
431     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
432     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
433     $not_allowed = 1 if $has_non_style;
434     if ($ellname eq 'style' and
435     not $node->has_attribute_ns (undef, 'scoped')) {
436     $not_allowed = 1;
437     }
438     } elsif ($content eq 'block') {
439     $has_non_style = 1;
440     $not_allowed = 1
441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
442     $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 } elsif ($content eq 'inline') {
444     $has_non_style = 1;
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
447 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
448     $self->{pluses}->{$node_ns}->{$node_ln};
449 wakaba 1.1 } else {
450 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
452     my $is_inline
453     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455    
456     push @block_not_inline, $node
457     if $is_block and not $is_inline and not $not_allowed;
458 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
459 wakaba 1.1 $content = 'inline';
460     for (@block_not_inline) {
461     $self->{onerror}->(node => $_, type => 'element not allowed');
462     }
463     $not_allowed = 1 unless $is_inline;
464     }
465     }
466     $self->{onerror}->(node => $node, type => 'element not allowed')
467     if $not_allowed;
468     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
469     unshift @nodes, @$sib;
470     push @$new_todos, @$ch;
471     } elsif ($nt == 3 or $nt == 4) {
472     if ($node->data =~ /[^\x09-\x0D\x20]/) {
473     $has_non_style = 1;
474     if ($content eq 'block') {
475     $self->{onerror}->(node => $node, type => 'character not allowed');
476     } else {
477     $content = 'inline';
478     for (@block_not_inline) {
479     $self->{onerror}->(node => $_, type => 'element not allowed');
480     }
481     }
482     }
483     } elsif ($nt == 5) {
484     unshift @nodes, @{$node->child_nodes};
485     }
486     }
487    
488     if ($content eq 'inline') {
489     for (@$new_todos) {
490     $_->{inline} = 1;
491     }
492     }
493     return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
498    
499     our $AttrChecker;
500    
501     my $GetHTMLEnumeratedAttrChecker = sub {
502     my $states = shift; # {value => conforming ? 1 : -1}
503     return sub {
504     my ($self, $attr) = @_;
505     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
506     if ($states->{$value} > 0) {
507     #
508     } elsif ($states->{$value}) {
509     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
510     } else {
511     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
512     }
513     };
514     }; # $GetHTMLEnumeratedAttrChecker
515    
516     my $GetHTMLBooleanAttrChecker = sub {
517     my $local_name = shift;
518     return sub {
519     my ($self, $attr) = @_;
520     my $value = $attr->value;
521     unless ($value eq $local_name or $value eq '') {
522     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
523     }
524     };
525     }; # $GetHTMLBooleanAttrChecker
526    
527 wakaba 1.8 ## Unordered set of space-separated tokens
528 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
529 wakaba 1.8 my ($self, $attr) = @_;
530     my %word;
531     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
532     unless ($word{$word}) {
533     $word{$word} = 1;
534     } else {
535     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
536     }
537     }
538 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
539 wakaba 1.8
540 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
541     ## whose allowed values are defined by the section on link types)
542     my $HTMLLinkTypesAttrChecker = sub {
543 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
544 wakaba 1.1 my %word;
545     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
546     unless ($word{$word}) {
547     $word{$word} = 1;
548 wakaba 1.18 } elsif ($word eq 'up') {
549     #
550 wakaba 1.1 } else {
551     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
552     }
553     }
554     ## NOTE: Case sensitive match (since HTML5 spec does not say link
555     ## types are case-insensitive and it says "The value should not
556     ## be confusingly similar to any other defined value (e.g.
557     ## differing only in case).").
558     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
559     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
560     ## values to be used conformingly.
561     require Whatpm::_LinkTypeList;
562     our $LinkType;
563     for my $word (keys %word) {
564     my $def = $LinkType->{$word};
565     if (defined $def) {
566     if ($def->{status} eq 'accepted') {
567     if (defined $def->{effect}->[$a_or_area]) {
568     #
569     } else {
570     $self->{onerror}->(node => $attr,
571     type => 'link type:bad context:'.$word);
572     }
573     } elsif ($def->{status} eq 'proposal') {
574     $self->{onerror}->(node => $attr, level => 's',
575     type => 'link type:proposed:'.$word);
576 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
577     #
578     } else {
579     $self->{onerror}->(node => $attr,
580     type => 'link type:bad context:'.$word);
581     }
582 wakaba 1.1 } else { # rejected or synonym
583     $self->{onerror}->(node => $attr,
584     type => 'link type:non-conforming:'.$word);
585     }
586 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
587     if ($word eq 'alternate') {
588     #
589     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
590     $todo->{has_hyperlink_link_type} = 1;
591     }
592     }
593 wakaba 1.1 if ($def->{unique}) {
594     unless ($self->{has_link_type}->{$word}) {
595     $self->{has_link_type}->{$word} = 1;
596     } else {
597     $self->{onerror}->(node => $attr,
598     type => 'link type:duplicate:'.$word);
599     }
600     }
601     } else {
602     $self->{onerror}->(node => $attr, level => 'unsupported',
603     type => 'link type:'.$word);
604     }
605     }
606 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
607     if $word{alternate} and not $word{stylesheet};
608 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
609     ## says that using both X-Pingback: header field and HTML
610     ## <link rel=pingback> is deprecated and if both appears they
611     ## SHOULD contain exactly the same value.
612     ## ISSUE: Pingback 1.0 specification defines the exact representation
613     ## of its link element, which cannot be tested by the current arch.
614     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
615     ## include any string that matches to the pattern for the rel=pingback link,
616     ## which again inpossible to test.
617     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
618 wakaba 1.12
619     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
620 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
621     ## then they SHOULD be described in different paragraphs.".
622 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
623 wakaba 1.20
624     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
625 wakaba 1.1
626     ## URI (or IRI)
627     my $HTMLURIAttrChecker = sub {
628     my ($self, $attr) = @_;
629     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
630     my $value = $attr->value;
631     Whatpm::URIChecker->check_iri_reference ($value, sub {
632     my %opt = @_;
633     $self->{onerror}->(node => $attr, level => $opt{level},
634     type => 'URI::'.$opt{type}.
635     (defined $opt{position} ? ':'.$opt{position} : ''));
636     });
637 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
638 wakaba 1.1 }; # $HTMLURIAttrChecker
639    
640     ## A space separated list of one or more URIs (or IRIs)
641     my $HTMLSpaceURIsAttrChecker = sub {
642     my ($self, $attr) = @_;
643     my $i = 0;
644     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
645     Whatpm::URIChecker->check_iri_reference ($value, sub {
646     my %opt = @_;
647     $self->{onerror}->(node => $attr, level => $opt{level},
648 wakaba 1.2 type => 'URIs:'.':'.
649     $opt{type}.':'.$i.
650 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
651     });
652     $i++;
653     }
654     ## ISSUE: Relative references?
655     ## ISSUE: Leading or trailing white spaces are conformant?
656     ## ISSUE: A sequence of white space characters are conformant?
657     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
658     ## NOTE: Duplication seems not an error.
659 wakaba 1.4 $self->{has_uri_attr} = 1;
660 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
661    
662     my $HTMLDatetimeAttrChecker = sub {
663     my ($self, $attr) = @_;
664     my $value = $attr->value;
665     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
666     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
667     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
668     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
669     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
670     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
671     if $d < 1 or
672     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
673     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
674     if $M == 2 and $d == 29 and
675     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
676     } else {
677     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
678     }
679     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
680     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
681     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
682     if defined $s and $s > 59;
683     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
684     if $zh > 23;
685     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
686     if $zm > 59;
687     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
688     } else {
689     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
690     }
691     }; # $HTMLDatetimeAttrChecker
692    
693     my $HTMLIntegerAttrChecker = sub {
694     my ($self, $attr) = @_;
695     my $value = $attr->value;
696     unless ($value =~ /\A-?[0-9]+\z/) {
697     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
698     }
699     }; # $HTMLIntegerAttrChecker
700    
701     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
702     my $range_check = shift;
703     return sub {
704     my ($self, $attr) = @_;
705     my $value = $attr->value;
706     if ($value =~ /\A[0-9]+\z/) {
707     unless ($range_check->($value + 0)) {
708     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
709     }
710     } else {
711     $self->{onerror}->(node => $attr,
712     type => 'nninteger:syntax error');
713     }
714     };
715     }; # $GetHTMLNonNegativeIntegerAttrChecker
716    
717     my $GetHTMLFloatingPointNumberAttrChecker = sub {
718     my $range_check = shift;
719     return sub {
720     my ($self, $attr) = @_;
721     my $value = $attr->value;
722     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
723     unless ($range_check->($value + 0)) {
724     $self->{onerror}->(node => $attr, type => 'float:out of range');
725     }
726     } else {
727     $self->{onerror}->(node => $attr,
728     type => 'float:syntax error');
729     }
730     };
731     }; # $GetHTMLFloatingPointNumberAttrChecker
732    
733     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
734     ## ISSUE: RFC 2046 does not define syntax of media types.
735     ## ISSUE: The definition of "a valid MIME type" is unknown.
736     ## Syntactical correctness?
737     my $HTMLIMTAttrChecker = sub {
738     my ($self, $attr) = @_;
739     my $value = $attr->value;
740     ## ISSUE: RFC 2045 Content-Type header field allows insertion
741     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
742     ## ISSUE: RFC 2231 extension? Maybe no.
743     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
744     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
745     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
746     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
747     my @type = ($1, $2);
748     my $param = $3;
749     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
750     if (defined $2) {
751     push @type, $1 => $2;
752     } else {
753     my $n = $1;
754     my $v = $2;
755     $v =~ s/\\(.)/$1/gs;
756     push @type, $n => $v;
757     }
758     }
759     require Whatpm::IMTChecker;
760     Whatpm::IMTChecker->check_imt (sub {
761     my %opt = @_;
762     $self->{onerror}->(node => $attr, level => $opt{level},
763     type => 'IMT:'.$opt{type});
764     }, @type);
765     } else {
766     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
767     }
768     }; # $HTMLIMTAttrChecker
769    
770     my $HTMLLanguageTagAttrChecker = sub {
771 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
772    
773 wakaba 1.1 my ($self, $attr) = @_;
774 wakaba 1.6 my $value = $attr->value;
775     require Whatpm::LangTag;
776     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
777     my %opt = @_;
778     my $type = 'LangTag:'.$opt{type};
779     $type .= ':' . $opt{subtag} if defined $opt{subtag};
780     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
781     level => $opt{level});
782     });
783 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
784 wakaba 1.6
785     ## TODO: testdata
786 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
787    
788     ## "A valid media query [MQ]"
789     my $HTMLMQAttrChecker = sub {
790     my ($self, $attr) = @_;
791     $self->{onerror}->(node => $attr, level => 'unsupported',
792     type => 'media query');
793     ## ISSUE: What is "a valid media query"?
794     }; # $HTMLMQAttrChecker
795    
796     my $HTMLEventHandlerAttrChecker = sub {
797     my ($self, $attr) = @_;
798     $self->{onerror}->(node => $attr, level => 'unsupported',
799     type => 'event handler');
800     ## TODO: MUST contain valid ECMAScript code matching the
801     ## ECMAScript |FunctionBody| production. [ECMA262]
802     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
803     ## ISSUE: Automatic semicolon insertion does not apply?
804     ## ISSUE: Other script languages?
805     }; # $HTMLEventHandlerAttrChecker
806    
807     my $HTMLUsemapAttrChecker = sub {
808     my ($self, $attr) = @_;
809     ## MUST be a valid hashed ID reference to a |map| element
810     my $value = $attr->value;
811     if ($value =~ s/^#//) {
812     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
813     push @{$self->{usemap}}, [$value => $attr];
814     } else {
815     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
816     }
817     ## NOTE: Space characters in hashed ID references are conforming.
818     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
819     }; # $HTMLUsemapAttrChecker
820    
821     my $HTMLTargetAttrChecker = sub {
822     my ($self, $attr) = @_;
823     my $value = $attr->value;
824     if ($value =~ /^_/) {
825     $value = lc $value; ## ISSUE: ASCII case-insentitive?
826     unless ({
827     _self => 1, _parent => 1, _top => 1,
828     }->{$value}) {
829     $self->{onerror}->(node => $attr,
830     type => 'reserved browsing context name');
831     }
832     } else {
833     #$ ISSUE: An empty string is conforming?
834     }
835     }; # $HTMLTargetAttrChecker
836    
837 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
838     my ($self, $attr) = @_;
839    
840     ## ISSUE: Namespace resolution?
841    
842     my $value = $attr->value;
843    
844     require Whatpm::CSS::SelectorsParser;
845     my $p = Whatpm::CSS::SelectorsParser->new;
846     $p->{pseudo_class}->{$_} = 1 for qw/
847     active checked disabled empty enabled first-child first-of-type
848     focus hover indeterminate last-child last-of-type link only-child
849     only-of-type root target visited
850     lang nth-child nth-last-child nth-of-type nth-last-of-type not
851     -manakai-contains -manakai-current
852     /;
853    
854     $p->{pseudo_element}->{$_} = 1 for qw/
855     after before first-letter first-line
856     /;
857    
858     $p->{must_level} = $self->{must_level};
859     $p->{onerror} = sub {
860     my %opt = @_;
861     $opt{type} = 'selectors:'.$opt{type};
862     $self->{onerror}->(%opt, node => $attr);
863     };
864     $p->parse_string ($value);
865     }; # $HTMLSelectorsAttrChecker
866    
867 wakaba 1.1 my $HTMLAttrChecker = {
868     id => sub {
869     ## NOTE: |map| has its own variant of |id=""| checker
870     my ($self, $attr) = @_;
871     my $value = $attr->value;
872     if (length $value > 0) {
873     if ($self->{id}->{$value}) {
874     $self->{onerror}->(node => $attr, type => 'duplicate ID');
875     push @{$self->{id}->{$value}}, $attr;
876     } else {
877     $self->{id}->{$value} = [$attr];
878     }
879     if ($value =~ /[\x09-\x0D\x20]/) {
880     $self->{onerror}->(node => $attr, type => 'space in ID');
881     }
882     } else {
883     ## NOTE: MUST contain at least one character
884     $self->{onerror}->(node => $attr, type => 'empty attribute value');
885     }
886     },
887     title => sub {}, ## NOTE: No conformance creteria
888     lang => sub {
889     my ($self, $attr) = @_;
890 wakaba 1.6 my $value = $attr->value;
891     if ($value eq '') {
892     #
893     } else {
894     require Whatpm::LangTag;
895     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
896     my %opt = @_;
897     my $type = 'LangTag:'.$opt{type};
898     $type .= ':' . $opt{subtag} if defined $opt{subtag};
899     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
900     level => $opt{level});
901     });
902     }
903 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
904     unless ($attr->owner_document->manakai_is_html) {
905     $self->{onerror}->(node => $attr, type => 'in XML:lang');
906     }
907 wakaba 1.6
908     ## TODO: test data
909 wakaba 1.1 },
910     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
911     class => sub {
912     my ($self, $attr) = @_;
913     my %word;
914     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
915     unless ($word{$word}) {
916     $word{$word} = 1;
917     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
918     } else {
919     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
920     }
921     }
922     },
923     contextmenu => sub {
924     my ($self, $attr) = @_;
925     my $value = $attr->value;
926     push @{$self->{contextmenu}}, [$value => $attr];
927     ## ISSUE: "The value must be the ID of a menu element in the DOM."
928     ## What is "in the DOM"? A menu Element node that is not part
929     ## of the Document tree is in the DOM? A menu Element node that
930     ## belong to another Document tree is in the DOM?
931     },
932     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
933 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
934     ## TODO: ref, template, registrationmark
935 wakaba 1.1 };
936    
937     for (qw/
938     onabort onbeforeunload onblur onchange onclick oncontextmenu
939     ondblclick ondrag ondragend ondragenter ondragleave ondragover
940     ondragstart ondrop onerror onfocus onkeydown onkeypress
941     onkeyup onload onmessage onmousedown onmousemove onmouseout
942     onmouseover onmouseup onmousewheel onresize onscroll onselect
943     onsubmit onunload
944     /) {
945     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
946     }
947    
948     my $GetHTMLAttrsChecker = sub {
949     my $element_specific_checker = shift;
950     return sub {
951     my ($self, $todo) = @_;
952     for my $attr (@{$todo->{node}->attributes}) {
953     my $attr_ns = $attr->namespace_uri;
954     $attr_ns = '' unless defined $attr_ns;
955     my $attr_ln = $attr->manakai_local_name;
956     my $checker;
957     if ($attr_ns eq '') {
958     $checker = $element_specific_checker->{$attr_ln}
959     || $HTMLAttrChecker->{$attr_ln};
960     }
961     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
962     || $AttrChecker->{$attr_ns}->{''};
963     if ($checker) {
964     $checker->($self, $attr, $todo);
965     } else {
966     $self->{onerror}->(node => $attr, level => 'unsupported',
967     type => 'attribute');
968     ## ISSUE: No comformance createria for unknown attributes in the spec
969     }
970     }
971     };
972     }; # $GetHTMLAttrsChecker
973    
974     our $Element;
975     our $ElementDefault;
976     our $AnyChecker;
977    
978     $Element->{$HTML_NS}->{''} = {
979     attrs_checker => $GetHTMLAttrsChecker->({}),
980     checker => $ElementDefault->{checker},
981     };
982    
983     $Element->{$HTML_NS}->{html} = {
984     is_root => 1,
985     attrs_checker => $GetHTMLAttrsChecker->({
986 wakaba 1.16 manifest => $HTMLURIAttrChecker,
987 wakaba 1.1 xmlns => sub {
988     my ($self, $attr) = @_;
989     my $value = $attr->value;
990     unless ($value eq $HTML_NS) {
991     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
992     }
993     unless ($attr->owner_document->manakai_is_html) {
994     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
995     ## TODO: Test
996     }
997     },
998     }),
999     checker => sub {
1000     my ($self, $todo) = @_;
1001     my $el = $todo->{node};
1002     my $new_todos = [];
1003     my @nodes = (@{$el->child_nodes});
1004    
1005     my $phase = 'before head';
1006     while (@nodes) {
1007     my $node = shift @nodes;
1008     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1009    
1010     my $nt = $node->node_type;
1011     if ($nt == 1) {
1012     my $node_ns = $node->namespace_uri;
1013     $node_ns = '' unless defined $node_ns;
1014     my $node_ln = $node->manakai_local_name;
1015     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1016 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1017     #
1018     } elsif ($phase eq 'before head') {
1019 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
1020     $phase = 'after head';
1021     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1022     $self->{onerror}->(node => $node, type => 'ps element missing:head');
1023     $phase = 'after body';
1024     } else {
1025     $not_allowed = 1;
1026     # before head
1027     }
1028     } elsif ($phase eq 'after head') {
1029     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1030     $phase = 'after body';
1031     } else {
1032     $not_allowed = 1;
1033     # after head
1034     }
1035     } else { #elsif ($phase eq 'after body') {
1036     $not_allowed = 1;
1037     # after body
1038     }
1039     $self->{onerror}->(node => $node, type => 'element not allowed')
1040     if $not_allowed;
1041     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1042     unshift @nodes, @$sib;
1043     push @$new_todos, @$ch;
1044     } elsif ($nt == 3 or $nt == 4) {
1045     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1046     $self->{onerror}->(node => $node, type => 'character not allowed');
1047     }
1048     } elsif ($nt == 5) {
1049     unshift @nodes, @{$node->child_nodes};
1050     }
1051     }
1052    
1053     if ($phase eq 'before head') {
1054     $self->{onerror}->(node => $el, type => 'child element missing:head');
1055     $self->{onerror}->(node => $el, type => 'child element missing:body');
1056     } elsif ($phase eq 'after head') {
1057     $self->{onerror}->(node => $el, type => 'child element missing:body');
1058     }
1059    
1060     return ($new_todos);
1061     },
1062     };
1063    
1064     $Element->{$HTML_NS}->{head} = {
1065     attrs_checker => $GetHTMLAttrsChecker->({}),
1066     checker => sub {
1067     my ($self, $todo) = @_;
1068     my $el = $todo->{node};
1069     my $new_todos = [];
1070     my @nodes = (@{$el->child_nodes});
1071    
1072     my $has_title;
1073     my $phase = 'initial'; # 'after charset', 'after base'
1074     while (@nodes) {
1075     my $node = shift @nodes;
1076     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1077    
1078     my $nt = $node->node_type;
1079     if ($nt == 1) {
1080     my $node_ns = $node->namespace_uri;
1081     $node_ns = '' unless defined $node_ns;
1082     my $node_ln = $node->manakai_local_name;
1083     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1084 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1085     #
1086     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1087 wakaba 1.1 $phase = 'after base';
1088     unless ($has_title) {
1089     $has_title = 1;
1090     } else {
1091     $not_allowed = 1;
1092     }
1093     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1094     if ($node->has_attribute_ns (undef, 'charset')) {
1095     if ($phase eq 'initial') {
1096     $phase = 'after charset';
1097     } else {
1098     $not_allowed = 1;
1099     ## NOTE: See also |base|'s "contexts" field in the spec
1100     }
1101 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1102     $node->has_attribute_ns (undef, 'http-equiv')) {
1103     $phase = 'after base';
1104 wakaba 1.1 } else {
1105     $phase = 'after base';
1106 wakaba 1.5 $not_allowed = 1;
1107 wakaba 1.1 }
1108     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1109     if ($phase eq 'initial' or $phase eq 'after charset') {
1110     $phase = 'after base';
1111     } else {
1112     $not_allowed = 1;
1113     }
1114     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1115     $phase = 'after base';
1116     if ($node->has_attribute_ns (undef, 'scoped')) {
1117     $not_allowed = 1;
1118     }
1119     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1120     $phase = 'after base';
1121     } else {
1122     $not_allowed = 1;
1123     }
1124     $self->{onerror}->(node => $node, type => 'element not allowed')
1125     if $not_allowed;
1126 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1127 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1128     unshift @nodes, @$sib;
1129     push @$new_todos, @$ch;
1130     } elsif ($nt == 3 or $nt == 4) {
1131     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1132     $self->{onerror}->(node => $node, type => 'character not allowed');
1133     }
1134     } elsif ($nt == 5) {
1135     unshift @nodes, @{$node->child_nodes};
1136     }
1137     }
1138     unless ($has_title) {
1139     $self->{onerror}->(node => $el, type => 'child element missing:title');
1140     }
1141     return ($new_todos);
1142     },
1143     };
1144    
1145     $Element->{$HTML_NS}->{title} = {
1146     attrs_checker => $GetHTMLAttrsChecker->({}),
1147     checker => $HTMLTextChecker,
1148     };
1149    
1150     $Element->{$HTML_NS}->{base} = {
1151 wakaba 1.4 attrs_checker => sub {
1152     my ($self, $todo) = @_;
1153    
1154 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1155     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1156    
1157     if ($self->{has_uri_attr} and $has_href) {
1158 wakaba 1.4 ## ISSUE: Are these examples conforming?
1159     ## <head profile="a b c"><base href> (except for |profile|'s
1160     ## non-conformance)
1161     ## <title xml:base="relative"/><base href/> (maybe it should be)
1162     ## <unknown xmlns="relative"/><base href/> (assuming that
1163     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1164     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1165     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1166     ## <style>@import 'relative';</style><base href>
1167     ## <script>location.href = 'relative';</script><base href>
1168 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1169     ## an exception.
1170 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1171     type => 'basehref after URI attribute');
1172     }
1173 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1174 wakaba 1.4 ## ISSUE: Are these examples conforming?
1175     ## <head><title xlink:href=""/><base target="name"/></head>
1176     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1177     ## (assuming that |xbl:xbl| is allowed before |base|)
1178     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1179     ## <link href=""/><base target="name"/>
1180     ## <link rel=unknown href=""><base target=name>
1181     $self->{onerror}->(node => $todo->{node},
1182     type => 'basetarget after hyperlink');
1183     }
1184    
1185 wakaba 1.14 if (not $has_href and not $has_target) {
1186     $self->{onerror}->(node => $todo->{node},
1187     type => 'attribute missing:href|target');
1188     }
1189    
1190 wakaba 1.4 return $GetHTMLAttrsChecker->({
1191     href => $HTMLURIAttrChecker,
1192     target => $HTMLTargetAttrChecker,
1193     })->($self, $todo);
1194     },
1195 wakaba 1.1 checker => $HTMLEmptyChecker,
1196     };
1197    
1198     $Element->{$HTML_NS}->{link} = {
1199     attrs_checker => sub {
1200     my ($self, $todo) = @_;
1201     $GetHTMLAttrsChecker->({
1202     href => $HTMLURIAttrChecker,
1203 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1204 wakaba 1.1 media => $HTMLMQAttrChecker,
1205     hreflang => $HTMLLanguageTagAttrChecker,
1206     type => $HTMLIMTAttrChecker,
1207     ## NOTE: Though |title| has special semantics,
1208     ## syntactically same as the |title| as global attribute.
1209     })->($self, $todo);
1210 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1211     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1212     } else {
1213 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1214     type => 'attribute missing:href');
1215     }
1216     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1217     $self->{onerror}->(node => $todo->{node},
1218     type => 'attribute missing:rel');
1219     }
1220     },
1221     checker => $HTMLEmptyChecker,
1222     };
1223    
1224     $Element->{$HTML_NS}->{meta} = {
1225     attrs_checker => sub {
1226     my ($self, $todo) = @_;
1227     my $name_attr;
1228     my $http_equiv_attr;
1229     my $charset_attr;
1230     my $content_attr;
1231     for my $attr (@{$todo->{node}->attributes}) {
1232     my $attr_ns = $attr->namespace_uri;
1233     $attr_ns = '' unless defined $attr_ns;
1234     my $attr_ln = $attr->manakai_local_name;
1235     my $checker;
1236     if ($attr_ns eq '') {
1237     if ($attr_ln eq 'content') {
1238     $content_attr = $attr;
1239     $checker = 1;
1240     } elsif ($attr_ln eq 'name') {
1241     $name_attr = $attr;
1242     $checker = 1;
1243     } elsif ($attr_ln eq 'http-equiv') {
1244     $http_equiv_attr = $attr;
1245     $checker = 1;
1246     } elsif ($attr_ln eq 'charset') {
1247     $charset_attr = $attr;
1248     $checker = 1;
1249     } else {
1250     $checker = $HTMLAttrChecker->{$attr_ln}
1251     || $AttrChecker->{$attr_ns}->{$attr_ln}
1252     || $AttrChecker->{$attr_ns}->{''};
1253     }
1254     } else {
1255     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1256     || $AttrChecker->{$attr_ns}->{''};
1257     }
1258     if ($checker) {
1259     $checker->($self, $attr) if ref $checker;
1260     } else {
1261     $self->{onerror}->(node => $attr, level => 'unsupported',
1262     type => 'attribute');
1263     ## ISSUE: No comformance createria for unknown attributes in the spec
1264     }
1265     }
1266    
1267     if (defined $name_attr) {
1268     if (defined $http_equiv_attr) {
1269     $self->{onerror}->(node => $http_equiv_attr,
1270     type => 'attribute not allowed');
1271     } elsif (defined $charset_attr) {
1272     $self->{onerror}->(node => $charset_attr,
1273     type => 'attribute not allowed');
1274     }
1275     my $metadata_name = $name_attr->value;
1276     my $metadata_value;
1277     if (defined $content_attr) {
1278     $metadata_value = $content_attr->value;
1279     } else {
1280     $self->{onerror}->(node => $todo->{node},
1281     type => 'attribute missing:content');
1282     $metadata_value = '';
1283     }
1284     } elsif (defined $http_equiv_attr) {
1285     if (defined $charset_attr) {
1286     $self->{onerror}->(node => $charset_attr,
1287     type => 'attribute not allowed');
1288     }
1289     unless (defined $content_attr) {
1290     $self->{onerror}->(node => $todo->{node},
1291     type => 'attribute missing:content');
1292     }
1293     } elsif (defined $charset_attr) {
1294     if (defined $content_attr) {
1295     $self->{onerror}->(node => $content_attr,
1296     type => 'attribute not allowed');
1297     }
1298     } else {
1299     if (defined $content_attr) {
1300     $self->{onerror}->(node => $content_attr,
1301     type => 'attribute not allowed');
1302     $self->{onerror}->(node => $todo->{node},
1303     type => 'attribute missing:name|http-equiv');
1304     } else {
1305     $self->{onerror}->(node => $todo->{node},
1306     type => 'attribute missing:name|http-equiv|charset');
1307     }
1308     }
1309    
1310     ## TODO: metadata conformance
1311    
1312     ## TODO: pragma conformance
1313     if (defined $http_equiv_attr) { ## An enumerated attribute
1314     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1315     if ({
1316     'refresh' => 1,
1317     'default-style' => 1,
1318     }->{$keyword}) {
1319     #
1320 wakaba 1.19 } elsif ($keyword eq 'content-type') {
1321     $self->{onerror}
1322     ->(node => $http_equiv_attr,
1323     type => 'enumerated:invalid:http-equiv:content-type');
1324 wakaba 1.1 } else {
1325     $self->{onerror}->(node => $http_equiv_attr,
1326     type => 'enumerated:invalid');
1327     }
1328     }
1329    
1330     if (defined $charset_attr) {
1331     unless ($todo->{node}->owner_document->manakai_is_html) {
1332     $self->{onerror}->(node => $charset_attr,
1333     type => 'in XML:charset');
1334     }
1335 wakaba 1.21
1336     my $charset_value = $charset_attr->value;
1337     ## NOTE: Though the case-sensitivility of |charset| attribute value
1338     ## is not explicitly spelled in the HTML5 spec, the Character Set
1339     ## registry of IANA, which is referenced from HTML5 spec, says that
1340     ## charset name is case-insensitive.
1341     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1342    
1343     require Message::Charset::Info;
1344     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1345     my $ic = $todo->{node}->owner_document->input_encoding;
1346     if (defined $ic) {
1347     ## TODO: Test for this case
1348     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1349     if ($charset ne $ic_charset) {
1350     $self->{onerror}->(node => $charset_attr,
1351     type => 'mismatched charset name:'.$ic.
1352     ':'.$charset_value,
1353     level => 'm');
1354     }
1355     } else {
1356     ## NOTE: MUST, but not checkable, since the document is not originally
1357     ## in serialized form (or the parser does not preserve the input
1358     ## encoding information).
1359     $self->{onerror}->(node => $charset_attr,
1360     type => 'mismatched charset name::'.$charset_value,
1361     level => 'unsupported');
1362     }
1363    
1364     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1365     ## Syntactically valid and registered? What about x-charset names?
1366     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1367     ($charset_value)) {
1368     $self->{onerror}->(node => $charset_attr,
1369     type => 'charset:syntax error:'.$charset_value,
1370     level => 'm');
1371     }
1372    
1373     if ($charset) {
1374     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1375     ## with no "preferred MIME name" label)?
1376     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1377     if (($charset_status &
1378     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1379     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1380     $self->{onerror}->(node => $charset_attr,
1381     type => 'charset:not preferred:'.
1382     $charset_value,
1383     level => 'm');
1384     }
1385     if (($charset_status &
1386     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1387     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1388     if ($charset_value =~ /^x-/) {
1389     $self->{onerror}->(node => $charset_attr,
1390     type => 'charset:private:'.$charset_value,
1391     level => $self->{good_level});
1392     } else {
1393     $self->{onerror}->(node => $charset_attr,
1394     type => 'charset:not registered:'.
1395     $charset_value,
1396     level => $self->{good_level});
1397     }
1398     }
1399     } elsif ($charset_value =~ /^x-/) {
1400     $self->{onerror}->(node => $charset_attr,
1401     type => 'charset:private:'.$charset_value,
1402     level => $self->{good_level});
1403     } else {
1404     $self->{onerror}->(node => $charset_attr,
1405     type => 'charset:not registered:'.$charset_value,
1406     level => $self->{good_level});
1407     }
1408    
1409 wakaba 1.22 if ($charset_attr->get_user_data ('manakai_has_reference')) {
1410     $self->{onerror}->(node => $charset_attr,
1411     type => 'character reference in charset',
1412     level => $self->{must_level});
1413     }
1414 wakaba 1.1 }
1415     },
1416     checker => $HTMLEmptyChecker,
1417     };
1418    
1419     $Element->{$HTML_NS}->{style} = {
1420     attrs_checker => $GetHTMLAttrsChecker->({
1421     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1422     media => $HTMLMQAttrChecker,
1423     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1424     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1425     ## not different
1426     }),
1427     checker => sub {
1428     ## NOTE: |html:style| has no conformance creteria on content model
1429     my ($self, $todo) = @_;
1430     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1431     $type = 'text/css' unless defined $type;
1432     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1433     type => 'style:'.$type); ## TODO: $type normalization
1434     return $AnyChecker->($self, $todo);
1435     },
1436     };
1437    
1438     $Element->{$HTML_NS}->{body} = {
1439     attrs_checker => $GetHTMLAttrsChecker->({}),
1440     checker => $HTMLBlockChecker,
1441     };
1442    
1443     $Element->{$HTML_NS}->{section} = {
1444     attrs_checker => $GetHTMLAttrsChecker->({}),
1445     checker => $HTMLStylableBlockChecker,
1446     };
1447    
1448     $Element->{$HTML_NS}->{nav} = {
1449     attrs_checker => $GetHTMLAttrsChecker->({}),
1450     checker => $HTMLBlockOrInlineChecker,
1451     };
1452    
1453     $Element->{$HTML_NS}->{article} = {
1454     attrs_checker => $GetHTMLAttrsChecker->({}),
1455     checker => $HTMLStylableBlockChecker,
1456     };
1457    
1458     $Element->{$HTML_NS}->{blockquote} = {
1459     attrs_checker => $GetHTMLAttrsChecker->({
1460     cite => $HTMLURIAttrChecker,
1461     }),
1462     checker => $HTMLBlockChecker,
1463     };
1464    
1465     $Element->{$HTML_NS}->{aside} = {
1466     attrs_checker => $GetHTMLAttrsChecker->({}),
1467     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1468     };
1469    
1470     $Element->{$HTML_NS}->{h1} = {
1471     attrs_checker => $GetHTMLAttrsChecker->({}),
1472     checker => sub {
1473     my ($self, $todo) = @_;
1474 wakaba 1.24 $todo->{flag}->{has_descendant}->{hn} = 1;
1475 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1476 wakaba 1.1 },
1477     };
1478    
1479     $Element->{$HTML_NS}->{h2} = {
1480     attrs_checker => $GetHTMLAttrsChecker->({}),
1481     checker => $Element->{$HTML_NS}->{h1}->{checker},
1482     };
1483    
1484     $Element->{$HTML_NS}->{h3} = {
1485     attrs_checker => $GetHTMLAttrsChecker->({}),
1486     checker => $Element->{$HTML_NS}->{h1}->{checker},
1487     };
1488    
1489     $Element->{$HTML_NS}->{h4} = {
1490     attrs_checker => $GetHTMLAttrsChecker->({}),
1491     checker => $Element->{$HTML_NS}->{h1}->{checker},
1492     };
1493    
1494     $Element->{$HTML_NS}->{h5} = {
1495     attrs_checker => $GetHTMLAttrsChecker->({}),
1496     checker => $Element->{$HTML_NS}->{h1}->{checker},
1497     };
1498    
1499     $Element->{$HTML_NS}->{h6} = {
1500     attrs_checker => $GetHTMLAttrsChecker->({}),
1501     checker => $Element->{$HTML_NS}->{h1}->{checker},
1502     };
1503    
1504     $Element->{$HTML_NS}->{header} = {
1505     attrs_checker => $GetHTMLAttrsChecker->({}),
1506     checker => sub {
1507     my ($self, $todo) = @_;
1508 wakaba 1.24
1509     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
1510     $todo->{flag}->{has_descendant}->{hn} = 0;
1511 wakaba 1.1
1512     my $end = $self->_add_minuses
1513     ({$HTML_NS => {qw/header 1 footer 1/}},
1514     $HTMLSectioningElements);
1515     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1516 wakaba 1.24 push @$new_todos, $end,
1517     {type => 'descendant', node => $todo->{node},
1518     flag => $todo->{flag}, old_values => $old_flags,
1519     errors => {
1520     hn => sub {
1521     my ($self, $todo) = @_;
1522     $self->{onerror}->(node => $todo->{node},
1523     type => 'element missing:hn');
1524     },
1525 wakaba 1.1 }};
1526     return ($new_todos, $ch);
1527 wakaba 1.24
1528     ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
1529 wakaba 1.1 },
1530     };
1531    
1532     $Element->{$HTML_NS}->{footer} = {
1533     attrs_checker => $GetHTMLAttrsChecker->({}),
1534     checker => sub { ## block -hn -header -footer -sectioning or inline
1535     my ($self, $todo) = @_;
1536     my $el = $todo->{node};
1537     my $new_todos = [];
1538     my @nodes = (@{$el->child_nodes});
1539    
1540     my $content = 'block-or-inline'; # or 'block' or 'inline'
1541     my @block_not_inline;
1542     while (@nodes) {
1543     my $node = shift @nodes;
1544     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1545    
1546     my $nt = $node->node_type;
1547     if ($nt == 1) {
1548     my $node_ns = $node->namespace_uri;
1549     $node_ns = '' unless defined $node_ns;
1550     my $node_ln = $node->manakai_local_name;
1551     my $not_allowed;
1552     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1553     $not_allowed = 1;
1554     } elsif ($node_ns eq $HTML_NS and
1555     {
1556     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1557     }->{$node_ln}) {
1558     $not_allowed = 1;
1559     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1560     $not_allowed = 1;
1561     }
1562     if ($content eq 'block') {
1563     $not_allowed = 1
1564 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1565     $self->{pluses}->{$node_ns}->{$node_ln};
1566 wakaba 1.1 } elsif ($content eq 'inline') {
1567     $not_allowed = 1
1568     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1569 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1570     $self->{pluses}->{$node_ns}->{$node_ln};
1571 wakaba 1.1 } else {
1572     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1573     my $is_inline
1574     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1575     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1576    
1577     push @block_not_inline, $node
1578     if $is_block and not $is_inline and not $not_allowed;
1579 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1580 wakaba 1.1 $content = 'inline';
1581     for (@block_not_inline) {
1582     $self->{onerror}->(node => $_, type => 'element not allowed');
1583     }
1584     $not_allowed = 1 unless $is_inline;
1585     }
1586     }
1587     $self->{onerror}->(node => $node, type => 'element not allowed')
1588     if $not_allowed;
1589     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1590     unshift @nodes, @$sib;
1591     push @$new_todos, @$ch;
1592     } elsif ($nt == 3 or $nt == 4) {
1593     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1594     if ($content eq 'block') {
1595     $self->{onerror}->(node => $node, type => 'character not allowed');
1596     } else {
1597     $content = 'inline';
1598     for (@block_not_inline) {
1599     $self->{onerror}->(node => $_, type => 'element not allowed');
1600     }
1601     }
1602     }
1603     } elsif ($nt == 5) {
1604     unshift @nodes, @{$node->child_nodes};
1605     }
1606     }
1607    
1608     my $end = $self->_add_minuses
1609     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1610     $HTMLSectioningElements);
1611     push @$new_todos, $end;
1612    
1613     if ($content eq 'inline') {
1614     for (@$new_todos) {
1615     $_->{inline} = 1;
1616     }
1617     }
1618    
1619     return ($new_todos);
1620     },
1621     };
1622    
1623     $Element->{$HTML_NS}->{address} = {
1624     attrs_checker => $GetHTMLAttrsChecker->({}),
1625     checker => $HTMLInlineChecker,
1626     };
1627    
1628     $Element->{$HTML_NS}->{p} = {
1629     attrs_checker => $GetHTMLAttrsChecker->({}),
1630 wakaba 1.13 checker => $HTMLInlineChecker,
1631 wakaba 1.1 };
1632    
1633     $Element->{$HTML_NS}->{hr} = {
1634     attrs_checker => $GetHTMLAttrsChecker->({}),
1635     checker => $HTMLEmptyChecker,
1636     };
1637    
1638     $Element->{$HTML_NS}->{br} = {
1639     attrs_checker => $GetHTMLAttrsChecker->({}),
1640     checker => $HTMLEmptyChecker,
1641     };
1642    
1643     $Element->{$HTML_NS}->{dialog} = {
1644     attrs_checker => $GetHTMLAttrsChecker->({}),
1645     checker => sub {
1646     my ($self, $todo) = @_;
1647     my $el = $todo->{node};
1648     my $new_todos = [];
1649     my @nodes = (@{$el->child_nodes});
1650    
1651     my $phase = 'before dt';
1652     while (@nodes) {
1653     my $node = shift @nodes;
1654     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1655    
1656     my $nt = $node->node_type;
1657     if ($nt == 1) {
1658     my $node_ns = $node->namespace_uri;
1659     $node_ns = '' unless defined $node_ns;
1660     my $node_ln = $node->manakai_local_name;
1661     ## NOTE: |minuses| list is not checked since redundant
1662 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1663     #
1664     } elsif ($phase eq 'before dt') {
1665 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1666     $phase = 'before dd';
1667     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1668     $self->{onerror}
1669     ->(node => $node, type => 'ps element missing:dt');
1670     $phase = 'before dt';
1671     } else {
1672     $self->{onerror}->(node => $node, type => 'element not allowed');
1673     }
1674     } else { # before dd
1675     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1676     $phase = 'before dt';
1677     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1678     $self->{onerror}
1679     ->(node => $node, type => 'ps element missing:dd');
1680     $phase = 'before dd';
1681     } else {
1682     $self->{onerror}->(node => $node, type => 'element not allowed');
1683     }
1684     }
1685     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1686     unshift @nodes, @$sib;
1687     push @$new_todos, @$ch;
1688     } elsif ($nt == 3 or $nt == 4) {
1689     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1690     $self->{onerror}->(node => $node, type => 'character not allowed');
1691     }
1692     } elsif ($nt == 5) {
1693     unshift @nodes, @{$node->child_nodes};
1694     }
1695     }
1696     if ($phase eq 'before dd') {
1697 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1698 wakaba 1.1 }
1699     return ($new_todos);
1700     },
1701     };
1702    
1703     $Element->{$HTML_NS}->{pre} = {
1704     attrs_checker => $GetHTMLAttrsChecker->({}),
1705     checker => $HTMLStrictlyInlineChecker,
1706     };
1707    
1708     $Element->{$HTML_NS}->{ol} = {
1709     attrs_checker => $GetHTMLAttrsChecker->({
1710     start => $HTMLIntegerAttrChecker,
1711     }),
1712     checker => sub {
1713     my ($self, $todo) = @_;
1714     my $el = $todo->{node};
1715     my $new_todos = [];
1716     my @nodes = (@{$el->child_nodes});
1717    
1718     while (@nodes) {
1719     my $node = shift @nodes;
1720     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1721    
1722     my $nt = $node->node_type;
1723     if ($nt == 1) {
1724     my $node_ns = $node->namespace_uri;
1725     $node_ns = '' unless defined $node_ns;
1726     my $node_ln = $node->manakai_local_name;
1727     ## NOTE: |minuses| list is not checked since redundant
1728 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1729     #
1730     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1731 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1732     }
1733     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1734     unshift @nodes, @$sib;
1735     push @$new_todos, @$ch;
1736     } elsif ($nt == 3 or $nt == 4) {
1737     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1738     $self->{onerror}->(node => $node, type => 'character not allowed');
1739     }
1740     } elsif ($nt == 5) {
1741     unshift @nodes, @{$node->child_nodes};
1742     }
1743     }
1744    
1745     if ($todo->{inline}) {
1746     for (@$new_todos) {
1747     $_->{inline} = 1;
1748     }
1749     }
1750     return ($new_todos);
1751     },
1752     };
1753    
1754     $Element->{$HTML_NS}->{ul} = {
1755     attrs_checker => $GetHTMLAttrsChecker->({}),
1756     checker => $Element->{$HTML_NS}->{ol}->{checker},
1757     };
1758    
1759     $Element->{$HTML_NS}->{li} = {
1760     attrs_checker => $GetHTMLAttrsChecker->({
1761     start => sub {
1762     my ($self, $attr) = @_;
1763     my $parent = $attr->owner_element->manakai_parent_element;
1764     if (defined $parent) {
1765     my $parent_ns = $parent->namespace_uri;
1766     $parent_ns = '' unless defined $parent_ns;
1767     my $parent_ln = $parent->manakai_local_name;
1768     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1769     $self->{onerror}->(node => $attr, level => 'unsupported',
1770     type => 'attribute');
1771     }
1772     }
1773     $HTMLIntegerAttrChecker->($self, $attr);
1774     },
1775     }),
1776     checker => sub {
1777     my ($self, $todo) = @_;
1778     if ($todo->{inline}) {
1779     return $HTMLInlineChecker->($self, $todo);
1780     } else {
1781     return $HTMLBlockOrInlineChecker->($self, $todo);
1782     }
1783     },
1784     };
1785    
1786     $Element->{$HTML_NS}->{dl} = {
1787     attrs_checker => $GetHTMLAttrsChecker->({}),
1788     checker => sub {
1789     my ($self, $todo) = @_;
1790     my $el = $todo->{node};
1791     my $new_todos = [];
1792     my @nodes = (@{$el->child_nodes});
1793    
1794     my $phase = 'before dt';
1795     while (@nodes) {
1796     my $node = shift @nodes;
1797     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1798    
1799     my $nt = $node->node_type;
1800     if ($nt == 1) {
1801     my $node_ns = $node->namespace_uri;
1802     $node_ns = '' unless defined $node_ns;
1803     my $node_ln = $node->manakai_local_name;
1804     ## NOTE: |minuses| list is not checked since redundant
1805 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1806     #
1807     } elsif ($phase eq 'in dds') {
1808 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1809     #$phase = 'in dds';
1810     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1811     $phase = 'in dts';
1812     } else {
1813     $self->{onerror}->(node => $node, type => 'element not allowed');
1814     }
1815     } elsif ($phase eq 'in dts') {
1816     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1817     #$phase = 'in dts';
1818     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1819     $phase = 'in dds';
1820     } else {
1821     $self->{onerror}->(node => $node, type => 'element not allowed');
1822     }
1823     } else { # before dt
1824     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1825     $phase = 'in dts';
1826     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1827     $self->{onerror}
1828     ->(node => $node, type => 'ps element missing:dt');
1829     $phase = 'in dds';
1830     } else {
1831     $self->{onerror}->(node => $node, type => 'element not allowed');
1832     }
1833     }
1834     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1835     unshift @nodes, @$sib;
1836     push @$new_todos, @$ch;
1837     } elsif ($nt == 3 or $nt == 4) {
1838     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1839     $self->{onerror}->(node => $node, type => 'character not allowed');
1840     }
1841     } elsif ($nt == 5) {
1842     unshift @nodes, @{$node->child_nodes};
1843     }
1844     }
1845     if ($phase eq 'in dts') {
1846 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1847 wakaba 1.1 }
1848    
1849     if ($todo->{inline}) {
1850     for (@$new_todos) {
1851     $_->{inline} = 1;
1852     }
1853     }
1854     return ($new_todos);
1855     },
1856     };
1857    
1858     $Element->{$HTML_NS}->{dt} = {
1859     attrs_checker => $GetHTMLAttrsChecker->({}),
1860     checker => $HTMLStrictlyInlineChecker,
1861     };
1862    
1863     $Element->{$HTML_NS}->{dd} = {
1864     attrs_checker => $GetHTMLAttrsChecker->({}),
1865     checker => $Element->{$HTML_NS}->{li}->{checker},
1866     };
1867    
1868     $Element->{$HTML_NS}->{a} = {
1869     attrs_checker => sub {
1870     my ($self, $todo) = @_;
1871     my %attr;
1872     for my $attr (@{$todo->{node}->attributes}) {
1873     my $attr_ns = $attr->namespace_uri;
1874     $attr_ns = '' unless defined $attr_ns;
1875     my $attr_ln = $attr->manakai_local_name;
1876     my $checker;
1877     if ($attr_ns eq '') {
1878     $checker = {
1879     target => $HTMLTargetAttrChecker,
1880     href => $HTMLURIAttrChecker,
1881     ping => $HTMLSpaceURIsAttrChecker,
1882 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1883 wakaba 1.1 media => $HTMLMQAttrChecker,
1884     hreflang => $HTMLLanguageTagAttrChecker,
1885     type => $HTMLIMTAttrChecker,
1886     }->{$attr_ln};
1887     if ($checker) {
1888     $attr{$attr_ln} = $attr;
1889     } else {
1890     $checker = $HTMLAttrChecker->{$attr_ln};
1891     }
1892     }
1893     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1894     || $AttrChecker->{$attr_ns}->{''};
1895     if ($checker) {
1896     $checker->($self, $attr) if ref $checker;
1897     } else {
1898     $self->{onerror}->(node => $attr, level => 'unsupported',
1899     type => 'attribute');
1900     ## ISSUE: No comformance createria for unknown attributes in the spec
1901     }
1902     }
1903    
1904 wakaba 1.4 if (defined $attr{href}) {
1905     $self->{has_hyperlink_element} = 1;
1906     } else {
1907 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1908     if (defined $attr{$_}) {
1909     $self->{onerror}->(node => $attr{$_},
1910     type => 'attribute not allowed');
1911     }
1912     }
1913     }
1914     },
1915     checker => sub {
1916     my ($self, $todo) = @_;
1917    
1918     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1919     my ($new_todos, $ch)
1920 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
1921 wakaba 1.1 push @$new_todos, $end;
1922    
1923 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1924     $_->{flag}->{in_a_href} = 1 for @$new_todos;
1925     }
1926 wakaba 1.1
1927     return ($new_todos, $ch);
1928     },
1929     };
1930    
1931     $Element->{$HTML_NS}->{q} = {
1932     attrs_checker => $GetHTMLAttrsChecker->({
1933     cite => $HTMLURIAttrChecker,
1934     }),
1935     checker => $HTMLInlineOrStrictlyInlineChecker,
1936     };
1937    
1938     $Element->{$HTML_NS}->{cite} = {
1939     attrs_checker => $GetHTMLAttrsChecker->({}),
1940     checker => $HTMLStrictlyInlineChecker,
1941     };
1942    
1943     $Element->{$HTML_NS}->{em} = {
1944     attrs_checker => $GetHTMLAttrsChecker->({}),
1945     checker => $HTMLInlineOrStrictlyInlineChecker,
1946     };
1947    
1948     $Element->{$HTML_NS}->{strong} = {
1949     attrs_checker => $GetHTMLAttrsChecker->({}),
1950     checker => $HTMLInlineOrStrictlyInlineChecker,
1951     };
1952    
1953     $Element->{$HTML_NS}->{small} = {
1954     attrs_checker => $GetHTMLAttrsChecker->({}),
1955     checker => $HTMLInlineOrStrictlyInlineChecker,
1956     };
1957    
1958     $Element->{$HTML_NS}->{m} = {
1959     attrs_checker => $GetHTMLAttrsChecker->({}),
1960     checker => $HTMLInlineOrStrictlyInlineChecker,
1961     };
1962    
1963     $Element->{$HTML_NS}->{dfn} = {
1964     attrs_checker => $GetHTMLAttrsChecker->({}),
1965     checker => sub {
1966     my ($self, $todo) = @_;
1967    
1968     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1969     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1970     push @$sib, $end;
1971    
1972     my $node = $todo->{node};
1973     my $term = $node->get_attribute_ns (undef, 'title');
1974     unless (defined $term) {
1975     for my $child (@{$node->child_nodes}) {
1976     if ($child->node_type == 1) { # ELEMENT_NODE
1977     if (defined $term) {
1978     undef $term;
1979     last;
1980     } elsif ($child->manakai_local_name eq 'abbr') {
1981     my $nsuri = $child->namespace_uri;
1982     if (defined $nsuri and $nsuri eq $HTML_NS) {
1983     my $attr = $child->get_attribute_node_ns (undef, 'title');
1984     if ($attr) {
1985     $term = $attr->value;
1986     }
1987     }
1988     }
1989     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1990     ## TEXT_NODE or CDATA_SECTION_NODE
1991     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1992     next;
1993     }
1994     undef $term;
1995     last;
1996     }
1997     }
1998     unless (defined $term) {
1999     $term = $node->text_content;
2000     }
2001     }
2002     if ($self->{term}->{$term}) {
2003     $self->{onerror}->(node => $node, type => 'duplicate term');
2004     push @{$self->{term}->{$term}}, $node;
2005     } else {
2006     $self->{term}->{$term} = [$node];
2007     }
2008     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
2009     ## has |title|.
2010    
2011     return ($sib, $ch);
2012     },
2013     };
2014    
2015     $Element->{$HTML_NS}->{abbr} = {
2016     attrs_checker => $GetHTMLAttrsChecker->({
2017     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
2018     ## not different. The spec says that the |title| MAY be omitted
2019     ## if there is a |dfn| whose defining term is the abbreviation,
2020     ## but it does not prohibit |abbr| w/o |title| in other cases.
2021     }),
2022     checker => $HTMLStrictlyInlineChecker,
2023     };
2024    
2025     $Element->{$HTML_NS}->{time} = {
2026     attrs_checker => $GetHTMLAttrsChecker->({
2027     datetime => sub { 1 }, # checked in |checker|
2028     }),
2029     ## TODO: Write tests
2030     checker => sub {
2031     my ($self, $todo) = @_;
2032    
2033     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
2034     my $input;
2035     my $reg_sp;
2036     my $input_node;
2037     if ($attr) {
2038     $input = $attr->value;
2039     $reg_sp = qr/[\x09-\x0D\x20]*/;
2040     $input_node = $attr;
2041     } else {
2042     $input = $todo->{node}->text_content;
2043     $reg_sp = qr/\p{Zs}*/;
2044     $input_node = $todo->{node};
2045    
2046     ## ISSUE: What is the definition for "successfully extracts a date
2047     ## or time"? If the algorithm says the string is invalid but
2048     ## return some date or time, is it "successfully"?
2049     }
2050    
2051     my $hour;
2052     my $minute;
2053     my $second;
2054     if ($input =~ /
2055     \A
2056     [\x09-\x0D\x20]*
2057     ([0-9]+) # 1
2058     (?>
2059     -([0-9]+) # 2
2060     -([0-9]+) # 3
2061     [\x09-\x0D\x20]*
2062     (?>
2063     T
2064     [\x09-\x0D\x20]*
2065     )?
2066     ([0-9]+) # 4
2067     :([0-9]+) # 5
2068     (?>
2069     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2070     )?
2071     [\x09-\x0D\x20]*
2072     (?>
2073     Z
2074     [\x09-\x0D\x20]*
2075     |
2076     [+-]([0-9]+):([0-9]+) # 7, 8
2077     [\x09-\x0D\x20]*
2078     )?
2079     \z
2080     |
2081     :([0-9]+) # 9
2082     (?>
2083     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2084     )?
2085     [\x09-\x0D\x20]*\z
2086     )
2087     /x) {
2088     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2089     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2090     length $4 != 2 or length $5 != 2) {
2091     $self->{onerror}->(node => $input_node,
2092     type => 'dateortime:syntax error');
2093     }
2094    
2095     if (1 <= $2 and $2 <= 12) {
2096     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2097     if $3 < 1 or
2098     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
2099     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2100     if $2 == 2 and $3 == 29 and
2101     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2102     } else {
2103     $self->{onerror}->(node => $input_node,
2104     type => 'datetime:bad month');
2105     }
2106    
2107     ($hour, $minute, $second) = ($4, $5, $6);
2108    
2109     if (defined $7) { ## [+-]hh:mm
2110     if (length $7 != 2 or length $8 != 2) {
2111     $self->{onerror}->(node => $input_node,
2112     type => 'dateortime:syntax error');
2113     }
2114    
2115     $self->{onerror}->(node => $input_node,
2116     type => 'datetime:bad timezone hour')
2117     if $7 > 23;
2118     $self->{onerror}->(node => $input_node,
2119     type => 'datetime:bad timezone minute')
2120     if $8 > 59;
2121     }
2122     } else { ## hh:mm
2123     if (length $1 != 2 or length $9 != 2) {
2124     $self->{onerror}->(node => $input_node,
2125     type => qq'dateortime:syntax error');
2126     }
2127    
2128     ($hour, $minute, $second) = ($1, $9, $10);
2129     }
2130    
2131     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2132     if $hour > 23;
2133     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2134     if $minute > 59;
2135    
2136     if (defined $second) { ## s
2137     ## NOTE: Integer part of second don't have to have length of two.
2138    
2139     if (substr ($second, 0, 1) eq '.') {
2140     $self->{onerror}->(node => $input_node,
2141     type => 'dateortime:syntax error');
2142     }
2143    
2144     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2145     if $second >= 60;
2146     }
2147     } else {
2148     $self->{onerror}->(node => $input_node,
2149     type => 'dateortime:syntax error');
2150     }
2151    
2152     return $HTMLStrictlyInlineChecker->($self, $todo);
2153     },
2154     };
2155    
2156     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2157     attrs_checker => $GetHTMLAttrsChecker->({
2158     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2159     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2160     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2161     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2162     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2163     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2164     }),
2165     checker => $HTMLStrictlyInlineChecker,
2166     };
2167    
2168     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2169     attrs_checker => $GetHTMLAttrsChecker->({
2170     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2171     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2172     }),
2173     checker => $HTMLStrictlyInlineChecker,
2174     };
2175    
2176     $Element->{$HTML_NS}->{code} = {
2177     attrs_checker => $GetHTMLAttrsChecker->({}),
2178     ## NOTE: Though |title| has special semantics,
2179     ## syntatically same as the |title| as global attribute.
2180     checker => $HTMLInlineOrStrictlyInlineChecker,
2181     };
2182    
2183     $Element->{$HTML_NS}->{var} = {
2184     attrs_checker => $GetHTMLAttrsChecker->({}),
2185     ## NOTE: Though |title| has special semantics,
2186     ## syntatically same as the |title| as global attribute.
2187     checker => $HTMLStrictlyInlineChecker,
2188     };
2189    
2190     $Element->{$HTML_NS}->{samp} = {
2191     attrs_checker => $GetHTMLAttrsChecker->({}),
2192     ## NOTE: Though |title| has special semantics,
2193     ## syntatically same as the |title| as global attribute.
2194     checker => $HTMLInlineOrStrictlyInlineChecker,
2195     };
2196    
2197     $Element->{$HTML_NS}->{kbd} = {
2198     attrs_checker => $GetHTMLAttrsChecker->({}),
2199     checker => $HTMLStrictlyInlineChecker,
2200     };
2201    
2202     $Element->{$HTML_NS}->{sub} = {
2203     attrs_checker => $GetHTMLAttrsChecker->({}),
2204     checker => $HTMLStrictlyInlineChecker,
2205     };
2206    
2207     $Element->{$HTML_NS}->{sup} = {
2208     attrs_checker => $GetHTMLAttrsChecker->({}),
2209     checker => $HTMLStrictlyInlineChecker,
2210     };
2211    
2212     $Element->{$HTML_NS}->{span} = {
2213     attrs_checker => $GetHTMLAttrsChecker->({}),
2214     ## NOTE: Though |title| has special semantics,
2215     ## syntatically same as the |title| as global attribute.
2216     checker => $HTMLInlineOrStrictlyInlineChecker,
2217     };
2218    
2219     $Element->{$HTML_NS}->{i} = {
2220     attrs_checker => $GetHTMLAttrsChecker->({}),
2221     ## NOTE: Though |title| has special semantics,
2222     ## syntatically same as the |title| as global attribute.
2223     checker => $HTMLStrictlyInlineChecker,
2224     };
2225    
2226     $Element->{$HTML_NS}->{b} = {
2227     attrs_checker => $GetHTMLAttrsChecker->({}),
2228     checker => $HTMLStrictlyInlineChecker,
2229     };
2230    
2231     $Element->{$HTML_NS}->{bdo} = {
2232     attrs_checker => sub {
2233     my ($self, $todo) = @_;
2234     $GetHTMLAttrsChecker->({})->($self, $todo);
2235     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2236     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2237     }
2238     },
2239     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2240     checker => $HTMLStrictlyInlineChecker,
2241     };
2242    
2243     $Element->{$HTML_NS}->{ins} = {
2244     attrs_checker => $GetHTMLAttrsChecker->({
2245     cite => $HTMLURIAttrChecker,
2246     datetime => $HTMLDatetimeAttrChecker,
2247     }),
2248     checker => $HTMLTransparentChecker,
2249     };
2250    
2251     $Element->{$HTML_NS}->{del} = {
2252     attrs_checker => $GetHTMLAttrsChecker->({
2253     cite => $HTMLURIAttrChecker,
2254     datetime => $HTMLDatetimeAttrChecker,
2255     }),
2256     checker => sub {
2257     my ($self, $todo) = @_;
2258    
2259     my $parent = $todo->{node}->manakai_parent_element;
2260     if (defined $parent) {
2261     my $nsuri = $parent->namespace_uri;
2262     $nsuri = '' unless defined $nsuri;
2263     my $ln = $parent->manakai_local_name;
2264     my $eldef = $Element->{$nsuri}->{$ln} ||
2265     $Element->{$nsuri}->{''} ||
2266     $ElementDefault;
2267     return $eldef->{checker}->($self, $todo);
2268     } else {
2269     return $HTMLBlockOrInlineChecker->($self, $todo);
2270     }
2271     },
2272     };
2273    
2274     ## TODO: figure
2275 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2276 wakaba 1.1
2277 wakaba 1.4 ## TODO: |alt|
2278 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2279     attrs_checker => sub {
2280     my ($self, $todo) = @_;
2281     $GetHTMLAttrsChecker->({
2282     alt => sub { }, ## NOTE: No syntactical requirement
2283     src => $HTMLURIAttrChecker,
2284     usemap => $HTMLUsemapAttrChecker,
2285     ismap => sub {
2286     my ($self, $attr, $parent_todo) = @_;
2287 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2288     $self->{onerror}->(node => $attr,
2289     type => 'attribute not allowed:ismap');
2290 wakaba 1.1 }
2291     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2292     },
2293     ## TODO: height
2294     ## TODO: width
2295     })->($self, $todo);
2296     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2297     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2298     }
2299     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2300     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2301     }
2302     },
2303     checker => $HTMLEmptyChecker,
2304     };
2305    
2306     $Element->{$HTML_NS}->{iframe} = {
2307     attrs_checker => $GetHTMLAttrsChecker->({
2308     src => $HTMLURIAttrChecker,
2309     }),
2310     checker => $HTMLTextChecker,
2311     };
2312    
2313     $Element->{$HTML_NS}->{embed} = {
2314     attrs_checker => sub {
2315     my ($self, $todo) = @_;
2316     my $has_src;
2317     for my $attr (@{$todo->{node}->attributes}) {
2318     my $attr_ns = $attr->namespace_uri;
2319     $attr_ns = '' unless defined $attr_ns;
2320     my $attr_ln = $attr->manakai_local_name;
2321     my $checker;
2322     if ($attr_ns eq '') {
2323     if ($attr_ln eq 'src') {
2324     $checker = $HTMLURIAttrChecker;
2325     $has_src = 1;
2326     } elsif ($attr_ln eq 'type') {
2327     $checker = $HTMLIMTAttrChecker;
2328     } else {
2329     ## TODO: height
2330     ## TODO: width
2331     $checker = $HTMLAttrChecker->{$attr_ln}
2332     || sub { }; ## NOTE: Any local attribute is ok.
2333     }
2334     }
2335     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2336     || $AttrChecker->{$attr_ns}->{''};
2337     if ($checker) {
2338     $checker->($self, $attr);
2339     } else {
2340     $self->{onerror}->(node => $attr, level => 'unsupported',
2341     type => 'attribute');
2342     ## ISSUE: No comformance createria for global attributes in the spec
2343     }
2344     }
2345    
2346     unless ($has_src) {
2347     $self->{onerror}->(node => $todo->{node},
2348     type => 'attribute missing:src');
2349     }
2350     },
2351     checker => $HTMLEmptyChecker,
2352     };
2353    
2354     $Element->{$HTML_NS}->{object} = {
2355     attrs_checker => sub {
2356     my ($self, $todo) = @_;
2357     $GetHTMLAttrsChecker->({
2358     data => $HTMLURIAttrChecker,
2359     type => $HTMLIMTAttrChecker,
2360     usemap => $HTMLUsemapAttrChecker,
2361     ## TODO: width
2362     ## TODO: height
2363     })->($self, $todo);
2364     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2365     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2366     $self->{onerror}->(node => $todo->{node},
2367     type => 'attribute missing:data|type');
2368     }
2369     }
2370     },
2371     checker => $ElementDefault->{checker}, ## TODO
2372 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2373 wakaba 1.1 };
2374    
2375     $Element->{$HTML_NS}->{param} = {
2376     attrs_checker => sub {
2377     my ($self, $todo) = @_;
2378     $GetHTMLAttrsChecker->({
2379     name => sub { },
2380     value => sub { },
2381     })->($self, $todo);
2382     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2383     $self->{onerror}->(node => $todo->{node},
2384     type => 'attribute missing:name');
2385     }
2386     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2387     $self->{onerror}->(node => $todo->{node},
2388     type => 'attribute missing:value');
2389     }
2390     },
2391     checker => $HTMLEmptyChecker,
2392     };
2393    
2394     $Element->{$HTML_NS}->{video} = {
2395     attrs_checker => $GetHTMLAttrsChecker->({
2396     src => $HTMLURIAttrChecker,
2397     ## TODO: start, loopstart, loopend, end
2398     ## ISSUE: they MUST be "value time offset"s. Value?
2399 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2400 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2401     controls => $GetHTMLBooleanAttrChecker->('controls'),
2402 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2403     ## TODO: width, height (not for audio!)
2404 wakaba 1.1 }),
2405     checker => sub {
2406     my ($self, $todo) = @_;
2407    
2408     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2409     return $HTMLBlockOrInlineChecker->($self, $todo);
2410     } else {
2411     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2412     ->($self, $todo);
2413     }
2414     },
2415     };
2416    
2417     $Element->{$HTML_NS}->{audio} = {
2418     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2419     checker => $Element->{$HTML_NS}->{video}->{checker},
2420     };
2421    
2422     $Element->{$HTML_NS}->{source} = {
2423     attrs_checker => sub {
2424     my ($self, $todo) = @_;
2425     $GetHTMLAttrsChecker->({
2426     src => $HTMLURIAttrChecker,
2427     type => $HTMLIMTAttrChecker,
2428     media => $HTMLMQAttrChecker,
2429     })->($self, $todo);
2430     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2431     $self->{onerror}->(node => $todo->{node},
2432     type => 'attribute missing:src');
2433     }
2434     },
2435     checker => $HTMLEmptyChecker,
2436     };
2437    
2438     $Element->{$HTML_NS}->{canvas} = {
2439     attrs_checker => $GetHTMLAttrsChecker->({
2440     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2441     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2442     }),
2443     checker => $HTMLInlineChecker,
2444     };
2445    
2446     $Element->{$HTML_NS}->{map} = {
2447 wakaba 1.4 attrs_checker => sub {
2448     my ($self, $todo) = @_;
2449     my $has_id;
2450     $GetHTMLAttrsChecker->({
2451     id => sub {
2452     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2453     my ($self, $attr) = @_;
2454     my $value = $attr->value;
2455     if (length $value > 0) {
2456     if ($self->{id}->{$value}) {
2457     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2458     push @{$self->{id}->{$value}}, $attr;
2459     } else {
2460     $self->{id}->{$value} = [$attr];
2461     }
2462 wakaba 1.1 } else {
2463 wakaba 1.4 ## NOTE: MUST contain at least one character
2464     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2465 wakaba 1.1 }
2466 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2467     $self->{onerror}->(node => $attr, type => 'space in ID');
2468     }
2469     $self->{map}->{$value} ||= $attr;
2470     $has_id = 1;
2471     },
2472     })->($self, $todo);
2473     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2474     unless $has_id;
2475     },
2476 wakaba 1.1 checker => $HTMLBlockChecker,
2477     };
2478    
2479     $Element->{$HTML_NS}->{area} = {
2480     attrs_checker => sub {
2481     my ($self, $todo) = @_;
2482     my %attr;
2483     my $coords;
2484     for my $attr (@{$todo->{node}->attributes}) {
2485     my $attr_ns = $attr->namespace_uri;
2486     $attr_ns = '' unless defined $attr_ns;
2487     my $attr_ln = $attr->manakai_local_name;
2488     my $checker;
2489     if ($attr_ns eq '') {
2490     $checker = {
2491     alt => sub { },
2492     ## NOTE: |alt| value has no conformance creteria.
2493     shape => $GetHTMLEnumeratedAttrChecker->({
2494     circ => -1, circle => 1,
2495     default => 1,
2496     poly => 1, polygon => -1,
2497     rect => 1, rectangle => -1,
2498     }),
2499     coords => sub {
2500     my ($self, $attr) = @_;
2501     my $value = $attr->value;
2502     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2503     $coords = [split /,/, $value];
2504     } else {
2505     $self->{onerror}->(node => $attr,
2506     type => 'coords:syntax error');
2507     }
2508     },
2509     target => $HTMLTargetAttrChecker,
2510     href => $HTMLURIAttrChecker,
2511     ping => $HTMLSpaceURIsAttrChecker,
2512 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2513 wakaba 1.1 media => $HTMLMQAttrChecker,
2514     hreflang => $HTMLLanguageTagAttrChecker,
2515     type => $HTMLIMTAttrChecker,
2516     }->{$attr_ln};
2517     if ($checker) {
2518     $attr{$attr_ln} = $attr;
2519     } else {
2520     $checker = $HTMLAttrChecker->{$attr_ln};
2521     }
2522     }
2523     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2524     || $AttrChecker->{$attr_ns}->{''};
2525     if ($checker) {
2526     $checker->($self, $attr) if ref $checker;
2527     } else {
2528     $self->{onerror}->(node => $attr, level => 'unsupported',
2529     type => 'attribute');
2530     ## ISSUE: No comformance createria for unknown attributes in the spec
2531     }
2532     }
2533    
2534     if (defined $attr{href}) {
2535 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2536 wakaba 1.1 unless (defined $attr{alt}) {
2537     $self->{onerror}->(node => $todo->{node},
2538     type => 'attribute missing:alt');
2539     }
2540     } else {
2541     for (qw/target ping rel media hreflang type alt/) {
2542     if (defined $attr{$_}) {
2543     $self->{onerror}->(node => $attr{$_},
2544     type => 'attribute not allowed');
2545     }
2546     }
2547     }
2548    
2549     my $shape = 'rectangle';
2550     if (defined $attr{shape}) {
2551     $shape = {
2552     circ => 'circle', circle => 'circle',
2553     default => 'default',
2554     poly => 'polygon', polygon => 'polygon',
2555     rect => 'rectangle', rectangle => 'rectangle',
2556     }->{lc $attr{shape}->value} || 'rectangle';
2557     ## TODO: ASCII lowercase?
2558     }
2559    
2560     if ($shape eq 'circle') {
2561     if (defined $attr{coords}) {
2562     if (defined $coords) {
2563     if (@$coords == 3) {
2564     if ($coords->[2] < 0) {
2565     $self->{onerror}->(node => $attr{coords},
2566     type => 'coords:out of range:2');
2567     }
2568     } else {
2569     $self->{onerror}->(node => $attr{coords},
2570     type => 'coords:number:3:'.@$coords);
2571     }
2572     } else {
2573     ## NOTE: A syntax error has been reported.
2574     }
2575     } else {
2576     $self->{onerror}->(node => $todo->{node},
2577     type => 'attribute missing:coords');
2578     }
2579     } elsif ($shape eq 'default') {
2580     if (defined $attr{coords}) {
2581     $self->{onerror}->(node => $attr{coords},
2582     type => 'attribute not allowed');
2583     }
2584     } elsif ($shape eq 'polygon') {
2585     if (defined $attr{coords}) {
2586     if (defined $coords) {
2587     if (@$coords >= 6) {
2588     unless (@$coords % 2 == 0) {
2589     $self->{onerror}->(node => $attr{coords},
2590     type => 'coords:number:even:'.@$coords);
2591     }
2592     } else {
2593     $self->{onerror}->(node => $attr{coords},
2594     type => 'coords:number:>=6:'.@$coords);
2595     }
2596     } else {
2597     ## NOTE: A syntax error has been reported.
2598     }
2599     } else {
2600     $self->{onerror}->(node => $todo->{node},
2601     type => 'attribute missing:coords');
2602     }
2603     } elsif ($shape eq 'rectangle') {
2604     if (defined $attr{coords}) {
2605     if (defined $coords) {
2606     if (@$coords == 4) {
2607     unless ($coords->[0] < $coords->[2]) {
2608     $self->{onerror}->(node => $attr{coords},
2609     type => 'coords:out of range:0');
2610     }
2611     unless ($coords->[1] < $coords->[3]) {
2612     $self->{onerror}->(node => $attr{coords},
2613     type => 'coords:out of range:1');
2614     }
2615     } else {
2616     $self->{onerror}->(node => $attr{coords},
2617     type => 'coords:number:4:'.@$coords);
2618     }
2619     } else {
2620     ## NOTE: A syntax error has been reported.
2621     }
2622     } else {
2623     $self->{onerror}->(node => $todo->{node},
2624     type => 'attribute missing:coords');
2625     }
2626     }
2627     },
2628     checker => $HTMLEmptyChecker,
2629     };
2630     ## TODO: only in map
2631    
2632     $Element->{$HTML_NS}->{table} = {
2633     attrs_checker => $GetHTMLAttrsChecker->({}),
2634     checker => sub {
2635     my ($self, $todo) = @_;
2636     my $el = $todo->{node};
2637     my $new_todos = [];
2638     my @nodes = (@{$el->child_nodes});
2639    
2640     my $phase = 'before caption';
2641     my $has_tfoot;
2642     while (@nodes) {
2643     my $node = shift @nodes;
2644     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2645    
2646     my $nt = $node->node_type;
2647     if ($nt == 1) {
2648     my $node_ns = $node->namespace_uri;
2649     $node_ns = '' unless defined $node_ns;
2650     my $node_ln = $node->manakai_local_name;
2651     ## NOTE: |minuses| list is not checked since redundant
2652 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2653     #
2654     } elsif ($phase eq 'in tbodys') {
2655 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2656     #$phase = 'in tbodys';
2657     } elsif (not $has_tfoot and
2658     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2659     $phase = 'after tfoot';
2660     $has_tfoot = 1;
2661     } else {
2662     $self->{onerror}->(node => $node, type => 'element not allowed');
2663     }
2664     } elsif ($phase eq 'in trs') {
2665     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2666     #$phase = 'in trs';
2667     } elsif (not $has_tfoot and
2668     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2669     $phase = 'after tfoot';
2670     $has_tfoot = 1;
2671     } else {
2672     $self->{onerror}->(node => $node, type => 'element not allowed');
2673     }
2674     } elsif ($phase eq 'after thead') {
2675     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2676     $phase = 'in tbodys';
2677     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2678     $phase = 'in trs';
2679     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2680     $phase = 'in tbodys';
2681     $has_tfoot = 1;
2682     } else {
2683     $self->{onerror}->(node => $node, type => 'element not allowed');
2684     }
2685     } elsif ($phase eq 'in colgroup') {
2686     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2687     $phase = 'in colgroup';
2688     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2689     $phase = 'after thead';
2690     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2691     $phase = 'in tbodys';
2692     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2693     $phase = 'in trs';
2694     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2695     $phase = 'in tbodys';
2696     $has_tfoot = 1;
2697     } else {
2698     $self->{onerror}->(node => $node, type => 'element not allowed');
2699     }
2700     } elsif ($phase eq 'before caption') {
2701     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2702     $phase = 'in colgroup';
2703     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2704     $phase = 'in colgroup';
2705     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2706     $phase = 'after thead';
2707     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2708     $phase = 'in tbodys';
2709     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2710     $phase = 'in trs';
2711     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2712     $phase = 'in tbodys';
2713     $has_tfoot = 1;
2714     } else {
2715     $self->{onerror}->(node => $node, type => 'element not allowed');
2716     }
2717     } else { # after tfoot
2718     $self->{onerror}->(node => $node, type => 'element not allowed');
2719     }
2720     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2721     unshift @nodes, @$sib;
2722     push @$new_todos, @$ch;
2723     } elsif ($nt == 3 or $nt == 4) {
2724     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2725     $self->{onerror}->(node => $node, type => 'character not allowed');
2726     }
2727     } elsif ($nt == 5) {
2728     unshift @nodes, @{$node->child_nodes};
2729     }
2730     }
2731    
2732     ## Table model errors
2733     require Whatpm::HTMLTable;
2734     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2735     my %opt = @_;
2736     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2737     });
2738     push @{$self->{return}->{table}}, $todo->{node};
2739    
2740     return ($new_todos);
2741     },
2742     };
2743    
2744     $Element->{$HTML_NS}->{caption} = {
2745     attrs_checker => $GetHTMLAttrsChecker->({}),
2746 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2747 wakaba 1.1 };
2748    
2749     $Element->{$HTML_NS}->{colgroup} = {
2750     attrs_checker => $GetHTMLAttrsChecker->({
2751     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2752     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2753     ## TODO: "attribute not supported" if |col|.
2754     ## ISSUE: MUST NOT if any |col|?
2755     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2756     }),
2757     checker => sub {
2758     my ($self, $todo) = @_;
2759     my $el = $todo->{node};
2760     my $new_todos = [];
2761     my @nodes = (@{$el->child_nodes});
2762    
2763     while (@nodes) {
2764     my $node = shift @nodes;
2765     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2766    
2767     my $nt = $node->node_type;
2768     if ($nt == 1) {
2769     my $node_ns = $node->namespace_uri;
2770     $node_ns = '' unless defined $node_ns;
2771     my $node_ln = $node->manakai_local_name;
2772     ## NOTE: |minuses| list is not checked since redundant
2773 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2774     #
2775     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2776 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2777     }
2778     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2779     unshift @nodes, @$sib;
2780     push @$new_todos, @$ch;
2781     } elsif ($nt == 3 or $nt == 4) {
2782     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2783     $self->{onerror}->(node => $node, type => 'character not allowed');
2784     }
2785     } elsif ($nt == 5) {
2786     unshift @nodes, @{$node->child_nodes};
2787     }
2788     }
2789     return ($new_todos);
2790     },
2791     };
2792    
2793     $Element->{$HTML_NS}->{col} = {
2794     attrs_checker => $GetHTMLAttrsChecker->({
2795     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2796     }),
2797     checker => $HTMLEmptyChecker,
2798     };
2799    
2800     $Element->{$HTML_NS}->{tbody} = {
2801     attrs_checker => $GetHTMLAttrsChecker->({}),
2802     checker => sub {
2803     my ($self, $todo) = @_;
2804     my $el = $todo->{node};
2805     my $new_todos = [];
2806     my @nodes = (@{$el->child_nodes});
2807    
2808     my $has_tr;
2809     while (@nodes) {
2810     my $node = shift @nodes;
2811     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2812    
2813     my $nt = $node->node_type;
2814     if ($nt == 1) {
2815     my $node_ns = $node->namespace_uri;
2816     $node_ns = '' unless defined $node_ns;
2817     my $node_ln = $node->manakai_local_name;
2818     ## NOTE: |minuses| list is not checked since redundant
2819 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2820     #
2821     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2822 wakaba 1.1 $has_tr = 1;
2823     } else {
2824     $self->{onerror}->(node => $node, type => 'element not allowed');
2825     }
2826     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2827     unshift @nodes, @$sib;
2828     push @$new_todos, @$ch;
2829     } elsif ($nt == 3 or $nt == 4) {
2830     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2831     $self->{onerror}->(node => $node, type => 'character not allowed');
2832     }
2833     } elsif ($nt == 5) {
2834     unshift @nodes, @{$node->child_nodes};
2835     }
2836     }
2837     unless ($has_tr) {
2838     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2839     }
2840     return ($new_todos);
2841     },
2842     };
2843    
2844     $Element->{$HTML_NS}->{thead} = {
2845     attrs_checker => $GetHTMLAttrsChecker->({}),
2846     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2847     };
2848    
2849     $Element->{$HTML_NS}->{tfoot} = {
2850     attrs_checker => $GetHTMLAttrsChecker->({}),
2851     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2852     };
2853    
2854     $Element->{$HTML_NS}->{tr} = {
2855     attrs_checker => $GetHTMLAttrsChecker->({}),
2856     checker => sub {
2857     my ($self, $todo) = @_;
2858     my $el = $todo->{node};
2859     my $new_todos = [];
2860     my @nodes = (@{$el->child_nodes});
2861    
2862     my $has_td;
2863     while (@nodes) {
2864     my $node = shift @nodes;
2865     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2866    
2867     my $nt = $node->node_type;
2868     if ($nt == 1) {
2869     my $node_ns = $node->namespace_uri;
2870     $node_ns = '' unless defined $node_ns;
2871     my $node_ln = $node->manakai_local_name;
2872     ## NOTE: |minuses| list is not checked since redundant
2873 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2874     #
2875     } elsif ($node_ns eq $HTML_NS and
2876     ($node_ln eq 'td' or $node_ln eq 'th')) {
2877 wakaba 1.1 $has_td = 1;
2878     } else {
2879     $self->{onerror}->(node => $node, type => 'element not allowed');
2880     }
2881     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2882     unshift @nodes, @$sib;
2883     push @$new_todos, @$ch;
2884     } elsif ($nt == 3 or $nt == 4) {
2885     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2886     $self->{onerror}->(node => $node, type => 'character not allowed');
2887     }
2888     } elsif ($nt == 5) {
2889     unshift @nodes, @{$node->child_nodes};
2890     }
2891     }
2892     unless ($has_td) {
2893     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2894     }
2895     return ($new_todos);
2896     },
2897     };
2898    
2899     $Element->{$HTML_NS}->{td} = {
2900     attrs_checker => $GetHTMLAttrsChecker->({
2901     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2902     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2903     }),
2904     checker => $HTMLBlockOrInlineChecker,
2905     };
2906    
2907     $Element->{$HTML_NS}->{th} = {
2908     attrs_checker => $GetHTMLAttrsChecker->({
2909     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2910     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2911     scope => $GetHTMLEnumeratedAttrChecker
2912     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2913     }),
2914     checker => $HTMLBlockOrInlineChecker,
2915     };
2916    
2917     ## TODO: forms
2918 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2919 wakaba 1.1
2920     $Element->{$HTML_NS}->{script} = {
2921 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2922 wakaba 1.1 src => $HTMLURIAttrChecker,
2923     defer => $GetHTMLBooleanAttrChecker->('defer'),
2924     async => $GetHTMLBooleanAttrChecker->('async'),
2925     type => $HTMLIMTAttrChecker,
2926 wakaba 1.9 }),
2927 wakaba 1.1 checker => sub {
2928     my ($self, $todo) = @_;
2929    
2930     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2931     return $HTMLEmptyChecker->($self, $todo);
2932     } else {
2933     ## NOTE: No content model conformance in HTML5 spec.
2934     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2935     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2936     if ((defined $type and $type eq '') or
2937     (defined $language and $language eq '')) {
2938     $type = 'text/javascript';
2939     } elsif (defined $type) {
2940     #
2941     } elsif (defined $language) {
2942     $type = 'text/' . $language;
2943     } else {
2944     $type = 'text/javascript';
2945     }
2946     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2947     type => 'script:'.$type); ## TODO: $type normalization
2948     return $AnyChecker->($self, $todo);
2949     }
2950     },
2951     };
2952    
2953     ## NOTE: When script is disabled.
2954     $Element->{$HTML_NS}->{noscript} = {
2955 wakaba 1.3 attrs_checker => sub {
2956     my ($self, $todo) = @_;
2957    
2958     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2959     ## since the later is not invoked when the |noscript| is used as a
2960     ## transparent element.
2961     unless ($todo->{node}->owner_document->manakai_is_html) {
2962     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2963     }
2964    
2965     $GetHTMLAttrsChecker->({})->($self, $todo);
2966     },
2967 wakaba 1.1 checker => sub {
2968     my ($self, $todo) = @_;
2969    
2970 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2971     my $new_todos = [];
2972     my @nodes = (@{$todo->{node}->child_nodes});
2973    
2974     while (@nodes) {
2975     my $node = shift @nodes;
2976     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2977    
2978     my $nt = $node->node_type;
2979     if ($nt == 1) {
2980     my $node_ns = $node->namespace_uri;
2981     $node_ns = '' unless defined $node_ns;
2982     my $node_ln = $node->manakai_local_name;
2983 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2984     #
2985     } elsif ($node_ns eq $HTML_NS) {
2986 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2987     #
2988     } elsif ($node_ln eq 'meta') {
2989 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2990     #
2991 wakaba 1.3 } else {
2992 wakaba 1.5 $self->{onerror}->(node => $node,
2993     type => 'element not allowed');
2994 wakaba 1.3 }
2995     } else {
2996     $self->{onerror}->(node => $node, type => 'element not allowed');
2997     }
2998     } else {
2999     $self->{onerror}->(node => $node, type => 'element not allowed');
3000     }
3001    
3002     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3003     unshift @nodes, @$sib;
3004     push @$new_todos, @$ch;
3005     } elsif ($nt == 3 or $nt == 4) {
3006     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3007     $self->{onerror}->(node => $node, type => 'character not allowed');
3008     }
3009     } elsif ($nt == 5) {
3010     unshift @nodes, @{$node->child_nodes};
3011     }
3012     }
3013     return ($new_todos);
3014     } else {
3015     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
3016     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
3017     push @$sib, $end;
3018     return ($sib, $ch);
3019     }
3020 wakaba 1.1 },
3021     };
3022 wakaba 1.3
3023     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
3024 wakaba 1.1
3025     $Element->{$HTML_NS}->{'event-source'} = {
3026     attrs_checker => $GetHTMLAttrsChecker->({
3027     src => $HTMLURIAttrChecker,
3028     }),
3029     checker => $HTMLEmptyChecker,
3030     };
3031    
3032     $Element->{$HTML_NS}->{details} = {
3033     attrs_checker => $GetHTMLAttrsChecker->({
3034     open => $GetHTMLBooleanAttrChecker->('open'),
3035     }),
3036     checker => sub {
3037     my ($self, $todo) = @_;
3038    
3039     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3040     my ($sib, $ch)
3041     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
3042     ->($self, $todo);
3043     push @$sib, $end;
3044     return ($sib, $ch);
3045     },
3046     };
3047    
3048     $Element->{$HTML_NS}->{datagrid} = {
3049     attrs_checker => $GetHTMLAttrsChecker->({
3050     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3051     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3052     }),
3053     checker => sub {
3054     my ($self, $todo) = @_;
3055     my $el = $todo->{node};
3056     my $new_todos = [];
3057     my @nodes = (@{$el->child_nodes});
3058    
3059     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3060    
3061     ## Block-table Block* | table | select | datalist | Empty
3062     my $mode = 'any';
3063     while (@nodes) {
3064     my $node = shift @nodes;
3065     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3066    
3067     my $nt = $node->node_type;
3068     if ($nt == 1) {
3069     my $node_ns = $node->namespace_uri;
3070     $node_ns = '' unless defined $node_ns;
3071     my $node_ln = $node->manakai_local_name;
3072     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3073 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3074     #
3075     } elsif ($mode eq 'block') {
3076 wakaba 1.1 $not_allowed = 1
3077     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
3078     } elsif ($mode eq 'any') {
3079     if ($node_ns eq $HTML_NS and
3080     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
3081     $mode = 'none';
3082     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
3083     $mode = 'block';
3084     } else {
3085     $not_allowed = 1;
3086     }
3087     } else {
3088     $not_allowed = 1;
3089     }
3090     $self->{onerror}->(node => $node, type => 'element not allowed')
3091     if $not_allowed;
3092     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3093     unshift @nodes, @$sib;
3094     push @$new_todos, @$ch;
3095     } elsif ($nt == 3 or $nt == 4) {
3096     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3097     $self->{onerror}->(node => $node, type => 'character not allowed');
3098     }
3099     } elsif ($nt == 5) {
3100     unshift @nodes, @{$node->child_nodes};
3101     }
3102     }
3103    
3104     push @$new_todos, $end;
3105     return ($new_todos);
3106     },
3107     };
3108    
3109     $Element->{$HTML_NS}->{command} = {
3110     attrs_checker => $GetHTMLAttrsChecker->({
3111     checked => $GetHTMLBooleanAttrChecker->('checked'),
3112     default => $GetHTMLBooleanAttrChecker->('default'),
3113     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3114     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3115     icon => $HTMLURIAttrChecker,
3116     label => sub { }, ## NOTE: No conformance creteria
3117     radiogroup => sub { }, ## NOTE: No conformance creteria
3118     ## NOTE: |title| has special semantics, but no syntactical difference
3119     type => sub {
3120     my ($self, $attr) = @_;
3121     my $value = $attr->value;
3122     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3123     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3124     }
3125     },
3126     }),
3127     checker => $HTMLEmptyChecker,
3128     };
3129    
3130     $Element->{$HTML_NS}->{menu} = {
3131     attrs_checker => $GetHTMLAttrsChecker->({
3132     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3133     id => sub {
3134     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3135     my ($self, $attr) = @_;
3136     my $value = $attr->value;
3137     if (length $value > 0) {
3138     if ($self->{id}->{$value}) {
3139     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3140     push @{$self->{id}->{$value}}, $attr;
3141     } else {
3142     $self->{id}->{$value} = [$attr];
3143     }
3144     } else {
3145     ## NOTE: MUST contain at least one character
3146     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3147     }
3148     if ($value =~ /[\x09-\x0D\x20]/) {
3149     $self->{onerror}->(node => $attr, type => 'space in ID');
3150     }
3151     $self->{menu}->{$value} ||= $attr;
3152     ## ISSUE: <menu id=""><p contextmenu=""> match?
3153     },
3154     label => sub { }, ## NOTE: No conformance creteria
3155     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3156     }),
3157     checker => sub {
3158     my ($self, $todo) = @_;
3159     my $el = $todo->{node};
3160     my $new_todos = [];
3161     my @nodes = (@{$el->child_nodes});
3162    
3163     my $content = 'li or inline';
3164     while (@nodes) {
3165     my $node = shift @nodes;
3166     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3167    
3168     my $nt = $node->node_type;
3169     if ($nt == 1) {
3170     my $node_ns = $node->namespace_uri;
3171     $node_ns = '' unless defined $node_ns;
3172     my $node_ln = $node->manakai_local_name;
3173     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3174 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3175     #
3176     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3177 wakaba 1.1 if ($content eq 'inline') {
3178     $not_allowed = 1;
3179     } elsif ($content eq 'li or inline') {
3180     $content = 'li';
3181     }
3182     } else {
3183     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3184     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3185     $content = 'inline';
3186     } else {
3187     $not_allowed = 1;
3188     }
3189     }
3190     $self->{onerror}->(node => $node, type => 'element not allowed')
3191     if $not_allowed;
3192     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3193     unshift @nodes, @$sib;
3194     push @$new_todos, @$ch;
3195     } elsif ($nt == 3 or $nt == 4) {
3196     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3197     if ($content eq 'li') {
3198     $self->{onerror}->(node => $node, type => 'character not allowed');
3199     } elsif ($content eq 'li or inline') {
3200     $content = 'inline';
3201     }
3202     }
3203     } elsif ($nt == 5) {
3204     unshift @nodes, @{$node->child_nodes};
3205     }
3206     }
3207    
3208     for (@$new_todos) {
3209     $_->{inline} = 1;
3210     }
3211     return ($new_todos);
3212     },
3213 wakaba 1.8 };
3214    
3215     $Element->{$HTML_NS}->{datatemplate} = {
3216     attrs_checker => $GetHTMLAttrsChecker->({}),
3217     checker => sub {
3218     my ($self, $todo) = @_;
3219     my $el = $todo->{node};
3220     my $new_todos = [];
3221     my @nodes = (@{$el->child_nodes});
3222    
3223     while (@nodes) {
3224     my $node = shift @nodes;
3225     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3226    
3227     my $nt = $node->node_type;
3228     if ($nt == 1) {
3229     my $node_ns = $node->namespace_uri;
3230     $node_ns = '' unless defined $node_ns;
3231     my $node_ln = $node->manakai_local_name;
3232     ## NOTE: |minuses| list is not checked since redundant
3233     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3234     #
3235     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3236     $self->{onerror}->(node => $node,
3237     type => 'element not allowed:datatemplate');
3238     }
3239     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3240     unshift @nodes, @$sib;
3241     push @$new_todos, @$ch;
3242     } elsif ($nt == 3 or $nt == 4) {
3243     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3244     $self->{onerror}->(node => $node, type => 'character not allowed');
3245     }
3246     } elsif ($nt == 5) {
3247     unshift @nodes, @{$node->child_nodes};
3248     }
3249     }
3250     return ($new_todos);
3251     },
3252     is_xml_root => 1,
3253     };
3254    
3255     $Element->{$HTML_NS}->{rule} = {
3256     attrs_checker => $GetHTMLAttrsChecker->({
3257 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3258 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3259 wakaba 1.8 }),
3260     checker => sub {
3261     my ($self, $todo) = @_;
3262    
3263     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3264     my ($sib, $ch) = $AnyChecker->($self, $todo);
3265     push @$sib, $end;
3266     return ($sib, $ch);
3267     },
3268     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3269     ## is applied to some conforming data, results in a conforming DOM tree.":
3270     ## We don't check against this.
3271     };
3272    
3273     $Element->{$HTML_NS}->{nest} = {
3274     attrs_checker => $GetHTMLAttrsChecker->({
3275 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3276     mode => sub {
3277     my ($self, $attr) = @_;
3278     my $value = $attr->value;
3279     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3280     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3281     }
3282     },
3283 wakaba 1.8 }),
3284     checker => $HTMLEmptyChecker,
3285 wakaba 1.1 };
3286    
3287     $Element->{$HTML_NS}->{legend} = {
3288     attrs_checker => $GetHTMLAttrsChecker->({}),
3289     checker => sub {
3290     my ($self, $todo) = @_;
3291    
3292     my $parent = $todo->{node}->manakai_parent_element;
3293     if (defined $parent) {
3294     my $nsuri = $parent->namespace_uri;
3295     $nsuri = '' unless defined $nsuri;
3296     my $ln = $parent->manakai_local_name;
3297     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3298     return $HTMLInlineChecker->($self, $todo);
3299     } else {
3300 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3301 wakaba 1.1 }
3302     } else {
3303     return $HTMLInlineChecker->($self, $todo);
3304     }
3305    
3306     ## ISSUE: Content model is defined only for fieldset/legend,
3307     ## details/legend, and figure/legend.
3308     },
3309     };
3310    
3311     $Element->{$HTML_NS}->{div} = {
3312     attrs_checker => $GetHTMLAttrsChecker->({}),
3313     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3314     };
3315    
3316     $Element->{$HTML_NS}->{font} = {
3317     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3318     checker => $HTMLTransparentChecker,
3319     };
3320    
3321     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3322    
3323     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24