/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations) (download)
Sun Oct 14 09:21:46 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.7: +176 -37 lines
++ whatpm/t/ChangeLog	14 Oct 2007 09:21:32 -0000
2007-10-14  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat, content-model-2.dat: New test
	data for |rule|, |nest|, and |datatemplate| elements.

++ whatpm/Whatpm/ChangeLog	14 Oct 2007 09:20:23 -0000
2007-10-14  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm (check_document): Support for
	new |is_xml_root| flag.
	(check_element): Support for new |pluses| state.
	(_add_pluses): New method.
	(_remove_minuses): Support for new |minus| item.

++ whatpm/Whatpm/ContentChecker/ChangeLog	14 Oct 2007 09:20:50 -0000
2007-10-14  Wakaba  <wakaba@suika.fam.cx>

	* Atom.pm, HTML.pm: Support for |html:nest|, |html:datatemplate|,
	and |html:rule| elements.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
252     ## TODO: check significant content
253    
254     ## Strictly inline-level content
255     my $HTMLStrictlyInlineChecker = sub {
256     my ($self, $todo) = @_;
257     my $el = $todo->{node};
258     my $new_todos = [];
259     my @nodes = (@{$el->child_nodes});
260    
261     while (@nodes) {
262     my $node = shift @nodes;
263     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
264    
265     my $nt = $node->node_type;
266     if ($nt == 1) {
267     my $node_ns = $node->namespace_uri;
268     $node_ns = '' unless defined $node_ns;
269     my $node_ln = $node->manakai_local_name;
270     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
271     $not_allowed = 1
272 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
273     $self->{pluses}->{$node_ns}->{$node_ln};
274 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
275     if $not_allowed;
276     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
277     unshift @nodes, @$sib;
278     push @$new_todos, @$ch;
279     } elsif ($nt == 5) {
280     unshift @nodes, @{$node->child_nodes};
281     }
282     }
283    
284     for (@$new_todos) {
285     $_->{inline} = 1;
286     $_->{strictly_inline} = 1;
287     }
288     return ($new_todos);
289     }; # $HTMLStrictlyInlineChecker
290    
291     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
292     ## TODO: check significant content
293    
294 wakaba 1.8 ## Inline-level or strictly inline-level content
295 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
296     my ($self, $todo) = @_;
297     my $el = $todo->{node};
298     my $new_todos = [];
299     my @nodes = (@{$el->child_nodes});
300    
301     while (@nodes) {
302     my $node = shift @nodes;
303     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
304    
305     my $nt = $node->node_type;
306     if ($nt == 1) {
307     my $node_ns = $node->namespace_uri;
308     $node_ns = '' unless defined $node_ns;
309     my $node_ln = $node->manakai_local_name;
310     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
311     if ($todo->{strictly_inline}) {
312     $not_allowed = 1
313 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
314     $self->{pluses}->{$node_ns}->{$node_ln};
315 wakaba 1.1 } else {
316     $not_allowed = 1
317     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
318 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
319     $self->{pluses}->{$node_ns}->{$node_ln};
320 wakaba 1.1 }
321     $self->{onerror}->(node => $node, type => 'element not allowed')
322     if $not_allowed;
323     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
324     unshift @nodes, @$sib;
325     push @$new_todos, @$ch;
326     } elsif ($nt == 5) {
327     unshift @nodes, @{$node->child_nodes};
328     }
329     }
330    
331     for (@$new_todos) {
332     $_->{inline} = 1;
333     $_->{strictly_inline} = 1;
334     }
335     return ($new_todos);
336     }; # $HTMLInlineOrStrictlyInlineChecker
337    
338     my $HTMLSignificantInlineOrStrictlyInlineChecker
339     = $HTMLInlineOrStrictlyInlineChecker;
340     ## TODO: check significant content
341    
342 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
343 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
344     my ($self, $todo) = @_;
345     my $el = $todo->{node};
346     my $new_todos = [];
347     my @nodes = (@{$el->child_nodes});
348    
349     my $content = 'block-or-inline'; # or 'block' or 'inline'
350     my @block_not_inline;
351     while (@nodes) {
352     my $node = shift @nodes;
353     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
354    
355 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
356     ## is conforming or not.
357    
358 wakaba 1.1 my $nt = $node->node_type;
359     if ($nt == 1) {
360     my $node_ns = $node->namespace_uri;
361     $node_ns = '' unless defined $node_ns;
362     my $node_ln = $node->manakai_local_name;
363     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
364     if ($content eq 'block') {
365     $not_allowed = 1
366 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
367     $self->{pluses}->{$node_ns}->{$node_ln};
368 wakaba 1.1 } elsif ($content eq 'inline') {
369     $not_allowed = 1
370     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
371 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
372     $self->{pluses}->{$node_ns}->{$node_ln};
373 wakaba 1.1 } else {
374     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
375     my $is_inline
376     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
377     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
378    
379     push @block_not_inline, $node
380     if $is_block and not $is_inline and not $not_allowed;
381 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
382 wakaba 1.1 $content = 'inline';
383     for (@block_not_inline) {
384     $self->{onerror}->(node => $_, type => 'element not allowed');
385     }
386     $not_allowed = 1 unless $is_inline;
387     }
388     }
389     $self->{onerror}->(node => $node, type => 'element not allowed')
390     if $not_allowed;
391     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
392     unshift @nodes, @$sib;
393     push @$new_todos, @$ch;
394     } elsif ($nt == 3 or $nt == 4) {
395     if ($node->data =~ /[^\x09-\x0D\x20]/) {
396     if ($content eq 'block') {
397     $self->{onerror}->(node => $node, type => 'character not allowed');
398     } else {
399     $content = 'inline';
400     for (@block_not_inline) {
401     $self->{onerror}->(node => $_, type => 'element not allowed');
402     }
403     }
404     }
405     } elsif ($nt == 5) {
406     unshift @nodes, @{$node->child_nodes};
407     }
408     }
409    
410     if ($content eq 'inline') {
411     for (@$new_todos) {
412     $_->{inline} = 1;
413     }
414     }
415     return ($new_todos);
416     };
417    
418     ## Zero or more XXX element, then either block-level or inline-level
419     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
420     my ($elnsuri, $ellname) = @_;
421     return sub {
422     my ($self, $todo) = @_;
423     my $el = $todo->{node};
424     my $new_todos = [];
425     my @nodes = (@{$el->child_nodes});
426    
427     my $has_non_style;
428     my $content = 'block-or-inline'; # or 'block' or 'inline'
429     my @block_not_inline;
430     while (@nodes) {
431     my $node = shift @nodes;
432     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
433    
434     my $nt = $node->node_type;
435     if ($nt == 1) {
436     my $node_ns = $node->namespace_uri;
437     $node_ns = '' unless defined $node_ns;
438     my $node_ln = $node->manakai_local_name;
439     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
440     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
441     $not_allowed = 1 if $has_non_style;
442     if ($ellname eq 'style' and
443     not $node->has_attribute_ns (undef, 'scoped')) {
444     $not_allowed = 1;
445     }
446     } elsif ($content eq 'block') {
447     $has_non_style = 1;
448     $not_allowed = 1
449 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
450     $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 } elsif ($content eq 'inline') {
452     $has_non_style = 1;
453     $not_allowed = 1
454     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
455 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
456     $self->{pluses}->{$node_ns}->{$node_ln};
457 wakaba 1.1 } else {
458 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
459 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
460     my $is_inline
461     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
462     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
463    
464     push @block_not_inline, $node
465     if $is_block and not $is_inline and not $not_allowed;
466 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
467 wakaba 1.1 $content = 'inline';
468     for (@block_not_inline) {
469     $self->{onerror}->(node => $_, type => 'element not allowed');
470     }
471     $not_allowed = 1 unless $is_inline;
472     }
473     }
474     $self->{onerror}->(node => $node, type => 'element not allowed')
475     if $not_allowed;
476     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
477     unshift @nodes, @$sib;
478     push @$new_todos, @$ch;
479     } elsif ($nt == 3 or $nt == 4) {
480     if ($node->data =~ /[^\x09-\x0D\x20]/) {
481     $has_non_style = 1;
482     if ($content eq 'block') {
483     $self->{onerror}->(node => $node, type => 'character not allowed');
484     } else {
485     $content = 'inline';
486     for (@block_not_inline) {
487     $self->{onerror}->(node => $_, type => 'element not allowed');
488     }
489     }
490     }
491     } elsif ($nt == 5) {
492     unshift @nodes, @{$node->child_nodes};
493     }
494     }
495    
496     if ($content eq 'inline') {
497     for (@$new_todos) {
498     $_->{inline} = 1;
499     }
500     }
501     return ($new_todos);
502     };
503     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
504    
505     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
506    
507     our $AttrChecker;
508    
509     my $GetHTMLEnumeratedAttrChecker = sub {
510     my $states = shift; # {value => conforming ? 1 : -1}
511     return sub {
512     my ($self, $attr) = @_;
513     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
514     if ($states->{$value} > 0) {
515     #
516     } elsif ($states->{$value}) {
517     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
518     } else {
519     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
520     }
521     };
522     }; # $GetHTMLEnumeratedAttrChecker
523    
524     my $GetHTMLBooleanAttrChecker = sub {
525     my $local_name = shift;
526     return sub {
527     my ($self, $attr) = @_;
528     my $value = $attr->value;
529     unless ($value eq $local_name or $value eq '') {
530     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
531     }
532     };
533     }; # $GetHTMLBooleanAttrChecker
534    
535 wakaba 1.8 ## Unordered set of space-separated tokens
536     my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
537     my ($self, $attr) = @_;
538     my %word;
539     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
540     unless ($word{$word}) {
541     $word{$word} = 1;
542     } else {
543     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
544     }
545     }
546     }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
547    
548 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
549     ## whose allowed values are defined by the section on link types)
550     my $HTMLLinkTypesAttrChecker = sub {
551 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
552 wakaba 1.1 my %word;
553     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
554     unless ($word{$word}) {
555     $word{$word} = 1;
556     } else {
557     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
558     }
559     }
560     ## NOTE: Case sensitive match (since HTML5 spec does not say link
561     ## types are case-insensitive and it says "The value should not
562     ## be confusingly similar to any other defined value (e.g.
563     ## differing only in case).").
564     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
565     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
566     ## values to be used conformingly.
567     require Whatpm::_LinkTypeList;
568     our $LinkType;
569     for my $word (keys %word) {
570     my $def = $LinkType->{$word};
571     if (defined $def) {
572     if ($def->{status} eq 'accepted') {
573     if (defined $def->{effect}->[$a_or_area]) {
574     #
575     } else {
576     $self->{onerror}->(node => $attr,
577     type => 'link type:bad context:'.$word);
578     }
579     } elsif ($def->{status} eq 'proposal') {
580     $self->{onerror}->(node => $attr, level => 's',
581     type => 'link type:proposed:'.$word);
582     } else { # rejected or synonym
583     $self->{onerror}->(node => $attr,
584     type => 'link type:non-conforming:'.$word);
585     }
586 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
587     if ($word eq 'alternate') {
588     #
589     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
590     $todo->{has_hyperlink_link_type} = 1;
591     }
592     }
593 wakaba 1.1 if ($def->{unique}) {
594     unless ($self->{has_link_type}->{$word}) {
595     $self->{has_link_type}->{$word} = 1;
596     } else {
597     $self->{onerror}->(node => $attr,
598     type => 'link type:duplicate:'.$word);
599     }
600     }
601     } else {
602     $self->{onerror}->(node => $attr, level => 'unsupported',
603     type => 'link type:'.$word);
604     }
605     }
606 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
607     if $word{alternate} and not $word{stylesheet};
608 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
609     ## says that using both X-Pingback: header field and HTML
610     ## <link rel=pingback> is deprecated and if both appears they
611     ## SHOULD contain exactly the same value.
612     ## ISSUE: Pingback 1.0 specification defines the exact representation
613     ## of its link element, which cannot be tested by the current arch.
614     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
615     ## include any string that matches to the pattern for the rel=pingback link,
616     ## which again inpossible to test.
617     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
618     }; # $HTMLLinkTypesAttrChecker
619    
620     ## URI (or IRI)
621     my $HTMLURIAttrChecker = sub {
622     my ($self, $attr) = @_;
623     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
624     my $value = $attr->value;
625     Whatpm::URIChecker->check_iri_reference ($value, sub {
626     my %opt = @_;
627     $self->{onerror}->(node => $attr, level => $opt{level},
628     type => 'URI::'.$opt{type}.
629     (defined $opt{position} ? ':'.$opt{position} : ''));
630     });
631 wakaba 1.4 $self->{has_uri_attr} = 1;
632 wakaba 1.1 }; # $HTMLURIAttrChecker
633    
634     ## A space separated list of one or more URIs (or IRIs)
635     my $HTMLSpaceURIsAttrChecker = sub {
636     my ($self, $attr) = @_;
637     my $i = 0;
638     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
639     Whatpm::URIChecker->check_iri_reference ($value, sub {
640     my %opt = @_;
641     $self->{onerror}->(node => $attr, level => $opt{level},
642 wakaba 1.2 type => 'URIs:'.':'.
643     $opt{type}.':'.$i.
644 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
645     });
646     $i++;
647     }
648     ## ISSUE: Relative references?
649     ## ISSUE: Leading or trailing white spaces are conformant?
650     ## ISSUE: A sequence of white space characters are conformant?
651     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
652     ## NOTE: Duplication seems not an error.
653 wakaba 1.4 $self->{has_uri_attr} = 1;
654 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
655    
656     my $HTMLDatetimeAttrChecker = sub {
657     my ($self, $attr) = @_;
658     my $value = $attr->value;
659     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
660     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
661     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
662     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
663     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
664     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
665     if $d < 1 or
666     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
667     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
668     if $M == 2 and $d == 29 and
669     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
670     } else {
671     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
672     }
673     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
674     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
675     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
676     if defined $s and $s > 59;
677     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
678     if $zh > 23;
679     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
680     if $zm > 59;
681     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
682     } else {
683     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
684     }
685     }; # $HTMLDatetimeAttrChecker
686    
687     my $HTMLIntegerAttrChecker = sub {
688     my ($self, $attr) = @_;
689     my $value = $attr->value;
690     unless ($value =~ /\A-?[0-9]+\z/) {
691     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
692     }
693     }; # $HTMLIntegerAttrChecker
694    
695     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
696     my $range_check = shift;
697     return sub {
698     my ($self, $attr) = @_;
699     my $value = $attr->value;
700     if ($value =~ /\A[0-9]+\z/) {
701     unless ($range_check->($value + 0)) {
702     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
703     }
704     } else {
705     $self->{onerror}->(node => $attr,
706     type => 'nninteger:syntax error');
707     }
708     };
709     }; # $GetHTMLNonNegativeIntegerAttrChecker
710    
711     my $GetHTMLFloatingPointNumberAttrChecker = sub {
712     my $range_check = shift;
713     return sub {
714     my ($self, $attr) = @_;
715     my $value = $attr->value;
716     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
717     unless ($range_check->($value + 0)) {
718     $self->{onerror}->(node => $attr, type => 'float:out of range');
719     }
720     } else {
721     $self->{onerror}->(node => $attr,
722     type => 'float:syntax error');
723     }
724     };
725     }; # $GetHTMLFloatingPointNumberAttrChecker
726    
727     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
728     ## ISSUE: RFC 2046 does not define syntax of media types.
729     ## ISSUE: The definition of "a valid MIME type" is unknown.
730     ## Syntactical correctness?
731     my $HTMLIMTAttrChecker = sub {
732     my ($self, $attr) = @_;
733     my $value = $attr->value;
734     ## ISSUE: RFC 2045 Content-Type header field allows insertion
735     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
736     ## ISSUE: RFC 2231 extension? Maybe no.
737     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
738     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
739     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
740     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
741     my @type = ($1, $2);
742     my $param = $3;
743     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
744     if (defined $2) {
745     push @type, $1 => $2;
746     } else {
747     my $n = $1;
748     my $v = $2;
749     $v =~ s/\\(.)/$1/gs;
750     push @type, $n => $v;
751     }
752     }
753     require Whatpm::IMTChecker;
754     Whatpm::IMTChecker->check_imt (sub {
755     my %opt = @_;
756     $self->{onerror}->(node => $attr, level => $opt{level},
757     type => 'IMT:'.$opt{type});
758     }, @type);
759     } else {
760     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
761     }
762     }; # $HTMLIMTAttrChecker
763    
764     my $HTMLLanguageTagAttrChecker = sub {
765 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
766    
767 wakaba 1.1 my ($self, $attr) = @_;
768 wakaba 1.6 my $value = $attr->value;
769     require Whatpm::LangTag;
770     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
771     my %opt = @_;
772     my $type = 'LangTag:'.$opt{type};
773     $type .= ':' . $opt{subtag} if defined $opt{subtag};
774     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
775     level => $opt{level});
776     });
777 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
778 wakaba 1.6
779     ## TODO: testdata
780 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
781    
782     ## "A valid media query [MQ]"
783     my $HTMLMQAttrChecker = sub {
784     my ($self, $attr) = @_;
785     $self->{onerror}->(node => $attr, level => 'unsupported',
786     type => 'media query');
787     ## ISSUE: What is "a valid media query"?
788     }; # $HTMLMQAttrChecker
789    
790     my $HTMLEventHandlerAttrChecker = sub {
791     my ($self, $attr) = @_;
792     $self->{onerror}->(node => $attr, level => 'unsupported',
793     type => 'event handler');
794     ## TODO: MUST contain valid ECMAScript code matching the
795     ## ECMAScript |FunctionBody| production. [ECMA262]
796     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
797     ## ISSUE: Automatic semicolon insertion does not apply?
798     ## ISSUE: Other script languages?
799     }; # $HTMLEventHandlerAttrChecker
800    
801     my $HTMLUsemapAttrChecker = sub {
802     my ($self, $attr) = @_;
803     ## MUST be a valid hashed ID reference to a |map| element
804     my $value = $attr->value;
805     if ($value =~ s/^#//) {
806     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
807     push @{$self->{usemap}}, [$value => $attr];
808     } else {
809     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
810     }
811     ## NOTE: Space characters in hashed ID references are conforming.
812     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
813     }; # $HTMLUsemapAttrChecker
814    
815     my $HTMLTargetAttrChecker = sub {
816     my ($self, $attr) = @_;
817     my $value = $attr->value;
818     if ($value =~ /^_/) {
819     $value = lc $value; ## ISSUE: ASCII case-insentitive?
820     unless ({
821     _self => 1, _parent => 1, _top => 1,
822     }->{$value}) {
823     $self->{onerror}->(node => $attr,
824     type => 'reserved browsing context name');
825     }
826     } else {
827     #$ ISSUE: An empty string is conforming?
828     }
829     }; # $HTMLTargetAttrChecker
830    
831     my $HTMLAttrChecker = {
832     id => sub {
833     ## NOTE: |map| has its own variant of |id=""| checker
834     my ($self, $attr) = @_;
835     my $value = $attr->value;
836     if (length $value > 0) {
837     if ($self->{id}->{$value}) {
838     $self->{onerror}->(node => $attr, type => 'duplicate ID');
839     push @{$self->{id}->{$value}}, $attr;
840     } else {
841     $self->{id}->{$value} = [$attr];
842     }
843     if ($value =~ /[\x09-\x0D\x20]/) {
844     $self->{onerror}->(node => $attr, type => 'space in ID');
845     }
846     } else {
847     ## NOTE: MUST contain at least one character
848     $self->{onerror}->(node => $attr, type => 'empty attribute value');
849     }
850     },
851     title => sub {}, ## NOTE: No conformance creteria
852     lang => sub {
853     my ($self, $attr) = @_;
854 wakaba 1.6 my $value = $attr->value;
855     if ($value eq '') {
856     #
857     } else {
858     require Whatpm::LangTag;
859     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
860     my %opt = @_;
861     my $type = 'LangTag:'.$opt{type};
862     $type .= ':' . $opt{subtag} if defined $opt{subtag};
863     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
864     level => $opt{level});
865     });
866     }
867 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
868     unless ($attr->owner_document->manakai_is_html) {
869     $self->{onerror}->(node => $attr, type => 'in XML:lang');
870     }
871 wakaba 1.6
872     ## TODO: test data
873 wakaba 1.1 },
874     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
875     class => sub {
876     my ($self, $attr) = @_;
877     my %word;
878     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
879     unless ($word{$word}) {
880     $word{$word} = 1;
881     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
882     } else {
883     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
884     }
885     }
886     },
887     contextmenu => sub {
888     my ($self, $attr) = @_;
889     my $value = $attr->value;
890     push @{$self->{contextmenu}}, [$value => $attr];
891     ## ISSUE: "The value must be the ID of a menu element in the DOM."
892     ## What is "in the DOM"? A menu Element node that is not part
893     ## of the Document tree is in the DOM? A menu Element node that
894     ## belong to another Document tree is in the DOM?
895     },
896     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
897 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
898     ## TODO: ref, template, registrationmark
899 wakaba 1.1 };
900    
901     for (qw/
902     onabort onbeforeunload onblur onchange onclick oncontextmenu
903     ondblclick ondrag ondragend ondragenter ondragleave ondragover
904     ondragstart ondrop onerror onfocus onkeydown onkeypress
905     onkeyup onload onmessage onmousedown onmousemove onmouseout
906     onmouseover onmouseup onmousewheel onresize onscroll onselect
907     onsubmit onunload
908     /) {
909     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
910     }
911    
912     my $GetHTMLAttrsChecker = sub {
913     my $element_specific_checker = shift;
914     return sub {
915     my ($self, $todo) = @_;
916     for my $attr (@{$todo->{node}->attributes}) {
917     my $attr_ns = $attr->namespace_uri;
918     $attr_ns = '' unless defined $attr_ns;
919     my $attr_ln = $attr->manakai_local_name;
920     my $checker;
921     if ($attr_ns eq '') {
922     $checker = $element_specific_checker->{$attr_ln}
923     || $HTMLAttrChecker->{$attr_ln};
924     }
925     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
926     || $AttrChecker->{$attr_ns}->{''};
927     if ($checker) {
928     $checker->($self, $attr, $todo);
929     } else {
930     $self->{onerror}->(node => $attr, level => 'unsupported',
931     type => 'attribute');
932     ## ISSUE: No comformance createria for unknown attributes in the spec
933     }
934     }
935     };
936     }; # $GetHTMLAttrsChecker
937    
938     our $Element;
939     our $ElementDefault;
940     our $AnyChecker;
941    
942     $Element->{$HTML_NS}->{''} = {
943     attrs_checker => $GetHTMLAttrsChecker->({}),
944     checker => $ElementDefault->{checker},
945     };
946    
947     $Element->{$HTML_NS}->{html} = {
948     is_root => 1,
949     attrs_checker => $GetHTMLAttrsChecker->({
950     xmlns => sub {
951     my ($self, $attr) = @_;
952     my $value = $attr->value;
953     unless ($value eq $HTML_NS) {
954     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
955     }
956     unless ($attr->owner_document->manakai_is_html) {
957     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
958     ## TODO: Test
959     }
960     },
961     }),
962     checker => sub {
963     my ($self, $todo) = @_;
964     my $el = $todo->{node};
965     my $new_todos = [];
966     my @nodes = (@{$el->child_nodes});
967    
968     my $phase = 'before head';
969     while (@nodes) {
970     my $node = shift @nodes;
971     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
972    
973     my $nt = $node->node_type;
974     if ($nt == 1) {
975     my $node_ns = $node->namespace_uri;
976     $node_ns = '' unless defined $node_ns;
977     my $node_ln = $node->manakai_local_name;
978     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
979 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
980     #
981     } elsif ($phase eq 'before head') {
982 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
983     $phase = 'after head';
984     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
985     $self->{onerror}->(node => $node, type => 'ps element missing:head');
986     $phase = 'after body';
987     } else {
988     $not_allowed = 1;
989     # before head
990     }
991     } elsif ($phase eq 'after head') {
992     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
993     $phase = 'after body';
994     } else {
995     $not_allowed = 1;
996     # after head
997     }
998     } else { #elsif ($phase eq 'after body') {
999     $not_allowed = 1;
1000     # after body
1001     }
1002     $self->{onerror}->(node => $node, type => 'element not allowed')
1003     if $not_allowed;
1004     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1005     unshift @nodes, @$sib;
1006     push @$new_todos, @$ch;
1007     } elsif ($nt == 3 or $nt == 4) {
1008     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1009     $self->{onerror}->(node => $node, type => 'character not allowed');
1010     }
1011     } elsif ($nt == 5) {
1012     unshift @nodes, @{$node->child_nodes};
1013     }
1014     }
1015    
1016     if ($phase eq 'before head') {
1017     $self->{onerror}->(node => $el, type => 'child element missing:head');
1018     $self->{onerror}->(node => $el, type => 'child element missing:body');
1019     } elsif ($phase eq 'after head') {
1020     $self->{onerror}->(node => $el, type => 'child element missing:body');
1021     }
1022    
1023     return ($new_todos);
1024     },
1025     };
1026    
1027     $Element->{$HTML_NS}->{head} = {
1028     attrs_checker => $GetHTMLAttrsChecker->({}),
1029     checker => sub {
1030     my ($self, $todo) = @_;
1031     my $el = $todo->{node};
1032     my $new_todos = [];
1033     my @nodes = (@{$el->child_nodes});
1034    
1035     my $has_title;
1036     my $phase = 'initial'; # 'after charset', 'after base'
1037     while (@nodes) {
1038     my $node = shift @nodes;
1039     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1040    
1041     my $nt = $node->node_type;
1042     if ($nt == 1) {
1043     my $node_ns = $node->namespace_uri;
1044     $node_ns = '' unless defined $node_ns;
1045     my $node_ln = $node->manakai_local_name;
1046     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1047 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1048     #
1049     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1050 wakaba 1.1 $phase = 'after base';
1051     unless ($has_title) {
1052     $has_title = 1;
1053     } else {
1054     $not_allowed = 1;
1055     }
1056     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1057     if ($node->has_attribute_ns (undef, 'charset')) {
1058     if ($phase eq 'initial') {
1059     $phase = 'after charset';
1060     } else {
1061     $not_allowed = 1;
1062     ## NOTE: See also |base|'s "contexts" field in the spec
1063     }
1064 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1065     $node->has_attribute_ns (undef, 'http-equiv')) {
1066     $phase = 'after base';
1067 wakaba 1.1 } else {
1068     $phase = 'after base';
1069 wakaba 1.5 $not_allowed = 1;
1070 wakaba 1.1 }
1071     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1072     if ($phase eq 'initial' or $phase eq 'after charset') {
1073     $phase = 'after base';
1074     } else {
1075     $not_allowed = 1;
1076     }
1077     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1078     $phase = 'after base';
1079     if ($node->has_attribute_ns (undef, 'scoped')) {
1080     $not_allowed = 1;
1081     }
1082     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1083     $phase = 'after base';
1084     } else {
1085     $not_allowed = 1;
1086     }
1087     $self->{onerror}->(node => $node, type => 'element not allowed')
1088     if $not_allowed;
1089 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1090 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1091     unshift @nodes, @$sib;
1092     push @$new_todos, @$ch;
1093     } elsif ($nt == 3 or $nt == 4) {
1094     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1095     $self->{onerror}->(node => $node, type => 'character not allowed');
1096     }
1097     } elsif ($nt == 5) {
1098     unshift @nodes, @{$node->child_nodes};
1099     }
1100     }
1101     unless ($has_title) {
1102     $self->{onerror}->(node => $el, type => 'child element missing:title');
1103     }
1104     return ($new_todos);
1105     },
1106     };
1107    
1108     $Element->{$HTML_NS}->{title} = {
1109     attrs_checker => $GetHTMLAttrsChecker->({}),
1110     checker => $HTMLTextChecker,
1111     };
1112    
1113     $Element->{$HTML_NS}->{base} = {
1114 wakaba 1.4 attrs_checker => sub {
1115     my ($self, $todo) = @_;
1116    
1117     if ($self->{has_uri_attr} and
1118     $todo->{node}->has_attribute_ns (undef, 'href')) {
1119     ## ISSUE: Are these examples conforming?
1120     ## <head profile="a b c"><base href> (except for |profile|'s
1121     ## non-conformance)
1122     ## <title xml:base="relative"/><base href/> (maybe it should be)
1123     ## <unknown xmlns="relative"/><base href/> (assuming that
1124     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1125     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1126     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1127     ## <style>@import 'relative';</style><base href>
1128     ## <script>location.href = 'relative';</script><base href>
1129     $self->{onerror}->(node => $todo->{node},
1130     type => 'basehref after URI attribute');
1131     }
1132     if ($self->{has_hyperlink_element} and
1133     $todo->{node}->has_attribute_ns (undef, 'target')) {
1134     ## ISSUE: Are these examples conforming?
1135     ## <head><title xlink:href=""/><base target="name"/></head>
1136     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1137     ## (assuming that |xbl:xbl| is allowed before |base|)
1138     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1139     ## <link href=""/><base target="name"/>
1140     ## <link rel=unknown href=""><base target=name>
1141     $self->{onerror}->(node => $todo->{node},
1142     type => 'basetarget after hyperlink');
1143     }
1144    
1145     return $GetHTMLAttrsChecker->({
1146     href => $HTMLURIAttrChecker,
1147     target => $HTMLTargetAttrChecker,
1148     })->($self, $todo);
1149     },
1150 wakaba 1.1 checker => $HTMLEmptyChecker,
1151     };
1152    
1153     $Element->{$HTML_NS}->{link} = {
1154     attrs_checker => sub {
1155     my ($self, $todo) = @_;
1156     $GetHTMLAttrsChecker->({
1157     href => $HTMLURIAttrChecker,
1158 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1159 wakaba 1.1 media => $HTMLMQAttrChecker,
1160     hreflang => $HTMLLanguageTagAttrChecker,
1161     type => $HTMLIMTAttrChecker,
1162     ## NOTE: Though |title| has special semantics,
1163     ## syntactically same as the |title| as global attribute.
1164     })->($self, $todo);
1165 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1166     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1167     } else {
1168 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1169     type => 'attribute missing:href');
1170     }
1171     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1172     $self->{onerror}->(node => $todo->{node},
1173     type => 'attribute missing:rel');
1174     }
1175     },
1176     checker => $HTMLEmptyChecker,
1177     };
1178    
1179     $Element->{$HTML_NS}->{meta} = {
1180     attrs_checker => sub {
1181     my ($self, $todo) = @_;
1182     my $name_attr;
1183     my $http_equiv_attr;
1184     my $charset_attr;
1185     my $content_attr;
1186     for my $attr (@{$todo->{node}->attributes}) {
1187     my $attr_ns = $attr->namespace_uri;
1188     $attr_ns = '' unless defined $attr_ns;
1189     my $attr_ln = $attr->manakai_local_name;
1190     my $checker;
1191     if ($attr_ns eq '') {
1192     if ($attr_ln eq 'content') {
1193     $content_attr = $attr;
1194     $checker = 1;
1195     } elsif ($attr_ln eq 'name') {
1196     $name_attr = $attr;
1197     $checker = 1;
1198     } elsif ($attr_ln eq 'http-equiv') {
1199     $http_equiv_attr = $attr;
1200     $checker = 1;
1201     } elsif ($attr_ln eq 'charset') {
1202     $charset_attr = $attr;
1203     $checker = 1;
1204     } else {
1205     $checker = $HTMLAttrChecker->{$attr_ln}
1206     || $AttrChecker->{$attr_ns}->{$attr_ln}
1207     || $AttrChecker->{$attr_ns}->{''};
1208     }
1209     } else {
1210     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1211     || $AttrChecker->{$attr_ns}->{''};
1212     }
1213     if ($checker) {
1214     $checker->($self, $attr) if ref $checker;
1215     } else {
1216     $self->{onerror}->(node => $attr, level => 'unsupported',
1217     type => 'attribute');
1218     ## ISSUE: No comformance createria for unknown attributes in the spec
1219     }
1220     }
1221    
1222     if (defined $name_attr) {
1223     if (defined $http_equiv_attr) {
1224     $self->{onerror}->(node => $http_equiv_attr,
1225     type => 'attribute not allowed');
1226     } elsif (defined $charset_attr) {
1227     $self->{onerror}->(node => $charset_attr,
1228     type => 'attribute not allowed');
1229     }
1230     my $metadata_name = $name_attr->value;
1231     my $metadata_value;
1232     if (defined $content_attr) {
1233     $metadata_value = $content_attr->value;
1234     } else {
1235     $self->{onerror}->(node => $todo->{node},
1236     type => 'attribute missing:content');
1237     $metadata_value = '';
1238     }
1239     } elsif (defined $http_equiv_attr) {
1240     if (defined $charset_attr) {
1241     $self->{onerror}->(node => $charset_attr,
1242     type => 'attribute not allowed');
1243     }
1244     unless (defined $content_attr) {
1245     $self->{onerror}->(node => $todo->{node},
1246     type => 'attribute missing:content');
1247     }
1248     } elsif (defined $charset_attr) {
1249     if (defined $content_attr) {
1250     $self->{onerror}->(node => $content_attr,
1251     type => 'attribute not allowed');
1252     }
1253     } else {
1254     if (defined $content_attr) {
1255     $self->{onerror}->(node => $content_attr,
1256     type => 'attribute not allowed');
1257     $self->{onerror}->(node => $todo->{node},
1258     type => 'attribute missing:name|http-equiv');
1259     } else {
1260     $self->{onerror}->(node => $todo->{node},
1261     type => 'attribute missing:name|http-equiv|charset');
1262     }
1263     }
1264    
1265     ## TODO: metadata conformance
1266    
1267     ## TODO: pragma conformance
1268     if (defined $http_equiv_attr) { ## An enumerated attribute
1269     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1270     if ({
1271     'refresh' => 1,
1272     'default-style' => 1,
1273     }->{$keyword}) {
1274     #
1275     } else {
1276     $self->{onerror}->(node => $http_equiv_attr,
1277     type => 'enumerated:invalid');
1278     }
1279     }
1280    
1281     if (defined $charset_attr) {
1282     unless ($todo->{node}->owner_document->manakai_is_html) {
1283     $self->{onerror}->(node => $charset_attr,
1284     type => 'in XML:charset');
1285     }
1286     ## TODO: charset
1287     }
1288     },
1289     checker => $HTMLEmptyChecker,
1290     };
1291    
1292     $Element->{$HTML_NS}->{style} = {
1293     attrs_checker => $GetHTMLAttrsChecker->({
1294     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1295     media => $HTMLMQAttrChecker,
1296     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1297     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1298     ## not different
1299     }),
1300     checker => sub {
1301     ## NOTE: |html:style| has no conformance creteria on content model
1302     my ($self, $todo) = @_;
1303     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1304     $type = 'text/css' unless defined $type;
1305     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1306     type => 'style:'.$type); ## TODO: $type normalization
1307     return $AnyChecker->($self, $todo);
1308     },
1309     };
1310    
1311     $Element->{$HTML_NS}->{body} = {
1312     attrs_checker => $GetHTMLAttrsChecker->({}),
1313     checker => $HTMLBlockChecker,
1314     };
1315    
1316     $Element->{$HTML_NS}->{section} = {
1317     attrs_checker => $GetHTMLAttrsChecker->({}),
1318     checker => $HTMLStylableBlockChecker,
1319     };
1320    
1321     $Element->{$HTML_NS}->{nav} = {
1322     attrs_checker => $GetHTMLAttrsChecker->({}),
1323     checker => $HTMLBlockOrInlineChecker,
1324     };
1325    
1326     $Element->{$HTML_NS}->{article} = {
1327     attrs_checker => $GetHTMLAttrsChecker->({}),
1328     checker => $HTMLStylableBlockChecker,
1329     };
1330    
1331     $Element->{$HTML_NS}->{blockquote} = {
1332     attrs_checker => $GetHTMLAttrsChecker->({
1333     cite => $HTMLURIAttrChecker,
1334     }),
1335     checker => $HTMLBlockChecker,
1336     };
1337    
1338     $Element->{$HTML_NS}->{aside} = {
1339     attrs_checker => $GetHTMLAttrsChecker->({}),
1340     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1341     };
1342    
1343     $Element->{$HTML_NS}->{h1} = {
1344     attrs_checker => $GetHTMLAttrsChecker->({}),
1345     checker => sub {
1346     my ($self, $todo) = @_;
1347     $todo->{flag}->{has_heading}->[0] = 1;
1348     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1349     },
1350     };
1351    
1352     $Element->{$HTML_NS}->{h2} = {
1353     attrs_checker => $GetHTMLAttrsChecker->({}),
1354     checker => $Element->{$HTML_NS}->{h1}->{checker},
1355     };
1356    
1357     $Element->{$HTML_NS}->{h3} = {
1358     attrs_checker => $GetHTMLAttrsChecker->({}),
1359     checker => $Element->{$HTML_NS}->{h1}->{checker},
1360     };
1361    
1362     $Element->{$HTML_NS}->{h4} = {
1363     attrs_checker => $GetHTMLAttrsChecker->({}),
1364     checker => $Element->{$HTML_NS}->{h1}->{checker},
1365     };
1366    
1367     $Element->{$HTML_NS}->{h5} = {
1368     attrs_checker => $GetHTMLAttrsChecker->({}),
1369     checker => $Element->{$HTML_NS}->{h1}->{checker},
1370     };
1371    
1372     $Element->{$HTML_NS}->{h6} = {
1373     attrs_checker => $GetHTMLAttrsChecker->({}),
1374     checker => $Element->{$HTML_NS}->{h1}->{checker},
1375     };
1376    
1377     $Element->{$HTML_NS}->{header} = {
1378     attrs_checker => $GetHTMLAttrsChecker->({}),
1379     checker => sub {
1380     my ($self, $todo) = @_;
1381     my $old_flag = $todo->{flag}->{has_heading} || [];
1382     my $new_flag = [];
1383     local $todo->{flag}->{has_heading} = $new_flag;
1384     my $node = $todo->{node};
1385    
1386     my $end = $self->_add_minuses
1387     ({$HTML_NS => {qw/header 1 footer 1/}},
1388     $HTMLSectioningElements);
1389     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1390     push @$new_todos, $end,
1391     {type => 'code', code => sub {
1392     if ($new_flag->[0]) {
1393     $old_flag->[0] = 1;
1394     } else {
1395     $self->{onerror}->(node => $node, type => 'element missing:hn');
1396     }
1397     }};
1398     return ($new_todos, $ch);
1399     },
1400     };
1401    
1402     $Element->{$HTML_NS}->{footer} = {
1403     attrs_checker => $GetHTMLAttrsChecker->({}),
1404     checker => sub { ## block -hn -header -footer -sectioning or inline
1405     my ($self, $todo) = @_;
1406     my $el = $todo->{node};
1407     my $new_todos = [];
1408     my @nodes = (@{$el->child_nodes});
1409    
1410     my $content = 'block-or-inline'; # or 'block' or 'inline'
1411     my @block_not_inline;
1412     while (@nodes) {
1413     my $node = shift @nodes;
1414     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1415    
1416     my $nt = $node->node_type;
1417     if ($nt == 1) {
1418     my $node_ns = $node->namespace_uri;
1419     $node_ns = '' unless defined $node_ns;
1420     my $node_ln = $node->manakai_local_name;
1421     my $not_allowed;
1422     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1423     $not_allowed = 1;
1424     } elsif ($node_ns eq $HTML_NS and
1425     {
1426     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1427     }->{$node_ln}) {
1428     $not_allowed = 1;
1429     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1430     $not_allowed = 1;
1431     }
1432     if ($content eq 'block') {
1433     $not_allowed = 1
1434 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1435     $self->{pluses}->{$node_ns}->{$node_ln};
1436 wakaba 1.1 } elsif ($content eq 'inline') {
1437     $not_allowed = 1
1438     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1439 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1440     $self->{pluses}->{$node_ns}->{$node_ln};
1441 wakaba 1.1 } else {
1442     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1443     my $is_inline
1444     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1445     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1446    
1447     push @block_not_inline, $node
1448     if $is_block and not $is_inline and not $not_allowed;
1449 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1450 wakaba 1.1 $content = 'inline';
1451     for (@block_not_inline) {
1452     $self->{onerror}->(node => $_, type => 'element not allowed');
1453     }
1454     $not_allowed = 1 unless $is_inline;
1455     }
1456     }
1457     $self->{onerror}->(node => $node, type => 'element not allowed')
1458     if $not_allowed;
1459     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1460     unshift @nodes, @$sib;
1461     push @$new_todos, @$ch;
1462     } elsif ($nt == 3 or $nt == 4) {
1463     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1464     if ($content eq 'block') {
1465     $self->{onerror}->(node => $node, type => 'character not allowed');
1466     } else {
1467     $content = 'inline';
1468     for (@block_not_inline) {
1469     $self->{onerror}->(node => $_, type => 'element not allowed');
1470     }
1471     }
1472     }
1473     } elsif ($nt == 5) {
1474     unshift @nodes, @{$node->child_nodes};
1475     }
1476     }
1477    
1478     my $end = $self->_add_minuses
1479     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1480     $HTMLSectioningElements);
1481     push @$new_todos, $end;
1482    
1483     if ($content eq 'inline') {
1484     for (@$new_todos) {
1485     $_->{inline} = 1;
1486     }
1487     }
1488    
1489     return ($new_todos);
1490     },
1491     };
1492    
1493     $Element->{$HTML_NS}->{address} = {
1494     attrs_checker => $GetHTMLAttrsChecker->({}),
1495     checker => $HTMLInlineChecker,
1496     };
1497    
1498     $Element->{$HTML_NS}->{p} = {
1499     attrs_checker => $GetHTMLAttrsChecker->({}),
1500     checker => $HTMLSignificantInlineChecker,
1501     };
1502    
1503     $Element->{$HTML_NS}->{hr} = {
1504     attrs_checker => $GetHTMLAttrsChecker->({}),
1505     checker => $HTMLEmptyChecker,
1506     };
1507    
1508     $Element->{$HTML_NS}->{br} = {
1509     attrs_checker => $GetHTMLAttrsChecker->({}),
1510     checker => $HTMLEmptyChecker,
1511     };
1512    
1513     $Element->{$HTML_NS}->{dialog} = {
1514     attrs_checker => $GetHTMLAttrsChecker->({}),
1515     checker => sub {
1516     my ($self, $todo) = @_;
1517     my $el = $todo->{node};
1518     my $new_todos = [];
1519     my @nodes = (@{$el->child_nodes});
1520    
1521     my $phase = 'before dt';
1522     while (@nodes) {
1523     my $node = shift @nodes;
1524     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1525    
1526     my $nt = $node->node_type;
1527     if ($nt == 1) {
1528     my $node_ns = $node->namespace_uri;
1529     $node_ns = '' unless defined $node_ns;
1530     my $node_ln = $node->manakai_local_name;
1531     ## NOTE: |minuses| list is not checked since redundant
1532 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1533     #
1534     } elsif ($phase eq 'before dt') {
1535 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1536     $phase = 'before dd';
1537     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1538     $self->{onerror}
1539     ->(node => $node, type => 'ps element missing:dt');
1540     $phase = 'before dt';
1541     } else {
1542     $self->{onerror}->(node => $node, type => 'element not allowed');
1543     }
1544     } else { # before dd
1545     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1546     $phase = 'before dt';
1547     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1548     $self->{onerror}
1549     ->(node => $node, type => 'ps element missing:dd');
1550     $phase = 'before dd';
1551     } else {
1552     $self->{onerror}->(node => $node, type => 'element not allowed');
1553     }
1554     }
1555     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1556     unshift @nodes, @$sib;
1557     push @$new_todos, @$ch;
1558     } elsif ($nt == 3 or $nt == 4) {
1559     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1560     $self->{onerror}->(node => $node, type => 'character not allowed');
1561     }
1562     } elsif ($nt == 5) {
1563     unshift @nodes, @{$node->child_nodes};
1564     }
1565     }
1566     if ($phase eq 'before dd') {
1567 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1568 wakaba 1.1 }
1569     return ($new_todos);
1570     },
1571     };
1572    
1573     $Element->{$HTML_NS}->{pre} = {
1574     attrs_checker => $GetHTMLAttrsChecker->({}),
1575     checker => $HTMLStrictlyInlineChecker,
1576     };
1577    
1578     $Element->{$HTML_NS}->{ol} = {
1579     attrs_checker => $GetHTMLAttrsChecker->({
1580     start => $HTMLIntegerAttrChecker,
1581     }),
1582     checker => sub {
1583     my ($self, $todo) = @_;
1584     my $el = $todo->{node};
1585     my $new_todos = [];
1586     my @nodes = (@{$el->child_nodes});
1587    
1588     while (@nodes) {
1589     my $node = shift @nodes;
1590     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1591    
1592     my $nt = $node->node_type;
1593     if ($nt == 1) {
1594     my $node_ns = $node->namespace_uri;
1595     $node_ns = '' unless defined $node_ns;
1596     my $node_ln = $node->manakai_local_name;
1597     ## NOTE: |minuses| list is not checked since redundant
1598 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1599     #
1600     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1601 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1602     }
1603     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1604     unshift @nodes, @$sib;
1605     push @$new_todos, @$ch;
1606     } elsif ($nt == 3 or $nt == 4) {
1607     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1608     $self->{onerror}->(node => $node, type => 'character not allowed');
1609     }
1610     } elsif ($nt == 5) {
1611     unshift @nodes, @{$node->child_nodes};
1612     }
1613     }
1614    
1615     if ($todo->{inline}) {
1616     for (@$new_todos) {
1617     $_->{inline} = 1;
1618     }
1619     }
1620     return ($new_todos);
1621     },
1622     };
1623    
1624     $Element->{$HTML_NS}->{ul} = {
1625     attrs_checker => $GetHTMLAttrsChecker->({}),
1626     checker => $Element->{$HTML_NS}->{ol}->{checker},
1627     };
1628    
1629     $Element->{$HTML_NS}->{li} = {
1630     attrs_checker => $GetHTMLAttrsChecker->({
1631     start => sub {
1632     my ($self, $attr) = @_;
1633     my $parent = $attr->owner_element->manakai_parent_element;
1634     if (defined $parent) {
1635     my $parent_ns = $parent->namespace_uri;
1636     $parent_ns = '' unless defined $parent_ns;
1637     my $parent_ln = $parent->manakai_local_name;
1638     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1639     $self->{onerror}->(node => $attr, level => 'unsupported',
1640     type => 'attribute');
1641     }
1642     }
1643     $HTMLIntegerAttrChecker->($self, $attr);
1644     },
1645     }),
1646     checker => sub {
1647     my ($self, $todo) = @_;
1648     if ($todo->{inline}) {
1649     return $HTMLInlineChecker->($self, $todo);
1650     } else {
1651     return $HTMLBlockOrInlineChecker->($self, $todo);
1652     }
1653     },
1654     };
1655    
1656     $Element->{$HTML_NS}->{dl} = {
1657     attrs_checker => $GetHTMLAttrsChecker->({}),
1658     checker => sub {
1659     my ($self, $todo) = @_;
1660     my $el = $todo->{node};
1661     my $new_todos = [];
1662     my @nodes = (@{$el->child_nodes});
1663    
1664     my $phase = 'before dt';
1665     while (@nodes) {
1666     my $node = shift @nodes;
1667     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1668    
1669     my $nt = $node->node_type;
1670     if ($nt == 1) {
1671     my $node_ns = $node->namespace_uri;
1672     $node_ns = '' unless defined $node_ns;
1673     my $node_ln = $node->manakai_local_name;
1674     ## NOTE: |minuses| list is not checked since redundant
1675 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1676     #
1677     } elsif ($phase eq 'in dds') {
1678 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1679     #$phase = 'in dds';
1680     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1681     $phase = 'in dts';
1682     } else {
1683     $self->{onerror}->(node => $node, type => 'element not allowed');
1684     }
1685     } elsif ($phase eq 'in dts') {
1686     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1687     #$phase = 'in dts';
1688     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1689     $phase = 'in dds';
1690     } else {
1691     $self->{onerror}->(node => $node, type => 'element not allowed');
1692     }
1693     } else { # before dt
1694     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1695     $phase = 'in dts';
1696     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1697     $self->{onerror}
1698     ->(node => $node, type => 'ps element missing:dt');
1699     $phase = 'in dds';
1700     } else {
1701     $self->{onerror}->(node => $node, type => 'element not allowed');
1702     }
1703     }
1704     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1705     unshift @nodes, @$sib;
1706     push @$new_todos, @$ch;
1707     } elsif ($nt == 3 or $nt == 4) {
1708     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1709     $self->{onerror}->(node => $node, type => 'character not allowed');
1710     }
1711     } elsif ($nt == 5) {
1712     unshift @nodes, @{$node->child_nodes};
1713     }
1714     }
1715     if ($phase eq 'in dts') {
1716 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1717 wakaba 1.1 }
1718    
1719     if ($todo->{inline}) {
1720     for (@$new_todos) {
1721     $_->{inline} = 1;
1722     }
1723     }
1724     return ($new_todos);
1725     },
1726     };
1727    
1728     $Element->{$HTML_NS}->{dt} = {
1729     attrs_checker => $GetHTMLAttrsChecker->({}),
1730     checker => $HTMLStrictlyInlineChecker,
1731     };
1732    
1733     $Element->{$HTML_NS}->{dd} = {
1734     attrs_checker => $GetHTMLAttrsChecker->({}),
1735     checker => $Element->{$HTML_NS}->{li}->{checker},
1736     };
1737    
1738     $Element->{$HTML_NS}->{a} = {
1739     attrs_checker => sub {
1740     my ($self, $todo) = @_;
1741     my %attr;
1742     for my $attr (@{$todo->{node}->attributes}) {
1743     my $attr_ns = $attr->namespace_uri;
1744     $attr_ns = '' unless defined $attr_ns;
1745     my $attr_ln = $attr->manakai_local_name;
1746     my $checker;
1747     if ($attr_ns eq '') {
1748     $checker = {
1749     target => $HTMLTargetAttrChecker,
1750     href => $HTMLURIAttrChecker,
1751     ping => $HTMLSpaceURIsAttrChecker,
1752 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1753 wakaba 1.1 media => $HTMLMQAttrChecker,
1754     hreflang => $HTMLLanguageTagAttrChecker,
1755     type => $HTMLIMTAttrChecker,
1756     }->{$attr_ln};
1757     if ($checker) {
1758     $attr{$attr_ln} = $attr;
1759     } else {
1760     $checker = $HTMLAttrChecker->{$attr_ln};
1761     }
1762     }
1763     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1764     || $AttrChecker->{$attr_ns}->{''};
1765     if ($checker) {
1766     $checker->($self, $attr) if ref $checker;
1767     } else {
1768     $self->{onerror}->(node => $attr, level => 'unsupported',
1769     type => 'attribute');
1770     ## ISSUE: No comformance createria for unknown attributes in the spec
1771     }
1772     }
1773    
1774 wakaba 1.4 if (defined $attr{href}) {
1775     $self->{has_hyperlink_element} = 1;
1776     } else {
1777 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1778     if (defined $attr{$_}) {
1779     $self->{onerror}->(node => $attr{$_},
1780     type => 'attribute not allowed');
1781     }
1782     }
1783     }
1784     },
1785     checker => sub {
1786     my ($self, $todo) = @_;
1787    
1788     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1789     my ($new_todos, $ch)
1790     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1791     push @$new_todos, $end;
1792    
1793     $_->{flag}->{has_a} = 1 for @$new_todos;
1794    
1795     return ($new_todos, $ch);
1796     },
1797     };
1798    
1799     $Element->{$HTML_NS}->{q} = {
1800     attrs_checker => $GetHTMLAttrsChecker->({
1801     cite => $HTMLURIAttrChecker,
1802     }),
1803     checker => $HTMLInlineOrStrictlyInlineChecker,
1804     };
1805    
1806     $Element->{$HTML_NS}->{cite} = {
1807     attrs_checker => $GetHTMLAttrsChecker->({}),
1808     checker => $HTMLStrictlyInlineChecker,
1809     };
1810    
1811     $Element->{$HTML_NS}->{em} = {
1812     attrs_checker => $GetHTMLAttrsChecker->({}),
1813     checker => $HTMLInlineOrStrictlyInlineChecker,
1814     };
1815    
1816     $Element->{$HTML_NS}->{strong} = {
1817     attrs_checker => $GetHTMLAttrsChecker->({}),
1818     checker => $HTMLInlineOrStrictlyInlineChecker,
1819     };
1820    
1821     $Element->{$HTML_NS}->{small} = {
1822     attrs_checker => $GetHTMLAttrsChecker->({}),
1823     checker => $HTMLInlineOrStrictlyInlineChecker,
1824     };
1825    
1826     $Element->{$HTML_NS}->{m} = {
1827     attrs_checker => $GetHTMLAttrsChecker->({}),
1828     checker => $HTMLInlineOrStrictlyInlineChecker,
1829     };
1830    
1831     $Element->{$HTML_NS}->{dfn} = {
1832     attrs_checker => $GetHTMLAttrsChecker->({}),
1833     checker => sub {
1834     my ($self, $todo) = @_;
1835    
1836     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1837     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1838     push @$sib, $end;
1839    
1840     my $node = $todo->{node};
1841     my $term = $node->get_attribute_ns (undef, 'title');
1842     unless (defined $term) {
1843     for my $child (@{$node->child_nodes}) {
1844     if ($child->node_type == 1) { # ELEMENT_NODE
1845     if (defined $term) {
1846     undef $term;
1847     last;
1848     } elsif ($child->manakai_local_name eq 'abbr') {
1849     my $nsuri = $child->namespace_uri;
1850     if (defined $nsuri and $nsuri eq $HTML_NS) {
1851     my $attr = $child->get_attribute_node_ns (undef, 'title');
1852     if ($attr) {
1853     $term = $attr->value;
1854     }
1855     }
1856     }
1857     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1858     ## TEXT_NODE or CDATA_SECTION_NODE
1859     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1860     next;
1861     }
1862     undef $term;
1863     last;
1864     }
1865     }
1866     unless (defined $term) {
1867     $term = $node->text_content;
1868     }
1869     }
1870     if ($self->{term}->{$term}) {
1871     $self->{onerror}->(node => $node, type => 'duplicate term');
1872     push @{$self->{term}->{$term}}, $node;
1873     } else {
1874     $self->{term}->{$term} = [$node];
1875     }
1876     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1877     ## has |title|.
1878    
1879     return ($sib, $ch);
1880     },
1881     };
1882    
1883     $Element->{$HTML_NS}->{abbr} = {
1884     attrs_checker => $GetHTMLAttrsChecker->({
1885     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1886     ## not different. The spec says that the |title| MAY be omitted
1887     ## if there is a |dfn| whose defining term is the abbreviation,
1888     ## but it does not prohibit |abbr| w/o |title| in other cases.
1889     }),
1890     checker => $HTMLStrictlyInlineChecker,
1891     };
1892    
1893     $Element->{$HTML_NS}->{time} = {
1894     attrs_checker => $GetHTMLAttrsChecker->({
1895     datetime => sub { 1 }, # checked in |checker|
1896     }),
1897     ## TODO: Write tests
1898     checker => sub {
1899     my ($self, $todo) = @_;
1900    
1901     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1902     my $input;
1903     my $reg_sp;
1904     my $input_node;
1905     if ($attr) {
1906     $input = $attr->value;
1907     $reg_sp = qr/[\x09-\x0D\x20]*/;
1908     $input_node = $attr;
1909     } else {
1910     $input = $todo->{node}->text_content;
1911     $reg_sp = qr/\p{Zs}*/;
1912     $input_node = $todo->{node};
1913    
1914     ## ISSUE: What is the definition for "successfully extracts a date
1915     ## or time"? If the algorithm says the string is invalid but
1916     ## return some date or time, is it "successfully"?
1917     }
1918    
1919     my $hour;
1920     my $minute;
1921     my $second;
1922     if ($input =~ /
1923     \A
1924     [\x09-\x0D\x20]*
1925     ([0-9]+) # 1
1926     (?>
1927     -([0-9]+) # 2
1928     -([0-9]+) # 3
1929     [\x09-\x0D\x20]*
1930     (?>
1931     T
1932     [\x09-\x0D\x20]*
1933     )?
1934     ([0-9]+) # 4
1935     :([0-9]+) # 5
1936     (?>
1937     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1938     )?
1939     [\x09-\x0D\x20]*
1940     (?>
1941     Z
1942     [\x09-\x0D\x20]*
1943     |
1944     [+-]([0-9]+):([0-9]+) # 7, 8
1945     [\x09-\x0D\x20]*
1946     )?
1947     \z
1948     |
1949     :([0-9]+) # 9
1950     (?>
1951     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1952     )?
1953     [\x09-\x0D\x20]*\z
1954     )
1955     /x) {
1956     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1957     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1958     length $4 != 2 or length $5 != 2) {
1959     $self->{onerror}->(node => $input_node,
1960     type => 'dateortime:syntax error');
1961     }
1962    
1963     if (1 <= $2 and $2 <= 12) {
1964     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1965     if $3 < 1 or
1966     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1967     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1968     if $2 == 2 and $3 == 29 and
1969     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1970     } else {
1971     $self->{onerror}->(node => $input_node,
1972     type => 'datetime:bad month');
1973     }
1974    
1975     ($hour, $minute, $second) = ($4, $5, $6);
1976    
1977     if (defined $7) { ## [+-]hh:mm
1978     if (length $7 != 2 or length $8 != 2) {
1979     $self->{onerror}->(node => $input_node,
1980     type => 'dateortime:syntax error');
1981     }
1982    
1983     $self->{onerror}->(node => $input_node,
1984     type => 'datetime:bad timezone hour')
1985     if $7 > 23;
1986     $self->{onerror}->(node => $input_node,
1987     type => 'datetime:bad timezone minute')
1988     if $8 > 59;
1989     }
1990     } else { ## hh:mm
1991     if (length $1 != 2 or length $9 != 2) {
1992     $self->{onerror}->(node => $input_node,
1993     type => qq'dateortime:syntax error');
1994     }
1995    
1996     ($hour, $minute, $second) = ($1, $9, $10);
1997     }
1998    
1999     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2000     if $hour > 23;
2001     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2002     if $minute > 59;
2003    
2004     if (defined $second) { ## s
2005     ## NOTE: Integer part of second don't have to have length of two.
2006    
2007     if (substr ($second, 0, 1) eq '.') {
2008     $self->{onerror}->(node => $input_node,
2009     type => 'dateortime:syntax error');
2010     }
2011    
2012     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2013     if $second >= 60;
2014     }
2015     } else {
2016     $self->{onerror}->(node => $input_node,
2017     type => 'dateortime:syntax error');
2018     }
2019    
2020     return $HTMLStrictlyInlineChecker->($self, $todo);
2021     },
2022     };
2023    
2024     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2025     attrs_checker => $GetHTMLAttrsChecker->({
2026     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2027     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2028     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2029     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2030     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2031     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2032     }),
2033     checker => $HTMLStrictlyInlineChecker,
2034     };
2035    
2036     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2037     attrs_checker => $GetHTMLAttrsChecker->({
2038     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2039     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2040     }),
2041     checker => $HTMLStrictlyInlineChecker,
2042     };
2043    
2044     $Element->{$HTML_NS}->{code} = {
2045     attrs_checker => $GetHTMLAttrsChecker->({}),
2046     ## NOTE: Though |title| has special semantics,
2047     ## syntatically same as the |title| as global attribute.
2048     checker => $HTMLInlineOrStrictlyInlineChecker,
2049     };
2050    
2051     $Element->{$HTML_NS}->{var} = {
2052     attrs_checker => $GetHTMLAttrsChecker->({}),
2053     ## NOTE: Though |title| has special semantics,
2054     ## syntatically same as the |title| as global attribute.
2055     checker => $HTMLStrictlyInlineChecker,
2056     };
2057    
2058     $Element->{$HTML_NS}->{samp} = {
2059     attrs_checker => $GetHTMLAttrsChecker->({}),
2060     ## NOTE: Though |title| has special semantics,
2061     ## syntatically same as the |title| as global attribute.
2062     checker => $HTMLInlineOrStrictlyInlineChecker,
2063     };
2064    
2065     $Element->{$HTML_NS}->{kbd} = {
2066     attrs_checker => $GetHTMLAttrsChecker->({}),
2067     checker => $HTMLStrictlyInlineChecker,
2068     };
2069    
2070     $Element->{$HTML_NS}->{sub} = {
2071     attrs_checker => $GetHTMLAttrsChecker->({}),
2072     checker => $HTMLStrictlyInlineChecker,
2073     };
2074    
2075     $Element->{$HTML_NS}->{sup} = {
2076     attrs_checker => $GetHTMLAttrsChecker->({}),
2077     checker => $HTMLStrictlyInlineChecker,
2078     };
2079    
2080     $Element->{$HTML_NS}->{span} = {
2081     attrs_checker => $GetHTMLAttrsChecker->({}),
2082     ## NOTE: Though |title| has special semantics,
2083     ## syntatically same as the |title| as global attribute.
2084     checker => $HTMLInlineOrStrictlyInlineChecker,
2085     };
2086    
2087     $Element->{$HTML_NS}->{i} = {
2088     attrs_checker => $GetHTMLAttrsChecker->({}),
2089     ## NOTE: Though |title| has special semantics,
2090     ## syntatically same as the |title| as global attribute.
2091     checker => $HTMLStrictlyInlineChecker,
2092     };
2093    
2094     $Element->{$HTML_NS}->{b} = {
2095     attrs_checker => $GetHTMLAttrsChecker->({}),
2096     checker => $HTMLStrictlyInlineChecker,
2097     };
2098    
2099     $Element->{$HTML_NS}->{bdo} = {
2100     attrs_checker => sub {
2101     my ($self, $todo) = @_;
2102     $GetHTMLAttrsChecker->({})->($self, $todo);
2103     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2104     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2105     }
2106     },
2107     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2108     checker => $HTMLStrictlyInlineChecker,
2109     };
2110    
2111     $Element->{$HTML_NS}->{ins} = {
2112     attrs_checker => $GetHTMLAttrsChecker->({
2113     cite => $HTMLURIAttrChecker,
2114     datetime => $HTMLDatetimeAttrChecker,
2115     }),
2116     checker => $HTMLTransparentChecker,
2117     };
2118    
2119     $Element->{$HTML_NS}->{del} = {
2120     attrs_checker => $GetHTMLAttrsChecker->({
2121     cite => $HTMLURIAttrChecker,
2122     datetime => $HTMLDatetimeAttrChecker,
2123     }),
2124     checker => sub {
2125     my ($self, $todo) = @_;
2126    
2127     my $parent = $todo->{node}->manakai_parent_element;
2128     if (defined $parent) {
2129     my $nsuri = $parent->namespace_uri;
2130     $nsuri = '' unless defined $nsuri;
2131     my $ln = $parent->manakai_local_name;
2132     my $eldef = $Element->{$nsuri}->{$ln} ||
2133     $Element->{$nsuri}->{''} ||
2134     $ElementDefault;
2135     return $eldef->{checker}->($self, $todo);
2136     } else {
2137     return $HTMLBlockOrInlineChecker->($self, $todo);
2138     }
2139     },
2140     };
2141    
2142     ## TODO: figure
2143 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2144 wakaba 1.1
2145 wakaba 1.4 ## TODO: |alt|
2146 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2147     attrs_checker => sub {
2148     my ($self, $todo) = @_;
2149     $GetHTMLAttrsChecker->({
2150     alt => sub { }, ## NOTE: No syntactical requirement
2151     src => $HTMLURIAttrChecker,
2152     usemap => $HTMLUsemapAttrChecker,
2153     ismap => sub {
2154     my ($self, $attr, $parent_todo) = @_;
2155     if (not $todo->{flag}->{has_a}) {
2156     $self->{onerror}->(node => $attr, type => 'attribute not allowed');
2157     }
2158     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2159     },
2160     ## TODO: height
2161     ## TODO: width
2162     })->($self, $todo);
2163     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2164     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2165     }
2166     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2167     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2168     }
2169     },
2170     checker => $HTMLEmptyChecker,
2171     };
2172    
2173     $Element->{$HTML_NS}->{iframe} = {
2174     attrs_checker => $GetHTMLAttrsChecker->({
2175     src => $HTMLURIAttrChecker,
2176     }),
2177     checker => $HTMLTextChecker,
2178     };
2179    
2180     $Element->{$HTML_NS}->{embed} = {
2181     attrs_checker => sub {
2182     my ($self, $todo) = @_;
2183     my $has_src;
2184     for my $attr (@{$todo->{node}->attributes}) {
2185     my $attr_ns = $attr->namespace_uri;
2186     $attr_ns = '' unless defined $attr_ns;
2187     my $attr_ln = $attr->manakai_local_name;
2188     my $checker;
2189     if ($attr_ns eq '') {
2190     if ($attr_ln eq 'src') {
2191     $checker = $HTMLURIAttrChecker;
2192     $has_src = 1;
2193     } elsif ($attr_ln eq 'type') {
2194     $checker = $HTMLIMTAttrChecker;
2195     } else {
2196     ## TODO: height
2197     ## TODO: width
2198     $checker = $HTMLAttrChecker->{$attr_ln}
2199     || sub { }; ## NOTE: Any local attribute is ok.
2200     }
2201     }
2202     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2203     || $AttrChecker->{$attr_ns}->{''};
2204     if ($checker) {
2205     $checker->($self, $attr);
2206     } else {
2207     $self->{onerror}->(node => $attr, level => 'unsupported',
2208     type => 'attribute');
2209     ## ISSUE: No comformance createria for global attributes in the spec
2210     }
2211     }
2212    
2213     unless ($has_src) {
2214     $self->{onerror}->(node => $todo->{node},
2215     type => 'attribute missing:src');
2216     }
2217     },
2218     checker => $HTMLEmptyChecker,
2219     };
2220    
2221     $Element->{$HTML_NS}->{object} = {
2222     attrs_checker => sub {
2223     my ($self, $todo) = @_;
2224     $GetHTMLAttrsChecker->({
2225     data => $HTMLURIAttrChecker,
2226     type => $HTMLIMTAttrChecker,
2227     usemap => $HTMLUsemapAttrChecker,
2228     ## TODO: width
2229     ## TODO: height
2230     })->($self, $todo);
2231     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2232     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2233     $self->{onerror}->(node => $todo->{node},
2234     type => 'attribute missing:data|type');
2235     }
2236     }
2237     },
2238     checker => $ElementDefault->{checker}, ## TODO
2239 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2240 wakaba 1.1 };
2241    
2242     $Element->{$HTML_NS}->{param} = {
2243     attrs_checker => sub {
2244     my ($self, $todo) = @_;
2245     $GetHTMLAttrsChecker->({
2246     name => sub { },
2247     value => sub { },
2248     })->($self, $todo);
2249     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2250     $self->{onerror}->(node => $todo->{node},
2251     type => 'attribute missing:name');
2252     }
2253     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2254     $self->{onerror}->(node => $todo->{node},
2255     type => 'attribute missing:value');
2256     }
2257     },
2258     checker => $HTMLEmptyChecker,
2259     };
2260    
2261     $Element->{$HTML_NS}->{video} = {
2262     attrs_checker => $GetHTMLAttrsChecker->({
2263     src => $HTMLURIAttrChecker,
2264     ## TODO: start, loopstart, loopend, end
2265     ## ISSUE: they MUST be "value time offset"s. Value?
2266     ## ISSUE: loopcount has no conformance creteria
2267     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2268     controls => $GetHTMLBooleanAttrChecker->('controls'),
2269     }),
2270     checker => sub {
2271     my ($self, $todo) = @_;
2272    
2273     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2274     return $HTMLBlockOrInlineChecker->($self, $todo);
2275     } else {
2276     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2277     ->($self, $todo);
2278     }
2279     },
2280     };
2281    
2282     $Element->{$HTML_NS}->{audio} = {
2283     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2284     checker => $Element->{$HTML_NS}->{video}->{checker},
2285     };
2286    
2287     $Element->{$HTML_NS}->{source} = {
2288     attrs_checker => sub {
2289     my ($self, $todo) = @_;
2290     $GetHTMLAttrsChecker->({
2291     src => $HTMLURIAttrChecker,
2292     type => $HTMLIMTAttrChecker,
2293     media => $HTMLMQAttrChecker,
2294     })->($self, $todo);
2295     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2296     $self->{onerror}->(node => $todo->{node},
2297     type => 'attribute missing:src');
2298     }
2299     },
2300     checker => $HTMLEmptyChecker,
2301     };
2302    
2303     $Element->{$HTML_NS}->{canvas} = {
2304     attrs_checker => $GetHTMLAttrsChecker->({
2305     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2306     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2307     }),
2308     checker => $HTMLInlineChecker,
2309     };
2310    
2311     $Element->{$HTML_NS}->{map} = {
2312 wakaba 1.4 attrs_checker => sub {
2313     my ($self, $todo) = @_;
2314     my $has_id;
2315     $GetHTMLAttrsChecker->({
2316     id => sub {
2317     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2318     my ($self, $attr) = @_;
2319     my $value = $attr->value;
2320     if (length $value > 0) {
2321     if ($self->{id}->{$value}) {
2322     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2323     push @{$self->{id}->{$value}}, $attr;
2324     } else {
2325     $self->{id}->{$value} = [$attr];
2326     }
2327 wakaba 1.1 } else {
2328 wakaba 1.4 ## NOTE: MUST contain at least one character
2329     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2330 wakaba 1.1 }
2331 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2332     $self->{onerror}->(node => $attr, type => 'space in ID');
2333     }
2334     $self->{map}->{$value} ||= $attr;
2335     $has_id = 1;
2336     },
2337     })->($self, $todo);
2338     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2339     unless $has_id;
2340     },
2341 wakaba 1.1 checker => $HTMLBlockChecker,
2342     };
2343    
2344     $Element->{$HTML_NS}->{area} = {
2345     attrs_checker => sub {
2346     my ($self, $todo) = @_;
2347     my %attr;
2348     my $coords;
2349     for my $attr (@{$todo->{node}->attributes}) {
2350     my $attr_ns = $attr->namespace_uri;
2351     $attr_ns = '' unless defined $attr_ns;
2352     my $attr_ln = $attr->manakai_local_name;
2353     my $checker;
2354     if ($attr_ns eq '') {
2355     $checker = {
2356     alt => sub { },
2357     ## NOTE: |alt| value has no conformance creteria.
2358     shape => $GetHTMLEnumeratedAttrChecker->({
2359     circ => -1, circle => 1,
2360     default => 1,
2361     poly => 1, polygon => -1,
2362     rect => 1, rectangle => -1,
2363     }),
2364     coords => sub {
2365     my ($self, $attr) = @_;
2366     my $value = $attr->value;
2367     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2368     $coords = [split /,/, $value];
2369     } else {
2370     $self->{onerror}->(node => $attr,
2371     type => 'coords:syntax error');
2372     }
2373     },
2374     target => $HTMLTargetAttrChecker,
2375     href => $HTMLURIAttrChecker,
2376     ping => $HTMLSpaceURIsAttrChecker,
2377 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2378 wakaba 1.1 media => $HTMLMQAttrChecker,
2379     hreflang => $HTMLLanguageTagAttrChecker,
2380     type => $HTMLIMTAttrChecker,
2381     }->{$attr_ln};
2382     if ($checker) {
2383     $attr{$attr_ln} = $attr;
2384     } else {
2385     $checker = $HTMLAttrChecker->{$attr_ln};
2386     }
2387     }
2388     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2389     || $AttrChecker->{$attr_ns}->{''};
2390     if ($checker) {
2391     $checker->($self, $attr) if ref $checker;
2392     } else {
2393     $self->{onerror}->(node => $attr, level => 'unsupported',
2394     type => 'attribute');
2395     ## ISSUE: No comformance createria for unknown attributes in the spec
2396     }
2397     }
2398    
2399     if (defined $attr{href}) {
2400 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2401 wakaba 1.1 unless (defined $attr{alt}) {
2402     $self->{onerror}->(node => $todo->{node},
2403     type => 'attribute missing:alt');
2404     }
2405     } else {
2406     for (qw/target ping rel media hreflang type alt/) {
2407     if (defined $attr{$_}) {
2408     $self->{onerror}->(node => $attr{$_},
2409     type => 'attribute not allowed');
2410     }
2411     }
2412     }
2413    
2414     my $shape = 'rectangle';
2415     if (defined $attr{shape}) {
2416     $shape = {
2417     circ => 'circle', circle => 'circle',
2418     default => 'default',
2419     poly => 'polygon', polygon => 'polygon',
2420     rect => 'rectangle', rectangle => 'rectangle',
2421     }->{lc $attr{shape}->value} || 'rectangle';
2422     ## TODO: ASCII lowercase?
2423     }
2424    
2425     if ($shape eq 'circle') {
2426     if (defined $attr{coords}) {
2427     if (defined $coords) {
2428     if (@$coords == 3) {
2429     if ($coords->[2] < 0) {
2430     $self->{onerror}->(node => $attr{coords},
2431     type => 'coords:out of range:2');
2432     }
2433     } else {
2434     $self->{onerror}->(node => $attr{coords},
2435     type => 'coords:number:3:'.@$coords);
2436     }
2437     } else {
2438     ## NOTE: A syntax error has been reported.
2439     }
2440     } else {
2441     $self->{onerror}->(node => $todo->{node},
2442     type => 'attribute missing:coords');
2443     }
2444     } elsif ($shape eq 'default') {
2445     if (defined $attr{coords}) {
2446     $self->{onerror}->(node => $attr{coords},
2447     type => 'attribute not allowed');
2448     }
2449     } elsif ($shape eq 'polygon') {
2450     if (defined $attr{coords}) {
2451     if (defined $coords) {
2452     if (@$coords >= 6) {
2453     unless (@$coords % 2 == 0) {
2454     $self->{onerror}->(node => $attr{coords},
2455     type => 'coords:number:even:'.@$coords);
2456     }
2457     } else {
2458     $self->{onerror}->(node => $attr{coords},
2459     type => 'coords:number:>=6:'.@$coords);
2460     }
2461     } else {
2462     ## NOTE: A syntax error has been reported.
2463     }
2464     } else {
2465     $self->{onerror}->(node => $todo->{node},
2466     type => 'attribute missing:coords');
2467     }
2468     } elsif ($shape eq 'rectangle') {
2469     if (defined $attr{coords}) {
2470     if (defined $coords) {
2471     if (@$coords == 4) {
2472     unless ($coords->[0] < $coords->[2]) {
2473     $self->{onerror}->(node => $attr{coords},
2474     type => 'coords:out of range:0');
2475     }
2476     unless ($coords->[1] < $coords->[3]) {
2477     $self->{onerror}->(node => $attr{coords},
2478     type => 'coords:out of range:1');
2479     }
2480     } else {
2481     $self->{onerror}->(node => $attr{coords},
2482     type => 'coords:number:4:'.@$coords);
2483     }
2484     } else {
2485     ## NOTE: A syntax error has been reported.
2486     }
2487     } else {
2488     $self->{onerror}->(node => $todo->{node},
2489     type => 'attribute missing:coords');
2490     }
2491     }
2492     },
2493     checker => $HTMLEmptyChecker,
2494     };
2495     ## TODO: only in map
2496    
2497     $Element->{$HTML_NS}->{table} = {
2498     attrs_checker => $GetHTMLAttrsChecker->({}),
2499     checker => sub {
2500     my ($self, $todo) = @_;
2501     my $el = $todo->{node};
2502     my $new_todos = [];
2503     my @nodes = (@{$el->child_nodes});
2504    
2505     my $phase = 'before caption';
2506     my $has_tfoot;
2507     while (@nodes) {
2508     my $node = shift @nodes;
2509     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2510    
2511     my $nt = $node->node_type;
2512     if ($nt == 1) {
2513     my $node_ns = $node->namespace_uri;
2514     $node_ns = '' unless defined $node_ns;
2515     my $node_ln = $node->manakai_local_name;
2516     ## NOTE: |minuses| list is not checked since redundant
2517 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2518     #
2519     } elsif ($phase eq 'in tbodys') {
2520 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2521     #$phase = 'in tbodys';
2522     } elsif (not $has_tfoot and
2523     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2524     $phase = 'after tfoot';
2525     $has_tfoot = 1;
2526     } else {
2527     $self->{onerror}->(node => $node, type => 'element not allowed');
2528     }
2529     } elsif ($phase eq 'in trs') {
2530     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2531     #$phase = 'in trs';
2532     } elsif (not $has_tfoot and
2533     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2534     $phase = 'after tfoot';
2535     $has_tfoot = 1;
2536     } else {
2537     $self->{onerror}->(node => $node, type => 'element not allowed');
2538     }
2539     } elsif ($phase eq 'after thead') {
2540     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2541     $phase = 'in tbodys';
2542     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2543     $phase = 'in trs';
2544     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2545     $phase = 'in tbodys';
2546     $has_tfoot = 1;
2547     } else {
2548     $self->{onerror}->(node => $node, type => 'element not allowed');
2549     }
2550     } elsif ($phase eq 'in colgroup') {
2551     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2552     $phase = 'in colgroup';
2553     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2554     $phase = 'after thead';
2555     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2556     $phase = 'in tbodys';
2557     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2558     $phase = 'in trs';
2559     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2560     $phase = 'in tbodys';
2561     $has_tfoot = 1;
2562     } else {
2563     $self->{onerror}->(node => $node, type => 'element not allowed');
2564     }
2565     } elsif ($phase eq 'before caption') {
2566     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2567     $phase = 'in colgroup';
2568     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2569     $phase = 'in colgroup';
2570     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2571     $phase = 'after thead';
2572     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2573     $phase = 'in tbodys';
2574     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2575     $phase = 'in trs';
2576     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2577     $phase = 'in tbodys';
2578     $has_tfoot = 1;
2579     } else {
2580     $self->{onerror}->(node => $node, type => 'element not allowed');
2581     }
2582     } else { # after tfoot
2583     $self->{onerror}->(node => $node, type => 'element not allowed');
2584     }
2585     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2586     unshift @nodes, @$sib;
2587     push @$new_todos, @$ch;
2588     } elsif ($nt == 3 or $nt == 4) {
2589     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2590     $self->{onerror}->(node => $node, type => 'character not allowed');
2591     }
2592     } elsif ($nt == 5) {
2593     unshift @nodes, @{$node->child_nodes};
2594     }
2595     }
2596    
2597     ## Table model errors
2598     require Whatpm::HTMLTable;
2599     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2600     my %opt = @_;
2601     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2602     });
2603     push @{$self->{return}->{table}}, $todo->{node};
2604    
2605     return ($new_todos);
2606     },
2607     };
2608    
2609     $Element->{$HTML_NS}->{caption} = {
2610     attrs_checker => $GetHTMLAttrsChecker->({}),
2611     checker => $HTMLSignificantStrictlyInlineChecker,
2612     };
2613    
2614     $Element->{$HTML_NS}->{colgroup} = {
2615     attrs_checker => $GetHTMLAttrsChecker->({
2616     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2617     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2618     ## TODO: "attribute not supported" if |col|.
2619     ## ISSUE: MUST NOT if any |col|?
2620     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2621     }),
2622     checker => sub {
2623     my ($self, $todo) = @_;
2624     my $el = $todo->{node};
2625     my $new_todos = [];
2626     my @nodes = (@{$el->child_nodes});
2627    
2628     while (@nodes) {
2629     my $node = shift @nodes;
2630     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2631    
2632     my $nt = $node->node_type;
2633     if ($nt == 1) {
2634     my $node_ns = $node->namespace_uri;
2635     $node_ns = '' unless defined $node_ns;
2636     my $node_ln = $node->manakai_local_name;
2637     ## NOTE: |minuses| list is not checked since redundant
2638 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2639     #
2640     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2641 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2642     }
2643     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2644     unshift @nodes, @$sib;
2645     push @$new_todos, @$ch;
2646     } elsif ($nt == 3 or $nt == 4) {
2647     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2648     $self->{onerror}->(node => $node, type => 'character not allowed');
2649     }
2650     } elsif ($nt == 5) {
2651     unshift @nodes, @{$node->child_nodes};
2652     }
2653     }
2654     return ($new_todos);
2655     },
2656     };
2657    
2658     $Element->{$HTML_NS}->{col} = {
2659     attrs_checker => $GetHTMLAttrsChecker->({
2660     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2661     }),
2662     checker => $HTMLEmptyChecker,
2663     };
2664    
2665     $Element->{$HTML_NS}->{tbody} = {
2666     attrs_checker => $GetHTMLAttrsChecker->({}),
2667     checker => sub {
2668     my ($self, $todo) = @_;
2669     my $el = $todo->{node};
2670     my $new_todos = [];
2671     my @nodes = (@{$el->child_nodes});
2672    
2673     my $has_tr;
2674     while (@nodes) {
2675     my $node = shift @nodes;
2676     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2677    
2678     my $nt = $node->node_type;
2679     if ($nt == 1) {
2680     my $node_ns = $node->namespace_uri;
2681     $node_ns = '' unless defined $node_ns;
2682     my $node_ln = $node->manakai_local_name;
2683     ## NOTE: |minuses| list is not checked since redundant
2684 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2685     #
2686     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2687 wakaba 1.1 $has_tr = 1;
2688     } else {
2689     $self->{onerror}->(node => $node, type => 'element not allowed');
2690     }
2691     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2692     unshift @nodes, @$sib;
2693     push @$new_todos, @$ch;
2694     } elsif ($nt == 3 or $nt == 4) {
2695     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2696     $self->{onerror}->(node => $node, type => 'character not allowed');
2697     }
2698     } elsif ($nt == 5) {
2699     unshift @nodes, @{$node->child_nodes};
2700     }
2701     }
2702     unless ($has_tr) {
2703     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2704     }
2705     return ($new_todos);
2706     },
2707     };
2708    
2709     $Element->{$HTML_NS}->{thead} = {
2710     attrs_checker => $GetHTMLAttrsChecker->({}),
2711     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2712     };
2713    
2714     $Element->{$HTML_NS}->{tfoot} = {
2715     attrs_checker => $GetHTMLAttrsChecker->({}),
2716     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2717     };
2718    
2719     $Element->{$HTML_NS}->{tr} = {
2720     attrs_checker => $GetHTMLAttrsChecker->({}),
2721     checker => sub {
2722     my ($self, $todo) = @_;
2723     my $el = $todo->{node};
2724     my $new_todos = [];
2725     my @nodes = (@{$el->child_nodes});
2726    
2727     my $has_td;
2728     while (@nodes) {
2729     my $node = shift @nodes;
2730     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2731    
2732     my $nt = $node->node_type;
2733     if ($nt == 1) {
2734     my $node_ns = $node->namespace_uri;
2735     $node_ns = '' unless defined $node_ns;
2736     my $node_ln = $node->manakai_local_name;
2737     ## NOTE: |minuses| list is not checked since redundant
2738 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2739     #
2740     } elsif ($node_ns eq $HTML_NS and
2741     ($node_ln eq 'td' or $node_ln eq 'th')) {
2742 wakaba 1.1 $has_td = 1;
2743     } else {
2744     $self->{onerror}->(node => $node, type => 'element not allowed');
2745     }
2746     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2747     unshift @nodes, @$sib;
2748     push @$new_todos, @$ch;
2749     } elsif ($nt == 3 or $nt == 4) {
2750     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2751     $self->{onerror}->(node => $node, type => 'character not allowed');
2752     }
2753     } elsif ($nt == 5) {
2754     unshift @nodes, @{$node->child_nodes};
2755     }
2756     }
2757     unless ($has_td) {
2758     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2759     }
2760     return ($new_todos);
2761     },
2762     };
2763    
2764     $Element->{$HTML_NS}->{td} = {
2765     attrs_checker => $GetHTMLAttrsChecker->({
2766     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2767     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2768     }),
2769     checker => $HTMLBlockOrInlineChecker,
2770     };
2771    
2772     $Element->{$HTML_NS}->{th} = {
2773     attrs_checker => $GetHTMLAttrsChecker->({
2774     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2775     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2776     scope => $GetHTMLEnumeratedAttrChecker
2777     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2778     }),
2779     checker => $HTMLBlockOrInlineChecker,
2780     };
2781    
2782     ## TODO: forms
2783 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2784 wakaba 1.1
2785     $Element->{$HTML_NS}->{script} = {
2786     attrs_checker => sub {
2787     my ($self, $todo) = @_;
2788     $GetHTMLAttrsChecker->({
2789     src => $HTMLURIAttrChecker,
2790     defer => $GetHTMLBooleanAttrChecker->('defer'),
2791     async => $GetHTMLBooleanAttrChecker->('async'),
2792     type => $HTMLIMTAttrChecker,
2793     })->($self, $todo);
2794     if ($todo->{node}->has_attribute_ns (undef, 'defer')) {
2795     my $async_attr = $todo->{node}->get_attribute_node_ns (undef, 'async');
2796     if ($async_attr) {
2797     $self->{onerror}->(node => $async_attr,
2798     type => 'attribute not allowed'); # MUST NOT
2799     }
2800     }
2801     },
2802     checker => sub {
2803     my ($self, $todo) = @_;
2804    
2805     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2806     return $HTMLEmptyChecker->($self, $todo);
2807     } else {
2808     ## NOTE: No content model conformance in HTML5 spec.
2809     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2810     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2811     if ((defined $type and $type eq '') or
2812     (defined $language and $language eq '')) {
2813     $type = 'text/javascript';
2814     } elsif (defined $type) {
2815     #
2816     } elsif (defined $language) {
2817     $type = 'text/' . $language;
2818     } else {
2819     $type = 'text/javascript';
2820     }
2821     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2822     type => 'script:'.$type); ## TODO: $type normalization
2823     return $AnyChecker->($self, $todo);
2824     }
2825     },
2826     };
2827    
2828     ## NOTE: When script is disabled.
2829     $Element->{$HTML_NS}->{noscript} = {
2830 wakaba 1.3 attrs_checker => sub {
2831     my ($self, $todo) = @_;
2832    
2833     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2834     ## since the later is not invoked when the |noscript| is used as a
2835     ## transparent element.
2836     unless ($todo->{node}->owner_document->manakai_is_html) {
2837     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2838     }
2839    
2840     $GetHTMLAttrsChecker->({})->($self, $todo);
2841     },
2842 wakaba 1.1 checker => sub {
2843     my ($self, $todo) = @_;
2844    
2845 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2846     my $new_todos = [];
2847     my @nodes = (@{$todo->{node}->child_nodes});
2848    
2849     while (@nodes) {
2850     my $node = shift @nodes;
2851     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2852    
2853     my $nt = $node->node_type;
2854     if ($nt == 1) {
2855     my $node_ns = $node->namespace_uri;
2856     $node_ns = '' unless defined $node_ns;
2857     my $node_ln = $node->manakai_local_name;
2858 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2859     #
2860     } elsif ($node_ns eq $HTML_NS) {
2861 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2862     #
2863     } elsif ($node_ln eq 'meta') {
2864 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2865     #
2866 wakaba 1.3 } else {
2867 wakaba 1.5 $self->{onerror}->(node => $node,
2868     type => 'element not allowed');
2869 wakaba 1.3 }
2870     } else {
2871     $self->{onerror}->(node => $node, type => 'element not allowed');
2872     }
2873     } else {
2874     $self->{onerror}->(node => $node, type => 'element not allowed');
2875     }
2876    
2877     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2878     unshift @nodes, @$sib;
2879     push @$new_todos, @$ch;
2880     } elsif ($nt == 3 or $nt == 4) {
2881     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2882     $self->{onerror}->(node => $node, type => 'character not allowed');
2883     }
2884     } elsif ($nt == 5) {
2885     unshift @nodes, @{$node->child_nodes};
2886     }
2887     }
2888     return ($new_todos);
2889     } else {
2890     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2891     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2892     push @$sib, $end;
2893     return ($sib, $ch);
2894     }
2895 wakaba 1.1 },
2896     };
2897 wakaba 1.3
2898     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2899 wakaba 1.1
2900     $Element->{$HTML_NS}->{'event-source'} = {
2901     attrs_checker => $GetHTMLAttrsChecker->({
2902     src => $HTMLURIAttrChecker,
2903     }),
2904     checker => $HTMLEmptyChecker,
2905     };
2906    
2907     $Element->{$HTML_NS}->{details} = {
2908     attrs_checker => $GetHTMLAttrsChecker->({
2909     open => $GetHTMLBooleanAttrChecker->('open'),
2910     }),
2911     checker => sub {
2912     my ($self, $todo) = @_;
2913    
2914     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2915     my ($sib, $ch)
2916     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2917     ->($self, $todo);
2918     push @$sib, $end;
2919     return ($sib, $ch);
2920     },
2921     };
2922    
2923     $Element->{$HTML_NS}->{datagrid} = {
2924     attrs_checker => $GetHTMLAttrsChecker->({
2925     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2926     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2927     }),
2928     checker => sub {
2929     my ($self, $todo) = @_;
2930     my $el = $todo->{node};
2931     my $new_todos = [];
2932     my @nodes = (@{$el->child_nodes});
2933    
2934     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2935    
2936     ## Block-table Block* | table | select | datalist | Empty
2937     my $mode = 'any';
2938     while (@nodes) {
2939     my $node = shift @nodes;
2940     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2941    
2942     my $nt = $node->node_type;
2943     if ($nt == 1) {
2944     my $node_ns = $node->namespace_uri;
2945     $node_ns = '' unless defined $node_ns;
2946     my $node_ln = $node->manakai_local_name;
2947     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2948 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2949     #
2950     } elsif ($mode eq 'block') {
2951 wakaba 1.1 $not_allowed = 1
2952     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2953     } elsif ($mode eq 'any') {
2954     if ($node_ns eq $HTML_NS and
2955     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2956     $mode = 'none';
2957     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2958     $mode = 'block';
2959     } else {
2960     $not_allowed = 1;
2961     }
2962     } else {
2963     $not_allowed = 1;
2964     }
2965     $self->{onerror}->(node => $node, type => 'element not allowed')
2966     if $not_allowed;
2967     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2968     unshift @nodes, @$sib;
2969     push @$new_todos, @$ch;
2970     } elsif ($nt == 3 or $nt == 4) {
2971     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2972     $self->{onerror}->(node => $node, type => 'character not allowed');
2973     }
2974     } elsif ($nt == 5) {
2975     unshift @nodes, @{$node->child_nodes};
2976     }
2977     }
2978    
2979     push @$new_todos, $end;
2980     return ($new_todos);
2981     },
2982     };
2983    
2984     $Element->{$HTML_NS}->{command} = {
2985     attrs_checker => $GetHTMLAttrsChecker->({
2986     checked => $GetHTMLBooleanAttrChecker->('checked'),
2987     default => $GetHTMLBooleanAttrChecker->('default'),
2988     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2989     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2990     icon => $HTMLURIAttrChecker,
2991     label => sub { }, ## NOTE: No conformance creteria
2992     radiogroup => sub { }, ## NOTE: No conformance creteria
2993     ## NOTE: |title| has special semantics, but no syntactical difference
2994     type => sub {
2995     my ($self, $attr) = @_;
2996     my $value = $attr->value;
2997     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2998     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2999     }
3000     },
3001     }),
3002     checker => $HTMLEmptyChecker,
3003     };
3004    
3005     $Element->{$HTML_NS}->{menu} = {
3006     attrs_checker => $GetHTMLAttrsChecker->({
3007     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3008     id => sub {
3009     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3010     my ($self, $attr) = @_;
3011     my $value = $attr->value;
3012     if (length $value > 0) {
3013     if ($self->{id}->{$value}) {
3014     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3015     push @{$self->{id}->{$value}}, $attr;
3016     } else {
3017     $self->{id}->{$value} = [$attr];
3018     }
3019     } else {
3020     ## NOTE: MUST contain at least one character
3021     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3022     }
3023     if ($value =~ /[\x09-\x0D\x20]/) {
3024     $self->{onerror}->(node => $attr, type => 'space in ID');
3025     }
3026     $self->{menu}->{$value} ||= $attr;
3027     ## ISSUE: <menu id=""><p contextmenu=""> match?
3028     },
3029     label => sub { }, ## NOTE: No conformance creteria
3030     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3031     }),
3032     checker => sub {
3033     my ($self, $todo) = @_;
3034     my $el = $todo->{node};
3035     my $new_todos = [];
3036     my @nodes = (@{$el->child_nodes});
3037    
3038     my $content = 'li or inline';
3039     while (@nodes) {
3040     my $node = shift @nodes;
3041     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3042    
3043     my $nt = $node->node_type;
3044     if ($nt == 1) {
3045     my $node_ns = $node->namespace_uri;
3046     $node_ns = '' unless defined $node_ns;
3047     my $node_ln = $node->manakai_local_name;
3048     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3049 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3050     #
3051     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3052 wakaba 1.1 if ($content eq 'inline') {
3053     $not_allowed = 1;
3054     } elsif ($content eq 'li or inline') {
3055     $content = 'li';
3056     }
3057     } else {
3058     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3059     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3060     $content = 'inline';
3061     } else {
3062     $not_allowed = 1;
3063     }
3064     }
3065     $self->{onerror}->(node => $node, type => 'element not allowed')
3066     if $not_allowed;
3067     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3068     unshift @nodes, @$sib;
3069     push @$new_todos, @$ch;
3070     } elsif ($nt == 3 or $nt == 4) {
3071     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3072     if ($content eq 'li') {
3073     $self->{onerror}->(node => $node, type => 'character not allowed');
3074     } elsif ($content eq 'li or inline') {
3075     $content = 'inline';
3076     }
3077     }
3078     } elsif ($nt == 5) {
3079     unshift @nodes, @{$node->child_nodes};
3080     }
3081     }
3082    
3083     for (@$new_todos) {
3084     $_->{inline} = 1;
3085     }
3086     return ($new_todos);
3087     },
3088 wakaba 1.8 };
3089    
3090     $Element->{$HTML_NS}->{datatemplate} = {
3091     attrs_checker => $GetHTMLAttrsChecker->({}),
3092     checker => sub {
3093     my ($self, $todo) = @_;
3094     my $el = $todo->{node};
3095     my $new_todos = [];
3096     my @nodes = (@{$el->child_nodes});
3097    
3098     while (@nodes) {
3099     my $node = shift @nodes;
3100     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3101    
3102     my $nt = $node->node_type;
3103     if ($nt == 1) {
3104     my $node_ns = $node->namespace_uri;
3105     $node_ns = '' unless defined $node_ns;
3106     my $node_ln = $node->manakai_local_name;
3107     ## NOTE: |minuses| list is not checked since redundant
3108     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3109     #
3110     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3111     $self->{onerror}->(node => $node,
3112     type => 'element not allowed:datatemplate');
3113     }
3114     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3115     unshift @nodes, @$sib;
3116     push @$new_todos, @$ch;
3117     } elsif ($nt == 3 or $nt == 4) {
3118     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3119     $self->{onerror}->(node => $node, type => 'character not allowed');
3120     }
3121     } elsif ($nt == 5) {
3122     unshift @nodes, @{$node->child_nodes};
3123     }
3124     }
3125     return ($new_todos);
3126     },
3127     is_xml_root => 1,
3128     };
3129    
3130     $Element->{$HTML_NS}->{rule} = {
3131     attrs_checker => $GetHTMLAttrsChecker->({
3132     ## TODO: |condition| attribute
3133     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3134     }),
3135     checker => sub {
3136     my ($self, $todo) = @_;
3137    
3138     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3139     my ($sib, $ch) = $AnyChecker->($self, $todo);
3140     push @$sib, $end;
3141     return ($sib, $ch);
3142     },
3143     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3144     ## is applied to some conforming data, results in a conforming DOM tree.":
3145     ## We don't check against this.
3146     };
3147    
3148     $Element->{$HTML_NS}->{nest} = {
3149     attrs_checker => $GetHTMLAttrsChecker->({
3150     ## TODO: |filter| attribute
3151     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3152     }),
3153     checker => $HTMLEmptyChecker,
3154 wakaba 1.1 };
3155    
3156     $Element->{$HTML_NS}->{legend} = {
3157     attrs_checker => $GetHTMLAttrsChecker->({}),
3158     checker => sub {
3159     my ($self, $todo) = @_;
3160    
3161     my $parent = $todo->{node}->manakai_parent_element;
3162     if (defined $parent) {
3163     my $nsuri = $parent->namespace_uri;
3164     $nsuri = '' unless defined $nsuri;
3165     my $ln = $parent->manakai_local_name;
3166     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3167     return $HTMLInlineChecker->($self, $todo);
3168     } else {
3169     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
3170     }
3171     } else {
3172     return $HTMLInlineChecker->($self, $todo);
3173     }
3174    
3175     ## ISSUE: Content model is defined only for fieldset/legend,
3176     ## details/legend, and figure/legend.
3177     },
3178     };
3179    
3180     $Element->{$HTML_NS}->{div} = {
3181     attrs_checker => $GetHTMLAttrsChecker->({}),
3182     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3183     };
3184    
3185     $Element->{$HTML_NS}->{font} = {
3186     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3187     checker => $HTMLTransparentChecker,
3188     };
3189    
3190     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3191    
3192     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24