/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.28 - (hide annotations) (download)
Sun Feb 10 04:09:57 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.27: +2 -11 lines
++ whatpm/Whatpm/ChangeLog	10 Feb 2008 04:09:00 -0000
2008-02-10  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm (check_document, check_element): Support
	for second argument ($onsubdoc).
	(_get_css_parser): Removed (now it is part of WDCC).

++ whatpm/Whatpm/ContentChecker/ChangeLog	10 Feb 2008 04:09:52 -0000
2008-02-10  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm (<style>): CSS validation code removed; instead,
	it does invoke subdoc callback to ask to the callee to validate
	the style sheet separately.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63 wakaba 1.25 ## NOTE: When an element is added to this list, make sure that
64     ## the element's checker set |has_descendant| flag for |significant| content
65     ## as true.
66    
67 wakaba 1.26 my $HTMLSignificantContentErrors = {
68     significant => sub {
69     my ($self, $todo) = @_;
70     $self->{onerror}->(node => $todo->{node},
71     level => $self->{should_level},
72     type => 'no significant content');
73     },
74     }; # $HTMLSignificantContentErrors
75    
76 wakaba 1.25 our $AnyChecker;
77     my $HTMLAnyChecker = sub {
78     my ($self, $todo) = @_;
79    
80     my $old_values = {significant =>
81     $todo->{flag}->{has_descendant}->{significant}};
82     $todo->{flag}->{has_descendant}->{significant} = 0;
83    
84     my ($new_todos) = $AnyChecker->($self, $todo);
85    
86     push @$new_todos, {
87     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
88     old_values => $old_values,
89 wakaba 1.26 errors => $HTMLSignificantContentErrors,
90 wakaba 1.25 };
91    
92     return ($new_todos);
93     }; # $HTMLAnyChecker
94 wakaba 1.1
95     ## Empty
96     my $HTMLEmptyChecker = sub {
97     my ($self, $todo) = @_;
98     my $el = $todo->{node};
99     my $new_todos = [];
100     my @nodes = (@{$el->child_nodes});
101    
102     while (@nodes) {
103     my $node = shift @nodes;
104     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
105    
106     my $nt = $node->node_type;
107     if ($nt == 1) {
108 wakaba 1.8 my $node_ns = $node->namespace_uri;
109     $node_ns = '' unless defined $node_ns;
110     my $node_ln = $node->manakai_local_name;
111     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
112     #
113     } else {
114     ## NOTE: |minuses| list is not checked since redundant
115     $self->{onerror}->(node => $node, type => 'element not allowed');
116     }
117 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
118     unshift @nodes, @$sib;
119     push @$new_todos, @$ch;
120     } elsif ($nt == 3 or $nt == 4) {
121     if ($node->data =~ /[^\x09-\x0D\x20]/) {
122     $self->{onerror}->(node => $node, type => 'character not allowed');
123 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
124 wakaba 1.1 }
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Text
133     my $HTMLTextChecker = sub {
134     my ($self, $todo) = @_;
135     my $el = $todo->{node};
136     my $new_todos = [];
137     my @nodes = (@{$el->child_nodes});
138    
139 wakaba 1.25 my $old_values = {significant =>
140     $todo->{flag}->{has_descendant}->{significant}};
141     $todo->{flag}->{has_descendant}->{significant} = 0;
142    
143 wakaba 1.1 while (@nodes) {
144     my $node = shift @nodes;
145     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
146    
147     my $nt = $node->node_type;
148     if ($nt == 1) {
149 wakaba 1.8 my $node_ns = $node->namespace_uri;
150     $node_ns = '' unless defined $node_ns;
151     my $node_ln = $node->manakai_local_name;
152     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
153     #
154     } else {
155     ## NOTE: |minuses| list is not checked since redundant
156     $self->{onerror}->(node => $node, type => 'element not allowed');
157     }
158 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
159     unshift @nodes, @$sib;
160     push @$new_todos, @$ch;
161 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
162     if ($node->data =~ /[^\x09-\x0D\x20]/) {
163     $todo->{flag}->{has_descendant}->{significant} = 1;
164     }
165 wakaba 1.1 } elsif ($nt == 5) {
166     unshift @nodes, @{$node->child_nodes};
167     }
168     }
169 wakaba 1.25
170     push @$new_todos, {
171     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
172     old_values => $old_values,
173 wakaba 1.26 errors => $HTMLSignificantContentErrors,
174 wakaba 1.25 };
175    
176 wakaba 1.1 return ($new_todos);
177     };
178    
179     ## Zero or more |html:style| elements,
180     ## followed by zero or more block-level elements
181     my $HTMLStylableBlockChecker = sub {
182     my ($self, $todo) = @_;
183     my $el = $todo->{node};
184     my $new_todos = [];
185     my @nodes = (@{$el->child_nodes});
186 wakaba 1.25
187     my $old_values = {significant =>
188     $todo->{flag}->{has_descendant}->{significant}};
189     $todo->{flag}->{has_descendant}->{significant} = 0;
190 wakaba 1.1
191     my $has_non_style;
192     while (@nodes) {
193     my $node = shift @nodes;
194     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
195    
196     my $nt = $node->node_type;
197     if ($nt == 1) {
198     my $node_ns = $node->namespace_uri;
199     $node_ns = '' unless defined $node_ns;
200     my $node_ln = $node->manakai_local_name;
201     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
202     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
203     $not_allowed = 1 if $has_non_style or
204     not $node->has_attribute_ns (undef, 'scoped');
205     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
206     $has_non_style = 1;
207 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
208     #
209 wakaba 1.1 } else {
210     $has_non_style = 1;
211     $not_allowed = 1;
212     }
213     $self->{onerror}->(node => $node, type => 'element not allowed')
214     if $not_allowed;
215     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
216     unshift @nodes, @$sib;
217     push @$new_todos, @$ch;
218     } elsif ($nt == 3 or $nt == 4) {
219     if ($node->data =~ /[^\x09-\x0D\x20]/) {
220     $self->{onerror}->(node => $node, type => 'character not allowed');
221 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
222 wakaba 1.1 }
223     } elsif ($nt == 5) {
224     unshift @nodes, @{$node->child_nodes};
225     }
226     }
227 wakaba 1.25
228     push @$new_todos, {
229     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
230     old_values => $old_values,
231 wakaba 1.26 errors => $HTMLSignificantContentErrors,
232 wakaba 1.25 };
233    
234 wakaba 1.1 return ($new_todos);
235     }; # $HTMLStylableBlockChecker
236    
237     ## Zero or more block-level elements
238     my $HTMLBlockChecker = sub {
239     my ($self, $todo) = @_;
240     my $el = $todo->{node};
241     my $new_todos = [];
242     my @nodes = (@{$el->child_nodes});
243    
244 wakaba 1.25 my $old_values = {significant =>
245     $todo->{flag}->{has_descendant}->{significant}};
246     $todo->{flag}->{has_descendant}->{significant} = 0;
247    
248 wakaba 1.1 while (@nodes) {
249     my $node = shift @nodes;
250     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
251    
252     my $nt = $node->node_type;
253     if ($nt == 1) {
254     my $node_ns = $node->namespace_uri;
255     $node_ns = '' unless defined $node_ns;
256     my $node_ln = $node->manakai_local_name;
257     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
258     $not_allowed = 1
259 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
260     $self->{pluses}->{$node_ns}->{$node_ln};
261 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
262     if $not_allowed;
263     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
264     unshift @nodes, @$sib;
265     push @$new_todos, @$ch;
266     } elsif ($nt == 3 or $nt == 4) {
267     if ($node->data =~ /[^\x09-\x0D\x20]/) {
268     $self->{onerror}->(node => $node, type => 'character not allowed');
269 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
270 wakaba 1.1 }
271     } elsif ($nt == 5) {
272     unshift @nodes, @{$node->child_nodes};
273     }
274     }
275 wakaba 1.25
276     push @$new_todos, {
277     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
278     old_values => $old_values,
279 wakaba 1.26 errors => $HTMLSignificantContentErrors,
280 wakaba 1.25 };
281    
282 wakaba 1.1 return ($new_todos);
283     }; # $HTMLBlockChecker
284    
285     ## Inline-level content
286     my $HTMLInlineChecker = sub {
287     my ($self, $todo) = @_;
288     my $el = $todo->{node};
289     my $new_todos = [];
290     my @nodes = (@{$el->child_nodes});
291    
292 wakaba 1.25 my $old_values = {significant =>
293     $todo->{flag}->{has_descendant}->{significant}};
294     $todo->{flag}->{has_descendant}->{significant} = 0;
295    
296 wakaba 1.1 while (@nodes) {
297     my $node = shift @nodes;
298     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
299    
300     my $nt = $node->node_type;
301     if ($nt == 1) {
302     my $node_ns = $node->namespace_uri;
303     $node_ns = '' unless defined $node_ns;
304     my $node_ln = $node->manakai_local_name;
305     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
306     $not_allowed = 1
307     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
309     $self->{pluses}->{$node_ns}->{$node_ln};
310 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
311     if $not_allowed;
312     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
313     unshift @nodes, @$sib;
314     push @$new_todos, @$ch;
315 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
316     if ($node->data =~ /[^\x09-\x0D\x20]/) {
317     $todo->{flag}->{has_descendant}->{significant} = 1;
318     }
319 wakaba 1.1 } elsif ($nt == 5) {
320     unshift @nodes, @{$node->child_nodes};
321     }
322     }
323    
324     for (@$new_todos) {
325     $_->{inline} = 1;
326     }
327 wakaba 1.25
328     push @$new_todos, {
329     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
330     old_values => $old_values,
331 wakaba 1.26 errors => $HTMLSignificantContentErrors,
332 wakaba 1.25 };
333    
334 wakaba 1.1 return ($new_todos);
335     }; # $HTMLInlineChecker
336    
337     ## Strictly inline-level content
338     my $HTMLStrictlyInlineChecker = sub {
339     my ($self, $todo) = @_;
340     my $el = $todo->{node};
341     my $new_todos = [];
342     my @nodes = (@{$el->child_nodes});
343 wakaba 1.25
344     my $old_values = {significant =>
345     $todo->{flag}->{has_descendant}->{significant}};
346     $todo->{flag}->{has_descendant}->{significant} = 0;
347 wakaba 1.1
348     while (@nodes) {
349     my $node = shift @nodes;
350     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
351    
352     my $nt = $node->node_type;
353     if ($nt == 1) {
354     my $node_ns = $node->namespace_uri;
355     $node_ns = '' unless defined $node_ns;
356     my $node_ln = $node->manakai_local_name;
357     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
358     $not_allowed = 1
359 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
360     $self->{pluses}->{$node_ns}->{$node_ln};
361 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
362     if $not_allowed;
363     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
364     unshift @nodes, @$sib;
365     push @$new_todos, @$ch;
366 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
367     if ($node->data =~ /[^\x09-\x0D\x20]/) {
368     $todo->{flag}->{has_descendant}->{significant} = 1;
369     }
370 wakaba 1.1 } elsif ($nt == 5) {
371     unshift @nodes, @{$node->child_nodes};
372     }
373     }
374    
375     for (@$new_todos) {
376     $_->{inline} = 1;
377     $_->{strictly_inline} = 1;
378     }
379 wakaba 1.25
380     push @$new_todos, {
381     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
382     old_values => $old_values,
383 wakaba 1.26 errors => $HTMLSignificantContentErrors,
384 wakaba 1.25 };
385    
386 wakaba 1.1 return ($new_todos);
387     }; # $HTMLStrictlyInlineChecker
388    
389 wakaba 1.8 ## Inline-level or strictly inline-level content
390 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
391     my ($self, $todo) = @_;
392     my $el = $todo->{node};
393     my $new_todos = [];
394     my @nodes = (@{$el->child_nodes});
395 wakaba 1.25
396     my $old_values = {significant =>
397     $todo->{flag}->{has_descendant}->{significant}};
398     $todo->{flag}->{has_descendant}->{significant} = 0;
399 wakaba 1.1
400     while (@nodes) {
401     my $node = shift @nodes;
402     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
403    
404     my $nt = $node->node_type;
405     if ($nt == 1) {
406     my $node_ns = $node->namespace_uri;
407     $node_ns = '' unless defined $node_ns;
408     my $node_ln = $node->manakai_local_name;
409     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
410     if ($todo->{strictly_inline}) {
411     $not_allowed = 1
412 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
413     $self->{pluses}->{$node_ns}->{$node_ln};
414 wakaba 1.1 } else {
415     $not_allowed = 1
416     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
417 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
418     $self->{pluses}->{$node_ns}->{$node_ln};
419 wakaba 1.1 }
420     $self->{onerror}->(node => $node, type => 'element not allowed')
421     if $not_allowed;
422     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
423     unshift @nodes, @$sib;
424     push @$new_todos, @$ch;
425 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
426     if ($node->data =~ /[^\x09-\x0D\x20]/) {
427     $todo->{flag}->{has_descendant}->{significant} = 1;
428     }
429 wakaba 1.1 } elsif ($nt == 5) {
430     unshift @nodes, @{$node->child_nodes};
431     }
432     }
433    
434     for (@$new_todos) {
435     $_->{inline} = 1;
436     $_->{strictly_inline} = 1;
437     }
438 wakaba 1.25
439     push @$new_todos, {
440     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
441     old_values => $old_values,
442 wakaba 1.26 errors => $HTMLSignificantContentErrors,
443 wakaba 1.25 };
444    
445 wakaba 1.1 return ($new_todos);
446     }; # $HTMLInlineOrStrictlyInlineChecker
447    
448 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
449 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
450     my ($self, $todo) = @_;
451     my $el = $todo->{node};
452     my $new_todos = [];
453     my @nodes = (@{$el->child_nodes});
454 wakaba 1.25
455     my $old_values = {significant =>
456     $todo->{flag}->{has_descendant}->{significant}};
457     $todo->{flag}->{has_descendant}->{significant} = 0;
458 wakaba 1.1
459     my $content = 'block-or-inline'; # or 'block' or 'inline'
460     my @block_not_inline;
461     while (@nodes) {
462     my $node = shift @nodes;
463     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
464    
465 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
466     ## is conforming or not.
467    
468 wakaba 1.1 my $nt = $node->node_type;
469     if ($nt == 1) {
470     my $node_ns = $node->namespace_uri;
471     $node_ns = '' unless defined $node_ns;
472     my $node_ln = $node->manakai_local_name;
473     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
474     if ($content eq 'block') {
475     $not_allowed = 1
476 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
477     $self->{pluses}->{$node_ns}->{$node_ln};
478 wakaba 1.1 } elsif ($content eq 'inline') {
479     $not_allowed = 1
480     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
481 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
482     $self->{pluses}->{$node_ns}->{$node_ln};
483 wakaba 1.1 } else {
484     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
485     my $is_inline
486     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
487     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
488    
489     push @block_not_inline, $node
490     if $is_block and not $is_inline and not $not_allowed;
491 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
492 wakaba 1.1 $content = 'inline';
493     for (@block_not_inline) {
494     $self->{onerror}->(node => $_, type => 'element not allowed');
495     }
496     $not_allowed = 1 unless $is_inline;
497     }
498     }
499     $self->{onerror}->(node => $node, type => 'element not allowed')
500     if $not_allowed;
501     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
502     unshift @nodes, @$sib;
503     push @$new_todos, @$ch;
504     } elsif ($nt == 3 or $nt == 4) {
505     if ($node->data =~ /[^\x09-\x0D\x20]/) {
506     if ($content eq 'block') {
507     $self->{onerror}->(node => $node, type => 'character not allowed');
508     } else {
509     $content = 'inline';
510     for (@block_not_inline) {
511     $self->{onerror}->(node => $_, type => 'element not allowed');
512     }
513     }
514 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
515 wakaba 1.1 }
516     } elsif ($nt == 5) {
517     unshift @nodes, @{$node->child_nodes};
518     }
519     }
520    
521     if ($content eq 'inline') {
522     for (@$new_todos) {
523     $_->{inline} = 1;
524     }
525     }
526 wakaba 1.25
527     push @$new_todos, {
528     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
529     old_values => $old_values,
530 wakaba 1.26 errors => $HTMLSignificantContentErrors,
531 wakaba 1.25 };
532    
533 wakaba 1.1 return ($new_todos);
534     };
535    
536     ## Zero or more XXX element, then either block-level or inline-level
537     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
538     my ($elnsuri, $ellname) = @_;
539     return sub {
540     my ($self, $todo) = @_;
541     my $el = $todo->{node};
542     my $new_todos = [];
543     my @nodes = (@{$el->child_nodes});
544 wakaba 1.25
545     my $old_values = {significant =>
546     $todo->{flag}->{has_descendant}->{significant}};
547     $todo->{flag}->{has_descendant}->{significant} = 0;
548 wakaba 1.1
549     my $has_non_style;
550     my $content = 'block-or-inline'; # or 'block' or 'inline'
551     my @block_not_inline;
552     while (@nodes) {
553     my $node = shift @nodes;
554     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
555    
556     my $nt = $node->node_type;
557     if ($nt == 1) {
558     my $node_ns = $node->namespace_uri;
559     $node_ns = '' unless defined $node_ns;
560     my $node_ln = $node->manakai_local_name;
561     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
562     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
563     $not_allowed = 1 if $has_non_style;
564     if ($ellname eq 'style' and
565     not $node->has_attribute_ns (undef, 'scoped')) {
566     $not_allowed = 1;
567     }
568     } elsif ($content eq 'block') {
569     $has_non_style = 1;
570     $not_allowed = 1
571 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
572     $self->{pluses}->{$node_ns}->{$node_ln};
573 wakaba 1.1 } elsif ($content eq 'inline') {
574     $has_non_style = 1;
575     $not_allowed = 1
576     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
577 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
578     $self->{pluses}->{$node_ns}->{$node_ln};
579 wakaba 1.1 } else {
580 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
581 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
582     my $is_inline
583     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
584     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
585    
586     push @block_not_inline, $node
587     if $is_block and not $is_inline and not $not_allowed;
588 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
589 wakaba 1.1 $content = 'inline';
590     for (@block_not_inline) {
591     $self->{onerror}->(node => $_, type => 'element not allowed');
592     }
593     $not_allowed = 1 unless $is_inline;
594     }
595     }
596     $self->{onerror}->(node => $node, type => 'element not allowed')
597     if $not_allowed;
598     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
599     unshift @nodes, @$sib;
600     push @$new_todos, @$ch;
601     } elsif ($nt == 3 or $nt == 4) {
602     if ($node->data =~ /[^\x09-\x0D\x20]/) {
603     $has_non_style = 1;
604     if ($content eq 'block') {
605     $self->{onerror}->(node => $node, type => 'character not allowed');
606     } else {
607     $content = 'inline';
608     for (@block_not_inline) {
609     $self->{onerror}->(node => $_, type => 'element not allowed');
610     }
611     }
612 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
613 wakaba 1.1 }
614     } elsif ($nt == 5) {
615     unshift @nodes, @{$node->child_nodes};
616     }
617     }
618    
619     if ($content eq 'inline') {
620     for (@$new_todos) {
621     $_->{inline} = 1;
622     }
623     }
624 wakaba 1.25
625     push @$new_todos, {
626     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
627     old_values => $old_values,
628 wakaba 1.26 errors => $HTMLSignificantContentErrors,
629 wakaba 1.25 };
630    
631 wakaba 1.1 return ($new_todos);
632     };
633     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
634    
635     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
636 wakaba 1.25 ## ISSUE: Significant content rule should be applied to transparent element
637     ## with parent? Currently, applied to |video| but not to others.
638 wakaba 1.1
639     our $AttrChecker;
640    
641     my $GetHTMLEnumeratedAttrChecker = sub {
642     my $states = shift; # {value => conforming ? 1 : -1}
643     return sub {
644     my ($self, $attr) = @_;
645     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
646     if ($states->{$value} > 0) {
647     #
648     } elsif ($states->{$value}) {
649     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
650     } else {
651     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
652     }
653     };
654     }; # $GetHTMLEnumeratedAttrChecker
655    
656     my $GetHTMLBooleanAttrChecker = sub {
657     my $local_name = shift;
658     return sub {
659     my ($self, $attr) = @_;
660     my $value = $attr->value;
661     unless ($value eq $local_name or $value eq '') {
662     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
663     }
664     };
665     }; # $GetHTMLBooleanAttrChecker
666    
667 wakaba 1.8 ## Unordered set of space-separated tokens
668 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
669 wakaba 1.8 my ($self, $attr) = @_;
670     my %word;
671     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
672     unless ($word{$word}) {
673     $word{$word} = 1;
674     } else {
675     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
676     }
677     }
678 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
679 wakaba 1.8
680 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
681     ## whose allowed values are defined by the section on link types)
682     my $HTMLLinkTypesAttrChecker = sub {
683 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
684 wakaba 1.1 my %word;
685     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
686     unless ($word{$word}) {
687     $word{$word} = 1;
688 wakaba 1.18 } elsif ($word eq 'up') {
689     #
690 wakaba 1.1 } else {
691     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
692     }
693     }
694     ## NOTE: Case sensitive match (since HTML5 spec does not say link
695     ## types are case-insensitive and it says "The value should not
696     ## be confusingly similar to any other defined value (e.g.
697     ## differing only in case).").
698     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
699     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
700     ## values to be used conformingly.
701     require Whatpm::_LinkTypeList;
702     our $LinkType;
703     for my $word (keys %word) {
704     my $def = $LinkType->{$word};
705     if (defined $def) {
706     if ($def->{status} eq 'accepted') {
707     if (defined $def->{effect}->[$a_or_area]) {
708     #
709     } else {
710     $self->{onerror}->(node => $attr,
711     type => 'link type:bad context:'.$word);
712     }
713     } elsif ($def->{status} eq 'proposal') {
714     $self->{onerror}->(node => $attr, level => 's',
715     type => 'link type:proposed:'.$word);
716 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
717     #
718     } else {
719     $self->{onerror}->(node => $attr,
720     type => 'link type:bad context:'.$word);
721     }
722 wakaba 1.1 } else { # rejected or synonym
723     $self->{onerror}->(node => $attr,
724     type => 'link type:non-conforming:'.$word);
725     }
726 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
727     if ($word eq 'alternate') {
728     #
729     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
730     $todo->{has_hyperlink_link_type} = 1;
731     }
732     }
733 wakaba 1.1 if ($def->{unique}) {
734     unless ($self->{has_link_type}->{$word}) {
735     $self->{has_link_type}->{$word} = 1;
736     } else {
737     $self->{onerror}->(node => $attr,
738     type => 'link type:duplicate:'.$word);
739     }
740     }
741     } else {
742     $self->{onerror}->(node => $attr, level => 'unsupported',
743     type => 'link type:'.$word);
744     }
745     }
746 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
747     if $word{alternate} and not $word{stylesheet};
748 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
749     ## says that using both X-Pingback: header field and HTML
750     ## <link rel=pingback> is deprecated and if both appears they
751     ## SHOULD contain exactly the same value.
752     ## ISSUE: Pingback 1.0 specification defines the exact representation
753     ## of its link element, which cannot be tested by the current arch.
754     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
755     ## include any string that matches to the pattern for the rel=pingback link,
756     ## which again inpossible to test.
757     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
758 wakaba 1.12
759     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
760 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
761     ## then they SHOULD be described in different paragraphs.".
762 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
763 wakaba 1.20
764     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
765 wakaba 1.1
766     ## URI (or IRI)
767     my $HTMLURIAttrChecker = sub {
768     my ($self, $attr) = @_;
769     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
770     my $value = $attr->value;
771     Whatpm::URIChecker->check_iri_reference ($value, sub {
772     my %opt = @_;
773     $self->{onerror}->(node => $attr, level => $opt{level},
774     type => 'URI::'.$opt{type}.
775     (defined $opt{position} ? ':'.$opt{position} : ''));
776     });
777 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
778 wakaba 1.1 }; # $HTMLURIAttrChecker
779    
780     ## A space separated list of one or more URIs (or IRIs)
781     my $HTMLSpaceURIsAttrChecker = sub {
782     my ($self, $attr) = @_;
783     my $i = 0;
784     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
785     Whatpm::URIChecker->check_iri_reference ($value, sub {
786     my %opt = @_;
787     $self->{onerror}->(node => $attr, level => $opt{level},
788 wakaba 1.2 type => 'URIs:'.':'.
789     $opt{type}.':'.$i.
790 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
791     });
792     $i++;
793     }
794     ## ISSUE: Relative references?
795     ## ISSUE: Leading or trailing white spaces are conformant?
796     ## ISSUE: A sequence of white space characters are conformant?
797     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
798     ## NOTE: Duplication seems not an error.
799 wakaba 1.4 $self->{has_uri_attr} = 1;
800 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
801    
802     my $HTMLDatetimeAttrChecker = sub {
803     my ($self, $attr) = @_;
804     my $value = $attr->value;
805     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
806     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
807     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
808     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
809     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
810     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
811     if $d < 1 or
812     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
813     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
814     if $M == 2 and $d == 29 and
815     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
816     } else {
817     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
818     }
819     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
820     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
821     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
822     if defined $s and $s > 59;
823     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
824     if $zh > 23;
825     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
826     if $zm > 59;
827     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
828     } else {
829     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
830     }
831     }; # $HTMLDatetimeAttrChecker
832    
833     my $HTMLIntegerAttrChecker = sub {
834     my ($self, $attr) = @_;
835     my $value = $attr->value;
836     unless ($value =~ /\A-?[0-9]+\z/) {
837     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
838     }
839     }; # $HTMLIntegerAttrChecker
840    
841     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
842     my $range_check = shift;
843     return sub {
844     my ($self, $attr) = @_;
845     my $value = $attr->value;
846     if ($value =~ /\A[0-9]+\z/) {
847     unless ($range_check->($value + 0)) {
848     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
849     }
850     } else {
851     $self->{onerror}->(node => $attr,
852     type => 'nninteger:syntax error');
853     }
854     };
855     }; # $GetHTMLNonNegativeIntegerAttrChecker
856    
857     my $GetHTMLFloatingPointNumberAttrChecker = sub {
858     my $range_check = shift;
859     return sub {
860     my ($self, $attr) = @_;
861     my $value = $attr->value;
862     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
863     unless ($range_check->($value + 0)) {
864     $self->{onerror}->(node => $attr, type => 'float:out of range');
865     }
866     } else {
867     $self->{onerror}->(node => $attr,
868     type => 'float:syntax error');
869     }
870     };
871     }; # $GetHTMLFloatingPointNumberAttrChecker
872    
873     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
874     ## ISSUE: RFC 2046 does not define syntax of media types.
875     ## ISSUE: The definition of "a valid MIME type" is unknown.
876     ## Syntactical correctness?
877     my $HTMLIMTAttrChecker = sub {
878     my ($self, $attr) = @_;
879     my $value = $attr->value;
880     ## ISSUE: RFC 2045 Content-Type header field allows insertion
881     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
882     ## ISSUE: RFC 2231 extension? Maybe no.
883     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
884     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
885     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
886     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
887     my @type = ($1, $2);
888     my $param = $3;
889     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
890     if (defined $2) {
891     push @type, $1 => $2;
892     } else {
893     my $n = $1;
894     my $v = $2;
895     $v =~ s/\\(.)/$1/gs;
896     push @type, $n => $v;
897     }
898     }
899     require Whatpm::IMTChecker;
900     Whatpm::IMTChecker->check_imt (sub {
901     my %opt = @_;
902     $self->{onerror}->(node => $attr, level => $opt{level},
903     type => 'IMT:'.$opt{type});
904     }, @type);
905     } else {
906     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
907     }
908     }; # $HTMLIMTAttrChecker
909    
910     my $HTMLLanguageTagAttrChecker = sub {
911 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
912    
913 wakaba 1.1 my ($self, $attr) = @_;
914 wakaba 1.6 my $value = $attr->value;
915     require Whatpm::LangTag;
916     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
917     my %opt = @_;
918     my $type = 'LangTag:'.$opt{type};
919     $type .= ':' . $opt{subtag} if defined $opt{subtag};
920     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
921     level => $opt{level});
922     });
923 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
924 wakaba 1.6
925     ## TODO: testdata
926 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
927    
928     ## "A valid media query [MQ]"
929     my $HTMLMQAttrChecker = sub {
930     my ($self, $attr) = @_;
931     $self->{onerror}->(node => $attr, level => 'unsupported',
932     type => 'media query');
933     ## ISSUE: What is "a valid media query"?
934     }; # $HTMLMQAttrChecker
935    
936     my $HTMLEventHandlerAttrChecker = sub {
937     my ($self, $attr) = @_;
938     $self->{onerror}->(node => $attr, level => 'unsupported',
939     type => 'event handler');
940     ## TODO: MUST contain valid ECMAScript code matching the
941     ## ECMAScript |FunctionBody| production. [ECMA262]
942     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
943     ## ISSUE: Automatic semicolon insertion does not apply?
944     ## ISSUE: Other script languages?
945     }; # $HTMLEventHandlerAttrChecker
946    
947     my $HTMLUsemapAttrChecker = sub {
948     my ($self, $attr) = @_;
949     ## MUST be a valid hashed ID reference to a |map| element
950     my $value = $attr->value;
951     if ($value =~ s/^#//) {
952     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
953     push @{$self->{usemap}}, [$value => $attr];
954     } else {
955     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
956     }
957     ## NOTE: Space characters in hashed ID references are conforming.
958     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
959     }; # $HTMLUsemapAttrChecker
960    
961     my $HTMLTargetAttrChecker = sub {
962     my ($self, $attr) = @_;
963     my $value = $attr->value;
964     if ($value =~ /^_/) {
965     $value = lc $value; ## ISSUE: ASCII case-insentitive?
966     unless ({
967     _self => 1, _parent => 1, _top => 1,
968     }->{$value}) {
969     $self->{onerror}->(node => $attr,
970     type => 'reserved browsing context name');
971     }
972     } else {
973     #$ ISSUE: An empty string is conforming?
974     }
975     }; # $HTMLTargetAttrChecker
976    
977 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
978     my ($self, $attr) = @_;
979    
980     ## ISSUE: Namespace resolution?
981    
982     my $value = $attr->value;
983    
984     require Whatpm::CSS::SelectorsParser;
985     my $p = Whatpm::CSS::SelectorsParser->new;
986     $p->{pseudo_class}->{$_} = 1 for qw/
987     active checked disabled empty enabled first-child first-of-type
988     focus hover indeterminate last-child last-of-type link only-child
989     only-of-type root target visited
990     lang nth-child nth-last-child nth-of-type nth-last-of-type not
991     -manakai-contains -manakai-current
992     /;
993    
994     $p->{pseudo_element}->{$_} = 1 for qw/
995     after before first-letter first-line
996     /;
997    
998     $p->{must_level} = $self->{must_level};
999     $p->{onerror} = sub {
1000     my %opt = @_;
1001     $opt{type} = 'selectors:'.$opt{type};
1002     $self->{onerror}->(%opt, node => $attr);
1003     };
1004     $p->parse_string ($value);
1005     }; # $HTMLSelectorsAttrChecker
1006    
1007 wakaba 1.1 my $HTMLAttrChecker = {
1008     id => sub {
1009     ## NOTE: |map| has its own variant of |id=""| checker
1010     my ($self, $attr) = @_;
1011     my $value = $attr->value;
1012     if (length $value > 0) {
1013     if ($self->{id}->{$value}) {
1014     $self->{onerror}->(node => $attr, type => 'duplicate ID');
1015     push @{$self->{id}->{$value}}, $attr;
1016     } else {
1017     $self->{id}->{$value} = [$attr];
1018     }
1019     if ($value =~ /[\x09-\x0D\x20]/) {
1020     $self->{onerror}->(node => $attr, type => 'space in ID');
1021     }
1022     } else {
1023     ## NOTE: MUST contain at least one character
1024     $self->{onerror}->(node => $attr, type => 'empty attribute value');
1025     }
1026     },
1027     title => sub {}, ## NOTE: No conformance creteria
1028     lang => sub {
1029     my ($self, $attr) = @_;
1030 wakaba 1.6 my $value = $attr->value;
1031     if ($value eq '') {
1032     #
1033     } else {
1034     require Whatpm::LangTag;
1035     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
1036     my %opt = @_;
1037     my $type = 'LangTag:'.$opt{type};
1038     $type .= ':' . $opt{subtag} if defined $opt{subtag};
1039     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
1040     level => $opt{level});
1041     });
1042     }
1043 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
1044     unless ($attr->owner_document->manakai_is_html) {
1045     $self->{onerror}->(node => $attr, type => 'in XML:lang');
1046     }
1047 wakaba 1.6
1048     ## TODO: test data
1049 wakaba 1.1 },
1050     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
1051     class => sub {
1052     my ($self, $attr) = @_;
1053     my %word;
1054     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
1055     unless ($word{$word}) {
1056     $word{$word} = 1;
1057     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
1058     } else {
1059     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
1060     }
1061     }
1062     },
1063     contextmenu => sub {
1064     my ($self, $attr) = @_;
1065     my $value = $attr->value;
1066     push @{$self->{contextmenu}}, [$value => $attr];
1067     ## ISSUE: "The value must be the ID of a menu element in the DOM."
1068     ## What is "in the DOM"? A menu Element node that is not part
1069     ## of the Document tree is in the DOM? A menu Element node that
1070     ## belong to another Document tree is in the DOM?
1071     },
1072     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
1073 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
1074     ## TODO: ref, template, registrationmark
1075 wakaba 1.1 };
1076    
1077     for (qw/
1078     onabort onbeforeunload onblur onchange onclick oncontextmenu
1079     ondblclick ondrag ondragend ondragenter ondragleave ondragover
1080     ondragstart ondrop onerror onfocus onkeydown onkeypress
1081     onkeyup onload onmessage onmousedown onmousemove onmouseout
1082     onmouseover onmouseup onmousewheel onresize onscroll onselect
1083     onsubmit onunload
1084     /) {
1085     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
1086     }
1087    
1088     my $GetHTMLAttrsChecker = sub {
1089     my $element_specific_checker = shift;
1090     return sub {
1091     my ($self, $todo) = @_;
1092     for my $attr (@{$todo->{node}->attributes}) {
1093     my $attr_ns = $attr->namespace_uri;
1094     $attr_ns = '' unless defined $attr_ns;
1095     my $attr_ln = $attr->manakai_local_name;
1096     my $checker;
1097     if ($attr_ns eq '') {
1098     $checker = $element_specific_checker->{$attr_ln}
1099     || $HTMLAttrChecker->{$attr_ln};
1100     }
1101     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1102     || $AttrChecker->{$attr_ns}->{''};
1103     if ($checker) {
1104     $checker->($self, $attr, $todo);
1105     } else {
1106     $self->{onerror}->(node => $attr, level => 'unsupported',
1107     type => 'attribute');
1108     ## ISSUE: No comformance createria for unknown attributes in the spec
1109     }
1110     }
1111     };
1112     }; # $GetHTMLAttrsChecker
1113    
1114     our $Element;
1115     our $ElementDefault;
1116    
1117     $Element->{$HTML_NS}->{''} = {
1118     attrs_checker => $GetHTMLAttrsChecker->({}),
1119     checker => $ElementDefault->{checker},
1120     };
1121    
1122     $Element->{$HTML_NS}->{html} = {
1123     is_root => 1,
1124     attrs_checker => $GetHTMLAttrsChecker->({
1125 wakaba 1.16 manifest => $HTMLURIAttrChecker,
1126 wakaba 1.1 xmlns => sub {
1127     my ($self, $attr) = @_;
1128     my $value = $attr->value;
1129     unless ($value eq $HTML_NS) {
1130     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
1131     }
1132     unless ($attr->owner_document->manakai_is_html) {
1133     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
1134     ## TODO: Test
1135     }
1136     },
1137     }),
1138     checker => sub {
1139     my ($self, $todo) = @_;
1140     my $el = $todo->{node};
1141     my $new_todos = [];
1142     my @nodes = (@{$el->child_nodes});
1143    
1144 wakaba 1.25 my $old_values = {significant =>
1145     $todo->{flag}->{has_descendant}->{significant}};
1146     $todo->{flag}->{has_descendant}->{significant} = 0;
1147    
1148 wakaba 1.1 my $phase = 'before head';
1149     while (@nodes) {
1150     my $node = shift @nodes;
1151     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1152    
1153     my $nt = $node->node_type;
1154     if ($nt == 1) {
1155     my $node_ns = $node->namespace_uri;
1156     $node_ns = '' unless defined $node_ns;
1157     my $node_ln = $node->manakai_local_name;
1158     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1159 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1160     #
1161     } elsif ($phase eq 'before head') {
1162 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
1163     $phase = 'after head';
1164     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1165     $self->{onerror}->(node => $node, type => 'ps element missing:head');
1166     $phase = 'after body';
1167     } else {
1168     $not_allowed = 1;
1169     # before head
1170     }
1171     } elsif ($phase eq 'after head') {
1172     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1173     $phase = 'after body';
1174     } else {
1175     $not_allowed = 1;
1176     # after head
1177     }
1178     } else { #elsif ($phase eq 'after body') {
1179     $not_allowed = 1;
1180     # after body
1181     }
1182     $self->{onerror}->(node => $node, type => 'element not allowed')
1183     if $not_allowed;
1184     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1185     unshift @nodes, @$sib;
1186     push @$new_todos, @$ch;
1187     } elsif ($nt == 3 or $nt == 4) {
1188     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1189     $self->{onerror}->(node => $node, type => 'character not allowed');
1190 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1191 wakaba 1.1 }
1192     } elsif ($nt == 5) {
1193     unshift @nodes, @{$node->child_nodes};
1194     }
1195     }
1196    
1197     if ($phase eq 'before head') {
1198     $self->{onerror}->(node => $el, type => 'child element missing:head');
1199     $self->{onerror}->(node => $el, type => 'child element missing:body');
1200     } elsif ($phase eq 'after head') {
1201     $self->{onerror}->(node => $el, type => 'child element missing:body');
1202     }
1203    
1204 wakaba 1.25 ## NOTE: Significant content check - this is performed here since
1205     ## |html| content model allows a block-level element - |body|.
1206     push @$new_todos, {
1207     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
1208     old_values => $old_values,
1209 wakaba 1.26 errors => $HTMLSignificantContentErrors,
1210 wakaba 1.25 };
1211    
1212 wakaba 1.1 return ($new_todos);
1213     },
1214     };
1215    
1216     $Element->{$HTML_NS}->{head} = {
1217     attrs_checker => $GetHTMLAttrsChecker->({}),
1218     checker => sub {
1219     my ($self, $todo) = @_;
1220     my $el = $todo->{node};
1221     my $new_todos = [];
1222     my @nodes = (@{$el->child_nodes});
1223    
1224     my $has_title;
1225     my $phase = 'initial'; # 'after charset', 'after base'
1226     while (@nodes) {
1227     my $node = shift @nodes;
1228     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1229    
1230     my $nt = $node->node_type;
1231     if ($nt == 1) {
1232     my $node_ns = $node->namespace_uri;
1233     $node_ns = '' unless defined $node_ns;
1234     my $node_ln = $node->manakai_local_name;
1235     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1236 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1237     #
1238     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1239 wakaba 1.1 $phase = 'after base';
1240     unless ($has_title) {
1241     $has_title = 1;
1242     } else {
1243     $not_allowed = 1;
1244     }
1245     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1246     if ($node->has_attribute_ns (undef, 'charset')) {
1247     if ($phase eq 'initial') {
1248     $phase = 'after charset';
1249     } else {
1250     $not_allowed = 1;
1251     ## NOTE: See also |base|'s "contexts" field in the spec
1252     }
1253 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1254     $node->has_attribute_ns (undef, 'http-equiv')) {
1255     $phase = 'after base';
1256 wakaba 1.1 } else {
1257     $phase = 'after base';
1258 wakaba 1.5 $not_allowed = 1;
1259 wakaba 1.1 }
1260     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1261     if ($phase eq 'initial' or $phase eq 'after charset') {
1262     $phase = 'after base';
1263     } else {
1264     $not_allowed = 1;
1265     }
1266     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1267     $phase = 'after base';
1268     if ($node->has_attribute_ns (undef, 'scoped')) {
1269     $not_allowed = 1;
1270     }
1271     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1272     $phase = 'after base';
1273     } else {
1274     $not_allowed = 1;
1275     }
1276     $self->{onerror}->(node => $node, type => 'element not allowed')
1277     if $not_allowed;
1278 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1279 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1280     unshift @nodes, @$sib;
1281     push @$new_todos, @$ch;
1282     } elsif ($nt == 3 or $nt == 4) {
1283     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1284     $self->{onerror}->(node => $node, type => 'character not allowed');
1285 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1286 wakaba 1.1 }
1287     } elsif ($nt == 5) {
1288     unshift @nodes, @{$node->child_nodes};
1289     }
1290     }
1291     unless ($has_title) {
1292     $self->{onerror}->(node => $el, type => 'child element missing:title');
1293     }
1294     return ($new_todos);
1295     },
1296     };
1297    
1298     $Element->{$HTML_NS}->{title} = {
1299     attrs_checker => $GetHTMLAttrsChecker->({}),
1300     checker => $HTMLTextChecker,
1301     };
1302    
1303     $Element->{$HTML_NS}->{base} = {
1304 wakaba 1.4 attrs_checker => sub {
1305     my ($self, $todo) = @_;
1306    
1307 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1308     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1309    
1310     if ($self->{has_uri_attr} and $has_href) {
1311 wakaba 1.4 ## ISSUE: Are these examples conforming?
1312     ## <head profile="a b c"><base href> (except for |profile|'s
1313     ## non-conformance)
1314     ## <title xml:base="relative"/><base href/> (maybe it should be)
1315     ## <unknown xmlns="relative"/><base href/> (assuming that
1316     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1317     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1318     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1319     ## <style>@import 'relative';</style><base href>
1320     ## <script>location.href = 'relative';</script><base href>
1321 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1322     ## an exception.
1323 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1324     type => 'basehref after URI attribute');
1325     }
1326 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1327 wakaba 1.4 ## ISSUE: Are these examples conforming?
1328     ## <head><title xlink:href=""/><base target="name"/></head>
1329     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1330     ## (assuming that |xbl:xbl| is allowed before |base|)
1331     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1332     ## <link href=""/><base target="name"/>
1333     ## <link rel=unknown href=""><base target=name>
1334     $self->{onerror}->(node => $todo->{node},
1335     type => 'basetarget after hyperlink');
1336     }
1337    
1338 wakaba 1.14 if (not $has_href and not $has_target) {
1339     $self->{onerror}->(node => $todo->{node},
1340     type => 'attribute missing:href|target');
1341     }
1342    
1343 wakaba 1.4 return $GetHTMLAttrsChecker->({
1344     href => $HTMLURIAttrChecker,
1345     target => $HTMLTargetAttrChecker,
1346     })->($self, $todo);
1347     },
1348 wakaba 1.1 checker => $HTMLEmptyChecker,
1349     };
1350    
1351     $Element->{$HTML_NS}->{link} = {
1352     attrs_checker => sub {
1353     my ($self, $todo) = @_;
1354     $GetHTMLAttrsChecker->({
1355     href => $HTMLURIAttrChecker,
1356 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1357 wakaba 1.1 media => $HTMLMQAttrChecker,
1358     hreflang => $HTMLLanguageTagAttrChecker,
1359     type => $HTMLIMTAttrChecker,
1360     ## NOTE: Though |title| has special semantics,
1361     ## syntactically same as the |title| as global attribute.
1362     })->($self, $todo);
1363 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1364     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1365     } else {
1366 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1367     type => 'attribute missing:href');
1368     }
1369     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1370     $self->{onerror}->(node => $todo->{node},
1371     type => 'attribute missing:rel');
1372     }
1373     },
1374     checker => $HTMLEmptyChecker,
1375     };
1376    
1377     $Element->{$HTML_NS}->{meta} = {
1378     attrs_checker => sub {
1379     my ($self, $todo) = @_;
1380     my $name_attr;
1381     my $http_equiv_attr;
1382     my $charset_attr;
1383     my $content_attr;
1384     for my $attr (@{$todo->{node}->attributes}) {
1385     my $attr_ns = $attr->namespace_uri;
1386     $attr_ns = '' unless defined $attr_ns;
1387     my $attr_ln = $attr->manakai_local_name;
1388     my $checker;
1389     if ($attr_ns eq '') {
1390     if ($attr_ln eq 'content') {
1391     $content_attr = $attr;
1392     $checker = 1;
1393     } elsif ($attr_ln eq 'name') {
1394     $name_attr = $attr;
1395     $checker = 1;
1396     } elsif ($attr_ln eq 'http-equiv') {
1397     $http_equiv_attr = $attr;
1398     $checker = 1;
1399     } elsif ($attr_ln eq 'charset') {
1400     $charset_attr = $attr;
1401     $checker = 1;
1402     } else {
1403     $checker = $HTMLAttrChecker->{$attr_ln}
1404     || $AttrChecker->{$attr_ns}->{$attr_ln}
1405     || $AttrChecker->{$attr_ns}->{''};
1406     }
1407     } else {
1408     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1409     || $AttrChecker->{$attr_ns}->{''};
1410     }
1411     if ($checker) {
1412     $checker->($self, $attr) if ref $checker;
1413     } else {
1414     $self->{onerror}->(node => $attr, level => 'unsupported',
1415     type => 'attribute');
1416     ## ISSUE: No comformance createria for unknown attributes in the spec
1417     }
1418     }
1419    
1420     if (defined $name_attr) {
1421     if (defined $http_equiv_attr) {
1422     $self->{onerror}->(node => $http_equiv_attr,
1423     type => 'attribute not allowed');
1424     } elsif (defined $charset_attr) {
1425     $self->{onerror}->(node => $charset_attr,
1426     type => 'attribute not allowed');
1427     }
1428     my $metadata_name = $name_attr->value;
1429     my $metadata_value;
1430     if (defined $content_attr) {
1431     $metadata_value = $content_attr->value;
1432     } else {
1433     $self->{onerror}->(node => $todo->{node},
1434     type => 'attribute missing:content');
1435     $metadata_value = '';
1436     }
1437     } elsif (defined $http_equiv_attr) {
1438     if (defined $charset_attr) {
1439     $self->{onerror}->(node => $charset_attr,
1440     type => 'attribute not allowed');
1441     }
1442     unless (defined $content_attr) {
1443     $self->{onerror}->(node => $todo->{node},
1444     type => 'attribute missing:content');
1445     }
1446     } elsif (defined $charset_attr) {
1447     if (defined $content_attr) {
1448     $self->{onerror}->(node => $content_attr,
1449     type => 'attribute not allowed');
1450     }
1451     } else {
1452     if (defined $content_attr) {
1453     $self->{onerror}->(node => $content_attr,
1454     type => 'attribute not allowed');
1455     $self->{onerror}->(node => $todo->{node},
1456     type => 'attribute missing:name|http-equiv');
1457     } else {
1458     $self->{onerror}->(node => $todo->{node},
1459     type => 'attribute missing:name|http-equiv|charset');
1460     }
1461     }
1462    
1463     ## TODO: metadata conformance
1464    
1465     ## TODO: pragma conformance
1466     if (defined $http_equiv_attr) { ## An enumerated attribute
1467     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1468     if ({
1469     'refresh' => 1,
1470     'default-style' => 1,
1471     }->{$keyword}) {
1472     #
1473 wakaba 1.19 } elsif ($keyword eq 'content-type') {
1474     $self->{onerror}
1475     ->(node => $http_equiv_attr,
1476     type => 'enumerated:invalid:http-equiv:content-type');
1477 wakaba 1.1 } else {
1478     $self->{onerror}->(node => $http_equiv_attr,
1479     type => 'enumerated:invalid');
1480     }
1481     }
1482    
1483     if (defined $charset_attr) {
1484     unless ($todo->{node}->owner_document->manakai_is_html) {
1485     $self->{onerror}->(node => $charset_attr,
1486     type => 'in XML:charset');
1487     }
1488 wakaba 1.21
1489     my $charset_value = $charset_attr->value;
1490     ## NOTE: Though the case-sensitivility of |charset| attribute value
1491     ## is not explicitly spelled in the HTML5 spec, the Character Set
1492     ## registry of IANA, which is referenced from HTML5 spec, says that
1493     ## charset name is case-insensitive.
1494     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1495    
1496     require Message::Charset::Info;
1497     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1498     my $ic = $todo->{node}->owner_document->input_encoding;
1499     if (defined $ic) {
1500     ## TODO: Test for this case
1501     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1502     if ($charset ne $ic_charset) {
1503     $self->{onerror}->(node => $charset_attr,
1504     type => 'mismatched charset name:'.$ic.
1505     ':'.$charset_value,
1506     level => 'm');
1507     }
1508     } else {
1509     ## NOTE: MUST, but not checkable, since the document is not originally
1510     ## in serialized form (or the parser does not preserve the input
1511     ## encoding information).
1512     $self->{onerror}->(node => $charset_attr,
1513     type => 'mismatched charset name::'.$charset_value,
1514     level => 'unsupported');
1515     }
1516    
1517     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1518     ## Syntactically valid and registered? What about x-charset names?
1519     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1520     ($charset_value)) {
1521     $self->{onerror}->(node => $charset_attr,
1522     type => 'charset:syntax error:'.$charset_value,
1523     level => 'm');
1524     }
1525    
1526     if ($charset) {
1527     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1528     ## with no "preferred MIME name" label)?
1529     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1530     if (($charset_status &
1531     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1532     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1533     $self->{onerror}->(node => $charset_attr,
1534     type => 'charset:not preferred:'.
1535     $charset_value,
1536     level => 'm');
1537     }
1538     if (($charset_status &
1539     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1540     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1541     if ($charset_value =~ /^x-/) {
1542     $self->{onerror}->(node => $charset_attr,
1543     type => 'charset:private:'.$charset_value,
1544     level => $self->{good_level});
1545     } else {
1546     $self->{onerror}->(node => $charset_attr,
1547     type => 'charset:not registered:'.
1548     $charset_value,
1549     level => $self->{good_level});
1550     }
1551     }
1552     } elsif ($charset_value =~ /^x-/) {
1553     $self->{onerror}->(node => $charset_attr,
1554     type => 'charset:private:'.$charset_value,
1555     level => $self->{good_level});
1556     } else {
1557     $self->{onerror}->(node => $charset_attr,
1558     type => 'charset:not registered:'.$charset_value,
1559     level => $self->{good_level});
1560     }
1561    
1562 wakaba 1.22 if ($charset_attr->get_user_data ('manakai_has_reference')) {
1563     $self->{onerror}->(node => $charset_attr,
1564     type => 'character reference in charset',
1565     level => $self->{must_level});
1566     }
1567 wakaba 1.1 }
1568     },
1569     checker => $HTMLEmptyChecker,
1570     };
1571    
1572     $Element->{$HTML_NS}->{style} = {
1573     attrs_checker => $GetHTMLAttrsChecker->({
1574     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1575     media => $HTMLMQAttrChecker,
1576     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1577     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1578     ## not different
1579     }),
1580     checker => sub {
1581 wakaba 1.27 ## NOTE: |html:style| itself has no conformance creteria on content model.
1582 wakaba 1.1 my ($self, $todo) = @_;
1583     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1584 wakaba 1.27 if (not defined $type or
1585     $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
1586     my $el = $todo->{node};
1587     my $new_todos = [];
1588     my @nodes = (@{$el->child_nodes});
1589    
1590     my $ss_text = '';
1591     while (@nodes) {
1592     my $node = shift @nodes;
1593     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1594    
1595     my $nt = $node->node_type;
1596     if ($nt == 1) {
1597     $self->{onerror}->(node => $node, type => 'element not allowed');
1598     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1599     unshift @nodes, @$sib;
1600     push @$new_todos, @$ch;
1601     } elsif ($nt == 3 or $nt == 4) {
1602     $ss_text .= $node->text_content;
1603     } elsif ($nt == 5) {
1604     unshift @nodes, @{$node->child_nodes};
1605     }
1606     }
1607    
1608 wakaba 1.28 $self->{onsubdoc}->({s => $ss_text, container_node => $el,
1609     media_type => 'text/css', is_char_string => 1});
1610 wakaba 1.27 return ($new_todos);
1611     } else {
1612     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1613     type => 'style:'.$type); ## TODO: $type normalization
1614     return $AnyChecker->($self, $todo);
1615     }
1616 wakaba 1.1 },
1617     };
1618 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1619 wakaba 1.1
1620     $Element->{$HTML_NS}->{body} = {
1621     attrs_checker => $GetHTMLAttrsChecker->({}),
1622     checker => $HTMLBlockChecker,
1623     };
1624    
1625     $Element->{$HTML_NS}->{section} = {
1626     attrs_checker => $GetHTMLAttrsChecker->({}),
1627     checker => $HTMLStylableBlockChecker,
1628     };
1629    
1630     $Element->{$HTML_NS}->{nav} = {
1631     attrs_checker => $GetHTMLAttrsChecker->({}),
1632     checker => $HTMLBlockOrInlineChecker,
1633     };
1634    
1635     $Element->{$HTML_NS}->{article} = {
1636     attrs_checker => $GetHTMLAttrsChecker->({}),
1637     checker => $HTMLStylableBlockChecker,
1638     };
1639    
1640     $Element->{$HTML_NS}->{blockquote} = {
1641     attrs_checker => $GetHTMLAttrsChecker->({
1642     cite => $HTMLURIAttrChecker,
1643     }),
1644     checker => $HTMLBlockChecker,
1645     };
1646    
1647     $Element->{$HTML_NS}->{aside} = {
1648     attrs_checker => $GetHTMLAttrsChecker->({}),
1649     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1650     };
1651    
1652     $Element->{$HTML_NS}->{h1} = {
1653     attrs_checker => $GetHTMLAttrsChecker->({}),
1654     checker => sub {
1655     my ($self, $todo) = @_;
1656 wakaba 1.24 $todo->{flag}->{has_descendant}->{hn} = 1;
1657 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1658 wakaba 1.1 },
1659     };
1660    
1661     $Element->{$HTML_NS}->{h2} = {
1662     attrs_checker => $GetHTMLAttrsChecker->({}),
1663     checker => $Element->{$HTML_NS}->{h1}->{checker},
1664     };
1665    
1666     $Element->{$HTML_NS}->{h3} = {
1667     attrs_checker => $GetHTMLAttrsChecker->({}),
1668     checker => $Element->{$HTML_NS}->{h1}->{checker},
1669     };
1670    
1671     $Element->{$HTML_NS}->{h4} = {
1672     attrs_checker => $GetHTMLAttrsChecker->({}),
1673     checker => $Element->{$HTML_NS}->{h1}->{checker},
1674     };
1675    
1676     $Element->{$HTML_NS}->{h5} = {
1677     attrs_checker => $GetHTMLAttrsChecker->({}),
1678     checker => $Element->{$HTML_NS}->{h1}->{checker},
1679     };
1680    
1681     $Element->{$HTML_NS}->{h6} = {
1682     attrs_checker => $GetHTMLAttrsChecker->({}),
1683     checker => $Element->{$HTML_NS}->{h1}->{checker},
1684     };
1685    
1686     $Element->{$HTML_NS}->{header} = {
1687     attrs_checker => $GetHTMLAttrsChecker->({}),
1688     checker => sub {
1689     my ($self, $todo) = @_;
1690 wakaba 1.24
1691     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
1692     $todo->{flag}->{has_descendant}->{hn} = 0;
1693 wakaba 1.1
1694     my $end = $self->_add_minuses
1695     ({$HTML_NS => {qw/header 1 footer 1/}},
1696     $HTMLSectioningElements);
1697     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1698 wakaba 1.24 push @$new_todos, $end,
1699     {type => 'descendant', node => $todo->{node},
1700     flag => $todo->{flag}, old_values => $old_flags,
1701     errors => {
1702     hn => sub {
1703     my ($self, $todo) = @_;
1704     $self->{onerror}->(node => $todo->{node},
1705     type => 'element missing:hn');
1706     },
1707 wakaba 1.1 }};
1708     return ($new_todos, $ch);
1709 wakaba 1.24
1710     ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
1711 wakaba 1.1 },
1712     };
1713    
1714     $Element->{$HTML_NS}->{footer} = {
1715     attrs_checker => $GetHTMLAttrsChecker->({}),
1716     checker => sub { ## block -hn -header -footer -sectioning or inline
1717     my ($self, $todo) = @_;
1718     my $el = $todo->{node};
1719     my $new_todos = [];
1720     my @nodes = (@{$el->child_nodes});
1721 wakaba 1.25
1722     my $old_values = {significant =>
1723     $todo->{flag}->{has_descendant}->{significant}};
1724     $todo->{flag}->{has_descendant}->{significant} = 0;
1725 wakaba 1.1
1726     my $content = 'block-or-inline'; # or 'block' or 'inline'
1727     my @block_not_inline;
1728     while (@nodes) {
1729     my $node = shift @nodes;
1730     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1731    
1732     my $nt = $node->node_type;
1733     if ($nt == 1) {
1734     my $node_ns = $node->namespace_uri;
1735     $node_ns = '' unless defined $node_ns;
1736     my $node_ln = $node->manakai_local_name;
1737     my $not_allowed;
1738     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1739     $not_allowed = 1;
1740     } elsif ($node_ns eq $HTML_NS and
1741     {
1742     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1743     }->{$node_ln}) {
1744     $not_allowed = 1;
1745     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1746     $not_allowed = 1;
1747     }
1748     if ($content eq 'block') {
1749     $not_allowed = 1
1750 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1751     $self->{pluses}->{$node_ns}->{$node_ln};
1752 wakaba 1.1 } elsif ($content eq 'inline') {
1753     $not_allowed = 1
1754     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1755 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1756     $self->{pluses}->{$node_ns}->{$node_ln};
1757 wakaba 1.1 } else {
1758     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1759     my $is_inline
1760     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1761     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1762    
1763     push @block_not_inline, $node
1764     if $is_block and not $is_inline and not $not_allowed;
1765 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1766 wakaba 1.1 $content = 'inline';
1767     for (@block_not_inline) {
1768     $self->{onerror}->(node => $_, type => 'element not allowed');
1769     }
1770     $not_allowed = 1 unless $is_inline;
1771     }
1772     }
1773     $self->{onerror}->(node => $node, type => 'element not allowed')
1774     if $not_allowed;
1775     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1776     unshift @nodes, @$sib;
1777     push @$new_todos, @$ch;
1778     } elsif ($nt == 3 or $nt == 4) {
1779     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1780     if ($content eq 'block') {
1781     $self->{onerror}->(node => $node, type => 'character not allowed');
1782     } else {
1783     $content = 'inline';
1784     for (@block_not_inline) {
1785     $self->{onerror}->(node => $_, type => 'element not allowed');
1786     }
1787     }
1788 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1789 wakaba 1.1 }
1790     } elsif ($nt == 5) {
1791     unshift @nodes, @{$node->child_nodes};
1792     }
1793     }
1794    
1795     my $end = $self->_add_minuses
1796     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1797     $HTMLSectioningElements);
1798     push @$new_todos, $end;
1799    
1800     if ($content eq 'inline') {
1801     for (@$new_todos) {
1802     $_->{inline} = 1;
1803     }
1804     }
1805    
1806 wakaba 1.25 push @$new_todos, {
1807     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
1808     old_values => $old_values,
1809 wakaba 1.26 errors => $HTMLSignificantContentErrors,
1810 wakaba 1.25 };
1811    
1812 wakaba 1.1 return ($new_todos);
1813     },
1814     };
1815    
1816     $Element->{$HTML_NS}->{address} = {
1817     attrs_checker => $GetHTMLAttrsChecker->({}),
1818     checker => $HTMLInlineChecker,
1819     };
1820    
1821     $Element->{$HTML_NS}->{p} = {
1822     attrs_checker => $GetHTMLAttrsChecker->({}),
1823 wakaba 1.13 checker => $HTMLInlineChecker,
1824 wakaba 1.1 };
1825    
1826     $Element->{$HTML_NS}->{hr} = {
1827     attrs_checker => $GetHTMLAttrsChecker->({}),
1828     checker => $HTMLEmptyChecker,
1829     };
1830    
1831     $Element->{$HTML_NS}->{br} = {
1832     attrs_checker => $GetHTMLAttrsChecker->({}),
1833     checker => $HTMLEmptyChecker,
1834     };
1835    
1836     $Element->{$HTML_NS}->{dialog} = {
1837     attrs_checker => $GetHTMLAttrsChecker->({}),
1838     checker => sub {
1839     my ($self, $todo) = @_;
1840     my $el = $todo->{node};
1841     my $new_todos = [];
1842     my @nodes = (@{$el->child_nodes});
1843    
1844     my $phase = 'before dt';
1845     while (@nodes) {
1846     my $node = shift @nodes;
1847     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1848    
1849     my $nt = $node->node_type;
1850     if ($nt == 1) {
1851     my $node_ns = $node->namespace_uri;
1852     $node_ns = '' unless defined $node_ns;
1853     my $node_ln = $node->manakai_local_name;
1854     ## NOTE: |minuses| list is not checked since redundant
1855 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1856     #
1857     } elsif ($phase eq 'before dt') {
1858 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1859     $phase = 'before dd';
1860     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1861     $self->{onerror}
1862     ->(node => $node, type => 'ps element missing:dt');
1863     $phase = 'before dt';
1864     } else {
1865     $self->{onerror}->(node => $node, type => 'element not allowed');
1866     }
1867     } else { # before dd
1868     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1869     $phase = 'before dt';
1870     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1871     $self->{onerror}
1872     ->(node => $node, type => 'ps element missing:dd');
1873     $phase = 'before dd';
1874     } else {
1875     $self->{onerror}->(node => $node, type => 'element not allowed');
1876     }
1877     }
1878     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1879     unshift @nodes, @$sib;
1880     push @$new_todos, @$ch;
1881     } elsif ($nt == 3 or $nt == 4) {
1882     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1883     $self->{onerror}->(node => $node, type => 'character not allowed');
1884 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1885 wakaba 1.1 }
1886     } elsif ($nt == 5) {
1887     unshift @nodes, @{$node->child_nodes};
1888     }
1889     }
1890     if ($phase eq 'before dd') {
1891 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1892 wakaba 1.1 }
1893     return ($new_todos);
1894     },
1895     };
1896    
1897     $Element->{$HTML_NS}->{pre} = {
1898     attrs_checker => $GetHTMLAttrsChecker->({}),
1899     checker => $HTMLStrictlyInlineChecker,
1900     };
1901    
1902     $Element->{$HTML_NS}->{ol} = {
1903     attrs_checker => $GetHTMLAttrsChecker->({
1904     start => $HTMLIntegerAttrChecker,
1905     }),
1906     checker => sub {
1907     my ($self, $todo) = @_;
1908     my $el = $todo->{node};
1909     my $new_todos = [];
1910     my @nodes = (@{$el->child_nodes});
1911    
1912     while (@nodes) {
1913     my $node = shift @nodes;
1914     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1915    
1916     my $nt = $node->node_type;
1917     if ($nt == 1) {
1918     my $node_ns = $node->namespace_uri;
1919     $node_ns = '' unless defined $node_ns;
1920     my $node_ln = $node->manakai_local_name;
1921     ## NOTE: |minuses| list is not checked since redundant
1922 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1923     #
1924     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1925 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1926     }
1927     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1928     unshift @nodes, @$sib;
1929     push @$new_todos, @$ch;
1930     } elsif ($nt == 3 or $nt == 4) {
1931     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1932     $self->{onerror}->(node => $node, type => 'character not allowed');
1933 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1934 wakaba 1.1 }
1935     } elsif ($nt == 5) {
1936     unshift @nodes, @{$node->child_nodes};
1937     }
1938     }
1939    
1940     if ($todo->{inline}) {
1941     for (@$new_todos) {
1942     $_->{inline} = 1;
1943     }
1944     }
1945     return ($new_todos);
1946     },
1947     };
1948    
1949     $Element->{$HTML_NS}->{ul} = {
1950     attrs_checker => $GetHTMLAttrsChecker->({}),
1951     checker => $Element->{$HTML_NS}->{ol}->{checker},
1952     };
1953    
1954     $Element->{$HTML_NS}->{li} = {
1955     attrs_checker => $GetHTMLAttrsChecker->({
1956     start => sub {
1957     my ($self, $attr) = @_;
1958     my $parent = $attr->owner_element->manakai_parent_element;
1959     if (defined $parent) {
1960     my $parent_ns = $parent->namespace_uri;
1961     $parent_ns = '' unless defined $parent_ns;
1962     my $parent_ln = $parent->manakai_local_name;
1963     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1964     $self->{onerror}->(node => $attr, level => 'unsupported',
1965     type => 'attribute');
1966     }
1967     }
1968     $HTMLIntegerAttrChecker->($self, $attr);
1969     },
1970     }),
1971     checker => sub {
1972     my ($self, $todo) = @_;
1973     if ($todo->{inline}) {
1974     return $HTMLInlineChecker->($self, $todo);
1975     } else {
1976     return $HTMLBlockOrInlineChecker->($self, $todo);
1977     }
1978     },
1979     };
1980    
1981     $Element->{$HTML_NS}->{dl} = {
1982     attrs_checker => $GetHTMLAttrsChecker->({}),
1983     checker => sub {
1984     my ($self, $todo) = @_;
1985     my $el = $todo->{node};
1986     my $new_todos = [];
1987     my @nodes = (@{$el->child_nodes});
1988    
1989     my $phase = 'before dt';
1990     while (@nodes) {
1991     my $node = shift @nodes;
1992     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1993    
1994     my $nt = $node->node_type;
1995     if ($nt == 1) {
1996     my $node_ns = $node->namespace_uri;
1997     $node_ns = '' unless defined $node_ns;
1998     my $node_ln = $node->manakai_local_name;
1999     ## NOTE: |minuses| list is not checked since redundant
2000 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2001     #
2002     } elsif ($phase eq 'in dds') {
2003 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2004     #$phase = 'in dds';
2005     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2006     $phase = 'in dts';
2007     } else {
2008     $self->{onerror}->(node => $node, type => 'element not allowed');
2009     }
2010     } elsif ($phase eq 'in dts') {
2011     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2012     #$phase = 'in dts';
2013     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2014     $phase = 'in dds';
2015     } else {
2016     $self->{onerror}->(node => $node, type => 'element not allowed');
2017     }
2018     } else { # before dt
2019     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2020     $phase = 'in dts';
2021     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2022     $self->{onerror}
2023     ->(node => $node, type => 'ps element missing:dt');
2024     $phase = 'in dds';
2025     } else {
2026     $self->{onerror}->(node => $node, type => 'element not allowed');
2027     }
2028     }
2029     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2030     unshift @nodes, @$sib;
2031     push @$new_todos, @$ch;
2032     } elsif ($nt == 3 or $nt == 4) {
2033     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2034     $self->{onerror}->(node => $node, type => 'character not allowed');
2035 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2036 wakaba 1.1 }
2037     } elsif ($nt == 5) {
2038     unshift @nodes, @{$node->child_nodes};
2039     }
2040     }
2041     if ($phase eq 'in dts') {
2042 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
2043 wakaba 1.1 }
2044    
2045     if ($todo->{inline}) {
2046     for (@$new_todos) {
2047     $_->{inline} = 1;
2048     }
2049     }
2050     return ($new_todos);
2051     },
2052     };
2053    
2054     $Element->{$HTML_NS}->{dt} = {
2055     attrs_checker => $GetHTMLAttrsChecker->({}),
2056     checker => $HTMLStrictlyInlineChecker,
2057     };
2058    
2059     $Element->{$HTML_NS}->{dd} = {
2060     attrs_checker => $GetHTMLAttrsChecker->({}),
2061     checker => $Element->{$HTML_NS}->{li}->{checker},
2062     };
2063    
2064     $Element->{$HTML_NS}->{a} = {
2065     attrs_checker => sub {
2066     my ($self, $todo) = @_;
2067     my %attr;
2068     for my $attr (@{$todo->{node}->attributes}) {
2069     my $attr_ns = $attr->namespace_uri;
2070     $attr_ns = '' unless defined $attr_ns;
2071     my $attr_ln = $attr->manakai_local_name;
2072     my $checker;
2073     if ($attr_ns eq '') {
2074     $checker = {
2075     target => $HTMLTargetAttrChecker,
2076     href => $HTMLURIAttrChecker,
2077     ping => $HTMLSpaceURIsAttrChecker,
2078 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2079 wakaba 1.1 media => $HTMLMQAttrChecker,
2080     hreflang => $HTMLLanguageTagAttrChecker,
2081     type => $HTMLIMTAttrChecker,
2082     }->{$attr_ln};
2083     if ($checker) {
2084     $attr{$attr_ln} = $attr;
2085     } else {
2086     $checker = $HTMLAttrChecker->{$attr_ln};
2087     }
2088     }
2089     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2090     || $AttrChecker->{$attr_ns}->{''};
2091     if ($checker) {
2092     $checker->($self, $attr) if ref $checker;
2093     } else {
2094     $self->{onerror}->(node => $attr, level => 'unsupported',
2095     type => 'attribute');
2096     ## ISSUE: No comformance createria for unknown attributes in the spec
2097     }
2098     }
2099    
2100 wakaba 1.4 if (defined $attr{href}) {
2101     $self->{has_hyperlink_element} = 1;
2102     } else {
2103 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
2104     if (defined $attr{$_}) {
2105     $self->{onerror}->(node => $attr{$_},
2106     type => 'attribute not allowed');
2107     }
2108     }
2109     }
2110     },
2111     checker => sub {
2112     my ($self, $todo) = @_;
2113    
2114     my $end = $self->_add_minuses ($HTMLInteractiveElements);
2115     my ($new_todos, $ch)
2116 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
2117 wakaba 1.1 push @$new_todos, $end;
2118    
2119 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
2120     $_->{flag}->{in_a_href} = 1 for @$new_todos;
2121     }
2122 wakaba 1.1
2123     return ($new_todos, $ch);
2124     },
2125     };
2126    
2127     $Element->{$HTML_NS}->{q} = {
2128     attrs_checker => $GetHTMLAttrsChecker->({
2129     cite => $HTMLURIAttrChecker,
2130     }),
2131     checker => $HTMLInlineOrStrictlyInlineChecker,
2132     };
2133    
2134     $Element->{$HTML_NS}->{cite} = {
2135     attrs_checker => $GetHTMLAttrsChecker->({}),
2136     checker => $HTMLStrictlyInlineChecker,
2137     };
2138    
2139     $Element->{$HTML_NS}->{em} = {
2140     attrs_checker => $GetHTMLAttrsChecker->({}),
2141     checker => $HTMLInlineOrStrictlyInlineChecker,
2142     };
2143    
2144     $Element->{$HTML_NS}->{strong} = {
2145     attrs_checker => $GetHTMLAttrsChecker->({}),
2146     checker => $HTMLInlineOrStrictlyInlineChecker,
2147     };
2148    
2149     $Element->{$HTML_NS}->{small} = {
2150     attrs_checker => $GetHTMLAttrsChecker->({}),
2151     checker => $HTMLInlineOrStrictlyInlineChecker,
2152     };
2153    
2154     $Element->{$HTML_NS}->{m} = {
2155     attrs_checker => $GetHTMLAttrsChecker->({}),
2156     checker => $HTMLInlineOrStrictlyInlineChecker,
2157     };
2158    
2159     $Element->{$HTML_NS}->{dfn} = {
2160     attrs_checker => $GetHTMLAttrsChecker->({}),
2161     checker => sub {
2162     my ($self, $todo) = @_;
2163    
2164     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
2165     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
2166     push @$sib, $end;
2167    
2168     my $node = $todo->{node};
2169     my $term = $node->get_attribute_ns (undef, 'title');
2170     unless (defined $term) {
2171     for my $child (@{$node->child_nodes}) {
2172     if ($child->node_type == 1) { # ELEMENT_NODE
2173     if (defined $term) {
2174     undef $term;
2175     last;
2176     } elsif ($child->manakai_local_name eq 'abbr') {
2177     my $nsuri = $child->namespace_uri;
2178     if (defined $nsuri and $nsuri eq $HTML_NS) {
2179     my $attr = $child->get_attribute_node_ns (undef, 'title');
2180     if ($attr) {
2181     $term = $attr->value;
2182     }
2183     }
2184     }
2185     } elsif ($child->node_type == 3 or $child->node_type == 4) {
2186     ## TEXT_NODE or CDATA_SECTION_NODE
2187     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
2188     next;
2189     }
2190     undef $term;
2191     last;
2192     }
2193     }
2194     unless (defined $term) {
2195     $term = $node->text_content;
2196     }
2197     }
2198     if ($self->{term}->{$term}) {
2199     $self->{onerror}->(node => $node, type => 'duplicate term');
2200     push @{$self->{term}->{$term}}, $node;
2201     } else {
2202     $self->{term}->{$term} = [$node];
2203     }
2204     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
2205     ## has |title|.
2206    
2207     return ($sib, $ch);
2208     },
2209     };
2210    
2211     $Element->{$HTML_NS}->{abbr} = {
2212     attrs_checker => $GetHTMLAttrsChecker->({
2213     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
2214     ## not different. The spec says that the |title| MAY be omitted
2215     ## if there is a |dfn| whose defining term is the abbreviation,
2216     ## but it does not prohibit |abbr| w/o |title| in other cases.
2217     }),
2218     checker => $HTMLStrictlyInlineChecker,
2219     };
2220    
2221     $Element->{$HTML_NS}->{time} = {
2222     attrs_checker => $GetHTMLAttrsChecker->({
2223     datetime => sub { 1 }, # checked in |checker|
2224     }),
2225     ## TODO: Write tests
2226     checker => sub {
2227     my ($self, $todo) = @_;
2228    
2229     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
2230     my $input;
2231     my $reg_sp;
2232     my $input_node;
2233     if ($attr) {
2234     $input = $attr->value;
2235     $reg_sp = qr/[\x09-\x0D\x20]*/;
2236     $input_node = $attr;
2237     } else {
2238     $input = $todo->{node}->text_content;
2239     $reg_sp = qr/\p{Zs}*/;
2240     $input_node = $todo->{node};
2241    
2242     ## ISSUE: What is the definition for "successfully extracts a date
2243     ## or time"? If the algorithm says the string is invalid but
2244     ## return some date or time, is it "successfully"?
2245     }
2246    
2247     my $hour;
2248     my $minute;
2249     my $second;
2250     if ($input =~ /
2251     \A
2252     [\x09-\x0D\x20]*
2253     ([0-9]+) # 1
2254     (?>
2255     -([0-9]+) # 2
2256     -([0-9]+) # 3
2257     [\x09-\x0D\x20]*
2258     (?>
2259     T
2260     [\x09-\x0D\x20]*
2261     )?
2262     ([0-9]+) # 4
2263     :([0-9]+) # 5
2264     (?>
2265     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2266     )?
2267     [\x09-\x0D\x20]*
2268     (?>
2269     Z
2270     [\x09-\x0D\x20]*
2271     |
2272     [+-]([0-9]+):([0-9]+) # 7, 8
2273     [\x09-\x0D\x20]*
2274     )?
2275     \z
2276     |
2277     :([0-9]+) # 9
2278     (?>
2279     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2280     )?
2281     [\x09-\x0D\x20]*\z
2282     )
2283     /x) {
2284     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2285     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2286     length $4 != 2 or length $5 != 2) {
2287     $self->{onerror}->(node => $input_node,
2288     type => 'dateortime:syntax error');
2289     }
2290    
2291     if (1 <= $2 and $2 <= 12) {
2292     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2293     if $3 < 1 or
2294     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
2295     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2296     if $2 == 2 and $3 == 29 and
2297     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2298     } else {
2299     $self->{onerror}->(node => $input_node,
2300     type => 'datetime:bad month');
2301     }
2302    
2303     ($hour, $minute, $second) = ($4, $5, $6);
2304    
2305     if (defined $7) { ## [+-]hh:mm
2306     if (length $7 != 2 or length $8 != 2) {
2307     $self->{onerror}->(node => $input_node,
2308     type => 'dateortime:syntax error');
2309     }
2310    
2311     $self->{onerror}->(node => $input_node,
2312     type => 'datetime:bad timezone hour')
2313     if $7 > 23;
2314     $self->{onerror}->(node => $input_node,
2315     type => 'datetime:bad timezone minute')
2316     if $8 > 59;
2317     }
2318     } else { ## hh:mm
2319     if (length $1 != 2 or length $9 != 2) {
2320     $self->{onerror}->(node => $input_node,
2321     type => qq'dateortime:syntax error');
2322     }
2323    
2324     ($hour, $minute, $second) = ($1, $9, $10);
2325     }
2326    
2327     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2328     if $hour > 23;
2329     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2330     if $minute > 59;
2331    
2332     if (defined $second) { ## s
2333     ## NOTE: Integer part of second don't have to have length of two.
2334    
2335     if (substr ($second, 0, 1) eq '.') {
2336     $self->{onerror}->(node => $input_node,
2337     type => 'dateortime:syntax error');
2338     }
2339    
2340     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2341     if $second >= 60;
2342     }
2343     } else {
2344     $self->{onerror}->(node => $input_node,
2345     type => 'dateortime:syntax error');
2346     }
2347    
2348     return $HTMLStrictlyInlineChecker->($self, $todo);
2349     },
2350     };
2351    
2352     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2353     attrs_checker => $GetHTMLAttrsChecker->({
2354     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2355     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2356     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2357     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2358     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2359     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2360     }),
2361     checker => $HTMLStrictlyInlineChecker,
2362     };
2363    
2364     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2365     attrs_checker => $GetHTMLAttrsChecker->({
2366     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2367     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2368     }),
2369     checker => $HTMLStrictlyInlineChecker,
2370     };
2371    
2372     $Element->{$HTML_NS}->{code} = {
2373     attrs_checker => $GetHTMLAttrsChecker->({}),
2374     ## NOTE: Though |title| has special semantics,
2375     ## syntatically same as the |title| as global attribute.
2376     checker => $HTMLInlineOrStrictlyInlineChecker,
2377     };
2378    
2379     $Element->{$HTML_NS}->{var} = {
2380     attrs_checker => $GetHTMLAttrsChecker->({}),
2381     ## NOTE: Though |title| has special semantics,
2382     ## syntatically same as the |title| as global attribute.
2383     checker => $HTMLStrictlyInlineChecker,
2384     };
2385    
2386     $Element->{$HTML_NS}->{samp} = {
2387     attrs_checker => $GetHTMLAttrsChecker->({}),
2388     ## NOTE: Though |title| has special semantics,
2389     ## syntatically same as the |title| as global attribute.
2390     checker => $HTMLInlineOrStrictlyInlineChecker,
2391     };
2392    
2393     $Element->{$HTML_NS}->{kbd} = {
2394     attrs_checker => $GetHTMLAttrsChecker->({}),
2395     checker => $HTMLStrictlyInlineChecker,
2396     };
2397    
2398     $Element->{$HTML_NS}->{sub} = {
2399     attrs_checker => $GetHTMLAttrsChecker->({}),
2400     checker => $HTMLStrictlyInlineChecker,
2401     };
2402    
2403     $Element->{$HTML_NS}->{sup} = {
2404     attrs_checker => $GetHTMLAttrsChecker->({}),
2405     checker => $HTMLStrictlyInlineChecker,
2406     };
2407    
2408     $Element->{$HTML_NS}->{span} = {
2409     attrs_checker => $GetHTMLAttrsChecker->({}),
2410     ## NOTE: Though |title| has special semantics,
2411     ## syntatically same as the |title| as global attribute.
2412     checker => $HTMLInlineOrStrictlyInlineChecker,
2413     };
2414    
2415     $Element->{$HTML_NS}->{i} = {
2416     attrs_checker => $GetHTMLAttrsChecker->({}),
2417     ## NOTE: Though |title| has special semantics,
2418     ## syntatically same as the |title| as global attribute.
2419     checker => $HTMLStrictlyInlineChecker,
2420     };
2421    
2422     $Element->{$HTML_NS}->{b} = {
2423     attrs_checker => $GetHTMLAttrsChecker->({}),
2424     checker => $HTMLStrictlyInlineChecker,
2425     };
2426    
2427     $Element->{$HTML_NS}->{bdo} = {
2428     attrs_checker => sub {
2429     my ($self, $todo) = @_;
2430     $GetHTMLAttrsChecker->({})->($self, $todo);
2431     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2432     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2433     }
2434     },
2435     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2436     checker => $HTMLStrictlyInlineChecker,
2437     };
2438    
2439     $Element->{$HTML_NS}->{ins} = {
2440     attrs_checker => $GetHTMLAttrsChecker->({
2441     cite => $HTMLURIAttrChecker,
2442     datetime => $HTMLDatetimeAttrChecker,
2443     }),
2444     checker => $HTMLTransparentChecker,
2445     };
2446    
2447     $Element->{$HTML_NS}->{del} = {
2448     attrs_checker => $GetHTMLAttrsChecker->({
2449     cite => $HTMLURIAttrChecker,
2450     datetime => $HTMLDatetimeAttrChecker,
2451     }),
2452     checker => sub {
2453     my ($self, $todo) = @_;
2454    
2455     my $parent = $todo->{node}->manakai_parent_element;
2456     if (defined $parent) {
2457 wakaba 1.25 my $sig_flag = $todo->{flag}->{has_descendant}->{significant};
2458 wakaba 1.1 my $nsuri = $parent->namespace_uri;
2459     $nsuri = '' unless defined $nsuri;
2460     my $ln = $parent->manakai_local_name;
2461     my $eldef = $Element->{$nsuri}->{$ln} ||
2462     $Element->{$nsuri}->{''} ||
2463     $ElementDefault;
2464 wakaba 1.25 my ($new_todos) = $eldef->{checker}->($self, $todo);
2465     push @$new_todos, {type => 'code', code => sub {
2466     $todo->{flag}->{has_descendant}->{significant} = 0;
2467     }} if not $sig_flag;
2468     return $new_todos;
2469 wakaba 1.1 } else {
2470     return $HTMLBlockOrInlineChecker->($self, $todo);
2471     }
2472     },
2473     };
2474    
2475     ## TODO: figure
2476 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2477 wakaba 1.1
2478 wakaba 1.4 ## TODO: |alt|
2479 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2480     attrs_checker => sub {
2481     my ($self, $todo) = @_;
2482     $GetHTMLAttrsChecker->({
2483     alt => sub { }, ## NOTE: No syntactical requirement
2484     src => $HTMLURIAttrChecker,
2485     usemap => $HTMLUsemapAttrChecker,
2486     ismap => sub {
2487     my ($self, $attr, $parent_todo) = @_;
2488 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2489     $self->{onerror}->(node => $attr,
2490     type => 'attribute not allowed:ismap');
2491 wakaba 1.1 }
2492     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2493     },
2494     ## TODO: height
2495     ## TODO: width
2496     })->($self, $todo);
2497     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2498     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2499     }
2500     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2501     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2502     }
2503     },
2504 wakaba 1.25 checker => sub {
2505     my ($self, $todo) = @_;
2506     $todo->{flag}->{has_descendant}->{significant} = 1;
2507     return $HTMLEmptyChecker->($self, $todo);
2508     },
2509 wakaba 1.1 };
2510    
2511     $Element->{$HTML_NS}->{iframe} = {
2512     attrs_checker => $GetHTMLAttrsChecker->({
2513     src => $HTMLURIAttrChecker,
2514     }),
2515 wakaba 1.25 checker => sub {
2516     my ($self, $todo) = @_;
2517     $todo->{flag}->{has_descendant}->{significant} = 1;
2518     return $HTMLTextChecker->($self, $todo);
2519     },
2520 wakaba 1.1 };
2521    
2522     $Element->{$HTML_NS}->{embed} = {
2523     attrs_checker => sub {
2524     my ($self, $todo) = @_;
2525     my $has_src;
2526     for my $attr (@{$todo->{node}->attributes}) {
2527     my $attr_ns = $attr->namespace_uri;
2528     $attr_ns = '' unless defined $attr_ns;
2529     my $attr_ln = $attr->manakai_local_name;
2530     my $checker;
2531     if ($attr_ns eq '') {
2532     if ($attr_ln eq 'src') {
2533     $checker = $HTMLURIAttrChecker;
2534     $has_src = 1;
2535     } elsif ($attr_ln eq 'type') {
2536     $checker = $HTMLIMTAttrChecker;
2537     } else {
2538     ## TODO: height
2539     ## TODO: width
2540     $checker = $HTMLAttrChecker->{$attr_ln}
2541     || sub { }; ## NOTE: Any local attribute is ok.
2542     }
2543     }
2544     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2545     || $AttrChecker->{$attr_ns}->{''};
2546     if ($checker) {
2547     $checker->($self, $attr);
2548     } else {
2549     $self->{onerror}->(node => $attr, level => 'unsupported',
2550     type => 'attribute');
2551     ## ISSUE: No comformance createria for global attributes in the spec
2552     }
2553     }
2554    
2555     unless ($has_src) {
2556     $self->{onerror}->(node => $todo->{node},
2557     type => 'attribute missing:src');
2558     }
2559     },
2560 wakaba 1.25 checker => sub {
2561     my ($self, $todo) = @_;
2562     $todo->{flag}->{has_descendant}->{significant} = 1;
2563     return $HTMLEmptyChecker->($self, $todo);
2564     },
2565 wakaba 1.1 };
2566    
2567     $Element->{$HTML_NS}->{object} = {
2568     attrs_checker => sub {
2569     my ($self, $todo) = @_;
2570     $GetHTMLAttrsChecker->({
2571     data => $HTMLURIAttrChecker,
2572     type => $HTMLIMTAttrChecker,
2573     usemap => $HTMLUsemapAttrChecker,
2574     ## TODO: width
2575     ## TODO: height
2576     })->($self, $todo);
2577     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2578     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2579     $self->{onerror}->(node => $todo->{node},
2580     type => 'attribute missing:data|type');
2581     }
2582     }
2583     },
2584 wakaba 1.25 checker => sub {
2585     my ($self, $todo) = @_;
2586     $todo->{flag}->{has_descendant}->{significant} = 1;
2587     return $ElementDefault->{checker}->($self, $todo); ## TODO
2588     },
2589 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2590 wakaba 1.1 };
2591    
2592     $Element->{$HTML_NS}->{param} = {
2593     attrs_checker => sub {
2594     my ($self, $todo) = @_;
2595     $GetHTMLAttrsChecker->({
2596     name => sub { },
2597     value => sub { },
2598     })->($self, $todo);
2599     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2600     $self->{onerror}->(node => $todo->{node},
2601     type => 'attribute missing:name');
2602     }
2603     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2604     $self->{onerror}->(node => $todo->{node},
2605     type => 'attribute missing:value');
2606     }
2607     },
2608     checker => $HTMLEmptyChecker,
2609     };
2610    
2611     $Element->{$HTML_NS}->{video} = {
2612     attrs_checker => $GetHTMLAttrsChecker->({
2613     src => $HTMLURIAttrChecker,
2614     ## TODO: start, loopstart, loopend, end
2615     ## ISSUE: they MUST be "value time offset"s. Value?
2616 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2617 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2618     controls => $GetHTMLBooleanAttrChecker->('controls'),
2619 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2620     ## TODO: width, height (not for audio!)
2621 wakaba 1.1 }),
2622     checker => sub {
2623     my ($self, $todo) = @_;
2624 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2625 wakaba 1.1
2626     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2627     return $HTMLBlockOrInlineChecker->($self, $todo);
2628     } else {
2629     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2630     ->($self, $todo);
2631     }
2632     },
2633     };
2634    
2635     $Element->{$HTML_NS}->{audio} = {
2636     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2637     checker => $Element->{$HTML_NS}->{video}->{checker},
2638     };
2639    
2640     $Element->{$HTML_NS}->{source} = {
2641     attrs_checker => sub {
2642     my ($self, $todo) = @_;
2643     $GetHTMLAttrsChecker->({
2644     src => $HTMLURIAttrChecker,
2645     type => $HTMLIMTAttrChecker,
2646     media => $HTMLMQAttrChecker,
2647     })->($self, $todo);
2648     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2649     $self->{onerror}->(node => $todo->{node},
2650     type => 'attribute missing:src');
2651     }
2652     },
2653     checker => $HTMLEmptyChecker,
2654     };
2655    
2656     $Element->{$HTML_NS}->{canvas} = {
2657     attrs_checker => $GetHTMLAttrsChecker->({
2658     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2659     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2660     }),
2661 wakaba 1.25 checker => sub {
2662     my ($self, $todo) = @_;
2663     $todo->{flag}->{has_descendant}->{significant} = 1;
2664     return $HTMLInlineChecker->($self, $todo);
2665     },
2666 wakaba 1.1 };
2667    
2668     $Element->{$HTML_NS}->{map} = {
2669 wakaba 1.4 attrs_checker => sub {
2670     my ($self, $todo) = @_;
2671     my $has_id;
2672     $GetHTMLAttrsChecker->({
2673     id => sub {
2674     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2675     my ($self, $attr) = @_;
2676     my $value = $attr->value;
2677     if (length $value > 0) {
2678     if ($self->{id}->{$value}) {
2679     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2680     push @{$self->{id}->{$value}}, $attr;
2681     } else {
2682     $self->{id}->{$value} = [$attr];
2683     }
2684 wakaba 1.1 } else {
2685 wakaba 1.4 ## NOTE: MUST contain at least one character
2686     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2687 wakaba 1.1 }
2688 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2689     $self->{onerror}->(node => $attr, type => 'space in ID');
2690     }
2691     $self->{map}->{$value} ||= $attr;
2692     $has_id = 1;
2693     },
2694     })->($self, $todo);
2695     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2696     unless $has_id;
2697     },
2698 wakaba 1.1 checker => $HTMLBlockChecker,
2699     };
2700    
2701     $Element->{$HTML_NS}->{area} = {
2702     attrs_checker => sub {
2703     my ($self, $todo) = @_;
2704     my %attr;
2705     my $coords;
2706     for my $attr (@{$todo->{node}->attributes}) {
2707     my $attr_ns = $attr->namespace_uri;
2708     $attr_ns = '' unless defined $attr_ns;
2709     my $attr_ln = $attr->manakai_local_name;
2710     my $checker;
2711     if ($attr_ns eq '') {
2712     $checker = {
2713     alt => sub { },
2714     ## NOTE: |alt| value has no conformance creteria.
2715     shape => $GetHTMLEnumeratedAttrChecker->({
2716     circ => -1, circle => 1,
2717     default => 1,
2718     poly => 1, polygon => -1,
2719     rect => 1, rectangle => -1,
2720     }),
2721     coords => sub {
2722     my ($self, $attr) = @_;
2723     my $value = $attr->value;
2724     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2725     $coords = [split /,/, $value];
2726     } else {
2727     $self->{onerror}->(node => $attr,
2728     type => 'coords:syntax error');
2729     }
2730     },
2731     target => $HTMLTargetAttrChecker,
2732     href => $HTMLURIAttrChecker,
2733     ping => $HTMLSpaceURIsAttrChecker,
2734 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2735 wakaba 1.1 media => $HTMLMQAttrChecker,
2736     hreflang => $HTMLLanguageTagAttrChecker,
2737     type => $HTMLIMTAttrChecker,
2738     }->{$attr_ln};
2739     if ($checker) {
2740     $attr{$attr_ln} = $attr;
2741     } else {
2742     $checker = $HTMLAttrChecker->{$attr_ln};
2743     }
2744     }
2745     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2746     || $AttrChecker->{$attr_ns}->{''};
2747     if ($checker) {
2748     $checker->($self, $attr) if ref $checker;
2749     } else {
2750     $self->{onerror}->(node => $attr, level => 'unsupported',
2751     type => 'attribute');
2752     ## ISSUE: No comformance createria for unknown attributes in the spec
2753     }
2754     }
2755    
2756     if (defined $attr{href}) {
2757 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2758 wakaba 1.1 unless (defined $attr{alt}) {
2759     $self->{onerror}->(node => $todo->{node},
2760     type => 'attribute missing:alt');
2761     }
2762     } else {
2763     for (qw/target ping rel media hreflang type alt/) {
2764     if (defined $attr{$_}) {
2765     $self->{onerror}->(node => $attr{$_},
2766     type => 'attribute not allowed');
2767     }
2768     }
2769     }
2770    
2771     my $shape = 'rectangle';
2772     if (defined $attr{shape}) {
2773     $shape = {
2774     circ => 'circle', circle => 'circle',
2775     default => 'default',
2776     poly => 'polygon', polygon => 'polygon',
2777     rect => 'rectangle', rectangle => 'rectangle',
2778     }->{lc $attr{shape}->value} || 'rectangle';
2779     ## TODO: ASCII lowercase?
2780     }
2781    
2782     if ($shape eq 'circle') {
2783     if (defined $attr{coords}) {
2784     if (defined $coords) {
2785     if (@$coords == 3) {
2786     if ($coords->[2] < 0) {
2787     $self->{onerror}->(node => $attr{coords},
2788     type => 'coords:out of range:2');
2789     }
2790     } else {
2791     $self->{onerror}->(node => $attr{coords},
2792     type => 'coords:number:3:'.@$coords);
2793     }
2794     } else {
2795     ## NOTE: A syntax error has been reported.
2796     }
2797     } else {
2798     $self->{onerror}->(node => $todo->{node},
2799     type => 'attribute missing:coords');
2800     }
2801     } elsif ($shape eq 'default') {
2802     if (defined $attr{coords}) {
2803     $self->{onerror}->(node => $attr{coords},
2804     type => 'attribute not allowed');
2805     }
2806     } elsif ($shape eq 'polygon') {
2807     if (defined $attr{coords}) {
2808     if (defined $coords) {
2809     if (@$coords >= 6) {
2810     unless (@$coords % 2 == 0) {
2811     $self->{onerror}->(node => $attr{coords},
2812     type => 'coords:number:even:'.@$coords);
2813     }
2814     } else {
2815     $self->{onerror}->(node => $attr{coords},
2816     type => 'coords:number:>=6:'.@$coords);
2817     }
2818     } else {
2819     ## NOTE: A syntax error has been reported.
2820     }
2821     } else {
2822     $self->{onerror}->(node => $todo->{node},
2823     type => 'attribute missing:coords');
2824     }
2825     } elsif ($shape eq 'rectangle') {
2826     if (defined $attr{coords}) {
2827     if (defined $coords) {
2828     if (@$coords == 4) {
2829     unless ($coords->[0] < $coords->[2]) {
2830     $self->{onerror}->(node => $attr{coords},
2831     type => 'coords:out of range:0');
2832     }
2833     unless ($coords->[1] < $coords->[3]) {
2834     $self->{onerror}->(node => $attr{coords},
2835     type => 'coords:out of range:1');
2836     }
2837     } else {
2838     $self->{onerror}->(node => $attr{coords},
2839     type => 'coords:number:4:'.@$coords);
2840     }
2841     } else {
2842     ## NOTE: A syntax error has been reported.
2843     }
2844     } else {
2845     $self->{onerror}->(node => $todo->{node},
2846     type => 'attribute missing:coords');
2847     }
2848     }
2849     },
2850     checker => $HTMLEmptyChecker,
2851     };
2852     ## TODO: only in map
2853    
2854     $Element->{$HTML_NS}->{table} = {
2855     attrs_checker => $GetHTMLAttrsChecker->({}),
2856     checker => sub {
2857     my ($self, $todo) = @_;
2858     my $el = $todo->{node};
2859     my $new_todos = [];
2860     my @nodes = (@{$el->child_nodes});
2861    
2862     my $phase = 'before caption';
2863     my $has_tfoot;
2864     while (@nodes) {
2865     my $node = shift @nodes;
2866     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2867    
2868     my $nt = $node->node_type;
2869     if ($nt == 1) {
2870     my $node_ns = $node->namespace_uri;
2871     $node_ns = '' unless defined $node_ns;
2872     my $node_ln = $node->manakai_local_name;
2873     ## NOTE: |minuses| list is not checked since redundant
2874 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2875     #
2876     } elsif ($phase eq 'in tbodys') {
2877 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2878     #$phase = 'in tbodys';
2879     } elsif (not $has_tfoot and
2880     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2881     $phase = 'after tfoot';
2882     $has_tfoot = 1;
2883     } else {
2884     $self->{onerror}->(node => $node, type => 'element not allowed');
2885     }
2886     } elsif ($phase eq 'in trs') {
2887     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2888     #$phase = 'in trs';
2889     } elsif (not $has_tfoot and
2890     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2891     $phase = 'after tfoot';
2892     $has_tfoot = 1;
2893     } else {
2894     $self->{onerror}->(node => $node, type => 'element not allowed');
2895     }
2896     } elsif ($phase eq 'after thead') {
2897     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2898     $phase = 'in tbodys';
2899     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2900     $phase = 'in trs';
2901     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2902     $phase = 'in tbodys';
2903     $has_tfoot = 1;
2904     } else {
2905     $self->{onerror}->(node => $node, type => 'element not allowed');
2906     }
2907     } elsif ($phase eq 'in colgroup') {
2908     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2909     $phase = 'in colgroup';
2910     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2911     $phase = 'after thead';
2912     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2913     $phase = 'in tbodys';
2914     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2915     $phase = 'in trs';
2916     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2917     $phase = 'in tbodys';
2918     $has_tfoot = 1;
2919     } else {
2920     $self->{onerror}->(node => $node, type => 'element not allowed');
2921     }
2922     } elsif ($phase eq 'before caption') {
2923     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2924     $phase = 'in colgroup';
2925     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2926     $phase = 'in colgroup';
2927     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2928     $phase = 'after thead';
2929     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2930     $phase = 'in tbodys';
2931     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2932     $phase = 'in trs';
2933     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2934     $phase = 'in tbodys';
2935     $has_tfoot = 1;
2936     } else {
2937     $self->{onerror}->(node => $node, type => 'element not allowed');
2938     }
2939     } else { # after tfoot
2940     $self->{onerror}->(node => $node, type => 'element not allowed');
2941     }
2942     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2943     unshift @nodes, @$sib;
2944     push @$new_todos, @$ch;
2945     } elsif ($nt == 3 or $nt == 4) {
2946     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2947     $self->{onerror}->(node => $node, type => 'character not allowed');
2948 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2949 wakaba 1.1 }
2950     } elsif ($nt == 5) {
2951     unshift @nodes, @{$node->child_nodes};
2952     }
2953     }
2954    
2955     ## Table model errors
2956     require Whatpm::HTMLTable;
2957     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2958     my %opt = @_;
2959     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2960     });
2961     push @{$self->{return}->{table}}, $todo->{node};
2962    
2963     return ($new_todos);
2964     },
2965     };
2966    
2967     $Element->{$HTML_NS}->{caption} = {
2968     attrs_checker => $GetHTMLAttrsChecker->({}),
2969 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2970 wakaba 1.1 };
2971    
2972     $Element->{$HTML_NS}->{colgroup} = {
2973     attrs_checker => $GetHTMLAttrsChecker->({
2974     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2975     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2976     ## TODO: "attribute not supported" if |col|.
2977     ## ISSUE: MUST NOT if any |col|?
2978     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2979     }),
2980     checker => sub {
2981     my ($self, $todo) = @_;
2982     my $el = $todo->{node};
2983     my $new_todos = [];
2984     my @nodes = (@{$el->child_nodes});
2985    
2986     while (@nodes) {
2987     my $node = shift @nodes;
2988     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2989    
2990     my $nt = $node->node_type;
2991     if ($nt == 1) {
2992     my $node_ns = $node->namespace_uri;
2993     $node_ns = '' unless defined $node_ns;
2994     my $node_ln = $node->manakai_local_name;
2995     ## NOTE: |minuses| list is not checked since redundant
2996 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2997     #
2998     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2999 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
3000     }
3001     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3002     unshift @nodes, @$sib;
3003     push @$new_todos, @$ch;
3004     } elsif ($nt == 3 or $nt == 4) {
3005     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3006     $self->{onerror}->(node => $node, type => 'character not allowed');
3007 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3008 wakaba 1.1 }
3009     } elsif ($nt == 5) {
3010     unshift @nodes, @{$node->child_nodes};
3011     }
3012     }
3013     return ($new_todos);
3014     },
3015     };
3016    
3017     $Element->{$HTML_NS}->{col} = {
3018     attrs_checker => $GetHTMLAttrsChecker->({
3019     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3020     }),
3021     checker => $HTMLEmptyChecker,
3022     };
3023    
3024     $Element->{$HTML_NS}->{tbody} = {
3025     attrs_checker => $GetHTMLAttrsChecker->({}),
3026     checker => sub {
3027     my ($self, $todo) = @_;
3028     my $el = $todo->{node};
3029     my $new_todos = [];
3030     my @nodes = (@{$el->child_nodes});
3031    
3032     my $has_tr;
3033     while (@nodes) {
3034     my $node = shift @nodes;
3035     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3036    
3037     my $nt = $node->node_type;
3038     if ($nt == 1) {
3039     my $node_ns = $node->namespace_uri;
3040     $node_ns = '' unless defined $node_ns;
3041     my $node_ln = $node->manakai_local_name;
3042     ## NOTE: |minuses| list is not checked since redundant
3043 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3044     #
3045     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3046 wakaba 1.1 $has_tr = 1;
3047     } else {
3048     $self->{onerror}->(node => $node, type => 'element not allowed');
3049     }
3050     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3051     unshift @nodes, @$sib;
3052     push @$new_todos, @$ch;
3053     } elsif ($nt == 3 or $nt == 4) {
3054     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3055     $self->{onerror}->(node => $node, type => 'character not allowed');
3056 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3057 wakaba 1.1 }
3058     } elsif ($nt == 5) {
3059     unshift @nodes, @{$node->child_nodes};
3060     }
3061     }
3062     unless ($has_tr) {
3063     $self->{onerror}->(node => $el, type => 'child element missing:tr');
3064     }
3065     return ($new_todos);
3066     },
3067     };
3068    
3069     $Element->{$HTML_NS}->{thead} = {
3070     attrs_checker => $GetHTMLAttrsChecker->({}),
3071     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3072     };
3073    
3074     $Element->{$HTML_NS}->{tfoot} = {
3075     attrs_checker => $GetHTMLAttrsChecker->({}),
3076     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3077     };
3078    
3079     $Element->{$HTML_NS}->{tr} = {
3080     attrs_checker => $GetHTMLAttrsChecker->({}),
3081     checker => sub {
3082     my ($self, $todo) = @_;
3083     my $el = $todo->{node};
3084     my $new_todos = [];
3085     my @nodes = (@{$el->child_nodes});
3086    
3087     my $has_td;
3088     while (@nodes) {
3089     my $node = shift @nodes;
3090     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3091    
3092     my $nt = $node->node_type;
3093     if ($nt == 1) {
3094     my $node_ns = $node->namespace_uri;
3095     $node_ns = '' unless defined $node_ns;
3096     my $node_ln = $node->manakai_local_name;
3097     ## NOTE: |minuses| list is not checked since redundant
3098 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3099     #
3100     } elsif ($node_ns eq $HTML_NS and
3101     ($node_ln eq 'td' or $node_ln eq 'th')) {
3102 wakaba 1.1 $has_td = 1;
3103     } else {
3104     $self->{onerror}->(node => $node, type => 'element not allowed');
3105     }
3106     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3107     unshift @nodes, @$sib;
3108     push @$new_todos, @$ch;
3109     } elsif ($nt == 3 or $nt == 4) {
3110     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3111     $self->{onerror}->(node => $node, type => 'character not allowed');
3112 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3113 wakaba 1.1 }
3114     } elsif ($nt == 5) {
3115     unshift @nodes, @{$node->child_nodes};
3116     }
3117     }
3118     unless ($has_td) {
3119     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
3120     }
3121     return ($new_todos);
3122     },
3123     };
3124    
3125     $Element->{$HTML_NS}->{td} = {
3126     attrs_checker => $GetHTMLAttrsChecker->({
3127     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3128     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3129     }),
3130     checker => $HTMLBlockOrInlineChecker,
3131     };
3132    
3133     $Element->{$HTML_NS}->{th} = {
3134     attrs_checker => $GetHTMLAttrsChecker->({
3135     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3136     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3137     scope => $GetHTMLEnumeratedAttrChecker
3138     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
3139     }),
3140     checker => $HTMLBlockOrInlineChecker,
3141     };
3142    
3143     ## TODO: forms
3144 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
3145 wakaba 1.1
3146     $Element->{$HTML_NS}->{script} = {
3147 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
3148 wakaba 1.1 src => $HTMLURIAttrChecker,
3149     defer => $GetHTMLBooleanAttrChecker->('defer'),
3150     async => $GetHTMLBooleanAttrChecker->('async'),
3151     type => $HTMLIMTAttrChecker,
3152 wakaba 1.9 }),
3153 wakaba 1.1 checker => sub {
3154     my ($self, $todo) = @_;
3155    
3156     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
3157     return $HTMLEmptyChecker->($self, $todo);
3158     } else {
3159     ## NOTE: No content model conformance in HTML5 spec.
3160     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
3161     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
3162     if ((defined $type and $type eq '') or
3163     (defined $language and $language eq '')) {
3164     $type = 'text/javascript';
3165     } elsif (defined $type) {
3166     #
3167     } elsif (defined $language) {
3168     $type = 'text/' . $language;
3169     } else {
3170     $type = 'text/javascript';
3171     }
3172     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
3173     type => 'script:'.$type); ## TODO: $type normalization
3174     return $AnyChecker->($self, $todo);
3175     }
3176     },
3177     };
3178 wakaba 1.25 ## ISSUE: Significant check and text child node
3179 wakaba 1.1
3180     ## NOTE: When script is disabled.
3181     $Element->{$HTML_NS}->{noscript} = {
3182 wakaba 1.3 attrs_checker => sub {
3183     my ($self, $todo) = @_;
3184    
3185     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
3186     ## since the later is not invoked when the |noscript| is used as a
3187     ## transparent element.
3188     unless ($todo->{node}->owner_document->manakai_is_html) {
3189     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
3190     }
3191    
3192     $GetHTMLAttrsChecker->({})->($self, $todo);
3193     },
3194 wakaba 1.1 checker => sub {
3195     my ($self, $todo) = @_;
3196    
3197 wakaba 1.3 if ($todo->{flag}->{in_head}) {
3198     my $new_todos = [];
3199     my @nodes = (@{$todo->{node}->child_nodes});
3200    
3201     while (@nodes) {
3202     my $node = shift @nodes;
3203     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3204    
3205     my $nt = $node->node_type;
3206     if ($nt == 1) {
3207     my $node_ns = $node->namespace_uri;
3208     $node_ns = '' unless defined $node_ns;
3209     my $node_ln = $node->manakai_local_name;
3210 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3211     #
3212     } elsif ($node_ns eq $HTML_NS) {
3213 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
3214     #
3215     } elsif ($node_ln eq 'meta') {
3216 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
3217     #
3218 wakaba 1.3 } else {
3219 wakaba 1.5 $self->{onerror}->(node => $node,
3220     type => 'element not allowed');
3221 wakaba 1.3 }
3222     } else {
3223     $self->{onerror}->(node => $node, type => 'element not allowed');
3224     }
3225     } else {
3226     $self->{onerror}->(node => $node, type => 'element not allowed');
3227     }
3228    
3229     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3230     unshift @nodes, @$sib;
3231     push @$new_todos, @$ch;
3232     } elsif ($nt == 3 or $nt == 4) {
3233     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3234     $self->{onerror}->(node => $node, type => 'character not allowed');
3235 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3236 wakaba 1.3 }
3237     } elsif ($nt == 5) {
3238     unshift @nodes, @{$node->child_nodes};
3239     }
3240     }
3241     return ($new_todos);
3242     } else {
3243     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
3244     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
3245     push @$sib, $end;
3246     return ($sib, $ch);
3247     }
3248 wakaba 1.1 },
3249     };
3250 wakaba 1.3
3251     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
3252 wakaba 1.1
3253     $Element->{$HTML_NS}->{'event-source'} = {
3254     attrs_checker => $GetHTMLAttrsChecker->({
3255     src => $HTMLURIAttrChecker,
3256     }),
3257     checker => $HTMLEmptyChecker,
3258     };
3259    
3260     $Element->{$HTML_NS}->{details} = {
3261     attrs_checker => $GetHTMLAttrsChecker->({
3262     open => $GetHTMLBooleanAttrChecker->('open'),
3263     }),
3264     checker => sub {
3265     my ($self, $todo) = @_;
3266    
3267     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3268     my ($sib, $ch)
3269     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
3270     ->($self, $todo);
3271     push @$sib, $end;
3272     return ($sib, $ch);
3273     },
3274     };
3275    
3276     $Element->{$HTML_NS}->{datagrid} = {
3277     attrs_checker => $GetHTMLAttrsChecker->({
3278     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3279     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3280     }),
3281     checker => sub {
3282     my ($self, $todo) = @_;
3283     my $el = $todo->{node};
3284     my $new_todos = [];
3285     my @nodes = (@{$el->child_nodes});
3286    
3287 wakaba 1.25 my $old_values = {significant =>
3288     $todo->{flag}->{has_descendant}->{significant}};
3289     $todo->{flag}->{has_descendant}->{significant} = 0;
3290    
3291 wakaba 1.1 my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3292    
3293     ## Block-table Block* | table | select | datalist | Empty
3294     my $mode = 'any';
3295     while (@nodes) {
3296     my $node = shift @nodes;
3297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3298    
3299     my $nt = $node->node_type;
3300     if ($nt == 1) {
3301     my $node_ns = $node->namespace_uri;
3302     $node_ns = '' unless defined $node_ns;
3303     my $node_ln = $node->manakai_local_name;
3304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3305 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3306     #
3307     } elsif ($mode eq 'block') {
3308 wakaba 1.1 $not_allowed = 1
3309     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
3310     } elsif ($mode eq 'any') {
3311     if ($node_ns eq $HTML_NS and
3312     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
3313     $mode = 'none';
3314     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
3315     $mode = 'block';
3316     } else {
3317     $not_allowed = 1;
3318     }
3319     } else {
3320     $not_allowed = 1;
3321     }
3322     $self->{onerror}->(node => $node, type => 'element not allowed')
3323     if $not_allowed;
3324     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3325     unshift @nodes, @$sib;
3326     push @$new_todos, @$ch;
3327     } elsif ($nt == 3 or $nt == 4) {
3328     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3329     $self->{onerror}->(node => $node, type => 'character not allowed');
3330 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3331 wakaba 1.1 }
3332     } elsif ($nt == 5) {
3333     unshift @nodes, @{$node->child_nodes};
3334     }
3335     }
3336    
3337     push @$new_todos, $end;
3338 wakaba 1.25
3339     push @$new_todos, {
3340     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3341     old_values => $old_values,
3342 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3343 wakaba 1.25 };
3344    
3345 wakaba 1.1 return ($new_todos);
3346     },
3347     };
3348    
3349     $Element->{$HTML_NS}->{command} = {
3350     attrs_checker => $GetHTMLAttrsChecker->({
3351     checked => $GetHTMLBooleanAttrChecker->('checked'),
3352     default => $GetHTMLBooleanAttrChecker->('default'),
3353     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3354     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3355     icon => $HTMLURIAttrChecker,
3356     label => sub { }, ## NOTE: No conformance creteria
3357     radiogroup => sub { }, ## NOTE: No conformance creteria
3358     ## NOTE: |title| has special semantics, but no syntactical difference
3359     type => sub {
3360     my ($self, $attr) = @_;
3361     my $value = $attr->value;
3362     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3363     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3364     }
3365     },
3366     }),
3367     checker => $HTMLEmptyChecker,
3368     };
3369    
3370     $Element->{$HTML_NS}->{menu} = {
3371     attrs_checker => $GetHTMLAttrsChecker->({
3372     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3373     id => sub {
3374     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3375     my ($self, $attr) = @_;
3376     my $value = $attr->value;
3377     if (length $value > 0) {
3378     if ($self->{id}->{$value}) {
3379     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3380     push @{$self->{id}->{$value}}, $attr;
3381     } else {
3382     $self->{id}->{$value} = [$attr];
3383     }
3384     } else {
3385     ## NOTE: MUST contain at least one character
3386     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3387     }
3388     if ($value =~ /[\x09-\x0D\x20]/) {
3389     $self->{onerror}->(node => $attr, type => 'space in ID');
3390     }
3391     $self->{menu}->{$value} ||= $attr;
3392     ## ISSUE: <menu id=""><p contextmenu=""> match?
3393     },
3394     label => sub { }, ## NOTE: No conformance creteria
3395     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3396     }),
3397     checker => sub {
3398     my ($self, $todo) = @_;
3399     my $el = $todo->{node};
3400     my $new_todos = [];
3401     my @nodes = (@{$el->child_nodes});
3402 wakaba 1.25
3403     my $old_values = {significant =>
3404     $todo->{flag}->{has_descendant}->{significant}};
3405     $todo->{flag}->{has_descendant}->{significant} = 0;
3406 wakaba 1.1
3407     my $content = 'li or inline';
3408     while (@nodes) {
3409     my $node = shift @nodes;
3410     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3411    
3412     my $nt = $node->node_type;
3413     if ($nt == 1) {
3414     my $node_ns = $node->namespace_uri;
3415     $node_ns = '' unless defined $node_ns;
3416     my $node_ln = $node->manakai_local_name;
3417     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3418 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3419     #
3420     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3421 wakaba 1.1 if ($content eq 'inline') {
3422     $not_allowed = 1;
3423     } elsif ($content eq 'li or inline') {
3424     $content = 'li';
3425     }
3426     } else {
3427     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3428     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3429     $content = 'inline';
3430     } else {
3431     $not_allowed = 1;
3432     }
3433     }
3434     $self->{onerror}->(node => $node, type => 'element not allowed')
3435     if $not_allowed;
3436     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3437     unshift @nodes, @$sib;
3438     push @$new_todos, @$ch;
3439     } elsif ($nt == 3 or $nt == 4) {
3440     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3441     if ($content eq 'li') {
3442     $self->{onerror}->(node => $node, type => 'character not allowed');
3443     } elsif ($content eq 'li or inline') {
3444     $content = 'inline';
3445     }
3446 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3447 wakaba 1.1 }
3448     } elsif ($nt == 5) {
3449     unshift @nodes, @{$node->child_nodes};
3450     }
3451     }
3452    
3453     for (@$new_todos) {
3454     $_->{inline} = 1;
3455     }
3456 wakaba 1.25
3457     push @$new_todos, {
3458     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3459     old_values => $old_values,
3460 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3461 wakaba 1.25 };
3462    
3463 wakaba 1.1 return ($new_todos);
3464     },
3465 wakaba 1.8 };
3466    
3467     $Element->{$HTML_NS}->{datatemplate} = {
3468     attrs_checker => $GetHTMLAttrsChecker->({}),
3469     checker => sub {
3470     my ($self, $todo) = @_;
3471     my $el = $todo->{node};
3472     my $new_todos = [];
3473     my @nodes = (@{$el->child_nodes});
3474    
3475     while (@nodes) {
3476     my $node = shift @nodes;
3477     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3478    
3479     my $nt = $node->node_type;
3480     if ($nt == 1) {
3481     my $node_ns = $node->namespace_uri;
3482     $node_ns = '' unless defined $node_ns;
3483     my $node_ln = $node->manakai_local_name;
3484     ## NOTE: |minuses| list is not checked since redundant
3485     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3486     #
3487     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3488     $self->{onerror}->(node => $node,
3489     type => 'element not allowed:datatemplate');
3490     }
3491     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3492     unshift @nodes, @$sib;
3493     push @$new_todos, @$ch;
3494     } elsif ($nt == 3 or $nt == 4) {
3495     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3496     $self->{onerror}->(node => $node, type => 'character not allowed');
3497 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3498 wakaba 1.8 }
3499     } elsif ($nt == 5) {
3500     unshift @nodes, @{$node->child_nodes};
3501     }
3502     }
3503     return ($new_todos);
3504     },
3505     is_xml_root => 1,
3506     };
3507    
3508     $Element->{$HTML_NS}->{rule} = {
3509     attrs_checker => $GetHTMLAttrsChecker->({
3510 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3511 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3512 wakaba 1.8 }),
3513     checker => sub {
3514     my ($self, $todo) = @_;
3515    
3516     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3517 wakaba 1.25 my ($sib, $ch) = $HTMLAnyChecker->($self, $todo);
3518 wakaba 1.8 push @$sib, $end;
3519     return ($sib, $ch);
3520     },
3521     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3522     ## is applied to some conforming data, results in a conforming DOM tree.":
3523     ## We don't check against this.
3524     };
3525    
3526     $Element->{$HTML_NS}->{nest} = {
3527     attrs_checker => $GetHTMLAttrsChecker->({
3528 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3529     mode => sub {
3530     my ($self, $attr) = @_;
3531     my $value = $attr->value;
3532     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3533     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3534     }
3535     },
3536 wakaba 1.8 }),
3537     checker => $HTMLEmptyChecker,
3538 wakaba 1.1 };
3539    
3540     $Element->{$HTML_NS}->{legend} = {
3541     attrs_checker => $GetHTMLAttrsChecker->({}),
3542     checker => sub {
3543     my ($self, $todo) = @_;
3544    
3545     my $parent = $todo->{node}->manakai_parent_element;
3546     if (defined $parent) {
3547     my $nsuri = $parent->namespace_uri;
3548     $nsuri = '' unless defined $nsuri;
3549     my $ln = $parent->manakai_local_name;
3550     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3551     return $HTMLInlineChecker->($self, $todo);
3552     } else {
3553 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3554 wakaba 1.1 }
3555     } else {
3556     return $HTMLInlineChecker->($self, $todo);
3557     }
3558    
3559     ## ISSUE: Content model is defined only for fieldset/legend,
3560     ## details/legend, and figure/legend.
3561     },
3562     };
3563    
3564     $Element->{$HTML_NS}->{div} = {
3565     attrs_checker => $GetHTMLAttrsChecker->({}),
3566     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3567     };
3568    
3569     $Element->{$HTML_NS}->{font} = {
3570     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3571     checker => $HTMLTransparentChecker,
3572     };
3573    
3574     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3575    
3576     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24