/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.27 - (hide annotations) (download)
Sat Feb 9 11:58:16 2008 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.26: +42 -5 lines
++ whatpm/Whatpm/ChangeLog	9 Feb 2008 11:57:29 -0000
2008-02-09  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm (_get_css_parser): New.

++ whatpm/Whatpm/ContentChecker/ChangeLog	9 Feb 2008 11:58:10 -0000
2008-02-09  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm (<style>): Initial version of CSS validation support.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63 wakaba 1.25 ## NOTE: When an element is added to this list, make sure that
64     ## the element's checker set |has_descendant| flag for |significant| content
65     ## as true.
66    
67 wakaba 1.26 my $HTMLSignificantContentErrors = {
68     significant => sub {
69     my ($self, $todo) = @_;
70     $self->{onerror}->(node => $todo->{node},
71     level => $self->{should_level},
72     type => 'no significant content');
73     },
74     }; # $HTMLSignificantContentErrors
75    
76 wakaba 1.25 our $AnyChecker;
77     my $HTMLAnyChecker = sub {
78     my ($self, $todo) = @_;
79    
80     my $old_values = {significant =>
81     $todo->{flag}->{has_descendant}->{significant}};
82     $todo->{flag}->{has_descendant}->{significant} = 0;
83    
84     my ($new_todos) = $AnyChecker->($self, $todo);
85    
86     push @$new_todos, {
87     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
88     old_values => $old_values,
89 wakaba 1.26 errors => $HTMLSignificantContentErrors,
90 wakaba 1.25 };
91    
92     return ($new_todos);
93     }; # $HTMLAnyChecker
94 wakaba 1.1
95     ## Empty
96     my $HTMLEmptyChecker = sub {
97     my ($self, $todo) = @_;
98     my $el = $todo->{node};
99     my $new_todos = [];
100     my @nodes = (@{$el->child_nodes});
101    
102     while (@nodes) {
103     my $node = shift @nodes;
104     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
105    
106     my $nt = $node->node_type;
107     if ($nt == 1) {
108 wakaba 1.8 my $node_ns = $node->namespace_uri;
109     $node_ns = '' unless defined $node_ns;
110     my $node_ln = $node->manakai_local_name;
111     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
112     #
113     } else {
114     ## NOTE: |minuses| list is not checked since redundant
115     $self->{onerror}->(node => $node, type => 'element not allowed');
116     }
117 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
118     unshift @nodes, @$sib;
119     push @$new_todos, @$ch;
120     } elsif ($nt == 3 or $nt == 4) {
121     if ($node->data =~ /[^\x09-\x0D\x20]/) {
122     $self->{onerror}->(node => $node, type => 'character not allowed');
123 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
124 wakaba 1.1 }
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Text
133     my $HTMLTextChecker = sub {
134     my ($self, $todo) = @_;
135     my $el = $todo->{node};
136     my $new_todos = [];
137     my @nodes = (@{$el->child_nodes});
138    
139 wakaba 1.25 my $old_values = {significant =>
140     $todo->{flag}->{has_descendant}->{significant}};
141     $todo->{flag}->{has_descendant}->{significant} = 0;
142    
143 wakaba 1.1 while (@nodes) {
144     my $node = shift @nodes;
145     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
146    
147     my $nt = $node->node_type;
148     if ($nt == 1) {
149 wakaba 1.8 my $node_ns = $node->namespace_uri;
150     $node_ns = '' unless defined $node_ns;
151     my $node_ln = $node->manakai_local_name;
152     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
153     #
154     } else {
155     ## NOTE: |minuses| list is not checked since redundant
156     $self->{onerror}->(node => $node, type => 'element not allowed');
157     }
158 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
159     unshift @nodes, @$sib;
160     push @$new_todos, @$ch;
161 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
162     if ($node->data =~ /[^\x09-\x0D\x20]/) {
163     $todo->{flag}->{has_descendant}->{significant} = 1;
164     }
165 wakaba 1.1 } elsif ($nt == 5) {
166     unshift @nodes, @{$node->child_nodes};
167     }
168     }
169 wakaba 1.25
170     push @$new_todos, {
171     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
172     old_values => $old_values,
173 wakaba 1.26 errors => $HTMLSignificantContentErrors,
174 wakaba 1.25 };
175    
176 wakaba 1.1 return ($new_todos);
177     };
178    
179     ## Zero or more |html:style| elements,
180     ## followed by zero or more block-level elements
181     my $HTMLStylableBlockChecker = sub {
182     my ($self, $todo) = @_;
183     my $el = $todo->{node};
184     my $new_todos = [];
185     my @nodes = (@{$el->child_nodes});
186 wakaba 1.25
187     my $old_values = {significant =>
188     $todo->{flag}->{has_descendant}->{significant}};
189     $todo->{flag}->{has_descendant}->{significant} = 0;
190 wakaba 1.1
191     my $has_non_style;
192     while (@nodes) {
193     my $node = shift @nodes;
194     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
195    
196     my $nt = $node->node_type;
197     if ($nt == 1) {
198     my $node_ns = $node->namespace_uri;
199     $node_ns = '' unless defined $node_ns;
200     my $node_ln = $node->manakai_local_name;
201     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
202     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
203     $not_allowed = 1 if $has_non_style or
204     not $node->has_attribute_ns (undef, 'scoped');
205     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
206     $has_non_style = 1;
207 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
208     #
209 wakaba 1.1 } else {
210     $has_non_style = 1;
211     $not_allowed = 1;
212     }
213     $self->{onerror}->(node => $node, type => 'element not allowed')
214     if $not_allowed;
215     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
216     unshift @nodes, @$sib;
217     push @$new_todos, @$ch;
218     } elsif ($nt == 3 or $nt == 4) {
219     if ($node->data =~ /[^\x09-\x0D\x20]/) {
220     $self->{onerror}->(node => $node, type => 'character not allowed');
221 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
222 wakaba 1.1 }
223     } elsif ($nt == 5) {
224     unshift @nodes, @{$node->child_nodes};
225     }
226     }
227 wakaba 1.25
228     push @$new_todos, {
229     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
230     old_values => $old_values,
231 wakaba 1.26 errors => $HTMLSignificantContentErrors,
232 wakaba 1.25 };
233    
234 wakaba 1.1 return ($new_todos);
235     }; # $HTMLStylableBlockChecker
236    
237     ## Zero or more block-level elements
238     my $HTMLBlockChecker = sub {
239     my ($self, $todo) = @_;
240     my $el = $todo->{node};
241     my $new_todos = [];
242     my @nodes = (@{$el->child_nodes});
243    
244 wakaba 1.25 my $old_values = {significant =>
245     $todo->{flag}->{has_descendant}->{significant}};
246     $todo->{flag}->{has_descendant}->{significant} = 0;
247    
248 wakaba 1.1 while (@nodes) {
249     my $node = shift @nodes;
250     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
251    
252     my $nt = $node->node_type;
253     if ($nt == 1) {
254     my $node_ns = $node->namespace_uri;
255     $node_ns = '' unless defined $node_ns;
256     my $node_ln = $node->manakai_local_name;
257     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
258     $not_allowed = 1
259 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
260     $self->{pluses}->{$node_ns}->{$node_ln};
261 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
262     if $not_allowed;
263     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
264     unshift @nodes, @$sib;
265     push @$new_todos, @$ch;
266     } elsif ($nt == 3 or $nt == 4) {
267     if ($node->data =~ /[^\x09-\x0D\x20]/) {
268     $self->{onerror}->(node => $node, type => 'character not allowed');
269 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
270 wakaba 1.1 }
271     } elsif ($nt == 5) {
272     unshift @nodes, @{$node->child_nodes};
273     }
274     }
275 wakaba 1.25
276     push @$new_todos, {
277     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
278     old_values => $old_values,
279 wakaba 1.26 errors => $HTMLSignificantContentErrors,
280 wakaba 1.25 };
281    
282 wakaba 1.1 return ($new_todos);
283     }; # $HTMLBlockChecker
284    
285     ## Inline-level content
286     my $HTMLInlineChecker = sub {
287     my ($self, $todo) = @_;
288     my $el = $todo->{node};
289     my $new_todos = [];
290     my @nodes = (@{$el->child_nodes});
291    
292 wakaba 1.25 my $old_values = {significant =>
293     $todo->{flag}->{has_descendant}->{significant}};
294     $todo->{flag}->{has_descendant}->{significant} = 0;
295    
296 wakaba 1.1 while (@nodes) {
297     my $node = shift @nodes;
298     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
299    
300     my $nt = $node->node_type;
301     if ($nt == 1) {
302     my $node_ns = $node->namespace_uri;
303     $node_ns = '' unless defined $node_ns;
304     my $node_ln = $node->manakai_local_name;
305     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
306     $not_allowed = 1
307     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
309     $self->{pluses}->{$node_ns}->{$node_ln};
310 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
311     if $not_allowed;
312     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
313     unshift @nodes, @$sib;
314     push @$new_todos, @$ch;
315 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
316     if ($node->data =~ /[^\x09-\x0D\x20]/) {
317     $todo->{flag}->{has_descendant}->{significant} = 1;
318     }
319 wakaba 1.1 } elsif ($nt == 5) {
320     unshift @nodes, @{$node->child_nodes};
321     }
322     }
323    
324     for (@$new_todos) {
325     $_->{inline} = 1;
326     }
327 wakaba 1.25
328     push @$new_todos, {
329     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
330     old_values => $old_values,
331 wakaba 1.26 errors => $HTMLSignificantContentErrors,
332 wakaba 1.25 };
333    
334 wakaba 1.1 return ($new_todos);
335     }; # $HTMLInlineChecker
336    
337     ## Strictly inline-level content
338     my $HTMLStrictlyInlineChecker = sub {
339     my ($self, $todo) = @_;
340     my $el = $todo->{node};
341     my $new_todos = [];
342     my @nodes = (@{$el->child_nodes});
343 wakaba 1.25
344     my $old_values = {significant =>
345     $todo->{flag}->{has_descendant}->{significant}};
346     $todo->{flag}->{has_descendant}->{significant} = 0;
347 wakaba 1.1
348     while (@nodes) {
349     my $node = shift @nodes;
350     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
351    
352     my $nt = $node->node_type;
353     if ($nt == 1) {
354     my $node_ns = $node->namespace_uri;
355     $node_ns = '' unless defined $node_ns;
356     my $node_ln = $node->manakai_local_name;
357     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
358     $not_allowed = 1
359 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
360     $self->{pluses}->{$node_ns}->{$node_ln};
361 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
362     if $not_allowed;
363     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
364     unshift @nodes, @$sib;
365     push @$new_todos, @$ch;
366 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
367     if ($node->data =~ /[^\x09-\x0D\x20]/) {
368     $todo->{flag}->{has_descendant}->{significant} = 1;
369     }
370 wakaba 1.1 } elsif ($nt == 5) {
371     unshift @nodes, @{$node->child_nodes};
372     }
373     }
374    
375     for (@$new_todos) {
376     $_->{inline} = 1;
377     $_->{strictly_inline} = 1;
378     }
379 wakaba 1.25
380     push @$new_todos, {
381     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
382     old_values => $old_values,
383 wakaba 1.26 errors => $HTMLSignificantContentErrors,
384 wakaba 1.25 };
385    
386 wakaba 1.1 return ($new_todos);
387     }; # $HTMLStrictlyInlineChecker
388    
389 wakaba 1.8 ## Inline-level or strictly inline-level content
390 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
391     my ($self, $todo) = @_;
392     my $el = $todo->{node};
393     my $new_todos = [];
394     my @nodes = (@{$el->child_nodes});
395 wakaba 1.25
396     my $old_values = {significant =>
397     $todo->{flag}->{has_descendant}->{significant}};
398     $todo->{flag}->{has_descendant}->{significant} = 0;
399 wakaba 1.1
400     while (@nodes) {
401     my $node = shift @nodes;
402     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
403    
404     my $nt = $node->node_type;
405     if ($nt == 1) {
406     my $node_ns = $node->namespace_uri;
407     $node_ns = '' unless defined $node_ns;
408     my $node_ln = $node->manakai_local_name;
409     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
410     if ($todo->{strictly_inline}) {
411     $not_allowed = 1
412 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
413     $self->{pluses}->{$node_ns}->{$node_ln};
414 wakaba 1.1 } else {
415     $not_allowed = 1
416     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
417 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
418     $self->{pluses}->{$node_ns}->{$node_ln};
419 wakaba 1.1 }
420     $self->{onerror}->(node => $node, type => 'element not allowed')
421     if $not_allowed;
422     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
423     unshift @nodes, @$sib;
424     push @$new_todos, @$ch;
425 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
426     if ($node->data =~ /[^\x09-\x0D\x20]/) {
427     $todo->{flag}->{has_descendant}->{significant} = 1;
428     }
429 wakaba 1.1 } elsif ($nt == 5) {
430     unshift @nodes, @{$node->child_nodes};
431     }
432     }
433    
434     for (@$new_todos) {
435     $_->{inline} = 1;
436     $_->{strictly_inline} = 1;
437     }
438 wakaba 1.25
439     push @$new_todos, {
440     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
441     old_values => $old_values,
442 wakaba 1.26 errors => $HTMLSignificantContentErrors,
443 wakaba 1.25 };
444    
445 wakaba 1.1 return ($new_todos);
446     }; # $HTMLInlineOrStrictlyInlineChecker
447    
448 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
449 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
450     my ($self, $todo) = @_;
451     my $el = $todo->{node};
452     my $new_todos = [];
453     my @nodes = (@{$el->child_nodes});
454 wakaba 1.25
455     my $old_values = {significant =>
456     $todo->{flag}->{has_descendant}->{significant}};
457     $todo->{flag}->{has_descendant}->{significant} = 0;
458 wakaba 1.1
459     my $content = 'block-or-inline'; # or 'block' or 'inline'
460     my @block_not_inline;
461     while (@nodes) {
462     my $node = shift @nodes;
463     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
464    
465 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
466     ## is conforming or not.
467    
468 wakaba 1.1 my $nt = $node->node_type;
469     if ($nt == 1) {
470     my $node_ns = $node->namespace_uri;
471     $node_ns = '' unless defined $node_ns;
472     my $node_ln = $node->manakai_local_name;
473     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
474     if ($content eq 'block') {
475     $not_allowed = 1
476 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
477     $self->{pluses}->{$node_ns}->{$node_ln};
478 wakaba 1.1 } elsif ($content eq 'inline') {
479     $not_allowed = 1
480     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
481 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
482     $self->{pluses}->{$node_ns}->{$node_ln};
483 wakaba 1.1 } else {
484     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
485     my $is_inline
486     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
487     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
488    
489     push @block_not_inline, $node
490     if $is_block and not $is_inline and not $not_allowed;
491 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
492 wakaba 1.1 $content = 'inline';
493     for (@block_not_inline) {
494     $self->{onerror}->(node => $_, type => 'element not allowed');
495     }
496     $not_allowed = 1 unless $is_inline;
497     }
498     }
499     $self->{onerror}->(node => $node, type => 'element not allowed')
500     if $not_allowed;
501     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
502     unshift @nodes, @$sib;
503     push @$new_todos, @$ch;
504     } elsif ($nt == 3 or $nt == 4) {
505     if ($node->data =~ /[^\x09-\x0D\x20]/) {
506     if ($content eq 'block') {
507     $self->{onerror}->(node => $node, type => 'character not allowed');
508     } else {
509     $content = 'inline';
510     for (@block_not_inline) {
511     $self->{onerror}->(node => $_, type => 'element not allowed');
512     }
513     }
514 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
515 wakaba 1.1 }
516     } elsif ($nt == 5) {
517     unshift @nodes, @{$node->child_nodes};
518     }
519     }
520    
521     if ($content eq 'inline') {
522     for (@$new_todos) {
523     $_->{inline} = 1;
524     }
525     }
526 wakaba 1.25
527     push @$new_todos, {
528     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
529     old_values => $old_values,
530 wakaba 1.26 errors => $HTMLSignificantContentErrors,
531 wakaba 1.25 };
532    
533 wakaba 1.1 return ($new_todos);
534     };
535    
536     ## Zero or more XXX element, then either block-level or inline-level
537     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
538     my ($elnsuri, $ellname) = @_;
539     return sub {
540     my ($self, $todo) = @_;
541     my $el = $todo->{node};
542     my $new_todos = [];
543     my @nodes = (@{$el->child_nodes});
544 wakaba 1.25
545     my $old_values = {significant =>
546     $todo->{flag}->{has_descendant}->{significant}};
547     $todo->{flag}->{has_descendant}->{significant} = 0;
548 wakaba 1.1
549     my $has_non_style;
550     my $content = 'block-or-inline'; # or 'block' or 'inline'
551     my @block_not_inline;
552     while (@nodes) {
553     my $node = shift @nodes;
554     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
555    
556     my $nt = $node->node_type;
557     if ($nt == 1) {
558     my $node_ns = $node->namespace_uri;
559     $node_ns = '' unless defined $node_ns;
560     my $node_ln = $node->manakai_local_name;
561     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
562     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
563     $not_allowed = 1 if $has_non_style;
564     if ($ellname eq 'style' and
565     not $node->has_attribute_ns (undef, 'scoped')) {
566     $not_allowed = 1;
567     }
568     } elsif ($content eq 'block') {
569     $has_non_style = 1;
570     $not_allowed = 1
571 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
572     $self->{pluses}->{$node_ns}->{$node_ln};
573 wakaba 1.1 } elsif ($content eq 'inline') {
574     $has_non_style = 1;
575     $not_allowed = 1
576     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
577 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
578     $self->{pluses}->{$node_ns}->{$node_ln};
579 wakaba 1.1 } else {
580 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
581 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
582     my $is_inline
583     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
584     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
585    
586     push @block_not_inline, $node
587     if $is_block and not $is_inline and not $not_allowed;
588 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
589 wakaba 1.1 $content = 'inline';
590     for (@block_not_inline) {
591     $self->{onerror}->(node => $_, type => 'element not allowed');
592     }
593     $not_allowed = 1 unless $is_inline;
594     }
595     }
596     $self->{onerror}->(node => $node, type => 'element not allowed')
597     if $not_allowed;
598     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
599     unshift @nodes, @$sib;
600     push @$new_todos, @$ch;
601     } elsif ($nt == 3 or $nt == 4) {
602     if ($node->data =~ /[^\x09-\x0D\x20]/) {
603     $has_non_style = 1;
604     if ($content eq 'block') {
605     $self->{onerror}->(node => $node, type => 'character not allowed');
606     } else {
607     $content = 'inline';
608     for (@block_not_inline) {
609     $self->{onerror}->(node => $_, type => 'element not allowed');
610     }
611     }
612 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
613 wakaba 1.1 }
614     } elsif ($nt == 5) {
615     unshift @nodes, @{$node->child_nodes};
616     }
617     }
618    
619     if ($content eq 'inline') {
620     for (@$new_todos) {
621     $_->{inline} = 1;
622     }
623     }
624 wakaba 1.25
625     push @$new_todos, {
626     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
627     old_values => $old_values,
628 wakaba 1.26 errors => $HTMLSignificantContentErrors,
629 wakaba 1.25 };
630    
631 wakaba 1.1 return ($new_todos);
632     };
633     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
634    
635     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
636 wakaba 1.25 ## ISSUE: Significant content rule should be applied to transparent element
637     ## with parent? Currently, applied to |video| but not to others.
638 wakaba 1.1
639     our $AttrChecker;
640    
641     my $GetHTMLEnumeratedAttrChecker = sub {
642     my $states = shift; # {value => conforming ? 1 : -1}
643     return sub {
644     my ($self, $attr) = @_;
645     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
646     if ($states->{$value} > 0) {
647     #
648     } elsif ($states->{$value}) {
649     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
650     } else {
651     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
652     }
653     };
654     }; # $GetHTMLEnumeratedAttrChecker
655    
656     my $GetHTMLBooleanAttrChecker = sub {
657     my $local_name = shift;
658     return sub {
659     my ($self, $attr) = @_;
660     my $value = $attr->value;
661     unless ($value eq $local_name or $value eq '') {
662     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
663     }
664     };
665     }; # $GetHTMLBooleanAttrChecker
666    
667 wakaba 1.8 ## Unordered set of space-separated tokens
668 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
669 wakaba 1.8 my ($self, $attr) = @_;
670     my %word;
671     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
672     unless ($word{$word}) {
673     $word{$word} = 1;
674     } else {
675     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
676     }
677     }
678 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
679 wakaba 1.8
680 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
681     ## whose allowed values are defined by the section on link types)
682     my $HTMLLinkTypesAttrChecker = sub {
683 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
684 wakaba 1.1 my %word;
685     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
686     unless ($word{$word}) {
687     $word{$word} = 1;
688 wakaba 1.18 } elsif ($word eq 'up') {
689     #
690 wakaba 1.1 } else {
691     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
692     }
693     }
694     ## NOTE: Case sensitive match (since HTML5 spec does not say link
695     ## types are case-insensitive and it says "The value should not
696     ## be confusingly similar to any other defined value (e.g.
697     ## differing only in case).").
698     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
699     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
700     ## values to be used conformingly.
701     require Whatpm::_LinkTypeList;
702     our $LinkType;
703     for my $word (keys %word) {
704     my $def = $LinkType->{$word};
705     if (defined $def) {
706     if ($def->{status} eq 'accepted') {
707     if (defined $def->{effect}->[$a_or_area]) {
708     #
709     } else {
710     $self->{onerror}->(node => $attr,
711     type => 'link type:bad context:'.$word);
712     }
713     } elsif ($def->{status} eq 'proposal') {
714     $self->{onerror}->(node => $attr, level => 's',
715     type => 'link type:proposed:'.$word);
716 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
717     #
718     } else {
719     $self->{onerror}->(node => $attr,
720     type => 'link type:bad context:'.$word);
721     }
722 wakaba 1.1 } else { # rejected or synonym
723     $self->{onerror}->(node => $attr,
724     type => 'link type:non-conforming:'.$word);
725     }
726 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
727     if ($word eq 'alternate') {
728     #
729     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
730     $todo->{has_hyperlink_link_type} = 1;
731     }
732     }
733 wakaba 1.1 if ($def->{unique}) {
734     unless ($self->{has_link_type}->{$word}) {
735     $self->{has_link_type}->{$word} = 1;
736     } else {
737     $self->{onerror}->(node => $attr,
738     type => 'link type:duplicate:'.$word);
739     }
740     }
741     } else {
742     $self->{onerror}->(node => $attr, level => 'unsupported',
743     type => 'link type:'.$word);
744     }
745     }
746 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
747     if $word{alternate} and not $word{stylesheet};
748 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
749     ## says that using both X-Pingback: header field and HTML
750     ## <link rel=pingback> is deprecated and if both appears they
751     ## SHOULD contain exactly the same value.
752     ## ISSUE: Pingback 1.0 specification defines the exact representation
753     ## of its link element, which cannot be tested by the current arch.
754     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
755     ## include any string that matches to the pattern for the rel=pingback link,
756     ## which again inpossible to test.
757     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
758 wakaba 1.12
759     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
760 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
761     ## then they SHOULD be described in different paragraphs.".
762 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
763 wakaba 1.20
764     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
765 wakaba 1.1
766     ## URI (or IRI)
767     my $HTMLURIAttrChecker = sub {
768     my ($self, $attr) = @_;
769     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
770     my $value = $attr->value;
771     Whatpm::URIChecker->check_iri_reference ($value, sub {
772     my %opt = @_;
773     $self->{onerror}->(node => $attr, level => $opt{level},
774     type => 'URI::'.$opt{type}.
775     (defined $opt{position} ? ':'.$opt{position} : ''));
776     });
777 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
778 wakaba 1.1 }; # $HTMLURIAttrChecker
779    
780     ## A space separated list of one or more URIs (or IRIs)
781     my $HTMLSpaceURIsAttrChecker = sub {
782     my ($self, $attr) = @_;
783     my $i = 0;
784     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
785     Whatpm::URIChecker->check_iri_reference ($value, sub {
786     my %opt = @_;
787     $self->{onerror}->(node => $attr, level => $opt{level},
788 wakaba 1.2 type => 'URIs:'.':'.
789     $opt{type}.':'.$i.
790 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
791     });
792     $i++;
793     }
794     ## ISSUE: Relative references?
795     ## ISSUE: Leading or trailing white spaces are conformant?
796     ## ISSUE: A sequence of white space characters are conformant?
797     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
798     ## NOTE: Duplication seems not an error.
799 wakaba 1.4 $self->{has_uri_attr} = 1;
800 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
801    
802     my $HTMLDatetimeAttrChecker = sub {
803     my ($self, $attr) = @_;
804     my $value = $attr->value;
805     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
806     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
807     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
808     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
809     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
810     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
811     if $d < 1 or
812     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
813     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
814     if $M == 2 and $d == 29 and
815     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
816     } else {
817     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
818     }
819     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
820     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
821     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
822     if defined $s and $s > 59;
823     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
824     if $zh > 23;
825     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
826     if $zm > 59;
827     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
828     } else {
829     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
830     }
831     }; # $HTMLDatetimeAttrChecker
832    
833     my $HTMLIntegerAttrChecker = sub {
834     my ($self, $attr) = @_;
835     my $value = $attr->value;
836     unless ($value =~ /\A-?[0-9]+\z/) {
837     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
838     }
839     }; # $HTMLIntegerAttrChecker
840    
841     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
842     my $range_check = shift;
843     return sub {
844     my ($self, $attr) = @_;
845     my $value = $attr->value;
846     if ($value =~ /\A[0-9]+\z/) {
847     unless ($range_check->($value + 0)) {
848     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
849     }
850     } else {
851     $self->{onerror}->(node => $attr,
852     type => 'nninteger:syntax error');
853     }
854     };
855     }; # $GetHTMLNonNegativeIntegerAttrChecker
856    
857     my $GetHTMLFloatingPointNumberAttrChecker = sub {
858     my $range_check = shift;
859     return sub {
860     my ($self, $attr) = @_;
861     my $value = $attr->value;
862     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
863     unless ($range_check->($value + 0)) {
864     $self->{onerror}->(node => $attr, type => 'float:out of range');
865     }
866     } else {
867     $self->{onerror}->(node => $attr,
868     type => 'float:syntax error');
869     }
870     };
871     }; # $GetHTMLFloatingPointNumberAttrChecker
872    
873     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
874     ## ISSUE: RFC 2046 does not define syntax of media types.
875     ## ISSUE: The definition of "a valid MIME type" is unknown.
876     ## Syntactical correctness?
877     my $HTMLIMTAttrChecker = sub {
878     my ($self, $attr) = @_;
879     my $value = $attr->value;
880     ## ISSUE: RFC 2045 Content-Type header field allows insertion
881     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
882     ## ISSUE: RFC 2231 extension? Maybe no.
883     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
884     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
885     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
886     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
887     my @type = ($1, $2);
888     my $param = $3;
889     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
890     if (defined $2) {
891     push @type, $1 => $2;
892     } else {
893     my $n = $1;
894     my $v = $2;
895     $v =~ s/\\(.)/$1/gs;
896     push @type, $n => $v;
897     }
898     }
899     require Whatpm::IMTChecker;
900     Whatpm::IMTChecker->check_imt (sub {
901     my %opt = @_;
902     $self->{onerror}->(node => $attr, level => $opt{level},
903     type => 'IMT:'.$opt{type});
904     }, @type);
905     } else {
906     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
907     }
908     }; # $HTMLIMTAttrChecker
909    
910     my $HTMLLanguageTagAttrChecker = sub {
911 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
912    
913 wakaba 1.1 my ($self, $attr) = @_;
914 wakaba 1.6 my $value = $attr->value;
915     require Whatpm::LangTag;
916     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
917     my %opt = @_;
918     my $type = 'LangTag:'.$opt{type};
919     $type .= ':' . $opt{subtag} if defined $opt{subtag};
920     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
921     level => $opt{level});
922     });
923 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
924 wakaba 1.6
925     ## TODO: testdata
926 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
927    
928     ## "A valid media query [MQ]"
929     my $HTMLMQAttrChecker = sub {
930     my ($self, $attr) = @_;
931     $self->{onerror}->(node => $attr, level => 'unsupported',
932     type => 'media query');
933     ## ISSUE: What is "a valid media query"?
934     }; # $HTMLMQAttrChecker
935    
936     my $HTMLEventHandlerAttrChecker = sub {
937     my ($self, $attr) = @_;
938     $self->{onerror}->(node => $attr, level => 'unsupported',
939     type => 'event handler');
940     ## TODO: MUST contain valid ECMAScript code matching the
941     ## ECMAScript |FunctionBody| production. [ECMA262]
942     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
943     ## ISSUE: Automatic semicolon insertion does not apply?
944     ## ISSUE: Other script languages?
945     }; # $HTMLEventHandlerAttrChecker
946    
947     my $HTMLUsemapAttrChecker = sub {
948     my ($self, $attr) = @_;
949     ## MUST be a valid hashed ID reference to a |map| element
950     my $value = $attr->value;
951     if ($value =~ s/^#//) {
952     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
953     push @{$self->{usemap}}, [$value => $attr];
954     } else {
955     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
956     }
957     ## NOTE: Space characters in hashed ID references are conforming.
958     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
959     }; # $HTMLUsemapAttrChecker
960    
961     my $HTMLTargetAttrChecker = sub {
962     my ($self, $attr) = @_;
963     my $value = $attr->value;
964     if ($value =~ /^_/) {
965     $value = lc $value; ## ISSUE: ASCII case-insentitive?
966     unless ({
967     _self => 1, _parent => 1, _top => 1,
968     }->{$value}) {
969     $self->{onerror}->(node => $attr,
970     type => 'reserved browsing context name');
971     }
972     } else {
973     #$ ISSUE: An empty string is conforming?
974     }
975     }; # $HTMLTargetAttrChecker
976    
977 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
978     my ($self, $attr) = @_;
979    
980     ## ISSUE: Namespace resolution?
981    
982     my $value = $attr->value;
983    
984     require Whatpm::CSS::SelectorsParser;
985     my $p = Whatpm::CSS::SelectorsParser->new;
986     $p->{pseudo_class}->{$_} = 1 for qw/
987     active checked disabled empty enabled first-child first-of-type
988     focus hover indeterminate last-child last-of-type link only-child
989     only-of-type root target visited
990     lang nth-child nth-last-child nth-of-type nth-last-of-type not
991     -manakai-contains -manakai-current
992     /;
993    
994     $p->{pseudo_element}->{$_} = 1 for qw/
995     after before first-letter first-line
996     /;
997    
998     $p->{must_level} = $self->{must_level};
999     $p->{onerror} = sub {
1000     my %opt = @_;
1001     $opt{type} = 'selectors:'.$opt{type};
1002     $self->{onerror}->(%opt, node => $attr);
1003     };
1004     $p->parse_string ($value);
1005     }; # $HTMLSelectorsAttrChecker
1006    
1007 wakaba 1.1 my $HTMLAttrChecker = {
1008     id => sub {
1009     ## NOTE: |map| has its own variant of |id=""| checker
1010     my ($self, $attr) = @_;
1011     my $value = $attr->value;
1012     if (length $value > 0) {
1013     if ($self->{id}->{$value}) {
1014     $self->{onerror}->(node => $attr, type => 'duplicate ID');
1015     push @{$self->{id}->{$value}}, $attr;
1016     } else {
1017     $self->{id}->{$value} = [$attr];
1018     }
1019     if ($value =~ /[\x09-\x0D\x20]/) {
1020     $self->{onerror}->(node => $attr, type => 'space in ID');
1021     }
1022     } else {
1023     ## NOTE: MUST contain at least one character
1024     $self->{onerror}->(node => $attr, type => 'empty attribute value');
1025     }
1026     },
1027     title => sub {}, ## NOTE: No conformance creteria
1028     lang => sub {
1029     my ($self, $attr) = @_;
1030 wakaba 1.6 my $value = $attr->value;
1031     if ($value eq '') {
1032     #
1033     } else {
1034     require Whatpm::LangTag;
1035     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
1036     my %opt = @_;
1037     my $type = 'LangTag:'.$opt{type};
1038     $type .= ':' . $opt{subtag} if defined $opt{subtag};
1039     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
1040     level => $opt{level});
1041     });
1042     }
1043 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
1044     unless ($attr->owner_document->manakai_is_html) {
1045     $self->{onerror}->(node => $attr, type => 'in XML:lang');
1046     }
1047 wakaba 1.6
1048     ## TODO: test data
1049 wakaba 1.1 },
1050     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
1051     class => sub {
1052     my ($self, $attr) = @_;
1053     my %word;
1054     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
1055     unless ($word{$word}) {
1056     $word{$word} = 1;
1057     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
1058     } else {
1059     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
1060     }
1061     }
1062     },
1063     contextmenu => sub {
1064     my ($self, $attr) = @_;
1065     my $value = $attr->value;
1066     push @{$self->{contextmenu}}, [$value => $attr];
1067     ## ISSUE: "The value must be the ID of a menu element in the DOM."
1068     ## What is "in the DOM"? A menu Element node that is not part
1069     ## of the Document tree is in the DOM? A menu Element node that
1070     ## belong to another Document tree is in the DOM?
1071     },
1072     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
1073 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
1074     ## TODO: ref, template, registrationmark
1075 wakaba 1.1 };
1076    
1077     for (qw/
1078     onabort onbeforeunload onblur onchange onclick oncontextmenu
1079     ondblclick ondrag ondragend ondragenter ondragleave ondragover
1080     ondragstart ondrop onerror onfocus onkeydown onkeypress
1081     onkeyup onload onmessage onmousedown onmousemove onmouseout
1082     onmouseover onmouseup onmousewheel onresize onscroll onselect
1083     onsubmit onunload
1084     /) {
1085     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
1086     }
1087    
1088     my $GetHTMLAttrsChecker = sub {
1089     my $element_specific_checker = shift;
1090     return sub {
1091     my ($self, $todo) = @_;
1092     for my $attr (@{$todo->{node}->attributes}) {
1093     my $attr_ns = $attr->namespace_uri;
1094     $attr_ns = '' unless defined $attr_ns;
1095     my $attr_ln = $attr->manakai_local_name;
1096     my $checker;
1097     if ($attr_ns eq '') {
1098     $checker = $element_specific_checker->{$attr_ln}
1099     || $HTMLAttrChecker->{$attr_ln};
1100     }
1101     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1102     || $AttrChecker->{$attr_ns}->{''};
1103     if ($checker) {
1104     $checker->($self, $attr, $todo);
1105     } else {
1106     $self->{onerror}->(node => $attr, level => 'unsupported',
1107     type => 'attribute');
1108     ## ISSUE: No comformance createria for unknown attributes in the spec
1109     }
1110     }
1111     };
1112     }; # $GetHTMLAttrsChecker
1113    
1114     our $Element;
1115     our $ElementDefault;
1116    
1117     $Element->{$HTML_NS}->{''} = {
1118     attrs_checker => $GetHTMLAttrsChecker->({}),
1119     checker => $ElementDefault->{checker},
1120     };
1121    
1122     $Element->{$HTML_NS}->{html} = {
1123     is_root => 1,
1124     attrs_checker => $GetHTMLAttrsChecker->({
1125 wakaba 1.16 manifest => $HTMLURIAttrChecker,
1126 wakaba 1.1 xmlns => sub {
1127     my ($self, $attr) = @_;
1128     my $value = $attr->value;
1129     unless ($value eq $HTML_NS) {
1130     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
1131     }
1132     unless ($attr->owner_document->manakai_is_html) {
1133     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
1134     ## TODO: Test
1135     }
1136     },
1137     }),
1138     checker => sub {
1139     my ($self, $todo) = @_;
1140     my $el = $todo->{node};
1141     my $new_todos = [];
1142     my @nodes = (@{$el->child_nodes});
1143    
1144 wakaba 1.25 my $old_values = {significant =>
1145     $todo->{flag}->{has_descendant}->{significant}};
1146     $todo->{flag}->{has_descendant}->{significant} = 0;
1147    
1148 wakaba 1.1 my $phase = 'before head';
1149     while (@nodes) {
1150     my $node = shift @nodes;
1151     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1152    
1153     my $nt = $node->node_type;
1154     if ($nt == 1) {
1155     my $node_ns = $node->namespace_uri;
1156     $node_ns = '' unless defined $node_ns;
1157     my $node_ln = $node->manakai_local_name;
1158     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1159 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1160     #
1161     } elsif ($phase eq 'before head') {
1162 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
1163     $phase = 'after head';
1164     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1165     $self->{onerror}->(node => $node, type => 'ps element missing:head');
1166     $phase = 'after body';
1167     } else {
1168     $not_allowed = 1;
1169     # before head
1170     }
1171     } elsif ($phase eq 'after head') {
1172     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1173     $phase = 'after body';
1174     } else {
1175     $not_allowed = 1;
1176     # after head
1177     }
1178     } else { #elsif ($phase eq 'after body') {
1179     $not_allowed = 1;
1180     # after body
1181     }
1182     $self->{onerror}->(node => $node, type => 'element not allowed')
1183     if $not_allowed;
1184     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1185     unshift @nodes, @$sib;
1186     push @$new_todos, @$ch;
1187     } elsif ($nt == 3 or $nt == 4) {
1188     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1189     $self->{onerror}->(node => $node, type => 'character not allowed');
1190 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1191 wakaba 1.1 }
1192     } elsif ($nt == 5) {
1193     unshift @nodes, @{$node->child_nodes};
1194     }
1195     }
1196    
1197     if ($phase eq 'before head') {
1198     $self->{onerror}->(node => $el, type => 'child element missing:head');
1199     $self->{onerror}->(node => $el, type => 'child element missing:body');
1200     } elsif ($phase eq 'after head') {
1201     $self->{onerror}->(node => $el, type => 'child element missing:body');
1202     }
1203    
1204 wakaba 1.25 ## NOTE: Significant content check - this is performed here since
1205     ## |html| content model allows a block-level element - |body|.
1206     push @$new_todos, {
1207     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
1208     old_values => $old_values,
1209 wakaba 1.26 errors => $HTMLSignificantContentErrors,
1210 wakaba 1.25 };
1211    
1212 wakaba 1.1 return ($new_todos);
1213     },
1214     };
1215    
1216     $Element->{$HTML_NS}->{head} = {
1217     attrs_checker => $GetHTMLAttrsChecker->({}),
1218     checker => sub {
1219     my ($self, $todo) = @_;
1220     my $el = $todo->{node};
1221     my $new_todos = [];
1222     my @nodes = (@{$el->child_nodes});
1223    
1224     my $has_title;
1225     my $phase = 'initial'; # 'after charset', 'after base'
1226     while (@nodes) {
1227     my $node = shift @nodes;
1228     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1229    
1230     my $nt = $node->node_type;
1231     if ($nt == 1) {
1232     my $node_ns = $node->namespace_uri;
1233     $node_ns = '' unless defined $node_ns;
1234     my $node_ln = $node->manakai_local_name;
1235     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1236 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1237     #
1238     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1239 wakaba 1.1 $phase = 'after base';
1240     unless ($has_title) {
1241     $has_title = 1;
1242     } else {
1243     $not_allowed = 1;
1244     }
1245     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1246     if ($node->has_attribute_ns (undef, 'charset')) {
1247     if ($phase eq 'initial') {
1248     $phase = 'after charset';
1249     } else {
1250     $not_allowed = 1;
1251     ## NOTE: See also |base|'s "contexts" field in the spec
1252     }
1253 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1254     $node->has_attribute_ns (undef, 'http-equiv')) {
1255     $phase = 'after base';
1256 wakaba 1.1 } else {
1257     $phase = 'after base';
1258 wakaba 1.5 $not_allowed = 1;
1259 wakaba 1.1 }
1260     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1261     if ($phase eq 'initial' or $phase eq 'after charset') {
1262     $phase = 'after base';
1263     } else {
1264     $not_allowed = 1;
1265     }
1266     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1267     $phase = 'after base';
1268     if ($node->has_attribute_ns (undef, 'scoped')) {
1269     $not_allowed = 1;
1270     }
1271     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1272     $phase = 'after base';
1273     } else {
1274     $not_allowed = 1;
1275     }
1276     $self->{onerror}->(node => $node, type => 'element not allowed')
1277     if $not_allowed;
1278 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1279 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1280     unshift @nodes, @$sib;
1281     push @$new_todos, @$ch;
1282     } elsif ($nt == 3 or $nt == 4) {
1283     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1284     $self->{onerror}->(node => $node, type => 'character not allowed');
1285 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1286 wakaba 1.1 }
1287     } elsif ($nt == 5) {
1288     unshift @nodes, @{$node->child_nodes};
1289     }
1290     }
1291     unless ($has_title) {
1292     $self->{onerror}->(node => $el, type => 'child element missing:title');
1293     }
1294     return ($new_todos);
1295     },
1296     };
1297    
1298     $Element->{$HTML_NS}->{title} = {
1299     attrs_checker => $GetHTMLAttrsChecker->({}),
1300     checker => $HTMLTextChecker,
1301     };
1302    
1303     $Element->{$HTML_NS}->{base} = {
1304 wakaba 1.4 attrs_checker => sub {
1305     my ($self, $todo) = @_;
1306    
1307 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1308     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1309    
1310     if ($self->{has_uri_attr} and $has_href) {
1311 wakaba 1.4 ## ISSUE: Are these examples conforming?
1312     ## <head profile="a b c"><base href> (except for |profile|'s
1313     ## non-conformance)
1314     ## <title xml:base="relative"/><base href/> (maybe it should be)
1315     ## <unknown xmlns="relative"/><base href/> (assuming that
1316     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1317     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1318     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1319     ## <style>@import 'relative';</style><base href>
1320     ## <script>location.href = 'relative';</script><base href>
1321 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1322     ## an exception.
1323 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1324     type => 'basehref after URI attribute');
1325     }
1326 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1327 wakaba 1.4 ## ISSUE: Are these examples conforming?
1328     ## <head><title xlink:href=""/><base target="name"/></head>
1329     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1330     ## (assuming that |xbl:xbl| is allowed before |base|)
1331     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1332     ## <link href=""/><base target="name"/>
1333     ## <link rel=unknown href=""><base target=name>
1334     $self->{onerror}->(node => $todo->{node},
1335     type => 'basetarget after hyperlink');
1336     }
1337    
1338 wakaba 1.14 if (not $has_href and not $has_target) {
1339     $self->{onerror}->(node => $todo->{node},
1340     type => 'attribute missing:href|target');
1341     }
1342    
1343 wakaba 1.4 return $GetHTMLAttrsChecker->({
1344     href => $HTMLURIAttrChecker,
1345     target => $HTMLTargetAttrChecker,
1346     })->($self, $todo);
1347     },
1348 wakaba 1.1 checker => $HTMLEmptyChecker,
1349     };
1350    
1351     $Element->{$HTML_NS}->{link} = {
1352     attrs_checker => sub {
1353     my ($self, $todo) = @_;
1354     $GetHTMLAttrsChecker->({
1355     href => $HTMLURIAttrChecker,
1356 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1357 wakaba 1.1 media => $HTMLMQAttrChecker,
1358     hreflang => $HTMLLanguageTagAttrChecker,
1359     type => $HTMLIMTAttrChecker,
1360     ## NOTE: Though |title| has special semantics,
1361     ## syntactically same as the |title| as global attribute.
1362     })->($self, $todo);
1363 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1364     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1365     } else {
1366 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1367     type => 'attribute missing:href');
1368     }
1369     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1370     $self->{onerror}->(node => $todo->{node},
1371     type => 'attribute missing:rel');
1372     }
1373     },
1374     checker => $HTMLEmptyChecker,
1375     };
1376    
1377     $Element->{$HTML_NS}->{meta} = {
1378     attrs_checker => sub {
1379     my ($self, $todo) = @_;
1380     my $name_attr;
1381     my $http_equiv_attr;
1382     my $charset_attr;
1383     my $content_attr;
1384     for my $attr (@{$todo->{node}->attributes}) {
1385     my $attr_ns = $attr->namespace_uri;
1386     $attr_ns = '' unless defined $attr_ns;
1387     my $attr_ln = $attr->manakai_local_name;
1388     my $checker;
1389     if ($attr_ns eq '') {
1390     if ($attr_ln eq 'content') {
1391     $content_attr = $attr;
1392     $checker = 1;
1393     } elsif ($attr_ln eq 'name') {
1394     $name_attr = $attr;
1395     $checker = 1;
1396     } elsif ($attr_ln eq 'http-equiv') {
1397     $http_equiv_attr = $attr;
1398     $checker = 1;
1399     } elsif ($attr_ln eq 'charset') {
1400     $charset_attr = $attr;
1401     $checker = 1;
1402     } else {
1403     $checker = $HTMLAttrChecker->{$attr_ln}
1404     || $AttrChecker->{$attr_ns}->{$attr_ln}
1405     || $AttrChecker->{$attr_ns}->{''};
1406     }
1407     } else {
1408     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1409     || $AttrChecker->{$attr_ns}->{''};
1410     }
1411     if ($checker) {
1412     $checker->($self, $attr) if ref $checker;
1413     } else {
1414     $self->{onerror}->(node => $attr, level => 'unsupported',
1415     type => 'attribute');
1416     ## ISSUE: No comformance createria for unknown attributes in the spec
1417     }
1418     }
1419    
1420     if (defined $name_attr) {
1421     if (defined $http_equiv_attr) {
1422     $self->{onerror}->(node => $http_equiv_attr,
1423     type => 'attribute not allowed');
1424     } elsif (defined $charset_attr) {
1425     $self->{onerror}->(node => $charset_attr,
1426     type => 'attribute not allowed');
1427     }
1428     my $metadata_name = $name_attr->value;
1429     my $metadata_value;
1430     if (defined $content_attr) {
1431     $metadata_value = $content_attr->value;
1432     } else {
1433     $self->{onerror}->(node => $todo->{node},
1434     type => 'attribute missing:content');
1435     $metadata_value = '';
1436     }
1437     } elsif (defined $http_equiv_attr) {
1438     if (defined $charset_attr) {
1439     $self->{onerror}->(node => $charset_attr,
1440     type => 'attribute not allowed');
1441     }
1442     unless (defined $content_attr) {
1443     $self->{onerror}->(node => $todo->{node},
1444     type => 'attribute missing:content');
1445     }
1446     } elsif (defined $charset_attr) {
1447     if (defined $content_attr) {
1448     $self->{onerror}->(node => $content_attr,
1449     type => 'attribute not allowed');
1450     }
1451     } else {
1452     if (defined $content_attr) {
1453     $self->{onerror}->(node => $content_attr,
1454     type => 'attribute not allowed');
1455     $self->{onerror}->(node => $todo->{node},
1456     type => 'attribute missing:name|http-equiv');
1457     } else {
1458     $self->{onerror}->(node => $todo->{node},
1459     type => 'attribute missing:name|http-equiv|charset');
1460     }
1461     }
1462    
1463     ## TODO: metadata conformance
1464    
1465     ## TODO: pragma conformance
1466     if (defined $http_equiv_attr) { ## An enumerated attribute
1467     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1468     if ({
1469     'refresh' => 1,
1470     'default-style' => 1,
1471     }->{$keyword}) {
1472     #
1473 wakaba 1.19 } elsif ($keyword eq 'content-type') {
1474     $self->{onerror}
1475     ->(node => $http_equiv_attr,
1476     type => 'enumerated:invalid:http-equiv:content-type');
1477 wakaba 1.1 } else {
1478     $self->{onerror}->(node => $http_equiv_attr,
1479     type => 'enumerated:invalid');
1480     }
1481     }
1482    
1483     if (defined $charset_attr) {
1484     unless ($todo->{node}->owner_document->manakai_is_html) {
1485     $self->{onerror}->(node => $charset_attr,
1486     type => 'in XML:charset');
1487     }
1488 wakaba 1.21
1489     my $charset_value = $charset_attr->value;
1490     ## NOTE: Though the case-sensitivility of |charset| attribute value
1491     ## is not explicitly spelled in the HTML5 spec, the Character Set
1492     ## registry of IANA, which is referenced from HTML5 spec, says that
1493     ## charset name is case-insensitive.
1494     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1495    
1496     require Message::Charset::Info;
1497     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1498     my $ic = $todo->{node}->owner_document->input_encoding;
1499     if (defined $ic) {
1500     ## TODO: Test for this case
1501     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1502     if ($charset ne $ic_charset) {
1503     $self->{onerror}->(node => $charset_attr,
1504     type => 'mismatched charset name:'.$ic.
1505     ':'.$charset_value,
1506     level => 'm');
1507     }
1508     } else {
1509     ## NOTE: MUST, but not checkable, since the document is not originally
1510     ## in serialized form (or the parser does not preserve the input
1511     ## encoding information).
1512     $self->{onerror}->(node => $charset_attr,
1513     type => 'mismatched charset name::'.$charset_value,
1514     level => 'unsupported');
1515     }
1516    
1517     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1518     ## Syntactically valid and registered? What about x-charset names?
1519     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1520     ($charset_value)) {
1521     $self->{onerror}->(node => $charset_attr,
1522     type => 'charset:syntax error:'.$charset_value,
1523     level => 'm');
1524     }
1525    
1526     if ($charset) {
1527     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1528     ## with no "preferred MIME name" label)?
1529     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1530     if (($charset_status &
1531     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1532     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1533     $self->{onerror}->(node => $charset_attr,
1534     type => 'charset:not preferred:'.
1535     $charset_value,
1536     level => 'm');
1537     }
1538     if (($charset_status &
1539     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1540     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1541     if ($charset_value =~ /^x-/) {
1542     $self->{onerror}->(node => $charset_attr,
1543     type => 'charset:private:'.$charset_value,
1544     level => $self->{good_level});
1545     } else {
1546     $self->{onerror}->(node => $charset_attr,
1547     type => 'charset:not registered:'.
1548     $charset_value,
1549     level => $self->{good_level});
1550     }
1551     }
1552     } elsif ($charset_value =~ /^x-/) {
1553     $self->{onerror}->(node => $charset_attr,
1554     type => 'charset:private:'.$charset_value,
1555     level => $self->{good_level});
1556     } else {
1557     $self->{onerror}->(node => $charset_attr,
1558     type => 'charset:not registered:'.$charset_value,
1559     level => $self->{good_level});
1560     }
1561    
1562 wakaba 1.22 if ($charset_attr->get_user_data ('manakai_has_reference')) {
1563     $self->{onerror}->(node => $charset_attr,
1564     type => 'character reference in charset',
1565     level => $self->{must_level});
1566     }
1567 wakaba 1.1 }
1568     },
1569     checker => $HTMLEmptyChecker,
1570     };
1571    
1572     $Element->{$HTML_NS}->{style} = {
1573     attrs_checker => $GetHTMLAttrsChecker->({
1574     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1575     media => $HTMLMQAttrChecker,
1576     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1577     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1578     ## not different
1579     }),
1580     checker => sub {
1581 wakaba 1.27 ## NOTE: |html:style| itself has no conformance creteria on content model.
1582 wakaba 1.1 my ($self, $todo) = @_;
1583     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1584 wakaba 1.27 if (not defined $type or
1585     $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
1586     my $el = $todo->{node};
1587     my $new_todos = [];
1588     my @nodes = (@{$el->child_nodes});
1589    
1590     my $ss_text = '';
1591     while (@nodes) {
1592     my $node = shift @nodes;
1593     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1594    
1595     my $nt = $node->node_type;
1596     if ($nt == 1) {
1597     $self->{onerror}->(node => $node, type => 'element not allowed');
1598     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1599     unshift @nodes, @$sib;
1600     push @$new_todos, @$ch;
1601     } elsif ($nt == 3 or $nt == 4) {
1602     $ss_text .= $node->text_content;
1603     } elsif ($nt == 5) {
1604     unshift @nodes, @{$node->child_nodes};
1605     }
1606     }
1607    
1608     my $p = $self->_get_css_parser;
1609     $p->{onerror} = sub {
1610     $self->{onerror}->(@_, node => $el);
1611     };
1612     $p->{href} = $el->owner_document->document_uri;
1613    
1614     my $ss = $p->parse_char_string ($ss_text);
1615    
1616     ## TODO: C.c. of $ss
1617    
1618     $p->{onerror} = sub {};
1619     return ($new_todos);
1620     } else {
1621     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1622     type => 'style:'.$type); ## TODO: $type normalization
1623     return $AnyChecker->($self, $todo);
1624     }
1625 wakaba 1.1 },
1626     };
1627 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1628 wakaba 1.1
1629     $Element->{$HTML_NS}->{body} = {
1630     attrs_checker => $GetHTMLAttrsChecker->({}),
1631     checker => $HTMLBlockChecker,
1632     };
1633    
1634     $Element->{$HTML_NS}->{section} = {
1635     attrs_checker => $GetHTMLAttrsChecker->({}),
1636     checker => $HTMLStylableBlockChecker,
1637     };
1638    
1639     $Element->{$HTML_NS}->{nav} = {
1640     attrs_checker => $GetHTMLAttrsChecker->({}),
1641     checker => $HTMLBlockOrInlineChecker,
1642     };
1643    
1644     $Element->{$HTML_NS}->{article} = {
1645     attrs_checker => $GetHTMLAttrsChecker->({}),
1646     checker => $HTMLStylableBlockChecker,
1647     };
1648    
1649     $Element->{$HTML_NS}->{blockquote} = {
1650     attrs_checker => $GetHTMLAttrsChecker->({
1651     cite => $HTMLURIAttrChecker,
1652     }),
1653     checker => $HTMLBlockChecker,
1654     };
1655    
1656     $Element->{$HTML_NS}->{aside} = {
1657     attrs_checker => $GetHTMLAttrsChecker->({}),
1658     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1659     };
1660    
1661     $Element->{$HTML_NS}->{h1} = {
1662     attrs_checker => $GetHTMLAttrsChecker->({}),
1663     checker => sub {
1664     my ($self, $todo) = @_;
1665 wakaba 1.24 $todo->{flag}->{has_descendant}->{hn} = 1;
1666 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1667 wakaba 1.1 },
1668     };
1669    
1670     $Element->{$HTML_NS}->{h2} = {
1671     attrs_checker => $GetHTMLAttrsChecker->({}),
1672     checker => $Element->{$HTML_NS}->{h1}->{checker},
1673     };
1674    
1675     $Element->{$HTML_NS}->{h3} = {
1676     attrs_checker => $GetHTMLAttrsChecker->({}),
1677     checker => $Element->{$HTML_NS}->{h1}->{checker},
1678     };
1679    
1680     $Element->{$HTML_NS}->{h4} = {
1681     attrs_checker => $GetHTMLAttrsChecker->({}),
1682     checker => $Element->{$HTML_NS}->{h1}->{checker},
1683     };
1684    
1685     $Element->{$HTML_NS}->{h5} = {
1686     attrs_checker => $GetHTMLAttrsChecker->({}),
1687     checker => $Element->{$HTML_NS}->{h1}->{checker},
1688     };
1689    
1690     $Element->{$HTML_NS}->{h6} = {
1691     attrs_checker => $GetHTMLAttrsChecker->({}),
1692     checker => $Element->{$HTML_NS}->{h1}->{checker},
1693     };
1694    
1695     $Element->{$HTML_NS}->{header} = {
1696     attrs_checker => $GetHTMLAttrsChecker->({}),
1697     checker => sub {
1698     my ($self, $todo) = @_;
1699 wakaba 1.24
1700     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
1701     $todo->{flag}->{has_descendant}->{hn} = 0;
1702 wakaba 1.1
1703     my $end = $self->_add_minuses
1704     ({$HTML_NS => {qw/header 1 footer 1/}},
1705     $HTMLSectioningElements);
1706     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1707 wakaba 1.24 push @$new_todos, $end,
1708     {type => 'descendant', node => $todo->{node},
1709     flag => $todo->{flag}, old_values => $old_flags,
1710     errors => {
1711     hn => sub {
1712     my ($self, $todo) = @_;
1713     $self->{onerror}->(node => $todo->{node},
1714     type => 'element missing:hn');
1715     },
1716 wakaba 1.1 }};
1717     return ($new_todos, $ch);
1718 wakaba 1.24
1719     ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
1720 wakaba 1.1 },
1721     };
1722    
1723     $Element->{$HTML_NS}->{footer} = {
1724     attrs_checker => $GetHTMLAttrsChecker->({}),
1725     checker => sub { ## block -hn -header -footer -sectioning or inline
1726     my ($self, $todo) = @_;
1727     my $el = $todo->{node};
1728     my $new_todos = [];
1729     my @nodes = (@{$el->child_nodes});
1730 wakaba 1.25
1731     my $old_values = {significant =>
1732     $todo->{flag}->{has_descendant}->{significant}};
1733     $todo->{flag}->{has_descendant}->{significant} = 0;
1734 wakaba 1.1
1735     my $content = 'block-or-inline'; # or 'block' or 'inline'
1736     my @block_not_inline;
1737     while (@nodes) {
1738     my $node = shift @nodes;
1739     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1740    
1741     my $nt = $node->node_type;
1742     if ($nt == 1) {
1743     my $node_ns = $node->namespace_uri;
1744     $node_ns = '' unless defined $node_ns;
1745     my $node_ln = $node->manakai_local_name;
1746     my $not_allowed;
1747     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1748     $not_allowed = 1;
1749     } elsif ($node_ns eq $HTML_NS and
1750     {
1751     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1752     }->{$node_ln}) {
1753     $not_allowed = 1;
1754     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1755     $not_allowed = 1;
1756     }
1757     if ($content eq 'block') {
1758     $not_allowed = 1
1759 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1760     $self->{pluses}->{$node_ns}->{$node_ln};
1761 wakaba 1.1 } elsif ($content eq 'inline') {
1762     $not_allowed = 1
1763     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1764 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1765     $self->{pluses}->{$node_ns}->{$node_ln};
1766 wakaba 1.1 } else {
1767     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1768     my $is_inline
1769     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1770     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1771    
1772     push @block_not_inline, $node
1773     if $is_block and not $is_inline and not $not_allowed;
1774 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1775 wakaba 1.1 $content = 'inline';
1776     for (@block_not_inline) {
1777     $self->{onerror}->(node => $_, type => 'element not allowed');
1778     }
1779     $not_allowed = 1 unless $is_inline;
1780     }
1781     }
1782     $self->{onerror}->(node => $node, type => 'element not allowed')
1783     if $not_allowed;
1784     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1785     unshift @nodes, @$sib;
1786     push @$new_todos, @$ch;
1787     } elsif ($nt == 3 or $nt == 4) {
1788     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1789     if ($content eq 'block') {
1790     $self->{onerror}->(node => $node, type => 'character not allowed');
1791     } else {
1792     $content = 'inline';
1793     for (@block_not_inline) {
1794     $self->{onerror}->(node => $_, type => 'element not allowed');
1795     }
1796     }
1797 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1798 wakaba 1.1 }
1799     } elsif ($nt == 5) {
1800     unshift @nodes, @{$node->child_nodes};
1801     }
1802     }
1803    
1804     my $end = $self->_add_minuses
1805     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1806     $HTMLSectioningElements);
1807     push @$new_todos, $end;
1808    
1809     if ($content eq 'inline') {
1810     for (@$new_todos) {
1811     $_->{inline} = 1;
1812     }
1813     }
1814    
1815 wakaba 1.25 push @$new_todos, {
1816     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
1817     old_values => $old_values,
1818 wakaba 1.26 errors => $HTMLSignificantContentErrors,
1819 wakaba 1.25 };
1820    
1821 wakaba 1.1 return ($new_todos);
1822     },
1823     };
1824    
1825     $Element->{$HTML_NS}->{address} = {
1826     attrs_checker => $GetHTMLAttrsChecker->({}),
1827     checker => $HTMLInlineChecker,
1828     };
1829    
1830     $Element->{$HTML_NS}->{p} = {
1831     attrs_checker => $GetHTMLAttrsChecker->({}),
1832 wakaba 1.13 checker => $HTMLInlineChecker,
1833 wakaba 1.1 };
1834    
1835     $Element->{$HTML_NS}->{hr} = {
1836     attrs_checker => $GetHTMLAttrsChecker->({}),
1837     checker => $HTMLEmptyChecker,
1838     };
1839    
1840     $Element->{$HTML_NS}->{br} = {
1841     attrs_checker => $GetHTMLAttrsChecker->({}),
1842     checker => $HTMLEmptyChecker,
1843     };
1844    
1845     $Element->{$HTML_NS}->{dialog} = {
1846     attrs_checker => $GetHTMLAttrsChecker->({}),
1847     checker => sub {
1848     my ($self, $todo) = @_;
1849     my $el = $todo->{node};
1850     my $new_todos = [];
1851     my @nodes = (@{$el->child_nodes});
1852    
1853     my $phase = 'before dt';
1854     while (@nodes) {
1855     my $node = shift @nodes;
1856     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1857    
1858     my $nt = $node->node_type;
1859     if ($nt == 1) {
1860     my $node_ns = $node->namespace_uri;
1861     $node_ns = '' unless defined $node_ns;
1862     my $node_ln = $node->manakai_local_name;
1863     ## NOTE: |minuses| list is not checked since redundant
1864 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1865     #
1866     } elsif ($phase eq 'before dt') {
1867 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1868     $phase = 'before dd';
1869     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1870     $self->{onerror}
1871     ->(node => $node, type => 'ps element missing:dt');
1872     $phase = 'before dt';
1873     } else {
1874     $self->{onerror}->(node => $node, type => 'element not allowed');
1875     }
1876     } else { # before dd
1877     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1878     $phase = 'before dt';
1879     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1880     $self->{onerror}
1881     ->(node => $node, type => 'ps element missing:dd');
1882     $phase = 'before dd';
1883     } else {
1884     $self->{onerror}->(node => $node, type => 'element not allowed');
1885     }
1886     }
1887     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1888     unshift @nodes, @$sib;
1889     push @$new_todos, @$ch;
1890     } elsif ($nt == 3 or $nt == 4) {
1891     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1892     $self->{onerror}->(node => $node, type => 'character not allowed');
1893 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1894 wakaba 1.1 }
1895     } elsif ($nt == 5) {
1896     unshift @nodes, @{$node->child_nodes};
1897     }
1898     }
1899     if ($phase eq 'before dd') {
1900 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1901 wakaba 1.1 }
1902     return ($new_todos);
1903     },
1904     };
1905    
1906     $Element->{$HTML_NS}->{pre} = {
1907     attrs_checker => $GetHTMLAttrsChecker->({}),
1908     checker => $HTMLStrictlyInlineChecker,
1909     };
1910    
1911     $Element->{$HTML_NS}->{ol} = {
1912     attrs_checker => $GetHTMLAttrsChecker->({
1913     start => $HTMLIntegerAttrChecker,
1914     }),
1915     checker => sub {
1916     my ($self, $todo) = @_;
1917     my $el = $todo->{node};
1918     my $new_todos = [];
1919     my @nodes = (@{$el->child_nodes});
1920    
1921     while (@nodes) {
1922     my $node = shift @nodes;
1923     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1924    
1925     my $nt = $node->node_type;
1926     if ($nt == 1) {
1927     my $node_ns = $node->namespace_uri;
1928     $node_ns = '' unless defined $node_ns;
1929     my $node_ln = $node->manakai_local_name;
1930     ## NOTE: |minuses| list is not checked since redundant
1931 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1932     #
1933     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1934 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1935     }
1936     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1937     unshift @nodes, @$sib;
1938     push @$new_todos, @$ch;
1939     } elsif ($nt == 3 or $nt == 4) {
1940     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1941     $self->{onerror}->(node => $node, type => 'character not allowed');
1942 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1943 wakaba 1.1 }
1944     } elsif ($nt == 5) {
1945     unshift @nodes, @{$node->child_nodes};
1946     }
1947     }
1948    
1949     if ($todo->{inline}) {
1950     for (@$new_todos) {
1951     $_->{inline} = 1;
1952     }
1953     }
1954     return ($new_todos);
1955     },
1956     };
1957    
1958     $Element->{$HTML_NS}->{ul} = {
1959     attrs_checker => $GetHTMLAttrsChecker->({}),
1960     checker => $Element->{$HTML_NS}->{ol}->{checker},
1961     };
1962    
1963     $Element->{$HTML_NS}->{li} = {
1964     attrs_checker => $GetHTMLAttrsChecker->({
1965     start => sub {
1966     my ($self, $attr) = @_;
1967     my $parent = $attr->owner_element->manakai_parent_element;
1968     if (defined $parent) {
1969     my $parent_ns = $parent->namespace_uri;
1970     $parent_ns = '' unless defined $parent_ns;
1971     my $parent_ln = $parent->manakai_local_name;
1972     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1973     $self->{onerror}->(node => $attr, level => 'unsupported',
1974     type => 'attribute');
1975     }
1976     }
1977     $HTMLIntegerAttrChecker->($self, $attr);
1978     },
1979     }),
1980     checker => sub {
1981     my ($self, $todo) = @_;
1982     if ($todo->{inline}) {
1983     return $HTMLInlineChecker->($self, $todo);
1984     } else {
1985     return $HTMLBlockOrInlineChecker->($self, $todo);
1986     }
1987     },
1988     };
1989    
1990     $Element->{$HTML_NS}->{dl} = {
1991     attrs_checker => $GetHTMLAttrsChecker->({}),
1992     checker => sub {
1993     my ($self, $todo) = @_;
1994     my $el = $todo->{node};
1995     my $new_todos = [];
1996     my @nodes = (@{$el->child_nodes});
1997    
1998     my $phase = 'before dt';
1999     while (@nodes) {
2000     my $node = shift @nodes;
2001     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2002    
2003     my $nt = $node->node_type;
2004     if ($nt == 1) {
2005     my $node_ns = $node->namespace_uri;
2006     $node_ns = '' unless defined $node_ns;
2007     my $node_ln = $node->manakai_local_name;
2008     ## NOTE: |minuses| list is not checked since redundant
2009 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2010     #
2011     } elsif ($phase eq 'in dds') {
2012 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2013     #$phase = 'in dds';
2014     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2015     $phase = 'in dts';
2016     } else {
2017     $self->{onerror}->(node => $node, type => 'element not allowed');
2018     }
2019     } elsif ($phase eq 'in dts') {
2020     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2021     #$phase = 'in dts';
2022     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2023     $phase = 'in dds';
2024     } else {
2025     $self->{onerror}->(node => $node, type => 'element not allowed');
2026     }
2027     } else { # before dt
2028     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2029     $phase = 'in dts';
2030     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2031     $self->{onerror}
2032     ->(node => $node, type => 'ps element missing:dt');
2033     $phase = 'in dds';
2034     } else {
2035     $self->{onerror}->(node => $node, type => 'element not allowed');
2036     }
2037     }
2038     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2039     unshift @nodes, @$sib;
2040     push @$new_todos, @$ch;
2041     } elsif ($nt == 3 or $nt == 4) {
2042     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2043     $self->{onerror}->(node => $node, type => 'character not allowed');
2044 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2045 wakaba 1.1 }
2046     } elsif ($nt == 5) {
2047     unshift @nodes, @{$node->child_nodes};
2048     }
2049     }
2050     if ($phase eq 'in dts') {
2051 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
2052 wakaba 1.1 }
2053    
2054     if ($todo->{inline}) {
2055     for (@$new_todos) {
2056     $_->{inline} = 1;
2057     }
2058     }
2059     return ($new_todos);
2060     },
2061     };
2062    
2063     $Element->{$HTML_NS}->{dt} = {
2064     attrs_checker => $GetHTMLAttrsChecker->({}),
2065     checker => $HTMLStrictlyInlineChecker,
2066     };
2067    
2068     $Element->{$HTML_NS}->{dd} = {
2069     attrs_checker => $GetHTMLAttrsChecker->({}),
2070     checker => $Element->{$HTML_NS}->{li}->{checker},
2071     };
2072    
2073     $Element->{$HTML_NS}->{a} = {
2074     attrs_checker => sub {
2075     my ($self, $todo) = @_;
2076     my %attr;
2077     for my $attr (@{$todo->{node}->attributes}) {
2078     my $attr_ns = $attr->namespace_uri;
2079     $attr_ns = '' unless defined $attr_ns;
2080     my $attr_ln = $attr->manakai_local_name;
2081     my $checker;
2082     if ($attr_ns eq '') {
2083     $checker = {
2084     target => $HTMLTargetAttrChecker,
2085     href => $HTMLURIAttrChecker,
2086     ping => $HTMLSpaceURIsAttrChecker,
2087 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2088 wakaba 1.1 media => $HTMLMQAttrChecker,
2089     hreflang => $HTMLLanguageTagAttrChecker,
2090     type => $HTMLIMTAttrChecker,
2091     }->{$attr_ln};
2092     if ($checker) {
2093     $attr{$attr_ln} = $attr;
2094     } else {
2095     $checker = $HTMLAttrChecker->{$attr_ln};
2096     }
2097     }
2098     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2099     || $AttrChecker->{$attr_ns}->{''};
2100     if ($checker) {
2101     $checker->($self, $attr) if ref $checker;
2102     } else {
2103     $self->{onerror}->(node => $attr, level => 'unsupported',
2104     type => 'attribute');
2105     ## ISSUE: No comformance createria for unknown attributes in the spec
2106     }
2107     }
2108    
2109 wakaba 1.4 if (defined $attr{href}) {
2110     $self->{has_hyperlink_element} = 1;
2111     } else {
2112 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
2113     if (defined $attr{$_}) {
2114     $self->{onerror}->(node => $attr{$_},
2115     type => 'attribute not allowed');
2116     }
2117     }
2118     }
2119     },
2120     checker => sub {
2121     my ($self, $todo) = @_;
2122    
2123     my $end = $self->_add_minuses ($HTMLInteractiveElements);
2124     my ($new_todos, $ch)
2125 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
2126 wakaba 1.1 push @$new_todos, $end;
2127    
2128 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
2129     $_->{flag}->{in_a_href} = 1 for @$new_todos;
2130     }
2131 wakaba 1.1
2132     return ($new_todos, $ch);
2133     },
2134     };
2135    
2136     $Element->{$HTML_NS}->{q} = {
2137     attrs_checker => $GetHTMLAttrsChecker->({
2138     cite => $HTMLURIAttrChecker,
2139     }),
2140     checker => $HTMLInlineOrStrictlyInlineChecker,
2141     };
2142    
2143     $Element->{$HTML_NS}->{cite} = {
2144     attrs_checker => $GetHTMLAttrsChecker->({}),
2145     checker => $HTMLStrictlyInlineChecker,
2146     };
2147    
2148     $Element->{$HTML_NS}->{em} = {
2149     attrs_checker => $GetHTMLAttrsChecker->({}),
2150     checker => $HTMLInlineOrStrictlyInlineChecker,
2151     };
2152    
2153     $Element->{$HTML_NS}->{strong} = {
2154     attrs_checker => $GetHTMLAttrsChecker->({}),
2155     checker => $HTMLInlineOrStrictlyInlineChecker,
2156     };
2157    
2158     $Element->{$HTML_NS}->{small} = {
2159     attrs_checker => $GetHTMLAttrsChecker->({}),
2160     checker => $HTMLInlineOrStrictlyInlineChecker,
2161     };
2162    
2163     $Element->{$HTML_NS}->{m} = {
2164     attrs_checker => $GetHTMLAttrsChecker->({}),
2165     checker => $HTMLInlineOrStrictlyInlineChecker,
2166     };
2167    
2168     $Element->{$HTML_NS}->{dfn} = {
2169     attrs_checker => $GetHTMLAttrsChecker->({}),
2170     checker => sub {
2171     my ($self, $todo) = @_;
2172    
2173     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
2174     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
2175     push @$sib, $end;
2176    
2177     my $node = $todo->{node};
2178     my $term = $node->get_attribute_ns (undef, 'title');
2179     unless (defined $term) {
2180     for my $child (@{$node->child_nodes}) {
2181     if ($child->node_type == 1) { # ELEMENT_NODE
2182     if (defined $term) {
2183     undef $term;
2184     last;
2185     } elsif ($child->manakai_local_name eq 'abbr') {
2186     my $nsuri = $child->namespace_uri;
2187     if (defined $nsuri and $nsuri eq $HTML_NS) {
2188     my $attr = $child->get_attribute_node_ns (undef, 'title');
2189     if ($attr) {
2190     $term = $attr->value;
2191     }
2192     }
2193     }
2194     } elsif ($child->node_type == 3 or $child->node_type == 4) {
2195     ## TEXT_NODE or CDATA_SECTION_NODE
2196     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
2197     next;
2198     }
2199     undef $term;
2200     last;
2201     }
2202     }
2203     unless (defined $term) {
2204     $term = $node->text_content;
2205     }
2206     }
2207     if ($self->{term}->{$term}) {
2208     $self->{onerror}->(node => $node, type => 'duplicate term');
2209     push @{$self->{term}->{$term}}, $node;
2210     } else {
2211     $self->{term}->{$term} = [$node];
2212     }
2213     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
2214     ## has |title|.
2215    
2216     return ($sib, $ch);
2217     },
2218     };
2219    
2220     $Element->{$HTML_NS}->{abbr} = {
2221     attrs_checker => $GetHTMLAttrsChecker->({
2222     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
2223     ## not different. The spec says that the |title| MAY be omitted
2224     ## if there is a |dfn| whose defining term is the abbreviation,
2225     ## but it does not prohibit |abbr| w/o |title| in other cases.
2226     }),
2227     checker => $HTMLStrictlyInlineChecker,
2228     };
2229    
2230     $Element->{$HTML_NS}->{time} = {
2231     attrs_checker => $GetHTMLAttrsChecker->({
2232     datetime => sub { 1 }, # checked in |checker|
2233     }),
2234     ## TODO: Write tests
2235     checker => sub {
2236     my ($self, $todo) = @_;
2237    
2238     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
2239     my $input;
2240     my $reg_sp;
2241     my $input_node;
2242     if ($attr) {
2243     $input = $attr->value;
2244     $reg_sp = qr/[\x09-\x0D\x20]*/;
2245     $input_node = $attr;
2246     } else {
2247     $input = $todo->{node}->text_content;
2248     $reg_sp = qr/\p{Zs}*/;
2249     $input_node = $todo->{node};
2250    
2251     ## ISSUE: What is the definition for "successfully extracts a date
2252     ## or time"? If the algorithm says the string is invalid but
2253     ## return some date or time, is it "successfully"?
2254     }
2255    
2256     my $hour;
2257     my $minute;
2258     my $second;
2259     if ($input =~ /
2260     \A
2261     [\x09-\x0D\x20]*
2262     ([0-9]+) # 1
2263     (?>
2264     -([0-9]+) # 2
2265     -([0-9]+) # 3
2266     [\x09-\x0D\x20]*
2267     (?>
2268     T
2269     [\x09-\x0D\x20]*
2270     )?
2271     ([0-9]+) # 4
2272     :([0-9]+) # 5
2273     (?>
2274     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2275     )?
2276     [\x09-\x0D\x20]*
2277     (?>
2278     Z
2279     [\x09-\x0D\x20]*
2280     |
2281     [+-]([0-9]+):([0-9]+) # 7, 8
2282     [\x09-\x0D\x20]*
2283     )?
2284     \z
2285     |
2286     :([0-9]+) # 9
2287     (?>
2288     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2289     )?
2290     [\x09-\x0D\x20]*\z
2291     )
2292     /x) {
2293     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2294     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2295     length $4 != 2 or length $5 != 2) {
2296     $self->{onerror}->(node => $input_node,
2297     type => 'dateortime:syntax error');
2298     }
2299    
2300     if (1 <= $2 and $2 <= 12) {
2301     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2302     if $3 < 1 or
2303     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
2304     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2305     if $2 == 2 and $3 == 29 and
2306     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2307     } else {
2308     $self->{onerror}->(node => $input_node,
2309     type => 'datetime:bad month');
2310     }
2311    
2312     ($hour, $minute, $second) = ($4, $5, $6);
2313    
2314     if (defined $7) { ## [+-]hh:mm
2315     if (length $7 != 2 or length $8 != 2) {
2316     $self->{onerror}->(node => $input_node,
2317     type => 'dateortime:syntax error');
2318     }
2319    
2320     $self->{onerror}->(node => $input_node,
2321     type => 'datetime:bad timezone hour')
2322     if $7 > 23;
2323     $self->{onerror}->(node => $input_node,
2324     type => 'datetime:bad timezone minute')
2325     if $8 > 59;
2326     }
2327     } else { ## hh:mm
2328     if (length $1 != 2 or length $9 != 2) {
2329     $self->{onerror}->(node => $input_node,
2330     type => qq'dateortime:syntax error');
2331     }
2332    
2333     ($hour, $minute, $second) = ($1, $9, $10);
2334     }
2335    
2336     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2337     if $hour > 23;
2338     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2339     if $minute > 59;
2340    
2341     if (defined $second) { ## s
2342     ## NOTE: Integer part of second don't have to have length of two.
2343    
2344     if (substr ($second, 0, 1) eq '.') {
2345     $self->{onerror}->(node => $input_node,
2346     type => 'dateortime:syntax error');
2347     }
2348    
2349     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2350     if $second >= 60;
2351     }
2352     } else {
2353     $self->{onerror}->(node => $input_node,
2354     type => 'dateortime:syntax error');
2355     }
2356    
2357     return $HTMLStrictlyInlineChecker->($self, $todo);
2358     },
2359     };
2360    
2361     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2362     attrs_checker => $GetHTMLAttrsChecker->({
2363     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2364     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2365     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2366     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2367     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2368     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2369     }),
2370     checker => $HTMLStrictlyInlineChecker,
2371     };
2372    
2373     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2374     attrs_checker => $GetHTMLAttrsChecker->({
2375     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2376     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2377     }),
2378     checker => $HTMLStrictlyInlineChecker,
2379     };
2380    
2381     $Element->{$HTML_NS}->{code} = {
2382     attrs_checker => $GetHTMLAttrsChecker->({}),
2383     ## NOTE: Though |title| has special semantics,
2384     ## syntatically same as the |title| as global attribute.
2385     checker => $HTMLInlineOrStrictlyInlineChecker,
2386     };
2387    
2388     $Element->{$HTML_NS}->{var} = {
2389     attrs_checker => $GetHTMLAttrsChecker->({}),
2390     ## NOTE: Though |title| has special semantics,
2391     ## syntatically same as the |title| as global attribute.
2392     checker => $HTMLStrictlyInlineChecker,
2393     };
2394    
2395     $Element->{$HTML_NS}->{samp} = {
2396     attrs_checker => $GetHTMLAttrsChecker->({}),
2397     ## NOTE: Though |title| has special semantics,
2398     ## syntatically same as the |title| as global attribute.
2399     checker => $HTMLInlineOrStrictlyInlineChecker,
2400     };
2401    
2402     $Element->{$HTML_NS}->{kbd} = {
2403     attrs_checker => $GetHTMLAttrsChecker->({}),
2404     checker => $HTMLStrictlyInlineChecker,
2405     };
2406    
2407     $Element->{$HTML_NS}->{sub} = {
2408     attrs_checker => $GetHTMLAttrsChecker->({}),
2409     checker => $HTMLStrictlyInlineChecker,
2410     };
2411    
2412     $Element->{$HTML_NS}->{sup} = {
2413     attrs_checker => $GetHTMLAttrsChecker->({}),
2414     checker => $HTMLStrictlyInlineChecker,
2415     };
2416    
2417     $Element->{$HTML_NS}->{span} = {
2418     attrs_checker => $GetHTMLAttrsChecker->({}),
2419     ## NOTE: Though |title| has special semantics,
2420     ## syntatically same as the |title| as global attribute.
2421     checker => $HTMLInlineOrStrictlyInlineChecker,
2422     };
2423    
2424     $Element->{$HTML_NS}->{i} = {
2425     attrs_checker => $GetHTMLAttrsChecker->({}),
2426     ## NOTE: Though |title| has special semantics,
2427     ## syntatically same as the |title| as global attribute.
2428     checker => $HTMLStrictlyInlineChecker,
2429     };
2430    
2431     $Element->{$HTML_NS}->{b} = {
2432     attrs_checker => $GetHTMLAttrsChecker->({}),
2433     checker => $HTMLStrictlyInlineChecker,
2434     };
2435    
2436     $Element->{$HTML_NS}->{bdo} = {
2437     attrs_checker => sub {
2438     my ($self, $todo) = @_;
2439     $GetHTMLAttrsChecker->({})->($self, $todo);
2440     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2441     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2442     }
2443     },
2444     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2445     checker => $HTMLStrictlyInlineChecker,
2446     };
2447    
2448     $Element->{$HTML_NS}->{ins} = {
2449     attrs_checker => $GetHTMLAttrsChecker->({
2450     cite => $HTMLURIAttrChecker,
2451     datetime => $HTMLDatetimeAttrChecker,
2452     }),
2453     checker => $HTMLTransparentChecker,
2454     };
2455    
2456     $Element->{$HTML_NS}->{del} = {
2457     attrs_checker => $GetHTMLAttrsChecker->({
2458     cite => $HTMLURIAttrChecker,
2459     datetime => $HTMLDatetimeAttrChecker,
2460     }),
2461     checker => sub {
2462     my ($self, $todo) = @_;
2463    
2464     my $parent = $todo->{node}->manakai_parent_element;
2465     if (defined $parent) {
2466 wakaba 1.25 my $sig_flag = $todo->{flag}->{has_descendant}->{significant};
2467 wakaba 1.1 my $nsuri = $parent->namespace_uri;
2468     $nsuri = '' unless defined $nsuri;
2469     my $ln = $parent->manakai_local_name;
2470     my $eldef = $Element->{$nsuri}->{$ln} ||
2471     $Element->{$nsuri}->{''} ||
2472     $ElementDefault;
2473 wakaba 1.25 my ($new_todos) = $eldef->{checker}->($self, $todo);
2474     push @$new_todos, {type => 'code', code => sub {
2475     $todo->{flag}->{has_descendant}->{significant} = 0;
2476     }} if not $sig_flag;
2477     return $new_todos;
2478 wakaba 1.1 } else {
2479     return $HTMLBlockOrInlineChecker->($self, $todo);
2480     }
2481     },
2482     };
2483    
2484     ## TODO: figure
2485 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2486 wakaba 1.1
2487 wakaba 1.4 ## TODO: |alt|
2488 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2489     attrs_checker => sub {
2490     my ($self, $todo) = @_;
2491     $GetHTMLAttrsChecker->({
2492     alt => sub { }, ## NOTE: No syntactical requirement
2493     src => $HTMLURIAttrChecker,
2494     usemap => $HTMLUsemapAttrChecker,
2495     ismap => sub {
2496     my ($self, $attr, $parent_todo) = @_;
2497 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2498     $self->{onerror}->(node => $attr,
2499     type => 'attribute not allowed:ismap');
2500 wakaba 1.1 }
2501     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2502     },
2503     ## TODO: height
2504     ## TODO: width
2505     })->($self, $todo);
2506     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2507     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2508     }
2509     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2510     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2511     }
2512     },
2513 wakaba 1.25 checker => sub {
2514     my ($self, $todo) = @_;
2515     $todo->{flag}->{has_descendant}->{significant} = 1;
2516     return $HTMLEmptyChecker->($self, $todo);
2517     },
2518 wakaba 1.1 };
2519    
2520     $Element->{$HTML_NS}->{iframe} = {
2521     attrs_checker => $GetHTMLAttrsChecker->({
2522     src => $HTMLURIAttrChecker,
2523     }),
2524 wakaba 1.25 checker => sub {
2525     my ($self, $todo) = @_;
2526     $todo->{flag}->{has_descendant}->{significant} = 1;
2527     return $HTMLTextChecker->($self, $todo);
2528     },
2529 wakaba 1.1 };
2530    
2531     $Element->{$HTML_NS}->{embed} = {
2532     attrs_checker => sub {
2533     my ($self, $todo) = @_;
2534     my $has_src;
2535     for my $attr (@{$todo->{node}->attributes}) {
2536     my $attr_ns = $attr->namespace_uri;
2537     $attr_ns = '' unless defined $attr_ns;
2538     my $attr_ln = $attr->manakai_local_name;
2539     my $checker;
2540     if ($attr_ns eq '') {
2541     if ($attr_ln eq 'src') {
2542     $checker = $HTMLURIAttrChecker;
2543     $has_src = 1;
2544     } elsif ($attr_ln eq 'type') {
2545     $checker = $HTMLIMTAttrChecker;
2546     } else {
2547     ## TODO: height
2548     ## TODO: width
2549     $checker = $HTMLAttrChecker->{$attr_ln}
2550     || sub { }; ## NOTE: Any local attribute is ok.
2551     }
2552     }
2553     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2554     || $AttrChecker->{$attr_ns}->{''};
2555     if ($checker) {
2556     $checker->($self, $attr);
2557     } else {
2558     $self->{onerror}->(node => $attr, level => 'unsupported',
2559     type => 'attribute');
2560     ## ISSUE: No comformance createria for global attributes in the spec
2561     }
2562     }
2563    
2564     unless ($has_src) {
2565     $self->{onerror}->(node => $todo->{node},
2566     type => 'attribute missing:src');
2567     }
2568     },
2569 wakaba 1.25 checker => sub {
2570     my ($self, $todo) = @_;
2571     $todo->{flag}->{has_descendant}->{significant} = 1;
2572     return $HTMLEmptyChecker->($self, $todo);
2573     },
2574 wakaba 1.1 };
2575    
2576     $Element->{$HTML_NS}->{object} = {
2577     attrs_checker => sub {
2578     my ($self, $todo) = @_;
2579     $GetHTMLAttrsChecker->({
2580     data => $HTMLURIAttrChecker,
2581     type => $HTMLIMTAttrChecker,
2582     usemap => $HTMLUsemapAttrChecker,
2583     ## TODO: width
2584     ## TODO: height
2585     })->($self, $todo);
2586     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2587     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2588     $self->{onerror}->(node => $todo->{node},
2589     type => 'attribute missing:data|type');
2590     }
2591     }
2592     },
2593 wakaba 1.25 checker => sub {
2594     my ($self, $todo) = @_;
2595     $todo->{flag}->{has_descendant}->{significant} = 1;
2596     return $ElementDefault->{checker}->($self, $todo); ## TODO
2597     },
2598 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2599 wakaba 1.1 };
2600    
2601     $Element->{$HTML_NS}->{param} = {
2602     attrs_checker => sub {
2603     my ($self, $todo) = @_;
2604     $GetHTMLAttrsChecker->({
2605     name => sub { },
2606     value => sub { },
2607     })->($self, $todo);
2608     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2609     $self->{onerror}->(node => $todo->{node},
2610     type => 'attribute missing:name');
2611     }
2612     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2613     $self->{onerror}->(node => $todo->{node},
2614     type => 'attribute missing:value');
2615     }
2616     },
2617     checker => $HTMLEmptyChecker,
2618     };
2619    
2620     $Element->{$HTML_NS}->{video} = {
2621     attrs_checker => $GetHTMLAttrsChecker->({
2622     src => $HTMLURIAttrChecker,
2623     ## TODO: start, loopstart, loopend, end
2624     ## ISSUE: they MUST be "value time offset"s. Value?
2625 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2626 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2627     controls => $GetHTMLBooleanAttrChecker->('controls'),
2628 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2629     ## TODO: width, height (not for audio!)
2630 wakaba 1.1 }),
2631     checker => sub {
2632     my ($self, $todo) = @_;
2633 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2634 wakaba 1.1
2635     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2636     return $HTMLBlockOrInlineChecker->($self, $todo);
2637     } else {
2638     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2639     ->($self, $todo);
2640     }
2641     },
2642     };
2643    
2644     $Element->{$HTML_NS}->{audio} = {
2645     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2646     checker => $Element->{$HTML_NS}->{video}->{checker},
2647     };
2648    
2649     $Element->{$HTML_NS}->{source} = {
2650     attrs_checker => sub {
2651     my ($self, $todo) = @_;
2652     $GetHTMLAttrsChecker->({
2653     src => $HTMLURIAttrChecker,
2654     type => $HTMLIMTAttrChecker,
2655     media => $HTMLMQAttrChecker,
2656     })->($self, $todo);
2657     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2658     $self->{onerror}->(node => $todo->{node},
2659     type => 'attribute missing:src');
2660     }
2661     },
2662     checker => $HTMLEmptyChecker,
2663     };
2664    
2665     $Element->{$HTML_NS}->{canvas} = {
2666     attrs_checker => $GetHTMLAttrsChecker->({
2667     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2668     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2669     }),
2670 wakaba 1.25 checker => sub {
2671     my ($self, $todo) = @_;
2672     $todo->{flag}->{has_descendant}->{significant} = 1;
2673     return $HTMLInlineChecker->($self, $todo);
2674     },
2675 wakaba 1.1 };
2676    
2677     $Element->{$HTML_NS}->{map} = {
2678 wakaba 1.4 attrs_checker => sub {
2679     my ($self, $todo) = @_;
2680     my $has_id;
2681     $GetHTMLAttrsChecker->({
2682     id => sub {
2683     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2684     my ($self, $attr) = @_;
2685     my $value = $attr->value;
2686     if (length $value > 0) {
2687     if ($self->{id}->{$value}) {
2688     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2689     push @{$self->{id}->{$value}}, $attr;
2690     } else {
2691     $self->{id}->{$value} = [$attr];
2692     }
2693 wakaba 1.1 } else {
2694 wakaba 1.4 ## NOTE: MUST contain at least one character
2695     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2696 wakaba 1.1 }
2697 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2698     $self->{onerror}->(node => $attr, type => 'space in ID');
2699     }
2700     $self->{map}->{$value} ||= $attr;
2701     $has_id = 1;
2702     },
2703     })->($self, $todo);
2704     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2705     unless $has_id;
2706     },
2707 wakaba 1.1 checker => $HTMLBlockChecker,
2708     };
2709    
2710     $Element->{$HTML_NS}->{area} = {
2711     attrs_checker => sub {
2712     my ($self, $todo) = @_;
2713     my %attr;
2714     my $coords;
2715     for my $attr (@{$todo->{node}->attributes}) {
2716     my $attr_ns = $attr->namespace_uri;
2717     $attr_ns = '' unless defined $attr_ns;
2718     my $attr_ln = $attr->manakai_local_name;
2719     my $checker;
2720     if ($attr_ns eq '') {
2721     $checker = {
2722     alt => sub { },
2723     ## NOTE: |alt| value has no conformance creteria.
2724     shape => $GetHTMLEnumeratedAttrChecker->({
2725     circ => -1, circle => 1,
2726     default => 1,
2727     poly => 1, polygon => -1,
2728     rect => 1, rectangle => -1,
2729     }),
2730     coords => sub {
2731     my ($self, $attr) = @_;
2732     my $value = $attr->value;
2733     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2734     $coords = [split /,/, $value];
2735     } else {
2736     $self->{onerror}->(node => $attr,
2737     type => 'coords:syntax error');
2738     }
2739     },
2740     target => $HTMLTargetAttrChecker,
2741     href => $HTMLURIAttrChecker,
2742     ping => $HTMLSpaceURIsAttrChecker,
2743 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2744 wakaba 1.1 media => $HTMLMQAttrChecker,
2745     hreflang => $HTMLLanguageTagAttrChecker,
2746     type => $HTMLIMTAttrChecker,
2747     }->{$attr_ln};
2748     if ($checker) {
2749     $attr{$attr_ln} = $attr;
2750     } else {
2751     $checker = $HTMLAttrChecker->{$attr_ln};
2752     }
2753     }
2754     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2755     || $AttrChecker->{$attr_ns}->{''};
2756     if ($checker) {
2757     $checker->($self, $attr) if ref $checker;
2758     } else {
2759     $self->{onerror}->(node => $attr, level => 'unsupported',
2760     type => 'attribute');
2761     ## ISSUE: No comformance createria for unknown attributes in the spec
2762     }
2763     }
2764    
2765     if (defined $attr{href}) {
2766 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2767 wakaba 1.1 unless (defined $attr{alt}) {
2768     $self->{onerror}->(node => $todo->{node},
2769     type => 'attribute missing:alt');
2770     }
2771     } else {
2772     for (qw/target ping rel media hreflang type alt/) {
2773     if (defined $attr{$_}) {
2774     $self->{onerror}->(node => $attr{$_},
2775     type => 'attribute not allowed');
2776     }
2777     }
2778     }
2779    
2780     my $shape = 'rectangle';
2781     if (defined $attr{shape}) {
2782     $shape = {
2783     circ => 'circle', circle => 'circle',
2784     default => 'default',
2785     poly => 'polygon', polygon => 'polygon',
2786     rect => 'rectangle', rectangle => 'rectangle',
2787     }->{lc $attr{shape}->value} || 'rectangle';
2788     ## TODO: ASCII lowercase?
2789     }
2790    
2791     if ($shape eq 'circle') {
2792     if (defined $attr{coords}) {
2793     if (defined $coords) {
2794     if (@$coords == 3) {
2795     if ($coords->[2] < 0) {
2796     $self->{onerror}->(node => $attr{coords},
2797     type => 'coords:out of range:2');
2798     }
2799     } else {
2800     $self->{onerror}->(node => $attr{coords},
2801     type => 'coords:number:3:'.@$coords);
2802     }
2803     } else {
2804     ## NOTE: A syntax error has been reported.
2805     }
2806     } else {
2807     $self->{onerror}->(node => $todo->{node},
2808     type => 'attribute missing:coords');
2809     }
2810     } elsif ($shape eq 'default') {
2811     if (defined $attr{coords}) {
2812     $self->{onerror}->(node => $attr{coords},
2813     type => 'attribute not allowed');
2814     }
2815     } elsif ($shape eq 'polygon') {
2816     if (defined $attr{coords}) {
2817     if (defined $coords) {
2818     if (@$coords >= 6) {
2819     unless (@$coords % 2 == 0) {
2820     $self->{onerror}->(node => $attr{coords},
2821     type => 'coords:number:even:'.@$coords);
2822     }
2823     } else {
2824     $self->{onerror}->(node => $attr{coords},
2825     type => 'coords:number:>=6:'.@$coords);
2826     }
2827     } else {
2828     ## NOTE: A syntax error has been reported.
2829     }
2830     } else {
2831     $self->{onerror}->(node => $todo->{node},
2832     type => 'attribute missing:coords');
2833     }
2834     } elsif ($shape eq 'rectangle') {
2835     if (defined $attr{coords}) {
2836     if (defined $coords) {
2837     if (@$coords == 4) {
2838     unless ($coords->[0] < $coords->[2]) {
2839     $self->{onerror}->(node => $attr{coords},
2840     type => 'coords:out of range:0');
2841     }
2842     unless ($coords->[1] < $coords->[3]) {
2843     $self->{onerror}->(node => $attr{coords},
2844     type => 'coords:out of range:1');
2845     }
2846     } else {
2847     $self->{onerror}->(node => $attr{coords},
2848     type => 'coords:number:4:'.@$coords);
2849     }
2850     } else {
2851     ## NOTE: A syntax error has been reported.
2852     }
2853     } else {
2854     $self->{onerror}->(node => $todo->{node},
2855     type => 'attribute missing:coords');
2856     }
2857     }
2858     },
2859     checker => $HTMLEmptyChecker,
2860     };
2861     ## TODO: only in map
2862    
2863     $Element->{$HTML_NS}->{table} = {
2864     attrs_checker => $GetHTMLAttrsChecker->({}),
2865     checker => sub {
2866     my ($self, $todo) = @_;
2867     my $el = $todo->{node};
2868     my $new_todos = [];
2869     my @nodes = (@{$el->child_nodes});
2870    
2871     my $phase = 'before caption';
2872     my $has_tfoot;
2873     while (@nodes) {
2874     my $node = shift @nodes;
2875     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2876    
2877     my $nt = $node->node_type;
2878     if ($nt == 1) {
2879     my $node_ns = $node->namespace_uri;
2880     $node_ns = '' unless defined $node_ns;
2881     my $node_ln = $node->manakai_local_name;
2882     ## NOTE: |minuses| list is not checked since redundant
2883 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2884     #
2885     } elsif ($phase eq 'in tbodys') {
2886 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2887     #$phase = 'in tbodys';
2888     } elsif (not $has_tfoot and
2889     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2890     $phase = 'after tfoot';
2891     $has_tfoot = 1;
2892     } else {
2893     $self->{onerror}->(node => $node, type => 'element not allowed');
2894     }
2895     } elsif ($phase eq 'in trs') {
2896     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2897     #$phase = 'in trs';
2898     } elsif (not $has_tfoot and
2899     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2900     $phase = 'after tfoot';
2901     $has_tfoot = 1;
2902     } else {
2903     $self->{onerror}->(node => $node, type => 'element not allowed');
2904     }
2905     } elsif ($phase eq 'after thead') {
2906     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2907     $phase = 'in tbodys';
2908     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2909     $phase = 'in trs';
2910     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2911     $phase = 'in tbodys';
2912     $has_tfoot = 1;
2913     } else {
2914     $self->{onerror}->(node => $node, type => 'element not allowed');
2915     }
2916     } elsif ($phase eq 'in colgroup') {
2917     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2918     $phase = 'in colgroup';
2919     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2920     $phase = 'after thead';
2921     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2922     $phase = 'in tbodys';
2923     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2924     $phase = 'in trs';
2925     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2926     $phase = 'in tbodys';
2927     $has_tfoot = 1;
2928     } else {
2929     $self->{onerror}->(node => $node, type => 'element not allowed');
2930     }
2931     } elsif ($phase eq 'before caption') {
2932     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2933     $phase = 'in colgroup';
2934     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2935     $phase = 'in colgroup';
2936     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2937     $phase = 'after thead';
2938     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2939     $phase = 'in tbodys';
2940     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2941     $phase = 'in trs';
2942     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2943     $phase = 'in tbodys';
2944     $has_tfoot = 1;
2945     } else {
2946     $self->{onerror}->(node => $node, type => 'element not allowed');
2947     }
2948     } else { # after tfoot
2949     $self->{onerror}->(node => $node, type => 'element not allowed');
2950     }
2951     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2952     unshift @nodes, @$sib;
2953     push @$new_todos, @$ch;
2954     } elsif ($nt == 3 or $nt == 4) {
2955     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2956     $self->{onerror}->(node => $node, type => 'character not allowed');
2957 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2958 wakaba 1.1 }
2959     } elsif ($nt == 5) {
2960     unshift @nodes, @{$node->child_nodes};
2961     }
2962     }
2963    
2964     ## Table model errors
2965     require Whatpm::HTMLTable;
2966     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2967     my %opt = @_;
2968     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2969     });
2970     push @{$self->{return}->{table}}, $todo->{node};
2971    
2972     return ($new_todos);
2973     },
2974     };
2975    
2976     $Element->{$HTML_NS}->{caption} = {
2977     attrs_checker => $GetHTMLAttrsChecker->({}),
2978 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2979 wakaba 1.1 };
2980    
2981     $Element->{$HTML_NS}->{colgroup} = {
2982     attrs_checker => $GetHTMLAttrsChecker->({
2983     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2984     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2985     ## TODO: "attribute not supported" if |col|.
2986     ## ISSUE: MUST NOT if any |col|?
2987     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2988     }),
2989     checker => sub {
2990     my ($self, $todo) = @_;
2991     my $el = $todo->{node};
2992     my $new_todos = [];
2993     my @nodes = (@{$el->child_nodes});
2994    
2995     while (@nodes) {
2996     my $node = shift @nodes;
2997     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2998    
2999     my $nt = $node->node_type;
3000     if ($nt == 1) {
3001     my $node_ns = $node->namespace_uri;
3002     $node_ns = '' unless defined $node_ns;
3003     my $node_ln = $node->manakai_local_name;
3004     ## NOTE: |minuses| list is not checked since redundant
3005 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3006     #
3007     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
3008 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
3009     }
3010     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3011     unshift @nodes, @$sib;
3012     push @$new_todos, @$ch;
3013     } elsif ($nt == 3 or $nt == 4) {
3014     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3015     $self->{onerror}->(node => $node, type => 'character not allowed');
3016 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3017 wakaba 1.1 }
3018     } elsif ($nt == 5) {
3019     unshift @nodes, @{$node->child_nodes};
3020     }
3021     }
3022     return ($new_todos);
3023     },
3024     };
3025    
3026     $Element->{$HTML_NS}->{col} = {
3027     attrs_checker => $GetHTMLAttrsChecker->({
3028     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3029     }),
3030     checker => $HTMLEmptyChecker,
3031     };
3032    
3033     $Element->{$HTML_NS}->{tbody} = {
3034     attrs_checker => $GetHTMLAttrsChecker->({}),
3035     checker => sub {
3036     my ($self, $todo) = @_;
3037     my $el = $todo->{node};
3038     my $new_todos = [];
3039     my @nodes = (@{$el->child_nodes});
3040    
3041     my $has_tr;
3042     while (@nodes) {
3043     my $node = shift @nodes;
3044     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3045    
3046     my $nt = $node->node_type;
3047     if ($nt == 1) {
3048     my $node_ns = $node->namespace_uri;
3049     $node_ns = '' unless defined $node_ns;
3050     my $node_ln = $node->manakai_local_name;
3051     ## NOTE: |minuses| list is not checked since redundant
3052 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3053     #
3054     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3055 wakaba 1.1 $has_tr = 1;
3056     } else {
3057     $self->{onerror}->(node => $node, type => 'element not allowed');
3058     }
3059     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3060     unshift @nodes, @$sib;
3061     push @$new_todos, @$ch;
3062     } elsif ($nt == 3 or $nt == 4) {
3063     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3064     $self->{onerror}->(node => $node, type => 'character not allowed');
3065 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3066 wakaba 1.1 }
3067     } elsif ($nt == 5) {
3068     unshift @nodes, @{$node->child_nodes};
3069     }
3070     }
3071     unless ($has_tr) {
3072     $self->{onerror}->(node => $el, type => 'child element missing:tr');
3073     }
3074     return ($new_todos);
3075     },
3076     };
3077    
3078     $Element->{$HTML_NS}->{thead} = {
3079     attrs_checker => $GetHTMLAttrsChecker->({}),
3080     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3081     };
3082    
3083     $Element->{$HTML_NS}->{tfoot} = {
3084     attrs_checker => $GetHTMLAttrsChecker->({}),
3085     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3086     };
3087    
3088     $Element->{$HTML_NS}->{tr} = {
3089     attrs_checker => $GetHTMLAttrsChecker->({}),
3090     checker => sub {
3091     my ($self, $todo) = @_;
3092     my $el = $todo->{node};
3093     my $new_todos = [];
3094     my @nodes = (@{$el->child_nodes});
3095    
3096     my $has_td;
3097     while (@nodes) {
3098     my $node = shift @nodes;
3099     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3100    
3101     my $nt = $node->node_type;
3102     if ($nt == 1) {
3103     my $node_ns = $node->namespace_uri;
3104     $node_ns = '' unless defined $node_ns;
3105     my $node_ln = $node->manakai_local_name;
3106     ## NOTE: |minuses| list is not checked since redundant
3107 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3108     #
3109     } elsif ($node_ns eq $HTML_NS and
3110     ($node_ln eq 'td' or $node_ln eq 'th')) {
3111 wakaba 1.1 $has_td = 1;
3112     } else {
3113     $self->{onerror}->(node => $node, type => 'element not allowed');
3114     }
3115     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3116     unshift @nodes, @$sib;
3117     push @$new_todos, @$ch;
3118     } elsif ($nt == 3 or $nt == 4) {
3119     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3120     $self->{onerror}->(node => $node, type => 'character not allowed');
3121 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3122 wakaba 1.1 }
3123     } elsif ($nt == 5) {
3124     unshift @nodes, @{$node->child_nodes};
3125     }
3126     }
3127     unless ($has_td) {
3128     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
3129     }
3130     return ($new_todos);
3131     },
3132     };
3133    
3134     $Element->{$HTML_NS}->{td} = {
3135     attrs_checker => $GetHTMLAttrsChecker->({
3136     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3137     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3138     }),
3139     checker => $HTMLBlockOrInlineChecker,
3140     };
3141    
3142     $Element->{$HTML_NS}->{th} = {
3143     attrs_checker => $GetHTMLAttrsChecker->({
3144     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3145     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3146     scope => $GetHTMLEnumeratedAttrChecker
3147     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
3148     }),
3149     checker => $HTMLBlockOrInlineChecker,
3150     };
3151    
3152     ## TODO: forms
3153 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
3154 wakaba 1.1
3155     $Element->{$HTML_NS}->{script} = {
3156 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
3157 wakaba 1.1 src => $HTMLURIAttrChecker,
3158     defer => $GetHTMLBooleanAttrChecker->('defer'),
3159     async => $GetHTMLBooleanAttrChecker->('async'),
3160     type => $HTMLIMTAttrChecker,
3161 wakaba 1.9 }),
3162 wakaba 1.1 checker => sub {
3163     my ($self, $todo) = @_;
3164    
3165     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
3166     return $HTMLEmptyChecker->($self, $todo);
3167     } else {
3168     ## NOTE: No content model conformance in HTML5 spec.
3169     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
3170     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
3171     if ((defined $type and $type eq '') or
3172     (defined $language and $language eq '')) {
3173     $type = 'text/javascript';
3174     } elsif (defined $type) {
3175     #
3176     } elsif (defined $language) {
3177     $type = 'text/' . $language;
3178     } else {
3179     $type = 'text/javascript';
3180     }
3181     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
3182     type => 'script:'.$type); ## TODO: $type normalization
3183     return $AnyChecker->($self, $todo);
3184     }
3185     },
3186     };
3187 wakaba 1.25 ## ISSUE: Significant check and text child node
3188 wakaba 1.1
3189     ## NOTE: When script is disabled.
3190     $Element->{$HTML_NS}->{noscript} = {
3191 wakaba 1.3 attrs_checker => sub {
3192     my ($self, $todo) = @_;
3193    
3194     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
3195     ## since the later is not invoked when the |noscript| is used as a
3196     ## transparent element.
3197     unless ($todo->{node}->owner_document->manakai_is_html) {
3198     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
3199     }
3200    
3201     $GetHTMLAttrsChecker->({})->($self, $todo);
3202     },
3203 wakaba 1.1 checker => sub {
3204     my ($self, $todo) = @_;
3205    
3206 wakaba 1.3 if ($todo->{flag}->{in_head}) {
3207     my $new_todos = [];
3208     my @nodes = (@{$todo->{node}->child_nodes});
3209    
3210     while (@nodes) {
3211     my $node = shift @nodes;
3212     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3213    
3214     my $nt = $node->node_type;
3215     if ($nt == 1) {
3216     my $node_ns = $node->namespace_uri;
3217     $node_ns = '' unless defined $node_ns;
3218     my $node_ln = $node->manakai_local_name;
3219 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3220     #
3221     } elsif ($node_ns eq $HTML_NS) {
3222 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
3223     #
3224     } elsif ($node_ln eq 'meta') {
3225 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
3226     #
3227 wakaba 1.3 } else {
3228 wakaba 1.5 $self->{onerror}->(node => $node,
3229     type => 'element not allowed');
3230 wakaba 1.3 }
3231     } else {
3232     $self->{onerror}->(node => $node, type => 'element not allowed');
3233     }
3234     } else {
3235     $self->{onerror}->(node => $node, type => 'element not allowed');
3236     }
3237    
3238     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3239     unshift @nodes, @$sib;
3240     push @$new_todos, @$ch;
3241     } elsif ($nt == 3 or $nt == 4) {
3242     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3243     $self->{onerror}->(node => $node, type => 'character not allowed');
3244 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3245 wakaba 1.3 }
3246     } elsif ($nt == 5) {
3247     unshift @nodes, @{$node->child_nodes};
3248     }
3249     }
3250     return ($new_todos);
3251     } else {
3252     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
3253     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
3254     push @$sib, $end;
3255     return ($sib, $ch);
3256     }
3257 wakaba 1.1 },
3258     };
3259 wakaba 1.3
3260     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
3261 wakaba 1.1
3262     $Element->{$HTML_NS}->{'event-source'} = {
3263     attrs_checker => $GetHTMLAttrsChecker->({
3264     src => $HTMLURIAttrChecker,
3265     }),
3266     checker => $HTMLEmptyChecker,
3267     };
3268    
3269     $Element->{$HTML_NS}->{details} = {
3270     attrs_checker => $GetHTMLAttrsChecker->({
3271     open => $GetHTMLBooleanAttrChecker->('open'),
3272     }),
3273     checker => sub {
3274     my ($self, $todo) = @_;
3275    
3276     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3277     my ($sib, $ch)
3278     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
3279     ->($self, $todo);
3280     push @$sib, $end;
3281     return ($sib, $ch);
3282     },
3283     };
3284    
3285     $Element->{$HTML_NS}->{datagrid} = {
3286     attrs_checker => $GetHTMLAttrsChecker->({
3287     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3288     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3289     }),
3290     checker => sub {
3291     my ($self, $todo) = @_;
3292     my $el = $todo->{node};
3293     my $new_todos = [];
3294     my @nodes = (@{$el->child_nodes});
3295    
3296 wakaba 1.25 my $old_values = {significant =>
3297     $todo->{flag}->{has_descendant}->{significant}};
3298     $todo->{flag}->{has_descendant}->{significant} = 0;
3299    
3300 wakaba 1.1 my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3301    
3302     ## Block-table Block* | table | select | datalist | Empty
3303     my $mode = 'any';
3304     while (@nodes) {
3305     my $node = shift @nodes;
3306     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3307    
3308     my $nt = $node->node_type;
3309     if ($nt == 1) {
3310     my $node_ns = $node->namespace_uri;
3311     $node_ns = '' unless defined $node_ns;
3312     my $node_ln = $node->manakai_local_name;
3313     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3314 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3315     #
3316     } elsif ($mode eq 'block') {
3317 wakaba 1.1 $not_allowed = 1
3318     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
3319     } elsif ($mode eq 'any') {
3320     if ($node_ns eq $HTML_NS and
3321     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
3322     $mode = 'none';
3323     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
3324     $mode = 'block';
3325     } else {
3326     $not_allowed = 1;
3327     }
3328     } else {
3329     $not_allowed = 1;
3330     }
3331     $self->{onerror}->(node => $node, type => 'element not allowed')
3332     if $not_allowed;
3333     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3334     unshift @nodes, @$sib;
3335     push @$new_todos, @$ch;
3336     } elsif ($nt == 3 or $nt == 4) {
3337     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3338     $self->{onerror}->(node => $node, type => 'character not allowed');
3339 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3340 wakaba 1.1 }
3341     } elsif ($nt == 5) {
3342     unshift @nodes, @{$node->child_nodes};
3343     }
3344     }
3345    
3346     push @$new_todos, $end;
3347 wakaba 1.25
3348     push @$new_todos, {
3349     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3350     old_values => $old_values,
3351 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3352 wakaba 1.25 };
3353    
3354 wakaba 1.1 return ($new_todos);
3355     },
3356     };
3357    
3358     $Element->{$HTML_NS}->{command} = {
3359     attrs_checker => $GetHTMLAttrsChecker->({
3360     checked => $GetHTMLBooleanAttrChecker->('checked'),
3361     default => $GetHTMLBooleanAttrChecker->('default'),
3362     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3363     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3364     icon => $HTMLURIAttrChecker,
3365     label => sub { }, ## NOTE: No conformance creteria
3366     radiogroup => sub { }, ## NOTE: No conformance creteria
3367     ## NOTE: |title| has special semantics, but no syntactical difference
3368     type => sub {
3369     my ($self, $attr) = @_;
3370     my $value = $attr->value;
3371     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3372     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3373     }
3374     },
3375     }),
3376     checker => $HTMLEmptyChecker,
3377     };
3378    
3379     $Element->{$HTML_NS}->{menu} = {
3380     attrs_checker => $GetHTMLAttrsChecker->({
3381     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3382     id => sub {
3383     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3384     my ($self, $attr) = @_;
3385     my $value = $attr->value;
3386     if (length $value > 0) {
3387     if ($self->{id}->{$value}) {
3388     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3389     push @{$self->{id}->{$value}}, $attr;
3390     } else {
3391     $self->{id}->{$value} = [$attr];
3392     }
3393     } else {
3394     ## NOTE: MUST contain at least one character
3395     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3396     }
3397     if ($value =~ /[\x09-\x0D\x20]/) {
3398     $self->{onerror}->(node => $attr, type => 'space in ID');
3399     }
3400     $self->{menu}->{$value} ||= $attr;
3401     ## ISSUE: <menu id=""><p contextmenu=""> match?
3402     },
3403     label => sub { }, ## NOTE: No conformance creteria
3404     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3405     }),
3406     checker => sub {
3407     my ($self, $todo) = @_;
3408     my $el = $todo->{node};
3409     my $new_todos = [];
3410     my @nodes = (@{$el->child_nodes});
3411 wakaba 1.25
3412     my $old_values = {significant =>
3413     $todo->{flag}->{has_descendant}->{significant}};
3414     $todo->{flag}->{has_descendant}->{significant} = 0;
3415 wakaba 1.1
3416     my $content = 'li or inline';
3417     while (@nodes) {
3418     my $node = shift @nodes;
3419     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3420    
3421     my $nt = $node->node_type;
3422     if ($nt == 1) {
3423     my $node_ns = $node->namespace_uri;
3424     $node_ns = '' unless defined $node_ns;
3425     my $node_ln = $node->manakai_local_name;
3426     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3427 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3428     #
3429     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3430 wakaba 1.1 if ($content eq 'inline') {
3431     $not_allowed = 1;
3432     } elsif ($content eq 'li or inline') {
3433     $content = 'li';
3434     }
3435     } else {
3436     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3437     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3438     $content = 'inline';
3439     } else {
3440     $not_allowed = 1;
3441     }
3442     }
3443     $self->{onerror}->(node => $node, type => 'element not allowed')
3444     if $not_allowed;
3445     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3446     unshift @nodes, @$sib;
3447     push @$new_todos, @$ch;
3448     } elsif ($nt == 3 or $nt == 4) {
3449     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3450     if ($content eq 'li') {
3451     $self->{onerror}->(node => $node, type => 'character not allowed');
3452     } elsif ($content eq 'li or inline') {
3453     $content = 'inline';
3454     }
3455 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3456 wakaba 1.1 }
3457     } elsif ($nt == 5) {
3458     unshift @nodes, @{$node->child_nodes};
3459     }
3460     }
3461    
3462     for (@$new_todos) {
3463     $_->{inline} = 1;
3464     }
3465 wakaba 1.25
3466     push @$new_todos, {
3467     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3468     old_values => $old_values,
3469 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3470 wakaba 1.25 };
3471    
3472 wakaba 1.1 return ($new_todos);
3473     },
3474 wakaba 1.8 };
3475    
3476     $Element->{$HTML_NS}->{datatemplate} = {
3477     attrs_checker => $GetHTMLAttrsChecker->({}),
3478     checker => sub {
3479     my ($self, $todo) = @_;
3480     my $el = $todo->{node};
3481     my $new_todos = [];
3482     my @nodes = (@{$el->child_nodes});
3483    
3484     while (@nodes) {
3485     my $node = shift @nodes;
3486     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3487    
3488     my $nt = $node->node_type;
3489     if ($nt == 1) {
3490     my $node_ns = $node->namespace_uri;
3491     $node_ns = '' unless defined $node_ns;
3492     my $node_ln = $node->manakai_local_name;
3493     ## NOTE: |minuses| list is not checked since redundant
3494     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3495     #
3496     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3497     $self->{onerror}->(node => $node,
3498     type => 'element not allowed:datatemplate');
3499     }
3500     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3501     unshift @nodes, @$sib;
3502     push @$new_todos, @$ch;
3503     } elsif ($nt == 3 or $nt == 4) {
3504     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3505     $self->{onerror}->(node => $node, type => 'character not allowed');
3506 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3507 wakaba 1.8 }
3508     } elsif ($nt == 5) {
3509     unshift @nodes, @{$node->child_nodes};
3510     }
3511     }
3512     return ($new_todos);
3513     },
3514     is_xml_root => 1,
3515     };
3516    
3517     $Element->{$HTML_NS}->{rule} = {
3518     attrs_checker => $GetHTMLAttrsChecker->({
3519 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3520 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3521 wakaba 1.8 }),
3522     checker => sub {
3523     my ($self, $todo) = @_;
3524    
3525     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3526 wakaba 1.25 my ($sib, $ch) = $HTMLAnyChecker->($self, $todo);
3527 wakaba 1.8 push @$sib, $end;
3528     return ($sib, $ch);
3529     },
3530     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3531     ## is applied to some conforming data, results in a conforming DOM tree.":
3532     ## We don't check against this.
3533     };
3534    
3535     $Element->{$HTML_NS}->{nest} = {
3536     attrs_checker => $GetHTMLAttrsChecker->({
3537 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3538     mode => sub {
3539     my ($self, $attr) = @_;
3540     my $value = $attr->value;
3541     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3542     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3543     }
3544     },
3545 wakaba 1.8 }),
3546     checker => $HTMLEmptyChecker,
3547 wakaba 1.1 };
3548    
3549     $Element->{$HTML_NS}->{legend} = {
3550     attrs_checker => $GetHTMLAttrsChecker->({}),
3551     checker => sub {
3552     my ($self, $todo) = @_;
3553    
3554     my $parent = $todo->{node}->manakai_parent_element;
3555     if (defined $parent) {
3556     my $nsuri = $parent->namespace_uri;
3557     $nsuri = '' unless defined $nsuri;
3558     my $ln = $parent->manakai_local_name;
3559     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3560     return $HTMLInlineChecker->($self, $todo);
3561     } else {
3562 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3563 wakaba 1.1 }
3564     } else {
3565     return $HTMLInlineChecker->($self, $todo);
3566     }
3567    
3568     ## ISSUE: Content model is defined only for fieldset/legend,
3569     ## details/legend, and figure/legend.
3570     },
3571     };
3572    
3573     $Element->{$HTML_NS}->{div} = {
3574     attrs_checker => $GetHTMLAttrsChecker->({}),
3575     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3576     };
3577    
3578     $Element->{$HTML_NS}->{font} = {
3579     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3580     checker => $HTMLTransparentChecker,
3581     };
3582    
3583     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3584    
3585     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24