/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11 - (hide annotations) (download)
Sun Oct 28 06:35:15 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.10: +3 -3 lines
++ whatpm/Whatpm/CSS/ChangeLog	28 Oct 2007 06:34:48 -0000
2007-10-28  Wakaba  <wakaba@suika.fam.cx>

	* SelectorsSerializer.pm (serialize_test): Namespace prefix
	IS namespace URI for sorting stability (attribute and :not()
	selectors).

	* selectors-object.en.html: Namespace URI cannot be empty.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
252     ## TODO: check significant content
253    
254     ## Strictly inline-level content
255     my $HTMLStrictlyInlineChecker = sub {
256     my ($self, $todo) = @_;
257     my $el = $todo->{node};
258     my $new_todos = [];
259     my @nodes = (@{$el->child_nodes});
260    
261     while (@nodes) {
262     my $node = shift @nodes;
263     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
264    
265     my $nt = $node->node_type;
266     if ($nt == 1) {
267     my $node_ns = $node->namespace_uri;
268     $node_ns = '' unless defined $node_ns;
269     my $node_ln = $node->manakai_local_name;
270     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
271     $not_allowed = 1
272 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
273     $self->{pluses}->{$node_ns}->{$node_ln};
274 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
275     if $not_allowed;
276     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
277     unshift @nodes, @$sib;
278     push @$new_todos, @$ch;
279     } elsif ($nt == 5) {
280     unshift @nodes, @{$node->child_nodes};
281     }
282     }
283    
284     for (@$new_todos) {
285     $_->{inline} = 1;
286     $_->{strictly_inline} = 1;
287     }
288     return ($new_todos);
289     }; # $HTMLStrictlyInlineChecker
290    
291     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
292     ## TODO: check significant content
293    
294 wakaba 1.8 ## Inline-level or strictly inline-level content
295 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
296     my ($self, $todo) = @_;
297     my $el = $todo->{node};
298     my $new_todos = [];
299     my @nodes = (@{$el->child_nodes});
300    
301     while (@nodes) {
302     my $node = shift @nodes;
303     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
304    
305     my $nt = $node->node_type;
306     if ($nt == 1) {
307     my $node_ns = $node->namespace_uri;
308     $node_ns = '' unless defined $node_ns;
309     my $node_ln = $node->manakai_local_name;
310     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
311     if ($todo->{strictly_inline}) {
312     $not_allowed = 1
313 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
314     $self->{pluses}->{$node_ns}->{$node_ln};
315 wakaba 1.1 } else {
316     $not_allowed = 1
317     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
318 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
319     $self->{pluses}->{$node_ns}->{$node_ln};
320 wakaba 1.1 }
321     $self->{onerror}->(node => $node, type => 'element not allowed')
322     if $not_allowed;
323     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
324     unshift @nodes, @$sib;
325     push @$new_todos, @$ch;
326     } elsif ($nt == 5) {
327     unshift @nodes, @{$node->child_nodes};
328     }
329     }
330    
331     for (@$new_todos) {
332     $_->{inline} = 1;
333     $_->{strictly_inline} = 1;
334     }
335     return ($new_todos);
336     }; # $HTMLInlineOrStrictlyInlineChecker
337    
338     my $HTMLSignificantInlineOrStrictlyInlineChecker
339     = $HTMLInlineOrStrictlyInlineChecker;
340     ## TODO: check significant content
341    
342 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
343 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
344     my ($self, $todo) = @_;
345     my $el = $todo->{node};
346     my $new_todos = [];
347     my @nodes = (@{$el->child_nodes});
348    
349     my $content = 'block-or-inline'; # or 'block' or 'inline'
350     my @block_not_inline;
351     while (@nodes) {
352     my $node = shift @nodes;
353     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
354    
355 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
356     ## is conforming or not.
357    
358 wakaba 1.1 my $nt = $node->node_type;
359     if ($nt == 1) {
360     my $node_ns = $node->namespace_uri;
361     $node_ns = '' unless defined $node_ns;
362     my $node_ln = $node->manakai_local_name;
363     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
364     if ($content eq 'block') {
365     $not_allowed = 1
366 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
367     $self->{pluses}->{$node_ns}->{$node_ln};
368 wakaba 1.1 } elsif ($content eq 'inline') {
369     $not_allowed = 1
370     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
371 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
372     $self->{pluses}->{$node_ns}->{$node_ln};
373 wakaba 1.1 } else {
374     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
375     my $is_inline
376     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
377     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
378    
379     push @block_not_inline, $node
380     if $is_block and not $is_inline and not $not_allowed;
381 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
382 wakaba 1.1 $content = 'inline';
383     for (@block_not_inline) {
384     $self->{onerror}->(node => $_, type => 'element not allowed');
385     }
386     $not_allowed = 1 unless $is_inline;
387     }
388     }
389     $self->{onerror}->(node => $node, type => 'element not allowed')
390     if $not_allowed;
391     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
392     unshift @nodes, @$sib;
393     push @$new_todos, @$ch;
394     } elsif ($nt == 3 or $nt == 4) {
395     if ($node->data =~ /[^\x09-\x0D\x20]/) {
396     if ($content eq 'block') {
397     $self->{onerror}->(node => $node, type => 'character not allowed');
398     } else {
399     $content = 'inline';
400     for (@block_not_inline) {
401     $self->{onerror}->(node => $_, type => 'element not allowed');
402     }
403     }
404     }
405     } elsif ($nt == 5) {
406     unshift @nodes, @{$node->child_nodes};
407     }
408     }
409    
410     if ($content eq 'inline') {
411     for (@$new_todos) {
412     $_->{inline} = 1;
413     }
414     }
415     return ($new_todos);
416     };
417    
418     ## Zero or more XXX element, then either block-level or inline-level
419     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
420     my ($elnsuri, $ellname) = @_;
421     return sub {
422     my ($self, $todo) = @_;
423     my $el = $todo->{node};
424     my $new_todos = [];
425     my @nodes = (@{$el->child_nodes});
426    
427     my $has_non_style;
428     my $content = 'block-or-inline'; # or 'block' or 'inline'
429     my @block_not_inline;
430     while (@nodes) {
431     my $node = shift @nodes;
432     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
433    
434     my $nt = $node->node_type;
435     if ($nt == 1) {
436     my $node_ns = $node->namespace_uri;
437     $node_ns = '' unless defined $node_ns;
438     my $node_ln = $node->manakai_local_name;
439     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
440     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
441     $not_allowed = 1 if $has_non_style;
442     if ($ellname eq 'style' and
443     not $node->has_attribute_ns (undef, 'scoped')) {
444     $not_allowed = 1;
445     }
446     } elsif ($content eq 'block') {
447     $has_non_style = 1;
448     $not_allowed = 1
449 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
450     $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 } elsif ($content eq 'inline') {
452     $has_non_style = 1;
453     $not_allowed = 1
454     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
455 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
456     $self->{pluses}->{$node_ns}->{$node_ln};
457 wakaba 1.1 } else {
458 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
459 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
460     my $is_inline
461     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
462     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
463    
464     push @block_not_inline, $node
465     if $is_block and not $is_inline and not $not_allowed;
466 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
467 wakaba 1.1 $content = 'inline';
468     for (@block_not_inline) {
469     $self->{onerror}->(node => $_, type => 'element not allowed');
470     }
471     $not_allowed = 1 unless $is_inline;
472     }
473     }
474     $self->{onerror}->(node => $node, type => 'element not allowed')
475     if $not_allowed;
476     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
477     unshift @nodes, @$sib;
478     push @$new_todos, @$ch;
479     } elsif ($nt == 3 or $nt == 4) {
480     if ($node->data =~ /[^\x09-\x0D\x20]/) {
481     $has_non_style = 1;
482     if ($content eq 'block') {
483     $self->{onerror}->(node => $node, type => 'character not allowed');
484     } else {
485     $content = 'inline';
486     for (@block_not_inline) {
487     $self->{onerror}->(node => $_, type => 'element not allowed');
488     }
489     }
490     }
491     } elsif ($nt == 5) {
492     unshift @nodes, @{$node->child_nodes};
493     }
494     }
495    
496     if ($content eq 'inline') {
497     for (@$new_todos) {
498     $_->{inline} = 1;
499     }
500     }
501     return ($new_todos);
502     };
503     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
504    
505     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
506    
507     our $AttrChecker;
508    
509     my $GetHTMLEnumeratedAttrChecker = sub {
510     my $states = shift; # {value => conforming ? 1 : -1}
511     return sub {
512     my ($self, $attr) = @_;
513     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
514     if ($states->{$value} > 0) {
515     #
516     } elsif ($states->{$value}) {
517     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
518     } else {
519     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
520     }
521     };
522     }; # $GetHTMLEnumeratedAttrChecker
523    
524     my $GetHTMLBooleanAttrChecker = sub {
525     my $local_name = shift;
526     return sub {
527     my ($self, $attr) = @_;
528     my $value = $attr->value;
529     unless ($value eq $local_name or $value eq '') {
530     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
531     }
532     };
533     }; # $GetHTMLBooleanAttrChecker
534    
535 wakaba 1.8 ## Unordered set of space-separated tokens
536     my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
537     my ($self, $attr) = @_;
538     my %word;
539     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
540     unless ($word{$word}) {
541     $word{$word} = 1;
542     } else {
543     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
544     }
545     }
546     }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
547    
548 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
549     ## whose allowed values are defined by the section on link types)
550     my $HTMLLinkTypesAttrChecker = sub {
551 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
552 wakaba 1.1 my %word;
553     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
554     unless ($word{$word}) {
555     $word{$word} = 1;
556     } else {
557     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
558     }
559     }
560     ## NOTE: Case sensitive match (since HTML5 spec does not say link
561     ## types are case-insensitive and it says "The value should not
562     ## be confusingly similar to any other defined value (e.g.
563     ## differing only in case).").
564     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
565     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
566     ## values to be used conformingly.
567     require Whatpm::_LinkTypeList;
568     our $LinkType;
569     for my $word (keys %word) {
570     my $def = $LinkType->{$word};
571     if (defined $def) {
572     if ($def->{status} eq 'accepted') {
573     if (defined $def->{effect}->[$a_or_area]) {
574     #
575     } else {
576     $self->{onerror}->(node => $attr,
577     type => 'link type:bad context:'.$word);
578     }
579     } elsif ($def->{status} eq 'proposal') {
580     $self->{onerror}->(node => $attr, level => 's',
581     type => 'link type:proposed:'.$word);
582     } else { # rejected or synonym
583     $self->{onerror}->(node => $attr,
584     type => 'link type:non-conforming:'.$word);
585     }
586 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
587     if ($word eq 'alternate') {
588     #
589     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
590     $todo->{has_hyperlink_link_type} = 1;
591     }
592     }
593 wakaba 1.1 if ($def->{unique}) {
594     unless ($self->{has_link_type}->{$word}) {
595     $self->{has_link_type}->{$word} = 1;
596     } else {
597     $self->{onerror}->(node => $attr,
598     type => 'link type:duplicate:'.$word);
599     }
600     }
601     } else {
602     $self->{onerror}->(node => $attr, level => 'unsupported',
603     type => 'link type:'.$word);
604     }
605     }
606 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
607     if $word{alternate} and not $word{stylesheet};
608 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
609     ## says that using both X-Pingback: header field and HTML
610     ## <link rel=pingback> is deprecated and if both appears they
611     ## SHOULD contain exactly the same value.
612     ## ISSUE: Pingback 1.0 specification defines the exact representation
613     ## of its link element, which cannot be tested by the current arch.
614     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
615     ## include any string that matches to the pattern for the rel=pingback link,
616     ## which again inpossible to test.
617     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
618     }; # $HTMLLinkTypesAttrChecker
619    
620     ## URI (or IRI)
621     my $HTMLURIAttrChecker = sub {
622     my ($self, $attr) = @_;
623     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
624     my $value = $attr->value;
625     Whatpm::URIChecker->check_iri_reference ($value, sub {
626     my %opt = @_;
627     $self->{onerror}->(node => $attr, level => $opt{level},
628     type => 'URI::'.$opt{type}.
629     (defined $opt{position} ? ':'.$opt{position} : ''));
630     });
631 wakaba 1.4 $self->{has_uri_attr} = 1;
632 wakaba 1.1 }; # $HTMLURIAttrChecker
633    
634     ## A space separated list of one or more URIs (or IRIs)
635     my $HTMLSpaceURIsAttrChecker = sub {
636     my ($self, $attr) = @_;
637     my $i = 0;
638     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
639     Whatpm::URIChecker->check_iri_reference ($value, sub {
640     my %opt = @_;
641     $self->{onerror}->(node => $attr, level => $opt{level},
642 wakaba 1.2 type => 'URIs:'.':'.
643     $opt{type}.':'.$i.
644 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
645     });
646     $i++;
647     }
648     ## ISSUE: Relative references?
649     ## ISSUE: Leading or trailing white spaces are conformant?
650     ## ISSUE: A sequence of white space characters are conformant?
651     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
652     ## NOTE: Duplication seems not an error.
653 wakaba 1.4 $self->{has_uri_attr} = 1;
654 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
655    
656     my $HTMLDatetimeAttrChecker = sub {
657     my ($self, $attr) = @_;
658     my $value = $attr->value;
659     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
660     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
661     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
662     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
663     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
664     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
665     if $d < 1 or
666     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
667     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
668     if $M == 2 and $d == 29 and
669     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
670     } else {
671     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
672     }
673     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
674     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
675     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
676     if defined $s and $s > 59;
677     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
678     if $zh > 23;
679     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
680     if $zm > 59;
681     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
682     } else {
683     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
684     }
685     }; # $HTMLDatetimeAttrChecker
686    
687     my $HTMLIntegerAttrChecker = sub {
688     my ($self, $attr) = @_;
689     my $value = $attr->value;
690     unless ($value =~ /\A-?[0-9]+\z/) {
691     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
692     }
693     }; # $HTMLIntegerAttrChecker
694    
695     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
696     my $range_check = shift;
697     return sub {
698     my ($self, $attr) = @_;
699     my $value = $attr->value;
700     if ($value =~ /\A[0-9]+\z/) {
701     unless ($range_check->($value + 0)) {
702     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
703     }
704     } else {
705     $self->{onerror}->(node => $attr,
706     type => 'nninteger:syntax error');
707     }
708     };
709     }; # $GetHTMLNonNegativeIntegerAttrChecker
710    
711     my $GetHTMLFloatingPointNumberAttrChecker = sub {
712     my $range_check = shift;
713     return sub {
714     my ($self, $attr) = @_;
715     my $value = $attr->value;
716     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
717     unless ($range_check->($value + 0)) {
718     $self->{onerror}->(node => $attr, type => 'float:out of range');
719     }
720     } else {
721     $self->{onerror}->(node => $attr,
722     type => 'float:syntax error');
723     }
724     };
725     }; # $GetHTMLFloatingPointNumberAttrChecker
726    
727     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
728     ## ISSUE: RFC 2046 does not define syntax of media types.
729     ## ISSUE: The definition of "a valid MIME type" is unknown.
730     ## Syntactical correctness?
731     my $HTMLIMTAttrChecker = sub {
732     my ($self, $attr) = @_;
733     my $value = $attr->value;
734     ## ISSUE: RFC 2045 Content-Type header field allows insertion
735     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
736     ## ISSUE: RFC 2231 extension? Maybe no.
737     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
738     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
739     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
740     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
741     my @type = ($1, $2);
742     my $param = $3;
743     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
744     if (defined $2) {
745     push @type, $1 => $2;
746     } else {
747     my $n = $1;
748     my $v = $2;
749     $v =~ s/\\(.)/$1/gs;
750     push @type, $n => $v;
751     }
752     }
753     require Whatpm::IMTChecker;
754     Whatpm::IMTChecker->check_imt (sub {
755     my %opt = @_;
756     $self->{onerror}->(node => $attr, level => $opt{level},
757     type => 'IMT:'.$opt{type});
758     }, @type);
759     } else {
760     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
761     }
762     }; # $HTMLIMTAttrChecker
763    
764     my $HTMLLanguageTagAttrChecker = sub {
765 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
766    
767 wakaba 1.1 my ($self, $attr) = @_;
768 wakaba 1.6 my $value = $attr->value;
769     require Whatpm::LangTag;
770     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
771     my %opt = @_;
772     my $type = 'LangTag:'.$opt{type};
773     $type .= ':' . $opt{subtag} if defined $opt{subtag};
774     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
775     level => $opt{level});
776     });
777 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
778 wakaba 1.6
779     ## TODO: testdata
780 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
781    
782     ## "A valid media query [MQ]"
783     my $HTMLMQAttrChecker = sub {
784     my ($self, $attr) = @_;
785     $self->{onerror}->(node => $attr, level => 'unsupported',
786     type => 'media query');
787     ## ISSUE: What is "a valid media query"?
788     }; # $HTMLMQAttrChecker
789    
790     my $HTMLEventHandlerAttrChecker = sub {
791     my ($self, $attr) = @_;
792     $self->{onerror}->(node => $attr, level => 'unsupported',
793     type => 'event handler');
794     ## TODO: MUST contain valid ECMAScript code matching the
795     ## ECMAScript |FunctionBody| production. [ECMA262]
796     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
797     ## ISSUE: Automatic semicolon insertion does not apply?
798     ## ISSUE: Other script languages?
799     }; # $HTMLEventHandlerAttrChecker
800    
801     my $HTMLUsemapAttrChecker = sub {
802     my ($self, $attr) = @_;
803     ## MUST be a valid hashed ID reference to a |map| element
804     my $value = $attr->value;
805     if ($value =~ s/^#//) {
806     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
807     push @{$self->{usemap}}, [$value => $attr];
808     } else {
809     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
810     }
811     ## NOTE: Space characters in hashed ID references are conforming.
812     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
813     }; # $HTMLUsemapAttrChecker
814    
815     my $HTMLTargetAttrChecker = sub {
816     my ($self, $attr) = @_;
817     my $value = $attr->value;
818     if ($value =~ /^_/) {
819     $value = lc $value; ## ISSUE: ASCII case-insentitive?
820     unless ({
821     _self => 1, _parent => 1, _top => 1,
822     }->{$value}) {
823     $self->{onerror}->(node => $attr,
824     type => 'reserved browsing context name');
825     }
826     } else {
827     #$ ISSUE: An empty string is conforming?
828     }
829     }; # $HTMLTargetAttrChecker
830    
831     my $HTMLAttrChecker = {
832     id => sub {
833     ## NOTE: |map| has its own variant of |id=""| checker
834     my ($self, $attr) = @_;
835     my $value = $attr->value;
836     if (length $value > 0) {
837     if ($self->{id}->{$value}) {
838     $self->{onerror}->(node => $attr, type => 'duplicate ID');
839     push @{$self->{id}->{$value}}, $attr;
840     } else {
841     $self->{id}->{$value} = [$attr];
842     }
843     if ($value =~ /[\x09-\x0D\x20]/) {
844     $self->{onerror}->(node => $attr, type => 'space in ID');
845     }
846     } else {
847     ## NOTE: MUST contain at least one character
848     $self->{onerror}->(node => $attr, type => 'empty attribute value');
849     }
850     },
851     title => sub {}, ## NOTE: No conformance creteria
852     lang => sub {
853     my ($self, $attr) = @_;
854 wakaba 1.6 my $value = $attr->value;
855     if ($value eq '') {
856     #
857     } else {
858     require Whatpm::LangTag;
859     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
860     my %opt = @_;
861     my $type = 'LangTag:'.$opt{type};
862     $type .= ':' . $opt{subtag} if defined $opt{subtag};
863     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
864     level => $opt{level});
865     });
866     }
867 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
868     unless ($attr->owner_document->manakai_is_html) {
869     $self->{onerror}->(node => $attr, type => 'in XML:lang');
870     }
871 wakaba 1.6
872     ## TODO: test data
873 wakaba 1.1 },
874     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
875     class => sub {
876     my ($self, $attr) = @_;
877     my %word;
878     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
879     unless ($word{$word}) {
880     $word{$word} = 1;
881     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
882     } else {
883     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
884     }
885     }
886     },
887     contextmenu => sub {
888     my ($self, $attr) = @_;
889     my $value = $attr->value;
890     push @{$self->{contextmenu}}, [$value => $attr];
891     ## ISSUE: "The value must be the ID of a menu element in the DOM."
892     ## What is "in the DOM"? A menu Element node that is not part
893     ## of the Document tree is in the DOM? A menu Element node that
894     ## belong to another Document tree is in the DOM?
895     },
896     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
897 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
898     ## TODO: ref, template, registrationmark
899 wakaba 1.1 };
900    
901     for (qw/
902     onabort onbeforeunload onblur onchange onclick oncontextmenu
903     ondblclick ondrag ondragend ondragenter ondragleave ondragover
904     ondragstart ondrop onerror onfocus onkeydown onkeypress
905     onkeyup onload onmessage onmousedown onmousemove onmouseout
906     onmouseover onmouseup onmousewheel onresize onscroll onselect
907     onsubmit onunload
908     /) {
909     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
910     }
911    
912     my $GetHTMLAttrsChecker = sub {
913     my $element_specific_checker = shift;
914     return sub {
915     my ($self, $todo) = @_;
916     for my $attr (@{$todo->{node}->attributes}) {
917     my $attr_ns = $attr->namespace_uri;
918     $attr_ns = '' unless defined $attr_ns;
919     my $attr_ln = $attr->manakai_local_name;
920     my $checker;
921     if ($attr_ns eq '') {
922     $checker = $element_specific_checker->{$attr_ln}
923     || $HTMLAttrChecker->{$attr_ln};
924     }
925     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
926     || $AttrChecker->{$attr_ns}->{''};
927     if ($checker) {
928     $checker->($self, $attr, $todo);
929     } else {
930     $self->{onerror}->(node => $attr, level => 'unsupported',
931     type => 'attribute');
932     ## ISSUE: No comformance createria for unknown attributes in the spec
933     }
934     }
935     };
936     }; # $GetHTMLAttrsChecker
937    
938     our $Element;
939     our $ElementDefault;
940     our $AnyChecker;
941    
942     $Element->{$HTML_NS}->{''} = {
943     attrs_checker => $GetHTMLAttrsChecker->({}),
944     checker => $ElementDefault->{checker},
945     };
946    
947     $Element->{$HTML_NS}->{html} = {
948     is_root => 1,
949     attrs_checker => $GetHTMLAttrsChecker->({
950     xmlns => sub {
951     my ($self, $attr) = @_;
952     my $value = $attr->value;
953     unless ($value eq $HTML_NS) {
954     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
955     }
956     unless ($attr->owner_document->manakai_is_html) {
957     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
958     ## TODO: Test
959     }
960     },
961     }),
962     checker => sub {
963     my ($self, $todo) = @_;
964     my $el = $todo->{node};
965     my $new_todos = [];
966     my @nodes = (@{$el->child_nodes});
967    
968     my $phase = 'before head';
969     while (@nodes) {
970     my $node = shift @nodes;
971     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
972    
973     my $nt = $node->node_type;
974     if ($nt == 1) {
975     my $node_ns = $node->namespace_uri;
976     $node_ns = '' unless defined $node_ns;
977     my $node_ln = $node->manakai_local_name;
978     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
979 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
980     #
981     } elsif ($phase eq 'before head') {
982 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
983     $phase = 'after head';
984     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
985     $self->{onerror}->(node => $node, type => 'ps element missing:head');
986     $phase = 'after body';
987     } else {
988     $not_allowed = 1;
989     # before head
990     }
991     } elsif ($phase eq 'after head') {
992     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
993     $phase = 'after body';
994     } else {
995     $not_allowed = 1;
996     # after head
997     }
998     } else { #elsif ($phase eq 'after body') {
999     $not_allowed = 1;
1000     # after body
1001     }
1002     $self->{onerror}->(node => $node, type => 'element not allowed')
1003     if $not_allowed;
1004     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1005     unshift @nodes, @$sib;
1006     push @$new_todos, @$ch;
1007     } elsif ($nt == 3 or $nt == 4) {
1008     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1009     $self->{onerror}->(node => $node, type => 'character not allowed');
1010     }
1011     } elsif ($nt == 5) {
1012     unshift @nodes, @{$node->child_nodes};
1013     }
1014     }
1015    
1016     if ($phase eq 'before head') {
1017     $self->{onerror}->(node => $el, type => 'child element missing:head');
1018     $self->{onerror}->(node => $el, type => 'child element missing:body');
1019     } elsif ($phase eq 'after head') {
1020     $self->{onerror}->(node => $el, type => 'child element missing:body');
1021     }
1022    
1023     return ($new_todos);
1024     },
1025     };
1026    
1027     $Element->{$HTML_NS}->{head} = {
1028     attrs_checker => $GetHTMLAttrsChecker->({}),
1029     checker => sub {
1030     my ($self, $todo) = @_;
1031     my $el = $todo->{node};
1032     my $new_todos = [];
1033     my @nodes = (@{$el->child_nodes});
1034    
1035     my $has_title;
1036     my $phase = 'initial'; # 'after charset', 'after base'
1037     while (@nodes) {
1038     my $node = shift @nodes;
1039     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1040    
1041     my $nt = $node->node_type;
1042     if ($nt == 1) {
1043     my $node_ns = $node->namespace_uri;
1044     $node_ns = '' unless defined $node_ns;
1045     my $node_ln = $node->manakai_local_name;
1046     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1047 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1048     #
1049     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1050 wakaba 1.1 $phase = 'after base';
1051     unless ($has_title) {
1052     $has_title = 1;
1053     } else {
1054     $not_allowed = 1;
1055     }
1056     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1057     if ($node->has_attribute_ns (undef, 'charset')) {
1058     if ($phase eq 'initial') {
1059     $phase = 'after charset';
1060     } else {
1061     $not_allowed = 1;
1062     ## NOTE: See also |base|'s "contexts" field in the spec
1063     }
1064 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1065     $node->has_attribute_ns (undef, 'http-equiv')) {
1066     $phase = 'after base';
1067 wakaba 1.1 } else {
1068     $phase = 'after base';
1069 wakaba 1.5 $not_allowed = 1;
1070 wakaba 1.1 }
1071     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1072     if ($phase eq 'initial' or $phase eq 'after charset') {
1073     $phase = 'after base';
1074     } else {
1075     $not_allowed = 1;
1076     }
1077     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1078     $phase = 'after base';
1079     if ($node->has_attribute_ns (undef, 'scoped')) {
1080     $not_allowed = 1;
1081     }
1082     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1083     $phase = 'after base';
1084     } else {
1085     $not_allowed = 1;
1086     }
1087     $self->{onerror}->(node => $node, type => 'element not allowed')
1088     if $not_allowed;
1089 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1090 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1091     unshift @nodes, @$sib;
1092     push @$new_todos, @$ch;
1093     } elsif ($nt == 3 or $nt == 4) {
1094     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1095     $self->{onerror}->(node => $node, type => 'character not allowed');
1096     }
1097     } elsif ($nt == 5) {
1098     unshift @nodes, @{$node->child_nodes};
1099     }
1100     }
1101     unless ($has_title) {
1102     $self->{onerror}->(node => $el, type => 'child element missing:title');
1103     }
1104     return ($new_todos);
1105     },
1106     };
1107    
1108     $Element->{$HTML_NS}->{title} = {
1109     attrs_checker => $GetHTMLAttrsChecker->({}),
1110     checker => $HTMLTextChecker,
1111     };
1112    
1113     $Element->{$HTML_NS}->{base} = {
1114 wakaba 1.4 attrs_checker => sub {
1115     my ($self, $todo) = @_;
1116    
1117     if ($self->{has_uri_attr} and
1118     $todo->{node}->has_attribute_ns (undef, 'href')) {
1119     ## ISSUE: Are these examples conforming?
1120     ## <head profile="a b c"><base href> (except for |profile|'s
1121     ## non-conformance)
1122     ## <title xml:base="relative"/><base href/> (maybe it should be)
1123     ## <unknown xmlns="relative"/><base href/> (assuming that
1124     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1125     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1126     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1127     ## <style>@import 'relative';</style><base href>
1128     ## <script>location.href = 'relative';</script><base href>
1129     $self->{onerror}->(node => $todo->{node},
1130     type => 'basehref after URI attribute');
1131     }
1132     if ($self->{has_hyperlink_element} and
1133     $todo->{node}->has_attribute_ns (undef, 'target')) {
1134     ## ISSUE: Are these examples conforming?
1135     ## <head><title xlink:href=""/><base target="name"/></head>
1136     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1137     ## (assuming that |xbl:xbl| is allowed before |base|)
1138     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1139     ## <link href=""/><base target="name"/>
1140     ## <link rel=unknown href=""><base target=name>
1141     $self->{onerror}->(node => $todo->{node},
1142     type => 'basetarget after hyperlink');
1143     }
1144    
1145     return $GetHTMLAttrsChecker->({
1146     href => $HTMLURIAttrChecker,
1147     target => $HTMLTargetAttrChecker,
1148     })->($self, $todo);
1149     },
1150 wakaba 1.1 checker => $HTMLEmptyChecker,
1151     };
1152    
1153     $Element->{$HTML_NS}->{link} = {
1154     attrs_checker => sub {
1155     my ($self, $todo) = @_;
1156     $GetHTMLAttrsChecker->({
1157     href => $HTMLURIAttrChecker,
1158 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1159 wakaba 1.1 media => $HTMLMQAttrChecker,
1160     hreflang => $HTMLLanguageTagAttrChecker,
1161     type => $HTMLIMTAttrChecker,
1162     ## NOTE: Though |title| has special semantics,
1163     ## syntactically same as the |title| as global attribute.
1164     })->($self, $todo);
1165 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1166     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1167     } else {
1168 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1169     type => 'attribute missing:href');
1170     }
1171     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1172     $self->{onerror}->(node => $todo->{node},
1173     type => 'attribute missing:rel');
1174     }
1175     },
1176     checker => $HTMLEmptyChecker,
1177     };
1178    
1179     $Element->{$HTML_NS}->{meta} = {
1180     attrs_checker => sub {
1181     my ($self, $todo) = @_;
1182     my $name_attr;
1183     my $http_equiv_attr;
1184     my $charset_attr;
1185     my $content_attr;
1186     for my $attr (@{$todo->{node}->attributes}) {
1187     my $attr_ns = $attr->namespace_uri;
1188     $attr_ns = '' unless defined $attr_ns;
1189     my $attr_ln = $attr->manakai_local_name;
1190     my $checker;
1191     if ($attr_ns eq '') {
1192     if ($attr_ln eq 'content') {
1193     $content_attr = $attr;
1194     $checker = 1;
1195     } elsif ($attr_ln eq 'name') {
1196     $name_attr = $attr;
1197     $checker = 1;
1198     } elsif ($attr_ln eq 'http-equiv') {
1199     $http_equiv_attr = $attr;
1200     $checker = 1;
1201     } elsif ($attr_ln eq 'charset') {
1202     $charset_attr = $attr;
1203     $checker = 1;
1204     } else {
1205     $checker = $HTMLAttrChecker->{$attr_ln}
1206     || $AttrChecker->{$attr_ns}->{$attr_ln}
1207     || $AttrChecker->{$attr_ns}->{''};
1208     }
1209     } else {
1210     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1211     || $AttrChecker->{$attr_ns}->{''};
1212     }
1213     if ($checker) {
1214     $checker->($self, $attr) if ref $checker;
1215     } else {
1216     $self->{onerror}->(node => $attr, level => 'unsupported',
1217     type => 'attribute');
1218     ## ISSUE: No comformance createria for unknown attributes in the spec
1219     }
1220     }
1221    
1222     if (defined $name_attr) {
1223     if (defined $http_equiv_attr) {
1224     $self->{onerror}->(node => $http_equiv_attr,
1225     type => 'attribute not allowed');
1226     } elsif (defined $charset_attr) {
1227     $self->{onerror}->(node => $charset_attr,
1228     type => 'attribute not allowed');
1229     }
1230     my $metadata_name = $name_attr->value;
1231     my $metadata_value;
1232     if (defined $content_attr) {
1233     $metadata_value = $content_attr->value;
1234     } else {
1235     $self->{onerror}->(node => $todo->{node},
1236     type => 'attribute missing:content');
1237     $metadata_value = '';
1238     }
1239     } elsif (defined $http_equiv_attr) {
1240     if (defined $charset_attr) {
1241     $self->{onerror}->(node => $charset_attr,
1242     type => 'attribute not allowed');
1243     }
1244     unless (defined $content_attr) {
1245     $self->{onerror}->(node => $todo->{node},
1246     type => 'attribute missing:content');
1247     }
1248     } elsif (defined $charset_attr) {
1249     if (defined $content_attr) {
1250     $self->{onerror}->(node => $content_attr,
1251     type => 'attribute not allowed');
1252     }
1253     } else {
1254     if (defined $content_attr) {
1255     $self->{onerror}->(node => $content_attr,
1256     type => 'attribute not allowed');
1257     $self->{onerror}->(node => $todo->{node},
1258     type => 'attribute missing:name|http-equiv');
1259     } else {
1260     $self->{onerror}->(node => $todo->{node},
1261     type => 'attribute missing:name|http-equiv|charset');
1262     }
1263     }
1264    
1265     ## TODO: metadata conformance
1266    
1267     ## TODO: pragma conformance
1268     if (defined $http_equiv_attr) { ## An enumerated attribute
1269     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1270     if ({
1271     'refresh' => 1,
1272     'default-style' => 1,
1273     }->{$keyword}) {
1274     #
1275     } else {
1276     $self->{onerror}->(node => $http_equiv_attr,
1277     type => 'enumerated:invalid');
1278     }
1279     }
1280    
1281     if (defined $charset_attr) {
1282     unless ($todo->{node}->owner_document->manakai_is_html) {
1283     $self->{onerror}->(node => $charset_attr,
1284     type => 'in XML:charset');
1285     }
1286     ## TODO: charset
1287     }
1288     },
1289     checker => $HTMLEmptyChecker,
1290     };
1291    
1292     $Element->{$HTML_NS}->{style} = {
1293     attrs_checker => $GetHTMLAttrsChecker->({
1294     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1295     media => $HTMLMQAttrChecker,
1296     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1297     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1298     ## not different
1299     }),
1300     checker => sub {
1301     ## NOTE: |html:style| has no conformance creteria on content model
1302     my ($self, $todo) = @_;
1303     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1304     $type = 'text/css' unless defined $type;
1305     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1306     type => 'style:'.$type); ## TODO: $type normalization
1307     return $AnyChecker->($self, $todo);
1308     },
1309     };
1310    
1311     $Element->{$HTML_NS}->{body} = {
1312     attrs_checker => $GetHTMLAttrsChecker->({}),
1313     checker => $HTMLBlockChecker,
1314     };
1315    
1316     $Element->{$HTML_NS}->{section} = {
1317     attrs_checker => $GetHTMLAttrsChecker->({}),
1318     checker => $HTMLStylableBlockChecker,
1319     };
1320    
1321     $Element->{$HTML_NS}->{nav} = {
1322     attrs_checker => $GetHTMLAttrsChecker->({}),
1323     checker => $HTMLBlockOrInlineChecker,
1324     };
1325    
1326     $Element->{$HTML_NS}->{article} = {
1327     attrs_checker => $GetHTMLAttrsChecker->({}),
1328     checker => $HTMLStylableBlockChecker,
1329     };
1330    
1331     $Element->{$HTML_NS}->{blockquote} = {
1332     attrs_checker => $GetHTMLAttrsChecker->({
1333     cite => $HTMLURIAttrChecker,
1334     }),
1335     checker => $HTMLBlockChecker,
1336     };
1337    
1338     $Element->{$HTML_NS}->{aside} = {
1339     attrs_checker => $GetHTMLAttrsChecker->({}),
1340     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1341     };
1342    
1343     $Element->{$HTML_NS}->{h1} = {
1344     attrs_checker => $GetHTMLAttrsChecker->({}),
1345     checker => sub {
1346     my ($self, $todo) = @_;
1347     $todo->{flag}->{has_heading}->[0] = 1;
1348     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1349     },
1350     };
1351    
1352     $Element->{$HTML_NS}->{h2} = {
1353     attrs_checker => $GetHTMLAttrsChecker->({}),
1354     checker => $Element->{$HTML_NS}->{h1}->{checker},
1355     };
1356    
1357     $Element->{$HTML_NS}->{h3} = {
1358     attrs_checker => $GetHTMLAttrsChecker->({}),
1359     checker => $Element->{$HTML_NS}->{h1}->{checker},
1360     };
1361    
1362     $Element->{$HTML_NS}->{h4} = {
1363     attrs_checker => $GetHTMLAttrsChecker->({}),
1364     checker => $Element->{$HTML_NS}->{h1}->{checker},
1365     };
1366    
1367     $Element->{$HTML_NS}->{h5} = {
1368     attrs_checker => $GetHTMLAttrsChecker->({}),
1369     checker => $Element->{$HTML_NS}->{h1}->{checker},
1370     };
1371    
1372     $Element->{$HTML_NS}->{h6} = {
1373     attrs_checker => $GetHTMLAttrsChecker->({}),
1374     checker => $Element->{$HTML_NS}->{h1}->{checker},
1375     };
1376    
1377     $Element->{$HTML_NS}->{header} = {
1378     attrs_checker => $GetHTMLAttrsChecker->({}),
1379     checker => sub {
1380     my ($self, $todo) = @_;
1381     my $old_flag = $todo->{flag}->{has_heading} || [];
1382     my $new_flag = [];
1383     local $todo->{flag}->{has_heading} = $new_flag;
1384     my $node = $todo->{node};
1385    
1386     my $end = $self->_add_minuses
1387     ({$HTML_NS => {qw/header 1 footer 1/}},
1388     $HTMLSectioningElements);
1389     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1390     push @$new_todos, $end,
1391     {type => 'code', code => sub {
1392     if ($new_flag->[0]) {
1393     $old_flag->[0] = 1;
1394     } else {
1395     $self->{onerror}->(node => $node, type => 'element missing:hn');
1396     }
1397     }};
1398     return ($new_todos, $ch);
1399     },
1400     };
1401    
1402     $Element->{$HTML_NS}->{footer} = {
1403     attrs_checker => $GetHTMLAttrsChecker->({}),
1404     checker => sub { ## block -hn -header -footer -sectioning or inline
1405     my ($self, $todo) = @_;
1406     my $el = $todo->{node};
1407     my $new_todos = [];
1408     my @nodes = (@{$el->child_nodes});
1409    
1410     my $content = 'block-or-inline'; # or 'block' or 'inline'
1411     my @block_not_inline;
1412     while (@nodes) {
1413     my $node = shift @nodes;
1414     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1415    
1416     my $nt = $node->node_type;
1417     if ($nt == 1) {
1418     my $node_ns = $node->namespace_uri;
1419     $node_ns = '' unless defined $node_ns;
1420     my $node_ln = $node->manakai_local_name;
1421     my $not_allowed;
1422     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1423     $not_allowed = 1;
1424     } elsif ($node_ns eq $HTML_NS and
1425     {
1426     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1427     }->{$node_ln}) {
1428     $not_allowed = 1;
1429     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1430     $not_allowed = 1;
1431     }
1432     if ($content eq 'block') {
1433     $not_allowed = 1
1434 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1435     $self->{pluses}->{$node_ns}->{$node_ln};
1436 wakaba 1.1 } elsif ($content eq 'inline') {
1437     $not_allowed = 1
1438     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1439 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1440     $self->{pluses}->{$node_ns}->{$node_ln};
1441 wakaba 1.1 } else {
1442     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1443     my $is_inline
1444     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1445     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1446    
1447     push @block_not_inline, $node
1448     if $is_block and not $is_inline and not $not_allowed;
1449 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1450 wakaba 1.1 $content = 'inline';
1451     for (@block_not_inline) {
1452     $self->{onerror}->(node => $_, type => 'element not allowed');
1453     }
1454     $not_allowed = 1 unless $is_inline;
1455     }
1456     }
1457     $self->{onerror}->(node => $node, type => 'element not allowed')
1458     if $not_allowed;
1459     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1460     unshift @nodes, @$sib;
1461     push @$new_todos, @$ch;
1462     } elsif ($nt == 3 or $nt == 4) {
1463     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1464     if ($content eq 'block') {
1465     $self->{onerror}->(node => $node, type => 'character not allowed');
1466     } else {
1467     $content = 'inline';
1468     for (@block_not_inline) {
1469     $self->{onerror}->(node => $_, type => 'element not allowed');
1470     }
1471     }
1472     }
1473     } elsif ($nt == 5) {
1474     unshift @nodes, @{$node->child_nodes};
1475     }
1476     }
1477    
1478     my $end = $self->_add_minuses
1479     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1480     $HTMLSectioningElements);
1481     push @$new_todos, $end;
1482    
1483     if ($content eq 'inline') {
1484     for (@$new_todos) {
1485     $_->{inline} = 1;
1486     }
1487     }
1488    
1489     return ($new_todos);
1490     },
1491     };
1492    
1493     $Element->{$HTML_NS}->{address} = {
1494     attrs_checker => $GetHTMLAttrsChecker->({}),
1495     checker => $HTMLInlineChecker,
1496     };
1497    
1498     $Element->{$HTML_NS}->{p} = {
1499     attrs_checker => $GetHTMLAttrsChecker->({}),
1500     checker => $HTMLSignificantInlineChecker,
1501     };
1502    
1503     $Element->{$HTML_NS}->{hr} = {
1504     attrs_checker => $GetHTMLAttrsChecker->({}),
1505     checker => $HTMLEmptyChecker,
1506     };
1507    
1508     $Element->{$HTML_NS}->{br} = {
1509     attrs_checker => $GetHTMLAttrsChecker->({}),
1510     checker => $HTMLEmptyChecker,
1511     };
1512    
1513     $Element->{$HTML_NS}->{dialog} = {
1514     attrs_checker => $GetHTMLAttrsChecker->({}),
1515     checker => sub {
1516     my ($self, $todo) = @_;
1517     my $el = $todo->{node};
1518     my $new_todos = [];
1519     my @nodes = (@{$el->child_nodes});
1520    
1521     my $phase = 'before dt';
1522     while (@nodes) {
1523     my $node = shift @nodes;
1524     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1525    
1526     my $nt = $node->node_type;
1527     if ($nt == 1) {
1528     my $node_ns = $node->namespace_uri;
1529     $node_ns = '' unless defined $node_ns;
1530     my $node_ln = $node->manakai_local_name;
1531     ## NOTE: |minuses| list is not checked since redundant
1532 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1533     #
1534     } elsif ($phase eq 'before dt') {
1535 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1536     $phase = 'before dd';
1537     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1538     $self->{onerror}
1539     ->(node => $node, type => 'ps element missing:dt');
1540     $phase = 'before dt';
1541     } else {
1542     $self->{onerror}->(node => $node, type => 'element not allowed');
1543     }
1544     } else { # before dd
1545     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1546     $phase = 'before dt';
1547     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1548     $self->{onerror}
1549     ->(node => $node, type => 'ps element missing:dd');
1550     $phase = 'before dd';
1551     } else {
1552     $self->{onerror}->(node => $node, type => 'element not allowed');
1553     }
1554     }
1555     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1556     unshift @nodes, @$sib;
1557     push @$new_todos, @$ch;
1558     } elsif ($nt == 3 or $nt == 4) {
1559     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1560     $self->{onerror}->(node => $node, type => 'character not allowed');
1561     }
1562     } elsif ($nt == 5) {
1563     unshift @nodes, @{$node->child_nodes};
1564     }
1565     }
1566     if ($phase eq 'before dd') {
1567 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1568 wakaba 1.1 }
1569     return ($new_todos);
1570     },
1571     };
1572    
1573     $Element->{$HTML_NS}->{pre} = {
1574     attrs_checker => $GetHTMLAttrsChecker->({}),
1575     checker => $HTMLStrictlyInlineChecker,
1576     };
1577    
1578     $Element->{$HTML_NS}->{ol} = {
1579     attrs_checker => $GetHTMLAttrsChecker->({
1580     start => $HTMLIntegerAttrChecker,
1581     }),
1582     checker => sub {
1583     my ($self, $todo) = @_;
1584     my $el = $todo->{node};
1585     my $new_todos = [];
1586     my @nodes = (@{$el->child_nodes});
1587    
1588     while (@nodes) {
1589     my $node = shift @nodes;
1590     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1591    
1592     my $nt = $node->node_type;
1593     if ($nt == 1) {
1594     my $node_ns = $node->namespace_uri;
1595     $node_ns = '' unless defined $node_ns;
1596     my $node_ln = $node->manakai_local_name;
1597     ## NOTE: |minuses| list is not checked since redundant
1598 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1599     #
1600     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1601 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1602     }
1603     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1604     unshift @nodes, @$sib;
1605     push @$new_todos, @$ch;
1606     } elsif ($nt == 3 or $nt == 4) {
1607     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1608     $self->{onerror}->(node => $node, type => 'character not allowed');
1609     }
1610     } elsif ($nt == 5) {
1611     unshift @nodes, @{$node->child_nodes};
1612     }
1613     }
1614    
1615     if ($todo->{inline}) {
1616     for (@$new_todos) {
1617     $_->{inline} = 1;
1618     }
1619     }
1620     return ($new_todos);
1621     },
1622     };
1623    
1624     $Element->{$HTML_NS}->{ul} = {
1625     attrs_checker => $GetHTMLAttrsChecker->({}),
1626     checker => $Element->{$HTML_NS}->{ol}->{checker},
1627     };
1628    
1629     $Element->{$HTML_NS}->{li} = {
1630     attrs_checker => $GetHTMLAttrsChecker->({
1631     start => sub {
1632     my ($self, $attr) = @_;
1633     my $parent = $attr->owner_element->manakai_parent_element;
1634     if (defined $parent) {
1635     my $parent_ns = $parent->namespace_uri;
1636     $parent_ns = '' unless defined $parent_ns;
1637     my $parent_ln = $parent->manakai_local_name;
1638     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1639     $self->{onerror}->(node => $attr, level => 'unsupported',
1640     type => 'attribute');
1641     }
1642     }
1643     $HTMLIntegerAttrChecker->($self, $attr);
1644     },
1645     }),
1646     checker => sub {
1647     my ($self, $todo) = @_;
1648     if ($todo->{inline}) {
1649     return $HTMLInlineChecker->($self, $todo);
1650     } else {
1651     return $HTMLBlockOrInlineChecker->($self, $todo);
1652     }
1653     },
1654     };
1655    
1656     $Element->{$HTML_NS}->{dl} = {
1657     attrs_checker => $GetHTMLAttrsChecker->({}),
1658     checker => sub {
1659     my ($self, $todo) = @_;
1660     my $el = $todo->{node};
1661     my $new_todos = [];
1662     my @nodes = (@{$el->child_nodes});
1663    
1664     my $phase = 'before dt';
1665     while (@nodes) {
1666     my $node = shift @nodes;
1667     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1668    
1669     my $nt = $node->node_type;
1670     if ($nt == 1) {
1671     my $node_ns = $node->namespace_uri;
1672     $node_ns = '' unless defined $node_ns;
1673     my $node_ln = $node->manakai_local_name;
1674     ## NOTE: |minuses| list is not checked since redundant
1675 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1676     #
1677     } elsif ($phase eq 'in dds') {
1678 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1679     #$phase = 'in dds';
1680     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1681     $phase = 'in dts';
1682     } else {
1683     $self->{onerror}->(node => $node, type => 'element not allowed');
1684     }
1685     } elsif ($phase eq 'in dts') {
1686     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1687     #$phase = 'in dts';
1688     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1689     $phase = 'in dds';
1690     } else {
1691     $self->{onerror}->(node => $node, type => 'element not allowed');
1692     }
1693     } else { # before dt
1694     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1695     $phase = 'in dts';
1696     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1697     $self->{onerror}
1698     ->(node => $node, type => 'ps element missing:dt');
1699     $phase = 'in dds';
1700     } else {
1701     $self->{onerror}->(node => $node, type => 'element not allowed');
1702     }
1703     }
1704     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1705     unshift @nodes, @$sib;
1706     push @$new_todos, @$ch;
1707     } elsif ($nt == 3 or $nt == 4) {
1708     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1709     $self->{onerror}->(node => $node, type => 'character not allowed');
1710     }
1711     } elsif ($nt == 5) {
1712     unshift @nodes, @{$node->child_nodes};
1713     }
1714     }
1715     if ($phase eq 'in dts') {
1716 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1717 wakaba 1.1 }
1718    
1719     if ($todo->{inline}) {
1720     for (@$new_todos) {
1721     $_->{inline} = 1;
1722     }
1723     }
1724     return ($new_todos);
1725     },
1726     };
1727    
1728     $Element->{$HTML_NS}->{dt} = {
1729     attrs_checker => $GetHTMLAttrsChecker->({}),
1730     checker => $HTMLStrictlyInlineChecker,
1731     };
1732    
1733     $Element->{$HTML_NS}->{dd} = {
1734     attrs_checker => $GetHTMLAttrsChecker->({}),
1735     checker => $Element->{$HTML_NS}->{li}->{checker},
1736     };
1737    
1738     $Element->{$HTML_NS}->{a} = {
1739     attrs_checker => sub {
1740     my ($self, $todo) = @_;
1741     my %attr;
1742     for my $attr (@{$todo->{node}->attributes}) {
1743     my $attr_ns = $attr->namespace_uri;
1744     $attr_ns = '' unless defined $attr_ns;
1745     my $attr_ln = $attr->manakai_local_name;
1746     my $checker;
1747     if ($attr_ns eq '') {
1748     $checker = {
1749     target => $HTMLTargetAttrChecker,
1750     href => $HTMLURIAttrChecker,
1751     ping => $HTMLSpaceURIsAttrChecker,
1752 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1753 wakaba 1.1 media => $HTMLMQAttrChecker,
1754     hreflang => $HTMLLanguageTagAttrChecker,
1755     type => $HTMLIMTAttrChecker,
1756     }->{$attr_ln};
1757     if ($checker) {
1758     $attr{$attr_ln} = $attr;
1759     } else {
1760     $checker = $HTMLAttrChecker->{$attr_ln};
1761     }
1762     }
1763     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1764     || $AttrChecker->{$attr_ns}->{''};
1765     if ($checker) {
1766     $checker->($self, $attr) if ref $checker;
1767     } else {
1768     $self->{onerror}->(node => $attr, level => 'unsupported',
1769     type => 'attribute');
1770     ## ISSUE: No comformance createria for unknown attributes in the spec
1771     }
1772     }
1773    
1774 wakaba 1.4 if (defined $attr{href}) {
1775     $self->{has_hyperlink_element} = 1;
1776     } else {
1777 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1778     if (defined $attr{$_}) {
1779     $self->{onerror}->(node => $attr{$_},
1780     type => 'attribute not allowed');
1781     }
1782     }
1783     }
1784     },
1785     checker => sub {
1786     my ($self, $todo) = @_;
1787    
1788     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1789     my ($new_todos, $ch)
1790     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1791     push @$new_todos, $end;
1792    
1793     $_->{flag}->{has_a} = 1 for @$new_todos;
1794    
1795     return ($new_todos, $ch);
1796     },
1797     };
1798    
1799     $Element->{$HTML_NS}->{q} = {
1800     attrs_checker => $GetHTMLAttrsChecker->({
1801     cite => $HTMLURIAttrChecker,
1802     }),
1803     checker => $HTMLInlineOrStrictlyInlineChecker,
1804     };
1805    
1806     $Element->{$HTML_NS}->{cite} = {
1807     attrs_checker => $GetHTMLAttrsChecker->({}),
1808     checker => $HTMLStrictlyInlineChecker,
1809     };
1810    
1811     $Element->{$HTML_NS}->{em} = {
1812     attrs_checker => $GetHTMLAttrsChecker->({}),
1813     checker => $HTMLInlineOrStrictlyInlineChecker,
1814     };
1815    
1816     $Element->{$HTML_NS}->{strong} = {
1817     attrs_checker => $GetHTMLAttrsChecker->({}),
1818     checker => $HTMLInlineOrStrictlyInlineChecker,
1819     };
1820    
1821     $Element->{$HTML_NS}->{small} = {
1822     attrs_checker => $GetHTMLAttrsChecker->({}),
1823     checker => $HTMLInlineOrStrictlyInlineChecker,
1824     };
1825    
1826     $Element->{$HTML_NS}->{m} = {
1827     attrs_checker => $GetHTMLAttrsChecker->({}),
1828     checker => $HTMLInlineOrStrictlyInlineChecker,
1829     };
1830    
1831     $Element->{$HTML_NS}->{dfn} = {
1832     attrs_checker => $GetHTMLAttrsChecker->({}),
1833     checker => sub {
1834     my ($self, $todo) = @_;
1835    
1836     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1837     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1838     push @$sib, $end;
1839    
1840     my $node = $todo->{node};
1841     my $term = $node->get_attribute_ns (undef, 'title');
1842     unless (defined $term) {
1843     for my $child (@{$node->child_nodes}) {
1844     if ($child->node_type == 1) { # ELEMENT_NODE
1845     if (defined $term) {
1846     undef $term;
1847     last;
1848     } elsif ($child->manakai_local_name eq 'abbr') {
1849     my $nsuri = $child->namespace_uri;
1850     if (defined $nsuri and $nsuri eq $HTML_NS) {
1851     my $attr = $child->get_attribute_node_ns (undef, 'title');
1852     if ($attr) {
1853     $term = $attr->value;
1854     }
1855     }
1856     }
1857     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1858     ## TEXT_NODE or CDATA_SECTION_NODE
1859     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1860     next;
1861     }
1862     undef $term;
1863     last;
1864     }
1865     }
1866     unless (defined $term) {
1867     $term = $node->text_content;
1868     }
1869     }
1870     if ($self->{term}->{$term}) {
1871     $self->{onerror}->(node => $node, type => 'duplicate term');
1872     push @{$self->{term}->{$term}}, $node;
1873     } else {
1874     $self->{term}->{$term} = [$node];
1875     }
1876     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1877     ## has |title|.
1878    
1879     return ($sib, $ch);
1880     },
1881     };
1882    
1883     $Element->{$HTML_NS}->{abbr} = {
1884     attrs_checker => $GetHTMLAttrsChecker->({
1885     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1886     ## not different. The spec says that the |title| MAY be omitted
1887     ## if there is a |dfn| whose defining term is the abbreviation,
1888     ## but it does not prohibit |abbr| w/o |title| in other cases.
1889     }),
1890     checker => $HTMLStrictlyInlineChecker,
1891     };
1892    
1893     $Element->{$HTML_NS}->{time} = {
1894     attrs_checker => $GetHTMLAttrsChecker->({
1895     datetime => sub { 1 }, # checked in |checker|
1896     }),
1897     ## TODO: Write tests
1898     checker => sub {
1899     my ($self, $todo) = @_;
1900    
1901     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1902     my $input;
1903     my $reg_sp;
1904     my $input_node;
1905     if ($attr) {
1906     $input = $attr->value;
1907     $reg_sp = qr/[\x09-\x0D\x20]*/;
1908     $input_node = $attr;
1909     } else {
1910     $input = $todo->{node}->text_content;
1911     $reg_sp = qr/\p{Zs}*/;
1912     $input_node = $todo->{node};
1913    
1914     ## ISSUE: What is the definition for "successfully extracts a date
1915     ## or time"? If the algorithm says the string is invalid but
1916     ## return some date or time, is it "successfully"?
1917     }
1918    
1919     my $hour;
1920     my $minute;
1921     my $second;
1922     if ($input =~ /
1923     \A
1924     [\x09-\x0D\x20]*
1925     ([0-9]+) # 1
1926     (?>
1927     -([0-9]+) # 2
1928     -([0-9]+) # 3
1929     [\x09-\x0D\x20]*
1930     (?>
1931     T
1932     [\x09-\x0D\x20]*
1933     )?
1934     ([0-9]+) # 4
1935     :([0-9]+) # 5
1936     (?>
1937     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1938     )?
1939     [\x09-\x0D\x20]*
1940     (?>
1941     Z
1942     [\x09-\x0D\x20]*
1943     |
1944     [+-]([0-9]+):([0-9]+) # 7, 8
1945     [\x09-\x0D\x20]*
1946     )?
1947     \z
1948     |
1949     :([0-9]+) # 9
1950     (?>
1951     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1952     )?
1953     [\x09-\x0D\x20]*\z
1954     )
1955     /x) {
1956     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1957     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1958     length $4 != 2 or length $5 != 2) {
1959     $self->{onerror}->(node => $input_node,
1960     type => 'dateortime:syntax error');
1961     }
1962    
1963     if (1 <= $2 and $2 <= 12) {
1964     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1965     if $3 < 1 or
1966     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1967     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1968     if $2 == 2 and $3 == 29 and
1969     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1970     } else {
1971     $self->{onerror}->(node => $input_node,
1972     type => 'datetime:bad month');
1973     }
1974    
1975     ($hour, $minute, $second) = ($4, $5, $6);
1976    
1977     if (defined $7) { ## [+-]hh:mm
1978     if (length $7 != 2 or length $8 != 2) {
1979     $self->{onerror}->(node => $input_node,
1980     type => 'dateortime:syntax error');
1981     }
1982    
1983     $self->{onerror}->(node => $input_node,
1984     type => 'datetime:bad timezone hour')
1985     if $7 > 23;
1986     $self->{onerror}->(node => $input_node,
1987     type => 'datetime:bad timezone minute')
1988     if $8 > 59;
1989     }
1990     } else { ## hh:mm
1991     if (length $1 != 2 or length $9 != 2) {
1992     $self->{onerror}->(node => $input_node,
1993     type => qq'dateortime:syntax error');
1994     }
1995    
1996     ($hour, $minute, $second) = ($1, $9, $10);
1997     }
1998    
1999     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2000     if $hour > 23;
2001     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2002     if $minute > 59;
2003    
2004     if (defined $second) { ## s
2005     ## NOTE: Integer part of second don't have to have length of two.
2006    
2007     if (substr ($second, 0, 1) eq '.') {
2008     $self->{onerror}->(node => $input_node,
2009     type => 'dateortime:syntax error');
2010     }
2011    
2012     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2013     if $second >= 60;
2014     }
2015     } else {
2016     $self->{onerror}->(node => $input_node,
2017     type => 'dateortime:syntax error');
2018     }
2019    
2020     return $HTMLStrictlyInlineChecker->($self, $todo);
2021     },
2022     };
2023    
2024     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2025     attrs_checker => $GetHTMLAttrsChecker->({
2026     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2027     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2028     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2029     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2030     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2031     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2032     }),
2033     checker => $HTMLStrictlyInlineChecker,
2034     };
2035    
2036     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2037     attrs_checker => $GetHTMLAttrsChecker->({
2038     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2039     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2040     }),
2041     checker => $HTMLStrictlyInlineChecker,
2042     };
2043    
2044     $Element->{$HTML_NS}->{code} = {
2045     attrs_checker => $GetHTMLAttrsChecker->({}),
2046     ## NOTE: Though |title| has special semantics,
2047     ## syntatically same as the |title| as global attribute.
2048     checker => $HTMLInlineOrStrictlyInlineChecker,
2049     };
2050    
2051     $Element->{$HTML_NS}->{var} = {
2052     attrs_checker => $GetHTMLAttrsChecker->({}),
2053     ## NOTE: Though |title| has special semantics,
2054     ## syntatically same as the |title| as global attribute.
2055     checker => $HTMLStrictlyInlineChecker,
2056     };
2057    
2058     $Element->{$HTML_NS}->{samp} = {
2059     attrs_checker => $GetHTMLAttrsChecker->({}),
2060     ## NOTE: Though |title| has special semantics,
2061     ## syntatically same as the |title| as global attribute.
2062     checker => $HTMLInlineOrStrictlyInlineChecker,
2063     };
2064    
2065     $Element->{$HTML_NS}->{kbd} = {
2066     attrs_checker => $GetHTMLAttrsChecker->({}),
2067     checker => $HTMLStrictlyInlineChecker,
2068     };
2069    
2070     $Element->{$HTML_NS}->{sub} = {
2071     attrs_checker => $GetHTMLAttrsChecker->({}),
2072     checker => $HTMLStrictlyInlineChecker,
2073     };
2074    
2075     $Element->{$HTML_NS}->{sup} = {
2076     attrs_checker => $GetHTMLAttrsChecker->({}),
2077     checker => $HTMLStrictlyInlineChecker,
2078     };
2079    
2080     $Element->{$HTML_NS}->{span} = {
2081     attrs_checker => $GetHTMLAttrsChecker->({}),
2082     ## NOTE: Though |title| has special semantics,
2083     ## syntatically same as the |title| as global attribute.
2084     checker => $HTMLInlineOrStrictlyInlineChecker,
2085     };
2086    
2087     $Element->{$HTML_NS}->{i} = {
2088     attrs_checker => $GetHTMLAttrsChecker->({}),
2089     ## NOTE: Though |title| has special semantics,
2090     ## syntatically same as the |title| as global attribute.
2091     checker => $HTMLStrictlyInlineChecker,
2092     };
2093    
2094     $Element->{$HTML_NS}->{b} = {
2095     attrs_checker => $GetHTMLAttrsChecker->({}),
2096     checker => $HTMLStrictlyInlineChecker,
2097     };
2098    
2099     $Element->{$HTML_NS}->{bdo} = {
2100     attrs_checker => sub {
2101     my ($self, $todo) = @_;
2102     $GetHTMLAttrsChecker->({})->($self, $todo);
2103     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2104     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2105     }
2106     },
2107     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2108     checker => $HTMLStrictlyInlineChecker,
2109     };
2110    
2111     $Element->{$HTML_NS}->{ins} = {
2112     attrs_checker => $GetHTMLAttrsChecker->({
2113     cite => $HTMLURIAttrChecker,
2114     datetime => $HTMLDatetimeAttrChecker,
2115     }),
2116     checker => $HTMLTransparentChecker,
2117     };
2118    
2119     $Element->{$HTML_NS}->{del} = {
2120     attrs_checker => $GetHTMLAttrsChecker->({
2121     cite => $HTMLURIAttrChecker,
2122     datetime => $HTMLDatetimeAttrChecker,
2123     }),
2124     checker => sub {
2125     my ($self, $todo) = @_;
2126    
2127     my $parent = $todo->{node}->manakai_parent_element;
2128     if (defined $parent) {
2129     my $nsuri = $parent->namespace_uri;
2130     $nsuri = '' unless defined $nsuri;
2131     my $ln = $parent->manakai_local_name;
2132     my $eldef = $Element->{$nsuri}->{$ln} ||
2133     $Element->{$nsuri}->{''} ||
2134     $ElementDefault;
2135     return $eldef->{checker}->($self, $todo);
2136     } else {
2137     return $HTMLBlockOrInlineChecker->($self, $todo);
2138     }
2139     },
2140     };
2141    
2142     ## TODO: figure
2143 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2144 wakaba 1.1
2145 wakaba 1.4 ## TODO: |alt|
2146 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2147     attrs_checker => sub {
2148     my ($self, $todo) = @_;
2149     $GetHTMLAttrsChecker->({
2150     alt => sub { }, ## NOTE: No syntactical requirement
2151     src => $HTMLURIAttrChecker,
2152     usemap => $HTMLUsemapAttrChecker,
2153     ismap => sub {
2154     my ($self, $attr, $parent_todo) = @_;
2155     if (not $todo->{flag}->{has_a}) {
2156     $self->{onerror}->(node => $attr, type => 'attribute not allowed');
2157     }
2158     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2159     },
2160     ## TODO: height
2161     ## TODO: width
2162     })->($self, $todo);
2163     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2164     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2165     }
2166     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2167     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2168     }
2169     },
2170     checker => $HTMLEmptyChecker,
2171     };
2172    
2173     $Element->{$HTML_NS}->{iframe} = {
2174     attrs_checker => $GetHTMLAttrsChecker->({
2175     src => $HTMLURIAttrChecker,
2176     }),
2177     checker => $HTMLTextChecker,
2178     };
2179    
2180     $Element->{$HTML_NS}->{embed} = {
2181     attrs_checker => sub {
2182     my ($self, $todo) = @_;
2183     my $has_src;
2184     for my $attr (@{$todo->{node}->attributes}) {
2185     my $attr_ns = $attr->namespace_uri;
2186     $attr_ns = '' unless defined $attr_ns;
2187     my $attr_ln = $attr->manakai_local_name;
2188     my $checker;
2189     if ($attr_ns eq '') {
2190     if ($attr_ln eq 'src') {
2191     $checker = $HTMLURIAttrChecker;
2192     $has_src = 1;
2193     } elsif ($attr_ln eq 'type') {
2194     $checker = $HTMLIMTAttrChecker;
2195     } else {
2196     ## TODO: height
2197     ## TODO: width
2198     $checker = $HTMLAttrChecker->{$attr_ln}
2199     || sub { }; ## NOTE: Any local attribute is ok.
2200     }
2201     }
2202     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2203     || $AttrChecker->{$attr_ns}->{''};
2204     if ($checker) {
2205     $checker->($self, $attr);
2206     } else {
2207     $self->{onerror}->(node => $attr, level => 'unsupported',
2208     type => 'attribute');
2209     ## ISSUE: No comformance createria for global attributes in the spec
2210     }
2211     }
2212    
2213     unless ($has_src) {
2214     $self->{onerror}->(node => $todo->{node},
2215     type => 'attribute missing:src');
2216     }
2217     },
2218     checker => $HTMLEmptyChecker,
2219     };
2220    
2221     $Element->{$HTML_NS}->{object} = {
2222     attrs_checker => sub {
2223     my ($self, $todo) = @_;
2224     $GetHTMLAttrsChecker->({
2225     data => $HTMLURIAttrChecker,
2226     type => $HTMLIMTAttrChecker,
2227     usemap => $HTMLUsemapAttrChecker,
2228     ## TODO: width
2229     ## TODO: height
2230     })->($self, $todo);
2231     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2232     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2233     $self->{onerror}->(node => $todo->{node},
2234     type => 'attribute missing:data|type');
2235     }
2236     }
2237     },
2238     checker => $ElementDefault->{checker}, ## TODO
2239 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2240 wakaba 1.1 };
2241    
2242     $Element->{$HTML_NS}->{param} = {
2243     attrs_checker => sub {
2244     my ($self, $todo) = @_;
2245     $GetHTMLAttrsChecker->({
2246     name => sub { },
2247     value => sub { },
2248     })->($self, $todo);
2249     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2250     $self->{onerror}->(node => $todo->{node},
2251     type => 'attribute missing:name');
2252     }
2253     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2254     $self->{onerror}->(node => $todo->{node},
2255     type => 'attribute missing:value');
2256     }
2257     },
2258     checker => $HTMLEmptyChecker,
2259     };
2260    
2261     $Element->{$HTML_NS}->{video} = {
2262     attrs_checker => $GetHTMLAttrsChecker->({
2263     src => $HTMLURIAttrChecker,
2264     ## TODO: start, loopstart, loopend, end
2265     ## ISSUE: they MUST be "value time offset"s. Value?
2266 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2267 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2268     controls => $GetHTMLBooleanAttrChecker->('controls'),
2269 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2270     ## TODO: width, height (not for audio!)
2271 wakaba 1.1 }),
2272     checker => sub {
2273     my ($self, $todo) = @_;
2274    
2275     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2276     return $HTMLBlockOrInlineChecker->($self, $todo);
2277     } else {
2278     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2279     ->($self, $todo);
2280     }
2281     },
2282     };
2283    
2284     $Element->{$HTML_NS}->{audio} = {
2285     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2286     checker => $Element->{$HTML_NS}->{video}->{checker},
2287     };
2288    
2289     $Element->{$HTML_NS}->{source} = {
2290     attrs_checker => sub {
2291     my ($self, $todo) = @_;
2292     $GetHTMLAttrsChecker->({
2293     src => $HTMLURIAttrChecker,
2294     type => $HTMLIMTAttrChecker,
2295     media => $HTMLMQAttrChecker,
2296     })->($self, $todo);
2297     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2298     $self->{onerror}->(node => $todo->{node},
2299     type => 'attribute missing:src');
2300     }
2301     },
2302     checker => $HTMLEmptyChecker,
2303     };
2304    
2305     $Element->{$HTML_NS}->{canvas} = {
2306     attrs_checker => $GetHTMLAttrsChecker->({
2307     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2308     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2309     }),
2310     checker => $HTMLInlineChecker,
2311     };
2312    
2313     $Element->{$HTML_NS}->{map} = {
2314 wakaba 1.4 attrs_checker => sub {
2315     my ($self, $todo) = @_;
2316     my $has_id;
2317     $GetHTMLAttrsChecker->({
2318     id => sub {
2319     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2320     my ($self, $attr) = @_;
2321     my $value = $attr->value;
2322     if (length $value > 0) {
2323     if ($self->{id}->{$value}) {
2324     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2325     push @{$self->{id}->{$value}}, $attr;
2326     } else {
2327     $self->{id}->{$value} = [$attr];
2328     }
2329 wakaba 1.1 } else {
2330 wakaba 1.4 ## NOTE: MUST contain at least one character
2331     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2332 wakaba 1.1 }
2333 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2334     $self->{onerror}->(node => $attr, type => 'space in ID');
2335     }
2336     $self->{map}->{$value} ||= $attr;
2337     $has_id = 1;
2338     },
2339     })->($self, $todo);
2340     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2341     unless $has_id;
2342     },
2343 wakaba 1.1 checker => $HTMLBlockChecker,
2344     };
2345    
2346     $Element->{$HTML_NS}->{area} = {
2347     attrs_checker => sub {
2348     my ($self, $todo) = @_;
2349     my %attr;
2350     my $coords;
2351     for my $attr (@{$todo->{node}->attributes}) {
2352     my $attr_ns = $attr->namespace_uri;
2353     $attr_ns = '' unless defined $attr_ns;
2354     my $attr_ln = $attr->manakai_local_name;
2355     my $checker;
2356     if ($attr_ns eq '') {
2357     $checker = {
2358     alt => sub { },
2359     ## NOTE: |alt| value has no conformance creteria.
2360     shape => $GetHTMLEnumeratedAttrChecker->({
2361     circ => -1, circle => 1,
2362     default => 1,
2363     poly => 1, polygon => -1,
2364     rect => 1, rectangle => -1,
2365     }),
2366     coords => sub {
2367     my ($self, $attr) = @_;
2368     my $value = $attr->value;
2369     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2370     $coords = [split /,/, $value];
2371     } else {
2372     $self->{onerror}->(node => $attr,
2373     type => 'coords:syntax error');
2374     }
2375     },
2376     target => $HTMLTargetAttrChecker,
2377     href => $HTMLURIAttrChecker,
2378     ping => $HTMLSpaceURIsAttrChecker,
2379 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2380 wakaba 1.1 media => $HTMLMQAttrChecker,
2381     hreflang => $HTMLLanguageTagAttrChecker,
2382     type => $HTMLIMTAttrChecker,
2383     }->{$attr_ln};
2384     if ($checker) {
2385     $attr{$attr_ln} = $attr;
2386     } else {
2387     $checker = $HTMLAttrChecker->{$attr_ln};
2388     }
2389     }
2390     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2391     || $AttrChecker->{$attr_ns}->{''};
2392     if ($checker) {
2393     $checker->($self, $attr) if ref $checker;
2394     } else {
2395     $self->{onerror}->(node => $attr, level => 'unsupported',
2396     type => 'attribute');
2397     ## ISSUE: No comformance createria for unknown attributes in the spec
2398     }
2399     }
2400    
2401     if (defined $attr{href}) {
2402 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2403 wakaba 1.1 unless (defined $attr{alt}) {
2404     $self->{onerror}->(node => $todo->{node},
2405     type => 'attribute missing:alt');
2406     }
2407     } else {
2408     for (qw/target ping rel media hreflang type alt/) {
2409     if (defined $attr{$_}) {
2410     $self->{onerror}->(node => $attr{$_},
2411     type => 'attribute not allowed');
2412     }
2413     }
2414     }
2415    
2416     my $shape = 'rectangle';
2417     if (defined $attr{shape}) {
2418     $shape = {
2419     circ => 'circle', circle => 'circle',
2420     default => 'default',
2421     poly => 'polygon', polygon => 'polygon',
2422     rect => 'rectangle', rectangle => 'rectangle',
2423     }->{lc $attr{shape}->value} || 'rectangle';
2424     ## TODO: ASCII lowercase?
2425     }
2426    
2427     if ($shape eq 'circle') {
2428     if (defined $attr{coords}) {
2429     if (defined $coords) {
2430     if (@$coords == 3) {
2431     if ($coords->[2] < 0) {
2432     $self->{onerror}->(node => $attr{coords},
2433     type => 'coords:out of range:2');
2434     }
2435     } else {
2436     $self->{onerror}->(node => $attr{coords},
2437     type => 'coords:number:3:'.@$coords);
2438     }
2439     } else {
2440     ## NOTE: A syntax error has been reported.
2441     }
2442     } else {
2443     $self->{onerror}->(node => $todo->{node},
2444     type => 'attribute missing:coords');
2445     }
2446     } elsif ($shape eq 'default') {
2447     if (defined $attr{coords}) {
2448     $self->{onerror}->(node => $attr{coords},
2449     type => 'attribute not allowed');
2450     }
2451     } elsif ($shape eq 'polygon') {
2452     if (defined $attr{coords}) {
2453     if (defined $coords) {
2454     if (@$coords >= 6) {
2455     unless (@$coords % 2 == 0) {
2456     $self->{onerror}->(node => $attr{coords},
2457     type => 'coords:number:even:'.@$coords);
2458     }
2459     } else {
2460     $self->{onerror}->(node => $attr{coords},
2461     type => 'coords:number:>=6:'.@$coords);
2462     }
2463     } else {
2464     ## NOTE: A syntax error has been reported.
2465     }
2466     } else {
2467     $self->{onerror}->(node => $todo->{node},
2468     type => 'attribute missing:coords');
2469     }
2470     } elsif ($shape eq 'rectangle') {
2471     if (defined $attr{coords}) {
2472     if (defined $coords) {
2473     if (@$coords == 4) {
2474     unless ($coords->[0] < $coords->[2]) {
2475     $self->{onerror}->(node => $attr{coords},
2476     type => 'coords:out of range:0');
2477     }
2478     unless ($coords->[1] < $coords->[3]) {
2479     $self->{onerror}->(node => $attr{coords},
2480     type => 'coords:out of range:1');
2481     }
2482     } else {
2483     $self->{onerror}->(node => $attr{coords},
2484     type => 'coords:number:4:'.@$coords);
2485     }
2486     } else {
2487     ## NOTE: A syntax error has been reported.
2488     }
2489     } else {
2490     $self->{onerror}->(node => $todo->{node},
2491     type => 'attribute missing:coords');
2492     }
2493     }
2494     },
2495     checker => $HTMLEmptyChecker,
2496     };
2497     ## TODO: only in map
2498    
2499     $Element->{$HTML_NS}->{table} = {
2500     attrs_checker => $GetHTMLAttrsChecker->({}),
2501     checker => sub {
2502     my ($self, $todo) = @_;
2503     my $el = $todo->{node};
2504     my $new_todos = [];
2505     my @nodes = (@{$el->child_nodes});
2506    
2507     my $phase = 'before caption';
2508     my $has_tfoot;
2509     while (@nodes) {
2510     my $node = shift @nodes;
2511     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2512    
2513     my $nt = $node->node_type;
2514     if ($nt == 1) {
2515     my $node_ns = $node->namespace_uri;
2516     $node_ns = '' unless defined $node_ns;
2517     my $node_ln = $node->manakai_local_name;
2518     ## NOTE: |minuses| list is not checked since redundant
2519 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2520     #
2521     } elsif ($phase eq 'in tbodys') {
2522 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2523     #$phase = 'in tbodys';
2524     } elsif (not $has_tfoot and
2525     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2526     $phase = 'after tfoot';
2527     $has_tfoot = 1;
2528     } else {
2529     $self->{onerror}->(node => $node, type => 'element not allowed');
2530     }
2531     } elsif ($phase eq 'in trs') {
2532     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2533     #$phase = 'in trs';
2534     } elsif (not $has_tfoot and
2535     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2536     $phase = 'after tfoot';
2537     $has_tfoot = 1;
2538     } else {
2539     $self->{onerror}->(node => $node, type => 'element not allowed');
2540     }
2541     } elsif ($phase eq 'after thead') {
2542     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2543     $phase = 'in tbodys';
2544     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2545     $phase = 'in trs';
2546     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2547     $phase = 'in tbodys';
2548     $has_tfoot = 1;
2549     } else {
2550     $self->{onerror}->(node => $node, type => 'element not allowed');
2551     }
2552     } elsif ($phase eq 'in colgroup') {
2553     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2554     $phase = 'in colgroup';
2555     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2556     $phase = 'after thead';
2557     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2558     $phase = 'in tbodys';
2559     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2560     $phase = 'in trs';
2561     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2562     $phase = 'in tbodys';
2563     $has_tfoot = 1;
2564     } else {
2565     $self->{onerror}->(node => $node, type => 'element not allowed');
2566     }
2567     } elsif ($phase eq 'before caption') {
2568     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2569     $phase = 'in colgroup';
2570     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2571     $phase = 'in colgroup';
2572     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2573     $phase = 'after thead';
2574     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2575     $phase = 'in tbodys';
2576     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2577     $phase = 'in trs';
2578     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2579     $phase = 'in tbodys';
2580     $has_tfoot = 1;
2581     } else {
2582     $self->{onerror}->(node => $node, type => 'element not allowed');
2583     }
2584     } else { # after tfoot
2585     $self->{onerror}->(node => $node, type => 'element not allowed');
2586     }
2587     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2588     unshift @nodes, @$sib;
2589     push @$new_todos, @$ch;
2590     } elsif ($nt == 3 or $nt == 4) {
2591     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2592     $self->{onerror}->(node => $node, type => 'character not allowed');
2593     }
2594     } elsif ($nt == 5) {
2595     unshift @nodes, @{$node->child_nodes};
2596     }
2597     }
2598    
2599     ## Table model errors
2600     require Whatpm::HTMLTable;
2601     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2602     my %opt = @_;
2603     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2604     });
2605     push @{$self->{return}->{table}}, $todo->{node};
2606    
2607     return ($new_todos);
2608     },
2609     };
2610    
2611     $Element->{$HTML_NS}->{caption} = {
2612     attrs_checker => $GetHTMLAttrsChecker->({}),
2613     checker => $HTMLSignificantStrictlyInlineChecker,
2614     };
2615    
2616     $Element->{$HTML_NS}->{colgroup} = {
2617     attrs_checker => $GetHTMLAttrsChecker->({
2618     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2619     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2620     ## TODO: "attribute not supported" if |col|.
2621     ## ISSUE: MUST NOT if any |col|?
2622     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2623     }),
2624     checker => sub {
2625     my ($self, $todo) = @_;
2626     my $el = $todo->{node};
2627     my $new_todos = [];
2628     my @nodes = (@{$el->child_nodes});
2629    
2630     while (@nodes) {
2631     my $node = shift @nodes;
2632     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2633    
2634     my $nt = $node->node_type;
2635     if ($nt == 1) {
2636     my $node_ns = $node->namespace_uri;
2637     $node_ns = '' unless defined $node_ns;
2638     my $node_ln = $node->manakai_local_name;
2639     ## NOTE: |minuses| list is not checked since redundant
2640 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2641     #
2642     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2643 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2644     }
2645     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2646     unshift @nodes, @$sib;
2647     push @$new_todos, @$ch;
2648     } elsif ($nt == 3 or $nt == 4) {
2649     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2650     $self->{onerror}->(node => $node, type => 'character not allowed');
2651     }
2652     } elsif ($nt == 5) {
2653     unshift @nodes, @{$node->child_nodes};
2654     }
2655     }
2656     return ($new_todos);
2657     },
2658     };
2659    
2660     $Element->{$HTML_NS}->{col} = {
2661     attrs_checker => $GetHTMLAttrsChecker->({
2662     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2663     }),
2664     checker => $HTMLEmptyChecker,
2665     };
2666    
2667     $Element->{$HTML_NS}->{tbody} = {
2668     attrs_checker => $GetHTMLAttrsChecker->({}),
2669     checker => sub {
2670     my ($self, $todo) = @_;
2671     my $el = $todo->{node};
2672     my $new_todos = [];
2673     my @nodes = (@{$el->child_nodes});
2674    
2675     my $has_tr;
2676     while (@nodes) {
2677     my $node = shift @nodes;
2678     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2679    
2680     my $nt = $node->node_type;
2681     if ($nt == 1) {
2682     my $node_ns = $node->namespace_uri;
2683     $node_ns = '' unless defined $node_ns;
2684     my $node_ln = $node->manakai_local_name;
2685     ## NOTE: |minuses| list is not checked since redundant
2686 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2687     #
2688     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2689 wakaba 1.1 $has_tr = 1;
2690     } else {
2691     $self->{onerror}->(node => $node, type => 'element not allowed');
2692     }
2693     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2694     unshift @nodes, @$sib;
2695     push @$new_todos, @$ch;
2696     } elsif ($nt == 3 or $nt == 4) {
2697     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2698     $self->{onerror}->(node => $node, type => 'character not allowed');
2699     }
2700     } elsif ($nt == 5) {
2701     unshift @nodes, @{$node->child_nodes};
2702     }
2703     }
2704     unless ($has_tr) {
2705     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2706     }
2707     return ($new_todos);
2708     },
2709     };
2710    
2711     $Element->{$HTML_NS}->{thead} = {
2712     attrs_checker => $GetHTMLAttrsChecker->({}),
2713     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2714     };
2715    
2716     $Element->{$HTML_NS}->{tfoot} = {
2717     attrs_checker => $GetHTMLAttrsChecker->({}),
2718     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2719     };
2720    
2721     $Element->{$HTML_NS}->{tr} = {
2722     attrs_checker => $GetHTMLAttrsChecker->({}),
2723     checker => sub {
2724     my ($self, $todo) = @_;
2725     my $el = $todo->{node};
2726     my $new_todos = [];
2727     my @nodes = (@{$el->child_nodes});
2728    
2729     my $has_td;
2730     while (@nodes) {
2731     my $node = shift @nodes;
2732     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2733    
2734     my $nt = $node->node_type;
2735     if ($nt == 1) {
2736     my $node_ns = $node->namespace_uri;
2737     $node_ns = '' unless defined $node_ns;
2738     my $node_ln = $node->manakai_local_name;
2739     ## NOTE: |minuses| list is not checked since redundant
2740 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2741     #
2742     } elsif ($node_ns eq $HTML_NS and
2743     ($node_ln eq 'td' or $node_ln eq 'th')) {
2744 wakaba 1.1 $has_td = 1;
2745     } else {
2746     $self->{onerror}->(node => $node, type => 'element not allowed');
2747     }
2748     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2749     unshift @nodes, @$sib;
2750     push @$new_todos, @$ch;
2751     } elsif ($nt == 3 or $nt == 4) {
2752     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2753     $self->{onerror}->(node => $node, type => 'character not allowed');
2754     }
2755     } elsif ($nt == 5) {
2756     unshift @nodes, @{$node->child_nodes};
2757     }
2758     }
2759     unless ($has_td) {
2760     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2761     }
2762     return ($new_todos);
2763     },
2764     };
2765    
2766     $Element->{$HTML_NS}->{td} = {
2767     attrs_checker => $GetHTMLAttrsChecker->({
2768     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2769     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2770     }),
2771     checker => $HTMLBlockOrInlineChecker,
2772     };
2773    
2774     $Element->{$HTML_NS}->{th} = {
2775     attrs_checker => $GetHTMLAttrsChecker->({
2776     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2777     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2778     scope => $GetHTMLEnumeratedAttrChecker
2779     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2780     }),
2781     checker => $HTMLBlockOrInlineChecker,
2782     };
2783    
2784     ## TODO: forms
2785 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2786 wakaba 1.1
2787     $Element->{$HTML_NS}->{script} = {
2788 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2789 wakaba 1.1 src => $HTMLURIAttrChecker,
2790     defer => $GetHTMLBooleanAttrChecker->('defer'),
2791     async => $GetHTMLBooleanAttrChecker->('async'),
2792     type => $HTMLIMTAttrChecker,
2793 wakaba 1.9 }),
2794 wakaba 1.1 checker => sub {
2795     my ($self, $todo) = @_;
2796    
2797     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2798     return $HTMLEmptyChecker->($self, $todo);
2799     } else {
2800     ## NOTE: No content model conformance in HTML5 spec.
2801     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2802     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2803     if ((defined $type and $type eq '') or
2804     (defined $language and $language eq '')) {
2805     $type = 'text/javascript';
2806     } elsif (defined $type) {
2807     #
2808     } elsif (defined $language) {
2809     $type = 'text/' . $language;
2810     } else {
2811     $type = 'text/javascript';
2812     }
2813     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2814     type => 'script:'.$type); ## TODO: $type normalization
2815     return $AnyChecker->($self, $todo);
2816     }
2817     },
2818     };
2819    
2820     ## NOTE: When script is disabled.
2821     $Element->{$HTML_NS}->{noscript} = {
2822 wakaba 1.3 attrs_checker => sub {
2823     my ($self, $todo) = @_;
2824    
2825     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2826     ## since the later is not invoked when the |noscript| is used as a
2827     ## transparent element.
2828     unless ($todo->{node}->owner_document->manakai_is_html) {
2829     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2830     }
2831    
2832     $GetHTMLAttrsChecker->({})->($self, $todo);
2833     },
2834 wakaba 1.1 checker => sub {
2835     my ($self, $todo) = @_;
2836    
2837 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2838     my $new_todos = [];
2839     my @nodes = (@{$todo->{node}->child_nodes});
2840    
2841     while (@nodes) {
2842     my $node = shift @nodes;
2843     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2844    
2845     my $nt = $node->node_type;
2846     if ($nt == 1) {
2847     my $node_ns = $node->namespace_uri;
2848     $node_ns = '' unless defined $node_ns;
2849     my $node_ln = $node->manakai_local_name;
2850 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2851     #
2852     } elsif ($node_ns eq $HTML_NS) {
2853 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2854     #
2855     } elsif ($node_ln eq 'meta') {
2856 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2857     #
2858 wakaba 1.3 } else {
2859 wakaba 1.5 $self->{onerror}->(node => $node,
2860     type => 'element not allowed');
2861 wakaba 1.3 }
2862     } else {
2863     $self->{onerror}->(node => $node, type => 'element not allowed');
2864     }
2865     } else {
2866     $self->{onerror}->(node => $node, type => 'element not allowed');
2867     }
2868    
2869     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2870     unshift @nodes, @$sib;
2871     push @$new_todos, @$ch;
2872     } elsif ($nt == 3 or $nt == 4) {
2873     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2874     $self->{onerror}->(node => $node, type => 'character not allowed');
2875     }
2876     } elsif ($nt == 5) {
2877     unshift @nodes, @{$node->child_nodes};
2878     }
2879     }
2880     return ($new_todos);
2881     } else {
2882     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2883     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2884     push @$sib, $end;
2885     return ($sib, $ch);
2886     }
2887 wakaba 1.1 },
2888     };
2889 wakaba 1.3
2890     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2891 wakaba 1.1
2892     $Element->{$HTML_NS}->{'event-source'} = {
2893     attrs_checker => $GetHTMLAttrsChecker->({
2894     src => $HTMLURIAttrChecker,
2895     }),
2896     checker => $HTMLEmptyChecker,
2897     };
2898    
2899     $Element->{$HTML_NS}->{details} = {
2900     attrs_checker => $GetHTMLAttrsChecker->({
2901     open => $GetHTMLBooleanAttrChecker->('open'),
2902     }),
2903     checker => sub {
2904     my ($self, $todo) = @_;
2905    
2906     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2907     my ($sib, $ch)
2908     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2909     ->($self, $todo);
2910     push @$sib, $end;
2911     return ($sib, $ch);
2912     },
2913     };
2914    
2915     $Element->{$HTML_NS}->{datagrid} = {
2916     attrs_checker => $GetHTMLAttrsChecker->({
2917     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2918     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2919     }),
2920     checker => sub {
2921     my ($self, $todo) = @_;
2922     my $el = $todo->{node};
2923     my $new_todos = [];
2924     my @nodes = (@{$el->child_nodes});
2925    
2926     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2927    
2928     ## Block-table Block* | table | select | datalist | Empty
2929     my $mode = 'any';
2930     while (@nodes) {
2931     my $node = shift @nodes;
2932     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2933    
2934     my $nt = $node->node_type;
2935     if ($nt == 1) {
2936     my $node_ns = $node->namespace_uri;
2937     $node_ns = '' unless defined $node_ns;
2938     my $node_ln = $node->manakai_local_name;
2939     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2940 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2941     #
2942     } elsif ($mode eq 'block') {
2943 wakaba 1.1 $not_allowed = 1
2944     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2945     } elsif ($mode eq 'any') {
2946     if ($node_ns eq $HTML_NS and
2947     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2948     $mode = 'none';
2949     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2950     $mode = 'block';
2951     } else {
2952     $not_allowed = 1;
2953     }
2954     } else {
2955     $not_allowed = 1;
2956     }
2957     $self->{onerror}->(node => $node, type => 'element not allowed')
2958     if $not_allowed;
2959     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2960     unshift @nodes, @$sib;
2961     push @$new_todos, @$ch;
2962     } elsif ($nt == 3 or $nt == 4) {
2963     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2964     $self->{onerror}->(node => $node, type => 'character not allowed');
2965     }
2966     } elsif ($nt == 5) {
2967     unshift @nodes, @{$node->child_nodes};
2968     }
2969     }
2970    
2971     push @$new_todos, $end;
2972     return ($new_todos);
2973     },
2974     };
2975    
2976     $Element->{$HTML_NS}->{command} = {
2977     attrs_checker => $GetHTMLAttrsChecker->({
2978     checked => $GetHTMLBooleanAttrChecker->('checked'),
2979     default => $GetHTMLBooleanAttrChecker->('default'),
2980     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2981     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2982     icon => $HTMLURIAttrChecker,
2983     label => sub { }, ## NOTE: No conformance creteria
2984     radiogroup => sub { }, ## NOTE: No conformance creteria
2985     ## NOTE: |title| has special semantics, but no syntactical difference
2986     type => sub {
2987     my ($self, $attr) = @_;
2988     my $value = $attr->value;
2989     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2990     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2991     }
2992     },
2993     }),
2994     checker => $HTMLEmptyChecker,
2995     };
2996    
2997     $Element->{$HTML_NS}->{menu} = {
2998     attrs_checker => $GetHTMLAttrsChecker->({
2999     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3000     id => sub {
3001     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3002     my ($self, $attr) = @_;
3003     my $value = $attr->value;
3004     if (length $value > 0) {
3005     if ($self->{id}->{$value}) {
3006     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3007     push @{$self->{id}->{$value}}, $attr;
3008     } else {
3009     $self->{id}->{$value} = [$attr];
3010     }
3011     } else {
3012     ## NOTE: MUST contain at least one character
3013     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3014     }
3015     if ($value =~ /[\x09-\x0D\x20]/) {
3016     $self->{onerror}->(node => $attr, type => 'space in ID');
3017     }
3018     $self->{menu}->{$value} ||= $attr;
3019     ## ISSUE: <menu id=""><p contextmenu=""> match?
3020     },
3021     label => sub { }, ## NOTE: No conformance creteria
3022     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3023     }),
3024     checker => sub {
3025     my ($self, $todo) = @_;
3026     my $el = $todo->{node};
3027     my $new_todos = [];
3028     my @nodes = (@{$el->child_nodes});
3029    
3030     my $content = 'li or inline';
3031     while (@nodes) {
3032     my $node = shift @nodes;
3033     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3034    
3035     my $nt = $node->node_type;
3036     if ($nt == 1) {
3037     my $node_ns = $node->namespace_uri;
3038     $node_ns = '' unless defined $node_ns;
3039     my $node_ln = $node->manakai_local_name;
3040     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3041 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3042     #
3043     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3044 wakaba 1.1 if ($content eq 'inline') {
3045     $not_allowed = 1;
3046     } elsif ($content eq 'li or inline') {
3047     $content = 'li';
3048     }
3049     } else {
3050     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3051     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3052     $content = 'inline';
3053     } else {
3054     $not_allowed = 1;
3055     }
3056     }
3057     $self->{onerror}->(node => $node, type => 'element not allowed')
3058     if $not_allowed;
3059     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3060     unshift @nodes, @$sib;
3061     push @$new_todos, @$ch;
3062     } elsif ($nt == 3 or $nt == 4) {
3063     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3064     if ($content eq 'li') {
3065     $self->{onerror}->(node => $node, type => 'character not allowed');
3066     } elsif ($content eq 'li or inline') {
3067     $content = 'inline';
3068     }
3069     }
3070     } elsif ($nt == 5) {
3071     unshift @nodes, @{$node->child_nodes};
3072     }
3073     }
3074    
3075     for (@$new_todos) {
3076     $_->{inline} = 1;
3077     }
3078     return ($new_todos);
3079     },
3080 wakaba 1.8 };
3081    
3082     $Element->{$HTML_NS}->{datatemplate} = {
3083     attrs_checker => $GetHTMLAttrsChecker->({}),
3084     checker => sub {
3085     my ($self, $todo) = @_;
3086     my $el = $todo->{node};
3087     my $new_todos = [];
3088     my @nodes = (@{$el->child_nodes});
3089    
3090     while (@nodes) {
3091     my $node = shift @nodes;
3092     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3093    
3094     my $nt = $node->node_type;
3095     if ($nt == 1) {
3096     my $node_ns = $node->namespace_uri;
3097     $node_ns = '' unless defined $node_ns;
3098     my $node_ln = $node->manakai_local_name;
3099     ## NOTE: |minuses| list is not checked since redundant
3100     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3101     #
3102     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3103     $self->{onerror}->(node => $node,
3104     type => 'element not allowed:datatemplate');
3105     }
3106     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3107     unshift @nodes, @$sib;
3108     push @$new_todos, @$ch;
3109     } elsif ($nt == 3 or $nt == 4) {
3110     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3111     $self->{onerror}->(node => $node, type => 'character not allowed');
3112     }
3113     } elsif ($nt == 5) {
3114     unshift @nodes, @{$node->child_nodes};
3115     }
3116     }
3117     return ($new_todos);
3118     },
3119     is_xml_root => 1,
3120     };
3121    
3122     $Element->{$HTML_NS}->{rule} = {
3123     attrs_checker => $GetHTMLAttrsChecker->({
3124     ## TODO: |condition| attribute
3125     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3126     }),
3127     checker => sub {
3128     my ($self, $todo) = @_;
3129    
3130     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3131     my ($sib, $ch) = $AnyChecker->($self, $todo);
3132     push @$sib, $end;
3133     return ($sib, $ch);
3134     },
3135     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3136     ## is applied to some conforming data, results in a conforming DOM tree.":
3137     ## We don't check against this.
3138     };
3139    
3140     $Element->{$HTML_NS}->{nest} = {
3141     attrs_checker => $GetHTMLAttrsChecker->({
3142     ## TODO: |filter| attribute
3143     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3144     }),
3145     checker => $HTMLEmptyChecker,
3146 wakaba 1.1 };
3147    
3148     $Element->{$HTML_NS}->{legend} = {
3149     attrs_checker => $GetHTMLAttrsChecker->({}),
3150     checker => sub {
3151     my ($self, $todo) = @_;
3152    
3153     my $parent = $todo->{node}->manakai_parent_element;
3154     if (defined $parent) {
3155     my $nsuri = $parent->namespace_uri;
3156     $nsuri = '' unless defined $nsuri;
3157     my $ln = $parent->manakai_local_name;
3158     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3159     return $HTMLInlineChecker->($self, $todo);
3160     } else {
3161     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
3162     }
3163     } else {
3164     return $HTMLInlineChecker->($self, $todo);
3165     }
3166    
3167     ## ISSUE: Content model is defined only for fieldset/legend,
3168     ## details/legend, and figure/legend.
3169     },
3170     };
3171    
3172     $Element->{$HTML_NS}->{div} = {
3173     attrs_checker => $GetHTMLAttrsChecker->({}),
3174     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3175     };
3176    
3177     $Element->{$HTML_NS}->{font} = {
3178     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3179     checker => $HTMLTransparentChecker,
3180     };
3181    
3182     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3183    
3184     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24