/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.15 - (hide annotations) (download)
Sun Nov 4 03:52:51 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.14: +6 -4 lines
++ whatpm/t/ChangeLog	4 Nov 2007 03:50:55 -0000
	* content-model-2.dat: <a><img ismap></a> is no longer
	conforming (HTML5 revision 1115).

2007-11-04  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	4 Nov 2007 03:48:04 -0000
	* HTML.pm: Don't allow <a><img ismap></a> (<a> w/o href="") (HTML5
	revision 1115).

2007-11-04  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     ## Strictly inline-level content
252     my $HTMLStrictlyInlineChecker = sub {
253     my ($self, $todo) = @_;
254     my $el = $todo->{node};
255     my $new_todos = [];
256     my @nodes = (@{$el->child_nodes});
257    
258     while (@nodes) {
259     my $node = shift @nodes;
260     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
261    
262     my $nt = $node->node_type;
263     if ($nt == 1) {
264     my $node_ns = $node->namespace_uri;
265     $node_ns = '' unless defined $node_ns;
266     my $node_ln = $node->manakai_local_name;
267     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
268     $not_allowed = 1
269 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
270     $self->{pluses}->{$node_ns}->{$node_ln};
271 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
272     if $not_allowed;
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 5) {
277     unshift @nodes, @{$node->child_nodes};
278     }
279     }
280    
281     for (@$new_todos) {
282     $_->{inline} = 1;
283     $_->{strictly_inline} = 1;
284     }
285     return ($new_todos);
286     }; # $HTMLStrictlyInlineChecker
287    
288 wakaba 1.8 ## Inline-level or strictly inline-level content
289 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
290     my ($self, $todo) = @_;
291     my $el = $todo->{node};
292     my $new_todos = [];
293     my @nodes = (@{$el->child_nodes});
294    
295     while (@nodes) {
296     my $node = shift @nodes;
297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
298    
299     my $nt = $node->node_type;
300     if ($nt == 1) {
301     my $node_ns = $node->namespace_uri;
302     $node_ns = '' unless defined $node_ns;
303     my $node_ln = $node->manakai_local_name;
304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
305     if ($todo->{strictly_inline}) {
306     $not_allowed = 1
307 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308     $self->{pluses}->{$node_ns}->{$node_ln};
309 wakaba 1.1 } else {
310     $not_allowed = 1
311     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
312 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
313     $self->{pluses}->{$node_ns}->{$node_ln};
314 wakaba 1.1 }
315     $self->{onerror}->(node => $node, type => 'element not allowed')
316     if $not_allowed;
317     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
318     unshift @nodes, @$sib;
319     push @$new_todos, @$ch;
320     } elsif ($nt == 5) {
321     unshift @nodes, @{$node->child_nodes};
322     }
323     }
324    
325     for (@$new_todos) {
326     $_->{inline} = 1;
327     $_->{strictly_inline} = 1;
328     }
329     return ($new_todos);
330     }; # $HTMLInlineOrStrictlyInlineChecker
331    
332 wakaba 1.13 ## TODO: New "significant" SHOULD-level requirement (revision 1114).
333 wakaba 1.1
334 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
335 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
336     my ($self, $todo) = @_;
337     my $el = $todo->{node};
338     my $new_todos = [];
339     my @nodes = (@{$el->child_nodes});
340    
341     my $content = 'block-or-inline'; # or 'block' or 'inline'
342     my @block_not_inline;
343     while (@nodes) {
344     my $node = shift @nodes;
345     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
346    
347 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
348     ## is conforming or not.
349    
350 wakaba 1.1 my $nt = $node->node_type;
351     if ($nt == 1) {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
356     if ($content eq 'block') {
357     $not_allowed = 1
358 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
359     $self->{pluses}->{$node_ns}->{$node_ln};
360 wakaba 1.1 } elsif ($content eq 'inline') {
361     $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
364     $self->{pluses}->{$node_ns}->{$node_ln};
365 wakaba 1.1 } else {
366     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
367     my $is_inline
368     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
369     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
370    
371     push @block_not_inline, $node
372     if $is_block and not $is_inline and not $not_allowed;
373 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
374 wakaba 1.1 $content = 'inline';
375     for (@block_not_inline) {
376     $self->{onerror}->(node => $_, type => 'element not allowed');
377     }
378     $not_allowed = 1 unless $is_inline;
379     }
380     }
381     $self->{onerror}->(node => $node, type => 'element not allowed')
382     if $not_allowed;
383     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
384     unshift @nodes, @$sib;
385     push @$new_todos, @$ch;
386     } elsif ($nt == 3 or $nt == 4) {
387     if ($node->data =~ /[^\x09-\x0D\x20]/) {
388     if ($content eq 'block') {
389     $self->{onerror}->(node => $node, type => 'character not allowed');
390     } else {
391     $content = 'inline';
392     for (@block_not_inline) {
393     $self->{onerror}->(node => $_, type => 'element not allowed');
394     }
395     }
396     }
397     } elsif ($nt == 5) {
398     unshift @nodes, @{$node->child_nodes};
399     }
400     }
401    
402     if ($content eq 'inline') {
403     for (@$new_todos) {
404     $_->{inline} = 1;
405     }
406     }
407     return ($new_todos);
408     };
409    
410     ## Zero or more XXX element, then either block-level or inline-level
411     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
412     my ($elnsuri, $ellname) = @_;
413     return sub {
414     my ($self, $todo) = @_;
415     my $el = $todo->{node};
416     my $new_todos = [];
417     my @nodes = (@{$el->child_nodes});
418    
419     my $has_non_style;
420     my $content = 'block-or-inline'; # or 'block' or 'inline'
421     my @block_not_inline;
422     while (@nodes) {
423     my $node = shift @nodes;
424     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
425    
426     my $nt = $node->node_type;
427     if ($nt == 1) {
428     my $node_ns = $node->namespace_uri;
429     $node_ns = '' unless defined $node_ns;
430     my $node_ln = $node->manakai_local_name;
431     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
432     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
433     $not_allowed = 1 if $has_non_style;
434     if ($ellname eq 'style' and
435     not $node->has_attribute_ns (undef, 'scoped')) {
436     $not_allowed = 1;
437     }
438     } elsif ($content eq 'block') {
439     $has_non_style = 1;
440     $not_allowed = 1
441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
442     $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 } elsif ($content eq 'inline') {
444     $has_non_style = 1;
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
447 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
448     $self->{pluses}->{$node_ns}->{$node_ln};
449 wakaba 1.1 } else {
450 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
452     my $is_inline
453     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455    
456     push @block_not_inline, $node
457     if $is_block and not $is_inline and not $not_allowed;
458 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
459 wakaba 1.1 $content = 'inline';
460     for (@block_not_inline) {
461     $self->{onerror}->(node => $_, type => 'element not allowed');
462     }
463     $not_allowed = 1 unless $is_inline;
464     }
465     }
466     $self->{onerror}->(node => $node, type => 'element not allowed')
467     if $not_allowed;
468     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
469     unshift @nodes, @$sib;
470     push @$new_todos, @$ch;
471     } elsif ($nt == 3 or $nt == 4) {
472     if ($node->data =~ /[^\x09-\x0D\x20]/) {
473     $has_non_style = 1;
474     if ($content eq 'block') {
475     $self->{onerror}->(node => $node, type => 'character not allowed');
476     } else {
477     $content = 'inline';
478     for (@block_not_inline) {
479     $self->{onerror}->(node => $_, type => 'element not allowed');
480     }
481     }
482     }
483     } elsif ($nt == 5) {
484     unshift @nodes, @{$node->child_nodes};
485     }
486     }
487    
488     if ($content eq 'inline') {
489     for (@$new_todos) {
490     $_->{inline} = 1;
491     }
492     }
493     return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
498    
499     our $AttrChecker;
500    
501     my $GetHTMLEnumeratedAttrChecker = sub {
502     my $states = shift; # {value => conforming ? 1 : -1}
503     return sub {
504     my ($self, $attr) = @_;
505     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
506     if ($states->{$value} > 0) {
507     #
508     } elsif ($states->{$value}) {
509     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
510     } else {
511     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
512     }
513     };
514     }; # $GetHTMLEnumeratedAttrChecker
515    
516     my $GetHTMLBooleanAttrChecker = sub {
517     my $local_name = shift;
518     return sub {
519     my ($self, $attr) = @_;
520     my $value = $attr->value;
521     unless ($value eq $local_name or $value eq '') {
522     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
523     }
524     };
525     }; # $GetHTMLBooleanAttrChecker
526    
527 wakaba 1.8 ## Unordered set of space-separated tokens
528     my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
529     my ($self, $attr) = @_;
530     my %word;
531     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
532     unless ($word{$word}) {
533     $word{$word} = 1;
534     } else {
535     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
536     }
537     }
538     }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
539    
540 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
541     ## whose allowed values are defined by the section on link types)
542     my $HTMLLinkTypesAttrChecker = sub {
543 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
544 wakaba 1.1 my %word;
545     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
546     unless ($word{$word}) {
547     $word{$word} = 1;
548     } else {
549     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
550     }
551     }
552     ## NOTE: Case sensitive match (since HTML5 spec does not say link
553     ## types are case-insensitive and it says "The value should not
554     ## be confusingly similar to any other defined value (e.g.
555     ## differing only in case).").
556     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
557     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
558     ## values to be used conformingly.
559     require Whatpm::_LinkTypeList;
560     our $LinkType;
561     for my $word (keys %word) {
562     my $def = $LinkType->{$word};
563     if (defined $def) {
564     if ($def->{status} eq 'accepted') {
565     if (defined $def->{effect}->[$a_or_area]) {
566     #
567     } else {
568     $self->{onerror}->(node => $attr,
569     type => 'link type:bad context:'.$word);
570     }
571     } elsif ($def->{status} eq 'proposal') {
572     $self->{onerror}->(node => $attr, level => 's',
573     type => 'link type:proposed:'.$word);
574     } else { # rejected or synonym
575     $self->{onerror}->(node => $attr,
576     type => 'link type:non-conforming:'.$word);
577     }
578 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
579     if ($word eq 'alternate') {
580     #
581     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
582     $todo->{has_hyperlink_link_type} = 1;
583     }
584     }
585 wakaba 1.1 if ($def->{unique}) {
586     unless ($self->{has_link_type}->{$word}) {
587     $self->{has_link_type}->{$word} = 1;
588     } else {
589     $self->{onerror}->(node => $attr,
590     type => 'link type:duplicate:'.$word);
591     }
592     }
593     } else {
594     $self->{onerror}->(node => $attr, level => 'unsupported',
595     type => 'link type:'.$word);
596     }
597     }
598 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
599     if $word{alternate} and not $word{stylesheet};
600 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
601     ## says that using both X-Pingback: header field and HTML
602     ## <link rel=pingback> is deprecated and if both appears they
603     ## SHOULD contain exactly the same value.
604     ## ISSUE: Pingback 1.0 specification defines the exact representation
605     ## of its link element, which cannot be tested by the current arch.
606     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
607     ## include any string that matches to the pattern for the rel=pingback link,
608     ## which again inpossible to test.
609     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
610 wakaba 1.12
611     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
612     ## ISSUE: <link rel="up up"> is non-conforming, since rel="" is unordered.
613 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
614    
615     ## URI (or IRI)
616     my $HTMLURIAttrChecker = sub {
617     my ($self, $attr) = @_;
618     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
619     my $value = $attr->value;
620     Whatpm::URIChecker->check_iri_reference ($value, sub {
621     my %opt = @_;
622     $self->{onerror}->(node => $attr, level => $opt{level},
623     type => 'URI::'.$opt{type}.
624     (defined $opt{position} ? ':'.$opt{position} : ''));
625     });
626 wakaba 1.4 $self->{has_uri_attr} = 1;
627 wakaba 1.1 }; # $HTMLURIAttrChecker
628    
629     ## A space separated list of one or more URIs (or IRIs)
630     my $HTMLSpaceURIsAttrChecker = sub {
631     my ($self, $attr) = @_;
632     my $i = 0;
633     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
634     Whatpm::URIChecker->check_iri_reference ($value, sub {
635     my %opt = @_;
636     $self->{onerror}->(node => $attr, level => $opt{level},
637 wakaba 1.2 type => 'URIs:'.':'.
638     $opt{type}.':'.$i.
639 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
640     });
641     $i++;
642     }
643     ## ISSUE: Relative references?
644     ## ISSUE: Leading or trailing white spaces are conformant?
645     ## ISSUE: A sequence of white space characters are conformant?
646     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
647     ## NOTE: Duplication seems not an error.
648 wakaba 1.4 $self->{has_uri_attr} = 1;
649 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
650    
651     my $HTMLDatetimeAttrChecker = sub {
652     my ($self, $attr) = @_;
653     my $value = $attr->value;
654     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
655     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
656     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
657     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
658     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
659     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
660     if $d < 1 or
661     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
662     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
663     if $M == 2 and $d == 29 and
664     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
665     } else {
666     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
667     }
668     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
669     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
670     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
671     if defined $s and $s > 59;
672     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
673     if $zh > 23;
674     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
675     if $zm > 59;
676     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
677     } else {
678     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
679     }
680     }; # $HTMLDatetimeAttrChecker
681    
682     my $HTMLIntegerAttrChecker = sub {
683     my ($self, $attr) = @_;
684     my $value = $attr->value;
685     unless ($value =~ /\A-?[0-9]+\z/) {
686     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
687     }
688     }; # $HTMLIntegerAttrChecker
689    
690     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
691     my $range_check = shift;
692     return sub {
693     my ($self, $attr) = @_;
694     my $value = $attr->value;
695     if ($value =~ /\A[0-9]+\z/) {
696     unless ($range_check->($value + 0)) {
697     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
698     }
699     } else {
700     $self->{onerror}->(node => $attr,
701     type => 'nninteger:syntax error');
702     }
703     };
704     }; # $GetHTMLNonNegativeIntegerAttrChecker
705    
706     my $GetHTMLFloatingPointNumberAttrChecker = sub {
707     my $range_check = shift;
708     return sub {
709     my ($self, $attr) = @_;
710     my $value = $attr->value;
711     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
712     unless ($range_check->($value + 0)) {
713     $self->{onerror}->(node => $attr, type => 'float:out of range');
714     }
715     } else {
716     $self->{onerror}->(node => $attr,
717     type => 'float:syntax error');
718     }
719     };
720     }; # $GetHTMLFloatingPointNumberAttrChecker
721    
722     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
723     ## ISSUE: RFC 2046 does not define syntax of media types.
724     ## ISSUE: The definition of "a valid MIME type" is unknown.
725     ## Syntactical correctness?
726     my $HTMLIMTAttrChecker = sub {
727     my ($self, $attr) = @_;
728     my $value = $attr->value;
729     ## ISSUE: RFC 2045 Content-Type header field allows insertion
730     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
731     ## ISSUE: RFC 2231 extension? Maybe no.
732     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
733     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
734     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
735     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
736     my @type = ($1, $2);
737     my $param = $3;
738     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
739     if (defined $2) {
740     push @type, $1 => $2;
741     } else {
742     my $n = $1;
743     my $v = $2;
744     $v =~ s/\\(.)/$1/gs;
745     push @type, $n => $v;
746     }
747     }
748     require Whatpm::IMTChecker;
749     Whatpm::IMTChecker->check_imt (sub {
750     my %opt = @_;
751     $self->{onerror}->(node => $attr, level => $opt{level},
752     type => 'IMT:'.$opt{type});
753     }, @type);
754     } else {
755     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
756     }
757     }; # $HTMLIMTAttrChecker
758    
759     my $HTMLLanguageTagAttrChecker = sub {
760 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
761    
762 wakaba 1.1 my ($self, $attr) = @_;
763 wakaba 1.6 my $value = $attr->value;
764     require Whatpm::LangTag;
765     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
766     my %opt = @_;
767     my $type = 'LangTag:'.$opt{type};
768     $type .= ':' . $opt{subtag} if defined $opt{subtag};
769     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
770     level => $opt{level});
771     });
772 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
773 wakaba 1.6
774     ## TODO: testdata
775 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
776    
777     ## "A valid media query [MQ]"
778     my $HTMLMQAttrChecker = sub {
779     my ($self, $attr) = @_;
780     $self->{onerror}->(node => $attr, level => 'unsupported',
781     type => 'media query');
782     ## ISSUE: What is "a valid media query"?
783     }; # $HTMLMQAttrChecker
784    
785     my $HTMLEventHandlerAttrChecker = sub {
786     my ($self, $attr) = @_;
787     $self->{onerror}->(node => $attr, level => 'unsupported',
788     type => 'event handler');
789     ## TODO: MUST contain valid ECMAScript code matching the
790     ## ECMAScript |FunctionBody| production. [ECMA262]
791     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
792     ## ISSUE: Automatic semicolon insertion does not apply?
793     ## ISSUE: Other script languages?
794     }; # $HTMLEventHandlerAttrChecker
795    
796     my $HTMLUsemapAttrChecker = sub {
797     my ($self, $attr) = @_;
798     ## MUST be a valid hashed ID reference to a |map| element
799     my $value = $attr->value;
800     if ($value =~ s/^#//) {
801     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
802     push @{$self->{usemap}}, [$value => $attr];
803     } else {
804     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
805     }
806     ## NOTE: Space characters in hashed ID references are conforming.
807     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
808     }; # $HTMLUsemapAttrChecker
809    
810     my $HTMLTargetAttrChecker = sub {
811     my ($self, $attr) = @_;
812     my $value = $attr->value;
813     if ($value =~ /^_/) {
814     $value = lc $value; ## ISSUE: ASCII case-insentitive?
815     unless ({
816     _self => 1, _parent => 1, _top => 1,
817     }->{$value}) {
818     $self->{onerror}->(node => $attr,
819     type => 'reserved browsing context name');
820     }
821     } else {
822     #$ ISSUE: An empty string is conforming?
823     }
824     }; # $HTMLTargetAttrChecker
825    
826     my $HTMLAttrChecker = {
827     id => sub {
828     ## NOTE: |map| has its own variant of |id=""| checker
829     my ($self, $attr) = @_;
830     my $value = $attr->value;
831     if (length $value > 0) {
832     if ($self->{id}->{$value}) {
833     $self->{onerror}->(node => $attr, type => 'duplicate ID');
834     push @{$self->{id}->{$value}}, $attr;
835     } else {
836     $self->{id}->{$value} = [$attr];
837     }
838     if ($value =~ /[\x09-\x0D\x20]/) {
839     $self->{onerror}->(node => $attr, type => 'space in ID');
840     }
841     } else {
842     ## NOTE: MUST contain at least one character
843     $self->{onerror}->(node => $attr, type => 'empty attribute value');
844     }
845     },
846     title => sub {}, ## NOTE: No conformance creteria
847     lang => sub {
848     my ($self, $attr) = @_;
849 wakaba 1.6 my $value = $attr->value;
850     if ($value eq '') {
851     #
852     } else {
853     require Whatpm::LangTag;
854     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
855     my %opt = @_;
856     my $type = 'LangTag:'.$opt{type};
857     $type .= ':' . $opt{subtag} if defined $opt{subtag};
858     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
859     level => $opt{level});
860     });
861     }
862 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
863     unless ($attr->owner_document->manakai_is_html) {
864     $self->{onerror}->(node => $attr, type => 'in XML:lang');
865     }
866 wakaba 1.6
867     ## TODO: test data
868 wakaba 1.1 },
869     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
870     class => sub {
871     my ($self, $attr) = @_;
872     my %word;
873     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
874     unless ($word{$word}) {
875     $word{$word} = 1;
876     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
877     } else {
878     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
879     }
880     }
881     },
882     contextmenu => sub {
883     my ($self, $attr) = @_;
884     my $value = $attr->value;
885     push @{$self->{contextmenu}}, [$value => $attr];
886     ## ISSUE: "The value must be the ID of a menu element in the DOM."
887     ## What is "in the DOM"? A menu Element node that is not part
888     ## of the Document tree is in the DOM? A menu Element node that
889     ## belong to another Document tree is in the DOM?
890     },
891     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
892 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
893     ## TODO: ref, template, registrationmark
894 wakaba 1.1 };
895    
896     for (qw/
897     onabort onbeforeunload onblur onchange onclick oncontextmenu
898     ondblclick ondrag ondragend ondragenter ondragleave ondragover
899     ondragstart ondrop onerror onfocus onkeydown onkeypress
900     onkeyup onload onmessage onmousedown onmousemove onmouseout
901     onmouseover onmouseup onmousewheel onresize onscroll onselect
902     onsubmit onunload
903     /) {
904     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
905     }
906    
907     my $GetHTMLAttrsChecker = sub {
908     my $element_specific_checker = shift;
909     return sub {
910     my ($self, $todo) = @_;
911     for my $attr (@{$todo->{node}->attributes}) {
912     my $attr_ns = $attr->namespace_uri;
913     $attr_ns = '' unless defined $attr_ns;
914     my $attr_ln = $attr->manakai_local_name;
915     my $checker;
916     if ($attr_ns eq '') {
917     $checker = $element_specific_checker->{$attr_ln}
918     || $HTMLAttrChecker->{$attr_ln};
919     }
920     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
921     || $AttrChecker->{$attr_ns}->{''};
922     if ($checker) {
923     $checker->($self, $attr, $todo);
924     } else {
925     $self->{onerror}->(node => $attr, level => 'unsupported',
926     type => 'attribute');
927     ## ISSUE: No comformance createria for unknown attributes in the spec
928     }
929     }
930     };
931     }; # $GetHTMLAttrsChecker
932    
933     our $Element;
934     our $ElementDefault;
935     our $AnyChecker;
936    
937     $Element->{$HTML_NS}->{''} = {
938     attrs_checker => $GetHTMLAttrsChecker->({}),
939     checker => $ElementDefault->{checker},
940     };
941    
942     $Element->{$HTML_NS}->{html} = {
943     is_root => 1,
944     attrs_checker => $GetHTMLAttrsChecker->({
945     xmlns => sub {
946     my ($self, $attr) = @_;
947     my $value = $attr->value;
948     unless ($value eq $HTML_NS) {
949     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
950     }
951     unless ($attr->owner_document->manakai_is_html) {
952     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
953     ## TODO: Test
954     }
955     },
956     }),
957     checker => sub {
958     my ($self, $todo) = @_;
959     my $el = $todo->{node};
960     my $new_todos = [];
961     my @nodes = (@{$el->child_nodes});
962    
963     my $phase = 'before head';
964     while (@nodes) {
965     my $node = shift @nodes;
966     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
967    
968     my $nt = $node->node_type;
969     if ($nt == 1) {
970     my $node_ns = $node->namespace_uri;
971     $node_ns = '' unless defined $node_ns;
972     my $node_ln = $node->manakai_local_name;
973     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
974 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
975     #
976     } elsif ($phase eq 'before head') {
977 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
978     $phase = 'after head';
979     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
980     $self->{onerror}->(node => $node, type => 'ps element missing:head');
981     $phase = 'after body';
982     } else {
983     $not_allowed = 1;
984     # before head
985     }
986     } elsif ($phase eq 'after head') {
987     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
988     $phase = 'after body';
989     } else {
990     $not_allowed = 1;
991     # after head
992     }
993     } else { #elsif ($phase eq 'after body') {
994     $not_allowed = 1;
995     # after body
996     }
997     $self->{onerror}->(node => $node, type => 'element not allowed')
998     if $not_allowed;
999     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1000     unshift @nodes, @$sib;
1001     push @$new_todos, @$ch;
1002     } elsif ($nt == 3 or $nt == 4) {
1003     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1004     $self->{onerror}->(node => $node, type => 'character not allowed');
1005     }
1006     } elsif ($nt == 5) {
1007     unshift @nodes, @{$node->child_nodes};
1008     }
1009     }
1010    
1011     if ($phase eq 'before head') {
1012     $self->{onerror}->(node => $el, type => 'child element missing:head');
1013     $self->{onerror}->(node => $el, type => 'child element missing:body');
1014     } elsif ($phase eq 'after head') {
1015     $self->{onerror}->(node => $el, type => 'child element missing:body');
1016     }
1017    
1018     return ($new_todos);
1019     },
1020     };
1021    
1022     $Element->{$HTML_NS}->{head} = {
1023     attrs_checker => $GetHTMLAttrsChecker->({}),
1024     checker => sub {
1025     my ($self, $todo) = @_;
1026     my $el = $todo->{node};
1027     my $new_todos = [];
1028     my @nodes = (@{$el->child_nodes});
1029    
1030     my $has_title;
1031     my $phase = 'initial'; # 'after charset', 'after base'
1032     while (@nodes) {
1033     my $node = shift @nodes;
1034     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1035    
1036     my $nt = $node->node_type;
1037     if ($nt == 1) {
1038     my $node_ns = $node->namespace_uri;
1039     $node_ns = '' unless defined $node_ns;
1040     my $node_ln = $node->manakai_local_name;
1041     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1042 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1043     #
1044     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1045 wakaba 1.1 $phase = 'after base';
1046     unless ($has_title) {
1047     $has_title = 1;
1048     } else {
1049     $not_allowed = 1;
1050     }
1051     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1052     if ($node->has_attribute_ns (undef, 'charset')) {
1053     if ($phase eq 'initial') {
1054     $phase = 'after charset';
1055     } else {
1056     $not_allowed = 1;
1057     ## NOTE: See also |base|'s "contexts" field in the spec
1058     }
1059 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1060     $node->has_attribute_ns (undef, 'http-equiv')) {
1061     $phase = 'after base';
1062 wakaba 1.1 } else {
1063     $phase = 'after base';
1064 wakaba 1.5 $not_allowed = 1;
1065 wakaba 1.1 }
1066     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1067     if ($phase eq 'initial' or $phase eq 'after charset') {
1068     $phase = 'after base';
1069     } else {
1070     $not_allowed = 1;
1071     }
1072     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1073     $phase = 'after base';
1074     if ($node->has_attribute_ns (undef, 'scoped')) {
1075     $not_allowed = 1;
1076     }
1077     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1078     $phase = 'after base';
1079     } else {
1080     $not_allowed = 1;
1081     }
1082     $self->{onerror}->(node => $node, type => 'element not allowed')
1083     if $not_allowed;
1084 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1085 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1086     unshift @nodes, @$sib;
1087     push @$new_todos, @$ch;
1088     } elsif ($nt == 3 or $nt == 4) {
1089     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1090     $self->{onerror}->(node => $node, type => 'character not allowed');
1091     }
1092     } elsif ($nt == 5) {
1093     unshift @nodes, @{$node->child_nodes};
1094     }
1095     }
1096     unless ($has_title) {
1097     $self->{onerror}->(node => $el, type => 'child element missing:title');
1098     }
1099     return ($new_todos);
1100     },
1101     };
1102    
1103     $Element->{$HTML_NS}->{title} = {
1104     attrs_checker => $GetHTMLAttrsChecker->({}),
1105     checker => $HTMLTextChecker,
1106     };
1107    
1108     $Element->{$HTML_NS}->{base} = {
1109 wakaba 1.4 attrs_checker => sub {
1110     my ($self, $todo) = @_;
1111    
1112 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1113     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1114    
1115     if ($self->{has_uri_attr} and $has_href) {
1116 wakaba 1.4 ## ISSUE: Are these examples conforming?
1117     ## <head profile="a b c"><base href> (except for |profile|'s
1118     ## non-conformance)
1119     ## <title xml:base="relative"/><base href/> (maybe it should be)
1120     ## <unknown xmlns="relative"/><base href/> (assuming that
1121     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1122     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1123     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1124     ## <style>@import 'relative';</style><base href>
1125     ## <script>location.href = 'relative';</script><base href>
1126 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1127     ## an exception.
1128 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1129     type => 'basehref after URI attribute');
1130     }
1131 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1132 wakaba 1.4 ## ISSUE: Are these examples conforming?
1133     ## <head><title xlink:href=""/><base target="name"/></head>
1134     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1135     ## (assuming that |xbl:xbl| is allowed before |base|)
1136     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1137     ## <link href=""/><base target="name"/>
1138     ## <link rel=unknown href=""><base target=name>
1139     $self->{onerror}->(node => $todo->{node},
1140     type => 'basetarget after hyperlink');
1141     }
1142    
1143 wakaba 1.14 if (not $has_href and not $has_target) {
1144     $self->{onerror}->(node => $todo->{node},
1145     type => 'attribute missing:href|target');
1146     }
1147    
1148 wakaba 1.4 return $GetHTMLAttrsChecker->({
1149     href => $HTMLURIAttrChecker,
1150     target => $HTMLTargetAttrChecker,
1151     })->($self, $todo);
1152     },
1153 wakaba 1.1 checker => $HTMLEmptyChecker,
1154     };
1155    
1156     $Element->{$HTML_NS}->{link} = {
1157     attrs_checker => sub {
1158     my ($self, $todo) = @_;
1159     $GetHTMLAttrsChecker->({
1160     href => $HTMLURIAttrChecker,
1161 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1162 wakaba 1.1 media => $HTMLMQAttrChecker,
1163     hreflang => $HTMLLanguageTagAttrChecker,
1164     type => $HTMLIMTAttrChecker,
1165     ## NOTE: Though |title| has special semantics,
1166     ## syntactically same as the |title| as global attribute.
1167     })->($self, $todo);
1168 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1169     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1170     } else {
1171 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1172     type => 'attribute missing:href');
1173     }
1174     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1175     $self->{onerror}->(node => $todo->{node},
1176     type => 'attribute missing:rel');
1177     }
1178     },
1179     checker => $HTMLEmptyChecker,
1180     };
1181    
1182     $Element->{$HTML_NS}->{meta} = {
1183     attrs_checker => sub {
1184     my ($self, $todo) = @_;
1185     my $name_attr;
1186     my $http_equiv_attr;
1187     my $charset_attr;
1188     my $content_attr;
1189     for my $attr (@{$todo->{node}->attributes}) {
1190     my $attr_ns = $attr->namespace_uri;
1191     $attr_ns = '' unless defined $attr_ns;
1192     my $attr_ln = $attr->manakai_local_name;
1193     my $checker;
1194     if ($attr_ns eq '') {
1195     if ($attr_ln eq 'content') {
1196     $content_attr = $attr;
1197     $checker = 1;
1198     } elsif ($attr_ln eq 'name') {
1199     $name_attr = $attr;
1200     $checker = 1;
1201     } elsif ($attr_ln eq 'http-equiv') {
1202     $http_equiv_attr = $attr;
1203     $checker = 1;
1204     } elsif ($attr_ln eq 'charset') {
1205     $charset_attr = $attr;
1206     $checker = 1;
1207     } else {
1208     $checker = $HTMLAttrChecker->{$attr_ln}
1209     || $AttrChecker->{$attr_ns}->{$attr_ln}
1210     || $AttrChecker->{$attr_ns}->{''};
1211     }
1212     } else {
1213     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1214     || $AttrChecker->{$attr_ns}->{''};
1215     }
1216     if ($checker) {
1217     $checker->($self, $attr) if ref $checker;
1218     } else {
1219     $self->{onerror}->(node => $attr, level => 'unsupported',
1220     type => 'attribute');
1221     ## ISSUE: No comformance createria for unknown attributes in the spec
1222     }
1223     }
1224    
1225     if (defined $name_attr) {
1226     if (defined $http_equiv_attr) {
1227     $self->{onerror}->(node => $http_equiv_attr,
1228     type => 'attribute not allowed');
1229     } elsif (defined $charset_attr) {
1230     $self->{onerror}->(node => $charset_attr,
1231     type => 'attribute not allowed');
1232     }
1233     my $metadata_name = $name_attr->value;
1234     my $metadata_value;
1235     if (defined $content_attr) {
1236     $metadata_value = $content_attr->value;
1237     } else {
1238     $self->{onerror}->(node => $todo->{node},
1239     type => 'attribute missing:content');
1240     $metadata_value = '';
1241     }
1242     } elsif (defined $http_equiv_attr) {
1243     if (defined $charset_attr) {
1244     $self->{onerror}->(node => $charset_attr,
1245     type => 'attribute not allowed');
1246     }
1247     unless (defined $content_attr) {
1248     $self->{onerror}->(node => $todo->{node},
1249     type => 'attribute missing:content');
1250     }
1251     } elsif (defined $charset_attr) {
1252     if (defined $content_attr) {
1253     $self->{onerror}->(node => $content_attr,
1254     type => 'attribute not allowed');
1255     }
1256     } else {
1257     if (defined $content_attr) {
1258     $self->{onerror}->(node => $content_attr,
1259     type => 'attribute not allowed');
1260     $self->{onerror}->(node => $todo->{node},
1261     type => 'attribute missing:name|http-equiv');
1262     } else {
1263     $self->{onerror}->(node => $todo->{node},
1264     type => 'attribute missing:name|http-equiv|charset');
1265     }
1266     }
1267    
1268     ## TODO: metadata conformance
1269    
1270     ## TODO: pragma conformance
1271     if (defined $http_equiv_attr) { ## An enumerated attribute
1272     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1273     if ({
1274     'refresh' => 1,
1275     'default-style' => 1,
1276     }->{$keyword}) {
1277     #
1278     } else {
1279     $self->{onerror}->(node => $http_equiv_attr,
1280     type => 'enumerated:invalid');
1281     }
1282     }
1283    
1284     if (defined $charset_attr) {
1285     unless ($todo->{node}->owner_document->manakai_is_html) {
1286     $self->{onerror}->(node => $charset_attr,
1287     type => 'in XML:charset');
1288     }
1289     ## TODO: charset
1290     }
1291     },
1292     checker => $HTMLEmptyChecker,
1293     };
1294    
1295     $Element->{$HTML_NS}->{style} = {
1296     attrs_checker => $GetHTMLAttrsChecker->({
1297     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1298     media => $HTMLMQAttrChecker,
1299     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1300     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1301     ## not different
1302     }),
1303     checker => sub {
1304     ## NOTE: |html:style| has no conformance creteria on content model
1305     my ($self, $todo) = @_;
1306     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1307     $type = 'text/css' unless defined $type;
1308     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1309     type => 'style:'.$type); ## TODO: $type normalization
1310     return $AnyChecker->($self, $todo);
1311     },
1312     };
1313    
1314     $Element->{$HTML_NS}->{body} = {
1315     attrs_checker => $GetHTMLAttrsChecker->({}),
1316     checker => $HTMLBlockChecker,
1317     };
1318    
1319     $Element->{$HTML_NS}->{section} = {
1320     attrs_checker => $GetHTMLAttrsChecker->({}),
1321     checker => $HTMLStylableBlockChecker,
1322     };
1323    
1324     $Element->{$HTML_NS}->{nav} = {
1325     attrs_checker => $GetHTMLAttrsChecker->({}),
1326     checker => $HTMLBlockOrInlineChecker,
1327     };
1328    
1329     $Element->{$HTML_NS}->{article} = {
1330     attrs_checker => $GetHTMLAttrsChecker->({}),
1331     checker => $HTMLStylableBlockChecker,
1332     };
1333    
1334     $Element->{$HTML_NS}->{blockquote} = {
1335     attrs_checker => $GetHTMLAttrsChecker->({
1336     cite => $HTMLURIAttrChecker,
1337     }),
1338     checker => $HTMLBlockChecker,
1339     };
1340    
1341     $Element->{$HTML_NS}->{aside} = {
1342     attrs_checker => $GetHTMLAttrsChecker->({}),
1343     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1344     };
1345    
1346     $Element->{$HTML_NS}->{h1} = {
1347     attrs_checker => $GetHTMLAttrsChecker->({}),
1348     checker => sub {
1349     my ($self, $todo) = @_;
1350     $todo->{flag}->{has_heading}->[0] = 1;
1351 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1352 wakaba 1.1 },
1353     };
1354    
1355     $Element->{$HTML_NS}->{h2} = {
1356     attrs_checker => $GetHTMLAttrsChecker->({}),
1357     checker => $Element->{$HTML_NS}->{h1}->{checker},
1358     };
1359    
1360     $Element->{$HTML_NS}->{h3} = {
1361     attrs_checker => $GetHTMLAttrsChecker->({}),
1362     checker => $Element->{$HTML_NS}->{h1}->{checker},
1363     };
1364    
1365     $Element->{$HTML_NS}->{h4} = {
1366     attrs_checker => $GetHTMLAttrsChecker->({}),
1367     checker => $Element->{$HTML_NS}->{h1}->{checker},
1368     };
1369    
1370     $Element->{$HTML_NS}->{h5} = {
1371     attrs_checker => $GetHTMLAttrsChecker->({}),
1372     checker => $Element->{$HTML_NS}->{h1}->{checker},
1373     };
1374    
1375     $Element->{$HTML_NS}->{h6} = {
1376     attrs_checker => $GetHTMLAttrsChecker->({}),
1377     checker => $Element->{$HTML_NS}->{h1}->{checker},
1378     };
1379    
1380     $Element->{$HTML_NS}->{header} = {
1381     attrs_checker => $GetHTMLAttrsChecker->({}),
1382     checker => sub {
1383     my ($self, $todo) = @_;
1384     my $old_flag = $todo->{flag}->{has_heading} || [];
1385     my $new_flag = [];
1386     local $todo->{flag}->{has_heading} = $new_flag;
1387     my $node = $todo->{node};
1388    
1389     my $end = $self->_add_minuses
1390     ({$HTML_NS => {qw/header 1 footer 1/}},
1391     $HTMLSectioningElements);
1392     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1393     push @$new_todos, $end,
1394     {type => 'code', code => sub {
1395     if ($new_flag->[0]) {
1396     $old_flag->[0] = 1;
1397     } else {
1398     $self->{onerror}->(node => $node, type => 'element missing:hn');
1399     }
1400     }};
1401     return ($new_todos, $ch);
1402     },
1403     };
1404    
1405     $Element->{$HTML_NS}->{footer} = {
1406     attrs_checker => $GetHTMLAttrsChecker->({}),
1407     checker => sub { ## block -hn -header -footer -sectioning or inline
1408     my ($self, $todo) = @_;
1409     my $el = $todo->{node};
1410     my $new_todos = [];
1411     my @nodes = (@{$el->child_nodes});
1412    
1413     my $content = 'block-or-inline'; # or 'block' or 'inline'
1414     my @block_not_inline;
1415     while (@nodes) {
1416     my $node = shift @nodes;
1417     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1418    
1419     my $nt = $node->node_type;
1420     if ($nt == 1) {
1421     my $node_ns = $node->namespace_uri;
1422     $node_ns = '' unless defined $node_ns;
1423     my $node_ln = $node->manakai_local_name;
1424     my $not_allowed;
1425     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1426     $not_allowed = 1;
1427     } elsif ($node_ns eq $HTML_NS and
1428     {
1429     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1430     }->{$node_ln}) {
1431     $not_allowed = 1;
1432     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1433     $not_allowed = 1;
1434     }
1435     if ($content eq 'block') {
1436     $not_allowed = 1
1437 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1438     $self->{pluses}->{$node_ns}->{$node_ln};
1439 wakaba 1.1 } elsif ($content eq 'inline') {
1440     $not_allowed = 1
1441     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1442 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1443     $self->{pluses}->{$node_ns}->{$node_ln};
1444 wakaba 1.1 } else {
1445     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1446     my $is_inline
1447     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1448     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1449    
1450     push @block_not_inline, $node
1451     if $is_block and not $is_inline and not $not_allowed;
1452 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1453 wakaba 1.1 $content = 'inline';
1454     for (@block_not_inline) {
1455     $self->{onerror}->(node => $_, type => 'element not allowed');
1456     }
1457     $not_allowed = 1 unless $is_inline;
1458     }
1459     }
1460     $self->{onerror}->(node => $node, type => 'element not allowed')
1461     if $not_allowed;
1462     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1463     unshift @nodes, @$sib;
1464     push @$new_todos, @$ch;
1465     } elsif ($nt == 3 or $nt == 4) {
1466     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1467     if ($content eq 'block') {
1468     $self->{onerror}->(node => $node, type => 'character not allowed');
1469     } else {
1470     $content = 'inline';
1471     for (@block_not_inline) {
1472     $self->{onerror}->(node => $_, type => 'element not allowed');
1473     }
1474     }
1475     }
1476     } elsif ($nt == 5) {
1477     unshift @nodes, @{$node->child_nodes};
1478     }
1479     }
1480    
1481     my $end = $self->_add_minuses
1482     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1483     $HTMLSectioningElements);
1484     push @$new_todos, $end;
1485    
1486     if ($content eq 'inline') {
1487     for (@$new_todos) {
1488     $_->{inline} = 1;
1489     }
1490     }
1491    
1492     return ($new_todos);
1493     },
1494     };
1495    
1496     $Element->{$HTML_NS}->{address} = {
1497     attrs_checker => $GetHTMLAttrsChecker->({}),
1498     checker => $HTMLInlineChecker,
1499     };
1500    
1501     $Element->{$HTML_NS}->{p} = {
1502     attrs_checker => $GetHTMLAttrsChecker->({}),
1503 wakaba 1.13 checker => $HTMLInlineChecker,
1504 wakaba 1.1 };
1505    
1506     $Element->{$HTML_NS}->{hr} = {
1507     attrs_checker => $GetHTMLAttrsChecker->({}),
1508     checker => $HTMLEmptyChecker,
1509     };
1510    
1511     $Element->{$HTML_NS}->{br} = {
1512     attrs_checker => $GetHTMLAttrsChecker->({}),
1513     checker => $HTMLEmptyChecker,
1514     };
1515    
1516     $Element->{$HTML_NS}->{dialog} = {
1517     attrs_checker => $GetHTMLAttrsChecker->({}),
1518     checker => sub {
1519     my ($self, $todo) = @_;
1520     my $el = $todo->{node};
1521     my $new_todos = [];
1522     my @nodes = (@{$el->child_nodes});
1523    
1524     my $phase = 'before dt';
1525     while (@nodes) {
1526     my $node = shift @nodes;
1527     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1528    
1529     my $nt = $node->node_type;
1530     if ($nt == 1) {
1531     my $node_ns = $node->namespace_uri;
1532     $node_ns = '' unless defined $node_ns;
1533     my $node_ln = $node->manakai_local_name;
1534     ## NOTE: |minuses| list is not checked since redundant
1535 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1536     #
1537     } elsif ($phase eq 'before dt') {
1538 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1539     $phase = 'before dd';
1540     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1541     $self->{onerror}
1542     ->(node => $node, type => 'ps element missing:dt');
1543     $phase = 'before dt';
1544     } else {
1545     $self->{onerror}->(node => $node, type => 'element not allowed');
1546     }
1547     } else { # before dd
1548     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1549     $phase = 'before dt';
1550     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1551     $self->{onerror}
1552     ->(node => $node, type => 'ps element missing:dd');
1553     $phase = 'before dd';
1554     } else {
1555     $self->{onerror}->(node => $node, type => 'element not allowed');
1556     }
1557     }
1558     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1559     unshift @nodes, @$sib;
1560     push @$new_todos, @$ch;
1561     } elsif ($nt == 3 or $nt == 4) {
1562     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1563     $self->{onerror}->(node => $node, type => 'character not allowed');
1564     }
1565     } elsif ($nt == 5) {
1566     unshift @nodes, @{$node->child_nodes};
1567     }
1568     }
1569     if ($phase eq 'before dd') {
1570 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1571 wakaba 1.1 }
1572     return ($new_todos);
1573     },
1574     };
1575    
1576     $Element->{$HTML_NS}->{pre} = {
1577     attrs_checker => $GetHTMLAttrsChecker->({}),
1578     checker => $HTMLStrictlyInlineChecker,
1579     };
1580    
1581     $Element->{$HTML_NS}->{ol} = {
1582     attrs_checker => $GetHTMLAttrsChecker->({
1583     start => $HTMLIntegerAttrChecker,
1584     }),
1585     checker => sub {
1586     my ($self, $todo) = @_;
1587     my $el = $todo->{node};
1588     my $new_todos = [];
1589     my @nodes = (@{$el->child_nodes});
1590    
1591     while (@nodes) {
1592     my $node = shift @nodes;
1593     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1594    
1595     my $nt = $node->node_type;
1596     if ($nt == 1) {
1597     my $node_ns = $node->namespace_uri;
1598     $node_ns = '' unless defined $node_ns;
1599     my $node_ln = $node->manakai_local_name;
1600     ## NOTE: |minuses| list is not checked since redundant
1601 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1602     #
1603     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1604 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1605     }
1606     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1607     unshift @nodes, @$sib;
1608     push @$new_todos, @$ch;
1609     } elsif ($nt == 3 or $nt == 4) {
1610     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1611     $self->{onerror}->(node => $node, type => 'character not allowed');
1612     }
1613     } elsif ($nt == 5) {
1614     unshift @nodes, @{$node->child_nodes};
1615     }
1616     }
1617    
1618     if ($todo->{inline}) {
1619     for (@$new_todos) {
1620     $_->{inline} = 1;
1621     }
1622     }
1623     return ($new_todos);
1624     },
1625     };
1626    
1627     $Element->{$HTML_NS}->{ul} = {
1628     attrs_checker => $GetHTMLAttrsChecker->({}),
1629     checker => $Element->{$HTML_NS}->{ol}->{checker},
1630     };
1631    
1632     $Element->{$HTML_NS}->{li} = {
1633     attrs_checker => $GetHTMLAttrsChecker->({
1634     start => sub {
1635     my ($self, $attr) = @_;
1636     my $parent = $attr->owner_element->manakai_parent_element;
1637     if (defined $parent) {
1638     my $parent_ns = $parent->namespace_uri;
1639     $parent_ns = '' unless defined $parent_ns;
1640     my $parent_ln = $parent->manakai_local_name;
1641     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1642     $self->{onerror}->(node => $attr, level => 'unsupported',
1643     type => 'attribute');
1644     }
1645     }
1646     $HTMLIntegerAttrChecker->($self, $attr);
1647     },
1648     }),
1649     checker => sub {
1650     my ($self, $todo) = @_;
1651     if ($todo->{inline}) {
1652     return $HTMLInlineChecker->($self, $todo);
1653     } else {
1654     return $HTMLBlockOrInlineChecker->($self, $todo);
1655     }
1656     },
1657     };
1658    
1659     $Element->{$HTML_NS}->{dl} = {
1660     attrs_checker => $GetHTMLAttrsChecker->({}),
1661     checker => sub {
1662     my ($self, $todo) = @_;
1663     my $el = $todo->{node};
1664     my $new_todos = [];
1665     my @nodes = (@{$el->child_nodes});
1666    
1667     my $phase = 'before dt';
1668     while (@nodes) {
1669     my $node = shift @nodes;
1670     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1671    
1672     my $nt = $node->node_type;
1673     if ($nt == 1) {
1674     my $node_ns = $node->namespace_uri;
1675     $node_ns = '' unless defined $node_ns;
1676     my $node_ln = $node->manakai_local_name;
1677     ## NOTE: |minuses| list is not checked since redundant
1678 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1679     #
1680     } elsif ($phase eq 'in dds') {
1681 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1682     #$phase = 'in dds';
1683     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1684     $phase = 'in dts';
1685     } else {
1686     $self->{onerror}->(node => $node, type => 'element not allowed');
1687     }
1688     } elsif ($phase eq 'in dts') {
1689     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1690     #$phase = 'in dts';
1691     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1692     $phase = 'in dds';
1693     } else {
1694     $self->{onerror}->(node => $node, type => 'element not allowed');
1695     }
1696     } else { # before dt
1697     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1698     $phase = 'in dts';
1699     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1700     $self->{onerror}
1701     ->(node => $node, type => 'ps element missing:dt');
1702     $phase = 'in dds';
1703     } else {
1704     $self->{onerror}->(node => $node, type => 'element not allowed');
1705     }
1706     }
1707     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1708     unshift @nodes, @$sib;
1709     push @$new_todos, @$ch;
1710     } elsif ($nt == 3 or $nt == 4) {
1711     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1712     $self->{onerror}->(node => $node, type => 'character not allowed');
1713     }
1714     } elsif ($nt == 5) {
1715     unshift @nodes, @{$node->child_nodes};
1716     }
1717     }
1718     if ($phase eq 'in dts') {
1719 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1720 wakaba 1.1 }
1721    
1722     if ($todo->{inline}) {
1723     for (@$new_todos) {
1724     $_->{inline} = 1;
1725     }
1726     }
1727     return ($new_todos);
1728     },
1729     };
1730    
1731     $Element->{$HTML_NS}->{dt} = {
1732     attrs_checker => $GetHTMLAttrsChecker->({}),
1733     checker => $HTMLStrictlyInlineChecker,
1734     };
1735    
1736     $Element->{$HTML_NS}->{dd} = {
1737     attrs_checker => $GetHTMLAttrsChecker->({}),
1738     checker => $Element->{$HTML_NS}->{li}->{checker},
1739     };
1740    
1741     $Element->{$HTML_NS}->{a} = {
1742     attrs_checker => sub {
1743     my ($self, $todo) = @_;
1744     my %attr;
1745     for my $attr (@{$todo->{node}->attributes}) {
1746     my $attr_ns = $attr->namespace_uri;
1747     $attr_ns = '' unless defined $attr_ns;
1748     my $attr_ln = $attr->manakai_local_name;
1749     my $checker;
1750     if ($attr_ns eq '') {
1751     $checker = {
1752     target => $HTMLTargetAttrChecker,
1753     href => $HTMLURIAttrChecker,
1754     ping => $HTMLSpaceURIsAttrChecker,
1755 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1756 wakaba 1.1 media => $HTMLMQAttrChecker,
1757     hreflang => $HTMLLanguageTagAttrChecker,
1758     type => $HTMLIMTAttrChecker,
1759     }->{$attr_ln};
1760     if ($checker) {
1761     $attr{$attr_ln} = $attr;
1762     } else {
1763     $checker = $HTMLAttrChecker->{$attr_ln};
1764     }
1765     }
1766     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1767     || $AttrChecker->{$attr_ns}->{''};
1768     if ($checker) {
1769     $checker->($self, $attr) if ref $checker;
1770     } else {
1771     $self->{onerror}->(node => $attr, level => 'unsupported',
1772     type => 'attribute');
1773     ## ISSUE: No comformance createria for unknown attributes in the spec
1774     }
1775     }
1776    
1777 wakaba 1.4 if (defined $attr{href}) {
1778     $self->{has_hyperlink_element} = 1;
1779     } else {
1780 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1781     if (defined $attr{$_}) {
1782     $self->{onerror}->(node => $attr{$_},
1783     type => 'attribute not allowed');
1784     }
1785     }
1786     }
1787     },
1788     checker => sub {
1789     my ($self, $todo) = @_;
1790    
1791     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1792     my ($new_todos, $ch)
1793 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
1794 wakaba 1.1 push @$new_todos, $end;
1795    
1796 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1797     $_->{flag}->{in_a_href} = 1 for @$new_todos;
1798     }
1799 wakaba 1.1
1800     return ($new_todos, $ch);
1801     },
1802     };
1803    
1804     $Element->{$HTML_NS}->{q} = {
1805     attrs_checker => $GetHTMLAttrsChecker->({
1806     cite => $HTMLURIAttrChecker,
1807     }),
1808     checker => $HTMLInlineOrStrictlyInlineChecker,
1809     };
1810    
1811     $Element->{$HTML_NS}->{cite} = {
1812     attrs_checker => $GetHTMLAttrsChecker->({}),
1813     checker => $HTMLStrictlyInlineChecker,
1814     };
1815    
1816     $Element->{$HTML_NS}->{em} = {
1817     attrs_checker => $GetHTMLAttrsChecker->({}),
1818     checker => $HTMLInlineOrStrictlyInlineChecker,
1819     };
1820    
1821     $Element->{$HTML_NS}->{strong} = {
1822     attrs_checker => $GetHTMLAttrsChecker->({}),
1823     checker => $HTMLInlineOrStrictlyInlineChecker,
1824     };
1825    
1826     $Element->{$HTML_NS}->{small} = {
1827     attrs_checker => $GetHTMLAttrsChecker->({}),
1828     checker => $HTMLInlineOrStrictlyInlineChecker,
1829     };
1830    
1831     $Element->{$HTML_NS}->{m} = {
1832     attrs_checker => $GetHTMLAttrsChecker->({}),
1833     checker => $HTMLInlineOrStrictlyInlineChecker,
1834     };
1835    
1836     $Element->{$HTML_NS}->{dfn} = {
1837     attrs_checker => $GetHTMLAttrsChecker->({}),
1838     checker => sub {
1839     my ($self, $todo) = @_;
1840    
1841     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1842     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1843     push @$sib, $end;
1844    
1845     my $node = $todo->{node};
1846     my $term = $node->get_attribute_ns (undef, 'title');
1847     unless (defined $term) {
1848     for my $child (@{$node->child_nodes}) {
1849     if ($child->node_type == 1) { # ELEMENT_NODE
1850     if (defined $term) {
1851     undef $term;
1852     last;
1853     } elsif ($child->manakai_local_name eq 'abbr') {
1854     my $nsuri = $child->namespace_uri;
1855     if (defined $nsuri and $nsuri eq $HTML_NS) {
1856     my $attr = $child->get_attribute_node_ns (undef, 'title');
1857     if ($attr) {
1858     $term = $attr->value;
1859     }
1860     }
1861     }
1862     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1863     ## TEXT_NODE or CDATA_SECTION_NODE
1864     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1865     next;
1866     }
1867     undef $term;
1868     last;
1869     }
1870     }
1871     unless (defined $term) {
1872     $term = $node->text_content;
1873     }
1874     }
1875     if ($self->{term}->{$term}) {
1876     $self->{onerror}->(node => $node, type => 'duplicate term');
1877     push @{$self->{term}->{$term}}, $node;
1878     } else {
1879     $self->{term}->{$term} = [$node];
1880     }
1881     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1882     ## has |title|.
1883    
1884     return ($sib, $ch);
1885     },
1886     };
1887    
1888     $Element->{$HTML_NS}->{abbr} = {
1889     attrs_checker => $GetHTMLAttrsChecker->({
1890     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1891     ## not different. The spec says that the |title| MAY be omitted
1892     ## if there is a |dfn| whose defining term is the abbreviation,
1893     ## but it does not prohibit |abbr| w/o |title| in other cases.
1894     }),
1895     checker => $HTMLStrictlyInlineChecker,
1896     };
1897    
1898     $Element->{$HTML_NS}->{time} = {
1899     attrs_checker => $GetHTMLAttrsChecker->({
1900     datetime => sub { 1 }, # checked in |checker|
1901     }),
1902     ## TODO: Write tests
1903     checker => sub {
1904     my ($self, $todo) = @_;
1905    
1906     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1907     my $input;
1908     my $reg_sp;
1909     my $input_node;
1910     if ($attr) {
1911     $input = $attr->value;
1912     $reg_sp = qr/[\x09-\x0D\x20]*/;
1913     $input_node = $attr;
1914     } else {
1915     $input = $todo->{node}->text_content;
1916     $reg_sp = qr/\p{Zs}*/;
1917     $input_node = $todo->{node};
1918    
1919     ## ISSUE: What is the definition for "successfully extracts a date
1920     ## or time"? If the algorithm says the string is invalid but
1921     ## return some date or time, is it "successfully"?
1922     }
1923    
1924     my $hour;
1925     my $minute;
1926     my $second;
1927     if ($input =~ /
1928     \A
1929     [\x09-\x0D\x20]*
1930     ([0-9]+) # 1
1931     (?>
1932     -([0-9]+) # 2
1933     -([0-9]+) # 3
1934     [\x09-\x0D\x20]*
1935     (?>
1936     T
1937     [\x09-\x0D\x20]*
1938     )?
1939     ([0-9]+) # 4
1940     :([0-9]+) # 5
1941     (?>
1942     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1943     )?
1944     [\x09-\x0D\x20]*
1945     (?>
1946     Z
1947     [\x09-\x0D\x20]*
1948     |
1949     [+-]([0-9]+):([0-9]+) # 7, 8
1950     [\x09-\x0D\x20]*
1951     )?
1952     \z
1953     |
1954     :([0-9]+) # 9
1955     (?>
1956     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1957     )?
1958     [\x09-\x0D\x20]*\z
1959     )
1960     /x) {
1961     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1962     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1963     length $4 != 2 or length $5 != 2) {
1964     $self->{onerror}->(node => $input_node,
1965     type => 'dateortime:syntax error');
1966     }
1967    
1968     if (1 <= $2 and $2 <= 12) {
1969     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1970     if $3 < 1 or
1971     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1972     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1973     if $2 == 2 and $3 == 29 and
1974     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1975     } else {
1976     $self->{onerror}->(node => $input_node,
1977     type => 'datetime:bad month');
1978     }
1979    
1980     ($hour, $minute, $second) = ($4, $5, $6);
1981    
1982     if (defined $7) { ## [+-]hh:mm
1983     if (length $7 != 2 or length $8 != 2) {
1984     $self->{onerror}->(node => $input_node,
1985     type => 'dateortime:syntax error');
1986     }
1987    
1988     $self->{onerror}->(node => $input_node,
1989     type => 'datetime:bad timezone hour')
1990     if $7 > 23;
1991     $self->{onerror}->(node => $input_node,
1992     type => 'datetime:bad timezone minute')
1993     if $8 > 59;
1994     }
1995     } else { ## hh:mm
1996     if (length $1 != 2 or length $9 != 2) {
1997     $self->{onerror}->(node => $input_node,
1998     type => qq'dateortime:syntax error');
1999     }
2000    
2001     ($hour, $minute, $second) = ($1, $9, $10);
2002     }
2003    
2004     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2005     if $hour > 23;
2006     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2007     if $minute > 59;
2008    
2009     if (defined $second) { ## s
2010     ## NOTE: Integer part of second don't have to have length of two.
2011    
2012     if (substr ($second, 0, 1) eq '.') {
2013     $self->{onerror}->(node => $input_node,
2014     type => 'dateortime:syntax error');
2015     }
2016    
2017     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2018     if $second >= 60;
2019     }
2020     } else {
2021     $self->{onerror}->(node => $input_node,
2022     type => 'dateortime:syntax error');
2023     }
2024    
2025     return $HTMLStrictlyInlineChecker->($self, $todo);
2026     },
2027     };
2028    
2029     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2030     attrs_checker => $GetHTMLAttrsChecker->({
2031     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2032     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2033     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2034     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2035     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2036     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2037     }),
2038     checker => $HTMLStrictlyInlineChecker,
2039     };
2040    
2041     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2042     attrs_checker => $GetHTMLAttrsChecker->({
2043     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2044     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2045     }),
2046     checker => $HTMLStrictlyInlineChecker,
2047     };
2048    
2049     $Element->{$HTML_NS}->{code} = {
2050     attrs_checker => $GetHTMLAttrsChecker->({}),
2051     ## NOTE: Though |title| has special semantics,
2052     ## syntatically same as the |title| as global attribute.
2053     checker => $HTMLInlineOrStrictlyInlineChecker,
2054     };
2055    
2056     $Element->{$HTML_NS}->{var} = {
2057     attrs_checker => $GetHTMLAttrsChecker->({}),
2058     ## NOTE: Though |title| has special semantics,
2059     ## syntatically same as the |title| as global attribute.
2060     checker => $HTMLStrictlyInlineChecker,
2061     };
2062    
2063     $Element->{$HTML_NS}->{samp} = {
2064     attrs_checker => $GetHTMLAttrsChecker->({}),
2065     ## NOTE: Though |title| has special semantics,
2066     ## syntatically same as the |title| as global attribute.
2067     checker => $HTMLInlineOrStrictlyInlineChecker,
2068     };
2069    
2070     $Element->{$HTML_NS}->{kbd} = {
2071     attrs_checker => $GetHTMLAttrsChecker->({}),
2072     checker => $HTMLStrictlyInlineChecker,
2073     };
2074    
2075     $Element->{$HTML_NS}->{sub} = {
2076     attrs_checker => $GetHTMLAttrsChecker->({}),
2077     checker => $HTMLStrictlyInlineChecker,
2078     };
2079    
2080     $Element->{$HTML_NS}->{sup} = {
2081     attrs_checker => $GetHTMLAttrsChecker->({}),
2082     checker => $HTMLStrictlyInlineChecker,
2083     };
2084    
2085     $Element->{$HTML_NS}->{span} = {
2086     attrs_checker => $GetHTMLAttrsChecker->({}),
2087     ## NOTE: Though |title| has special semantics,
2088     ## syntatically same as the |title| as global attribute.
2089     checker => $HTMLInlineOrStrictlyInlineChecker,
2090     };
2091    
2092     $Element->{$HTML_NS}->{i} = {
2093     attrs_checker => $GetHTMLAttrsChecker->({}),
2094     ## NOTE: Though |title| has special semantics,
2095     ## syntatically same as the |title| as global attribute.
2096     checker => $HTMLStrictlyInlineChecker,
2097     };
2098    
2099     $Element->{$HTML_NS}->{b} = {
2100     attrs_checker => $GetHTMLAttrsChecker->({}),
2101     checker => $HTMLStrictlyInlineChecker,
2102     };
2103    
2104     $Element->{$HTML_NS}->{bdo} = {
2105     attrs_checker => sub {
2106     my ($self, $todo) = @_;
2107     $GetHTMLAttrsChecker->({})->($self, $todo);
2108     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2109     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2110     }
2111     },
2112     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2113     checker => $HTMLStrictlyInlineChecker,
2114     };
2115    
2116     $Element->{$HTML_NS}->{ins} = {
2117     attrs_checker => $GetHTMLAttrsChecker->({
2118     cite => $HTMLURIAttrChecker,
2119     datetime => $HTMLDatetimeAttrChecker,
2120     }),
2121     checker => $HTMLTransparentChecker,
2122     };
2123    
2124     $Element->{$HTML_NS}->{del} = {
2125     attrs_checker => $GetHTMLAttrsChecker->({
2126     cite => $HTMLURIAttrChecker,
2127     datetime => $HTMLDatetimeAttrChecker,
2128     }),
2129     checker => sub {
2130     my ($self, $todo) = @_;
2131    
2132     my $parent = $todo->{node}->manakai_parent_element;
2133     if (defined $parent) {
2134     my $nsuri = $parent->namespace_uri;
2135     $nsuri = '' unless defined $nsuri;
2136     my $ln = $parent->manakai_local_name;
2137     my $eldef = $Element->{$nsuri}->{$ln} ||
2138     $Element->{$nsuri}->{''} ||
2139     $ElementDefault;
2140     return $eldef->{checker}->($self, $todo);
2141     } else {
2142     return $HTMLBlockOrInlineChecker->($self, $todo);
2143     }
2144     },
2145     };
2146    
2147     ## TODO: figure
2148 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2149 wakaba 1.1
2150 wakaba 1.4 ## TODO: |alt|
2151 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2152     attrs_checker => sub {
2153     my ($self, $todo) = @_;
2154     $GetHTMLAttrsChecker->({
2155     alt => sub { }, ## NOTE: No syntactical requirement
2156     src => $HTMLURIAttrChecker,
2157     usemap => $HTMLUsemapAttrChecker,
2158     ismap => sub {
2159     my ($self, $attr, $parent_todo) = @_;
2160 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2161     $self->{onerror}->(node => $attr,
2162     type => 'attribute not allowed:ismap');
2163 wakaba 1.1 }
2164     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2165     },
2166     ## TODO: height
2167     ## TODO: width
2168     })->($self, $todo);
2169     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2170     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2171     }
2172     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2173     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2174     }
2175     },
2176     checker => $HTMLEmptyChecker,
2177     };
2178    
2179     $Element->{$HTML_NS}->{iframe} = {
2180     attrs_checker => $GetHTMLAttrsChecker->({
2181     src => $HTMLURIAttrChecker,
2182     }),
2183     checker => $HTMLTextChecker,
2184     };
2185    
2186     $Element->{$HTML_NS}->{embed} = {
2187     attrs_checker => sub {
2188     my ($self, $todo) = @_;
2189     my $has_src;
2190     for my $attr (@{$todo->{node}->attributes}) {
2191     my $attr_ns = $attr->namespace_uri;
2192     $attr_ns = '' unless defined $attr_ns;
2193     my $attr_ln = $attr->manakai_local_name;
2194     my $checker;
2195     if ($attr_ns eq '') {
2196     if ($attr_ln eq 'src') {
2197     $checker = $HTMLURIAttrChecker;
2198     $has_src = 1;
2199     } elsif ($attr_ln eq 'type') {
2200     $checker = $HTMLIMTAttrChecker;
2201     } else {
2202     ## TODO: height
2203     ## TODO: width
2204     $checker = $HTMLAttrChecker->{$attr_ln}
2205     || sub { }; ## NOTE: Any local attribute is ok.
2206     }
2207     }
2208     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2209     || $AttrChecker->{$attr_ns}->{''};
2210     if ($checker) {
2211     $checker->($self, $attr);
2212     } else {
2213     $self->{onerror}->(node => $attr, level => 'unsupported',
2214     type => 'attribute');
2215     ## ISSUE: No comformance createria for global attributes in the spec
2216     }
2217     }
2218    
2219     unless ($has_src) {
2220     $self->{onerror}->(node => $todo->{node},
2221     type => 'attribute missing:src');
2222     }
2223     },
2224     checker => $HTMLEmptyChecker,
2225     };
2226    
2227     $Element->{$HTML_NS}->{object} = {
2228     attrs_checker => sub {
2229     my ($self, $todo) = @_;
2230     $GetHTMLAttrsChecker->({
2231     data => $HTMLURIAttrChecker,
2232     type => $HTMLIMTAttrChecker,
2233     usemap => $HTMLUsemapAttrChecker,
2234     ## TODO: width
2235     ## TODO: height
2236     })->($self, $todo);
2237     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2238     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2239     $self->{onerror}->(node => $todo->{node},
2240     type => 'attribute missing:data|type');
2241     }
2242     }
2243     },
2244     checker => $ElementDefault->{checker}, ## TODO
2245 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2246 wakaba 1.1 };
2247    
2248     $Element->{$HTML_NS}->{param} = {
2249     attrs_checker => sub {
2250     my ($self, $todo) = @_;
2251     $GetHTMLAttrsChecker->({
2252     name => sub { },
2253     value => sub { },
2254     })->($self, $todo);
2255     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2256     $self->{onerror}->(node => $todo->{node},
2257     type => 'attribute missing:name');
2258     }
2259     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2260     $self->{onerror}->(node => $todo->{node},
2261     type => 'attribute missing:value');
2262     }
2263     },
2264     checker => $HTMLEmptyChecker,
2265     };
2266    
2267     $Element->{$HTML_NS}->{video} = {
2268     attrs_checker => $GetHTMLAttrsChecker->({
2269     src => $HTMLURIAttrChecker,
2270     ## TODO: start, loopstart, loopend, end
2271     ## ISSUE: they MUST be "value time offset"s. Value?
2272 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2273 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2274     controls => $GetHTMLBooleanAttrChecker->('controls'),
2275 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2276     ## TODO: width, height (not for audio!)
2277 wakaba 1.1 }),
2278     checker => sub {
2279     my ($self, $todo) = @_;
2280    
2281     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2282     return $HTMLBlockOrInlineChecker->($self, $todo);
2283     } else {
2284     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2285     ->($self, $todo);
2286     }
2287     },
2288     };
2289    
2290     $Element->{$HTML_NS}->{audio} = {
2291     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2292     checker => $Element->{$HTML_NS}->{video}->{checker},
2293     };
2294    
2295     $Element->{$HTML_NS}->{source} = {
2296     attrs_checker => sub {
2297     my ($self, $todo) = @_;
2298     $GetHTMLAttrsChecker->({
2299     src => $HTMLURIAttrChecker,
2300     type => $HTMLIMTAttrChecker,
2301     media => $HTMLMQAttrChecker,
2302     })->($self, $todo);
2303     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2304     $self->{onerror}->(node => $todo->{node},
2305     type => 'attribute missing:src');
2306     }
2307     },
2308     checker => $HTMLEmptyChecker,
2309     };
2310    
2311     $Element->{$HTML_NS}->{canvas} = {
2312     attrs_checker => $GetHTMLAttrsChecker->({
2313     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2314     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2315     }),
2316     checker => $HTMLInlineChecker,
2317     };
2318    
2319     $Element->{$HTML_NS}->{map} = {
2320 wakaba 1.4 attrs_checker => sub {
2321     my ($self, $todo) = @_;
2322     my $has_id;
2323     $GetHTMLAttrsChecker->({
2324     id => sub {
2325     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2326     my ($self, $attr) = @_;
2327     my $value = $attr->value;
2328     if (length $value > 0) {
2329     if ($self->{id}->{$value}) {
2330     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2331     push @{$self->{id}->{$value}}, $attr;
2332     } else {
2333     $self->{id}->{$value} = [$attr];
2334     }
2335 wakaba 1.1 } else {
2336 wakaba 1.4 ## NOTE: MUST contain at least one character
2337     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2338 wakaba 1.1 }
2339 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2340     $self->{onerror}->(node => $attr, type => 'space in ID');
2341     }
2342     $self->{map}->{$value} ||= $attr;
2343     $has_id = 1;
2344     },
2345     })->($self, $todo);
2346     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2347     unless $has_id;
2348     },
2349 wakaba 1.1 checker => $HTMLBlockChecker,
2350     };
2351    
2352     $Element->{$HTML_NS}->{area} = {
2353     attrs_checker => sub {
2354     my ($self, $todo) = @_;
2355     my %attr;
2356     my $coords;
2357     for my $attr (@{$todo->{node}->attributes}) {
2358     my $attr_ns = $attr->namespace_uri;
2359     $attr_ns = '' unless defined $attr_ns;
2360     my $attr_ln = $attr->manakai_local_name;
2361     my $checker;
2362     if ($attr_ns eq '') {
2363     $checker = {
2364     alt => sub { },
2365     ## NOTE: |alt| value has no conformance creteria.
2366     shape => $GetHTMLEnumeratedAttrChecker->({
2367     circ => -1, circle => 1,
2368     default => 1,
2369     poly => 1, polygon => -1,
2370     rect => 1, rectangle => -1,
2371     }),
2372     coords => sub {
2373     my ($self, $attr) = @_;
2374     my $value = $attr->value;
2375     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2376     $coords = [split /,/, $value];
2377     } else {
2378     $self->{onerror}->(node => $attr,
2379     type => 'coords:syntax error');
2380     }
2381     },
2382     target => $HTMLTargetAttrChecker,
2383     href => $HTMLURIAttrChecker,
2384     ping => $HTMLSpaceURIsAttrChecker,
2385 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2386 wakaba 1.1 media => $HTMLMQAttrChecker,
2387     hreflang => $HTMLLanguageTagAttrChecker,
2388     type => $HTMLIMTAttrChecker,
2389     }->{$attr_ln};
2390     if ($checker) {
2391     $attr{$attr_ln} = $attr;
2392     } else {
2393     $checker = $HTMLAttrChecker->{$attr_ln};
2394     }
2395     }
2396     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2397     || $AttrChecker->{$attr_ns}->{''};
2398     if ($checker) {
2399     $checker->($self, $attr) if ref $checker;
2400     } else {
2401     $self->{onerror}->(node => $attr, level => 'unsupported',
2402     type => 'attribute');
2403     ## ISSUE: No comformance createria for unknown attributes in the spec
2404     }
2405     }
2406    
2407     if (defined $attr{href}) {
2408 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2409 wakaba 1.1 unless (defined $attr{alt}) {
2410     $self->{onerror}->(node => $todo->{node},
2411     type => 'attribute missing:alt');
2412     }
2413     } else {
2414     for (qw/target ping rel media hreflang type alt/) {
2415     if (defined $attr{$_}) {
2416     $self->{onerror}->(node => $attr{$_},
2417     type => 'attribute not allowed');
2418     }
2419     }
2420     }
2421    
2422     my $shape = 'rectangle';
2423     if (defined $attr{shape}) {
2424     $shape = {
2425     circ => 'circle', circle => 'circle',
2426     default => 'default',
2427     poly => 'polygon', polygon => 'polygon',
2428     rect => 'rectangle', rectangle => 'rectangle',
2429     }->{lc $attr{shape}->value} || 'rectangle';
2430     ## TODO: ASCII lowercase?
2431     }
2432    
2433     if ($shape eq 'circle') {
2434     if (defined $attr{coords}) {
2435     if (defined $coords) {
2436     if (@$coords == 3) {
2437     if ($coords->[2] < 0) {
2438     $self->{onerror}->(node => $attr{coords},
2439     type => 'coords:out of range:2');
2440     }
2441     } else {
2442     $self->{onerror}->(node => $attr{coords},
2443     type => 'coords:number:3:'.@$coords);
2444     }
2445     } else {
2446     ## NOTE: A syntax error has been reported.
2447     }
2448     } else {
2449     $self->{onerror}->(node => $todo->{node},
2450     type => 'attribute missing:coords');
2451     }
2452     } elsif ($shape eq 'default') {
2453     if (defined $attr{coords}) {
2454     $self->{onerror}->(node => $attr{coords},
2455     type => 'attribute not allowed');
2456     }
2457     } elsif ($shape eq 'polygon') {
2458     if (defined $attr{coords}) {
2459     if (defined $coords) {
2460     if (@$coords >= 6) {
2461     unless (@$coords % 2 == 0) {
2462     $self->{onerror}->(node => $attr{coords},
2463     type => 'coords:number:even:'.@$coords);
2464     }
2465     } else {
2466     $self->{onerror}->(node => $attr{coords},
2467     type => 'coords:number:>=6:'.@$coords);
2468     }
2469     } else {
2470     ## NOTE: A syntax error has been reported.
2471     }
2472     } else {
2473     $self->{onerror}->(node => $todo->{node},
2474     type => 'attribute missing:coords');
2475     }
2476     } elsif ($shape eq 'rectangle') {
2477     if (defined $attr{coords}) {
2478     if (defined $coords) {
2479     if (@$coords == 4) {
2480     unless ($coords->[0] < $coords->[2]) {
2481     $self->{onerror}->(node => $attr{coords},
2482     type => 'coords:out of range:0');
2483     }
2484     unless ($coords->[1] < $coords->[3]) {
2485     $self->{onerror}->(node => $attr{coords},
2486     type => 'coords:out of range:1');
2487     }
2488     } else {
2489     $self->{onerror}->(node => $attr{coords},
2490     type => 'coords:number:4:'.@$coords);
2491     }
2492     } else {
2493     ## NOTE: A syntax error has been reported.
2494     }
2495     } else {
2496     $self->{onerror}->(node => $todo->{node},
2497     type => 'attribute missing:coords');
2498     }
2499     }
2500     },
2501     checker => $HTMLEmptyChecker,
2502     };
2503     ## TODO: only in map
2504    
2505     $Element->{$HTML_NS}->{table} = {
2506     attrs_checker => $GetHTMLAttrsChecker->({}),
2507     checker => sub {
2508     my ($self, $todo) = @_;
2509     my $el = $todo->{node};
2510     my $new_todos = [];
2511     my @nodes = (@{$el->child_nodes});
2512    
2513     my $phase = 'before caption';
2514     my $has_tfoot;
2515     while (@nodes) {
2516     my $node = shift @nodes;
2517     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2518    
2519     my $nt = $node->node_type;
2520     if ($nt == 1) {
2521     my $node_ns = $node->namespace_uri;
2522     $node_ns = '' unless defined $node_ns;
2523     my $node_ln = $node->manakai_local_name;
2524     ## NOTE: |minuses| list is not checked since redundant
2525 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2526     #
2527     } elsif ($phase eq 'in tbodys') {
2528 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2529     #$phase = 'in tbodys';
2530     } elsif (not $has_tfoot and
2531     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2532     $phase = 'after tfoot';
2533     $has_tfoot = 1;
2534     } else {
2535     $self->{onerror}->(node => $node, type => 'element not allowed');
2536     }
2537     } elsif ($phase eq 'in trs') {
2538     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2539     #$phase = 'in trs';
2540     } elsif (not $has_tfoot and
2541     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2542     $phase = 'after tfoot';
2543     $has_tfoot = 1;
2544     } else {
2545     $self->{onerror}->(node => $node, type => 'element not allowed');
2546     }
2547     } elsif ($phase eq 'after thead') {
2548     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2549     $phase = 'in tbodys';
2550     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2551     $phase = 'in trs';
2552     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2553     $phase = 'in tbodys';
2554     $has_tfoot = 1;
2555     } else {
2556     $self->{onerror}->(node => $node, type => 'element not allowed');
2557     }
2558     } elsif ($phase eq 'in colgroup') {
2559     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2560     $phase = 'in colgroup';
2561     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2562     $phase = 'after thead';
2563     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2564     $phase = 'in tbodys';
2565     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2566     $phase = 'in trs';
2567     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2568     $phase = 'in tbodys';
2569     $has_tfoot = 1;
2570     } else {
2571     $self->{onerror}->(node => $node, type => 'element not allowed');
2572     }
2573     } elsif ($phase eq 'before caption') {
2574     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2575     $phase = 'in colgroup';
2576     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2577     $phase = 'in colgroup';
2578     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2579     $phase = 'after thead';
2580     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2581     $phase = 'in tbodys';
2582     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2583     $phase = 'in trs';
2584     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2585     $phase = 'in tbodys';
2586     $has_tfoot = 1;
2587     } else {
2588     $self->{onerror}->(node => $node, type => 'element not allowed');
2589     }
2590     } else { # after tfoot
2591     $self->{onerror}->(node => $node, type => 'element not allowed');
2592     }
2593     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2594     unshift @nodes, @$sib;
2595     push @$new_todos, @$ch;
2596     } elsif ($nt == 3 or $nt == 4) {
2597     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2598     $self->{onerror}->(node => $node, type => 'character not allowed');
2599     }
2600     } elsif ($nt == 5) {
2601     unshift @nodes, @{$node->child_nodes};
2602     }
2603     }
2604    
2605     ## Table model errors
2606     require Whatpm::HTMLTable;
2607     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2608     my %opt = @_;
2609     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2610     });
2611     push @{$self->{return}->{table}}, $todo->{node};
2612    
2613     return ($new_todos);
2614     },
2615     };
2616    
2617     $Element->{$HTML_NS}->{caption} = {
2618     attrs_checker => $GetHTMLAttrsChecker->({}),
2619 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2620 wakaba 1.1 };
2621    
2622     $Element->{$HTML_NS}->{colgroup} = {
2623     attrs_checker => $GetHTMLAttrsChecker->({
2624     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2625     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2626     ## TODO: "attribute not supported" if |col|.
2627     ## ISSUE: MUST NOT if any |col|?
2628     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2629     }),
2630     checker => sub {
2631     my ($self, $todo) = @_;
2632     my $el = $todo->{node};
2633     my $new_todos = [];
2634     my @nodes = (@{$el->child_nodes});
2635    
2636     while (@nodes) {
2637     my $node = shift @nodes;
2638     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2639    
2640     my $nt = $node->node_type;
2641     if ($nt == 1) {
2642     my $node_ns = $node->namespace_uri;
2643     $node_ns = '' unless defined $node_ns;
2644     my $node_ln = $node->manakai_local_name;
2645     ## NOTE: |minuses| list is not checked since redundant
2646 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2647     #
2648     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2649 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2650     }
2651     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2652     unshift @nodes, @$sib;
2653     push @$new_todos, @$ch;
2654     } elsif ($nt == 3 or $nt == 4) {
2655     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2656     $self->{onerror}->(node => $node, type => 'character not allowed');
2657     }
2658     } elsif ($nt == 5) {
2659     unshift @nodes, @{$node->child_nodes};
2660     }
2661     }
2662     return ($new_todos);
2663     },
2664     };
2665    
2666     $Element->{$HTML_NS}->{col} = {
2667     attrs_checker => $GetHTMLAttrsChecker->({
2668     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2669     }),
2670     checker => $HTMLEmptyChecker,
2671     };
2672    
2673     $Element->{$HTML_NS}->{tbody} = {
2674     attrs_checker => $GetHTMLAttrsChecker->({}),
2675     checker => sub {
2676     my ($self, $todo) = @_;
2677     my $el = $todo->{node};
2678     my $new_todos = [];
2679     my @nodes = (@{$el->child_nodes});
2680    
2681     my $has_tr;
2682     while (@nodes) {
2683     my $node = shift @nodes;
2684     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2685    
2686     my $nt = $node->node_type;
2687     if ($nt == 1) {
2688     my $node_ns = $node->namespace_uri;
2689     $node_ns = '' unless defined $node_ns;
2690     my $node_ln = $node->manakai_local_name;
2691     ## NOTE: |minuses| list is not checked since redundant
2692 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2693     #
2694     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2695 wakaba 1.1 $has_tr = 1;
2696     } else {
2697     $self->{onerror}->(node => $node, type => 'element not allowed');
2698     }
2699     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2700     unshift @nodes, @$sib;
2701     push @$new_todos, @$ch;
2702     } elsif ($nt == 3 or $nt == 4) {
2703     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2704     $self->{onerror}->(node => $node, type => 'character not allowed');
2705     }
2706     } elsif ($nt == 5) {
2707     unshift @nodes, @{$node->child_nodes};
2708     }
2709     }
2710     unless ($has_tr) {
2711     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2712     }
2713     return ($new_todos);
2714     },
2715     };
2716    
2717     $Element->{$HTML_NS}->{thead} = {
2718     attrs_checker => $GetHTMLAttrsChecker->({}),
2719     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2720     };
2721    
2722     $Element->{$HTML_NS}->{tfoot} = {
2723     attrs_checker => $GetHTMLAttrsChecker->({}),
2724     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2725     };
2726    
2727     $Element->{$HTML_NS}->{tr} = {
2728     attrs_checker => $GetHTMLAttrsChecker->({}),
2729     checker => sub {
2730     my ($self, $todo) = @_;
2731     my $el = $todo->{node};
2732     my $new_todos = [];
2733     my @nodes = (@{$el->child_nodes});
2734    
2735     my $has_td;
2736     while (@nodes) {
2737     my $node = shift @nodes;
2738     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2739    
2740     my $nt = $node->node_type;
2741     if ($nt == 1) {
2742     my $node_ns = $node->namespace_uri;
2743     $node_ns = '' unless defined $node_ns;
2744     my $node_ln = $node->manakai_local_name;
2745     ## NOTE: |minuses| list is not checked since redundant
2746 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2747     #
2748     } elsif ($node_ns eq $HTML_NS and
2749     ($node_ln eq 'td' or $node_ln eq 'th')) {
2750 wakaba 1.1 $has_td = 1;
2751     } else {
2752     $self->{onerror}->(node => $node, type => 'element not allowed');
2753     }
2754     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2755     unshift @nodes, @$sib;
2756     push @$new_todos, @$ch;
2757     } elsif ($nt == 3 or $nt == 4) {
2758     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2759     $self->{onerror}->(node => $node, type => 'character not allowed');
2760     }
2761     } elsif ($nt == 5) {
2762     unshift @nodes, @{$node->child_nodes};
2763     }
2764     }
2765     unless ($has_td) {
2766     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2767     }
2768     return ($new_todos);
2769     },
2770     };
2771    
2772     $Element->{$HTML_NS}->{td} = {
2773     attrs_checker => $GetHTMLAttrsChecker->({
2774     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2775     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2776     }),
2777     checker => $HTMLBlockOrInlineChecker,
2778     };
2779    
2780     $Element->{$HTML_NS}->{th} = {
2781     attrs_checker => $GetHTMLAttrsChecker->({
2782     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2783     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2784     scope => $GetHTMLEnumeratedAttrChecker
2785     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2786     }),
2787     checker => $HTMLBlockOrInlineChecker,
2788     };
2789    
2790     ## TODO: forms
2791 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2792 wakaba 1.1
2793     $Element->{$HTML_NS}->{script} = {
2794 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2795 wakaba 1.1 src => $HTMLURIAttrChecker,
2796     defer => $GetHTMLBooleanAttrChecker->('defer'),
2797     async => $GetHTMLBooleanAttrChecker->('async'),
2798     type => $HTMLIMTAttrChecker,
2799 wakaba 1.9 }),
2800 wakaba 1.1 checker => sub {
2801     my ($self, $todo) = @_;
2802    
2803     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2804     return $HTMLEmptyChecker->($self, $todo);
2805     } else {
2806     ## NOTE: No content model conformance in HTML5 spec.
2807     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2808     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2809     if ((defined $type and $type eq '') or
2810     (defined $language and $language eq '')) {
2811     $type = 'text/javascript';
2812     } elsif (defined $type) {
2813     #
2814     } elsif (defined $language) {
2815     $type = 'text/' . $language;
2816     } else {
2817     $type = 'text/javascript';
2818     }
2819     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2820     type => 'script:'.$type); ## TODO: $type normalization
2821     return $AnyChecker->($self, $todo);
2822     }
2823     },
2824     };
2825    
2826     ## NOTE: When script is disabled.
2827     $Element->{$HTML_NS}->{noscript} = {
2828 wakaba 1.3 attrs_checker => sub {
2829     my ($self, $todo) = @_;
2830    
2831     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2832     ## since the later is not invoked when the |noscript| is used as a
2833     ## transparent element.
2834     unless ($todo->{node}->owner_document->manakai_is_html) {
2835     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2836     }
2837    
2838     $GetHTMLAttrsChecker->({})->($self, $todo);
2839     },
2840 wakaba 1.1 checker => sub {
2841     my ($self, $todo) = @_;
2842    
2843 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2844     my $new_todos = [];
2845     my @nodes = (@{$todo->{node}->child_nodes});
2846    
2847     while (@nodes) {
2848     my $node = shift @nodes;
2849     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2850    
2851     my $nt = $node->node_type;
2852     if ($nt == 1) {
2853     my $node_ns = $node->namespace_uri;
2854     $node_ns = '' unless defined $node_ns;
2855     my $node_ln = $node->manakai_local_name;
2856 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2857     #
2858     } elsif ($node_ns eq $HTML_NS) {
2859 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2860     #
2861     } elsif ($node_ln eq 'meta') {
2862 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2863     #
2864 wakaba 1.3 } else {
2865 wakaba 1.5 $self->{onerror}->(node => $node,
2866     type => 'element not allowed');
2867 wakaba 1.3 }
2868     } else {
2869     $self->{onerror}->(node => $node, type => 'element not allowed');
2870     }
2871     } else {
2872     $self->{onerror}->(node => $node, type => 'element not allowed');
2873     }
2874    
2875     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2876     unshift @nodes, @$sib;
2877     push @$new_todos, @$ch;
2878     } elsif ($nt == 3 or $nt == 4) {
2879     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2880     $self->{onerror}->(node => $node, type => 'character not allowed');
2881     }
2882     } elsif ($nt == 5) {
2883     unshift @nodes, @{$node->child_nodes};
2884     }
2885     }
2886     return ($new_todos);
2887     } else {
2888     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2889     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2890     push @$sib, $end;
2891     return ($sib, $ch);
2892     }
2893 wakaba 1.1 },
2894     };
2895 wakaba 1.3
2896     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2897 wakaba 1.1
2898     $Element->{$HTML_NS}->{'event-source'} = {
2899     attrs_checker => $GetHTMLAttrsChecker->({
2900     src => $HTMLURIAttrChecker,
2901     }),
2902     checker => $HTMLEmptyChecker,
2903     };
2904    
2905     $Element->{$HTML_NS}->{details} = {
2906     attrs_checker => $GetHTMLAttrsChecker->({
2907     open => $GetHTMLBooleanAttrChecker->('open'),
2908     }),
2909     checker => sub {
2910     my ($self, $todo) = @_;
2911    
2912     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2913     my ($sib, $ch)
2914     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2915     ->($self, $todo);
2916     push @$sib, $end;
2917     return ($sib, $ch);
2918     },
2919     };
2920    
2921     $Element->{$HTML_NS}->{datagrid} = {
2922     attrs_checker => $GetHTMLAttrsChecker->({
2923     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2924     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2925     }),
2926     checker => sub {
2927     my ($self, $todo) = @_;
2928     my $el = $todo->{node};
2929     my $new_todos = [];
2930     my @nodes = (@{$el->child_nodes});
2931    
2932     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2933    
2934     ## Block-table Block* | table | select | datalist | Empty
2935     my $mode = 'any';
2936     while (@nodes) {
2937     my $node = shift @nodes;
2938     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2939    
2940     my $nt = $node->node_type;
2941     if ($nt == 1) {
2942     my $node_ns = $node->namespace_uri;
2943     $node_ns = '' unless defined $node_ns;
2944     my $node_ln = $node->manakai_local_name;
2945     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2946 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2947     #
2948     } elsif ($mode eq 'block') {
2949 wakaba 1.1 $not_allowed = 1
2950     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2951     } elsif ($mode eq 'any') {
2952     if ($node_ns eq $HTML_NS and
2953     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2954     $mode = 'none';
2955     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2956     $mode = 'block';
2957     } else {
2958     $not_allowed = 1;
2959     }
2960     } else {
2961     $not_allowed = 1;
2962     }
2963     $self->{onerror}->(node => $node, type => 'element not allowed')
2964     if $not_allowed;
2965     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2966     unshift @nodes, @$sib;
2967     push @$new_todos, @$ch;
2968     } elsif ($nt == 3 or $nt == 4) {
2969     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2970     $self->{onerror}->(node => $node, type => 'character not allowed');
2971     }
2972     } elsif ($nt == 5) {
2973     unshift @nodes, @{$node->child_nodes};
2974     }
2975     }
2976    
2977     push @$new_todos, $end;
2978     return ($new_todos);
2979     },
2980     };
2981    
2982     $Element->{$HTML_NS}->{command} = {
2983     attrs_checker => $GetHTMLAttrsChecker->({
2984     checked => $GetHTMLBooleanAttrChecker->('checked'),
2985     default => $GetHTMLBooleanAttrChecker->('default'),
2986     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2987     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2988     icon => $HTMLURIAttrChecker,
2989     label => sub { }, ## NOTE: No conformance creteria
2990     radiogroup => sub { }, ## NOTE: No conformance creteria
2991     ## NOTE: |title| has special semantics, but no syntactical difference
2992     type => sub {
2993     my ($self, $attr) = @_;
2994     my $value = $attr->value;
2995     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2996     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2997     }
2998     },
2999     }),
3000     checker => $HTMLEmptyChecker,
3001     };
3002    
3003     $Element->{$HTML_NS}->{menu} = {
3004     attrs_checker => $GetHTMLAttrsChecker->({
3005     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3006     id => sub {
3007     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3008     my ($self, $attr) = @_;
3009     my $value = $attr->value;
3010     if (length $value > 0) {
3011     if ($self->{id}->{$value}) {
3012     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3013     push @{$self->{id}->{$value}}, $attr;
3014     } else {
3015     $self->{id}->{$value} = [$attr];
3016     }
3017     } else {
3018     ## NOTE: MUST contain at least one character
3019     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3020     }
3021     if ($value =~ /[\x09-\x0D\x20]/) {
3022     $self->{onerror}->(node => $attr, type => 'space in ID');
3023     }
3024     $self->{menu}->{$value} ||= $attr;
3025     ## ISSUE: <menu id=""><p contextmenu=""> match?
3026     },
3027     label => sub { }, ## NOTE: No conformance creteria
3028     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3029     }),
3030     checker => sub {
3031     my ($self, $todo) = @_;
3032     my $el = $todo->{node};
3033     my $new_todos = [];
3034     my @nodes = (@{$el->child_nodes});
3035    
3036     my $content = 'li or inline';
3037     while (@nodes) {
3038     my $node = shift @nodes;
3039     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3040    
3041     my $nt = $node->node_type;
3042     if ($nt == 1) {
3043     my $node_ns = $node->namespace_uri;
3044     $node_ns = '' unless defined $node_ns;
3045     my $node_ln = $node->manakai_local_name;
3046     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3047 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3048     #
3049     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3050 wakaba 1.1 if ($content eq 'inline') {
3051     $not_allowed = 1;
3052     } elsif ($content eq 'li or inline') {
3053     $content = 'li';
3054     }
3055     } else {
3056     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3057     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3058     $content = 'inline';
3059     } else {
3060     $not_allowed = 1;
3061     }
3062     }
3063     $self->{onerror}->(node => $node, type => 'element not allowed')
3064     if $not_allowed;
3065     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3066     unshift @nodes, @$sib;
3067     push @$new_todos, @$ch;
3068     } elsif ($nt == 3 or $nt == 4) {
3069     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3070     if ($content eq 'li') {
3071     $self->{onerror}->(node => $node, type => 'character not allowed');
3072     } elsif ($content eq 'li or inline') {
3073     $content = 'inline';
3074     }
3075     }
3076     } elsif ($nt == 5) {
3077     unshift @nodes, @{$node->child_nodes};
3078     }
3079     }
3080    
3081     for (@$new_todos) {
3082     $_->{inline} = 1;
3083     }
3084     return ($new_todos);
3085     },
3086 wakaba 1.8 };
3087    
3088     $Element->{$HTML_NS}->{datatemplate} = {
3089     attrs_checker => $GetHTMLAttrsChecker->({}),
3090     checker => sub {
3091     my ($self, $todo) = @_;
3092     my $el = $todo->{node};
3093     my $new_todos = [];
3094     my @nodes = (@{$el->child_nodes});
3095    
3096     while (@nodes) {
3097     my $node = shift @nodes;
3098     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3099    
3100     my $nt = $node->node_type;
3101     if ($nt == 1) {
3102     my $node_ns = $node->namespace_uri;
3103     $node_ns = '' unless defined $node_ns;
3104     my $node_ln = $node->manakai_local_name;
3105     ## NOTE: |minuses| list is not checked since redundant
3106     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3107     #
3108     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3109     $self->{onerror}->(node => $node,
3110     type => 'element not allowed:datatemplate');
3111     }
3112     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3113     unshift @nodes, @$sib;
3114     push @$new_todos, @$ch;
3115     } elsif ($nt == 3 or $nt == 4) {
3116     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3117     $self->{onerror}->(node => $node, type => 'character not allowed');
3118     }
3119     } elsif ($nt == 5) {
3120     unshift @nodes, @{$node->child_nodes};
3121     }
3122     }
3123     return ($new_todos);
3124     },
3125     is_xml_root => 1,
3126     };
3127    
3128     $Element->{$HTML_NS}->{rule} = {
3129     attrs_checker => $GetHTMLAttrsChecker->({
3130     ## TODO: |condition| attribute
3131     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3132     }),
3133     checker => sub {
3134     my ($self, $todo) = @_;
3135    
3136     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3137     my ($sib, $ch) = $AnyChecker->($self, $todo);
3138     push @$sib, $end;
3139     return ($sib, $ch);
3140     },
3141     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3142     ## is applied to some conforming data, results in a conforming DOM tree.":
3143     ## We don't check against this.
3144     };
3145    
3146     $Element->{$HTML_NS}->{nest} = {
3147     attrs_checker => $GetHTMLAttrsChecker->({
3148     ## TODO: |filter| attribute
3149     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3150     }),
3151     checker => $HTMLEmptyChecker,
3152 wakaba 1.1 };
3153    
3154     $Element->{$HTML_NS}->{legend} = {
3155     attrs_checker => $GetHTMLAttrsChecker->({}),
3156     checker => sub {
3157     my ($self, $todo) = @_;
3158    
3159     my $parent = $todo->{node}->manakai_parent_element;
3160     if (defined $parent) {
3161     my $nsuri = $parent->namespace_uri;
3162     $nsuri = '' unless defined $nsuri;
3163     my $ln = $parent->manakai_local_name;
3164     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3165     return $HTMLInlineChecker->($self, $todo);
3166     } else {
3167 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3168 wakaba 1.1 }
3169     } else {
3170     return $HTMLInlineChecker->($self, $todo);
3171     }
3172    
3173     ## ISSUE: Content model is defined only for fieldset/legend,
3174     ## details/legend, and figure/legend.
3175     },
3176     };
3177    
3178     $Element->{$HTML_NS}->{div} = {
3179     attrs_checker => $GetHTMLAttrsChecker->({}),
3180     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3181     };
3182    
3183     $Element->{$HTML_NS}->{font} = {
3184     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3185     checker => $HTMLTransparentChecker,
3186     };
3187    
3188     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3189    
3190     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24