/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.16 - (hide annotations) (download)
Sun Nov 4 04:34:30 2007 UTC (17 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.15: +1 -0 lines
++ whatpm/t/ChangeLog	4 Nov 2007 04:31:01 -0000
	* content-model-2.dat: Tests for |manifest=""| are added.

2007-11-04  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	4 Nov 2007 04:32:03 -0000
	* HTML.pm: Allow <html manifest=URI>.

2007-11-04  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     ## Strictly inline-level content
252     my $HTMLStrictlyInlineChecker = sub {
253     my ($self, $todo) = @_;
254     my $el = $todo->{node};
255     my $new_todos = [];
256     my @nodes = (@{$el->child_nodes});
257    
258     while (@nodes) {
259     my $node = shift @nodes;
260     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
261    
262     my $nt = $node->node_type;
263     if ($nt == 1) {
264     my $node_ns = $node->namespace_uri;
265     $node_ns = '' unless defined $node_ns;
266     my $node_ln = $node->manakai_local_name;
267     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
268     $not_allowed = 1
269 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
270     $self->{pluses}->{$node_ns}->{$node_ln};
271 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
272     if $not_allowed;
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 5) {
277     unshift @nodes, @{$node->child_nodes};
278     }
279     }
280    
281     for (@$new_todos) {
282     $_->{inline} = 1;
283     $_->{strictly_inline} = 1;
284     }
285     return ($new_todos);
286     }; # $HTMLStrictlyInlineChecker
287    
288 wakaba 1.8 ## Inline-level or strictly inline-level content
289 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
290     my ($self, $todo) = @_;
291     my $el = $todo->{node};
292     my $new_todos = [];
293     my @nodes = (@{$el->child_nodes});
294    
295     while (@nodes) {
296     my $node = shift @nodes;
297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
298    
299     my $nt = $node->node_type;
300     if ($nt == 1) {
301     my $node_ns = $node->namespace_uri;
302     $node_ns = '' unless defined $node_ns;
303     my $node_ln = $node->manakai_local_name;
304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
305     if ($todo->{strictly_inline}) {
306     $not_allowed = 1
307 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308     $self->{pluses}->{$node_ns}->{$node_ln};
309 wakaba 1.1 } else {
310     $not_allowed = 1
311     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
312 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
313     $self->{pluses}->{$node_ns}->{$node_ln};
314 wakaba 1.1 }
315     $self->{onerror}->(node => $node, type => 'element not allowed')
316     if $not_allowed;
317     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
318     unshift @nodes, @$sib;
319     push @$new_todos, @$ch;
320     } elsif ($nt == 5) {
321     unshift @nodes, @{$node->child_nodes};
322     }
323     }
324    
325     for (@$new_todos) {
326     $_->{inline} = 1;
327     $_->{strictly_inline} = 1;
328     }
329     return ($new_todos);
330     }; # $HTMLInlineOrStrictlyInlineChecker
331    
332 wakaba 1.13 ## TODO: New "significant" SHOULD-level requirement (revision 1114).
333 wakaba 1.1
334 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
335 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
336     my ($self, $todo) = @_;
337     my $el = $todo->{node};
338     my $new_todos = [];
339     my @nodes = (@{$el->child_nodes});
340    
341     my $content = 'block-or-inline'; # or 'block' or 'inline'
342     my @block_not_inline;
343     while (@nodes) {
344     my $node = shift @nodes;
345     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
346    
347 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
348     ## is conforming or not.
349    
350 wakaba 1.1 my $nt = $node->node_type;
351     if ($nt == 1) {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
356     if ($content eq 'block') {
357     $not_allowed = 1
358 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
359     $self->{pluses}->{$node_ns}->{$node_ln};
360 wakaba 1.1 } elsif ($content eq 'inline') {
361     $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
364     $self->{pluses}->{$node_ns}->{$node_ln};
365 wakaba 1.1 } else {
366     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
367     my $is_inline
368     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
369     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
370    
371     push @block_not_inline, $node
372     if $is_block and not $is_inline and not $not_allowed;
373 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
374 wakaba 1.1 $content = 'inline';
375     for (@block_not_inline) {
376     $self->{onerror}->(node => $_, type => 'element not allowed');
377     }
378     $not_allowed = 1 unless $is_inline;
379     }
380     }
381     $self->{onerror}->(node => $node, type => 'element not allowed')
382     if $not_allowed;
383     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
384     unshift @nodes, @$sib;
385     push @$new_todos, @$ch;
386     } elsif ($nt == 3 or $nt == 4) {
387     if ($node->data =~ /[^\x09-\x0D\x20]/) {
388     if ($content eq 'block') {
389     $self->{onerror}->(node => $node, type => 'character not allowed');
390     } else {
391     $content = 'inline';
392     for (@block_not_inline) {
393     $self->{onerror}->(node => $_, type => 'element not allowed');
394     }
395     }
396     }
397     } elsif ($nt == 5) {
398     unshift @nodes, @{$node->child_nodes};
399     }
400     }
401    
402     if ($content eq 'inline') {
403     for (@$new_todos) {
404     $_->{inline} = 1;
405     }
406     }
407     return ($new_todos);
408     };
409    
410     ## Zero or more XXX element, then either block-level or inline-level
411     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
412     my ($elnsuri, $ellname) = @_;
413     return sub {
414     my ($self, $todo) = @_;
415     my $el = $todo->{node};
416     my $new_todos = [];
417     my @nodes = (@{$el->child_nodes});
418    
419     my $has_non_style;
420     my $content = 'block-or-inline'; # or 'block' or 'inline'
421     my @block_not_inline;
422     while (@nodes) {
423     my $node = shift @nodes;
424     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
425    
426     my $nt = $node->node_type;
427     if ($nt == 1) {
428     my $node_ns = $node->namespace_uri;
429     $node_ns = '' unless defined $node_ns;
430     my $node_ln = $node->manakai_local_name;
431     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
432     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
433     $not_allowed = 1 if $has_non_style;
434     if ($ellname eq 'style' and
435     not $node->has_attribute_ns (undef, 'scoped')) {
436     $not_allowed = 1;
437     }
438     } elsif ($content eq 'block') {
439     $has_non_style = 1;
440     $not_allowed = 1
441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
442     $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 } elsif ($content eq 'inline') {
444     $has_non_style = 1;
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
447 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
448     $self->{pluses}->{$node_ns}->{$node_ln};
449 wakaba 1.1 } else {
450 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
452     my $is_inline
453     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455    
456     push @block_not_inline, $node
457     if $is_block and not $is_inline and not $not_allowed;
458 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
459 wakaba 1.1 $content = 'inline';
460     for (@block_not_inline) {
461     $self->{onerror}->(node => $_, type => 'element not allowed');
462     }
463     $not_allowed = 1 unless $is_inline;
464     }
465     }
466     $self->{onerror}->(node => $node, type => 'element not allowed')
467     if $not_allowed;
468     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
469     unshift @nodes, @$sib;
470     push @$new_todos, @$ch;
471     } elsif ($nt == 3 or $nt == 4) {
472     if ($node->data =~ /[^\x09-\x0D\x20]/) {
473     $has_non_style = 1;
474     if ($content eq 'block') {
475     $self->{onerror}->(node => $node, type => 'character not allowed');
476     } else {
477     $content = 'inline';
478     for (@block_not_inline) {
479     $self->{onerror}->(node => $_, type => 'element not allowed');
480     }
481     }
482     }
483     } elsif ($nt == 5) {
484     unshift @nodes, @{$node->child_nodes};
485     }
486     }
487    
488     if ($content eq 'inline') {
489     for (@$new_todos) {
490     $_->{inline} = 1;
491     }
492     }
493     return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
498    
499     our $AttrChecker;
500    
501     my $GetHTMLEnumeratedAttrChecker = sub {
502     my $states = shift; # {value => conforming ? 1 : -1}
503     return sub {
504     my ($self, $attr) = @_;
505     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
506     if ($states->{$value} > 0) {
507     #
508     } elsif ($states->{$value}) {
509     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
510     } else {
511     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
512     }
513     };
514     }; # $GetHTMLEnumeratedAttrChecker
515    
516     my $GetHTMLBooleanAttrChecker = sub {
517     my $local_name = shift;
518     return sub {
519     my ($self, $attr) = @_;
520     my $value = $attr->value;
521     unless ($value eq $local_name or $value eq '') {
522     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
523     }
524     };
525     }; # $GetHTMLBooleanAttrChecker
526    
527 wakaba 1.8 ## Unordered set of space-separated tokens
528     my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
529     my ($self, $attr) = @_;
530     my %word;
531     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
532     unless ($word{$word}) {
533     $word{$word} = 1;
534     } else {
535     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
536     }
537     }
538     }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
539    
540 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
541     ## whose allowed values are defined by the section on link types)
542     my $HTMLLinkTypesAttrChecker = sub {
543 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
544 wakaba 1.1 my %word;
545     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
546     unless ($word{$word}) {
547     $word{$word} = 1;
548     } else {
549     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
550     }
551     }
552     ## NOTE: Case sensitive match (since HTML5 spec does not say link
553     ## types are case-insensitive and it says "The value should not
554     ## be confusingly similar to any other defined value (e.g.
555     ## differing only in case).").
556     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
557     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
558     ## values to be used conformingly.
559     require Whatpm::_LinkTypeList;
560     our $LinkType;
561     for my $word (keys %word) {
562     my $def = $LinkType->{$word};
563     if (defined $def) {
564     if ($def->{status} eq 'accepted') {
565     if (defined $def->{effect}->[$a_or_area]) {
566     #
567     } else {
568     $self->{onerror}->(node => $attr,
569     type => 'link type:bad context:'.$word);
570     }
571     } elsif ($def->{status} eq 'proposal') {
572     $self->{onerror}->(node => $attr, level => 's',
573     type => 'link type:proposed:'.$word);
574     } else { # rejected or synonym
575     $self->{onerror}->(node => $attr,
576     type => 'link type:non-conforming:'.$word);
577     }
578 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
579     if ($word eq 'alternate') {
580     #
581     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
582     $todo->{has_hyperlink_link_type} = 1;
583     }
584     }
585 wakaba 1.1 if ($def->{unique}) {
586     unless ($self->{has_link_type}->{$word}) {
587     $self->{has_link_type}->{$word} = 1;
588     } else {
589     $self->{onerror}->(node => $attr,
590     type => 'link type:duplicate:'.$word);
591     }
592     }
593     } else {
594     $self->{onerror}->(node => $attr, level => 'unsupported',
595     type => 'link type:'.$word);
596     }
597     }
598 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
599     if $word{alternate} and not $word{stylesheet};
600 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
601     ## says that using both X-Pingback: header field and HTML
602     ## <link rel=pingback> is deprecated and if both appears they
603     ## SHOULD contain exactly the same value.
604     ## ISSUE: Pingback 1.0 specification defines the exact representation
605     ## of its link element, which cannot be tested by the current arch.
606     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
607     ## include any string that matches to the pattern for the rel=pingback link,
608     ## which again inpossible to test.
609     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
610 wakaba 1.12
611     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
612     ## ISSUE: <link rel="up up"> is non-conforming, since rel="" is unordered.
613 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
614    
615     ## URI (or IRI)
616     my $HTMLURIAttrChecker = sub {
617     my ($self, $attr) = @_;
618     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
619     my $value = $attr->value;
620     Whatpm::URIChecker->check_iri_reference ($value, sub {
621     my %opt = @_;
622     $self->{onerror}->(node => $attr, level => $opt{level},
623     type => 'URI::'.$opt{type}.
624     (defined $opt{position} ? ':'.$opt{position} : ''));
625     });
626 wakaba 1.4 $self->{has_uri_attr} = 1;
627 wakaba 1.1 }; # $HTMLURIAttrChecker
628    
629     ## A space separated list of one or more URIs (or IRIs)
630     my $HTMLSpaceURIsAttrChecker = sub {
631     my ($self, $attr) = @_;
632     my $i = 0;
633     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
634     Whatpm::URIChecker->check_iri_reference ($value, sub {
635     my %opt = @_;
636     $self->{onerror}->(node => $attr, level => $opt{level},
637 wakaba 1.2 type => 'URIs:'.':'.
638     $opt{type}.':'.$i.
639 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
640     });
641     $i++;
642     }
643     ## ISSUE: Relative references?
644     ## ISSUE: Leading or trailing white spaces are conformant?
645     ## ISSUE: A sequence of white space characters are conformant?
646     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
647     ## NOTE: Duplication seems not an error.
648 wakaba 1.4 $self->{has_uri_attr} = 1;
649 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
650    
651     my $HTMLDatetimeAttrChecker = sub {
652     my ($self, $attr) = @_;
653     my $value = $attr->value;
654     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
655     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
656     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
657     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
658     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
659     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
660     if $d < 1 or
661     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
662     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
663     if $M == 2 and $d == 29 and
664     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
665     } else {
666     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
667     }
668     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
669     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
670     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
671     if defined $s and $s > 59;
672     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
673     if $zh > 23;
674     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
675     if $zm > 59;
676     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
677     } else {
678     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
679     }
680     }; # $HTMLDatetimeAttrChecker
681    
682     my $HTMLIntegerAttrChecker = sub {
683     my ($self, $attr) = @_;
684     my $value = $attr->value;
685     unless ($value =~ /\A-?[0-9]+\z/) {
686     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
687     }
688     }; # $HTMLIntegerAttrChecker
689    
690     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
691     my $range_check = shift;
692     return sub {
693     my ($self, $attr) = @_;
694     my $value = $attr->value;
695     if ($value =~ /\A[0-9]+\z/) {
696     unless ($range_check->($value + 0)) {
697     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
698     }
699     } else {
700     $self->{onerror}->(node => $attr,
701     type => 'nninteger:syntax error');
702     }
703     };
704     }; # $GetHTMLNonNegativeIntegerAttrChecker
705    
706     my $GetHTMLFloatingPointNumberAttrChecker = sub {
707     my $range_check = shift;
708     return sub {
709     my ($self, $attr) = @_;
710     my $value = $attr->value;
711     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
712     unless ($range_check->($value + 0)) {
713     $self->{onerror}->(node => $attr, type => 'float:out of range');
714     }
715     } else {
716     $self->{onerror}->(node => $attr,
717     type => 'float:syntax error');
718     }
719     };
720     }; # $GetHTMLFloatingPointNumberAttrChecker
721    
722     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
723     ## ISSUE: RFC 2046 does not define syntax of media types.
724     ## ISSUE: The definition of "a valid MIME type" is unknown.
725     ## Syntactical correctness?
726     my $HTMLIMTAttrChecker = sub {
727     my ($self, $attr) = @_;
728     my $value = $attr->value;
729     ## ISSUE: RFC 2045 Content-Type header field allows insertion
730     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
731     ## ISSUE: RFC 2231 extension? Maybe no.
732     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
733     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
734     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
735     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
736     my @type = ($1, $2);
737     my $param = $3;
738     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
739     if (defined $2) {
740     push @type, $1 => $2;
741     } else {
742     my $n = $1;
743     my $v = $2;
744     $v =~ s/\\(.)/$1/gs;
745     push @type, $n => $v;
746     }
747     }
748     require Whatpm::IMTChecker;
749     Whatpm::IMTChecker->check_imt (sub {
750     my %opt = @_;
751     $self->{onerror}->(node => $attr, level => $opt{level},
752     type => 'IMT:'.$opt{type});
753     }, @type);
754     } else {
755     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
756     }
757     }; # $HTMLIMTAttrChecker
758    
759     my $HTMLLanguageTagAttrChecker = sub {
760 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
761    
762 wakaba 1.1 my ($self, $attr) = @_;
763 wakaba 1.6 my $value = $attr->value;
764     require Whatpm::LangTag;
765     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
766     my %opt = @_;
767     my $type = 'LangTag:'.$opt{type};
768     $type .= ':' . $opt{subtag} if defined $opt{subtag};
769     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
770     level => $opt{level});
771     });
772 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
773 wakaba 1.6
774     ## TODO: testdata
775 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
776    
777     ## "A valid media query [MQ]"
778     my $HTMLMQAttrChecker = sub {
779     my ($self, $attr) = @_;
780     $self->{onerror}->(node => $attr, level => 'unsupported',
781     type => 'media query');
782     ## ISSUE: What is "a valid media query"?
783     }; # $HTMLMQAttrChecker
784    
785     my $HTMLEventHandlerAttrChecker = sub {
786     my ($self, $attr) = @_;
787     $self->{onerror}->(node => $attr, level => 'unsupported',
788     type => 'event handler');
789     ## TODO: MUST contain valid ECMAScript code matching the
790     ## ECMAScript |FunctionBody| production. [ECMA262]
791     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
792     ## ISSUE: Automatic semicolon insertion does not apply?
793     ## ISSUE: Other script languages?
794     }; # $HTMLEventHandlerAttrChecker
795    
796     my $HTMLUsemapAttrChecker = sub {
797     my ($self, $attr) = @_;
798     ## MUST be a valid hashed ID reference to a |map| element
799     my $value = $attr->value;
800     if ($value =~ s/^#//) {
801     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
802     push @{$self->{usemap}}, [$value => $attr];
803     } else {
804     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
805     }
806     ## NOTE: Space characters in hashed ID references are conforming.
807     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
808     }; # $HTMLUsemapAttrChecker
809    
810     my $HTMLTargetAttrChecker = sub {
811     my ($self, $attr) = @_;
812     my $value = $attr->value;
813     if ($value =~ /^_/) {
814     $value = lc $value; ## ISSUE: ASCII case-insentitive?
815     unless ({
816     _self => 1, _parent => 1, _top => 1,
817     }->{$value}) {
818     $self->{onerror}->(node => $attr,
819     type => 'reserved browsing context name');
820     }
821     } else {
822     #$ ISSUE: An empty string is conforming?
823     }
824     }; # $HTMLTargetAttrChecker
825    
826     my $HTMLAttrChecker = {
827     id => sub {
828     ## NOTE: |map| has its own variant of |id=""| checker
829     my ($self, $attr) = @_;
830     my $value = $attr->value;
831     if (length $value > 0) {
832     if ($self->{id}->{$value}) {
833     $self->{onerror}->(node => $attr, type => 'duplicate ID');
834     push @{$self->{id}->{$value}}, $attr;
835     } else {
836     $self->{id}->{$value} = [$attr];
837     }
838     if ($value =~ /[\x09-\x0D\x20]/) {
839     $self->{onerror}->(node => $attr, type => 'space in ID');
840     }
841     } else {
842     ## NOTE: MUST contain at least one character
843     $self->{onerror}->(node => $attr, type => 'empty attribute value');
844     }
845     },
846     title => sub {}, ## NOTE: No conformance creteria
847     lang => sub {
848     my ($self, $attr) = @_;
849 wakaba 1.6 my $value = $attr->value;
850     if ($value eq '') {
851     #
852     } else {
853     require Whatpm::LangTag;
854     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
855     my %opt = @_;
856     my $type = 'LangTag:'.$opt{type};
857     $type .= ':' . $opt{subtag} if defined $opt{subtag};
858     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
859     level => $opt{level});
860     });
861     }
862 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
863     unless ($attr->owner_document->manakai_is_html) {
864     $self->{onerror}->(node => $attr, type => 'in XML:lang');
865     }
866 wakaba 1.6
867     ## TODO: test data
868 wakaba 1.1 },
869     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
870     class => sub {
871     my ($self, $attr) = @_;
872     my %word;
873     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
874     unless ($word{$word}) {
875     $word{$word} = 1;
876     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
877     } else {
878     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
879     }
880     }
881     },
882     contextmenu => sub {
883     my ($self, $attr) = @_;
884     my $value = $attr->value;
885     push @{$self->{contextmenu}}, [$value => $attr];
886     ## ISSUE: "The value must be the ID of a menu element in the DOM."
887     ## What is "in the DOM"? A menu Element node that is not part
888     ## of the Document tree is in the DOM? A menu Element node that
889     ## belong to another Document tree is in the DOM?
890     },
891     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
892 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
893     ## TODO: ref, template, registrationmark
894 wakaba 1.1 };
895    
896     for (qw/
897     onabort onbeforeunload onblur onchange onclick oncontextmenu
898     ondblclick ondrag ondragend ondragenter ondragleave ondragover
899     ondragstart ondrop onerror onfocus onkeydown onkeypress
900     onkeyup onload onmessage onmousedown onmousemove onmouseout
901     onmouseover onmouseup onmousewheel onresize onscroll onselect
902     onsubmit onunload
903     /) {
904     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
905     }
906    
907     my $GetHTMLAttrsChecker = sub {
908     my $element_specific_checker = shift;
909     return sub {
910     my ($self, $todo) = @_;
911     for my $attr (@{$todo->{node}->attributes}) {
912     my $attr_ns = $attr->namespace_uri;
913     $attr_ns = '' unless defined $attr_ns;
914     my $attr_ln = $attr->manakai_local_name;
915     my $checker;
916     if ($attr_ns eq '') {
917     $checker = $element_specific_checker->{$attr_ln}
918     || $HTMLAttrChecker->{$attr_ln};
919     }
920     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
921     || $AttrChecker->{$attr_ns}->{''};
922     if ($checker) {
923     $checker->($self, $attr, $todo);
924     } else {
925     $self->{onerror}->(node => $attr, level => 'unsupported',
926     type => 'attribute');
927     ## ISSUE: No comformance createria for unknown attributes in the spec
928     }
929     }
930     };
931     }; # $GetHTMLAttrsChecker
932    
933     our $Element;
934     our $ElementDefault;
935     our $AnyChecker;
936    
937     $Element->{$HTML_NS}->{''} = {
938     attrs_checker => $GetHTMLAttrsChecker->({}),
939     checker => $ElementDefault->{checker},
940     };
941    
942     $Element->{$HTML_NS}->{html} = {
943     is_root => 1,
944     attrs_checker => $GetHTMLAttrsChecker->({
945 wakaba 1.16 manifest => $HTMLURIAttrChecker,
946 wakaba 1.1 xmlns => sub {
947     my ($self, $attr) = @_;
948     my $value = $attr->value;
949     unless ($value eq $HTML_NS) {
950     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
951     }
952     unless ($attr->owner_document->manakai_is_html) {
953     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
954     ## TODO: Test
955     }
956     },
957     }),
958     checker => sub {
959     my ($self, $todo) = @_;
960     my $el = $todo->{node};
961     my $new_todos = [];
962     my @nodes = (@{$el->child_nodes});
963    
964     my $phase = 'before head';
965     while (@nodes) {
966     my $node = shift @nodes;
967     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
968    
969     my $nt = $node->node_type;
970     if ($nt == 1) {
971     my $node_ns = $node->namespace_uri;
972     $node_ns = '' unless defined $node_ns;
973     my $node_ln = $node->manakai_local_name;
974     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
975 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
976     #
977     } elsif ($phase eq 'before head') {
978 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
979     $phase = 'after head';
980     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
981     $self->{onerror}->(node => $node, type => 'ps element missing:head');
982     $phase = 'after body';
983     } else {
984     $not_allowed = 1;
985     # before head
986     }
987     } elsif ($phase eq 'after head') {
988     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
989     $phase = 'after body';
990     } else {
991     $not_allowed = 1;
992     # after head
993     }
994     } else { #elsif ($phase eq 'after body') {
995     $not_allowed = 1;
996     # after body
997     }
998     $self->{onerror}->(node => $node, type => 'element not allowed')
999     if $not_allowed;
1000     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1001     unshift @nodes, @$sib;
1002     push @$new_todos, @$ch;
1003     } elsif ($nt == 3 or $nt == 4) {
1004     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1005     $self->{onerror}->(node => $node, type => 'character not allowed');
1006     }
1007     } elsif ($nt == 5) {
1008     unshift @nodes, @{$node->child_nodes};
1009     }
1010     }
1011    
1012     if ($phase eq 'before head') {
1013     $self->{onerror}->(node => $el, type => 'child element missing:head');
1014     $self->{onerror}->(node => $el, type => 'child element missing:body');
1015     } elsif ($phase eq 'after head') {
1016     $self->{onerror}->(node => $el, type => 'child element missing:body');
1017     }
1018    
1019     return ($new_todos);
1020     },
1021     };
1022    
1023     $Element->{$HTML_NS}->{head} = {
1024     attrs_checker => $GetHTMLAttrsChecker->({}),
1025     checker => sub {
1026     my ($self, $todo) = @_;
1027     my $el = $todo->{node};
1028     my $new_todos = [];
1029     my @nodes = (@{$el->child_nodes});
1030    
1031     my $has_title;
1032     my $phase = 'initial'; # 'after charset', 'after base'
1033     while (@nodes) {
1034     my $node = shift @nodes;
1035     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1036    
1037     my $nt = $node->node_type;
1038     if ($nt == 1) {
1039     my $node_ns = $node->namespace_uri;
1040     $node_ns = '' unless defined $node_ns;
1041     my $node_ln = $node->manakai_local_name;
1042     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1043 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1044     #
1045     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1046 wakaba 1.1 $phase = 'after base';
1047     unless ($has_title) {
1048     $has_title = 1;
1049     } else {
1050     $not_allowed = 1;
1051     }
1052     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1053     if ($node->has_attribute_ns (undef, 'charset')) {
1054     if ($phase eq 'initial') {
1055     $phase = 'after charset';
1056     } else {
1057     $not_allowed = 1;
1058     ## NOTE: See also |base|'s "contexts" field in the spec
1059     }
1060 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1061     $node->has_attribute_ns (undef, 'http-equiv')) {
1062     $phase = 'after base';
1063 wakaba 1.1 } else {
1064     $phase = 'after base';
1065 wakaba 1.5 $not_allowed = 1;
1066 wakaba 1.1 }
1067     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1068     if ($phase eq 'initial' or $phase eq 'after charset') {
1069     $phase = 'after base';
1070     } else {
1071     $not_allowed = 1;
1072     }
1073     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1074     $phase = 'after base';
1075     if ($node->has_attribute_ns (undef, 'scoped')) {
1076     $not_allowed = 1;
1077     }
1078     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1079     $phase = 'after base';
1080     } else {
1081     $not_allowed = 1;
1082     }
1083     $self->{onerror}->(node => $node, type => 'element not allowed')
1084     if $not_allowed;
1085 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1086 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1087     unshift @nodes, @$sib;
1088     push @$new_todos, @$ch;
1089     } elsif ($nt == 3 or $nt == 4) {
1090     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1091     $self->{onerror}->(node => $node, type => 'character not allowed');
1092     }
1093     } elsif ($nt == 5) {
1094     unshift @nodes, @{$node->child_nodes};
1095     }
1096     }
1097     unless ($has_title) {
1098     $self->{onerror}->(node => $el, type => 'child element missing:title');
1099     }
1100     return ($new_todos);
1101     },
1102     };
1103    
1104     $Element->{$HTML_NS}->{title} = {
1105     attrs_checker => $GetHTMLAttrsChecker->({}),
1106     checker => $HTMLTextChecker,
1107     };
1108    
1109     $Element->{$HTML_NS}->{base} = {
1110 wakaba 1.4 attrs_checker => sub {
1111     my ($self, $todo) = @_;
1112    
1113 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1114     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1115    
1116     if ($self->{has_uri_attr} and $has_href) {
1117 wakaba 1.4 ## ISSUE: Are these examples conforming?
1118     ## <head profile="a b c"><base href> (except for |profile|'s
1119     ## non-conformance)
1120     ## <title xml:base="relative"/><base href/> (maybe it should be)
1121     ## <unknown xmlns="relative"/><base href/> (assuming that
1122     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1123     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1124     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1125     ## <style>@import 'relative';</style><base href>
1126     ## <script>location.href = 'relative';</script><base href>
1127 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1128     ## an exception.
1129 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1130     type => 'basehref after URI attribute');
1131     }
1132 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1133 wakaba 1.4 ## ISSUE: Are these examples conforming?
1134     ## <head><title xlink:href=""/><base target="name"/></head>
1135     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1136     ## (assuming that |xbl:xbl| is allowed before |base|)
1137     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1138     ## <link href=""/><base target="name"/>
1139     ## <link rel=unknown href=""><base target=name>
1140     $self->{onerror}->(node => $todo->{node},
1141     type => 'basetarget after hyperlink');
1142     }
1143    
1144 wakaba 1.14 if (not $has_href and not $has_target) {
1145     $self->{onerror}->(node => $todo->{node},
1146     type => 'attribute missing:href|target');
1147     }
1148    
1149 wakaba 1.4 return $GetHTMLAttrsChecker->({
1150     href => $HTMLURIAttrChecker,
1151     target => $HTMLTargetAttrChecker,
1152     })->($self, $todo);
1153     },
1154 wakaba 1.1 checker => $HTMLEmptyChecker,
1155     };
1156    
1157     $Element->{$HTML_NS}->{link} = {
1158     attrs_checker => sub {
1159     my ($self, $todo) = @_;
1160     $GetHTMLAttrsChecker->({
1161     href => $HTMLURIAttrChecker,
1162 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1163 wakaba 1.1 media => $HTMLMQAttrChecker,
1164     hreflang => $HTMLLanguageTagAttrChecker,
1165     type => $HTMLIMTAttrChecker,
1166     ## NOTE: Though |title| has special semantics,
1167     ## syntactically same as the |title| as global attribute.
1168     })->($self, $todo);
1169 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1170     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1171     } else {
1172 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1173     type => 'attribute missing:href');
1174     }
1175     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1176     $self->{onerror}->(node => $todo->{node},
1177     type => 'attribute missing:rel');
1178     }
1179     },
1180     checker => $HTMLEmptyChecker,
1181     };
1182    
1183     $Element->{$HTML_NS}->{meta} = {
1184     attrs_checker => sub {
1185     my ($self, $todo) = @_;
1186     my $name_attr;
1187     my $http_equiv_attr;
1188     my $charset_attr;
1189     my $content_attr;
1190     for my $attr (@{$todo->{node}->attributes}) {
1191     my $attr_ns = $attr->namespace_uri;
1192     $attr_ns = '' unless defined $attr_ns;
1193     my $attr_ln = $attr->manakai_local_name;
1194     my $checker;
1195     if ($attr_ns eq '') {
1196     if ($attr_ln eq 'content') {
1197     $content_attr = $attr;
1198     $checker = 1;
1199     } elsif ($attr_ln eq 'name') {
1200     $name_attr = $attr;
1201     $checker = 1;
1202     } elsif ($attr_ln eq 'http-equiv') {
1203     $http_equiv_attr = $attr;
1204     $checker = 1;
1205     } elsif ($attr_ln eq 'charset') {
1206     $charset_attr = $attr;
1207     $checker = 1;
1208     } else {
1209     $checker = $HTMLAttrChecker->{$attr_ln}
1210     || $AttrChecker->{$attr_ns}->{$attr_ln}
1211     || $AttrChecker->{$attr_ns}->{''};
1212     }
1213     } else {
1214     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1215     || $AttrChecker->{$attr_ns}->{''};
1216     }
1217     if ($checker) {
1218     $checker->($self, $attr) if ref $checker;
1219     } else {
1220     $self->{onerror}->(node => $attr, level => 'unsupported',
1221     type => 'attribute');
1222     ## ISSUE: No comformance createria for unknown attributes in the spec
1223     }
1224     }
1225    
1226     if (defined $name_attr) {
1227     if (defined $http_equiv_attr) {
1228     $self->{onerror}->(node => $http_equiv_attr,
1229     type => 'attribute not allowed');
1230     } elsif (defined $charset_attr) {
1231     $self->{onerror}->(node => $charset_attr,
1232     type => 'attribute not allowed');
1233     }
1234     my $metadata_name = $name_attr->value;
1235     my $metadata_value;
1236     if (defined $content_attr) {
1237     $metadata_value = $content_attr->value;
1238     } else {
1239     $self->{onerror}->(node => $todo->{node},
1240     type => 'attribute missing:content');
1241     $metadata_value = '';
1242     }
1243     } elsif (defined $http_equiv_attr) {
1244     if (defined $charset_attr) {
1245     $self->{onerror}->(node => $charset_attr,
1246     type => 'attribute not allowed');
1247     }
1248     unless (defined $content_attr) {
1249     $self->{onerror}->(node => $todo->{node},
1250     type => 'attribute missing:content');
1251     }
1252     } elsif (defined $charset_attr) {
1253     if (defined $content_attr) {
1254     $self->{onerror}->(node => $content_attr,
1255     type => 'attribute not allowed');
1256     }
1257     } else {
1258     if (defined $content_attr) {
1259     $self->{onerror}->(node => $content_attr,
1260     type => 'attribute not allowed');
1261     $self->{onerror}->(node => $todo->{node},
1262     type => 'attribute missing:name|http-equiv');
1263     } else {
1264     $self->{onerror}->(node => $todo->{node},
1265     type => 'attribute missing:name|http-equiv|charset');
1266     }
1267     }
1268    
1269     ## TODO: metadata conformance
1270    
1271     ## TODO: pragma conformance
1272     if (defined $http_equiv_attr) { ## An enumerated attribute
1273     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1274     if ({
1275     'refresh' => 1,
1276     'default-style' => 1,
1277     }->{$keyword}) {
1278     #
1279     } else {
1280     $self->{onerror}->(node => $http_equiv_attr,
1281     type => 'enumerated:invalid');
1282     }
1283     }
1284    
1285     if (defined $charset_attr) {
1286     unless ($todo->{node}->owner_document->manakai_is_html) {
1287     $self->{onerror}->(node => $charset_attr,
1288     type => 'in XML:charset');
1289     }
1290     ## TODO: charset
1291     }
1292     },
1293     checker => $HTMLEmptyChecker,
1294     };
1295    
1296     $Element->{$HTML_NS}->{style} = {
1297     attrs_checker => $GetHTMLAttrsChecker->({
1298     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1299     media => $HTMLMQAttrChecker,
1300     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1301     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1302     ## not different
1303     }),
1304     checker => sub {
1305     ## NOTE: |html:style| has no conformance creteria on content model
1306     my ($self, $todo) = @_;
1307     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1308     $type = 'text/css' unless defined $type;
1309     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1310     type => 'style:'.$type); ## TODO: $type normalization
1311     return $AnyChecker->($self, $todo);
1312     },
1313     };
1314    
1315     $Element->{$HTML_NS}->{body} = {
1316     attrs_checker => $GetHTMLAttrsChecker->({}),
1317     checker => $HTMLBlockChecker,
1318     };
1319    
1320     $Element->{$HTML_NS}->{section} = {
1321     attrs_checker => $GetHTMLAttrsChecker->({}),
1322     checker => $HTMLStylableBlockChecker,
1323     };
1324    
1325     $Element->{$HTML_NS}->{nav} = {
1326     attrs_checker => $GetHTMLAttrsChecker->({}),
1327     checker => $HTMLBlockOrInlineChecker,
1328     };
1329    
1330     $Element->{$HTML_NS}->{article} = {
1331     attrs_checker => $GetHTMLAttrsChecker->({}),
1332     checker => $HTMLStylableBlockChecker,
1333     };
1334    
1335     $Element->{$HTML_NS}->{blockquote} = {
1336     attrs_checker => $GetHTMLAttrsChecker->({
1337     cite => $HTMLURIAttrChecker,
1338     }),
1339     checker => $HTMLBlockChecker,
1340     };
1341    
1342     $Element->{$HTML_NS}->{aside} = {
1343     attrs_checker => $GetHTMLAttrsChecker->({}),
1344     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1345     };
1346    
1347     $Element->{$HTML_NS}->{h1} = {
1348     attrs_checker => $GetHTMLAttrsChecker->({}),
1349     checker => sub {
1350     my ($self, $todo) = @_;
1351     $todo->{flag}->{has_heading}->[0] = 1;
1352 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1353 wakaba 1.1 },
1354     };
1355    
1356     $Element->{$HTML_NS}->{h2} = {
1357     attrs_checker => $GetHTMLAttrsChecker->({}),
1358     checker => $Element->{$HTML_NS}->{h1}->{checker},
1359     };
1360    
1361     $Element->{$HTML_NS}->{h3} = {
1362     attrs_checker => $GetHTMLAttrsChecker->({}),
1363     checker => $Element->{$HTML_NS}->{h1}->{checker},
1364     };
1365    
1366     $Element->{$HTML_NS}->{h4} = {
1367     attrs_checker => $GetHTMLAttrsChecker->({}),
1368     checker => $Element->{$HTML_NS}->{h1}->{checker},
1369     };
1370    
1371     $Element->{$HTML_NS}->{h5} = {
1372     attrs_checker => $GetHTMLAttrsChecker->({}),
1373     checker => $Element->{$HTML_NS}->{h1}->{checker},
1374     };
1375    
1376     $Element->{$HTML_NS}->{h6} = {
1377     attrs_checker => $GetHTMLAttrsChecker->({}),
1378     checker => $Element->{$HTML_NS}->{h1}->{checker},
1379     };
1380    
1381     $Element->{$HTML_NS}->{header} = {
1382     attrs_checker => $GetHTMLAttrsChecker->({}),
1383     checker => sub {
1384     my ($self, $todo) = @_;
1385     my $old_flag = $todo->{flag}->{has_heading} || [];
1386     my $new_flag = [];
1387     local $todo->{flag}->{has_heading} = $new_flag;
1388     my $node = $todo->{node};
1389    
1390     my $end = $self->_add_minuses
1391     ({$HTML_NS => {qw/header 1 footer 1/}},
1392     $HTMLSectioningElements);
1393     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1394     push @$new_todos, $end,
1395     {type => 'code', code => sub {
1396     if ($new_flag->[0]) {
1397     $old_flag->[0] = 1;
1398     } else {
1399     $self->{onerror}->(node => $node, type => 'element missing:hn');
1400     }
1401     }};
1402     return ($new_todos, $ch);
1403     },
1404     };
1405    
1406     $Element->{$HTML_NS}->{footer} = {
1407     attrs_checker => $GetHTMLAttrsChecker->({}),
1408     checker => sub { ## block -hn -header -footer -sectioning or inline
1409     my ($self, $todo) = @_;
1410     my $el = $todo->{node};
1411     my $new_todos = [];
1412     my @nodes = (@{$el->child_nodes});
1413    
1414     my $content = 'block-or-inline'; # or 'block' or 'inline'
1415     my @block_not_inline;
1416     while (@nodes) {
1417     my $node = shift @nodes;
1418     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1419    
1420     my $nt = $node->node_type;
1421     if ($nt == 1) {
1422     my $node_ns = $node->namespace_uri;
1423     $node_ns = '' unless defined $node_ns;
1424     my $node_ln = $node->manakai_local_name;
1425     my $not_allowed;
1426     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1427     $not_allowed = 1;
1428     } elsif ($node_ns eq $HTML_NS and
1429     {
1430     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1431     }->{$node_ln}) {
1432     $not_allowed = 1;
1433     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1434     $not_allowed = 1;
1435     }
1436     if ($content eq 'block') {
1437     $not_allowed = 1
1438 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1439     $self->{pluses}->{$node_ns}->{$node_ln};
1440 wakaba 1.1 } elsif ($content eq 'inline') {
1441     $not_allowed = 1
1442     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1443 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1444     $self->{pluses}->{$node_ns}->{$node_ln};
1445 wakaba 1.1 } else {
1446     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1447     my $is_inline
1448     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1449     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1450    
1451     push @block_not_inline, $node
1452     if $is_block and not $is_inline and not $not_allowed;
1453 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1454 wakaba 1.1 $content = 'inline';
1455     for (@block_not_inline) {
1456     $self->{onerror}->(node => $_, type => 'element not allowed');
1457     }
1458     $not_allowed = 1 unless $is_inline;
1459     }
1460     }
1461     $self->{onerror}->(node => $node, type => 'element not allowed')
1462     if $not_allowed;
1463     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1464     unshift @nodes, @$sib;
1465     push @$new_todos, @$ch;
1466     } elsif ($nt == 3 or $nt == 4) {
1467     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1468     if ($content eq 'block') {
1469     $self->{onerror}->(node => $node, type => 'character not allowed');
1470     } else {
1471     $content = 'inline';
1472     for (@block_not_inline) {
1473     $self->{onerror}->(node => $_, type => 'element not allowed');
1474     }
1475     }
1476     }
1477     } elsif ($nt == 5) {
1478     unshift @nodes, @{$node->child_nodes};
1479     }
1480     }
1481    
1482     my $end = $self->_add_minuses
1483     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1484     $HTMLSectioningElements);
1485     push @$new_todos, $end;
1486    
1487     if ($content eq 'inline') {
1488     for (@$new_todos) {
1489     $_->{inline} = 1;
1490     }
1491     }
1492    
1493     return ($new_todos);
1494     },
1495     };
1496    
1497     $Element->{$HTML_NS}->{address} = {
1498     attrs_checker => $GetHTMLAttrsChecker->({}),
1499     checker => $HTMLInlineChecker,
1500     };
1501    
1502     $Element->{$HTML_NS}->{p} = {
1503     attrs_checker => $GetHTMLAttrsChecker->({}),
1504 wakaba 1.13 checker => $HTMLInlineChecker,
1505 wakaba 1.1 };
1506    
1507     $Element->{$HTML_NS}->{hr} = {
1508     attrs_checker => $GetHTMLAttrsChecker->({}),
1509     checker => $HTMLEmptyChecker,
1510     };
1511    
1512     $Element->{$HTML_NS}->{br} = {
1513     attrs_checker => $GetHTMLAttrsChecker->({}),
1514     checker => $HTMLEmptyChecker,
1515     };
1516    
1517     $Element->{$HTML_NS}->{dialog} = {
1518     attrs_checker => $GetHTMLAttrsChecker->({}),
1519     checker => sub {
1520     my ($self, $todo) = @_;
1521     my $el = $todo->{node};
1522     my $new_todos = [];
1523     my @nodes = (@{$el->child_nodes});
1524    
1525     my $phase = 'before dt';
1526     while (@nodes) {
1527     my $node = shift @nodes;
1528     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1529    
1530     my $nt = $node->node_type;
1531     if ($nt == 1) {
1532     my $node_ns = $node->namespace_uri;
1533     $node_ns = '' unless defined $node_ns;
1534     my $node_ln = $node->manakai_local_name;
1535     ## NOTE: |minuses| list is not checked since redundant
1536 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1537     #
1538     } elsif ($phase eq 'before dt') {
1539 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1540     $phase = 'before dd';
1541     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1542     $self->{onerror}
1543     ->(node => $node, type => 'ps element missing:dt');
1544     $phase = 'before dt';
1545     } else {
1546     $self->{onerror}->(node => $node, type => 'element not allowed');
1547     }
1548     } else { # before dd
1549     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1550     $phase = 'before dt';
1551     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1552     $self->{onerror}
1553     ->(node => $node, type => 'ps element missing:dd');
1554     $phase = 'before dd';
1555     } else {
1556     $self->{onerror}->(node => $node, type => 'element not allowed');
1557     }
1558     }
1559     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1560     unshift @nodes, @$sib;
1561     push @$new_todos, @$ch;
1562     } elsif ($nt == 3 or $nt == 4) {
1563     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1564     $self->{onerror}->(node => $node, type => 'character not allowed');
1565     }
1566     } elsif ($nt == 5) {
1567     unshift @nodes, @{$node->child_nodes};
1568     }
1569     }
1570     if ($phase eq 'before dd') {
1571 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1572 wakaba 1.1 }
1573     return ($new_todos);
1574     },
1575     };
1576    
1577     $Element->{$HTML_NS}->{pre} = {
1578     attrs_checker => $GetHTMLAttrsChecker->({}),
1579     checker => $HTMLStrictlyInlineChecker,
1580     };
1581    
1582     $Element->{$HTML_NS}->{ol} = {
1583     attrs_checker => $GetHTMLAttrsChecker->({
1584     start => $HTMLIntegerAttrChecker,
1585     }),
1586     checker => sub {
1587     my ($self, $todo) = @_;
1588     my $el = $todo->{node};
1589     my $new_todos = [];
1590     my @nodes = (@{$el->child_nodes});
1591    
1592     while (@nodes) {
1593     my $node = shift @nodes;
1594     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1595    
1596     my $nt = $node->node_type;
1597     if ($nt == 1) {
1598     my $node_ns = $node->namespace_uri;
1599     $node_ns = '' unless defined $node_ns;
1600     my $node_ln = $node->manakai_local_name;
1601     ## NOTE: |minuses| list is not checked since redundant
1602 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1603     #
1604     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1605 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1606     }
1607     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1608     unshift @nodes, @$sib;
1609     push @$new_todos, @$ch;
1610     } elsif ($nt == 3 or $nt == 4) {
1611     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1612     $self->{onerror}->(node => $node, type => 'character not allowed');
1613     }
1614     } elsif ($nt == 5) {
1615     unshift @nodes, @{$node->child_nodes};
1616     }
1617     }
1618    
1619     if ($todo->{inline}) {
1620     for (@$new_todos) {
1621     $_->{inline} = 1;
1622     }
1623     }
1624     return ($new_todos);
1625     },
1626     };
1627    
1628     $Element->{$HTML_NS}->{ul} = {
1629     attrs_checker => $GetHTMLAttrsChecker->({}),
1630     checker => $Element->{$HTML_NS}->{ol}->{checker},
1631     };
1632    
1633     $Element->{$HTML_NS}->{li} = {
1634     attrs_checker => $GetHTMLAttrsChecker->({
1635     start => sub {
1636     my ($self, $attr) = @_;
1637     my $parent = $attr->owner_element->manakai_parent_element;
1638     if (defined $parent) {
1639     my $parent_ns = $parent->namespace_uri;
1640     $parent_ns = '' unless defined $parent_ns;
1641     my $parent_ln = $parent->manakai_local_name;
1642     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1643     $self->{onerror}->(node => $attr, level => 'unsupported',
1644     type => 'attribute');
1645     }
1646     }
1647     $HTMLIntegerAttrChecker->($self, $attr);
1648     },
1649     }),
1650     checker => sub {
1651     my ($self, $todo) = @_;
1652     if ($todo->{inline}) {
1653     return $HTMLInlineChecker->($self, $todo);
1654     } else {
1655     return $HTMLBlockOrInlineChecker->($self, $todo);
1656     }
1657     },
1658     };
1659    
1660     $Element->{$HTML_NS}->{dl} = {
1661     attrs_checker => $GetHTMLAttrsChecker->({}),
1662     checker => sub {
1663     my ($self, $todo) = @_;
1664     my $el = $todo->{node};
1665     my $new_todos = [];
1666     my @nodes = (@{$el->child_nodes});
1667    
1668     my $phase = 'before dt';
1669     while (@nodes) {
1670     my $node = shift @nodes;
1671     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1672    
1673     my $nt = $node->node_type;
1674     if ($nt == 1) {
1675     my $node_ns = $node->namespace_uri;
1676     $node_ns = '' unless defined $node_ns;
1677     my $node_ln = $node->manakai_local_name;
1678     ## NOTE: |minuses| list is not checked since redundant
1679 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1680     #
1681     } elsif ($phase eq 'in dds') {
1682 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1683     #$phase = 'in dds';
1684     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1685     $phase = 'in dts';
1686     } else {
1687     $self->{onerror}->(node => $node, type => 'element not allowed');
1688     }
1689     } elsif ($phase eq 'in dts') {
1690     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1691     #$phase = 'in dts';
1692     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1693     $phase = 'in dds';
1694     } else {
1695     $self->{onerror}->(node => $node, type => 'element not allowed');
1696     }
1697     } else { # before dt
1698     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1699     $phase = 'in dts';
1700     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1701     $self->{onerror}
1702     ->(node => $node, type => 'ps element missing:dt');
1703     $phase = 'in dds';
1704     } else {
1705     $self->{onerror}->(node => $node, type => 'element not allowed');
1706     }
1707     }
1708     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1709     unshift @nodes, @$sib;
1710     push @$new_todos, @$ch;
1711     } elsif ($nt == 3 or $nt == 4) {
1712     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1713     $self->{onerror}->(node => $node, type => 'character not allowed');
1714     }
1715     } elsif ($nt == 5) {
1716     unshift @nodes, @{$node->child_nodes};
1717     }
1718     }
1719     if ($phase eq 'in dts') {
1720 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1721 wakaba 1.1 }
1722    
1723     if ($todo->{inline}) {
1724     for (@$new_todos) {
1725     $_->{inline} = 1;
1726     }
1727     }
1728     return ($new_todos);
1729     },
1730     };
1731    
1732     $Element->{$HTML_NS}->{dt} = {
1733     attrs_checker => $GetHTMLAttrsChecker->({}),
1734     checker => $HTMLStrictlyInlineChecker,
1735     };
1736    
1737     $Element->{$HTML_NS}->{dd} = {
1738     attrs_checker => $GetHTMLAttrsChecker->({}),
1739     checker => $Element->{$HTML_NS}->{li}->{checker},
1740     };
1741    
1742     $Element->{$HTML_NS}->{a} = {
1743     attrs_checker => sub {
1744     my ($self, $todo) = @_;
1745     my %attr;
1746     for my $attr (@{$todo->{node}->attributes}) {
1747     my $attr_ns = $attr->namespace_uri;
1748     $attr_ns = '' unless defined $attr_ns;
1749     my $attr_ln = $attr->manakai_local_name;
1750     my $checker;
1751     if ($attr_ns eq '') {
1752     $checker = {
1753     target => $HTMLTargetAttrChecker,
1754     href => $HTMLURIAttrChecker,
1755     ping => $HTMLSpaceURIsAttrChecker,
1756 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1757 wakaba 1.1 media => $HTMLMQAttrChecker,
1758     hreflang => $HTMLLanguageTagAttrChecker,
1759     type => $HTMLIMTAttrChecker,
1760     }->{$attr_ln};
1761     if ($checker) {
1762     $attr{$attr_ln} = $attr;
1763     } else {
1764     $checker = $HTMLAttrChecker->{$attr_ln};
1765     }
1766     }
1767     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1768     || $AttrChecker->{$attr_ns}->{''};
1769     if ($checker) {
1770     $checker->($self, $attr) if ref $checker;
1771     } else {
1772     $self->{onerror}->(node => $attr, level => 'unsupported',
1773     type => 'attribute');
1774     ## ISSUE: No comformance createria for unknown attributes in the spec
1775     }
1776     }
1777    
1778 wakaba 1.4 if (defined $attr{href}) {
1779     $self->{has_hyperlink_element} = 1;
1780     } else {
1781 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1782     if (defined $attr{$_}) {
1783     $self->{onerror}->(node => $attr{$_},
1784     type => 'attribute not allowed');
1785     }
1786     }
1787     }
1788     },
1789     checker => sub {
1790     my ($self, $todo) = @_;
1791    
1792     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1793     my ($new_todos, $ch)
1794 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
1795 wakaba 1.1 push @$new_todos, $end;
1796    
1797 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1798     $_->{flag}->{in_a_href} = 1 for @$new_todos;
1799     }
1800 wakaba 1.1
1801     return ($new_todos, $ch);
1802     },
1803     };
1804    
1805     $Element->{$HTML_NS}->{q} = {
1806     attrs_checker => $GetHTMLAttrsChecker->({
1807     cite => $HTMLURIAttrChecker,
1808     }),
1809     checker => $HTMLInlineOrStrictlyInlineChecker,
1810     };
1811    
1812     $Element->{$HTML_NS}->{cite} = {
1813     attrs_checker => $GetHTMLAttrsChecker->({}),
1814     checker => $HTMLStrictlyInlineChecker,
1815     };
1816    
1817     $Element->{$HTML_NS}->{em} = {
1818     attrs_checker => $GetHTMLAttrsChecker->({}),
1819     checker => $HTMLInlineOrStrictlyInlineChecker,
1820     };
1821    
1822     $Element->{$HTML_NS}->{strong} = {
1823     attrs_checker => $GetHTMLAttrsChecker->({}),
1824     checker => $HTMLInlineOrStrictlyInlineChecker,
1825     };
1826    
1827     $Element->{$HTML_NS}->{small} = {
1828     attrs_checker => $GetHTMLAttrsChecker->({}),
1829     checker => $HTMLInlineOrStrictlyInlineChecker,
1830     };
1831    
1832     $Element->{$HTML_NS}->{m} = {
1833     attrs_checker => $GetHTMLAttrsChecker->({}),
1834     checker => $HTMLInlineOrStrictlyInlineChecker,
1835     };
1836    
1837     $Element->{$HTML_NS}->{dfn} = {
1838     attrs_checker => $GetHTMLAttrsChecker->({}),
1839     checker => sub {
1840     my ($self, $todo) = @_;
1841    
1842     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1843     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1844     push @$sib, $end;
1845    
1846     my $node = $todo->{node};
1847     my $term = $node->get_attribute_ns (undef, 'title');
1848     unless (defined $term) {
1849     for my $child (@{$node->child_nodes}) {
1850     if ($child->node_type == 1) { # ELEMENT_NODE
1851     if (defined $term) {
1852     undef $term;
1853     last;
1854     } elsif ($child->manakai_local_name eq 'abbr') {
1855     my $nsuri = $child->namespace_uri;
1856     if (defined $nsuri and $nsuri eq $HTML_NS) {
1857     my $attr = $child->get_attribute_node_ns (undef, 'title');
1858     if ($attr) {
1859     $term = $attr->value;
1860     }
1861     }
1862     }
1863     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1864     ## TEXT_NODE or CDATA_SECTION_NODE
1865     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1866     next;
1867     }
1868     undef $term;
1869     last;
1870     }
1871     }
1872     unless (defined $term) {
1873     $term = $node->text_content;
1874     }
1875     }
1876     if ($self->{term}->{$term}) {
1877     $self->{onerror}->(node => $node, type => 'duplicate term');
1878     push @{$self->{term}->{$term}}, $node;
1879     } else {
1880     $self->{term}->{$term} = [$node];
1881     }
1882     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1883     ## has |title|.
1884    
1885     return ($sib, $ch);
1886     },
1887     };
1888    
1889     $Element->{$HTML_NS}->{abbr} = {
1890     attrs_checker => $GetHTMLAttrsChecker->({
1891     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1892     ## not different. The spec says that the |title| MAY be omitted
1893     ## if there is a |dfn| whose defining term is the abbreviation,
1894     ## but it does not prohibit |abbr| w/o |title| in other cases.
1895     }),
1896     checker => $HTMLStrictlyInlineChecker,
1897     };
1898    
1899     $Element->{$HTML_NS}->{time} = {
1900     attrs_checker => $GetHTMLAttrsChecker->({
1901     datetime => sub { 1 }, # checked in |checker|
1902     }),
1903     ## TODO: Write tests
1904     checker => sub {
1905     my ($self, $todo) = @_;
1906    
1907     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1908     my $input;
1909     my $reg_sp;
1910     my $input_node;
1911     if ($attr) {
1912     $input = $attr->value;
1913     $reg_sp = qr/[\x09-\x0D\x20]*/;
1914     $input_node = $attr;
1915     } else {
1916     $input = $todo->{node}->text_content;
1917     $reg_sp = qr/\p{Zs}*/;
1918     $input_node = $todo->{node};
1919    
1920     ## ISSUE: What is the definition for "successfully extracts a date
1921     ## or time"? If the algorithm says the string is invalid but
1922     ## return some date or time, is it "successfully"?
1923     }
1924    
1925     my $hour;
1926     my $minute;
1927     my $second;
1928     if ($input =~ /
1929     \A
1930     [\x09-\x0D\x20]*
1931     ([0-9]+) # 1
1932     (?>
1933     -([0-9]+) # 2
1934     -([0-9]+) # 3
1935     [\x09-\x0D\x20]*
1936     (?>
1937     T
1938     [\x09-\x0D\x20]*
1939     )?
1940     ([0-9]+) # 4
1941     :([0-9]+) # 5
1942     (?>
1943     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1944     )?
1945     [\x09-\x0D\x20]*
1946     (?>
1947     Z
1948     [\x09-\x0D\x20]*
1949     |
1950     [+-]([0-9]+):([0-9]+) # 7, 8
1951     [\x09-\x0D\x20]*
1952     )?
1953     \z
1954     |
1955     :([0-9]+) # 9
1956     (?>
1957     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1958     )?
1959     [\x09-\x0D\x20]*\z
1960     )
1961     /x) {
1962     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1963     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1964     length $4 != 2 or length $5 != 2) {
1965     $self->{onerror}->(node => $input_node,
1966     type => 'dateortime:syntax error');
1967     }
1968    
1969     if (1 <= $2 and $2 <= 12) {
1970     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1971     if $3 < 1 or
1972     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1973     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1974     if $2 == 2 and $3 == 29 and
1975     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1976     } else {
1977     $self->{onerror}->(node => $input_node,
1978     type => 'datetime:bad month');
1979     }
1980    
1981     ($hour, $minute, $second) = ($4, $5, $6);
1982    
1983     if (defined $7) { ## [+-]hh:mm
1984     if (length $7 != 2 or length $8 != 2) {
1985     $self->{onerror}->(node => $input_node,
1986     type => 'dateortime:syntax error');
1987     }
1988    
1989     $self->{onerror}->(node => $input_node,
1990     type => 'datetime:bad timezone hour')
1991     if $7 > 23;
1992     $self->{onerror}->(node => $input_node,
1993     type => 'datetime:bad timezone minute')
1994     if $8 > 59;
1995     }
1996     } else { ## hh:mm
1997     if (length $1 != 2 or length $9 != 2) {
1998     $self->{onerror}->(node => $input_node,
1999     type => qq'dateortime:syntax error');
2000     }
2001    
2002     ($hour, $minute, $second) = ($1, $9, $10);
2003     }
2004    
2005     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2006     if $hour > 23;
2007     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2008     if $minute > 59;
2009    
2010     if (defined $second) { ## s
2011     ## NOTE: Integer part of second don't have to have length of two.
2012    
2013     if (substr ($second, 0, 1) eq '.') {
2014     $self->{onerror}->(node => $input_node,
2015     type => 'dateortime:syntax error');
2016     }
2017    
2018     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2019     if $second >= 60;
2020     }
2021     } else {
2022     $self->{onerror}->(node => $input_node,
2023     type => 'dateortime:syntax error');
2024     }
2025    
2026     return $HTMLStrictlyInlineChecker->($self, $todo);
2027     },
2028     };
2029    
2030     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2031     attrs_checker => $GetHTMLAttrsChecker->({
2032     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2033     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2034     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2035     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2036     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2037     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2038     }),
2039     checker => $HTMLStrictlyInlineChecker,
2040     };
2041    
2042     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2043     attrs_checker => $GetHTMLAttrsChecker->({
2044     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2045     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2046     }),
2047     checker => $HTMLStrictlyInlineChecker,
2048     };
2049    
2050     $Element->{$HTML_NS}->{code} = {
2051     attrs_checker => $GetHTMLAttrsChecker->({}),
2052     ## NOTE: Though |title| has special semantics,
2053     ## syntatically same as the |title| as global attribute.
2054     checker => $HTMLInlineOrStrictlyInlineChecker,
2055     };
2056    
2057     $Element->{$HTML_NS}->{var} = {
2058     attrs_checker => $GetHTMLAttrsChecker->({}),
2059     ## NOTE: Though |title| has special semantics,
2060     ## syntatically same as the |title| as global attribute.
2061     checker => $HTMLStrictlyInlineChecker,
2062     };
2063    
2064     $Element->{$HTML_NS}->{samp} = {
2065     attrs_checker => $GetHTMLAttrsChecker->({}),
2066     ## NOTE: Though |title| has special semantics,
2067     ## syntatically same as the |title| as global attribute.
2068     checker => $HTMLInlineOrStrictlyInlineChecker,
2069     };
2070    
2071     $Element->{$HTML_NS}->{kbd} = {
2072     attrs_checker => $GetHTMLAttrsChecker->({}),
2073     checker => $HTMLStrictlyInlineChecker,
2074     };
2075    
2076     $Element->{$HTML_NS}->{sub} = {
2077     attrs_checker => $GetHTMLAttrsChecker->({}),
2078     checker => $HTMLStrictlyInlineChecker,
2079     };
2080    
2081     $Element->{$HTML_NS}->{sup} = {
2082     attrs_checker => $GetHTMLAttrsChecker->({}),
2083     checker => $HTMLStrictlyInlineChecker,
2084     };
2085    
2086     $Element->{$HTML_NS}->{span} = {
2087     attrs_checker => $GetHTMLAttrsChecker->({}),
2088     ## NOTE: Though |title| has special semantics,
2089     ## syntatically same as the |title| as global attribute.
2090     checker => $HTMLInlineOrStrictlyInlineChecker,
2091     };
2092    
2093     $Element->{$HTML_NS}->{i} = {
2094     attrs_checker => $GetHTMLAttrsChecker->({}),
2095     ## NOTE: Though |title| has special semantics,
2096     ## syntatically same as the |title| as global attribute.
2097     checker => $HTMLStrictlyInlineChecker,
2098     };
2099    
2100     $Element->{$HTML_NS}->{b} = {
2101     attrs_checker => $GetHTMLAttrsChecker->({}),
2102     checker => $HTMLStrictlyInlineChecker,
2103     };
2104    
2105     $Element->{$HTML_NS}->{bdo} = {
2106     attrs_checker => sub {
2107     my ($self, $todo) = @_;
2108     $GetHTMLAttrsChecker->({})->($self, $todo);
2109     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2110     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2111     }
2112     },
2113     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2114     checker => $HTMLStrictlyInlineChecker,
2115     };
2116    
2117     $Element->{$HTML_NS}->{ins} = {
2118     attrs_checker => $GetHTMLAttrsChecker->({
2119     cite => $HTMLURIAttrChecker,
2120     datetime => $HTMLDatetimeAttrChecker,
2121     }),
2122     checker => $HTMLTransparentChecker,
2123     };
2124    
2125     $Element->{$HTML_NS}->{del} = {
2126     attrs_checker => $GetHTMLAttrsChecker->({
2127     cite => $HTMLURIAttrChecker,
2128     datetime => $HTMLDatetimeAttrChecker,
2129     }),
2130     checker => sub {
2131     my ($self, $todo) = @_;
2132    
2133     my $parent = $todo->{node}->manakai_parent_element;
2134     if (defined $parent) {
2135     my $nsuri = $parent->namespace_uri;
2136     $nsuri = '' unless defined $nsuri;
2137     my $ln = $parent->manakai_local_name;
2138     my $eldef = $Element->{$nsuri}->{$ln} ||
2139     $Element->{$nsuri}->{''} ||
2140     $ElementDefault;
2141     return $eldef->{checker}->($self, $todo);
2142     } else {
2143     return $HTMLBlockOrInlineChecker->($self, $todo);
2144     }
2145     },
2146     };
2147    
2148     ## TODO: figure
2149 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2150 wakaba 1.1
2151 wakaba 1.4 ## TODO: |alt|
2152 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2153     attrs_checker => sub {
2154     my ($self, $todo) = @_;
2155     $GetHTMLAttrsChecker->({
2156     alt => sub { }, ## NOTE: No syntactical requirement
2157     src => $HTMLURIAttrChecker,
2158     usemap => $HTMLUsemapAttrChecker,
2159     ismap => sub {
2160     my ($self, $attr, $parent_todo) = @_;
2161 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2162     $self->{onerror}->(node => $attr,
2163     type => 'attribute not allowed:ismap');
2164 wakaba 1.1 }
2165     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2166     },
2167     ## TODO: height
2168     ## TODO: width
2169     })->($self, $todo);
2170     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2171     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2172     }
2173     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2174     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2175     }
2176     },
2177     checker => $HTMLEmptyChecker,
2178     };
2179    
2180     $Element->{$HTML_NS}->{iframe} = {
2181     attrs_checker => $GetHTMLAttrsChecker->({
2182     src => $HTMLURIAttrChecker,
2183     }),
2184     checker => $HTMLTextChecker,
2185     };
2186    
2187     $Element->{$HTML_NS}->{embed} = {
2188     attrs_checker => sub {
2189     my ($self, $todo) = @_;
2190     my $has_src;
2191     for my $attr (@{$todo->{node}->attributes}) {
2192     my $attr_ns = $attr->namespace_uri;
2193     $attr_ns = '' unless defined $attr_ns;
2194     my $attr_ln = $attr->manakai_local_name;
2195     my $checker;
2196     if ($attr_ns eq '') {
2197     if ($attr_ln eq 'src') {
2198     $checker = $HTMLURIAttrChecker;
2199     $has_src = 1;
2200     } elsif ($attr_ln eq 'type') {
2201     $checker = $HTMLIMTAttrChecker;
2202     } else {
2203     ## TODO: height
2204     ## TODO: width
2205     $checker = $HTMLAttrChecker->{$attr_ln}
2206     || sub { }; ## NOTE: Any local attribute is ok.
2207     }
2208     }
2209     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2210     || $AttrChecker->{$attr_ns}->{''};
2211     if ($checker) {
2212     $checker->($self, $attr);
2213     } else {
2214     $self->{onerror}->(node => $attr, level => 'unsupported',
2215     type => 'attribute');
2216     ## ISSUE: No comformance createria for global attributes in the spec
2217     }
2218     }
2219    
2220     unless ($has_src) {
2221     $self->{onerror}->(node => $todo->{node},
2222     type => 'attribute missing:src');
2223     }
2224     },
2225     checker => $HTMLEmptyChecker,
2226     };
2227    
2228     $Element->{$HTML_NS}->{object} = {
2229     attrs_checker => sub {
2230     my ($self, $todo) = @_;
2231     $GetHTMLAttrsChecker->({
2232     data => $HTMLURIAttrChecker,
2233     type => $HTMLIMTAttrChecker,
2234     usemap => $HTMLUsemapAttrChecker,
2235     ## TODO: width
2236     ## TODO: height
2237     })->($self, $todo);
2238     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2239     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2240     $self->{onerror}->(node => $todo->{node},
2241     type => 'attribute missing:data|type');
2242     }
2243     }
2244     },
2245     checker => $ElementDefault->{checker}, ## TODO
2246 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2247 wakaba 1.1 };
2248    
2249     $Element->{$HTML_NS}->{param} = {
2250     attrs_checker => sub {
2251     my ($self, $todo) = @_;
2252     $GetHTMLAttrsChecker->({
2253     name => sub { },
2254     value => sub { },
2255     })->($self, $todo);
2256     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2257     $self->{onerror}->(node => $todo->{node},
2258     type => 'attribute missing:name');
2259     }
2260     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2261     $self->{onerror}->(node => $todo->{node},
2262     type => 'attribute missing:value');
2263     }
2264     },
2265     checker => $HTMLEmptyChecker,
2266     };
2267    
2268     $Element->{$HTML_NS}->{video} = {
2269     attrs_checker => $GetHTMLAttrsChecker->({
2270     src => $HTMLURIAttrChecker,
2271     ## TODO: start, loopstart, loopend, end
2272     ## ISSUE: they MUST be "value time offset"s. Value?
2273 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2274 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2275     controls => $GetHTMLBooleanAttrChecker->('controls'),
2276 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2277     ## TODO: width, height (not for audio!)
2278 wakaba 1.1 }),
2279     checker => sub {
2280     my ($self, $todo) = @_;
2281    
2282     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2283     return $HTMLBlockOrInlineChecker->($self, $todo);
2284     } else {
2285     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2286     ->($self, $todo);
2287     }
2288     },
2289     };
2290    
2291     $Element->{$HTML_NS}->{audio} = {
2292     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2293     checker => $Element->{$HTML_NS}->{video}->{checker},
2294     };
2295    
2296     $Element->{$HTML_NS}->{source} = {
2297     attrs_checker => sub {
2298     my ($self, $todo) = @_;
2299     $GetHTMLAttrsChecker->({
2300     src => $HTMLURIAttrChecker,
2301     type => $HTMLIMTAttrChecker,
2302     media => $HTMLMQAttrChecker,
2303     })->($self, $todo);
2304     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2305     $self->{onerror}->(node => $todo->{node},
2306     type => 'attribute missing:src');
2307     }
2308     },
2309     checker => $HTMLEmptyChecker,
2310     };
2311    
2312     $Element->{$HTML_NS}->{canvas} = {
2313     attrs_checker => $GetHTMLAttrsChecker->({
2314     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2315     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2316     }),
2317     checker => $HTMLInlineChecker,
2318     };
2319    
2320     $Element->{$HTML_NS}->{map} = {
2321 wakaba 1.4 attrs_checker => sub {
2322     my ($self, $todo) = @_;
2323     my $has_id;
2324     $GetHTMLAttrsChecker->({
2325     id => sub {
2326     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2327     my ($self, $attr) = @_;
2328     my $value = $attr->value;
2329     if (length $value > 0) {
2330     if ($self->{id}->{$value}) {
2331     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2332     push @{$self->{id}->{$value}}, $attr;
2333     } else {
2334     $self->{id}->{$value} = [$attr];
2335     }
2336 wakaba 1.1 } else {
2337 wakaba 1.4 ## NOTE: MUST contain at least one character
2338     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2339 wakaba 1.1 }
2340 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2341     $self->{onerror}->(node => $attr, type => 'space in ID');
2342     }
2343     $self->{map}->{$value} ||= $attr;
2344     $has_id = 1;
2345     },
2346     })->($self, $todo);
2347     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2348     unless $has_id;
2349     },
2350 wakaba 1.1 checker => $HTMLBlockChecker,
2351     };
2352    
2353     $Element->{$HTML_NS}->{area} = {
2354     attrs_checker => sub {
2355     my ($self, $todo) = @_;
2356     my %attr;
2357     my $coords;
2358     for my $attr (@{$todo->{node}->attributes}) {
2359     my $attr_ns = $attr->namespace_uri;
2360     $attr_ns = '' unless defined $attr_ns;
2361     my $attr_ln = $attr->manakai_local_name;
2362     my $checker;
2363     if ($attr_ns eq '') {
2364     $checker = {
2365     alt => sub { },
2366     ## NOTE: |alt| value has no conformance creteria.
2367     shape => $GetHTMLEnumeratedAttrChecker->({
2368     circ => -1, circle => 1,
2369     default => 1,
2370     poly => 1, polygon => -1,
2371     rect => 1, rectangle => -1,
2372     }),
2373     coords => sub {
2374     my ($self, $attr) = @_;
2375     my $value = $attr->value;
2376     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2377     $coords = [split /,/, $value];
2378     } else {
2379     $self->{onerror}->(node => $attr,
2380     type => 'coords:syntax error');
2381     }
2382     },
2383     target => $HTMLTargetAttrChecker,
2384     href => $HTMLURIAttrChecker,
2385     ping => $HTMLSpaceURIsAttrChecker,
2386 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2387 wakaba 1.1 media => $HTMLMQAttrChecker,
2388     hreflang => $HTMLLanguageTagAttrChecker,
2389     type => $HTMLIMTAttrChecker,
2390     }->{$attr_ln};
2391     if ($checker) {
2392     $attr{$attr_ln} = $attr;
2393     } else {
2394     $checker = $HTMLAttrChecker->{$attr_ln};
2395     }
2396     }
2397     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2398     || $AttrChecker->{$attr_ns}->{''};
2399     if ($checker) {
2400     $checker->($self, $attr) if ref $checker;
2401     } else {
2402     $self->{onerror}->(node => $attr, level => 'unsupported',
2403     type => 'attribute');
2404     ## ISSUE: No comformance createria for unknown attributes in the spec
2405     }
2406     }
2407    
2408     if (defined $attr{href}) {
2409 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2410 wakaba 1.1 unless (defined $attr{alt}) {
2411     $self->{onerror}->(node => $todo->{node},
2412     type => 'attribute missing:alt');
2413     }
2414     } else {
2415     for (qw/target ping rel media hreflang type alt/) {
2416     if (defined $attr{$_}) {
2417     $self->{onerror}->(node => $attr{$_},
2418     type => 'attribute not allowed');
2419     }
2420     }
2421     }
2422    
2423     my $shape = 'rectangle';
2424     if (defined $attr{shape}) {
2425     $shape = {
2426     circ => 'circle', circle => 'circle',
2427     default => 'default',
2428     poly => 'polygon', polygon => 'polygon',
2429     rect => 'rectangle', rectangle => 'rectangle',
2430     }->{lc $attr{shape}->value} || 'rectangle';
2431     ## TODO: ASCII lowercase?
2432     }
2433    
2434     if ($shape eq 'circle') {
2435     if (defined $attr{coords}) {
2436     if (defined $coords) {
2437     if (@$coords == 3) {
2438     if ($coords->[2] < 0) {
2439     $self->{onerror}->(node => $attr{coords},
2440     type => 'coords:out of range:2');
2441     }
2442     } else {
2443     $self->{onerror}->(node => $attr{coords},
2444     type => 'coords:number:3:'.@$coords);
2445     }
2446     } else {
2447     ## NOTE: A syntax error has been reported.
2448     }
2449     } else {
2450     $self->{onerror}->(node => $todo->{node},
2451     type => 'attribute missing:coords');
2452     }
2453     } elsif ($shape eq 'default') {
2454     if (defined $attr{coords}) {
2455     $self->{onerror}->(node => $attr{coords},
2456     type => 'attribute not allowed');
2457     }
2458     } elsif ($shape eq 'polygon') {
2459     if (defined $attr{coords}) {
2460     if (defined $coords) {
2461     if (@$coords >= 6) {
2462     unless (@$coords % 2 == 0) {
2463     $self->{onerror}->(node => $attr{coords},
2464     type => 'coords:number:even:'.@$coords);
2465     }
2466     } else {
2467     $self->{onerror}->(node => $attr{coords},
2468     type => 'coords:number:>=6:'.@$coords);
2469     }
2470     } else {
2471     ## NOTE: A syntax error has been reported.
2472     }
2473     } else {
2474     $self->{onerror}->(node => $todo->{node},
2475     type => 'attribute missing:coords');
2476     }
2477     } elsif ($shape eq 'rectangle') {
2478     if (defined $attr{coords}) {
2479     if (defined $coords) {
2480     if (@$coords == 4) {
2481     unless ($coords->[0] < $coords->[2]) {
2482     $self->{onerror}->(node => $attr{coords},
2483     type => 'coords:out of range:0');
2484     }
2485     unless ($coords->[1] < $coords->[3]) {
2486     $self->{onerror}->(node => $attr{coords},
2487     type => 'coords:out of range:1');
2488     }
2489     } else {
2490     $self->{onerror}->(node => $attr{coords},
2491     type => 'coords:number:4:'.@$coords);
2492     }
2493     } else {
2494     ## NOTE: A syntax error has been reported.
2495     }
2496     } else {
2497     $self->{onerror}->(node => $todo->{node},
2498     type => 'attribute missing:coords');
2499     }
2500     }
2501     },
2502     checker => $HTMLEmptyChecker,
2503     };
2504     ## TODO: only in map
2505    
2506     $Element->{$HTML_NS}->{table} = {
2507     attrs_checker => $GetHTMLAttrsChecker->({}),
2508     checker => sub {
2509     my ($self, $todo) = @_;
2510     my $el = $todo->{node};
2511     my $new_todos = [];
2512     my @nodes = (@{$el->child_nodes});
2513    
2514     my $phase = 'before caption';
2515     my $has_tfoot;
2516     while (@nodes) {
2517     my $node = shift @nodes;
2518     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2519    
2520     my $nt = $node->node_type;
2521     if ($nt == 1) {
2522     my $node_ns = $node->namespace_uri;
2523     $node_ns = '' unless defined $node_ns;
2524     my $node_ln = $node->manakai_local_name;
2525     ## NOTE: |minuses| list is not checked since redundant
2526 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2527     #
2528     } elsif ($phase eq 'in tbodys') {
2529 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2530     #$phase = 'in tbodys';
2531     } elsif (not $has_tfoot and
2532     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2533     $phase = 'after tfoot';
2534     $has_tfoot = 1;
2535     } else {
2536     $self->{onerror}->(node => $node, type => 'element not allowed');
2537     }
2538     } elsif ($phase eq 'in trs') {
2539     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2540     #$phase = 'in trs';
2541     } elsif (not $has_tfoot and
2542     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2543     $phase = 'after tfoot';
2544     $has_tfoot = 1;
2545     } else {
2546     $self->{onerror}->(node => $node, type => 'element not allowed');
2547     }
2548     } elsif ($phase eq 'after thead') {
2549     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2550     $phase = 'in tbodys';
2551     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2552     $phase = 'in trs';
2553     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2554     $phase = 'in tbodys';
2555     $has_tfoot = 1;
2556     } else {
2557     $self->{onerror}->(node => $node, type => 'element not allowed');
2558     }
2559     } elsif ($phase eq 'in colgroup') {
2560     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2561     $phase = 'in colgroup';
2562     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2563     $phase = 'after thead';
2564     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2565     $phase = 'in tbodys';
2566     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2567     $phase = 'in trs';
2568     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2569     $phase = 'in tbodys';
2570     $has_tfoot = 1;
2571     } else {
2572     $self->{onerror}->(node => $node, type => 'element not allowed');
2573     }
2574     } elsif ($phase eq 'before caption') {
2575     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2576     $phase = 'in colgroup';
2577     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2578     $phase = 'in colgroup';
2579     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2580     $phase = 'after thead';
2581     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2582     $phase = 'in tbodys';
2583     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2584     $phase = 'in trs';
2585     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2586     $phase = 'in tbodys';
2587     $has_tfoot = 1;
2588     } else {
2589     $self->{onerror}->(node => $node, type => 'element not allowed');
2590     }
2591     } else { # after tfoot
2592     $self->{onerror}->(node => $node, type => 'element not allowed');
2593     }
2594     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2595     unshift @nodes, @$sib;
2596     push @$new_todos, @$ch;
2597     } elsif ($nt == 3 or $nt == 4) {
2598     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2599     $self->{onerror}->(node => $node, type => 'character not allowed');
2600     }
2601     } elsif ($nt == 5) {
2602     unshift @nodes, @{$node->child_nodes};
2603     }
2604     }
2605    
2606     ## Table model errors
2607     require Whatpm::HTMLTable;
2608     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2609     my %opt = @_;
2610     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2611     });
2612     push @{$self->{return}->{table}}, $todo->{node};
2613    
2614     return ($new_todos);
2615     },
2616     };
2617    
2618     $Element->{$HTML_NS}->{caption} = {
2619     attrs_checker => $GetHTMLAttrsChecker->({}),
2620 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2621 wakaba 1.1 };
2622    
2623     $Element->{$HTML_NS}->{colgroup} = {
2624     attrs_checker => $GetHTMLAttrsChecker->({
2625     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2626     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2627     ## TODO: "attribute not supported" if |col|.
2628     ## ISSUE: MUST NOT if any |col|?
2629     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2630     }),
2631     checker => sub {
2632     my ($self, $todo) = @_;
2633     my $el = $todo->{node};
2634     my $new_todos = [];
2635     my @nodes = (@{$el->child_nodes});
2636    
2637     while (@nodes) {
2638     my $node = shift @nodes;
2639     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2640    
2641     my $nt = $node->node_type;
2642     if ($nt == 1) {
2643     my $node_ns = $node->namespace_uri;
2644     $node_ns = '' unless defined $node_ns;
2645     my $node_ln = $node->manakai_local_name;
2646     ## NOTE: |minuses| list is not checked since redundant
2647 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2648     #
2649     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2650 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2651     }
2652     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2653     unshift @nodes, @$sib;
2654     push @$new_todos, @$ch;
2655     } elsif ($nt == 3 or $nt == 4) {
2656     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2657     $self->{onerror}->(node => $node, type => 'character not allowed');
2658     }
2659     } elsif ($nt == 5) {
2660     unshift @nodes, @{$node->child_nodes};
2661     }
2662     }
2663     return ($new_todos);
2664     },
2665     };
2666    
2667     $Element->{$HTML_NS}->{col} = {
2668     attrs_checker => $GetHTMLAttrsChecker->({
2669     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2670     }),
2671     checker => $HTMLEmptyChecker,
2672     };
2673    
2674     $Element->{$HTML_NS}->{tbody} = {
2675     attrs_checker => $GetHTMLAttrsChecker->({}),
2676     checker => sub {
2677     my ($self, $todo) = @_;
2678     my $el = $todo->{node};
2679     my $new_todos = [];
2680     my @nodes = (@{$el->child_nodes});
2681    
2682     my $has_tr;
2683     while (@nodes) {
2684     my $node = shift @nodes;
2685     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2686    
2687     my $nt = $node->node_type;
2688     if ($nt == 1) {
2689     my $node_ns = $node->namespace_uri;
2690     $node_ns = '' unless defined $node_ns;
2691     my $node_ln = $node->manakai_local_name;
2692     ## NOTE: |minuses| list is not checked since redundant
2693 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2694     #
2695     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2696 wakaba 1.1 $has_tr = 1;
2697     } else {
2698     $self->{onerror}->(node => $node, type => 'element not allowed');
2699     }
2700     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2701     unshift @nodes, @$sib;
2702     push @$new_todos, @$ch;
2703     } elsif ($nt == 3 or $nt == 4) {
2704     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2705     $self->{onerror}->(node => $node, type => 'character not allowed');
2706     }
2707     } elsif ($nt == 5) {
2708     unshift @nodes, @{$node->child_nodes};
2709     }
2710     }
2711     unless ($has_tr) {
2712     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2713     }
2714     return ($new_todos);
2715     },
2716     };
2717    
2718     $Element->{$HTML_NS}->{thead} = {
2719     attrs_checker => $GetHTMLAttrsChecker->({}),
2720     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2721     };
2722    
2723     $Element->{$HTML_NS}->{tfoot} = {
2724     attrs_checker => $GetHTMLAttrsChecker->({}),
2725     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2726     };
2727    
2728     $Element->{$HTML_NS}->{tr} = {
2729     attrs_checker => $GetHTMLAttrsChecker->({}),
2730     checker => sub {
2731     my ($self, $todo) = @_;
2732     my $el = $todo->{node};
2733     my $new_todos = [];
2734     my @nodes = (@{$el->child_nodes});
2735    
2736     my $has_td;
2737     while (@nodes) {
2738     my $node = shift @nodes;
2739     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2740    
2741     my $nt = $node->node_type;
2742     if ($nt == 1) {
2743     my $node_ns = $node->namespace_uri;
2744     $node_ns = '' unless defined $node_ns;
2745     my $node_ln = $node->manakai_local_name;
2746     ## NOTE: |minuses| list is not checked since redundant
2747 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2748     #
2749     } elsif ($node_ns eq $HTML_NS and
2750     ($node_ln eq 'td' or $node_ln eq 'th')) {
2751 wakaba 1.1 $has_td = 1;
2752     } else {
2753     $self->{onerror}->(node => $node, type => 'element not allowed');
2754     }
2755     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2756     unshift @nodes, @$sib;
2757     push @$new_todos, @$ch;
2758     } elsif ($nt == 3 or $nt == 4) {
2759     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2760     $self->{onerror}->(node => $node, type => 'character not allowed');
2761     }
2762     } elsif ($nt == 5) {
2763     unshift @nodes, @{$node->child_nodes};
2764     }
2765     }
2766     unless ($has_td) {
2767     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2768     }
2769     return ($new_todos);
2770     },
2771     };
2772    
2773     $Element->{$HTML_NS}->{td} = {
2774     attrs_checker => $GetHTMLAttrsChecker->({
2775     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2776     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2777     }),
2778     checker => $HTMLBlockOrInlineChecker,
2779     };
2780    
2781     $Element->{$HTML_NS}->{th} = {
2782     attrs_checker => $GetHTMLAttrsChecker->({
2783     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2784     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2785     scope => $GetHTMLEnumeratedAttrChecker
2786     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2787     }),
2788     checker => $HTMLBlockOrInlineChecker,
2789     };
2790    
2791     ## TODO: forms
2792 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2793 wakaba 1.1
2794     $Element->{$HTML_NS}->{script} = {
2795 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2796 wakaba 1.1 src => $HTMLURIAttrChecker,
2797     defer => $GetHTMLBooleanAttrChecker->('defer'),
2798     async => $GetHTMLBooleanAttrChecker->('async'),
2799     type => $HTMLIMTAttrChecker,
2800 wakaba 1.9 }),
2801 wakaba 1.1 checker => sub {
2802     my ($self, $todo) = @_;
2803    
2804     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2805     return $HTMLEmptyChecker->($self, $todo);
2806     } else {
2807     ## NOTE: No content model conformance in HTML5 spec.
2808     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2809     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2810     if ((defined $type and $type eq '') or
2811     (defined $language and $language eq '')) {
2812     $type = 'text/javascript';
2813     } elsif (defined $type) {
2814     #
2815     } elsif (defined $language) {
2816     $type = 'text/' . $language;
2817     } else {
2818     $type = 'text/javascript';
2819     }
2820     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2821     type => 'script:'.$type); ## TODO: $type normalization
2822     return $AnyChecker->($self, $todo);
2823     }
2824     },
2825     };
2826    
2827     ## NOTE: When script is disabled.
2828     $Element->{$HTML_NS}->{noscript} = {
2829 wakaba 1.3 attrs_checker => sub {
2830     my ($self, $todo) = @_;
2831    
2832     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2833     ## since the later is not invoked when the |noscript| is used as a
2834     ## transparent element.
2835     unless ($todo->{node}->owner_document->manakai_is_html) {
2836     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2837     }
2838    
2839     $GetHTMLAttrsChecker->({})->($self, $todo);
2840     },
2841 wakaba 1.1 checker => sub {
2842     my ($self, $todo) = @_;
2843    
2844 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2845     my $new_todos = [];
2846     my @nodes = (@{$todo->{node}->child_nodes});
2847    
2848     while (@nodes) {
2849     my $node = shift @nodes;
2850     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2851    
2852     my $nt = $node->node_type;
2853     if ($nt == 1) {
2854     my $node_ns = $node->namespace_uri;
2855     $node_ns = '' unless defined $node_ns;
2856     my $node_ln = $node->manakai_local_name;
2857 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2858     #
2859     } elsif ($node_ns eq $HTML_NS) {
2860 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2861     #
2862     } elsif ($node_ln eq 'meta') {
2863 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2864     #
2865 wakaba 1.3 } else {
2866 wakaba 1.5 $self->{onerror}->(node => $node,
2867     type => 'element not allowed');
2868 wakaba 1.3 }
2869     } else {
2870     $self->{onerror}->(node => $node, type => 'element not allowed');
2871     }
2872     } else {
2873     $self->{onerror}->(node => $node, type => 'element not allowed');
2874     }
2875    
2876     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2877     unshift @nodes, @$sib;
2878     push @$new_todos, @$ch;
2879     } elsif ($nt == 3 or $nt == 4) {
2880     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2881     $self->{onerror}->(node => $node, type => 'character not allowed');
2882     }
2883     } elsif ($nt == 5) {
2884     unshift @nodes, @{$node->child_nodes};
2885     }
2886     }
2887     return ($new_todos);
2888     } else {
2889     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2890     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2891     push @$sib, $end;
2892     return ($sib, $ch);
2893     }
2894 wakaba 1.1 },
2895     };
2896 wakaba 1.3
2897     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2898 wakaba 1.1
2899     $Element->{$HTML_NS}->{'event-source'} = {
2900     attrs_checker => $GetHTMLAttrsChecker->({
2901     src => $HTMLURIAttrChecker,
2902     }),
2903     checker => $HTMLEmptyChecker,
2904     };
2905    
2906     $Element->{$HTML_NS}->{details} = {
2907     attrs_checker => $GetHTMLAttrsChecker->({
2908     open => $GetHTMLBooleanAttrChecker->('open'),
2909     }),
2910     checker => sub {
2911     my ($self, $todo) = @_;
2912    
2913     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2914     my ($sib, $ch)
2915     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2916     ->($self, $todo);
2917     push @$sib, $end;
2918     return ($sib, $ch);
2919     },
2920     };
2921    
2922     $Element->{$HTML_NS}->{datagrid} = {
2923     attrs_checker => $GetHTMLAttrsChecker->({
2924     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2925     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2926     }),
2927     checker => sub {
2928     my ($self, $todo) = @_;
2929     my $el = $todo->{node};
2930     my $new_todos = [];
2931     my @nodes = (@{$el->child_nodes});
2932    
2933     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2934    
2935     ## Block-table Block* | table | select | datalist | Empty
2936     my $mode = 'any';
2937     while (@nodes) {
2938     my $node = shift @nodes;
2939     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2940    
2941     my $nt = $node->node_type;
2942     if ($nt == 1) {
2943     my $node_ns = $node->namespace_uri;
2944     $node_ns = '' unless defined $node_ns;
2945     my $node_ln = $node->manakai_local_name;
2946     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2947 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2948     #
2949     } elsif ($mode eq 'block') {
2950 wakaba 1.1 $not_allowed = 1
2951     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2952     } elsif ($mode eq 'any') {
2953     if ($node_ns eq $HTML_NS and
2954     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2955     $mode = 'none';
2956     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2957     $mode = 'block';
2958     } else {
2959     $not_allowed = 1;
2960     }
2961     } else {
2962     $not_allowed = 1;
2963     }
2964     $self->{onerror}->(node => $node, type => 'element not allowed')
2965     if $not_allowed;
2966     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2967     unshift @nodes, @$sib;
2968     push @$new_todos, @$ch;
2969     } elsif ($nt == 3 or $nt == 4) {
2970     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2971     $self->{onerror}->(node => $node, type => 'character not allowed');
2972     }
2973     } elsif ($nt == 5) {
2974     unshift @nodes, @{$node->child_nodes};
2975     }
2976     }
2977    
2978     push @$new_todos, $end;
2979     return ($new_todos);
2980     },
2981     };
2982    
2983     $Element->{$HTML_NS}->{command} = {
2984     attrs_checker => $GetHTMLAttrsChecker->({
2985     checked => $GetHTMLBooleanAttrChecker->('checked'),
2986     default => $GetHTMLBooleanAttrChecker->('default'),
2987     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2988     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2989     icon => $HTMLURIAttrChecker,
2990     label => sub { }, ## NOTE: No conformance creteria
2991     radiogroup => sub { }, ## NOTE: No conformance creteria
2992     ## NOTE: |title| has special semantics, but no syntactical difference
2993     type => sub {
2994     my ($self, $attr) = @_;
2995     my $value = $attr->value;
2996     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2997     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2998     }
2999     },
3000     }),
3001     checker => $HTMLEmptyChecker,
3002     };
3003    
3004     $Element->{$HTML_NS}->{menu} = {
3005     attrs_checker => $GetHTMLAttrsChecker->({
3006     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3007     id => sub {
3008     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3009     my ($self, $attr) = @_;
3010     my $value = $attr->value;
3011     if (length $value > 0) {
3012     if ($self->{id}->{$value}) {
3013     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3014     push @{$self->{id}->{$value}}, $attr;
3015     } else {
3016     $self->{id}->{$value} = [$attr];
3017     }
3018     } else {
3019     ## NOTE: MUST contain at least one character
3020     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3021     }
3022     if ($value =~ /[\x09-\x0D\x20]/) {
3023     $self->{onerror}->(node => $attr, type => 'space in ID');
3024     }
3025     $self->{menu}->{$value} ||= $attr;
3026     ## ISSUE: <menu id=""><p contextmenu=""> match?
3027     },
3028     label => sub { }, ## NOTE: No conformance creteria
3029     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3030     }),
3031     checker => sub {
3032     my ($self, $todo) = @_;
3033     my $el = $todo->{node};
3034     my $new_todos = [];
3035     my @nodes = (@{$el->child_nodes});
3036    
3037     my $content = 'li or inline';
3038     while (@nodes) {
3039     my $node = shift @nodes;
3040     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3041    
3042     my $nt = $node->node_type;
3043     if ($nt == 1) {
3044     my $node_ns = $node->namespace_uri;
3045     $node_ns = '' unless defined $node_ns;
3046     my $node_ln = $node->manakai_local_name;
3047     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3048 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3049     #
3050     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3051 wakaba 1.1 if ($content eq 'inline') {
3052     $not_allowed = 1;
3053     } elsif ($content eq 'li or inline') {
3054     $content = 'li';
3055     }
3056     } else {
3057     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3058     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3059     $content = 'inline';
3060     } else {
3061     $not_allowed = 1;
3062     }
3063     }
3064     $self->{onerror}->(node => $node, type => 'element not allowed')
3065     if $not_allowed;
3066     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3067     unshift @nodes, @$sib;
3068     push @$new_todos, @$ch;
3069     } elsif ($nt == 3 or $nt == 4) {
3070     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3071     if ($content eq 'li') {
3072     $self->{onerror}->(node => $node, type => 'character not allowed');
3073     } elsif ($content eq 'li or inline') {
3074     $content = 'inline';
3075     }
3076     }
3077     } elsif ($nt == 5) {
3078     unshift @nodes, @{$node->child_nodes};
3079     }
3080     }
3081    
3082     for (@$new_todos) {
3083     $_->{inline} = 1;
3084     }
3085     return ($new_todos);
3086     },
3087 wakaba 1.8 };
3088    
3089     $Element->{$HTML_NS}->{datatemplate} = {
3090     attrs_checker => $GetHTMLAttrsChecker->({}),
3091     checker => sub {
3092     my ($self, $todo) = @_;
3093     my $el = $todo->{node};
3094     my $new_todos = [];
3095     my @nodes = (@{$el->child_nodes});
3096    
3097     while (@nodes) {
3098     my $node = shift @nodes;
3099     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3100    
3101     my $nt = $node->node_type;
3102     if ($nt == 1) {
3103     my $node_ns = $node->namespace_uri;
3104     $node_ns = '' unless defined $node_ns;
3105     my $node_ln = $node->manakai_local_name;
3106     ## NOTE: |minuses| list is not checked since redundant
3107     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3108     #
3109     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3110     $self->{onerror}->(node => $node,
3111     type => 'element not allowed:datatemplate');
3112     }
3113     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3114     unshift @nodes, @$sib;
3115     push @$new_todos, @$ch;
3116     } elsif ($nt == 3 or $nt == 4) {
3117     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3118     $self->{onerror}->(node => $node, type => 'character not allowed');
3119     }
3120     } elsif ($nt == 5) {
3121     unshift @nodes, @{$node->child_nodes};
3122     }
3123     }
3124     return ($new_todos);
3125     },
3126     is_xml_root => 1,
3127     };
3128    
3129     $Element->{$HTML_NS}->{rule} = {
3130     attrs_checker => $GetHTMLAttrsChecker->({
3131     ## TODO: |condition| attribute
3132     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3133     }),
3134     checker => sub {
3135     my ($self, $todo) = @_;
3136    
3137     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3138     my ($sib, $ch) = $AnyChecker->($self, $todo);
3139     push @$sib, $end;
3140     return ($sib, $ch);
3141     },
3142     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3143     ## is applied to some conforming data, results in a conforming DOM tree.":
3144     ## We don't check against this.
3145     };
3146    
3147     $Element->{$HTML_NS}->{nest} = {
3148     attrs_checker => $GetHTMLAttrsChecker->({
3149     ## TODO: |filter| attribute
3150     mode => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
3151     }),
3152     checker => $HTMLEmptyChecker,
3153 wakaba 1.1 };
3154    
3155     $Element->{$HTML_NS}->{legend} = {
3156     attrs_checker => $GetHTMLAttrsChecker->({}),
3157     checker => sub {
3158     my ($self, $todo) = @_;
3159    
3160     my $parent = $todo->{node}->manakai_parent_element;
3161     if (defined $parent) {
3162     my $nsuri = $parent->namespace_uri;
3163     $nsuri = '' unless defined $nsuri;
3164     my $ln = $parent->manakai_local_name;
3165     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3166     return $HTMLInlineChecker->($self, $todo);
3167     } else {
3168 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3169 wakaba 1.1 }
3170     } else {
3171     return $HTMLInlineChecker->($self, $todo);
3172     }
3173    
3174     ## ISSUE: Content model is defined only for fieldset/legend,
3175     ## details/legend, and figure/legend.
3176     },
3177     };
3178    
3179     $Element->{$HTML_NS}->{div} = {
3180     attrs_checker => $GetHTMLAttrsChecker->({}),
3181     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3182     };
3183    
3184     $Element->{$HTML_NS}->{font} = {
3185     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3186     checker => $HTMLTransparentChecker,
3187     };
3188    
3189     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3190    
3191     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24