/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.19 - (hide annotations) (download)
Wed Nov 7 11:30:38 2007 UTC (17 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.18: +4 -0 lines
++ whatpm/Whatpm/ContentChecker/ChangeLog	7 Nov 2007 11:30:34 -0000
	* HTML.pm: Specialized error for old charset syntax.

2007-11-07  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     ## Strictly inline-level content
252     my $HTMLStrictlyInlineChecker = sub {
253     my ($self, $todo) = @_;
254     my $el = $todo->{node};
255     my $new_todos = [];
256     my @nodes = (@{$el->child_nodes});
257    
258     while (@nodes) {
259     my $node = shift @nodes;
260     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
261    
262     my $nt = $node->node_type;
263     if ($nt == 1) {
264     my $node_ns = $node->namespace_uri;
265     $node_ns = '' unless defined $node_ns;
266     my $node_ln = $node->manakai_local_name;
267     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
268     $not_allowed = 1
269 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
270     $self->{pluses}->{$node_ns}->{$node_ln};
271 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
272     if $not_allowed;
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 5) {
277     unshift @nodes, @{$node->child_nodes};
278     }
279     }
280    
281     for (@$new_todos) {
282     $_->{inline} = 1;
283     $_->{strictly_inline} = 1;
284     }
285     return ($new_todos);
286     }; # $HTMLStrictlyInlineChecker
287    
288 wakaba 1.8 ## Inline-level or strictly inline-level content
289 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
290     my ($self, $todo) = @_;
291     my $el = $todo->{node};
292     my $new_todos = [];
293     my @nodes = (@{$el->child_nodes});
294    
295     while (@nodes) {
296     my $node = shift @nodes;
297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
298    
299     my $nt = $node->node_type;
300     if ($nt == 1) {
301     my $node_ns = $node->namespace_uri;
302     $node_ns = '' unless defined $node_ns;
303     my $node_ln = $node->manakai_local_name;
304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
305     if ($todo->{strictly_inline}) {
306     $not_allowed = 1
307 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308     $self->{pluses}->{$node_ns}->{$node_ln};
309 wakaba 1.1 } else {
310     $not_allowed = 1
311     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
312 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
313     $self->{pluses}->{$node_ns}->{$node_ln};
314 wakaba 1.1 }
315     $self->{onerror}->(node => $node, type => 'element not allowed')
316     if $not_allowed;
317     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
318     unshift @nodes, @$sib;
319     push @$new_todos, @$ch;
320     } elsif ($nt == 5) {
321     unshift @nodes, @{$node->child_nodes};
322     }
323     }
324    
325     for (@$new_todos) {
326     $_->{inline} = 1;
327     $_->{strictly_inline} = 1;
328     }
329     return ($new_todos);
330     }; # $HTMLInlineOrStrictlyInlineChecker
331    
332 wakaba 1.13 ## TODO: New "significant" SHOULD-level requirement (revision 1114).
333 wakaba 1.1
334 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
335 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
336     my ($self, $todo) = @_;
337     my $el = $todo->{node};
338     my $new_todos = [];
339     my @nodes = (@{$el->child_nodes});
340    
341     my $content = 'block-or-inline'; # or 'block' or 'inline'
342     my @block_not_inline;
343     while (@nodes) {
344     my $node = shift @nodes;
345     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
346    
347 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
348     ## is conforming or not.
349    
350 wakaba 1.1 my $nt = $node->node_type;
351     if ($nt == 1) {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
356     if ($content eq 'block') {
357     $not_allowed = 1
358 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
359     $self->{pluses}->{$node_ns}->{$node_ln};
360 wakaba 1.1 } elsif ($content eq 'inline') {
361     $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
364     $self->{pluses}->{$node_ns}->{$node_ln};
365 wakaba 1.1 } else {
366     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
367     my $is_inline
368     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
369     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
370    
371     push @block_not_inline, $node
372     if $is_block and not $is_inline and not $not_allowed;
373 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
374 wakaba 1.1 $content = 'inline';
375     for (@block_not_inline) {
376     $self->{onerror}->(node => $_, type => 'element not allowed');
377     }
378     $not_allowed = 1 unless $is_inline;
379     }
380     }
381     $self->{onerror}->(node => $node, type => 'element not allowed')
382     if $not_allowed;
383     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
384     unshift @nodes, @$sib;
385     push @$new_todos, @$ch;
386     } elsif ($nt == 3 or $nt == 4) {
387     if ($node->data =~ /[^\x09-\x0D\x20]/) {
388     if ($content eq 'block') {
389     $self->{onerror}->(node => $node, type => 'character not allowed');
390     } else {
391     $content = 'inline';
392     for (@block_not_inline) {
393     $self->{onerror}->(node => $_, type => 'element not allowed');
394     }
395     }
396     }
397     } elsif ($nt == 5) {
398     unshift @nodes, @{$node->child_nodes};
399     }
400     }
401    
402     if ($content eq 'inline') {
403     for (@$new_todos) {
404     $_->{inline} = 1;
405     }
406     }
407     return ($new_todos);
408     };
409    
410     ## Zero or more XXX element, then either block-level or inline-level
411     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
412     my ($elnsuri, $ellname) = @_;
413     return sub {
414     my ($self, $todo) = @_;
415     my $el = $todo->{node};
416     my $new_todos = [];
417     my @nodes = (@{$el->child_nodes});
418    
419     my $has_non_style;
420     my $content = 'block-or-inline'; # or 'block' or 'inline'
421     my @block_not_inline;
422     while (@nodes) {
423     my $node = shift @nodes;
424     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
425    
426     my $nt = $node->node_type;
427     if ($nt == 1) {
428     my $node_ns = $node->namespace_uri;
429     $node_ns = '' unless defined $node_ns;
430     my $node_ln = $node->manakai_local_name;
431     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
432     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
433     $not_allowed = 1 if $has_non_style;
434     if ($ellname eq 'style' and
435     not $node->has_attribute_ns (undef, 'scoped')) {
436     $not_allowed = 1;
437     }
438     } elsif ($content eq 'block') {
439     $has_non_style = 1;
440     $not_allowed = 1
441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
442     $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 } elsif ($content eq 'inline') {
444     $has_non_style = 1;
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
447 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
448     $self->{pluses}->{$node_ns}->{$node_ln};
449 wakaba 1.1 } else {
450 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
452     my $is_inline
453     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455    
456     push @block_not_inline, $node
457     if $is_block and not $is_inline and not $not_allowed;
458 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
459 wakaba 1.1 $content = 'inline';
460     for (@block_not_inline) {
461     $self->{onerror}->(node => $_, type => 'element not allowed');
462     }
463     $not_allowed = 1 unless $is_inline;
464     }
465     }
466     $self->{onerror}->(node => $node, type => 'element not allowed')
467     if $not_allowed;
468     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
469     unshift @nodes, @$sib;
470     push @$new_todos, @$ch;
471     } elsif ($nt == 3 or $nt == 4) {
472     if ($node->data =~ /[^\x09-\x0D\x20]/) {
473     $has_non_style = 1;
474     if ($content eq 'block') {
475     $self->{onerror}->(node => $node, type => 'character not allowed');
476     } else {
477     $content = 'inline';
478     for (@block_not_inline) {
479     $self->{onerror}->(node => $_, type => 'element not allowed');
480     }
481     }
482     }
483     } elsif ($nt == 5) {
484     unshift @nodes, @{$node->child_nodes};
485     }
486     }
487    
488     if ($content eq 'inline') {
489     for (@$new_todos) {
490     $_->{inline} = 1;
491     }
492     }
493     return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
498    
499     our $AttrChecker;
500    
501     my $GetHTMLEnumeratedAttrChecker = sub {
502     my $states = shift; # {value => conforming ? 1 : -1}
503     return sub {
504     my ($self, $attr) = @_;
505     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
506     if ($states->{$value} > 0) {
507     #
508     } elsif ($states->{$value}) {
509     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
510     } else {
511     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
512     }
513     };
514     }; # $GetHTMLEnumeratedAttrChecker
515    
516     my $GetHTMLBooleanAttrChecker = sub {
517     my $local_name = shift;
518     return sub {
519     my ($self, $attr) = @_;
520     my $value = $attr->value;
521     unless ($value eq $local_name or $value eq '') {
522     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
523     }
524     };
525     }; # $GetHTMLBooleanAttrChecker
526    
527 wakaba 1.8 ## Unordered set of space-separated tokens
528 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
529 wakaba 1.8 my ($self, $attr) = @_;
530     my %word;
531     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
532     unless ($word{$word}) {
533     $word{$word} = 1;
534     } else {
535     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
536     }
537     }
538 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
539 wakaba 1.8
540 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
541     ## whose allowed values are defined by the section on link types)
542     my $HTMLLinkTypesAttrChecker = sub {
543 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
544 wakaba 1.1 my %word;
545     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
546     unless ($word{$word}) {
547     $word{$word} = 1;
548 wakaba 1.18 } elsif ($word eq 'up') {
549     #
550 wakaba 1.1 } else {
551     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
552     }
553     }
554     ## NOTE: Case sensitive match (since HTML5 spec does not say link
555     ## types are case-insensitive and it says "The value should not
556     ## be confusingly similar to any other defined value (e.g.
557     ## differing only in case).").
558     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
559     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
560     ## values to be used conformingly.
561     require Whatpm::_LinkTypeList;
562     our $LinkType;
563     for my $word (keys %word) {
564     my $def = $LinkType->{$word};
565     if (defined $def) {
566     if ($def->{status} eq 'accepted') {
567     if (defined $def->{effect}->[$a_or_area]) {
568     #
569     } else {
570     $self->{onerror}->(node => $attr,
571     type => 'link type:bad context:'.$word);
572     }
573     } elsif ($def->{status} eq 'proposal') {
574     $self->{onerror}->(node => $attr, level => 's',
575     type => 'link type:proposed:'.$word);
576     } else { # rejected or synonym
577     $self->{onerror}->(node => $attr,
578     type => 'link type:non-conforming:'.$word);
579     }
580 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
581     if ($word eq 'alternate') {
582     #
583     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
584     $todo->{has_hyperlink_link_type} = 1;
585     }
586     }
587 wakaba 1.1 if ($def->{unique}) {
588     unless ($self->{has_link_type}->{$word}) {
589     $self->{has_link_type}->{$word} = 1;
590     } else {
591     $self->{onerror}->(node => $attr,
592     type => 'link type:duplicate:'.$word);
593     }
594     }
595     } else {
596     $self->{onerror}->(node => $attr, level => 'unsupported',
597     type => 'link type:'.$word);
598     }
599     }
600 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
601     if $word{alternate} and not $word{stylesheet};
602 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
603     ## says that using both X-Pingback: header field and HTML
604     ## <link rel=pingback> is deprecated and if both appears they
605     ## SHOULD contain exactly the same value.
606     ## ISSUE: Pingback 1.0 specification defines the exact representation
607     ## of its link element, which cannot be tested by the current arch.
608     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
609     ## include any string that matches to the pattern for the rel=pingback link,
610     ## which again inpossible to test.
611     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
612 wakaba 1.12
613     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
614 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
615     ## then they SHOULD be described in different paragraphs.".
616 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
617    
618     ## URI (or IRI)
619     my $HTMLURIAttrChecker = sub {
620     my ($self, $attr) = @_;
621     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
622     my $value = $attr->value;
623     Whatpm::URIChecker->check_iri_reference ($value, sub {
624     my %opt = @_;
625     $self->{onerror}->(node => $attr, level => $opt{level},
626     type => 'URI::'.$opt{type}.
627     (defined $opt{position} ? ':'.$opt{position} : ''));
628     });
629 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
630 wakaba 1.1 }; # $HTMLURIAttrChecker
631    
632     ## A space separated list of one or more URIs (or IRIs)
633     my $HTMLSpaceURIsAttrChecker = sub {
634     my ($self, $attr) = @_;
635     my $i = 0;
636     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
637     Whatpm::URIChecker->check_iri_reference ($value, sub {
638     my %opt = @_;
639     $self->{onerror}->(node => $attr, level => $opt{level},
640 wakaba 1.2 type => 'URIs:'.':'.
641     $opt{type}.':'.$i.
642 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
643     });
644     $i++;
645     }
646     ## ISSUE: Relative references?
647     ## ISSUE: Leading or trailing white spaces are conformant?
648     ## ISSUE: A sequence of white space characters are conformant?
649     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
650     ## NOTE: Duplication seems not an error.
651 wakaba 1.4 $self->{has_uri_attr} = 1;
652 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
653    
654     my $HTMLDatetimeAttrChecker = sub {
655     my ($self, $attr) = @_;
656     my $value = $attr->value;
657     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
658     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
659     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
660     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
661     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
662     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
663     if $d < 1 or
664     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
665     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
666     if $M == 2 and $d == 29 and
667     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
668     } else {
669     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
670     }
671     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
672     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
673     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
674     if defined $s and $s > 59;
675     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
676     if $zh > 23;
677     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
678     if $zm > 59;
679     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
680     } else {
681     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
682     }
683     }; # $HTMLDatetimeAttrChecker
684    
685     my $HTMLIntegerAttrChecker = sub {
686     my ($self, $attr) = @_;
687     my $value = $attr->value;
688     unless ($value =~ /\A-?[0-9]+\z/) {
689     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
690     }
691     }; # $HTMLIntegerAttrChecker
692    
693     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
694     my $range_check = shift;
695     return sub {
696     my ($self, $attr) = @_;
697     my $value = $attr->value;
698     if ($value =~ /\A[0-9]+\z/) {
699     unless ($range_check->($value + 0)) {
700     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
701     }
702     } else {
703     $self->{onerror}->(node => $attr,
704     type => 'nninteger:syntax error');
705     }
706     };
707     }; # $GetHTMLNonNegativeIntegerAttrChecker
708    
709     my $GetHTMLFloatingPointNumberAttrChecker = sub {
710     my $range_check = shift;
711     return sub {
712     my ($self, $attr) = @_;
713     my $value = $attr->value;
714     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
715     unless ($range_check->($value + 0)) {
716     $self->{onerror}->(node => $attr, type => 'float:out of range');
717     }
718     } else {
719     $self->{onerror}->(node => $attr,
720     type => 'float:syntax error');
721     }
722     };
723     }; # $GetHTMLFloatingPointNumberAttrChecker
724    
725     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
726     ## ISSUE: RFC 2046 does not define syntax of media types.
727     ## ISSUE: The definition of "a valid MIME type" is unknown.
728     ## Syntactical correctness?
729     my $HTMLIMTAttrChecker = sub {
730     my ($self, $attr) = @_;
731     my $value = $attr->value;
732     ## ISSUE: RFC 2045 Content-Type header field allows insertion
733     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
734     ## ISSUE: RFC 2231 extension? Maybe no.
735     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
736     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
737     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
738     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
739     my @type = ($1, $2);
740     my $param = $3;
741     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
742     if (defined $2) {
743     push @type, $1 => $2;
744     } else {
745     my $n = $1;
746     my $v = $2;
747     $v =~ s/\\(.)/$1/gs;
748     push @type, $n => $v;
749     }
750     }
751     require Whatpm::IMTChecker;
752     Whatpm::IMTChecker->check_imt (sub {
753     my %opt = @_;
754     $self->{onerror}->(node => $attr, level => $opt{level},
755     type => 'IMT:'.$opt{type});
756     }, @type);
757     } else {
758     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
759     }
760     }; # $HTMLIMTAttrChecker
761    
762     my $HTMLLanguageTagAttrChecker = sub {
763 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
764    
765 wakaba 1.1 my ($self, $attr) = @_;
766 wakaba 1.6 my $value = $attr->value;
767     require Whatpm::LangTag;
768     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
769     my %opt = @_;
770     my $type = 'LangTag:'.$opt{type};
771     $type .= ':' . $opt{subtag} if defined $opt{subtag};
772     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
773     level => $opt{level});
774     });
775 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
776 wakaba 1.6
777     ## TODO: testdata
778 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
779    
780     ## "A valid media query [MQ]"
781     my $HTMLMQAttrChecker = sub {
782     my ($self, $attr) = @_;
783     $self->{onerror}->(node => $attr, level => 'unsupported',
784     type => 'media query');
785     ## ISSUE: What is "a valid media query"?
786     }; # $HTMLMQAttrChecker
787    
788     my $HTMLEventHandlerAttrChecker = sub {
789     my ($self, $attr) = @_;
790     $self->{onerror}->(node => $attr, level => 'unsupported',
791     type => 'event handler');
792     ## TODO: MUST contain valid ECMAScript code matching the
793     ## ECMAScript |FunctionBody| production. [ECMA262]
794     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
795     ## ISSUE: Automatic semicolon insertion does not apply?
796     ## ISSUE: Other script languages?
797     }; # $HTMLEventHandlerAttrChecker
798    
799     my $HTMLUsemapAttrChecker = sub {
800     my ($self, $attr) = @_;
801     ## MUST be a valid hashed ID reference to a |map| element
802     my $value = $attr->value;
803     if ($value =~ s/^#//) {
804     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
805     push @{$self->{usemap}}, [$value => $attr];
806     } else {
807     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
808     }
809     ## NOTE: Space characters in hashed ID references are conforming.
810     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
811     }; # $HTMLUsemapAttrChecker
812    
813     my $HTMLTargetAttrChecker = sub {
814     my ($self, $attr) = @_;
815     my $value = $attr->value;
816     if ($value =~ /^_/) {
817     $value = lc $value; ## ISSUE: ASCII case-insentitive?
818     unless ({
819     _self => 1, _parent => 1, _top => 1,
820     }->{$value}) {
821     $self->{onerror}->(node => $attr,
822     type => 'reserved browsing context name');
823     }
824     } else {
825     #$ ISSUE: An empty string is conforming?
826     }
827     }; # $HTMLTargetAttrChecker
828    
829     my $HTMLAttrChecker = {
830     id => sub {
831     ## NOTE: |map| has its own variant of |id=""| checker
832     my ($self, $attr) = @_;
833     my $value = $attr->value;
834     if (length $value > 0) {
835     if ($self->{id}->{$value}) {
836     $self->{onerror}->(node => $attr, type => 'duplicate ID');
837     push @{$self->{id}->{$value}}, $attr;
838     } else {
839     $self->{id}->{$value} = [$attr];
840     }
841     if ($value =~ /[\x09-\x0D\x20]/) {
842     $self->{onerror}->(node => $attr, type => 'space in ID');
843     }
844     } else {
845     ## NOTE: MUST contain at least one character
846     $self->{onerror}->(node => $attr, type => 'empty attribute value');
847     }
848     },
849     title => sub {}, ## NOTE: No conformance creteria
850     lang => sub {
851     my ($self, $attr) = @_;
852 wakaba 1.6 my $value = $attr->value;
853     if ($value eq '') {
854     #
855     } else {
856     require Whatpm::LangTag;
857     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
858     my %opt = @_;
859     my $type = 'LangTag:'.$opt{type};
860     $type .= ':' . $opt{subtag} if defined $opt{subtag};
861     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
862     level => $opt{level});
863     });
864     }
865 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
866     unless ($attr->owner_document->manakai_is_html) {
867     $self->{onerror}->(node => $attr, type => 'in XML:lang');
868     }
869 wakaba 1.6
870     ## TODO: test data
871 wakaba 1.1 },
872     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
873     class => sub {
874     my ($self, $attr) = @_;
875     my %word;
876     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
877     unless ($word{$word}) {
878     $word{$word} = 1;
879     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
880     } else {
881     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
882     }
883     }
884     },
885     contextmenu => sub {
886     my ($self, $attr) = @_;
887     my $value = $attr->value;
888     push @{$self->{contextmenu}}, [$value => $attr];
889     ## ISSUE: "The value must be the ID of a menu element in the DOM."
890     ## What is "in the DOM"? A menu Element node that is not part
891     ## of the Document tree is in the DOM? A menu Element node that
892     ## belong to another Document tree is in the DOM?
893     },
894     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
895 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
896     ## TODO: ref, template, registrationmark
897 wakaba 1.1 };
898    
899     for (qw/
900     onabort onbeforeunload onblur onchange onclick oncontextmenu
901     ondblclick ondrag ondragend ondragenter ondragleave ondragover
902     ondragstart ondrop onerror onfocus onkeydown onkeypress
903     onkeyup onload onmessage onmousedown onmousemove onmouseout
904     onmouseover onmouseup onmousewheel onresize onscroll onselect
905     onsubmit onunload
906     /) {
907     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
908     }
909    
910     my $GetHTMLAttrsChecker = sub {
911     my $element_specific_checker = shift;
912     return sub {
913     my ($self, $todo) = @_;
914     for my $attr (@{$todo->{node}->attributes}) {
915     my $attr_ns = $attr->namespace_uri;
916     $attr_ns = '' unless defined $attr_ns;
917     my $attr_ln = $attr->manakai_local_name;
918     my $checker;
919     if ($attr_ns eq '') {
920     $checker = $element_specific_checker->{$attr_ln}
921     || $HTMLAttrChecker->{$attr_ln};
922     }
923     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
924     || $AttrChecker->{$attr_ns}->{''};
925     if ($checker) {
926     $checker->($self, $attr, $todo);
927     } else {
928     $self->{onerror}->(node => $attr, level => 'unsupported',
929     type => 'attribute');
930     ## ISSUE: No comformance createria for unknown attributes in the spec
931     }
932     }
933     };
934     }; # $GetHTMLAttrsChecker
935    
936     our $Element;
937     our $ElementDefault;
938     our $AnyChecker;
939    
940     $Element->{$HTML_NS}->{''} = {
941     attrs_checker => $GetHTMLAttrsChecker->({}),
942     checker => $ElementDefault->{checker},
943     };
944    
945     $Element->{$HTML_NS}->{html} = {
946     is_root => 1,
947     attrs_checker => $GetHTMLAttrsChecker->({
948 wakaba 1.16 manifest => $HTMLURIAttrChecker,
949 wakaba 1.1 xmlns => sub {
950     my ($self, $attr) = @_;
951     my $value = $attr->value;
952     unless ($value eq $HTML_NS) {
953     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
954     }
955     unless ($attr->owner_document->manakai_is_html) {
956     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
957     ## TODO: Test
958     }
959     },
960     }),
961     checker => sub {
962     my ($self, $todo) = @_;
963     my $el = $todo->{node};
964     my $new_todos = [];
965     my @nodes = (@{$el->child_nodes});
966    
967     my $phase = 'before head';
968     while (@nodes) {
969     my $node = shift @nodes;
970     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
971    
972     my $nt = $node->node_type;
973     if ($nt == 1) {
974     my $node_ns = $node->namespace_uri;
975     $node_ns = '' unless defined $node_ns;
976     my $node_ln = $node->manakai_local_name;
977     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
978 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
979     #
980     } elsif ($phase eq 'before head') {
981 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
982     $phase = 'after head';
983     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
984     $self->{onerror}->(node => $node, type => 'ps element missing:head');
985     $phase = 'after body';
986     } else {
987     $not_allowed = 1;
988     # before head
989     }
990     } elsif ($phase eq 'after head') {
991     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
992     $phase = 'after body';
993     } else {
994     $not_allowed = 1;
995     # after head
996     }
997     } else { #elsif ($phase eq 'after body') {
998     $not_allowed = 1;
999     # after body
1000     }
1001     $self->{onerror}->(node => $node, type => 'element not allowed')
1002     if $not_allowed;
1003     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1004     unshift @nodes, @$sib;
1005     push @$new_todos, @$ch;
1006     } elsif ($nt == 3 or $nt == 4) {
1007     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1008     $self->{onerror}->(node => $node, type => 'character not allowed');
1009     }
1010     } elsif ($nt == 5) {
1011     unshift @nodes, @{$node->child_nodes};
1012     }
1013     }
1014    
1015     if ($phase eq 'before head') {
1016     $self->{onerror}->(node => $el, type => 'child element missing:head');
1017     $self->{onerror}->(node => $el, type => 'child element missing:body');
1018     } elsif ($phase eq 'after head') {
1019     $self->{onerror}->(node => $el, type => 'child element missing:body');
1020     }
1021    
1022     return ($new_todos);
1023     },
1024     };
1025    
1026     $Element->{$HTML_NS}->{head} = {
1027     attrs_checker => $GetHTMLAttrsChecker->({}),
1028     checker => sub {
1029     my ($self, $todo) = @_;
1030     my $el = $todo->{node};
1031     my $new_todos = [];
1032     my @nodes = (@{$el->child_nodes});
1033    
1034     my $has_title;
1035     my $phase = 'initial'; # 'after charset', 'after base'
1036     while (@nodes) {
1037     my $node = shift @nodes;
1038     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1039    
1040     my $nt = $node->node_type;
1041     if ($nt == 1) {
1042     my $node_ns = $node->namespace_uri;
1043     $node_ns = '' unless defined $node_ns;
1044     my $node_ln = $node->manakai_local_name;
1045     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1046 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1047     #
1048     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1049 wakaba 1.1 $phase = 'after base';
1050     unless ($has_title) {
1051     $has_title = 1;
1052     } else {
1053     $not_allowed = 1;
1054     }
1055     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1056     if ($node->has_attribute_ns (undef, 'charset')) {
1057     if ($phase eq 'initial') {
1058     $phase = 'after charset';
1059     } else {
1060     $not_allowed = 1;
1061     ## NOTE: See also |base|'s "contexts" field in the spec
1062     }
1063 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1064     $node->has_attribute_ns (undef, 'http-equiv')) {
1065     $phase = 'after base';
1066 wakaba 1.1 } else {
1067     $phase = 'after base';
1068 wakaba 1.5 $not_allowed = 1;
1069 wakaba 1.1 }
1070     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1071     if ($phase eq 'initial' or $phase eq 'after charset') {
1072     $phase = 'after base';
1073     } else {
1074     $not_allowed = 1;
1075     }
1076     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1077     $phase = 'after base';
1078     if ($node->has_attribute_ns (undef, 'scoped')) {
1079     $not_allowed = 1;
1080     }
1081     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1082     $phase = 'after base';
1083     } else {
1084     $not_allowed = 1;
1085     }
1086     $self->{onerror}->(node => $node, type => 'element not allowed')
1087     if $not_allowed;
1088 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1089 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1090     unshift @nodes, @$sib;
1091     push @$new_todos, @$ch;
1092     } elsif ($nt == 3 or $nt == 4) {
1093     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1094     $self->{onerror}->(node => $node, type => 'character not allowed');
1095     }
1096     } elsif ($nt == 5) {
1097     unshift @nodes, @{$node->child_nodes};
1098     }
1099     }
1100     unless ($has_title) {
1101     $self->{onerror}->(node => $el, type => 'child element missing:title');
1102     }
1103     return ($new_todos);
1104     },
1105     };
1106    
1107     $Element->{$HTML_NS}->{title} = {
1108     attrs_checker => $GetHTMLAttrsChecker->({}),
1109     checker => $HTMLTextChecker,
1110     };
1111    
1112     $Element->{$HTML_NS}->{base} = {
1113 wakaba 1.4 attrs_checker => sub {
1114     my ($self, $todo) = @_;
1115    
1116 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1117     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1118    
1119     if ($self->{has_uri_attr} and $has_href) {
1120 wakaba 1.4 ## ISSUE: Are these examples conforming?
1121     ## <head profile="a b c"><base href> (except for |profile|'s
1122     ## non-conformance)
1123     ## <title xml:base="relative"/><base href/> (maybe it should be)
1124     ## <unknown xmlns="relative"/><base href/> (assuming that
1125     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1126     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1127     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1128     ## <style>@import 'relative';</style><base href>
1129     ## <script>location.href = 'relative';</script><base href>
1130 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1131     ## an exception.
1132 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1133     type => 'basehref after URI attribute');
1134     }
1135 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1136 wakaba 1.4 ## ISSUE: Are these examples conforming?
1137     ## <head><title xlink:href=""/><base target="name"/></head>
1138     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1139     ## (assuming that |xbl:xbl| is allowed before |base|)
1140     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1141     ## <link href=""/><base target="name"/>
1142     ## <link rel=unknown href=""><base target=name>
1143     $self->{onerror}->(node => $todo->{node},
1144     type => 'basetarget after hyperlink');
1145     }
1146    
1147 wakaba 1.14 if (not $has_href and not $has_target) {
1148     $self->{onerror}->(node => $todo->{node},
1149     type => 'attribute missing:href|target');
1150     }
1151    
1152 wakaba 1.4 return $GetHTMLAttrsChecker->({
1153     href => $HTMLURIAttrChecker,
1154     target => $HTMLTargetAttrChecker,
1155     })->($self, $todo);
1156     },
1157 wakaba 1.1 checker => $HTMLEmptyChecker,
1158     };
1159    
1160     $Element->{$HTML_NS}->{link} = {
1161     attrs_checker => sub {
1162     my ($self, $todo) = @_;
1163     $GetHTMLAttrsChecker->({
1164     href => $HTMLURIAttrChecker,
1165 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1166 wakaba 1.1 media => $HTMLMQAttrChecker,
1167     hreflang => $HTMLLanguageTagAttrChecker,
1168     type => $HTMLIMTAttrChecker,
1169     ## NOTE: Though |title| has special semantics,
1170     ## syntactically same as the |title| as global attribute.
1171     })->($self, $todo);
1172 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1173     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1174     } else {
1175 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1176     type => 'attribute missing:href');
1177     }
1178     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1179     $self->{onerror}->(node => $todo->{node},
1180     type => 'attribute missing:rel');
1181     }
1182     },
1183     checker => $HTMLEmptyChecker,
1184     };
1185    
1186     $Element->{$HTML_NS}->{meta} = {
1187     attrs_checker => sub {
1188     my ($self, $todo) = @_;
1189     my $name_attr;
1190     my $http_equiv_attr;
1191     my $charset_attr;
1192     my $content_attr;
1193     for my $attr (@{$todo->{node}->attributes}) {
1194     my $attr_ns = $attr->namespace_uri;
1195     $attr_ns = '' unless defined $attr_ns;
1196     my $attr_ln = $attr->manakai_local_name;
1197     my $checker;
1198     if ($attr_ns eq '') {
1199     if ($attr_ln eq 'content') {
1200     $content_attr = $attr;
1201     $checker = 1;
1202     } elsif ($attr_ln eq 'name') {
1203     $name_attr = $attr;
1204     $checker = 1;
1205     } elsif ($attr_ln eq 'http-equiv') {
1206     $http_equiv_attr = $attr;
1207     $checker = 1;
1208     } elsif ($attr_ln eq 'charset') {
1209     $charset_attr = $attr;
1210     $checker = 1;
1211     } else {
1212     $checker = $HTMLAttrChecker->{$attr_ln}
1213     || $AttrChecker->{$attr_ns}->{$attr_ln}
1214     || $AttrChecker->{$attr_ns}->{''};
1215     }
1216     } else {
1217     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1218     || $AttrChecker->{$attr_ns}->{''};
1219     }
1220     if ($checker) {
1221     $checker->($self, $attr) if ref $checker;
1222     } else {
1223     $self->{onerror}->(node => $attr, level => 'unsupported',
1224     type => 'attribute');
1225     ## ISSUE: No comformance createria for unknown attributes in the spec
1226     }
1227     }
1228    
1229     if (defined $name_attr) {
1230     if (defined $http_equiv_attr) {
1231     $self->{onerror}->(node => $http_equiv_attr,
1232     type => 'attribute not allowed');
1233     } elsif (defined $charset_attr) {
1234     $self->{onerror}->(node => $charset_attr,
1235     type => 'attribute not allowed');
1236     }
1237     my $metadata_name = $name_attr->value;
1238     my $metadata_value;
1239     if (defined $content_attr) {
1240     $metadata_value = $content_attr->value;
1241     } else {
1242     $self->{onerror}->(node => $todo->{node},
1243     type => 'attribute missing:content');
1244     $metadata_value = '';
1245     }
1246     } elsif (defined $http_equiv_attr) {
1247     if (defined $charset_attr) {
1248     $self->{onerror}->(node => $charset_attr,
1249     type => 'attribute not allowed');
1250     }
1251     unless (defined $content_attr) {
1252     $self->{onerror}->(node => $todo->{node},
1253     type => 'attribute missing:content');
1254     }
1255     } elsif (defined $charset_attr) {
1256     if (defined $content_attr) {
1257     $self->{onerror}->(node => $content_attr,
1258     type => 'attribute not allowed');
1259     }
1260     } else {
1261     if (defined $content_attr) {
1262     $self->{onerror}->(node => $content_attr,
1263     type => 'attribute not allowed');
1264     $self->{onerror}->(node => $todo->{node},
1265     type => 'attribute missing:name|http-equiv');
1266     } else {
1267     $self->{onerror}->(node => $todo->{node},
1268     type => 'attribute missing:name|http-equiv|charset');
1269     }
1270     }
1271    
1272     ## TODO: metadata conformance
1273    
1274     ## TODO: pragma conformance
1275     if (defined $http_equiv_attr) { ## An enumerated attribute
1276     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1277     if ({
1278     'refresh' => 1,
1279     'default-style' => 1,
1280     }->{$keyword}) {
1281     #
1282 wakaba 1.19 } elsif ($keyword eq 'content-type') {
1283     $self->{onerror}
1284     ->(node => $http_equiv_attr,
1285     type => 'enumerated:invalid:http-equiv:content-type');
1286 wakaba 1.1 } else {
1287     $self->{onerror}->(node => $http_equiv_attr,
1288     type => 'enumerated:invalid');
1289     }
1290     }
1291    
1292     if (defined $charset_attr) {
1293     unless ($todo->{node}->owner_document->manakai_is_html) {
1294     $self->{onerror}->(node => $charset_attr,
1295     type => 'in XML:charset');
1296     }
1297     ## TODO: charset
1298     }
1299     },
1300     checker => $HTMLEmptyChecker,
1301     };
1302    
1303     $Element->{$HTML_NS}->{style} = {
1304     attrs_checker => $GetHTMLAttrsChecker->({
1305     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1306     media => $HTMLMQAttrChecker,
1307     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1308     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1309     ## not different
1310     }),
1311     checker => sub {
1312     ## NOTE: |html:style| has no conformance creteria on content model
1313     my ($self, $todo) = @_;
1314     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1315     $type = 'text/css' unless defined $type;
1316     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1317     type => 'style:'.$type); ## TODO: $type normalization
1318     return $AnyChecker->($self, $todo);
1319     },
1320     };
1321    
1322     $Element->{$HTML_NS}->{body} = {
1323     attrs_checker => $GetHTMLAttrsChecker->({}),
1324     checker => $HTMLBlockChecker,
1325     };
1326    
1327     $Element->{$HTML_NS}->{section} = {
1328     attrs_checker => $GetHTMLAttrsChecker->({}),
1329     checker => $HTMLStylableBlockChecker,
1330     };
1331    
1332     $Element->{$HTML_NS}->{nav} = {
1333     attrs_checker => $GetHTMLAttrsChecker->({}),
1334     checker => $HTMLBlockOrInlineChecker,
1335     };
1336    
1337     $Element->{$HTML_NS}->{article} = {
1338     attrs_checker => $GetHTMLAttrsChecker->({}),
1339     checker => $HTMLStylableBlockChecker,
1340     };
1341    
1342     $Element->{$HTML_NS}->{blockquote} = {
1343     attrs_checker => $GetHTMLAttrsChecker->({
1344     cite => $HTMLURIAttrChecker,
1345     }),
1346     checker => $HTMLBlockChecker,
1347     };
1348    
1349     $Element->{$HTML_NS}->{aside} = {
1350     attrs_checker => $GetHTMLAttrsChecker->({}),
1351     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1352     };
1353    
1354     $Element->{$HTML_NS}->{h1} = {
1355     attrs_checker => $GetHTMLAttrsChecker->({}),
1356     checker => sub {
1357     my ($self, $todo) = @_;
1358     $todo->{flag}->{has_heading}->[0] = 1;
1359 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1360 wakaba 1.1 },
1361     };
1362    
1363     $Element->{$HTML_NS}->{h2} = {
1364     attrs_checker => $GetHTMLAttrsChecker->({}),
1365     checker => $Element->{$HTML_NS}->{h1}->{checker},
1366     };
1367    
1368     $Element->{$HTML_NS}->{h3} = {
1369     attrs_checker => $GetHTMLAttrsChecker->({}),
1370     checker => $Element->{$HTML_NS}->{h1}->{checker},
1371     };
1372    
1373     $Element->{$HTML_NS}->{h4} = {
1374     attrs_checker => $GetHTMLAttrsChecker->({}),
1375     checker => $Element->{$HTML_NS}->{h1}->{checker},
1376     };
1377    
1378     $Element->{$HTML_NS}->{h5} = {
1379     attrs_checker => $GetHTMLAttrsChecker->({}),
1380     checker => $Element->{$HTML_NS}->{h1}->{checker},
1381     };
1382    
1383     $Element->{$HTML_NS}->{h6} = {
1384     attrs_checker => $GetHTMLAttrsChecker->({}),
1385     checker => $Element->{$HTML_NS}->{h1}->{checker},
1386     };
1387    
1388     $Element->{$HTML_NS}->{header} = {
1389     attrs_checker => $GetHTMLAttrsChecker->({}),
1390     checker => sub {
1391     my ($self, $todo) = @_;
1392     my $old_flag = $todo->{flag}->{has_heading} || [];
1393     my $new_flag = [];
1394     local $todo->{flag}->{has_heading} = $new_flag;
1395     my $node = $todo->{node};
1396    
1397     my $end = $self->_add_minuses
1398     ({$HTML_NS => {qw/header 1 footer 1/}},
1399     $HTMLSectioningElements);
1400     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1401     push @$new_todos, $end,
1402     {type => 'code', code => sub {
1403     if ($new_flag->[0]) {
1404     $old_flag->[0] = 1;
1405     } else {
1406     $self->{onerror}->(node => $node, type => 'element missing:hn');
1407     }
1408     }};
1409     return ($new_todos, $ch);
1410     },
1411     };
1412    
1413     $Element->{$HTML_NS}->{footer} = {
1414     attrs_checker => $GetHTMLAttrsChecker->({}),
1415     checker => sub { ## block -hn -header -footer -sectioning or inline
1416     my ($self, $todo) = @_;
1417     my $el = $todo->{node};
1418     my $new_todos = [];
1419     my @nodes = (@{$el->child_nodes});
1420    
1421     my $content = 'block-or-inline'; # or 'block' or 'inline'
1422     my @block_not_inline;
1423     while (@nodes) {
1424     my $node = shift @nodes;
1425     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1426    
1427     my $nt = $node->node_type;
1428     if ($nt == 1) {
1429     my $node_ns = $node->namespace_uri;
1430     $node_ns = '' unless defined $node_ns;
1431     my $node_ln = $node->manakai_local_name;
1432     my $not_allowed;
1433     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1434     $not_allowed = 1;
1435     } elsif ($node_ns eq $HTML_NS and
1436     {
1437     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1438     }->{$node_ln}) {
1439     $not_allowed = 1;
1440     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1441     $not_allowed = 1;
1442     }
1443     if ($content eq 'block') {
1444     $not_allowed = 1
1445 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1446     $self->{pluses}->{$node_ns}->{$node_ln};
1447 wakaba 1.1 } elsif ($content eq 'inline') {
1448     $not_allowed = 1
1449     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1450 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1451     $self->{pluses}->{$node_ns}->{$node_ln};
1452 wakaba 1.1 } else {
1453     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1454     my $is_inline
1455     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1456     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1457    
1458     push @block_not_inline, $node
1459     if $is_block and not $is_inline and not $not_allowed;
1460 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1461 wakaba 1.1 $content = 'inline';
1462     for (@block_not_inline) {
1463     $self->{onerror}->(node => $_, type => 'element not allowed');
1464     }
1465     $not_allowed = 1 unless $is_inline;
1466     }
1467     }
1468     $self->{onerror}->(node => $node, type => 'element not allowed')
1469     if $not_allowed;
1470     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1471     unshift @nodes, @$sib;
1472     push @$new_todos, @$ch;
1473     } elsif ($nt == 3 or $nt == 4) {
1474     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1475     if ($content eq 'block') {
1476     $self->{onerror}->(node => $node, type => 'character not allowed');
1477     } else {
1478     $content = 'inline';
1479     for (@block_not_inline) {
1480     $self->{onerror}->(node => $_, type => 'element not allowed');
1481     }
1482     }
1483     }
1484     } elsif ($nt == 5) {
1485     unshift @nodes, @{$node->child_nodes};
1486     }
1487     }
1488    
1489     my $end = $self->_add_minuses
1490     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1491     $HTMLSectioningElements);
1492     push @$new_todos, $end;
1493    
1494     if ($content eq 'inline') {
1495     for (@$new_todos) {
1496     $_->{inline} = 1;
1497     }
1498     }
1499    
1500     return ($new_todos);
1501     },
1502     };
1503    
1504     $Element->{$HTML_NS}->{address} = {
1505     attrs_checker => $GetHTMLAttrsChecker->({}),
1506     checker => $HTMLInlineChecker,
1507     };
1508    
1509     $Element->{$HTML_NS}->{p} = {
1510     attrs_checker => $GetHTMLAttrsChecker->({}),
1511 wakaba 1.13 checker => $HTMLInlineChecker,
1512 wakaba 1.1 };
1513    
1514     $Element->{$HTML_NS}->{hr} = {
1515     attrs_checker => $GetHTMLAttrsChecker->({}),
1516     checker => $HTMLEmptyChecker,
1517     };
1518    
1519     $Element->{$HTML_NS}->{br} = {
1520     attrs_checker => $GetHTMLAttrsChecker->({}),
1521     checker => $HTMLEmptyChecker,
1522     };
1523    
1524     $Element->{$HTML_NS}->{dialog} = {
1525     attrs_checker => $GetHTMLAttrsChecker->({}),
1526     checker => sub {
1527     my ($self, $todo) = @_;
1528     my $el = $todo->{node};
1529     my $new_todos = [];
1530     my @nodes = (@{$el->child_nodes});
1531    
1532     my $phase = 'before dt';
1533     while (@nodes) {
1534     my $node = shift @nodes;
1535     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1536    
1537     my $nt = $node->node_type;
1538     if ($nt == 1) {
1539     my $node_ns = $node->namespace_uri;
1540     $node_ns = '' unless defined $node_ns;
1541     my $node_ln = $node->manakai_local_name;
1542     ## NOTE: |minuses| list is not checked since redundant
1543 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1544     #
1545     } elsif ($phase eq 'before dt') {
1546 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1547     $phase = 'before dd';
1548     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1549     $self->{onerror}
1550     ->(node => $node, type => 'ps element missing:dt');
1551     $phase = 'before dt';
1552     } else {
1553     $self->{onerror}->(node => $node, type => 'element not allowed');
1554     }
1555     } else { # before dd
1556     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1557     $phase = 'before dt';
1558     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1559     $self->{onerror}
1560     ->(node => $node, type => 'ps element missing:dd');
1561     $phase = 'before dd';
1562     } else {
1563     $self->{onerror}->(node => $node, type => 'element not allowed');
1564     }
1565     }
1566     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1567     unshift @nodes, @$sib;
1568     push @$new_todos, @$ch;
1569     } elsif ($nt == 3 or $nt == 4) {
1570     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1571     $self->{onerror}->(node => $node, type => 'character not allowed');
1572     }
1573     } elsif ($nt == 5) {
1574     unshift @nodes, @{$node->child_nodes};
1575     }
1576     }
1577     if ($phase eq 'before dd') {
1578 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1579 wakaba 1.1 }
1580     return ($new_todos);
1581     },
1582     };
1583    
1584     $Element->{$HTML_NS}->{pre} = {
1585     attrs_checker => $GetHTMLAttrsChecker->({}),
1586     checker => $HTMLStrictlyInlineChecker,
1587     };
1588    
1589     $Element->{$HTML_NS}->{ol} = {
1590     attrs_checker => $GetHTMLAttrsChecker->({
1591     start => $HTMLIntegerAttrChecker,
1592     }),
1593     checker => sub {
1594     my ($self, $todo) = @_;
1595     my $el = $todo->{node};
1596     my $new_todos = [];
1597     my @nodes = (@{$el->child_nodes});
1598    
1599     while (@nodes) {
1600     my $node = shift @nodes;
1601     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1602    
1603     my $nt = $node->node_type;
1604     if ($nt == 1) {
1605     my $node_ns = $node->namespace_uri;
1606     $node_ns = '' unless defined $node_ns;
1607     my $node_ln = $node->manakai_local_name;
1608     ## NOTE: |minuses| list is not checked since redundant
1609 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1610     #
1611     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1612 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1613     }
1614     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1615     unshift @nodes, @$sib;
1616     push @$new_todos, @$ch;
1617     } elsif ($nt == 3 or $nt == 4) {
1618     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1619     $self->{onerror}->(node => $node, type => 'character not allowed');
1620     }
1621     } elsif ($nt == 5) {
1622     unshift @nodes, @{$node->child_nodes};
1623     }
1624     }
1625    
1626     if ($todo->{inline}) {
1627     for (@$new_todos) {
1628     $_->{inline} = 1;
1629     }
1630     }
1631     return ($new_todos);
1632     },
1633     };
1634    
1635     $Element->{$HTML_NS}->{ul} = {
1636     attrs_checker => $GetHTMLAttrsChecker->({}),
1637     checker => $Element->{$HTML_NS}->{ol}->{checker},
1638     };
1639    
1640     $Element->{$HTML_NS}->{li} = {
1641     attrs_checker => $GetHTMLAttrsChecker->({
1642     start => sub {
1643     my ($self, $attr) = @_;
1644     my $parent = $attr->owner_element->manakai_parent_element;
1645     if (defined $parent) {
1646     my $parent_ns = $parent->namespace_uri;
1647     $parent_ns = '' unless defined $parent_ns;
1648     my $parent_ln = $parent->manakai_local_name;
1649     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1650     $self->{onerror}->(node => $attr, level => 'unsupported',
1651     type => 'attribute');
1652     }
1653     }
1654     $HTMLIntegerAttrChecker->($self, $attr);
1655     },
1656     }),
1657     checker => sub {
1658     my ($self, $todo) = @_;
1659     if ($todo->{inline}) {
1660     return $HTMLInlineChecker->($self, $todo);
1661     } else {
1662     return $HTMLBlockOrInlineChecker->($self, $todo);
1663     }
1664     },
1665     };
1666    
1667     $Element->{$HTML_NS}->{dl} = {
1668     attrs_checker => $GetHTMLAttrsChecker->({}),
1669     checker => sub {
1670     my ($self, $todo) = @_;
1671     my $el = $todo->{node};
1672     my $new_todos = [];
1673     my @nodes = (@{$el->child_nodes});
1674    
1675     my $phase = 'before dt';
1676     while (@nodes) {
1677     my $node = shift @nodes;
1678     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1679    
1680     my $nt = $node->node_type;
1681     if ($nt == 1) {
1682     my $node_ns = $node->namespace_uri;
1683     $node_ns = '' unless defined $node_ns;
1684     my $node_ln = $node->manakai_local_name;
1685     ## NOTE: |minuses| list is not checked since redundant
1686 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1687     #
1688     } elsif ($phase eq 'in dds') {
1689 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1690     #$phase = 'in dds';
1691     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1692     $phase = 'in dts';
1693     } else {
1694     $self->{onerror}->(node => $node, type => 'element not allowed');
1695     }
1696     } elsif ($phase eq 'in dts') {
1697     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1698     #$phase = 'in dts';
1699     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1700     $phase = 'in dds';
1701     } else {
1702     $self->{onerror}->(node => $node, type => 'element not allowed');
1703     }
1704     } else { # before dt
1705     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1706     $phase = 'in dts';
1707     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1708     $self->{onerror}
1709     ->(node => $node, type => 'ps element missing:dt');
1710     $phase = 'in dds';
1711     } else {
1712     $self->{onerror}->(node => $node, type => 'element not allowed');
1713     }
1714     }
1715     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1716     unshift @nodes, @$sib;
1717     push @$new_todos, @$ch;
1718     } elsif ($nt == 3 or $nt == 4) {
1719     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1720     $self->{onerror}->(node => $node, type => 'character not allowed');
1721     }
1722     } elsif ($nt == 5) {
1723     unshift @nodes, @{$node->child_nodes};
1724     }
1725     }
1726     if ($phase eq 'in dts') {
1727 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1728 wakaba 1.1 }
1729    
1730     if ($todo->{inline}) {
1731     for (@$new_todos) {
1732     $_->{inline} = 1;
1733     }
1734     }
1735     return ($new_todos);
1736     },
1737     };
1738    
1739     $Element->{$HTML_NS}->{dt} = {
1740     attrs_checker => $GetHTMLAttrsChecker->({}),
1741     checker => $HTMLStrictlyInlineChecker,
1742     };
1743    
1744     $Element->{$HTML_NS}->{dd} = {
1745     attrs_checker => $GetHTMLAttrsChecker->({}),
1746     checker => $Element->{$HTML_NS}->{li}->{checker},
1747     };
1748    
1749     $Element->{$HTML_NS}->{a} = {
1750     attrs_checker => sub {
1751     my ($self, $todo) = @_;
1752     my %attr;
1753     for my $attr (@{$todo->{node}->attributes}) {
1754     my $attr_ns = $attr->namespace_uri;
1755     $attr_ns = '' unless defined $attr_ns;
1756     my $attr_ln = $attr->manakai_local_name;
1757     my $checker;
1758     if ($attr_ns eq '') {
1759     $checker = {
1760     target => $HTMLTargetAttrChecker,
1761     href => $HTMLURIAttrChecker,
1762     ping => $HTMLSpaceURIsAttrChecker,
1763 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1764 wakaba 1.1 media => $HTMLMQAttrChecker,
1765     hreflang => $HTMLLanguageTagAttrChecker,
1766     type => $HTMLIMTAttrChecker,
1767     }->{$attr_ln};
1768     if ($checker) {
1769     $attr{$attr_ln} = $attr;
1770     } else {
1771     $checker = $HTMLAttrChecker->{$attr_ln};
1772     }
1773     }
1774     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1775     || $AttrChecker->{$attr_ns}->{''};
1776     if ($checker) {
1777     $checker->($self, $attr) if ref $checker;
1778     } else {
1779     $self->{onerror}->(node => $attr, level => 'unsupported',
1780     type => 'attribute');
1781     ## ISSUE: No comformance createria for unknown attributes in the spec
1782     }
1783     }
1784    
1785 wakaba 1.4 if (defined $attr{href}) {
1786     $self->{has_hyperlink_element} = 1;
1787     } else {
1788 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1789     if (defined $attr{$_}) {
1790     $self->{onerror}->(node => $attr{$_},
1791     type => 'attribute not allowed');
1792     }
1793     }
1794     }
1795     },
1796     checker => sub {
1797     my ($self, $todo) = @_;
1798    
1799     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1800     my ($new_todos, $ch)
1801 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
1802 wakaba 1.1 push @$new_todos, $end;
1803    
1804 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1805     $_->{flag}->{in_a_href} = 1 for @$new_todos;
1806     }
1807 wakaba 1.1
1808     return ($new_todos, $ch);
1809     },
1810     };
1811    
1812     $Element->{$HTML_NS}->{q} = {
1813     attrs_checker => $GetHTMLAttrsChecker->({
1814     cite => $HTMLURIAttrChecker,
1815     }),
1816     checker => $HTMLInlineOrStrictlyInlineChecker,
1817     };
1818    
1819     $Element->{$HTML_NS}->{cite} = {
1820     attrs_checker => $GetHTMLAttrsChecker->({}),
1821     checker => $HTMLStrictlyInlineChecker,
1822     };
1823    
1824     $Element->{$HTML_NS}->{em} = {
1825     attrs_checker => $GetHTMLAttrsChecker->({}),
1826     checker => $HTMLInlineOrStrictlyInlineChecker,
1827     };
1828    
1829     $Element->{$HTML_NS}->{strong} = {
1830     attrs_checker => $GetHTMLAttrsChecker->({}),
1831     checker => $HTMLInlineOrStrictlyInlineChecker,
1832     };
1833    
1834     $Element->{$HTML_NS}->{small} = {
1835     attrs_checker => $GetHTMLAttrsChecker->({}),
1836     checker => $HTMLInlineOrStrictlyInlineChecker,
1837     };
1838    
1839     $Element->{$HTML_NS}->{m} = {
1840     attrs_checker => $GetHTMLAttrsChecker->({}),
1841     checker => $HTMLInlineOrStrictlyInlineChecker,
1842     };
1843    
1844     $Element->{$HTML_NS}->{dfn} = {
1845     attrs_checker => $GetHTMLAttrsChecker->({}),
1846     checker => sub {
1847     my ($self, $todo) = @_;
1848    
1849     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1850     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1851     push @$sib, $end;
1852    
1853     my $node = $todo->{node};
1854     my $term = $node->get_attribute_ns (undef, 'title');
1855     unless (defined $term) {
1856     for my $child (@{$node->child_nodes}) {
1857     if ($child->node_type == 1) { # ELEMENT_NODE
1858     if (defined $term) {
1859     undef $term;
1860     last;
1861     } elsif ($child->manakai_local_name eq 'abbr') {
1862     my $nsuri = $child->namespace_uri;
1863     if (defined $nsuri and $nsuri eq $HTML_NS) {
1864     my $attr = $child->get_attribute_node_ns (undef, 'title');
1865     if ($attr) {
1866     $term = $attr->value;
1867     }
1868     }
1869     }
1870     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1871     ## TEXT_NODE or CDATA_SECTION_NODE
1872     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1873     next;
1874     }
1875     undef $term;
1876     last;
1877     }
1878     }
1879     unless (defined $term) {
1880     $term = $node->text_content;
1881     }
1882     }
1883     if ($self->{term}->{$term}) {
1884     $self->{onerror}->(node => $node, type => 'duplicate term');
1885     push @{$self->{term}->{$term}}, $node;
1886     } else {
1887     $self->{term}->{$term} = [$node];
1888     }
1889     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1890     ## has |title|.
1891    
1892     return ($sib, $ch);
1893     },
1894     };
1895    
1896     $Element->{$HTML_NS}->{abbr} = {
1897     attrs_checker => $GetHTMLAttrsChecker->({
1898     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1899     ## not different. The spec says that the |title| MAY be omitted
1900     ## if there is a |dfn| whose defining term is the abbreviation,
1901     ## but it does not prohibit |abbr| w/o |title| in other cases.
1902     }),
1903     checker => $HTMLStrictlyInlineChecker,
1904     };
1905    
1906     $Element->{$HTML_NS}->{time} = {
1907     attrs_checker => $GetHTMLAttrsChecker->({
1908     datetime => sub { 1 }, # checked in |checker|
1909     }),
1910     ## TODO: Write tests
1911     checker => sub {
1912     my ($self, $todo) = @_;
1913    
1914     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1915     my $input;
1916     my $reg_sp;
1917     my $input_node;
1918     if ($attr) {
1919     $input = $attr->value;
1920     $reg_sp = qr/[\x09-\x0D\x20]*/;
1921     $input_node = $attr;
1922     } else {
1923     $input = $todo->{node}->text_content;
1924     $reg_sp = qr/\p{Zs}*/;
1925     $input_node = $todo->{node};
1926    
1927     ## ISSUE: What is the definition for "successfully extracts a date
1928     ## or time"? If the algorithm says the string is invalid but
1929     ## return some date or time, is it "successfully"?
1930     }
1931    
1932     my $hour;
1933     my $minute;
1934     my $second;
1935     if ($input =~ /
1936     \A
1937     [\x09-\x0D\x20]*
1938     ([0-9]+) # 1
1939     (?>
1940     -([0-9]+) # 2
1941     -([0-9]+) # 3
1942     [\x09-\x0D\x20]*
1943     (?>
1944     T
1945     [\x09-\x0D\x20]*
1946     )?
1947     ([0-9]+) # 4
1948     :([0-9]+) # 5
1949     (?>
1950     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1951     )?
1952     [\x09-\x0D\x20]*
1953     (?>
1954     Z
1955     [\x09-\x0D\x20]*
1956     |
1957     [+-]([0-9]+):([0-9]+) # 7, 8
1958     [\x09-\x0D\x20]*
1959     )?
1960     \z
1961     |
1962     :([0-9]+) # 9
1963     (?>
1964     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1965     )?
1966     [\x09-\x0D\x20]*\z
1967     )
1968     /x) {
1969     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1970     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1971     length $4 != 2 or length $5 != 2) {
1972     $self->{onerror}->(node => $input_node,
1973     type => 'dateortime:syntax error');
1974     }
1975    
1976     if (1 <= $2 and $2 <= 12) {
1977     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1978     if $3 < 1 or
1979     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1980     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1981     if $2 == 2 and $3 == 29 and
1982     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1983     } else {
1984     $self->{onerror}->(node => $input_node,
1985     type => 'datetime:bad month');
1986     }
1987    
1988     ($hour, $minute, $second) = ($4, $5, $6);
1989    
1990     if (defined $7) { ## [+-]hh:mm
1991     if (length $7 != 2 or length $8 != 2) {
1992     $self->{onerror}->(node => $input_node,
1993     type => 'dateortime:syntax error');
1994     }
1995    
1996     $self->{onerror}->(node => $input_node,
1997     type => 'datetime:bad timezone hour')
1998     if $7 > 23;
1999     $self->{onerror}->(node => $input_node,
2000     type => 'datetime:bad timezone minute')
2001     if $8 > 59;
2002     }
2003     } else { ## hh:mm
2004     if (length $1 != 2 or length $9 != 2) {
2005     $self->{onerror}->(node => $input_node,
2006     type => qq'dateortime:syntax error');
2007     }
2008    
2009     ($hour, $minute, $second) = ($1, $9, $10);
2010     }
2011    
2012     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2013     if $hour > 23;
2014     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2015     if $minute > 59;
2016    
2017     if (defined $second) { ## s
2018     ## NOTE: Integer part of second don't have to have length of two.
2019    
2020     if (substr ($second, 0, 1) eq '.') {
2021     $self->{onerror}->(node => $input_node,
2022     type => 'dateortime:syntax error');
2023     }
2024    
2025     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2026     if $second >= 60;
2027     }
2028     } else {
2029     $self->{onerror}->(node => $input_node,
2030     type => 'dateortime:syntax error');
2031     }
2032    
2033     return $HTMLStrictlyInlineChecker->($self, $todo);
2034     },
2035     };
2036    
2037     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2038     attrs_checker => $GetHTMLAttrsChecker->({
2039     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2040     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2041     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2042     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2043     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2044     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2045     }),
2046     checker => $HTMLStrictlyInlineChecker,
2047     };
2048    
2049     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2050     attrs_checker => $GetHTMLAttrsChecker->({
2051     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2052     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2053     }),
2054     checker => $HTMLStrictlyInlineChecker,
2055     };
2056    
2057     $Element->{$HTML_NS}->{code} = {
2058     attrs_checker => $GetHTMLAttrsChecker->({}),
2059     ## NOTE: Though |title| has special semantics,
2060     ## syntatically same as the |title| as global attribute.
2061     checker => $HTMLInlineOrStrictlyInlineChecker,
2062     };
2063    
2064     $Element->{$HTML_NS}->{var} = {
2065     attrs_checker => $GetHTMLAttrsChecker->({}),
2066     ## NOTE: Though |title| has special semantics,
2067     ## syntatically same as the |title| as global attribute.
2068     checker => $HTMLStrictlyInlineChecker,
2069     };
2070    
2071     $Element->{$HTML_NS}->{samp} = {
2072     attrs_checker => $GetHTMLAttrsChecker->({}),
2073     ## NOTE: Though |title| has special semantics,
2074     ## syntatically same as the |title| as global attribute.
2075     checker => $HTMLInlineOrStrictlyInlineChecker,
2076     };
2077    
2078     $Element->{$HTML_NS}->{kbd} = {
2079     attrs_checker => $GetHTMLAttrsChecker->({}),
2080     checker => $HTMLStrictlyInlineChecker,
2081     };
2082    
2083     $Element->{$HTML_NS}->{sub} = {
2084     attrs_checker => $GetHTMLAttrsChecker->({}),
2085     checker => $HTMLStrictlyInlineChecker,
2086     };
2087    
2088     $Element->{$HTML_NS}->{sup} = {
2089     attrs_checker => $GetHTMLAttrsChecker->({}),
2090     checker => $HTMLStrictlyInlineChecker,
2091     };
2092    
2093     $Element->{$HTML_NS}->{span} = {
2094     attrs_checker => $GetHTMLAttrsChecker->({}),
2095     ## NOTE: Though |title| has special semantics,
2096     ## syntatically same as the |title| as global attribute.
2097     checker => $HTMLInlineOrStrictlyInlineChecker,
2098     };
2099    
2100     $Element->{$HTML_NS}->{i} = {
2101     attrs_checker => $GetHTMLAttrsChecker->({}),
2102     ## NOTE: Though |title| has special semantics,
2103     ## syntatically same as the |title| as global attribute.
2104     checker => $HTMLStrictlyInlineChecker,
2105     };
2106    
2107     $Element->{$HTML_NS}->{b} = {
2108     attrs_checker => $GetHTMLAttrsChecker->({}),
2109     checker => $HTMLStrictlyInlineChecker,
2110     };
2111    
2112     $Element->{$HTML_NS}->{bdo} = {
2113     attrs_checker => sub {
2114     my ($self, $todo) = @_;
2115     $GetHTMLAttrsChecker->({})->($self, $todo);
2116     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2117     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2118     }
2119     },
2120     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2121     checker => $HTMLStrictlyInlineChecker,
2122     };
2123    
2124     $Element->{$HTML_NS}->{ins} = {
2125     attrs_checker => $GetHTMLAttrsChecker->({
2126     cite => $HTMLURIAttrChecker,
2127     datetime => $HTMLDatetimeAttrChecker,
2128     }),
2129     checker => $HTMLTransparentChecker,
2130     };
2131    
2132     $Element->{$HTML_NS}->{del} = {
2133     attrs_checker => $GetHTMLAttrsChecker->({
2134     cite => $HTMLURIAttrChecker,
2135     datetime => $HTMLDatetimeAttrChecker,
2136     }),
2137     checker => sub {
2138     my ($self, $todo) = @_;
2139    
2140     my $parent = $todo->{node}->manakai_parent_element;
2141     if (defined $parent) {
2142     my $nsuri = $parent->namespace_uri;
2143     $nsuri = '' unless defined $nsuri;
2144     my $ln = $parent->manakai_local_name;
2145     my $eldef = $Element->{$nsuri}->{$ln} ||
2146     $Element->{$nsuri}->{''} ||
2147     $ElementDefault;
2148     return $eldef->{checker}->($self, $todo);
2149     } else {
2150     return $HTMLBlockOrInlineChecker->($self, $todo);
2151     }
2152     },
2153     };
2154    
2155     ## TODO: figure
2156 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2157 wakaba 1.1
2158 wakaba 1.4 ## TODO: |alt|
2159 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2160     attrs_checker => sub {
2161     my ($self, $todo) = @_;
2162     $GetHTMLAttrsChecker->({
2163     alt => sub { }, ## NOTE: No syntactical requirement
2164     src => $HTMLURIAttrChecker,
2165     usemap => $HTMLUsemapAttrChecker,
2166     ismap => sub {
2167     my ($self, $attr, $parent_todo) = @_;
2168 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2169     $self->{onerror}->(node => $attr,
2170     type => 'attribute not allowed:ismap');
2171 wakaba 1.1 }
2172     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2173     },
2174     ## TODO: height
2175     ## TODO: width
2176     })->($self, $todo);
2177     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2178     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2179     }
2180     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2181     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2182     }
2183     },
2184     checker => $HTMLEmptyChecker,
2185     };
2186    
2187     $Element->{$HTML_NS}->{iframe} = {
2188     attrs_checker => $GetHTMLAttrsChecker->({
2189     src => $HTMLURIAttrChecker,
2190     }),
2191     checker => $HTMLTextChecker,
2192     };
2193    
2194     $Element->{$HTML_NS}->{embed} = {
2195     attrs_checker => sub {
2196     my ($self, $todo) = @_;
2197     my $has_src;
2198     for my $attr (@{$todo->{node}->attributes}) {
2199     my $attr_ns = $attr->namespace_uri;
2200     $attr_ns = '' unless defined $attr_ns;
2201     my $attr_ln = $attr->manakai_local_name;
2202     my $checker;
2203     if ($attr_ns eq '') {
2204     if ($attr_ln eq 'src') {
2205     $checker = $HTMLURIAttrChecker;
2206     $has_src = 1;
2207     } elsif ($attr_ln eq 'type') {
2208     $checker = $HTMLIMTAttrChecker;
2209     } else {
2210     ## TODO: height
2211     ## TODO: width
2212     $checker = $HTMLAttrChecker->{$attr_ln}
2213     || sub { }; ## NOTE: Any local attribute is ok.
2214     }
2215     }
2216     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2217     || $AttrChecker->{$attr_ns}->{''};
2218     if ($checker) {
2219     $checker->($self, $attr);
2220     } else {
2221     $self->{onerror}->(node => $attr, level => 'unsupported',
2222     type => 'attribute');
2223     ## ISSUE: No comformance createria for global attributes in the spec
2224     }
2225     }
2226    
2227     unless ($has_src) {
2228     $self->{onerror}->(node => $todo->{node},
2229     type => 'attribute missing:src');
2230     }
2231     },
2232     checker => $HTMLEmptyChecker,
2233     };
2234    
2235     $Element->{$HTML_NS}->{object} = {
2236     attrs_checker => sub {
2237     my ($self, $todo) = @_;
2238     $GetHTMLAttrsChecker->({
2239     data => $HTMLURIAttrChecker,
2240     type => $HTMLIMTAttrChecker,
2241     usemap => $HTMLUsemapAttrChecker,
2242     ## TODO: width
2243     ## TODO: height
2244     })->($self, $todo);
2245     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2246     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2247     $self->{onerror}->(node => $todo->{node},
2248     type => 'attribute missing:data|type');
2249     }
2250     }
2251     },
2252     checker => $ElementDefault->{checker}, ## TODO
2253 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2254 wakaba 1.1 };
2255    
2256     $Element->{$HTML_NS}->{param} = {
2257     attrs_checker => sub {
2258     my ($self, $todo) = @_;
2259     $GetHTMLAttrsChecker->({
2260     name => sub { },
2261     value => sub { },
2262     })->($self, $todo);
2263     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2264     $self->{onerror}->(node => $todo->{node},
2265     type => 'attribute missing:name');
2266     }
2267     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2268     $self->{onerror}->(node => $todo->{node},
2269     type => 'attribute missing:value');
2270     }
2271     },
2272     checker => $HTMLEmptyChecker,
2273     };
2274    
2275     $Element->{$HTML_NS}->{video} = {
2276     attrs_checker => $GetHTMLAttrsChecker->({
2277     src => $HTMLURIAttrChecker,
2278     ## TODO: start, loopstart, loopend, end
2279     ## ISSUE: they MUST be "value time offset"s. Value?
2280 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2281 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2282     controls => $GetHTMLBooleanAttrChecker->('controls'),
2283 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2284     ## TODO: width, height (not for audio!)
2285 wakaba 1.1 }),
2286     checker => sub {
2287     my ($self, $todo) = @_;
2288    
2289     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2290     return $HTMLBlockOrInlineChecker->($self, $todo);
2291     } else {
2292     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2293     ->($self, $todo);
2294     }
2295     },
2296     };
2297    
2298     $Element->{$HTML_NS}->{audio} = {
2299     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2300     checker => $Element->{$HTML_NS}->{video}->{checker},
2301     };
2302    
2303     $Element->{$HTML_NS}->{source} = {
2304     attrs_checker => sub {
2305     my ($self, $todo) = @_;
2306     $GetHTMLAttrsChecker->({
2307     src => $HTMLURIAttrChecker,
2308     type => $HTMLIMTAttrChecker,
2309     media => $HTMLMQAttrChecker,
2310     })->($self, $todo);
2311     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2312     $self->{onerror}->(node => $todo->{node},
2313     type => 'attribute missing:src');
2314     }
2315     },
2316     checker => $HTMLEmptyChecker,
2317     };
2318    
2319     $Element->{$HTML_NS}->{canvas} = {
2320     attrs_checker => $GetHTMLAttrsChecker->({
2321     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2322     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2323     }),
2324     checker => $HTMLInlineChecker,
2325     };
2326    
2327     $Element->{$HTML_NS}->{map} = {
2328 wakaba 1.4 attrs_checker => sub {
2329     my ($self, $todo) = @_;
2330     my $has_id;
2331     $GetHTMLAttrsChecker->({
2332     id => sub {
2333     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2334     my ($self, $attr) = @_;
2335     my $value = $attr->value;
2336     if (length $value > 0) {
2337     if ($self->{id}->{$value}) {
2338     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2339     push @{$self->{id}->{$value}}, $attr;
2340     } else {
2341     $self->{id}->{$value} = [$attr];
2342     }
2343 wakaba 1.1 } else {
2344 wakaba 1.4 ## NOTE: MUST contain at least one character
2345     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2346 wakaba 1.1 }
2347 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2348     $self->{onerror}->(node => $attr, type => 'space in ID');
2349     }
2350     $self->{map}->{$value} ||= $attr;
2351     $has_id = 1;
2352     },
2353     })->($self, $todo);
2354     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2355     unless $has_id;
2356     },
2357 wakaba 1.1 checker => $HTMLBlockChecker,
2358     };
2359    
2360     $Element->{$HTML_NS}->{area} = {
2361     attrs_checker => sub {
2362     my ($self, $todo) = @_;
2363     my %attr;
2364     my $coords;
2365     for my $attr (@{$todo->{node}->attributes}) {
2366     my $attr_ns = $attr->namespace_uri;
2367     $attr_ns = '' unless defined $attr_ns;
2368     my $attr_ln = $attr->manakai_local_name;
2369     my $checker;
2370     if ($attr_ns eq '') {
2371     $checker = {
2372     alt => sub { },
2373     ## NOTE: |alt| value has no conformance creteria.
2374     shape => $GetHTMLEnumeratedAttrChecker->({
2375     circ => -1, circle => 1,
2376     default => 1,
2377     poly => 1, polygon => -1,
2378     rect => 1, rectangle => -1,
2379     }),
2380     coords => sub {
2381     my ($self, $attr) = @_;
2382     my $value = $attr->value;
2383     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2384     $coords = [split /,/, $value];
2385     } else {
2386     $self->{onerror}->(node => $attr,
2387     type => 'coords:syntax error');
2388     }
2389     },
2390     target => $HTMLTargetAttrChecker,
2391     href => $HTMLURIAttrChecker,
2392     ping => $HTMLSpaceURIsAttrChecker,
2393 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2394 wakaba 1.1 media => $HTMLMQAttrChecker,
2395     hreflang => $HTMLLanguageTagAttrChecker,
2396     type => $HTMLIMTAttrChecker,
2397     }->{$attr_ln};
2398     if ($checker) {
2399     $attr{$attr_ln} = $attr;
2400     } else {
2401     $checker = $HTMLAttrChecker->{$attr_ln};
2402     }
2403     }
2404     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2405     || $AttrChecker->{$attr_ns}->{''};
2406     if ($checker) {
2407     $checker->($self, $attr) if ref $checker;
2408     } else {
2409     $self->{onerror}->(node => $attr, level => 'unsupported',
2410     type => 'attribute');
2411     ## ISSUE: No comformance createria for unknown attributes in the spec
2412     }
2413     }
2414    
2415     if (defined $attr{href}) {
2416 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2417 wakaba 1.1 unless (defined $attr{alt}) {
2418     $self->{onerror}->(node => $todo->{node},
2419     type => 'attribute missing:alt');
2420     }
2421     } else {
2422     for (qw/target ping rel media hreflang type alt/) {
2423     if (defined $attr{$_}) {
2424     $self->{onerror}->(node => $attr{$_},
2425     type => 'attribute not allowed');
2426     }
2427     }
2428     }
2429    
2430     my $shape = 'rectangle';
2431     if (defined $attr{shape}) {
2432     $shape = {
2433     circ => 'circle', circle => 'circle',
2434     default => 'default',
2435     poly => 'polygon', polygon => 'polygon',
2436     rect => 'rectangle', rectangle => 'rectangle',
2437     }->{lc $attr{shape}->value} || 'rectangle';
2438     ## TODO: ASCII lowercase?
2439     }
2440    
2441     if ($shape eq 'circle') {
2442     if (defined $attr{coords}) {
2443     if (defined $coords) {
2444     if (@$coords == 3) {
2445     if ($coords->[2] < 0) {
2446     $self->{onerror}->(node => $attr{coords},
2447     type => 'coords:out of range:2');
2448     }
2449     } else {
2450     $self->{onerror}->(node => $attr{coords},
2451     type => 'coords:number:3:'.@$coords);
2452     }
2453     } else {
2454     ## NOTE: A syntax error has been reported.
2455     }
2456     } else {
2457     $self->{onerror}->(node => $todo->{node},
2458     type => 'attribute missing:coords');
2459     }
2460     } elsif ($shape eq 'default') {
2461     if (defined $attr{coords}) {
2462     $self->{onerror}->(node => $attr{coords},
2463     type => 'attribute not allowed');
2464     }
2465     } elsif ($shape eq 'polygon') {
2466     if (defined $attr{coords}) {
2467     if (defined $coords) {
2468     if (@$coords >= 6) {
2469     unless (@$coords % 2 == 0) {
2470     $self->{onerror}->(node => $attr{coords},
2471     type => 'coords:number:even:'.@$coords);
2472     }
2473     } else {
2474     $self->{onerror}->(node => $attr{coords},
2475     type => 'coords:number:>=6:'.@$coords);
2476     }
2477     } else {
2478     ## NOTE: A syntax error has been reported.
2479     }
2480     } else {
2481     $self->{onerror}->(node => $todo->{node},
2482     type => 'attribute missing:coords');
2483     }
2484     } elsif ($shape eq 'rectangle') {
2485     if (defined $attr{coords}) {
2486     if (defined $coords) {
2487     if (@$coords == 4) {
2488     unless ($coords->[0] < $coords->[2]) {
2489     $self->{onerror}->(node => $attr{coords},
2490     type => 'coords:out of range:0');
2491     }
2492     unless ($coords->[1] < $coords->[3]) {
2493     $self->{onerror}->(node => $attr{coords},
2494     type => 'coords:out of range:1');
2495     }
2496     } else {
2497     $self->{onerror}->(node => $attr{coords},
2498     type => 'coords:number:4:'.@$coords);
2499     }
2500     } else {
2501     ## NOTE: A syntax error has been reported.
2502     }
2503     } else {
2504     $self->{onerror}->(node => $todo->{node},
2505     type => 'attribute missing:coords');
2506     }
2507     }
2508     },
2509     checker => $HTMLEmptyChecker,
2510     };
2511     ## TODO: only in map
2512    
2513     $Element->{$HTML_NS}->{table} = {
2514     attrs_checker => $GetHTMLAttrsChecker->({}),
2515     checker => sub {
2516     my ($self, $todo) = @_;
2517     my $el = $todo->{node};
2518     my $new_todos = [];
2519     my @nodes = (@{$el->child_nodes});
2520    
2521     my $phase = 'before caption';
2522     my $has_tfoot;
2523     while (@nodes) {
2524     my $node = shift @nodes;
2525     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2526    
2527     my $nt = $node->node_type;
2528     if ($nt == 1) {
2529     my $node_ns = $node->namespace_uri;
2530     $node_ns = '' unless defined $node_ns;
2531     my $node_ln = $node->manakai_local_name;
2532     ## NOTE: |minuses| list is not checked since redundant
2533 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2534     #
2535     } elsif ($phase eq 'in tbodys') {
2536 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2537     #$phase = 'in tbodys';
2538     } elsif (not $has_tfoot and
2539     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2540     $phase = 'after tfoot';
2541     $has_tfoot = 1;
2542     } else {
2543     $self->{onerror}->(node => $node, type => 'element not allowed');
2544     }
2545     } elsif ($phase eq 'in trs') {
2546     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2547     #$phase = 'in trs';
2548     } elsif (not $has_tfoot and
2549     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2550     $phase = 'after tfoot';
2551     $has_tfoot = 1;
2552     } else {
2553     $self->{onerror}->(node => $node, type => 'element not allowed');
2554     }
2555     } elsif ($phase eq 'after thead') {
2556     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2557     $phase = 'in tbodys';
2558     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2559     $phase = 'in trs';
2560     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2561     $phase = 'in tbodys';
2562     $has_tfoot = 1;
2563     } else {
2564     $self->{onerror}->(node => $node, type => 'element not allowed');
2565     }
2566     } elsif ($phase eq 'in colgroup') {
2567     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2568     $phase = 'in colgroup';
2569     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2570     $phase = 'after thead';
2571     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2572     $phase = 'in tbodys';
2573     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2574     $phase = 'in trs';
2575     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2576     $phase = 'in tbodys';
2577     $has_tfoot = 1;
2578     } else {
2579     $self->{onerror}->(node => $node, type => 'element not allowed');
2580     }
2581     } elsif ($phase eq 'before caption') {
2582     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2583     $phase = 'in colgroup';
2584     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2585     $phase = 'in colgroup';
2586     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2587     $phase = 'after thead';
2588     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2589     $phase = 'in tbodys';
2590     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2591     $phase = 'in trs';
2592     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2593     $phase = 'in tbodys';
2594     $has_tfoot = 1;
2595     } else {
2596     $self->{onerror}->(node => $node, type => 'element not allowed');
2597     }
2598     } else { # after tfoot
2599     $self->{onerror}->(node => $node, type => 'element not allowed');
2600     }
2601     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2602     unshift @nodes, @$sib;
2603     push @$new_todos, @$ch;
2604     } elsif ($nt == 3 or $nt == 4) {
2605     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2606     $self->{onerror}->(node => $node, type => 'character not allowed');
2607     }
2608     } elsif ($nt == 5) {
2609     unshift @nodes, @{$node->child_nodes};
2610     }
2611     }
2612    
2613     ## Table model errors
2614     require Whatpm::HTMLTable;
2615     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2616     my %opt = @_;
2617     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2618     });
2619     push @{$self->{return}->{table}}, $todo->{node};
2620    
2621     return ($new_todos);
2622     },
2623     };
2624    
2625     $Element->{$HTML_NS}->{caption} = {
2626     attrs_checker => $GetHTMLAttrsChecker->({}),
2627 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2628 wakaba 1.1 };
2629    
2630     $Element->{$HTML_NS}->{colgroup} = {
2631     attrs_checker => $GetHTMLAttrsChecker->({
2632     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2633     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2634     ## TODO: "attribute not supported" if |col|.
2635     ## ISSUE: MUST NOT if any |col|?
2636     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2637     }),
2638     checker => sub {
2639     my ($self, $todo) = @_;
2640     my $el = $todo->{node};
2641     my $new_todos = [];
2642     my @nodes = (@{$el->child_nodes});
2643    
2644     while (@nodes) {
2645     my $node = shift @nodes;
2646     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2647    
2648     my $nt = $node->node_type;
2649     if ($nt == 1) {
2650     my $node_ns = $node->namespace_uri;
2651     $node_ns = '' unless defined $node_ns;
2652     my $node_ln = $node->manakai_local_name;
2653     ## NOTE: |minuses| list is not checked since redundant
2654 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2655     #
2656     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2657 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2658     }
2659     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2660     unshift @nodes, @$sib;
2661     push @$new_todos, @$ch;
2662     } elsif ($nt == 3 or $nt == 4) {
2663     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2664     $self->{onerror}->(node => $node, type => 'character not allowed');
2665     }
2666     } elsif ($nt == 5) {
2667     unshift @nodes, @{$node->child_nodes};
2668     }
2669     }
2670     return ($new_todos);
2671     },
2672     };
2673    
2674     $Element->{$HTML_NS}->{col} = {
2675     attrs_checker => $GetHTMLAttrsChecker->({
2676     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2677     }),
2678     checker => $HTMLEmptyChecker,
2679     };
2680    
2681     $Element->{$HTML_NS}->{tbody} = {
2682     attrs_checker => $GetHTMLAttrsChecker->({}),
2683     checker => sub {
2684     my ($self, $todo) = @_;
2685     my $el = $todo->{node};
2686     my $new_todos = [];
2687     my @nodes = (@{$el->child_nodes});
2688    
2689     my $has_tr;
2690     while (@nodes) {
2691     my $node = shift @nodes;
2692     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2693    
2694     my $nt = $node->node_type;
2695     if ($nt == 1) {
2696     my $node_ns = $node->namespace_uri;
2697     $node_ns = '' unless defined $node_ns;
2698     my $node_ln = $node->manakai_local_name;
2699     ## NOTE: |minuses| list is not checked since redundant
2700 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2701     #
2702     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2703 wakaba 1.1 $has_tr = 1;
2704     } else {
2705     $self->{onerror}->(node => $node, type => 'element not allowed');
2706     }
2707     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2708     unshift @nodes, @$sib;
2709     push @$new_todos, @$ch;
2710     } elsif ($nt == 3 or $nt == 4) {
2711     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2712     $self->{onerror}->(node => $node, type => 'character not allowed');
2713     }
2714     } elsif ($nt == 5) {
2715     unshift @nodes, @{$node->child_nodes};
2716     }
2717     }
2718     unless ($has_tr) {
2719     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2720     }
2721     return ($new_todos);
2722     },
2723     };
2724    
2725     $Element->{$HTML_NS}->{thead} = {
2726     attrs_checker => $GetHTMLAttrsChecker->({}),
2727     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2728     };
2729    
2730     $Element->{$HTML_NS}->{tfoot} = {
2731     attrs_checker => $GetHTMLAttrsChecker->({}),
2732     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2733     };
2734    
2735     $Element->{$HTML_NS}->{tr} = {
2736     attrs_checker => $GetHTMLAttrsChecker->({}),
2737     checker => sub {
2738     my ($self, $todo) = @_;
2739     my $el = $todo->{node};
2740     my $new_todos = [];
2741     my @nodes = (@{$el->child_nodes});
2742    
2743     my $has_td;
2744     while (@nodes) {
2745     my $node = shift @nodes;
2746     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2747    
2748     my $nt = $node->node_type;
2749     if ($nt == 1) {
2750     my $node_ns = $node->namespace_uri;
2751     $node_ns = '' unless defined $node_ns;
2752     my $node_ln = $node->manakai_local_name;
2753     ## NOTE: |minuses| list is not checked since redundant
2754 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2755     #
2756     } elsif ($node_ns eq $HTML_NS and
2757     ($node_ln eq 'td' or $node_ln eq 'th')) {
2758 wakaba 1.1 $has_td = 1;
2759     } else {
2760     $self->{onerror}->(node => $node, type => 'element not allowed');
2761     }
2762     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2763     unshift @nodes, @$sib;
2764     push @$new_todos, @$ch;
2765     } elsif ($nt == 3 or $nt == 4) {
2766     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2767     $self->{onerror}->(node => $node, type => 'character not allowed');
2768     }
2769     } elsif ($nt == 5) {
2770     unshift @nodes, @{$node->child_nodes};
2771     }
2772     }
2773     unless ($has_td) {
2774     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2775     }
2776     return ($new_todos);
2777     },
2778     };
2779    
2780     $Element->{$HTML_NS}->{td} = {
2781     attrs_checker => $GetHTMLAttrsChecker->({
2782     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2783     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2784     }),
2785     checker => $HTMLBlockOrInlineChecker,
2786     };
2787    
2788     $Element->{$HTML_NS}->{th} = {
2789     attrs_checker => $GetHTMLAttrsChecker->({
2790     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2791     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2792     scope => $GetHTMLEnumeratedAttrChecker
2793     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2794     }),
2795     checker => $HTMLBlockOrInlineChecker,
2796     };
2797    
2798     ## TODO: forms
2799 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2800 wakaba 1.1
2801     $Element->{$HTML_NS}->{script} = {
2802 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2803 wakaba 1.1 src => $HTMLURIAttrChecker,
2804     defer => $GetHTMLBooleanAttrChecker->('defer'),
2805     async => $GetHTMLBooleanAttrChecker->('async'),
2806     type => $HTMLIMTAttrChecker,
2807 wakaba 1.9 }),
2808 wakaba 1.1 checker => sub {
2809     my ($self, $todo) = @_;
2810    
2811     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2812     return $HTMLEmptyChecker->($self, $todo);
2813     } else {
2814     ## NOTE: No content model conformance in HTML5 spec.
2815     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2816     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2817     if ((defined $type and $type eq '') or
2818     (defined $language and $language eq '')) {
2819     $type = 'text/javascript';
2820     } elsif (defined $type) {
2821     #
2822     } elsif (defined $language) {
2823     $type = 'text/' . $language;
2824     } else {
2825     $type = 'text/javascript';
2826     }
2827     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2828     type => 'script:'.$type); ## TODO: $type normalization
2829     return $AnyChecker->($self, $todo);
2830     }
2831     },
2832     };
2833    
2834     ## NOTE: When script is disabled.
2835     $Element->{$HTML_NS}->{noscript} = {
2836 wakaba 1.3 attrs_checker => sub {
2837     my ($self, $todo) = @_;
2838    
2839     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2840     ## since the later is not invoked when the |noscript| is used as a
2841     ## transparent element.
2842     unless ($todo->{node}->owner_document->manakai_is_html) {
2843     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2844     }
2845    
2846     $GetHTMLAttrsChecker->({})->($self, $todo);
2847     },
2848 wakaba 1.1 checker => sub {
2849     my ($self, $todo) = @_;
2850    
2851 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2852     my $new_todos = [];
2853     my @nodes = (@{$todo->{node}->child_nodes});
2854    
2855     while (@nodes) {
2856     my $node = shift @nodes;
2857     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2858    
2859     my $nt = $node->node_type;
2860     if ($nt == 1) {
2861     my $node_ns = $node->namespace_uri;
2862     $node_ns = '' unless defined $node_ns;
2863     my $node_ln = $node->manakai_local_name;
2864 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2865     #
2866     } elsif ($node_ns eq $HTML_NS) {
2867 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2868     #
2869     } elsif ($node_ln eq 'meta') {
2870 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2871     #
2872 wakaba 1.3 } else {
2873 wakaba 1.5 $self->{onerror}->(node => $node,
2874     type => 'element not allowed');
2875 wakaba 1.3 }
2876     } else {
2877     $self->{onerror}->(node => $node, type => 'element not allowed');
2878     }
2879     } else {
2880     $self->{onerror}->(node => $node, type => 'element not allowed');
2881     }
2882    
2883     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2884     unshift @nodes, @$sib;
2885     push @$new_todos, @$ch;
2886     } elsif ($nt == 3 or $nt == 4) {
2887     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2888     $self->{onerror}->(node => $node, type => 'character not allowed');
2889     }
2890     } elsif ($nt == 5) {
2891     unshift @nodes, @{$node->child_nodes};
2892     }
2893     }
2894     return ($new_todos);
2895     } else {
2896     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2897     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2898     push @$sib, $end;
2899     return ($sib, $ch);
2900     }
2901 wakaba 1.1 },
2902     };
2903 wakaba 1.3
2904     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2905 wakaba 1.1
2906     $Element->{$HTML_NS}->{'event-source'} = {
2907     attrs_checker => $GetHTMLAttrsChecker->({
2908     src => $HTMLURIAttrChecker,
2909     }),
2910     checker => $HTMLEmptyChecker,
2911     };
2912    
2913     $Element->{$HTML_NS}->{details} = {
2914     attrs_checker => $GetHTMLAttrsChecker->({
2915     open => $GetHTMLBooleanAttrChecker->('open'),
2916     }),
2917     checker => sub {
2918     my ($self, $todo) = @_;
2919    
2920     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2921     my ($sib, $ch)
2922     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2923     ->($self, $todo);
2924     push @$sib, $end;
2925     return ($sib, $ch);
2926     },
2927     };
2928    
2929     $Element->{$HTML_NS}->{datagrid} = {
2930     attrs_checker => $GetHTMLAttrsChecker->({
2931     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2932     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2933     }),
2934     checker => sub {
2935     my ($self, $todo) = @_;
2936     my $el = $todo->{node};
2937     my $new_todos = [];
2938     my @nodes = (@{$el->child_nodes});
2939    
2940     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2941    
2942     ## Block-table Block* | table | select | datalist | Empty
2943     my $mode = 'any';
2944     while (@nodes) {
2945     my $node = shift @nodes;
2946     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2947    
2948     my $nt = $node->node_type;
2949     if ($nt == 1) {
2950     my $node_ns = $node->namespace_uri;
2951     $node_ns = '' unless defined $node_ns;
2952     my $node_ln = $node->manakai_local_name;
2953     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2954 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2955     #
2956     } elsif ($mode eq 'block') {
2957 wakaba 1.1 $not_allowed = 1
2958     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2959     } elsif ($mode eq 'any') {
2960     if ($node_ns eq $HTML_NS and
2961     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2962     $mode = 'none';
2963     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2964     $mode = 'block';
2965     } else {
2966     $not_allowed = 1;
2967     }
2968     } else {
2969     $not_allowed = 1;
2970     }
2971     $self->{onerror}->(node => $node, type => 'element not allowed')
2972     if $not_allowed;
2973     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2974     unshift @nodes, @$sib;
2975     push @$new_todos, @$ch;
2976     } elsif ($nt == 3 or $nt == 4) {
2977     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2978     $self->{onerror}->(node => $node, type => 'character not allowed');
2979     }
2980     } elsif ($nt == 5) {
2981     unshift @nodes, @{$node->child_nodes};
2982     }
2983     }
2984    
2985     push @$new_todos, $end;
2986     return ($new_todos);
2987     },
2988     };
2989    
2990     $Element->{$HTML_NS}->{command} = {
2991     attrs_checker => $GetHTMLAttrsChecker->({
2992     checked => $GetHTMLBooleanAttrChecker->('checked'),
2993     default => $GetHTMLBooleanAttrChecker->('default'),
2994     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2995     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2996     icon => $HTMLURIAttrChecker,
2997     label => sub { }, ## NOTE: No conformance creteria
2998     radiogroup => sub { }, ## NOTE: No conformance creteria
2999     ## NOTE: |title| has special semantics, but no syntactical difference
3000     type => sub {
3001     my ($self, $attr) = @_;
3002     my $value = $attr->value;
3003     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3004     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3005     }
3006     },
3007     }),
3008     checker => $HTMLEmptyChecker,
3009     };
3010    
3011     $Element->{$HTML_NS}->{menu} = {
3012     attrs_checker => $GetHTMLAttrsChecker->({
3013     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3014     id => sub {
3015     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3016     my ($self, $attr) = @_;
3017     my $value = $attr->value;
3018     if (length $value > 0) {
3019     if ($self->{id}->{$value}) {
3020     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3021     push @{$self->{id}->{$value}}, $attr;
3022     } else {
3023     $self->{id}->{$value} = [$attr];
3024     }
3025     } else {
3026     ## NOTE: MUST contain at least one character
3027     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3028     }
3029     if ($value =~ /[\x09-\x0D\x20]/) {
3030     $self->{onerror}->(node => $attr, type => 'space in ID');
3031     }
3032     $self->{menu}->{$value} ||= $attr;
3033     ## ISSUE: <menu id=""><p contextmenu=""> match?
3034     },
3035     label => sub { }, ## NOTE: No conformance creteria
3036     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3037     }),
3038     checker => sub {
3039     my ($self, $todo) = @_;
3040     my $el = $todo->{node};
3041     my $new_todos = [];
3042     my @nodes = (@{$el->child_nodes});
3043    
3044     my $content = 'li or inline';
3045     while (@nodes) {
3046     my $node = shift @nodes;
3047     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3048    
3049     my $nt = $node->node_type;
3050     if ($nt == 1) {
3051     my $node_ns = $node->namespace_uri;
3052     $node_ns = '' unless defined $node_ns;
3053     my $node_ln = $node->manakai_local_name;
3054     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3055 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3056     #
3057     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3058 wakaba 1.1 if ($content eq 'inline') {
3059     $not_allowed = 1;
3060     } elsif ($content eq 'li or inline') {
3061     $content = 'li';
3062     }
3063     } else {
3064     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3065     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3066     $content = 'inline';
3067     } else {
3068     $not_allowed = 1;
3069     }
3070     }
3071     $self->{onerror}->(node => $node, type => 'element not allowed')
3072     if $not_allowed;
3073     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3074     unshift @nodes, @$sib;
3075     push @$new_todos, @$ch;
3076     } elsif ($nt == 3 or $nt == 4) {
3077     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3078     if ($content eq 'li') {
3079     $self->{onerror}->(node => $node, type => 'character not allowed');
3080     } elsif ($content eq 'li or inline') {
3081     $content = 'inline';
3082     }
3083     }
3084     } elsif ($nt == 5) {
3085     unshift @nodes, @{$node->child_nodes};
3086     }
3087     }
3088    
3089     for (@$new_todos) {
3090     $_->{inline} = 1;
3091     }
3092     return ($new_todos);
3093     },
3094 wakaba 1.8 };
3095    
3096     $Element->{$HTML_NS}->{datatemplate} = {
3097     attrs_checker => $GetHTMLAttrsChecker->({}),
3098     checker => sub {
3099     my ($self, $todo) = @_;
3100     my $el = $todo->{node};
3101     my $new_todos = [];
3102     my @nodes = (@{$el->child_nodes});
3103    
3104     while (@nodes) {
3105     my $node = shift @nodes;
3106     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3107    
3108     my $nt = $node->node_type;
3109     if ($nt == 1) {
3110     my $node_ns = $node->namespace_uri;
3111     $node_ns = '' unless defined $node_ns;
3112     my $node_ln = $node->manakai_local_name;
3113     ## NOTE: |minuses| list is not checked since redundant
3114     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3115     #
3116     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3117     $self->{onerror}->(node => $node,
3118     type => 'element not allowed:datatemplate');
3119     }
3120     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3121     unshift @nodes, @$sib;
3122     push @$new_todos, @$ch;
3123     } elsif ($nt == 3 or $nt == 4) {
3124     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3125     $self->{onerror}->(node => $node, type => 'character not allowed');
3126     }
3127     } elsif ($nt == 5) {
3128     unshift @nodes, @{$node->child_nodes};
3129     }
3130     }
3131     return ($new_todos);
3132     },
3133     is_xml_root => 1,
3134     };
3135    
3136     $Element->{$HTML_NS}->{rule} = {
3137     attrs_checker => $GetHTMLAttrsChecker->({
3138     ## TODO: |condition| attribute
3139 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3140 wakaba 1.8 }),
3141     checker => sub {
3142     my ($self, $todo) = @_;
3143    
3144     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3145     my ($sib, $ch) = $AnyChecker->($self, $todo);
3146     push @$sib, $end;
3147     return ($sib, $ch);
3148     },
3149     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3150     ## is applied to some conforming data, results in a conforming DOM tree.":
3151     ## We don't check against this.
3152     };
3153    
3154     $Element->{$HTML_NS}->{nest} = {
3155     attrs_checker => $GetHTMLAttrsChecker->({
3156     ## TODO: |filter| attribute
3157 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3158 wakaba 1.8 }),
3159     checker => $HTMLEmptyChecker,
3160 wakaba 1.1 };
3161    
3162     $Element->{$HTML_NS}->{legend} = {
3163     attrs_checker => $GetHTMLAttrsChecker->({}),
3164     checker => sub {
3165     my ($self, $todo) = @_;
3166    
3167     my $parent = $todo->{node}->manakai_parent_element;
3168     if (defined $parent) {
3169     my $nsuri = $parent->namespace_uri;
3170     $nsuri = '' unless defined $nsuri;
3171     my $ln = $parent->manakai_local_name;
3172     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3173     return $HTMLInlineChecker->($self, $todo);
3174     } else {
3175 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3176 wakaba 1.1 }
3177     } else {
3178     return $HTMLInlineChecker->($self, $todo);
3179     }
3180    
3181     ## ISSUE: Content model is defined only for fieldset/legend,
3182     ## details/legend, and figure/legend.
3183     },
3184     };
3185    
3186     $Element->{$HTML_NS}->{div} = {
3187     attrs_checker => $GetHTMLAttrsChecker->({}),
3188     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3189     };
3190    
3191     $Element->{$HTML_NS}->{font} = {
3192     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3193     checker => $HTMLTransparentChecker,
3194     };
3195    
3196     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3197    
3198     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24