/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.23 - (hide annotations) (download)
Sat Nov 24 11:21:04 2007 UTC (17 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.22: +39 -3 lines
++ whatpm/t/ChangeLog	24 Nov 2007 11:19:23 -0000
2007-11-24  Wakaba  <wakaba@suika.fam.cx>

	* content-model-2.dat: nest/@mode test data revised.
	Test data for nest/@filter and rule/@condition are added.

++ whatpm/Whatpm/CSS/ChangeLog	24 Nov 2007 11:20:48 -0000
2007-11-24  Wakaba  <wakaba@suika.fam.cx>

	* SelectorsParser.pm (parse_string): Raise errors if appropriate.

++ whatpm/Whatpm/ContentChecker/ChangeLog	24 Nov 2007 11:20:02 -0000
2007-11-24  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm (nest/@filter, rule/@condition): Implemented.
	(nest/@mode): Definition was wrong.
	($HTMLSelectorsAttrChecker): New.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     ## Strictly inline-level content
252     my $HTMLStrictlyInlineChecker = sub {
253     my ($self, $todo) = @_;
254     my $el = $todo->{node};
255     my $new_todos = [];
256     my @nodes = (@{$el->child_nodes});
257    
258     while (@nodes) {
259     my $node = shift @nodes;
260     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
261    
262     my $nt = $node->node_type;
263     if ($nt == 1) {
264     my $node_ns = $node->namespace_uri;
265     $node_ns = '' unless defined $node_ns;
266     my $node_ln = $node->manakai_local_name;
267     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
268     $not_allowed = 1
269 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
270     $self->{pluses}->{$node_ns}->{$node_ln};
271 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
272     if $not_allowed;
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 5) {
277     unshift @nodes, @{$node->child_nodes};
278     }
279     }
280    
281     for (@$new_todos) {
282     $_->{inline} = 1;
283     $_->{strictly_inline} = 1;
284     }
285     return ($new_todos);
286     }; # $HTMLStrictlyInlineChecker
287    
288 wakaba 1.8 ## Inline-level or strictly inline-level content
289 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
290     my ($self, $todo) = @_;
291     my $el = $todo->{node};
292     my $new_todos = [];
293     my @nodes = (@{$el->child_nodes});
294    
295     while (@nodes) {
296     my $node = shift @nodes;
297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
298    
299     my $nt = $node->node_type;
300     if ($nt == 1) {
301     my $node_ns = $node->namespace_uri;
302     $node_ns = '' unless defined $node_ns;
303     my $node_ln = $node->manakai_local_name;
304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
305     if ($todo->{strictly_inline}) {
306     $not_allowed = 1
307 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308     $self->{pluses}->{$node_ns}->{$node_ln};
309 wakaba 1.1 } else {
310     $not_allowed = 1
311     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
312 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
313     $self->{pluses}->{$node_ns}->{$node_ln};
314 wakaba 1.1 }
315     $self->{onerror}->(node => $node, type => 'element not allowed')
316     if $not_allowed;
317     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
318     unshift @nodes, @$sib;
319     push @$new_todos, @$ch;
320     } elsif ($nt == 5) {
321     unshift @nodes, @{$node->child_nodes};
322     }
323     }
324    
325     for (@$new_todos) {
326     $_->{inline} = 1;
327     $_->{strictly_inline} = 1;
328     }
329     return ($new_todos);
330     }; # $HTMLInlineOrStrictlyInlineChecker
331    
332 wakaba 1.13 ## TODO: New "significant" SHOULD-level requirement (revision 1114).
333 wakaba 1.1
334 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
335 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
336     my ($self, $todo) = @_;
337     my $el = $todo->{node};
338     my $new_todos = [];
339     my @nodes = (@{$el->child_nodes});
340    
341     my $content = 'block-or-inline'; # or 'block' or 'inline'
342     my @block_not_inline;
343     while (@nodes) {
344     my $node = shift @nodes;
345     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
346    
347 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
348     ## is conforming or not.
349    
350 wakaba 1.1 my $nt = $node->node_type;
351     if ($nt == 1) {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
356     if ($content eq 'block') {
357     $not_allowed = 1
358 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
359     $self->{pluses}->{$node_ns}->{$node_ln};
360 wakaba 1.1 } elsif ($content eq 'inline') {
361     $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
364     $self->{pluses}->{$node_ns}->{$node_ln};
365 wakaba 1.1 } else {
366     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
367     my $is_inline
368     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
369     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
370    
371     push @block_not_inline, $node
372     if $is_block and not $is_inline and not $not_allowed;
373 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
374 wakaba 1.1 $content = 'inline';
375     for (@block_not_inline) {
376     $self->{onerror}->(node => $_, type => 'element not allowed');
377     }
378     $not_allowed = 1 unless $is_inline;
379     }
380     }
381     $self->{onerror}->(node => $node, type => 'element not allowed')
382     if $not_allowed;
383     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
384     unshift @nodes, @$sib;
385     push @$new_todos, @$ch;
386     } elsif ($nt == 3 or $nt == 4) {
387     if ($node->data =~ /[^\x09-\x0D\x20]/) {
388     if ($content eq 'block') {
389     $self->{onerror}->(node => $node, type => 'character not allowed');
390     } else {
391     $content = 'inline';
392     for (@block_not_inline) {
393     $self->{onerror}->(node => $_, type => 'element not allowed');
394     }
395     }
396     }
397     } elsif ($nt == 5) {
398     unshift @nodes, @{$node->child_nodes};
399     }
400     }
401    
402     if ($content eq 'inline') {
403     for (@$new_todos) {
404     $_->{inline} = 1;
405     }
406     }
407     return ($new_todos);
408     };
409    
410     ## Zero or more XXX element, then either block-level or inline-level
411     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
412     my ($elnsuri, $ellname) = @_;
413     return sub {
414     my ($self, $todo) = @_;
415     my $el = $todo->{node};
416     my $new_todos = [];
417     my @nodes = (@{$el->child_nodes});
418    
419     my $has_non_style;
420     my $content = 'block-or-inline'; # or 'block' or 'inline'
421     my @block_not_inline;
422     while (@nodes) {
423     my $node = shift @nodes;
424     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
425    
426     my $nt = $node->node_type;
427     if ($nt == 1) {
428     my $node_ns = $node->namespace_uri;
429     $node_ns = '' unless defined $node_ns;
430     my $node_ln = $node->manakai_local_name;
431     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
432     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
433     $not_allowed = 1 if $has_non_style;
434     if ($ellname eq 'style' and
435     not $node->has_attribute_ns (undef, 'scoped')) {
436     $not_allowed = 1;
437     }
438     } elsif ($content eq 'block') {
439     $has_non_style = 1;
440     $not_allowed = 1
441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
442     $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 } elsif ($content eq 'inline') {
444     $has_non_style = 1;
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
447 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
448     $self->{pluses}->{$node_ns}->{$node_ln};
449 wakaba 1.1 } else {
450 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
452     my $is_inline
453     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455    
456     push @block_not_inline, $node
457     if $is_block and not $is_inline and not $not_allowed;
458 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
459 wakaba 1.1 $content = 'inline';
460     for (@block_not_inline) {
461     $self->{onerror}->(node => $_, type => 'element not allowed');
462     }
463     $not_allowed = 1 unless $is_inline;
464     }
465     }
466     $self->{onerror}->(node => $node, type => 'element not allowed')
467     if $not_allowed;
468     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
469     unshift @nodes, @$sib;
470     push @$new_todos, @$ch;
471     } elsif ($nt == 3 or $nt == 4) {
472     if ($node->data =~ /[^\x09-\x0D\x20]/) {
473     $has_non_style = 1;
474     if ($content eq 'block') {
475     $self->{onerror}->(node => $node, type => 'character not allowed');
476     } else {
477     $content = 'inline';
478     for (@block_not_inline) {
479     $self->{onerror}->(node => $_, type => 'element not allowed');
480     }
481     }
482     }
483     } elsif ($nt == 5) {
484     unshift @nodes, @{$node->child_nodes};
485     }
486     }
487    
488     if ($content eq 'inline') {
489     for (@$new_todos) {
490     $_->{inline} = 1;
491     }
492     }
493     return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
498    
499     our $AttrChecker;
500    
501     my $GetHTMLEnumeratedAttrChecker = sub {
502     my $states = shift; # {value => conforming ? 1 : -1}
503     return sub {
504     my ($self, $attr) = @_;
505     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
506     if ($states->{$value} > 0) {
507     #
508     } elsif ($states->{$value}) {
509     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
510     } else {
511     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
512     }
513     };
514     }; # $GetHTMLEnumeratedAttrChecker
515    
516     my $GetHTMLBooleanAttrChecker = sub {
517     my $local_name = shift;
518     return sub {
519     my ($self, $attr) = @_;
520     my $value = $attr->value;
521     unless ($value eq $local_name or $value eq '') {
522     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
523     }
524     };
525     }; # $GetHTMLBooleanAttrChecker
526    
527 wakaba 1.8 ## Unordered set of space-separated tokens
528 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
529 wakaba 1.8 my ($self, $attr) = @_;
530     my %word;
531     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
532     unless ($word{$word}) {
533     $word{$word} = 1;
534     } else {
535     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
536     }
537     }
538 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
539 wakaba 1.8
540 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
541     ## whose allowed values are defined by the section on link types)
542     my $HTMLLinkTypesAttrChecker = sub {
543 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
544 wakaba 1.1 my %word;
545     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
546     unless ($word{$word}) {
547     $word{$word} = 1;
548 wakaba 1.18 } elsif ($word eq 'up') {
549     #
550 wakaba 1.1 } else {
551     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
552     }
553     }
554     ## NOTE: Case sensitive match (since HTML5 spec does not say link
555     ## types are case-insensitive and it says "The value should not
556     ## be confusingly similar to any other defined value (e.g.
557     ## differing only in case).").
558     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
559     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
560     ## values to be used conformingly.
561     require Whatpm::_LinkTypeList;
562     our $LinkType;
563     for my $word (keys %word) {
564     my $def = $LinkType->{$word};
565     if (defined $def) {
566     if ($def->{status} eq 'accepted') {
567     if (defined $def->{effect}->[$a_or_area]) {
568     #
569     } else {
570     $self->{onerror}->(node => $attr,
571     type => 'link type:bad context:'.$word);
572     }
573     } elsif ($def->{status} eq 'proposal') {
574     $self->{onerror}->(node => $attr, level => 's',
575     type => 'link type:proposed:'.$word);
576 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
577     #
578     } else {
579     $self->{onerror}->(node => $attr,
580     type => 'link type:bad context:'.$word);
581     }
582 wakaba 1.1 } else { # rejected or synonym
583     $self->{onerror}->(node => $attr,
584     type => 'link type:non-conforming:'.$word);
585     }
586 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
587     if ($word eq 'alternate') {
588     #
589     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
590     $todo->{has_hyperlink_link_type} = 1;
591     }
592     }
593 wakaba 1.1 if ($def->{unique}) {
594     unless ($self->{has_link_type}->{$word}) {
595     $self->{has_link_type}->{$word} = 1;
596     } else {
597     $self->{onerror}->(node => $attr,
598     type => 'link type:duplicate:'.$word);
599     }
600     }
601     } else {
602     $self->{onerror}->(node => $attr, level => 'unsupported',
603     type => 'link type:'.$word);
604     }
605     }
606 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
607     if $word{alternate} and not $word{stylesheet};
608 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
609     ## says that using both X-Pingback: header field and HTML
610     ## <link rel=pingback> is deprecated and if both appears they
611     ## SHOULD contain exactly the same value.
612     ## ISSUE: Pingback 1.0 specification defines the exact representation
613     ## of its link element, which cannot be tested by the current arch.
614     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
615     ## include any string that matches to the pattern for the rel=pingback link,
616     ## which again inpossible to test.
617     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
618 wakaba 1.12
619     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
620 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
621     ## then they SHOULD be described in different paragraphs.".
622 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
623 wakaba 1.20
624     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
625 wakaba 1.1
626     ## URI (or IRI)
627     my $HTMLURIAttrChecker = sub {
628     my ($self, $attr) = @_;
629     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
630     my $value = $attr->value;
631     Whatpm::URIChecker->check_iri_reference ($value, sub {
632     my %opt = @_;
633     $self->{onerror}->(node => $attr, level => $opt{level},
634     type => 'URI::'.$opt{type}.
635     (defined $opt{position} ? ':'.$opt{position} : ''));
636     });
637 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
638 wakaba 1.1 }; # $HTMLURIAttrChecker
639    
640     ## A space separated list of one or more URIs (or IRIs)
641     my $HTMLSpaceURIsAttrChecker = sub {
642     my ($self, $attr) = @_;
643     my $i = 0;
644     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
645     Whatpm::URIChecker->check_iri_reference ($value, sub {
646     my %opt = @_;
647     $self->{onerror}->(node => $attr, level => $opt{level},
648 wakaba 1.2 type => 'URIs:'.':'.
649     $opt{type}.':'.$i.
650 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
651     });
652     $i++;
653     }
654     ## ISSUE: Relative references?
655     ## ISSUE: Leading or trailing white spaces are conformant?
656     ## ISSUE: A sequence of white space characters are conformant?
657     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
658     ## NOTE: Duplication seems not an error.
659 wakaba 1.4 $self->{has_uri_attr} = 1;
660 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
661    
662     my $HTMLDatetimeAttrChecker = sub {
663     my ($self, $attr) = @_;
664     my $value = $attr->value;
665     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
666     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
667     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
668     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
669     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
670     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
671     if $d < 1 or
672     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
673     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
674     if $M == 2 and $d == 29 and
675     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
676     } else {
677     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
678     }
679     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
680     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
681     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
682     if defined $s and $s > 59;
683     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
684     if $zh > 23;
685     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
686     if $zm > 59;
687     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
688     } else {
689     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
690     }
691     }; # $HTMLDatetimeAttrChecker
692    
693     my $HTMLIntegerAttrChecker = sub {
694     my ($self, $attr) = @_;
695     my $value = $attr->value;
696     unless ($value =~ /\A-?[0-9]+\z/) {
697     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
698     }
699     }; # $HTMLIntegerAttrChecker
700    
701     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
702     my $range_check = shift;
703     return sub {
704     my ($self, $attr) = @_;
705     my $value = $attr->value;
706     if ($value =~ /\A[0-9]+\z/) {
707     unless ($range_check->($value + 0)) {
708     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
709     }
710     } else {
711     $self->{onerror}->(node => $attr,
712     type => 'nninteger:syntax error');
713     }
714     };
715     }; # $GetHTMLNonNegativeIntegerAttrChecker
716    
717     my $GetHTMLFloatingPointNumberAttrChecker = sub {
718     my $range_check = shift;
719     return sub {
720     my ($self, $attr) = @_;
721     my $value = $attr->value;
722     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
723     unless ($range_check->($value + 0)) {
724     $self->{onerror}->(node => $attr, type => 'float:out of range');
725     }
726     } else {
727     $self->{onerror}->(node => $attr,
728     type => 'float:syntax error');
729     }
730     };
731     }; # $GetHTMLFloatingPointNumberAttrChecker
732    
733     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
734     ## ISSUE: RFC 2046 does not define syntax of media types.
735     ## ISSUE: The definition of "a valid MIME type" is unknown.
736     ## Syntactical correctness?
737     my $HTMLIMTAttrChecker = sub {
738     my ($self, $attr) = @_;
739     my $value = $attr->value;
740     ## ISSUE: RFC 2045 Content-Type header field allows insertion
741     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
742     ## ISSUE: RFC 2231 extension? Maybe no.
743     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
744     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
745     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
746     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
747     my @type = ($1, $2);
748     my $param = $3;
749     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
750     if (defined $2) {
751     push @type, $1 => $2;
752     } else {
753     my $n = $1;
754     my $v = $2;
755     $v =~ s/\\(.)/$1/gs;
756     push @type, $n => $v;
757     }
758     }
759     require Whatpm::IMTChecker;
760     Whatpm::IMTChecker->check_imt (sub {
761     my %opt = @_;
762     $self->{onerror}->(node => $attr, level => $opt{level},
763     type => 'IMT:'.$opt{type});
764     }, @type);
765     } else {
766     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
767     }
768     }; # $HTMLIMTAttrChecker
769    
770     my $HTMLLanguageTagAttrChecker = sub {
771 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
772    
773 wakaba 1.1 my ($self, $attr) = @_;
774 wakaba 1.6 my $value = $attr->value;
775     require Whatpm::LangTag;
776     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
777     my %opt = @_;
778     my $type = 'LangTag:'.$opt{type};
779     $type .= ':' . $opt{subtag} if defined $opt{subtag};
780     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
781     level => $opt{level});
782     });
783 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
784 wakaba 1.6
785     ## TODO: testdata
786 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
787    
788     ## "A valid media query [MQ]"
789     my $HTMLMQAttrChecker = sub {
790     my ($self, $attr) = @_;
791     $self->{onerror}->(node => $attr, level => 'unsupported',
792     type => 'media query');
793     ## ISSUE: What is "a valid media query"?
794     }; # $HTMLMQAttrChecker
795    
796     my $HTMLEventHandlerAttrChecker = sub {
797     my ($self, $attr) = @_;
798     $self->{onerror}->(node => $attr, level => 'unsupported',
799     type => 'event handler');
800     ## TODO: MUST contain valid ECMAScript code matching the
801     ## ECMAScript |FunctionBody| production. [ECMA262]
802     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
803     ## ISSUE: Automatic semicolon insertion does not apply?
804     ## ISSUE: Other script languages?
805     }; # $HTMLEventHandlerAttrChecker
806    
807     my $HTMLUsemapAttrChecker = sub {
808     my ($self, $attr) = @_;
809     ## MUST be a valid hashed ID reference to a |map| element
810     my $value = $attr->value;
811     if ($value =~ s/^#//) {
812     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
813     push @{$self->{usemap}}, [$value => $attr];
814     } else {
815     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
816     }
817     ## NOTE: Space characters in hashed ID references are conforming.
818     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
819     }; # $HTMLUsemapAttrChecker
820    
821     my $HTMLTargetAttrChecker = sub {
822     my ($self, $attr) = @_;
823     my $value = $attr->value;
824     if ($value =~ /^_/) {
825     $value = lc $value; ## ISSUE: ASCII case-insentitive?
826     unless ({
827     _self => 1, _parent => 1, _top => 1,
828     }->{$value}) {
829     $self->{onerror}->(node => $attr,
830     type => 'reserved browsing context name');
831     }
832     } else {
833     #$ ISSUE: An empty string is conforming?
834     }
835     }; # $HTMLTargetAttrChecker
836    
837 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
838     my ($self, $attr) = @_;
839    
840     ## ISSUE: Namespace resolution?
841    
842     my $value = $attr->value;
843    
844     require Whatpm::CSS::SelectorsParser;
845     my $p = Whatpm::CSS::SelectorsParser->new;
846     $p->{pseudo_class}->{$_} = 1 for qw/
847     active checked disabled empty enabled first-child first-of-type
848     focus hover indeterminate last-child last-of-type link only-child
849     only-of-type root target visited
850     lang nth-child nth-last-child nth-of-type nth-last-of-type not
851     -manakai-contains -manakai-current
852     /;
853    
854     $p->{pseudo_element}->{$_} = 1 for qw/
855     after before first-letter first-line
856     /;
857    
858     $p->{must_level} = $self->{must_level};
859     $p->{onerror} = sub {
860     my %opt = @_;
861     $opt{type} = 'selectors:'.$opt{type};
862     $self->{onerror}->(%opt, node => $attr);
863     };
864     $p->parse_string ($value);
865     }; # $HTMLSelectorsAttrChecker
866    
867 wakaba 1.1 my $HTMLAttrChecker = {
868     id => sub {
869     ## NOTE: |map| has its own variant of |id=""| checker
870     my ($self, $attr) = @_;
871     my $value = $attr->value;
872     if (length $value > 0) {
873     if ($self->{id}->{$value}) {
874     $self->{onerror}->(node => $attr, type => 'duplicate ID');
875     push @{$self->{id}->{$value}}, $attr;
876     } else {
877     $self->{id}->{$value} = [$attr];
878     }
879     if ($value =~ /[\x09-\x0D\x20]/) {
880     $self->{onerror}->(node => $attr, type => 'space in ID');
881     }
882     } else {
883     ## NOTE: MUST contain at least one character
884     $self->{onerror}->(node => $attr, type => 'empty attribute value');
885     }
886     },
887     title => sub {}, ## NOTE: No conformance creteria
888     lang => sub {
889     my ($self, $attr) = @_;
890 wakaba 1.6 my $value = $attr->value;
891     if ($value eq '') {
892     #
893     } else {
894     require Whatpm::LangTag;
895     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
896     my %opt = @_;
897     my $type = 'LangTag:'.$opt{type};
898     $type .= ':' . $opt{subtag} if defined $opt{subtag};
899     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
900     level => $opt{level});
901     });
902     }
903 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
904     unless ($attr->owner_document->manakai_is_html) {
905     $self->{onerror}->(node => $attr, type => 'in XML:lang');
906     }
907 wakaba 1.6
908     ## TODO: test data
909 wakaba 1.1 },
910     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
911     class => sub {
912     my ($self, $attr) = @_;
913     my %word;
914     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
915     unless ($word{$word}) {
916     $word{$word} = 1;
917     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
918     } else {
919     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
920     }
921     }
922     },
923     contextmenu => sub {
924     my ($self, $attr) = @_;
925     my $value = $attr->value;
926     push @{$self->{contextmenu}}, [$value => $attr];
927     ## ISSUE: "The value must be the ID of a menu element in the DOM."
928     ## What is "in the DOM"? A menu Element node that is not part
929     ## of the Document tree is in the DOM? A menu Element node that
930     ## belong to another Document tree is in the DOM?
931     },
932     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
933 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
934     ## TODO: ref, template, registrationmark
935 wakaba 1.1 };
936    
937     for (qw/
938     onabort onbeforeunload onblur onchange onclick oncontextmenu
939     ondblclick ondrag ondragend ondragenter ondragleave ondragover
940     ondragstart ondrop onerror onfocus onkeydown onkeypress
941     onkeyup onload onmessage onmousedown onmousemove onmouseout
942     onmouseover onmouseup onmousewheel onresize onscroll onselect
943     onsubmit onunload
944     /) {
945     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
946     }
947    
948     my $GetHTMLAttrsChecker = sub {
949     my $element_specific_checker = shift;
950     return sub {
951     my ($self, $todo) = @_;
952     for my $attr (@{$todo->{node}->attributes}) {
953     my $attr_ns = $attr->namespace_uri;
954     $attr_ns = '' unless defined $attr_ns;
955     my $attr_ln = $attr->manakai_local_name;
956     my $checker;
957     if ($attr_ns eq '') {
958     $checker = $element_specific_checker->{$attr_ln}
959     || $HTMLAttrChecker->{$attr_ln};
960     }
961     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
962     || $AttrChecker->{$attr_ns}->{''};
963     if ($checker) {
964     $checker->($self, $attr, $todo);
965     } else {
966     $self->{onerror}->(node => $attr, level => 'unsupported',
967     type => 'attribute');
968     ## ISSUE: No comformance createria for unknown attributes in the spec
969     }
970     }
971     };
972     }; # $GetHTMLAttrsChecker
973    
974     our $Element;
975     our $ElementDefault;
976     our $AnyChecker;
977    
978     $Element->{$HTML_NS}->{''} = {
979     attrs_checker => $GetHTMLAttrsChecker->({}),
980     checker => $ElementDefault->{checker},
981     };
982    
983     $Element->{$HTML_NS}->{html} = {
984     is_root => 1,
985     attrs_checker => $GetHTMLAttrsChecker->({
986 wakaba 1.16 manifest => $HTMLURIAttrChecker,
987 wakaba 1.1 xmlns => sub {
988     my ($self, $attr) = @_;
989     my $value = $attr->value;
990     unless ($value eq $HTML_NS) {
991     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
992     }
993     unless ($attr->owner_document->manakai_is_html) {
994     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
995     ## TODO: Test
996     }
997     },
998     }),
999     checker => sub {
1000     my ($self, $todo) = @_;
1001     my $el = $todo->{node};
1002     my $new_todos = [];
1003     my @nodes = (@{$el->child_nodes});
1004    
1005     my $phase = 'before head';
1006     while (@nodes) {
1007     my $node = shift @nodes;
1008     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1009    
1010     my $nt = $node->node_type;
1011     if ($nt == 1) {
1012     my $node_ns = $node->namespace_uri;
1013     $node_ns = '' unless defined $node_ns;
1014     my $node_ln = $node->manakai_local_name;
1015     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1016 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1017     #
1018     } elsif ($phase eq 'before head') {
1019 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
1020     $phase = 'after head';
1021     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1022     $self->{onerror}->(node => $node, type => 'ps element missing:head');
1023     $phase = 'after body';
1024     } else {
1025     $not_allowed = 1;
1026     # before head
1027     }
1028     } elsif ($phase eq 'after head') {
1029     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1030     $phase = 'after body';
1031     } else {
1032     $not_allowed = 1;
1033     # after head
1034     }
1035     } else { #elsif ($phase eq 'after body') {
1036     $not_allowed = 1;
1037     # after body
1038     }
1039     $self->{onerror}->(node => $node, type => 'element not allowed')
1040     if $not_allowed;
1041     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1042     unshift @nodes, @$sib;
1043     push @$new_todos, @$ch;
1044     } elsif ($nt == 3 or $nt == 4) {
1045     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1046     $self->{onerror}->(node => $node, type => 'character not allowed');
1047     }
1048     } elsif ($nt == 5) {
1049     unshift @nodes, @{$node->child_nodes};
1050     }
1051     }
1052    
1053     if ($phase eq 'before head') {
1054     $self->{onerror}->(node => $el, type => 'child element missing:head');
1055     $self->{onerror}->(node => $el, type => 'child element missing:body');
1056     } elsif ($phase eq 'after head') {
1057     $self->{onerror}->(node => $el, type => 'child element missing:body');
1058     }
1059    
1060     return ($new_todos);
1061     },
1062     };
1063    
1064     $Element->{$HTML_NS}->{head} = {
1065     attrs_checker => $GetHTMLAttrsChecker->({}),
1066     checker => sub {
1067     my ($self, $todo) = @_;
1068     my $el = $todo->{node};
1069     my $new_todos = [];
1070     my @nodes = (@{$el->child_nodes});
1071    
1072     my $has_title;
1073     my $phase = 'initial'; # 'after charset', 'after base'
1074     while (@nodes) {
1075     my $node = shift @nodes;
1076     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1077    
1078     my $nt = $node->node_type;
1079     if ($nt == 1) {
1080     my $node_ns = $node->namespace_uri;
1081     $node_ns = '' unless defined $node_ns;
1082     my $node_ln = $node->manakai_local_name;
1083     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1084 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1085     #
1086     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1087 wakaba 1.1 $phase = 'after base';
1088     unless ($has_title) {
1089     $has_title = 1;
1090     } else {
1091     $not_allowed = 1;
1092     }
1093     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1094     if ($node->has_attribute_ns (undef, 'charset')) {
1095     if ($phase eq 'initial') {
1096     $phase = 'after charset';
1097     } else {
1098     $not_allowed = 1;
1099     ## NOTE: See also |base|'s "contexts" field in the spec
1100     }
1101 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1102     $node->has_attribute_ns (undef, 'http-equiv')) {
1103     $phase = 'after base';
1104 wakaba 1.1 } else {
1105     $phase = 'after base';
1106 wakaba 1.5 $not_allowed = 1;
1107 wakaba 1.1 }
1108     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1109     if ($phase eq 'initial' or $phase eq 'after charset') {
1110     $phase = 'after base';
1111     } else {
1112     $not_allowed = 1;
1113     }
1114     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1115     $phase = 'after base';
1116     if ($node->has_attribute_ns (undef, 'scoped')) {
1117     $not_allowed = 1;
1118     }
1119     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1120     $phase = 'after base';
1121     } else {
1122     $not_allowed = 1;
1123     }
1124     $self->{onerror}->(node => $node, type => 'element not allowed')
1125     if $not_allowed;
1126 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1127 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1128     unshift @nodes, @$sib;
1129     push @$new_todos, @$ch;
1130     } elsif ($nt == 3 or $nt == 4) {
1131     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1132     $self->{onerror}->(node => $node, type => 'character not allowed');
1133     }
1134     } elsif ($nt == 5) {
1135     unshift @nodes, @{$node->child_nodes};
1136     }
1137     }
1138     unless ($has_title) {
1139     $self->{onerror}->(node => $el, type => 'child element missing:title');
1140     }
1141     return ($new_todos);
1142     },
1143     };
1144    
1145     $Element->{$HTML_NS}->{title} = {
1146     attrs_checker => $GetHTMLAttrsChecker->({}),
1147     checker => $HTMLTextChecker,
1148     };
1149    
1150     $Element->{$HTML_NS}->{base} = {
1151 wakaba 1.4 attrs_checker => sub {
1152     my ($self, $todo) = @_;
1153    
1154 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1155     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1156    
1157     if ($self->{has_uri_attr} and $has_href) {
1158 wakaba 1.4 ## ISSUE: Are these examples conforming?
1159     ## <head profile="a b c"><base href> (except for |profile|'s
1160     ## non-conformance)
1161     ## <title xml:base="relative"/><base href/> (maybe it should be)
1162     ## <unknown xmlns="relative"/><base href/> (assuming that
1163     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1164     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1165     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1166     ## <style>@import 'relative';</style><base href>
1167     ## <script>location.href = 'relative';</script><base href>
1168 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1169     ## an exception.
1170 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1171     type => 'basehref after URI attribute');
1172     }
1173 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1174 wakaba 1.4 ## ISSUE: Are these examples conforming?
1175     ## <head><title xlink:href=""/><base target="name"/></head>
1176     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1177     ## (assuming that |xbl:xbl| is allowed before |base|)
1178     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1179     ## <link href=""/><base target="name"/>
1180     ## <link rel=unknown href=""><base target=name>
1181     $self->{onerror}->(node => $todo->{node},
1182     type => 'basetarget after hyperlink');
1183     }
1184    
1185 wakaba 1.14 if (not $has_href and not $has_target) {
1186     $self->{onerror}->(node => $todo->{node},
1187     type => 'attribute missing:href|target');
1188     }
1189    
1190 wakaba 1.4 return $GetHTMLAttrsChecker->({
1191     href => $HTMLURIAttrChecker,
1192     target => $HTMLTargetAttrChecker,
1193     })->($self, $todo);
1194     },
1195 wakaba 1.1 checker => $HTMLEmptyChecker,
1196     };
1197    
1198     $Element->{$HTML_NS}->{link} = {
1199     attrs_checker => sub {
1200     my ($self, $todo) = @_;
1201     $GetHTMLAttrsChecker->({
1202     href => $HTMLURIAttrChecker,
1203 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1204 wakaba 1.1 media => $HTMLMQAttrChecker,
1205     hreflang => $HTMLLanguageTagAttrChecker,
1206     type => $HTMLIMTAttrChecker,
1207     ## NOTE: Though |title| has special semantics,
1208     ## syntactically same as the |title| as global attribute.
1209     })->($self, $todo);
1210 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1211     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1212     } else {
1213 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1214     type => 'attribute missing:href');
1215     }
1216     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1217     $self->{onerror}->(node => $todo->{node},
1218     type => 'attribute missing:rel');
1219     }
1220     },
1221     checker => $HTMLEmptyChecker,
1222     };
1223    
1224     $Element->{$HTML_NS}->{meta} = {
1225     attrs_checker => sub {
1226     my ($self, $todo) = @_;
1227     my $name_attr;
1228     my $http_equiv_attr;
1229     my $charset_attr;
1230     my $content_attr;
1231     for my $attr (@{$todo->{node}->attributes}) {
1232     my $attr_ns = $attr->namespace_uri;
1233     $attr_ns = '' unless defined $attr_ns;
1234     my $attr_ln = $attr->manakai_local_name;
1235     my $checker;
1236     if ($attr_ns eq '') {
1237     if ($attr_ln eq 'content') {
1238     $content_attr = $attr;
1239     $checker = 1;
1240     } elsif ($attr_ln eq 'name') {
1241     $name_attr = $attr;
1242     $checker = 1;
1243     } elsif ($attr_ln eq 'http-equiv') {
1244     $http_equiv_attr = $attr;
1245     $checker = 1;
1246     } elsif ($attr_ln eq 'charset') {
1247     $charset_attr = $attr;
1248     $checker = 1;
1249     } else {
1250     $checker = $HTMLAttrChecker->{$attr_ln}
1251     || $AttrChecker->{$attr_ns}->{$attr_ln}
1252     || $AttrChecker->{$attr_ns}->{''};
1253     }
1254     } else {
1255     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1256     || $AttrChecker->{$attr_ns}->{''};
1257     }
1258     if ($checker) {
1259     $checker->($self, $attr) if ref $checker;
1260     } else {
1261     $self->{onerror}->(node => $attr, level => 'unsupported',
1262     type => 'attribute');
1263     ## ISSUE: No comformance createria for unknown attributes in the spec
1264     }
1265     }
1266    
1267     if (defined $name_attr) {
1268     if (defined $http_equiv_attr) {
1269     $self->{onerror}->(node => $http_equiv_attr,
1270     type => 'attribute not allowed');
1271     } elsif (defined $charset_attr) {
1272     $self->{onerror}->(node => $charset_attr,
1273     type => 'attribute not allowed');
1274     }
1275     my $metadata_name = $name_attr->value;
1276     my $metadata_value;
1277     if (defined $content_attr) {
1278     $metadata_value = $content_attr->value;
1279     } else {
1280     $self->{onerror}->(node => $todo->{node},
1281     type => 'attribute missing:content');
1282     $metadata_value = '';
1283     }
1284     } elsif (defined $http_equiv_attr) {
1285     if (defined $charset_attr) {
1286     $self->{onerror}->(node => $charset_attr,
1287     type => 'attribute not allowed');
1288     }
1289     unless (defined $content_attr) {
1290     $self->{onerror}->(node => $todo->{node},
1291     type => 'attribute missing:content');
1292     }
1293     } elsif (defined $charset_attr) {
1294     if (defined $content_attr) {
1295     $self->{onerror}->(node => $content_attr,
1296     type => 'attribute not allowed');
1297     }
1298     } else {
1299     if (defined $content_attr) {
1300     $self->{onerror}->(node => $content_attr,
1301     type => 'attribute not allowed');
1302     $self->{onerror}->(node => $todo->{node},
1303     type => 'attribute missing:name|http-equiv');
1304     } else {
1305     $self->{onerror}->(node => $todo->{node},
1306     type => 'attribute missing:name|http-equiv|charset');
1307     }
1308     }
1309    
1310     ## TODO: metadata conformance
1311    
1312     ## TODO: pragma conformance
1313     if (defined $http_equiv_attr) { ## An enumerated attribute
1314     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1315     if ({
1316     'refresh' => 1,
1317     'default-style' => 1,
1318     }->{$keyword}) {
1319     #
1320 wakaba 1.19 } elsif ($keyword eq 'content-type') {
1321     $self->{onerror}
1322     ->(node => $http_equiv_attr,
1323     type => 'enumerated:invalid:http-equiv:content-type');
1324 wakaba 1.1 } else {
1325     $self->{onerror}->(node => $http_equiv_attr,
1326     type => 'enumerated:invalid');
1327     }
1328     }
1329    
1330     if (defined $charset_attr) {
1331     unless ($todo->{node}->owner_document->manakai_is_html) {
1332     $self->{onerror}->(node => $charset_attr,
1333     type => 'in XML:charset');
1334     }
1335 wakaba 1.21
1336     my $charset_value = $charset_attr->value;
1337     ## NOTE: Though the case-sensitivility of |charset| attribute value
1338     ## is not explicitly spelled in the HTML5 spec, the Character Set
1339     ## registry of IANA, which is referenced from HTML5 spec, says that
1340     ## charset name is case-insensitive.
1341     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1342    
1343     require Message::Charset::Info;
1344     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1345     my $ic = $todo->{node}->owner_document->input_encoding;
1346     if (defined $ic) {
1347     ## TODO: Test for this case
1348     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1349     if ($charset ne $ic_charset) {
1350     $self->{onerror}->(node => $charset_attr,
1351     type => 'mismatched charset name:'.$ic.
1352     ':'.$charset_value,
1353     level => 'm');
1354     }
1355     } else {
1356     ## NOTE: MUST, but not checkable, since the document is not originally
1357     ## in serialized form (or the parser does not preserve the input
1358     ## encoding information).
1359     $self->{onerror}->(node => $charset_attr,
1360     type => 'mismatched charset name::'.$charset_value,
1361     level => 'unsupported');
1362     }
1363    
1364     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1365     ## Syntactically valid and registered? What about x-charset names?
1366     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1367     ($charset_value)) {
1368     $self->{onerror}->(node => $charset_attr,
1369     type => 'charset:syntax error:'.$charset_value,
1370     level => 'm');
1371     }
1372    
1373     if ($charset) {
1374     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1375     ## with no "preferred MIME name" label)?
1376     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1377     if (($charset_status &
1378     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1379     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1380     $self->{onerror}->(node => $charset_attr,
1381     type => 'charset:not preferred:'.
1382     $charset_value,
1383     level => 'm');
1384     }
1385     if (($charset_status &
1386     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1387     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1388     if ($charset_value =~ /^x-/) {
1389     $self->{onerror}->(node => $charset_attr,
1390     type => 'charset:private:'.$charset_value,
1391     level => $self->{good_level});
1392     } else {
1393     $self->{onerror}->(node => $charset_attr,
1394     type => 'charset:not registered:'.
1395     $charset_value,
1396     level => $self->{good_level});
1397     }
1398     }
1399     } elsif ($charset_value =~ /^x-/) {
1400     $self->{onerror}->(node => $charset_attr,
1401     type => 'charset:private:'.$charset_value,
1402     level => $self->{good_level});
1403     } else {
1404     $self->{onerror}->(node => $charset_attr,
1405     type => 'charset:not registered:'.$charset_value,
1406     level => $self->{good_level});
1407     }
1408    
1409 wakaba 1.22 if ($charset_attr->get_user_data ('manakai_has_reference')) {
1410     $self->{onerror}->(node => $charset_attr,
1411     type => 'character reference in charset',
1412     level => $self->{must_level});
1413     }
1414 wakaba 1.1 }
1415     },
1416     checker => $HTMLEmptyChecker,
1417     };
1418    
1419     $Element->{$HTML_NS}->{style} = {
1420     attrs_checker => $GetHTMLAttrsChecker->({
1421     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1422     media => $HTMLMQAttrChecker,
1423     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1424     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1425     ## not different
1426     }),
1427     checker => sub {
1428     ## NOTE: |html:style| has no conformance creteria on content model
1429     my ($self, $todo) = @_;
1430     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1431     $type = 'text/css' unless defined $type;
1432     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1433     type => 'style:'.$type); ## TODO: $type normalization
1434     return $AnyChecker->($self, $todo);
1435     },
1436     };
1437    
1438     $Element->{$HTML_NS}->{body} = {
1439     attrs_checker => $GetHTMLAttrsChecker->({}),
1440     checker => $HTMLBlockChecker,
1441     };
1442    
1443     $Element->{$HTML_NS}->{section} = {
1444     attrs_checker => $GetHTMLAttrsChecker->({}),
1445     checker => $HTMLStylableBlockChecker,
1446     };
1447    
1448     $Element->{$HTML_NS}->{nav} = {
1449     attrs_checker => $GetHTMLAttrsChecker->({}),
1450     checker => $HTMLBlockOrInlineChecker,
1451     };
1452    
1453     $Element->{$HTML_NS}->{article} = {
1454     attrs_checker => $GetHTMLAttrsChecker->({}),
1455     checker => $HTMLStylableBlockChecker,
1456     };
1457    
1458     $Element->{$HTML_NS}->{blockquote} = {
1459     attrs_checker => $GetHTMLAttrsChecker->({
1460     cite => $HTMLURIAttrChecker,
1461     }),
1462     checker => $HTMLBlockChecker,
1463     };
1464    
1465     $Element->{$HTML_NS}->{aside} = {
1466     attrs_checker => $GetHTMLAttrsChecker->({}),
1467     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1468     };
1469    
1470     $Element->{$HTML_NS}->{h1} = {
1471     attrs_checker => $GetHTMLAttrsChecker->({}),
1472     checker => sub {
1473     my ($self, $todo) = @_;
1474     $todo->{flag}->{has_heading}->[0] = 1;
1475 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1476 wakaba 1.1 },
1477     };
1478    
1479     $Element->{$HTML_NS}->{h2} = {
1480     attrs_checker => $GetHTMLAttrsChecker->({}),
1481     checker => $Element->{$HTML_NS}->{h1}->{checker},
1482     };
1483    
1484     $Element->{$HTML_NS}->{h3} = {
1485     attrs_checker => $GetHTMLAttrsChecker->({}),
1486     checker => $Element->{$HTML_NS}->{h1}->{checker},
1487     };
1488    
1489     $Element->{$HTML_NS}->{h4} = {
1490     attrs_checker => $GetHTMLAttrsChecker->({}),
1491     checker => $Element->{$HTML_NS}->{h1}->{checker},
1492     };
1493    
1494     $Element->{$HTML_NS}->{h5} = {
1495     attrs_checker => $GetHTMLAttrsChecker->({}),
1496     checker => $Element->{$HTML_NS}->{h1}->{checker},
1497     };
1498    
1499     $Element->{$HTML_NS}->{h6} = {
1500     attrs_checker => $GetHTMLAttrsChecker->({}),
1501     checker => $Element->{$HTML_NS}->{h1}->{checker},
1502     };
1503    
1504     $Element->{$HTML_NS}->{header} = {
1505     attrs_checker => $GetHTMLAttrsChecker->({}),
1506     checker => sub {
1507     my ($self, $todo) = @_;
1508     my $old_flag = $todo->{flag}->{has_heading} || [];
1509     my $new_flag = [];
1510     local $todo->{flag}->{has_heading} = $new_flag;
1511     my $node = $todo->{node};
1512    
1513     my $end = $self->_add_minuses
1514     ({$HTML_NS => {qw/header 1 footer 1/}},
1515     $HTMLSectioningElements);
1516     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1517     push @$new_todos, $end,
1518     {type => 'code', code => sub {
1519     if ($new_flag->[0]) {
1520     $old_flag->[0] = 1;
1521     } else {
1522     $self->{onerror}->(node => $node, type => 'element missing:hn');
1523     }
1524     }};
1525     return ($new_todos, $ch);
1526     },
1527     };
1528    
1529     $Element->{$HTML_NS}->{footer} = {
1530     attrs_checker => $GetHTMLAttrsChecker->({}),
1531     checker => sub { ## block -hn -header -footer -sectioning or inline
1532     my ($self, $todo) = @_;
1533     my $el = $todo->{node};
1534     my $new_todos = [];
1535     my @nodes = (@{$el->child_nodes});
1536    
1537     my $content = 'block-or-inline'; # or 'block' or 'inline'
1538     my @block_not_inline;
1539     while (@nodes) {
1540     my $node = shift @nodes;
1541     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1542    
1543     my $nt = $node->node_type;
1544     if ($nt == 1) {
1545     my $node_ns = $node->namespace_uri;
1546     $node_ns = '' unless defined $node_ns;
1547     my $node_ln = $node->manakai_local_name;
1548     my $not_allowed;
1549     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1550     $not_allowed = 1;
1551     } elsif ($node_ns eq $HTML_NS and
1552     {
1553     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1554     }->{$node_ln}) {
1555     $not_allowed = 1;
1556     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1557     $not_allowed = 1;
1558     }
1559     if ($content eq 'block') {
1560     $not_allowed = 1
1561 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1562     $self->{pluses}->{$node_ns}->{$node_ln};
1563 wakaba 1.1 } elsif ($content eq 'inline') {
1564     $not_allowed = 1
1565     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1566 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1567     $self->{pluses}->{$node_ns}->{$node_ln};
1568 wakaba 1.1 } else {
1569     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1570     my $is_inline
1571     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1572     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1573    
1574     push @block_not_inline, $node
1575     if $is_block and not $is_inline and not $not_allowed;
1576 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1577 wakaba 1.1 $content = 'inline';
1578     for (@block_not_inline) {
1579     $self->{onerror}->(node => $_, type => 'element not allowed');
1580     }
1581     $not_allowed = 1 unless $is_inline;
1582     }
1583     }
1584     $self->{onerror}->(node => $node, type => 'element not allowed')
1585     if $not_allowed;
1586     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1587     unshift @nodes, @$sib;
1588     push @$new_todos, @$ch;
1589     } elsif ($nt == 3 or $nt == 4) {
1590     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1591     if ($content eq 'block') {
1592     $self->{onerror}->(node => $node, type => 'character not allowed');
1593     } else {
1594     $content = 'inline';
1595     for (@block_not_inline) {
1596     $self->{onerror}->(node => $_, type => 'element not allowed');
1597     }
1598     }
1599     }
1600     } elsif ($nt == 5) {
1601     unshift @nodes, @{$node->child_nodes};
1602     }
1603     }
1604    
1605     my $end = $self->_add_minuses
1606     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1607     $HTMLSectioningElements);
1608     push @$new_todos, $end;
1609    
1610     if ($content eq 'inline') {
1611     for (@$new_todos) {
1612     $_->{inline} = 1;
1613     }
1614     }
1615    
1616     return ($new_todos);
1617     },
1618     };
1619    
1620     $Element->{$HTML_NS}->{address} = {
1621     attrs_checker => $GetHTMLAttrsChecker->({}),
1622     checker => $HTMLInlineChecker,
1623     };
1624    
1625     $Element->{$HTML_NS}->{p} = {
1626     attrs_checker => $GetHTMLAttrsChecker->({}),
1627 wakaba 1.13 checker => $HTMLInlineChecker,
1628 wakaba 1.1 };
1629    
1630     $Element->{$HTML_NS}->{hr} = {
1631     attrs_checker => $GetHTMLAttrsChecker->({}),
1632     checker => $HTMLEmptyChecker,
1633     };
1634    
1635     $Element->{$HTML_NS}->{br} = {
1636     attrs_checker => $GetHTMLAttrsChecker->({}),
1637     checker => $HTMLEmptyChecker,
1638     };
1639    
1640     $Element->{$HTML_NS}->{dialog} = {
1641     attrs_checker => $GetHTMLAttrsChecker->({}),
1642     checker => sub {
1643     my ($self, $todo) = @_;
1644     my $el = $todo->{node};
1645     my $new_todos = [];
1646     my @nodes = (@{$el->child_nodes});
1647    
1648     my $phase = 'before dt';
1649     while (@nodes) {
1650     my $node = shift @nodes;
1651     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1652    
1653     my $nt = $node->node_type;
1654     if ($nt == 1) {
1655     my $node_ns = $node->namespace_uri;
1656     $node_ns = '' unless defined $node_ns;
1657     my $node_ln = $node->manakai_local_name;
1658     ## NOTE: |minuses| list is not checked since redundant
1659 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1660     #
1661     } elsif ($phase eq 'before dt') {
1662 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1663     $phase = 'before dd';
1664     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1665     $self->{onerror}
1666     ->(node => $node, type => 'ps element missing:dt');
1667     $phase = 'before dt';
1668     } else {
1669     $self->{onerror}->(node => $node, type => 'element not allowed');
1670     }
1671     } else { # before dd
1672     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1673     $phase = 'before dt';
1674     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1675     $self->{onerror}
1676     ->(node => $node, type => 'ps element missing:dd');
1677     $phase = 'before dd';
1678     } else {
1679     $self->{onerror}->(node => $node, type => 'element not allowed');
1680     }
1681     }
1682     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1683     unshift @nodes, @$sib;
1684     push @$new_todos, @$ch;
1685     } elsif ($nt == 3 or $nt == 4) {
1686     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1687     $self->{onerror}->(node => $node, type => 'character not allowed');
1688     }
1689     } elsif ($nt == 5) {
1690     unshift @nodes, @{$node->child_nodes};
1691     }
1692     }
1693     if ($phase eq 'before dd') {
1694 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1695 wakaba 1.1 }
1696     return ($new_todos);
1697     },
1698     };
1699    
1700     $Element->{$HTML_NS}->{pre} = {
1701     attrs_checker => $GetHTMLAttrsChecker->({}),
1702     checker => $HTMLStrictlyInlineChecker,
1703     };
1704    
1705     $Element->{$HTML_NS}->{ol} = {
1706     attrs_checker => $GetHTMLAttrsChecker->({
1707     start => $HTMLIntegerAttrChecker,
1708     }),
1709     checker => sub {
1710     my ($self, $todo) = @_;
1711     my $el = $todo->{node};
1712     my $new_todos = [];
1713     my @nodes = (@{$el->child_nodes});
1714    
1715     while (@nodes) {
1716     my $node = shift @nodes;
1717     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1718    
1719     my $nt = $node->node_type;
1720     if ($nt == 1) {
1721     my $node_ns = $node->namespace_uri;
1722     $node_ns = '' unless defined $node_ns;
1723     my $node_ln = $node->manakai_local_name;
1724     ## NOTE: |minuses| list is not checked since redundant
1725 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1726     #
1727     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1728 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1729     }
1730     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1731     unshift @nodes, @$sib;
1732     push @$new_todos, @$ch;
1733     } elsif ($nt == 3 or $nt == 4) {
1734     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1735     $self->{onerror}->(node => $node, type => 'character not allowed');
1736     }
1737     } elsif ($nt == 5) {
1738     unshift @nodes, @{$node->child_nodes};
1739     }
1740     }
1741    
1742     if ($todo->{inline}) {
1743     for (@$new_todos) {
1744     $_->{inline} = 1;
1745     }
1746     }
1747     return ($new_todos);
1748     },
1749     };
1750    
1751     $Element->{$HTML_NS}->{ul} = {
1752     attrs_checker => $GetHTMLAttrsChecker->({}),
1753     checker => $Element->{$HTML_NS}->{ol}->{checker},
1754     };
1755    
1756     $Element->{$HTML_NS}->{li} = {
1757     attrs_checker => $GetHTMLAttrsChecker->({
1758     start => sub {
1759     my ($self, $attr) = @_;
1760     my $parent = $attr->owner_element->manakai_parent_element;
1761     if (defined $parent) {
1762     my $parent_ns = $parent->namespace_uri;
1763     $parent_ns = '' unless defined $parent_ns;
1764     my $parent_ln = $parent->manakai_local_name;
1765     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1766     $self->{onerror}->(node => $attr, level => 'unsupported',
1767     type => 'attribute');
1768     }
1769     }
1770     $HTMLIntegerAttrChecker->($self, $attr);
1771     },
1772     }),
1773     checker => sub {
1774     my ($self, $todo) = @_;
1775     if ($todo->{inline}) {
1776     return $HTMLInlineChecker->($self, $todo);
1777     } else {
1778     return $HTMLBlockOrInlineChecker->($self, $todo);
1779     }
1780     },
1781     };
1782    
1783     $Element->{$HTML_NS}->{dl} = {
1784     attrs_checker => $GetHTMLAttrsChecker->({}),
1785     checker => sub {
1786     my ($self, $todo) = @_;
1787     my $el = $todo->{node};
1788     my $new_todos = [];
1789     my @nodes = (@{$el->child_nodes});
1790    
1791     my $phase = 'before dt';
1792     while (@nodes) {
1793     my $node = shift @nodes;
1794     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1795    
1796     my $nt = $node->node_type;
1797     if ($nt == 1) {
1798     my $node_ns = $node->namespace_uri;
1799     $node_ns = '' unless defined $node_ns;
1800     my $node_ln = $node->manakai_local_name;
1801     ## NOTE: |minuses| list is not checked since redundant
1802 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1803     #
1804     } elsif ($phase eq 'in dds') {
1805 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1806     #$phase = 'in dds';
1807     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1808     $phase = 'in dts';
1809     } else {
1810     $self->{onerror}->(node => $node, type => 'element not allowed');
1811     }
1812     } elsif ($phase eq 'in dts') {
1813     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1814     #$phase = 'in dts';
1815     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1816     $phase = 'in dds';
1817     } else {
1818     $self->{onerror}->(node => $node, type => 'element not allowed');
1819     }
1820     } else { # before dt
1821     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1822     $phase = 'in dts';
1823     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1824     $self->{onerror}
1825     ->(node => $node, type => 'ps element missing:dt');
1826     $phase = 'in dds';
1827     } else {
1828     $self->{onerror}->(node => $node, type => 'element not allowed');
1829     }
1830     }
1831     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1832     unshift @nodes, @$sib;
1833     push @$new_todos, @$ch;
1834     } elsif ($nt == 3 or $nt == 4) {
1835     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1836     $self->{onerror}->(node => $node, type => 'character not allowed');
1837     }
1838     } elsif ($nt == 5) {
1839     unshift @nodes, @{$node->child_nodes};
1840     }
1841     }
1842     if ($phase eq 'in dts') {
1843 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1844 wakaba 1.1 }
1845    
1846     if ($todo->{inline}) {
1847     for (@$new_todos) {
1848     $_->{inline} = 1;
1849     }
1850     }
1851     return ($new_todos);
1852     },
1853     };
1854    
1855     $Element->{$HTML_NS}->{dt} = {
1856     attrs_checker => $GetHTMLAttrsChecker->({}),
1857     checker => $HTMLStrictlyInlineChecker,
1858     };
1859    
1860     $Element->{$HTML_NS}->{dd} = {
1861     attrs_checker => $GetHTMLAttrsChecker->({}),
1862     checker => $Element->{$HTML_NS}->{li}->{checker},
1863     };
1864    
1865     $Element->{$HTML_NS}->{a} = {
1866     attrs_checker => sub {
1867     my ($self, $todo) = @_;
1868     my %attr;
1869     for my $attr (@{$todo->{node}->attributes}) {
1870     my $attr_ns = $attr->namespace_uri;
1871     $attr_ns = '' unless defined $attr_ns;
1872     my $attr_ln = $attr->manakai_local_name;
1873     my $checker;
1874     if ($attr_ns eq '') {
1875     $checker = {
1876     target => $HTMLTargetAttrChecker,
1877     href => $HTMLURIAttrChecker,
1878     ping => $HTMLSpaceURIsAttrChecker,
1879 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1880 wakaba 1.1 media => $HTMLMQAttrChecker,
1881     hreflang => $HTMLLanguageTagAttrChecker,
1882     type => $HTMLIMTAttrChecker,
1883     }->{$attr_ln};
1884     if ($checker) {
1885     $attr{$attr_ln} = $attr;
1886     } else {
1887     $checker = $HTMLAttrChecker->{$attr_ln};
1888     }
1889     }
1890     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1891     || $AttrChecker->{$attr_ns}->{''};
1892     if ($checker) {
1893     $checker->($self, $attr) if ref $checker;
1894     } else {
1895     $self->{onerror}->(node => $attr, level => 'unsupported',
1896     type => 'attribute');
1897     ## ISSUE: No comformance createria for unknown attributes in the spec
1898     }
1899     }
1900    
1901 wakaba 1.4 if (defined $attr{href}) {
1902     $self->{has_hyperlink_element} = 1;
1903     } else {
1904 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1905     if (defined $attr{$_}) {
1906     $self->{onerror}->(node => $attr{$_},
1907     type => 'attribute not allowed');
1908     }
1909     }
1910     }
1911     },
1912     checker => sub {
1913     my ($self, $todo) = @_;
1914    
1915     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1916     my ($new_todos, $ch)
1917 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
1918 wakaba 1.1 push @$new_todos, $end;
1919    
1920 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1921     $_->{flag}->{in_a_href} = 1 for @$new_todos;
1922     }
1923 wakaba 1.1
1924     return ($new_todos, $ch);
1925     },
1926     };
1927    
1928     $Element->{$HTML_NS}->{q} = {
1929     attrs_checker => $GetHTMLAttrsChecker->({
1930     cite => $HTMLURIAttrChecker,
1931     }),
1932     checker => $HTMLInlineOrStrictlyInlineChecker,
1933     };
1934    
1935     $Element->{$HTML_NS}->{cite} = {
1936     attrs_checker => $GetHTMLAttrsChecker->({}),
1937     checker => $HTMLStrictlyInlineChecker,
1938     };
1939    
1940     $Element->{$HTML_NS}->{em} = {
1941     attrs_checker => $GetHTMLAttrsChecker->({}),
1942     checker => $HTMLInlineOrStrictlyInlineChecker,
1943     };
1944    
1945     $Element->{$HTML_NS}->{strong} = {
1946     attrs_checker => $GetHTMLAttrsChecker->({}),
1947     checker => $HTMLInlineOrStrictlyInlineChecker,
1948     };
1949    
1950     $Element->{$HTML_NS}->{small} = {
1951     attrs_checker => $GetHTMLAttrsChecker->({}),
1952     checker => $HTMLInlineOrStrictlyInlineChecker,
1953     };
1954    
1955     $Element->{$HTML_NS}->{m} = {
1956     attrs_checker => $GetHTMLAttrsChecker->({}),
1957     checker => $HTMLInlineOrStrictlyInlineChecker,
1958     };
1959    
1960     $Element->{$HTML_NS}->{dfn} = {
1961     attrs_checker => $GetHTMLAttrsChecker->({}),
1962     checker => sub {
1963     my ($self, $todo) = @_;
1964    
1965     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1966     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1967     push @$sib, $end;
1968    
1969     my $node = $todo->{node};
1970     my $term = $node->get_attribute_ns (undef, 'title');
1971     unless (defined $term) {
1972     for my $child (@{$node->child_nodes}) {
1973     if ($child->node_type == 1) { # ELEMENT_NODE
1974     if (defined $term) {
1975     undef $term;
1976     last;
1977     } elsif ($child->manakai_local_name eq 'abbr') {
1978     my $nsuri = $child->namespace_uri;
1979     if (defined $nsuri and $nsuri eq $HTML_NS) {
1980     my $attr = $child->get_attribute_node_ns (undef, 'title');
1981     if ($attr) {
1982     $term = $attr->value;
1983     }
1984     }
1985     }
1986     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1987     ## TEXT_NODE or CDATA_SECTION_NODE
1988     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1989     next;
1990     }
1991     undef $term;
1992     last;
1993     }
1994     }
1995     unless (defined $term) {
1996     $term = $node->text_content;
1997     }
1998     }
1999     if ($self->{term}->{$term}) {
2000     $self->{onerror}->(node => $node, type => 'duplicate term');
2001     push @{$self->{term}->{$term}}, $node;
2002     } else {
2003     $self->{term}->{$term} = [$node];
2004     }
2005     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
2006     ## has |title|.
2007    
2008     return ($sib, $ch);
2009     },
2010     };
2011    
2012     $Element->{$HTML_NS}->{abbr} = {
2013     attrs_checker => $GetHTMLAttrsChecker->({
2014     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
2015     ## not different. The spec says that the |title| MAY be omitted
2016     ## if there is a |dfn| whose defining term is the abbreviation,
2017     ## but it does not prohibit |abbr| w/o |title| in other cases.
2018     }),
2019     checker => $HTMLStrictlyInlineChecker,
2020     };
2021    
2022     $Element->{$HTML_NS}->{time} = {
2023     attrs_checker => $GetHTMLAttrsChecker->({
2024     datetime => sub { 1 }, # checked in |checker|
2025     }),
2026     ## TODO: Write tests
2027     checker => sub {
2028     my ($self, $todo) = @_;
2029    
2030     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
2031     my $input;
2032     my $reg_sp;
2033     my $input_node;
2034     if ($attr) {
2035     $input = $attr->value;
2036     $reg_sp = qr/[\x09-\x0D\x20]*/;
2037     $input_node = $attr;
2038     } else {
2039     $input = $todo->{node}->text_content;
2040     $reg_sp = qr/\p{Zs}*/;
2041     $input_node = $todo->{node};
2042    
2043     ## ISSUE: What is the definition for "successfully extracts a date
2044     ## or time"? If the algorithm says the string is invalid but
2045     ## return some date or time, is it "successfully"?
2046     }
2047    
2048     my $hour;
2049     my $minute;
2050     my $second;
2051     if ($input =~ /
2052     \A
2053     [\x09-\x0D\x20]*
2054     ([0-9]+) # 1
2055     (?>
2056     -([0-9]+) # 2
2057     -([0-9]+) # 3
2058     [\x09-\x0D\x20]*
2059     (?>
2060     T
2061     [\x09-\x0D\x20]*
2062     )?
2063     ([0-9]+) # 4
2064     :([0-9]+) # 5
2065     (?>
2066     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2067     )?
2068     [\x09-\x0D\x20]*
2069     (?>
2070     Z
2071     [\x09-\x0D\x20]*
2072     |
2073     [+-]([0-9]+):([0-9]+) # 7, 8
2074     [\x09-\x0D\x20]*
2075     )?
2076     \z
2077     |
2078     :([0-9]+) # 9
2079     (?>
2080     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2081     )?
2082     [\x09-\x0D\x20]*\z
2083     )
2084     /x) {
2085     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2086     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2087     length $4 != 2 or length $5 != 2) {
2088     $self->{onerror}->(node => $input_node,
2089     type => 'dateortime:syntax error');
2090     }
2091    
2092     if (1 <= $2 and $2 <= 12) {
2093     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2094     if $3 < 1 or
2095     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
2096     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2097     if $2 == 2 and $3 == 29 and
2098     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2099     } else {
2100     $self->{onerror}->(node => $input_node,
2101     type => 'datetime:bad month');
2102     }
2103    
2104     ($hour, $minute, $second) = ($4, $5, $6);
2105    
2106     if (defined $7) { ## [+-]hh:mm
2107     if (length $7 != 2 or length $8 != 2) {
2108     $self->{onerror}->(node => $input_node,
2109     type => 'dateortime:syntax error');
2110     }
2111    
2112     $self->{onerror}->(node => $input_node,
2113     type => 'datetime:bad timezone hour')
2114     if $7 > 23;
2115     $self->{onerror}->(node => $input_node,
2116     type => 'datetime:bad timezone minute')
2117     if $8 > 59;
2118     }
2119     } else { ## hh:mm
2120     if (length $1 != 2 or length $9 != 2) {
2121     $self->{onerror}->(node => $input_node,
2122     type => qq'dateortime:syntax error');
2123     }
2124    
2125     ($hour, $minute, $second) = ($1, $9, $10);
2126     }
2127    
2128     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2129     if $hour > 23;
2130     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2131     if $minute > 59;
2132    
2133     if (defined $second) { ## s
2134     ## NOTE: Integer part of second don't have to have length of two.
2135    
2136     if (substr ($second, 0, 1) eq '.') {
2137     $self->{onerror}->(node => $input_node,
2138     type => 'dateortime:syntax error');
2139     }
2140    
2141     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2142     if $second >= 60;
2143     }
2144     } else {
2145     $self->{onerror}->(node => $input_node,
2146     type => 'dateortime:syntax error');
2147     }
2148    
2149     return $HTMLStrictlyInlineChecker->($self, $todo);
2150     },
2151     };
2152    
2153     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2154     attrs_checker => $GetHTMLAttrsChecker->({
2155     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2156     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2157     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2158     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2159     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2160     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2161     }),
2162     checker => $HTMLStrictlyInlineChecker,
2163     };
2164    
2165     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2166     attrs_checker => $GetHTMLAttrsChecker->({
2167     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2168     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2169     }),
2170     checker => $HTMLStrictlyInlineChecker,
2171     };
2172    
2173     $Element->{$HTML_NS}->{code} = {
2174     attrs_checker => $GetHTMLAttrsChecker->({}),
2175     ## NOTE: Though |title| has special semantics,
2176     ## syntatically same as the |title| as global attribute.
2177     checker => $HTMLInlineOrStrictlyInlineChecker,
2178     };
2179    
2180     $Element->{$HTML_NS}->{var} = {
2181     attrs_checker => $GetHTMLAttrsChecker->({}),
2182     ## NOTE: Though |title| has special semantics,
2183     ## syntatically same as the |title| as global attribute.
2184     checker => $HTMLStrictlyInlineChecker,
2185     };
2186    
2187     $Element->{$HTML_NS}->{samp} = {
2188     attrs_checker => $GetHTMLAttrsChecker->({}),
2189     ## NOTE: Though |title| has special semantics,
2190     ## syntatically same as the |title| as global attribute.
2191     checker => $HTMLInlineOrStrictlyInlineChecker,
2192     };
2193    
2194     $Element->{$HTML_NS}->{kbd} = {
2195     attrs_checker => $GetHTMLAttrsChecker->({}),
2196     checker => $HTMLStrictlyInlineChecker,
2197     };
2198    
2199     $Element->{$HTML_NS}->{sub} = {
2200     attrs_checker => $GetHTMLAttrsChecker->({}),
2201     checker => $HTMLStrictlyInlineChecker,
2202     };
2203    
2204     $Element->{$HTML_NS}->{sup} = {
2205     attrs_checker => $GetHTMLAttrsChecker->({}),
2206     checker => $HTMLStrictlyInlineChecker,
2207     };
2208    
2209     $Element->{$HTML_NS}->{span} = {
2210     attrs_checker => $GetHTMLAttrsChecker->({}),
2211     ## NOTE: Though |title| has special semantics,
2212     ## syntatically same as the |title| as global attribute.
2213     checker => $HTMLInlineOrStrictlyInlineChecker,
2214     };
2215    
2216     $Element->{$HTML_NS}->{i} = {
2217     attrs_checker => $GetHTMLAttrsChecker->({}),
2218     ## NOTE: Though |title| has special semantics,
2219     ## syntatically same as the |title| as global attribute.
2220     checker => $HTMLStrictlyInlineChecker,
2221     };
2222    
2223     $Element->{$HTML_NS}->{b} = {
2224     attrs_checker => $GetHTMLAttrsChecker->({}),
2225     checker => $HTMLStrictlyInlineChecker,
2226     };
2227    
2228     $Element->{$HTML_NS}->{bdo} = {
2229     attrs_checker => sub {
2230     my ($self, $todo) = @_;
2231     $GetHTMLAttrsChecker->({})->($self, $todo);
2232     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2233     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2234     }
2235     },
2236     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2237     checker => $HTMLStrictlyInlineChecker,
2238     };
2239    
2240     $Element->{$HTML_NS}->{ins} = {
2241     attrs_checker => $GetHTMLAttrsChecker->({
2242     cite => $HTMLURIAttrChecker,
2243     datetime => $HTMLDatetimeAttrChecker,
2244     }),
2245     checker => $HTMLTransparentChecker,
2246     };
2247    
2248     $Element->{$HTML_NS}->{del} = {
2249     attrs_checker => $GetHTMLAttrsChecker->({
2250     cite => $HTMLURIAttrChecker,
2251     datetime => $HTMLDatetimeAttrChecker,
2252     }),
2253     checker => sub {
2254     my ($self, $todo) = @_;
2255    
2256     my $parent = $todo->{node}->manakai_parent_element;
2257     if (defined $parent) {
2258     my $nsuri = $parent->namespace_uri;
2259     $nsuri = '' unless defined $nsuri;
2260     my $ln = $parent->manakai_local_name;
2261     my $eldef = $Element->{$nsuri}->{$ln} ||
2262     $Element->{$nsuri}->{''} ||
2263     $ElementDefault;
2264     return $eldef->{checker}->($self, $todo);
2265     } else {
2266     return $HTMLBlockOrInlineChecker->($self, $todo);
2267     }
2268     },
2269     };
2270    
2271     ## TODO: figure
2272 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2273 wakaba 1.1
2274 wakaba 1.4 ## TODO: |alt|
2275 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2276     attrs_checker => sub {
2277     my ($self, $todo) = @_;
2278     $GetHTMLAttrsChecker->({
2279     alt => sub { }, ## NOTE: No syntactical requirement
2280     src => $HTMLURIAttrChecker,
2281     usemap => $HTMLUsemapAttrChecker,
2282     ismap => sub {
2283     my ($self, $attr, $parent_todo) = @_;
2284 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2285     $self->{onerror}->(node => $attr,
2286     type => 'attribute not allowed:ismap');
2287 wakaba 1.1 }
2288     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2289     },
2290     ## TODO: height
2291     ## TODO: width
2292     })->($self, $todo);
2293     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2294     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2295     }
2296     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2297     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2298     }
2299     },
2300     checker => $HTMLEmptyChecker,
2301     };
2302    
2303     $Element->{$HTML_NS}->{iframe} = {
2304     attrs_checker => $GetHTMLAttrsChecker->({
2305     src => $HTMLURIAttrChecker,
2306     }),
2307     checker => $HTMLTextChecker,
2308     };
2309    
2310     $Element->{$HTML_NS}->{embed} = {
2311     attrs_checker => sub {
2312     my ($self, $todo) = @_;
2313     my $has_src;
2314     for my $attr (@{$todo->{node}->attributes}) {
2315     my $attr_ns = $attr->namespace_uri;
2316     $attr_ns = '' unless defined $attr_ns;
2317     my $attr_ln = $attr->manakai_local_name;
2318     my $checker;
2319     if ($attr_ns eq '') {
2320     if ($attr_ln eq 'src') {
2321     $checker = $HTMLURIAttrChecker;
2322     $has_src = 1;
2323     } elsif ($attr_ln eq 'type') {
2324     $checker = $HTMLIMTAttrChecker;
2325     } else {
2326     ## TODO: height
2327     ## TODO: width
2328     $checker = $HTMLAttrChecker->{$attr_ln}
2329     || sub { }; ## NOTE: Any local attribute is ok.
2330     }
2331     }
2332     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2333     || $AttrChecker->{$attr_ns}->{''};
2334     if ($checker) {
2335     $checker->($self, $attr);
2336     } else {
2337     $self->{onerror}->(node => $attr, level => 'unsupported',
2338     type => 'attribute');
2339     ## ISSUE: No comformance createria for global attributes in the spec
2340     }
2341     }
2342    
2343     unless ($has_src) {
2344     $self->{onerror}->(node => $todo->{node},
2345     type => 'attribute missing:src');
2346     }
2347     },
2348     checker => $HTMLEmptyChecker,
2349     };
2350    
2351     $Element->{$HTML_NS}->{object} = {
2352     attrs_checker => sub {
2353     my ($self, $todo) = @_;
2354     $GetHTMLAttrsChecker->({
2355     data => $HTMLURIAttrChecker,
2356     type => $HTMLIMTAttrChecker,
2357     usemap => $HTMLUsemapAttrChecker,
2358     ## TODO: width
2359     ## TODO: height
2360     })->($self, $todo);
2361     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2362     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2363     $self->{onerror}->(node => $todo->{node},
2364     type => 'attribute missing:data|type');
2365     }
2366     }
2367     },
2368     checker => $ElementDefault->{checker}, ## TODO
2369 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2370 wakaba 1.1 };
2371    
2372     $Element->{$HTML_NS}->{param} = {
2373     attrs_checker => sub {
2374     my ($self, $todo) = @_;
2375     $GetHTMLAttrsChecker->({
2376     name => sub { },
2377     value => sub { },
2378     })->($self, $todo);
2379     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2380     $self->{onerror}->(node => $todo->{node},
2381     type => 'attribute missing:name');
2382     }
2383     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2384     $self->{onerror}->(node => $todo->{node},
2385     type => 'attribute missing:value');
2386     }
2387     },
2388     checker => $HTMLEmptyChecker,
2389     };
2390    
2391     $Element->{$HTML_NS}->{video} = {
2392     attrs_checker => $GetHTMLAttrsChecker->({
2393     src => $HTMLURIAttrChecker,
2394     ## TODO: start, loopstart, loopend, end
2395     ## ISSUE: they MUST be "value time offset"s. Value?
2396 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2397 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2398     controls => $GetHTMLBooleanAttrChecker->('controls'),
2399 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2400     ## TODO: width, height (not for audio!)
2401 wakaba 1.1 }),
2402     checker => sub {
2403     my ($self, $todo) = @_;
2404    
2405     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2406     return $HTMLBlockOrInlineChecker->($self, $todo);
2407     } else {
2408     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2409     ->($self, $todo);
2410     }
2411     },
2412     };
2413    
2414     $Element->{$HTML_NS}->{audio} = {
2415     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2416     checker => $Element->{$HTML_NS}->{video}->{checker},
2417     };
2418    
2419     $Element->{$HTML_NS}->{source} = {
2420     attrs_checker => sub {
2421     my ($self, $todo) = @_;
2422     $GetHTMLAttrsChecker->({
2423     src => $HTMLURIAttrChecker,
2424     type => $HTMLIMTAttrChecker,
2425     media => $HTMLMQAttrChecker,
2426     })->($self, $todo);
2427     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2428     $self->{onerror}->(node => $todo->{node},
2429     type => 'attribute missing:src');
2430     }
2431     },
2432     checker => $HTMLEmptyChecker,
2433     };
2434    
2435     $Element->{$HTML_NS}->{canvas} = {
2436     attrs_checker => $GetHTMLAttrsChecker->({
2437     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2438     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2439     }),
2440     checker => $HTMLInlineChecker,
2441     };
2442    
2443     $Element->{$HTML_NS}->{map} = {
2444 wakaba 1.4 attrs_checker => sub {
2445     my ($self, $todo) = @_;
2446     my $has_id;
2447     $GetHTMLAttrsChecker->({
2448     id => sub {
2449     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2450     my ($self, $attr) = @_;
2451     my $value = $attr->value;
2452     if (length $value > 0) {
2453     if ($self->{id}->{$value}) {
2454     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2455     push @{$self->{id}->{$value}}, $attr;
2456     } else {
2457     $self->{id}->{$value} = [$attr];
2458     }
2459 wakaba 1.1 } else {
2460 wakaba 1.4 ## NOTE: MUST contain at least one character
2461     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2462 wakaba 1.1 }
2463 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2464     $self->{onerror}->(node => $attr, type => 'space in ID');
2465     }
2466     $self->{map}->{$value} ||= $attr;
2467     $has_id = 1;
2468     },
2469     })->($self, $todo);
2470     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2471     unless $has_id;
2472     },
2473 wakaba 1.1 checker => $HTMLBlockChecker,
2474     };
2475    
2476     $Element->{$HTML_NS}->{area} = {
2477     attrs_checker => sub {
2478     my ($self, $todo) = @_;
2479     my %attr;
2480     my $coords;
2481     for my $attr (@{$todo->{node}->attributes}) {
2482     my $attr_ns = $attr->namespace_uri;
2483     $attr_ns = '' unless defined $attr_ns;
2484     my $attr_ln = $attr->manakai_local_name;
2485     my $checker;
2486     if ($attr_ns eq '') {
2487     $checker = {
2488     alt => sub { },
2489     ## NOTE: |alt| value has no conformance creteria.
2490     shape => $GetHTMLEnumeratedAttrChecker->({
2491     circ => -1, circle => 1,
2492     default => 1,
2493     poly => 1, polygon => -1,
2494     rect => 1, rectangle => -1,
2495     }),
2496     coords => sub {
2497     my ($self, $attr) = @_;
2498     my $value = $attr->value;
2499     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2500     $coords = [split /,/, $value];
2501     } else {
2502     $self->{onerror}->(node => $attr,
2503     type => 'coords:syntax error');
2504     }
2505     },
2506     target => $HTMLTargetAttrChecker,
2507     href => $HTMLURIAttrChecker,
2508     ping => $HTMLSpaceURIsAttrChecker,
2509 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2510 wakaba 1.1 media => $HTMLMQAttrChecker,
2511     hreflang => $HTMLLanguageTagAttrChecker,
2512     type => $HTMLIMTAttrChecker,
2513     }->{$attr_ln};
2514     if ($checker) {
2515     $attr{$attr_ln} = $attr;
2516     } else {
2517     $checker = $HTMLAttrChecker->{$attr_ln};
2518     }
2519     }
2520     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2521     || $AttrChecker->{$attr_ns}->{''};
2522     if ($checker) {
2523     $checker->($self, $attr) if ref $checker;
2524     } else {
2525     $self->{onerror}->(node => $attr, level => 'unsupported',
2526     type => 'attribute');
2527     ## ISSUE: No comformance createria for unknown attributes in the spec
2528     }
2529     }
2530    
2531     if (defined $attr{href}) {
2532 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2533 wakaba 1.1 unless (defined $attr{alt}) {
2534     $self->{onerror}->(node => $todo->{node},
2535     type => 'attribute missing:alt');
2536     }
2537     } else {
2538     for (qw/target ping rel media hreflang type alt/) {
2539     if (defined $attr{$_}) {
2540     $self->{onerror}->(node => $attr{$_},
2541     type => 'attribute not allowed');
2542     }
2543     }
2544     }
2545    
2546     my $shape = 'rectangle';
2547     if (defined $attr{shape}) {
2548     $shape = {
2549     circ => 'circle', circle => 'circle',
2550     default => 'default',
2551     poly => 'polygon', polygon => 'polygon',
2552     rect => 'rectangle', rectangle => 'rectangle',
2553     }->{lc $attr{shape}->value} || 'rectangle';
2554     ## TODO: ASCII lowercase?
2555     }
2556    
2557     if ($shape eq 'circle') {
2558     if (defined $attr{coords}) {
2559     if (defined $coords) {
2560     if (@$coords == 3) {
2561     if ($coords->[2] < 0) {
2562     $self->{onerror}->(node => $attr{coords},
2563     type => 'coords:out of range:2');
2564     }
2565     } else {
2566     $self->{onerror}->(node => $attr{coords},
2567     type => 'coords:number:3:'.@$coords);
2568     }
2569     } else {
2570     ## NOTE: A syntax error has been reported.
2571     }
2572     } else {
2573     $self->{onerror}->(node => $todo->{node},
2574     type => 'attribute missing:coords');
2575     }
2576     } elsif ($shape eq 'default') {
2577     if (defined $attr{coords}) {
2578     $self->{onerror}->(node => $attr{coords},
2579     type => 'attribute not allowed');
2580     }
2581     } elsif ($shape eq 'polygon') {
2582     if (defined $attr{coords}) {
2583     if (defined $coords) {
2584     if (@$coords >= 6) {
2585     unless (@$coords % 2 == 0) {
2586     $self->{onerror}->(node => $attr{coords},
2587     type => 'coords:number:even:'.@$coords);
2588     }
2589     } else {
2590     $self->{onerror}->(node => $attr{coords},
2591     type => 'coords:number:>=6:'.@$coords);
2592     }
2593     } else {
2594     ## NOTE: A syntax error has been reported.
2595     }
2596     } else {
2597     $self->{onerror}->(node => $todo->{node},
2598     type => 'attribute missing:coords');
2599     }
2600     } elsif ($shape eq 'rectangle') {
2601     if (defined $attr{coords}) {
2602     if (defined $coords) {
2603     if (@$coords == 4) {
2604     unless ($coords->[0] < $coords->[2]) {
2605     $self->{onerror}->(node => $attr{coords},
2606     type => 'coords:out of range:0');
2607     }
2608     unless ($coords->[1] < $coords->[3]) {
2609     $self->{onerror}->(node => $attr{coords},
2610     type => 'coords:out of range:1');
2611     }
2612     } else {
2613     $self->{onerror}->(node => $attr{coords},
2614     type => 'coords:number:4:'.@$coords);
2615     }
2616     } else {
2617     ## NOTE: A syntax error has been reported.
2618     }
2619     } else {
2620     $self->{onerror}->(node => $todo->{node},
2621     type => 'attribute missing:coords');
2622     }
2623     }
2624     },
2625     checker => $HTMLEmptyChecker,
2626     };
2627     ## TODO: only in map
2628    
2629     $Element->{$HTML_NS}->{table} = {
2630     attrs_checker => $GetHTMLAttrsChecker->({}),
2631     checker => sub {
2632     my ($self, $todo) = @_;
2633     my $el = $todo->{node};
2634     my $new_todos = [];
2635     my @nodes = (@{$el->child_nodes});
2636    
2637     my $phase = 'before caption';
2638     my $has_tfoot;
2639     while (@nodes) {
2640     my $node = shift @nodes;
2641     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2642    
2643     my $nt = $node->node_type;
2644     if ($nt == 1) {
2645     my $node_ns = $node->namespace_uri;
2646     $node_ns = '' unless defined $node_ns;
2647     my $node_ln = $node->manakai_local_name;
2648     ## NOTE: |minuses| list is not checked since redundant
2649 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2650     #
2651     } elsif ($phase eq 'in tbodys') {
2652 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2653     #$phase = 'in tbodys';
2654     } elsif (not $has_tfoot and
2655     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2656     $phase = 'after tfoot';
2657     $has_tfoot = 1;
2658     } else {
2659     $self->{onerror}->(node => $node, type => 'element not allowed');
2660     }
2661     } elsif ($phase eq 'in trs') {
2662     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2663     #$phase = 'in trs';
2664     } elsif (not $has_tfoot and
2665     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2666     $phase = 'after tfoot';
2667     $has_tfoot = 1;
2668     } else {
2669     $self->{onerror}->(node => $node, type => 'element not allowed');
2670     }
2671     } elsif ($phase eq 'after thead') {
2672     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2673     $phase = 'in tbodys';
2674     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2675     $phase = 'in trs';
2676     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2677     $phase = 'in tbodys';
2678     $has_tfoot = 1;
2679     } else {
2680     $self->{onerror}->(node => $node, type => 'element not allowed');
2681     }
2682     } elsif ($phase eq 'in colgroup') {
2683     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2684     $phase = 'in colgroup';
2685     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2686     $phase = 'after thead';
2687     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2688     $phase = 'in tbodys';
2689     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2690     $phase = 'in trs';
2691     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2692     $phase = 'in tbodys';
2693     $has_tfoot = 1;
2694     } else {
2695     $self->{onerror}->(node => $node, type => 'element not allowed');
2696     }
2697     } elsif ($phase eq 'before caption') {
2698     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2699     $phase = 'in colgroup';
2700     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2701     $phase = 'in colgroup';
2702     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2703     $phase = 'after thead';
2704     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2705     $phase = 'in tbodys';
2706     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2707     $phase = 'in trs';
2708     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2709     $phase = 'in tbodys';
2710     $has_tfoot = 1;
2711     } else {
2712     $self->{onerror}->(node => $node, type => 'element not allowed');
2713     }
2714     } else { # after tfoot
2715     $self->{onerror}->(node => $node, type => 'element not allowed');
2716     }
2717     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2718     unshift @nodes, @$sib;
2719     push @$new_todos, @$ch;
2720     } elsif ($nt == 3 or $nt == 4) {
2721     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2722     $self->{onerror}->(node => $node, type => 'character not allowed');
2723     }
2724     } elsif ($nt == 5) {
2725     unshift @nodes, @{$node->child_nodes};
2726     }
2727     }
2728    
2729     ## Table model errors
2730     require Whatpm::HTMLTable;
2731     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2732     my %opt = @_;
2733     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2734     });
2735     push @{$self->{return}->{table}}, $todo->{node};
2736    
2737     return ($new_todos);
2738     },
2739     };
2740    
2741     $Element->{$HTML_NS}->{caption} = {
2742     attrs_checker => $GetHTMLAttrsChecker->({}),
2743 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2744 wakaba 1.1 };
2745    
2746     $Element->{$HTML_NS}->{colgroup} = {
2747     attrs_checker => $GetHTMLAttrsChecker->({
2748     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2749     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2750     ## TODO: "attribute not supported" if |col|.
2751     ## ISSUE: MUST NOT if any |col|?
2752     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2753     }),
2754     checker => sub {
2755     my ($self, $todo) = @_;
2756     my $el = $todo->{node};
2757     my $new_todos = [];
2758     my @nodes = (@{$el->child_nodes});
2759    
2760     while (@nodes) {
2761     my $node = shift @nodes;
2762     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2763    
2764     my $nt = $node->node_type;
2765     if ($nt == 1) {
2766     my $node_ns = $node->namespace_uri;
2767     $node_ns = '' unless defined $node_ns;
2768     my $node_ln = $node->manakai_local_name;
2769     ## NOTE: |minuses| list is not checked since redundant
2770 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2771     #
2772     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2773 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2774     }
2775     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2776     unshift @nodes, @$sib;
2777     push @$new_todos, @$ch;
2778     } elsif ($nt == 3 or $nt == 4) {
2779     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2780     $self->{onerror}->(node => $node, type => 'character not allowed');
2781     }
2782     } elsif ($nt == 5) {
2783     unshift @nodes, @{$node->child_nodes};
2784     }
2785     }
2786     return ($new_todos);
2787     },
2788     };
2789    
2790     $Element->{$HTML_NS}->{col} = {
2791     attrs_checker => $GetHTMLAttrsChecker->({
2792     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2793     }),
2794     checker => $HTMLEmptyChecker,
2795     };
2796    
2797     $Element->{$HTML_NS}->{tbody} = {
2798     attrs_checker => $GetHTMLAttrsChecker->({}),
2799     checker => sub {
2800     my ($self, $todo) = @_;
2801     my $el = $todo->{node};
2802     my $new_todos = [];
2803     my @nodes = (@{$el->child_nodes});
2804    
2805     my $has_tr;
2806     while (@nodes) {
2807     my $node = shift @nodes;
2808     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2809    
2810     my $nt = $node->node_type;
2811     if ($nt == 1) {
2812     my $node_ns = $node->namespace_uri;
2813     $node_ns = '' unless defined $node_ns;
2814     my $node_ln = $node->manakai_local_name;
2815     ## NOTE: |minuses| list is not checked since redundant
2816 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2817     #
2818     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2819 wakaba 1.1 $has_tr = 1;
2820     } else {
2821     $self->{onerror}->(node => $node, type => 'element not allowed');
2822     }
2823     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2824     unshift @nodes, @$sib;
2825     push @$new_todos, @$ch;
2826     } elsif ($nt == 3 or $nt == 4) {
2827     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2828     $self->{onerror}->(node => $node, type => 'character not allowed');
2829     }
2830     } elsif ($nt == 5) {
2831     unshift @nodes, @{$node->child_nodes};
2832     }
2833     }
2834     unless ($has_tr) {
2835     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2836     }
2837     return ($new_todos);
2838     },
2839     };
2840    
2841     $Element->{$HTML_NS}->{thead} = {
2842     attrs_checker => $GetHTMLAttrsChecker->({}),
2843     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2844     };
2845    
2846     $Element->{$HTML_NS}->{tfoot} = {
2847     attrs_checker => $GetHTMLAttrsChecker->({}),
2848     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2849     };
2850    
2851     $Element->{$HTML_NS}->{tr} = {
2852     attrs_checker => $GetHTMLAttrsChecker->({}),
2853     checker => sub {
2854     my ($self, $todo) = @_;
2855     my $el = $todo->{node};
2856     my $new_todos = [];
2857     my @nodes = (@{$el->child_nodes});
2858    
2859     my $has_td;
2860     while (@nodes) {
2861     my $node = shift @nodes;
2862     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2863    
2864     my $nt = $node->node_type;
2865     if ($nt == 1) {
2866     my $node_ns = $node->namespace_uri;
2867     $node_ns = '' unless defined $node_ns;
2868     my $node_ln = $node->manakai_local_name;
2869     ## NOTE: |minuses| list is not checked since redundant
2870 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2871     #
2872     } elsif ($node_ns eq $HTML_NS and
2873     ($node_ln eq 'td' or $node_ln eq 'th')) {
2874 wakaba 1.1 $has_td = 1;
2875     } else {
2876     $self->{onerror}->(node => $node, type => 'element not allowed');
2877     }
2878     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2879     unshift @nodes, @$sib;
2880     push @$new_todos, @$ch;
2881     } elsif ($nt == 3 or $nt == 4) {
2882     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2883     $self->{onerror}->(node => $node, type => 'character not allowed');
2884     }
2885     } elsif ($nt == 5) {
2886     unshift @nodes, @{$node->child_nodes};
2887     }
2888     }
2889     unless ($has_td) {
2890     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2891     }
2892     return ($new_todos);
2893     },
2894     };
2895    
2896     $Element->{$HTML_NS}->{td} = {
2897     attrs_checker => $GetHTMLAttrsChecker->({
2898     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2899     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2900     }),
2901     checker => $HTMLBlockOrInlineChecker,
2902     };
2903    
2904     $Element->{$HTML_NS}->{th} = {
2905     attrs_checker => $GetHTMLAttrsChecker->({
2906     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2907     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2908     scope => $GetHTMLEnumeratedAttrChecker
2909     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2910     }),
2911     checker => $HTMLBlockOrInlineChecker,
2912     };
2913    
2914     ## TODO: forms
2915 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2916 wakaba 1.1
2917     $Element->{$HTML_NS}->{script} = {
2918 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2919 wakaba 1.1 src => $HTMLURIAttrChecker,
2920     defer => $GetHTMLBooleanAttrChecker->('defer'),
2921     async => $GetHTMLBooleanAttrChecker->('async'),
2922     type => $HTMLIMTAttrChecker,
2923 wakaba 1.9 }),
2924 wakaba 1.1 checker => sub {
2925     my ($self, $todo) = @_;
2926    
2927     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2928     return $HTMLEmptyChecker->($self, $todo);
2929     } else {
2930     ## NOTE: No content model conformance in HTML5 spec.
2931     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2932     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2933     if ((defined $type and $type eq '') or
2934     (defined $language and $language eq '')) {
2935     $type = 'text/javascript';
2936     } elsif (defined $type) {
2937     #
2938     } elsif (defined $language) {
2939     $type = 'text/' . $language;
2940     } else {
2941     $type = 'text/javascript';
2942     }
2943     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2944     type => 'script:'.$type); ## TODO: $type normalization
2945     return $AnyChecker->($self, $todo);
2946     }
2947     },
2948     };
2949    
2950     ## NOTE: When script is disabled.
2951     $Element->{$HTML_NS}->{noscript} = {
2952 wakaba 1.3 attrs_checker => sub {
2953     my ($self, $todo) = @_;
2954    
2955     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2956     ## since the later is not invoked when the |noscript| is used as a
2957     ## transparent element.
2958     unless ($todo->{node}->owner_document->manakai_is_html) {
2959     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2960     }
2961    
2962     $GetHTMLAttrsChecker->({})->($self, $todo);
2963     },
2964 wakaba 1.1 checker => sub {
2965     my ($self, $todo) = @_;
2966    
2967 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2968     my $new_todos = [];
2969     my @nodes = (@{$todo->{node}->child_nodes});
2970    
2971     while (@nodes) {
2972     my $node = shift @nodes;
2973     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2974    
2975     my $nt = $node->node_type;
2976     if ($nt == 1) {
2977     my $node_ns = $node->namespace_uri;
2978     $node_ns = '' unless defined $node_ns;
2979     my $node_ln = $node->manakai_local_name;
2980 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2981     #
2982     } elsif ($node_ns eq $HTML_NS) {
2983 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2984     #
2985     } elsif ($node_ln eq 'meta') {
2986 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2987     #
2988 wakaba 1.3 } else {
2989 wakaba 1.5 $self->{onerror}->(node => $node,
2990     type => 'element not allowed');
2991 wakaba 1.3 }
2992     } else {
2993     $self->{onerror}->(node => $node, type => 'element not allowed');
2994     }
2995     } else {
2996     $self->{onerror}->(node => $node, type => 'element not allowed');
2997     }
2998    
2999     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3000     unshift @nodes, @$sib;
3001     push @$new_todos, @$ch;
3002     } elsif ($nt == 3 or $nt == 4) {
3003     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3004     $self->{onerror}->(node => $node, type => 'character not allowed');
3005     }
3006     } elsif ($nt == 5) {
3007     unshift @nodes, @{$node->child_nodes};
3008     }
3009     }
3010     return ($new_todos);
3011     } else {
3012     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
3013     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
3014     push @$sib, $end;
3015     return ($sib, $ch);
3016     }
3017 wakaba 1.1 },
3018     };
3019 wakaba 1.3
3020     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
3021 wakaba 1.1
3022     $Element->{$HTML_NS}->{'event-source'} = {
3023     attrs_checker => $GetHTMLAttrsChecker->({
3024     src => $HTMLURIAttrChecker,
3025     }),
3026     checker => $HTMLEmptyChecker,
3027     };
3028    
3029     $Element->{$HTML_NS}->{details} = {
3030     attrs_checker => $GetHTMLAttrsChecker->({
3031     open => $GetHTMLBooleanAttrChecker->('open'),
3032     }),
3033     checker => sub {
3034     my ($self, $todo) = @_;
3035    
3036     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3037     my ($sib, $ch)
3038     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
3039     ->($self, $todo);
3040     push @$sib, $end;
3041     return ($sib, $ch);
3042     },
3043     };
3044    
3045     $Element->{$HTML_NS}->{datagrid} = {
3046     attrs_checker => $GetHTMLAttrsChecker->({
3047     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3048     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3049     }),
3050     checker => sub {
3051     my ($self, $todo) = @_;
3052     my $el = $todo->{node};
3053     my $new_todos = [];
3054     my @nodes = (@{$el->child_nodes});
3055    
3056     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3057    
3058     ## Block-table Block* | table | select | datalist | Empty
3059     my $mode = 'any';
3060     while (@nodes) {
3061     my $node = shift @nodes;
3062     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3063    
3064     my $nt = $node->node_type;
3065     if ($nt == 1) {
3066     my $node_ns = $node->namespace_uri;
3067     $node_ns = '' unless defined $node_ns;
3068     my $node_ln = $node->manakai_local_name;
3069     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3070 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3071     #
3072     } elsif ($mode eq 'block') {
3073 wakaba 1.1 $not_allowed = 1
3074     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
3075     } elsif ($mode eq 'any') {
3076     if ($node_ns eq $HTML_NS and
3077     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
3078     $mode = 'none';
3079     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
3080     $mode = 'block';
3081     } else {
3082     $not_allowed = 1;
3083     }
3084     } else {
3085     $not_allowed = 1;
3086     }
3087     $self->{onerror}->(node => $node, type => 'element not allowed')
3088     if $not_allowed;
3089     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3090     unshift @nodes, @$sib;
3091     push @$new_todos, @$ch;
3092     } elsif ($nt == 3 or $nt == 4) {
3093     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3094     $self->{onerror}->(node => $node, type => 'character not allowed');
3095     }
3096     } elsif ($nt == 5) {
3097     unshift @nodes, @{$node->child_nodes};
3098     }
3099     }
3100    
3101     push @$new_todos, $end;
3102     return ($new_todos);
3103     },
3104     };
3105    
3106     $Element->{$HTML_NS}->{command} = {
3107     attrs_checker => $GetHTMLAttrsChecker->({
3108     checked => $GetHTMLBooleanAttrChecker->('checked'),
3109     default => $GetHTMLBooleanAttrChecker->('default'),
3110     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3111     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3112     icon => $HTMLURIAttrChecker,
3113     label => sub { }, ## NOTE: No conformance creteria
3114     radiogroup => sub { }, ## NOTE: No conformance creteria
3115     ## NOTE: |title| has special semantics, but no syntactical difference
3116     type => sub {
3117     my ($self, $attr) = @_;
3118     my $value = $attr->value;
3119     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3120     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3121     }
3122     },
3123     }),
3124     checker => $HTMLEmptyChecker,
3125     };
3126    
3127     $Element->{$HTML_NS}->{menu} = {
3128     attrs_checker => $GetHTMLAttrsChecker->({
3129     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3130     id => sub {
3131     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3132     my ($self, $attr) = @_;
3133     my $value = $attr->value;
3134     if (length $value > 0) {
3135     if ($self->{id}->{$value}) {
3136     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3137     push @{$self->{id}->{$value}}, $attr;
3138     } else {
3139     $self->{id}->{$value} = [$attr];
3140     }
3141     } else {
3142     ## NOTE: MUST contain at least one character
3143     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3144     }
3145     if ($value =~ /[\x09-\x0D\x20]/) {
3146     $self->{onerror}->(node => $attr, type => 'space in ID');
3147     }
3148     $self->{menu}->{$value} ||= $attr;
3149     ## ISSUE: <menu id=""><p contextmenu=""> match?
3150     },
3151     label => sub { }, ## NOTE: No conformance creteria
3152     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3153     }),
3154     checker => sub {
3155     my ($self, $todo) = @_;
3156     my $el = $todo->{node};
3157     my $new_todos = [];
3158     my @nodes = (@{$el->child_nodes});
3159    
3160     my $content = 'li or inline';
3161     while (@nodes) {
3162     my $node = shift @nodes;
3163     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3164    
3165     my $nt = $node->node_type;
3166     if ($nt == 1) {
3167     my $node_ns = $node->namespace_uri;
3168     $node_ns = '' unless defined $node_ns;
3169     my $node_ln = $node->manakai_local_name;
3170     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3171 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3172     #
3173     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3174 wakaba 1.1 if ($content eq 'inline') {
3175     $not_allowed = 1;
3176     } elsif ($content eq 'li or inline') {
3177     $content = 'li';
3178     }
3179     } else {
3180     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3181     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3182     $content = 'inline';
3183     } else {
3184     $not_allowed = 1;
3185     }
3186     }
3187     $self->{onerror}->(node => $node, type => 'element not allowed')
3188     if $not_allowed;
3189     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3190     unshift @nodes, @$sib;
3191     push @$new_todos, @$ch;
3192     } elsif ($nt == 3 or $nt == 4) {
3193     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3194     if ($content eq 'li') {
3195     $self->{onerror}->(node => $node, type => 'character not allowed');
3196     } elsif ($content eq 'li or inline') {
3197     $content = 'inline';
3198     }
3199     }
3200     } elsif ($nt == 5) {
3201     unshift @nodes, @{$node->child_nodes};
3202     }
3203     }
3204    
3205     for (@$new_todos) {
3206     $_->{inline} = 1;
3207     }
3208     return ($new_todos);
3209     },
3210 wakaba 1.8 };
3211    
3212     $Element->{$HTML_NS}->{datatemplate} = {
3213     attrs_checker => $GetHTMLAttrsChecker->({}),
3214     checker => sub {
3215     my ($self, $todo) = @_;
3216     my $el = $todo->{node};
3217     my $new_todos = [];
3218     my @nodes = (@{$el->child_nodes});
3219    
3220     while (@nodes) {
3221     my $node = shift @nodes;
3222     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3223    
3224     my $nt = $node->node_type;
3225     if ($nt == 1) {
3226     my $node_ns = $node->namespace_uri;
3227     $node_ns = '' unless defined $node_ns;
3228     my $node_ln = $node->manakai_local_name;
3229     ## NOTE: |minuses| list is not checked since redundant
3230     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3231     #
3232     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3233     $self->{onerror}->(node => $node,
3234     type => 'element not allowed:datatemplate');
3235     }
3236     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3237     unshift @nodes, @$sib;
3238     push @$new_todos, @$ch;
3239     } elsif ($nt == 3 or $nt == 4) {
3240     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3241     $self->{onerror}->(node => $node, type => 'character not allowed');
3242     }
3243     } elsif ($nt == 5) {
3244     unshift @nodes, @{$node->child_nodes};
3245     }
3246     }
3247     return ($new_todos);
3248     },
3249     is_xml_root => 1,
3250     };
3251    
3252     $Element->{$HTML_NS}->{rule} = {
3253     attrs_checker => $GetHTMLAttrsChecker->({
3254 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3255 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3256 wakaba 1.8 }),
3257     checker => sub {
3258     my ($self, $todo) = @_;
3259    
3260     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3261     my ($sib, $ch) = $AnyChecker->($self, $todo);
3262     push @$sib, $end;
3263     return ($sib, $ch);
3264     },
3265     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3266     ## is applied to some conforming data, results in a conforming DOM tree.":
3267     ## We don't check against this.
3268     };
3269    
3270     $Element->{$HTML_NS}->{nest} = {
3271     attrs_checker => $GetHTMLAttrsChecker->({
3272 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3273     mode => sub {
3274     my ($self, $attr) = @_;
3275     my $value = $attr->value;
3276     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3277     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3278     }
3279     },
3280 wakaba 1.8 }),
3281     checker => $HTMLEmptyChecker,
3282 wakaba 1.1 };
3283    
3284     $Element->{$HTML_NS}->{legend} = {
3285     attrs_checker => $GetHTMLAttrsChecker->({}),
3286     checker => sub {
3287     my ($self, $todo) = @_;
3288    
3289     my $parent = $todo->{node}->manakai_parent_element;
3290     if (defined $parent) {
3291     my $nsuri = $parent->namespace_uri;
3292     $nsuri = '' unless defined $nsuri;
3293     my $ln = $parent->manakai_local_name;
3294     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3295     return $HTMLInlineChecker->($self, $todo);
3296     } else {
3297 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3298 wakaba 1.1 }
3299     } else {
3300     return $HTMLInlineChecker->($self, $todo);
3301     }
3302    
3303     ## ISSUE: Content model is defined only for fieldset/legend,
3304     ## details/legend, and figure/legend.
3305     },
3306     };
3307    
3308     $Element->{$HTML_NS}->{div} = {
3309     attrs_checker => $GetHTMLAttrsChecker->({}),
3310     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3311     };
3312    
3313     $Element->{$HTML_NS}->{font} = {
3314     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3315     checker => $HTMLTransparentChecker,
3316     };
3317    
3318     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3319    
3320     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24