/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download)
Fri Aug 17 05:55:44 2007 UTC (17 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +62 -7 lines
++ whatpm/t/ChangeLog	17 Aug 2007 05:53:49 -0000
2007-08-17  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat: New tests for |noscript| in |head| are added.

	* content-model-2.dat: A wrong result for |xml:lang| in
	HTML document is fixed.

++ whatpm/Whatpm/ChangeLog	17 Aug 2007 05:54:48 -0000
2007-08-17  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm (_check_get_children): Support
	for |noscript| in |head|.

++ whatpm/Whatpm/ContentChecker/ChangeLog	17 Aug 2007 05:55:27 -0000
2007-08-17  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Support for |noscript| in |head|.  Make an
	error for |noscript| in XML document.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10     noscript 1
11     /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27     /,
28     },
29     };
30    
31     my $HTMLStrictlyInlineLevelElements = {
32     $HTML_NS => {
33     qw/
34     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
35     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
36     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
37     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
38     script 1 noscript 1 event-source 1 command 1 font 1
39     /,
40     },
41     };
42    
43     my $HTMLStructuredInlineLevelElements = {
44     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
45     };
46    
47     my $HTMLInteractiveElements = {
48     $HTML_NS => {a => 1, details => 1, datagrid => 1},
49     };
50     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
51     ## of interactive elements
52    
53     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
54    
55     #my $HTMLSemiTransparentElements = {
56     # $HTML_NS => {qw/video 1 audio 1/},
57     #};
58    
59     my $HTMLEmbededElements = {
60     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
61     };
62    
63     ## Empty
64     my $HTMLEmptyChecker = sub {
65     my ($self, $todo) = @_;
66     my $el = $todo->{node};
67     my $new_todos = [];
68     my @nodes = (@{$el->child_nodes});
69    
70     while (@nodes) {
71     my $node = shift @nodes;
72     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
73    
74     my $nt = $node->node_type;
75     if ($nt == 1) {
76     ## NOTE: |minuses| list is not checked since redundant
77     $self->{onerror}->(node => $node, type => 'element not allowed');
78     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
79     unshift @nodes, @$sib;
80     push @$new_todos, @$ch;
81     } elsif ($nt == 3 or $nt == 4) {
82     if ($node->data =~ /[^\x09-\x0D\x20]/) {
83     $self->{onerror}->(node => $node, type => 'character not allowed');
84     }
85     } elsif ($nt == 5) {
86     unshift @nodes, @{$node->child_nodes};
87     }
88     }
89     return ($new_todos);
90     };
91    
92     ## Text
93     my $HTMLTextChecker = sub {
94     my ($self, $todo) = @_;
95     my $el = $todo->{node};
96     my $new_todos = [];
97     my @nodes = (@{$el->child_nodes});
98    
99     while (@nodes) {
100     my $node = shift @nodes;
101     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
102    
103     my $nt = $node->node_type;
104     if ($nt == 1) {
105     ## NOTE: |minuses| list is not checked since redundant
106     $self->{onerror}->(node => $node, type => 'element not allowed');
107     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
108     unshift @nodes, @$sib;
109     push @$new_todos, @$ch;
110     } elsif ($nt == 5) {
111     unshift @nodes, @{$node->child_nodes};
112     }
113     }
114     return ($new_todos);
115     };
116    
117     ## Zero or more |html:style| elements,
118     ## followed by zero or more block-level elements
119     my $HTMLStylableBlockChecker = sub {
120     my ($self, $todo) = @_;
121     my $el = $todo->{node};
122     my $new_todos = [];
123     my @nodes = (@{$el->child_nodes});
124    
125     my $has_non_style;
126     while (@nodes) {
127     my $node = shift @nodes;
128     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
129    
130     my $nt = $node->node_type;
131     if ($nt == 1) {
132     my $node_ns = $node->namespace_uri;
133     $node_ns = '' unless defined $node_ns;
134     my $node_ln = $node->manakai_local_name;
135     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
136     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
137     $not_allowed = 1 if $has_non_style or
138     not $node->has_attribute_ns (undef, 'scoped');
139     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
140     $has_non_style = 1;
141     } else {
142     $has_non_style = 1;
143     $not_allowed = 1;
144     }
145     $self->{onerror}->(node => $node, type => 'element not allowed')
146     if $not_allowed;
147     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
148     unshift @nodes, @$sib;
149     push @$new_todos, @$ch;
150     } elsif ($nt == 3 or $nt == 4) {
151     if ($node->data =~ /[^\x09-\x0D\x20]/) {
152     $self->{onerror}->(node => $node, type => 'character not allowed');
153     }
154     } elsif ($nt == 5) {
155     unshift @nodes, @{$node->child_nodes};
156     }
157     }
158     return ($new_todos);
159     }; # $HTMLStylableBlockChecker
160    
161     ## Zero or more block-level elements
162     my $HTMLBlockChecker = sub {
163     my ($self, $todo) = @_;
164     my $el = $todo->{node};
165     my $new_todos = [];
166     my @nodes = (@{$el->child_nodes});
167    
168     while (@nodes) {
169     my $node = shift @nodes;
170     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
171    
172     my $nt = $node->node_type;
173     if ($nt == 1) {
174     my $node_ns = $node->namespace_uri;
175     $node_ns = '' unless defined $node_ns;
176     my $node_ln = $node->manakai_local_name;
177     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
178     $not_allowed = 1
179     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
180     $self->{onerror}->(node => $node, type => 'element not allowed')
181     if $not_allowed;
182     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
183     unshift @nodes, @$sib;
184     push @$new_todos, @$ch;
185     } elsif ($nt == 3 or $nt == 4) {
186     if ($node->data =~ /[^\x09-\x0D\x20]/) {
187     $self->{onerror}->(node => $node, type => 'character not allowed');
188     }
189     } elsif ($nt == 5) {
190     unshift @nodes, @{$node->child_nodes};
191     }
192     }
193     return ($new_todos);
194     }; # $HTMLBlockChecker
195    
196     ## Inline-level content
197     my $HTMLInlineChecker = sub {
198     my ($self, $todo) = @_;
199     my $el = $todo->{node};
200     my $new_todos = [];
201     my @nodes = (@{$el->child_nodes});
202    
203     while (@nodes) {
204     my $node = shift @nodes;
205     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
206    
207     my $nt = $node->node_type;
208     if ($nt == 1) {
209     my $node_ns = $node->namespace_uri;
210     $node_ns = '' unless defined $node_ns;
211     my $node_ln = $node->manakai_local_name;
212     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
213     $not_allowed = 1
214     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
215     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
216     $self->{onerror}->(node => $node, type => 'element not allowed')
217     if $not_allowed;
218     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
219     unshift @nodes, @$sib;
220     push @$new_todos, @$ch;
221     } elsif ($nt == 5) {
222     unshift @nodes, @{$node->child_nodes};
223     }
224     }
225    
226     for (@$new_todos) {
227     $_->{inline} = 1;
228     }
229     return ($new_todos);
230     }; # $HTMLInlineChecker
231    
232     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
233     ## TODO: check significant content
234    
235     ## Strictly inline-level content
236     my $HTMLStrictlyInlineChecker = sub {
237     my ($self, $todo) = @_;
238     my $el = $todo->{node};
239     my $new_todos = [];
240     my @nodes = (@{$el->child_nodes});
241    
242     while (@nodes) {
243     my $node = shift @nodes;
244     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
245    
246     my $nt = $node->node_type;
247     if ($nt == 1) {
248     my $node_ns = $node->namespace_uri;
249     $node_ns = '' unless defined $node_ns;
250     my $node_ln = $node->manakai_local_name;
251     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
252     $not_allowed = 1
253     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
254     $self->{onerror}->(node => $node, type => 'element not allowed')
255     if $not_allowed;
256     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
257     unshift @nodes, @$sib;
258     push @$new_todos, @$ch;
259     } elsif ($nt == 5) {
260     unshift @nodes, @{$node->child_nodes};
261     }
262     }
263    
264     for (@$new_todos) {
265     $_->{inline} = 1;
266     $_->{strictly_inline} = 1;
267     }
268     return ($new_todos);
269     }; # $HTMLStrictlyInlineChecker
270    
271     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
272     ## TODO: check significant content
273    
274     ## Inline-level or strictly inline-kevek content
275     my $HTMLInlineOrStrictlyInlineChecker = sub {
276     my ($self, $todo) = @_;
277     my $el = $todo->{node};
278     my $new_todos = [];
279     my @nodes = (@{$el->child_nodes});
280    
281     while (@nodes) {
282     my $node = shift @nodes;
283     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
284    
285     my $nt = $node->node_type;
286     if ($nt == 1) {
287     my $node_ns = $node->namespace_uri;
288     $node_ns = '' unless defined $node_ns;
289     my $node_ln = $node->manakai_local_name;
290     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
291     if ($todo->{strictly_inline}) {
292     $not_allowed = 1
293     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
294     } else {
295     $not_allowed = 1
296     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
297     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
298     }
299     $self->{onerror}->(node => $node, type => 'element not allowed')
300     if $not_allowed;
301     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
302     unshift @nodes, @$sib;
303     push @$new_todos, @$ch;
304     } elsif ($nt == 5) {
305     unshift @nodes, @{$node->child_nodes};
306     }
307     }
308    
309     for (@$new_todos) {
310     $_->{inline} = 1;
311     $_->{strictly_inline} = 1;
312     }
313     return ($new_todos);
314     }; # $HTMLInlineOrStrictlyInlineChecker
315    
316     my $HTMLSignificantInlineOrStrictlyInlineChecker
317     = $HTMLInlineOrStrictlyInlineChecker;
318     ## TODO: check significant content
319    
320     my $HTMLBlockOrInlineChecker = sub {
321     my ($self, $todo) = @_;
322     my $el = $todo->{node};
323     my $new_todos = [];
324     my @nodes = (@{$el->child_nodes});
325    
326     my $content = 'block-or-inline'; # or 'block' or 'inline'
327     my @block_not_inline;
328     while (@nodes) {
329     my $node = shift @nodes;
330     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
331    
332     my $nt = $node->node_type;
333     if ($nt == 1) {
334     my $node_ns = $node->namespace_uri;
335     $node_ns = '' unless defined $node_ns;
336     my $node_ln = $node->manakai_local_name;
337     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
338     if ($content eq 'block') {
339     $not_allowed = 1
340     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
341     } elsif ($content eq 'inline') {
342     $not_allowed = 1
343     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
344     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
345     } else {
346     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
347     my $is_inline
348     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
349     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
350    
351     push @block_not_inline, $node
352     if $is_block and not $is_inline and not $not_allowed;
353     unless ($is_block) {
354     $content = 'inline';
355     for (@block_not_inline) {
356     $self->{onerror}->(node => $_, type => 'element not allowed');
357     }
358     $not_allowed = 1 unless $is_inline;
359     }
360     }
361     $self->{onerror}->(node => $node, type => 'element not allowed')
362     if $not_allowed;
363     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
364     unshift @nodes, @$sib;
365     push @$new_todos, @$ch;
366     } elsif ($nt == 3 or $nt == 4) {
367     if ($node->data =~ /[^\x09-\x0D\x20]/) {
368     if ($content eq 'block') {
369     $self->{onerror}->(node => $node, type => 'character not allowed');
370     } else {
371     $content = 'inline';
372     for (@block_not_inline) {
373     $self->{onerror}->(node => $_, type => 'element not allowed');
374     }
375     }
376     }
377     } elsif ($nt == 5) {
378     unshift @nodes, @{$node->child_nodes};
379     }
380     }
381    
382     if ($content eq 'inline') {
383     for (@$new_todos) {
384     $_->{inline} = 1;
385     }
386     }
387     return ($new_todos);
388     };
389    
390     ## Zero or more XXX element, then either block-level or inline-level
391     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
392     my ($elnsuri, $ellname) = @_;
393     return sub {
394     my ($self, $todo) = @_;
395     my $el = $todo->{node};
396     my $new_todos = [];
397     my @nodes = (@{$el->child_nodes});
398    
399     my $has_non_style;
400     my $content = 'block-or-inline'; # or 'block' or 'inline'
401     my @block_not_inline;
402     while (@nodes) {
403     my $node = shift @nodes;
404     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
405    
406     my $nt = $node->node_type;
407     if ($nt == 1) {
408     my $node_ns = $node->namespace_uri;
409     $node_ns = '' unless defined $node_ns;
410     my $node_ln = $node->manakai_local_name;
411     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
412     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
413     $not_allowed = 1 if $has_non_style;
414     if ($ellname eq 'style' and
415     not $node->has_attribute_ns (undef, 'scoped')) {
416     $not_allowed = 1;
417     }
418     } elsif ($content eq 'block') {
419     $has_non_style = 1;
420     $not_allowed = 1
421     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
422     } elsif ($content eq 'inline') {
423     $has_non_style = 1;
424     $not_allowed = 1
425     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
426     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
427     } else {
428     $has_non_style = 1;
429     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
430     my $is_inline
431     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
432     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
433    
434     push @block_not_inline, $node
435     if $is_block and not $is_inline and not $not_allowed;
436     unless ($is_block) {
437     $content = 'inline';
438     for (@block_not_inline) {
439     $self->{onerror}->(node => $_, type => 'element not allowed');
440     }
441     $not_allowed = 1 unless $is_inline;
442     }
443     }
444     $self->{onerror}->(node => $node, type => 'element not allowed')
445     if $not_allowed;
446     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
447     unshift @nodes, @$sib;
448     push @$new_todos, @$ch;
449     } elsif ($nt == 3 or $nt == 4) {
450     if ($node->data =~ /[^\x09-\x0D\x20]/) {
451     $has_non_style = 1;
452     if ($content eq 'block') {
453     $self->{onerror}->(node => $node, type => 'character not allowed');
454     } else {
455     $content = 'inline';
456     for (@block_not_inline) {
457     $self->{onerror}->(node => $_, type => 'element not allowed');
458     }
459     }
460     }
461     } elsif ($nt == 5) {
462     unshift @nodes, @{$node->child_nodes};
463     }
464     }
465    
466     if ($content eq 'inline') {
467     for (@$new_todos) {
468     $_->{inline} = 1;
469     }
470     }
471     return ($new_todos);
472     };
473     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
474    
475     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
476    
477     our $AttrChecker;
478    
479     my $GetHTMLEnumeratedAttrChecker = sub {
480     my $states = shift; # {value => conforming ? 1 : -1}
481     return sub {
482     my ($self, $attr) = @_;
483     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
484     if ($states->{$value} > 0) {
485     #
486     } elsif ($states->{$value}) {
487     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
488     } else {
489     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
490     }
491     };
492     }; # $GetHTMLEnumeratedAttrChecker
493    
494     my $GetHTMLBooleanAttrChecker = sub {
495     my $local_name = shift;
496     return sub {
497     my ($self, $attr) = @_;
498     my $value = $attr->value;
499     unless ($value eq $local_name or $value eq '') {
500     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
501     }
502     };
503     }; # $GetHTMLBooleanAttrChecker
504    
505     ## |rel| attribute (unordered set of space separated tokens,
506     ## whose allowed values are defined by the section on link types)
507     my $HTMLLinkTypesAttrChecker = sub {
508     my ($a_or_area, $self, $attr) = @_;
509     my %word;
510     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
511     unless ($word{$word}) {
512     $word{$word} = 1;
513     } else {
514     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
515     }
516     }
517     ## NOTE: Case sensitive match (since HTML5 spec does not say link
518     ## types are case-insensitive and it says "The value should not
519     ## be confusingly similar to any other defined value (e.g.
520     ## differing only in case).").
521     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
522     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
523     ## values to be used conformingly.
524     require Whatpm::_LinkTypeList;
525     our $LinkType;
526     for my $word (keys %word) {
527     my $def = $LinkType->{$word};
528     if (defined $def) {
529     if ($def->{status} eq 'accepted') {
530     if (defined $def->{effect}->[$a_or_area]) {
531     #
532     } else {
533     $self->{onerror}->(node => $attr,
534     type => 'link type:bad context:'.$word);
535     }
536     } elsif ($def->{status} eq 'proposal') {
537     $self->{onerror}->(node => $attr, level => 's',
538     type => 'link type:proposed:'.$word);
539     } else { # rejected or synonym
540     $self->{onerror}->(node => $attr,
541     type => 'link type:non-conforming:'.$word);
542     }
543     if ($def->{unique}) {
544     unless ($self->{has_link_type}->{$word}) {
545     $self->{has_link_type}->{$word} = 1;
546     } else {
547     $self->{onerror}->(node => $attr,
548     type => 'link type:duplicate:'.$word);
549     }
550     }
551     } else {
552     $self->{onerror}->(node => $attr, level => 'unsupported',
553     type => 'link type:'.$word);
554     }
555     }
556     ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
557     ## says that using both X-Pingback: header field and HTML
558     ## <link rel=pingback> is deprecated and if both appears they
559     ## SHOULD contain exactly the same value.
560     ## ISSUE: Pingback 1.0 specification defines the exact representation
561     ## of its link element, which cannot be tested by the current arch.
562     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
563     ## include any string that matches to the pattern for the rel=pingback link,
564     ## which again inpossible to test.
565     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
566     }; # $HTMLLinkTypesAttrChecker
567    
568     ## URI (or IRI)
569     my $HTMLURIAttrChecker = sub {
570     my ($self, $attr) = @_;
571     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
572     my $value = $attr->value;
573     Whatpm::URIChecker->check_iri_reference ($value, sub {
574     my %opt = @_;
575     $self->{onerror}->(node => $attr, level => $opt{level},
576     type => 'URI::'.$opt{type}.
577     (defined $opt{position} ? ':'.$opt{position} : ''));
578     });
579     }; # $HTMLURIAttrChecker
580    
581     ## A space separated list of one or more URIs (or IRIs)
582     my $HTMLSpaceURIsAttrChecker = sub {
583     my ($self, $attr) = @_;
584     my $i = 0;
585     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
586     Whatpm::URIChecker->check_iri_reference ($value, sub {
587     my %opt = @_;
588     $self->{onerror}->(node => $attr, level => $opt{level},
589 wakaba 1.2 type => 'URIs:'.':'.
590     $opt{type}.':'.$i.
591 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
592     });
593     $i++;
594     }
595     ## ISSUE: Relative references?
596     ## ISSUE: Leading or trailing white spaces are conformant?
597     ## ISSUE: A sequence of white space characters are conformant?
598     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
599     ## NOTE: Duplication seems not an error.
600     }; # $HTMLSpaceURIsAttrChecker
601    
602     my $HTMLDatetimeAttrChecker = sub {
603     my ($self, $attr) = @_;
604     my $value = $attr->value;
605     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
606     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
607     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
608     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
609     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
610     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
611     if $d < 1 or
612     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
613     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
614     if $M == 2 and $d == 29 and
615     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
616     } else {
617     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
618     }
619     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
620     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
621     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
622     if defined $s and $s > 59;
623     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
624     if $zh > 23;
625     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
626     if $zm > 59;
627     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
628     } else {
629     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
630     }
631     }; # $HTMLDatetimeAttrChecker
632    
633     my $HTMLIntegerAttrChecker = sub {
634     my ($self, $attr) = @_;
635     my $value = $attr->value;
636     unless ($value =~ /\A-?[0-9]+\z/) {
637     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
638     }
639     }; # $HTMLIntegerAttrChecker
640    
641     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
642     my $range_check = shift;
643     return sub {
644     my ($self, $attr) = @_;
645     my $value = $attr->value;
646     if ($value =~ /\A[0-9]+\z/) {
647     unless ($range_check->($value + 0)) {
648     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
649     }
650     } else {
651     $self->{onerror}->(node => $attr,
652     type => 'nninteger:syntax error');
653     }
654     };
655     }; # $GetHTMLNonNegativeIntegerAttrChecker
656    
657     my $GetHTMLFloatingPointNumberAttrChecker = sub {
658     my $range_check = shift;
659     return sub {
660     my ($self, $attr) = @_;
661     my $value = $attr->value;
662     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
663     unless ($range_check->($value + 0)) {
664     $self->{onerror}->(node => $attr, type => 'float:out of range');
665     }
666     } else {
667     $self->{onerror}->(node => $attr,
668     type => 'float:syntax error');
669     }
670     };
671     }; # $GetHTMLFloatingPointNumberAttrChecker
672    
673     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
674     ## ISSUE: RFC 2046 does not define syntax of media types.
675     ## ISSUE: The definition of "a valid MIME type" is unknown.
676     ## Syntactical correctness?
677     my $HTMLIMTAttrChecker = sub {
678     my ($self, $attr) = @_;
679     my $value = $attr->value;
680     ## ISSUE: RFC 2045 Content-Type header field allows insertion
681     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
682     ## ISSUE: RFC 2231 extension? Maybe no.
683     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
684     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
685     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
686     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
687     my @type = ($1, $2);
688     my $param = $3;
689     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
690     if (defined $2) {
691     push @type, $1 => $2;
692     } else {
693     my $n = $1;
694     my $v = $2;
695     $v =~ s/\\(.)/$1/gs;
696     push @type, $n => $v;
697     }
698     }
699     require Whatpm::IMTChecker;
700     Whatpm::IMTChecker->check_imt (sub {
701     my %opt = @_;
702     $self->{onerror}->(node => $attr, level => $opt{level},
703     type => 'IMT:'.$opt{type});
704     }, @type);
705     } else {
706     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
707     }
708     }; # $HTMLIMTAttrChecker
709    
710     my $HTMLLanguageTagAttrChecker = sub {
711     my ($self, $attr) = @_;
712     $self->{onerror}->(node => $attr, level => 'unsupported',
713     type => 'language tag');
714     if ($attr->value eq '') {
715     $self->{onerror}->(node => $attr, type => 'language tag:syntax error');
716     }
717     ## TODO: RFC 3066 test
718     ## ISSUE: RFC 4646 (3066bis)?
719     }; # $HTMLLanguageTagAttrChecker
720    
721     ## "A valid media query [MQ]"
722     my $HTMLMQAttrChecker = sub {
723     my ($self, $attr) = @_;
724     $self->{onerror}->(node => $attr, level => 'unsupported',
725     type => 'media query');
726     ## ISSUE: What is "a valid media query"?
727     }; # $HTMLMQAttrChecker
728    
729     my $HTMLEventHandlerAttrChecker = sub {
730     my ($self, $attr) = @_;
731     $self->{onerror}->(node => $attr, level => 'unsupported',
732     type => 'event handler');
733     ## TODO: MUST contain valid ECMAScript code matching the
734     ## ECMAScript |FunctionBody| production. [ECMA262]
735     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
736     ## ISSUE: Automatic semicolon insertion does not apply?
737     ## ISSUE: Other script languages?
738     }; # $HTMLEventHandlerAttrChecker
739    
740     my $HTMLUsemapAttrChecker = sub {
741     my ($self, $attr) = @_;
742     ## MUST be a valid hashed ID reference to a |map| element
743     my $value = $attr->value;
744     if ($value =~ s/^#//) {
745     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
746     push @{$self->{usemap}}, [$value => $attr];
747     } else {
748     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
749     }
750     ## NOTE: Space characters in hashed ID references are conforming.
751     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
752     }; # $HTMLUsemapAttrChecker
753    
754     my $HTMLTargetAttrChecker = sub {
755     my ($self, $attr) = @_;
756     my $value = $attr->value;
757     if ($value =~ /^_/) {
758     $value = lc $value; ## ISSUE: ASCII case-insentitive?
759     unless ({
760     _self => 1, _parent => 1, _top => 1,
761     }->{$value}) {
762     $self->{onerror}->(node => $attr,
763     type => 'reserved browsing context name');
764     }
765     } else {
766     #$ ISSUE: An empty string is conforming?
767     }
768     }; # $HTMLTargetAttrChecker
769    
770     my $HTMLAttrChecker = {
771     id => sub {
772     ## NOTE: |map| has its own variant of |id=""| checker
773     my ($self, $attr) = @_;
774     my $value = $attr->value;
775     if (length $value > 0) {
776     if ($self->{id}->{$value}) {
777     $self->{onerror}->(node => $attr, type => 'duplicate ID');
778     push @{$self->{id}->{$value}}, $attr;
779     } else {
780     $self->{id}->{$value} = [$attr];
781     }
782     if ($value =~ /[\x09-\x0D\x20]/) {
783     $self->{onerror}->(node => $attr, type => 'space in ID');
784     }
785     } else {
786     ## NOTE: MUST contain at least one character
787     $self->{onerror}->(node => $attr, type => 'empty attribute value');
788     }
789     },
790     title => sub {}, ## NOTE: No conformance creteria
791     lang => sub {
792     my ($self, $attr) = @_;
793     $self->{onerror}->(node => $attr, level => 'unsupported',
794     type => 'language tag');
795     ## TODO: RFC 3066 or empty test
796     ## ISSUE: RFC 4646 (3066bis)?
797     unless ($attr->owner_document->manakai_is_html) {
798     $self->{onerror}->(node => $attr, type => 'in XML:lang');
799     }
800     },
801     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
802     class => sub {
803     my ($self, $attr) = @_;
804     my %word;
805     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
806     unless ($word{$word}) {
807     $word{$word} = 1;
808     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
809     } else {
810     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
811     }
812     }
813     },
814     contextmenu => sub {
815     my ($self, $attr) = @_;
816     my $value = $attr->value;
817     push @{$self->{contextmenu}}, [$value => $attr];
818     ## ISSUE: "The value must be the ID of a menu element in the DOM."
819     ## What is "in the DOM"? A menu Element node that is not part
820     ## of the Document tree is in the DOM? A menu Element node that
821     ## belong to another Document tree is in the DOM?
822     },
823     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
824     tabindex => $HTMLIntegerAttrChecker,
825     };
826    
827     for (qw/
828     onabort onbeforeunload onblur onchange onclick oncontextmenu
829     ondblclick ondrag ondragend ondragenter ondragleave ondragover
830     ondragstart ondrop onerror onfocus onkeydown onkeypress
831     onkeyup onload onmessage onmousedown onmousemove onmouseout
832     onmouseover onmouseup onmousewheel onresize onscroll onselect
833     onsubmit onunload
834     /) {
835     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
836     }
837    
838     my $GetHTMLAttrsChecker = sub {
839     my $element_specific_checker = shift;
840     return sub {
841     my ($self, $todo) = @_;
842     for my $attr (@{$todo->{node}->attributes}) {
843     my $attr_ns = $attr->namespace_uri;
844     $attr_ns = '' unless defined $attr_ns;
845     my $attr_ln = $attr->manakai_local_name;
846     my $checker;
847     if ($attr_ns eq '') {
848     $checker = $element_specific_checker->{$attr_ln}
849     || $HTMLAttrChecker->{$attr_ln};
850     }
851     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
852     || $AttrChecker->{$attr_ns}->{''};
853     if ($checker) {
854     $checker->($self, $attr, $todo);
855     } else {
856     $self->{onerror}->(node => $attr, level => 'unsupported',
857     type => 'attribute');
858     ## ISSUE: No comformance createria for unknown attributes in the spec
859     }
860     }
861     };
862     }; # $GetHTMLAttrsChecker
863    
864     our $Element;
865     our $ElementDefault;
866     our $AnyChecker;
867    
868     $Element->{$HTML_NS}->{''} = {
869     attrs_checker => $GetHTMLAttrsChecker->({}),
870     checker => $ElementDefault->{checker},
871     };
872    
873     $Element->{$HTML_NS}->{html} = {
874     is_root => 1,
875     attrs_checker => $GetHTMLAttrsChecker->({
876     xmlns => sub {
877     my ($self, $attr) = @_;
878     my $value = $attr->value;
879     unless ($value eq $HTML_NS) {
880     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
881     }
882     unless ($attr->owner_document->manakai_is_html) {
883     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
884     ## TODO: Test
885     }
886     },
887     }),
888     checker => sub {
889     my ($self, $todo) = @_;
890     my $el = $todo->{node};
891     my $new_todos = [];
892     my @nodes = (@{$el->child_nodes});
893    
894     my $phase = 'before head';
895     while (@nodes) {
896     my $node = shift @nodes;
897     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
898    
899     my $nt = $node->node_type;
900     if ($nt == 1) {
901     my $node_ns = $node->namespace_uri;
902     $node_ns = '' unless defined $node_ns;
903     my $node_ln = $node->manakai_local_name;
904     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
905     if ($phase eq 'before head') {
906     if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
907     $phase = 'after head';
908     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
909     $self->{onerror}->(node => $node, type => 'ps element missing:head');
910     $phase = 'after body';
911     } else {
912     $not_allowed = 1;
913     # before head
914     }
915     } elsif ($phase eq 'after head') {
916     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
917     $phase = 'after body';
918     } else {
919     $not_allowed = 1;
920     # after head
921     }
922     } else { #elsif ($phase eq 'after body') {
923     $not_allowed = 1;
924     # after body
925     }
926     $self->{onerror}->(node => $node, type => 'element not allowed')
927     if $not_allowed;
928     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
929     unshift @nodes, @$sib;
930     push @$new_todos, @$ch;
931     } elsif ($nt == 3 or $nt == 4) {
932     if ($node->data =~ /[^\x09-\x0D\x20]/) {
933     $self->{onerror}->(node => $node, type => 'character not allowed');
934     }
935     } elsif ($nt == 5) {
936     unshift @nodes, @{$node->child_nodes};
937     }
938     }
939    
940     if ($phase eq 'before head') {
941     $self->{onerror}->(node => $el, type => 'child element missing:head');
942     $self->{onerror}->(node => $el, type => 'child element missing:body');
943     } elsif ($phase eq 'after head') {
944     $self->{onerror}->(node => $el, type => 'child element missing:body');
945     }
946    
947     return ($new_todos);
948     },
949     };
950    
951     $Element->{$HTML_NS}->{head} = {
952     attrs_checker => $GetHTMLAttrsChecker->({}),
953     checker => sub {
954     my ($self, $todo) = @_;
955     my $el = $todo->{node};
956     my $new_todos = [];
957     my @nodes = (@{$el->child_nodes});
958    
959     my $has_title;
960     my $phase = 'initial'; # 'after charset', 'after base'
961     while (@nodes) {
962     my $node = shift @nodes;
963     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
964    
965     my $nt = $node->node_type;
966     if ($nt == 1) {
967     my $node_ns = $node->namespace_uri;
968     $node_ns = '' unless defined $node_ns;
969     my $node_ln = $node->manakai_local_name;
970     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
971     if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
972     $phase = 'after base';
973     unless ($has_title) {
974     $has_title = 1;
975     } else {
976     $not_allowed = 1;
977     }
978     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
979     if ($node->has_attribute_ns (undef, 'charset')) {
980     if ($phase eq 'initial') {
981     $phase = 'after charset';
982     } else {
983     $not_allowed = 1;
984     ## NOTE: See also |base|'s "contexts" field in the spec
985     }
986     } else {
987     $phase = 'after base';
988     }
989     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
990     if ($phase eq 'initial' or $phase eq 'after charset') {
991     $phase = 'after base';
992     } else {
993     $not_allowed = 1;
994     }
995     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
996     $phase = 'after base';
997     if ($node->has_attribute_ns (undef, 'scoped')) {
998     $not_allowed = 1;
999     }
1000     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1001     $phase = 'after base';
1002     } else {
1003     $not_allowed = 1;
1004     }
1005     $self->{onerror}->(node => $node, type => 'element not allowed')
1006     if $not_allowed;
1007 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1008 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1009     unshift @nodes, @$sib;
1010     push @$new_todos, @$ch;
1011     } elsif ($nt == 3 or $nt == 4) {
1012     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1013     $self->{onerror}->(node => $node, type => 'character not allowed');
1014     }
1015     } elsif ($nt == 5) {
1016     unshift @nodes, @{$node->child_nodes};
1017     }
1018     }
1019     unless ($has_title) {
1020     $self->{onerror}->(node => $el, type => 'child element missing:title');
1021     }
1022     return ($new_todos);
1023     },
1024     };
1025    
1026     $Element->{$HTML_NS}->{title} = {
1027     attrs_checker => $GetHTMLAttrsChecker->({}),
1028     checker => $HTMLTextChecker,
1029     };
1030    
1031 wakaba 1.2 ## TODO: |base| with |href| MUST come before elements with URI attributes.
1032     ## For example: <title xml:base=""/><base href=""/> is non-conformant.
1033     ## TODO: |base| with |target| MUST come before any hyperlink.
1034 wakaba 1.1 $Element->{$HTML_NS}->{base} = {
1035     attrs_checker => $GetHTMLAttrsChecker->({
1036     href => $HTMLURIAttrChecker,
1037     target => $HTMLTargetAttrChecker,
1038     }),
1039     checker => $HTMLEmptyChecker,
1040     };
1041    
1042     $Element->{$HTML_NS}->{link} = {
1043     attrs_checker => sub {
1044     my ($self, $todo) = @_;
1045     $GetHTMLAttrsChecker->({
1046     href => $HTMLURIAttrChecker,
1047     rel => sub { $HTMLLinkTypesAttrChecker->(0, @_) },
1048     media => $HTMLMQAttrChecker,
1049     hreflang => $HTMLLanguageTagAttrChecker,
1050     type => $HTMLIMTAttrChecker,
1051     ## NOTE: Though |title| has special semantics,
1052     ## syntactically same as the |title| as global attribute.
1053     })->($self, $todo);
1054     unless ($todo->{node}->has_attribute_ns (undef, 'href')) {
1055     $self->{onerror}->(node => $todo->{node},
1056     type => 'attribute missing:href');
1057     }
1058     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1059     $self->{onerror}->(node => $todo->{node},
1060     type => 'attribute missing:rel');
1061     }
1062     },
1063     checker => $HTMLEmptyChecker,
1064     };
1065    
1066     $Element->{$HTML_NS}->{meta} = {
1067     attrs_checker => sub {
1068     my ($self, $todo) = @_;
1069     my $name_attr;
1070     my $http_equiv_attr;
1071     my $charset_attr;
1072     my $content_attr;
1073     for my $attr (@{$todo->{node}->attributes}) {
1074     my $attr_ns = $attr->namespace_uri;
1075     $attr_ns = '' unless defined $attr_ns;
1076     my $attr_ln = $attr->manakai_local_name;
1077     my $checker;
1078     if ($attr_ns eq '') {
1079     if ($attr_ln eq 'content') {
1080     $content_attr = $attr;
1081     $checker = 1;
1082     } elsif ($attr_ln eq 'name') {
1083     $name_attr = $attr;
1084     $checker = 1;
1085     } elsif ($attr_ln eq 'http-equiv') {
1086     $http_equiv_attr = $attr;
1087     $checker = 1;
1088     } elsif ($attr_ln eq 'charset') {
1089     $charset_attr = $attr;
1090     $checker = 1;
1091     } else {
1092     $checker = $HTMLAttrChecker->{$attr_ln}
1093     || $AttrChecker->{$attr_ns}->{$attr_ln}
1094     || $AttrChecker->{$attr_ns}->{''};
1095     }
1096     } else {
1097     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1098     || $AttrChecker->{$attr_ns}->{''};
1099     }
1100     if ($checker) {
1101     $checker->($self, $attr) if ref $checker;
1102     } else {
1103     $self->{onerror}->(node => $attr, level => 'unsupported',
1104     type => 'attribute');
1105     ## ISSUE: No comformance createria for unknown attributes in the spec
1106     }
1107     }
1108    
1109     if (defined $name_attr) {
1110     if (defined $http_equiv_attr) {
1111     $self->{onerror}->(node => $http_equiv_attr,
1112     type => 'attribute not allowed');
1113     } elsif (defined $charset_attr) {
1114     $self->{onerror}->(node => $charset_attr,
1115     type => 'attribute not allowed');
1116     }
1117     my $metadata_name = $name_attr->value;
1118     my $metadata_value;
1119     if (defined $content_attr) {
1120     $metadata_value = $content_attr->value;
1121     } else {
1122     $self->{onerror}->(node => $todo->{node},
1123     type => 'attribute missing:content');
1124     $metadata_value = '';
1125     }
1126     } elsif (defined $http_equiv_attr) {
1127     if (defined $charset_attr) {
1128     $self->{onerror}->(node => $charset_attr,
1129     type => 'attribute not allowed');
1130     }
1131     unless (defined $content_attr) {
1132     $self->{onerror}->(node => $todo->{node},
1133     type => 'attribute missing:content');
1134     }
1135     } elsif (defined $charset_attr) {
1136     if (defined $content_attr) {
1137     $self->{onerror}->(node => $content_attr,
1138     type => 'attribute not allowed');
1139     }
1140     } else {
1141     if (defined $content_attr) {
1142     $self->{onerror}->(node => $content_attr,
1143     type => 'attribute not allowed');
1144     $self->{onerror}->(node => $todo->{node},
1145     type => 'attribute missing:name|http-equiv');
1146     } else {
1147     $self->{onerror}->(node => $todo->{node},
1148     type => 'attribute missing:name|http-equiv|charset');
1149     }
1150     }
1151    
1152     ## TODO: metadata conformance
1153    
1154     ## TODO: pragma conformance
1155     if (defined $http_equiv_attr) { ## An enumerated attribute
1156     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1157     if ({
1158     'refresh' => 1,
1159     'default-style' => 1,
1160     }->{$keyword}) {
1161     #
1162     } else {
1163     $self->{onerror}->(node => $http_equiv_attr,
1164     type => 'enumerated:invalid');
1165     }
1166     }
1167    
1168     if (defined $charset_attr) {
1169     unless ($todo->{node}->owner_document->manakai_is_html) {
1170     $self->{onerror}->(node => $charset_attr,
1171     type => 'in XML:charset');
1172     }
1173     ## TODO: charset
1174     }
1175     },
1176     checker => $HTMLEmptyChecker,
1177     };
1178    
1179     $Element->{$HTML_NS}->{style} = {
1180     attrs_checker => $GetHTMLAttrsChecker->({
1181     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1182     media => $HTMLMQAttrChecker,
1183     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1184     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1185     ## not different
1186     }),
1187     checker => sub {
1188     ## NOTE: |html:style| has no conformance creteria on content model
1189     my ($self, $todo) = @_;
1190     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1191     $type = 'text/css' unless defined $type;
1192     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1193     type => 'style:'.$type); ## TODO: $type normalization
1194     return $AnyChecker->($self, $todo);
1195     },
1196     };
1197    
1198     $Element->{$HTML_NS}->{body} = {
1199     attrs_checker => $GetHTMLAttrsChecker->({}),
1200     checker => $HTMLBlockChecker,
1201     };
1202    
1203     $Element->{$HTML_NS}->{section} = {
1204     attrs_checker => $GetHTMLAttrsChecker->({}),
1205     checker => $HTMLStylableBlockChecker,
1206     };
1207    
1208     $Element->{$HTML_NS}->{nav} = {
1209     attrs_checker => $GetHTMLAttrsChecker->({}),
1210     checker => $HTMLBlockOrInlineChecker,
1211     };
1212    
1213     $Element->{$HTML_NS}->{article} = {
1214     attrs_checker => $GetHTMLAttrsChecker->({}),
1215     checker => $HTMLStylableBlockChecker,
1216     };
1217    
1218     $Element->{$HTML_NS}->{blockquote} = {
1219     attrs_checker => $GetHTMLAttrsChecker->({
1220     cite => $HTMLURIAttrChecker,
1221     }),
1222     checker => $HTMLBlockChecker,
1223     };
1224    
1225     $Element->{$HTML_NS}->{aside} = {
1226     attrs_checker => $GetHTMLAttrsChecker->({}),
1227     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1228     };
1229    
1230     $Element->{$HTML_NS}->{h1} = {
1231     attrs_checker => $GetHTMLAttrsChecker->({}),
1232     checker => sub {
1233     my ($self, $todo) = @_;
1234     $todo->{flag}->{has_heading}->[0] = 1;
1235     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1236     },
1237     };
1238    
1239     $Element->{$HTML_NS}->{h2} = {
1240     attrs_checker => $GetHTMLAttrsChecker->({}),
1241     checker => $Element->{$HTML_NS}->{h1}->{checker},
1242     };
1243    
1244     $Element->{$HTML_NS}->{h3} = {
1245     attrs_checker => $GetHTMLAttrsChecker->({}),
1246     checker => $Element->{$HTML_NS}->{h1}->{checker},
1247     };
1248    
1249     $Element->{$HTML_NS}->{h4} = {
1250     attrs_checker => $GetHTMLAttrsChecker->({}),
1251     checker => $Element->{$HTML_NS}->{h1}->{checker},
1252     };
1253    
1254     $Element->{$HTML_NS}->{h5} = {
1255     attrs_checker => $GetHTMLAttrsChecker->({}),
1256     checker => $Element->{$HTML_NS}->{h1}->{checker},
1257     };
1258    
1259     $Element->{$HTML_NS}->{h6} = {
1260     attrs_checker => $GetHTMLAttrsChecker->({}),
1261     checker => $Element->{$HTML_NS}->{h1}->{checker},
1262     };
1263    
1264     $Element->{$HTML_NS}->{header} = {
1265     attrs_checker => $GetHTMLAttrsChecker->({}),
1266     checker => sub {
1267     my ($self, $todo) = @_;
1268     my $old_flag = $todo->{flag}->{has_heading} || [];
1269     my $new_flag = [];
1270     local $todo->{flag}->{has_heading} = $new_flag;
1271     my $node = $todo->{node};
1272    
1273     my $end = $self->_add_minuses
1274     ({$HTML_NS => {qw/header 1 footer 1/}},
1275     $HTMLSectioningElements);
1276     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1277     push @$new_todos, $end,
1278     {type => 'code', code => sub {
1279     if ($new_flag->[0]) {
1280     $old_flag->[0] = 1;
1281     } else {
1282     $self->{onerror}->(node => $node, type => 'element missing:hn');
1283     }
1284     }};
1285     return ($new_todos, $ch);
1286     },
1287     };
1288    
1289     $Element->{$HTML_NS}->{footer} = {
1290     attrs_checker => $GetHTMLAttrsChecker->({}),
1291     checker => sub { ## block -hn -header -footer -sectioning or inline
1292     my ($self, $todo) = @_;
1293     my $el = $todo->{node};
1294     my $new_todos = [];
1295     my @nodes = (@{$el->child_nodes});
1296    
1297     my $content = 'block-or-inline'; # or 'block' or 'inline'
1298     my @block_not_inline;
1299     while (@nodes) {
1300     my $node = shift @nodes;
1301     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1302    
1303     my $nt = $node->node_type;
1304     if ($nt == 1) {
1305     my $node_ns = $node->namespace_uri;
1306     $node_ns = '' unless defined $node_ns;
1307     my $node_ln = $node->manakai_local_name;
1308     my $not_allowed;
1309     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1310     $not_allowed = 1;
1311     } elsif ($node_ns eq $HTML_NS and
1312     {
1313     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1314     }->{$node_ln}) {
1315     $not_allowed = 1;
1316     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1317     $not_allowed = 1;
1318     }
1319     if ($content eq 'block') {
1320     $not_allowed = 1
1321     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1322     } elsif ($content eq 'inline') {
1323     $not_allowed = 1
1324     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1325     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1326     } else {
1327     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1328     my $is_inline
1329     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1330     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1331    
1332     push @block_not_inline, $node
1333     if $is_block and not $is_inline and not $not_allowed;
1334     unless ($is_block) {
1335     $content = 'inline';
1336     for (@block_not_inline) {
1337     $self->{onerror}->(node => $_, type => 'element not allowed');
1338     }
1339     $not_allowed = 1 unless $is_inline;
1340     }
1341     }
1342     $self->{onerror}->(node => $node, type => 'element not allowed')
1343     if $not_allowed;
1344     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1345     unshift @nodes, @$sib;
1346     push @$new_todos, @$ch;
1347     } elsif ($nt == 3 or $nt == 4) {
1348     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1349     if ($content eq 'block') {
1350     $self->{onerror}->(node => $node, type => 'character not allowed');
1351     } else {
1352     $content = 'inline';
1353     for (@block_not_inline) {
1354     $self->{onerror}->(node => $_, type => 'element not allowed');
1355     }
1356     }
1357     }
1358     } elsif ($nt == 5) {
1359     unshift @nodes, @{$node->child_nodes};
1360     }
1361     }
1362    
1363     my $end = $self->_add_minuses
1364     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1365     $HTMLSectioningElements);
1366     push @$new_todos, $end;
1367    
1368     if ($content eq 'inline') {
1369     for (@$new_todos) {
1370     $_->{inline} = 1;
1371     }
1372     }
1373    
1374     return ($new_todos);
1375     },
1376     };
1377    
1378     $Element->{$HTML_NS}->{address} = {
1379     attrs_checker => $GetHTMLAttrsChecker->({}),
1380     checker => $HTMLInlineChecker,
1381     };
1382    
1383     $Element->{$HTML_NS}->{p} = {
1384     attrs_checker => $GetHTMLAttrsChecker->({}),
1385     checker => $HTMLSignificantInlineChecker,
1386     };
1387    
1388     $Element->{$HTML_NS}->{hr} = {
1389     attrs_checker => $GetHTMLAttrsChecker->({}),
1390     checker => $HTMLEmptyChecker,
1391     };
1392    
1393     $Element->{$HTML_NS}->{br} = {
1394     attrs_checker => $GetHTMLAttrsChecker->({}),
1395     checker => $HTMLEmptyChecker,
1396     };
1397    
1398     $Element->{$HTML_NS}->{dialog} = {
1399     attrs_checker => $GetHTMLAttrsChecker->({}),
1400     checker => sub {
1401     my ($self, $todo) = @_;
1402     my $el = $todo->{node};
1403     my $new_todos = [];
1404     my @nodes = (@{$el->child_nodes});
1405    
1406     my $phase = 'before dt';
1407     while (@nodes) {
1408     my $node = shift @nodes;
1409     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1410    
1411     my $nt = $node->node_type;
1412     if ($nt == 1) {
1413     my $node_ns = $node->namespace_uri;
1414     $node_ns = '' unless defined $node_ns;
1415     my $node_ln = $node->manakai_local_name;
1416     ## NOTE: |minuses| list is not checked since redundant
1417     if ($phase eq 'before dt') {
1418     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1419     $phase = 'before dd';
1420     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1421     $self->{onerror}
1422     ->(node => $node, type => 'ps element missing:dt');
1423     $phase = 'before dt';
1424     } else {
1425     $self->{onerror}->(node => $node, type => 'element not allowed');
1426     }
1427     } else { # before dd
1428     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1429     $phase = 'before dt';
1430     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1431     $self->{onerror}
1432     ->(node => $node, type => 'ps element missing:dd');
1433     $phase = 'before dd';
1434     } else {
1435     $self->{onerror}->(node => $node, type => 'element not allowed');
1436     }
1437     }
1438     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1439     unshift @nodes, @$sib;
1440     push @$new_todos, @$ch;
1441     } elsif ($nt == 3 or $nt == 4) {
1442     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1443     $self->{onerror}->(node => $node, type => 'character not allowed');
1444     }
1445     } elsif ($nt == 5) {
1446     unshift @nodes, @{$node->child_nodes};
1447     }
1448     }
1449     if ($phase eq 'before dd') {
1450     $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1451     }
1452     return ($new_todos);
1453     },
1454     };
1455    
1456     $Element->{$HTML_NS}->{pre} = {
1457     attrs_checker => $GetHTMLAttrsChecker->({}),
1458     checker => $HTMLStrictlyInlineChecker,
1459     };
1460    
1461     $Element->{$HTML_NS}->{ol} = {
1462     attrs_checker => $GetHTMLAttrsChecker->({
1463     start => $HTMLIntegerAttrChecker,
1464     }),
1465     checker => sub {
1466     my ($self, $todo) = @_;
1467     my $el = $todo->{node};
1468     my $new_todos = [];
1469     my @nodes = (@{$el->child_nodes});
1470    
1471     while (@nodes) {
1472     my $node = shift @nodes;
1473     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1474    
1475     my $nt = $node->node_type;
1476     if ($nt == 1) {
1477     my $node_ns = $node->namespace_uri;
1478     $node_ns = '' unless defined $node_ns;
1479     my $node_ln = $node->manakai_local_name;
1480     ## NOTE: |minuses| list is not checked since redundant
1481     unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1482     $self->{onerror}->(node => $node, type => 'element not allowed');
1483     }
1484     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1485     unshift @nodes, @$sib;
1486     push @$new_todos, @$ch;
1487     } elsif ($nt == 3 or $nt == 4) {
1488     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1489     $self->{onerror}->(node => $node, type => 'character not allowed');
1490     }
1491     } elsif ($nt == 5) {
1492     unshift @nodes, @{$node->child_nodes};
1493     }
1494     }
1495    
1496     if ($todo->{inline}) {
1497     for (@$new_todos) {
1498     $_->{inline} = 1;
1499     }
1500     }
1501     return ($new_todos);
1502     },
1503     };
1504    
1505     $Element->{$HTML_NS}->{ul} = {
1506     attrs_checker => $GetHTMLAttrsChecker->({}),
1507     checker => $Element->{$HTML_NS}->{ol}->{checker},
1508     };
1509    
1510    
1511     $Element->{$HTML_NS}->{li} = {
1512     attrs_checker => $GetHTMLAttrsChecker->({
1513     start => sub {
1514     my ($self, $attr) = @_;
1515     my $parent = $attr->owner_element->manakai_parent_element;
1516     if (defined $parent) {
1517     my $parent_ns = $parent->namespace_uri;
1518     $parent_ns = '' unless defined $parent_ns;
1519     my $parent_ln = $parent->manakai_local_name;
1520     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1521     $self->{onerror}->(node => $attr, level => 'unsupported',
1522     type => 'attribute');
1523     }
1524     }
1525     $HTMLIntegerAttrChecker->($self, $attr);
1526     },
1527     }),
1528     checker => sub {
1529     my ($self, $todo) = @_;
1530     if ($todo->{inline}) {
1531     return $HTMLInlineChecker->($self, $todo);
1532     } else {
1533     return $HTMLBlockOrInlineChecker->($self, $todo);
1534     }
1535     },
1536     };
1537    
1538     $Element->{$HTML_NS}->{dl} = {
1539     attrs_checker => $GetHTMLAttrsChecker->({}),
1540     checker => sub {
1541     my ($self, $todo) = @_;
1542     my $el = $todo->{node};
1543     my $new_todos = [];
1544     my @nodes = (@{$el->child_nodes});
1545    
1546     my $phase = 'before dt';
1547     while (@nodes) {
1548     my $node = shift @nodes;
1549     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1550    
1551     my $nt = $node->node_type;
1552     if ($nt == 1) {
1553     my $node_ns = $node->namespace_uri;
1554     $node_ns = '' unless defined $node_ns;
1555     my $node_ln = $node->manakai_local_name;
1556     ## NOTE: |minuses| list is not checked since redundant
1557     if ($phase eq 'in dds') {
1558     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1559     #$phase = 'in dds';
1560     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1561     $phase = 'in dts';
1562     } else {
1563     $self->{onerror}->(node => $node, type => 'element not allowed');
1564     }
1565     } elsif ($phase eq 'in dts') {
1566     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1567     #$phase = 'in dts';
1568     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1569     $phase = 'in dds';
1570     } else {
1571     $self->{onerror}->(node => $node, type => 'element not allowed');
1572     }
1573     } else { # before dt
1574     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1575     $phase = 'in dts';
1576     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1577     $self->{onerror}
1578     ->(node => $node, type => 'ps element missing:dt');
1579     $phase = 'in dds';
1580     } else {
1581     $self->{onerror}->(node => $node, type => 'element not allowed');
1582     }
1583     }
1584     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1585     unshift @nodes, @$sib;
1586     push @$new_todos, @$ch;
1587     } elsif ($nt == 3 or $nt == 4) {
1588     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1589     $self->{onerror}->(node => $node, type => 'character not allowed');
1590     }
1591     } elsif ($nt == 5) {
1592     unshift @nodes, @{$node->child_nodes};
1593     }
1594     }
1595     if ($phase eq 'in dts') {
1596     $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1597     }
1598    
1599     if ($todo->{inline}) {
1600     for (@$new_todos) {
1601     $_->{inline} = 1;
1602     }
1603     }
1604     return ($new_todos);
1605     },
1606     };
1607    
1608     $Element->{$HTML_NS}->{dt} = {
1609     attrs_checker => $GetHTMLAttrsChecker->({}),
1610     checker => $HTMLStrictlyInlineChecker,
1611     };
1612    
1613     $Element->{$HTML_NS}->{dd} = {
1614     attrs_checker => $GetHTMLAttrsChecker->({}),
1615     checker => $Element->{$HTML_NS}->{li}->{checker},
1616     };
1617    
1618     $Element->{$HTML_NS}->{a} = {
1619     attrs_checker => sub {
1620     my ($self, $todo) = @_;
1621     my %attr;
1622     for my $attr (@{$todo->{node}->attributes}) {
1623     my $attr_ns = $attr->namespace_uri;
1624     $attr_ns = '' unless defined $attr_ns;
1625     my $attr_ln = $attr->manakai_local_name;
1626     my $checker;
1627     if ($attr_ns eq '') {
1628     $checker = {
1629     target => $HTMLTargetAttrChecker,
1630     href => $HTMLURIAttrChecker,
1631     ping => $HTMLSpaceURIsAttrChecker,
1632     rel => sub { $HTMLLinkTypesAttrChecker->(1, @_) },
1633     media => $HTMLMQAttrChecker,
1634     hreflang => $HTMLLanguageTagAttrChecker,
1635     type => $HTMLIMTAttrChecker,
1636     }->{$attr_ln};
1637     if ($checker) {
1638     $attr{$attr_ln} = $attr;
1639     } else {
1640     $checker = $HTMLAttrChecker->{$attr_ln};
1641     }
1642     }
1643     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1644     || $AttrChecker->{$attr_ns}->{''};
1645     if ($checker) {
1646     $checker->($self, $attr) if ref $checker;
1647     } else {
1648     $self->{onerror}->(node => $attr, level => 'unsupported',
1649     type => 'attribute');
1650     ## ISSUE: No comformance createria for unknown attributes in the spec
1651     }
1652     }
1653    
1654     unless (defined $attr{href}) {
1655     for (qw/target ping rel media hreflang type/) {
1656     if (defined $attr{$_}) {
1657     $self->{onerror}->(node => $attr{$_},
1658     type => 'attribute not allowed');
1659     }
1660     }
1661     }
1662     },
1663     checker => sub {
1664     my ($self, $todo) = @_;
1665    
1666     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1667     my ($new_todos, $ch)
1668     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1669     push @$new_todos, $end;
1670    
1671     $_->{flag}->{has_a} = 1 for @$new_todos;
1672    
1673     return ($new_todos, $ch);
1674     },
1675     };
1676    
1677     $Element->{$HTML_NS}->{q} = {
1678     attrs_checker => $GetHTMLAttrsChecker->({
1679     cite => $HTMLURIAttrChecker,
1680     }),
1681     checker => $HTMLInlineOrStrictlyInlineChecker,
1682     };
1683    
1684     $Element->{$HTML_NS}->{cite} = {
1685     attrs_checker => $GetHTMLAttrsChecker->({}),
1686     checker => $HTMLStrictlyInlineChecker,
1687     };
1688    
1689     $Element->{$HTML_NS}->{em} = {
1690     attrs_checker => $GetHTMLAttrsChecker->({}),
1691     checker => $HTMLInlineOrStrictlyInlineChecker,
1692     };
1693    
1694     $Element->{$HTML_NS}->{strong} = {
1695     attrs_checker => $GetHTMLAttrsChecker->({}),
1696     checker => $HTMLInlineOrStrictlyInlineChecker,
1697     };
1698    
1699     $Element->{$HTML_NS}->{small} = {
1700     attrs_checker => $GetHTMLAttrsChecker->({}),
1701     checker => $HTMLInlineOrStrictlyInlineChecker,
1702     };
1703    
1704     $Element->{$HTML_NS}->{m} = {
1705     attrs_checker => $GetHTMLAttrsChecker->({}),
1706     checker => $HTMLInlineOrStrictlyInlineChecker,
1707     };
1708    
1709     $Element->{$HTML_NS}->{dfn} = {
1710     attrs_checker => $GetHTMLAttrsChecker->({}),
1711     checker => sub {
1712     my ($self, $todo) = @_;
1713    
1714     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1715     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1716     push @$sib, $end;
1717    
1718     my $node = $todo->{node};
1719     my $term = $node->get_attribute_ns (undef, 'title');
1720     unless (defined $term) {
1721     for my $child (@{$node->child_nodes}) {
1722     if ($child->node_type == 1) { # ELEMENT_NODE
1723     if (defined $term) {
1724     undef $term;
1725     last;
1726     } elsif ($child->manakai_local_name eq 'abbr') {
1727     my $nsuri = $child->namespace_uri;
1728     if (defined $nsuri and $nsuri eq $HTML_NS) {
1729     my $attr = $child->get_attribute_node_ns (undef, 'title');
1730     if ($attr) {
1731     $term = $attr->value;
1732     }
1733     }
1734     }
1735     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1736     ## TEXT_NODE or CDATA_SECTION_NODE
1737     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1738     next;
1739     }
1740     undef $term;
1741     last;
1742     }
1743     }
1744     unless (defined $term) {
1745     $term = $node->text_content;
1746     }
1747     }
1748     if ($self->{term}->{$term}) {
1749     $self->{onerror}->(node => $node, type => 'duplicate term');
1750     push @{$self->{term}->{$term}}, $node;
1751     } else {
1752     $self->{term}->{$term} = [$node];
1753     }
1754     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1755     ## has |title|.
1756    
1757     return ($sib, $ch);
1758     },
1759     };
1760    
1761     $Element->{$HTML_NS}->{abbr} = {
1762     attrs_checker => $GetHTMLAttrsChecker->({
1763     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1764     ## not different. The spec says that the |title| MAY be omitted
1765     ## if there is a |dfn| whose defining term is the abbreviation,
1766     ## but it does not prohibit |abbr| w/o |title| in other cases.
1767     }),
1768     checker => $HTMLStrictlyInlineChecker,
1769     };
1770    
1771     $Element->{$HTML_NS}->{time} = {
1772     attrs_checker => $GetHTMLAttrsChecker->({
1773     datetime => sub { 1 }, # checked in |checker|
1774     }),
1775     ## TODO: Write tests
1776     checker => sub {
1777     my ($self, $todo) = @_;
1778    
1779     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1780     my $input;
1781     my $reg_sp;
1782     my $input_node;
1783     if ($attr) {
1784     $input = $attr->value;
1785     $reg_sp = qr/[\x09-\x0D\x20]*/;
1786     $input_node = $attr;
1787     } else {
1788     $input = $todo->{node}->text_content;
1789     $reg_sp = qr/\p{Zs}*/;
1790     $input_node = $todo->{node};
1791    
1792     ## ISSUE: What is the definition for "successfully extracts a date
1793     ## or time"? If the algorithm says the string is invalid but
1794     ## return some date or time, is it "successfully"?
1795     }
1796    
1797     my $hour;
1798     my $minute;
1799     my $second;
1800     if ($input =~ /
1801     \A
1802     [\x09-\x0D\x20]*
1803     ([0-9]+) # 1
1804     (?>
1805     -([0-9]+) # 2
1806     -([0-9]+) # 3
1807     [\x09-\x0D\x20]*
1808     (?>
1809     T
1810     [\x09-\x0D\x20]*
1811     )?
1812     ([0-9]+) # 4
1813     :([0-9]+) # 5
1814     (?>
1815     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1816     )?
1817     [\x09-\x0D\x20]*
1818     (?>
1819     Z
1820     [\x09-\x0D\x20]*
1821     |
1822     [+-]([0-9]+):([0-9]+) # 7, 8
1823     [\x09-\x0D\x20]*
1824     )?
1825     \z
1826     |
1827     :([0-9]+) # 9
1828     (?>
1829     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1830     )?
1831     [\x09-\x0D\x20]*\z
1832     )
1833     /x) {
1834     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1835     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1836     length $4 != 2 or length $5 != 2) {
1837     $self->{onerror}->(node => $input_node,
1838     type => 'dateortime:syntax error');
1839     }
1840    
1841     if (1 <= $2 and $2 <= 12) {
1842     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1843     if $3 < 1 or
1844     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1845     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1846     if $2 == 2 and $3 == 29 and
1847     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1848     } else {
1849     $self->{onerror}->(node => $input_node,
1850     type => 'datetime:bad month');
1851     }
1852    
1853     ($hour, $minute, $second) = ($4, $5, $6);
1854    
1855     if (defined $7) { ## [+-]hh:mm
1856     if (length $7 != 2 or length $8 != 2) {
1857     $self->{onerror}->(node => $input_node,
1858     type => 'dateortime:syntax error');
1859     }
1860    
1861     $self->{onerror}->(node => $input_node,
1862     type => 'datetime:bad timezone hour')
1863     if $7 > 23;
1864     $self->{onerror}->(node => $input_node,
1865     type => 'datetime:bad timezone minute')
1866     if $8 > 59;
1867     }
1868     } else { ## hh:mm
1869     if (length $1 != 2 or length $9 != 2) {
1870     $self->{onerror}->(node => $input_node,
1871     type => qq'dateortime:syntax error');
1872     }
1873    
1874     ($hour, $minute, $second) = ($1, $9, $10);
1875     }
1876    
1877     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
1878     if $hour > 23;
1879     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
1880     if $minute > 59;
1881    
1882     if (defined $second) { ## s
1883     ## NOTE: Integer part of second don't have to have length of two.
1884    
1885     if (substr ($second, 0, 1) eq '.') {
1886     $self->{onerror}->(node => $input_node,
1887     type => 'dateortime:syntax error');
1888     }
1889    
1890     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
1891     if $second >= 60;
1892     }
1893     } else {
1894     $self->{onerror}->(node => $input_node,
1895     type => 'dateortime:syntax error');
1896     }
1897    
1898     return $HTMLStrictlyInlineChecker->($self, $todo);
1899     },
1900     };
1901    
1902     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1903     attrs_checker => $GetHTMLAttrsChecker->({
1904     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1905     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1906     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1907     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1908     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1909     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1910     }),
1911     checker => $HTMLStrictlyInlineChecker,
1912     };
1913    
1914     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1915     attrs_checker => $GetHTMLAttrsChecker->({
1916     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1917     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1918     }),
1919     checker => $HTMLStrictlyInlineChecker,
1920     };
1921    
1922     $Element->{$HTML_NS}->{code} = {
1923     attrs_checker => $GetHTMLAttrsChecker->({}),
1924     ## NOTE: Though |title| has special semantics,
1925     ## syntatically same as the |title| as global attribute.
1926     checker => $HTMLInlineOrStrictlyInlineChecker,
1927     };
1928    
1929     $Element->{$HTML_NS}->{var} = {
1930     attrs_checker => $GetHTMLAttrsChecker->({}),
1931     ## NOTE: Though |title| has special semantics,
1932     ## syntatically same as the |title| as global attribute.
1933     checker => $HTMLStrictlyInlineChecker,
1934     };
1935    
1936     $Element->{$HTML_NS}->{samp} = {
1937     attrs_checker => $GetHTMLAttrsChecker->({}),
1938     ## NOTE: Though |title| has special semantics,
1939     ## syntatically same as the |title| as global attribute.
1940     checker => $HTMLInlineOrStrictlyInlineChecker,
1941     };
1942    
1943     $Element->{$HTML_NS}->{kbd} = {
1944     attrs_checker => $GetHTMLAttrsChecker->({}),
1945     checker => $HTMLStrictlyInlineChecker,
1946     };
1947    
1948     $Element->{$HTML_NS}->{sub} = {
1949     attrs_checker => $GetHTMLAttrsChecker->({}),
1950     checker => $HTMLStrictlyInlineChecker,
1951     };
1952    
1953     $Element->{$HTML_NS}->{sup} = {
1954     attrs_checker => $GetHTMLAttrsChecker->({}),
1955     checker => $HTMLStrictlyInlineChecker,
1956     };
1957    
1958     $Element->{$HTML_NS}->{span} = {
1959     attrs_checker => $GetHTMLAttrsChecker->({}),
1960     ## NOTE: Though |title| has special semantics,
1961     ## syntatically same as the |title| as global attribute.
1962     checker => $HTMLInlineOrStrictlyInlineChecker,
1963     };
1964    
1965     $Element->{$HTML_NS}->{i} = {
1966     attrs_checker => $GetHTMLAttrsChecker->({}),
1967     ## NOTE: Though |title| has special semantics,
1968     ## syntatically same as the |title| as global attribute.
1969     checker => $HTMLStrictlyInlineChecker,
1970     };
1971    
1972     $Element->{$HTML_NS}->{b} = {
1973     attrs_checker => $GetHTMLAttrsChecker->({}),
1974     checker => $HTMLStrictlyInlineChecker,
1975     };
1976    
1977     $Element->{$HTML_NS}->{bdo} = {
1978     attrs_checker => sub {
1979     my ($self, $todo) = @_;
1980     $GetHTMLAttrsChecker->({})->($self, $todo);
1981     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
1982     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
1983     }
1984     },
1985     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1986     checker => $HTMLStrictlyInlineChecker,
1987     };
1988    
1989     $Element->{$HTML_NS}->{ins} = {
1990     attrs_checker => $GetHTMLAttrsChecker->({
1991     cite => $HTMLURIAttrChecker,
1992     datetime => $HTMLDatetimeAttrChecker,
1993     }),
1994     checker => $HTMLTransparentChecker,
1995     };
1996    
1997     $Element->{$HTML_NS}->{del} = {
1998     attrs_checker => $GetHTMLAttrsChecker->({
1999     cite => $HTMLURIAttrChecker,
2000     datetime => $HTMLDatetimeAttrChecker,
2001     }),
2002     checker => sub {
2003     my ($self, $todo) = @_;
2004    
2005     my $parent = $todo->{node}->manakai_parent_element;
2006     if (defined $parent) {
2007     my $nsuri = $parent->namespace_uri;
2008     $nsuri = '' unless defined $nsuri;
2009     my $ln = $parent->manakai_local_name;
2010     my $eldef = $Element->{$nsuri}->{$ln} ||
2011     $Element->{$nsuri}->{''} ||
2012     $ElementDefault;
2013     return $eldef->{checker}->($self, $todo);
2014     } else {
2015     return $HTMLBlockOrInlineChecker->($self, $todo);
2016     }
2017     },
2018     };
2019    
2020     ## TODO: figure
2021    
2022     $Element->{$HTML_NS}->{img} = {
2023     attrs_checker => sub {
2024     my ($self, $todo) = @_;
2025     $GetHTMLAttrsChecker->({
2026     alt => sub { }, ## NOTE: No syntactical requirement
2027     src => $HTMLURIAttrChecker,
2028     usemap => $HTMLUsemapAttrChecker,
2029     ismap => sub {
2030     my ($self, $attr, $parent_todo) = @_;
2031     if (not $todo->{flag}->{has_a}) {
2032     $self->{onerror}->(node => $attr, type => 'attribute not allowed');
2033     }
2034     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2035     },
2036     ## TODO: height
2037     ## TODO: width
2038     })->($self, $todo);
2039     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2040     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2041     }
2042     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2043     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2044     }
2045     },
2046     checker => $HTMLEmptyChecker,
2047     };
2048    
2049     $Element->{$HTML_NS}->{iframe} = {
2050     attrs_checker => $GetHTMLAttrsChecker->({
2051     src => $HTMLURIAttrChecker,
2052     }),
2053     checker => $HTMLTextChecker,
2054     };
2055    
2056     $Element->{$HTML_NS}->{embed} = {
2057     attrs_checker => sub {
2058     my ($self, $todo) = @_;
2059     my $has_src;
2060     for my $attr (@{$todo->{node}->attributes}) {
2061     my $attr_ns = $attr->namespace_uri;
2062     $attr_ns = '' unless defined $attr_ns;
2063     my $attr_ln = $attr->manakai_local_name;
2064     my $checker;
2065     if ($attr_ns eq '') {
2066     if ($attr_ln eq 'src') {
2067     $checker = $HTMLURIAttrChecker;
2068     $has_src = 1;
2069     } elsif ($attr_ln eq 'type') {
2070     $checker = $HTMLIMTAttrChecker;
2071     } else {
2072     ## TODO: height
2073     ## TODO: width
2074     $checker = $HTMLAttrChecker->{$attr_ln}
2075     || sub { }; ## NOTE: Any local attribute is ok.
2076     }
2077     }
2078     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2079     || $AttrChecker->{$attr_ns}->{''};
2080     if ($checker) {
2081     $checker->($self, $attr);
2082     } else {
2083     $self->{onerror}->(node => $attr, level => 'unsupported',
2084     type => 'attribute');
2085     ## ISSUE: No comformance createria for global attributes in the spec
2086     }
2087     }
2088    
2089     unless ($has_src) {
2090     $self->{onerror}->(node => $todo->{node},
2091     type => 'attribute missing:src');
2092     }
2093     },
2094     checker => $HTMLEmptyChecker,
2095     };
2096    
2097     $Element->{$HTML_NS}->{object} = {
2098     attrs_checker => sub {
2099     my ($self, $todo) = @_;
2100     $GetHTMLAttrsChecker->({
2101     data => $HTMLURIAttrChecker,
2102     type => $HTMLIMTAttrChecker,
2103     usemap => $HTMLUsemapAttrChecker,
2104     ## TODO: width
2105     ## TODO: height
2106     })->($self, $todo);
2107     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2108     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2109     $self->{onerror}->(node => $todo->{node},
2110     type => 'attribute missing:data|type');
2111     }
2112     }
2113     },
2114     checker => $ElementDefault->{checker}, ## TODO
2115     };
2116    
2117     $Element->{$HTML_NS}->{param} = {
2118     attrs_checker => sub {
2119     my ($self, $todo) = @_;
2120     $GetHTMLAttrsChecker->({
2121     name => sub { },
2122     value => sub { },
2123     })->($self, $todo);
2124     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2125     $self->{onerror}->(node => $todo->{node},
2126     type => 'attribute missing:name');
2127     }
2128     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2129     $self->{onerror}->(node => $todo->{node},
2130     type => 'attribute missing:value');
2131     }
2132     },
2133     checker => $HTMLEmptyChecker,
2134     };
2135    
2136     $Element->{$HTML_NS}->{video} = {
2137     attrs_checker => $GetHTMLAttrsChecker->({
2138     src => $HTMLURIAttrChecker,
2139     ## TODO: start, loopstart, loopend, end
2140     ## ISSUE: they MUST be "value time offset"s. Value?
2141     ## ISSUE: loopcount has no conformance creteria
2142     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2143     controls => $GetHTMLBooleanAttrChecker->('controls'),
2144     }),
2145     checker => sub {
2146     my ($self, $todo) = @_;
2147    
2148     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2149     return $HTMLBlockOrInlineChecker->($self, $todo);
2150     } else {
2151     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2152     ->($self, $todo);
2153     }
2154     },
2155     };
2156    
2157     $Element->{$HTML_NS}->{audio} = {
2158     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2159     checker => $Element->{$HTML_NS}->{video}->{checker},
2160     };
2161    
2162     $Element->{$HTML_NS}->{source} = {
2163     attrs_checker => sub {
2164     my ($self, $todo) = @_;
2165     $GetHTMLAttrsChecker->({
2166     src => $HTMLURIAttrChecker,
2167     type => $HTMLIMTAttrChecker,
2168     media => $HTMLMQAttrChecker,
2169     })->($self, $todo);
2170     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2171     $self->{onerror}->(node => $todo->{node},
2172     type => 'attribute missing:src');
2173     }
2174     },
2175     checker => $HTMLEmptyChecker,
2176     };
2177    
2178     $Element->{$HTML_NS}->{canvas} = {
2179     attrs_checker => $GetHTMLAttrsChecker->({
2180     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2181     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2182     }),
2183     checker => $HTMLInlineChecker,
2184     };
2185    
2186     $Element->{$HTML_NS}->{map} = {
2187     attrs_checker => $GetHTMLAttrsChecker->({
2188     id => sub {
2189     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2190     my ($self, $attr) = @_;
2191     my $value = $attr->value;
2192     if (length $value > 0) {
2193     if ($self->{id}->{$value}) {
2194     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2195     push @{$self->{id}->{$value}}, $attr;
2196     } else {
2197     $self->{id}->{$value} = [$attr];
2198     }
2199     } else {
2200     ## NOTE: MUST contain at least one character
2201     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2202     }
2203     if ($value =~ /[\x09-\x0D\x20]/) {
2204     $self->{onerror}->(node => $attr, type => 'space in ID');
2205     }
2206     $self->{map}->{$value} ||= $attr;
2207     },
2208     }),
2209     checker => $HTMLBlockChecker,
2210 wakaba 1.2 ## TODO: |id| is required.
2211 wakaba 1.1 };
2212    
2213     $Element->{$HTML_NS}->{area} = {
2214     attrs_checker => sub {
2215     my ($self, $todo) = @_;
2216     my %attr;
2217     my $coords;
2218     for my $attr (@{$todo->{node}->attributes}) {
2219     my $attr_ns = $attr->namespace_uri;
2220     $attr_ns = '' unless defined $attr_ns;
2221     my $attr_ln = $attr->manakai_local_name;
2222     my $checker;
2223     if ($attr_ns eq '') {
2224     $checker = {
2225     alt => sub { },
2226     ## NOTE: |alt| value has no conformance creteria.
2227     shape => $GetHTMLEnumeratedAttrChecker->({
2228     circ => -1, circle => 1,
2229     default => 1,
2230     poly => 1, polygon => -1,
2231     rect => 1, rectangle => -1,
2232     }),
2233     coords => sub {
2234     my ($self, $attr) = @_;
2235     my $value = $attr->value;
2236     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2237     $coords = [split /,/, $value];
2238     } else {
2239     $self->{onerror}->(node => $attr,
2240     type => 'coords:syntax error');
2241     }
2242     },
2243     target => $HTMLTargetAttrChecker,
2244     href => $HTMLURIAttrChecker,
2245     ping => $HTMLSpaceURIsAttrChecker,
2246     rel => sub { $HTMLLinkTypesAttrChecker->(1, @_) },
2247     media => $HTMLMQAttrChecker,
2248     hreflang => $HTMLLanguageTagAttrChecker,
2249     type => $HTMLIMTAttrChecker,
2250     }->{$attr_ln};
2251     if ($checker) {
2252     $attr{$attr_ln} = $attr;
2253     } else {
2254     $checker = $HTMLAttrChecker->{$attr_ln};
2255     }
2256     }
2257     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2258     || $AttrChecker->{$attr_ns}->{''};
2259     if ($checker) {
2260     $checker->($self, $attr) if ref $checker;
2261     } else {
2262     $self->{onerror}->(node => $attr, level => 'unsupported',
2263     type => 'attribute');
2264     ## ISSUE: No comformance createria for unknown attributes in the spec
2265     }
2266     }
2267    
2268     if (defined $attr{href}) {
2269     unless (defined $attr{alt}) {
2270     $self->{onerror}->(node => $todo->{node},
2271     type => 'attribute missing:alt');
2272     }
2273     } else {
2274     for (qw/target ping rel media hreflang type alt/) {
2275     if (defined $attr{$_}) {
2276     $self->{onerror}->(node => $attr{$_},
2277     type => 'attribute not allowed');
2278     }
2279     }
2280     }
2281    
2282     my $shape = 'rectangle';
2283     if (defined $attr{shape}) {
2284     $shape = {
2285     circ => 'circle', circle => 'circle',
2286     default => 'default',
2287     poly => 'polygon', polygon => 'polygon',
2288     rect => 'rectangle', rectangle => 'rectangle',
2289     }->{lc $attr{shape}->value} || 'rectangle';
2290     ## TODO: ASCII lowercase?
2291     }
2292    
2293     if ($shape eq 'circle') {
2294     if (defined $attr{coords}) {
2295     if (defined $coords) {
2296     if (@$coords == 3) {
2297     if ($coords->[2] < 0) {
2298     $self->{onerror}->(node => $attr{coords},
2299     type => 'coords:out of range:2');
2300     }
2301     } else {
2302     $self->{onerror}->(node => $attr{coords},
2303     type => 'coords:number:3:'.@$coords);
2304     }
2305     } else {
2306     ## NOTE: A syntax error has been reported.
2307     }
2308     } else {
2309     $self->{onerror}->(node => $todo->{node},
2310     type => 'attribute missing:coords');
2311     }
2312     } elsif ($shape eq 'default') {
2313     if (defined $attr{coords}) {
2314     $self->{onerror}->(node => $attr{coords},
2315     type => 'attribute not allowed');
2316     }
2317     } elsif ($shape eq 'polygon') {
2318     if (defined $attr{coords}) {
2319     if (defined $coords) {
2320     if (@$coords >= 6) {
2321     unless (@$coords % 2 == 0) {
2322     $self->{onerror}->(node => $attr{coords},
2323     type => 'coords:number:even:'.@$coords);
2324     }
2325     } else {
2326     $self->{onerror}->(node => $attr{coords},
2327     type => 'coords:number:>=6:'.@$coords);
2328     }
2329     } else {
2330     ## NOTE: A syntax error has been reported.
2331     }
2332     } else {
2333     $self->{onerror}->(node => $todo->{node},
2334     type => 'attribute missing:coords');
2335     }
2336     } elsif ($shape eq 'rectangle') {
2337     if (defined $attr{coords}) {
2338     if (defined $coords) {
2339     if (@$coords == 4) {
2340     unless ($coords->[0] < $coords->[2]) {
2341     $self->{onerror}->(node => $attr{coords},
2342     type => 'coords:out of range:0');
2343     }
2344     unless ($coords->[1] < $coords->[3]) {
2345     $self->{onerror}->(node => $attr{coords},
2346     type => 'coords:out of range:1');
2347     }
2348     } else {
2349     $self->{onerror}->(node => $attr{coords},
2350     type => 'coords:number:4:'.@$coords);
2351     }
2352     } else {
2353     ## NOTE: A syntax error has been reported.
2354     }
2355     } else {
2356     $self->{onerror}->(node => $todo->{node},
2357     type => 'attribute missing:coords');
2358     }
2359     }
2360     },
2361     checker => $HTMLEmptyChecker,
2362     };
2363     ## TODO: only in map
2364    
2365     $Element->{$HTML_NS}->{table} = {
2366     attrs_checker => $GetHTMLAttrsChecker->({}),
2367     checker => sub {
2368     my ($self, $todo) = @_;
2369     my $el = $todo->{node};
2370     my $new_todos = [];
2371     my @nodes = (@{$el->child_nodes});
2372    
2373     my $phase = 'before caption';
2374     my $has_tfoot;
2375     while (@nodes) {
2376     my $node = shift @nodes;
2377     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2378    
2379     my $nt = $node->node_type;
2380     if ($nt == 1) {
2381     my $node_ns = $node->namespace_uri;
2382     $node_ns = '' unless defined $node_ns;
2383     my $node_ln = $node->manakai_local_name;
2384     ## NOTE: |minuses| list is not checked since redundant
2385     if ($phase eq 'in tbodys') {
2386     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2387     #$phase = 'in tbodys';
2388     } elsif (not $has_tfoot and
2389     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2390     $phase = 'after tfoot';
2391     $has_tfoot = 1;
2392     } else {
2393     $self->{onerror}->(node => $node, type => 'element not allowed');
2394     }
2395     } elsif ($phase eq 'in trs') {
2396     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2397     #$phase = 'in trs';
2398     } elsif (not $has_tfoot and
2399     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2400     $phase = 'after tfoot';
2401     $has_tfoot = 1;
2402     } else {
2403     $self->{onerror}->(node => $node, type => 'element not allowed');
2404     }
2405     } elsif ($phase eq 'after thead') {
2406     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2407     $phase = 'in tbodys';
2408     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2409     $phase = 'in trs';
2410     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2411     $phase = 'in tbodys';
2412     $has_tfoot = 1;
2413     } else {
2414     $self->{onerror}->(node => $node, type => 'element not allowed');
2415     }
2416     } elsif ($phase eq 'in colgroup') {
2417     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2418     $phase = 'in colgroup';
2419     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2420     $phase = 'after thead';
2421     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2422     $phase = 'in tbodys';
2423     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2424     $phase = 'in trs';
2425     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2426     $phase = 'in tbodys';
2427     $has_tfoot = 1;
2428     } else {
2429     $self->{onerror}->(node => $node, type => 'element not allowed');
2430     }
2431     } elsif ($phase eq 'before caption') {
2432     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2433     $phase = 'in colgroup';
2434     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2435     $phase = 'in colgroup';
2436     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2437     $phase = 'after thead';
2438     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2439     $phase = 'in tbodys';
2440     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2441     $phase = 'in trs';
2442     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2443     $phase = 'in tbodys';
2444     $has_tfoot = 1;
2445     } else {
2446     $self->{onerror}->(node => $node, type => 'element not allowed');
2447     }
2448     } else { # after tfoot
2449     $self->{onerror}->(node => $node, type => 'element not allowed');
2450     }
2451     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2452     unshift @nodes, @$sib;
2453     push @$new_todos, @$ch;
2454     } elsif ($nt == 3 or $nt == 4) {
2455     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2456     $self->{onerror}->(node => $node, type => 'character not allowed');
2457     }
2458     } elsif ($nt == 5) {
2459     unshift @nodes, @{$node->child_nodes};
2460     }
2461     }
2462    
2463     ## Table model errors
2464     require Whatpm::HTMLTable;
2465     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2466     my %opt = @_;
2467     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2468     });
2469     push @{$self->{return}->{table}}, $todo->{node};
2470    
2471     return ($new_todos);
2472     },
2473     };
2474    
2475     $Element->{$HTML_NS}->{caption} = {
2476     attrs_checker => $GetHTMLAttrsChecker->({}),
2477     checker => $HTMLSignificantStrictlyInlineChecker,
2478     };
2479    
2480     $Element->{$HTML_NS}->{colgroup} = {
2481     attrs_checker => $GetHTMLAttrsChecker->({
2482     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2483     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2484     ## TODO: "attribute not supported" if |col|.
2485     ## ISSUE: MUST NOT if any |col|?
2486     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2487     }),
2488     checker => sub {
2489     my ($self, $todo) = @_;
2490     my $el = $todo->{node};
2491     my $new_todos = [];
2492     my @nodes = (@{$el->child_nodes});
2493    
2494     while (@nodes) {
2495     my $node = shift @nodes;
2496     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2497    
2498     my $nt = $node->node_type;
2499     if ($nt == 1) {
2500     my $node_ns = $node->namespace_uri;
2501     $node_ns = '' unless defined $node_ns;
2502     my $node_ln = $node->manakai_local_name;
2503     ## NOTE: |minuses| list is not checked since redundant
2504     unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
2505     $self->{onerror}->(node => $node, type => 'element not allowed');
2506     }
2507     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2508     unshift @nodes, @$sib;
2509     push @$new_todos, @$ch;
2510     } elsif ($nt == 3 or $nt == 4) {
2511     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2512     $self->{onerror}->(node => $node, type => 'character not allowed');
2513     }
2514     } elsif ($nt == 5) {
2515     unshift @nodes, @{$node->child_nodes};
2516     }
2517     }
2518     return ($new_todos);
2519     },
2520     };
2521    
2522     $Element->{$HTML_NS}->{col} = {
2523     attrs_checker => $GetHTMLAttrsChecker->({
2524     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2525     }),
2526     checker => $HTMLEmptyChecker,
2527     };
2528    
2529     $Element->{$HTML_NS}->{tbody} = {
2530     attrs_checker => $GetHTMLAttrsChecker->({}),
2531     checker => sub {
2532     my ($self, $todo) = @_;
2533     my $el = $todo->{node};
2534     my $new_todos = [];
2535     my @nodes = (@{$el->child_nodes});
2536    
2537     my $has_tr;
2538     while (@nodes) {
2539     my $node = shift @nodes;
2540     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2541    
2542     my $nt = $node->node_type;
2543     if ($nt == 1) {
2544     my $node_ns = $node->namespace_uri;
2545     $node_ns = '' unless defined $node_ns;
2546     my $node_ln = $node->manakai_local_name;
2547     ## NOTE: |minuses| list is not checked since redundant
2548     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2549     $has_tr = 1;
2550     } else {
2551     $self->{onerror}->(node => $node, type => 'element not allowed');
2552     }
2553     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2554     unshift @nodes, @$sib;
2555     push @$new_todos, @$ch;
2556     } elsif ($nt == 3 or $nt == 4) {
2557     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2558     $self->{onerror}->(node => $node, type => 'character not allowed');
2559     }
2560     } elsif ($nt == 5) {
2561     unshift @nodes, @{$node->child_nodes};
2562     }
2563     }
2564     unless ($has_tr) {
2565     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2566     }
2567     return ($new_todos);
2568     },
2569     };
2570    
2571     $Element->{$HTML_NS}->{thead} = {
2572     attrs_checker => $GetHTMLAttrsChecker->({}),
2573     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2574     };
2575    
2576     $Element->{$HTML_NS}->{tfoot} = {
2577     attrs_checker => $GetHTMLAttrsChecker->({}),
2578     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2579     };
2580    
2581     $Element->{$HTML_NS}->{tr} = {
2582     attrs_checker => $GetHTMLAttrsChecker->({}),
2583     checker => sub {
2584     my ($self, $todo) = @_;
2585     my $el = $todo->{node};
2586     my $new_todos = [];
2587     my @nodes = (@{$el->child_nodes});
2588    
2589     my $has_td;
2590     while (@nodes) {
2591     my $node = shift @nodes;
2592     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2593    
2594     my $nt = $node->node_type;
2595     if ($nt == 1) {
2596     my $node_ns = $node->namespace_uri;
2597     $node_ns = '' unless defined $node_ns;
2598     my $node_ln = $node->manakai_local_name;
2599     ## NOTE: |minuses| list is not checked since redundant
2600     if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
2601     $has_td = 1;
2602     } else {
2603     $self->{onerror}->(node => $node, type => 'element not allowed');
2604     }
2605     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2606     unshift @nodes, @$sib;
2607     push @$new_todos, @$ch;
2608     } elsif ($nt == 3 or $nt == 4) {
2609     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2610     $self->{onerror}->(node => $node, type => 'character not allowed');
2611     }
2612     } elsif ($nt == 5) {
2613     unshift @nodes, @{$node->child_nodes};
2614     }
2615     }
2616     unless ($has_td) {
2617     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2618     }
2619     return ($new_todos);
2620     },
2621     };
2622    
2623     $Element->{$HTML_NS}->{td} = {
2624     attrs_checker => $GetHTMLAttrsChecker->({
2625     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2626     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2627     }),
2628     checker => $HTMLBlockOrInlineChecker,
2629     };
2630    
2631     $Element->{$HTML_NS}->{th} = {
2632     attrs_checker => $GetHTMLAttrsChecker->({
2633     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2634     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2635     scope => $GetHTMLEnumeratedAttrChecker
2636     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2637     }),
2638     checker => $HTMLBlockOrInlineChecker,
2639     };
2640    
2641     ## TODO: forms
2642    
2643     $Element->{$HTML_NS}->{script} = {
2644     attrs_checker => sub {
2645     my ($self, $todo) = @_;
2646     $GetHTMLAttrsChecker->({
2647     src => $HTMLURIAttrChecker,
2648     defer => $GetHTMLBooleanAttrChecker->('defer'),
2649     async => $GetHTMLBooleanAttrChecker->('async'),
2650     type => $HTMLIMTAttrChecker,
2651     })->($self, $todo);
2652     if ($todo->{node}->has_attribute_ns (undef, 'defer')) {
2653     my $async_attr = $todo->{node}->get_attribute_node_ns (undef, 'async');
2654     if ($async_attr) {
2655     $self->{onerror}->(node => $async_attr,
2656     type => 'attribute not allowed'); # MUST NOT
2657     }
2658     }
2659     },
2660     checker => sub {
2661     my ($self, $todo) = @_;
2662    
2663     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2664     return $HTMLEmptyChecker->($self, $todo);
2665     } else {
2666     ## NOTE: No content model conformance in HTML5 spec.
2667     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2668     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2669     if ((defined $type and $type eq '') or
2670     (defined $language and $language eq '')) {
2671     $type = 'text/javascript';
2672     } elsif (defined $type) {
2673     #
2674     } elsif (defined $language) {
2675     $type = 'text/' . $language;
2676     } else {
2677     $type = 'text/javascript';
2678     }
2679     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2680     type => 'script:'.$type); ## TODO: $type normalization
2681     return $AnyChecker->($self, $todo);
2682     }
2683     },
2684     };
2685    
2686     ## NOTE: When script is disabled.
2687     $Element->{$HTML_NS}->{noscript} = {
2688 wakaba 1.3 attrs_checker => sub {
2689     my ($self, $todo) = @_;
2690    
2691     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2692     ## since the later is not invoked when the |noscript| is used as a
2693     ## transparent element.
2694     unless ($todo->{node}->owner_document->manakai_is_html) {
2695     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2696     }
2697    
2698     $GetHTMLAttrsChecker->({})->($self, $todo);
2699     },
2700 wakaba 1.1 checker => sub {
2701     my ($self, $todo) = @_;
2702    
2703 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2704     my $new_todos = [];
2705     my @nodes = (@{$todo->{node}->child_nodes});
2706    
2707     while (@nodes) {
2708     my $node = shift @nodes;
2709     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2710    
2711     my $nt = $node->node_type;
2712     if ($nt == 1) {
2713     my $node_ns = $node->namespace_uri;
2714     $node_ns = '' unless defined $node_ns;
2715     my $node_ln = $node->manakai_local_name;
2716     if ($node_ns eq $HTML_NS) {
2717     if ({link => 1, style => 1}->{$node_ln}) {
2718     #
2719     } elsif ($node_ln eq 'meta') {
2720     if ($node->has_attribute_ns (undef, 'charset')) {
2721     $self->{onerror}->(node => $node, type => 'element not allowed');
2722     } else {
2723     #
2724     }
2725     } else {
2726     $self->{onerror}->(node => $node, type => 'element not allowed');
2727     }
2728     } else {
2729     $self->{onerror}->(node => $node, type => 'element not allowed');
2730     }
2731    
2732     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2733     unshift @nodes, @$sib;
2734     push @$new_todos, @$ch;
2735     } elsif ($nt == 3 or $nt == 4) {
2736     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2737     $self->{onerror}->(node => $node, type => 'character not allowed');
2738     }
2739     } elsif ($nt == 5) {
2740     unshift @nodes, @{$node->child_nodes};
2741     }
2742     }
2743     return ($new_todos);
2744     } else {
2745     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2746     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2747     push @$sib, $end;
2748     return ($sib, $ch);
2749     }
2750 wakaba 1.1 },
2751     };
2752 wakaba 1.3
2753     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2754 wakaba 1.1
2755     $Element->{$HTML_NS}->{'event-source'} = {
2756     attrs_checker => $GetHTMLAttrsChecker->({
2757     src => $HTMLURIAttrChecker,
2758     }),
2759     checker => $HTMLEmptyChecker,
2760     };
2761    
2762     $Element->{$HTML_NS}->{details} = {
2763     attrs_checker => $GetHTMLAttrsChecker->({
2764     open => $GetHTMLBooleanAttrChecker->('open'),
2765     }),
2766     checker => sub {
2767     my ($self, $todo) = @_;
2768    
2769     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2770     my ($sib, $ch)
2771     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2772     ->($self, $todo);
2773     push @$sib, $end;
2774     return ($sib, $ch);
2775     },
2776     };
2777    
2778     $Element->{$HTML_NS}->{datagrid} = {
2779     attrs_checker => $GetHTMLAttrsChecker->({
2780     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2781     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2782     }),
2783     checker => sub {
2784     my ($self, $todo) = @_;
2785     my $el = $todo->{node};
2786     my $new_todos = [];
2787     my @nodes = (@{$el->child_nodes});
2788    
2789     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2790    
2791     ## Block-table Block* | table | select | datalist | Empty
2792     my $mode = 'any';
2793     while (@nodes) {
2794     my $node = shift @nodes;
2795     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2796    
2797     my $nt = $node->node_type;
2798     if ($nt == 1) {
2799     my $node_ns = $node->namespace_uri;
2800     $node_ns = '' unless defined $node_ns;
2801     my $node_ln = $node->manakai_local_name;
2802     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2803     if ($mode eq 'block') {
2804     $not_allowed = 1
2805     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2806     } elsif ($mode eq 'any') {
2807     if ($node_ns eq $HTML_NS and
2808     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2809     $mode = 'none';
2810     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2811     $mode = 'block';
2812     } else {
2813     $not_allowed = 1;
2814     }
2815     } else {
2816     $not_allowed = 1;
2817     }
2818     $self->{onerror}->(node => $node, type => 'element not allowed')
2819     if $not_allowed;
2820     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2821     unshift @nodes, @$sib;
2822     push @$new_todos, @$ch;
2823     } elsif ($nt == 3 or $nt == 4) {
2824     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2825     $self->{onerror}->(node => $node, type => 'character not allowed');
2826     }
2827     } elsif ($nt == 5) {
2828     unshift @nodes, @{$node->child_nodes};
2829     }
2830     }
2831    
2832     push @$new_todos, $end;
2833     return ($new_todos);
2834     },
2835     };
2836    
2837     $Element->{$HTML_NS}->{command} = {
2838     attrs_checker => $GetHTMLAttrsChecker->({
2839     checked => $GetHTMLBooleanAttrChecker->('checked'),
2840     default => $GetHTMLBooleanAttrChecker->('default'),
2841     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2842     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2843     icon => $HTMLURIAttrChecker,
2844     label => sub { }, ## NOTE: No conformance creteria
2845     radiogroup => sub { }, ## NOTE: No conformance creteria
2846     ## NOTE: |title| has special semantics, but no syntactical difference
2847     type => sub {
2848     my ($self, $attr) = @_;
2849     my $value = $attr->value;
2850     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2851     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2852     }
2853     },
2854     }),
2855     checker => $HTMLEmptyChecker,
2856     };
2857    
2858     $Element->{$HTML_NS}->{menu} = {
2859     attrs_checker => $GetHTMLAttrsChecker->({
2860     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
2861     id => sub {
2862     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
2863     my ($self, $attr) = @_;
2864     my $value = $attr->value;
2865     if (length $value > 0) {
2866     if ($self->{id}->{$value}) {
2867     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2868     push @{$self->{id}->{$value}}, $attr;
2869     } else {
2870     $self->{id}->{$value} = [$attr];
2871     }
2872     } else {
2873     ## NOTE: MUST contain at least one character
2874     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2875     }
2876     if ($value =~ /[\x09-\x0D\x20]/) {
2877     $self->{onerror}->(node => $attr, type => 'space in ID');
2878     }
2879     $self->{menu}->{$value} ||= $attr;
2880     ## ISSUE: <menu id=""><p contextmenu=""> match?
2881     },
2882     label => sub { }, ## NOTE: No conformance creteria
2883     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
2884     }),
2885     checker => sub {
2886     my ($self, $todo) = @_;
2887     my $el = $todo->{node};
2888     my $new_todos = [];
2889     my @nodes = (@{$el->child_nodes});
2890    
2891     my $content = 'li or inline';
2892     while (@nodes) {
2893     my $node = shift @nodes;
2894     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2895    
2896     my $nt = $node->node_type;
2897     if ($nt == 1) {
2898     my $node_ns = $node->namespace_uri;
2899     $node_ns = '' unless defined $node_ns;
2900     my $node_ln = $node->manakai_local_name;
2901     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2902     if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2903     if ($content eq 'inline') {
2904     $not_allowed = 1;
2905     } elsif ($content eq 'li or inline') {
2906     $content = 'li';
2907     }
2908     } else {
2909     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2910     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2911     $content = 'inline';
2912     } else {
2913     $not_allowed = 1;
2914     }
2915     }
2916     $self->{onerror}->(node => $node, type => 'element not allowed')
2917     if $not_allowed;
2918     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2919     unshift @nodes, @$sib;
2920     push @$new_todos, @$ch;
2921     } elsif ($nt == 3 or $nt == 4) {
2922     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2923     if ($content eq 'li') {
2924     $self->{onerror}->(node => $node, type => 'character not allowed');
2925     } elsif ($content eq 'li or inline') {
2926     $content = 'inline';
2927     }
2928     }
2929     } elsif ($nt == 5) {
2930     unshift @nodes, @{$node->child_nodes};
2931     }
2932     }
2933    
2934     for (@$new_todos) {
2935     $_->{inline} = 1;
2936     }
2937     return ($new_todos);
2938     },
2939     };
2940    
2941     $Element->{$HTML_NS}->{legend} = {
2942     attrs_checker => $GetHTMLAttrsChecker->({}),
2943     checker => sub {
2944     my ($self, $todo) = @_;
2945    
2946     my $parent = $todo->{node}->manakai_parent_element;
2947     if (defined $parent) {
2948     my $nsuri = $parent->namespace_uri;
2949     $nsuri = '' unless defined $nsuri;
2950     my $ln = $parent->manakai_local_name;
2951     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
2952     return $HTMLInlineChecker->($self, $todo);
2953     } else {
2954     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
2955     }
2956     } else {
2957     return $HTMLInlineChecker->($self, $todo);
2958     }
2959    
2960     ## ISSUE: Content model is defined only for fieldset/legend,
2961     ## details/legend, and figure/legend.
2962     },
2963     };
2964    
2965     $Element->{$HTML_NS}->{div} = {
2966     attrs_checker => $GetHTMLAttrsChecker->({}),
2967     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
2968     };
2969    
2970     $Element->{$HTML_NS}->{font} = {
2971     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2972     checker => $HTMLTransparentChecker,
2973     };
2974    
2975     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
2976    
2977     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24