/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations) (download)
Sat Aug 25 02:44:38 2007 UTC (17 years, 10 months ago) by wakaba
Branch: MAIN
Changes since 1.4: +8 -3 lines
++ whatpm/t/ChangeLog	25 Aug 2007 02:43:56 -0000
2007-08-25  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat: New tests for |meta| corner cases
	are added.

2007-08-22  Wakaba  <wakaba@suika.fam.cx>

	* tree-test-2.dat: New tests for |html| innerHTML are added.

++ whatpm/Whatpm/ContentChecker/ChangeLog	25 Aug 2007 02:39:52 -0000
2007-08-25  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm (head, noscript): Treatement for invalid |meta|
	elenments are changed as per HTML5 revision 1018.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10     noscript 1
11     /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27     /,
28     },
29     };
30    
31     my $HTMLStrictlyInlineLevelElements = {
32     $HTML_NS => {
33     qw/
34     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
35     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
36     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
37     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
38     script 1 noscript 1 event-source 1 command 1 font 1
39     /,
40     },
41     };
42    
43     my $HTMLStructuredInlineLevelElements = {
44     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
45     };
46    
47     my $HTMLInteractiveElements = {
48     $HTML_NS => {a => 1, details => 1, datagrid => 1},
49     };
50     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
51     ## of interactive elements
52    
53     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
54    
55     #my $HTMLSemiTransparentElements = {
56     # $HTML_NS => {qw/video 1 audio 1/},
57     #};
58    
59     my $HTMLEmbededElements = {
60     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
61     };
62    
63     ## Empty
64     my $HTMLEmptyChecker = sub {
65     my ($self, $todo) = @_;
66     my $el = $todo->{node};
67     my $new_todos = [];
68     my @nodes = (@{$el->child_nodes});
69    
70     while (@nodes) {
71     my $node = shift @nodes;
72     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
73    
74     my $nt = $node->node_type;
75     if ($nt == 1) {
76     ## NOTE: |minuses| list is not checked since redundant
77     $self->{onerror}->(node => $node, type => 'element not allowed');
78     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
79     unshift @nodes, @$sib;
80     push @$new_todos, @$ch;
81     } elsif ($nt == 3 or $nt == 4) {
82     if ($node->data =~ /[^\x09-\x0D\x20]/) {
83     $self->{onerror}->(node => $node, type => 'character not allowed');
84     }
85     } elsif ($nt == 5) {
86     unshift @nodes, @{$node->child_nodes};
87     }
88     }
89     return ($new_todos);
90     };
91    
92     ## Text
93     my $HTMLTextChecker = sub {
94     my ($self, $todo) = @_;
95     my $el = $todo->{node};
96     my $new_todos = [];
97     my @nodes = (@{$el->child_nodes});
98    
99     while (@nodes) {
100     my $node = shift @nodes;
101     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
102    
103     my $nt = $node->node_type;
104     if ($nt == 1) {
105     ## NOTE: |minuses| list is not checked since redundant
106     $self->{onerror}->(node => $node, type => 'element not allowed');
107     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
108     unshift @nodes, @$sib;
109     push @$new_todos, @$ch;
110     } elsif ($nt == 5) {
111     unshift @nodes, @{$node->child_nodes};
112     }
113     }
114     return ($new_todos);
115     };
116    
117     ## Zero or more |html:style| elements,
118     ## followed by zero or more block-level elements
119     my $HTMLStylableBlockChecker = sub {
120     my ($self, $todo) = @_;
121     my $el = $todo->{node};
122     my $new_todos = [];
123     my @nodes = (@{$el->child_nodes});
124    
125     my $has_non_style;
126     while (@nodes) {
127     my $node = shift @nodes;
128     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
129    
130     my $nt = $node->node_type;
131     if ($nt == 1) {
132     my $node_ns = $node->namespace_uri;
133     $node_ns = '' unless defined $node_ns;
134     my $node_ln = $node->manakai_local_name;
135     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
136     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
137     $not_allowed = 1 if $has_non_style or
138     not $node->has_attribute_ns (undef, 'scoped');
139     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
140     $has_non_style = 1;
141     } else {
142     $has_non_style = 1;
143     $not_allowed = 1;
144     }
145     $self->{onerror}->(node => $node, type => 'element not allowed')
146     if $not_allowed;
147     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
148     unshift @nodes, @$sib;
149     push @$new_todos, @$ch;
150     } elsif ($nt == 3 or $nt == 4) {
151     if ($node->data =~ /[^\x09-\x0D\x20]/) {
152     $self->{onerror}->(node => $node, type => 'character not allowed');
153     }
154     } elsif ($nt == 5) {
155     unshift @nodes, @{$node->child_nodes};
156     }
157     }
158     return ($new_todos);
159     }; # $HTMLStylableBlockChecker
160    
161     ## Zero or more block-level elements
162     my $HTMLBlockChecker = sub {
163     my ($self, $todo) = @_;
164     my $el = $todo->{node};
165     my $new_todos = [];
166     my @nodes = (@{$el->child_nodes});
167    
168     while (@nodes) {
169     my $node = shift @nodes;
170     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
171    
172     my $nt = $node->node_type;
173     if ($nt == 1) {
174     my $node_ns = $node->namespace_uri;
175     $node_ns = '' unless defined $node_ns;
176     my $node_ln = $node->manakai_local_name;
177     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
178     $not_allowed = 1
179     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
180     $self->{onerror}->(node => $node, type => 'element not allowed')
181     if $not_allowed;
182     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
183     unshift @nodes, @$sib;
184     push @$new_todos, @$ch;
185     } elsif ($nt == 3 or $nt == 4) {
186     if ($node->data =~ /[^\x09-\x0D\x20]/) {
187     $self->{onerror}->(node => $node, type => 'character not allowed');
188     }
189     } elsif ($nt == 5) {
190     unshift @nodes, @{$node->child_nodes};
191     }
192     }
193     return ($new_todos);
194     }; # $HTMLBlockChecker
195    
196     ## Inline-level content
197     my $HTMLInlineChecker = sub {
198     my ($self, $todo) = @_;
199     my $el = $todo->{node};
200     my $new_todos = [];
201     my @nodes = (@{$el->child_nodes});
202    
203     while (@nodes) {
204     my $node = shift @nodes;
205     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
206    
207     my $nt = $node->node_type;
208     if ($nt == 1) {
209     my $node_ns = $node->namespace_uri;
210     $node_ns = '' unless defined $node_ns;
211     my $node_ln = $node->manakai_local_name;
212     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
213     $not_allowed = 1
214     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
215     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
216     $self->{onerror}->(node => $node, type => 'element not allowed')
217     if $not_allowed;
218     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
219     unshift @nodes, @$sib;
220     push @$new_todos, @$ch;
221     } elsif ($nt == 5) {
222     unshift @nodes, @{$node->child_nodes};
223     }
224     }
225    
226     for (@$new_todos) {
227     $_->{inline} = 1;
228     }
229     return ($new_todos);
230     }; # $HTMLInlineChecker
231    
232     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
233     ## TODO: check significant content
234    
235     ## Strictly inline-level content
236     my $HTMLStrictlyInlineChecker = sub {
237     my ($self, $todo) = @_;
238     my $el = $todo->{node};
239     my $new_todos = [];
240     my @nodes = (@{$el->child_nodes});
241    
242     while (@nodes) {
243     my $node = shift @nodes;
244     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
245    
246     my $nt = $node->node_type;
247     if ($nt == 1) {
248     my $node_ns = $node->namespace_uri;
249     $node_ns = '' unless defined $node_ns;
250     my $node_ln = $node->manakai_local_name;
251     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
252     $not_allowed = 1
253     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
254     $self->{onerror}->(node => $node, type => 'element not allowed')
255     if $not_allowed;
256     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
257     unshift @nodes, @$sib;
258     push @$new_todos, @$ch;
259     } elsif ($nt == 5) {
260     unshift @nodes, @{$node->child_nodes};
261     }
262     }
263    
264     for (@$new_todos) {
265     $_->{inline} = 1;
266     $_->{strictly_inline} = 1;
267     }
268     return ($new_todos);
269     }; # $HTMLStrictlyInlineChecker
270    
271     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
272     ## TODO: check significant content
273    
274     ## Inline-level or strictly inline-kevek content
275     my $HTMLInlineOrStrictlyInlineChecker = sub {
276     my ($self, $todo) = @_;
277     my $el = $todo->{node};
278     my $new_todos = [];
279     my @nodes = (@{$el->child_nodes});
280    
281     while (@nodes) {
282     my $node = shift @nodes;
283     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
284    
285     my $nt = $node->node_type;
286     if ($nt == 1) {
287     my $node_ns = $node->namespace_uri;
288     $node_ns = '' unless defined $node_ns;
289     my $node_ln = $node->manakai_local_name;
290     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
291     if ($todo->{strictly_inline}) {
292     $not_allowed = 1
293     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
294     } else {
295     $not_allowed = 1
296     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
297     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
298     }
299     $self->{onerror}->(node => $node, type => 'element not allowed')
300     if $not_allowed;
301     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
302     unshift @nodes, @$sib;
303     push @$new_todos, @$ch;
304     } elsif ($nt == 5) {
305     unshift @nodes, @{$node->child_nodes};
306     }
307     }
308    
309     for (@$new_todos) {
310     $_->{inline} = 1;
311     $_->{strictly_inline} = 1;
312     }
313     return ($new_todos);
314     }; # $HTMLInlineOrStrictlyInlineChecker
315    
316     my $HTMLSignificantInlineOrStrictlyInlineChecker
317     = $HTMLInlineOrStrictlyInlineChecker;
318     ## TODO: check significant content
319    
320     my $HTMLBlockOrInlineChecker = sub {
321     my ($self, $todo) = @_;
322     my $el = $todo->{node};
323     my $new_todos = [];
324     my @nodes = (@{$el->child_nodes});
325    
326     my $content = 'block-or-inline'; # or 'block' or 'inline'
327     my @block_not_inline;
328     while (@nodes) {
329     my $node = shift @nodes;
330     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
331    
332     my $nt = $node->node_type;
333     if ($nt == 1) {
334     my $node_ns = $node->namespace_uri;
335     $node_ns = '' unless defined $node_ns;
336     my $node_ln = $node->manakai_local_name;
337     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
338     if ($content eq 'block') {
339     $not_allowed = 1
340     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
341     } elsif ($content eq 'inline') {
342     $not_allowed = 1
343     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
344     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
345     } else {
346     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
347     my $is_inline
348     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
349     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
350    
351     push @block_not_inline, $node
352     if $is_block and not $is_inline and not $not_allowed;
353     unless ($is_block) {
354     $content = 'inline';
355     for (@block_not_inline) {
356     $self->{onerror}->(node => $_, type => 'element not allowed');
357     }
358     $not_allowed = 1 unless $is_inline;
359     }
360     }
361     $self->{onerror}->(node => $node, type => 'element not allowed')
362     if $not_allowed;
363     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
364     unshift @nodes, @$sib;
365     push @$new_todos, @$ch;
366     } elsif ($nt == 3 or $nt == 4) {
367     if ($node->data =~ /[^\x09-\x0D\x20]/) {
368     if ($content eq 'block') {
369     $self->{onerror}->(node => $node, type => 'character not allowed');
370     } else {
371     $content = 'inline';
372     for (@block_not_inline) {
373     $self->{onerror}->(node => $_, type => 'element not allowed');
374     }
375     }
376     }
377     } elsif ($nt == 5) {
378     unshift @nodes, @{$node->child_nodes};
379     }
380     }
381    
382     if ($content eq 'inline') {
383     for (@$new_todos) {
384     $_->{inline} = 1;
385     }
386     }
387     return ($new_todos);
388     };
389    
390     ## Zero or more XXX element, then either block-level or inline-level
391     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
392     my ($elnsuri, $ellname) = @_;
393     return sub {
394     my ($self, $todo) = @_;
395     my $el = $todo->{node};
396     my $new_todos = [];
397     my @nodes = (@{$el->child_nodes});
398    
399     my $has_non_style;
400     my $content = 'block-or-inline'; # or 'block' or 'inline'
401     my @block_not_inline;
402     while (@nodes) {
403     my $node = shift @nodes;
404     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
405    
406     my $nt = $node->node_type;
407     if ($nt == 1) {
408     my $node_ns = $node->namespace_uri;
409     $node_ns = '' unless defined $node_ns;
410     my $node_ln = $node->manakai_local_name;
411     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
412     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
413     $not_allowed = 1 if $has_non_style;
414     if ($ellname eq 'style' and
415     not $node->has_attribute_ns (undef, 'scoped')) {
416     $not_allowed = 1;
417     }
418     } elsif ($content eq 'block') {
419     $has_non_style = 1;
420     $not_allowed = 1
421     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
422     } elsif ($content eq 'inline') {
423     $has_non_style = 1;
424     $not_allowed = 1
425     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
426     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
427     } else {
428     $has_non_style = 1;
429     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
430     my $is_inline
431     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
432     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
433    
434     push @block_not_inline, $node
435     if $is_block and not $is_inline and not $not_allowed;
436     unless ($is_block) {
437     $content = 'inline';
438     for (@block_not_inline) {
439     $self->{onerror}->(node => $_, type => 'element not allowed');
440     }
441     $not_allowed = 1 unless $is_inline;
442     }
443     }
444     $self->{onerror}->(node => $node, type => 'element not allowed')
445     if $not_allowed;
446     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
447     unshift @nodes, @$sib;
448     push @$new_todos, @$ch;
449     } elsif ($nt == 3 or $nt == 4) {
450     if ($node->data =~ /[^\x09-\x0D\x20]/) {
451     $has_non_style = 1;
452     if ($content eq 'block') {
453     $self->{onerror}->(node => $node, type => 'character not allowed');
454     } else {
455     $content = 'inline';
456     for (@block_not_inline) {
457     $self->{onerror}->(node => $_, type => 'element not allowed');
458     }
459     }
460     }
461     } elsif ($nt == 5) {
462     unshift @nodes, @{$node->child_nodes};
463     }
464     }
465    
466     if ($content eq 'inline') {
467     for (@$new_todos) {
468     $_->{inline} = 1;
469     }
470     }
471     return ($new_todos);
472     };
473     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
474    
475     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
476    
477     our $AttrChecker;
478    
479     my $GetHTMLEnumeratedAttrChecker = sub {
480     my $states = shift; # {value => conforming ? 1 : -1}
481     return sub {
482     my ($self, $attr) = @_;
483     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
484     if ($states->{$value} > 0) {
485     #
486     } elsif ($states->{$value}) {
487     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
488     } else {
489     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
490     }
491     };
492     }; # $GetHTMLEnumeratedAttrChecker
493    
494     my $GetHTMLBooleanAttrChecker = sub {
495     my $local_name = shift;
496     return sub {
497     my ($self, $attr) = @_;
498     my $value = $attr->value;
499     unless ($value eq $local_name or $value eq '') {
500     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
501     }
502     };
503     }; # $GetHTMLBooleanAttrChecker
504    
505     ## |rel| attribute (unordered set of space separated tokens,
506     ## whose allowed values are defined by the section on link types)
507     my $HTMLLinkTypesAttrChecker = sub {
508 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
509 wakaba 1.1 my %word;
510     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
511     unless ($word{$word}) {
512     $word{$word} = 1;
513     } else {
514     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
515     }
516     }
517     ## NOTE: Case sensitive match (since HTML5 spec does not say link
518     ## types are case-insensitive and it says "The value should not
519     ## be confusingly similar to any other defined value (e.g.
520     ## differing only in case).").
521     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
522     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
523     ## values to be used conformingly.
524     require Whatpm::_LinkTypeList;
525     our $LinkType;
526     for my $word (keys %word) {
527     my $def = $LinkType->{$word};
528     if (defined $def) {
529     if ($def->{status} eq 'accepted') {
530     if (defined $def->{effect}->[$a_or_area]) {
531     #
532     } else {
533     $self->{onerror}->(node => $attr,
534     type => 'link type:bad context:'.$word);
535     }
536     } elsif ($def->{status} eq 'proposal') {
537     $self->{onerror}->(node => $attr, level => 's',
538     type => 'link type:proposed:'.$word);
539     } else { # rejected or synonym
540     $self->{onerror}->(node => $attr,
541     type => 'link type:non-conforming:'.$word);
542     }
543 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
544     if ($word eq 'alternate') {
545     #
546     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
547     $todo->{has_hyperlink_link_type} = 1;
548     }
549     }
550 wakaba 1.1 if ($def->{unique}) {
551     unless ($self->{has_link_type}->{$word}) {
552     $self->{has_link_type}->{$word} = 1;
553     } else {
554     $self->{onerror}->(node => $attr,
555     type => 'link type:duplicate:'.$word);
556     }
557     }
558     } else {
559     $self->{onerror}->(node => $attr, level => 'unsupported',
560     type => 'link type:'.$word);
561     }
562     }
563 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
564     if $word{alternate} and not $word{stylesheet};
565 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
566     ## says that using both X-Pingback: header field and HTML
567     ## <link rel=pingback> is deprecated and if both appears they
568     ## SHOULD contain exactly the same value.
569     ## ISSUE: Pingback 1.0 specification defines the exact representation
570     ## of its link element, which cannot be tested by the current arch.
571     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
572     ## include any string that matches to the pattern for the rel=pingback link,
573     ## which again inpossible to test.
574     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
575     }; # $HTMLLinkTypesAttrChecker
576    
577     ## URI (or IRI)
578     my $HTMLURIAttrChecker = sub {
579     my ($self, $attr) = @_;
580     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
581     my $value = $attr->value;
582     Whatpm::URIChecker->check_iri_reference ($value, sub {
583     my %opt = @_;
584     $self->{onerror}->(node => $attr, level => $opt{level},
585     type => 'URI::'.$opt{type}.
586     (defined $opt{position} ? ':'.$opt{position} : ''));
587     });
588 wakaba 1.4 $self->{has_uri_attr} = 1;
589 wakaba 1.1 }; # $HTMLURIAttrChecker
590    
591     ## A space separated list of one or more URIs (or IRIs)
592     my $HTMLSpaceURIsAttrChecker = sub {
593     my ($self, $attr) = @_;
594     my $i = 0;
595     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
596     Whatpm::URIChecker->check_iri_reference ($value, sub {
597     my %opt = @_;
598     $self->{onerror}->(node => $attr, level => $opt{level},
599 wakaba 1.2 type => 'URIs:'.':'.
600     $opt{type}.':'.$i.
601 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
602     });
603     $i++;
604     }
605     ## ISSUE: Relative references?
606     ## ISSUE: Leading or trailing white spaces are conformant?
607     ## ISSUE: A sequence of white space characters are conformant?
608     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
609     ## NOTE: Duplication seems not an error.
610 wakaba 1.4 $self->{has_uri_attr} = 1;
611 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
612    
613     my $HTMLDatetimeAttrChecker = sub {
614     my ($self, $attr) = @_;
615     my $value = $attr->value;
616     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
617     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
618     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
619     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
620     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
621     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
622     if $d < 1 or
623     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
624     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
625     if $M == 2 and $d == 29 and
626     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
627     } else {
628     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
629     }
630     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
631     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
632     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
633     if defined $s and $s > 59;
634     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
635     if $zh > 23;
636     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
637     if $zm > 59;
638     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
639     } else {
640     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
641     }
642     }; # $HTMLDatetimeAttrChecker
643    
644     my $HTMLIntegerAttrChecker = sub {
645     my ($self, $attr) = @_;
646     my $value = $attr->value;
647     unless ($value =~ /\A-?[0-9]+\z/) {
648     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
649     }
650     }; # $HTMLIntegerAttrChecker
651    
652     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
653     my $range_check = shift;
654     return sub {
655     my ($self, $attr) = @_;
656     my $value = $attr->value;
657     if ($value =~ /\A[0-9]+\z/) {
658     unless ($range_check->($value + 0)) {
659     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
660     }
661     } else {
662     $self->{onerror}->(node => $attr,
663     type => 'nninteger:syntax error');
664     }
665     };
666     }; # $GetHTMLNonNegativeIntegerAttrChecker
667    
668     my $GetHTMLFloatingPointNumberAttrChecker = sub {
669     my $range_check = shift;
670     return sub {
671     my ($self, $attr) = @_;
672     my $value = $attr->value;
673     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
674     unless ($range_check->($value + 0)) {
675     $self->{onerror}->(node => $attr, type => 'float:out of range');
676     }
677     } else {
678     $self->{onerror}->(node => $attr,
679     type => 'float:syntax error');
680     }
681     };
682     }; # $GetHTMLFloatingPointNumberAttrChecker
683    
684     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
685     ## ISSUE: RFC 2046 does not define syntax of media types.
686     ## ISSUE: The definition of "a valid MIME type" is unknown.
687     ## Syntactical correctness?
688     my $HTMLIMTAttrChecker = sub {
689     my ($self, $attr) = @_;
690     my $value = $attr->value;
691     ## ISSUE: RFC 2045 Content-Type header field allows insertion
692     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
693     ## ISSUE: RFC 2231 extension? Maybe no.
694     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
695     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
696     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
697     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
698     my @type = ($1, $2);
699     my $param = $3;
700     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
701     if (defined $2) {
702     push @type, $1 => $2;
703     } else {
704     my $n = $1;
705     my $v = $2;
706     $v =~ s/\\(.)/$1/gs;
707     push @type, $n => $v;
708     }
709     }
710     require Whatpm::IMTChecker;
711     Whatpm::IMTChecker->check_imt (sub {
712     my %opt = @_;
713     $self->{onerror}->(node => $attr, level => $opt{level},
714     type => 'IMT:'.$opt{type});
715     }, @type);
716     } else {
717     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
718     }
719     }; # $HTMLIMTAttrChecker
720    
721     my $HTMLLanguageTagAttrChecker = sub {
722     my ($self, $attr) = @_;
723     $self->{onerror}->(node => $attr, level => 'unsupported',
724     type => 'language tag');
725     if ($attr->value eq '') {
726     $self->{onerror}->(node => $attr, type => 'language tag:syntax error');
727     }
728     ## TODO: RFC 3066 test
729     ## ISSUE: RFC 4646 (3066bis)?
730     }; # $HTMLLanguageTagAttrChecker
731    
732     ## "A valid media query [MQ]"
733     my $HTMLMQAttrChecker = sub {
734     my ($self, $attr) = @_;
735     $self->{onerror}->(node => $attr, level => 'unsupported',
736     type => 'media query');
737     ## ISSUE: What is "a valid media query"?
738     }; # $HTMLMQAttrChecker
739    
740     my $HTMLEventHandlerAttrChecker = sub {
741     my ($self, $attr) = @_;
742     $self->{onerror}->(node => $attr, level => 'unsupported',
743     type => 'event handler');
744     ## TODO: MUST contain valid ECMAScript code matching the
745     ## ECMAScript |FunctionBody| production. [ECMA262]
746     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
747     ## ISSUE: Automatic semicolon insertion does not apply?
748     ## ISSUE: Other script languages?
749     }; # $HTMLEventHandlerAttrChecker
750    
751     my $HTMLUsemapAttrChecker = sub {
752     my ($self, $attr) = @_;
753     ## MUST be a valid hashed ID reference to a |map| element
754     my $value = $attr->value;
755     if ($value =~ s/^#//) {
756     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
757     push @{$self->{usemap}}, [$value => $attr];
758     } else {
759     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
760     }
761     ## NOTE: Space characters in hashed ID references are conforming.
762     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
763     }; # $HTMLUsemapAttrChecker
764    
765     my $HTMLTargetAttrChecker = sub {
766     my ($self, $attr) = @_;
767     my $value = $attr->value;
768     if ($value =~ /^_/) {
769     $value = lc $value; ## ISSUE: ASCII case-insentitive?
770     unless ({
771     _self => 1, _parent => 1, _top => 1,
772     }->{$value}) {
773     $self->{onerror}->(node => $attr,
774     type => 'reserved browsing context name');
775     }
776     } else {
777     #$ ISSUE: An empty string is conforming?
778     }
779     }; # $HTMLTargetAttrChecker
780    
781     my $HTMLAttrChecker = {
782     id => sub {
783     ## NOTE: |map| has its own variant of |id=""| checker
784     my ($self, $attr) = @_;
785     my $value = $attr->value;
786     if (length $value > 0) {
787     if ($self->{id}->{$value}) {
788     $self->{onerror}->(node => $attr, type => 'duplicate ID');
789     push @{$self->{id}->{$value}}, $attr;
790     } else {
791     $self->{id}->{$value} = [$attr];
792     }
793     if ($value =~ /[\x09-\x0D\x20]/) {
794     $self->{onerror}->(node => $attr, type => 'space in ID');
795     }
796     } else {
797     ## NOTE: MUST contain at least one character
798     $self->{onerror}->(node => $attr, type => 'empty attribute value');
799     }
800     },
801     title => sub {}, ## NOTE: No conformance creteria
802     lang => sub {
803     my ($self, $attr) = @_;
804     $self->{onerror}->(node => $attr, level => 'unsupported',
805     type => 'language tag');
806     ## TODO: RFC 3066 or empty test
807     ## ISSUE: RFC 4646 (3066bis)?
808     unless ($attr->owner_document->manakai_is_html) {
809     $self->{onerror}->(node => $attr, type => 'in XML:lang');
810     }
811     },
812     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
813     class => sub {
814     my ($self, $attr) = @_;
815     my %word;
816     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
817     unless ($word{$word}) {
818     $word{$word} = 1;
819     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
820     } else {
821     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
822     }
823     }
824     },
825     contextmenu => sub {
826     my ($self, $attr) = @_;
827     my $value = $attr->value;
828     push @{$self->{contextmenu}}, [$value => $attr];
829     ## ISSUE: "The value must be the ID of a menu element in the DOM."
830     ## What is "in the DOM"? A menu Element node that is not part
831     ## of the Document tree is in the DOM? A menu Element node that
832     ## belong to another Document tree is in the DOM?
833     },
834     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
835     tabindex => $HTMLIntegerAttrChecker,
836     };
837    
838     for (qw/
839     onabort onbeforeunload onblur onchange onclick oncontextmenu
840     ondblclick ondrag ondragend ondragenter ondragleave ondragover
841     ondragstart ondrop onerror onfocus onkeydown onkeypress
842     onkeyup onload onmessage onmousedown onmousemove onmouseout
843     onmouseover onmouseup onmousewheel onresize onscroll onselect
844     onsubmit onunload
845     /) {
846     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
847     }
848    
849     my $GetHTMLAttrsChecker = sub {
850     my $element_specific_checker = shift;
851     return sub {
852     my ($self, $todo) = @_;
853     for my $attr (@{$todo->{node}->attributes}) {
854     my $attr_ns = $attr->namespace_uri;
855     $attr_ns = '' unless defined $attr_ns;
856     my $attr_ln = $attr->manakai_local_name;
857     my $checker;
858     if ($attr_ns eq '') {
859     $checker = $element_specific_checker->{$attr_ln}
860     || $HTMLAttrChecker->{$attr_ln};
861     }
862     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
863     || $AttrChecker->{$attr_ns}->{''};
864     if ($checker) {
865     $checker->($self, $attr, $todo);
866     } else {
867     $self->{onerror}->(node => $attr, level => 'unsupported',
868     type => 'attribute');
869     ## ISSUE: No comformance createria for unknown attributes in the spec
870     }
871     }
872     };
873     }; # $GetHTMLAttrsChecker
874    
875     our $Element;
876     our $ElementDefault;
877     our $AnyChecker;
878    
879     $Element->{$HTML_NS}->{''} = {
880     attrs_checker => $GetHTMLAttrsChecker->({}),
881     checker => $ElementDefault->{checker},
882     };
883    
884     $Element->{$HTML_NS}->{html} = {
885     is_root => 1,
886     attrs_checker => $GetHTMLAttrsChecker->({
887     xmlns => sub {
888     my ($self, $attr) = @_;
889     my $value = $attr->value;
890     unless ($value eq $HTML_NS) {
891     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
892     }
893     unless ($attr->owner_document->manakai_is_html) {
894     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
895     ## TODO: Test
896     }
897     },
898     }),
899     checker => sub {
900     my ($self, $todo) = @_;
901     my $el = $todo->{node};
902     my $new_todos = [];
903     my @nodes = (@{$el->child_nodes});
904    
905     my $phase = 'before head';
906     while (@nodes) {
907     my $node = shift @nodes;
908     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
909    
910     my $nt = $node->node_type;
911     if ($nt == 1) {
912     my $node_ns = $node->namespace_uri;
913     $node_ns = '' unless defined $node_ns;
914     my $node_ln = $node->manakai_local_name;
915     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
916     if ($phase eq 'before head') {
917     if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
918     $phase = 'after head';
919     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
920     $self->{onerror}->(node => $node, type => 'ps element missing:head');
921     $phase = 'after body';
922     } else {
923     $not_allowed = 1;
924     # before head
925     }
926     } elsif ($phase eq 'after head') {
927     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
928     $phase = 'after body';
929     } else {
930     $not_allowed = 1;
931     # after head
932     }
933     } else { #elsif ($phase eq 'after body') {
934     $not_allowed = 1;
935     # after body
936     }
937     $self->{onerror}->(node => $node, type => 'element not allowed')
938     if $not_allowed;
939     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
940     unshift @nodes, @$sib;
941     push @$new_todos, @$ch;
942     } elsif ($nt == 3 or $nt == 4) {
943     if ($node->data =~ /[^\x09-\x0D\x20]/) {
944     $self->{onerror}->(node => $node, type => 'character not allowed');
945     }
946     } elsif ($nt == 5) {
947     unshift @nodes, @{$node->child_nodes};
948     }
949     }
950    
951     if ($phase eq 'before head') {
952     $self->{onerror}->(node => $el, type => 'child element missing:head');
953     $self->{onerror}->(node => $el, type => 'child element missing:body');
954     } elsif ($phase eq 'after head') {
955     $self->{onerror}->(node => $el, type => 'child element missing:body');
956     }
957    
958     return ($new_todos);
959     },
960     };
961    
962     $Element->{$HTML_NS}->{head} = {
963     attrs_checker => $GetHTMLAttrsChecker->({}),
964     checker => sub {
965     my ($self, $todo) = @_;
966     my $el = $todo->{node};
967     my $new_todos = [];
968     my @nodes = (@{$el->child_nodes});
969    
970     my $has_title;
971     my $phase = 'initial'; # 'after charset', 'after base'
972     while (@nodes) {
973     my $node = shift @nodes;
974     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
975    
976     my $nt = $node->node_type;
977     if ($nt == 1) {
978     my $node_ns = $node->namespace_uri;
979     $node_ns = '' unless defined $node_ns;
980     my $node_ln = $node->manakai_local_name;
981     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
982     if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
983     $phase = 'after base';
984     unless ($has_title) {
985     $has_title = 1;
986     } else {
987     $not_allowed = 1;
988     }
989     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
990     if ($node->has_attribute_ns (undef, 'charset')) {
991     if ($phase eq 'initial') {
992     $phase = 'after charset';
993     } else {
994     $not_allowed = 1;
995     ## NOTE: See also |base|'s "contexts" field in the spec
996     }
997 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
998     $node->has_attribute_ns (undef, 'http-equiv')) {
999     $phase = 'after base';
1000 wakaba 1.1 } else {
1001     $phase = 'after base';
1002 wakaba 1.5 $not_allowed = 1;
1003 wakaba 1.1 }
1004     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1005     if ($phase eq 'initial' or $phase eq 'after charset') {
1006     $phase = 'after base';
1007     } else {
1008     $not_allowed = 1;
1009     }
1010     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1011     $phase = 'after base';
1012     if ($node->has_attribute_ns (undef, 'scoped')) {
1013     $not_allowed = 1;
1014     }
1015     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1016     $phase = 'after base';
1017     } else {
1018     $not_allowed = 1;
1019     }
1020     $self->{onerror}->(node => $node, type => 'element not allowed')
1021     if $not_allowed;
1022 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1023 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1024     unshift @nodes, @$sib;
1025     push @$new_todos, @$ch;
1026     } elsif ($nt == 3 or $nt == 4) {
1027     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1028     $self->{onerror}->(node => $node, type => 'character not allowed');
1029     }
1030     } elsif ($nt == 5) {
1031     unshift @nodes, @{$node->child_nodes};
1032     }
1033     }
1034     unless ($has_title) {
1035     $self->{onerror}->(node => $el, type => 'child element missing:title');
1036     }
1037     return ($new_todos);
1038     },
1039     };
1040    
1041     $Element->{$HTML_NS}->{title} = {
1042     attrs_checker => $GetHTMLAttrsChecker->({}),
1043     checker => $HTMLTextChecker,
1044     };
1045    
1046     $Element->{$HTML_NS}->{base} = {
1047 wakaba 1.4 attrs_checker => sub {
1048     my ($self, $todo) = @_;
1049    
1050     if ($self->{has_uri_attr} and
1051     $todo->{node}->has_attribute_ns (undef, 'href')) {
1052     ## ISSUE: Are these examples conforming?
1053     ## <head profile="a b c"><base href> (except for |profile|'s
1054     ## non-conformance)
1055     ## <title xml:base="relative"/><base href/> (maybe it should be)
1056     ## <unknown xmlns="relative"/><base href/> (assuming that
1057     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1058     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1059     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1060     ## <style>@import 'relative';</style><base href>
1061     ## <script>location.href = 'relative';</script><base href>
1062     $self->{onerror}->(node => $todo->{node},
1063     type => 'basehref after URI attribute');
1064     }
1065     if ($self->{has_hyperlink_element} and
1066     $todo->{node}->has_attribute_ns (undef, 'target')) {
1067     ## ISSUE: Are these examples conforming?
1068     ## <head><title xlink:href=""/><base target="name"/></head>
1069     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1070     ## (assuming that |xbl:xbl| is allowed before |base|)
1071     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1072     ## <link href=""/><base target="name"/>
1073     ## <link rel=unknown href=""><base target=name>
1074     $self->{onerror}->(node => $todo->{node},
1075     type => 'basetarget after hyperlink');
1076     }
1077    
1078     return $GetHTMLAttrsChecker->({
1079     href => $HTMLURIAttrChecker,
1080     target => $HTMLTargetAttrChecker,
1081     })->($self, $todo);
1082     },
1083 wakaba 1.1 checker => $HTMLEmptyChecker,
1084     };
1085    
1086     $Element->{$HTML_NS}->{link} = {
1087     attrs_checker => sub {
1088     my ($self, $todo) = @_;
1089     $GetHTMLAttrsChecker->({
1090     href => $HTMLURIAttrChecker,
1091 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1092 wakaba 1.1 media => $HTMLMQAttrChecker,
1093     hreflang => $HTMLLanguageTagAttrChecker,
1094     type => $HTMLIMTAttrChecker,
1095     ## NOTE: Though |title| has special semantics,
1096     ## syntactically same as the |title| as global attribute.
1097     })->($self, $todo);
1098 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1099     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1100     } else {
1101 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1102     type => 'attribute missing:href');
1103     }
1104     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1105     $self->{onerror}->(node => $todo->{node},
1106     type => 'attribute missing:rel');
1107     }
1108     },
1109     checker => $HTMLEmptyChecker,
1110     };
1111    
1112     $Element->{$HTML_NS}->{meta} = {
1113     attrs_checker => sub {
1114     my ($self, $todo) = @_;
1115     my $name_attr;
1116     my $http_equiv_attr;
1117     my $charset_attr;
1118     my $content_attr;
1119     for my $attr (@{$todo->{node}->attributes}) {
1120     my $attr_ns = $attr->namespace_uri;
1121     $attr_ns = '' unless defined $attr_ns;
1122     my $attr_ln = $attr->manakai_local_name;
1123     my $checker;
1124     if ($attr_ns eq '') {
1125     if ($attr_ln eq 'content') {
1126     $content_attr = $attr;
1127     $checker = 1;
1128     } elsif ($attr_ln eq 'name') {
1129     $name_attr = $attr;
1130     $checker = 1;
1131     } elsif ($attr_ln eq 'http-equiv') {
1132     $http_equiv_attr = $attr;
1133     $checker = 1;
1134     } elsif ($attr_ln eq 'charset') {
1135     $charset_attr = $attr;
1136     $checker = 1;
1137     } else {
1138     $checker = $HTMLAttrChecker->{$attr_ln}
1139     || $AttrChecker->{$attr_ns}->{$attr_ln}
1140     || $AttrChecker->{$attr_ns}->{''};
1141     }
1142     } else {
1143     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1144     || $AttrChecker->{$attr_ns}->{''};
1145     }
1146     if ($checker) {
1147     $checker->($self, $attr) if ref $checker;
1148     } else {
1149     $self->{onerror}->(node => $attr, level => 'unsupported',
1150     type => 'attribute');
1151     ## ISSUE: No comformance createria for unknown attributes in the spec
1152     }
1153     }
1154    
1155     if (defined $name_attr) {
1156     if (defined $http_equiv_attr) {
1157     $self->{onerror}->(node => $http_equiv_attr,
1158     type => 'attribute not allowed');
1159     } elsif (defined $charset_attr) {
1160     $self->{onerror}->(node => $charset_attr,
1161     type => 'attribute not allowed');
1162     }
1163     my $metadata_name = $name_attr->value;
1164     my $metadata_value;
1165     if (defined $content_attr) {
1166     $metadata_value = $content_attr->value;
1167     } else {
1168     $self->{onerror}->(node => $todo->{node},
1169     type => 'attribute missing:content');
1170     $metadata_value = '';
1171     }
1172     } elsif (defined $http_equiv_attr) {
1173     if (defined $charset_attr) {
1174     $self->{onerror}->(node => $charset_attr,
1175     type => 'attribute not allowed');
1176     }
1177     unless (defined $content_attr) {
1178     $self->{onerror}->(node => $todo->{node},
1179     type => 'attribute missing:content');
1180     }
1181     } elsif (defined $charset_attr) {
1182     if (defined $content_attr) {
1183     $self->{onerror}->(node => $content_attr,
1184     type => 'attribute not allowed');
1185     }
1186     } else {
1187     if (defined $content_attr) {
1188     $self->{onerror}->(node => $content_attr,
1189     type => 'attribute not allowed');
1190     $self->{onerror}->(node => $todo->{node},
1191     type => 'attribute missing:name|http-equiv');
1192     } else {
1193     $self->{onerror}->(node => $todo->{node},
1194     type => 'attribute missing:name|http-equiv|charset');
1195     }
1196     }
1197    
1198     ## TODO: metadata conformance
1199    
1200     ## TODO: pragma conformance
1201     if (defined $http_equiv_attr) { ## An enumerated attribute
1202     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1203     if ({
1204     'refresh' => 1,
1205     'default-style' => 1,
1206     }->{$keyword}) {
1207     #
1208     } else {
1209     $self->{onerror}->(node => $http_equiv_attr,
1210     type => 'enumerated:invalid');
1211     }
1212     }
1213    
1214     if (defined $charset_attr) {
1215     unless ($todo->{node}->owner_document->manakai_is_html) {
1216     $self->{onerror}->(node => $charset_attr,
1217     type => 'in XML:charset');
1218     }
1219     ## TODO: charset
1220     }
1221     },
1222     checker => $HTMLEmptyChecker,
1223     };
1224    
1225     $Element->{$HTML_NS}->{style} = {
1226     attrs_checker => $GetHTMLAttrsChecker->({
1227     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1228     media => $HTMLMQAttrChecker,
1229     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1230     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1231     ## not different
1232     }),
1233     checker => sub {
1234     ## NOTE: |html:style| has no conformance creteria on content model
1235     my ($self, $todo) = @_;
1236     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1237     $type = 'text/css' unless defined $type;
1238     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1239     type => 'style:'.$type); ## TODO: $type normalization
1240     return $AnyChecker->($self, $todo);
1241     },
1242     };
1243    
1244     $Element->{$HTML_NS}->{body} = {
1245     attrs_checker => $GetHTMLAttrsChecker->({}),
1246     checker => $HTMLBlockChecker,
1247     };
1248    
1249     $Element->{$HTML_NS}->{section} = {
1250     attrs_checker => $GetHTMLAttrsChecker->({}),
1251     checker => $HTMLStylableBlockChecker,
1252     };
1253    
1254     $Element->{$HTML_NS}->{nav} = {
1255     attrs_checker => $GetHTMLAttrsChecker->({}),
1256     checker => $HTMLBlockOrInlineChecker,
1257     };
1258    
1259     $Element->{$HTML_NS}->{article} = {
1260     attrs_checker => $GetHTMLAttrsChecker->({}),
1261     checker => $HTMLStylableBlockChecker,
1262     };
1263    
1264     $Element->{$HTML_NS}->{blockquote} = {
1265     attrs_checker => $GetHTMLAttrsChecker->({
1266     cite => $HTMLURIAttrChecker,
1267     }),
1268     checker => $HTMLBlockChecker,
1269     };
1270    
1271     $Element->{$HTML_NS}->{aside} = {
1272     attrs_checker => $GetHTMLAttrsChecker->({}),
1273     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1274     };
1275    
1276     $Element->{$HTML_NS}->{h1} = {
1277     attrs_checker => $GetHTMLAttrsChecker->({}),
1278     checker => sub {
1279     my ($self, $todo) = @_;
1280     $todo->{flag}->{has_heading}->[0] = 1;
1281     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1282     },
1283     };
1284    
1285     $Element->{$HTML_NS}->{h2} = {
1286     attrs_checker => $GetHTMLAttrsChecker->({}),
1287     checker => $Element->{$HTML_NS}->{h1}->{checker},
1288     };
1289    
1290     $Element->{$HTML_NS}->{h3} = {
1291     attrs_checker => $GetHTMLAttrsChecker->({}),
1292     checker => $Element->{$HTML_NS}->{h1}->{checker},
1293     };
1294    
1295     $Element->{$HTML_NS}->{h4} = {
1296     attrs_checker => $GetHTMLAttrsChecker->({}),
1297     checker => $Element->{$HTML_NS}->{h1}->{checker},
1298     };
1299    
1300     $Element->{$HTML_NS}->{h5} = {
1301     attrs_checker => $GetHTMLAttrsChecker->({}),
1302     checker => $Element->{$HTML_NS}->{h1}->{checker},
1303     };
1304    
1305     $Element->{$HTML_NS}->{h6} = {
1306     attrs_checker => $GetHTMLAttrsChecker->({}),
1307     checker => $Element->{$HTML_NS}->{h1}->{checker},
1308     };
1309    
1310     $Element->{$HTML_NS}->{header} = {
1311     attrs_checker => $GetHTMLAttrsChecker->({}),
1312     checker => sub {
1313     my ($self, $todo) = @_;
1314     my $old_flag = $todo->{flag}->{has_heading} || [];
1315     my $new_flag = [];
1316     local $todo->{flag}->{has_heading} = $new_flag;
1317     my $node = $todo->{node};
1318    
1319     my $end = $self->_add_minuses
1320     ({$HTML_NS => {qw/header 1 footer 1/}},
1321     $HTMLSectioningElements);
1322     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1323     push @$new_todos, $end,
1324     {type => 'code', code => sub {
1325     if ($new_flag->[0]) {
1326     $old_flag->[0] = 1;
1327     } else {
1328     $self->{onerror}->(node => $node, type => 'element missing:hn');
1329     }
1330     }};
1331     return ($new_todos, $ch);
1332     },
1333     };
1334    
1335     $Element->{$HTML_NS}->{footer} = {
1336     attrs_checker => $GetHTMLAttrsChecker->({}),
1337     checker => sub { ## block -hn -header -footer -sectioning or inline
1338     my ($self, $todo) = @_;
1339     my $el = $todo->{node};
1340     my $new_todos = [];
1341     my @nodes = (@{$el->child_nodes});
1342    
1343     my $content = 'block-or-inline'; # or 'block' or 'inline'
1344     my @block_not_inline;
1345     while (@nodes) {
1346     my $node = shift @nodes;
1347     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1348    
1349     my $nt = $node->node_type;
1350     if ($nt == 1) {
1351     my $node_ns = $node->namespace_uri;
1352     $node_ns = '' unless defined $node_ns;
1353     my $node_ln = $node->manakai_local_name;
1354     my $not_allowed;
1355     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1356     $not_allowed = 1;
1357     } elsif ($node_ns eq $HTML_NS and
1358     {
1359     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1360     }->{$node_ln}) {
1361     $not_allowed = 1;
1362     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1363     $not_allowed = 1;
1364     }
1365     if ($content eq 'block') {
1366     $not_allowed = 1
1367     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1368     } elsif ($content eq 'inline') {
1369     $not_allowed = 1
1370     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1371     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1372     } else {
1373     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1374     my $is_inline
1375     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1376     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1377    
1378     push @block_not_inline, $node
1379     if $is_block and not $is_inline and not $not_allowed;
1380     unless ($is_block) {
1381     $content = 'inline';
1382     for (@block_not_inline) {
1383     $self->{onerror}->(node => $_, type => 'element not allowed');
1384     }
1385     $not_allowed = 1 unless $is_inline;
1386     }
1387     }
1388     $self->{onerror}->(node => $node, type => 'element not allowed')
1389     if $not_allowed;
1390     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1391     unshift @nodes, @$sib;
1392     push @$new_todos, @$ch;
1393     } elsif ($nt == 3 or $nt == 4) {
1394     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1395     if ($content eq 'block') {
1396     $self->{onerror}->(node => $node, type => 'character not allowed');
1397     } else {
1398     $content = 'inline';
1399     for (@block_not_inline) {
1400     $self->{onerror}->(node => $_, type => 'element not allowed');
1401     }
1402     }
1403     }
1404     } elsif ($nt == 5) {
1405     unshift @nodes, @{$node->child_nodes};
1406     }
1407     }
1408    
1409     my $end = $self->_add_minuses
1410     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1411     $HTMLSectioningElements);
1412     push @$new_todos, $end;
1413    
1414     if ($content eq 'inline') {
1415     for (@$new_todos) {
1416     $_->{inline} = 1;
1417     }
1418     }
1419    
1420     return ($new_todos);
1421     },
1422     };
1423    
1424     $Element->{$HTML_NS}->{address} = {
1425     attrs_checker => $GetHTMLAttrsChecker->({}),
1426     checker => $HTMLInlineChecker,
1427     };
1428    
1429     $Element->{$HTML_NS}->{p} = {
1430     attrs_checker => $GetHTMLAttrsChecker->({}),
1431     checker => $HTMLSignificantInlineChecker,
1432     };
1433    
1434     $Element->{$HTML_NS}->{hr} = {
1435     attrs_checker => $GetHTMLAttrsChecker->({}),
1436     checker => $HTMLEmptyChecker,
1437     };
1438    
1439     $Element->{$HTML_NS}->{br} = {
1440     attrs_checker => $GetHTMLAttrsChecker->({}),
1441     checker => $HTMLEmptyChecker,
1442     };
1443    
1444     $Element->{$HTML_NS}->{dialog} = {
1445     attrs_checker => $GetHTMLAttrsChecker->({}),
1446     checker => sub {
1447     my ($self, $todo) = @_;
1448     my $el = $todo->{node};
1449     my $new_todos = [];
1450     my @nodes = (@{$el->child_nodes});
1451    
1452     my $phase = 'before dt';
1453     while (@nodes) {
1454     my $node = shift @nodes;
1455     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1456    
1457     my $nt = $node->node_type;
1458     if ($nt == 1) {
1459     my $node_ns = $node->namespace_uri;
1460     $node_ns = '' unless defined $node_ns;
1461     my $node_ln = $node->manakai_local_name;
1462     ## NOTE: |minuses| list is not checked since redundant
1463     if ($phase eq 'before dt') {
1464     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1465     $phase = 'before dd';
1466     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1467     $self->{onerror}
1468     ->(node => $node, type => 'ps element missing:dt');
1469     $phase = 'before dt';
1470     } else {
1471     $self->{onerror}->(node => $node, type => 'element not allowed');
1472     }
1473     } else { # before dd
1474     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1475     $phase = 'before dt';
1476     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1477     $self->{onerror}
1478     ->(node => $node, type => 'ps element missing:dd');
1479     $phase = 'before dd';
1480     } else {
1481     $self->{onerror}->(node => $node, type => 'element not allowed');
1482     }
1483     }
1484     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1485     unshift @nodes, @$sib;
1486     push @$new_todos, @$ch;
1487     } elsif ($nt == 3 or $nt == 4) {
1488     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1489     $self->{onerror}->(node => $node, type => 'character not allowed');
1490     }
1491     } elsif ($nt == 5) {
1492     unshift @nodes, @{$node->child_nodes};
1493     }
1494     }
1495     if ($phase eq 'before dd') {
1496     $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1497     }
1498     return ($new_todos);
1499     },
1500     };
1501    
1502     $Element->{$HTML_NS}->{pre} = {
1503     attrs_checker => $GetHTMLAttrsChecker->({}),
1504     checker => $HTMLStrictlyInlineChecker,
1505     };
1506    
1507     $Element->{$HTML_NS}->{ol} = {
1508     attrs_checker => $GetHTMLAttrsChecker->({
1509     start => $HTMLIntegerAttrChecker,
1510     }),
1511     checker => sub {
1512     my ($self, $todo) = @_;
1513     my $el = $todo->{node};
1514     my $new_todos = [];
1515     my @nodes = (@{$el->child_nodes});
1516    
1517     while (@nodes) {
1518     my $node = shift @nodes;
1519     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1520    
1521     my $nt = $node->node_type;
1522     if ($nt == 1) {
1523     my $node_ns = $node->namespace_uri;
1524     $node_ns = '' unless defined $node_ns;
1525     my $node_ln = $node->manakai_local_name;
1526     ## NOTE: |minuses| list is not checked since redundant
1527     unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1528     $self->{onerror}->(node => $node, type => 'element not allowed');
1529     }
1530     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1531     unshift @nodes, @$sib;
1532     push @$new_todos, @$ch;
1533     } elsif ($nt == 3 or $nt == 4) {
1534     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1535     $self->{onerror}->(node => $node, type => 'character not allowed');
1536     }
1537     } elsif ($nt == 5) {
1538     unshift @nodes, @{$node->child_nodes};
1539     }
1540     }
1541    
1542     if ($todo->{inline}) {
1543     for (@$new_todos) {
1544     $_->{inline} = 1;
1545     }
1546     }
1547     return ($new_todos);
1548     },
1549     };
1550    
1551     $Element->{$HTML_NS}->{ul} = {
1552     attrs_checker => $GetHTMLAttrsChecker->({}),
1553     checker => $Element->{$HTML_NS}->{ol}->{checker},
1554     };
1555    
1556    
1557     $Element->{$HTML_NS}->{li} = {
1558     attrs_checker => $GetHTMLAttrsChecker->({
1559     start => sub {
1560     my ($self, $attr) = @_;
1561     my $parent = $attr->owner_element->manakai_parent_element;
1562     if (defined $parent) {
1563     my $parent_ns = $parent->namespace_uri;
1564     $parent_ns = '' unless defined $parent_ns;
1565     my $parent_ln = $parent->manakai_local_name;
1566     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1567     $self->{onerror}->(node => $attr, level => 'unsupported',
1568     type => 'attribute');
1569     }
1570     }
1571     $HTMLIntegerAttrChecker->($self, $attr);
1572     },
1573     }),
1574     checker => sub {
1575     my ($self, $todo) = @_;
1576     if ($todo->{inline}) {
1577     return $HTMLInlineChecker->($self, $todo);
1578     } else {
1579     return $HTMLBlockOrInlineChecker->($self, $todo);
1580     }
1581     },
1582     };
1583    
1584     $Element->{$HTML_NS}->{dl} = {
1585     attrs_checker => $GetHTMLAttrsChecker->({}),
1586     checker => sub {
1587     my ($self, $todo) = @_;
1588     my $el = $todo->{node};
1589     my $new_todos = [];
1590     my @nodes = (@{$el->child_nodes});
1591    
1592     my $phase = 'before dt';
1593     while (@nodes) {
1594     my $node = shift @nodes;
1595     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1596    
1597     my $nt = $node->node_type;
1598     if ($nt == 1) {
1599     my $node_ns = $node->namespace_uri;
1600     $node_ns = '' unless defined $node_ns;
1601     my $node_ln = $node->manakai_local_name;
1602     ## NOTE: |minuses| list is not checked since redundant
1603     if ($phase eq 'in dds') {
1604     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1605     #$phase = 'in dds';
1606     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1607     $phase = 'in dts';
1608     } else {
1609     $self->{onerror}->(node => $node, type => 'element not allowed');
1610     }
1611     } elsif ($phase eq 'in dts') {
1612     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1613     #$phase = 'in dts';
1614     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1615     $phase = 'in dds';
1616     } else {
1617     $self->{onerror}->(node => $node, type => 'element not allowed');
1618     }
1619     } else { # before dt
1620     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1621     $phase = 'in dts';
1622     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1623     $self->{onerror}
1624     ->(node => $node, type => 'ps element missing:dt');
1625     $phase = 'in dds';
1626     } else {
1627     $self->{onerror}->(node => $node, type => 'element not allowed');
1628     }
1629     }
1630     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1631     unshift @nodes, @$sib;
1632     push @$new_todos, @$ch;
1633     } elsif ($nt == 3 or $nt == 4) {
1634     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1635     $self->{onerror}->(node => $node, type => 'character not allowed');
1636     }
1637     } elsif ($nt == 5) {
1638     unshift @nodes, @{$node->child_nodes};
1639     }
1640     }
1641     if ($phase eq 'in dts') {
1642     $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1643     }
1644    
1645     if ($todo->{inline}) {
1646     for (@$new_todos) {
1647     $_->{inline} = 1;
1648     }
1649     }
1650     return ($new_todos);
1651     },
1652     };
1653    
1654     $Element->{$HTML_NS}->{dt} = {
1655     attrs_checker => $GetHTMLAttrsChecker->({}),
1656     checker => $HTMLStrictlyInlineChecker,
1657     };
1658    
1659     $Element->{$HTML_NS}->{dd} = {
1660     attrs_checker => $GetHTMLAttrsChecker->({}),
1661     checker => $Element->{$HTML_NS}->{li}->{checker},
1662     };
1663    
1664     $Element->{$HTML_NS}->{a} = {
1665     attrs_checker => sub {
1666     my ($self, $todo) = @_;
1667     my %attr;
1668     for my $attr (@{$todo->{node}->attributes}) {
1669     my $attr_ns = $attr->namespace_uri;
1670     $attr_ns = '' unless defined $attr_ns;
1671     my $attr_ln = $attr->manakai_local_name;
1672     my $checker;
1673     if ($attr_ns eq '') {
1674     $checker = {
1675     target => $HTMLTargetAttrChecker,
1676     href => $HTMLURIAttrChecker,
1677     ping => $HTMLSpaceURIsAttrChecker,
1678 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1679 wakaba 1.1 media => $HTMLMQAttrChecker,
1680     hreflang => $HTMLLanguageTagAttrChecker,
1681     type => $HTMLIMTAttrChecker,
1682     }->{$attr_ln};
1683     if ($checker) {
1684     $attr{$attr_ln} = $attr;
1685     } else {
1686     $checker = $HTMLAttrChecker->{$attr_ln};
1687     }
1688     }
1689     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1690     || $AttrChecker->{$attr_ns}->{''};
1691     if ($checker) {
1692     $checker->($self, $attr) if ref $checker;
1693     } else {
1694     $self->{onerror}->(node => $attr, level => 'unsupported',
1695     type => 'attribute');
1696     ## ISSUE: No comformance createria for unknown attributes in the spec
1697     }
1698     }
1699    
1700 wakaba 1.4 if (defined $attr{href}) {
1701     $self->{has_hyperlink_element} = 1;
1702     } else {
1703 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1704     if (defined $attr{$_}) {
1705     $self->{onerror}->(node => $attr{$_},
1706     type => 'attribute not allowed');
1707     }
1708     }
1709     }
1710     },
1711     checker => sub {
1712     my ($self, $todo) = @_;
1713    
1714     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1715     my ($new_todos, $ch)
1716     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1717     push @$new_todos, $end;
1718    
1719     $_->{flag}->{has_a} = 1 for @$new_todos;
1720    
1721     return ($new_todos, $ch);
1722     },
1723     };
1724    
1725     $Element->{$HTML_NS}->{q} = {
1726     attrs_checker => $GetHTMLAttrsChecker->({
1727     cite => $HTMLURIAttrChecker,
1728     }),
1729     checker => $HTMLInlineOrStrictlyInlineChecker,
1730     };
1731    
1732     $Element->{$HTML_NS}->{cite} = {
1733     attrs_checker => $GetHTMLAttrsChecker->({}),
1734     checker => $HTMLStrictlyInlineChecker,
1735     };
1736    
1737     $Element->{$HTML_NS}->{em} = {
1738     attrs_checker => $GetHTMLAttrsChecker->({}),
1739     checker => $HTMLInlineOrStrictlyInlineChecker,
1740     };
1741    
1742     $Element->{$HTML_NS}->{strong} = {
1743     attrs_checker => $GetHTMLAttrsChecker->({}),
1744     checker => $HTMLInlineOrStrictlyInlineChecker,
1745     };
1746    
1747     $Element->{$HTML_NS}->{small} = {
1748     attrs_checker => $GetHTMLAttrsChecker->({}),
1749     checker => $HTMLInlineOrStrictlyInlineChecker,
1750     };
1751    
1752     $Element->{$HTML_NS}->{m} = {
1753     attrs_checker => $GetHTMLAttrsChecker->({}),
1754     checker => $HTMLInlineOrStrictlyInlineChecker,
1755     };
1756    
1757     $Element->{$HTML_NS}->{dfn} = {
1758     attrs_checker => $GetHTMLAttrsChecker->({}),
1759     checker => sub {
1760     my ($self, $todo) = @_;
1761    
1762     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1763     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1764     push @$sib, $end;
1765    
1766     my $node = $todo->{node};
1767     my $term = $node->get_attribute_ns (undef, 'title');
1768     unless (defined $term) {
1769     for my $child (@{$node->child_nodes}) {
1770     if ($child->node_type == 1) { # ELEMENT_NODE
1771     if (defined $term) {
1772     undef $term;
1773     last;
1774     } elsif ($child->manakai_local_name eq 'abbr') {
1775     my $nsuri = $child->namespace_uri;
1776     if (defined $nsuri and $nsuri eq $HTML_NS) {
1777     my $attr = $child->get_attribute_node_ns (undef, 'title');
1778     if ($attr) {
1779     $term = $attr->value;
1780     }
1781     }
1782     }
1783     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1784     ## TEXT_NODE or CDATA_SECTION_NODE
1785     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1786     next;
1787     }
1788     undef $term;
1789     last;
1790     }
1791     }
1792     unless (defined $term) {
1793     $term = $node->text_content;
1794     }
1795     }
1796     if ($self->{term}->{$term}) {
1797     $self->{onerror}->(node => $node, type => 'duplicate term');
1798     push @{$self->{term}->{$term}}, $node;
1799     } else {
1800     $self->{term}->{$term} = [$node];
1801     }
1802     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1803     ## has |title|.
1804    
1805     return ($sib, $ch);
1806     },
1807     };
1808    
1809     $Element->{$HTML_NS}->{abbr} = {
1810     attrs_checker => $GetHTMLAttrsChecker->({
1811     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1812     ## not different. The spec says that the |title| MAY be omitted
1813     ## if there is a |dfn| whose defining term is the abbreviation,
1814     ## but it does not prohibit |abbr| w/o |title| in other cases.
1815     }),
1816     checker => $HTMLStrictlyInlineChecker,
1817     };
1818    
1819     $Element->{$HTML_NS}->{time} = {
1820     attrs_checker => $GetHTMLAttrsChecker->({
1821     datetime => sub { 1 }, # checked in |checker|
1822     }),
1823     ## TODO: Write tests
1824     checker => sub {
1825     my ($self, $todo) = @_;
1826    
1827     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1828     my $input;
1829     my $reg_sp;
1830     my $input_node;
1831     if ($attr) {
1832     $input = $attr->value;
1833     $reg_sp = qr/[\x09-\x0D\x20]*/;
1834     $input_node = $attr;
1835     } else {
1836     $input = $todo->{node}->text_content;
1837     $reg_sp = qr/\p{Zs}*/;
1838     $input_node = $todo->{node};
1839    
1840     ## ISSUE: What is the definition for "successfully extracts a date
1841     ## or time"? If the algorithm says the string is invalid but
1842     ## return some date or time, is it "successfully"?
1843     }
1844    
1845     my $hour;
1846     my $minute;
1847     my $second;
1848     if ($input =~ /
1849     \A
1850     [\x09-\x0D\x20]*
1851     ([0-9]+) # 1
1852     (?>
1853     -([0-9]+) # 2
1854     -([0-9]+) # 3
1855     [\x09-\x0D\x20]*
1856     (?>
1857     T
1858     [\x09-\x0D\x20]*
1859     )?
1860     ([0-9]+) # 4
1861     :([0-9]+) # 5
1862     (?>
1863     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1864     )?
1865     [\x09-\x0D\x20]*
1866     (?>
1867     Z
1868     [\x09-\x0D\x20]*
1869     |
1870     [+-]([0-9]+):([0-9]+) # 7, 8
1871     [\x09-\x0D\x20]*
1872     )?
1873     \z
1874     |
1875     :([0-9]+) # 9
1876     (?>
1877     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1878     )?
1879     [\x09-\x0D\x20]*\z
1880     )
1881     /x) {
1882     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1883     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1884     length $4 != 2 or length $5 != 2) {
1885     $self->{onerror}->(node => $input_node,
1886     type => 'dateortime:syntax error');
1887     }
1888    
1889     if (1 <= $2 and $2 <= 12) {
1890     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1891     if $3 < 1 or
1892     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1893     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1894     if $2 == 2 and $3 == 29 and
1895     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1896     } else {
1897     $self->{onerror}->(node => $input_node,
1898     type => 'datetime:bad month');
1899     }
1900    
1901     ($hour, $minute, $second) = ($4, $5, $6);
1902    
1903     if (defined $7) { ## [+-]hh:mm
1904     if (length $7 != 2 or length $8 != 2) {
1905     $self->{onerror}->(node => $input_node,
1906     type => 'dateortime:syntax error');
1907     }
1908    
1909     $self->{onerror}->(node => $input_node,
1910     type => 'datetime:bad timezone hour')
1911     if $7 > 23;
1912     $self->{onerror}->(node => $input_node,
1913     type => 'datetime:bad timezone minute')
1914     if $8 > 59;
1915     }
1916     } else { ## hh:mm
1917     if (length $1 != 2 or length $9 != 2) {
1918     $self->{onerror}->(node => $input_node,
1919     type => qq'dateortime:syntax error');
1920     }
1921    
1922     ($hour, $minute, $second) = ($1, $9, $10);
1923     }
1924    
1925     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
1926     if $hour > 23;
1927     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
1928     if $minute > 59;
1929    
1930     if (defined $second) { ## s
1931     ## NOTE: Integer part of second don't have to have length of two.
1932    
1933     if (substr ($second, 0, 1) eq '.') {
1934     $self->{onerror}->(node => $input_node,
1935     type => 'dateortime:syntax error');
1936     }
1937    
1938     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
1939     if $second >= 60;
1940     }
1941     } else {
1942     $self->{onerror}->(node => $input_node,
1943     type => 'dateortime:syntax error');
1944     }
1945    
1946     return $HTMLStrictlyInlineChecker->($self, $todo);
1947     },
1948     };
1949    
1950     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1951     attrs_checker => $GetHTMLAttrsChecker->({
1952     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1953     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1954     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1955     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1956     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1957     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1958     }),
1959     checker => $HTMLStrictlyInlineChecker,
1960     };
1961    
1962     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1963     attrs_checker => $GetHTMLAttrsChecker->({
1964     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1965     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1966     }),
1967     checker => $HTMLStrictlyInlineChecker,
1968     };
1969    
1970     $Element->{$HTML_NS}->{code} = {
1971     attrs_checker => $GetHTMLAttrsChecker->({}),
1972     ## NOTE: Though |title| has special semantics,
1973     ## syntatically same as the |title| as global attribute.
1974     checker => $HTMLInlineOrStrictlyInlineChecker,
1975     };
1976    
1977     $Element->{$HTML_NS}->{var} = {
1978     attrs_checker => $GetHTMLAttrsChecker->({}),
1979     ## NOTE: Though |title| has special semantics,
1980     ## syntatically same as the |title| as global attribute.
1981     checker => $HTMLStrictlyInlineChecker,
1982     };
1983    
1984     $Element->{$HTML_NS}->{samp} = {
1985     attrs_checker => $GetHTMLAttrsChecker->({}),
1986     ## NOTE: Though |title| has special semantics,
1987     ## syntatically same as the |title| as global attribute.
1988     checker => $HTMLInlineOrStrictlyInlineChecker,
1989     };
1990    
1991     $Element->{$HTML_NS}->{kbd} = {
1992     attrs_checker => $GetHTMLAttrsChecker->({}),
1993     checker => $HTMLStrictlyInlineChecker,
1994     };
1995    
1996     $Element->{$HTML_NS}->{sub} = {
1997     attrs_checker => $GetHTMLAttrsChecker->({}),
1998     checker => $HTMLStrictlyInlineChecker,
1999     };
2000    
2001     $Element->{$HTML_NS}->{sup} = {
2002     attrs_checker => $GetHTMLAttrsChecker->({}),
2003     checker => $HTMLStrictlyInlineChecker,
2004     };
2005    
2006     $Element->{$HTML_NS}->{span} = {
2007     attrs_checker => $GetHTMLAttrsChecker->({}),
2008     ## NOTE: Though |title| has special semantics,
2009     ## syntatically same as the |title| as global attribute.
2010     checker => $HTMLInlineOrStrictlyInlineChecker,
2011     };
2012    
2013     $Element->{$HTML_NS}->{i} = {
2014     attrs_checker => $GetHTMLAttrsChecker->({}),
2015     ## NOTE: Though |title| has special semantics,
2016     ## syntatically same as the |title| as global attribute.
2017     checker => $HTMLStrictlyInlineChecker,
2018     };
2019    
2020     $Element->{$HTML_NS}->{b} = {
2021     attrs_checker => $GetHTMLAttrsChecker->({}),
2022     checker => $HTMLStrictlyInlineChecker,
2023     };
2024    
2025     $Element->{$HTML_NS}->{bdo} = {
2026     attrs_checker => sub {
2027     my ($self, $todo) = @_;
2028     $GetHTMLAttrsChecker->({})->($self, $todo);
2029     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2030     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2031     }
2032     },
2033     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2034     checker => $HTMLStrictlyInlineChecker,
2035     };
2036    
2037     $Element->{$HTML_NS}->{ins} = {
2038     attrs_checker => $GetHTMLAttrsChecker->({
2039     cite => $HTMLURIAttrChecker,
2040     datetime => $HTMLDatetimeAttrChecker,
2041     }),
2042     checker => $HTMLTransparentChecker,
2043     };
2044    
2045     $Element->{$HTML_NS}->{del} = {
2046     attrs_checker => $GetHTMLAttrsChecker->({
2047     cite => $HTMLURIAttrChecker,
2048     datetime => $HTMLDatetimeAttrChecker,
2049     }),
2050     checker => sub {
2051     my ($self, $todo) = @_;
2052    
2053     my $parent = $todo->{node}->manakai_parent_element;
2054     if (defined $parent) {
2055     my $nsuri = $parent->namespace_uri;
2056     $nsuri = '' unless defined $nsuri;
2057     my $ln = $parent->manakai_local_name;
2058     my $eldef = $Element->{$nsuri}->{$ln} ||
2059     $Element->{$nsuri}->{''} ||
2060     $ElementDefault;
2061     return $eldef->{checker}->($self, $todo);
2062     } else {
2063     return $HTMLBlockOrInlineChecker->($self, $todo);
2064     }
2065     },
2066     };
2067    
2068     ## TODO: figure
2069    
2070 wakaba 1.4 ## TODO: |alt|
2071 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2072     attrs_checker => sub {
2073     my ($self, $todo) = @_;
2074     $GetHTMLAttrsChecker->({
2075     alt => sub { }, ## NOTE: No syntactical requirement
2076     src => $HTMLURIAttrChecker,
2077     usemap => $HTMLUsemapAttrChecker,
2078     ismap => sub {
2079     my ($self, $attr, $parent_todo) = @_;
2080     if (not $todo->{flag}->{has_a}) {
2081     $self->{onerror}->(node => $attr, type => 'attribute not allowed');
2082     }
2083     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2084     },
2085     ## TODO: height
2086     ## TODO: width
2087     })->($self, $todo);
2088     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2089     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2090     }
2091     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2092     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2093     }
2094     },
2095     checker => $HTMLEmptyChecker,
2096     };
2097    
2098     $Element->{$HTML_NS}->{iframe} = {
2099     attrs_checker => $GetHTMLAttrsChecker->({
2100     src => $HTMLURIAttrChecker,
2101     }),
2102     checker => $HTMLTextChecker,
2103     };
2104    
2105     $Element->{$HTML_NS}->{embed} = {
2106     attrs_checker => sub {
2107     my ($self, $todo) = @_;
2108     my $has_src;
2109     for my $attr (@{$todo->{node}->attributes}) {
2110     my $attr_ns = $attr->namespace_uri;
2111     $attr_ns = '' unless defined $attr_ns;
2112     my $attr_ln = $attr->manakai_local_name;
2113     my $checker;
2114     if ($attr_ns eq '') {
2115     if ($attr_ln eq 'src') {
2116     $checker = $HTMLURIAttrChecker;
2117     $has_src = 1;
2118     } elsif ($attr_ln eq 'type') {
2119     $checker = $HTMLIMTAttrChecker;
2120     } else {
2121     ## TODO: height
2122     ## TODO: width
2123     $checker = $HTMLAttrChecker->{$attr_ln}
2124     || sub { }; ## NOTE: Any local attribute is ok.
2125     }
2126     }
2127     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2128     || $AttrChecker->{$attr_ns}->{''};
2129     if ($checker) {
2130     $checker->($self, $attr);
2131     } else {
2132     $self->{onerror}->(node => $attr, level => 'unsupported',
2133     type => 'attribute');
2134     ## ISSUE: No comformance createria for global attributes in the spec
2135     }
2136     }
2137    
2138     unless ($has_src) {
2139     $self->{onerror}->(node => $todo->{node},
2140     type => 'attribute missing:src');
2141     }
2142     },
2143     checker => $HTMLEmptyChecker,
2144     };
2145    
2146     $Element->{$HTML_NS}->{object} = {
2147     attrs_checker => sub {
2148     my ($self, $todo) = @_;
2149     $GetHTMLAttrsChecker->({
2150     data => $HTMLURIAttrChecker,
2151     type => $HTMLIMTAttrChecker,
2152     usemap => $HTMLUsemapAttrChecker,
2153     ## TODO: width
2154     ## TODO: height
2155     })->($self, $todo);
2156     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2157     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2158     $self->{onerror}->(node => $todo->{node},
2159     type => 'attribute missing:data|type');
2160     }
2161     }
2162     },
2163     checker => $ElementDefault->{checker}, ## TODO
2164     };
2165    
2166     $Element->{$HTML_NS}->{param} = {
2167     attrs_checker => sub {
2168     my ($self, $todo) = @_;
2169     $GetHTMLAttrsChecker->({
2170     name => sub { },
2171     value => sub { },
2172     })->($self, $todo);
2173     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2174     $self->{onerror}->(node => $todo->{node},
2175     type => 'attribute missing:name');
2176     }
2177     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2178     $self->{onerror}->(node => $todo->{node},
2179     type => 'attribute missing:value');
2180     }
2181     },
2182     checker => $HTMLEmptyChecker,
2183     };
2184    
2185     $Element->{$HTML_NS}->{video} = {
2186     attrs_checker => $GetHTMLAttrsChecker->({
2187     src => $HTMLURIAttrChecker,
2188     ## TODO: start, loopstart, loopend, end
2189     ## ISSUE: they MUST be "value time offset"s. Value?
2190     ## ISSUE: loopcount has no conformance creteria
2191     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2192     controls => $GetHTMLBooleanAttrChecker->('controls'),
2193     }),
2194     checker => sub {
2195     my ($self, $todo) = @_;
2196    
2197     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2198     return $HTMLBlockOrInlineChecker->($self, $todo);
2199     } else {
2200     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2201     ->($self, $todo);
2202     }
2203     },
2204     };
2205    
2206     $Element->{$HTML_NS}->{audio} = {
2207     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2208     checker => $Element->{$HTML_NS}->{video}->{checker},
2209     };
2210    
2211     $Element->{$HTML_NS}->{source} = {
2212     attrs_checker => sub {
2213     my ($self, $todo) = @_;
2214     $GetHTMLAttrsChecker->({
2215     src => $HTMLURIAttrChecker,
2216     type => $HTMLIMTAttrChecker,
2217     media => $HTMLMQAttrChecker,
2218     })->($self, $todo);
2219     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2220     $self->{onerror}->(node => $todo->{node},
2221     type => 'attribute missing:src');
2222     }
2223     },
2224     checker => $HTMLEmptyChecker,
2225     };
2226    
2227     $Element->{$HTML_NS}->{canvas} = {
2228     attrs_checker => $GetHTMLAttrsChecker->({
2229     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2230     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2231     }),
2232     checker => $HTMLInlineChecker,
2233     };
2234    
2235     $Element->{$HTML_NS}->{map} = {
2236 wakaba 1.4 attrs_checker => sub {
2237     my ($self, $todo) = @_;
2238     my $has_id;
2239     $GetHTMLAttrsChecker->({
2240     id => sub {
2241     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2242     my ($self, $attr) = @_;
2243     my $value = $attr->value;
2244     if (length $value > 0) {
2245     if ($self->{id}->{$value}) {
2246     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2247     push @{$self->{id}->{$value}}, $attr;
2248     } else {
2249     $self->{id}->{$value} = [$attr];
2250     }
2251 wakaba 1.1 } else {
2252 wakaba 1.4 ## NOTE: MUST contain at least one character
2253     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2254 wakaba 1.1 }
2255 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2256     $self->{onerror}->(node => $attr, type => 'space in ID');
2257     }
2258     $self->{map}->{$value} ||= $attr;
2259     $has_id = 1;
2260     },
2261     })->($self, $todo);
2262     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2263     unless $has_id;
2264     },
2265 wakaba 1.1 checker => $HTMLBlockChecker,
2266     };
2267    
2268     $Element->{$HTML_NS}->{area} = {
2269     attrs_checker => sub {
2270     my ($self, $todo) = @_;
2271     my %attr;
2272     my $coords;
2273     for my $attr (@{$todo->{node}->attributes}) {
2274     my $attr_ns = $attr->namespace_uri;
2275     $attr_ns = '' unless defined $attr_ns;
2276     my $attr_ln = $attr->manakai_local_name;
2277     my $checker;
2278     if ($attr_ns eq '') {
2279     $checker = {
2280     alt => sub { },
2281     ## NOTE: |alt| value has no conformance creteria.
2282     shape => $GetHTMLEnumeratedAttrChecker->({
2283     circ => -1, circle => 1,
2284     default => 1,
2285     poly => 1, polygon => -1,
2286     rect => 1, rectangle => -1,
2287     }),
2288     coords => sub {
2289     my ($self, $attr) = @_;
2290     my $value = $attr->value;
2291     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2292     $coords = [split /,/, $value];
2293     } else {
2294     $self->{onerror}->(node => $attr,
2295     type => 'coords:syntax error');
2296     }
2297     },
2298     target => $HTMLTargetAttrChecker,
2299     href => $HTMLURIAttrChecker,
2300     ping => $HTMLSpaceURIsAttrChecker,
2301 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2302 wakaba 1.1 media => $HTMLMQAttrChecker,
2303     hreflang => $HTMLLanguageTagAttrChecker,
2304     type => $HTMLIMTAttrChecker,
2305     }->{$attr_ln};
2306     if ($checker) {
2307     $attr{$attr_ln} = $attr;
2308     } else {
2309     $checker = $HTMLAttrChecker->{$attr_ln};
2310     }
2311     }
2312     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2313     || $AttrChecker->{$attr_ns}->{''};
2314     if ($checker) {
2315     $checker->($self, $attr) if ref $checker;
2316     } else {
2317     $self->{onerror}->(node => $attr, level => 'unsupported',
2318     type => 'attribute');
2319     ## ISSUE: No comformance createria for unknown attributes in the spec
2320     }
2321     }
2322    
2323     if (defined $attr{href}) {
2324 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2325 wakaba 1.1 unless (defined $attr{alt}) {
2326     $self->{onerror}->(node => $todo->{node},
2327     type => 'attribute missing:alt');
2328     }
2329     } else {
2330     for (qw/target ping rel media hreflang type alt/) {
2331     if (defined $attr{$_}) {
2332     $self->{onerror}->(node => $attr{$_},
2333     type => 'attribute not allowed');
2334     }
2335     }
2336     }
2337    
2338     my $shape = 'rectangle';
2339     if (defined $attr{shape}) {
2340     $shape = {
2341     circ => 'circle', circle => 'circle',
2342     default => 'default',
2343     poly => 'polygon', polygon => 'polygon',
2344     rect => 'rectangle', rectangle => 'rectangle',
2345     }->{lc $attr{shape}->value} || 'rectangle';
2346     ## TODO: ASCII lowercase?
2347     }
2348    
2349     if ($shape eq 'circle') {
2350     if (defined $attr{coords}) {
2351     if (defined $coords) {
2352     if (@$coords == 3) {
2353     if ($coords->[2] < 0) {
2354     $self->{onerror}->(node => $attr{coords},
2355     type => 'coords:out of range:2');
2356     }
2357     } else {
2358     $self->{onerror}->(node => $attr{coords},
2359     type => 'coords:number:3:'.@$coords);
2360     }
2361     } else {
2362     ## NOTE: A syntax error has been reported.
2363     }
2364     } else {
2365     $self->{onerror}->(node => $todo->{node},
2366     type => 'attribute missing:coords');
2367     }
2368     } elsif ($shape eq 'default') {
2369     if (defined $attr{coords}) {
2370     $self->{onerror}->(node => $attr{coords},
2371     type => 'attribute not allowed');
2372     }
2373     } elsif ($shape eq 'polygon') {
2374     if (defined $attr{coords}) {
2375     if (defined $coords) {
2376     if (@$coords >= 6) {
2377     unless (@$coords % 2 == 0) {
2378     $self->{onerror}->(node => $attr{coords},
2379     type => 'coords:number:even:'.@$coords);
2380     }
2381     } else {
2382     $self->{onerror}->(node => $attr{coords},
2383     type => 'coords:number:>=6:'.@$coords);
2384     }
2385     } else {
2386     ## NOTE: A syntax error has been reported.
2387     }
2388     } else {
2389     $self->{onerror}->(node => $todo->{node},
2390     type => 'attribute missing:coords');
2391     }
2392     } elsif ($shape eq 'rectangle') {
2393     if (defined $attr{coords}) {
2394     if (defined $coords) {
2395     if (@$coords == 4) {
2396     unless ($coords->[0] < $coords->[2]) {
2397     $self->{onerror}->(node => $attr{coords},
2398     type => 'coords:out of range:0');
2399     }
2400     unless ($coords->[1] < $coords->[3]) {
2401     $self->{onerror}->(node => $attr{coords},
2402     type => 'coords:out of range:1');
2403     }
2404     } else {
2405     $self->{onerror}->(node => $attr{coords},
2406     type => 'coords:number:4:'.@$coords);
2407     }
2408     } else {
2409     ## NOTE: A syntax error has been reported.
2410     }
2411     } else {
2412     $self->{onerror}->(node => $todo->{node},
2413     type => 'attribute missing:coords');
2414     }
2415     }
2416     },
2417     checker => $HTMLEmptyChecker,
2418     };
2419     ## TODO: only in map
2420    
2421     $Element->{$HTML_NS}->{table} = {
2422     attrs_checker => $GetHTMLAttrsChecker->({}),
2423     checker => sub {
2424     my ($self, $todo) = @_;
2425     my $el = $todo->{node};
2426     my $new_todos = [];
2427     my @nodes = (@{$el->child_nodes});
2428    
2429     my $phase = 'before caption';
2430     my $has_tfoot;
2431     while (@nodes) {
2432     my $node = shift @nodes;
2433     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2434    
2435     my $nt = $node->node_type;
2436     if ($nt == 1) {
2437     my $node_ns = $node->namespace_uri;
2438     $node_ns = '' unless defined $node_ns;
2439     my $node_ln = $node->manakai_local_name;
2440     ## NOTE: |minuses| list is not checked since redundant
2441     if ($phase eq 'in tbodys') {
2442     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2443     #$phase = 'in tbodys';
2444     } elsif (not $has_tfoot and
2445     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2446     $phase = 'after tfoot';
2447     $has_tfoot = 1;
2448     } else {
2449     $self->{onerror}->(node => $node, type => 'element not allowed');
2450     }
2451     } elsif ($phase eq 'in trs') {
2452     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2453     #$phase = 'in trs';
2454     } elsif (not $has_tfoot and
2455     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2456     $phase = 'after tfoot';
2457     $has_tfoot = 1;
2458     } else {
2459     $self->{onerror}->(node => $node, type => 'element not allowed');
2460     }
2461     } elsif ($phase eq 'after thead') {
2462     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2463     $phase = 'in tbodys';
2464     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2465     $phase = 'in trs';
2466     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2467     $phase = 'in tbodys';
2468     $has_tfoot = 1;
2469     } else {
2470     $self->{onerror}->(node => $node, type => 'element not allowed');
2471     }
2472     } elsif ($phase eq 'in colgroup') {
2473     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2474     $phase = 'in colgroup';
2475     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2476     $phase = 'after thead';
2477     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2478     $phase = 'in tbodys';
2479     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2480     $phase = 'in trs';
2481     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2482     $phase = 'in tbodys';
2483     $has_tfoot = 1;
2484     } else {
2485     $self->{onerror}->(node => $node, type => 'element not allowed');
2486     }
2487     } elsif ($phase eq 'before caption') {
2488     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2489     $phase = 'in colgroup';
2490     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2491     $phase = 'in colgroup';
2492     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2493     $phase = 'after thead';
2494     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2495     $phase = 'in tbodys';
2496     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2497     $phase = 'in trs';
2498     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2499     $phase = 'in tbodys';
2500     $has_tfoot = 1;
2501     } else {
2502     $self->{onerror}->(node => $node, type => 'element not allowed');
2503     }
2504     } else { # after tfoot
2505     $self->{onerror}->(node => $node, type => 'element not allowed');
2506     }
2507     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2508     unshift @nodes, @$sib;
2509     push @$new_todos, @$ch;
2510     } elsif ($nt == 3 or $nt == 4) {
2511     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2512     $self->{onerror}->(node => $node, type => 'character not allowed');
2513     }
2514     } elsif ($nt == 5) {
2515     unshift @nodes, @{$node->child_nodes};
2516     }
2517     }
2518    
2519     ## Table model errors
2520     require Whatpm::HTMLTable;
2521     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2522     my %opt = @_;
2523     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2524     });
2525     push @{$self->{return}->{table}}, $todo->{node};
2526    
2527     return ($new_todos);
2528     },
2529     };
2530    
2531     $Element->{$HTML_NS}->{caption} = {
2532     attrs_checker => $GetHTMLAttrsChecker->({}),
2533     checker => $HTMLSignificantStrictlyInlineChecker,
2534     };
2535    
2536     $Element->{$HTML_NS}->{colgroup} = {
2537     attrs_checker => $GetHTMLAttrsChecker->({
2538     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2539     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2540     ## TODO: "attribute not supported" if |col|.
2541     ## ISSUE: MUST NOT if any |col|?
2542     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2543     }),
2544     checker => sub {
2545     my ($self, $todo) = @_;
2546     my $el = $todo->{node};
2547     my $new_todos = [];
2548     my @nodes = (@{$el->child_nodes});
2549    
2550     while (@nodes) {
2551     my $node = shift @nodes;
2552     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2553    
2554     my $nt = $node->node_type;
2555     if ($nt == 1) {
2556     my $node_ns = $node->namespace_uri;
2557     $node_ns = '' unless defined $node_ns;
2558     my $node_ln = $node->manakai_local_name;
2559     ## NOTE: |minuses| list is not checked since redundant
2560     unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
2561     $self->{onerror}->(node => $node, type => 'element not allowed');
2562     }
2563     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2564     unshift @nodes, @$sib;
2565     push @$new_todos, @$ch;
2566     } elsif ($nt == 3 or $nt == 4) {
2567     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2568     $self->{onerror}->(node => $node, type => 'character not allowed');
2569     }
2570     } elsif ($nt == 5) {
2571     unshift @nodes, @{$node->child_nodes};
2572     }
2573     }
2574     return ($new_todos);
2575     },
2576     };
2577    
2578     $Element->{$HTML_NS}->{col} = {
2579     attrs_checker => $GetHTMLAttrsChecker->({
2580     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2581     }),
2582     checker => $HTMLEmptyChecker,
2583     };
2584    
2585     $Element->{$HTML_NS}->{tbody} = {
2586     attrs_checker => $GetHTMLAttrsChecker->({}),
2587     checker => sub {
2588     my ($self, $todo) = @_;
2589     my $el = $todo->{node};
2590     my $new_todos = [];
2591     my @nodes = (@{$el->child_nodes});
2592    
2593     my $has_tr;
2594     while (@nodes) {
2595     my $node = shift @nodes;
2596     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2597    
2598     my $nt = $node->node_type;
2599     if ($nt == 1) {
2600     my $node_ns = $node->namespace_uri;
2601     $node_ns = '' unless defined $node_ns;
2602     my $node_ln = $node->manakai_local_name;
2603     ## NOTE: |minuses| list is not checked since redundant
2604     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2605     $has_tr = 1;
2606     } else {
2607     $self->{onerror}->(node => $node, type => 'element not allowed');
2608     }
2609     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2610     unshift @nodes, @$sib;
2611     push @$new_todos, @$ch;
2612     } elsif ($nt == 3 or $nt == 4) {
2613     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2614     $self->{onerror}->(node => $node, type => 'character not allowed');
2615     }
2616     } elsif ($nt == 5) {
2617     unshift @nodes, @{$node->child_nodes};
2618     }
2619     }
2620     unless ($has_tr) {
2621     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2622     }
2623     return ($new_todos);
2624     },
2625     };
2626    
2627     $Element->{$HTML_NS}->{thead} = {
2628     attrs_checker => $GetHTMLAttrsChecker->({}),
2629     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2630     };
2631    
2632     $Element->{$HTML_NS}->{tfoot} = {
2633     attrs_checker => $GetHTMLAttrsChecker->({}),
2634     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2635     };
2636    
2637     $Element->{$HTML_NS}->{tr} = {
2638     attrs_checker => $GetHTMLAttrsChecker->({}),
2639     checker => sub {
2640     my ($self, $todo) = @_;
2641     my $el = $todo->{node};
2642     my $new_todos = [];
2643     my @nodes = (@{$el->child_nodes});
2644    
2645     my $has_td;
2646     while (@nodes) {
2647     my $node = shift @nodes;
2648     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2649    
2650     my $nt = $node->node_type;
2651     if ($nt == 1) {
2652     my $node_ns = $node->namespace_uri;
2653     $node_ns = '' unless defined $node_ns;
2654     my $node_ln = $node->manakai_local_name;
2655     ## NOTE: |minuses| list is not checked since redundant
2656     if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
2657     $has_td = 1;
2658     } else {
2659     $self->{onerror}->(node => $node, type => 'element not allowed');
2660     }
2661     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2662     unshift @nodes, @$sib;
2663     push @$new_todos, @$ch;
2664     } elsif ($nt == 3 or $nt == 4) {
2665     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2666     $self->{onerror}->(node => $node, type => 'character not allowed');
2667     }
2668     } elsif ($nt == 5) {
2669     unshift @nodes, @{$node->child_nodes};
2670     }
2671     }
2672     unless ($has_td) {
2673     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2674     }
2675     return ($new_todos);
2676     },
2677     };
2678    
2679     $Element->{$HTML_NS}->{td} = {
2680     attrs_checker => $GetHTMLAttrsChecker->({
2681     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2682     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2683     }),
2684     checker => $HTMLBlockOrInlineChecker,
2685     };
2686    
2687     $Element->{$HTML_NS}->{th} = {
2688     attrs_checker => $GetHTMLAttrsChecker->({
2689     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2690     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2691     scope => $GetHTMLEnumeratedAttrChecker
2692     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2693     }),
2694     checker => $HTMLBlockOrInlineChecker,
2695     };
2696    
2697     ## TODO: forms
2698    
2699     $Element->{$HTML_NS}->{script} = {
2700     attrs_checker => sub {
2701     my ($self, $todo) = @_;
2702     $GetHTMLAttrsChecker->({
2703     src => $HTMLURIAttrChecker,
2704     defer => $GetHTMLBooleanAttrChecker->('defer'),
2705     async => $GetHTMLBooleanAttrChecker->('async'),
2706     type => $HTMLIMTAttrChecker,
2707     })->($self, $todo);
2708     if ($todo->{node}->has_attribute_ns (undef, 'defer')) {
2709     my $async_attr = $todo->{node}->get_attribute_node_ns (undef, 'async');
2710     if ($async_attr) {
2711     $self->{onerror}->(node => $async_attr,
2712     type => 'attribute not allowed'); # MUST NOT
2713     }
2714     }
2715     },
2716     checker => sub {
2717     my ($self, $todo) = @_;
2718    
2719     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2720     return $HTMLEmptyChecker->($self, $todo);
2721     } else {
2722     ## NOTE: No content model conformance in HTML5 spec.
2723     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2724     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2725     if ((defined $type and $type eq '') or
2726     (defined $language and $language eq '')) {
2727     $type = 'text/javascript';
2728     } elsif (defined $type) {
2729     #
2730     } elsif (defined $language) {
2731     $type = 'text/' . $language;
2732     } else {
2733     $type = 'text/javascript';
2734     }
2735     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2736     type => 'script:'.$type); ## TODO: $type normalization
2737     return $AnyChecker->($self, $todo);
2738     }
2739     },
2740     };
2741    
2742     ## NOTE: When script is disabled.
2743     $Element->{$HTML_NS}->{noscript} = {
2744 wakaba 1.3 attrs_checker => sub {
2745     my ($self, $todo) = @_;
2746    
2747     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2748     ## since the later is not invoked when the |noscript| is used as a
2749     ## transparent element.
2750     unless ($todo->{node}->owner_document->manakai_is_html) {
2751     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2752     }
2753    
2754     $GetHTMLAttrsChecker->({})->($self, $todo);
2755     },
2756 wakaba 1.1 checker => sub {
2757     my ($self, $todo) = @_;
2758    
2759 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2760     my $new_todos = [];
2761     my @nodes = (@{$todo->{node}->child_nodes});
2762    
2763     while (@nodes) {
2764     my $node = shift @nodes;
2765     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2766    
2767     my $nt = $node->node_type;
2768     if ($nt == 1) {
2769     my $node_ns = $node->namespace_uri;
2770     $node_ns = '' unless defined $node_ns;
2771     my $node_ln = $node->manakai_local_name;
2772     if ($node_ns eq $HTML_NS) {
2773     if ({link => 1, style => 1}->{$node_ln}) {
2774     #
2775     } elsif ($node_ln eq 'meta') {
2776 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2777     #
2778 wakaba 1.3 } else {
2779 wakaba 1.5 $self->{onerror}->(node => $node,
2780     type => 'element not allowed');
2781 wakaba 1.3 }
2782     } else {
2783     $self->{onerror}->(node => $node, type => 'element not allowed');
2784     }
2785     } else {
2786     $self->{onerror}->(node => $node, type => 'element not allowed');
2787     }
2788    
2789     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2790     unshift @nodes, @$sib;
2791     push @$new_todos, @$ch;
2792     } elsif ($nt == 3 or $nt == 4) {
2793     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2794     $self->{onerror}->(node => $node, type => 'character not allowed');
2795     }
2796     } elsif ($nt == 5) {
2797     unshift @nodes, @{$node->child_nodes};
2798     }
2799     }
2800     return ($new_todos);
2801     } else {
2802     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2803     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2804     push @$sib, $end;
2805     return ($sib, $ch);
2806     }
2807 wakaba 1.1 },
2808     };
2809 wakaba 1.3
2810     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2811 wakaba 1.1
2812     $Element->{$HTML_NS}->{'event-source'} = {
2813     attrs_checker => $GetHTMLAttrsChecker->({
2814     src => $HTMLURIAttrChecker,
2815     }),
2816     checker => $HTMLEmptyChecker,
2817     };
2818    
2819     $Element->{$HTML_NS}->{details} = {
2820     attrs_checker => $GetHTMLAttrsChecker->({
2821     open => $GetHTMLBooleanAttrChecker->('open'),
2822     }),
2823     checker => sub {
2824     my ($self, $todo) = @_;
2825    
2826     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2827     my ($sib, $ch)
2828     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2829     ->($self, $todo);
2830     push @$sib, $end;
2831     return ($sib, $ch);
2832     },
2833     };
2834    
2835     $Element->{$HTML_NS}->{datagrid} = {
2836     attrs_checker => $GetHTMLAttrsChecker->({
2837     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2838     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2839     }),
2840     checker => sub {
2841     my ($self, $todo) = @_;
2842     my $el = $todo->{node};
2843     my $new_todos = [];
2844     my @nodes = (@{$el->child_nodes});
2845    
2846     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2847    
2848     ## Block-table Block* | table | select | datalist | Empty
2849     my $mode = 'any';
2850     while (@nodes) {
2851     my $node = shift @nodes;
2852     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2853    
2854     my $nt = $node->node_type;
2855     if ($nt == 1) {
2856     my $node_ns = $node->namespace_uri;
2857     $node_ns = '' unless defined $node_ns;
2858     my $node_ln = $node->manakai_local_name;
2859     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2860     if ($mode eq 'block') {
2861     $not_allowed = 1
2862     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2863     } elsif ($mode eq 'any') {
2864     if ($node_ns eq $HTML_NS and
2865     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2866     $mode = 'none';
2867     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2868     $mode = 'block';
2869     } else {
2870     $not_allowed = 1;
2871     }
2872     } else {
2873     $not_allowed = 1;
2874     }
2875     $self->{onerror}->(node => $node, type => 'element not allowed')
2876     if $not_allowed;
2877     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2878     unshift @nodes, @$sib;
2879     push @$new_todos, @$ch;
2880     } elsif ($nt == 3 or $nt == 4) {
2881     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2882     $self->{onerror}->(node => $node, type => 'character not allowed');
2883     }
2884     } elsif ($nt == 5) {
2885     unshift @nodes, @{$node->child_nodes};
2886     }
2887     }
2888    
2889     push @$new_todos, $end;
2890     return ($new_todos);
2891     },
2892     };
2893    
2894     $Element->{$HTML_NS}->{command} = {
2895     attrs_checker => $GetHTMLAttrsChecker->({
2896     checked => $GetHTMLBooleanAttrChecker->('checked'),
2897     default => $GetHTMLBooleanAttrChecker->('default'),
2898     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2899     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2900     icon => $HTMLURIAttrChecker,
2901     label => sub { }, ## NOTE: No conformance creteria
2902     radiogroup => sub { }, ## NOTE: No conformance creteria
2903     ## NOTE: |title| has special semantics, but no syntactical difference
2904     type => sub {
2905     my ($self, $attr) = @_;
2906     my $value = $attr->value;
2907     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2908     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2909     }
2910     },
2911     }),
2912     checker => $HTMLEmptyChecker,
2913     };
2914    
2915     $Element->{$HTML_NS}->{menu} = {
2916     attrs_checker => $GetHTMLAttrsChecker->({
2917     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
2918     id => sub {
2919     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
2920     my ($self, $attr) = @_;
2921     my $value = $attr->value;
2922     if (length $value > 0) {
2923     if ($self->{id}->{$value}) {
2924     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2925     push @{$self->{id}->{$value}}, $attr;
2926     } else {
2927     $self->{id}->{$value} = [$attr];
2928     }
2929     } else {
2930     ## NOTE: MUST contain at least one character
2931     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2932     }
2933     if ($value =~ /[\x09-\x0D\x20]/) {
2934     $self->{onerror}->(node => $attr, type => 'space in ID');
2935     }
2936     $self->{menu}->{$value} ||= $attr;
2937     ## ISSUE: <menu id=""><p contextmenu=""> match?
2938     },
2939     label => sub { }, ## NOTE: No conformance creteria
2940     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
2941     }),
2942     checker => sub {
2943     my ($self, $todo) = @_;
2944     my $el = $todo->{node};
2945     my $new_todos = [];
2946     my @nodes = (@{$el->child_nodes});
2947    
2948     my $content = 'li or inline';
2949     while (@nodes) {
2950     my $node = shift @nodes;
2951     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2952    
2953     my $nt = $node->node_type;
2954     if ($nt == 1) {
2955     my $node_ns = $node->namespace_uri;
2956     $node_ns = '' unless defined $node_ns;
2957     my $node_ln = $node->manakai_local_name;
2958     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2959     if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2960     if ($content eq 'inline') {
2961     $not_allowed = 1;
2962     } elsif ($content eq 'li or inline') {
2963     $content = 'li';
2964     }
2965     } else {
2966     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2967     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2968     $content = 'inline';
2969     } else {
2970     $not_allowed = 1;
2971     }
2972     }
2973     $self->{onerror}->(node => $node, type => 'element not allowed')
2974     if $not_allowed;
2975     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2976     unshift @nodes, @$sib;
2977     push @$new_todos, @$ch;
2978     } elsif ($nt == 3 or $nt == 4) {
2979     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2980     if ($content eq 'li') {
2981     $self->{onerror}->(node => $node, type => 'character not allowed');
2982     } elsif ($content eq 'li or inline') {
2983     $content = 'inline';
2984     }
2985     }
2986     } elsif ($nt == 5) {
2987     unshift @nodes, @{$node->child_nodes};
2988     }
2989     }
2990    
2991     for (@$new_todos) {
2992     $_->{inline} = 1;
2993     }
2994     return ($new_todos);
2995     },
2996     };
2997    
2998     $Element->{$HTML_NS}->{legend} = {
2999     attrs_checker => $GetHTMLAttrsChecker->({}),
3000     checker => sub {
3001     my ($self, $todo) = @_;
3002    
3003     my $parent = $todo->{node}->manakai_parent_element;
3004     if (defined $parent) {
3005     my $nsuri = $parent->namespace_uri;
3006     $nsuri = '' unless defined $nsuri;
3007     my $ln = $parent->manakai_local_name;
3008     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3009     return $HTMLInlineChecker->($self, $todo);
3010     } else {
3011     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
3012     }
3013     } else {
3014     return $HTMLInlineChecker->($self, $todo);
3015     }
3016    
3017     ## ISSUE: Content model is defined only for fieldset/legend,
3018     ## details/legend, and figure/legend.
3019     },
3020     };
3021    
3022     $Element->{$HTML_NS}->{div} = {
3023     attrs_checker => $GetHTMLAttrsChecker->({}),
3024     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3025     };
3026    
3027     $Element->{$HTML_NS}->{font} = {
3028     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3029     checker => $HTMLTransparentChecker,
3030     };
3031    
3032     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3033    
3034     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24