/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations) (download)
Fri Aug 17 11:53:52 2007 UTC (17 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +86 -34 lines
++ whatpm/t/ChangeLog	17 Aug 2007 07:08:23 -0000
	* content-model-2.dat: New tests for |base|
	following URI or hyperlink are added.

2007-08-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	17 Aug 2007 07:44:01 -0000
	* CSS/: New directory.

2007-08-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/CSS/ChangeLog	17 Aug 2007 11:53:38 -0000
2007-08-17  Wakaba  <wakaba@suika.fam.cx>

	* Tokenizer.pm: New module.

	* ChangeLog: New file.

++ whatpm/Whatpm/ContentChecker/ChangeLog	17 Aug 2007 07:08:56 -0000
	* HTML.pm: Raise new errors if |base| is following
	URI attributes or hyperlink attributes.

2007-08-17  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10     noscript 1
11     /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27     /,
28     },
29     };
30    
31     my $HTMLStrictlyInlineLevelElements = {
32     $HTML_NS => {
33     qw/
34     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
35     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
36     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
37     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
38     script 1 noscript 1 event-source 1 command 1 font 1
39     /,
40     },
41     };
42    
43     my $HTMLStructuredInlineLevelElements = {
44     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
45     };
46    
47     my $HTMLInteractiveElements = {
48     $HTML_NS => {a => 1, details => 1, datagrid => 1},
49     };
50     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
51     ## of interactive elements
52    
53     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
54    
55     #my $HTMLSemiTransparentElements = {
56     # $HTML_NS => {qw/video 1 audio 1/},
57     #};
58    
59     my $HTMLEmbededElements = {
60     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
61     };
62    
63     ## Empty
64     my $HTMLEmptyChecker = sub {
65     my ($self, $todo) = @_;
66     my $el = $todo->{node};
67     my $new_todos = [];
68     my @nodes = (@{$el->child_nodes});
69    
70     while (@nodes) {
71     my $node = shift @nodes;
72     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
73    
74     my $nt = $node->node_type;
75     if ($nt == 1) {
76     ## NOTE: |minuses| list is not checked since redundant
77     $self->{onerror}->(node => $node, type => 'element not allowed');
78     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
79     unshift @nodes, @$sib;
80     push @$new_todos, @$ch;
81     } elsif ($nt == 3 or $nt == 4) {
82     if ($node->data =~ /[^\x09-\x0D\x20]/) {
83     $self->{onerror}->(node => $node, type => 'character not allowed');
84     }
85     } elsif ($nt == 5) {
86     unshift @nodes, @{$node->child_nodes};
87     }
88     }
89     return ($new_todos);
90     };
91    
92     ## Text
93     my $HTMLTextChecker = sub {
94     my ($self, $todo) = @_;
95     my $el = $todo->{node};
96     my $new_todos = [];
97     my @nodes = (@{$el->child_nodes});
98    
99     while (@nodes) {
100     my $node = shift @nodes;
101     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
102    
103     my $nt = $node->node_type;
104     if ($nt == 1) {
105     ## NOTE: |minuses| list is not checked since redundant
106     $self->{onerror}->(node => $node, type => 'element not allowed');
107     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
108     unshift @nodes, @$sib;
109     push @$new_todos, @$ch;
110     } elsif ($nt == 5) {
111     unshift @nodes, @{$node->child_nodes};
112     }
113     }
114     return ($new_todos);
115     };
116    
117     ## Zero or more |html:style| elements,
118     ## followed by zero or more block-level elements
119     my $HTMLStylableBlockChecker = sub {
120     my ($self, $todo) = @_;
121     my $el = $todo->{node};
122     my $new_todos = [];
123     my @nodes = (@{$el->child_nodes});
124    
125     my $has_non_style;
126     while (@nodes) {
127     my $node = shift @nodes;
128     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
129    
130     my $nt = $node->node_type;
131     if ($nt == 1) {
132     my $node_ns = $node->namespace_uri;
133     $node_ns = '' unless defined $node_ns;
134     my $node_ln = $node->manakai_local_name;
135     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
136     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
137     $not_allowed = 1 if $has_non_style or
138     not $node->has_attribute_ns (undef, 'scoped');
139     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
140     $has_non_style = 1;
141     } else {
142     $has_non_style = 1;
143     $not_allowed = 1;
144     }
145     $self->{onerror}->(node => $node, type => 'element not allowed')
146     if $not_allowed;
147     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
148     unshift @nodes, @$sib;
149     push @$new_todos, @$ch;
150     } elsif ($nt == 3 or $nt == 4) {
151     if ($node->data =~ /[^\x09-\x0D\x20]/) {
152     $self->{onerror}->(node => $node, type => 'character not allowed');
153     }
154     } elsif ($nt == 5) {
155     unshift @nodes, @{$node->child_nodes};
156     }
157     }
158     return ($new_todos);
159     }; # $HTMLStylableBlockChecker
160    
161     ## Zero or more block-level elements
162     my $HTMLBlockChecker = sub {
163     my ($self, $todo) = @_;
164     my $el = $todo->{node};
165     my $new_todos = [];
166     my @nodes = (@{$el->child_nodes});
167    
168     while (@nodes) {
169     my $node = shift @nodes;
170     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
171    
172     my $nt = $node->node_type;
173     if ($nt == 1) {
174     my $node_ns = $node->namespace_uri;
175     $node_ns = '' unless defined $node_ns;
176     my $node_ln = $node->manakai_local_name;
177     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
178     $not_allowed = 1
179     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
180     $self->{onerror}->(node => $node, type => 'element not allowed')
181     if $not_allowed;
182     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
183     unshift @nodes, @$sib;
184     push @$new_todos, @$ch;
185     } elsif ($nt == 3 or $nt == 4) {
186     if ($node->data =~ /[^\x09-\x0D\x20]/) {
187     $self->{onerror}->(node => $node, type => 'character not allowed');
188     }
189     } elsif ($nt == 5) {
190     unshift @nodes, @{$node->child_nodes};
191     }
192     }
193     return ($new_todos);
194     }; # $HTMLBlockChecker
195    
196     ## Inline-level content
197     my $HTMLInlineChecker = sub {
198     my ($self, $todo) = @_;
199     my $el = $todo->{node};
200     my $new_todos = [];
201     my @nodes = (@{$el->child_nodes});
202    
203     while (@nodes) {
204     my $node = shift @nodes;
205     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
206    
207     my $nt = $node->node_type;
208     if ($nt == 1) {
209     my $node_ns = $node->namespace_uri;
210     $node_ns = '' unless defined $node_ns;
211     my $node_ln = $node->manakai_local_name;
212     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
213     $not_allowed = 1
214     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
215     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
216     $self->{onerror}->(node => $node, type => 'element not allowed')
217     if $not_allowed;
218     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
219     unshift @nodes, @$sib;
220     push @$new_todos, @$ch;
221     } elsif ($nt == 5) {
222     unshift @nodes, @{$node->child_nodes};
223     }
224     }
225    
226     for (@$new_todos) {
227     $_->{inline} = 1;
228     }
229     return ($new_todos);
230     }; # $HTMLInlineChecker
231    
232     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
233     ## TODO: check significant content
234    
235     ## Strictly inline-level content
236     my $HTMLStrictlyInlineChecker = sub {
237     my ($self, $todo) = @_;
238     my $el = $todo->{node};
239     my $new_todos = [];
240     my @nodes = (@{$el->child_nodes});
241    
242     while (@nodes) {
243     my $node = shift @nodes;
244     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
245    
246     my $nt = $node->node_type;
247     if ($nt == 1) {
248     my $node_ns = $node->namespace_uri;
249     $node_ns = '' unless defined $node_ns;
250     my $node_ln = $node->manakai_local_name;
251     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
252     $not_allowed = 1
253     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
254     $self->{onerror}->(node => $node, type => 'element not allowed')
255     if $not_allowed;
256     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
257     unshift @nodes, @$sib;
258     push @$new_todos, @$ch;
259     } elsif ($nt == 5) {
260     unshift @nodes, @{$node->child_nodes};
261     }
262     }
263    
264     for (@$new_todos) {
265     $_->{inline} = 1;
266     $_->{strictly_inline} = 1;
267     }
268     return ($new_todos);
269     }; # $HTMLStrictlyInlineChecker
270    
271     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
272     ## TODO: check significant content
273    
274     ## Inline-level or strictly inline-kevek content
275     my $HTMLInlineOrStrictlyInlineChecker = sub {
276     my ($self, $todo) = @_;
277     my $el = $todo->{node};
278     my $new_todos = [];
279     my @nodes = (@{$el->child_nodes});
280    
281     while (@nodes) {
282     my $node = shift @nodes;
283     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
284    
285     my $nt = $node->node_type;
286     if ($nt == 1) {
287     my $node_ns = $node->namespace_uri;
288     $node_ns = '' unless defined $node_ns;
289     my $node_ln = $node->manakai_local_name;
290     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
291     if ($todo->{strictly_inline}) {
292     $not_allowed = 1
293     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
294     } else {
295     $not_allowed = 1
296     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
297     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
298     }
299     $self->{onerror}->(node => $node, type => 'element not allowed')
300     if $not_allowed;
301     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
302     unshift @nodes, @$sib;
303     push @$new_todos, @$ch;
304     } elsif ($nt == 5) {
305     unshift @nodes, @{$node->child_nodes};
306     }
307     }
308    
309     for (@$new_todos) {
310     $_->{inline} = 1;
311     $_->{strictly_inline} = 1;
312     }
313     return ($new_todos);
314     }; # $HTMLInlineOrStrictlyInlineChecker
315    
316     my $HTMLSignificantInlineOrStrictlyInlineChecker
317     = $HTMLInlineOrStrictlyInlineChecker;
318     ## TODO: check significant content
319    
320     my $HTMLBlockOrInlineChecker = sub {
321     my ($self, $todo) = @_;
322     my $el = $todo->{node};
323     my $new_todos = [];
324     my @nodes = (@{$el->child_nodes});
325    
326     my $content = 'block-or-inline'; # or 'block' or 'inline'
327     my @block_not_inline;
328     while (@nodes) {
329     my $node = shift @nodes;
330     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
331    
332     my $nt = $node->node_type;
333     if ($nt == 1) {
334     my $node_ns = $node->namespace_uri;
335     $node_ns = '' unless defined $node_ns;
336     my $node_ln = $node->manakai_local_name;
337     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
338     if ($content eq 'block') {
339     $not_allowed = 1
340     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
341     } elsif ($content eq 'inline') {
342     $not_allowed = 1
343     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
344     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
345     } else {
346     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
347     my $is_inline
348     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
349     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
350    
351     push @block_not_inline, $node
352     if $is_block and not $is_inline and not $not_allowed;
353     unless ($is_block) {
354     $content = 'inline';
355     for (@block_not_inline) {
356     $self->{onerror}->(node => $_, type => 'element not allowed');
357     }
358     $not_allowed = 1 unless $is_inline;
359     }
360     }
361     $self->{onerror}->(node => $node, type => 'element not allowed')
362     if $not_allowed;
363     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
364     unshift @nodes, @$sib;
365     push @$new_todos, @$ch;
366     } elsif ($nt == 3 or $nt == 4) {
367     if ($node->data =~ /[^\x09-\x0D\x20]/) {
368     if ($content eq 'block') {
369     $self->{onerror}->(node => $node, type => 'character not allowed');
370     } else {
371     $content = 'inline';
372     for (@block_not_inline) {
373     $self->{onerror}->(node => $_, type => 'element not allowed');
374     }
375     }
376     }
377     } elsif ($nt == 5) {
378     unshift @nodes, @{$node->child_nodes};
379     }
380     }
381    
382     if ($content eq 'inline') {
383     for (@$new_todos) {
384     $_->{inline} = 1;
385     }
386     }
387     return ($new_todos);
388     };
389    
390     ## Zero or more XXX element, then either block-level or inline-level
391     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
392     my ($elnsuri, $ellname) = @_;
393     return sub {
394     my ($self, $todo) = @_;
395     my $el = $todo->{node};
396     my $new_todos = [];
397     my @nodes = (@{$el->child_nodes});
398    
399     my $has_non_style;
400     my $content = 'block-or-inline'; # or 'block' or 'inline'
401     my @block_not_inline;
402     while (@nodes) {
403     my $node = shift @nodes;
404     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
405    
406     my $nt = $node->node_type;
407     if ($nt == 1) {
408     my $node_ns = $node->namespace_uri;
409     $node_ns = '' unless defined $node_ns;
410     my $node_ln = $node->manakai_local_name;
411     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
412     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
413     $not_allowed = 1 if $has_non_style;
414     if ($ellname eq 'style' and
415     not $node->has_attribute_ns (undef, 'scoped')) {
416     $not_allowed = 1;
417     }
418     } elsif ($content eq 'block') {
419     $has_non_style = 1;
420     $not_allowed = 1
421     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
422     } elsif ($content eq 'inline') {
423     $has_non_style = 1;
424     $not_allowed = 1
425     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
426     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
427     } else {
428     $has_non_style = 1;
429     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
430     my $is_inline
431     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
432     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
433    
434     push @block_not_inline, $node
435     if $is_block and not $is_inline and not $not_allowed;
436     unless ($is_block) {
437     $content = 'inline';
438     for (@block_not_inline) {
439     $self->{onerror}->(node => $_, type => 'element not allowed');
440     }
441     $not_allowed = 1 unless $is_inline;
442     }
443     }
444     $self->{onerror}->(node => $node, type => 'element not allowed')
445     if $not_allowed;
446     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
447     unshift @nodes, @$sib;
448     push @$new_todos, @$ch;
449     } elsif ($nt == 3 or $nt == 4) {
450     if ($node->data =~ /[^\x09-\x0D\x20]/) {
451     $has_non_style = 1;
452     if ($content eq 'block') {
453     $self->{onerror}->(node => $node, type => 'character not allowed');
454     } else {
455     $content = 'inline';
456     for (@block_not_inline) {
457     $self->{onerror}->(node => $_, type => 'element not allowed');
458     }
459     }
460     }
461     } elsif ($nt == 5) {
462     unshift @nodes, @{$node->child_nodes};
463     }
464     }
465    
466     if ($content eq 'inline') {
467     for (@$new_todos) {
468     $_->{inline} = 1;
469     }
470     }
471     return ($new_todos);
472     };
473     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
474    
475     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
476    
477     our $AttrChecker;
478    
479     my $GetHTMLEnumeratedAttrChecker = sub {
480     my $states = shift; # {value => conforming ? 1 : -1}
481     return sub {
482     my ($self, $attr) = @_;
483     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
484     if ($states->{$value} > 0) {
485     #
486     } elsif ($states->{$value}) {
487     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
488     } else {
489     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
490     }
491     };
492     }; # $GetHTMLEnumeratedAttrChecker
493    
494     my $GetHTMLBooleanAttrChecker = sub {
495     my $local_name = shift;
496     return sub {
497     my ($self, $attr) = @_;
498     my $value = $attr->value;
499     unless ($value eq $local_name or $value eq '') {
500     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
501     }
502     };
503     }; # $GetHTMLBooleanAttrChecker
504    
505     ## |rel| attribute (unordered set of space separated tokens,
506     ## whose allowed values are defined by the section on link types)
507     my $HTMLLinkTypesAttrChecker = sub {
508 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
509 wakaba 1.1 my %word;
510     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
511     unless ($word{$word}) {
512     $word{$word} = 1;
513     } else {
514     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
515     }
516     }
517     ## NOTE: Case sensitive match (since HTML5 spec does not say link
518     ## types are case-insensitive and it says "The value should not
519     ## be confusingly similar to any other defined value (e.g.
520     ## differing only in case).").
521     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
522     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
523     ## values to be used conformingly.
524     require Whatpm::_LinkTypeList;
525     our $LinkType;
526     for my $word (keys %word) {
527     my $def = $LinkType->{$word};
528     if (defined $def) {
529     if ($def->{status} eq 'accepted') {
530     if (defined $def->{effect}->[$a_or_area]) {
531     #
532     } else {
533     $self->{onerror}->(node => $attr,
534     type => 'link type:bad context:'.$word);
535     }
536     } elsif ($def->{status} eq 'proposal') {
537     $self->{onerror}->(node => $attr, level => 's',
538     type => 'link type:proposed:'.$word);
539     } else { # rejected or synonym
540     $self->{onerror}->(node => $attr,
541     type => 'link type:non-conforming:'.$word);
542     }
543 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
544     if ($word eq 'alternate') {
545     #
546     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
547     $todo->{has_hyperlink_link_type} = 1;
548     }
549     }
550 wakaba 1.1 if ($def->{unique}) {
551     unless ($self->{has_link_type}->{$word}) {
552     $self->{has_link_type}->{$word} = 1;
553     } else {
554     $self->{onerror}->(node => $attr,
555     type => 'link type:duplicate:'.$word);
556     }
557     }
558     } else {
559     $self->{onerror}->(node => $attr, level => 'unsupported',
560     type => 'link type:'.$word);
561     }
562     }
563 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
564     if $word{alternate} and not $word{stylesheet};
565 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
566     ## says that using both X-Pingback: header field and HTML
567     ## <link rel=pingback> is deprecated and if both appears they
568     ## SHOULD contain exactly the same value.
569     ## ISSUE: Pingback 1.0 specification defines the exact representation
570     ## of its link element, which cannot be tested by the current arch.
571     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
572     ## include any string that matches to the pattern for the rel=pingback link,
573     ## which again inpossible to test.
574     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
575     }; # $HTMLLinkTypesAttrChecker
576    
577     ## URI (or IRI)
578     my $HTMLURIAttrChecker = sub {
579     my ($self, $attr) = @_;
580     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
581     my $value = $attr->value;
582     Whatpm::URIChecker->check_iri_reference ($value, sub {
583     my %opt = @_;
584     $self->{onerror}->(node => $attr, level => $opt{level},
585     type => 'URI::'.$opt{type}.
586     (defined $opt{position} ? ':'.$opt{position} : ''));
587     });
588 wakaba 1.4 $self->{has_uri_attr} = 1;
589 wakaba 1.1 }; # $HTMLURIAttrChecker
590    
591     ## A space separated list of one or more URIs (or IRIs)
592     my $HTMLSpaceURIsAttrChecker = sub {
593     my ($self, $attr) = @_;
594     my $i = 0;
595     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
596     Whatpm::URIChecker->check_iri_reference ($value, sub {
597     my %opt = @_;
598     $self->{onerror}->(node => $attr, level => $opt{level},
599 wakaba 1.2 type => 'URIs:'.':'.
600     $opt{type}.':'.$i.
601 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
602     });
603     $i++;
604     }
605     ## ISSUE: Relative references?
606     ## ISSUE: Leading or trailing white spaces are conformant?
607     ## ISSUE: A sequence of white space characters are conformant?
608     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
609     ## NOTE: Duplication seems not an error.
610 wakaba 1.4 $self->{has_uri_attr} = 1;
611 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
612    
613     my $HTMLDatetimeAttrChecker = sub {
614     my ($self, $attr) = @_;
615     my $value = $attr->value;
616     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
617     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
618     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
619     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
620     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
621     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
622     if $d < 1 or
623     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
624     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
625     if $M == 2 and $d == 29 and
626     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
627     } else {
628     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
629     }
630     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
631     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
632     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
633     if defined $s and $s > 59;
634     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
635     if $zh > 23;
636     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
637     if $zm > 59;
638     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
639     } else {
640     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
641     }
642     }; # $HTMLDatetimeAttrChecker
643    
644     my $HTMLIntegerAttrChecker = sub {
645     my ($self, $attr) = @_;
646     my $value = $attr->value;
647     unless ($value =~ /\A-?[0-9]+\z/) {
648     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
649     }
650     }; # $HTMLIntegerAttrChecker
651    
652     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
653     my $range_check = shift;
654     return sub {
655     my ($self, $attr) = @_;
656     my $value = $attr->value;
657     if ($value =~ /\A[0-9]+\z/) {
658     unless ($range_check->($value + 0)) {
659     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
660     }
661     } else {
662     $self->{onerror}->(node => $attr,
663     type => 'nninteger:syntax error');
664     }
665     };
666     }; # $GetHTMLNonNegativeIntegerAttrChecker
667    
668     my $GetHTMLFloatingPointNumberAttrChecker = sub {
669     my $range_check = shift;
670     return sub {
671     my ($self, $attr) = @_;
672     my $value = $attr->value;
673     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
674     unless ($range_check->($value + 0)) {
675     $self->{onerror}->(node => $attr, type => 'float:out of range');
676     }
677     } else {
678     $self->{onerror}->(node => $attr,
679     type => 'float:syntax error');
680     }
681     };
682     }; # $GetHTMLFloatingPointNumberAttrChecker
683    
684     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
685     ## ISSUE: RFC 2046 does not define syntax of media types.
686     ## ISSUE: The definition of "a valid MIME type" is unknown.
687     ## Syntactical correctness?
688     my $HTMLIMTAttrChecker = sub {
689     my ($self, $attr) = @_;
690     my $value = $attr->value;
691     ## ISSUE: RFC 2045 Content-Type header field allows insertion
692     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
693     ## ISSUE: RFC 2231 extension? Maybe no.
694     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
695     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
696     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
697     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
698     my @type = ($1, $2);
699     my $param = $3;
700     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
701     if (defined $2) {
702     push @type, $1 => $2;
703     } else {
704     my $n = $1;
705     my $v = $2;
706     $v =~ s/\\(.)/$1/gs;
707     push @type, $n => $v;
708     }
709     }
710     require Whatpm::IMTChecker;
711     Whatpm::IMTChecker->check_imt (sub {
712     my %opt = @_;
713     $self->{onerror}->(node => $attr, level => $opt{level},
714     type => 'IMT:'.$opt{type});
715     }, @type);
716     } else {
717     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
718     }
719     }; # $HTMLIMTAttrChecker
720    
721     my $HTMLLanguageTagAttrChecker = sub {
722     my ($self, $attr) = @_;
723     $self->{onerror}->(node => $attr, level => 'unsupported',
724     type => 'language tag');
725     if ($attr->value eq '') {
726     $self->{onerror}->(node => $attr, type => 'language tag:syntax error');
727     }
728     ## TODO: RFC 3066 test
729     ## ISSUE: RFC 4646 (3066bis)?
730     }; # $HTMLLanguageTagAttrChecker
731    
732     ## "A valid media query [MQ]"
733     my $HTMLMQAttrChecker = sub {
734     my ($self, $attr) = @_;
735     $self->{onerror}->(node => $attr, level => 'unsupported',
736     type => 'media query');
737     ## ISSUE: What is "a valid media query"?
738     }; # $HTMLMQAttrChecker
739    
740     my $HTMLEventHandlerAttrChecker = sub {
741     my ($self, $attr) = @_;
742     $self->{onerror}->(node => $attr, level => 'unsupported',
743     type => 'event handler');
744     ## TODO: MUST contain valid ECMAScript code matching the
745     ## ECMAScript |FunctionBody| production. [ECMA262]
746     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
747     ## ISSUE: Automatic semicolon insertion does not apply?
748     ## ISSUE: Other script languages?
749     }; # $HTMLEventHandlerAttrChecker
750    
751     my $HTMLUsemapAttrChecker = sub {
752     my ($self, $attr) = @_;
753     ## MUST be a valid hashed ID reference to a |map| element
754     my $value = $attr->value;
755     if ($value =~ s/^#//) {
756     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
757     push @{$self->{usemap}}, [$value => $attr];
758     } else {
759     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
760     }
761     ## NOTE: Space characters in hashed ID references are conforming.
762     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
763     }; # $HTMLUsemapAttrChecker
764    
765     my $HTMLTargetAttrChecker = sub {
766     my ($self, $attr) = @_;
767     my $value = $attr->value;
768     if ($value =~ /^_/) {
769     $value = lc $value; ## ISSUE: ASCII case-insentitive?
770     unless ({
771     _self => 1, _parent => 1, _top => 1,
772     }->{$value}) {
773     $self->{onerror}->(node => $attr,
774     type => 'reserved browsing context name');
775     }
776     } else {
777     #$ ISSUE: An empty string is conforming?
778     }
779     }; # $HTMLTargetAttrChecker
780    
781     my $HTMLAttrChecker = {
782     id => sub {
783     ## NOTE: |map| has its own variant of |id=""| checker
784     my ($self, $attr) = @_;
785     my $value = $attr->value;
786     if (length $value > 0) {
787     if ($self->{id}->{$value}) {
788     $self->{onerror}->(node => $attr, type => 'duplicate ID');
789     push @{$self->{id}->{$value}}, $attr;
790     } else {
791     $self->{id}->{$value} = [$attr];
792     }
793     if ($value =~ /[\x09-\x0D\x20]/) {
794     $self->{onerror}->(node => $attr, type => 'space in ID');
795     }
796     } else {
797     ## NOTE: MUST contain at least one character
798     $self->{onerror}->(node => $attr, type => 'empty attribute value');
799     }
800     },
801     title => sub {}, ## NOTE: No conformance creteria
802     lang => sub {
803     my ($self, $attr) = @_;
804     $self->{onerror}->(node => $attr, level => 'unsupported',
805     type => 'language tag');
806     ## TODO: RFC 3066 or empty test
807     ## ISSUE: RFC 4646 (3066bis)?
808     unless ($attr->owner_document->manakai_is_html) {
809     $self->{onerror}->(node => $attr, type => 'in XML:lang');
810     }
811     },
812     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
813     class => sub {
814     my ($self, $attr) = @_;
815     my %word;
816     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
817     unless ($word{$word}) {
818     $word{$word} = 1;
819     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
820     } else {
821     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
822     }
823     }
824     },
825     contextmenu => sub {
826     my ($self, $attr) = @_;
827     my $value = $attr->value;
828     push @{$self->{contextmenu}}, [$value => $attr];
829     ## ISSUE: "The value must be the ID of a menu element in the DOM."
830     ## What is "in the DOM"? A menu Element node that is not part
831     ## of the Document tree is in the DOM? A menu Element node that
832     ## belong to another Document tree is in the DOM?
833     },
834     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
835     tabindex => $HTMLIntegerAttrChecker,
836     };
837    
838     for (qw/
839     onabort onbeforeunload onblur onchange onclick oncontextmenu
840     ondblclick ondrag ondragend ondragenter ondragleave ondragover
841     ondragstart ondrop onerror onfocus onkeydown onkeypress
842     onkeyup onload onmessage onmousedown onmousemove onmouseout
843     onmouseover onmouseup onmousewheel onresize onscroll onselect
844     onsubmit onunload
845     /) {
846     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
847     }
848    
849     my $GetHTMLAttrsChecker = sub {
850     my $element_specific_checker = shift;
851     return sub {
852     my ($self, $todo) = @_;
853     for my $attr (@{$todo->{node}->attributes}) {
854     my $attr_ns = $attr->namespace_uri;
855     $attr_ns = '' unless defined $attr_ns;
856     my $attr_ln = $attr->manakai_local_name;
857     my $checker;
858     if ($attr_ns eq '') {
859     $checker = $element_specific_checker->{$attr_ln}
860     || $HTMLAttrChecker->{$attr_ln};
861     }
862     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
863     || $AttrChecker->{$attr_ns}->{''};
864     if ($checker) {
865     $checker->($self, $attr, $todo);
866     } else {
867     $self->{onerror}->(node => $attr, level => 'unsupported',
868     type => 'attribute');
869     ## ISSUE: No comformance createria for unknown attributes in the spec
870     }
871     }
872     };
873     }; # $GetHTMLAttrsChecker
874    
875     our $Element;
876     our $ElementDefault;
877     our $AnyChecker;
878    
879     $Element->{$HTML_NS}->{''} = {
880     attrs_checker => $GetHTMLAttrsChecker->({}),
881     checker => $ElementDefault->{checker},
882     };
883    
884     $Element->{$HTML_NS}->{html} = {
885     is_root => 1,
886     attrs_checker => $GetHTMLAttrsChecker->({
887     xmlns => sub {
888     my ($self, $attr) = @_;
889     my $value = $attr->value;
890     unless ($value eq $HTML_NS) {
891     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
892     }
893     unless ($attr->owner_document->manakai_is_html) {
894     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
895     ## TODO: Test
896     }
897     },
898     }),
899     checker => sub {
900     my ($self, $todo) = @_;
901     my $el = $todo->{node};
902     my $new_todos = [];
903     my @nodes = (@{$el->child_nodes});
904    
905     my $phase = 'before head';
906     while (@nodes) {
907     my $node = shift @nodes;
908     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
909    
910     my $nt = $node->node_type;
911     if ($nt == 1) {
912     my $node_ns = $node->namespace_uri;
913     $node_ns = '' unless defined $node_ns;
914     my $node_ln = $node->manakai_local_name;
915     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
916     if ($phase eq 'before head') {
917     if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
918     $phase = 'after head';
919     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
920     $self->{onerror}->(node => $node, type => 'ps element missing:head');
921     $phase = 'after body';
922     } else {
923     $not_allowed = 1;
924     # before head
925     }
926     } elsif ($phase eq 'after head') {
927     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
928     $phase = 'after body';
929     } else {
930     $not_allowed = 1;
931     # after head
932     }
933     } else { #elsif ($phase eq 'after body') {
934     $not_allowed = 1;
935     # after body
936     }
937     $self->{onerror}->(node => $node, type => 'element not allowed')
938     if $not_allowed;
939     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
940     unshift @nodes, @$sib;
941     push @$new_todos, @$ch;
942     } elsif ($nt == 3 or $nt == 4) {
943     if ($node->data =~ /[^\x09-\x0D\x20]/) {
944     $self->{onerror}->(node => $node, type => 'character not allowed');
945     }
946     } elsif ($nt == 5) {
947     unshift @nodes, @{$node->child_nodes};
948     }
949     }
950    
951     if ($phase eq 'before head') {
952     $self->{onerror}->(node => $el, type => 'child element missing:head');
953     $self->{onerror}->(node => $el, type => 'child element missing:body');
954     } elsif ($phase eq 'after head') {
955     $self->{onerror}->(node => $el, type => 'child element missing:body');
956     }
957    
958     return ($new_todos);
959     },
960     };
961    
962     $Element->{$HTML_NS}->{head} = {
963     attrs_checker => $GetHTMLAttrsChecker->({}),
964     checker => sub {
965     my ($self, $todo) = @_;
966     my $el = $todo->{node};
967     my $new_todos = [];
968     my @nodes = (@{$el->child_nodes});
969    
970     my $has_title;
971     my $phase = 'initial'; # 'after charset', 'after base'
972     while (@nodes) {
973     my $node = shift @nodes;
974     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
975    
976     my $nt = $node->node_type;
977     if ($nt == 1) {
978     my $node_ns = $node->namespace_uri;
979     $node_ns = '' unless defined $node_ns;
980     my $node_ln = $node->manakai_local_name;
981     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
982     if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
983     $phase = 'after base';
984     unless ($has_title) {
985     $has_title = 1;
986     } else {
987     $not_allowed = 1;
988     }
989     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
990     if ($node->has_attribute_ns (undef, 'charset')) {
991     if ($phase eq 'initial') {
992     $phase = 'after charset';
993     } else {
994     $not_allowed = 1;
995     ## NOTE: See also |base|'s "contexts" field in the spec
996     }
997     } else {
998     $phase = 'after base';
999     }
1000     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1001     if ($phase eq 'initial' or $phase eq 'after charset') {
1002     $phase = 'after base';
1003     } else {
1004     $not_allowed = 1;
1005     }
1006     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1007     $phase = 'after base';
1008     if ($node->has_attribute_ns (undef, 'scoped')) {
1009     $not_allowed = 1;
1010     }
1011     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1012     $phase = 'after base';
1013     } else {
1014     $not_allowed = 1;
1015     }
1016     $self->{onerror}->(node => $node, type => 'element not allowed')
1017     if $not_allowed;
1018 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1019 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1020     unshift @nodes, @$sib;
1021     push @$new_todos, @$ch;
1022     } elsif ($nt == 3 or $nt == 4) {
1023     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1024     $self->{onerror}->(node => $node, type => 'character not allowed');
1025     }
1026     } elsif ($nt == 5) {
1027     unshift @nodes, @{$node->child_nodes};
1028     }
1029     }
1030     unless ($has_title) {
1031     $self->{onerror}->(node => $el, type => 'child element missing:title');
1032     }
1033     return ($new_todos);
1034     },
1035     };
1036    
1037     $Element->{$HTML_NS}->{title} = {
1038     attrs_checker => $GetHTMLAttrsChecker->({}),
1039     checker => $HTMLTextChecker,
1040     };
1041    
1042     $Element->{$HTML_NS}->{base} = {
1043 wakaba 1.4 attrs_checker => sub {
1044     my ($self, $todo) = @_;
1045    
1046     if ($self->{has_uri_attr} and
1047     $todo->{node}->has_attribute_ns (undef, 'href')) {
1048     ## ISSUE: Are these examples conforming?
1049     ## <head profile="a b c"><base href> (except for |profile|'s
1050     ## non-conformance)
1051     ## <title xml:base="relative"/><base href/> (maybe it should be)
1052     ## <unknown xmlns="relative"/><base href/> (assuming that
1053     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1054     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1055     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1056     ## <style>@import 'relative';</style><base href>
1057     ## <script>location.href = 'relative';</script><base href>
1058     $self->{onerror}->(node => $todo->{node},
1059     type => 'basehref after URI attribute');
1060     }
1061     if ($self->{has_hyperlink_element} and
1062     $todo->{node}->has_attribute_ns (undef, 'target')) {
1063     ## ISSUE: Are these examples conforming?
1064     ## <head><title xlink:href=""/><base target="name"/></head>
1065     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1066     ## (assuming that |xbl:xbl| is allowed before |base|)
1067     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1068     ## <link href=""/><base target="name"/>
1069     ## <link rel=unknown href=""><base target=name>
1070     $self->{onerror}->(node => $todo->{node},
1071     type => 'basetarget after hyperlink');
1072     }
1073    
1074     return $GetHTMLAttrsChecker->({
1075     href => $HTMLURIAttrChecker,
1076     target => $HTMLTargetAttrChecker,
1077     })->($self, $todo);
1078     },
1079 wakaba 1.1 checker => $HTMLEmptyChecker,
1080     };
1081    
1082     $Element->{$HTML_NS}->{link} = {
1083     attrs_checker => sub {
1084     my ($self, $todo) = @_;
1085     $GetHTMLAttrsChecker->({
1086     href => $HTMLURIAttrChecker,
1087 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1088 wakaba 1.1 media => $HTMLMQAttrChecker,
1089     hreflang => $HTMLLanguageTagAttrChecker,
1090     type => $HTMLIMTAttrChecker,
1091     ## NOTE: Though |title| has special semantics,
1092     ## syntactically same as the |title| as global attribute.
1093     })->($self, $todo);
1094 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1095     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1096     } else {
1097 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1098     type => 'attribute missing:href');
1099     }
1100     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1101     $self->{onerror}->(node => $todo->{node},
1102     type => 'attribute missing:rel');
1103     }
1104     },
1105     checker => $HTMLEmptyChecker,
1106     };
1107    
1108     $Element->{$HTML_NS}->{meta} = {
1109     attrs_checker => sub {
1110     my ($self, $todo) = @_;
1111     my $name_attr;
1112     my $http_equiv_attr;
1113     my $charset_attr;
1114     my $content_attr;
1115     for my $attr (@{$todo->{node}->attributes}) {
1116     my $attr_ns = $attr->namespace_uri;
1117     $attr_ns = '' unless defined $attr_ns;
1118     my $attr_ln = $attr->manakai_local_name;
1119     my $checker;
1120     if ($attr_ns eq '') {
1121     if ($attr_ln eq 'content') {
1122     $content_attr = $attr;
1123     $checker = 1;
1124     } elsif ($attr_ln eq 'name') {
1125     $name_attr = $attr;
1126     $checker = 1;
1127     } elsif ($attr_ln eq 'http-equiv') {
1128     $http_equiv_attr = $attr;
1129     $checker = 1;
1130     } elsif ($attr_ln eq 'charset') {
1131     $charset_attr = $attr;
1132     $checker = 1;
1133     } else {
1134     $checker = $HTMLAttrChecker->{$attr_ln}
1135     || $AttrChecker->{$attr_ns}->{$attr_ln}
1136     || $AttrChecker->{$attr_ns}->{''};
1137     }
1138     } else {
1139     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1140     || $AttrChecker->{$attr_ns}->{''};
1141     }
1142     if ($checker) {
1143     $checker->($self, $attr) if ref $checker;
1144     } else {
1145     $self->{onerror}->(node => $attr, level => 'unsupported',
1146     type => 'attribute');
1147     ## ISSUE: No comformance createria for unknown attributes in the spec
1148     }
1149     }
1150    
1151     if (defined $name_attr) {
1152     if (defined $http_equiv_attr) {
1153     $self->{onerror}->(node => $http_equiv_attr,
1154     type => 'attribute not allowed');
1155     } elsif (defined $charset_attr) {
1156     $self->{onerror}->(node => $charset_attr,
1157     type => 'attribute not allowed');
1158     }
1159     my $metadata_name = $name_attr->value;
1160     my $metadata_value;
1161     if (defined $content_attr) {
1162     $metadata_value = $content_attr->value;
1163     } else {
1164     $self->{onerror}->(node => $todo->{node},
1165     type => 'attribute missing:content');
1166     $metadata_value = '';
1167     }
1168     } elsif (defined $http_equiv_attr) {
1169     if (defined $charset_attr) {
1170     $self->{onerror}->(node => $charset_attr,
1171     type => 'attribute not allowed');
1172     }
1173     unless (defined $content_attr) {
1174     $self->{onerror}->(node => $todo->{node},
1175     type => 'attribute missing:content');
1176     }
1177     } elsif (defined $charset_attr) {
1178     if (defined $content_attr) {
1179     $self->{onerror}->(node => $content_attr,
1180     type => 'attribute not allowed');
1181     }
1182     } else {
1183     if (defined $content_attr) {
1184     $self->{onerror}->(node => $content_attr,
1185     type => 'attribute not allowed');
1186     $self->{onerror}->(node => $todo->{node},
1187     type => 'attribute missing:name|http-equiv');
1188     } else {
1189     $self->{onerror}->(node => $todo->{node},
1190     type => 'attribute missing:name|http-equiv|charset');
1191     }
1192     }
1193    
1194     ## TODO: metadata conformance
1195    
1196     ## TODO: pragma conformance
1197     if (defined $http_equiv_attr) { ## An enumerated attribute
1198     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1199     if ({
1200     'refresh' => 1,
1201     'default-style' => 1,
1202     }->{$keyword}) {
1203     #
1204     } else {
1205     $self->{onerror}->(node => $http_equiv_attr,
1206     type => 'enumerated:invalid');
1207     }
1208     }
1209    
1210     if (defined $charset_attr) {
1211     unless ($todo->{node}->owner_document->manakai_is_html) {
1212     $self->{onerror}->(node => $charset_attr,
1213     type => 'in XML:charset');
1214     }
1215     ## TODO: charset
1216     }
1217     },
1218     checker => $HTMLEmptyChecker,
1219     };
1220    
1221     $Element->{$HTML_NS}->{style} = {
1222     attrs_checker => $GetHTMLAttrsChecker->({
1223     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1224     media => $HTMLMQAttrChecker,
1225     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1226     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1227     ## not different
1228     }),
1229     checker => sub {
1230     ## NOTE: |html:style| has no conformance creteria on content model
1231     my ($self, $todo) = @_;
1232     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1233     $type = 'text/css' unless defined $type;
1234     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1235     type => 'style:'.$type); ## TODO: $type normalization
1236     return $AnyChecker->($self, $todo);
1237     },
1238     };
1239    
1240     $Element->{$HTML_NS}->{body} = {
1241     attrs_checker => $GetHTMLAttrsChecker->({}),
1242     checker => $HTMLBlockChecker,
1243     };
1244    
1245     $Element->{$HTML_NS}->{section} = {
1246     attrs_checker => $GetHTMLAttrsChecker->({}),
1247     checker => $HTMLStylableBlockChecker,
1248     };
1249    
1250     $Element->{$HTML_NS}->{nav} = {
1251     attrs_checker => $GetHTMLAttrsChecker->({}),
1252     checker => $HTMLBlockOrInlineChecker,
1253     };
1254    
1255     $Element->{$HTML_NS}->{article} = {
1256     attrs_checker => $GetHTMLAttrsChecker->({}),
1257     checker => $HTMLStylableBlockChecker,
1258     };
1259    
1260     $Element->{$HTML_NS}->{blockquote} = {
1261     attrs_checker => $GetHTMLAttrsChecker->({
1262     cite => $HTMLURIAttrChecker,
1263     }),
1264     checker => $HTMLBlockChecker,
1265     };
1266    
1267     $Element->{$HTML_NS}->{aside} = {
1268     attrs_checker => $GetHTMLAttrsChecker->({}),
1269     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1270     };
1271    
1272     $Element->{$HTML_NS}->{h1} = {
1273     attrs_checker => $GetHTMLAttrsChecker->({}),
1274     checker => sub {
1275     my ($self, $todo) = @_;
1276     $todo->{flag}->{has_heading}->[0] = 1;
1277     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1278     },
1279     };
1280    
1281     $Element->{$HTML_NS}->{h2} = {
1282     attrs_checker => $GetHTMLAttrsChecker->({}),
1283     checker => $Element->{$HTML_NS}->{h1}->{checker},
1284     };
1285    
1286     $Element->{$HTML_NS}->{h3} = {
1287     attrs_checker => $GetHTMLAttrsChecker->({}),
1288     checker => $Element->{$HTML_NS}->{h1}->{checker},
1289     };
1290    
1291     $Element->{$HTML_NS}->{h4} = {
1292     attrs_checker => $GetHTMLAttrsChecker->({}),
1293     checker => $Element->{$HTML_NS}->{h1}->{checker},
1294     };
1295    
1296     $Element->{$HTML_NS}->{h5} = {
1297     attrs_checker => $GetHTMLAttrsChecker->({}),
1298     checker => $Element->{$HTML_NS}->{h1}->{checker},
1299     };
1300    
1301     $Element->{$HTML_NS}->{h6} = {
1302     attrs_checker => $GetHTMLAttrsChecker->({}),
1303     checker => $Element->{$HTML_NS}->{h1}->{checker},
1304     };
1305    
1306     $Element->{$HTML_NS}->{header} = {
1307     attrs_checker => $GetHTMLAttrsChecker->({}),
1308     checker => sub {
1309     my ($self, $todo) = @_;
1310     my $old_flag = $todo->{flag}->{has_heading} || [];
1311     my $new_flag = [];
1312     local $todo->{flag}->{has_heading} = $new_flag;
1313     my $node = $todo->{node};
1314    
1315     my $end = $self->_add_minuses
1316     ({$HTML_NS => {qw/header 1 footer 1/}},
1317     $HTMLSectioningElements);
1318     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1319     push @$new_todos, $end,
1320     {type => 'code', code => sub {
1321     if ($new_flag->[0]) {
1322     $old_flag->[0] = 1;
1323     } else {
1324     $self->{onerror}->(node => $node, type => 'element missing:hn');
1325     }
1326     }};
1327     return ($new_todos, $ch);
1328     },
1329     };
1330    
1331     $Element->{$HTML_NS}->{footer} = {
1332     attrs_checker => $GetHTMLAttrsChecker->({}),
1333     checker => sub { ## block -hn -header -footer -sectioning or inline
1334     my ($self, $todo) = @_;
1335     my $el = $todo->{node};
1336     my $new_todos = [];
1337     my @nodes = (@{$el->child_nodes});
1338    
1339     my $content = 'block-or-inline'; # or 'block' or 'inline'
1340     my @block_not_inline;
1341     while (@nodes) {
1342     my $node = shift @nodes;
1343     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1344    
1345     my $nt = $node->node_type;
1346     if ($nt == 1) {
1347     my $node_ns = $node->namespace_uri;
1348     $node_ns = '' unless defined $node_ns;
1349     my $node_ln = $node->manakai_local_name;
1350     my $not_allowed;
1351     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1352     $not_allowed = 1;
1353     } elsif ($node_ns eq $HTML_NS and
1354     {
1355     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1356     }->{$node_ln}) {
1357     $not_allowed = 1;
1358     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1359     $not_allowed = 1;
1360     }
1361     if ($content eq 'block') {
1362     $not_allowed = 1
1363     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1364     } elsif ($content eq 'inline') {
1365     $not_allowed = 1
1366     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1367     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1368     } else {
1369     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1370     my $is_inline
1371     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1372     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1373    
1374     push @block_not_inline, $node
1375     if $is_block and not $is_inline and not $not_allowed;
1376     unless ($is_block) {
1377     $content = 'inline';
1378     for (@block_not_inline) {
1379     $self->{onerror}->(node => $_, type => 'element not allowed');
1380     }
1381     $not_allowed = 1 unless $is_inline;
1382     }
1383     }
1384     $self->{onerror}->(node => $node, type => 'element not allowed')
1385     if $not_allowed;
1386     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1387     unshift @nodes, @$sib;
1388     push @$new_todos, @$ch;
1389     } elsif ($nt == 3 or $nt == 4) {
1390     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1391     if ($content eq 'block') {
1392     $self->{onerror}->(node => $node, type => 'character not allowed');
1393     } else {
1394     $content = 'inline';
1395     for (@block_not_inline) {
1396     $self->{onerror}->(node => $_, type => 'element not allowed');
1397     }
1398     }
1399     }
1400     } elsif ($nt == 5) {
1401     unshift @nodes, @{$node->child_nodes};
1402     }
1403     }
1404    
1405     my $end = $self->_add_minuses
1406     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1407     $HTMLSectioningElements);
1408     push @$new_todos, $end;
1409    
1410     if ($content eq 'inline') {
1411     for (@$new_todos) {
1412     $_->{inline} = 1;
1413     }
1414     }
1415    
1416     return ($new_todos);
1417     },
1418     };
1419    
1420     $Element->{$HTML_NS}->{address} = {
1421     attrs_checker => $GetHTMLAttrsChecker->({}),
1422     checker => $HTMLInlineChecker,
1423     };
1424    
1425     $Element->{$HTML_NS}->{p} = {
1426     attrs_checker => $GetHTMLAttrsChecker->({}),
1427     checker => $HTMLSignificantInlineChecker,
1428     };
1429    
1430     $Element->{$HTML_NS}->{hr} = {
1431     attrs_checker => $GetHTMLAttrsChecker->({}),
1432     checker => $HTMLEmptyChecker,
1433     };
1434    
1435     $Element->{$HTML_NS}->{br} = {
1436     attrs_checker => $GetHTMLAttrsChecker->({}),
1437     checker => $HTMLEmptyChecker,
1438     };
1439    
1440     $Element->{$HTML_NS}->{dialog} = {
1441     attrs_checker => $GetHTMLAttrsChecker->({}),
1442     checker => sub {
1443     my ($self, $todo) = @_;
1444     my $el = $todo->{node};
1445     my $new_todos = [];
1446     my @nodes = (@{$el->child_nodes});
1447    
1448     my $phase = 'before dt';
1449     while (@nodes) {
1450     my $node = shift @nodes;
1451     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1452    
1453     my $nt = $node->node_type;
1454     if ($nt == 1) {
1455     my $node_ns = $node->namespace_uri;
1456     $node_ns = '' unless defined $node_ns;
1457     my $node_ln = $node->manakai_local_name;
1458     ## NOTE: |minuses| list is not checked since redundant
1459     if ($phase eq 'before dt') {
1460     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1461     $phase = 'before dd';
1462     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1463     $self->{onerror}
1464     ->(node => $node, type => 'ps element missing:dt');
1465     $phase = 'before dt';
1466     } else {
1467     $self->{onerror}->(node => $node, type => 'element not allowed');
1468     }
1469     } else { # before dd
1470     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1471     $phase = 'before dt';
1472     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1473     $self->{onerror}
1474     ->(node => $node, type => 'ps element missing:dd');
1475     $phase = 'before dd';
1476     } else {
1477     $self->{onerror}->(node => $node, type => 'element not allowed');
1478     }
1479     }
1480     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1481     unshift @nodes, @$sib;
1482     push @$new_todos, @$ch;
1483     } elsif ($nt == 3 or $nt == 4) {
1484     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1485     $self->{onerror}->(node => $node, type => 'character not allowed');
1486     }
1487     } elsif ($nt == 5) {
1488     unshift @nodes, @{$node->child_nodes};
1489     }
1490     }
1491     if ($phase eq 'before dd') {
1492     $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1493     }
1494     return ($new_todos);
1495     },
1496     };
1497    
1498     $Element->{$HTML_NS}->{pre} = {
1499     attrs_checker => $GetHTMLAttrsChecker->({}),
1500     checker => $HTMLStrictlyInlineChecker,
1501     };
1502    
1503     $Element->{$HTML_NS}->{ol} = {
1504     attrs_checker => $GetHTMLAttrsChecker->({
1505     start => $HTMLIntegerAttrChecker,
1506     }),
1507     checker => sub {
1508     my ($self, $todo) = @_;
1509     my $el = $todo->{node};
1510     my $new_todos = [];
1511     my @nodes = (@{$el->child_nodes});
1512    
1513     while (@nodes) {
1514     my $node = shift @nodes;
1515     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1516    
1517     my $nt = $node->node_type;
1518     if ($nt == 1) {
1519     my $node_ns = $node->namespace_uri;
1520     $node_ns = '' unless defined $node_ns;
1521     my $node_ln = $node->manakai_local_name;
1522     ## NOTE: |minuses| list is not checked since redundant
1523     unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1524     $self->{onerror}->(node => $node, type => 'element not allowed');
1525     }
1526     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1527     unshift @nodes, @$sib;
1528     push @$new_todos, @$ch;
1529     } elsif ($nt == 3 or $nt == 4) {
1530     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1531     $self->{onerror}->(node => $node, type => 'character not allowed');
1532     }
1533     } elsif ($nt == 5) {
1534     unshift @nodes, @{$node->child_nodes};
1535     }
1536     }
1537    
1538     if ($todo->{inline}) {
1539     for (@$new_todos) {
1540     $_->{inline} = 1;
1541     }
1542     }
1543     return ($new_todos);
1544     },
1545     };
1546    
1547     $Element->{$HTML_NS}->{ul} = {
1548     attrs_checker => $GetHTMLAttrsChecker->({}),
1549     checker => $Element->{$HTML_NS}->{ol}->{checker},
1550     };
1551    
1552    
1553     $Element->{$HTML_NS}->{li} = {
1554     attrs_checker => $GetHTMLAttrsChecker->({
1555     start => sub {
1556     my ($self, $attr) = @_;
1557     my $parent = $attr->owner_element->manakai_parent_element;
1558     if (defined $parent) {
1559     my $parent_ns = $parent->namespace_uri;
1560     $parent_ns = '' unless defined $parent_ns;
1561     my $parent_ln = $parent->manakai_local_name;
1562     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1563     $self->{onerror}->(node => $attr, level => 'unsupported',
1564     type => 'attribute');
1565     }
1566     }
1567     $HTMLIntegerAttrChecker->($self, $attr);
1568     },
1569     }),
1570     checker => sub {
1571     my ($self, $todo) = @_;
1572     if ($todo->{inline}) {
1573     return $HTMLInlineChecker->($self, $todo);
1574     } else {
1575     return $HTMLBlockOrInlineChecker->($self, $todo);
1576     }
1577     },
1578     };
1579    
1580     $Element->{$HTML_NS}->{dl} = {
1581     attrs_checker => $GetHTMLAttrsChecker->({}),
1582     checker => sub {
1583     my ($self, $todo) = @_;
1584     my $el = $todo->{node};
1585     my $new_todos = [];
1586     my @nodes = (@{$el->child_nodes});
1587    
1588     my $phase = 'before dt';
1589     while (@nodes) {
1590     my $node = shift @nodes;
1591     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1592    
1593     my $nt = $node->node_type;
1594     if ($nt == 1) {
1595     my $node_ns = $node->namespace_uri;
1596     $node_ns = '' unless defined $node_ns;
1597     my $node_ln = $node->manakai_local_name;
1598     ## NOTE: |minuses| list is not checked since redundant
1599     if ($phase eq 'in dds') {
1600     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1601     #$phase = 'in dds';
1602     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1603     $phase = 'in dts';
1604     } else {
1605     $self->{onerror}->(node => $node, type => 'element not allowed');
1606     }
1607     } elsif ($phase eq 'in dts') {
1608     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1609     #$phase = 'in dts';
1610     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1611     $phase = 'in dds';
1612     } else {
1613     $self->{onerror}->(node => $node, type => 'element not allowed');
1614     }
1615     } else { # before dt
1616     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1617     $phase = 'in dts';
1618     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1619     $self->{onerror}
1620     ->(node => $node, type => 'ps element missing:dt');
1621     $phase = 'in dds';
1622     } else {
1623     $self->{onerror}->(node => $node, type => 'element not allowed');
1624     }
1625     }
1626     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1627     unshift @nodes, @$sib;
1628     push @$new_todos, @$ch;
1629     } elsif ($nt == 3 or $nt == 4) {
1630     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1631     $self->{onerror}->(node => $node, type => 'character not allowed');
1632     }
1633     } elsif ($nt == 5) {
1634     unshift @nodes, @{$node->child_nodes};
1635     }
1636     }
1637     if ($phase eq 'in dts') {
1638     $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1639     }
1640    
1641     if ($todo->{inline}) {
1642     for (@$new_todos) {
1643     $_->{inline} = 1;
1644     }
1645     }
1646     return ($new_todos);
1647     },
1648     };
1649    
1650     $Element->{$HTML_NS}->{dt} = {
1651     attrs_checker => $GetHTMLAttrsChecker->({}),
1652     checker => $HTMLStrictlyInlineChecker,
1653     };
1654    
1655     $Element->{$HTML_NS}->{dd} = {
1656     attrs_checker => $GetHTMLAttrsChecker->({}),
1657     checker => $Element->{$HTML_NS}->{li}->{checker},
1658     };
1659    
1660     $Element->{$HTML_NS}->{a} = {
1661     attrs_checker => sub {
1662     my ($self, $todo) = @_;
1663     my %attr;
1664     for my $attr (@{$todo->{node}->attributes}) {
1665     my $attr_ns = $attr->namespace_uri;
1666     $attr_ns = '' unless defined $attr_ns;
1667     my $attr_ln = $attr->manakai_local_name;
1668     my $checker;
1669     if ($attr_ns eq '') {
1670     $checker = {
1671     target => $HTMLTargetAttrChecker,
1672     href => $HTMLURIAttrChecker,
1673     ping => $HTMLSpaceURIsAttrChecker,
1674 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1675 wakaba 1.1 media => $HTMLMQAttrChecker,
1676     hreflang => $HTMLLanguageTagAttrChecker,
1677     type => $HTMLIMTAttrChecker,
1678     }->{$attr_ln};
1679     if ($checker) {
1680     $attr{$attr_ln} = $attr;
1681     } else {
1682     $checker = $HTMLAttrChecker->{$attr_ln};
1683     }
1684     }
1685     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1686     || $AttrChecker->{$attr_ns}->{''};
1687     if ($checker) {
1688     $checker->($self, $attr) if ref $checker;
1689     } else {
1690     $self->{onerror}->(node => $attr, level => 'unsupported',
1691     type => 'attribute');
1692     ## ISSUE: No comformance createria for unknown attributes in the spec
1693     }
1694     }
1695    
1696 wakaba 1.4 if (defined $attr{href}) {
1697     $self->{has_hyperlink_element} = 1;
1698     } else {
1699 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1700     if (defined $attr{$_}) {
1701     $self->{onerror}->(node => $attr{$_},
1702     type => 'attribute not allowed');
1703     }
1704     }
1705     }
1706     },
1707     checker => sub {
1708     my ($self, $todo) = @_;
1709    
1710     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1711     my ($new_todos, $ch)
1712     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1713     push @$new_todos, $end;
1714    
1715     $_->{flag}->{has_a} = 1 for @$new_todos;
1716    
1717     return ($new_todos, $ch);
1718     },
1719     };
1720    
1721     $Element->{$HTML_NS}->{q} = {
1722     attrs_checker => $GetHTMLAttrsChecker->({
1723     cite => $HTMLURIAttrChecker,
1724     }),
1725     checker => $HTMLInlineOrStrictlyInlineChecker,
1726     };
1727    
1728     $Element->{$HTML_NS}->{cite} = {
1729     attrs_checker => $GetHTMLAttrsChecker->({}),
1730     checker => $HTMLStrictlyInlineChecker,
1731     };
1732    
1733     $Element->{$HTML_NS}->{em} = {
1734     attrs_checker => $GetHTMLAttrsChecker->({}),
1735     checker => $HTMLInlineOrStrictlyInlineChecker,
1736     };
1737    
1738     $Element->{$HTML_NS}->{strong} = {
1739     attrs_checker => $GetHTMLAttrsChecker->({}),
1740     checker => $HTMLInlineOrStrictlyInlineChecker,
1741     };
1742    
1743     $Element->{$HTML_NS}->{small} = {
1744     attrs_checker => $GetHTMLAttrsChecker->({}),
1745     checker => $HTMLInlineOrStrictlyInlineChecker,
1746     };
1747    
1748     $Element->{$HTML_NS}->{m} = {
1749     attrs_checker => $GetHTMLAttrsChecker->({}),
1750     checker => $HTMLInlineOrStrictlyInlineChecker,
1751     };
1752    
1753     $Element->{$HTML_NS}->{dfn} = {
1754     attrs_checker => $GetHTMLAttrsChecker->({}),
1755     checker => sub {
1756     my ($self, $todo) = @_;
1757    
1758     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1759     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1760     push @$sib, $end;
1761    
1762     my $node = $todo->{node};
1763     my $term = $node->get_attribute_ns (undef, 'title');
1764     unless (defined $term) {
1765     for my $child (@{$node->child_nodes}) {
1766     if ($child->node_type == 1) { # ELEMENT_NODE
1767     if (defined $term) {
1768     undef $term;
1769     last;
1770     } elsif ($child->manakai_local_name eq 'abbr') {
1771     my $nsuri = $child->namespace_uri;
1772     if (defined $nsuri and $nsuri eq $HTML_NS) {
1773     my $attr = $child->get_attribute_node_ns (undef, 'title');
1774     if ($attr) {
1775     $term = $attr->value;
1776     }
1777     }
1778     }
1779     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1780     ## TEXT_NODE or CDATA_SECTION_NODE
1781     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1782     next;
1783     }
1784     undef $term;
1785     last;
1786     }
1787     }
1788     unless (defined $term) {
1789     $term = $node->text_content;
1790     }
1791     }
1792     if ($self->{term}->{$term}) {
1793     $self->{onerror}->(node => $node, type => 'duplicate term');
1794     push @{$self->{term}->{$term}}, $node;
1795     } else {
1796     $self->{term}->{$term} = [$node];
1797     }
1798     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1799     ## has |title|.
1800    
1801     return ($sib, $ch);
1802     },
1803     };
1804    
1805     $Element->{$HTML_NS}->{abbr} = {
1806     attrs_checker => $GetHTMLAttrsChecker->({
1807     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1808     ## not different. The spec says that the |title| MAY be omitted
1809     ## if there is a |dfn| whose defining term is the abbreviation,
1810     ## but it does not prohibit |abbr| w/o |title| in other cases.
1811     }),
1812     checker => $HTMLStrictlyInlineChecker,
1813     };
1814    
1815     $Element->{$HTML_NS}->{time} = {
1816     attrs_checker => $GetHTMLAttrsChecker->({
1817     datetime => sub { 1 }, # checked in |checker|
1818     }),
1819     ## TODO: Write tests
1820     checker => sub {
1821     my ($self, $todo) = @_;
1822    
1823     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1824     my $input;
1825     my $reg_sp;
1826     my $input_node;
1827     if ($attr) {
1828     $input = $attr->value;
1829     $reg_sp = qr/[\x09-\x0D\x20]*/;
1830     $input_node = $attr;
1831     } else {
1832     $input = $todo->{node}->text_content;
1833     $reg_sp = qr/\p{Zs}*/;
1834     $input_node = $todo->{node};
1835    
1836     ## ISSUE: What is the definition for "successfully extracts a date
1837     ## or time"? If the algorithm says the string is invalid but
1838     ## return some date or time, is it "successfully"?
1839     }
1840    
1841     my $hour;
1842     my $minute;
1843     my $second;
1844     if ($input =~ /
1845     \A
1846     [\x09-\x0D\x20]*
1847     ([0-9]+) # 1
1848     (?>
1849     -([0-9]+) # 2
1850     -([0-9]+) # 3
1851     [\x09-\x0D\x20]*
1852     (?>
1853     T
1854     [\x09-\x0D\x20]*
1855     )?
1856     ([0-9]+) # 4
1857     :([0-9]+) # 5
1858     (?>
1859     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1860     )?
1861     [\x09-\x0D\x20]*
1862     (?>
1863     Z
1864     [\x09-\x0D\x20]*
1865     |
1866     [+-]([0-9]+):([0-9]+) # 7, 8
1867     [\x09-\x0D\x20]*
1868     )?
1869     \z
1870     |
1871     :([0-9]+) # 9
1872     (?>
1873     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1874     )?
1875     [\x09-\x0D\x20]*\z
1876     )
1877     /x) {
1878     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1879     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1880     length $4 != 2 or length $5 != 2) {
1881     $self->{onerror}->(node => $input_node,
1882     type => 'dateortime:syntax error');
1883     }
1884    
1885     if (1 <= $2 and $2 <= 12) {
1886     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1887     if $3 < 1 or
1888     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1889     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1890     if $2 == 2 and $3 == 29 and
1891     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1892     } else {
1893     $self->{onerror}->(node => $input_node,
1894     type => 'datetime:bad month');
1895     }
1896    
1897     ($hour, $minute, $second) = ($4, $5, $6);
1898    
1899     if (defined $7) { ## [+-]hh:mm
1900     if (length $7 != 2 or length $8 != 2) {
1901     $self->{onerror}->(node => $input_node,
1902     type => 'dateortime:syntax error');
1903     }
1904    
1905     $self->{onerror}->(node => $input_node,
1906     type => 'datetime:bad timezone hour')
1907     if $7 > 23;
1908     $self->{onerror}->(node => $input_node,
1909     type => 'datetime:bad timezone minute')
1910     if $8 > 59;
1911     }
1912     } else { ## hh:mm
1913     if (length $1 != 2 or length $9 != 2) {
1914     $self->{onerror}->(node => $input_node,
1915     type => qq'dateortime:syntax error');
1916     }
1917    
1918     ($hour, $minute, $second) = ($1, $9, $10);
1919     }
1920    
1921     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
1922     if $hour > 23;
1923     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
1924     if $minute > 59;
1925    
1926     if (defined $second) { ## s
1927     ## NOTE: Integer part of second don't have to have length of two.
1928    
1929     if (substr ($second, 0, 1) eq '.') {
1930     $self->{onerror}->(node => $input_node,
1931     type => 'dateortime:syntax error');
1932     }
1933    
1934     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
1935     if $second >= 60;
1936     }
1937     } else {
1938     $self->{onerror}->(node => $input_node,
1939     type => 'dateortime:syntax error');
1940     }
1941    
1942     return $HTMLStrictlyInlineChecker->($self, $todo);
1943     },
1944     };
1945    
1946     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1947     attrs_checker => $GetHTMLAttrsChecker->({
1948     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1949     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1950     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1951     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1952     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1953     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1954     }),
1955     checker => $HTMLStrictlyInlineChecker,
1956     };
1957    
1958     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1959     attrs_checker => $GetHTMLAttrsChecker->({
1960     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1961     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1962     }),
1963     checker => $HTMLStrictlyInlineChecker,
1964     };
1965    
1966     $Element->{$HTML_NS}->{code} = {
1967     attrs_checker => $GetHTMLAttrsChecker->({}),
1968     ## NOTE: Though |title| has special semantics,
1969     ## syntatically same as the |title| as global attribute.
1970     checker => $HTMLInlineOrStrictlyInlineChecker,
1971     };
1972    
1973     $Element->{$HTML_NS}->{var} = {
1974     attrs_checker => $GetHTMLAttrsChecker->({}),
1975     ## NOTE: Though |title| has special semantics,
1976     ## syntatically same as the |title| as global attribute.
1977     checker => $HTMLStrictlyInlineChecker,
1978     };
1979    
1980     $Element->{$HTML_NS}->{samp} = {
1981     attrs_checker => $GetHTMLAttrsChecker->({}),
1982     ## NOTE: Though |title| has special semantics,
1983     ## syntatically same as the |title| as global attribute.
1984     checker => $HTMLInlineOrStrictlyInlineChecker,
1985     };
1986    
1987     $Element->{$HTML_NS}->{kbd} = {
1988     attrs_checker => $GetHTMLAttrsChecker->({}),
1989     checker => $HTMLStrictlyInlineChecker,
1990     };
1991    
1992     $Element->{$HTML_NS}->{sub} = {
1993     attrs_checker => $GetHTMLAttrsChecker->({}),
1994     checker => $HTMLStrictlyInlineChecker,
1995     };
1996    
1997     $Element->{$HTML_NS}->{sup} = {
1998     attrs_checker => $GetHTMLAttrsChecker->({}),
1999     checker => $HTMLStrictlyInlineChecker,
2000     };
2001    
2002     $Element->{$HTML_NS}->{span} = {
2003     attrs_checker => $GetHTMLAttrsChecker->({}),
2004     ## NOTE: Though |title| has special semantics,
2005     ## syntatically same as the |title| as global attribute.
2006     checker => $HTMLInlineOrStrictlyInlineChecker,
2007     };
2008    
2009     $Element->{$HTML_NS}->{i} = {
2010     attrs_checker => $GetHTMLAttrsChecker->({}),
2011     ## NOTE: Though |title| has special semantics,
2012     ## syntatically same as the |title| as global attribute.
2013     checker => $HTMLStrictlyInlineChecker,
2014     };
2015    
2016     $Element->{$HTML_NS}->{b} = {
2017     attrs_checker => $GetHTMLAttrsChecker->({}),
2018     checker => $HTMLStrictlyInlineChecker,
2019     };
2020    
2021     $Element->{$HTML_NS}->{bdo} = {
2022     attrs_checker => sub {
2023     my ($self, $todo) = @_;
2024     $GetHTMLAttrsChecker->({})->($self, $todo);
2025     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2026     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2027     }
2028     },
2029     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2030     checker => $HTMLStrictlyInlineChecker,
2031     };
2032    
2033     $Element->{$HTML_NS}->{ins} = {
2034     attrs_checker => $GetHTMLAttrsChecker->({
2035     cite => $HTMLURIAttrChecker,
2036     datetime => $HTMLDatetimeAttrChecker,
2037     }),
2038     checker => $HTMLTransparentChecker,
2039     };
2040    
2041     $Element->{$HTML_NS}->{del} = {
2042     attrs_checker => $GetHTMLAttrsChecker->({
2043     cite => $HTMLURIAttrChecker,
2044     datetime => $HTMLDatetimeAttrChecker,
2045     }),
2046     checker => sub {
2047     my ($self, $todo) = @_;
2048    
2049     my $parent = $todo->{node}->manakai_parent_element;
2050     if (defined $parent) {
2051     my $nsuri = $parent->namespace_uri;
2052     $nsuri = '' unless defined $nsuri;
2053     my $ln = $parent->manakai_local_name;
2054     my $eldef = $Element->{$nsuri}->{$ln} ||
2055     $Element->{$nsuri}->{''} ||
2056     $ElementDefault;
2057     return $eldef->{checker}->($self, $todo);
2058     } else {
2059     return $HTMLBlockOrInlineChecker->($self, $todo);
2060     }
2061     },
2062     };
2063    
2064     ## TODO: figure
2065    
2066 wakaba 1.4 ## TODO: |alt|
2067 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2068     attrs_checker => sub {
2069     my ($self, $todo) = @_;
2070     $GetHTMLAttrsChecker->({
2071     alt => sub { }, ## NOTE: No syntactical requirement
2072     src => $HTMLURIAttrChecker,
2073     usemap => $HTMLUsemapAttrChecker,
2074     ismap => sub {
2075     my ($self, $attr, $parent_todo) = @_;
2076     if (not $todo->{flag}->{has_a}) {
2077     $self->{onerror}->(node => $attr, type => 'attribute not allowed');
2078     }
2079     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2080     },
2081     ## TODO: height
2082     ## TODO: width
2083     })->($self, $todo);
2084     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2085     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2086     }
2087     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2088     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2089     }
2090     },
2091     checker => $HTMLEmptyChecker,
2092     };
2093    
2094     $Element->{$HTML_NS}->{iframe} = {
2095     attrs_checker => $GetHTMLAttrsChecker->({
2096     src => $HTMLURIAttrChecker,
2097     }),
2098     checker => $HTMLTextChecker,
2099     };
2100    
2101     $Element->{$HTML_NS}->{embed} = {
2102     attrs_checker => sub {
2103     my ($self, $todo) = @_;
2104     my $has_src;
2105     for my $attr (@{$todo->{node}->attributes}) {
2106     my $attr_ns = $attr->namespace_uri;
2107     $attr_ns = '' unless defined $attr_ns;
2108     my $attr_ln = $attr->manakai_local_name;
2109     my $checker;
2110     if ($attr_ns eq '') {
2111     if ($attr_ln eq 'src') {
2112     $checker = $HTMLURIAttrChecker;
2113     $has_src = 1;
2114     } elsif ($attr_ln eq 'type') {
2115     $checker = $HTMLIMTAttrChecker;
2116     } else {
2117     ## TODO: height
2118     ## TODO: width
2119     $checker = $HTMLAttrChecker->{$attr_ln}
2120     || sub { }; ## NOTE: Any local attribute is ok.
2121     }
2122     }
2123     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2124     || $AttrChecker->{$attr_ns}->{''};
2125     if ($checker) {
2126     $checker->($self, $attr);
2127     } else {
2128     $self->{onerror}->(node => $attr, level => 'unsupported',
2129     type => 'attribute');
2130     ## ISSUE: No comformance createria for global attributes in the spec
2131     }
2132     }
2133    
2134     unless ($has_src) {
2135     $self->{onerror}->(node => $todo->{node},
2136     type => 'attribute missing:src');
2137     }
2138     },
2139     checker => $HTMLEmptyChecker,
2140     };
2141    
2142     $Element->{$HTML_NS}->{object} = {
2143     attrs_checker => sub {
2144     my ($self, $todo) = @_;
2145     $GetHTMLAttrsChecker->({
2146     data => $HTMLURIAttrChecker,
2147     type => $HTMLIMTAttrChecker,
2148     usemap => $HTMLUsemapAttrChecker,
2149     ## TODO: width
2150     ## TODO: height
2151     })->($self, $todo);
2152     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2153     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2154     $self->{onerror}->(node => $todo->{node},
2155     type => 'attribute missing:data|type');
2156     }
2157     }
2158     },
2159     checker => $ElementDefault->{checker}, ## TODO
2160     };
2161    
2162     $Element->{$HTML_NS}->{param} = {
2163     attrs_checker => sub {
2164     my ($self, $todo) = @_;
2165     $GetHTMLAttrsChecker->({
2166     name => sub { },
2167     value => sub { },
2168     })->($self, $todo);
2169     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2170     $self->{onerror}->(node => $todo->{node},
2171     type => 'attribute missing:name');
2172     }
2173     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2174     $self->{onerror}->(node => $todo->{node},
2175     type => 'attribute missing:value');
2176     }
2177     },
2178     checker => $HTMLEmptyChecker,
2179     };
2180    
2181     $Element->{$HTML_NS}->{video} = {
2182     attrs_checker => $GetHTMLAttrsChecker->({
2183     src => $HTMLURIAttrChecker,
2184     ## TODO: start, loopstart, loopend, end
2185     ## ISSUE: they MUST be "value time offset"s. Value?
2186     ## ISSUE: loopcount has no conformance creteria
2187     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2188     controls => $GetHTMLBooleanAttrChecker->('controls'),
2189     }),
2190     checker => sub {
2191     my ($self, $todo) = @_;
2192    
2193     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2194     return $HTMLBlockOrInlineChecker->($self, $todo);
2195     } else {
2196     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2197     ->($self, $todo);
2198     }
2199     },
2200     };
2201    
2202     $Element->{$HTML_NS}->{audio} = {
2203     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2204     checker => $Element->{$HTML_NS}->{video}->{checker},
2205     };
2206    
2207     $Element->{$HTML_NS}->{source} = {
2208     attrs_checker => sub {
2209     my ($self, $todo) = @_;
2210     $GetHTMLAttrsChecker->({
2211     src => $HTMLURIAttrChecker,
2212     type => $HTMLIMTAttrChecker,
2213     media => $HTMLMQAttrChecker,
2214     })->($self, $todo);
2215     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2216     $self->{onerror}->(node => $todo->{node},
2217     type => 'attribute missing:src');
2218     }
2219     },
2220     checker => $HTMLEmptyChecker,
2221     };
2222    
2223     $Element->{$HTML_NS}->{canvas} = {
2224     attrs_checker => $GetHTMLAttrsChecker->({
2225     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2226     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2227     }),
2228     checker => $HTMLInlineChecker,
2229     };
2230    
2231     $Element->{$HTML_NS}->{map} = {
2232 wakaba 1.4 attrs_checker => sub {
2233     my ($self, $todo) = @_;
2234     my $has_id;
2235     $GetHTMLAttrsChecker->({
2236     id => sub {
2237     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2238     my ($self, $attr) = @_;
2239     my $value = $attr->value;
2240     if (length $value > 0) {
2241     if ($self->{id}->{$value}) {
2242     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2243     push @{$self->{id}->{$value}}, $attr;
2244     } else {
2245     $self->{id}->{$value} = [$attr];
2246     }
2247 wakaba 1.1 } else {
2248 wakaba 1.4 ## NOTE: MUST contain at least one character
2249     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2250 wakaba 1.1 }
2251 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2252     $self->{onerror}->(node => $attr, type => 'space in ID');
2253     }
2254     $self->{map}->{$value} ||= $attr;
2255     $has_id = 1;
2256     },
2257     })->($self, $todo);
2258     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2259     unless $has_id;
2260     },
2261 wakaba 1.1 checker => $HTMLBlockChecker,
2262     };
2263    
2264     $Element->{$HTML_NS}->{area} = {
2265     attrs_checker => sub {
2266     my ($self, $todo) = @_;
2267     my %attr;
2268     my $coords;
2269     for my $attr (@{$todo->{node}->attributes}) {
2270     my $attr_ns = $attr->namespace_uri;
2271     $attr_ns = '' unless defined $attr_ns;
2272     my $attr_ln = $attr->manakai_local_name;
2273     my $checker;
2274     if ($attr_ns eq '') {
2275     $checker = {
2276     alt => sub { },
2277     ## NOTE: |alt| value has no conformance creteria.
2278     shape => $GetHTMLEnumeratedAttrChecker->({
2279     circ => -1, circle => 1,
2280     default => 1,
2281     poly => 1, polygon => -1,
2282     rect => 1, rectangle => -1,
2283     }),
2284     coords => sub {
2285     my ($self, $attr) = @_;
2286     my $value = $attr->value;
2287     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2288     $coords = [split /,/, $value];
2289     } else {
2290     $self->{onerror}->(node => $attr,
2291     type => 'coords:syntax error');
2292     }
2293     },
2294     target => $HTMLTargetAttrChecker,
2295     href => $HTMLURIAttrChecker,
2296     ping => $HTMLSpaceURIsAttrChecker,
2297 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2298 wakaba 1.1 media => $HTMLMQAttrChecker,
2299     hreflang => $HTMLLanguageTagAttrChecker,
2300     type => $HTMLIMTAttrChecker,
2301     }->{$attr_ln};
2302     if ($checker) {
2303     $attr{$attr_ln} = $attr;
2304     } else {
2305     $checker = $HTMLAttrChecker->{$attr_ln};
2306     }
2307     }
2308     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2309     || $AttrChecker->{$attr_ns}->{''};
2310     if ($checker) {
2311     $checker->($self, $attr) if ref $checker;
2312     } else {
2313     $self->{onerror}->(node => $attr, level => 'unsupported',
2314     type => 'attribute');
2315     ## ISSUE: No comformance createria for unknown attributes in the spec
2316     }
2317     }
2318    
2319     if (defined $attr{href}) {
2320 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2321 wakaba 1.1 unless (defined $attr{alt}) {
2322     $self->{onerror}->(node => $todo->{node},
2323     type => 'attribute missing:alt');
2324     }
2325     } else {
2326     for (qw/target ping rel media hreflang type alt/) {
2327     if (defined $attr{$_}) {
2328     $self->{onerror}->(node => $attr{$_},
2329     type => 'attribute not allowed');
2330     }
2331     }
2332     }
2333    
2334     my $shape = 'rectangle';
2335     if (defined $attr{shape}) {
2336     $shape = {
2337     circ => 'circle', circle => 'circle',
2338     default => 'default',
2339     poly => 'polygon', polygon => 'polygon',
2340     rect => 'rectangle', rectangle => 'rectangle',
2341     }->{lc $attr{shape}->value} || 'rectangle';
2342     ## TODO: ASCII lowercase?
2343     }
2344    
2345     if ($shape eq 'circle') {
2346     if (defined $attr{coords}) {
2347     if (defined $coords) {
2348     if (@$coords == 3) {
2349     if ($coords->[2] < 0) {
2350     $self->{onerror}->(node => $attr{coords},
2351     type => 'coords:out of range:2');
2352     }
2353     } else {
2354     $self->{onerror}->(node => $attr{coords},
2355     type => 'coords:number:3:'.@$coords);
2356     }
2357     } else {
2358     ## NOTE: A syntax error has been reported.
2359     }
2360     } else {
2361     $self->{onerror}->(node => $todo->{node},
2362     type => 'attribute missing:coords');
2363     }
2364     } elsif ($shape eq 'default') {
2365     if (defined $attr{coords}) {
2366     $self->{onerror}->(node => $attr{coords},
2367     type => 'attribute not allowed');
2368     }
2369     } elsif ($shape eq 'polygon') {
2370     if (defined $attr{coords}) {
2371     if (defined $coords) {
2372     if (@$coords >= 6) {
2373     unless (@$coords % 2 == 0) {
2374     $self->{onerror}->(node => $attr{coords},
2375     type => 'coords:number:even:'.@$coords);
2376     }
2377     } else {
2378     $self->{onerror}->(node => $attr{coords},
2379     type => 'coords:number:>=6:'.@$coords);
2380     }
2381     } else {
2382     ## NOTE: A syntax error has been reported.
2383     }
2384     } else {
2385     $self->{onerror}->(node => $todo->{node},
2386     type => 'attribute missing:coords');
2387     }
2388     } elsif ($shape eq 'rectangle') {
2389     if (defined $attr{coords}) {
2390     if (defined $coords) {
2391     if (@$coords == 4) {
2392     unless ($coords->[0] < $coords->[2]) {
2393     $self->{onerror}->(node => $attr{coords},
2394     type => 'coords:out of range:0');
2395     }
2396     unless ($coords->[1] < $coords->[3]) {
2397     $self->{onerror}->(node => $attr{coords},
2398     type => 'coords:out of range:1');
2399     }
2400     } else {
2401     $self->{onerror}->(node => $attr{coords},
2402     type => 'coords:number:4:'.@$coords);
2403     }
2404     } else {
2405     ## NOTE: A syntax error has been reported.
2406     }
2407     } else {
2408     $self->{onerror}->(node => $todo->{node},
2409     type => 'attribute missing:coords');
2410     }
2411     }
2412     },
2413     checker => $HTMLEmptyChecker,
2414     };
2415     ## TODO: only in map
2416    
2417     $Element->{$HTML_NS}->{table} = {
2418     attrs_checker => $GetHTMLAttrsChecker->({}),
2419     checker => sub {
2420     my ($self, $todo) = @_;
2421     my $el = $todo->{node};
2422     my $new_todos = [];
2423     my @nodes = (@{$el->child_nodes});
2424    
2425     my $phase = 'before caption';
2426     my $has_tfoot;
2427     while (@nodes) {
2428     my $node = shift @nodes;
2429     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2430    
2431     my $nt = $node->node_type;
2432     if ($nt == 1) {
2433     my $node_ns = $node->namespace_uri;
2434     $node_ns = '' unless defined $node_ns;
2435     my $node_ln = $node->manakai_local_name;
2436     ## NOTE: |minuses| list is not checked since redundant
2437     if ($phase eq 'in tbodys') {
2438     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2439     #$phase = 'in tbodys';
2440     } elsif (not $has_tfoot and
2441     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2442     $phase = 'after tfoot';
2443     $has_tfoot = 1;
2444     } else {
2445     $self->{onerror}->(node => $node, type => 'element not allowed');
2446     }
2447     } elsif ($phase eq 'in trs') {
2448     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2449     #$phase = 'in trs';
2450     } elsif (not $has_tfoot and
2451     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2452     $phase = 'after tfoot';
2453     $has_tfoot = 1;
2454     } else {
2455     $self->{onerror}->(node => $node, type => 'element not allowed');
2456     }
2457     } elsif ($phase eq 'after thead') {
2458     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2459     $phase = 'in tbodys';
2460     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2461     $phase = 'in trs';
2462     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2463     $phase = 'in tbodys';
2464     $has_tfoot = 1;
2465     } else {
2466     $self->{onerror}->(node => $node, type => 'element not allowed');
2467     }
2468     } elsif ($phase eq 'in colgroup') {
2469     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2470     $phase = 'in colgroup';
2471     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2472     $phase = 'after thead';
2473     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2474     $phase = 'in tbodys';
2475     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2476     $phase = 'in trs';
2477     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2478     $phase = 'in tbodys';
2479     $has_tfoot = 1;
2480     } else {
2481     $self->{onerror}->(node => $node, type => 'element not allowed');
2482     }
2483     } elsif ($phase eq 'before caption') {
2484     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2485     $phase = 'in colgroup';
2486     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2487     $phase = 'in colgroup';
2488     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2489     $phase = 'after thead';
2490     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2491     $phase = 'in tbodys';
2492     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2493     $phase = 'in trs';
2494     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2495     $phase = 'in tbodys';
2496     $has_tfoot = 1;
2497     } else {
2498     $self->{onerror}->(node => $node, type => 'element not allowed');
2499     }
2500     } else { # after tfoot
2501     $self->{onerror}->(node => $node, type => 'element not allowed');
2502     }
2503     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2504     unshift @nodes, @$sib;
2505     push @$new_todos, @$ch;
2506     } elsif ($nt == 3 or $nt == 4) {
2507     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2508     $self->{onerror}->(node => $node, type => 'character not allowed');
2509     }
2510     } elsif ($nt == 5) {
2511     unshift @nodes, @{$node->child_nodes};
2512     }
2513     }
2514    
2515     ## Table model errors
2516     require Whatpm::HTMLTable;
2517     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2518     my %opt = @_;
2519     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2520     });
2521     push @{$self->{return}->{table}}, $todo->{node};
2522    
2523     return ($new_todos);
2524     },
2525     };
2526    
2527     $Element->{$HTML_NS}->{caption} = {
2528     attrs_checker => $GetHTMLAttrsChecker->({}),
2529     checker => $HTMLSignificantStrictlyInlineChecker,
2530     };
2531    
2532     $Element->{$HTML_NS}->{colgroup} = {
2533     attrs_checker => $GetHTMLAttrsChecker->({
2534     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2535     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2536     ## TODO: "attribute not supported" if |col|.
2537     ## ISSUE: MUST NOT if any |col|?
2538     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2539     }),
2540     checker => sub {
2541     my ($self, $todo) = @_;
2542     my $el = $todo->{node};
2543     my $new_todos = [];
2544     my @nodes = (@{$el->child_nodes});
2545    
2546     while (@nodes) {
2547     my $node = shift @nodes;
2548     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2549    
2550     my $nt = $node->node_type;
2551     if ($nt == 1) {
2552     my $node_ns = $node->namespace_uri;
2553     $node_ns = '' unless defined $node_ns;
2554     my $node_ln = $node->manakai_local_name;
2555     ## NOTE: |minuses| list is not checked since redundant
2556     unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
2557     $self->{onerror}->(node => $node, type => 'element not allowed');
2558     }
2559     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2560     unshift @nodes, @$sib;
2561     push @$new_todos, @$ch;
2562     } elsif ($nt == 3 or $nt == 4) {
2563     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2564     $self->{onerror}->(node => $node, type => 'character not allowed');
2565     }
2566     } elsif ($nt == 5) {
2567     unshift @nodes, @{$node->child_nodes};
2568     }
2569     }
2570     return ($new_todos);
2571     },
2572     };
2573    
2574     $Element->{$HTML_NS}->{col} = {
2575     attrs_checker => $GetHTMLAttrsChecker->({
2576     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2577     }),
2578     checker => $HTMLEmptyChecker,
2579     };
2580    
2581     $Element->{$HTML_NS}->{tbody} = {
2582     attrs_checker => $GetHTMLAttrsChecker->({}),
2583     checker => sub {
2584     my ($self, $todo) = @_;
2585     my $el = $todo->{node};
2586     my $new_todos = [];
2587     my @nodes = (@{$el->child_nodes});
2588    
2589     my $has_tr;
2590     while (@nodes) {
2591     my $node = shift @nodes;
2592     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2593    
2594     my $nt = $node->node_type;
2595     if ($nt == 1) {
2596     my $node_ns = $node->namespace_uri;
2597     $node_ns = '' unless defined $node_ns;
2598     my $node_ln = $node->manakai_local_name;
2599     ## NOTE: |minuses| list is not checked since redundant
2600     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2601     $has_tr = 1;
2602     } else {
2603     $self->{onerror}->(node => $node, type => 'element not allowed');
2604     }
2605     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2606     unshift @nodes, @$sib;
2607     push @$new_todos, @$ch;
2608     } elsif ($nt == 3 or $nt == 4) {
2609     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2610     $self->{onerror}->(node => $node, type => 'character not allowed');
2611     }
2612     } elsif ($nt == 5) {
2613     unshift @nodes, @{$node->child_nodes};
2614     }
2615     }
2616     unless ($has_tr) {
2617     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2618     }
2619     return ($new_todos);
2620     },
2621     };
2622    
2623     $Element->{$HTML_NS}->{thead} = {
2624     attrs_checker => $GetHTMLAttrsChecker->({}),
2625     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2626     };
2627    
2628     $Element->{$HTML_NS}->{tfoot} = {
2629     attrs_checker => $GetHTMLAttrsChecker->({}),
2630     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2631     };
2632    
2633     $Element->{$HTML_NS}->{tr} = {
2634     attrs_checker => $GetHTMLAttrsChecker->({}),
2635     checker => sub {
2636     my ($self, $todo) = @_;
2637     my $el = $todo->{node};
2638     my $new_todos = [];
2639     my @nodes = (@{$el->child_nodes});
2640    
2641     my $has_td;
2642     while (@nodes) {
2643     my $node = shift @nodes;
2644     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2645    
2646     my $nt = $node->node_type;
2647     if ($nt == 1) {
2648     my $node_ns = $node->namespace_uri;
2649     $node_ns = '' unless defined $node_ns;
2650     my $node_ln = $node->manakai_local_name;
2651     ## NOTE: |minuses| list is not checked since redundant
2652     if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
2653     $has_td = 1;
2654     } else {
2655     $self->{onerror}->(node => $node, type => 'element not allowed');
2656     }
2657     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2658     unshift @nodes, @$sib;
2659     push @$new_todos, @$ch;
2660     } elsif ($nt == 3 or $nt == 4) {
2661     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2662     $self->{onerror}->(node => $node, type => 'character not allowed');
2663     }
2664     } elsif ($nt == 5) {
2665     unshift @nodes, @{$node->child_nodes};
2666     }
2667     }
2668     unless ($has_td) {
2669     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2670     }
2671     return ($new_todos);
2672     },
2673     };
2674    
2675     $Element->{$HTML_NS}->{td} = {
2676     attrs_checker => $GetHTMLAttrsChecker->({
2677     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2678     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2679     }),
2680     checker => $HTMLBlockOrInlineChecker,
2681     };
2682    
2683     $Element->{$HTML_NS}->{th} = {
2684     attrs_checker => $GetHTMLAttrsChecker->({
2685     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2686     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2687     scope => $GetHTMLEnumeratedAttrChecker
2688     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2689     }),
2690     checker => $HTMLBlockOrInlineChecker,
2691     };
2692    
2693     ## TODO: forms
2694    
2695     $Element->{$HTML_NS}->{script} = {
2696     attrs_checker => sub {
2697     my ($self, $todo) = @_;
2698     $GetHTMLAttrsChecker->({
2699     src => $HTMLURIAttrChecker,
2700     defer => $GetHTMLBooleanAttrChecker->('defer'),
2701     async => $GetHTMLBooleanAttrChecker->('async'),
2702     type => $HTMLIMTAttrChecker,
2703     })->($self, $todo);
2704     if ($todo->{node}->has_attribute_ns (undef, 'defer')) {
2705     my $async_attr = $todo->{node}->get_attribute_node_ns (undef, 'async');
2706     if ($async_attr) {
2707     $self->{onerror}->(node => $async_attr,
2708     type => 'attribute not allowed'); # MUST NOT
2709     }
2710     }
2711     },
2712     checker => sub {
2713     my ($self, $todo) = @_;
2714    
2715     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2716     return $HTMLEmptyChecker->($self, $todo);
2717     } else {
2718     ## NOTE: No content model conformance in HTML5 spec.
2719     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2720     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2721     if ((defined $type and $type eq '') or
2722     (defined $language and $language eq '')) {
2723     $type = 'text/javascript';
2724     } elsif (defined $type) {
2725     #
2726     } elsif (defined $language) {
2727     $type = 'text/' . $language;
2728     } else {
2729     $type = 'text/javascript';
2730     }
2731     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2732     type => 'script:'.$type); ## TODO: $type normalization
2733     return $AnyChecker->($self, $todo);
2734     }
2735     },
2736     };
2737    
2738     ## NOTE: When script is disabled.
2739     $Element->{$HTML_NS}->{noscript} = {
2740 wakaba 1.3 attrs_checker => sub {
2741     my ($self, $todo) = @_;
2742    
2743     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2744     ## since the later is not invoked when the |noscript| is used as a
2745     ## transparent element.
2746     unless ($todo->{node}->owner_document->manakai_is_html) {
2747     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2748     }
2749    
2750     $GetHTMLAttrsChecker->({})->($self, $todo);
2751     },
2752 wakaba 1.1 checker => sub {
2753     my ($self, $todo) = @_;
2754    
2755 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2756     my $new_todos = [];
2757     my @nodes = (@{$todo->{node}->child_nodes});
2758    
2759     while (@nodes) {
2760     my $node = shift @nodes;
2761     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2762    
2763     my $nt = $node->node_type;
2764     if ($nt == 1) {
2765     my $node_ns = $node->namespace_uri;
2766     $node_ns = '' unless defined $node_ns;
2767     my $node_ln = $node->manakai_local_name;
2768     if ($node_ns eq $HTML_NS) {
2769     if ({link => 1, style => 1}->{$node_ln}) {
2770     #
2771     } elsif ($node_ln eq 'meta') {
2772     if ($node->has_attribute_ns (undef, 'charset')) {
2773     $self->{onerror}->(node => $node, type => 'element not allowed');
2774     } else {
2775     #
2776     }
2777     } else {
2778     $self->{onerror}->(node => $node, type => 'element not allowed');
2779     }
2780     } else {
2781     $self->{onerror}->(node => $node, type => 'element not allowed');
2782     }
2783    
2784     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2785     unshift @nodes, @$sib;
2786     push @$new_todos, @$ch;
2787     } elsif ($nt == 3 or $nt == 4) {
2788     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2789     $self->{onerror}->(node => $node, type => 'character not allowed');
2790     }
2791     } elsif ($nt == 5) {
2792     unshift @nodes, @{$node->child_nodes};
2793     }
2794     }
2795     return ($new_todos);
2796     } else {
2797     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2798     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2799     push @$sib, $end;
2800     return ($sib, $ch);
2801     }
2802 wakaba 1.1 },
2803     };
2804 wakaba 1.3
2805     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2806 wakaba 1.1
2807     $Element->{$HTML_NS}->{'event-source'} = {
2808     attrs_checker => $GetHTMLAttrsChecker->({
2809     src => $HTMLURIAttrChecker,
2810     }),
2811     checker => $HTMLEmptyChecker,
2812     };
2813    
2814     $Element->{$HTML_NS}->{details} = {
2815     attrs_checker => $GetHTMLAttrsChecker->({
2816     open => $GetHTMLBooleanAttrChecker->('open'),
2817     }),
2818     checker => sub {
2819     my ($self, $todo) = @_;
2820    
2821     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2822     my ($sib, $ch)
2823     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2824     ->($self, $todo);
2825     push @$sib, $end;
2826     return ($sib, $ch);
2827     },
2828     };
2829    
2830     $Element->{$HTML_NS}->{datagrid} = {
2831     attrs_checker => $GetHTMLAttrsChecker->({
2832     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2833     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2834     }),
2835     checker => sub {
2836     my ($self, $todo) = @_;
2837     my $el = $todo->{node};
2838     my $new_todos = [];
2839     my @nodes = (@{$el->child_nodes});
2840    
2841     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2842    
2843     ## Block-table Block* | table | select | datalist | Empty
2844     my $mode = 'any';
2845     while (@nodes) {
2846     my $node = shift @nodes;
2847     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2848    
2849     my $nt = $node->node_type;
2850     if ($nt == 1) {
2851     my $node_ns = $node->namespace_uri;
2852     $node_ns = '' unless defined $node_ns;
2853     my $node_ln = $node->manakai_local_name;
2854     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2855     if ($mode eq 'block') {
2856     $not_allowed = 1
2857     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2858     } elsif ($mode eq 'any') {
2859     if ($node_ns eq $HTML_NS and
2860     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2861     $mode = 'none';
2862     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2863     $mode = 'block';
2864     } else {
2865     $not_allowed = 1;
2866     }
2867     } else {
2868     $not_allowed = 1;
2869     }
2870     $self->{onerror}->(node => $node, type => 'element not allowed')
2871     if $not_allowed;
2872     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2873     unshift @nodes, @$sib;
2874     push @$new_todos, @$ch;
2875     } elsif ($nt == 3 or $nt == 4) {
2876     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2877     $self->{onerror}->(node => $node, type => 'character not allowed');
2878     }
2879     } elsif ($nt == 5) {
2880     unshift @nodes, @{$node->child_nodes};
2881     }
2882     }
2883    
2884     push @$new_todos, $end;
2885     return ($new_todos);
2886     },
2887     };
2888    
2889     $Element->{$HTML_NS}->{command} = {
2890     attrs_checker => $GetHTMLAttrsChecker->({
2891     checked => $GetHTMLBooleanAttrChecker->('checked'),
2892     default => $GetHTMLBooleanAttrChecker->('default'),
2893     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2894     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2895     icon => $HTMLURIAttrChecker,
2896     label => sub { }, ## NOTE: No conformance creteria
2897     radiogroup => sub { }, ## NOTE: No conformance creteria
2898     ## NOTE: |title| has special semantics, but no syntactical difference
2899     type => sub {
2900     my ($self, $attr) = @_;
2901     my $value = $attr->value;
2902     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2903     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2904     }
2905     },
2906     }),
2907     checker => $HTMLEmptyChecker,
2908     };
2909    
2910     $Element->{$HTML_NS}->{menu} = {
2911     attrs_checker => $GetHTMLAttrsChecker->({
2912     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
2913     id => sub {
2914     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
2915     my ($self, $attr) = @_;
2916     my $value = $attr->value;
2917     if (length $value > 0) {
2918     if ($self->{id}->{$value}) {
2919     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2920     push @{$self->{id}->{$value}}, $attr;
2921     } else {
2922     $self->{id}->{$value} = [$attr];
2923     }
2924     } else {
2925     ## NOTE: MUST contain at least one character
2926     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2927     }
2928     if ($value =~ /[\x09-\x0D\x20]/) {
2929     $self->{onerror}->(node => $attr, type => 'space in ID');
2930     }
2931     $self->{menu}->{$value} ||= $attr;
2932     ## ISSUE: <menu id=""><p contextmenu=""> match?
2933     },
2934     label => sub { }, ## NOTE: No conformance creteria
2935     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
2936     }),
2937     checker => sub {
2938     my ($self, $todo) = @_;
2939     my $el = $todo->{node};
2940     my $new_todos = [];
2941     my @nodes = (@{$el->child_nodes});
2942    
2943     my $content = 'li or inline';
2944     while (@nodes) {
2945     my $node = shift @nodes;
2946     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2947    
2948     my $nt = $node->node_type;
2949     if ($nt == 1) {
2950     my $node_ns = $node->namespace_uri;
2951     $node_ns = '' unless defined $node_ns;
2952     my $node_ln = $node->manakai_local_name;
2953     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2954     if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2955     if ($content eq 'inline') {
2956     $not_allowed = 1;
2957     } elsif ($content eq 'li or inline') {
2958     $content = 'li';
2959     }
2960     } else {
2961     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2962     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2963     $content = 'inline';
2964     } else {
2965     $not_allowed = 1;
2966     }
2967     }
2968     $self->{onerror}->(node => $node, type => 'element not allowed')
2969     if $not_allowed;
2970     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2971     unshift @nodes, @$sib;
2972     push @$new_todos, @$ch;
2973     } elsif ($nt == 3 or $nt == 4) {
2974     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2975     if ($content eq 'li') {
2976     $self->{onerror}->(node => $node, type => 'character not allowed');
2977     } elsif ($content eq 'li or inline') {
2978     $content = 'inline';
2979     }
2980     }
2981     } elsif ($nt == 5) {
2982     unshift @nodes, @{$node->child_nodes};
2983     }
2984     }
2985    
2986     for (@$new_todos) {
2987     $_->{inline} = 1;
2988     }
2989     return ($new_todos);
2990     },
2991     };
2992    
2993     $Element->{$HTML_NS}->{legend} = {
2994     attrs_checker => $GetHTMLAttrsChecker->({}),
2995     checker => sub {
2996     my ($self, $todo) = @_;
2997    
2998     my $parent = $todo->{node}->manakai_parent_element;
2999     if (defined $parent) {
3000     my $nsuri = $parent->namespace_uri;
3001     $nsuri = '' unless defined $nsuri;
3002     my $ln = $parent->manakai_local_name;
3003     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3004     return $HTMLInlineChecker->($self, $todo);
3005     } else {
3006     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
3007     }
3008     } else {
3009     return $HTMLInlineChecker->($self, $todo);
3010     }
3011    
3012     ## ISSUE: Content model is defined only for fieldset/legend,
3013     ## details/legend, and figure/legend.
3014     },
3015     };
3016    
3017     $Element->{$HTML_NS}->{div} = {
3018     attrs_checker => $GetHTMLAttrsChecker->({}),
3019     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3020     };
3021    
3022     $Element->{$HTML_NS}->{font} = {
3023     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3024     checker => $HTMLTransparentChecker,
3025     };
3026    
3027     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3028    
3029     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24