/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (hide annotations) (download)
Mon Sep 24 04:23:45 2007 UTC (17 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.6: +2 -0 lines
++ whatpm/t/ChangeLog	24 Sep 2007 04:23:24 -0000
2007-09-24  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.t: New test data files are added.

	* content-model-atom-1.dat, content-model-atom-2.dat: New test data.

++ whatpm/Whatpm/ChangeLog	24 Sep 2007 04:21:59 -0000
2007-09-24  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm: Set level values for later uses.

++ whatpm/Whatpm/ContentChecker/ChangeLog	24 Sep 2007 04:22:38 -0000
2007-09-24  Wakaba  <wakaba@suika.fam.cx>

	* Atom.pm (hreflang): Checker is implemented.
	(AtomDateConstruct): Checking for content is implemented.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10     noscript 1
11     /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27     /,
28     },
29     };
30    
31     my $HTMLStrictlyInlineLevelElements = {
32     $HTML_NS => {
33     qw/
34     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
35     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
36     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
37     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
38     script 1 noscript 1 event-source 1 command 1 font 1
39     /,
40     },
41     };
42    
43     my $HTMLStructuredInlineLevelElements = {
44     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
45     };
46    
47     my $HTMLInteractiveElements = {
48     $HTML_NS => {a => 1, details => 1, datagrid => 1},
49     };
50     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
51     ## of interactive elements
52    
53     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
54    
55     #my $HTMLSemiTransparentElements = {
56     # $HTML_NS => {qw/video 1 audio 1/},
57     #};
58    
59     my $HTMLEmbededElements = {
60     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
61     };
62    
63     ## Empty
64     my $HTMLEmptyChecker = sub {
65     my ($self, $todo) = @_;
66     my $el = $todo->{node};
67     my $new_todos = [];
68     my @nodes = (@{$el->child_nodes});
69    
70     while (@nodes) {
71     my $node = shift @nodes;
72     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
73    
74     my $nt = $node->node_type;
75     if ($nt == 1) {
76     ## NOTE: |minuses| list is not checked since redundant
77     $self->{onerror}->(node => $node, type => 'element not allowed');
78     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
79     unshift @nodes, @$sib;
80     push @$new_todos, @$ch;
81     } elsif ($nt == 3 or $nt == 4) {
82     if ($node->data =~ /[^\x09-\x0D\x20]/) {
83     $self->{onerror}->(node => $node, type => 'character not allowed');
84     }
85     } elsif ($nt == 5) {
86     unshift @nodes, @{$node->child_nodes};
87     }
88     }
89     return ($new_todos);
90     };
91    
92     ## Text
93     my $HTMLTextChecker = sub {
94     my ($self, $todo) = @_;
95     my $el = $todo->{node};
96     my $new_todos = [];
97     my @nodes = (@{$el->child_nodes});
98    
99     while (@nodes) {
100     my $node = shift @nodes;
101     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
102    
103     my $nt = $node->node_type;
104     if ($nt == 1) {
105     ## NOTE: |minuses| list is not checked since redundant
106     $self->{onerror}->(node => $node, type => 'element not allowed');
107     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
108     unshift @nodes, @$sib;
109     push @$new_todos, @$ch;
110     } elsif ($nt == 5) {
111     unshift @nodes, @{$node->child_nodes};
112     }
113     }
114     return ($new_todos);
115     };
116    
117     ## Zero or more |html:style| elements,
118     ## followed by zero or more block-level elements
119     my $HTMLStylableBlockChecker = sub {
120     my ($self, $todo) = @_;
121     my $el = $todo->{node};
122     my $new_todos = [];
123     my @nodes = (@{$el->child_nodes});
124    
125     my $has_non_style;
126     while (@nodes) {
127     my $node = shift @nodes;
128     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
129    
130     my $nt = $node->node_type;
131     if ($nt == 1) {
132     my $node_ns = $node->namespace_uri;
133     $node_ns = '' unless defined $node_ns;
134     my $node_ln = $node->manakai_local_name;
135     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
136     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
137     $not_allowed = 1 if $has_non_style or
138     not $node->has_attribute_ns (undef, 'scoped');
139     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
140     $has_non_style = 1;
141     } else {
142     $has_non_style = 1;
143     $not_allowed = 1;
144     }
145     $self->{onerror}->(node => $node, type => 'element not allowed')
146     if $not_allowed;
147     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
148     unshift @nodes, @$sib;
149     push @$new_todos, @$ch;
150     } elsif ($nt == 3 or $nt == 4) {
151     if ($node->data =~ /[^\x09-\x0D\x20]/) {
152     $self->{onerror}->(node => $node, type => 'character not allowed');
153     }
154     } elsif ($nt == 5) {
155     unshift @nodes, @{$node->child_nodes};
156     }
157     }
158     return ($new_todos);
159     }; # $HTMLStylableBlockChecker
160    
161     ## Zero or more block-level elements
162     my $HTMLBlockChecker = sub {
163     my ($self, $todo) = @_;
164     my $el = $todo->{node};
165     my $new_todos = [];
166     my @nodes = (@{$el->child_nodes});
167    
168     while (@nodes) {
169     my $node = shift @nodes;
170     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
171    
172     my $nt = $node->node_type;
173     if ($nt == 1) {
174     my $node_ns = $node->namespace_uri;
175     $node_ns = '' unless defined $node_ns;
176     my $node_ln = $node->manakai_local_name;
177     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
178     $not_allowed = 1
179     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
180     $self->{onerror}->(node => $node, type => 'element not allowed')
181     if $not_allowed;
182     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
183     unshift @nodes, @$sib;
184     push @$new_todos, @$ch;
185     } elsif ($nt == 3 or $nt == 4) {
186     if ($node->data =~ /[^\x09-\x0D\x20]/) {
187     $self->{onerror}->(node => $node, type => 'character not allowed');
188     }
189     } elsif ($nt == 5) {
190     unshift @nodes, @{$node->child_nodes};
191     }
192     }
193     return ($new_todos);
194     }; # $HTMLBlockChecker
195    
196     ## Inline-level content
197     my $HTMLInlineChecker = sub {
198     my ($self, $todo) = @_;
199     my $el = $todo->{node};
200     my $new_todos = [];
201     my @nodes = (@{$el->child_nodes});
202    
203     while (@nodes) {
204     my $node = shift @nodes;
205     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
206    
207     my $nt = $node->node_type;
208     if ($nt == 1) {
209     my $node_ns = $node->namespace_uri;
210     $node_ns = '' unless defined $node_ns;
211     my $node_ln = $node->manakai_local_name;
212     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
213     $not_allowed = 1
214     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
215     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
216     $self->{onerror}->(node => $node, type => 'element not allowed')
217     if $not_allowed;
218     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
219     unshift @nodes, @$sib;
220     push @$new_todos, @$ch;
221     } elsif ($nt == 5) {
222     unshift @nodes, @{$node->child_nodes};
223     }
224     }
225    
226     for (@$new_todos) {
227     $_->{inline} = 1;
228     }
229     return ($new_todos);
230     }; # $HTMLInlineChecker
231    
232     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
233     ## TODO: check significant content
234    
235     ## Strictly inline-level content
236     my $HTMLStrictlyInlineChecker = sub {
237     my ($self, $todo) = @_;
238     my $el = $todo->{node};
239     my $new_todos = [];
240     my @nodes = (@{$el->child_nodes});
241    
242     while (@nodes) {
243     my $node = shift @nodes;
244     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
245    
246     my $nt = $node->node_type;
247     if ($nt == 1) {
248     my $node_ns = $node->namespace_uri;
249     $node_ns = '' unless defined $node_ns;
250     my $node_ln = $node->manakai_local_name;
251     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
252     $not_allowed = 1
253     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
254     $self->{onerror}->(node => $node, type => 'element not allowed')
255     if $not_allowed;
256     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
257     unshift @nodes, @$sib;
258     push @$new_todos, @$ch;
259     } elsif ($nt == 5) {
260     unshift @nodes, @{$node->child_nodes};
261     }
262     }
263    
264     for (@$new_todos) {
265     $_->{inline} = 1;
266     $_->{strictly_inline} = 1;
267     }
268     return ($new_todos);
269     }; # $HTMLStrictlyInlineChecker
270    
271     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
272     ## TODO: check significant content
273    
274     ## Inline-level or strictly inline-kevek content
275     my $HTMLInlineOrStrictlyInlineChecker = sub {
276     my ($self, $todo) = @_;
277     my $el = $todo->{node};
278     my $new_todos = [];
279     my @nodes = (@{$el->child_nodes});
280    
281     while (@nodes) {
282     my $node = shift @nodes;
283     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
284    
285     my $nt = $node->node_type;
286     if ($nt == 1) {
287     my $node_ns = $node->namespace_uri;
288     $node_ns = '' unless defined $node_ns;
289     my $node_ln = $node->manakai_local_name;
290     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
291     if ($todo->{strictly_inline}) {
292     $not_allowed = 1
293     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
294     } else {
295     $not_allowed = 1
296     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
297     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
298     }
299     $self->{onerror}->(node => $node, type => 'element not allowed')
300     if $not_allowed;
301     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
302     unshift @nodes, @$sib;
303     push @$new_todos, @$ch;
304     } elsif ($nt == 5) {
305     unshift @nodes, @{$node->child_nodes};
306     }
307     }
308    
309     for (@$new_todos) {
310     $_->{inline} = 1;
311     $_->{strictly_inline} = 1;
312     }
313     return ($new_todos);
314     }; # $HTMLInlineOrStrictlyInlineChecker
315    
316     my $HTMLSignificantInlineOrStrictlyInlineChecker
317     = $HTMLInlineOrStrictlyInlineChecker;
318     ## TODO: check significant content
319    
320     my $HTMLBlockOrInlineChecker = sub {
321     my ($self, $todo) = @_;
322     my $el = $todo->{node};
323     my $new_todos = [];
324     my @nodes = (@{$el->child_nodes});
325    
326     my $content = 'block-or-inline'; # or 'block' or 'inline'
327     my @block_not_inline;
328     while (@nodes) {
329     my $node = shift @nodes;
330     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
331    
332     my $nt = $node->node_type;
333     if ($nt == 1) {
334     my $node_ns = $node->namespace_uri;
335     $node_ns = '' unless defined $node_ns;
336     my $node_ln = $node->manakai_local_name;
337     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
338     if ($content eq 'block') {
339     $not_allowed = 1
340     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
341     } elsif ($content eq 'inline') {
342     $not_allowed = 1
343     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
344     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
345     } else {
346     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
347     my $is_inline
348     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
349     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
350    
351     push @block_not_inline, $node
352     if $is_block and not $is_inline and not $not_allowed;
353     unless ($is_block) {
354     $content = 'inline';
355     for (@block_not_inline) {
356     $self->{onerror}->(node => $_, type => 'element not allowed');
357     }
358     $not_allowed = 1 unless $is_inline;
359     }
360     }
361     $self->{onerror}->(node => $node, type => 'element not allowed')
362     if $not_allowed;
363     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
364     unshift @nodes, @$sib;
365     push @$new_todos, @$ch;
366     } elsif ($nt == 3 or $nt == 4) {
367     if ($node->data =~ /[^\x09-\x0D\x20]/) {
368     if ($content eq 'block') {
369     $self->{onerror}->(node => $node, type => 'character not allowed');
370     } else {
371     $content = 'inline';
372     for (@block_not_inline) {
373     $self->{onerror}->(node => $_, type => 'element not allowed');
374     }
375     }
376     }
377     } elsif ($nt == 5) {
378     unshift @nodes, @{$node->child_nodes};
379     }
380     }
381    
382     if ($content eq 'inline') {
383     for (@$new_todos) {
384     $_->{inline} = 1;
385     }
386     }
387     return ($new_todos);
388     };
389    
390     ## Zero or more XXX element, then either block-level or inline-level
391     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
392     my ($elnsuri, $ellname) = @_;
393     return sub {
394     my ($self, $todo) = @_;
395     my $el = $todo->{node};
396     my $new_todos = [];
397     my @nodes = (@{$el->child_nodes});
398    
399     my $has_non_style;
400     my $content = 'block-or-inline'; # or 'block' or 'inline'
401     my @block_not_inline;
402     while (@nodes) {
403     my $node = shift @nodes;
404     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
405    
406     my $nt = $node->node_type;
407     if ($nt == 1) {
408     my $node_ns = $node->namespace_uri;
409     $node_ns = '' unless defined $node_ns;
410     my $node_ln = $node->manakai_local_name;
411     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
412     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
413     $not_allowed = 1 if $has_non_style;
414     if ($ellname eq 'style' and
415     not $node->has_attribute_ns (undef, 'scoped')) {
416     $not_allowed = 1;
417     }
418     } elsif ($content eq 'block') {
419     $has_non_style = 1;
420     $not_allowed = 1
421     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
422     } elsif ($content eq 'inline') {
423     $has_non_style = 1;
424     $not_allowed = 1
425     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
426     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
427     } else {
428     $has_non_style = 1;
429     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
430     my $is_inline
431     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
432     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
433    
434     push @block_not_inline, $node
435     if $is_block and not $is_inline and not $not_allowed;
436     unless ($is_block) {
437     $content = 'inline';
438     for (@block_not_inline) {
439     $self->{onerror}->(node => $_, type => 'element not allowed');
440     }
441     $not_allowed = 1 unless $is_inline;
442     }
443     }
444     $self->{onerror}->(node => $node, type => 'element not allowed')
445     if $not_allowed;
446     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
447     unshift @nodes, @$sib;
448     push @$new_todos, @$ch;
449     } elsif ($nt == 3 or $nt == 4) {
450     if ($node->data =~ /[^\x09-\x0D\x20]/) {
451     $has_non_style = 1;
452     if ($content eq 'block') {
453     $self->{onerror}->(node => $node, type => 'character not allowed');
454     } else {
455     $content = 'inline';
456     for (@block_not_inline) {
457     $self->{onerror}->(node => $_, type => 'element not allowed');
458     }
459     }
460     }
461     } elsif ($nt == 5) {
462     unshift @nodes, @{$node->child_nodes};
463     }
464     }
465    
466     if ($content eq 'inline') {
467     for (@$new_todos) {
468     $_->{inline} = 1;
469     }
470     }
471     return ($new_todos);
472     };
473     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
474    
475     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
476    
477     our $AttrChecker;
478    
479     my $GetHTMLEnumeratedAttrChecker = sub {
480     my $states = shift; # {value => conforming ? 1 : -1}
481     return sub {
482     my ($self, $attr) = @_;
483     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
484     if ($states->{$value} > 0) {
485     #
486     } elsif ($states->{$value}) {
487     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
488     } else {
489     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
490     }
491     };
492     }; # $GetHTMLEnumeratedAttrChecker
493    
494     my $GetHTMLBooleanAttrChecker = sub {
495     my $local_name = shift;
496     return sub {
497     my ($self, $attr) = @_;
498     my $value = $attr->value;
499     unless ($value eq $local_name or $value eq '') {
500     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
501     }
502     };
503     }; # $GetHTMLBooleanAttrChecker
504    
505     ## |rel| attribute (unordered set of space separated tokens,
506     ## whose allowed values are defined by the section on link types)
507     my $HTMLLinkTypesAttrChecker = sub {
508 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
509 wakaba 1.1 my %word;
510     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
511     unless ($word{$word}) {
512     $word{$word} = 1;
513     } else {
514     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
515     }
516     }
517     ## NOTE: Case sensitive match (since HTML5 spec does not say link
518     ## types are case-insensitive and it says "The value should not
519     ## be confusingly similar to any other defined value (e.g.
520     ## differing only in case).").
521     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
522     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
523     ## values to be used conformingly.
524     require Whatpm::_LinkTypeList;
525     our $LinkType;
526     for my $word (keys %word) {
527     my $def = $LinkType->{$word};
528     if (defined $def) {
529     if ($def->{status} eq 'accepted') {
530     if (defined $def->{effect}->[$a_or_area]) {
531     #
532     } else {
533     $self->{onerror}->(node => $attr,
534     type => 'link type:bad context:'.$word);
535     }
536     } elsif ($def->{status} eq 'proposal') {
537     $self->{onerror}->(node => $attr, level => 's',
538     type => 'link type:proposed:'.$word);
539     } else { # rejected or synonym
540     $self->{onerror}->(node => $attr,
541     type => 'link type:non-conforming:'.$word);
542     }
543 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
544     if ($word eq 'alternate') {
545     #
546     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
547     $todo->{has_hyperlink_link_type} = 1;
548     }
549     }
550 wakaba 1.1 if ($def->{unique}) {
551     unless ($self->{has_link_type}->{$word}) {
552     $self->{has_link_type}->{$word} = 1;
553     } else {
554     $self->{onerror}->(node => $attr,
555     type => 'link type:duplicate:'.$word);
556     }
557     }
558     } else {
559     $self->{onerror}->(node => $attr, level => 'unsupported',
560     type => 'link type:'.$word);
561     }
562     }
563 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
564     if $word{alternate} and not $word{stylesheet};
565 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
566     ## says that using both X-Pingback: header field and HTML
567     ## <link rel=pingback> is deprecated and if both appears they
568     ## SHOULD contain exactly the same value.
569     ## ISSUE: Pingback 1.0 specification defines the exact representation
570     ## of its link element, which cannot be tested by the current arch.
571     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
572     ## include any string that matches to the pattern for the rel=pingback link,
573     ## which again inpossible to test.
574     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
575     }; # $HTMLLinkTypesAttrChecker
576    
577     ## URI (or IRI)
578     my $HTMLURIAttrChecker = sub {
579     my ($self, $attr) = @_;
580     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
581     my $value = $attr->value;
582     Whatpm::URIChecker->check_iri_reference ($value, sub {
583     my %opt = @_;
584     $self->{onerror}->(node => $attr, level => $opt{level},
585     type => 'URI::'.$opt{type}.
586     (defined $opt{position} ? ':'.$opt{position} : ''));
587     });
588 wakaba 1.4 $self->{has_uri_attr} = 1;
589 wakaba 1.1 }; # $HTMLURIAttrChecker
590    
591     ## A space separated list of one or more URIs (or IRIs)
592     my $HTMLSpaceURIsAttrChecker = sub {
593     my ($self, $attr) = @_;
594     my $i = 0;
595     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
596     Whatpm::URIChecker->check_iri_reference ($value, sub {
597     my %opt = @_;
598     $self->{onerror}->(node => $attr, level => $opt{level},
599 wakaba 1.2 type => 'URIs:'.':'.
600     $opt{type}.':'.$i.
601 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
602     });
603     $i++;
604     }
605     ## ISSUE: Relative references?
606     ## ISSUE: Leading or trailing white spaces are conformant?
607     ## ISSUE: A sequence of white space characters are conformant?
608     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
609     ## NOTE: Duplication seems not an error.
610 wakaba 1.4 $self->{has_uri_attr} = 1;
611 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
612    
613     my $HTMLDatetimeAttrChecker = sub {
614     my ($self, $attr) = @_;
615     my $value = $attr->value;
616     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
617     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
618     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
619     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
620     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
621     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
622     if $d < 1 or
623     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
624     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
625     if $M == 2 and $d == 29 and
626     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
627     } else {
628     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
629     }
630     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
631     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
632     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
633     if defined $s and $s > 59;
634     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
635     if $zh > 23;
636     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
637     if $zm > 59;
638     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
639     } else {
640     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
641     }
642     }; # $HTMLDatetimeAttrChecker
643    
644     my $HTMLIntegerAttrChecker = sub {
645     my ($self, $attr) = @_;
646     my $value = $attr->value;
647     unless ($value =~ /\A-?[0-9]+\z/) {
648     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
649     }
650     }; # $HTMLIntegerAttrChecker
651    
652     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
653     my $range_check = shift;
654     return sub {
655     my ($self, $attr) = @_;
656     my $value = $attr->value;
657     if ($value =~ /\A[0-9]+\z/) {
658     unless ($range_check->($value + 0)) {
659     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
660     }
661     } else {
662     $self->{onerror}->(node => $attr,
663     type => 'nninteger:syntax error');
664     }
665     };
666     }; # $GetHTMLNonNegativeIntegerAttrChecker
667    
668     my $GetHTMLFloatingPointNumberAttrChecker = sub {
669     my $range_check = shift;
670     return sub {
671     my ($self, $attr) = @_;
672     my $value = $attr->value;
673     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
674     unless ($range_check->($value + 0)) {
675     $self->{onerror}->(node => $attr, type => 'float:out of range');
676     }
677     } else {
678     $self->{onerror}->(node => $attr,
679     type => 'float:syntax error');
680     }
681     };
682     }; # $GetHTMLFloatingPointNumberAttrChecker
683    
684     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
685     ## ISSUE: RFC 2046 does not define syntax of media types.
686     ## ISSUE: The definition of "a valid MIME type" is unknown.
687     ## Syntactical correctness?
688     my $HTMLIMTAttrChecker = sub {
689     my ($self, $attr) = @_;
690     my $value = $attr->value;
691     ## ISSUE: RFC 2045 Content-Type header field allows insertion
692     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
693     ## ISSUE: RFC 2231 extension? Maybe no.
694     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
695     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
696     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
697     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
698     my @type = ($1, $2);
699     my $param = $3;
700     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
701     if (defined $2) {
702     push @type, $1 => $2;
703     } else {
704     my $n = $1;
705     my $v = $2;
706     $v =~ s/\\(.)/$1/gs;
707     push @type, $n => $v;
708     }
709     }
710     require Whatpm::IMTChecker;
711     Whatpm::IMTChecker->check_imt (sub {
712     my %opt = @_;
713     $self->{onerror}->(node => $attr, level => $opt{level},
714     type => 'IMT:'.$opt{type});
715     }, @type);
716     } else {
717     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
718     }
719     }; # $HTMLIMTAttrChecker
720    
721     my $HTMLLanguageTagAttrChecker = sub {
722 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
723    
724 wakaba 1.1 my ($self, $attr) = @_;
725 wakaba 1.6 my $value = $attr->value;
726     require Whatpm::LangTag;
727     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
728     my %opt = @_;
729     my $type = 'LangTag:'.$opt{type};
730     $type .= ':' . $opt{subtag} if defined $opt{subtag};
731     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
732     level => $opt{level});
733     });
734 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
735 wakaba 1.6
736     ## TODO: testdata
737 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
738    
739     ## "A valid media query [MQ]"
740     my $HTMLMQAttrChecker = sub {
741     my ($self, $attr) = @_;
742     $self->{onerror}->(node => $attr, level => 'unsupported',
743     type => 'media query');
744     ## ISSUE: What is "a valid media query"?
745     }; # $HTMLMQAttrChecker
746    
747     my $HTMLEventHandlerAttrChecker = sub {
748     my ($self, $attr) = @_;
749     $self->{onerror}->(node => $attr, level => 'unsupported',
750     type => 'event handler');
751     ## TODO: MUST contain valid ECMAScript code matching the
752     ## ECMAScript |FunctionBody| production. [ECMA262]
753     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
754     ## ISSUE: Automatic semicolon insertion does not apply?
755     ## ISSUE: Other script languages?
756     }; # $HTMLEventHandlerAttrChecker
757    
758     my $HTMLUsemapAttrChecker = sub {
759     my ($self, $attr) = @_;
760     ## MUST be a valid hashed ID reference to a |map| element
761     my $value = $attr->value;
762     if ($value =~ s/^#//) {
763     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
764     push @{$self->{usemap}}, [$value => $attr];
765     } else {
766     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
767     }
768     ## NOTE: Space characters in hashed ID references are conforming.
769     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
770     }; # $HTMLUsemapAttrChecker
771    
772     my $HTMLTargetAttrChecker = sub {
773     my ($self, $attr) = @_;
774     my $value = $attr->value;
775     if ($value =~ /^_/) {
776     $value = lc $value; ## ISSUE: ASCII case-insentitive?
777     unless ({
778     _self => 1, _parent => 1, _top => 1,
779     }->{$value}) {
780     $self->{onerror}->(node => $attr,
781     type => 'reserved browsing context name');
782     }
783     } else {
784     #$ ISSUE: An empty string is conforming?
785     }
786     }; # $HTMLTargetAttrChecker
787    
788     my $HTMLAttrChecker = {
789     id => sub {
790     ## NOTE: |map| has its own variant of |id=""| checker
791     my ($self, $attr) = @_;
792     my $value = $attr->value;
793     if (length $value > 0) {
794     if ($self->{id}->{$value}) {
795     $self->{onerror}->(node => $attr, type => 'duplicate ID');
796     push @{$self->{id}->{$value}}, $attr;
797     } else {
798     $self->{id}->{$value} = [$attr];
799     }
800     if ($value =~ /[\x09-\x0D\x20]/) {
801     $self->{onerror}->(node => $attr, type => 'space in ID');
802     }
803     } else {
804     ## NOTE: MUST contain at least one character
805     $self->{onerror}->(node => $attr, type => 'empty attribute value');
806     }
807     },
808     title => sub {}, ## NOTE: No conformance creteria
809     lang => sub {
810     my ($self, $attr) = @_;
811 wakaba 1.6 my $value = $attr->value;
812     if ($value eq '') {
813     #
814     } else {
815     require Whatpm::LangTag;
816     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
817     my %opt = @_;
818     my $type = 'LangTag:'.$opt{type};
819     $type .= ':' . $opt{subtag} if defined $opt{subtag};
820     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
821     level => $opt{level});
822     });
823     }
824 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
825     unless ($attr->owner_document->manakai_is_html) {
826     $self->{onerror}->(node => $attr, type => 'in XML:lang');
827     }
828 wakaba 1.6
829     ## TODO: test data
830 wakaba 1.1 },
831     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
832     class => sub {
833     my ($self, $attr) = @_;
834     my %word;
835     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
836     unless ($word{$word}) {
837     $word{$word} = 1;
838     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
839     } else {
840     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
841     }
842     }
843     },
844     contextmenu => sub {
845     my ($self, $attr) = @_;
846     my $value = $attr->value;
847     push @{$self->{contextmenu}}, [$value => $attr];
848     ## ISSUE: "The value must be the ID of a menu element in the DOM."
849     ## What is "in the DOM"? A menu Element node that is not part
850     ## of the Document tree is in the DOM? A menu Element node that
851     ## belong to another Document tree is in the DOM?
852     },
853     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
854     tabindex => $HTMLIntegerAttrChecker,
855     };
856    
857     for (qw/
858     onabort onbeforeunload onblur onchange onclick oncontextmenu
859     ondblclick ondrag ondragend ondragenter ondragleave ondragover
860     ondragstart ondrop onerror onfocus onkeydown onkeypress
861     onkeyup onload onmessage onmousedown onmousemove onmouseout
862     onmouseover onmouseup onmousewheel onresize onscroll onselect
863     onsubmit onunload
864     /) {
865     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
866     }
867    
868     my $GetHTMLAttrsChecker = sub {
869     my $element_specific_checker = shift;
870     return sub {
871     my ($self, $todo) = @_;
872     for my $attr (@{$todo->{node}->attributes}) {
873     my $attr_ns = $attr->namespace_uri;
874     $attr_ns = '' unless defined $attr_ns;
875     my $attr_ln = $attr->manakai_local_name;
876     my $checker;
877     if ($attr_ns eq '') {
878     $checker = $element_specific_checker->{$attr_ln}
879     || $HTMLAttrChecker->{$attr_ln};
880     }
881     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
882     || $AttrChecker->{$attr_ns}->{''};
883     if ($checker) {
884     $checker->($self, $attr, $todo);
885     } else {
886     $self->{onerror}->(node => $attr, level => 'unsupported',
887     type => 'attribute');
888     ## ISSUE: No comformance createria for unknown attributes in the spec
889     }
890     }
891     };
892     }; # $GetHTMLAttrsChecker
893    
894     our $Element;
895     our $ElementDefault;
896     our $AnyChecker;
897    
898     $Element->{$HTML_NS}->{''} = {
899     attrs_checker => $GetHTMLAttrsChecker->({}),
900     checker => $ElementDefault->{checker},
901     };
902    
903     $Element->{$HTML_NS}->{html} = {
904     is_root => 1,
905     attrs_checker => $GetHTMLAttrsChecker->({
906     xmlns => sub {
907     my ($self, $attr) = @_;
908     my $value = $attr->value;
909     unless ($value eq $HTML_NS) {
910     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
911     }
912     unless ($attr->owner_document->manakai_is_html) {
913     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
914     ## TODO: Test
915     }
916     },
917     }),
918     checker => sub {
919     my ($self, $todo) = @_;
920     my $el = $todo->{node};
921     my $new_todos = [];
922     my @nodes = (@{$el->child_nodes});
923    
924     my $phase = 'before head';
925     while (@nodes) {
926     my $node = shift @nodes;
927     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
928    
929     my $nt = $node->node_type;
930     if ($nt == 1) {
931     my $node_ns = $node->namespace_uri;
932     $node_ns = '' unless defined $node_ns;
933     my $node_ln = $node->manakai_local_name;
934     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
935     if ($phase eq 'before head') {
936     if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
937     $phase = 'after head';
938     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
939     $self->{onerror}->(node => $node, type => 'ps element missing:head');
940     $phase = 'after body';
941     } else {
942     $not_allowed = 1;
943     # before head
944     }
945     } elsif ($phase eq 'after head') {
946     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
947     $phase = 'after body';
948     } else {
949     $not_allowed = 1;
950     # after head
951     }
952     } else { #elsif ($phase eq 'after body') {
953     $not_allowed = 1;
954     # after body
955     }
956     $self->{onerror}->(node => $node, type => 'element not allowed')
957     if $not_allowed;
958     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
959     unshift @nodes, @$sib;
960     push @$new_todos, @$ch;
961     } elsif ($nt == 3 or $nt == 4) {
962     if ($node->data =~ /[^\x09-\x0D\x20]/) {
963     $self->{onerror}->(node => $node, type => 'character not allowed');
964     }
965     } elsif ($nt == 5) {
966     unshift @nodes, @{$node->child_nodes};
967     }
968     }
969    
970     if ($phase eq 'before head') {
971     $self->{onerror}->(node => $el, type => 'child element missing:head');
972     $self->{onerror}->(node => $el, type => 'child element missing:body');
973     } elsif ($phase eq 'after head') {
974     $self->{onerror}->(node => $el, type => 'child element missing:body');
975     }
976    
977     return ($new_todos);
978     },
979     };
980    
981     $Element->{$HTML_NS}->{head} = {
982     attrs_checker => $GetHTMLAttrsChecker->({}),
983     checker => sub {
984     my ($self, $todo) = @_;
985     my $el = $todo->{node};
986     my $new_todos = [];
987     my @nodes = (@{$el->child_nodes});
988    
989     my $has_title;
990     my $phase = 'initial'; # 'after charset', 'after base'
991     while (@nodes) {
992     my $node = shift @nodes;
993     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
994    
995     my $nt = $node->node_type;
996     if ($nt == 1) {
997     my $node_ns = $node->namespace_uri;
998     $node_ns = '' unless defined $node_ns;
999     my $node_ln = $node->manakai_local_name;
1000     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1001     if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1002     $phase = 'after base';
1003     unless ($has_title) {
1004     $has_title = 1;
1005     } else {
1006     $not_allowed = 1;
1007     }
1008     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1009     if ($node->has_attribute_ns (undef, 'charset')) {
1010     if ($phase eq 'initial') {
1011     $phase = 'after charset';
1012     } else {
1013     $not_allowed = 1;
1014     ## NOTE: See also |base|'s "contexts" field in the spec
1015     }
1016 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1017     $node->has_attribute_ns (undef, 'http-equiv')) {
1018     $phase = 'after base';
1019 wakaba 1.1 } else {
1020     $phase = 'after base';
1021 wakaba 1.5 $not_allowed = 1;
1022 wakaba 1.1 }
1023     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1024     if ($phase eq 'initial' or $phase eq 'after charset') {
1025     $phase = 'after base';
1026     } else {
1027     $not_allowed = 1;
1028     }
1029     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1030     $phase = 'after base';
1031     if ($node->has_attribute_ns (undef, 'scoped')) {
1032     $not_allowed = 1;
1033     }
1034     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1035     $phase = 'after base';
1036     } else {
1037     $not_allowed = 1;
1038     }
1039     $self->{onerror}->(node => $node, type => 'element not allowed')
1040     if $not_allowed;
1041 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1042 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1043     unshift @nodes, @$sib;
1044     push @$new_todos, @$ch;
1045     } elsif ($nt == 3 or $nt == 4) {
1046     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1047     $self->{onerror}->(node => $node, type => 'character not allowed');
1048     }
1049     } elsif ($nt == 5) {
1050     unshift @nodes, @{$node->child_nodes};
1051     }
1052     }
1053     unless ($has_title) {
1054     $self->{onerror}->(node => $el, type => 'child element missing:title');
1055     }
1056     return ($new_todos);
1057     },
1058     };
1059    
1060     $Element->{$HTML_NS}->{title} = {
1061     attrs_checker => $GetHTMLAttrsChecker->({}),
1062     checker => $HTMLTextChecker,
1063     };
1064    
1065     $Element->{$HTML_NS}->{base} = {
1066 wakaba 1.4 attrs_checker => sub {
1067     my ($self, $todo) = @_;
1068    
1069     if ($self->{has_uri_attr} and
1070     $todo->{node}->has_attribute_ns (undef, 'href')) {
1071     ## ISSUE: Are these examples conforming?
1072     ## <head profile="a b c"><base href> (except for |profile|'s
1073     ## non-conformance)
1074     ## <title xml:base="relative"/><base href/> (maybe it should be)
1075     ## <unknown xmlns="relative"/><base href/> (assuming that
1076     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1077     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1078     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1079     ## <style>@import 'relative';</style><base href>
1080     ## <script>location.href = 'relative';</script><base href>
1081     $self->{onerror}->(node => $todo->{node},
1082     type => 'basehref after URI attribute');
1083     }
1084     if ($self->{has_hyperlink_element} and
1085     $todo->{node}->has_attribute_ns (undef, 'target')) {
1086     ## ISSUE: Are these examples conforming?
1087     ## <head><title xlink:href=""/><base target="name"/></head>
1088     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1089     ## (assuming that |xbl:xbl| is allowed before |base|)
1090     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1091     ## <link href=""/><base target="name"/>
1092     ## <link rel=unknown href=""><base target=name>
1093     $self->{onerror}->(node => $todo->{node},
1094     type => 'basetarget after hyperlink');
1095     }
1096    
1097     return $GetHTMLAttrsChecker->({
1098     href => $HTMLURIAttrChecker,
1099     target => $HTMLTargetAttrChecker,
1100     })->($self, $todo);
1101     },
1102 wakaba 1.1 checker => $HTMLEmptyChecker,
1103     };
1104    
1105     $Element->{$HTML_NS}->{link} = {
1106     attrs_checker => sub {
1107     my ($self, $todo) = @_;
1108     $GetHTMLAttrsChecker->({
1109     href => $HTMLURIAttrChecker,
1110 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1111 wakaba 1.1 media => $HTMLMQAttrChecker,
1112     hreflang => $HTMLLanguageTagAttrChecker,
1113     type => $HTMLIMTAttrChecker,
1114     ## NOTE: Though |title| has special semantics,
1115     ## syntactically same as the |title| as global attribute.
1116     })->($self, $todo);
1117 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1118     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1119     } else {
1120 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1121     type => 'attribute missing:href');
1122     }
1123     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1124     $self->{onerror}->(node => $todo->{node},
1125     type => 'attribute missing:rel');
1126     }
1127     },
1128     checker => $HTMLEmptyChecker,
1129     };
1130    
1131     $Element->{$HTML_NS}->{meta} = {
1132     attrs_checker => sub {
1133     my ($self, $todo) = @_;
1134     my $name_attr;
1135     my $http_equiv_attr;
1136     my $charset_attr;
1137     my $content_attr;
1138     for my $attr (@{$todo->{node}->attributes}) {
1139     my $attr_ns = $attr->namespace_uri;
1140     $attr_ns = '' unless defined $attr_ns;
1141     my $attr_ln = $attr->manakai_local_name;
1142     my $checker;
1143     if ($attr_ns eq '') {
1144     if ($attr_ln eq 'content') {
1145     $content_attr = $attr;
1146     $checker = 1;
1147     } elsif ($attr_ln eq 'name') {
1148     $name_attr = $attr;
1149     $checker = 1;
1150     } elsif ($attr_ln eq 'http-equiv') {
1151     $http_equiv_attr = $attr;
1152     $checker = 1;
1153     } elsif ($attr_ln eq 'charset') {
1154     $charset_attr = $attr;
1155     $checker = 1;
1156     } else {
1157     $checker = $HTMLAttrChecker->{$attr_ln}
1158     || $AttrChecker->{$attr_ns}->{$attr_ln}
1159     || $AttrChecker->{$attr_ns}->{''};
1160     }
1161     } else {
1162     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1163     || $AttrChecker->{$attr_ns}->{''};
1164     }
1165     if ($checker) {
1166     $checker->($self, $attr) if ref $checker;
1167     } else {
1168     $self->{onerror}->(node => $attr, level => 'unsupported',
1169     type => 'attribute');
1170     ## ISSUE: No comformance createria for unknown attributes in the spec
1171     }
1172     }
1173    
1174     if (defined $name_attr) {
1175     if (defined $http_equiv_attr) {
1176     $self->{onerror}->(node => $http_equiv_attr,
1177     type => 'attribute not allowed');
1178     } elsif (defined $charset_attr) {
1179     $self->{onerror}->(node => $charset_attr,
1180     type => 'attribute not allowed');
1181     }
1182     my $metadata_name = $name_attr->value;
1183     my $metadata_value;
1184     if (defined $content_attr) {
1185     $metadata_value = $content_attr->value;
1186     } else {
1187     $self->{onerror}->(node => $todo->{node},
1188     type => 'attribute missing:content');
1189     $metadata_value = '';
1190     }
1191     } elsif (defined $http_equiv_attr) {
1192     if (defined $charset_attr) {
1193     $self->{onerror}->(node => $charset_attr,
1194     type => 'attribute not allowed');
1195     }
1196     unless (defined $content_attr) {
1197     $self->{onerror}->(node => $todo->{node},
1198     type => 'attribute missing:content');
1199     }
1200     } elsif (defined $charset_attr) {
1201     if (defined $content_attr) {
1202     $self->{onerror}->(node => $content_attr,
1203     type => 'attribute not allowed');
1204     }
1205     } else {
1206     if (defined $content_attr) {
1207     $self->{onerror}->(node => $content_attr,
1208     type => 'attribute not allowed');
1209     $self->{onerror}->(node => $todo->{node},
1210     type => 'attribute missing:name|http-equiv');
1211     } else {
1212     $self->{onerror}->(node => $todo->{node},
1213     type => 'attribute missing:name|http-equiv|charset');
1214     }
1215     }
1216    
1217     ## TODO: metadata conformance
1218    
1219     ## TODO: pragma conformance
1220     if (defined $http_equiv_attr) { ## An enumerated attribute
1221     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1222     if ({
1223     'refresh' => 1,
1224     'default-style' => 1,
1225     }->{$keyword}) {
1226     #
1227     } else {
1228     $self->{onerror}->(node => $http_equiv_attr,
1229     type => 'enumerated:invalid');
1230     }
1231     }
1232    
1233     if (defined $charset_attr) {
1234     unless ($todo->{node}->owner_document->manakai_is_html) {
1235     $self->{onerror}->(node => $charset_attr,
1236     type => 'in XML:charset');
1237     }
1238     ## TODO: charset
1239     }
1240     },
1241     checker => $HTMLEmptyChecker,
1242     };
1243    
1244     $Element->{$HTML_NS}->{style} = {
1245     attrs_checker => $GetHTMLAttrsChecker->({
1246     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1247     media => $HTMLMQAttrChecker,
1248     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1249     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1250     ## not different
1251     }),
1252     checker => sub {
1253     ## NOTE: |html:style| has no conformance creteria on content model
1254     my ($self, $todo) = @_;
1255     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1256     $type = 'text/css' unless defined $type;
1257     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1258     type => 'style:'.$type); ## TODO: $type normalization
1259     return $AnyChecker->($self, $todo);
1260     },
1261     };
1262    
1263     $Element->{$HTML_NS}->{body} = {
1264     attrs_checker => $GetHTMLAttrsChecker->({}),
1265     checker => $HTMLBlockChecker,
1266     };
1267    
1268     $Element->{$HTML_NS}->{section} = {
1269     attrs_checker => $GetHTMLAttrsChecker->({}),
1270     checker => $HTMLStylableBlockChecker,
1271     };
1272    
1273     $Element->{$HTML_NS}->{nav} = {
1274     attrs_checker => $GetHTMLAttrsChecker->({}),
1275     checker => $HTMLBlockOrInlineChecker,
1276     };
1277    
1278     $Element->{$HTML_NS}->{article} = {
1279     attrs_checker => $GetHTMLAttrsChecker->({}),
1280     checker => $HTMLStylableBlockChecker,
1281     };
1282    
1283     $Element->{$HTML_NS}->{blockquote} = {
1284     attrs_checker => $GetHTMLAttrsChecker->({
1285     cite => $HTMLURIAttrChecker,
1286     }),
1287     checker => $HTMLBlockChecker,
1288     };
1289    
1290     $Element->{$HTML_NS}->{aside} = {
1291     attrs_checker => $GetHTMLAttrsChecker->({}),
1292     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1293     };
1294    
1295     $Element->{$HTML_NS}->{h1} = {
1296     attrs_checker => $GetHTMLAttrsChecker->({}),
1297     checker => sub {
1298     my ($self, $todo) = @_;
1299     $todo->{flag}->{has_heading}->[0] = 1;
1300     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1301     },
1302     };
1303    
1304     $Element->{$HTML_NS}->{h2} = {
1305     attrs_checker => $GetHTMLAttrsChecker->({}),
1306     checker => $Element->{$HTML_NS}->{h1}->{checker},
1307     };
1308    
1309     $Element->{$HTML_NS}->{h3} = {
1310     attrs_checker => $GetHTMLAttrsChecker->({}),
1311     checker => $Element->{$HTML_NS}->{h1}->{checker},
1312     };
1313    
1314     $Element->{$HTML_NS}->{h4} = {
1315     attrs_checker => $GetHTMLAttrsChecker->({}),
1316     checker => $Element->{$HTML_NS}->{h1}->{checker},
1317     };
1318    
1319     $Element->{$HTML_NS}->{h5} = {
1320     attrs_checker => $GetHTMLAttrsChecker->({}),
1321     checker => $Element->{$HTML_NS}->{h1}->{checker},
1322     };
1323    
1324     $Element->{$HTML_NS}->{h6} = {
1325     attrs_checker => $GetHTMLAttrsChecker->({}),
1326     checker => $Element->{$HTML_NS}->{h1}->{checker},
1327     };
1328    
1329     $Element->{$HTML_NS}->{header} = {
1330     attrs_checker => $GetHTMLAttrsChecker->({}),
1331     checker => sub {
1332     my ($self, $todo) = @_;
1333     my $old_flag = $todo->{flag}->{has_heading} || [];
1334     my $new_flag = [];
1335     local $todo->{flag}->{has_heading} = $new_flag;
1336     my $node = $todo->{node};
1337    
1338     my $end = $self->_add_minuses
1339     ({$HTML_NS => {qw/header 1 footer 1/}},
1340     $HTMLSectioningElements);
1341     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1342     push @$new_todos, $end,
1343     {type => 'code', code => sub {
1344     if ($new_flag->[0]) {
1345     $old_flag->[0] = 1;
1346     } else {
1347     $self->{onerror}->(node => $node, type => 'element missing:hn');
1348     }
1349     }};
1350     return ($new_todos, $ch);
1351     },
1352     };
1353    
1354     $Element->{$HTML_NS}->{footer} = {
1355     attrs_checker => $GetHTMLAttrsChecker->({}),
1356     checker => sub { ## block -hn -header -footer -sectioning or inline
1357     my ($self, $todo) = @_;
1358     my $el = $todo->{node};
1359     my $new_todos = [];
1360     my @nodes = (@{$el->child_nodes});
1361    
1362     my $content = 'block-or-inline'; # or 'block' or 'inline'
1363     my @block_not_inline;
1364     while (@nodes) {
1365     my $node = shift @nodes;
1366     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1367    
1368     my $nt = $node->node_type;
1369     if ($nt == 1) {
1370     my $node_ns = $node->namespace_uri;
1371     $node_ns = '' unless defined $node_ns;
1372     my $node_ln = $node->manakai_local_name;
1373     my $not_allowed;
1374     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1375     $not_allowed = 1;
1376     } elsif ($node_ns eq $HTML_NS and
1377     {
1378     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1379     }->{$node_ln}) {
1380     $not_allowed = 1;
1381     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1382     $not_allowed = 1;
1383     }
1384     if ($content eq 'block') {
1385     $not_allowed = 1
1386     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1387     } elsif ($content eq 'inline') {
1388     $not_allowed = 1
1389     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1390     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1391     } else {
1392     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1393     my $is_inline
1394     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1395     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1396    
1397     push @block_not_inline, $node
1398     if $is_block and not $is_inline and not $not_allowed;
1399     unless ($is_block) {
1400     $content = 'inline';
1401     for (@block_not_inline) {
1402     $self->{onerror}->(node => $_, type => 'element not allowed');
1403     }
1404     $not_allowed = 1 unless $is_inline;
1405     }
1406     }
1407     $self->{onerror}->(node => $node, type => 'element not allowed')
1408     if $not_allowed;
1409     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1410     unshift @nodes, @$sib;
1411     push @$new_todos, @$ch;
1412     } elsif ($nt == 3 or $nt == 4) {
1413     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1414     if ($content eq 'block') {
1415     $self->{onerror}->(node => $node, type => 'character not allowed');
1416     } else {
1417     $content = 'inline';
1418     for (@block_not_inline) {
1419     $self->{onerror}->(node => $_, type => 'element not allowed');
1420     }
1421     }
1422     }
1423     } elsif ($nt == 5) {
1424     unshift @nodes, @{$node->child_nodes};
1425     }
1426     }
1427    
1428     my $end = $self->_add_minuses
1429     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1430     $HTMLSectioningElements);
1431     push @$new_todos, $end;
1432    
1433     if ($content eq 'inline') {
1434     for (@$new_todos) {
1435     $_->{inline} = 1;
1436     }
1437     }
1438    
1439     return ($new_todos);
1440     },
1441     };
1442    
1443     $Element->{$HTML_NS}->{address} = {
1444     attrs_checker => $GetHTMLAttrsChecker->({}),
1445     checker => $HTMLInlineChecker,
1446     };
1447    
1448     $Element->{$HTML_NS}->{p} = {
1449     attrs_checker => $GetHTMLAttrsChecker->({}),
1450     checker => $HTMLSignificantInlineChecker,
1451     };
1452    
1453     $Element->{$HTML_NS}->{hr} = {
1454     attrs_checker => $GetHTMLAttrsChecker->({}),
1455     checker => $HTMLEmptyChecker,
1456     };
1457    
1458     $Element->{$HTML_NS}->{br} = {
1459     attrs_checker => $GetHTMLAttrsChecker->({}),
1460     checker => $HTMLEmptyChecker,
1461     };
1462    
1463     $Element->{$HTML_NS}->{dialog} = {
1464     attrs_checker => $GetHTMLAttrsChecker->({}),
1465     checker => sub {
1466     my ($self, $todo) = @_;
1467     my $el = $todo->{node};
1468     my $new_todos = [];
1469     my @nodes = (@{$el->child_nodes});
1470    
1471     my $phase = 'before dt';
1472     while (@nodes) {
1473     my $node = shift @nodes;
1474     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1475    
1476     my $nt = $node->node_type;
1477     if ($nt == 1) {
1478     my $node_ns = $node->namespace_uri;
1479     $node_ns = '' unless defined $node_ns;
1480     my $node_ln = $node->manakai_local_name;
1481     ## NOTE: |minuses| list is not checked since redundant
1482     if ($phase eq 'before dt') {
1483     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1484     $phase = 'before dd';
1485     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1486     $self->{onerror}
1487     ->(node => $node, type => 'ps element missing:dt');
1488     $phase = 'before dt';
1489     } else {
1490     $self->{onerror}->(node => $node, type => 'element not allowed');
1491     }
1492     } else { # before dd
1493     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1494     $phase = 'before dt';
1495     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1496     $self->{onerror}
1497     ->(node => $node, type => 'ps element missing:dd');
1498     $phase = 'before dd';
1499     } else {
1500     $self->{onerror}->(node => $node, type => 'element not allowed');
1501     }
1502     }
1503     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1504     unshift @nodes, @$sib;
1505     push @$new_todos, @$ch;
1506     } elsif ($nt == 3 or $nt == 4) {
1507     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1508     $self->{onerror}->(node => $node, type => 'character not allowed');
1509     }
1510     } elsif ($nt == 5) {
1511     unshift @nodes, @{$node->child_nodes};
1512     }
1513     }
1514     if ($phase eq 'before dd') {
1515     $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1516     }
1517     return ($new_todos);
1518     },
1519     };
1520    
1521     $Element->{$HTML_NS}->{pre} = {
1522     attrs_checker => $GetHTMLAttrsChecker->({}),
1523     checker => $HTMLStrictlyInlineChecker,
1524     };
1525    
1526     $Element->{$HTML_NS}->{ol} = {
1527     attrs_checker => $GetHTMLAttrsChecker->({
1528     start => $HTMLIntegerAttrChecker,
1529     }),
1530     checker => sub {
1531     my ($self, $todo) = @_;
1532     my $el = $todo->{node};
1533     my $new_todos = [];
1534     my @nodes = (@{$el->child_nodes});
1535    
1536     while (@nodes) {
1537     my $node = shift @nodes;
1538     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1539    
1540     my $nt = $node->node_type;
1541     if ($nt == 1) {
1542     my $node_ns = $node->namespace_uri;
1543     $node_ns = '' unless defined $node_ns;
1544     my $node_ln = $node->manakai_local_name;
1545     ## NOTE: |minuses| list is not checked since redundant
1546     unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1547     $self->{onerror}->(node => $node, type => 'element not allowed');
1548     }
1549     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1550     unshift @nodes, @$sib;
1551     push @$new_todos, @$ch;
1552     } elsif ($nt == 3 or $nt == 4) {
1553     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1554     $self->{onerror}->(node => $node, type => 'character not allowed');
1555     }
1556     } elsif ($nt == 5) {
1557     unshift @nodes, @{$node->child_nodes};
1558     }
1559     }
1560    
1561     if ($todo->{inline}) {
1562     for (@$new_todos) {
1563     $_->{inline} = 1;
1564     }
1565     }
1566     return ($new_todos);
1567     },
1568     };
1569    
1570     $Element->{$HTML_NS}->{ul} = {
1571     attrs_checker => $GetHTMLAttrsChecker->({}),
1572     checker => $Element->{$HTML_NS}->{ol}->{checker},
1573     };
1574    
1575    
1576     $Element->{$HTML_NS}->{li} = {
1577     attrs_checker => $GetHTMLAttrsChecker->({
1578     start => sub {
1579     my ($self, $attr) = @_;
1580     my $parent = $attr->owner_element->manakai_parent_element;
1581     if (defined $parent) {
1582     my $parent_ns = $parent->namespace_uri;
1583     $parent_ns = '' unless defined $parent_ns;
1584     my $parent_ln = $parent->manakai_local_name;
1585     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1586     $self->{onerror}->(node => $attr, level => 'unsupported',
1587     type => 'attribute');
1588     }
1589     }
1590     $HTMLIntegerAttrChecker->($self, $attr);
1591     },
1592     }),
1593     checker => sub {
1594     my ($self, $todo) = @_;
1595     if ($todo->{inline}) {
1596     return $HTMLInlineChecker->($self, $todo);
1597     } else {
1598     return $HTMLBlockOrInlineChecker->($self, $todo);
1599     }
1600     },
1601     };
1602    
1603     $Element->{$HTML_NS}->{dl} = {
1604     attrs_checker => $GetHTMLAttrsChecker->({}),
1605     checker => sub {
1606     my ($self, $todo) = @_;
1607     my $el = $todo->{node};
1608     my $new_todos = [];
1609     my @nodes = (@{$el->child_nodes});
1610    
1611     my $phase = 'before dt';
1612     while (@nodes) {
1613     my $node = shift @nodes;
1614     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1615    
1616     my $nt = $node->node_type;
1617     if ($nt == 1) {
1618     my $node_ns = $node->namespace_uri;
1619     $node_ns = '' unless defined $node_ns;
1620     my $node_ln = $node->manakai_local_name;
1621     ## NOTE: |minuses| list is not checked since redundant
1622     if ($phase eq 'in dds') {
1623     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1624     #$phase = 'in dds';
1625     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1626     $phase = 'in dts';
1627     } else {
1628     $self->{onerror}->(node => $node, type => 'element not allowed');
1629     }
1630     } elsif ($phase eq 'in dts') {
1631     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1632     #$phase = 'in dts';
1633     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1634     $phase = 'in dds';
1635     } else {
1636     $self->{onerror}->(node => $node, type => 'element not allowed');
1637     }
1638     } else { # before dt
1639     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1640     $phase = 'in dts';
1641     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1642     $self->{onerror}
1643     ->(node => $node, type => 'ps element missing:dt');
1644     $phase = 'in dds';
1645     } else {
1646     $self->{onerror}->(node => $node, type => 'element not allowed');
1647     }
1648     }
1649     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1650     unshift @nodes, @$sib;
1651     push @$new_todos, @$ch;
1652     } elsif ($nt == 3 or $nt == 4) {
1653     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1654     $self->{onerror}->(node => $node, type => 'character not allowed');
1655     }
1656     } elsif ($nt == 5) {
1657     unshift @nodes, @{$node->child_nodes};
1658     }
1659     }
1660     if ($phase eq 'in dts') {
1661     $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1662     }
1663    
1664     if ($todo->{inline}) {
1665     for (@$new_todos) {
1666     $_->{inline} = 1;
1667     }
1668     }
1669     return ($new_todos);
1670     },
1671     };
1672    
1673     $Element->{$HTML_NS}->{dt} = {
1674     attrs_checker => $GetHTMLAttrsChecker->({}),
1675     checker => $HTMLStrictlyInlineChecker,
1676     };
1677    
1678     $Element->{$HTML_NS}->{dd} = {
1679     attrs_checker => $GetHTMLAttrsChecker->({}),
1680     checker => $Element->{$HTML_NS}->{li}->{checker},
1681     };
1682    
1683     $Element->{$HTML_NS}->{a} = {
1684     attrs_checker => sub {
1685     my ($self, $todo) = @_;
1686     my %attr;
1687     for my $attr (@{$todo->{node}->attributes}) {
1688     my $attr_ns = $attr->namespace_uri;
1689     $attr_ns = '' unless defined $attr_ns;
1690     my $attr_ln = $attr->manakai_local_name;
1691     my $checker;
1692     if ($attr_ns eq '') {
1693     $checker = {
1694     target => $HTMLTargetAttrChecker,
1695     href => $HTMLURIAttrChecker,
1696     ping => $HTMLSpaceURIsAttrChecker,
1697 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1698 wakaba 1.1 media => $HTMLMQAttrChecker,
1699     hreflang => $HTMLLanguageTagAttrChecker,
1700     type => $HTMLIMTAttrChecker,
1701     }->{$attr_ln};
1702     if ($checker) {
1703     $attr{$attr_ln} = $attr;
1704     } else {
1705     $checker = $HTMLAttrChecker->{$attr_ln};
1706     }
1707     }
1708     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1709     || $AttrChecker->{$attr_ns}->{''};
1710     if ($checker) {
1711     $checker->($self, $attr) if ref $checker;
1712     } else {
1713     $self->{onerror}->(node => $attr, level => 'unsupported',
1714     type => 'attribute');
1715     ## ISSUE: No comformance createria for unknown attributes in the spec
1716     }
1717     }
1718    
1719 wakaba 1.4 if (defined $attr{href}) {
1720     $self->{has_hyperlink_element} = 1;
1721     } else {
1722 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1723     if (defined $attr{$_}) {
1724     $self->{onerror}->(node => $attr{$_},
1725     type => 'attribute not allowed');
1726     }
1727     }
1728     }
1729     },
1730     checker => sub {
1731     my ($self, $todo) = @_;
1732    
1733     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1734     my ($new_todos, $ch)
1735     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1736     push @$new_todos, $end;
1737    
1738     $_->{flag}->{has_a} = 1 for @$new_todos;
1739    
1740     return ($new_todos, $ch);
1741     },
1742     };
1743    
1744     $Element->{$HTML_NS}->{q} = {
1745     attrs_checker => $GetHTMLAttrsChecker->({
1746     cite => $HTMLURIAttrChecker,
1747     }),
1748     checker => $HTMLInlineOrStrictlyInlineChecker,
1749     };
1750    
1751     $Element->{$HTML_NS}->{cite} = {
1752     attrs_checker => $GetHTMLAttrsChecker->({}),
1753     checker => $HTMLStrictlyInlineChecker,
1754     };
1755    
1756     $Element->{$HTML_NS}->{em} = {
1757     attrs_checker => $GetHTMLAttrsChecker->({}),
1758     checker => $HTMLInlineOrStrictlyInlineChecker,
1759     };
1760    
1761     $Element->{$HTML_NS}->{strong} = {
1762     attrs_checker => $GetHTMLAttrsChecker->({}),
1763     checker => $HTMLInlineOrStrictlyInlineChecker,
1764     };
1765    
1766     $Element->{$HTML_NS}->{small} = {
1767     attrs_checker => $GetHTMLAttrsChecker->({}),
1768     checker => $HTMLInlineOrStrictlyInlineChecker,
1769     };
1770    
1771     $Element->{$HTML_NS}->{m} = {
1772     attrs_checker => $GetHTMLAttrsChecker->({}),
1773     checker => $HTMLInlineOrStrictlyInlineChecker,
1774     };
1775    
1776     $Element->{$HTML_NS}->{dfn} = {
1777     attrs_checker => $GetHTMLAttrsChecker->({}),
1778     checker => sub {
1779     my ($self, $todo) = @_;
1780    
1781     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1782     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1783     push @$sib, $end;
1784    
1785     my $node = $todo->{node};
1786     my $term = $node->get_attribute_ns (undef, 'title');
1787     unless (defined $term) {
1788     for my $child (@{$node->child_nodes}) {
1789     if ($child->node_type == 1) { # ELEMENT_NODE
1790     if (defined $term) {
1791     undef $term;
1792     last;
1793     } elsif ($child->manakai_local_name eq 'abbr') {
1794     my $nsuri = $child->namespace_uri;
1795     if (defined $nsuri and $nsuri eq $HTML_NS) {
1796     my $attr = $child->get_attribute_node_ns (undef, 'title');
1797     if ($attr) {
1798     $term = $attr->value;
1799     }
1800     }
1801     }
1802     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1803     ## TEXT_NODE or CDATA_SECTION_NODE
1804     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1805     next;
1806     }
1807     undef $term;
1808     last;
1809     }
1810     }
1811     unless (defined $term) {
1812     $term = $node->text_content;
1813     }
1814     }
1815     if ($self->{term}->{$term}) {
1816     $self->{onerror}->(node => $node, type => 'duplicate term');
1817     push @{$self->{term}->{$term}}, $node;
1818     } else {
1819     $self->{term}->{$term} = [$node];
1820     }
1821     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1822     ## has |title|.
1823    
1824     return ($sib, $ch);
1825     },
1826     };
1827    
1828     $Element->{$HTML_NS}->{abbr} = {
1829     attrs_checker => $GetHTMLAttrsChecker->({
1830     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1831     ## not different. The spec says that the |title| MAY be omitted
1832     ## if there is a |dfn| whose defining term is the abbreviation,
1833     ## but it does not prohibit |abbr| w/o |title| in other cases.
1834     }),
1835     checker => $HTMLStrictlyInlineChecker,
1836     };
1837    
1838     $Element->{$HTML_NS}->{time} = {
1839     attrs_checker => $GetHTMLAttrsChecker->({
1840     datetime => sub { 1 }, # checked in |checker|
1841     }),
1842     ## TODO: Write tests
1843     checker => sub {
1844     my ($self, $todo) = @_;
1845    
1846     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
1847     my $input;
1848     my $reg_sp;
1849     my $input_node;
1850     if ($attr) {
1851     $input = $attr->value;
1852     $reg_sp = qr/[\x09-\x0D\x20]*/;
1853     $input_node = $attr;
1854     } else {
1855     $input = $todo->{node}->text_content;
1856     $reg_sp = qr/\p{Zs}*/;
1857     $input_node = $todo->{node};
1858    
1859     ## ISSUE: What is the definition for "successfully extracts a date
1860     ## or time"? If the algorithm says the string is invalid but
1861     ## return some date or time, is it "successfully"?
1862     }
1863    
1864     my $hour;
1865     my $minute;
1866     my $second;
1867     if ($input =~ /
1868     \A
1869     [\x09-\x0D\x20]*
1870     ([0-9]+) # 1
1871     (?>
1872     -([0-9]+) # 2
1873     -([0-9]+) # 3
1874     [\x09-\x0D\x20]*
1875     (?>
1876     T
1877     [\x09-\x0D\x20]*
1878     )?
1879     ([0-9]+) # 4
1880     :([0-9]+) # 5
1881     (?>
1882     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1883     )?
1884     [\x09-\x0D\x20]*
1885     (?>
1886     Z
1887     [\x09-\x0D\x20]*
1888     |
1889     [+-]([0-9]+):([0-9]+) # 7, 8
1890     [\x09-\x0D\x20]*
1891     )?
1892     \z
1893     |
1894     :([0-9]+) # 9
1895     (?>
1896     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1897     )?
1898     [\x09-\x0D\x20]*\z
1899     )
1900     /x) {
1901     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1902     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1903     length $4 != 2 or length $5 != 2) {
1904     $self->{onerror}->(node => $input_node,
1905     type => 'dateortime:syntax error');
1906     }
1907    
1908     if (1 <= $2 and $2 <= 12) {
1909     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1910     if $3 < 1 or
1911     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1912     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1913     if $2 == 2 and $3 == 29 and
1914     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1915     } else {
1916     $self->{onerror}->(node => $input_node,
1917     type => 'datetime:bad month');
1918     }
1919    
1920     ($hour, $minute, $second) = ($4, $5, $6);
1921    
1922     if (defined $7) { ## [+-]hh:mm
1923     if (length $7 != 2 or length $8 != 2) {
1924     $self->{onerror}->(node => $input_node,
1925     type => 'dateortime:syntax error');
1926     }
1927    
1928     $self->{onerror}->(node => $input_node,
1929     type => 'datetime:bad timezone hour')
1930     if $7 > 23;
1931     $self->{onerror}->(node => $input_node,
1932     type => 'datetime:bad timezone minute')
1933     if $8 > 59;
1934     }
1935     } else { ## hh:mm
1936     if (length $1 != 2 or length $9 != 2) {
1937     $self->{onerror}->(node => $input_node,
1938     type => qq'dateortime:syntax error');
1939     }
1940    
1941     ($hour, $minute, $second) = ($1, $9, $10);
1942     }
1943    
1944     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
1945     if $hour > 23;
1946     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
1947     if $minute > 59;
1948    
1949     if (defined $second) { ## s
1950     ## NOTE: Integer part of second don't have to have length of two.
1951    
1952     if (substr ($second, 0, 1) eq '.') {
1953     $self->{onerror}->(node => $input_node,
1954     type => 'dateortime:syntax error');
1955     }
1956    
1957     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
1958     if $second >= 60;
1959     }
1960     } else {
1961     $self->{onerror}->(node => $input_node,
1962     type => 'dateortime:syntax error');
1963     }
1964    
1965     return $HTMLStrictlyInlineChecker->($self, $todo);
1966     },
1967     };
1968    
1969     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1970     attrs_checker => $GetHTMLAttrsChecker->({
1971     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1972     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1973     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1974     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1975     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1976     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1977     }),
1978     checker => $HTMLStrictlyInlineChecker,
1979     };
1980    
1981     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1982     attrs_checker => $GetHTMLAttrsChecker->({
1983     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1984     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1985     }),
1986     checker => $HTMLStrictlyInlineChecker,
1987     };
1988    
1989     $Element->{$HTML_NS}->{code} = {
1990     attrs_checker => $GetHTMLAttrsChecker->({}),
1991     ## NOTE: Though |title| has special semantics,
1992     ## syntatically same as the |title| as global attribute.
1993     checker => $HTMLInlineOrStrictlyInlineChecker,
1994     };
1995    
1996     $Element->{$HTML_NS}->{var} = {
1997     attrs_checker => $GetHTMLAttrsChecker->({}),
1998     ## NOTE: Though |title| has special semantics,
1999     ## syntatically same as the |title| as global attribute.
2000     checker => $HTMLStrictlyInlineChecker,
2001     };
2002    
2003     $Element->{$HTML_NS}->{samp} = {
2004     attrs_checker => $GetHTMLAttrsChecker->({}),
2005     ## NOTE: Though |title| has special semantics,
2006     ## syntatically same as the |title| as global attribute.
2007     checker => $HTMLInlineOrStrictlyInlineChecker,
2008     };
2009    
2010     $Element->{$HTML_NS}->{kbd} = {
2011     attrs_checker => $GetHTMLAttrsChecker->({}),
2012     checker => $HTMLStrictlyInlineChecker,
2013     };
2014    
2015     $Element->{$HTML_NS}->{sub} = {
2016     attrs_checker => $GetHTMLAttrsChecker->({}),
2017     checker => $HTMLStrictlyInlineChecker,
2018     };
2019    
2020     $Element->{$HTML_NS}->{sup} = {
2021     attrs_checker => $GetHTMLAttrsChecker->({}),
2022     checker => $HTMLStrictlyInlineChecker,
2023     };
2024    
2025     $Element->{$HTML_NS}->{span} = {
2026     attrs_checker => $GetHTMLAttrsChecker->({}),
2027     ## NOTE: Though |title| has special semantics,
2028     ## syntatically same as the |title| as global attribute.
2029     checker => $HTMLInlineOrStrictlyInlineChecker,
2030     };
2031    
2032     $Element->{$HTML_NS}->{i} = {
2033     attrs_checker => $GetHTMLAttrsChecker->({}),
2034     ## NOTE: Though |title| has special semantics,
2035     ## syntatically same as the |title| as global attribute.
2036     checker => $HTMLStrictlyInlineChecker,
2037     };
2038    
2039     $Element->{$HTML_NS}->{b} = {
2040     attrs_checker => $GetHTMLAttrsChecker->({}),
2041     checker => $HTMLStrictlyInlineChecker,
2042     };
2043    
2044     $Element->{$HTML_NS}->{bdo} = {
2045     attrs_checker => sub {
2046     my ($self, $todo) = @_;
2047     $GetHTMLAttrsChecker->({})->($self, $todo);
2048     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2049     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2050     }
2051     },
2052     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2053     checker => $HTMLStrictlyInlineChecker,
2054     };
2055    
2056     $Element->{$HTML_NS}->{ins} = {
2057     attrs_checker => $GetHTMLAttrsChecker->({
2058     cite => $HTMLURIAttrChecker,
2059     datetime => $HTMLDatetimeAttrChecker,
2060     }),
2061     checker => $HTMLTransparentChecker,
2062     };
2063    
2064     $Element->{$HTML_NS}->{del} = {
2065     attrs_checker => $GetHTMLAttrsChecker->({
2066     cite => $HTMLURIAttrChecker,
2067     datetime => $HTMLDatetimeAttrChecker,
2068     }),
2069     checker => sub {
2070     my ($self, $todo) = @_;
2071    
2072     my $parent = $todo->{node}->manakai_parent_element;
2073     if (defined $parent) {
2074     my $nsuri = $parent->namespace_uri;
2075     $nsuri = '' unless defined $nsuri;
2076     my $ln = $parent->manakai_local_name;
2077     my $eldef = $Element->{$nsuri}->{$ln} ||
2078     $Element->{$nsuri}->{''} ||
2079     $ElementDefault;
2080     return $eldef->{checker}->($self, $todo);
2081     } else {
2082     return $HTMLBlockOrInlineChecker->($self, $todo);
2083     }
2084     },
2085     };
2086    
2087     ## TODO: figure
2088    
2089 wakaba 1.4 ## TODO: |alt|
2090 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2091     attrs_checker => sub {
2092     my ($self, $todo) = @_;
2093     $GetHTMLAttrsChecker->({
2094     alt => sub { }, ## NOTE: No syntactical requirement
2095     src => $HTMLURIAttrChecker,
2096     usemap => $HTMLUsemapAttrChecker,
2097     ismap => sub {
2098     my ($self, $attr, $parent_todo) = @_;
2099     if (not $todo->{flag}->{has_a}) {
2100     $self->{onerror}->(node => $attr, type => 'attribute not allowed');
2101     }
2102     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2103     },
2104     ## TODO: height
2105     ## TODO: width
2106     })->($self, $todo);
2107     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2108     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2109     }
2110     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2111     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2112     }
2113     },
2114     checker => $HTMLEmptyChecker,
2115     };
2116    
2117     $Element->{$HTML_NS}->{iframe} = {
2118     attrs_checker => $GetHTMLAttrsChecker->({
2119     src => $HTMLURIAttrChecker,
2120     }),
2121     checker => $HTMLTextChecker,
2122     };
2123    
2124     $Element->{$HTML_NS}->{embed} = {
2125     attrs_checker => sub {
2126     my ($self, $todo) = @_;
2127     my $has_src;
2128     for my $attr (@{$todo->{node}->attributes}) {
2129     my $attr_ns = $attr->namespace_uri;
2130     $attr_ns = '' unless defined $attr_ns;
2131     my $attr_ln = $attr->manakai_local_name;
2132     my $checker;
2133     if ($attr_ns eq '') {
2134     if ($attr_ln eq 'src') {
2135     $checker = $HTMLURIAttrChecker;
2136     $has_src = 1;
2137     } elsif ($attr_ln eq 'type') {
2138     $checker = $HTMLIMTAttrChecker;
2139     } else {
2140     ## TODO: height
2141     ## TODO: width
2142     $checker = $HTMLAttrChecker->{$attr_ln}
2143     || sub { }; ## NOTE: Any local attribute is ok.
2144     }
2145     }
2146     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2147     || $AttrChecker->{$attr_ns}->{''};
2148     if ($checker) {
2149     $checker->($self, $attr);
2150     } else {
2151     $self->{onerror}->(node => $attr, level => 'unsupported',
2152     type => 'attribute');
2153     ## ISSUE: No comformance createria for global attributes in the spec
2154     }
2155     }
2156    
2157     unless ($has_src) {
2158     $self->{onerror}->(node => $todo->{node},
2159     type => 'attribute missing:src');
2160     }
2161     },
2162     checker => $HTMLEmptyChecker,
2163     };
2164    
2165     $Element->{$HTML_NS}->{object} = {
2166     attrs_checker => sub {
2167     my ($self, $todo) = @_;
2168     $GetHTMLAttrsChecker->({
2169     data => $HTMLURIAttrChecker,
2170     type => $HTMLIMTAttrChecker,
2171     usemap => $HTMLUsemapAttrChecker,
2172     ## TODO: width
2173     ## TODO: height
2174     })->($self, $todo);
2175     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2176     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2177     $self->{onerror}->(node => $todo->{node},
2178     type => 'attribute missing:data|type');
2179     }
2180     }
2181     },
2182     checker => $ElementDefault->{checker}, ## TODO
2183     };
2184    
2185     $Element->{$HTML_NS}->{param} = {
2186     attrs_checker => sub {
2187     my ($self, $todo) = @_;
2188     $GetHTMLAttrsChecker->({
2189     name => sub { },
2190     value => sub { },
2191     })->($self, $todo);
2192     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2193     $self->{onerror}->(node => $todo->{node},
2194     type => 'attribute missing:name');
2195     }
2196     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2197     $self->{onerror}->(node => $todo->{node},
2198     type => 'attribute missing:value');
2199     }
2200     },
2201     checker => $HTMLEmptyChecker,
2202     };
2203    
2204     $Element->{$HTML_NS}->{video} = {
2205     attrs_checker => $GetHTMLAttrsChecker->({
2206     src => $HTMLURIAttrChecker,
2207     ## TODO: start, loopstart, loopend, end
2208     ## ISSUE: they MUST be "value time offset"s. Value?
2209     ## ISSUE: loopcount has no conformance creteria
2210     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2211     controls => $GetHTMLBooleanAttrChecker->('controls'),
2212     }),
2213     checker => sub {
2214     my ($self, $todo) = @_;
2215    
2216     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2217     return $HTMLBlockOrInlineChecker->($self, $todo);
2218     } else {
2219     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2220     ->($self, $todo);
2221     }
2222     },
2223     };
2224    
2225     $Element->{$HTML_NS}->{audio} = {
2226     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2227     checker => $Element->{$HTML_NS}->{video}->{checker},
2228     };
2229    
2230     $Element->{$HTML_NS}->{source} = {
2231     attrs_checker => sub {
2232     my ($self, $todo) = @_;
2233     $GetHTMLAttrsChecker->({
2234     src => $HTMLURIAttrChecker,
2235     type => $HTMLIMTAttrChecker,
2236     media => $HTMLMQAttrChecker,
2237     })->($self, $todo);
2238     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2239     $self->{onerror}->(node => $todo->{node},
2240     type => 'attribute missing:src');
2241     }
2242     },
2243     checker => $HTMLEmptyChecker,
2244     };
2245    
2246     $Element->{$HTML_NS}->{canvas} = {
2247     attrs_checker => $GetHTMLAttrsChecker->({
2248     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2249     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2250     }),
2251     checker => $HTMLInlineChecker,
2252     };
2253    
2254     $Element->{$HTML_NS}->{map} = {
2255 wakaba 1.4 attrs_checker => sub {
2256     my ($self, $todo) = @_;
2257     my $has_id;
2258     $GetHTMLAttrsChecker->({
2259     id => sub {
2260     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2261     my ($self, $attr) = @_;
2262     my $value = $attr->value;
2263     if (length $value > 0) {
2264     if ($self->{id}->{$value}) {
2265     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2266     push @{$self->{id}->{$value}}, $attr;
2267     } else {
2268     $self->{id}->{$value} = [$attr];
2269     }
2270 wakaba 1.1 } else {
2271 wakaba 1.4 ## NOTE: MUST contain at least one character
2272     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2273 wakaba 1.1 }
2274 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2275     $self->{onerror}->(node => $attr, type => 'space in ID');
2276     }
2277     $self->{map}->{$value} ||= $attr;
2278     $has_id = 1;
2279     },
2280     })->($self, $todo);
2281     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2282     unless $has_id;
2283     },
2284 wakaba 1.1 checker => $HTMLBlockChecker,
2285     };
2286    
2287     $Element->{$HTML_NS}->{area} = {
2288     attrs_checker => sub {
2289     my ($self, $todo) = @_;
2290     my %attr;
2291     my $coords;
2292     for my $attr (@{$todo->{node}->attributes}) {
2293     my $attr_ns = $attr->namespace_uri;
2294     $attr_ns = '' unless defined $attr_ns;
2295     my $attr_ln = $attr->manakai_local_name;
2296     my $checker;
2297     if ($attr_ns eq '') {
2298     $checker = {
2299     alt => sub { },
2300     ## NOTE: |alt| value has no conformance creteria.
2301     shape => $GetHTMLEnumeratedAttrChecker->({
2302     circ => -1, circle => 1,
2303     default => 1,
2304     poly => 1, polygon => -1,
2305     rect => 1, rectangle => -1,
2306     }),
2307     coords => sub {
2308     my ($self, $attr) = @_;
2309     my $value = $attr->value;
2310     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2311     $coords = [split /,/, $value];
2312     } else {
2313     $self->{onerror}->(node => $attr,
2314     type => 'coords:syntax error');
2315     }
2316     },
2317     target => $HTMLTargetAttrChecker,
2318     href => $HTMLURIAttrChecker,
2319     ping => $HTMLSpaceURIsAttrChecker,
2320 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2321 wakaba 1.1 media => $HTMLMQAttrChecker,
2322     hreflang => $HTMLLanguageTagAttrChecker,
2323     type => $HTMLIMTAttrChecker,
2324     }->{$attr_ln};
2325     if ($checker) {
2326     $attr{$attr_ln} = $attr;
2327     } else {
2328     $checker = $HTMLAttrChecker->{$attr_ln};
2329     }
2330     }
2331     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2332     || $AttrChecker->{$attr_ns}->{''};
2333     if ($checker) {
2334     $checker->($self, $attr) if ref $checker;
2335     } else {
2336     $self->{onerror}->(node => $attr, level => 'unsupported',
2337     type => 'attribute');
2338     ## ISSUE: No comformance createria for unknown attributes in the spec
2339     }
2340     }
2341    
2342     if (defined $attr{href}) {
2343 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2344 wakaba 1.1 unless (defined $attr{alt}) {
2345     $self->{onerror}->(node => $todo->{node},
2346     type => 'attribute missing:alt');
2347     }
2348     } else {
2349     for (qw/target ping rel media hreflang type alt/) {
2350     if (defined $attr{$_}) {
2351     $self->{onerror}->(node => $attr{$_},
2352     type => 'attribute not allowed');
2353     }
2354     }
2355     }
2356    
2357     my $shape = 'rectangle';
2358     if (defined $attr{shape}) {
2359     $shape = {
2360     circ => 'circle', circle => 'circle',
2361     default => 'default',
2362     poly => 'polygon', polygon => 'polygon',
2363     rect => 'rectangle', rectangle => 'rectangle',
2364     }->{lc $attr{shape}->value} || 'rectangle';
2365     ## TODO: ASCII lowercase?
2366     }
2367    
2368     if ($shape eq 'circle') {
2369     if (defined $attr{coords}) {
2370     if (defined $coords) {
2371     if (@$coords == 3) {
2372     if ($coords->[2] < 0) {
2373     $self->{onerror}->(node => $attr{coords},
2374     type => 'coords:out of range:2');
2375     }
2376     } else {
2377     $self->{onerror}->(node => $attr{coords},
2378     type => 'coords:number:3:'.@$coords);
2379     }
2380     } else {
2381     ## NOTE: A syntax error has been reported.
2382     }
2383     } else {
2384     $self->{onerror}->(node => $todo->{node},
2385     type => 'attribute missing:coords');
2386     }
2387     } elsif ($shape eq 'default') {
2388     if (defined $attr{coords}) {
2389     $self->{onerror}->(node => $attr{coords},
2390     type => 'attribute not allowed');
2391     }
2392     } elsif ($shape eq 'polygon') {
2393     if (defined $attr{coords}) {
2394     if (defined $coords) {
2395     if (@$coords >= 6) {
2396     unless (@$coords % 2 == 0) {
2397     $self->{onerror}->(node => $attr{coords},
2398     type => 'coords:number:even:'.@$coords);
2399     }
2400     } else {
2401     $self->{onerror}->(node => $attr{coords},
2402     type => 'coords:number:>=6:'.@$coords);
2403     }
2404     } else {
2405     ## NOTE: A syntax error has been reported.
2406     }
2407     } else {
2408     $self->{onerror}->(node => $todo->{node},
2409     type => 'attribute missing:coords');
2410     }
2411     } elsif ($shape eq 'rectangle') {
2412     if (defined $attr{coords}) {
2413     if (defined $coords) {
2414     if (@$coords == 4) {
2415     unless ($coords->[0] < $coords->[2]) {
2416     $self->{onerror}->(node => $attr{coords},
2417     type => 'coords:out of range:0');
2418     }
2419     unless ($coords->[1] < $coords->[3]) {
2420     $self->{onerror}->(node => $attr{coords},
2421     type => 'coords:out of range:1');
2422     }
2423     } else {
2424     $self->{onerror}->(node => $attr{coords},
2425     type => 'coords:number:4:'.@$coords);
2426     }
2427     } else {
2428     ## NOTE: A syntax error has been reported.
2429     }
2430     } else {
2431     $self->{onerror}->(node => $todo->{node},
2432     type => 'attribute missing:coords');
2433     }
2434     }
2435     },
2436     checker => $HTMLEmptyChecker,
2437     };
2438     ## TODO: only in map
2439    
2440     $Element->{$HTML_NS}->{table} = {
2441     attrs_checker => $GetHTMLAttrsChecker->({}),
2442     checker => sub {
2443     my ($self, $todo) = @_;
2444     my $el = $todo->{node};
2445     my $new_todos = [];
2446     my @nodes = (@{$el->child_nodes});
2447    
2448     my $phase = 'before caption';
2449     my $has_tfoot;
2450     while (@nodes) {
2451     my $node = shift @nodes;
2452     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2453    
2454     my $nt = $node->node_type;
2455     if ($nt == 1) {
2456     my $node_ns = $node->namespace_uri;
2457     $node_ns = '' unless defined $node_ns;
2458     my $node_ln = $node->manakai_local_name;
2459     ## NOTE: |minuses| list is not checked since redundant
2460     if ($phase eq 'in tbodys') {
2461     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2462     #$phase = 'in tbodys';
2463     } elsif (not $has_tfoot and
2464     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2465     $phase = 'after tfoot';
2466     $has_tfoot = 1;
2467     } else {
2468     $self->{onerror}->(node => $node, type => 'element not allowed');
2469     }
2470     } elsif ($phase eq 'in trs') {
2471     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2472     #$phase = 'in trs';
2473     } elsif (not $has_tfoot and
2474     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2475     $phase = 'after tfoot';
2476     $has_tfoot = 1;
2477     } else {
2478     $self->{onerror}->(node => $node, type => 'element not allowed');
2479     }
2480     } elsif ($phase eq 'after thead') {
2481     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2482     $phase = 'in tbodys';
2483     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2484     $phase = 'in trs';
2485     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2486     $phase = 'in tbodys';
2487     $has_tfoot = 1;
2488     } else {
2489     $self->{onerror}->(node => $node, type => 'element not allowed');
2490     }
2491     } elsif ($phase eq 'in colgroup') {
2492     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2493     $phase = 'in colgroup';
2494     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2495     $phase = 'after thead';
2496     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2497     $phase = 'in tbodys';
2498     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2499     $phase = 'in trs';
2500     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2501     $phase = 'in tbodys';
2502     $has_tfoot = 1;
2503     } else {
2504     $self->{onerror}->(node => $node, type => 'element not allowed');
2505     }
2506     } elsif ($phase eq 'before caption') {
2507     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2508     $phase = 'in colgroup';
2509     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2510     $phase = 'in colgroup';
2511     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2512     $phase = 'after thead';
2513     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2514     $phase = 'in tbodys';
2515     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2516     $phase = 'in trs';
2517     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2518     $phase = 'in tbodys';
2519     $has_tfoot = 1;
2520     } else {
2521     $self->{onerror}->(node => $node, type => 'element not allowed');
2522     }
2523     } else { # after tfoot
2524     $self->{onerror}->(node => $node, type => 'element not allowed');
2525     }
2526     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2527     unshift @nodes, @$sib;
2528     push @$new_todos, @$ch;
2529     } elsif ($nt == 3 or $nt == 4) {
2530     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2531     $self->{onerror}->(node => $node, type => 'character not allowed');
2532     }
2533     } elsif ($nt == 5) {
2534     unshift @nodes, @{$node->child_nodes};
2535     }
2536     }
2537    
2538     ## Table model errors
2539     require Whatpm::HTMLTable;
2540     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2541     my %opt = @_;
2542     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2543     });
2544     push @{$self->{return}->{table}}, $todo->{node};
2545    
2546     return ($new_todos);
2547     },
2548     };
2549    
2550     $Element->{$HTML_NS}->{caption} = {
2551     attrs_checker => $GetHTMLAttrsChecker->({}),
2552     checker => $HTMLSignificantStrictlyInlineChecker,
2553     };
2554    
2555     $Element->{$HTML_NS}->{colgroup} = {
2556     attrs_checker => $GetHTMLAttrsChecker->({
2557     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2558     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2559     ## TODO: "attribute not supported" if |col|.
2560     ## ISSUE: MUST NOT if any |col|?
2561     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2562     }),
2563     checker => sub {
2564     my ($self, $todo) = @_;
2565     my $el = $todo->{node};
2566     my $new_todos = [];
2567     my @nodes = (@{$el->child_nodes});
2568    
2569     while (@nodes) {
2570     my $node = shift @nodes;
2571     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2572    
2573     my $nt = $node->node_type;
2574     if ($nt == 1) {
2575     my $node_ns = $node->namespace_uri;
2576     $node_ns = '' unless defined $node_ns;
2577     my $node_ln = $node->manakai_local_name;
2578     ## NOTE: |minuses| list is not checked since redundant
2579     unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
2580     $self->{onerror}->(node => $node, type => 'element not allowed');
2581     }
2582     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2583     unshift @nodes, @$sib;
2584     push @$new_todos, @$ch;
2585     } elsif ($nt == 3 or $nt == 4) {
2586     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2587     $self->{onerror}->(node => $node, type => 'character not allowed');
2588     }
2589     } elsif ($nt == 5) {
2590     unshift @nodes, @{$node->child_nodes};
2591     }
2592     }
2593     return ($new_todos);
2594     },
2595     };
2596    
2597     $Element->{$HTML_NS}->{col} = {
2598     attrs_checker => $GetHTMLAttrsChecker->({
2599     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2600     }),
2601     checker => $HTMLEmptyChecker,
2602     };
2603    
2604     $Element->{$HTML_NS}->{tbody} = {
2605     attrs_checker => $GetHTMLAttrsChecker->({}),
2606     checker => sub {
2607     my ($self, $todo) = @_;
2608     my $el = $todo->{node};
2609     my $new_todos = [];
2610     my @nodes = (@{$el->child_nodes});
2611    
2612     my $has_tr;
2613     while (@nodes) {
2614     my $node = shift @nodes;
2615     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2616    
2617     my $nt = $node->node_type;
2618     if ($nt == 1) {
2619     my $node_ns = $node->namespace_uri;
2620     $node_ns = '' unless defined $node_ns;
2621     my $node_ln = $node->manakai_local_name;
2622     ## NOTE: |minuses| list is not checked since redundant
2623     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2624     $has_tr = 1;
2625     } else {
2626     $self->{onerror}->(node => $node, type => 'element not allowed');
2627     }
2628     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2629     unshift @nodes, @$sib;
2630     push @$new_todos, @$ch;
2631     } elsif ($nt == 3 or $nt == 4) {
2632     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2633     $self->{onerror}->(node => $node, type => 'character not allowed');
2634     }
2635     } elsif ($nt == 5) {
2636     unshift @nodes, @{$node->child_nodes};
2637     }
2638     }
2639     unless ($has_tr) {
2640     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2641     }
2642     return ($new_todos);
2643     },
2644     };
2645    
2646     $Element->{$HTML_NS}->{thead} = {
2647     attrs_checker => $GetHTMLAttrsChecker->({}),
2648     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2649     };
2650    
2651     $Element->{$HTML_NS}->{tfoot} = {
2652     attrs_checker => $GetHTMLAttrsChecker->({}),
2653     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2654     };
2655    
2656     $Element->{$HTML_NS}->{tr} = {
2657     attrs_checker => $GetHTMLAttrsChecker->({}),
2658     checker => sub {
2659     my ($self, $todo) = @_;
2660     my $el = $todo->{node};
2661     my $new_todos = [];
2662     my @nodes = (@{$el->child_nodes});
2663    
2664     my $has_td;
2665     while (@nodes) {
2666     my $node = shift @nodes;
2667     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2668    
2669     my $nt = $node->node_type;
2670     if ($nt == 1) {
2671     my $node_ns = $node->namespace_uri;
2672     $node_ns = '' unless defined $node_ns;
2673     my $node_ln = $node->manakai_local_name;
2674     ## NOTE: |minuses| list is not checked since redundant
2675     if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
2676     $has_td = 1;
2677     } else {
2678     $self->{onerror}->(node => $node, type => 'element not allowed');
2679     }
2680     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2681     unshift @nodes, @$sib;
2682     push @$new_todos, @$ch;
2683     } elsif ($nt == 3 or $nt == 4) {
2684     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2685     $self->{onerror}->(node => $node, type => 'character not allowed');
2686     }
2687     } elsif ($nt == 5) {
2688     unshift @nodes, @{$node->child_nodes};
2689     }
2690     }
2691     unless ($has_td) {
2692     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2693     }
2694     return ($new_todos);
2695     },
2696     };
2697    
2698     $Element->{$HTML_NS}->{td} = {
2699     attrs_checker => $GetHTMLAttrsChecker->({
2700     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2701     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2702     }),
2703     checker => $HTMLBlockOrInlineChecker,
2704     };
2705    
2706     $Element->{$HTML_NS}->{th} = {
2707     attrs_checker => $GetHTMLAttrsChecker->({
2708     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2709     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2710     scope => $GetHTMLEnumeratedAttrChecker
2711     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2712     }),
2713     checker => $HTMLBlockOrInlineChecker,
2714     };
2715    
2716     ## TODO: forms
2717    
2718     $Element->{$HTML_NS}->{script} = {
2719     attrs_checker => sub {
2720     my ($self, $todo) = @_;
2721     $GetHTMLAttrsChecker->({
2722     src => $HTMLURIAttrChecker,
2723     defer => $GetHTMLBooleanAttrChecker->('defer'),
2724     async => $GetHTMLBooleanAttrChecker->('async'),
2725     type => $HTMLIMTAttrChecker,
2726     })->($self, $todo);
2727     if ($todo->{node}->has_attribute_ns (undef, 'defer')) {
2728     my $async_attr = $todo->{node}->get_attribute_node_ns (undef, 'async');
2729     if ($async_attr) {
2730     $self->{onerror}->(node => $async_attr,
2731     type => 'attribute not allowed'); # MUST NOT
2732     }
2733     }
2734     },
2735     checker => sub {
2736     my ($self, $todo) = @_;
2737    
2738     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2739     return $HTMLEmptyChecker->($self, $todo);
2740     } else {
2741     ## NOTE: No content model conformance in HTML5 spec.
2742     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2743     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2744     if ((defined $type and $type eq '') or
2745     (defined $language and $language eq '')) {
2746     $type = 'text/javascript';
2747     } elsif (defined $type) {
2748     #
2749     } elsif (defined $language) {
2750     $type = 'text/' . $language;
2751     } else {
2752     $type = 'text/javascript';
2753     }
2754     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2755     type => 'script:'.$type); ## TODO: $type normalization
2756     return $AnyChecker->($self, $todo);
2757     }
2758     },
2759     };
2760    
2761     ## NOTE: When script is disabled.
2762     $Element->{$HTML_NS}->{noscript} = {
2763 wakaba 1.3 attrs_checker => sub {
2764     my ($self, $todo) = @_;
2765    
2766     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2767     ## since the later is not invoked when the |noscript| is used as a
2768     ## transparent element.
2769     unless ($todo->{node}->owner_document->manakai_is_html) {
2770     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2771     }
2772    
2773     $GetHTMLAttrsChecker->({})->($self, $todo);
2774     },
2775 wakaba 1.1 checker => sub {
2776     my ($self, $todo) = @_;
2777    
2778 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2779     my $new_todos = [];
2780     my @nodes = (@{$todo->{node}->child_nodes});
2781    
2782     while (@nodes) {
2783     my $node = shift @nodes;
2784     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2785    
2786     my $nt = $node->node_type;
2787     if ($nt == 1) {
2788     my $node_ns = $node->namespace_uri;
2789     $node_ns = '' unless defined $node_ns;
2790     my $node_ln = $node->manakai_local_name;
2791     if ($node_ns eq $HTML_NS) {
2792     if ({link => 1, style => 1}->{$node_ln}) {
2793     #
2794     } elsif ($node_ln eq 'meta') {
2795 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2796     #
2797 wakaba 1.3 } else {
2798 wakaba 1.5 $self->{onerror}->(node => $node,
2799     type => 'element not allowed');
2800 wakaba 1.3 }
2801     } else {
2802     $self->{onerror}->(node => $node, type => 'element not allowed');
2803     }
2804     } else {
2805     $self->{onerror}->(node => $node, type => 'element not allowed');
2806     }
2807    
2808     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2809     unshift @nodes, @$sib;
2810     push @$new_todos, @$ch;
2811     } elsif ($nt == 3 or $nt == 4) {
2812     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2813     $self->{onerror}->(node => $node, type => 'character not allowed');
2814     }
2815     } elsif ($nt == 5) {
2816     unshift @nodes, @{$node->child_nodes};
2817     }
2818     }
2819     return ($new_todos);
2820     } else {
2821     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2822     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2823     push @$sib, $end;
2824     return ($sib, $ch);
2825     }
2826 wakaba 1.1 },
2827     };
2828 wakaba 1.3
2829     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2830 wakaba 1.1
2831     $Element->{$HTML_NS}->{'event-source'} = {
2832     attrs_checker => $GetHTMLAttrsChecker->({
2833     src => $HTMLURIAttrChecker,
2834     }),
2835     checker => $HTMLEmptyChecker,
2836     };
2837    
2838     $Element->{$HTML_NS}->{details} = {
2839     attrs_checker => $GetHTMLAttrsChecker->({
2840     open => $GetHTMLBooleanAttrChecker->('open'),
2841     }),
2842     checker => sub {
2843     my ($self, $todo) = @_;
2844    
2845     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2846     my ($sib, $ch)
2847     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2848     ->($self, $todo);
2849     push @$sib, $end;
2850     return ($sib, $ch);
2851     },
2852     };
2853    
2854     $Element->{$HTML_NS}->{datagrid} = {
2855     attrs_checker => $GetHTMLAttrsChecker->({
2856     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2857     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2858     }),
2859     checker => sub {
2860     my ($self, $todo) = @_;
2861     my $el = $todo->{node};
2862     my $new_todos = [];
2863     my @nodes = (@{$el->child_nodes});
2864    
2865     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2866    
2867     ## Block-table Block* | table | select | datalist | Empty
2868     my $mode = 'any';
2869     while (@nodes) {
2870     my $node = shift @nodes;
2871     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2872    
2873     my $nt = $node->node_type;
2874     if ($nt == 1) {
2875     my $node_ns = $node->namespace_uri;
2876     $node_ns = '' unless defined $node_ns;
2877     my $node_ln = $node->manakai_local_name;
2878     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2879     if ($mode eq 'block') {
2880     $not_allowed = 1
2881     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2882     } elsif ($mode eq 'any') {
2883     if ($node_ns eq $HTML_NS and
2884     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2885     $mode = 'none';
2886     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2887     $mode = 'block';
2888     } else {
2889     $not_allowed = 1;
2890     }
2891     } else {
2892     $not_allowed = 1;
2893     }
2894     $self->{onerror}->(node => $node, type => 'element not allowed')
2895     if $not_allowed;
2896     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2897     unshift @nodes, @$sib;
2898     push @$new_todos, @$ch;
2899     } elsif ($nt == 3 or $nt == 4) {
2900     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2901     $self->{onerror}->(node => $node, type => 'character not allowed');
2902     }
2903     } elsif ($nt == 5) {
2904     unshift @nodes, @{$node->child_nodes};
2905     }
2906     }
2907    
2908     push @$new_todos, $end;
2909     return ($new_todos);
2910     },
2911     };
2912    
2913     $Element->{$HTML_NS}->{command} = {
2914     attrs_checker => $GetHTMLAttrsChecker->({
2915     checked => $GetHTMLBooleanAttrChecker->('checked'),
2916     default => $GetHTMLBooleanAttrChecker->('default'),
2917     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2918     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2919     icon => $HTMLURIAttrChecker,
2920     label => sub { }, ## NOTE: No conformance creteria
2921     radiogroup => sub { }, ## NOTE: No conformance creteria
2922     ## NOTE: |title| has special semantics, but no syntactical difference
2923     type => sub {
2924     my ($self, $attr) = @_;
2925     my $value = $attr->value;
2926     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2927     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2928     }
2929     },
2930     }),
2931     checker => $HTMLEmptyChecker,
2932     };
2933    
2934     $Element->{$HTML_NS}->{menu} = {
2935     attrs_checker => $GetHTMLAttrsChecker->({
2936     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
2937     id => sub {
2938     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
2939     my ($self, $attr) = @_;
2940     my $value = $attr->value;
2941     if (length $value > 0) {
2942     if ($self->{id}->{$value}) {
2943     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2944     push @{$self->{id}->{$value}}, $attr;
2945     } else {
2946     $self->{id}->{$value} = [$attr];
2947     }
2948     } else {
2949     ## NOTE: MUST contain at least one character
2950     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2951     }
2952     if ($value =~ /[\x09-\x0D\x20]/) {
2953     $self->{onerror}->(node => $attr, type => 'space in ID');
2954     }
2955     $self->{menu}->{$value} ||= $attr;
2956     ## ISSUE: <menu id=""><p contextmenu=""> match?
2957     },
2958     label => sub { }, ## NOTE: No conformance creteria
2959     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
2960     }),
2961     checker => sub {
2962     my ($self, $todo) = @_;
2963     my $el = $todo->{node};
2964     my $new_todos = [];
2965     my @nodes = (@{$el->child_nodes});
2966    
2967     my $content = 'li or inline';
2968     while (@nodes) {
2969     my $node = shift @nodes;
2970     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2971    
2972     my $nt = $node->node_type;
2973     if ($nt == 1) {
2974     my $node_ns = $node->namespace_uri;
2975     $node_ns = '' unless defined $node_ns;
2976     my $node_ln = $node->manakai_local_name;
2977     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2978     if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2979     if ($content eq 'inline') {
2980     $not_allowed = 1;
2981     } elsif ($content eq 'li or inline') {
2982     $content = 'li';
2983     }
2984     } else {
2985     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2986     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2987     $content = 'inline';
2988     } else {
2989     $not_allowed = 1;
2990     }
2991     }
2992     $self->{onerror}->(node => $node, type => 'element not allowed')
2993     if $not_allowed;
2994     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2995     unshift @nodes, @$sib;
2996     push @$new_todos, @$ch;
2997     } elsif ($nt == 3 or $nt == 4) {
2998     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2999     if ($content eq 'li') {
3000     $self->{onerror}->(node => $node, type => 'character not allowed');
3001     } elsif ($content eq 'li or inline') {
3002     $content = 'inline';
3003     }
3004     }
3005     } elsif ($nt == 5) {
3006     unshift @nodes, @{$node->child_nodes};
3007     }
3008     }
3009    
3010     for (@$new_todos) {
3011     $_->{inline} = 1;
3012     }
3013     return ($new_todos);
3014     },
3015     };
3016    
3017     $Element->{$HTML_NS}->{legend} = {
3018     attrs_checker => $GetHTMLAttrsChecker->({}),
3019     checker => sub {
3020     my ($self, $todo) = @_;
3021    
3022     my $parent = $todo->{node}->manakai_parent_element;
3023     if (defined $parent) {
3024     my $nsuri = $parent->namespace_uri;
3025     $nsuri = '' unless defined $nsuri;
3026     my $ln = $parent->manakai_local_name;
3027     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3028     return $HTMLInlineChecker->($self, $todo);
3029     } else {
3030     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
3031     }
3032     } else {
3033     return $HTMLInlineChecker->($self, $todo);
3034     }
3035    
3036     ## ISSUE: Content model is defined only for fieldset/legend,
3037     ## details/legend, and figure/legend.
3038     },
3039     };
3040    
3041     $Element->{$HTML_NS}->{div} = {
3042     attrs_checker => $GetHTMLAttrsChecker->({}),
3043     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3044     };
3045    
3046     $Element->{$HTML_NS}->{font} = {
3047     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3048     checker => $HTMLTransparentChecker,
3049     };
3050    
3051     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3052    
3053     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24