/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (hide annotations) (download)
Fri Nov 23 07:35:03 2007 UTC (17 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.21: +5 -1 lines
++ ChangeLog	23 Nov 2007 07:29:08 -0000
	* readme.en.html: Link to user data names documentation
	is added.

2007-11-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/t/ChangeLog	23 Nov 2007 07:08:15 -0000
	* content-model-2.dat: New test data for character references
	in |charset| attribute values.

2007-11-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Nov 2007 07:09:44 -0000
	* NanoDOM.pm (get_user_data, set_user_data): New methods.

	* HTML.pm.src: A flag for character references in attribute
	values are added.  Set |manakai_has_reference| user data
	to |charset| attribute.

2007-11-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	23 Nov 2007 07:08:45 -0000
	* HTML.pm (meta): Character references in |charset| attribute
	values are now erred.

2007-11-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7     my $HTMLMetadataElements = {
8     $HTML_NS => {
9     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
10 wakaba 1.8 noscript 1 datatemplate 1
11 wakaba 1.1 /,
12     },
13     };
14    
15     my $HTMLSectioningElements = {
16     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
17     };
18    
19     my $HTMLBlockLevelElements = {
20     $HTML_NS => {
21     qw/
22     section 1 nav 1 article 1 blockquote 1 aside 1
23     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
24     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
25     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
26     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
27 wakaba 1.8 datatemplate 1
28 wakaba 1.1 /,
29     },
30     };
31    
32     my $HTMLStrictlyInlineLevelElements = {
33     $HTML_NS => {
34     qw/
35     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
36     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
37     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
38     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
39     script 1 noscript 1 event-source 1 command 1 font 1
40     /,
41     },
42     };
43    
44     my $HTMLStructuredInlineLevelElements = {
45     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
46     };
47    
48     my $HTMLInteractiveElements = {
49     $HTML_NS => {a => 1, details => 1, datagrid => 1},
50     };
51     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
52     ## of interactive elements
53    
54     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
55    
56     #my $HTMLSemiTransparentElements = {
57     # $HTML_NS => {qw/video 1 audio 1/},
58     #};
59    
60     my $HTMLEmbededElements = {
61     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
62     };
63    
64     ## Empty
65     my $HTMLEmptyChecker = sub {
66     my ($self, $todo) = @_;
67     my $el = $todo->{node};
68     my $new_todos = [];
69     my @nodes = (@{$el->child_nodes});
70    
71     while (@nodes) {
72     my $node = shift @nodes;
73     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
74    
75     my $nt = $node->node_type;
76     if ($nt == 1) {
77 wakaba 1.8 my $node_ns = $node->namespace_uri;
78     $node_ns = '' unless defined $node_ns;
79     my $node_ln = $node->manakai_local_name;
80     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
81     #
82     } else {
83     ## NOTE: |minuses| list is not checked since redundant
84     $self->{onerror}->(node => $node, type => 'element not allowed');
85     }
86 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
87     unshift @nodes, @$sib;
88     push @$new_todos, @$ch;
89     } elsif ($nt == 3 or $nt == 4) {
90     if ($node->data =~ /[^\x09-\x0D\x20]/) {
91     $self->{onerror}->(node => $node, type => 'character not allowed');
92     }
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97     return ($new_todos);
98     };
99    
100     ## Text
101     my $HTMLTextChecker = sub {
102     my ($self, $todo) = @_;
103     my $el = $todo->{node};
104     my $new_todos = [];
105     my @nodes = (@{$el->child_nodes});
106    
107     while (@nodes) {
108     my $node = shift @nodes;
109     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
110    
111     my $nt = $node->node_type;
112     if ($nt == 1) {
113 wakaba 1.8 my $node_ns = $node->namespace_uri;
114     $node_ns = '' unless defined $node_ns;
115     my $node_ln = $node->manakai_local_name;
116     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
117     #
118     } else {
119     ## NOTE: |minuses| list is not checked since redundant
120     $self->{onerror}->(node => $node, type => 'element not allowed');
121     }
122 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
123     unshift @nodes, @$sib;
124     push @$new_todos, @$ch;
125     } elsif ($nt == 5) {
126     unshift @nodes, @{$node->child_nodes};
127     }
128     }
129     return ($new_todos);
130     };
131    
132     ## Zero or more |html:style| elements,
133     ## followed by zero or more block-level elements
134     my $HTMLStylableBlockChecker = sub {
135     my ($self, $todo) = @_;
136     my $el = $todo->{node};
137     my $new_todos = [];
138     my @nodes = (@{$el->child_nodes});
139    
140     my $has_non_style;
141     while (@nodes) {
142     my $node = shift @nodes;
143     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
144    
145     my $nt = $node->node_type;
146     if ($nt == 1) {
147     my $node_ns = $node->namespace_uri;
148     $node_ns = '' unless defined $node_ns;
149     my $node_ln = $node->manakai_local_name;
150     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
151     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
152     $not_allowed = 1 if $has_non_style or
153     not $node->has_attribute_ns (undef, 'scoped');
154     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
155     $has_non_style = 1;
156 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
157     #
158 wakaba 1.1 } else {
159     $has_non_style = 1;
160     $not_allowed = 1;
161     }
162     $self->{onerror}->(node => $node, type => 'element not allowed')
163     if $not_allowed;
164     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
165     unshift @nodes, @$sib;
166     push @$new_todos, @$ch;
167     } elsif ($nt == 3 or $nt == 4) {
168     if ($node->data =~ /[^\x09-\x0D\x20]/) {
169     $self->{onerror}->(node => $node, type => 'character not allowed');
170     }
171     } elsif ($nt == 5) {
172     unshift @nodes, @{$node->child_nodes};
173     }
174     }
175     return ($new_todos);
176     }; # $HTMLStylableBlockChecker
177    
178     ## Zero or more block-level elements
179     my $HTMLBlockChecker = sub {
180     my ($self, $todo) = @_;
181     my $el = $todo->{node};
182     my $new_todos = [];
183     my @nodes = (@{$el->child_nodes});
184    
185     while (@nodes) {
186     my $node = shift @nodes;
187     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
188    
189     my $nt = $node->node_type;
190     if ($nt == 1) {
191     my $node_ns = $node->namespace_uri;
192     $node_ns = '' unless defined $node_ns;
193     my $node_ln = $node->manakai_local_name;
194     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
195     $not_allowed = 1
196 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
197     $self->{pluses}->{$node_ns}->{$node_ln};
198 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
199     if $not_allowed;
200     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
201     unshift @nodes, @$sib;
202     push @$new_todos, @$ch;
203     } elsif ($nt == 3 or $nt == 4) {
204     if ($node->data =~ /[^\x09-\x0D\x20]/) {
205     $self->{onerror}->(node => $node, type => 'character not allowed');
206     }
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211     return ($new_todos);
212     }; # $HTMLBlockChecker
213    
214     ## Inline-level content
215     my $HTMLInlineChecker = sub {
216     my ($self, $todo) = @_;
217     my $el = $todo->{node};
218     my $new_todos = [];
219     my @nodes = (@{$el->child_nodes});
220    
221     while (@nodes) {
222     my $node = shift @nodes;
223     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
224    
225     my $nt = $node->node_type;
226     if ($nt == 1) {
227     my $node_ns = $node->namespace_uri;
228     $node_ns = '' unless defined $node_ns;
229     my $node_ln = $node->manakai_local_name;
230     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
231     $not_allowed = 1
232     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
233 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
234     $self->{pluses}->{$node_ns}->{$node_ln};
235 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
236     if $not_allowed;
237     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
238     unshift @nodes, @$sib;
239     push @$new_todos, @$ch;
240     } elsif ($nt == 5) {
241     unshift @nodes, @{$node->child_nodes};
242     }
243     }
244    
245     for (@$new_todos) {
246     $_->{inline} = 1;
247     }
248     return ($new_todos);
249     }; # $HTMLInlineChecker
250    
251     ## Strictly inline-level content
252     my $HTMLStrictlyInlineChecker = sub {
253     my ($self, $todo) = @_;
254     my $el = $todo->{node};
255     my $new_todos = [];
256     my @nodes = (@{$el->child_nodes});
257    
258     while (@nodes) {
259     my $node = shift @nodes;
260     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
261    
262     my $nt = $node->node_type;
263     if ($nt == 1) {
264     my $node_ns = $node->namespace_uri;
265     $node_ns = '' unless defined $node_ns;
266     my $node_ln = $node->manakai_local_name;
267     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
268     $not_allowed = 1
269 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
270     $self->{pluses}->{$node_ns}->{$node_ln};
271 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
272     if $not_allowed;
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 5) {
277     unshift @nodes, @{$node->child_nodes};
278     }
279     }
280    
281     for (@$new_todos) {
282     $_->{inline} = 1;
283     $_->{strictly_inline} = 1;
284     }
285     return ($new_todos);
286     }; # $HTMLStrictlyInlineChecker
287    
288 wakaba 1.8 ## Inline-level or strictly inline-level content
289 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
290     my ($self, $todo) = @_;
291     my $el = $todo->{node};
292     my $new_todos = [];
293     my @nodes = (@{$el->child_nodes});
294    
295     while (@nodes) {
296     my $node = shift @nodes;
297     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
298    
299     my $nt = $node->node_type;
300     if ($nt == 1) {
301     my $node_ns = $node->namespace_uri;
302     $node_ns = '' unless defined $node_ns;
303     my $node_ln = $node->manakai_local_name;
304     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
305     if ($todo->{strictly_inline}) {
306     $not_allowed = 1
307 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
308     $self->{pluses}->{$node_ns}->{$node_ln};
309 wakaba 1.1 } else {
310     $not_allowed = 1
311     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
312 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
313     $self->{pluses}->{$node_ns}->{$node_ln};
314 wakaba 1.1 }
315     $self->{onerror}->(node => $node, type => 'element not allowed')
316     if $not_allowed;
317     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
318     unshift @nodes, @$sib;
319     push @$new_todos, @$ch;
320     } elsif ($nt == 5) {
321     unshift @nodes, @{$node->child_nodes};
322     }
323     }
324    
325     for (@$new_todos) {
326     $_->{inline} = 1;
327     $_->{strictly_inline} = 1;
328     }
329     return ($new_todos);
330     }; # $HTMLInlineOrStrictlyInlineChecker
331    
332 wakaba 1.13 ## TODO: New "significant" SHOULD-level requirement (revision 1114).
333 wakaba 1.1
334 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
335 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
336     my ($self, $todo) = @_;
337     my $el = $todo->{node};
338     my $new_todos = [];
339     my @nodes = (@{$el->child_nodes});
340    
341     my $content = 'block-or-inline'; # or 'block' or 'inline'
342     my @block_not_inline;
343     while (@nodes) {
344     my $node = shift @nodes;
345     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
346    
347 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
348     ## is conforming or not.
349    
350 wakaba 1.1 my $nt = $node->node_type;
351     if ($nt == 1) {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
356     if ($content eq 'block') {
357     $not_allowed = 1
358 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
359     $self->{pluses}->{$node_ns}->{$node_ln};
360 wakaba 1.1 } elsif ($content eq 'inline') {
361     $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
364     $self->{pluses}->{$node_ns}->{$node_ln};
365 wakaba 1.1 } else {
366     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
367     my $is_inline
368     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
369     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
370    
371     push @block_not_inline, $node
372     if $is_block and not $is_inline and not $not_allowed;
373 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
374 wakaba 1.1 $content = 'inline';
375     for (@block_not_inline) {
376     $self->{onerror}->(node => $_, type => 'element not allowed');
377     }
378     $not_allowed = 1 unless $is_inline;
379     }
380     }
381     $self->{onerror}->(node => $node, type => 'element not allowed')
382     if $not_allowed;
383     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
384     unshift @nodes, @$sib;
385     push @$new_todos, @$ch;
386     } elsif ($nt == 3 or $nt == 4) {
387     if ($node->data =~ /[^\x09-\x0D\x20]/) {
388     if ($content eq 'block') {
389     $self->{onerror}->(node => $node, type => 'character not allowed');
390     } else {
391     $content = 'inline';
392     for (@block_not_inline) {
393     $self->{onerror}->(node => $_, type => 'element not allowed');
394     }
395     }
396     }
397     } elsif ($nt == 5) {
398     unshift @nodes, @{$node->child_nodes};
399     }
400     }
401    
402     if ($content eq 'inline') {
403     for (@$new_todos) {
404     $_->{inline} = 1;
405     }
406     }
407     return ($new_todos);
408     };
409    
410     ## Zero or more XXX element, then either block-level or inline-level
411     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
412     my ($elnsuri, $ellname) = @_;
413     return sub {
414     my ($self, $todo) = @_;
415     my $el = $todo->{node};
416     my $new_todos = [];
417     my @nodes = (@{$el->child_nodes});
418    
419     my $has_non_style;
420     my $content = 'block-or-inline'; # or 'block' or 'inline'
421     my @block_not_inline;
422     while (@nodes) {
423     my $node = shift @nodes;
424     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
425    
426     my $nt = $node->node_type;
427     if ($nt == 1) {
428     my $node_ns = $node->namespace_uri;
429     $node_ns = '' unless defined $node_ns;
430     my $node_ln = $node->manakai_local_name;
431     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
432     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
433     $not_allowed = 1 if $has_non_style;
434     if ($ellname eq 'style' and
435     not $node->has_attribute_ns (undef, 'scoped')) {
436     $not_allowed = 1;
437     }
438     } elsif ($content eq 'block') {
439     $has_non_style = 1;
440     $not_allowed = 1
441 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
442     $self->{pluses}->{$node_ns}->{$node_ln};
443 wakaba 1.1 } elsif ($content eq 'inline') {
444     $has_non_style = 1;
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
447 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
448     $self->{pluses}->{$node_ns}->{$node_ln};
449 wakaba 1.1 } else {
450 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
451 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
452     my $is_inline
453     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455    
456     push @block_not_inline, $node
457     if $is_block and not $is_inline and not $not_allowed;
458 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
459 wakaba 1.1 $content = 'inline';
460     for (@block_not_inline) {
461     $self->{onerror}->(node => $_, type => 'element not allowed');
462     }
463     $not_allowed = 1 unless $is_inline;
464     }
465     }
466     $self->{onerror}->(node => $node, type => 'element not allowed')
467     if $not_allowed;
468     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
469     unshift @nodes, @$sib;
470     push @$new_todos, @$ch;
471     } elsif ($nt == 3 or $nt == 4) {
472     if ($node->data =~ /[^\x09-\x0D\x20]/) {
473     $has_non_style = 1;
474     if ($content eq 'block') {
475     $self->{onerror}->(node => $node, type => 'character not allowed');
476     } else {
477     $content = 'inline';
478     for (@block_not_inline) {
479     $self->{onerror}->(node => $_, type => 'element not allowed');
480     }
481     }
482     }
483     } elsif ($nt == 5) {
484     unshift @nodes, @{$node->child_nodes};
485     }
486     }
487    
488     if ($content eq 'inline') {
489     for (@$new_todos) {
490     $_->{inline} = 1;
491     }
492     }
493     return ($new_todos);
494     };
495     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
496    
497     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
498    
499     our $AttrChecker;
500    
501     my $GetHTMLEnumeratedAttrChecker = sub {
502     my $states = shift; # {value => conforming ? 1 : -1}
503     return sub {
504     my ($self, $attr) = @_;
505     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
506     if ($states->{$value} > 0) {
507     #
508     } elsif ($states->{$value}) {
509     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
510     } else {
511     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
512     }
513     };
514     }; # $GetHTMLEnumeratedAttrChecker
515    
516     my $GetHTMLBooleanAttrChecker = sub {
517     my $local_name = shift;
518     return sub {
519     my ($self, $attr) = @_;
520     my $value = $attr->value;
521     unless ($value eq $local_name or $value eq '') {
522     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
523     }
524     };
525     }; # $GetHTMLBooleanAttrChecker
526    
527 wakaba 1.8 ## Unordered set of space-separated tokens
528 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
529 wakaba 1.8 my ($self, $attr) = @_;
530     my %word;
531     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
532     unless ($word{$word}) {
533     $word{$word} = 1;
534     } else {
535     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
536     }
537     }
538 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
539 wakaba 1.8
540 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
541     ## whose allowed values are defined by the section on link types)
542     my $HTMLLinkTypesAttrChecker = sub {
543 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
544 wakaba 1.1 my %word;
545     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
546     unless ($word{$word}) {
547     $word{$word} = 1;
548 wakaba 1.18 } elsif ($word eq 'up') {
549     #
550 wakaba 1.1 } else {
551     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
552     }
553     }
554     ## NOTE: Case sensitive match (since HTML5 spec does not say link
555     ## types are case-insensitive and it says "The value should not
556     ## be confusingly similar to any other defined value (e.g.
557     ## differing only in case).").
558     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
559     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
560     ## values to be used conformingly.
561     require Whatpm::_LinkTypeList;
562     our $LinkType;
563     for my $word (keys %word) {
564     my $def = $LinkType->{$word};
565     if (defined $def) {
566     if ($def->{status} eq 'accepted') {
567     if (defined $def->{effect}->[$a_or_area]) {
568     #
569     } else {
570     $self->{onerror}->(node => $attr,
571     type => 'link type:bad context:'.$word);
572     }
573     } elsif ($def->{status} eq 'proposal') {
574     $self->{onerror}->(node => $attr, level => 's',
575     type => 'link type:proposed:'.$word);
576 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
577     #
578     } else {
579     $self->{onerror}->(node => $attr,
580     type => 'link type:bad context:'.$word);
581     }
582 wakaba 1.1 } else { # rejected or synonym
583     $self->{onerror}->(node => $attr,
584     type => 'link type:non-conforming:'.$word);
585     }
586 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
587     if ($word eq 'alternate') {
588     #
589     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
590     $todo->{has_hyperlink_link_type} = 1;
591     }
592     }
593 wakaba 1.1 if ($def->{unique}) {
594     unless ($self->{has_link_type}->{$word}) {
595     $self->{has_link_type}->{$word} = 1;
596     } else {
597     $self->{onerror}->(node => $attr,
598     type => 'link type:duplicate:'.$word);
599     }
600     }
601     } else {
602     $self->{onerror}->(node => $attr, level => 'unsupported',
603     type => 'link type:'.$word);
604     }
605     }
606 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
607     if $word{alternate} and not $word{stylesheet};
608 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
609     ## says that using both X-Pingback: header field and HTML
610     ## <link rel=pingback> is deprecated and if both appears they
611     ## SHOULD contain exactly the same value.
612     ## ISSUE: Pingback 1.0 specification defines the exact representation
613     ## of its link element, which cannot be tested by the current arch.
614     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
615     ## include any string that matches to the pattern for the rel=pingback link,
616     ## which again inpossible to test.
617     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
618 wakaba 1.12
619     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
620 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
621     ## then they SHOULD be described in different paragraphs.".
622 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
623 wakaba 1.20
624     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
625 wakaba 1.1
626     ## URI (or IRI)
627     my $HTMLURIAttrChecker = sub {
628     my ($self, $attr) = @_;
629     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
630     my $value = $attr->value;
631     Whatpm::URIChecker->check_iri_reference ($value, sub {
632     my %opt = @_;
633     $self->{onerror}->(node => $attr, level => $opt{level},
634     type => 'URI::'.$opt{type}.
635     (defined $opt{position} ? ':'.$opt{position} : ''));
636     });
637 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
638 wakaba 1.1 }; # $HTMLURIAttrChecker
639    
640     ## A space separated list of one or more URIs (or IRIs)
641     my $HTMLSpaceURIsAttrChecker = sub {
642     my ($self, $attr) = @_;
643     my $i = 0;
644     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
645     Whatpm::URIChecker->check_iri_reference ($value, sub {
646     my %opt = @_;
647     $self->{onerror}->(node => $attr, level => $opt{level},
648 wakaba 1.2 type => 'URIs:'.':'.
649     $opt{type}.':'.$i.
650 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
651     });
652     $i++;
653     }
654     ## ISSUE: Relative references?
655     ## ISSUE: Leading or trailing white spaces are conformant?
656     ## ISSUE: A sequence of white space characters are conformant?
657     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
658     ## NOTE: Duplication seems not an error.
659 wakaba 1.4 $self->{has_uri_attr} = 1;
660 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
661    
662     my $HTMLDatetimeAttrChecker = sub {
663     my ($self, $attr) = @_;
664     my $value = $attr->value;
665     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
666     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
667     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
668     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
669     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
670     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
671     if $d < 1 or
672     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
673     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
674     if $M == 2 and $d == 29 and
675     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
676     } else {
677     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
678     }
679     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
680     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
681     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
682     if defined $s and $s > 59;
683     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
684     if $zh > 23;
685     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
686     if $zm > 59;
687     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
688     } else {
689     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
690     }
691     }; # $HTMLDatetimeAttrChecker
692    
693     my $HTMLIntegerAttrChecker = sub {
694     my ($self, $attr) = @_;
695     my $value = $attr->value;
696     unless ($value =~ /\A-?[0-9]+\z/) {
697     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
698     }
699     }; # $HTMLIntegerAttrChecker
700    
701     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
702     my $range_check = shift;
703     return sub {
704     my ($self, $attr) = @_;
705     my $value = $attr->value;
706     if ($value =~ /\A[0-9]+\z/) {
707     unless ($range_check->($value + 0)) {
708     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
709     }
710     } else {
711     $self->{onerror}->(node => $attr,
712     type => 'nninteger:syntax error');
713     }
714     };
715     }; # $GetHTMLNonNegativeIntegerAttrChecker
716    
717     my $GetHTMLFloatingPointNumberAttrChecker = sub {
718     my $range_check = shift;
719     return sub {
720     my ($self, $attr) = @_;
721     my $value = $attr->value;
722     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
723     unless ($range_check->($value + 0)) {
724     $self->{onerror}->(node => $attr, type => 'float:out of range');
725     }
726     } else {
727     $self->{onerror}->(node => $attr,
728     type => 'float:syntax error');
729     }
730     };
731     }; # $GetHTMLFloatingPointNumberAttrChecker
732    
733     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
734     ## ISSUE: RFC 2046 does not define syntax of media types.
735     ## ISSUE: The definition of "a valid MIME type" is unknown.
736     ## Syntactical correctness?
737     my $HTMLIMTAttrChecker = sub {
738     my ($self, $attr) = @_;
739     my $value = $attr->value;
740     ## ISSUE: RFC 2045 Content-Type header field allows insertion
741     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
742     ## ISSUE: RFC 2231 extension? Maybe no.
743     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
744     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
745     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
746     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
747     my @type = ($1, $2);
748     my $param = $3;
749     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
750     if (defined $2) {
751     push @type, $1 => $2;
752     } else {
753     my $n = $1;
754     my $v = $2;
755     $v =~ s/\\(.)/$1/gs;
756     push @type, $n => $v;
757     }
758     }
759     require Whatpm::IMTChecker;
760     Whatpm::IMTChecker->check_imt (sub {
761     my %opt = @_;
762     $self->{onerror}->(node => $attr, level => $opt{level},
763     type => 'IMT:'.$opt{type});
764     }, @type);
765     } else {
766     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
767     }
768     }; # $HTMLIMTAttrChecker
769    
770     my $HTMLLanguageTagAttrChecker = sub {
771 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
772    
773 wakaba 1.1 my ($self, $attr) = @_;
774 wakaba 1.6 my $value = $attr->value;
775     require Whatpm::LangTag;
776     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
777     my %opt = @_;
778     my $type = 'LangTag:'.$opt{type};
779     $type .= ':' . $opt{subtag} if defined $opt{subtag};
780     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
781     level => $opt{level});
782     });
783 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
784 wakaba 1.6
785     ## TODO: testdata
786 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
787    
788     ## "A valid media query [MQ]"
789     my $HTMLMQAttrChecker = sub {
790     my ($self, $attr) = @_;
791     $self->{onerror}->(node => $attr, level => 'unsupported',
792     type => 'media query');
793     ## ISSUE: What is "a valid media query"?
794     }; # $HTMLMQAttrChecker
795    
796     my $HTMLEventHandlerAttrChecker = sub {
797     my ($self, $attr) = @_;
798     $self->{onerror}->(node => $attr, level => 'unsupported',
799     type => 'event handler');
800     ## TODO: MUST contain valid ECMAScript code matching the
801     ## ECMAScript |FunctionBody| production. [ECMA262]
802     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
803     ## ISSUE: Automatic semicolon insertion does not apply?
804     ## ISSUE: Other script languages?
805     }; # $HTMLEventHandlerAttrChecker
806    
807     my $HTMLUsemapAttrChecker = sub {
808     my ($self, $attr) = @_;
809     ## MUST be a valid hashed ID reference to a |map| element
810     my $value = $attr->value;
811     if ($value =~ s/^#//) {
812     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
813     push @{$self->{usemap}}, [$value => $attr];
814     } else {
815     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
816     }
817     ## NOTE: Space characters in hashed ID references are conforming.
818     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
819     }; # $HTMLUsemapAttrChecker
820    
821     my $HTMLTargetAttrChecker = sub {
822     my ($self, $attr) = @_;
823     my $value = $attr->value;
824     if ($value =~ /^_/) {
825     $value = lc $value; ## ISSUE: ASCII case-insentitive?
826     unless ({
827     _self => 1, _parent => 1, _top => 1,
828     }->{$value}) {
829     $self->{onerror}->(node => $attr,
830     type => 'reserved browsing context name');
831     }
832     } else {
833     #$ ISSUE: An empty string is conforming?
834     }
835     }; # $HTMLTargetAttrChecker
836    
837     my $HTMLAttrChecker = {
838     id => sub {
839     ## NOTE: |map| has its own variant of |id=""| checker
840     my ($self, $attr) = @_;
841     my $value = $attr->value;
842     if (length $value > 0) {
843     if ($self->{id}->{$value}) {
844     $self->{onerror}->(node => $attr, type => 'duplicate ID');
845     push @{$self->{id}->{$value}}, $attr;
846     } else {
847     $self->{id}->{$value} = [$attr];
848     }
849     if ($value =~ /[\x09-\x0D\x20]/) {
850     $self->{onerror}->(node => $attr, type => 'space in ID');
851     }
852     } else {
853     ## NOTE: MUST contain at least one character
854     $self->{onerror}->(node => $attr, type => 'empty attribute value');
855     }
856     },
857     title => sub {}, ## NOTE: No conformance creteria
858     lang => sub {
859     my ($self, $attr) = @_;
860 wakaba 1.6 my $value = $attr->value;
861     if ($value eq '') {
862     #
863     } else {
864     require Whatpm::LangTag;
865     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
866     my %opt = @_;
867     my $type = 'LangTag:'.$opt{type};
868     $type .= ':' . $opt{subtag} if defined $opt{subtag};
869     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
870     level => $opt{level});
871     });
872     }
873 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
874     unless ($attr->owner_document->manakai_is_html) {
875     $self->{onerror}->(node => $attr, type => 'in XML:lang');
876     }
877 wakaba 1.6
878     ## TODO: test data
879 wakaba 1.1 },
880     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
881     class => sub {
882     my ($self, $attr) = @_;
883     my %word;
884     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
885     unless ($word{$word}) {
886     $word{$word} = 1;
887     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
888     } else {
889     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
890     }
891     }
892     },
893     contextmenu => sub {
894     my ($self, $attr) = @_;
895     my $value = $attr->value;
896     push @{$self->{contextmenu}}, [$value => $attr];
897     ## ISSUE: "The value must be the ID of a menu element in the DOM."
898     ## What is "in the DOM"? A menu Element node that is not part
899     ## of the Document tree is in the DOM? A menu Element node that
900     ## belong to another Document tree is in the DOM?
901     },
902     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
903 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
904     ## TODO: ref, template, registrationmark
905 wakaba 1.1 };
906    
907     for (qw/
908     onabort onbeforeunload onblur onchange onclick oncontextmenu
909     ondblclick ondrag ondragend ondragenter ondragleave ondragover
910     ondragstart ondrop onerror onfocus onkeydown onkeypress
911     onkeyup onload onmessage onmousedown onmousemove onmouseout
912     onmouseover onmouseup onmousewheel onresize onscroll onselect
913     onsubmit onunload
914     /) {
915     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
916     }
917    
918     my $GetHTMLAttrsChecker = sub {
919     my $element_specific_checker = shift;
920     return sub {
921     my ($self, $todo) = @_;
922     for my $attr (@{$todo->{node}->attributes}) {
923     my $attr_ns = $attr->namespace_uri;
924     $attr_ns = '' unless defined $attr_ns;
925     my $attr_ln = $attr->manakai_local_name;
926     my $checker;
927     if ($attr_ns eq '') {
928     $checker = $element_specific_checker->{$attr_ln}
929     || $HTMLAttrChecker->{$attr_ln};
930     }
931     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
932     || $AttrChecker->{$attr_ns}->{''};
933     if ($checker) {
934     $checker->($self, $attr, $todo);
935     } else {
936     $self->{onerror}->(node => $attr, level => 'unsupported',
937     type => 'attribute');
938     ## ISSUE: No comformance createria for unknown attributes in the spec
939     }
940     }
941     };
942     }; # $GetHTMLAttrsChecker
943    
944     our $Element;
945     our $ElementDefault;
946     our $AnyChecker;
947    
948     $Element->{$HTML_NS}->{''} = {
949     attrs_checker => $GetHTMLAttrsChecker->({}),
950     checker => $ElementDefault->{checker},
951     };
952    
953     $Element->{$HTML_NS}->{html} = {
954     is_root => 1,
955     attrs_checker => $GetHTMLAttrsChecker->({
956 wakaba 1.16 manifest => $HTMLURIAttrChecker,
957 wakaba 1.1 xmlns => sub {
958     my ($self, $attr) = @_;
959     my $value = $attr->value;
960     unless ($value eq $HTML_NS) {
961     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
962     }
963     unless ($attr->owner_document->manakai_is_html) {
964     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
965     ## TODO: Test
966     }
967     },
968     }),
969     checker => sub {
970     my ($self, $todo) = @_;
971     my $el = $todo->{node};
972     my $new_todos = [];
973     my @nodes = (@{$el->child_nodes});
974    
975     my $phase = 'before head';
976     while (@nodes) {
977     my $node = shift @nodes;
978     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
979    
980     my $nt = $node->node_type;
981     if ($nt == 1) {
982     my $node_ns = $node->namespace_uri;
983     $node_ns = '' unless defined $node_ns;
984     my $node_ln = $node->manakai_local_name;
985     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
986 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
987     #
988     } elsif ($phase eq 'before head') {
989 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
990     $phase = 'after head';
991     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
992     $self->{onerror}->(node => $node, type => 'ps element missing:head');
993     $phase = 'after body';
994     } else {
995     $not_allowed = 1;
996     # before head
997     }
998     } elsif ($phase eq 'after head') {
999     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1000     $phase = 'after body';
1001     } else {
1002     $not_allowed = 1;
1003     # after head
1004     }
1005     } else { #elsif ($phase eq 'after body') {
1006     $not_allowed = 1;
1007     # after body
1008     }
1009     $self->{onerror}->(node => $node, type => 'element not allowed')
1010     if $not_allowed;
1011     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1012     unshift @nodes, @$sib;
1013     push @$new_todos, @$ch;
1014     } elsif ($nt == 3 or $nt == 4) {
1015     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1016     $self->{onerror}->(node => $node, type => 'character not allowed');
1017     }
1018     } elsif ($nt == 5) {
1019     unshift @nodes, @{$node->child_nodes};
1020     }
1021     }
1022    
1023     if ($phase eq 'before head') {
1024     $self->{onerror}->(node => $el, type => 'child element missing:head');
1025     $self->{onerror}->(node => $el, type => 'child element missing:body');
1026     } elsif ($phase eq 'after head') {
1027     $self->{onerror}->(node => $el, type => 'child element missing:body');
1028     }
1029    
1030     return ($new_todos);
1031     },
1032     };
1033    
1034     $Element->{$HTML_NS}->{head} = {
1035     attrs_checker => $GetHTMLAttrsChecker->({}),
1036     checker => sub {
1037     my ($self, $todo) = @_;
1038     my $el = $todo->{node};
1039     my $new_todos = [];
1040     my @nodes = (@{$el->child_nodes});
1041    
1042     my $has_title;
1043     my $phase = 'initial'; # 'after charset', 'after base'
1044     while (@nodes) {
1045     my $node = shift @nodes;
1046     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1047    
1048     my $nt = $node->node_type;
1049     if ($nt == 1) {
1050     my $node_ns = $node->namespace_uri;
1051     $node_ns = '' unless defined $node_ns;
1052     my $node_ln = $node->manakai_local_name;
1053     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1054 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1055     #
1056     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1057 wakaba 1.1 $phase = 'after base';
1058     unless ($has_title) {
1059     $has_title = 1;
1060     } else {
1061     $not_allowed = 1;
1062     }
1063     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1064     if ($node->has_attribute_ns (undef, 'charset')) {
1065     if ($phase eq 'initial') {
1066     $phase = 'after charset';
1067     } else {
1068     $not_allowed = 1;
1069     ## NOTE: See also |base|'s "contexts" field in the spec
1070     }
1071 wakaba 1.5 } elsif ($node->has_attribute_ns (undef, 'name') or
1072     $node->has_attribute_ns (undef, 'http-equiv')) {
1073     $phase = 'after base';
1074 wakaba 1.1 } else {
1075     $phase = 'after base';
1076 wakaba 1.5 $not_allowed = 1;
1077 wakaba 1.1 }
1078     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1079     if ($phase eq 'initial' or $phase eq 'after charset') {
1080     $phase = 'after base';
1081     } else {
1082     $not_allowed = 1;
1083     }
1084     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1085     $phase = 'after base';
1086     if ($node->has_attribute_ns (undef, 'scoped')) {
1087     $not_allowed = 1;
1088     }
1089     } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1090     $phase = 'after base';
1091     } else {
1092     $not_allowed = 1;
1093     }
1094     $self->{onerror}->(node => $node, type => 'element not allowed')
1095     if $not_allowed;
1096 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1097 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1098     unshift @nodes, @$sib;
1099     push @$new_todos, @$ch;
1100     } elsif ($nt == 3 or $nt == 4) {
1101     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1102     $self->{onerror}->(node => $node, type => 'character not allowed');
1103     }
1104     } elsif ($nt == 5) {
1105     unshift @nodes, @{$node->child_nodes};
1106     }
1107     }
1108     unless ($has_title) {
1109     $self->{onerror}->(node => $el, type => 'child element missing:title');
1110     }
1111     return ($new_todos);
1112     },
1113     };
1114    
1115     $Element->{$HTML_NS}->{title} = {
1116     attrs_checker => $GetHTMLAttrsChecker->({}),
1117     checker => $HTMLTextChecker,
1118     };
1119    
1120     $Element->{$HTML_NS}->{base} = {
1121 wakaba 1.4 attrs_checker => sub {
1122     my ($self, $todo) = @_;
1123    
1124 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1125     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1126    
1127     if ($self->{has_uri_attr} and $has_href) {
1128 wakaba 1.4 ## ISSUE: Are these examples conforming?
1129     ## <head profile="a b c"><base href> (except for |profile|'s
1130     ## non-conformance)
1131     ## <title xml:base="relative"/><base href/> (maybe it should be)
1132     ## <unknown xmlns="relative"/><base href/> (assuming that
1133     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1134     ## <?xml-stylesheet href="relative"?>...<base href=""/>
1135     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1136     ## <style>@import 'relative';</style><base href>
1137     ## <script>location.href = 'relative';</script><base href>
1138 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1139     ## an exception.
1140 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1141     type => 'basehref after URI attribute');
1142     }
1143 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1144 wakaba 1.4 ## ISSUE: Are these examples conforming?
1145     ## <head><title xlink:href=""/><base target="name"/></head>
1146     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1147     ## (assuming that |xbl:xbl| is allowed before |base|)
1148     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1149     ## <link href=""/><base target="name"/>
1150     ## <link rel=unknown href=""><base target=name>
1151     $self->{onerror}->(node => $todo->{node},
1152     type => 'basetarget after hyperlink');
1153     }
1154    
1155 wakaba 1.14 if (not $has_href and not $has_target) {
1156     $self->{onerror}->(node => $todo->{node},
1157     type => 'attribute missing:href|target');
1158     }
1159    
1160 wakaba 1.4 return $GetHTMLAttrsChecker->({
1161     href => $HTMLURIAttrChecker,
1162     target => $HTMLTargetAttrChecker,
1163     })->($self, $todo);
1164     },
1165 wakaba 1.1 checker => $HTMLEmptyChecker,
1166     };
1167    
1168     $Element->{$HTML_NS}->{link} = {
1169     attrs_checker => sub {
1170     my ($self, $todo) = @_;
1171     $GetHTMLAttrsChecker->({
1172     href => $HTMLURIAttrChecker,
1173 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1174 wakaba 1.1 media => $HTMLMQAttrChecker,
1175     hreflang => $HTMLLanguageTagAttrChecker,
1176     type => $HTMLIMTAttrChecker,
1177     ## NOTE: Though |title| has special semantics,
1178     ## syntactically same as the |title| as global attribute.
1179     })->($self, $todo);
1180 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1181     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1182     } else {
1183 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1184     type => 'attribute missing:href');
1185     }
1186     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1187     $self->{onerror}->(node => $todo->{node},
1188     type => 'attribute missing:rel');
1189     }
1190     },
1191     checker => $HTMLEmptyChecker,
1192     };
1193    
1194     $Element->{$HTML_NS}->{meta} = {
1195     attrs_checker => sub {
1196     my ($self, $todo) = @_;
1197     my $name_attr;
1198     my $http_equiv_attr;
1199     my $charset_attr;
1200     my $content_attr;
1201     for my $attr (@{$todo->{node}->attributes}) {
1202     my $attr_ns = $attr->namespace_uri;
1203     $attr_ns = '' unless defined $attr_ns;
1204     my $attr_ln = $attr->manakai_local_name;
1205     my $checker;
1206     if ($attr_ns eq '') {
1207     if ($attr_ln eq 'content') {
1208     $content_attr = $attr;
1209     $checker = 1;
1210     } elsif ($attr_ln eq 'name') {
1211     $name_attr = $attr;
1212     $checker = 1;
1213     } elsif ($attr_ln eq 'http-equiv') {
1214     $http_equiv_attr = $attr;
1215     $checker = 1;
1216     } elsif ($attr_ln eq 'charset') {
1217     $charset_attr = $attr;
1218     $checker = 1;
1219     } else {
1220     $checker = $HTMLAttrChecker->{$attr_ln}
1221     || $AttrChecker->{$attr_ns}->{$attr_ln}
1222     || $AttrChecker->{$attr_ns}->{''};
1223     }
1224     } else {
1225     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1226     || $AttrChecker->{$attr_ns}->{''};
1227     }
1228     if ($checker) {
1229     $checker->($self, $attr) if ref $checker;
1230     } else {
1231     $self->{onerror}->(node => $attr, level => 'unsupported',
1232     type => 'attribute');
1233     ## ISSUE: No comformance createria for unknown attributes in the spec
1234     }
1235     }
1236    
1237     if (defined $name_attr) {
1238     if (defined $http_equiv_attr) {
1239     $self->{onerror}->(node => $http_equiv_attr,
1240     type => 'attribute not allowed');
1241     } elsif (defined $charset_attr) {
1242     $self->{onerror}->(node => $charset_attr,
1243     type => 'attribute not allowed');
1244     }
1245     my $metadata_name = $name_attr->value;
1246     my $metadata_value;
1247     if (defined $content_attr) {
1248     $metadata_value = $content_attr->value;
1249     } else {
1250     $self->{onerror}->(node => $todo->{node},
1251     type => 'attribute missing:content');
1252     $metadata_value = '';
1253     }
1254     } elsif (defined $http_equiv_attr) {
1255     if (defined $charset_attr) {
1256     $self->{onerror}->(node => $charset_attr,
1257     type => 'attribute not allowed');
1258     }
1259     unless (defined $content_attr) {
1260     $self->{onerror}->(node => $todo->{node},
1261     type => 'attribute missing:content');
1262     }
1263     } elsif (defined $charset_attr) {
1264     if (defined $content_attr) {
1265     $self->{onerror}->(node => $content_attr,
1266     type => 'attribute not allowed');
1267     }
1268     } else {
1269     if (defined $content_attr) {
1270     $self->{onerror}->(node => $content_attr,
1271     type => 'attribute not allowed');
1272     $self->{onerror}->(node => $todo->{node},
1273     type => 'attribute missing:name|http-equiv');
1274     } else {
1275     $self->{onerror}->(node => $todo->{node},
1276     type => 'attribute missing:name|http-equiv|charset');
1277     }
1278     }
1279    
1280     ## TODO: metadata conformance
1281    
1282     ## TODO: pragma conformance
1283     if (defined $http_equiv_attr) { ## An enumerated attribute
1284     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1285     if ({
1286     'refresh' => 1,
1287     'default-style' => 1,
1288     }->{$keyword}) {
1289     #
1290 wakaba 1.19 } elsif ($keyword eq 'content-type') {
1291     $self->{onerror}
1292     ->(node => $http_equiv_attr,
1293     type => 'enumerated:invalid:http-equiv:content-type');
1294 wakaba 1.1 } else {
1295     $self->{onerror}->(node => $http_equiv_attr,
1296     type => 'enumerated:invalid');
1297     }
1298     }
1299    
1300     if (defined $charset_attr) {
1301     unless ($todo->{node}->owner_document->manakai_is_html) {
1302     $self->{onerror}->(node => $charset_attr,
1303     type => 'in XML:charset');
1304     }
1305 wakaba 1.21
1306     my $charset_value = $charset_attr->value;
1307     ## NOTE: Though the case-sensitivility of |charset| attribute value
1308     ## is not explicitly spelled in the HTML5 spec, the Character Set
1309     ## registry of IANA, which is referenced from HTML5 spec, says that
1310     ## charset name is case-insensitive.
1311     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1312    
1313     require Message::Charset::Info;
1314     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1315     my $ic = $todo->{node}->owner_document->input_encoding;
1316     if (defined $ic) {
1317     ## TODO: Test for this case
1318     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1319     if ($charset ne $ic_charset) {
1320     $self->{onerror}->(node => $charset_attr,
1321     type => 'mismatched charset name:'.$ic.
1322     ':'.$charset_value,
1323     level => 'm');
1324     }
1325     } else {
1326     ## NOTE: MUST, but not checkable, since the document is not originally
1327     ## in serialized form (or the parser does not preserve the input
1328     ## encoding information).
1329     $self->{onerror}->(node => $charset_attr,
1330     type => 'mismatched charset name::'.$charset_value,
1331     level => 'unsupported');
1332     }
1333    
1334     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1335     ## Syntactically valid and registered? What about x-charset names?
1336     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1337     ($charset_value)) {
1338     $self->{onerror}->(node => $charset_attr,
1339     type => 'charset:syntax error:'.$charset_value,
1340     level => 'm');
1341     }
1342    
1343     if ($charset) {
1344     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1345     ## with no "preferred MIME name" label)?
1346     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1347     if (($charset_status &
1348     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1349     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1350     $self->{onerror}->(node => $charset_attr,
1351     type => 'charset:not preferred:'.
1352     $charset_value,
1353     level => 'm');
1354     }
1355     if (($charset_status &
1356     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1357     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1358     if ($charset_value =~ /^x-/) {
1359     $self->{onerror}->(node => $charset_attr,
1360     type => 'charset:private:'.$charset_value,
1361     level => $self->{good_level});
1362     } else {
1363     $self->{onerror}->(node => $charset_attr,
1364     type => 'charset:not registered:'.
1365     $charset_value,
1366     level => $self->{good_level});
1367     }
1368     }
1369     } elsif ($charset_value =~ /^x-/) {
1370     $self->{onerror}->(node => $charset_attr,
1371     type => 'charset:private:'.$charset_value,
1372     level => $self->{good_level});
1373     } else {
1374     $self->{onerror}->(node => $charset_attr,
1375     type => 'charset:not registered:'.$charset_value,
1376     level => $self->{good_level});
1377     }
1378    
1379 wakaba 1.22 if ($charset_attr->get_user_data ('manakai_has_reference')) {
1380     $self->{onerror}->(node => $charset_attr,
1381     type => 'character reference in charset',
1382     level => $self->{must_level});
1383     }
1384 wakaba 1.1 }
1385     },
1386     checker => $HTMLEmptyChecker,
1387     };
1388    
1389     $Element->{$HTML_NS}->{style} = {
1390     attrs_checker => $GetHTMLAttrsChecker->({
1391     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1392     media => $HTMLMQAttrChecker,
1393     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1394     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1395     ## not different
1396     }),
1397     checker => sub {
1398     ## NOTE: |html:style| has no conformance creteria on content model
1399     my ($self, $todo) = @_;
1400     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1401     $type = 'text/css' unless defined $type;
1402     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1403     type => 'style:'.$type); ## TODO: $type normalization
1404     return $AnyChecker->($self, $todo);
1405     },
1406     };
1407    
1408     $Element->{$HTML_NS}->{body} = {
1409     attrs_checker => $GetHTMLAttrsChecker->({}),
1410     checker => $HTMLBlockChecker,
1411     };
1412    
1413     $Element->{$HTML_NS}->{section} = {
1414     attrs_checker => $GetHTMLAttrsChecker->({}),
1415     checker => $HTMLStylableBlockChecker,
1416     };
1417    
1418     $Element->{$HTML_NS}->{nav} = {
1419     attrs_checker => $GetHTMLAttrsChecker->({}),
1420     checker => $HTMLBlockOrInlineChecker,
1421     };
1422    
1423     $Element->{$HTML_NS}->{article} = {
1424     attrs_checker => $GetHTMLAttrsChecker->({}),
1425     checker => $HTMLStylableBlockChecker,
1426     };
1427    
1428     $Element->{$HTML_NS}->{blockquote} = {
1429     attrs_checker => $GetHTMLAttrsChecker->({
1430     cite => $HTMLURIAttrChecker,
1431     }),
1432     checker => $HTMLBlockChecker,
1433     };
1434    
1435     $Element->{$HTML_NS}->{aside} = {
1436     attrs_checker => $GetHTMLAttrsChecker->({}),
1437     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1438     };
1439    
1440     $Element->{$HTML_NS}->{h1} = {
1441     attrs_checker => $GetHTMLAttrsChecker->({}),
1442     checker => sub {
1443     my ($self, $todo) = @_;
1444     $todo->{flag}->{has_heading}->[0] = 1;
1445 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
1446 wakaba 1.1 },
1447     };
1448    
1449     $Element->{$HTML_NS}->{h2} = {
1450     attrs_checker => $GetHTMLAttrsChecker->({}),
1451     checker => $Element->{$HTML_NS}->{h1}->{checker},
1452     };
1453    
1454     $Element->{$HTML_NS}->{h3} = {
1455     attrs_checker => $GetHTMLAttrsChecker->({}),
1456     checker => $Element->{$HTML_NS}->{h1}->{checker},
1457     };
1458    
1459     $Element->{$HTML_NS}->{h4} = {
1460     attrs_checker => $GetHTMLAttrsChecker->({}),
1461     checker => $Element->{$HTML_NS}->{h1}->{checker},
1462     };
1463    
1464     $Element->{$HTML_NS}->{h5} = {
1465     attrs_checker => $GetHTMLAttrsChecker->({}),
1466     checker => $Element->{$HTML_NS}->{h1}->{checker},
1467     };
1468    
1469     $Element->{$HTML_NS}->{h6} = {
1470     attrs_checker => $GetHTMLAttrsChecker->({}),
1471     checker => $Element->{$HTML_NS}->{h1}->{checker},
1472     };
1473    
1474     $Element->{$HTML_NS}->{header} = {
1475     attrs_checker => $GetHTMLAttrsChecker->({}),
1476     checker => sub {
1477     my ($self, $todo) = @_;
1478     my $old_flag = $todo->{flag}->{has_heading} || [];
1479     my $new_flag = [];
1480     local $todo->{flag}->{has_heading} = $new_flag;
1481     my $node = $todo->{node};
1482    
1483     my $end = $self->_add_minuses
1484     ({$HTML_NS => {qw/header 1 footer 1/}},
1485     $HTMLSectioningElements);
1486     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1487     push @$new_todos, $end,
1488     {type => 'code', code => sub {
1489     if ($new_flag->[0]) {
1490     $old_flag->[0] = 1;
1491     } else {
1492     $self->{onerror}->(node => $node, type => 'element missing:hn');
1493     }
1494     }};
1495     return ($new_todos, $ch);
1496     },
1497     };
1498    
1499     $Element->{$HTML_NS}->{footer} = {
1500     attrs_checker => $GetHTMLAttrsChecker->({}),
1501     checker => sub { ## block -hn -header -footer -sectioning or inline
1502     my ($self, $todo) = @_;
1503     my $el = $todo->{node};
1504     my $new_todos = [];
1505     my @nodes = (@{$el->child_nodes});
1506    
1507     my $content = 'block-or-inline'; # or 'block' or 'inline'
1508     my @block_not_inline;
1509     while (@nodes) {
1510     my $node = shift @nodes;
1511     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1512    
1513     my $nt = $node->node_type;
1514     if ($nt == 1) {
1515     my $node_ns = $node->namespace_uri;
1516     $node_ns = '' unless defined $node_ns;
1517     my $node_ln = $node->manakai_local_name;
1518     my $not_allowed;
1519     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1520     $not_allowed = 1;
1521     } elsif ($node_ns eq $HTML_NS and
1522     {
1523     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1524     }->{$node_ln}) {
1525     $not_allowed = 1;
1526     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1527     $not_allowed = 1;
1528     }
1529     if ($content eq 'block') {
1530     $not_allowed = 1
1531 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
1532     $self->{pluses}->{$node_ns}->{$node_ln};
1533 wakaba 1.1 } elsif ($content eq 'inline') {
1534     $not_allowed = 1
1535     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1536 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
1537     $self->{pluses}->{$node_ns}->{$node_ln};
1538 wakaba 1.1 } else {
1539     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1540     my $is_inline
1541     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1542     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1543    
1544     push @block_not_inline, $node
1545     if $is_block and not $is_inline and not $not_allowed;
1546 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
1547 wakaba 1.1 $content = 'inline';
1548     for (@block_not_inline) {
1549     $self->{onerror}->(node => $_, type => 'element not allowed');
1550     }
1551     $not_allowed = 1 unless $is_inline;
1552     }
1553     }
1554     $self->{onerror}->(node => $node, type => 'element not allowed')
1555     if $not_allowed;
1556     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1557     unshift @nodes, @$sib;
1558     push @$new_todos, @$ch;
1559     } elsif ($nt == 3 or $nt == 4) {
1560     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1561     if ($content eq 'block') {
1562     $self->{onerror}->(node => $node, type => 'character not allowed');
1563     } else {
1564     $content = 'inline';
1565     for (@block_not_inline) {
1566     $self->{onerror}->(node => $_, type => 'element not allowed');
1567     }
1568     }
1569     }
1570     } elsif ($nt == 5) {
1571     unshift @nodes, @{$node->child_nodes};
1572     }
1573     }
1574    
1575     my $end = $self->_add_minuses
1576     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1577     $HTMLSectioningElements);
1578     push @$new_todos, $end;
1579    
1580     if ($content eq 'inline') {
1581     for (@$new_todos) {
1582     $_->{inline} = 1;
1583     }
1584     }
1585    
1586     return ($new_todos);
1587     },
1588     };
1589    
1590     $Element->{$HTML_NS}->{address} = {
1591     attrs_checker => $GetHTMLAttrsChecker->({}),
1592     checker => $HTMLInlineChecker,
1593     };
1594    
1595     $Element->{$HTML_NS}->{p} = {
1596     attrs_checker => $GetHTMLAttrsChecker->({}),
1597 wakaba 1.13 checker => $HTMLInlineChecker,
1598 wakaba 1.1 };
1599    
1600     $Element->{$HTML_NS}->{hr} = {
1601     attrs_checker => $GetHTMLAttrsChecker->({}),
1602     checker => $HTMLEmptyChecker,
1603     };
1604    
1605     $Element->{$HTML_NS}->{br} = {
1606     attrs_checker => $GetHTMLAttrsChecker->({}),
1607     checker => $HTMLEmptyChecker,
1608     };
1609    
1610     $Element->{$HTML_NS}->{dialog} = {
1611     attrs_checker => $GetHTMLAttrsChecker->({}),
1612     checker => sub {
1613     my ($self, $todo) = @_;
1614     my $el = $todo->{node};
1615     my $new_todos = [];
1616     my @nodes = (@{$el->child_nodes});
1617    
1618     my $phase = 'before dt';
1619     while (@nodes) {
1620     my $node = shift @nodes;
1621     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1622    
1623     my $nt = $node->node_type;
1624     if ($nt == 1) {
1625     my $node_ns = $node->namespace_uri;
1626     $node_ns = '' unless defined $node_ns;
1627     my $node_ln = $node->manakai_local_name;
1628     ## NOTE: |minuses| list is not checked since redundant
1629 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1630     #
1631     } elsif ($phase eq 'before dt') {
1632 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1633     $phase = 'before dd';
1634     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1635     $self->{onerror}
1636     ->(node => $node, type => 'ps element missing:dt');
1637     $phase = 'before dt';
1638     } else {
1639     $self->{onerror}->(node => $node, type => 'element not allowed');
1640     }
1641     } else { # before dd
1642     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1643     $phase = 'before dt';
1644     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1645     $self->{onerror}
1646     ->(node => $node, type => 'ps element missing:dd');
1647     $phase = 'before dd';
1648     } else {
1649     $self->{onerror}->(node => $node, type => 'element not allowed');
1650     }
1651     }
1652     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1653     unshift @nodes, @$sib;
1654     push @$new_todos, @$ch;
1655     } elsif ($nt == 3 or $nt == 4) {
1656     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1657     $self->{onerror}->(node => $node, type => 'character not allowed');
1658     }
1659     } elsif ($nt == 5) {
1660     unshift @nodes, @{$node->child_nodes};
1661     }
1662     }
1663     if ($phase eq 'before dd') {
1664 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1665 wakaba 1.1 }
1666     return ($new_todos);
1667     },
1668     };
1669    
1670     $Element->{$HTML_NS}->{pre} = {
1671     attrs_checker => $GetHTMLAttrsChecker->({}),
1672     checker => $HTMLStrictlyInlineChecker,
1673     };
1674    
1675     $Element->{$HTML_NS}->{ol} = {
1676     attrs_checker => $GetHTMLAttrsChecker->({
1677     start => $HTMLIntegerAttrChecker,
1678     }),
1679     checker => sub {
1680     my ($self, $todo) = @_;
1681     my $el = $todo->{node};
1682     my $new_todos = [];
1683     my @nodes = (@{$el->child_nodes});
1684    
1685     while (@nodes) {
1686     my $node = shift @nodes;
1687     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1688    
1689     my $nt = $node->node_type;
1690     if ($nt == 1) {
1691     my $node_ns = $node->namespace_uri;
1692     $node_ns = '' unless defined $node_ns;
1693     my $node_ln = $node->manakai_local_name;
1694     ## NOTE: |minuses| list is not checked since redundant
1695 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1696     #
1697     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
1698 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
1699     }
1700     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1701     unshift @nodes, @$sib;
1702     push @$new_todos, @$ch;
1703     } elsif ($nt == 3 or $nt == 4) {
1704     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1705     $self->{onerror}->(node => $node, type => 'character not allowed');
1706     }
1707     } elsif ($nt == 5) {
1708     unshift @nodes, @{$node->child_nodes};
1709     }
1710     }
1711    
1712     if ($todo->{inline}) {
1713     for (@$new_todos) {
1714     $_->{inline} = 1;
1715     }
1716     }
1717     return ($new_todos);
1718     },
1719     };
1720    
1721     $Element->{$HTML_NS}->{ul} = {
1722     attrs_checker => $GetHTMLAttrsChecker->({}),
1723     checker => $Element->{$HTML_NS}->{ol}->{checker},
1724     };
1725    
1726     $Element->{$HTML_NS}->{li} = {
1727     attrs_checker => $GetHTMLAttrsChecker->({
1728     start => sub {
1729     my ($self, $attr) = @_;
1730     my $parent = $attr->owner_element->manakai_parent_element;
1731     if (defined $parent) {
1732     my $parent_ns = $parent->namespace_uri;
1733     $parent_ns = '' unless defined $parent_ns;
1734     my $parent_ln = $parent->manakai_local_name;
1735     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1736     $self->{onerror}->(node => $attr, level => 'unsupported',
1737     type => 'attribute');
1738     }
1739     }
1740     $HTMLIntegerAttrChecker->($self, $attr);
1741     },
1742     }),
1743     checker => sub {
1744     my ($self, $todo) = @_;
1745     if ($todo->{inline}) {
1746     return $HTMLInlineChecker->($self, $todo);
1747     } else {
1748     return $HTMLBlockOrInlineChecker->($self, $todo);
1749     }
1750     },
1751     };
1752    
1753     $Element->{$HTML_NS}->{dl} = {
1754     attrs_checker => $GetHTMLAttrsChecker->({}),
1755     checker => sub {
1756     my ($self, $todo) = @_;
1757     my $el = $todo->{node};
1758     my $new_todos = [];
1759     my @nodes = (@{$el->child_nodes});
1760    
1761     my $phase = 'before dt';
1762     while (@nodes) {
1763     my $node = shift @nodes;
1764     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1765    
1766     my $nt = $node->node_type;
1767     if ($nt == 1) {
1768     my $node_ns = $node->namespace_uri;
1769     $node_ns = '' unless defined $node_ns;
1770     my $node_ln = $node->manakai_local_name;
1771     ## NOTE: |minuses| list is not checked since redundant
1772 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1773     #
1774     } elsif ($phase eq 'in dds') {
1775 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1776     #$phase = 'in dds';
1777     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1778     $phase = 'in dts';
1779     } else {
1780     $self->{onerror}->(node => $node, type => 'element not allowed');
1781     }
1782     } elsif ($phase eq 'in dts') {
1783     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1784     #$phase = 'in dts';
1785     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1786     $phase = 'in dds';
1787     } else {
1788     $self->{onerror}->(node => $node, type => 'element not allowed');
1789     }
1790     } else { # before dt
1791     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1792     $phase = 'in dts';
1793     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1794     $self->{onerror}
1795     ->(node => $node, type => 'ps element missing:dt');
1796     $phase = 'in dds';
1797     } else {
1798     $self->{onerror}->(node => $node, type => 'element not allowed');
1799     }
1800     }
1801     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1802     unshift @nodes, @$sib;
1803     push @$new_todos, @$ch;
1804     } elsif ($nt == 3 or $nt == 4) {
1805     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1806     $self->{onerror}->(node => $node, type => 'character not allowed');
1807     }
1808     } elsif ($nt == 5) {
1809     unshift @nodes, @{$node->child_nodes};
1810     }
1811     }
1812     if ($phase eq 'in dts') {
1813 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
1814 wakaba 1.1 }
1815    
1816     if ($todo->{inline}) {
1817     for (@$new_todos) {
1818     $_->{inline} = 1;
1819     }
1820     }
1821     return ($new_todos);
1822     },
1823     };
1824    
1825     $Element->{$HTML_NS}->{dt} = {
1826     attrs_checker => $GetHTMLAttrsChecker->({}),
1827     checker => $HTMLStrictlyInlineChecker,
1828     };
1829    
1830     $Element->{$HTML_NS}->{dd} = {
1831     attrs_checker => $GetHTMLAttrsChecker->({}),
1832     checker => $Element->{$HTML_NS}->{li}->{checker},
1833     };
1834    
1835     $Element->{$HTML_NS}->{a} = {
1836     attrs_checker => sub {
1837     my ($self, $todo) = @_;
1838     my %attr;
1839     for my $attr (@{$todo->{node}->attributes}) {
1840     my $attr_ns = $attr->namespace_uri;
1841     $attr_ns = '' unless defined $attr_ns;
1842     my $attr_ln = $attr->manakai_local_name;
1843     my $checker;
1844     if ($attr_ns eq '') {
1845     $checker = {
1846     target => $HTMLTargetAttrChecker,
1847     href => $HTMLURIAttrChecker,
1848     ping => $HTMLSpaceURIsAttrChecker,
1849 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
1850 wakaba 1.1 media => $HTMLMQAttrChecker,
1851     hreflang => $HTMLLanguageTagAttrChecker,
1852     type => $HTMLIMTAttrChecker,
1853     }->{$attr_ln};
1854     if ($checker) {
1855     $attr{$attr_ln} = $attr;
1856     } else {
1857     $checker = $HTMLAttrChecker->{$attr_ln};
1858     }
1859     }
1860     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1861     || $AttrChecker->{$attr_ns}->{''};
1862     if ($checker) {
1863     $checker->($self, $attr) if ref $checker;
1864     } else {
1865     $self->{onerror}->(node => $attr, level => 'unsupported',
1866     type => 'attribute');
1867     ## ISSUE: No comformance createria for unknown attributes in the spec
1868     }
1869     }
1870    
1871 wakaba 1.4 if (defined $attr{href}) {
1872     $self->{has_hyperlink_element} = 1;
1873     } else {
1874 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1875     if (defined $attr{$_}) {
1876     $self->{onerror}->(node => $attr{$_},
1877     type => 'attribute not allowed');
1878     }
1879     }
1880     }
1881     },
1882     checker => sub {
1883     my ($self, $todo) = @_;
1884    
1885     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1886     my ($new_todos, $ch)
1887 wakaba 1.13 = $HTMLInlineOrStrictlyInlineChecker->($self, $todo);
1888 wakaba 1.1 push @$new_todos, $end;
1889    
1890 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1891     $_->{flag}->{in_a_href} = 1 for @$new_todos;
1892     }
1893 wakaba 1.1
1894     return ($new_todos, $ch);
1895     },
1896     };
1897    
1898     $Element->{$HTML_NS}->{q} = {
1899     attrs_checker => $GetHTMLAttrsChecker->({
1900     cite => $HTMLURIAttrChecker,
1901     }),
1902     checker => $HTMLInlineOrStrictlyInlineChecker,
1903     };
1904    
1905     $Element->{$HTML_NS}->{cite} = {
1906     attrs_checker => $GetHTMLAttrsChecker->({}),
1907     checker => $HTMLStrictlyInlineChecker,
1908     };
1909    
1910     $Element->{$HTML_NS}->{em} = {
1911     attrs_checker => $GetHTMLAttrsChecker->({}),
1912     checker => $HTMLInlineOrStrictlyInlineChecker,
1913     };
1914    
1915     $Element->{$HTML_NS}->{strong} = {
1916     attrs_checker => $GetHTMLAttrsChecker->({}),
1917     checker => $HTMLInlineOrStrictlyInlineChecker,
1918     };
1919    
1920     $Element->{$HTML_NS}->{small} = {
1921     attrs_checker => $GetHTMLAttrsChecker->({}),
1922     checker => $HTMLInlineOrStrictlyInlineChecker,
1923     };
1924    
1925     $Element->{$HTML_NS}->{m} = {
1926     attrs_checker => $GetHTMLAttrsChecker->({}),
1927     checker => $HTMLInlineOrStrictlyInlineChecker,
1928     };
1929    
1930     $Element->{$HTML_NS}->{dfn} = {
1931     attrs_checker => $GetHTMLAttrsChecker->({}),
1932     checker => sub {
1933     my ($self, $todo) = @_;
1934    
1935     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1936     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1937     push @$sib, $end;
1938    
1939     my $node = $todo->{node};
1940     my $term = $node->get_attribute_ns (undef, 'title');
1941     unless (defined $term) {
1942     for my $child (@{$node->child_nodes}) {
1943     if ($child->node_type == 1) { # ELEMENT_NODE
1944     if (defined $term) {
1945     undef $term;
1946     last;
1947     } elsif ($child->manakai_local_name eq 'abbr') {
1948     my $nsuri = $child->namespace_uri;
1949     if (defined $nsuri and $nsuri eq $HTML_NS) {
1950     my $attr = $child->get_attribute_node_ns (undef, 'title');
1951     if ($attr) {
1952     $term = $attr->value;
1953     }
1954     }
1955     }
1956     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1957     ## TEXT_NODE or CDATA_SECTION_NODE
1958     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1959     next;
1960     }
1961     undef $term;
1962     last;
1963     }
1964     }
1965     unless (defined $term) {
1966     $term = $node->text_content;
1967     }
1968     }
1969     if ($self->{term}->{$term}) {
1970     $self->{onerror}->(node => $node, type => 'duplicate term');
1971     push @{$self->{term}->{$term}}, $node;
1972     } else {
1973     $self->{term}->{$term} = [$node];
1974     }
1975     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1976     ## has |title|.
1977    
1978     return ($sib, $ch);
1979     },
1980     };
1981    
1982     $Element->{$HTML_NS}->{abbr} = {
1983     attrs_checker => $GetHTMLAttrsChecker->({
1984     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1985     ## not different. The spec says that the |title| MAY be omitted
1986     ## if there is a |dfn| whose defining term is the abbreviation,
1987     ## but it does not prohibit |abbr| w/o |title| in other cases.
1988     }),
1989     checker => $HTMLStrictlyInlineChecker,
1990     };
1991    
1992     $Element->{$HTML_NS}->{time} = {
1993     attrs_checker => $GetHTMLAttrsChecker->({
1994     datetime => sub { 1 }, # checked in |checker|
1995     }),
1996     ## TODO: Write tests
1997     checker => sub {
1998     my ($self, $todo) = @_;
1999    
2000     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
2001     my $input;
2002     my $reg_sp;
2003     my $input_node;
2004     if ($attr) {
2005     $input = $attr->value;
2006     $reg_sp = qr/[\x09-\x0D\x20]*/;
2007     $input_node = $attr;
2008     } else {
2009     $input = $todo->{node}->text_content;
2010     $reg_sp = qr/\p{Zs}*/;
2011     $input_node = $todo->{node};
2012    
2013     ## ISSUE: What is the definition for "successfully extracts a date
2014     ## or time"? If the algorithm says the string is invalid but
2015     ## return some date or time, is it "successfully"?
2016     }
2017    
2018     my $hour;
2019     my $minute;
2020     my $second;
2021     if ($input =~ /
2022     \A
2023     [\x09-\x0D\x20]*
2024     ([0-9]+) # 1
2025     (?>
2026     -([0-9]+) # 2
2027     -([0-9]+) # 3
2028     [\x09-\x0D\x20]*
2029     (?>
2030     T
2031     [\x09-\x0D\x20]*
2032     )?
2033     ([0-9]+) # 4
2034     :([0-9]+) # 5
2035     (?>
2036     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2037     )?
2038     [\x09-\x0D\x20]*
2039     (?>
2040     Z
2041     [\x09-\x0D\x20]*
2042     |
2043     [+-]([0-9]+):([0-9]+) # 7, 8
2044     [\x09-\x0D\x20]*
2045     )?
2046     \z
2047     |
2048     :([0-9]+) # 9
2049     (?>
2050     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2051     )?
2052     [\x09-\x0D\x20]*\z
2053     )
2054     /x) {
2055     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2056     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2057     length $4 != 2 or length $5 != 2) {
2058     $self->{onerror}->(node => $input_node,
2059     type => 'dateortime:syntax error');
2060     }
2061    
2062     if (1 <= $2 and $2 <= 12) {
2063     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2064     if $3 < 1 or
2065     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
2066     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2067     if $2 == 2 and $3 == 29 and
2068     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2069     } else {
2070     $self->{onerror}->(node => $input_node,
2071     type => 'datetime:bad month');
2072     }
2073    
2074     ($hour, $minute, $second) = ($4, $5, $6);
2075    
2076     if (defined $7) { ## [+-]hh:mm
2077     if (length $7 != 2 or length $8 != 2) {
2078     $self->{onerror}->(node => $input_node,
2079     type => 'dateortime:syntax error');
2080     }
2081    
2082     $self->{onerror}->(node => $input_node,
2083     type => 'datetime:bad timezone hour')
2084     if $7 > 23;
2085     $self->{onerror}->(node => $input_node,
2086     type => 'datetime:bad timezone minute')
2087     if $8 > 59;
2088     }
2089     } else { ## hh:mm
2090     if (length $1 != 2 or length $9 != 2) {
2091     $self->{onerror}->(node => $input_node,
2092     type => qq'dateortime:syntax error');
2093     }
2094    
2095     ($hour, $minute, $second) = ($1, $9, $10);
2096     }
2097    
2098     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2099     if $hour > 23;
2100     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2101     if $minute > 59;
2102    
2103     if (defined $second) { ## s
2104     ## NOTE: Integer part of second don't have to have length of two.
2105    
2106     if (substr ($second, 0, 1) eq '.') {
2107     $self->{onerror}->(node => $input_node,
2108     type => 'dateortime:syntax error');
2109     }
2110    
2111     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2112     if $second >= 60;
2113     }
2114     } else {
2115     $self->{onerror}->(node => $input_node,
2116     type => 'dateortime:syntax error');
2117     }
2118    
2119     return $HTMLStrictlyInlineChecker->($self, $todo);
2120     },
2121     };
2122    
2123     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2124     attrs_checker => $GetHTMLAttrsChecker->({
2125     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2126     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2127     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2128     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2129     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2130     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2131     }),
2132     checker => $HTMLStrictlyInlineChecker,
2133     };
2134    
2135     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2136     attrs_checker => $GetHTMLAttrsChecker->({
2137     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2138     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2139     }),
2140     checker => $HTMLStrictlyInlineChecker,
2141     };
2142    
2143     $Element->{$HTML_NS}->{code} = {
2144     attrs_checker => $GetHTMLAttrsChecker->({}),
2145     ## NOTE: Though |title| has special semantics,
2146     ## syntatically same as the |title| as global attribute.
2147     checker => $HTMLInlineOrStrictlyInlineChecker,
2148     };
2149    
2150     $Element->{$HTML_NS}->{var} = {
2151     attrs_checker => $GetHTMLAttrsChecker->({}),
2152     ## NOTE: Though |title| has special semantics,
2153     ## syntatically same as the |title| as global attribute.
2154     checker => $HTMLStrictlyInlineChecker,
2155     };
2156    
2157     $Element->{$HTML_NS}->{samp} = {
2158     attrs_checker => $GetHTMLAttrsChecker->({}),
2159     ## NOTE: Though |title| has special semantics,
2160     ## syntatically same as the |title| as global attribute.
2161     checker => $HTMLInlineOrStrictlyInlineChecker,
2162     };
2163    
2164     $Element->{$HTML_NS}->{kbd} = {
2165     attrs_checker => $GetHTMLAttrsChecker->({}),
2166     checker => $HTMLStrictlyInlineChecker,
2167     };
2168    
2169     $Element->{$HTML_NS}->{sub} = {
2170     attrs_checker => $GetHTMLAttrsChecker->({}),
2171     checker => $HTMLStrictlyInlineChecker,
2172     };
2173    
2174     $Element->{$HTML_NS}->{sup} = {
2175     attrs_checker => $GetHTMLAttrsChecker->({}),
2176     checker => $HTMLStrictlyInlineChecker,
2177     };
2178    
2179     $Element->{$HTML_NS}->{span} = {
2180     attrs_checker => $GetHTMLAttrsChecker->({}),
2181     ## NOTE: Though |title| has special semantics,
2182     ## syntatically same as the |title| as global attribute.
2183     checker => $HTMLInlineOrStrictlyInlineChecker,
2184     };
2185    
2186     $Element->{$HTML_NS}->{i} = {
2187     attrs_checker => $GetHTMLAttrsChecker->({}),
2188     ## NOTE: Though |title| has special semantics,
2189     ## syntatically same as the |title| as global attribute.
2190     checker => $HTMLStrictlyInlineChecker,
2191     };
2192    
2193     $Element->{$HTML_NS}->{b} = {
2194     attrs_checker => $GetHTMLAttrsChecker->({}),
2195     checker => $HTMLStrictlyInlineChecker,
2196     };
2197    
2198     $Element->{$HTML_NS}->{bdo} = {
2199     attrs_checker => sub {
2200     my ($self, $todo) = @_;
2201     $GetHTMLAttrsChecker->({})->($self, $todo);
2202     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2203     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2204     }
2205     },
2206     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2207     checker => $HTMLStrictlyInlineChecker,
2208     };
2209    
2210     $Element->{$HTML_NS}->{ins} = {
2211     attrs_checker => $GetHTMLAttrsChecker->({
2212     cite => $HTMLURIAttrChecker,
2213     datetime => $HTMLDatetimeAttrChecker,
2214     }),
2215     checker => $HTMLTransparentChecker,
2216     };
2217    
2218     $Element->{$HTML_NS}->{del} = {
2219     attrs_checker => $GetHTMLAttrsChecker->({
2220     cite => $HTMLURIAttrChecker,
2221     datetime => $HTMLDatetimeAttrChecker,
2222     }),
2223     checker => sub {
2224     my ($self, $todo) = @_;
2225    
2226     my $parent = $todo->{node}->manakai_parent_element;
2227     if (defined $parent) {
2228     my $nsuri = $parent->namespace_uri;
2229     $nsuri = '' unless defined $nsuri;
2230     my $ln = $parent->manakai_local_name;
2231     my $eldef = $Element->{$nsuri}->{$ln} ||
2232     $Element->{$nsuri}->{''} ||
2233     $ElementDefault;
2234     return $eldef->{checker}->($self, $todo);
2235     } else {
2236     return $HTMLBlockOrInlineChecker->($self, $todo);
2237     }
2238     },
2239     };
2240    
2241     ## TODO: figure
2242 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2243 wakaba 1.1
2244 wakaba 1.4 ## TODO: |alt|
2245 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2246     attrs_checker => sub {
2247     my ($self, $todo) = @_;
2248     $GetHTMLAttrsChecker->({
2249     alt => sub { }, ## NOTE: No syntactical requirement
2250     src => $HTMLURIAttrChecker,
2251     usemap => $HTMLUsemapAttrChecker,
2252     ismap => sub {
2253     my ($self, $attr, $parent_todo) = @_;
2254 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2255     $self->{onerror}->(node => $attr,
2256     type => 'attribute not allowed:ismap');
2257 wakaba 1.1 }
2258     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2259     },
2260     ## TODO: height
2261     ## TODO: width
2262     })->($self, $todo);
2263     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2264     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2265     }
2266     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2267     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2268     }
2269     },
2270     checker => $HTMLEmptyChecker,
2271     };
2272    
2273     $Element->{$HTML_NS}->{iframe} = {
2274     attrs_checker => $GetHTMLAttrsChecker->({
2275     src => $HTMLURIAttrChecker,
2276     }),
2277     checker => $HTMLTextChecker,
2278     };
2279    
2280     $Element->{$HTML_NS}->{embed} = {
2281     attrs_checker => sub {
2282     my ($self, $todo) = @_;
2283     my $has_src;
2284     for my $attr (@{$todo->{node}->attributes}) {
2285     my $attr_ns = $attr->namespace_uri;
2286     $attr_ns = '' unless defined $attr_ns;
2287     my $attr_ln = $attr->manakai_local_name;
2288     my $checker;
2289     if ($attr_ns eq '') {
2290     if ($attr_ln eq 'src') {
2291     $checker = $HTMLURIAttrChecker;
2292     $has_src = 1;
2293     } elsif ($attr_ln eq 'type') {
2294     $checker = $HTMLIMTAttrChecker;
2295     } else {
2296     ## TODO: height
2297     ## TODO: width
2298     $checker = $HTMLAttrChecker->{$attr_ln}
2299     || sub { }; ## NOTE: Any local attribute is ok.
2300     }
2301     }
2302     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2303     || $AttrChecker->{$attr_ns}->{''};
2304     if ($checker) {
2305     $checker->($self, $attr);
2306     } else {
2307     $self->{onerror}->(node => $attr, level => 'unsupported',
2308     type => 'attribute');
2309     ## ISSUE: No comformance createria for global attributes in the spec
2310     }
2311     }
2312    
2313     unless ($has_src) {
2314     $self->{onerror}->(node => $todo->{node},
2315     type => 'attribute missing:src');
2316     }
2317     },
2318     checker => $HTMLEmptyChecker,
2319     };
2320    
2321     $Element->{$HTML_NS}->{object} = {
2322     attrs_checker => sub {
2323     my ($self, $todo) = @_;
2324     $GetHTMLAttrsChecker->({
2325     data => $HTMLURIAttrChecker,
2326     type => $HTMLIMTAttrChecker,
2327     usemap => $HTMLUsemapAttrChecker,
2328     ## TODO: width
2329     ## TODO: height
2330     })->($self, $todo);
2331     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2332     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2333     $self->{onerror}->(node => $todo->{node},
2334     type => 'attribute missing:data|type');
2335     }
2336     }
2337     },
2338     checker => $ElementDefault->{checker}, ## TODO
2339 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2340 wakaba 1.1 };
2341    
2342     $Element->{$HTML_NS}->{param} = {
2343     attrs_checker => sub {
2344     my ($self, $todo) = @_;
2345     $GetHTMLAttrsChecker->({
2346     name => sub { },
2347     value => sub { },
2348     })->($self, $todo);
2349     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2350     $self->{onerror}->(node => $todo->{node},
2351     type => 'attribute missing:name');
2352     }
2353     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2354     $self->{onerror}->(node => $todo->{node},
2355     type => 'attribute missing:value');
2356     }
2357     },
2358     checker => $HTMLEmptyChecker,
2359     };
2360    
2361     $Element->{$HTML_NS}->{video} = {
2362     attrs_checker => $GetHTMLAttrsChecker->({
2363     src => $HTMLURIAttrChecker,
2364     ## TODO: start, loopstart, loopend, end
2365     ## ISSUE: they MUST be "value time offset"s. Value?
2366 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2367 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2368     controls => $GetHTMLBooleanAttrChecker->('controls'),
2369 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2370     ## TODO: width, height (not for audio!)
2371 wakaba 1.1 }),
2372     checker => sub {
2373     my ($self, $todo) = @_;
2374    
2375     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2376     return $HTMLBlockOrInlineChecker->($self, $todo);
2377     } else {
2378     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2379     ->($self, $todo);
2380     }
2381     },
2382     };
2383    
2384     $Element->{$HTML_NS}->{audio} = {
2385     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2386     checker => $Element->{$HTML_NS}->{video}->{checker},
2387     };
2388    
2389     $Element->{$HTML_NS}->{source} = {
2390     attrs_checker => sub {
2391     my ($self, $todo) = @_;
2392     $GetHTMLAttrsChecker->({
2393     src => $HTMLURIAttrChecker,
2394     type => $HTMLIMTAttrChecker,
2395     media => $HTMLMQAttrChecker,
2396     })->($self, $todo);
2397     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2398     $self->{onerror}->(node => $todo->{node},
2399     type => 'attribute missing:src');
2400     }
2401     },
2402     checker => $HTMLEmptyChecker,
2403     };
2404    
2405     $Element->{$HTML_NS}->{canvas} = {
2406     attrs_checker => $GetHTMLAttrsChecker->({
2407     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2408     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2409     }),
2410     checker => $HTMLInlineChecker,
2411     };
2412    
2413     $Element->{$HTML_NS}->{map} = {
2414 wakaba 1.4 attrs_checker => sub {
2415     my ($self, $todo) = @_;
2416     my $has_id;
2417     $GetHTMLAttrsChecker->({
2418     id => sub {
2419     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2420     my ($self, $attr) = @_;
2421     my $value = $attr->value;
2422     if (length $value > 0) {
2423     if ($self->{id}->{$value}) {
2424     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2425     push @{$self->{id}->{$value}}, $attr;
2426     } else {
2427     $self->{id}->{$value} = [$attr];
2428     }
2429 wakaba 1.1 } else {
2430 wakaba 1.4 ## NOTE: MUST contain at least one character
2431     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2432 wakaba 1.1 }
2433 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2434     $self->{onerror}->(node => $attr, type => 'space in ID');
2435     }
2436     $self->{map}->{$value} ||= $attr;
2437     $has_id = 1;
2438     },
2439     })->($self, $todo);
2440     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2441     unless $has_id;
2442     },
2443 wakaba 1.1 checker => $HTMLBlockChecker,
2444     };
2445    
2446     $Element->{$HTML_NS}->{area} = {
2447     attrs_checker => sub {
2448     my ($self, $todo) = @_;
2449     my %attr;
2450     my $coords;
2451     for my $attr (@{$todo->{node}->attributes}) {
2452     my $attr_ns = $attr->namespace_uri;
2453     $attr_ns = '' unless defined $attr_ns;
2454     my $attr_ln = $attr->manakai_local_name;
2455     my $checker;
2456     if ($attr_ns eq '') {
2457     $checker = {
2458     alt => sub { },
2459     ## NOTE: |alt| value has no conformance creteria.
2460     shape => $GetHTMLEnumeratedAttrChecker->({
2461     circ => -1, circle => 1,
2462     default => 1,
2463     poly => 1, polygon => -1,
2464     rect => 1, rectangle => -1,
2465     }),
2466     coords => sub {
2467     my ($self, $attr) = @_;
2468     my $value = $attr->value;
2469     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2470     $coords = [split /,/, $value];
2471     } else {
2472     $self->{onerror}->(node => $attr,
2473     type => 'coords:syntax error');
2474     }
2475     },
2476     target => $HTMLTargetAttrChecker,
2477     href => $HTMLURIAttrChecker,
2478     ping => $HTMLSpaceURIsAttrChecker,
2479 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2480 wakaba 1.1 media => $HTMLMQAttrChecker,
2481     hreflang => $HTMLLanguageTagAttrChecker,
2482     type => $HTMLIMTAttrChecker,
2483     }->{$attr_ln};
2484     if ($checker) {
2485     $attr{$attr_ln} = $attr;
2486     } else {
2487     $checker = $HTMLAttrChecker->{$attr_ln};
2488     }
2489     }
2490     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2491     || $AttrChecker->{$attr_ns}->{''};
2492     if ($checker) {
2493     $checker->($self, $attr) if ref $checker;
2494     } else {
2495     $self->{onerror}->(node => $attr, level => 'unsupported',
2496     type => 'attribute');
2497     ## ISSUE: No comformance createria for unknown attributes in the spec
2498     }
2499     }
2500    
2501     if (defined $attr{href}) {
2502 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2503 wakaba 1.1 unless (defined $attr{alt}) {
2504     $self->{onerror}->(node => $todo->{node},
2505     type => 'attribute missing:alt');
2506     }
2507     } else {
2508     for (qw/target ping rel media hreflang type alt/) {
2509     if (defined $attr{$_}) {
2510     $self->{onerror}->(node => $attr{$_},
2511     type => 'attribute not allowed');
2512     }
2513     }
2514     }
2515    
2516     my $shape = 'rectangle';
2517     if (defined $attr{shape}) {
2518     $shape = {
2519     circ => 'circle', circle => 'circle',
2520     default => 'default',
2521     poly => 'polygon', polygon => 'polygon',
2522     rect => 'rectangle', rectangle => 'rectangle',
2523     }->{lc $attr{shape}->value} || 'rectangle';
2524     ## TODO: ASCII lowercase?
2525     }
2526    
2527     if ($shape eq 'circle') {
2528     if (defined $attr{coords}) {
2529     if (defined $coords) {
2530     if (@$coords == 3) {
2531     if ($coords->[2] < 0) {
2532     $self->{onerror}->(node => $attr{coords},
2533     type => 'coords:out of range:2');
2534     }
2535     } else {
2536     $self->{onerror}->(node => $attr{coords},
2537     type => 'coords:number:3:'.@$coords);
2538     }
2539     } else {
2540     ## NOTE: A syntax error has been reported.
2541     }
2542     } else {
2543     $self->{onerror}->(node => $todo->{node},
2544     type => 'attribute missing:coords');
2545     }
2546     } elsif ($shape eq 'default') {
2547     if (defined $attr{coords}) {
2548     $self->{onerror}->(node => $attr{coords},
2549     type => 'attribute not allowed');
2550     }
2551     } elsif ($shape eq 'polygon') {
2552     if (defined $attr{coords}) {
2553     if (defined $coords) {
2554     if (@$coords >= 6) {
2555     unless (@$coords % 2 == 0) {
2556     $self->{onerror}->(node => $attr{coords},
2557     type => 'coords:number:even:'.@$coords);
2558     }
2559     } else {
2560     $self->{onerror}->(node => $attr{coords},
2561     type => 'coords:number:>=6:'.@$coords);
2562     }
2563     } else {
2564     ## NOTE: A syntax error has been reported.
2565     }
2566     } else {
2567     $self->{onerror}->(node => $todo->{node},
2568     type => 'attribute missing:coords');
2569     }
2570     } elsif ($shape eq 'rectangle') {
2571     if (defined $attr{coords}) {
2572     if (defined $coords) {
2573     if (@$coords == 4) {
2574     unless ($coords->[0] < $coords->[2]) {
2575     $self->{onerror}->(node => $attr{coords},
2576     type => 'coords:out of range:0');
2577     }
2578     unless ($coords->[1] < $coords->[3]) {
2579     $self->{onerror}->(node => $attr{coords},
2580     type => 'coords:out of range:1');
2581     }
2582     } else {
2583     $self->{onerror}->(node => $attr{coords},
2584     type => 'coords:number:4:'.@$coords);
2585     }
2586     } else {
2587     ## NOTE: A syntax error has been reported.
2588     }
2589     } else {
2590     $self->{onerror}->(node => $todo->{node},
2591     type => 'attribute missing:coords');
2592     }
2593     }
2594     },
2595     checker => $HTMLEmptyChecker,
2596     };
2597     ## TODO: only in map
2598    
2599     $Element->{$HTML_NS}->{table} = {
2600     attrs_checker => $GetHTMLAttrsChecker->({}),
2601     checker => sub {
2602     my ($self, $todo) = @_;
2603     my $el = $todo->{node};
2604     my $new_todos = [];
2605     my @nodes = (@{$el->child_nodes});
2606    
2607     my $phase = 'before caption';
2608     my $has_tfoot;
2609     while (@nodes) {
2610     my $node = shift @nodes;
2611     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2612    
2613     my $nt = $node->node_type;
2614     if ($nt == 1) {
2615     my $node_ns = $node->namespace_uri;
2616     $node_ns = '' unless defined $node_ns;
2617     my $node_ln = $node->manakai_local_name;
2618     ## NOTE: |minuses| list is not checked since redundant
2619 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2620     #
2621     } elsif ($phase eq 'in tbodys') {
2622 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2623     #$phase = 'in tbodys';
2624     } elsif (not $has_tfoot and
2625     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2626     $phase = 'after tfoot';
2627     $has_tfoot = 1;
2628     } else {
2629     $self->{onerror}->(node => $node, type => 'element not allowed');
2630     }
2631     } elsif ($phase eq 'in trs') {
2632     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2633     #$phase = 'in trs';
2634     } elsif (not $has_tfoot and
2635     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2636     $phase = 'after tfoot';
2637     $has_tfoot = 1;
2638     } else {
2639     $self->{onerror}->(node => $node, type => 'element not allowed');
2640     }
2641     } elsif ($phase eq 'after thead') {
2642     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2643     $phase = 'in tbodys';
2644     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2645     $phase = 'in trs';
2646     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2647     $phase = 'in tbodys';
2648     $has_tfoot = 1;
2649     } else {
2650     $self->{onerror}->(node => $node, type => 'element not allowed');
2651     }
2652     } elsif ($phase eq 'in colgroup') {
2653     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2654     $phase = 'in colgroup';
2655     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2656     $phase = 'after thead';
2657     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2658     $phase = 'in tbodys';
2659     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2660     $phase = 'in trs';
2661     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2662     $phase = 'in tbodys';
2663     $has_tfoot = 1;
2664     } else {
2665     $self->{onerror}->(node => $node, type => 'element not allowed');
2666     }
2667     } elsif ($phase eq 'before caption') {
2668     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2669     $phase = 'in colgroup';
2670     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2671     $phase = 'in colgroup';
2672     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2673     $phase = 'after thead';
2674     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2675     $phase = 'in tbodys';
2676     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2677     $phase = 'in trs';
2678     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2679     $phase = 'in tbodys';
2680     $has_tfoot = 1;
2681     } else {
2682     $self->{onerror}->(node => $node, type => 'element not allowed');
2683     }
2684     } else { # after tfoot
2685     $self->{onerror}->(node => $node, type => 'element not allowed');
2686     }
2687     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2688     unshift @nodes, @$sib;
2689     push @$new_todos, @$ch;
2690     } elsif ($nt == 3 or $nt == 4) {
2691     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2692     $self->{onerror}->(node => $node, type => 'character not allowed');
2693     }
2694     } elsif ($nt == 5) {
2695     unshift @nodes, @{$node->child_nodes};
2696     }
2697     }
2698    
2699     ## Table model errors
2700     require Whatpm::HTMLTable;
2701     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2702     my %opt = @_;
2703     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2704     });
2705     push @{$self->{return}->{table}}, $todo->{node};
2706    
2707     return ($new_todos);
2708     },
2709     };
2710    
2711     $Element->{$HTML_NS}->{caption} = {
2712     attrs_checker => $GetHTMLAttrsChecker->({}),
2713 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
2714 wakaba 1.1 };
2715    
2716     $Element->{$HTML_NS}->{colgroup} = {
2717     attrs_checker => $GetHTMLAttrsChecker->({
2718     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2719     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2720     ## TODO: "attribute not supported" if |col|.
2721     ## ISSUE: MUST NOT if any |col|?
2722     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2723     }),
2724     checker => sub {
2725     my ($self, $todo) = @_;
2726     my $el = $todo->{node};
2727     my $new_todos = [];
2728     my @nodes = (@{$el->child_nodes});
2729    
2730     while (@nodes) {
2731     my $node = shift @nodes;
2732     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2733    
2734     my $nt = $node->node_type;
2735     if ($nt == 1) {
2736     my $node_ns = $node->namespace_uri;
2737     $node_ns = '' unless defined $node_ns;
2738     my $node_ln = $node->manakai_local_name;
2739     ## NOTE: |minuses| list is not checked since redundant
2740 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2741     #
2742     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
2743 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2744     }
2745     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2746     unshift @nodes, @$sib;
2747     push @$new_todos, @$ch;
2748     } elsif ($nt == 3 or $nt == 4) {
2749     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2750     $self->{onerror}->(node => $node, type => 'character not allowed');
2751     }
2752     } elsif ($nt == 5) {
2753     unshift @nodes, @{$node->child_nodes};
2754     }
2755     }
2756     return ($new_todos);
2757     },
2758     };
2759    
2760     $Element->{$HTML_NS}->{col} = {
2761     attrs_checker => $GetHTMLAttrsChecker->({
2762     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2763     }),
2764     checker => $HTMLEmptyChecker,
2765     };
2766    
2767     $Element->{$HTML_NS}->{tbody} = {
2768     attrs_checker => $GetHTMLAttrsChecker->({}),
2769     checker => sub {
2770     my ($self, $todo) = @_;
2771     my $el = $todo->{node};
2772     my $new_todos = [];
2773     my @nodes = (@{$el->child_nodes});
2774    
2775     my $has_tr;
2776     while (@nodes) {
2777     my $node = shift @nodes;
2778     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2779    
2780     my $nt = $node->node_type;
2781     if ($nt == 1) {
2782     my $node_ns = $node->namespace_uri;
2783     $node_ns = '' unless defined $node_ns;
2784     my $node_ln = $node->manakai_local_name;
2785     ## NOTE: |minuses| list is not checked since redundant
2786 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2787     #
2788     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2789 wakaba 1.1 $has_tr = 1;
2790     } else {
2791     $self->{onerror}->(node => $node, type => 'element not allowed');
2792     }
2793     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2794     unshift @nodes, @$sib;
2795     push @$new_todos, @$ch;
2796     } elsif ($nt == 3 or $nt == 4) {
2797     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2798     $self->{onerror}->(node => $node, type => 'character not allowed');
2799     }
2800     } elsif ($nt == 5) {
2801     unshift @nodes, @{$node->child_nodes};
2802     }
2803     }
2804     unless ($has_tr) {
2805     $self->{onerror}->(node => $el, type => 'child element missing:tr');
2806     }
2807     return ($new_todos);
2808     },
2809     };
2810    
2811     $Element->{$HTML_NS}->{thead} = {
2812     attrs_checker => $GetHTMLAttrsChecker->({}),
2813     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2814     };
2815    
2816     $Element->{$HTML_NS}->{tfoot} = {
2817     attrs_checker => $GetHTMLAttrsChecker->({}),
2818     checker => $Element->{$HTML_NS}->{tbody}->{checker},
2819     };
2820    
2821     $Element->{$HTML_NS}->{tr} = {
2822     attrs_checker => $GetHTMLAttrsChecker->({}),
2823     checker => sub {
2824     my ($self, $todo) = @_;
2825     my $el = $todo->{node};
2826     my $new_todos = [];
2827     my @nodes = (@{$el->child_nodes});
2828    
2829     my $has_td;
2830     while (@nodes) {
2831     my $node = shift @nodes;
2832     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2833    
2834     my $nt = $node->node_type;
2835     if ($nt == 1) {
2836     my $node_ns = $node->namespace_uri;
2837     $node_ns = '' unless defined $node_ns;
2838     my $node_ln = $node->manakai_local_name;
2839     ## NOTE: |minuses| list is not checked since redundant
2840 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2841     #
2842     } elsif ($node_ns eq $HTML_NS and
2843     ($node_ln eq 'td' or $node_ln eq 'th')) {
2844 wakaba 1.1 $has_td = 1;
2845     } else {
2846     $self->{onerror}->(node => $node, type => 'element not allowed');
2847     }
2848     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2849     unshift @nodes, @$sib;
2850     push @$new_todos, @$ch;
2851     } elsif ($nt == 3 or $nt == 4) {
2852     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2853     $self->{onerror}->(node => $node, type => 'character not allowed');
2854     }
2855     } elsif ($nt == 5) {
2856     unshift @nodes, @{$node->child_nodes};
2857     }
2858     }
2859     unless ($has_td) {
2860     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2861     }
2862     return ($new_todos);
2863     },
2864     };
2865    
2866     $Element->{$HTML_NS}->{td} = {
2867     attrs_checker => $GetHTMLAttrsChecker->({
2868     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2869     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2870     }),
2871     checker => $HTMLBlockOrInlineChecker,
2872     };
2873    
2874     $Element->{$HTML_NS}->{th} = {
2875     attrs_checker => $GetHTMLAttrsChecker->({
2876     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2877     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2878     scope => $GetHTMLEnumeratedAttrChecker
2879     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2880     }),
2881     checker => $HTMLBlockOrInlineChecker,
2882     };
2883    
2884     ## TODO: forms
2885 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2886 wakaba 1.1
2887     $Element->{$HTML_NS}->{script} = {
2888 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
2889 wakaba 1.1 src => $HTMLURIAttrChecker,
2890     defer => $GetHTMLBooleanAttrChecker->('defer'),
2891     async => $GetHTMLBooleanAttrChecker->('async'),
2892     type => $HTMLIMTAttrChecker,
2893 wakaba 1.9 }),
2894 wakaba 1.1 checker => sub {
2895     my ($self, $todo) = @_;
2896    
2897     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2898     return $HTMLEmptyChecker->($self, $todo);
2899     } else {
2900     ## NOTE: No content model conformance in HTML5 spec.
2901     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
2902     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
2903     if ((defined $type and $type eq '') or
2904     (defined $language and $language eq '')) {
2905     $type = 'text/javascript';
2906     } elsif (defined $type) {
2907     #
2908     } elsif (defined $language) {
2909     $type = 'text/' . $language;
2910     } else {
2911     $type = 'text/javascript';
2912     }
2913     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
2914     type => 'script:'.$type); ## TODO: $type normalization
2915     return $AnyChecker->($self, $todo);
2916     }
2917     },
2918     };
2919    
2920     ## NOTE: When script is disabled.
2921     $Element->{$HTML_NS}->{noscript} = {
2922 wakaba 1.3 attrs_checker => sub {
2923     my ($self, $todo) = @_;
2924    
2925     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
2926     ## since the later is not invoked when the |noscript| is used as a
2927     ## transparent element.
2928     unless ($todo->{node}->owner_document->manakai_is_html) {
2929     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
2930     }
2931    
2932     $GetHTMLAttrsChecker->({})->($self, $todo);
2933     },
2934 wakaba 1.1 checker => sub {
2935     my ($self, $todo) = @_;
2936    
2937 wakaba 1.3 if ($todo->{flag}->{in_head}) {
2938     my $new_todos = [];
2939     my @nodes = (@{$todo->{node}->child_nodes});
2940    
2941     while (@nodes) {
2942     my $node = shift @nodes;
2943     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2944    
2945     my $nt = $node->node_type;
2946     if ($nt == 1) {
2947     my $node_ns = $node->namespace_uri;
2948     $node_ns = '' unless defined $node_ns;
2949     my $node_ln = $node->manakai_local_name;
2950 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2951     #
2952     } elsif ($node_ns eq $HTML_NS) {
2953 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
2954     #
2955     } elsif ($node_ln eq 'meta') {
2956 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
2957     #
2958 wakaba 1.3 } else {
2959 wakaba 1.5 $self->{onerror}->(node => $node,
2960     type => 'element not allowed');
2961 wakaba 1.3 }
2962     } else {
2963     $self->{onerror}->(node => $node, type => 'element not allowed');
2964     }
2965     } else {
2966     $self->{onerror}->(node => $node, type => 'element not allowed');
2967     }
2968    
2969     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2970     unshift @nodes, @$sib;
2971     push @$new_todos, @$ch;
2972     } elsif ($nt == 3 or $nt == 4) {
2973     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2974     $self->{onerror}->(node => $node, type => 'character not allowed');
2975     }
2976     } elsif ($nt == 5) {
2977     unshift @nodes, @{$node->child_nodes};
2978     }
2979     }
2980     return ($new_todos);
2981     } else {
2982     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2983     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2984     push @$sib, $end;
2985     return ($sib, $ch);
2986     }
2987 wakaba 1.1 },
2988     };
2989 wakaba 1.3
2990     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2991 wakaba 1.1
2992     $Element->{$HTML_NS}->{'event-source'} = {
2993     attrs_checker => $GetHTMLAttrsChecker->({
2994     src => $HTMLURIAttrChecker,
2995     }),
2996     checker => $HTMLEmptyChecker,
2997     };
2998    
2999     $Element->{$HTML_NS}->{details} = {
3000     attrs_checker => $GetHTMLAttrsChecker->({
3001     open => $GetHTMLBooleanAttrChecker->('open'),
3002     }),
3003     checker => sub {
3004     my ($self, $todo) = @_;
3005    
3006     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3007     my ($sib, $ch)
3008     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
3009     ->($self, $todo);
3010     push @$sib, $end;
3011     return ($sib, $ch);
3012     },
3013     };
3014    
3015     $Element->{$HTML_NS}->{datagrid} = {
3016     attrs_checker => $GetHTMLAttrsChecker->({
3017     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3018     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3019     }),
3020     checker => sub {
3021     my ($self, $todo) = @_;
3022     my $el = $todo->{node};
3023     my $new_todos = [];
3024     my @nodes = (@{$el->child_nodes});
3025    
3026     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3027    
3028     ## Block-table Block* | table | select | datalist | Empty
3029     my $mode = 'any';
3030     while (@nodes) {
3031     my $node = shift @nodes;
3032     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3033    
3034     my $nt = $node->node_type;
3035     if ($nt == 1) {
3036     my $node_ns = $node->namespace_uri;
3037     $node_ns = '' unless defined $node_ns;
3038     my $node_ln = $node->manakai_local_name;
3039     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3040 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3041     #
3042     } elsif ($mode eq 'block') {
3043 wakaba 1.1 $not_allowed = 1
3044     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
3045     } elsif ($mode eq 'any') {
3046     if ($node_ns eq $HTML_NS and
3047     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
3048     $mode = 'none';
3049     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
3050     $mode = 'block';
3051     } else {
3052     $not_allowed = 1;
3053     }
3054     } else {
3055     $not_allowed = 1;
3056     }
3057     $self->{onerror}->(node => $node, type => 'element not allowed')
3058     if $not_allowed;
3059     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3060     unshift @nodes, @$sib;
3061     push @$new_todos, @$ch;
3062     } elsif ($nt == 3 or $nt == 4) {
3063     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3064     $self->{onerror}->(node => $node, type => 'character not allowed');
3065     }
3066     } elsif ($nt == 5) {
3067     unshift @nodes, @{$node->child_nodes};
3068     }
3069     }
3070    
3071     push @$new_todos, $end;
3072     return ($new_todos);
3073     },
3074     };
3075    
3076     $Element->{$HTML_NS}->{command} = {
3077     attrs_checker => $GetHTMLAttrsChecker->({
3078     checked => $GetHTMLBooleanAttrChecker->('checked'),
3079     default => $GetHTMLBooleanAttrChecker->('default'),
3080     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3081     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3082     icon => $HTMLURIAttrChecker,
3083     label => sub { }, ## NOTE: No conformance creteria
3084     radiogroup => sub { }, ## NOTE: No conformance creteria
3085     ## NOTE: |title| has special semantics, but no syntactical difference
3086     type => sub {
3087     my ($self, $attr) = @_;
3088     my $value = $attr->value;
3089     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3090     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3091     }
3092     },
3093     }),
3094     checker => $HTMLEmptyChecker,
3095     };
3096    
3097     $Element->{$HTML_NS}->{menu} = {
3098     attrs_checker => $GetHTMLAttrsChecker->({
3099     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3100     id => sub {
3101     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3102     my ($self, $attr) = @_;
3103     my $value = $attr->value;
3104     if (length $value > 0) {
3105     if ($self->{id}->{$value}) {
3106     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3107     push @{$self->{id}->{$value}}, $attr;
3108     } else {
3109     $self->{id}->{$value} = [$attr];
3110     }
3111     } else {
3112     ## NOTE: MUST contain at least one character
3113     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3114     }
3115     if ($value =~ /[\x09-\x0D\x20]/) {
3116     $self->{onerror}->(node => $attr, type => 'space in ID');
3117     }
3118     $self->{menu}->{$value} ||= $attr;
3119     ## ISSUE: <menu id=""><p contextmenu=""> match?
3120     },
3121     label => sub { }, ## NOTE: No conformance creteria
3122     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3123     }),
3124     checker => sub {
3125     my ($self, $todo) = @_;
3126     my $el = $todo->{node};
3127     my $new_todos = [];
3128     my @nodes = (@{$el->child_nodes});
3129    
3130     my $content = 'li or inline';
3131     while (@nodes) {
3132     my $node = shift @nodes;
3133     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3134    
3135     my $nt = $node->node_type;
3136     if ($nt == 1) {
3137     my $node_ns = $node->namespace_uri;
3138     $node_ns = '' unless defined $node_ns;
3139     my $node_ln = $node->manakai_local_name;
3140     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3141 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3142     #
3143     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3144 wakaba 1.1 if ($content eq 'inline') {
3145     $not_allowed = 1;
3146     } elsif ($content eq 'li or inline') {
3147     $content = 'li';
3148     }
3149     } else {
3150     if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
3151     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
3152     $content = 'inline';
3153     } else {
3154     $not_allowed = 1;
3155     }
3156     }
3157     $self->{onerror}->(node => $node, type => 'element not allowed')
3158     if $not_allowed;
3159     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3160     unshift @nodes, @$sib;
3161     push @$new_todos, @$ch;
3162     } elsif ($nt == 3 or $nt == 4) {
3163     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3164     if ($content eq 'li') {
3165     $self->{onerror}->(node => $node, type => 'character not allowed');
3166     } elsif ($content eq 'li or inline') {
3167     $content = 'inline';
3168     }
3169     }
3170     } elsif ($nt == 5) {
3171     unshift @nodes, @{$node->child_nodes};
3172     }
3173     }
3174    
3175     for (@$new_todos) {
3176     $_->{inline} = 1;
3177     }
3178     return ($new_todos);
3179     },
3180 wakaba 1.8 };
3181    
3182     $Element->{$HTML_NS}->{datatemplate} = {
3183     attrs_checker => $GetHTMLAttrsChecker->({}),
3184     checker => sub {
3185     my ($self, $todo) = @_;
3186     my $el = $todo->{node};
3187     my $new_todos = [];
3188     my @nodes = (@{$el->child_nodes});
3189    
3190     while (@nodes) {
3191     my $node = shift @nodes;
3192     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3193    
3194     my $nt = $node->node_type;
3195     if ($nt == 1) {
3196     my $node_ns = $node->namespace_uri;
3197     $node_ns = '' unless defined $node_ns;
3198     my $node_ln = $node->manakai_local_name;
3199     ## NOTE: |minuses| list is not checked since redundant
3200     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3201     #
3202     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3203     $self->{onerror}->(node => $node,
3204     type => 'element not allowed:datatemplate');
3205     }
3206     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3207     unshift @nodes, @$sib;
3208     push @$new_todos, @$ch;
3209     } elsif ($nt == 3 or $nt == 4) {
3210     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3211     $self->{onerror}->(node => $node, type => 'character not allowed');
3212     }
3213     } elsif ($nt == 5) {
3214     unshift @nodes, @{$node->child_nodes};
3215     }
3216     }
3217     return ($new_todos);
3218     },
3219     is_xml_root => 1,
3220     };
3221    
3222     $Element->{$HTML_NS}->{rule} = {
3223     attrs_checker => $GetHTMLAttrsChecker->({
3224     ## TODO: |condition| attribute
3225 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3226 wakaba 1.8 }),
3227     checker => sub {
3228     my ($self, $todo) = @_;
3229    
3230     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3231     my ($sib, $ch) = $AnyChecker->($self, $todo);
3232     push @$sib, $end;
3233     return ($sib, $ch);
3234     },
3235     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3236     ## is applied to some conforming data, results in a conforming DOM tree.":
3237     ## We don't check against this.
3238     };
3239    
3240     $Element->{$HTML_NS}->{nest} = {
3241     attrs_checker => $GetHTMLAttrsChecker->({
3242     ## TODO: |filter| attribute
3243 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3244 wakaba 1.8 }),
3245     checker => $HTMLEmptyChecker,
3246 wakaba 1.1 };
3247    
3248     $Element->{$HTML_NS}->{legend} = {
3249     attrs_checker => $GetHTMLAttrsChecker->({}),
3250     checker => sub {
3251     my ($self, $todo) = @_;
3252    
3253     my $parent = $todo->{node}->manakai_parent_element;
3254     if (defined $parent) {
3255     my $nsuri = $parent->namespace_uri;
3256     $nsuri = '' unless defined $nsuri;
3257     my $ln = $parent->manakai_local_name;
3258     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
3259     return $HTMLInlineChecker->($self, $todo);
3260     } else {
3261 wakaba 1.13 return $HTMLStrictlyInlineChecker->($self, $todo);
3262 wakaba 1.1 }
3263     } else {
3264     return $HTMLInlineChecker->($self, $todo);
3265     }
3266    
3267     ## ISSUE: Content model is defined only for fieldset/legend,
3268     ## details/legend, and figure/legend.
3269     },
3270     };
3271    
3272     $Element->{$HTML_NS}->{div} = {
3273     attrs_checker => $GetHTMLAttrsChecker->({}),
3274     checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
3275     };
3276    
3277     $Element->{$HTML_NS}->{font} = {
3278     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3279     checker => $HTMLTransparentChecker,
3280     };
3281    
3282     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3283    
3284     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24