/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.31 - (hide annotations) (download)
Sun Feb 17 11:04:08 2008 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.30: +2 -17 lines
++ whatpm/t/ChangeLog	17 Feb 2008 11:04:02 -0000
	* content-model-1.dat: Updated.

2008-02-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	17 Feb 2008 11:02:38 -0000
	* ContentChecker.pm (_get_children): (Incompleted) attempt to
	imlement significant content checking for contents
	with |del| elements.

2008-02-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	17 Feb 2008 11:03:40 -0000
	* HTML.pm (th): |th| now requires phrasing content.  |div|
	now requires prose content.

2008-02-17  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7 wakaba 1.29 ## December 2007 HTML5 Classification
8    
9     my $HTMLMetadataContent = {
10     $HTML_NS => {
11     title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1,
12     'event-source' => 1, command => 1, datatemplate => 1,
13     ## NOTE: A |meta| with no |name| element is not allowed as
14     ## a metadata content other than |head| element.
15     meta => 1,
16     },
17     ## NOTE: RDF is mentioned in the HTML5 spec.
18     ## TODO: Other RDF elements?
19     q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
20     };
21    
22     my $HTMLProseContent = {
23     $HTML_NS => {
24     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
25     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
26     footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1,
27     ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1,
28     details => 1, ## ISSUE: "Prose element" in spec.
29     datagrid => 1, ## ISSUE: "Prose element" in spec.
30     datatemplate => 1,
31     div => 1, ## ISSUE: No category in spec.
32     ## NOTE: |style| is only allowed if |scoped| attribute is specified.
33     ## Additionally, it must be before any other element or
34     ## non-inter-element-whitespace text node.
35     style => 1,
36    
37     br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, m => 1,
38     dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
39     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
40     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
41     command => 1, font => 1,
42     a => 1,
43     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
44     ## NOTE: |area| is allowed only as a descendant of |map|.
45     area => 1,
46    
47     ins => 1, del => 1,
48    
49     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
50     menu => 1,
51    
52     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
53     canvas => 1,
54     },
55    
56     ## NOTE: Embedded
57     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
58     q<http://www.w3.org/2000/svg> => {svg => 1},
59     };
60    
61     my $HTMLSectioningContent = {
62     $HTML_NS => {
63     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
64     ## NOTE: |body| is only allowed in |html| element.
65     body => 1,
66     },
67     };
68    
69     my $HTMLHeadingContent = {
70     $HTML_NS => {
71     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
72     },
73     };
74    
75     my $HTMLPhrasingContent = {
76     ## NOTE: All phrasing content is also prose content.
77     $HTML_NS => {
78     br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, m => 1,
79     dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
80     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
81     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
82     command => 1, font => 1,
83     a => 1,
84     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
85     ## NOTE: |area| is allowed only as a descendant of |map|.
86     area => 1,
87    
88     ## NOTE: Transparent.
89     ins => 1, del => 1,
90    
91     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
92     menu => 1,
93    
94     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
95     canvas => 1,
96     },
97    
98     ## NOTE: Embedded
99     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
100     q<http://www.w3.org/2000/svg> => {svg => 1},
101    
102     ## NOTE: And non-inter-element-whitespace text nodes.
103     };
104    
105     my $HTMLEmbeddedContent = {
106     ## NOTE: All embedded content is also phrasing content.
107     $HTML_NS => {
108     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
109     canvas => 1,
110     },
111     ## NOTE: MathML is mentioned in the HTML5 spec.
112     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
113     ## NOTE: SVG is mentioned in the HTML5 spec.
114     q<http://www.w3.org/2000/svg> => {svg => 1},
115     ## NOTE: Foreign elements with content (but no metadata) are
116     ## embedded content.
117     };
118    
119     my $HTMLInteractiveContent = {
120     $HTML_NS => {
121     a => 1,
122     },
123     };
124    
125     ## Old HTML5 categories
126    
127 wakaba 1.1 my $HTMLMetadataElements = {
128     $HTML_NS => {
129     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
130 wakaba 1.8 noscript 1 datatemplate 1
131 wakaba 1.1 /,
132     },
133     };
134    
135     my $HTMLSectioningElements = {
136     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
137     };
138    
139     my $HTMLBlockLevelElements = {
140     $HTML_NS => {
141     qw/
142     section 1 nav 1 article 1 blockquote 1 aside 1
143     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
144     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
145     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
146     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
147 wakaba 1.8 datatemplate 1
148 wakaba 1.1 /,
149     },
150     };
151    
152     my $HTMLStrictlyInlineLevelElements = {
153     $HTML_NS => {
154     qw/
155     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
156     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
157     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
158     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
159     script 1 noscript 1 event-source 1 command 1 font 1
160     /,
161     },
162     };
163    
164     my $HTMLStructuredInlineLevelElements = {
165     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
166     };
167    
168     my $HTMLInteractiveElements = {
169     $HTML_NS => {a => 1, details => 1, datagrid => 1},
170     };
171     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
172     ## of interactive elements
173    
174     # my $HTMLTransparentElements : in |Whatpm/ContentChecker.pm|.
175    
176     #my $HTMLSemiTransparentElements = {
177     # $HTML_NS => {qw/video 1 audio 1/},
178     #};
179    
180     my $HTMLEmbededElements = {
181     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
182     };
183 wakaba 1.25 ## NOTE: When an element is added to this list, make sure that
184     ## the element's checker set |has_descendant| flag for |significant| content
185     ## as true.
186    
187 wakaba 1.26 my $HTMLSignificantContentErrors = {
188     significant => sub {
189     my ($self, $todo) = @_;
190     $self->{onerror}->(node => $todo->{node},
191     level => $self->{should_level},
192     type => 'no significant content');
193     },
194     }; # $HTMLSignificantContentErrors
195    
196 wakaba 1.25 our $AnyChecker;
197     my $HTMLAnyChecker = sub {
198     my ($self, $todo) = @_;
199    
200     my $old_values = {significant =>
201     $todo->{flag}->{has_descendant}->{significant}};
202     $todo->{flag}->{has_descendant}->{significant} = 0;
203    
204     my ($new_todos) = $AnyChecker->($self, $todo);
205    
206     push @$new_todos, {
207     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
208     old_values => $old_values,
209 wakaba 1.26 errors => $HTMLSignificantContentErrors,
210 wakaba 1.25 };
211    
212     return ($new_todos);
213     }; # $HTMLAnyChecker
214 wakaba 1.1
215     ## Empty
216     my $HTMLEmptyChecker = sub {
217     my ($self, $todo) = @_;
218     my $el = $todo->{node};
219     my $new_todos = [];
220     my @nodes = (@{$el->child_nodes});
221    
222     while (@nodes) {
223     my $node = shift @nodes;
224     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
225    
226     my $nt = $node->node_type;
227     if ($nt == 1) {
228 wakaba 1.8 my $node_ns = $node->namespace_uri;
229     $node_ns = '' unless defined $node_ns;
230     my $node_ln = $node->manakai_local_name;
231 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
232     $self->{onerror}->(node => $node, type => 'element not allowed:minus',
233     level => $self->{must_level});
234     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
235 wakaba 1.8 #
236     } else {
237 wakaba 1.30 $self->{onerror}->(node => $node, type => 'element not allowed:empty',
238     level => $self->{must_level});
239 wakaba 1.8 }
240 wakaba 1.30
241 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
242     unshift @nodes, @$sib;
243     push @$new_todos, @$ch;
244     } elsif ($nt == 3 or $nt == 4) {
245     if ($node->data =~ /[^\x09-\x0D\x20]/) {
246 wakaba 1.30 $self->{onerror}->(node => $node,
247     type => 'character not allowed:empty',
248     level => $self->{must_level});
249 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
250 wakaba 1.1 }
251     } elsif ($nt == 5) {
252     unshift @nodes, @{$node->child_nodes};
253     }
254     }
255     return ($new_todos);
256     };
257    
258     ## Text
259     my $HTMLTextChecker = sub {
260     my ($self, $todo) = @_;
261     my $el = $todo->{node};
262     my $new_todos = [];
263     my @nodes = (@{$el->child_nodes});
264    
265 wakaba 1.25 my $old_values = {significant =>
266     $todo->{flag}->{has_descendant}->{significant}};
267     $todo->{flag}->{has_descendant}->{significant} = 0;
268    
269 wakaba 1.1 while (@nodes) {
270     my $node = shift @nodes;
271     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
272    
273     my $nt = $node->node_type;
274     if ($nt == 1) {
275 wakaba 1.8 my $node_ns = $node->namespace_uri;
276     $node_ns = '' unless defined $node_ns;
277     my $node_ln = $node->manakai_local_name;
278     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
279     #
280     } else {
281     ## NOTE: |minuses| list is not checked since redundant
282     $self->{onerror}->(node => $node, type => 'element not allowed');
283     }
284 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
285     unshift @nodes, @$sib;
286     push @$new_todos, @$ch;
287 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
288     if ($node->data =~ /[^\x09-\x0D\x20]/) {
289     $todo->{flag}->{has_descendant}->{significant} = 1;
290     }
291 wakaba 1.1 } elsif ($nt == 5) {
292     unshift @nodes, @{$node->child_nodes};
293     }
294     }
295 wakaba 1.25
296     push @$new_todos, {
297     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
298     old_values => $old_values,
299 wakaba 1.26 errors => $HTMLSignificantContentErrors,
300 wakaba 1.25 };
301    
302 wakaba 1.1 return ($new_todos);
303     };
304    
305     ## Zero or more |html:style| elements,
306     ## followed by zero or more block-level elements
307     my $HTMLStylableBlockChecker = sub {
308     my ($self, $todo) = @_;
309     my $el = $todo->{node};
310     my $new_todos = [];
311     my @nodes = (@{$el->child_nodes});
312 wakaba 1.25
313     my $old_values = {significant =>
314     $todo->{flag}->{has_descendant}->{significant}};
315     $todo->{flag}->{has_descendant}->{significant} = 0;
316 wakaba 1.1
317     my $has_non_style;
318     while (@nodes) {
319     my $node = shift @nodes;
320     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
321    
322     my $nt = $node->node_type;
323     if ($nt == 1) {
324     my $node_ns = $node->namespace_uri;
325     $node_ns = '' unless defined $node_ns;
326     my $node_ln = $node->manakai_local_name;
327     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
328     if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
329     $not_allowed = 1 if $has_non_style or
330     not $node->has_attribute_ns (undef, 'scoped');
331     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
332     $has_non_style = 1;
333 wakaba 1.8 } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
334     #
335 wakaba 1.1 } else {
336     $has_non_style = 1;
337     $not_allowed = 1;
338     }
339     $self->{onerror}->(node => $node, type => 'element not allowed')
340     if $not_allowed;
341     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
342     unshift @nodes, @$sib;
343     push @$new_todos, @$ch;
344     } elsif ($nt == 3 or $nt == 4) {
345     if ($node->data =~ /[^\x09-\x0D\x20]/) {
346     $self->{onerror}->(node => $node, type => 'character not allowed');
347 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
348 wakaba 1.1 }
349     } elsif ($nt == 5) {
350     unshift @nodes, @{$node->child_nodes};
351     }
352     }
353 wakaba 1.25
354     push @$new_todos, {
355     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
356     old_values => $old_values,
357 wakaba 1.26 errors => $HTMLSignificantContentErrors,
358 wakaba 1.25 };
359    
360 wakaba 1.1 return ($new_todos);
361     }; # $HTMLStylableBlockChecker
362    
363 wakaba 1.29 my $HTMLProseContentChecker = sub {
364     my ($self, $todo) = @_;
365     my $el = $todo->{node};
366     my $new_todos = [];
367     my @nodes = (@{$el->child_nodes});
368    
369     my $old_values = {significant =>
370     $todo->{flag}->{has_descendant}->{significant}};
371     $todo->{flag}->{has_descendant}->{significant} = 0;
372    
373     my $has_non_style;
374     while (@nodes) {
375     my $node = shift @nodes;
376     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
377    
378     my $nt = $node->node_type;
379     if ($nt == 1) {
380     my $node_ns = $node->namespace_uri;
381     $node_ns = '' unless defined $node_ns;
382     my $node_ln = $node->manakai_local_name;
383 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
384     $self->{onerror}->(node => $node,
385     type => 'element not allowed:minus',
386     level => $self->{must_level});
387     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
388     if ($has_non_style or
389     not $node->has_attribute_ns (undef, 'scoped')) {
390     $self->{onerror}->(node => $node,
391     type => 'element not allowed:prose style',
392     level => $self->{must_level});
393     }
394 wakaba 1.29 } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
395     $has_non_style = 1;
396     if ($HTMLEmbeddedContent->{$node_ns}->{$node_ln}) {
397     $todo->{flag}->{has_descendant}->{significant} = 1;
398     }
399     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
400     #
401     } else {
402     $has_non_style = 1;
403 wakaba 1.30 $self->{onerror}->(node => $node,
404     type => 'element not allowed:prose',
405     level => $self->{must_level})
406 wakaba 1.29 }
407 wakaba 1.30
408 wakaba 1.29 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
409     unshift @nodes, @$sib;
410     push @$new_todos, @$ch;
411     } elsif ($nt == 3 or $nt == 4) {
412     if ($node->data =~ /[^\x09-\x0D\x20]/) {
413     $has_non_style = 1;
414     $todo->{flag}->{has_descendant}->{significant} = 1;
415     }
416     } elsif ($nt == 5) {
417     unshift @nodes, @{$node->child_nodes};
418     }
419     }
420    
421     push @$new_todos, {
422     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
423     old_values => $old_values,
424     errors => $HTMLSignificantContentErrors,
425     };
426    
427     return ($new_todos);
428     }; # $HTMLProseContentChecker
429    
430 wakaba 1.1 ## Zero or more block-level elements
431     my $HTMLBlockChecker = sub {
432     my ($self, $todo) = @_;
433     my $el = $todo->{node};
434     my $new_todos = [];
435     my @nodes = (@{$el->child_nodes});
436    
437 wakaba 1.25 my $old_values = {significant =>
438     $todo->{flag}->{has_descendant}->{significant}};
439     $todo->{flag}->{has_descendant}->{significant} = 0;
440    
441 wakaba 1.1 while (@nodes) {
442     my $node = shift @nodes;
443     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
444    
445     my $nt = $node->node_type;
446     if ($nt == 1) {
447     my $node_ns = $node->namespace_uri;
448     $node_ns = '' unless defined $node_ns;
449     my $node_ln = $node->manakai_local_name;
450     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
451     $not_allowed = 1
452 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
453     $self->{pluses}->{$node_ns}->{$node_ln};
454 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
455     if $not_allowed;
456     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
457     unshift @nodes, @$sib;
458     push @$new_todos, @$ch;
459     } elsif ($nt == 3 or $nt == 4) {
460     if ($node->data =~ /[^\x09-\x0D\x20]/) {
461     $self->{onerror}->(node => $node, type => 'character not allowed');
462 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
463 wakaba 1.1 }
464     } elsif ($nt == 5) {
465     unshift @nodes, @{$node->child_nodes};
466     }
467     }
468 wakaba 1.25
469     push @$new_todos, {
470     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
471     old_values => $old_values,
472 wakaba 1.26 errors => $HTMLSignificantContentErrors,
473 wakaba 1.25 };
474    
475 wakaba 1.1 return ($new_todos);
476     }; # $HTMLBlockChecker
477    
478     ## Inline-level content
479     my $HTMLInlineChecker = sub {
480     my ($self, $todo) = @_;
481     my $el = $todo->{node};
482     my $new_todos = [];
483     my @nodes = (@{$el->child_nodes});
484    
485 wakaba 1.25 my $old_values = {significant =>
486     $todo->{flag}->{has_descendant}->{significant}};
487     $todo->{flag}->{has_descendant}->{significant} = 0;
488    
489 wakaba 1.1 while (@nodes) {
490     my $node = shift @nodes;
491     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
492    
493     my $nt = $node->node_type;
494     if ($nt == 1) {
495     my $node_ns = $node->namespace_uri;
496     $node_ns = '' unless defined $node_ns;
497     my $node_ln = $node->manakai_local_name;
498     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
499     $not_allowed = 1
500     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
501 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
502     $self->{pluses}->{$node_ns}->{$node_ln};
503 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
504     if $not_allowed;
505     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
506     unshift @nodes, @$sib;
507     push @$new_todos, @$ch;
508 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
509     if ($node->data =~ /[^\x09-\x0D\x20]/) {
510     $todo->{flag}->{has_descendant}->{significant} = 1;
511     }
512 wakaba 1.1 } elsif ($nt == 5) {
513     unshift @nodes, @{$node->child_nodes};
514     }
515     }
516    
517     for (@$new_todos) {
518     $_->{inline} = 1;
519     }
520 wakaba 1.25
521     push @$new_todos, {
522     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
523     old_values => $old_values,
524 wakaba 1.26 errors => $HTMLSignificantContentErrors,
525 wakaba 1.25 };
526    
527 wakaba 1.1 return ($new_todos);
528     }; # $HTMLInlineChecker
529    
530     ## Strictly inline-level content
531     my $HTMLStrictlyInlineChecker = sub {
532     my ($self, $todo) = @_;
533     my $el = $todo->{node};
534     my $new_todos = [];
535     my @nodes = (@{$el->child_nodes});
536 wakaba 1.25
537     my $old_values = {significant =>
538     $todo->{flag}->{has_descendant}->{significant}};
539     $todo->{flag}->{has_descendant}->{significant} = 0;
540 wakaba 1.1
541     while (@nodes) {
542     my $node = shift @nodes;
543     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
544    
545     my $nt = $node->node_type;
546     if ($nt == 1) {
547     my $node_ns = $node->namespace_uri;
548     $node_ns = '' unless defined $node_ns;
549     my $node_ln = $node->manakai_local_name;
550     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
551     $not_allowed = 1
552 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
553     $self->{pluses}->{$node_ns}->{$node_ln};
554 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed')
555     if $not_allowed;
556     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
557     unshift @nodes, @$sib;
558     push @$new_todos, @$ch;
559 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
560     if ($node->data =~ /[^\x09-\x0D\x20]/) {
561     $todo->{flag}->{has_descendant}->{significant} = 1;
562     }
563 wakaba 1.1 } elsif ($nt == 5) {
564     unshift @nodes, @{$node->child_nodes};
565     }
566     }
567    
568     for (@$new_todos) {
569     $_->{inline} = 1;
570     $_->{strictly_inline} = 1;
571     }
572 wakaba 1.25
573     push @$new_todos, {
574     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
575     old_values => $old_values,
576 wakaba 1.26 errors => $HTMLSignificantContentErrors,
577 wakaba 1.25 };
578    
579 wakaba 1.1 return ($new_todos);
580     }; # $HTMLStrictlyInlineChecker
581    
582 wakaba 1.8 ## Inline-level or strictly inline-level content
583 wakaba 1.1 my $HTMLInlineOrStrictlyInlineChecker = sub {
584     my ($self, $todo) = @_;
585     my $el = $todo->{node};
586     my $new_todos = [];
587     my @nodes = (@{$el->child_nodes});
588 wakaba 1.25
589     my $old_values = {significant =>
590     $todo->{flag}->{has_descendant}->{significant}};
591     $todo->{flag}->{has_descendant}->{significant} = 0;
592 wakaba 1.1
593     while (@nodes) {
594     my $node = shift @nodes;
595     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
596    
597     my $nt = $node->node_type;
598     if ($nt == 1) {
599     my $node_ns = $node->namespace_uri;
600     $node_ns = '' unless defined $node_ns;
601     my $node_ln = $node->manakai_local_name;
602     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
603     if ($todo->{strictly_inline}) {
604     $not_allowed = 1
605 wakaba 1.8 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
606     $self->{pluses}->{$node_ns}->{$node_ln};
607 wakaba 1.1 } else {
608     $not_allowed = 1
609     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
610 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
611     $self->{pluses}->{$node_ns}->{$node_ln};
612 wakaba 1.1 }
613     $self->{onerror}->(node => $node, type => 'element not allowed')
614     if $not_allowed;
615     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
616     unshift @nodes, @$sib;
617     push @$new_todos, @$ch;
618 wakaba 1.25 } elsif ($nt == 3 or $nt == 4) {
619     if ($node->data =~ /[^\x09-\x0D\x20]/) {
620     $todo->{flag}->{has_descendant}->{significant} = 1;
621     }
622 wakaba 1.1 } elsif ($nt == 5) {
623     unshift @nodes, @{$node->child_nodes};
624     }
625     }
626    
627     for (@$new_todos) {
628     $_->{inline} = 1;
629     $_->{strictly_inline} = 1;
630     }
631 wakaba 1.25
632     push @$new_todos, {
633     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
634     old_values => $old_values,
635 wakaba 1.26 errors => $HTMLSignificantContentErrors,
636 wakaba 1.25 };
637    
638 wakaba 1.1 return ($new_todos);
639     }; # $HTMLInlineOrStrictlyInlineChecker
640    
641 wakaba 1.29 my $HTMLPhrasingContentChecker = sub {
642     my ($self, $todo) = @_;
643     my $el = $todo->{node};
644     my $new_todos = [];
645     my @nodes = (@{$el->child_nodes});
646    
647     my $old_values = {significant =>
648     $todo->{flag}->{has_descendant}->{significant}};
649     $todo->{flag}->{has_descendant}->{significant} = 0;
650    
651     while (@nodes) {
652     my $node = shift @nodes;
653     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
654    
655     my $nt = $node->node_type;
656     if ($nt == 1) {
657     my $node_ns = $node->namespace_uri;
658     $node_ns = '' unless defined $node_ns;
659     my $node_ln = $node->manakai_local_name;
660 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
661     $self->{onerror}->(node => $node,
662     type => 'element not allowed:minus',
663     level => $self->{must_level});
664     } elsif ($HTMLPhrasingContent->{$node_ns}->{$node_ln} or
665     $self->{pluses}->{$node_ns}->{$node_ln}) {
666     #
667     } else {
668     $self->{onerror}->(node => $node,
669     type => 'element not allowed:phrasing',
670     level => $self->{must_level});
671     }
672    
673 wakaba 1.29 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
674     unshift @nodes, @$sib;
675     push @$new_todos, @$ch;
676     } elsif ($nt == 3 or $nt == 4) {
677     if ($node->data =~ /[^\x09-\x0D\x20]/) {
678     $todo->{flag}->{has_descendant}->{significant} = 1;
679     }
680     } elsif ($nt == 5) {
681     unshift @nodes, @{$node->child_nodes};
682     }
683     }
684    
685     push @$new_todos, {
686     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
687     old_values => $old_values,
688     errors => $HTMLSignificantContentErrors,
689     };
690    
691     return ($new_todos);
692     }; # $HTMLPhrasingContentChecker
693    
694 wakaba 1.8 ## Block-level content or inline-level content (i.e. bimorphic content model)
695 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
696     my ($self, $todo) = @_;
697     my $el = $todo->{node};
698     my $new_todos = [];
699     my @nodes = (@{$el->child_nodes});
700 wakaba 1.25
701     my $old_values = {significant =>
702     $todo->{flag}->{has_descendant}->{significant}};
703     $todo->{flag}->{has_descendant}->{significant} = 0;
704 wakaba 1.1
705     my $content = 'block-or-inline'; # or 'block' or 'inline'
706     my @block_not_inline;
707     while (@nodes) {
708     my $node = shift @nodes;
709     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
710    
711 wakaba 1.8 ## ISSUE: It is unclear whether "<rule><div><p/><nest/></div></rule>"
712     ## is conforming or not.
713    
714 wakaba 1.1 my $nt = $node->node_type;
715     if ($nt == 1) {
716     my $node_ns = $node->namespace_uri;
717     $node_ns = '' unless defined $node_ns;
718     my $node_ln = $node->manakai_local_name;
719     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
720     if ($content eq 'block') {
721     $not_allowed = 1
722 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
723     $self->{pluses}->{$node_ns}->{$node_ln};
724 wakaba 1.1 } elsif ($content eq 'inline') {
725     $not_allowed = 1
726     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
727 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
728     $self->{pluses}->{$node_ns}->{$node_ln};
729 wakaba 1.1 } else {
730     my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
731     my $is_inline
732     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
733     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
734    
735     push @block_not_inline, $node
736     if $is_block and not $is_inline and not $not_allowed;
737 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
738 wakaba 1.1 $content = 'inline';
739     for (@block_not_inline) {
740     $self->{onerror}->(node => $_, type => 'element not allowed');
741     }
742     $not_allowed = 1 unless $is_inline;
743     }
744     }
745     $self->{onerror}->(node => $node, type => 'element not allowed')
746     if $not_allowed;
747     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
748     unshift @nodes, @$sib;
749     push @$new_todos, @$ch;
750     } elsif ($nt == 3 or $nt == 4) {
751     if ($node->data =~ /[^\x09-\x0D\x20]/) {
752     if ($content eq 'block') {
753     $self->{onerror}->(node => $node, type => 'character not allowed');
754     } else {
755     $content = 'inline';
756     for (@block_not_inline) {
757     $self->{onerror}->(node => $_, type => 'element not allowed');
758     }
759     }
760 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
761 wakaba 1.1 }
762     } elsif ($nt == 5) {
763     unshift @nodes, @{$node->child_nodes};
764     }
765     }
766    
767     if ($content eq 'inline') {
768     for (@$new_todos) {
769     $_->{inline} = 1;
770     }
771     }
772 wakaba 1.25
773     push @$new_todos, {
774     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
775     old_values => $old_values,
776 wakaba 1.26 errors => $HTMLSignificantContentErrors,
777 wakaba 1.25 };
778    
779 wakaba 1.1 return ($new_todos);
780     };
781    
782     ## Zero or more XXX element, then either block-level or inline-level
783     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
784     my ($elnsuri, $ellname) = @_;
785     return sub {
786     my ($self, $todo) = @_;
787     my $el = $todo->{node};
788     my $new_todos = [];
789     my @nodes = (@{$el->child_nodes});
790 wakaba 1.25
791     my $old_values = {significant =>
792     $todo->{flag}->{has_descendant}->{significant}};
793     $todo->{flag}->{has_descendant}->{significant} = 0;
794 wakaba 1.1
795     my $has_non_style;
796     my $content = 'block-or-inline'; # or 'block' or 'inline'
797     my @block_not_inline;
798     while (@nodes) {
799     my $node = shift @nodes;
800     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
801    
802     my $nt = $node->node_type;
803     if ($nt == 1) {
804     my $node_ns = $node->namespace_uri;
805     $node_ns = '' unless defined $node_ns;
806     my $node_ln = $node->manakai_local_name;
807     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
808     if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
809     $not_allowed = 1 if $has_non_style;
810     if ($ellname eq 'style' and
811     not $node->has_attribute_ns (undef, 'scoped')) {
812     $not_allowed = 1;
813     }
814     } elsif ($content eq 'block') {
815     $has_non_style = 1;
816     $not_allowed = 1
817 wakaba 1.8 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln} or
818     $self->{pluses}->{$node_ns}->{$node_ln};
819 wakaba 1.1 } elsif ($content eq 'inline') {
820     $has_non_style = 1;
821     $not_allowed = 1
822     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
823 wakaba 1.8 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln} or
824     $self->{pluses}->{$node_ns}->{$node_ln};
825 wakaba 1.1 } else {
826 wakaba 1.8 $has_non_style = 1 unless $self->{pluses}->{$node_ns}->{$node_ln};
827 wakaba 1.1 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
828     my $is_inline
829     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
830     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
831    
832     push @block_not_inline, $node
833     if $is_block and not $is_inline and not $not_allowed;
834 wakaba 1.8 if (not $is_block and not $self->{pluses}->{$node_ns}->{$node_ln}) {
835 wakaba 1.1 $content = 'inline';
836     for (@block_not_inline) {
837     $self->{onerror}->(node => $_, type => 'element not allowed');
838     }
839     $not_allowed = 1 unless $is_inline;
840     }
841     }
842     $self->{onerror}->(node => $node, type => 'element not allowed')
843     if $not_allowed;
844     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
845     unshift @nodes, @$sib;
846     push @$new_todos, @$ch;
847     } elsif ($nt == 3 or $nt == 4) {
848     if ($node->data =~ /[^\x09-\x0D\x20]/) {
849     $has_non_style = 1;
850     if ($content eq 'block') {
851     $self->{onerror}->(node => $node, type => 'character not allowed');
852     } else {
853     $content = 'inline';
854     for (@block_not_inline) {
855     $self->{onerror}->(node => $_, type => 'element not allowed');
856     }
857     }
858 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
859 wakaba 1.1 }
860     } elsif ($nt == 5) {
861     unshift @nodes, @{$node->child_nodes};
862     }
863     }
864    
865     if ($content eq 'inline') {
866     for (@$new_todos) {
867     $_->{inline} = 1;
868     }
869     }
870 wakaba 1.25
871     push @$new_todos, {
872     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
873     old_values => $old_values,
874 wakaba 1.26 errors => $HTMLSignificantContentErrors,
875 wakaba 1.25 };
876    
877 wakaba 1.1 return ($new_todos);
878     };
879     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
880    
881 wakaba 1.29 my $HTMLTransparentChecker = $HTMLProseContentChecker;
882 wakaba 1.25 ## ISSUE: Significant content rule should be applied to transparent element
883     ## with parent? Currently, applied to |video| but not to others.
884 wakaba 1.1
885     our $AttrChecker;
886    
887     my $GetHTMLEnumeratedAttrChecker = sub {
888     my $states = shift; # {value => conforming ? 1 : -1}
889     return sub {
890     my ($self, $attr) = @_;
891     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
892     if ($states->{$value} > 0) {
893     #
894     } elsif ($states->{$value}) {
895     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
896     } else {
897     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
898     }
899     };
900     }; # $GetHTMLEnumeratedAttrChecker
901    
902     my $GetHTMLBooleanAttrChecker = sub {
903     my $local_name = shift;
904     return sub {
905     my ($self, $attr) = @_;
906     my $value = $attr->value;
907     unless ($value eq $local_name or $value eq '') {
908     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
909     }
910     };
911     }; # $GetHTMLBooleanAttrChecker
912    
913 wakaba 1.8 ## Unordered set of space-separated tokens
914 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
915 wakaba 1.8 my ($self, $attr) = @_;
916     my %word;
917     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
918     unless ($word{$word}) {
919     $word{$word} = 1;
920     } else {
921     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
922     }
923     }
924 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
925 wakaba 1.8
926 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
927     ## whose allowed values are defined by the section on link types)
928     my $HTMLLinkTypesAttrChecker = sub {
929 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
930 wakaba 1.1 my %word;
931     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
932     unless ($word{$word}) {
933     $word{$word} = 1;
934 wakaba 1.18 } elsif ($word eq 'up') {
935     #
936 wakaba 1.1 } else {
937     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
938     }
939     }
940     ## NOTE: Case sensitive match (since HTML5 spec does not say link
941     ## types are case-insensitive and it says "The value should not
942     ## be confusingly similar to any other defined value (e.g.
943     ## differing only in case).").
944     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
945     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
946     ## values to be used conformingly.
947     require Whatpm::_LinkTypeList;
948     our $LinkType;
949     for my $word (keys %word) {
950     my $def = $LinkType->{$word};
951     if (defined $def) {
952     if ($def->{status} eq 'accepted') {
953     if (defined $def->{effect}->[$a_or_area]) {
954     #
955     } else {
956     $self->{onerror}->(node => $attr,
957     type => 'link type:bad context:'.$word);
958     }
959     } elsif ($def->{status} eq 'proposal') {
960     $self->{onerror}->(node => $attr, level => 's',
961     type => 'link type:proposed:'.$word);
962 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
963     #
964     } else {
965     $self->{onerror}->(node => $attr,
966     type => 'link type:bad context:'.$word);
967     }
968 wakaba 1.1 } else { # rejected or synonym
969     $self->{onerror}->(node => $attr,
970     type => 'link type:non-conforming:'.$word);
971     }
972 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
973     if ($word eq 'alternate') {
974     #
975     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
976     $todo->{has_hyperlink_link_type} = 1;
977     }
978     }
979 wakaba 1.1 if ($def->{unique}) {
980     unless ($self->{has_link_type}->{$word}) {
981     $self->{has_link_type}->{$word} = 1;
982     } else {
983     $self->{onerror}->(node => $attr,
984     type => 'link type:duplicate:'.$word);
985     }
986     }
987     } else {
988     $self->{onerror}->(node => $attr, level => 'unsupported',
989     type => 'link type:'.$word);
990     }
991     }
992 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
993     if $word{alternate} and not $word{stylesheet};
994 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
995     ## says that using both X-Pingback: header field and HTML
996     ## <link rel=pingback> is deprecated and if both appears they
997     ## SHOULD contain exactly the same value.
998     ## ISSUE: Pingback 1.0 specification defines the exact representation
999     ## of its link element, which cannot be tested by the current arch.
1000     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
1001     ## include any string that matches to the pattern for the rel=pingback link,
1002     ## which again inpossible to test.
1003     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
1004 wakaba 1.12
1005     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
1006 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
1007     ## then they SHOULD be described in different paragraphs.".
1008 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
1009 wakaba 1.20
1010     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
1011 wakaba 1.1
1012     ## URI (or IRI)
1013     my $HTMLURIAttrChecker = sub {
1014     my ($self, $attr) = @_;
1015     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
1016     my $value = $attr->value;
1017     Whatpm::URIChecker->check_iri_reference ($value, sub {
1018     my %opt = @_;
1019     $self->{onerror}->(node => $attr, level => $opt{level},
1020     type => 'URI::'.$opt{type}.
1021     (defined $opt{position} ? ':'.$opt{position} : ''));
1022     });
1023 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
1024 wakaba 1.1 }; # $HTMLURIAttrChecker
1025    
1026     ## A space separated list of one or more URIs (or IRIs)
1027     my $HTMLSpaceURIsAttrChecker = sub {
1028     my ($self, $attr) = @_;
1029     my $i = 0;
1030     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
1031     Whatpm::URIChecker->check_iri_reference ($value, sub {
1032     my %opt = @_;
1033     $self->{onerror}->(node => $attr, level => $opt{level},
1034 wakaba 1.2 type => 'URIs:'.':'.
1035     $opt{type}.':'.$i.
1036 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
1037     });
1038     $i++;
1039     }
1040     ## ISSUE: Relative references?
1041     ## ISSUE: Leading or trailing white spaces are conformant?
1042     ## ISSUE: A sequence of white space characters are conformant?
1043     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
1044     ## NOTE: Duplication seems not an error.
1045 wakaba 1.4 $self->{has_uri_attr} = 1;
1046 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
1047    
1048     my $HTMLDatetimeAttrChecker = sub {
1049     my ($self, $attr) = @_;
1050     my $value = $attr->value;
1051     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
1052     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
1053     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
1054     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
1055     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
1056     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
1057     if $d < 1 or
1058     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
1059     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
1060     if $M == 2 and $d == 29 and
1061     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
1062     } else {
1063     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
1064     }
1065     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
1066     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
1067     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
1068     if defined $s and $s > 59;
1069     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
1070     if $zh > 23;
1071     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
1072     if $zm > 59;
1073     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
1074     } else {
1075     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
1076     }
1077     }; # $HTMLDatetimeAttrChecker
1078    
1079     my $HTMLIntegerAttrChecker = sub {
1080     my ($self, $attr) = @_;
1081     my $value = $attr->value;
1082     unless ($value =~ /\A-?[0-9]+\z/) {
1083     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
1084     }
1085     }; # $HTMLIntegerAttrChecker
1086    
1087     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
1088     my $range_check = shift;
1089     return sub {
1090     my ($self, $attr) = @_;
1091     my $value = $attr->value;
1092     if ($value =~ /\A[0-9]+\z/) {
1093     unless ($range_check->($value + 0)) {
1094     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
1095     }
1096     } else {
1097     $self->{onerror}->(node => $attr,
1098     type => 'nninteger:syntax error');
1099     }
1100     };
1101     }; # $GetHTMLNonNegativeIntegerAttrChecker
1102    
1103     my $GetHTMLFloatingPointNumberAttrChecker = sub {
1104     my $range_check = shift;
1105     return sub {
1106     my ($self, $attr) = @_;
1107     my $value = $attr->value;
1108     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
1109     unless ($range_check->($value + 0)) {
1110     $self->{onerror}->(node => $attr, type => 'float:out of range');
1111     }
1112     } else {
1113     $self->{onerror}->(node => $attr,
1114     type => 'float:syntax error');
1115     }
1116     };
1117     }; # $GetHTMLFloatingPointNumberAttrChecker
1118    
1119     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
1120     ## ISSUE: RFC 2046 does not define syntax of media types.
1121     ## ISSUE: The definition of "a valid MIME type" is unknown.
1122     ## Syntactical correctness?
1123     my $HTMLIMTAttrChecker = sub {
1124     my ($self, $attr) = @_;
1125     my $value = $attr->value;
1126     ## ISSUE: RFC 2045 Content-Type header field allows insertion
1127     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
1128     ## ISSUE: RFC 2231 extension? Maybe no.
1129     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
1130     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
1131     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
1132     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
1133     my @type = ($1, $2);
1134     my $param = $3;
1135     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
1136     if (defined $2) {
1137     push @type, $1 => $2;
1138     } else {
1139     my $n = $1;
1140     my $v = $2;
1141     $v =~ s/\\(.)/$1/gs;
1142     push @type, $n => $v;
1143     }
1144     }
1145     require Whatpm::IMTChecker;
1146     Whatpm::IMTChecker->check_imt (sub {
1147     my %opt = @_;
1148     $self->{onerror}->(node => $attr, level => $opt{level},
1149     type => 'IMT:'.$opt{type});
1150     }, @type);
1151     } else {
1152     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1153     }
1154     }; # $HTMLIMTAttrChecker
1155    
1156     my $HTMLLanguageTagAttrChecker = sub {
1157 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
1158    
1159 wakaba 1.1 my ($self, $attr) = @_;
1160 wakaba 1.6 my $value = $attr->value;
1161     require Whatpm::LangTag;
1162     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
1163     my %opt = @_;
1164     my $type = 'LangTag:'.$opt{type};
1165     $type .= ':' . $opt{subtag} if defined $opt{subtag};
1166     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
1167     level => $opt{level});
1168     });
1169 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
1170 wakaba 1.6
1171     ## TODO: testdata
1172 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
1173    
1174     ## "A valid media query [MQ]"
1175     my $HTMLMQAttrChecker = sub {
1176     my ($self, $attr) = @_;
1177     $self->{onerror}->(node => $attr, level => 'unsupported',
1178     type => 'media query');
1179     ## ISSUE: What is "a valid media query"?
1180     }; # $HTMLMQAttrChecker
1181    
1182     my $HTMLEventHandlerAttrChecker = sub {
1183     my ($self, $attr) = @_;
1184     $self->{onerror}->(node => $attr, level => 'unsupported',
1185     type => 'event handler');
1186     ## TODO: MUST contain valid ECMAScript code matching the
1187     ## ECMAScript |FunctionBody| production. [ECMA262]
1188     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
1189     ## ISSUE: Automatic semicolon insertion does not apply?
1190     ## ISSUE: Other script languages?
1191     }; # $HTMLEventHandlerAttrChecker
1192    
1193     my $HTMLUsemapAttrChecker = sub {
1194     my ($self, $attr) = @_;
1195     ## MUST be a valid hashed ID reference to a |map| element
1196     my $value = $attr->value;
1197     if ($value =~ s/^#//) {
1198     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
1199     push @{$self->{usemap}}, [$value => $attr];
1200     } else {
1201     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
1202     }
1203     ## NOTE: Space characters in hashed ID references are conforming.
1204     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
1205     }; # $HTMLUsemapAttrChecker
1206    
1207     my $HTMLTargetAttrChecker = sub {
1208     my ($self, $attr) = @_;
1209     my $value = $attr->value;
1210     if ($value =~ /^_/) {
1211     $value = lc $value; ## ISSUE: ASCII case-insentitive?
1212     unless ({
1213     _self => 1, _parent => 1, _top => 1,
1214     }->{$value}) {
1215     $self->{onerror}->(node => $attr,
1216     type => 'reserved browsing context name');
1217     }
1218     } else {
1219 wakaba 1.29 ## NOTE: An empty string is a valid browsing context name (same as _self).
1220 wakaba 1.1 }
1221     }; # $HTMLTargetAttrChecker
1222    
1223 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
1224     my ($self, $attr) = @_;
1225    
1226     ## ISSUE: Namespace resolution?
1227    
1228     my $value = $attr->value;
1229    
1230     require Whatpm::CSS::SelectorsParser;
1231     my $p = Whatpm::CSS::SelectorsParser->new;
1232     $p->{pseudo_class}->{$_} = 1 for qw/
1233     active checked disabled empty enabled first-child first-of-type
1234     focus hover indeterminate last-child last-of-type link only-child
1235     only-of-type root target visited
1236     lang nth-child nth-last-child nth-of-type nth-last-of-type not
1237     -manakai-contains -manakai-current
1238     /;
1239    
1240     $p->{pseudo_element}->{$_} = 1 for qw/
1241     after before first-letter first-line
1242     /;
1243    
1244     $p->{must_level} = $self->{must_level};
1245     $p->{onerror} = sub {
1246     my %opt = @_;
1247     $opt{type} = 'selectors:'.$opt{type};
1248     $self->{onerror}->(%opt, node => $attr);
1249     };
1250     $p->parse_string ($value);
1251     }; # $HTMLSelectorsAttrChecker
1252    
1253 wakaba 1.1 my $HTMLAttrChecker = {
1254     id => sub {
1255     ## NOTE: |map| has its own variant of |id=""| checker
1256     my ($self, $attr) = @_;
1257     my $value = $attr->value;
1258     if (length $value > 0) {
1259     if ($self->{id}->{$value}) {
1260     $self->{onerror}->(node => $attr, type => 'duplicate ID');
1261     push @{$self->{id}->{$value}}, $attr;
1262     } else {
1263     $self->{id}->{$value} = [$attr];
1264     }
1265     if ($value =~ /[\x09-\x0D\x20]/) {
1266     $self->{onerror}->(node => $attr, type => 'space in ID');
1267     }
1268     } else {
1269     ## NOTE: MUST contain at least one character
1270     $self->{onerror}->(node => $attr, type => 'empty attribute value');
1271     }
1272     },
1273     title => sub {}, ## NOTE: No conformance creteria
1274     lang => sub {
1275     my ($self, $attr) = @_;
1276 wakaba 1.6 my $value = $attr->value;
1277     if ($value eq '') {
1278     #
1279     } else {
1280     require Whatpm::LangTag;
1281     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
1282     my %opt = @_;
1283     my $type = 'LangTag:'.$opt{type};
1284     $type .= ':' . $opt{subtag} if defined $opt{subtag};
1285     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
1286     level => $opt{level});
1287     });
1288     }
1289 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
1290     unless ($attr->owner_document->manakai_is_html) {
1291     $self->{onerror}->(node => $attr, type => 'in XML:lang');
1292     }
1293 wakaba 1.6
1294     ## TODO: test data
1295 wakaba 1.1 },
1296     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
1297     class => sub {
1298     my ($self, $attr) = @_;
1299     my %word;
1300     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
1301     unless ($word{$word}) {
1302     $word{$word} = 1;
1303     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
1304     } else {
1305     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
1306     }
1307     }
1308     },
1309     contextmenu => sub {
1310     my ($self, $attr) = @_;
1311     my $value = $attr->value;
1312     push @{$self->{contextmenu}}, [$value => $attr];
1313     ## ISSUE: "The value must be the ID of a menu element in the DOM."
1314     ## What is "in the DOM"? A menu Element node that is not part
1315     ## of the Document tree is in the DOM? A menu Element node that
1316     ## belong to another Document tree is in the DOM?
1317     },
1318     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
1319 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
1320     ## TODO: ref, template, registrationmark
1321 wakaba 1.1 };
1322    
1323     for (qw/
1324     onabort onbeforeunload onblur onchange onclick oncontextmenu
1325     ondblclick ondrag ondragend ondragenter ondragleave ondragover
1326     ondragstart ondrop onerror onfocus onkeydown onkeypress
1327     onkeyup onload onmessage onmousedown onmousemove onmouseout
1328     onmouseover onmouseup onmousewheel onresize onscroll onselect
1329     onsubmit onunload
1330     /) {
1331     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
1332     }
1333    
1334     my $GetHTMLAttrsChecker = sub {
1335     my $element_specific_checker = shift;
1336     return sub {
1337     my ($self, $todo) = @_;
1338     for my $attr (@{$todo->{node}->attributes}) {
1339     my $attr_ns = $attr->namespace_uri;
1340     $attr_ns = '' unless defined $attr_ns;
1341     my $attr_ln = $attr->manakai_local_name;
1342     my $checker;
1343     if ($attr_ns eq '') {
1344     $checker = $element_specific_checker->{$attr_ln}
1345     || $HTMLAttrChecker->{$attr_ln};
1346     }
1347     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1348     || $AttrChecker->{$attr_ns}->{''};
1349     if ($checker) {
1350     $checker->($self, $attr, $todo);
1351     } else {
1352     $self->{onerror}->(node => $attr, level => 'unsupported',
1353     type => 'attribute');
1354     ## ISSUE: No comformance createria for unknown attributes in the spec
1355     }
1356     }
1357     };
1358     }; # $GetHTMLAttrsChecker
1359    
1360     our $Element;
1361     our $ElementDefault;
1362    
1363     $Element->{$HTML_NS}->{''} = {
1364     attrs_checker => $GetHTMLAttrsChecker->({}),
1365     checker => $ElementDefault->{checker},
1366     };
1367    
1368     $Element->{$HTML_NS}->{html} = {
1369     is_root => 1,
1370     attrs_checker => $GetHTMLAttrsChecker->({
1371 wakaba 1.16 manifest => $HTMLURIAttrChecker,
1372 wakaba 1.1 xmlns => sub {
1373     my ($self, $attr) = @_;
1374     my $value = $attr->value;
1375     unless ($value eq $HTML_NS) {
1376     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
1377     }
1378     unless ($attr->owner_document->manakai_is_html) {
1379     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
1380     ## TODO: Test
1381     }
1382     },
1383     }),
1384     checker => sub {
1385     my ($self, $todo) = @_;
1386     my $el = $todo->{node};
1387     my $new_todos = [];
1388     my @nodes = (@{$el->child_nodes});
1389    
1390 wakaba 1.25 my $old_values = {significant =>
1391     $todo->{flag}->{has_descendant}->{significant}};
1392     $todo->{flag}->{has_descendant}->{significant} = 0;
1393    
1394 wakaba 1.1 my $phase = 'before head';
1395     while (@nodes) {
1396     my $node = shift @nodes;
1397     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1398    
1399     my $nt = $node->node_type;
1400     if ($nt == 1) {
1401     my $node_ns = $node->namespace_uri;
1402     $node_ns = '' unless defined $node_ns;
1403     my $node_ln = $node->manakai_local_name;
1404     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1405 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1406     #
1407     } elsif ($phase eq 'before head') {
1408 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
1409     $phase = 'after head';
1410     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1411     $self->{onerror}->(node => $node, type => 'ps element missing:head');
1412     $phase = 'after body';
1413     } else {
1414     $not_allowed = 1;
1415     # before head
1416     }
1417     } elsif ($phase eq 'after head') {
1418     if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1419     $phase = 'after body';
1420     } else {
1421     $not_allowed = 1;
1422     # after head
1423     }
1424     } else { #elsif ($phase eq 'after body') {
1425     $not_allowed = 1;
1426     # after body
1427     }
1428     $self->{onerror}->(node => $node, type => 'element not allowed')
1429     if $not_allowed;
1430     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1431     unshift @nodes, @$sib;
1432     push @$new_todos, @$ch;
1433     } elsif ($nt == 3 or $nt == 4) {
1434     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1435     $self->{onerror}->(node => $node, type => 'character not allowed');
1436 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1437 wakaba 1.1 }
1438     } elsif ($nt == 5) {
1439     unshift @nodes, @{$node->child_nodes};
1440     }
1441     }
1442    
1443     if ($phase eq 'before head') {
1444     $self->{onerror}->(node => $el, type => 'child element missing:head');
1445     $self->{onerror}->(node => $el, type => 'child element missing:body');
1446     } elsif ($phase eq 'after head') {
1447     $self->{onerror}->(node => $el, type => 'child element missing:body');
1448     }
1449    
1450 wakaba 1.25 ## NOTE: Significant content check - this is performed here since
1451     ## |html| content model allows a block-level element - |body|.
1452     push @$new_todos, {
1453     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
1454     old_values => $old_values,
1455 wakaba 1.26 errors => $HTMLSignificantContentErrors,
1456 wakaba 1.25 };
1457    
1458 wakaba 1.1 return ($new_todos);
1459     },
1460     };
1461    
1462     $Element->{$HTML_NS}->{head} = {
1463     attrs_checker => $GetHTMLAttrsChecker->({}),
1464     checker => sub {
1465     my ($self, $todo) = @_;
1466     my $el = $todo->{node};
1467     my $new_todos = [];
1468     my @nodes = (@{$el->child_nodes});
1469    
1470     my $has_title;
1471     while (@nodes) {
1472     my $node = shift @nodes;
1473     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1474    
1475     my $nt = $node->node_type;
1476     if ($nt == 1) {
1477     my $node_ns = $node->namespace_uri;
1478     $node_ns = '' unless defined $node_ns;
1479     my $node_ln = $node->manakai_local_name;
1480 wakaba 1.30 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1481     $self->{onerror}->(node => $node,
1482     type => 'element not allowed:minus',
1483     level => $self->{must_level});
1484     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
1485 wakaba 1.8 #
1486     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1487 wakaba 1.1 unless ($has_title) {
1488     $has_title = 1;
1489     } else {
1490 wakaba 1.30 $self->{onerror}->(node => $node,
1491     type => 'element not allowed:head title',
1492     level => $self->{must_level});
1493     }
1494     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1495     if ($todo->{node}->has_attribute_ns (undef, 'scoped')) {
1496     $self->{onerror}->(node => $node,
1497     type => 'element not allowed:head style',
1498     level => $self->{must_level});
1499 wakaba 1.1 }
1500 wakaba 1.29 } elsif ($HTMLMetadataContent->{$node_ns}->{$node_ln}) {
1501     #
1502    
1503     ## NOTE: |meta| is a metadata content. However, strictly speaking,
1504     ## a |meta| element with none of |charset|, |name|,
1505     ## or |http-equiv| attribute is not allowed. It is non-conforming
1506     ## anyway.
1507 wakaba 1.1 } else {
1508 wakaba 1.30 $self->{onerror}->(node => $node,
1509     type => 'element not allowed:metadata',
1510     level => $self->{must_level});
1511 wakaba 1.1 }
1512 wakaba 1.3 local $todo->{flag}->{in_head} = 1;
1513 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1514     unshift @nodes, @$sib;
1515     push @$new_todos, @$ch;
1516     } elsif ($nt == 3 or $nt == 4) {
1517     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1518     $self->{onerror}->(node => $node, type => 'character not allowed');
1519 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
1520 wakaba 1.1 }
1521     } elsif ($nt == 5) {
1522     unshift @nodes, @{$node->child_nodes};
1523     }
1524     }
1525     unless ($has_title) {
1526     $self->{onerror}->(node => $el, type => 'child element missing:title');
1527     }
1528     return ($new_todos);
1529     },
1530     };
1531    
1532     $Element->{$HTML_NS}->{title} = {
1533     attrs_checker => $GetHTMLAttrsChecker->({}),
1534     checker => $HTMLTextChecker,
1535     };
1536    
1537     $Element->{$HTML_NS}->{base} = {
1538 wakaba 1.4 attrs_checker => sub {
1539     my ($self, $todo) = @_;
1540    
1541 wakaba 1.29 if ($self->{has_base}) {
1542     $self->{onerror}->(node => $todo->{node},
1543     type => 'element not allowed:base');
1544     } else {
1545     $self->{has_base} = 1;
1546     }
1547    
1548 wakaba 1.14 my $has_href = $todo->{node}->has_attribute_ns (undef, 'href');
1549     my $has_target = $todo->{node}->has_attribute_ns (undef, 'target');
1550    
1551     if ($self->{has_uri_attr} and $has_href) {
1552 wakaba 1.4 ## ISSUE: Are these examples conforming?
1553     ## <head profile="a b c"><base href> (except for |profile|'s
1554     ## non-conformance)
1555     ## <title xml:base="relative"/><base href/> (maybe it should be)
1556     ## <unknown xmlns="relative"/><base href/> (assuming that
1557     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1558     ## <style>@import 'relative';</style><base href>
1559     ## <script>location.href = 'relative';</script><base href>
1560 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1561     ## an exception.
1562 wakaba 1.4 $self->{onerror}->(node => $todo->{node},
1563     type => 'basehref after URI attribute');
1564     }
1565 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1566 wakaba 1.4 ## ISSUE: Are these examples conforming?
1567     ## <head><title xlink:href=""/><base target="name"/></head>
1568     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1569     ## (assuming that |xbl:xbl| is allowed before |base|)
1570     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1571     ## <link href=""/><base target="name"/>
1572     ## <link rel=unknown href=""><base target=name>
1573     $self->{onerror}->(node => $todo->{node},
1574     type => 'basetarget after hyperlink');
1575     }
1576    
1577 wakaba 1.14 if (not $has_href and not $has_target) {
1578     $self->{onerror}->(node => $todo->{node},
1579     type => 'attribute missing:href|target');
1580     }
1581    
1582 wakaba 1.4 return $GetHTMLAttrsChecker->({
1583     href => $HTMLURIAttrChecker,
1584     target => $HTMLTargetAttrChecker,
1585     })->($self, $todo);
1586     },
1587 wakaba 1.1 checker => $HTMLEmptyChecker,
1588     };
1589    
1590     $Element->{$HTML_NS}->{link} = {
1591     attrs_checker => sub {
1592     my ($self, $todo) = @_;
1593     $GetHTMLAttrsChecker->({
1594     href => $HTMLURIAttrChecker,
1595 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(0, $todo, @_) },
1596 wakaba 1.1 media => $HTMLMQAttrChecker,
1597     hreflang => $HTMLLanguageTagAttrChecker,
1598     type => $HTMLIMTAttrChecker,
1599     ## NOTE: Though |title| has special semantics,
1600     ## syntactically same as the |title| as global attribute.
1601     })->($self, $todo);
1602 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
1603     $self->{has_hyperlink_element} = 1 if $todo->{has_hyperlink_link_type};
1604     } else {
1605 wakaba 1.1 $self->{onerror}->(node => $todo->{node},
1606     type => 'attribute missing:href');
1607     }
1608     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1609     $self->{onerror}->(node => $todo->{node},
1610     type => 'attribute missing:rel');
1611     }
1612     },
1613     checker => $HTMLEmptyChecker,
1614     };
1615    
1616     $Element->{$HTML_NS}->{meta} = {
1617     attrs_checker => sub {
1618     my ($self, $todo) = @_;
1619     my $name_attr;
1620     my $http_equiv_attr;
1621     my $charset_attr;
1622     my $content_attr;
1623     for my $attr (@{$todo->{node}->attributes}) {
1624     my $attr_ns = $attr->namespace_uri;
1625     $attr_ns = '' unless defined $attr_ns;
1626     my $attr_ln = $attr->manakai_local_name;
1627     my $checker;
1628     if ($attr_ns eq '') {
1629     if ($attr_ln eq 'content') {
1630     $content_attr = $attr;
1631     $checker = 1;
1632     } elsif ($attr_ln eq 'name') {
1633     $name_attr = $attr;
1634     $checker = 1;
1635     } elsif ($attr_ln eq 'http-equiv') {
1636     $http_equiv_attr = $attr;
1637     $checker = 1;
1638     } elsif ($attr_ln eq 'charset') {
1639     $charset_attr = $attr;
1640     $checker = 1;
1641     } else {
1642     $checker = $HTMLAttrChecker->{$attr_ln}
1643     || $AttrChecker->{$attr_ns}->{$attr_ln}
1644     || $AttrChecker->{$attr_ns}->{''};
1645     }
1646     } else {
1647     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1648     || $AttrChecker->{$attr_ns}->{''};
1649     }
1650     if ($checker) {
1651     $checker->($self, $attr) if ref $checker;
1652     } else {
1653     $self->{onerror}->(node => $attr, level => 'unsupported',
1654     type => 'attribute');
1655     ## ISSUE: No comformance createria for unknown attributes in the spec
1656     }
1657     }
1658    
1659     if (defined $name_attr) {
1660     if (defined $http_equiv_attr) {
1661     $self->{onerror}->(node => $http_equiv_attr,
1662     type => 'attribute not allowed');
1663     } elsif (defined $charset_attr) {
1664     $self->{onerror}->(node => $charset_attr,
1665     type => 'attribute not allowed');
1666     }
1667     my $metadata_name = $name_attr->value;
1668     my $metadata_value;
1669     if (defined $content_attr) {
1670     $metadata_value = $content_attr->value;
1671     } else {
1672     $self->{onerror}->(node => $todo->{node},
1673     type => 'attribute missing:content');
1674     $metadata_value = '';
1675     }
1676     } elsif (defined $http_equiv_attr) {
1677     if (defined $charset_attr) {
1678     $self->{onerror}->(node => $charset_attr,
1679     type => 'attribute not allowed');
1680     }
1681     unless (defined $content_attr) {
1682     $self->{onerror}->(node => $todo->{node},
1683     type => 'attribute missing:content');
1684     }
1685     } elsif (defined $charset_attr) {
1686     if (defined $content_attr) {
1687     $self->{onerror}->(node => $content_attr,
1688     type => 'attribute not allowed');
1689     }
1690     } else {
1691     if (defined $content_attr) {
1692     $self->{onerror}->(node => $content_attr,
1693     type => 'attribute not allowed');
1694     $self->{onerror}->(node => $todo->{node},
1695     type => 'attribute missing:name|http-equiv');
1696     } else {
1697     $self->{onerror}->(node => $todo->{node},
1698     type => 'attribute missing:name|http-equiv|charset');
1699     }
1700     }
1701    
1702     ## TODO: metadata conformance
1703    
1704     ## TODO: pragma conformance
1705     if (defined $http_equiv_attr) { ## An enumerated attribute
1706     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1707     if ({
1708     'refresh' => 1,
1709     'default-style' => 1,
1710     }->{$keyword}) {
1711     #
1712 wakaba 1.19 } elsif ($keyword eq 'content-type') {
1713     $self->{onerror}
1714     ->(node => $http_equiv_attr,
1715     type => 'enumerated:invalid:http-equiv:content-type');
1716 wakaba 1.1 } else {
1717     $self->{onerror}->(node => $http_equiv_attr,
1718     type => 'enumerated:invalid');
1719     }
1720     }
1721    
1722     if (defined $charset_attr) {
1723 wakaba 1.29 my $parent = $todo->{node}->manakai_parent_element;
1724     if ($parent and $parent eq $parent->owner_document->manakai_head) {
1725     for my $el (@{$parent->child_nodes}) {
1726     next unless $el->node_type == 1; # ELEMENT_NODE
1727     unless ($el eq $todo->{node}) {
1728     ## NOTE: Not the first child element.
1729     $self->{onerror}->(node => $todo->{node},
1730     type => 'element not allowed:meta charset');
1731     }
1732     last;
1733     ## NOTE: Entity references are not supported.
1734     }
1735     } else {
1736     $self->{onerror}->(node => $todo->{node},
1737     type => 'element not allowed:meta charset');
1738     }
1739    
1740 wakaba 1.1 unless ($todo->{node}->owner_document->manakai_is_html) {
1741     $self->{onerror}->(node => $charset_attr,
1742     type => 'in XML:charset');
1743     }
1744 wakaba 1.21
1745     my $charset_value = $charset_attr->value;
1746     ## NOTE: Though the case-sensitivility of |charset| attribute value
1747     ## is not explicitly spelled in the HTML5 spec, the Character Set
1748     ## registry of IANA, which is referenced from HTML5 spec, says that
1749     ## charset name is case-insensitive.
1750     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1751    
1752     require Message::Charset::Info;
1753     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1754     my $ic = $todo->{node}->owner_document->input_encoding;
1755     if (defined $ic) {
1756     ## TODO: Test for this case
1757     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1758     if ($charset ne $ic_charset) {
1759     $self->{onerror}->(node => $charset_attr,
1760     type => 'mismatched charset name:'.$ic.
1761     ':'.$charset_value,
1762     level => 'm');
1763     }
1764     } else {
1765     ## NOTE: MUST, but not checkable, since the document is not originally
1766     ## in serialized form (or the parser does not preserve the input
1767     ## encoding information).
1768     $self->{onerror}->(node => $charset_attr,
1769     type => 'mismatched charset name::'.$charset_value,
1770     level => 'unsupported');
1771     }
1772    
1773     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1774     ## Syntactically valid and registered? What about x-charset names?
1775     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1776     ($charset_value)) {
1777     $self->{onerror}->(node => $charset_attr,
1778     type => 'charset:syntax error:'.$charset_value,
1779     level => 'm');
1780     }
1781    
1782     if ($charset) {
1783     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1784     ## with no "preferred MIME name" label)?
1785     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1786     if (($charset_status &
1787     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1788     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1789     $self->{onerror}->(node => $charset_attr,
1790     type => 'charset:not preferred:'.
1791     $charset_value,
1792     level => 'm');
1793     }
1794     if (($charset_status &
1795     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1796     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1797     if ($charset_value =~ /^x-/) {
1798     $self->{onerror}->(node => $charset_attr,
1799     type => 'charset:private:'.$charset_value,
1800     level => $self->{good_level});
1801     } else {
1802     $self->{onerror}->(node => $charset_attr,
1803     type => 'charset:not registered:'.
1804     $charset_value,
1805     level => $self->{good_level});
1806     }
1807     }
1808     } elsif ($charset_value =~ /^x-/) {
1809     $self->{onerror}->(node => $charset_attr,
1810     type => 'charset:private:'.$charset_value,
1811     level => $self->{good_level});
1812     } else {
1813     $self->{onerror}->(node => $charset_attr,
1814     type => 'charset:not registered:'.$charset_value,
1815     level => $self->{good_level});
1816     }
1817    
1818 wakaba 1.22 if ($charset_attr->get_user_data ('manakai_has_reference')) {
1819     $self->{onerror}->(node => $charset_attr,
1820     type => 'character reference in charset',
1821     level => $self->{must_level});
1822     }
1823 wakaba 1.1 }
1824     },
1825     checker => $HTMLEmptyChecker,
1826     };
1827    
1828     $Element->{$HTML_NS}->{style} = {
1829     attrs_checker => $GetHTMLAttrsChecker->({
1830     type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1831     media => $HTMLMQAttrChecker,
1832     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1833     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1834     ## not different
1835     }),
1836     checker => sub {
1837 wakaba 1.27 ## NOTE: |html:style| itself has no conformance creteria on content model.
1838 wakaba 1.1 my ($self, $todo) = @_;
1839     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
1840 wakaba 1.27 if (not defined $type or
1841     $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
1842     my $el = $todo->{node};
1843     my $new_todos = [];
1844     my @nodes = (@{$el->child_nodes});
1845    
1846     my $ss_text = '';
1847     while (@nodes) {
1848     my $node = shift @nodes;
1849     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1850    
1851     my $nt = $node->node_type;
1852     if ($nt == 1) {
1853 wakaba 1.29 my $node_ns = $node->namespace_uri;
1854     $node_ns = '' unless defined $node_ns;
1855     my $node_ln = $node->manakai_local_name;
1856     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
1857     #
1858     } else {
1859     $self->{onerror}->(node => $node, type => 'element not allowed');
1860     }
1861 wakaba 1.27 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1862     unshift @nodes, @$sib;
1863     push @$new_todos, @$ch;
1864     } elsif ($nt == 3 or $nt == 4) {
1865     $ss_text .= $node->text_content;
1866     } elsif ($nt == 5) {
1867     unshift @nodes, @{$node->child_nodes};
1868     }
1869     }
1870    
1871 wakaba 1.28 $self->{onsubdoc}->({s => $ss_text, container_node => $el,
1872     media_type => 'text/css', is_char_string => 1});
1873 wakaba 1.27 return ($new_todos);
1874     } else {
1875     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
1876     type => 'style:'.$type); ## TODO: $type normalization
1877     return $AnyChecker->($self, $todo);
1878     }
1879 wakaba 1.1 },
1880     };
1881 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1882 wakaba 1.1
1883     $Element->{$HTML_NS}->{body} = {
1884     attrs_checker => $GetHTMLAttrsChecker->({}),
1885 wakaba 1.29 checker => $HTMLProseContentChecker,
1886 wakaba 1.1 };
1887    
1888     $Element->{$HTML_NS}->{section} = {
1889     attrs_checker => $GetHTMLAttrsChecker->({}),
1890 wakaba 1.29 checker => $HTMLProseContentChecker,
1891 wakaba 1.1 };
1892    
1893     $Element->{$HTML_NS}->{nav} = {
1894     attrs_checker => $GetHTMLAttrsChecker->({}),
1895 wakaba 1.29 checker => $HTMLProseContentChecker,
1896 wakaba 1.1 };
1897    
1898     $Element->{$HTML_NS}->{article} = {
1899     attrs_checker => $GetHTMLAttrsChecker->({}),
1900 wakaba 1.29 checker => $HTMLProseContentChecker,
1901 wakaba 1.1 };
1902    
1903     $Element->{$HTML_NS}->{blockquote} = {
1904     attrs_checker => $GetHTMLAttrsChecker->({
1905     cite => $HTMLURIAttrChecker,
1906     }),
1907 wakaba 1.29 checker => $HTMLProseContentChecker,
1908 wakaba 1.1 };
1909    
1910     $Element->{$HTML_NS}->{aside} = {
1911     attrs_checker => $GetHTMLAttrsChecker->({}),
1912 wakaba 1.29 checker => $HTMLProseContentChecker,
1913 wakaba 1.1 };
1914    
1915     $Element->{$HTML_NS}->{h1} = {
1916     attrs_checker => $GetHTMLAttrsChecker->({}),
1917     checker => sub {
1918     my ($self, $todo) = @_;
1919 wakaba 1.24 $todo->{flag}->{has_descendant}->{hn} = 1;
1920 wakaba 1.29 return $HTMLPhrasingContentChecker->($self, $todo);
1921 wakaba 1.1 },
1922     };
1923    
1924     $Element->{$HTML_NS}->{h2} = {
1925     attrs_checker => $GetHTMLAttrsChecker->({}),
1926     checker => $Element->{$HTML_NS}->{h1}->{checker},
1927     };
1928    
1929     $Element->{$HTML_NS}->{h3} = {
1930     attrs_checker => $GetHTMLAttrsChecker->({}),
1931     checker => $Element->{$HTML_NS}->{h1}->{checker},
1932     };
1933    
1934     $Element->{$HTML_NS}->{h4} = {
1935     attrs_checker => $GetHTMLAttrsChecker->({}),
1936     checker => $Element->{$HTML_NS}->{h1}->{checker},
1937     };
1938    
1939     $Element->{$HTML_NS}->{h5} = {
1940     attrs_checker => $GetHTMLAttrsChecker->({}),
1941     checker => $Element->{$HTML_NS}->{h1}->{checker},
1942     };
1943    
1944     $Element->{$HTML_NS}->{h6} = {
1945     attrs_checker => $GetHTMLAttrsChecker->({}),
1946     checker => $Element->{$HTML_NS}->{h1}->{checker},
1947     };
1948    
1949 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1950    
1951 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
1952     attrs_checker => $GetHTMLAttrsChecker->({}),
1953     checker => sub {
1954     my ($self, $todo) = @_;
1955 wakaba 1.24
1956     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
1957     $todo->{flag}->{has_descendant}->{hn} = 0;
1958 wakaba 1.1
1959     my $end = $self->_add_minuses
1960     ({$HTML_NS => {qw/header 1 footer 1/}},
1961 wakaba 1.29 $HTMLSectioningContent);
1962     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
1963 wakaba 1.24 push @$new_todos, $end,
1964     {type => 'descendant', node => $todo->{node},
1965     flag => $todo->{flag}, old_values => $old_flags,
1966     errors => {
1967     hn => sub {
1968     my ($self, $todo) = @_;
1969     $self->{onerror}->(node => $todo->{node},
1970     type => 'element missing:hn');
1971     },
1972 wakaba 1.1 }};
1973     return ($new_todos, $ch);
1974 wakaba 1.24
1975     ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
1976 wakaba 1.1 },
1977     };
1978    
1979     $Element->{$HTML_NS}->{footer} = {
1980     attrs_checker => $GetHTMLAttrsChecker->({}),
1981 wakaba 1.29 checker => sub {
1982 wakaba 1.1 my ($self, $todo) = @_;
1983 wakaba 1.25
1984 wakaba 1.29 my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
1985     $todo->{flag}->{has_descendant}->{hn} = 0;
1986 wakaba 1.1
1987     my $end = $self->_add_minuses
1988 wakaba 1.29 ({$HTML_NS => {footer => 1}},
1989     $HTMLSectioningContent, $HTMLHeadingContent);
1990     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
1991 wakaba 1.1 push @$new_todos, $end;
1992    
1993 wakaba 1.29 return ($new_todos, $ch);
1994 wakaba 1.1 },
1995     };
1996    
1997     $Element->{$HTML_NS}->{address} = {
1998     attrs_checker => $GetHTMLAttrsChecker->({}),
1999 wakaba 1.29 checker => sub {
2000     my ($self, $todo) = @_;
2001    
2002     my $old_flags = {hn => $todo->{flag}->{has_descendant}->{hn}};
2003     $todo->{flag}->{has_descendant}->{hn} = 0;
2004    
2005     my $end = $self->_add_minuses
2006     ({$HTML_NS => {footer => 1, address => 1}},
2007     $HTMLSectioningContent, $HTMLHeadingContent);
2008     my ($new_todos, $ch) = $HTMLProseContentChecker->($self, $todo);
2009     push @$new_todos, $end;
2010    
2011     return ($new_todos, $ch);
2012     },
2013 wakaba 1.1 };
2014    
2015     $Element->{$HTML_NS}->{p} = {
2016     attrs_checker => $GetHTMLAttrsChecker->({}),
2017 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2018 wakaba 1.1 };
2019    
2020     $Element->{$HTML_NS}->{hr} = {
2021     attrs_checker => $GetHTMLAttrsChecker->({}),
2022     checker => $HTMLEmptyChecker,
2023     };
2024    
2025     $Element->{$HTML_NS}->{br} = {
2026     attrs_checker => $GetHTMLAttrsChecker->({}),
2027     checker => $HTMLEmptyChecker,
2028 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
2029     ## (This requirement is semantic so that we cannot check.)
2030 wakaba 1.1 };
2031    
2032     $Element->{$HTML_NS}->{dialog} = {
2033     attrs_checker => $GetHTMLAttrsChecker->({}),
2034     checker => sub {
2035     my ($self, $todo) = @_;
2036     my $el = $todo->{node};
2037     my $new_todos = [];
2038     my @nodes = (@{$el->child_nodes});
2039    
2040     my $phase = 'before dt';
2041     while (@nodes) {
2042     my $node = shift @nodes;
2043     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2044    
2045     my $nt = $node->node_type;
2046     if ($nt == 1) {
2047     my $node_ns = $node->namespace_uri;
2048     $node_ns = '' unless defined $node_ns;
2049     my $node_ln = $node->manakai_local_name;
2050     ## NOTE: |minuses| list is not checked since redundant
2051 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2052     #
2053     } elsif ($phase eq 'before dt') {
2054 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2055     $phase = 'before dd';
2056     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2057     $self->{onerror}
2058     ->(node => $node, type => 'ps element missing:dt');
2059     $phase = 'before dt';
2060     } else {
2061     $self->{onerror}->(node => $node, type => 'element not allowed');
2062     }
2063     } else { # before dd
2064     if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2065     $phase = 'before dt';
2066     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2067     $self->{onerror}
2068     ->(node => $node, type => 'ps element missing:dd');
2069     $phase = 'before dd';
2070     } else {
2071     $self->{onerror}->(node => $node, type => 'element not allowed');
2072     }
2073     }
2074     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2075     unshift @nodes, @$sib;
2076     push @$new_todos, @$ch;
2077     } elsif ($nt == 3 or $nt == 4) {
2078     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2079     $self->{onerror}->(node => $node, type => 'character not allowed');
2080 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2081 wakaba 1.1 }
2082     } elsif ($nt == 5) {
2083     unshift @nodes, @{$node->child_nodes};
2084     }
2085     }
2086     if ($phase eq 'before dd') {
2087 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
2088 wakaba 1.1 }
2089     return ($new_todos);
2090     },
2091     };
2092    
2093     $Element->{$HTML_NS}->{pre} = {
2094     attrs_checker => $GetHTMLAttrsChecker->({}),
2095 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2096 wakaba 1.1 };
2097    
2098     $Element->{$HTML_NS}->{ol} = {
2099     attrs_checker => $GetHTMLAttrsChecker->({
2100     start => $HTMLIntegerAttrChecker,
2101     }),
2102     checker => sub {
2103     my ($self, $todo) = @_;
2104     my $el = $todo->{node};
2105     my $new_todos = [];
2106     my @nodes = (@{$el->child_nodes});
2107    
2108     while (@nodes) {
2109     my $node = shift @nodes;
2110     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2111    
2112     my $nt = $node->node_type;
2113     if ($nt == 1) {
2114     my $node_ns = $node->namespace_uri;
2115     $node_ns = '' unless defined $node_ns;
2116     my $node_ln = $node->manakai_local_name;
2117     ## NOTE: |minuses| list is not checked since redundant
2118 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2119     #
2120     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'li')) {
2121 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
2122     }
2123     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2124     unshift @nodes, @$sib;
2125     push @$new_todos, @$ch;
2126     } elsif ($nt == 3 or $nt == 4) {
2127     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2128     $self->{onerror}->(node => $node, type => 'character not allowed');
2129 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2130 wakaba 1.1 }
2131     } elsif ($nt == 5) {
2132     unshift @nodes, @{$node->child_nodes};
2133     }
2134     }
2135    
2136     if ($todo->{inline}) {
2137     for (@$new_todos) {
2138     $_->{inline} = 1;
2139     }
2140     }
2141     return ($new_todos);
2142     },
2143     };
2144    
2145     $Element->{$HTML_NS}->{ul} = {
2146     attrs_checker => $GetHTMLAttrsChecker->({}),
2147     checker => $Element->{$HTML_NS}->{ol}->{checker},
2148     };
2149    
2150     $Element->{$HTML_NS}->{li} = {
2151     attrs_checker => $GetHTMLAttrsChecker->({
2152     start => sub {
2153     my ($self, $attr) = @_;
2154     my $parent = $attr->owner_element->manakai_parent_element;
2155     if (defined $parent) {
2156     my $parent_ns = $parent->namespace_uri;
2157     $parent_ns = '' unless defined $parent_ns;
2158     my $parent_ln = $parent->manakai_local_name;
2159     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
2160     $self->{onerror}->(node => $attr, level => 'unsupported',
2161     type => 'attribute');
2162     }
2163     }
2164     $HTMLIntegerAttrChecker->($self, $attr);
2165     },
2166     }),
2167     checker => sub {
2168     my ($self, $todo) = @_;
2169 wakaba 1.29 if ($todo->{flag}->{in_menu}) {
2170     return $HTMLPhrasingContentChecker->($self, $todo);
2171 wakaba 1.1 } else {
2172 wakaba 1.29 return $HTMLProseContentChecker->($self, $todo);
2173 wakaba 1.1 }
2174     },
2175     };
2176    
2177     $Element->{$HTML_NS}->{dl} = {
2178     attrs_checker => $GetHTMLAttrsChecker->({}),
2179     checker => sub {
2180     my ($self, $todo) = @_;
2181     my $el = $todo->{node};
2182     my $new_todos = [];
2183     my @nodes = (@{$el->child_nodes});
2184    
2185     my $phase = 'before dt';
2186     while (@nodes) {
2187     my $node = shift @nodes;
2188     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2189    
2190     my $nt = $node->node_type;
2191     if ($nt == 1) {
2192     my $node_ns = $node->namespace_uri;
2193     $node_ns = '' unless defined $node_ns;
2194     my $node_ln = $node->manakai_local_name;
2195     ## NOTE: |minuses| list is not checked since redundant
2196 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
2197     #
2198     } elsif ($phase eq 'in dds') {
2199 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2200     #$phase = 'in dds';
2201     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2202     $phase = 'in dts';
2203     } else {
2204     $self->{onerror}->(node => $node, type => 'element not allowed');
2205     }
2206     } elsif ($phase eq 'in dts') {
2207     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2208     #$phase = 'in dts';
2209     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2210     $phase = 'in dds';
2211     } else {
2212     $self->{onerror}->(node => $node, type => 'element not allowed');
2213     }
2214     } else { # before dt
2215     if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
2216     $phase = 'in dts';
2217     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
2218     $self->{onerror}
2219     ->(node => $node, type => 'ps element missing:dt');
2220     $phase = 'in dds';
2221     } else {
2222     $self->{onerror}->(node => $node, type => 'element not allowed');
2223     }
2224     }
2225     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2226     unshift @nodes, @$sib;
2227     push @$new_todos, @$ch;
2228     } elsif ($nt == 3 or $nt == 4) {
2229     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2230     $self->{onerror}->(node => $node, type => 'character not allowed');
2231 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2232 wakaba 1.1 }
2233     } elsif ($nt == 5) {
2234     unshift @nodes, @{$node->child_nodes};
2235     }
2236     }
2237     if ($phase eq 'in dts') {
2238 wakaba 1.8 $self->{onerror}->(node => $el, type => 'child element missing:dd');
2239 wakaba 1.1 }
2240    
2241     if ($todo->{inline}) {
2242     for (@$new_todos) {
2243     $_->{inline} = 1;
2244     }
2245     }
2246     return ($new_todos);
2247     },
2248     };
2249    
2250     $Element->{$HTML_NS}->{dt} = {
2251     attrs_checker => $GetHTMLAttrsChecker->({}),
2252 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2253 wakaba 1.1 };
2254    
2255     $Element->{$HTML_NS}->{dd} = {
2256     attrs_checker => $GetHTMLAttrsChecker->({}),
2257 wakaba 1.29 checker => $HTMLProseContentChecker,
2258 wakaba 1.1 };
2259    
2260     $Element->{$HTML_NS}->{a} = {
2261     attrs_checker => sub {
2262     my ($self, $todo) = @_;
2263     my %attr;
2264     for my $attr (@{$todo->{node}->attributes}) {
2265     my $attr_ns = $attr->namespace_uri;
2266     $attr_ns = '' unless defined $attr_ns;
2267     my $attr_ln = $attr->manakai_local_name;
2268     my $checker;
2269     if ($attr_ns eq '') {
2270     $checker = {
2271     target => $HTMLTargetAttrChecker,
2272     href => $HTMLURIAttrChecker,
2273     ping => $HTMLSpaceURIsAttrChecker,
2274 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2275 wakaba 1.1 media => $HTMLMQAttrChecker,
2276     hreflang => $HTMLLanguageTagAttrChecker,
2277     type => $HTMLIMTAttrChecker,
2278     }->{$attr_ln};
2279     if ($checker) {
2280     $attr{$attr_ln} = $attr;
2281     } else {
2282     $checker = $HTMLAttrChecker->{$attr_ln};
2283     }
2284     }
2285     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2286     || $AttrChecker->{$attr_ns}->{''};
2287     if ($checker) {
2288     $checker->($self, $attr) if ref $checker;
2289     } else {
2290     $self->{onerror}->(node => $attr, level => 'unsupported',
2291     type => 'attribute');
2292     ## ISSUE: No comformance createria for unknown attributes in the spec
2293     }
2294     }
2295    
2296 wakaba 1.4 if (defined $attr{href}) {
2297     $self->{has_hyperlink_element} = 1;
2298     } else {
2299 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
2300     if (defined $attr{$_}) {
2301     $self->{onerror}->(node => $attr{$_},
2302     type => 'attribute not allowed');
2303     }
2304     }
2305     }
2306     },
2307     checker => sub {
2308     my ($self, $todo) = @_;
2309    
2310     my $end = $self->_add_minuses ($HTMLInteractiveElements);
2311 wakaba 1.29 my ($new_todos, $ch) = $HTMLPhrasingContentChecker->($self, $todo);
2312 wakaba 1.1 push @$new_todos, $end;
2313    
2314 wakaba 1.15 if ($todo->{node}->has_attribute_ns (undef, 'href')) {
2315     $_->{flag}->{in_a_href} = 1 for @$new_todos;
2316     }
2317 wakaba 1.1
2318     return ($new_todos, $ch);
2319     },
2320     };
2321    
2322     $Element->{$HTML_NS}->{q} = {
2323     attrs_checker => $GetHTMLAttrsChecker->({
2324     cite => $HTMLURIAttrChecker,
2325     }),
2326 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2327 wakaba 1.1 };
2328    
2329     $Element->{$HTML_NS}->{cite} = {
2330     attrs_checker => $GetHTMLAttrsChecker->({}),
2331 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2332 wakaba 1.1 };
2333    
2334     $Element->{$HTML_NS}->{em} = {
2335     attrs_checker => $GetHTMLAttrsChecker->({}),
2336 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2337 wakaba 1.1 };
2338    
2339     $Element->{$HTML_NS}->{strong} = {
2340     attrs_checker => $GetHTMLAttrsChecker->({}),
2341 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2342 wakaba 1.1 };
2343    
2344     $Element->{$HTML_NS}->{small} = {
2345     attrs_checker => $GetHTMLAttrsChecker->({}),
2346 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2347 wakaba 1.1 };
2348    
2349     $Element->{$HTML_NS}->{m} = {
2350     attrs_checker => $GetHTMLAttrsChecker->({}),
2351 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2352 wakaba 1.1 };
2353    
2354     $Element->{$HTML_NS}->{dfn} = {
2355     attrs_checker => $GetHTMLAttrsChecker->({}),
2356     checker => sub {
2357     my ($self, $todo) = @_;
2358    
2359     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
2360 wakaba 1.29 my ($sib, $ch) = $HTMLPhrasingContentChecker->($self, $todo);
2361 wakaba 1.1 push @$sib, $end;
2362    
2363     my $node = $todo->{node};
2364     my $term = $node->get_attribute_ns (undef, 'title');
2365     unless (defined $term) {
2366     for my $child (@{$node->child_nodes}) {
2367     if ($child->node_type == 1) { # ELEMENT_NODE
2368     if (defined $term) {
2369     undef $term;
2370     last;
2371     } elsif ($child->manakai_local_name eq 'abbr') {
2372     my $nsuri = $child->namespace_uri;
2373     if (defined $nsuri and $nsuri eq $HTML_NS) {
2374     my $attr = $child->get_attribute_node_ns (undef, 'title');
2375     if ($attr) {
2376     $term = $attr->value;
2377     }
2378     }
2379     }
2380     } elsif ($child->node_type == 3 or $child->node_type == 4) {
2381     ## TEXT_NODE or CDATA_SECTION_NODE
2382     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
2383     next;
2384     }
2385     undef $term;
2386     last;
2387     }
2388     }
2389     unless (defined $term) {
2390     $term = $node->text_content;
2391     }
2392     }
2393     if ($self->{term}->{$term}) {
2394     $self->{onerror}->(node => $node, type => 'duplicate term');
2395     push @{$self->{term}->{$term}}, $node;
2396     } else {
2397     $self->{term}->{$term} = [$node];
2398     }
2399     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
2400     ## has |title|.
2401    
2402     return ($sib, $ch);
2403     },
2404     };
2405    
2406     $Element->{$HTML_NS}->{abbr} = {
2407     attrs_checker => $GetHTMLAttrsChecker->({
2408     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
2409     ## not different. The spec says that the |title| MAY be omitted
2410     ## if there is a |dfn| whose defining term is the abbreviation,
2411     ## but it does not prohibit |abbr| w/o |title| in other cases.
2412     }),
2413 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2414 wakaba 1.1 };
2415    
2416     $Element->{$HTML_NS}->{time} = {
2417     attrs_checker => $GetHTMLAttrsChecker->({
2418     datetime => sub { 1 }, # checked in |checker|
2419     }),
2420     ## TODO: Write tests
2421     checker => sub {
2422     my ($self, $todo) = @_;
2423    
2424     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'datetime');
2425     my $input;
2426     my $reg_sp;
2427     my $input_node;
2428     if ($attr) {
2429     $input = $attr->value;
2430     $reg_sp = qr/[\x09-\x0D\x20]*/;
2431     $input_node = $attr;
2432     } else {
2433     $input = $todo->{node}->text_content;
2434     $reg_sp = qr/\p{Zs}*/;
2435     $input_node = $todo->{node};
2436    
2437     ## ISSUE: What is the definition for "successfully extracts a date
2438     ## or time"? If the algorithm says the string is invalid but
2439     ## return some date or time, is it "successfully"?
2440     }
2441    
2442     my $hour;
2443     my $minute;
2444     my $second;
2445     if ($input =~ /
2446     \A
2447     [\x09-\x0D\x20]*
2448     ([0-9]+) # 1
2449     (?>
2450     -([0-9]+) # 2
2451     -([0-9]+) # 3
2452     [\x09-\x0D\x20]*
2453     (?>
2454     T
2455     [\x09-\x0D\x20]*
2456     )?
2457     ([0-9]+) # 4
2458     :([0-9]+) # 5
2459     (?>
2460     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2461     )?
2462     [\x09-\x0D\x20]*
2463     (?>
2464     Z
2465     [\x09-\x0D\x20]*
2466     |
2467     [+-]([0-9]+):([0-9]+) # 7, 8
2468     [\x09-\x0D\x20]*
2469     )?
2470     \z
2471     |
2472     :([0-9]+) # 9
2473     (?>
2474     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2475     )?
2476     [\x09-\x0D\x20]*\z
2477     )
2478     /x) {
2479     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2480     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2481     length $4 != 2 or length $5 != 2) {
2482     $self->{onerror}->(node => $input_node,
2483     type => 'dateortime:syntax error');
2484     }
2485    
2486     if (1 <= $2 and $2 <= 12) {
2487     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2488     if $3 < 1 or
2489     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
2490     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2491     if $2 == 2 and $3 == 29 and
2492     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2493     } else {
2494     $self->{onerror}->(node => $input_node,
2495     type => 'datetime:bad month');
2496     }
2497    
2498     ($hour, $minute, $second) = ($4, $5, $6);
2499    
2500     if (defined $7) { ## [+-]hh:mm
2501     if (length $7 != 2 or length $8 != 2) {
2502     $self->{onerror}->(node => $input_node,
2503     type => 'dateortime:syntax error');
2504     }
2505    
2506     $self->{onerror}->(node => $input_node,
2507     type => 'datetime:bad timezone hour')
2508     if $7 > 23;
2509     $self->{onerror}->(node => $input_node,
2510     type => 'datetime:bad timezone minute')
2511     if $8 > 59;
2512     }
2513     } else { ## hh:mm
2514     if (length $1 != 2 or length $9 != 2) {
2515     $self->{onerror}->(node => $input_node,
2516     type => qq'dateortime:syntax error');
2517     }
2518    
2519     ($hour, $minute, $second) = ($1, $9, $10);
2520     }
2521    
2522     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2523     if $hour > 23;
2524     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2525     if $minute > 59;
2526    
2527     if (defined $second) { ## s
2528     ## NOTE: Integer part of second don't have to have length of two.
2529    
2530     if (substr ($second, 0, 1) eq '.') {
2531     $self->{onerror}->(node => $input_node,
2532     type => 'dateortime:syntax error');
2533     }
2534    
2535     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2536     if $second >= 60;
2537     }
2538     } else {
2539     $self->{onerror}->(node => $input_node,
2540     type => 'dateortime:syntax error');
2541     }
2542    
2543 wakaba 1.29 return $HTMLPhrasingContentChecker->($self, $todo);
2544 wakaba 1.1 },
2545     };
2546    
2547     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2548     attrs_checker => $GetHTMLAttrsChecker->({
2549     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2550     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2551     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2552     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2553     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2554     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2555     }),
2556 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2557 wakaba 1.1 };
2558    
2559     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2560     attrs_checker => $GetHTMLAttrsChecker->({
2561     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2562     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2563     }),
2564 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2565 wakaba 1.1 };
2566    
2567     $Element->{$HTML_NS}->{code} = {
2568     attrs_checker => $GetHTMLAttrsChecker->({}),
2569     ## NOTE: Though |title| has special semantics,
2570     ## syntatically same as the |title| as global attribute.
2571 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2572 wakaba 1.1 };
2573    
2574     $Element->{$HTML_NS}->{var} = {
2575     attrs_checker => $GetHTMLAttrsChecker->({}),
2576     ## NOTE: Though |title| has special semantics,
2577     ## syntatically same as the |title| as global attribute.
2578 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2579 wakaba 1.1 };
2580    
2581     $Element->{$HTML_NS}->{samp} = {
2582     attrs_checker => $GetHTMLAttrsChecker->({}),
2583     ## NOTE: Though |title| has special semantics,
2584     ## syntatically same as the |title| as global attribute.
2585 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2586 wakaba 1.1 };
2587    
2588     $Element->{$HTML_NS}->{kbd} = {
2589     attrs_checker => $GetHTMLAttrsChecker->({}),
2590 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2591 wakaba 1.1 };
2592    
2593     $Element->{$HTML_NS}->{sub} = {
2594     attrs_checker => $GetHTMLAttrsChecker->({}),
2595 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2596 wakaba 1.1 };
2597    
2598     $Element->{$HTML_NS}->{sup} = {
2599     attrs_checker => $GetHTMLAttrsChecker->({}),
2600 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2601 wakaba 1.1 };
2602    
2603     $Element->{$HTML_NS}->{span} = {
2604     attrs_checker => $GetHTMLAttrsChecker->({}),
2605     ## NOTE: Though |title| has special semantics,
2606     ## syntatically same as the |title| as global attribute.
2607 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2608 wakaba 1.1 };
2609    
2610     $Element->{$HTML_NS}->{i} = {
2611     attrs_checker => $GetHTMLAttrsChecker->({}),
2612     ## NOTE: Though |title| has special semantics,
2613     ## syntatically same as the |title| as global attribute.
2614 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2615 wakaba 1.1 };
2616    
2617     $Element->{$HTML_NS}->{b} = {
2618     attrs_checker => $GetHTMLAttrsChecker->({}),
2619 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2620 wakaba 1.1 };
2621    
2622     $Element->{$HTML_NS}->{bdo} = {
2623     attrs_checker => sub {
2624     my ($self, $todo) = @_;
2625     $GetHTMLAttrsChecker->({})->($self, $todo);
2626     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
2627     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
2628     }
2629     },
2630     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
2631 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
2632 wakaba 1.1 };
2633    
2634 wakaba 1.29 =pod
2635    
2636     ## TODO:
2637    
2638     +
2639     + <p>Partly because of the confusion described above, authors are
2640     + strongly recommended to always mark up all paragraphs with the
2641     + <code>p</code> element, and to not have any <code>ins</code> or
2642     + <code>del</code> elements that cross across any <span
2643     + title="paragraph">implied paragraphs</span>.</p>
2644     +
2645     (An informative note)
2646    
2647     <p><code>ins</code> elements should not cross <span
2648     + title="paragraph">implied paragraph</span> boundaries.</p>
2649     (normative)
2650    
2651     + <p><code>del</code> elements should not cross <span
2652     + title="paragraph">implied paragraph</span> boundaries.</p>
2653     (normative)
2654    
2655     =cut
2656    
2657 wakaba 1.1 $Element->{$HTML_NS}->{ins} = {
2658     attrs_checker => $GetHTMLAttrsChecker->({
2659     cite => $HTMLURIAttrChecker,
2660     datetime => $HTMLDatetimeAttrChecker,
2661     }),
2662     checker => $HTMLTransparentChecker,
2663     };
2664    
2665     $Element->{$HTML_NS}->{del} = {
2666     attrs_checker => $GetHTMLAttrsChecker->({
2667     cite => $HTMLURIAttrChecker,
2668     datetime => $HTMLDatetimeAttrChecker,
2669     }),
2670     checker => sub {
2671     my ($self, $todo) = @_;
2672 wakaba 1.29 my $sig_flag = $todo->{flag}->{has_descendant}->{significant};
2673     my ($new_todos) = $HTMLTransparentChecker->($self, $todo);
2674     push @$new_todos, {type => 'code', code => sub {
2675     $todo->{flag}->{has_descendant}->{significant} = 0;
2676     }} if not $sig_flag;
2677     return $new_todos;
2678 wakaba 1.1 },
2679     };
2680    
2681     ## TODO: figure
2682 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2683 wakaba 1.1
2684 wakaba 1.4 ## TODO: |alt|
2685 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
2686     attrs_checker => sub {
2687     my ($self, $todo) = @_;
2688     $GetHTMLAttrsChecker->({
2689     alt => sub { }, ## NOTE: No syntactical requirement
2690     src => $HTMLURIAttrChecker,
2691     usemap => $HTMLUsemapAttrChecker,
2692     ismap => sub {
2693     my ($self, $attr, $parent_todo) = @_;
2694 wakaba 1.15 if (not $todo->{flag}->{in_a_href}) {
2695     $self->{onerror}->(node => $attr,
2696     type => 'attribute not allowed:ismap');
2697 wakaba 1.1 }
2698     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2699     },
2700     ## TODO: height
2701     ## TODO: width
2702     })->($self, $todo);
2703     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2704     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2705     }
2706     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2707     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2708     }
2709     },
2710 wakaba 1.25 checker => sub {
2711     my ($self, $todo) = @_;
2712     $todo->{flag}->{has_descendant}->{significant} = 1;
2713     return $HTMLEmptyChecker->($self, $todo);
2714     },
2715 wakaba 1.1 };
2716    
2717     $Element->{$HTML_NS}->{iframe} = {
2718     attrs_checker => $GetHTMLAttrsChecker->({
2719     src => $HTMLURIAttrChecker,
2720     }),
2721 wakaba 1.25 checker => sub {
2722     my ($self, $todo) = @_;
2723     $todo->{flag}->{has_descendant}->{significant} = 1;
2724     return $HTMLTextChecker->($self, $todo);
2725     },
2726 wakaba 1.1 };
2727    
2728     $Element->{$HTML_NS}->{embed} = {
2729     attrs_checker => sub {
2730     my ($self, $todo) = @_;
2731     my $has_src;
2732     for my $attr (@{$todo->{node}->attributes}) {
2733     my $attr_ns = $attr->namespace_uri;
2734     $attr_ns = '' unless defined $attr_ns;
2735     my $attr_ln = $attr->manakai_local_name;
2736     my $checker;
2737     if ($attr_ns eq '') {
2738     if ($attr_ln eq 'src') {
2739     $checker = $HTMLURIAttrChecker;
2740     $has_src = 1;
2741     } elsif ($attr_ln eq 'type') {
2742     $checker = $HTMLIMTAttrChecker;
2743     } else {
2744     ## TODO: height
2745     ## TODO: width
2746     $checker = $HTMLAttrChecker->{$attr_ln}
2747     || sub { }; ## NOTE: Any local attribute is ok.
2748     }
2749     }
2750     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2751     || $AttrChecker->{$attr_ns}->{''};
2752     if ($checker) {
2753     $checker->($self, $attr);
2754     } else {
2755     $self->{onerror}->(node => $attr, level => 'unsupported',
2756     type => 'attribute');
2757     ## ISSUE: No comformance createria for global attributes in the spec
2758     }
2759     }
2760    
2761     unless ($has_src) {
2762     $self->{onerror}->(node => $todo->{node},
2763     type => 'attribute missing:src');
2764     }
2765     },
2766 wakaba 1.25 checker => sub {
2767     my ($self, $todo) = @_;
2768     $todo->{flag}->{has_descendant}->{significant} = 1;
2769     return $HTMLEmptyChecker->($self, $todo);
2770     },
2771 wakaba 1.1 };
2772    
2773     $Element->{$HTML_NS}->{object} = {
2774     attrs_checker => sub {
2775     my ($self, $todo) = @_;
2776     $GetHTMLAttrsChecker->({
2777     data => $HTMLURIAttrChecker,
2778     type => $HTMLIMTAttrChecker,
2779     usemap => $HTMLUsemapAttrChecker,
2780     ## TODO: width
2781     ## TODO: height
2782     })->($self, $todo);
2783     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2784     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2785     $self->{onerror}->(node => $todo->{node},
2786     type => 'attribute missing:data|type');
2787     }
2788     }
2789     },
2790 wakaba 1.29 ## NOTE: param*, then transparent.
2791 wakaba 1.25 checker => sub {
2792     my ($self, $todo) = @_;
2793     $todo->{flag}->{has_descendant}->{significant} = 1;
2794     return $ElementDefault->{checker}->($self, $todo); ## TODO
2795     },
2796 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2797 wakaba 1.1 };
2798    
2799     $Element->{$HTML_NS}->{param} = {
2800     attrs_checker => sub {
2801     my ($self, $todo) = @_;
2802     $GetHTMLAttrsChecker->({
2803     name => sub { },
2804     value => sub { },
2805     })->($self, $todo);
2806     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2807     $self->{onerror}->(node => $todo->{node},
2808     type => 'attribute missing:name');
2809     }
2810     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2811     $self->{onerror}->(node => $todo->{node},
2812     type => 'attribute missing:value');
2813     }
2814     },
2815     checker => $HTMLEmptyChecker,
2816     };
2817    
2818     $Element->{$HTML_NS}->{video} = {
2819     attrs_checker => $GetHTMLAttrsChecker->({
2820     src => $HTMLURIAttrChecker,
2821     ## TODO: start, loopstart, loopend, end
2822     ## ISSUE: they MUST be "value time offset"s. Value?
2823 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2824 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2825     controls => $GetHTMLBooleanAttrChecker->('controls'),
2826 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2827     ## TODO: width, height (not for audio!)
2828 wakaba 1.1 }),
2829     checker => sub {
2830     my ($self, $todo) = @_;
2831 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2832 wakaba 1.1
2833 wakaba 1.29 ## TODO:
2834 wakaba 1.1 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2835     return $HTMLBlockOrInlineChecker->($self, $todo);
2836     } else {
2837     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2838     ->($self, $todo);
2839     }
2840     },
2841     };
2842    
2843     $Element->{$HTML_NS}->{audio} = {
2844     attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2845     checker => $Element->{$HTML_NS}->{video}->{checker},
2846     };
2847    
2848     $Element->{$HTML_NS}->{source} = {
2849     attrs_checker => sub {
2850     my ($self, $todo) = @_;
2851     $GetHTMLAttrsChecker->({
2852     src => $HTMLURIAttrChecker,
2853     type => $HTMLIMTAttrChecker,
2854     media => $HTMLMQAttrChecker,
2855     })->($self, $todo);
2856     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2857     $self->{onerror}->(node => $todo->{node},
2858     type => 'attribute missing:src');
2859     }
2860     },
2861     checker => $HTMLEmptyChecker,
2862     };
2863    
2864     $Element->{$HTML_NS}->{canvas} = {
2865     attrs_checker => $GetHTMLAttrsChecker->({
2866     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2867     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2868     }),
2869 wakaba 1.25 checker => sub {
2870     my ($self, $todo) = @_;
2871     $todo->{flag}->{has_descendant}->{significant} = 1;
2872 wakaba 1.29 return $HTMLTransparentChecker->($self, $todo);
2873 wakaba 1.25 },
2874 wakaba 1.1 };
2875    
2876     $Element->{$HTML_NS}->{map} = {
2877 wakaba 1.4 attrs_checker => sub {
2878     my ($self, $todo) = @_;
2879     my $has_id;
2880     $GetHTMLAttrsChecker->({
2881     id => sub {
2882     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2883     my ($self, $attr) = @_;
2884     my $value = $attr->value;
2885     if (length $value > 0) {
2886     if ($self->{id}->{$value}) {
2887     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2888     push @{$self->{id}->{$value}}, $attr;
2889     } else {
2890     $self->{id}->{$value} = [$attr];
2891     }
2892 wakaba 1.1 } else {
2893 wakaba 1.4 ## NOTE: MUST contain at least one character
2894     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2895 wakaba 1.1 }
2896 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2897     $self->{onerror}->(node => $attr, type => 'space in ID');
2898     }
2899     $self->{map}->{$value} ||= $attr;
2900     $has_id = 1;
2901     },
2902     })->($self, $todo);
2903     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:id')
2904     unless $has_id;
2905     },
2906 wakaba 1.29 checker => $HTMLProseContentChecker,
2907 wakaba 1.1 };
2908    
2909     $Element->{$HTML_NS}->{area} = {
2910     attrs_checker => sub {
2911     my ($self, $todo) = @_;
2912     my %attr;
2913     my $coords;
2914     for my $attr (@{$todo->{node}->attributes}) {
2915     my $attr_ns = $attr->namespace_uri;
2916     $attr_ns = '' unless defined $attr_ns;
2917     my $attr_ln = $attr->manakai_local_name;
2918     my $checker;
2919     if ($attr_ns eq '') {
2920     $checker = {
2921     alt => sub { },
2922     ## NOTE: |alt| value has no conformance creteria.
2923     shape => $GetHTMLEnumeratedAttrChecker->({
2924     circ => -1, circle => 1,
2925     default => 1,
2926     poly => 1, polygon => -1,
2927     rect => 1, rectangle => -1,
2928     }),
2929     coords => sub {
2930     my ($self, $attr) = @_;
2931     my $value = $attr->value;
2932     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2933     $coords = [split /,/, $value];
2934     } else {
2935     $self->{onerror}->(node => $attr,
2936     type => 'coords:syntax error');
2937     }
2938     },
2939     target => $HTMLTargetAttrChecker,
2940     href => $HTMLURIAttrChecker,
2941     ping => $HTMLSpaceURIsAttrChecker,
2942 wakaba 1.4 rel => sub { $HTMLLinkTypesAttrChecker->(1, $todo, @_) },
2943 wakaba 1.1 media => $HTMLMQAttrChecker,
2944     hreflang => $HTMLLanguageTagAttrChecker,
2945     type => $HTMLIMTAttrChecker,
2946     }->{$attr_ln};
2947     if ($checker) {
2948     $attr{$attr_ln} = $attr;
2949     } else {
2950     $checker = $HTMLAttrChecker->{$attr_ln};
2951     }
2952     }
2953     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2954     || $AttrChecker->{$attr_ns}->{''};
2955     if ($checker) {
2956     $checker->($self, $attr) if ref $checker;
2957     } else {
2958     $self->{onerror}->(node => $attr, level => 'unsupported',
2959     type => 'attribute');
2960     ## ISSUE: No comformance createria for unknown attributes in the spec
2961     }
2962     }
2963    
2964     if (defined $attr{href}) {
2965 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2966 wakaba 1.1 unless (defined $attr{alt}) {
2967     $self->{onerror}->(node => $todo->{node},
2968     type => 'attribute missing:alt');
2969     }
2970     } else {
2971     for (qw/target ping rel media hreflang type alt/) {
2972     if (defined $attr{$_}) {
2973     $self->{onerror}->(node => $attr{$_},
2974     type => 'attribute not allowed');
2975     }
2976     }
2977     }
2978    
2979     my $shape = 'rectangle';
2980     if (defined $attr{shape}) {
2981     $shape = {
2982     circ => 'circle', circle => 'circle',
2983     default => 'default',
2984     poly => 'polygon', polygon => 'polygon',
2985     rect => 'rectangle', rectangle => 'rectangle',
2986     }->{lc $attr{shape}->value} || 'rectangle';
2987     ## TODO: ASCII lowercase?
2988     }
2989    
2990     if ($shape eq 'circle') {
2991     if (defined $attr{coords}) {
2992     if (defined $coords) {
2993     if (@$coords == 3) {
2994     if ($coords->[2] < 0) {
2995     $self->{onerror}->(node => $attr{coords},
2996     type => 'coords:out of range:2');
2997     }
2998     } else {
2999     $self->{onerror}->(node => $attr{coords},
3000     type => 'coords:number:3:'.@$coords);
3001     }
3002     } else {
3003     ## NOTE: A syntax error has been reported.
3004     }
3005     } else {
3006     $self->{onerror}->(node => $todo->{node},
3007     type => 'attribute missing:coords');
3008     }
3009     } elsif ($shape eq 'default') {
3010     if (defined $attr{coords}) {
3011     $self->{onerror}->(node => $attr{coords},
3012     type => 'attribute not allowed');
3013     }
3014     } elsif ($shape eq 'polygon') {
3015     if (defined $attr{coords}) {
3016     if (defined $coords) {
3017     if (@$coords >= 6) {
3018     unless (@$coords % 2 == 0) {
3019     $self->{onerror}->(node => $attr{coords},
3020     type => 'coords:number:even:'.@$coords);
3021     }
3022     } else {
3023     $self->{onerror}->(node => $attr{coords},
3024     type => 'coords:number:>=6:'.@$coords);
3025     }
3026     } else {
3027     ## NOTE: A syntax error has been reported.
3028     }
3029     } else {
3030     $self->{onerror}->(node => $todo->{node},
3031     type => 'attribute missing:coords');
3032     }
3033     } elsif ($shape eq 'rectangle') {
3034     if (defined $attr{coords}) {
3035     if (defined $coords) {
3036     if (@$coords == 4) {
3037     unless ($coords->[0] < $coords->[2]) {
3038     $self->{onerror}->(node => $attr{coords},
3039     type => 'coords:out of range:0');
3040     }
3041     unless ($coords->[1] < $coords->[3]) {
3042     $self->{onerror}->(node => $attr{coords},
3043     type => 'coords:out of range:1');
3044     }
3045     } else {
3046     $self->{onerror}->(node => $attr{coords},
3047     type => 'coords:number:4:'.@$coords);
3048     }
3049     } else {
3050     ## NOTE: A syntax error has been reported.
3051     }
3052     } else {
3053     $self->{onerror}->(node => $todo->{node},
3054     type => 'attribute missing:coords');
3055     }
3056     }
3057     },
3058     checker => $HTMLEmptyChecker,
3059     };
3060     ## TODO: only in map
3061    
3062     $Element->{$HTML_NS}->{table} = {
3063     attrs_checker => $GetHTMLAttrsChecker->({}),
3064     checker => sub {
3065     my ($self, $todo) = @_;
3066     my $el = $todo->{node};
3067     my $new_todos = [];
3068     my @nodes = (@{$el->child_nodes});
3069    
3070     my $phase = 'before caption';
3071     my $has_tfoot;
3072     while (@nodes) {
3073     my $node = shift @nodes;
3074     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3075    
3076     my $nt = $node->node_type;
3077     if ($nt == 1) {
3078     my $node_ns = $node->namespace_uri;
3079     $node_ns = '' unless defined $node_ns;
3080     my $node_ln = $node->manakai_local_name;
3081     ## NOTE: |minuses| list is not checked since redundant
3082 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3083     #
3084     } elsif ($phase eq 'in tbodys') {
3085 wakaba 1.1 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3086     #$phase = 'in tbodys';
3087     } elsif (not $has_tfoot and
3088     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3089     $phase = 'after tfoot';
3090     $has_tfoot = 1;
3091     } else {
3092     $self->{onerror}->(node => $node, type => 'element not allowed');
3093     }
3094     } elsif ($phase eq 'in trs') {
3095     if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3096     #$phase = 'in trs';
3097     } elsif (not $has_tfoot and
3098     $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3099     $phase = 'after tfoot';
3100     $has_tfoot = 1;
3101     } else {
3102     $self->{onerror}->(node => $node, type => 'element not allowed');
3103     }
3104     } elsif ($phase eq 'after thead') {
3105     if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3106     $phase = 'in tbodys';
3107     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3108     $phase = 'in trs';
3109     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3110     $phase = 'in tbodys';
3111     $has_tfoot = 1;
3112     } else {
3113     $self->{onerror}->(node => $node, type => 'element not allowed');
3114     }
3115     } elsif ($phase eq 'in colgroup') {
3116     if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
3117     $phase = 'in colgroup';
3118     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
3119     $phase = 'after thead';
3120     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3121     $phase = 'in tbodys';
3122     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3123     $phase = 'in trs';
3124     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3125     $phase = 'in tbodys';
3126     $has_tfoot = 1;
3127     } else {
3128     $self->{onerror}->(node => $node, type => 'element not allowed');
3129     }
3130     } elsif ($phase eq 'before caption') {
3131     if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
3132     $phase = 'in colgroup';
3133     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
3134     $phase = 'in colgroup';
3135     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
3136     $phase = 'after thead';
3137     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
3138     $phase = 'in tbodys';
3139     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3140     $phase = 'in trs';
3141     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
3142     $phase = 'in tbodys';
3143     $has_tfoot = 1;
3144     } else {
3145     $self->{onerror}->(node => $node, type => 'element not allowed');
3146     }
3147     } else { # after tfoot
3148     $self->{onerror}->(node => $node, type => 'element not allowed');
3149     }
3150     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3151     unshift @nodes, @$sib;
3152     push @$new_todos, @$ch;
3153     } elsif ($nt == 3 or $nt == 4) {
3154     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3155     $self->{onerror}->(node => $node, type => 'character not allowed');
3156 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3157 wakaba 1.1 }
3158     } elsif ($nt == 5) {
3159     unshift @nodes, @{$node->child_nodes};
3160     }
3161     }
3162    
3163     ## Table model errors
3164     require Whatpm::HTMLTable;
3165     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
3166     my %opt = @_;
3167     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
3168     });
3169     push @{$self->{return}->{table}}, $todo->{node};
3170    
3171     return ($new_todos);
3172     },
3173     };
3174    
3175     $Element->{$HTML_NS}->{caption} = {
3176     attrs_checker => $GetHTMLAttrsChecker->({}),
3177 wakaba 1.13 checker => $HTMLStrictlyInlineChecker,
3178 wakaba 1.1 };
3179    
3180     $Element->{$HTML_NS}->{colgroup} = {
3181     attrs_checker => $GetHTMLAttrsChecker->({
3182     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3183     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
3184     ## TODO: "attribute not supported" if |col|.
3185     ## ISSUE: MUST NOT if any |col|?
3186     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
3187     }),
3188     checker => sub {
3189     my ($self, $todo) = @_;
3190     my $el = $todo->{node};
3191     my $new_todos = [];
3192     my @nodes = (@{$el->child_nodes});
3193    
3194     while (@nodes) {
3195     my $node = shift @nodes;
3196     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3197    
3198     my $nt = $node->node_type;
3199     if ($nt == 1) {
3200     my $node_ns = $node->namespace_uri;
3201     $node_ns = '' unless defined $node_ns;
3202     my $node_ln = $node->manakai_local_name;
3203     ## NOTE: |minuses| list is not checked since redundant
3204 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3205     #
3206     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'col')) {
3207 wakaba 1.1 $self->{onerror}->(node => $node, type => 'element not allowed');
3208     }
3209     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3210     unshift @nodes, @$sib;
3211     push @$new_todos, @$ch;
3212     } elsif ($nt == 3 or $nt == 4) {
3213     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3214     $self->{onerror}->(node => $node, type => 'character not allowed');
3215 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3216 wakaba 1.1 }
3217     } elsif ($nt == 5) {
3218     unshift @nodes, @{$node->child_nodes};
3219     }
3220     }
3221     return ($new_todos);
3222     },
3223     };
3224    
3225     $Element->{$HTML_NS}->{col} = {
3226     attrs_checker => $GetHTMLAttrsChecker->({
3227     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3228     }),
3229     checker => $HTMLEmptyChecker,
3230     };
3231    
3232     $Element->{$HTML_NS}->{tbody} = {
3233     attrs_checker => $GetHTMLAttrsChecker->({}),
3234     checker => sub {
3235     my ($self, $todo) = @_;
3236     my $el = $todo->{node};
3237     my $new_todos = [];
3238     my @nodes = (@{$el->child_nodes});
3239    
3240     my $has_tr;
3241     while (@nodes) {
3242     my $node = shift @nodes;
3243     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3244    
3245     my $nt = $node->node_type;
3246     if ($nt == 1) {
3247     my $node_ns = $node->namespace_uri;
3248     $node_ns = '' unless defined $node_ns;
3249     my $node_ln = $node->manakai_local_name;
3250     ## NOTE: |minuses| list is not checked since redundant
3251 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3252     #
3253     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
3254 wakaba 1.1 $has_tr = 1;
3255     } else {
3256     $self->{onerror}->(node => $node, type => 'element not allowed');
3257     }
3258     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3259     unshift @nodes, @$sib;
3260     push @$new_todos, @$ch;
3261     } elsif ($nt == 3 or $nt == 4) {
3262     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3263     $self->{onerror}->(node => $node, type => 'character not allowed');
3264 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3265 wakaba 1.1 }
3266     } elsif ($nt == 5) {
3267     unshift @nodes, @{$node->child_nodes};
3268     }
3269     }
3270     unless ($has_tr) {
3271     $self->{onerror}->(node => $el, type => 'child element missing:tr');
3272     }
3273     return ($new_todos);
3274     },
3275     };
3276    
3277     $Element->{$HTML_NS}->{thead} = {
3278     attrs_checker => $GetHTMLAttrsChecker->({}),
3279     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3280     };
3281    
3282     $Element->{$HTML_NS}->{tfoot} = {
3283     attrs_checker => $GetHTMLAttrsChecker->({}),
3284     checker => $Element->{$HTML_NS}->{tbody}->{checker},
3285     };
3286    
3287     $Element->{$HTML_NS}->{tr} = {
3288     attrs_checker => $GetHTMLAttrsChecker->({}),
3289     checker => sub {
3290     my ($self, $todo) = @_;
3291     my $el = $todo->{node};
3292     my $new_todos = [];
3293     my @nodes = (@{$el->child_nodes});
3294    
3295     my $has_td;
3296     while (@nodes) {
3297     my $node = shift @nodes;
3298     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3299    
3300     my $nt = $node->node_type;
3301     if ($nt == 1) {
3302     my $node_ns = $node->namespace_uri;
3303     $node_ns = '' unless defined $node_ns;
3304     my $node_ln = $node->manakai_local_name;
3305     ## NOTE: |minuses| list is not checked since redundant
3306 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3307     #
3308     } elsif ($node_ns eq $HTML_NS and
3309     ($node_ln eq 'td' or $node_ln eq 'th')) {
3310 wakaba 1.1 $has_td = 1;
3311     } else {
3312     $self->{onerror}->(node => $node, type => 'element not allowed');
3313     }
3314     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3315     unshift @nodes, @$sib;
3316     push @$new_todos, @$ch;
3317     } elsif ($nt == 3 or $nt == 4) {
3318     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3319     $self->{onerror}->(node => $node, type => 'character not allowed');
3320 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3321 wakaba 1.1 }
3322     } elsif ($nt == 5) {
3323     unshift @nodes, @{$node->child_nodes};
3324     }
3325     }
3326     unless ($has_td) {
3327     $self->{onerror}->(node => $el, type => 'child element missing:td|th');
3328     }
3329     return ($new_todos);
3330     },
3331     };
3332    
3333     $Element->{$HTML_NS}->{td} = {
3334     attrs_checker => $GetHTMLAttrsChecker->({
3335     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3336     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3337     }),
3338 wakaba 1.29 checker => $HTMLProseContentChecker,
3339 wakaba 1.1 };
3340    
3341     $Element->{$HTML_NS}->{th} = {
3342     attrs_checker => $GetHTMLAttrsChecker->({
3343     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3344     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3345     scope => $GetHTMLEnumeratedAttrChecker
3346     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
3347     }),
3348 wakaba 1.31 checker => $HTMLPhrasingContentChecker,
3349 wakaba 1.1 };
3350    
3351     ## TODO: forms
3352 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
3353 wakaba 1.1
3354     $Element->{$HTML_NS}->{script} = {
3355 wakaba 1.9 attrs_checker => $GetHTMLAttrsChecker->({
3356 wakaba 1.1 src => $HTMLURIAttrChecker,
3357     defer => $GetHTMLBooleanAttrChecker->('defer'),
3358     async => $GetHTMLBooleanAttrChecker->('async'),
3359     type => $HTMLIMTAttrChecker,
3360 wakaba 1.9 }),
3361 wakaba 1.1 checker => sub {
3362     my ($self, $todo) = @_;
3363    
3364     if ($todo->{node}->has_attribute_ns (undef, 'src')) {
3365     return $HTMLEmptyChecker->($self, $todo);
3366     } else {
3367     ## NOTE: No content model conformance in HTML5 spec.
3368     my $type = $todo->{node}->get_attribute_ns (undef, 'type');
3369     my $language = $todo->{node}->get_attribute_ns (undef, 'language');
3370     if ((defined $type and $type eq '') or
3371     (defined $language and $language eq '')) {
3372     $type = 'text/javascript';
3373     } elsif (defined $type) {
3374     #
3375     } elsif (defined $language) {
3376     $type = 'text/' . $language;
3377     } else {
3378     $type = 'text/javascript';
3379     }
3380     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
3381     type => 'script:'.$type); ## TODO: $type normalization
3382     return $AnyChecker->($self, $todo);
3383     }
3384     },
3385     };
3386 wakaba 1.25 ## ISSUE: Significant check and text child node
3387 wakaba 1.1
3388     ## NOTE: When script is disabled.
3389     $Element->{$HTML_NS}->{noscript} = {
3390 wakaba 1.3 attrs_checker => sub {
3391     my ($self, $todo) = @_;
3392    
3393     ## NOTE: This check is inserted in |attrs_checker|, rather than |checker|,
3394     ## since the later is not invoked when the |noscript| is used as a
3395     ## transparent element.
3396     unless ($todo->{node}->owner_document->manakai_is_html) {
3397     $self->{onerror}->(node => $todo->{node}, type => 'in XML:noscript');
3398     }
3399    
3400     $GetHTMLAttrsChecker->({})->($self, $todo);
3401     },
3402 wakaba 1.1 checker => sub {
3403     my ($self, $todo) = @_;
3404    
3405 wakaba 1.3 if ($todo->{flag}->{in_head}) {
3406     my $new_todos = [];
3407     my @nodes = (@{$todo->{node}->child_nodes});
3408    
3409     while (@nodes) {
3410     my $node = shift @nodes;
3411     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3412    
3413     my $nt = $node->node_type;
3414     if ($nt == 1) {
3415     my $node_ns = $node->namespace_uri;
3416     $node_ns = '' unless defined $node_ns;
3417     my $node_ln = $node->manakai_local_name;
3418 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3419     #
3420     } elsif ($node_ns eq $HTML_NS) {
3421 wakaba 1.3 if ({link => 1, style => 1}->{$node_ln}) {
3422     #
3423     } elsif ($node_ln eq 'meta') {
3424 wakaba 1.5 if ($node->has_attribute_ns (undef, 'name')) {
3425     #
3426 wakaba 1.3 } else {
3427 wakaba 1.5 $self->{onerror}->(node => $node,
3428     type => 'element not allowed');
3429 wakaba 1.3 }
3430     } else {
3431     $self->{onerror}->(node => $node, type => 'element not allowed');
3432     }
3433     } else {
3434     $self->{onerror}->(node => $node, type => 'element not allowed');
3435     }
3436    
3437     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3438     unshift @nodes, @$sib;
3439     push @$new_todos, @$ch;
3440     } elsif ($nt == 3 or $nt == 4) {
3441     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3442     $self->{onerror}->(node => $node, type => 'character not allowed');
3443 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3444 wakaba 1.3 }
3445     } elsif ($nt == 5) {
3446     unshift @nodes, @{$node->child_nodes};
3447     }
3448     }
3449     return ($new_todos);
3450     } else {
3451     my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
3452 wakaba 1.29 my ($sib, $ch) = $HTMLTransparentChecker->($self, $todo);
3453 wakaba 1.3 push @$sib, $end;
3454     return ($sib, $ch);
3455     }
3456 wakaba 1.1 },
3457     };
3458 wakaba 1.3
3459     ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
3460 wakaba 1.1
3461     $Element->{$HTML_NS}->{'event-source'} = {
3462     attrs_checker => $GetHTMLAttrsChecker->({
3463     src => $HTMLURIAttrChecker,
3464     }),
3465     checker => $HTMLEmptyChecker,
3466     };
3467    
3468     $Element->{$HTML_NS}->{details} = {
3469     attrs_checker => $GetHTMLAttrsChecker->({
3470     open => $GetHTMLBooleanAttrChecker->('open'),
3471     }),
3472     checker => sub {
3473     my ($self, $todo) = @_;
3474    
3475 wakaba 1.29 ## TODO:
3476 wakaba 1.1 my ($sib, $ch)
3477     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
3478     ->($self, $todo);
3479     return ($sib, $ch);
3480     },
3481     };
3482    
3483     $Element->{$HTML_NS}->{datagrid} = {
3484     attrs_checker => $GetHTMLAttrsChecker->({
3485     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3486     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3487     }),
3488     checker => sub {
3489     my ($self, $todo) = @_;
3490     my $el = $todo->{node};
3491     my $new_todos = [];
3492     my @nodes = (@{$el->child_nodes});
3493    
3494 wakaba 1.25 my $old_values = {significant =>
3495     $todo->{flag}->{has_descendant}->{significant}};
3496     $todo->{flag}->{has_descendant}->{significant} = 0;
3497    
3498 wakaba 1.1 my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
3499    
3500 wakaba 1.29 ## Prose -(text* table Prose*) | table | select | datalist | Empty
3501 wakaba 1.1 my $mode = 'any';
3502     while (@nodes) {
3503     my $node = shift @nodes;
3504     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3505    
3506     my $nt = $node->node_type;
3507     if ($nt == 1) {
3508     my $node_ns = $node->namespace_uri;
3509     $node_ns = '' unless defined $node_ns;
3510     my $node_ln = $node->manakai_local_name;
3511     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3512 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3513     #
3514 wakaba 1.29 } elsif ($mode eq 'prose') {
3515 wakaba 1.1 $not_allowed = 1
3516 wakaba 1.29 unless $HTMLProseContent->{$node_ns}->{$node_ln};
3517 wakaba 1.1 } elsif ($mode eq 'any') {
3518     if ($node_ns eq $HTML_NS and
3519     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
3520     $mode = 'none';
3521 wakaba 1.29 } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
3522     $mode = 'prose';
3523 wakaba 1.1 } else {
3524     $not_allowed = 1;
3525     }
3526     } else {
3527     $not_allowed = 1;
3528     }
3529     $self->{onerror}->(node => $node, type => 'element not allowed')
3530     if $not_allowed;
3531     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3532     unshift @nodes, @$sib;
3533     push @$new_todos, @$ch;
3534     } elsif ($nt == 3 or $nt == 4) {
3535     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3536 wakaba 1.29 if ($mode eq 'prose') {
3537     #
3538     } elsif ($mode eq 'any') {
3539     $mode = 'prose';
3540     } else {
3541     $self->{onerror}->(node => $node, type => 'character not allowed');
3542     }
3543 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3544 wakaba 1.1 }
3545     } elsif ($nt == 5) {
3546     unshift @nodes, @{$node->child_nodes};
3547     }
3548     }
3549    
3550     push @$new_todos, $end;
3551 wakaba 1.25
3552     push @$new_todos, {
3553     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3554     old_values => $old_values,
3555 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3556 wakaba 1.25 };
3557    
3558 wakaba 1.1 return ($new_todos);
3559 wakaba 1.29
3560     ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
3561     ## are not disallowed (assuming that form control contents are also
3562     ## prose content).
3563 wakaba 1.1 },
3564     };
3565    
3566     $Element->{$HTML_NS}->{command} = {
3567     attrs_checker => $GetHTMLAttrsChecker->({
3568     checked => $GetHTMLBooleanAttrChecker->('checked'),
3569     default => $GetHTMLBooleanAttrChecker->('default'),
3570     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3571     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3572     icon => $HTMLURIAttrChecker,
3573     label => sub { }, ## NOTE: No conformance creteria
3574     radiogroup => sub { }, ## NOTE: No conformance creteria
3575     ## NOTE: |title| has special semantics, but no syntactical difference
3576     type => sub {
3577     my ($self, $attr) = @_;
3578     my $value = $attr->value;
3579     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3580     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3581     }
3582     },
3583     }),
3584     checker => $HTMLEmptyChecker,
3585     };
3586    
3587     $Element->{$HTML_NS}->{menu} = {
3588     attrs_checker => $GetHTMLAttrsChecker->({
3589     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3590     id => sub {
3591     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3592     my ($self, $attr) = @_;
3593     my $value = $attr->value;
3594     if (length $value > 0) {
3595     if ($self->{id}->{$value}) {
3596     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3597     push @{$self->{id}->{$value}}, $attr;
3598     } else {
3599     $self->{id}->{$value} = [$attr];
3600     }
3601     } else {
3602     ## NOTE: MUST contain at least one character
3603     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3604     }
3605     if ($value =~ /[\x09-\x0D\x20]/) {
3606     $self->{onerror}->(node => $attr, type => 'space in ID');
3607     }
3608     $self->{menu}->{$value} ||= $attr;
3609     ## ISSUE: <menu id=""><p contextmenu=""> match?
3610     },
3611     label => sub { }, ## NOTE: No conformance creteria
3612     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3613     }),
3614     checker => sub {
3615     my ($self, $todo) = @_;
3616     my $el = $todo->{node};
3617     my $new_todos = [];
3618     my @nodes = (@{$el->child_nodes});
3619 wakaba 1.25
3620     my $old_values = {significant =>
3621     $todo->{flag}->{has_descendant}->{significant}};
3622     $todo->{flag}->{has_descendant}->{significant} = 0;
3623 wakaba 1.1
3624 wakaba 1.29 my $content = 'li or phrasing';
3625 wakaba 1.1 while (@nodes) {
3626     my $node = shift @nodes;
3627     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3628    
3629     my $nt = $node->node_type;
3630     if ($nt == 1) {
3631     my $node_ns = $node->namespace_uri;
3632     $node_ns = '' unless defined $node_ns;
3633     my $node_ln = $node->manakai_local_name;
3634     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
3635 wakaba 1.8 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3636     #
3637     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'li') {
3638 wakaba 1.29 if ($content eq 'phrasing') {
3639 wakaba 1.1 $not_allowed = 1;
3640 wakaba 1.29 } elsif ($content eq 'li or phrasing') {
3641 wakaba 1.1 $content = 'li';
3642     }
3643     } else {
3644 wakaba 1.29 if ($HTMLPhrasingContent->{$node_ns}->{$node_ln}) {
3645     $content = 'phrasing';
3646 wakaba 1.1 } else {
3647     $not_allowed = 1;
3648     }
3649     }
3650     $self->{onerror}->(node => $node, type => 'element not allowed')
3651     if $not_allowed;
3652     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3653     unshift @nodes, @$sib;
3654     push @$new_todos, @$ch;
3655     } elsif ($nt == 3 or $nt == 4) {
3656     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3657     if ($content eq 'li') {
3658     $self->{onerror}->(node => $node, type => 'character not allowed');
3659 wakaba 1.29 } elsif ($content eq 'li or phrasing') {
3660     $content = 'phrasing';
3661 wakaba 1.1 }
3662 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3663 wakaba 1.1 }
3664     } elsif ($nt == 5) {
3665     unshift @nodes, @{$node->child_nodes};
3666     }
3667     }
3668    
3669     for (@$new_todos) {
3670 wakaba 1.29 $_->{flag}->{in_menu} = 1;
3671 wakaba 1.1 }
3672 wakaba 1.25
3673     push @$new_todos, {
3674     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
3675     old_values => $old_values,
3676 wakaba 1.26 errors => $HTMLSignificantContentErrors,
3677 wakaba 1.25 };
3678    
3679 wakaba 1.1 return ($new_todos);
3680     },
3681 wakaba 1.8 };
3682    
3683     $Element->{$HTML_NS}->{datatemplate} = {
3684     attrs_checker => $GetHTMLAttrsChecker->({}),
3685     checker => sub {
3686     my ($self, $todo) = @_;
3687     my $el = $todo->{node};
3688     my $new_todos = [];
3689     my @nodes = (@{$el->child_nodes});
3690    
3691     while (@nodes) {
3692     my $node = shift @nodes;
3693     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
3694    
3695     my $nt = $node->node_type;
3696     if ($nt == 1) {
3697     my $node_ns = $node->namespace_uri;
3698     $node_ns = '' unless defined $node_ns;
3699     my $node_ln = $node->manakai_local_name;
3700     ## NOTE: |minuses| list is not checked since redundant
3701     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
3702     #
3703     } elsif (not ($node_ns eq $HTML_NS and $node_ln eq 'rule')) {
3704     $self->{onerror}->(node => $node,
3705     type => 'element not allowed:datatemplate');
3706     }
3707     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
3708     unshift @nodes, @$sib;
3709     push @$new_todos, @$ch;
3710     } elsif ($nt == 3 or $nt == 4) {
3711     if ($node->data =~ /[^\x09-\x0D\x20]/) {
3712     $self->{onerror}->(node => $node, type => 'character not allowed');
3713 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
3714 wakaba 1.8 }
3715     } elsif ($nt == 5) {
3716     unshift @nodes, @{$node->child_nodes};
3717     }
3718     }
3719     return ($new_todos);
3720     },
3721     is_xml_root => 1,
3722     };
3723    
3724     $Element->{$HTML_NS}->{rule} = {
3725     attrs_checker => $GetHTMLAttrsChecker->({
3726 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3727 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3728 wakaba 1.8 }),
3729     checker => sub {
3730     my ($self, $todo) = @_;
3731    
3732     my $end = $self->_add_pluses ({$HTML_NS => {nest => 1}});
3733 wakaba 1.25 my ($sib, $ch) = $HTMLAnyChecker->($self, $todo);
3734 wakaba 1.8 push @$sib, $end;
3735     return ($sib, $ch);
3736     },
3737     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3738     ## is applied to some conforming data, results in a conforming DOM tree.":
3739     ## We don't check against this.
3740     };
3741    
3742     $Element->{$HTML_NS}->{nest} = {
3743     attrs_checker => $GetHTMLAttrsChecker->({
3744 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3745     mode => sub {
3746     my ($self, $attr) = @_;
3747     my $value = $attr->value;
3748     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3749     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3750     }
3751     },
3752 wakaba 1.8 }),
3753     checker => $HTMLEmptyChecker,
3754 wakaba 1.1 };
3755    
3756     $Element->{$HTML_NS}->{legend} = {
3757     attrs_checker => $GetHTMLAttrsChecker->({}),
3758 wakaba 1.29 checker => $HTMLPhrasingContentChecker,
3759 wakaba 1.1 };
3760    
3761     $Element->{$HTML_NS}->{div} = {
3762     attrs_checker => $GetHTMLAttrsChecker->({}),
3763 wakaba 1.31 checker => $HTMLProseContentChecker,
3764 wakaba 1.1 };
3765    
3766     $Element->{$HTML_NS}->{font} = {
3767     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
3768     checker => $HTMLTransparentChecker,
3769     };
3770    
3771     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3772    
3773     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24