/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.26 - (hide annotations) (download)
Sat Jun 23 02:26:51 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.25: +11 -1 lines
++ whatpm/t/ChangeLog	23 Jun 2007 02:21:24 -0000
2007-06-23  Wakaba  <wakaba@suika.fam.cx>

	* Makefile, HTML-tokenizer.t, HTML-tree.t: New test
	files are added.

	* tokenize/, tree-construction/: Sync with latest html5lib
	trunk.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.18 require Whatpm::URIChecker;
5    
6 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
7     ## be applied to an in-memory representation (i.e. DOM)?
8    
9 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
10     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
11    
12     my $AttrChecker = {
13     $XML_NS => {
14 wakaba 1.13 space => sub {
15     my ($self, $attr) = @_;
16     my $value = $attr->value;
17     if ($value eq 'default' or $value eq 'preserve') {
18     #
19     } else {
20     ## NOTE: An XML "error"
21     $self->{onerror}->(node => $attr,
22     type => 'XML error:invalid xml:space value');
23     }
24     },
25     lang => sub {
26     ## NOTE: "The values of the attribute are language identifiers
27     ## as defined by [IETF RFC 3066], Tags for the Identification
28     ## of Languages, or its successor; in addition, the empty string
29     ## may be specified." ("may" in lower case)
30     ## TODO: xml:lang MUST NOT in HTML document
31     },
32     base => sub {
33     my ($self, $attr) = @_;
34     my $value = $attr->value;
35     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
36     $self->{onerror}->(node => $attr,
37     type => 'syntax error');
38     }
39 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
40     ## no author requirement on conformance in the XML Base specification.
41 wakaba 1.13 },
42     id => sub {
43     my ($self, $attr) = @_;
44     my $value = $attr->value;
45     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
46     $value =~ s/^\x20//;
47     $value =~ s/\x20$//;
48     ## TODO: NCName in XML 1.0 or 1.1
49     ## TODO: declared type is ID?
50     if ($self->{id}->{$value}) {
51     $self->{onerror}->(node => $attr, type => 'xml:id error:duplicate ID');
52     } else {
53     $self->{id}->{$value} = 1;
54     }
55     },
56 wakaba 1.9 },
57     $XMLNS_NS => {
58 wakaba 1.13 '' => sub {
59     my ($self, $attr) = @_;
60     my $ln = $attr->manakai_local_name;
61     my $value = $attr->value;
62     if ($value eq $XML_NS and $ln ne 'xml') {
63     $self->{onerror}
64     ->(node => $attr,
65     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
66     } elsif ($value eq $XMLNS_NS) {
67     $self->{onerror}
68     ->(node => $attr,
69     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
70     }
71     if ($ln eq 'xml' and $value ne $XML_NS) {
72     $self->{onerror}
73     ->(node => $attr,
74     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xml=');
75     } elsif ($ln eq 'xmlns') {
76     $self->{onerror}
77     ->(node => $attr,
78     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xmlns=');
79     }
80     ## TODO: If XML 1.0 and empty
81     },
82     xmlns => sub {
83     my ($self, $attr) = @_;
84     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
85     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
86 wakaba 1.18 ## TODO: relative references are deprecated
87 wakaba 1.13 my $value = $attr->value;
88     if ($value eq $XML_NS) {
89     $self->{onerror}
90     ->(node => $attr,
91     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
92     } elsif ($value eq $XMLNS_NS) {
93     $self->{onerror}
94     ->(node => $attr,
95     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
96     }
97     },
98 wakaba 1.9 },
99     };
100    
101 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
102 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
103     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
104     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
105     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
106    
107 wakaba 1.3 ## ANY
108     my $AnyChecker = sub {
109 wakaba 1.4 my ($self, $todo) = @_;
110     my $el = $todo->{node};
111     my $new_todos = [];
112 wakaba 1.3 my @nodes = (@{$el->child_nodes});
113     while (@nodes) {
114     my $node = shift @nodes;
115     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
116    
117     my $nt = $node->node_type;
118     if ($nt == 1) {
119     my $node_ns = $node->namespace_uri;
120     $node_ns = '' unless defined $node_ns;
121     my $node_ln = $node->manakai_local_name;
122     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
123     $self->{onerror}->(node => $node, type => 'element not allowed');
124     }
125 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
126 wakaba 1.3 } elsif ($nt == 5) {
127     unshift @nodes, @{$node->child_nodes};
128     }
129     }
130 wakaba 1.4 return ($new_todos);
131 wakaba 1.3 }; # $AnyChecker
132    
133 wakaba 1.1 my $ElementDefault = {
134     checker => sub {
135 wakaba 1.4 my ($self, $todo) = @_;
136     $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
137     return $AnyChecker->($self, $todo);
138 wakaba 1.1 },
139 wakaba 1.9 attrs_checker => sub {
140     my ($self, $todo) = @_;
141     for my $attr (@{$todo->{node}->attributes}) {
142     my $attr_ns = $attr->namespace_uri;
143     $attr_ns = '' unless defined $attr_ns;
144     my $attr_ln = $attr->manakai_local_name;
145     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
146     || $AttrChecker->{$attr_ns}->{''};
147     if ($checker) {
148     $checker->($self, $attr);
149 wakaba 1.17 } else {
150     $self->{onerror}->(node => $attr, type => 'attribute not supported');
151 wakaba 1.9 }
152     }
153     },
154 wakaba 1.1 };
155    
156     my $Element = {};
157    
158     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
159    
160 wakaba 1.7 my $HTMLMetadataElements = {
161     $HTML_NS => {
162     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1/,
163     },
164     };
165 wakaba 1.1
166 wakaba 1.2 my $HTMLSectioningElements = {
167     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
168     };
169 wakaba 1.1
170 wakaba 1.7 my $HTMLBlockLevelElements = {
171     $HTML_NS => {
172     qw/
173     section 1 nav 1 article 1 blockquote 1 aside 1
174     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
175     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
176     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
177     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
178     /,
179     },
180     };
181    
182     my $HTMLStrictlyInlineLevelElements = {
183     $HTML_NS => {
184     qw/
185     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
186     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
187     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
188     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
189     script 1 noscript 1 event-source 1 command 1 font 1
190     /,
191     },
192     };
193    
194     my $HTMLStructuredInlineLevelElements = {
195     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
196     };
197 wakaba 1.1
198 wakaba 1.6 my $HTMLInteractiveElements = {
199     $HTML_NS => {a => 1, details => 1, datagrid => 1},
200     };
201     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
202     ## of interactive elements
203 wakaba 1.1
204 wakaba 1.7 my $HTMLTransparentElements = {
205     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
206     ## NOTE: |html:noscript| is transparent if scripting is disabled.
207     };
208    
209     #my $HTMLSemiTransparentElements = {
210     # $HTML_NS => {qw/video 1 audio 1/},
211     #};
212    
213     my $HTMLEmbededElements = {
214     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
215     };
216 wakaba 1.1
217     ## Empty
218     my $HTMLEmptyChecker = sub {
219 wakaba 1.4 my ($self, $todo) = @_;
220     my $el = $todo->{node};
221     my $new_todos = [];
222 wakaba 1.1 my @nodes = (@{$el->child_nodes});
223    
224     while (@nodes) {
225     my $node = shift @nodes;
226 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
227    
228 wakaba 1.1 my $nt = $node->node_type;
229     if ($nt == 1) {
230 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
231     $self->{onerror}->(node => $node, type => 'element not allowed');
232     my ($sib, $ch) = $self->_check_get_children ($node);
233     unshift @nodes, @$sib;
234 wakaba 1.4 push @$new_todos, @$ch;
235 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
236 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
237     $self->{onerror}->(node => $node, type => 'character not allowed');
238     }
239 wakaba 1.1 } elsif ($nt == 5) {
240     unshift @nodes, @{$node->child_nodes};
241     }
242     }
243 wakaba 1.4 return ($new_todos);
244 wakaba 1.1 };
245    
246     ## Text
247     my $HTMLTextChecker = sub {
248 wakaba 1.4 my ($self, $todo) = @_;
249     my $el = $todo->{node};
250     my $new_todos = [];
251 wakaba 1.1 my @nodes = (@{$el->child_nodes});
252    
253     while (@nodes) {
254     my $node = shift @nodes;
255 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
256    
257 wakaba 1.1 my $nt = $node->node_type;
258     if ($nt == 1) {
259 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
260     $self->{onerror}->(node => $node, type => 'element not allowed');
261     my ($sib, $ch) = $self->_check_get_children ($node);
262     unshift @nodes, @$sib;
263 wakaba 1.4 push @$new_todos, @$ch;
264 wakaba 1.1 } elsif ($nt == 5) {
265     unshift @nodes, @{$node->child_nodes};
266     }
267     }
268 wakaba 1.4 return ($new_todos);
269 wakaba 1.1 };
270    
271     ## Zero or more |html:style| elements,
272     ## followed by zero or more block-level elements
273     my $HTMLStylableBlockChecker = sub {
274 wakaba 1.4 my ($self, $todo) = @_;
275     my $el = $todo->{node};
276     my $new_todos = [];
277 wakaba 1.1 my @nodes = (@{$el->child_nodes});
278    
279     my $has_non_style;
280     while (@nodes) {
281     my $node = shift @nodes;
282 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
283    
284 wakaba 1.1 my $nt = $node->node_type;
285     if ($nt == 1) {
286 wakaba 1.2 my $node_ns = $node->namespace_uri;
287     $node_ns = '' unless defined $node_ns;
288     my $node_ln = $node->manakai_local_name;
289 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
290 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
291 wakaba 1.6 $not_allowed = 1 if $has_non_style;
292 wakaba 1.7 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
293     $has_non_style = 1;
294 wakaba 1.1 } else {
295     $has_non_style = 1;
296 wakaba 1.7 $not_allowed = 1;
297 wakaba 1.1 }
298 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
299     if $not_allowed;
300 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
301     unshift @nodes, @$sib;
302 wakaba 1.4 push @$new_todos, @$ch;
303 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
304     if ($node->data =~ /[^\x09-\x0D\x20]/) {
305 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
306 wakaba 1.1 }
307     } elsif ($nt == 5) {
308     unshift @nodes, @{$node->child_nodes};
309     }
310     }
311 wakaba 1.4 return ($new_todos);
312 wakaba 1.1 }; # $HTMLStylableBlockChecker
313    
314     ## Zero or more block-level elements
315     my $HTMLBlockChecker = sub {
316 wakaba 1.4 my ($self, $todo) = @_;
317     my $el = $todo->{node};
318     my $new_todos = [];
319 wakaba 1.1 my @nodes = (@{$el->child_nodes});
320    
321     while (@nodes) {
322     my $node = shift @nodes;
323 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
324    
325 wakaba 1.1 my $nt = $node->node_type;
326     if ($nt == 1) {
327 wakaba 1.2 my $node_ns = $node->namespace_uri;
328     $node_ns = '' unless defined $node_ns;
329     my $node_ln = $node->manakai_local_name;
330 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
331 wakaba 1.7 $not_allowed = 1
332     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
333 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
334     if $not_allowed;
335 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
336     unshift @nodes, @$sib;
337 wakaba 1.4 push @$new_todos, @$ch;
338 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
339     if ($node->data =~ /[^\x09-\x0D\x20]/) {
340 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
341 wakaba 1.1 }
342     } elsif ($nt == 5) {
343     unshift @nodes, @{$node->child_nodes};
344     }
345     }
346 wakaba 1.4 return ($new_todos);
347 wakaba 1.1 }; # $HTMLBlockChecker
348    
349     ## Inline-level content
350     my $HTMLInlineChecker = sub {
351 wakaba 1.4 my ($self, $todo) = @_;
352     my $el = $todo->{node};
353     my $new_todos = [];
354 wakaba 1.1 my @nodes = (@{$el->child_nodes});
355    
356     while (@nodes) {
357     my $node = shift @nodes;
358 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
359    
360 wakaba 1.1 my $nt = $node->node_type;
361     if ($nt == 1) {
362 wakaba 1.2 my $node_ns = $node->namespace_uri;
363     $node_ns = '' unless defined $node_ns;
364     my $node_ln = $node->manakai_local_name;
365 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
366 wakaba 1.7 $not_allowed = 1
367     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
368     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
369 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
370     if $not_allowed;
371 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
372     unshift @nodes, @$sib;
373 wakaba 1.4 push @$new_todos, @$ch;
374 wakaba 1.1 } elsif ($nt == 5) {
375     unshift @nodes, @{$node->child_nodes};
376     }
377     }
378 wakaba 1.4
379     for (@$new_todos) {
380     $_->{inline} = 1;
381     }
382     return ($new_todos);
383     }; # $HTMLInlineChecker
384 wakaba 1.1
385     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
386     ## TODO: check significant content
387    
388     ## Strictly inline-level content
389     my $HTMLStrictlyInlineChecker = sub {
390 wakaba 1.4 my ($self, $todo) = @_;
391     my $el = $todo->{node};
392     my $new_todos = [];
393 wakaba 1.1 my @nodes = (@{$el->child_nodes});
394    
395     while (@nodes) {
396     my $node = shift @nodes;
397 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
398    
399 wakaba 1.1 my $nt = $node->node_type;
400     if ($nt == 1) {
401 wakaba 1.2 my $node_ns = $node->namespace_uri;
402     $node_ns = '' unless defined $node_ns;
403     my $node_ln = $node->manakai_local_name;
404 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
405 wakaba 1.7 $not_allowed = 1
406     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
407 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
408     if $not_allowed;
409 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
410     unshift @nodes, @$sib;
411 wakaba 1.4 push @$new_todos, @$ch;
412 wakaba 1.1 } elsif ($nt == 5) {
413     unshift @nodes, @{$node->child_nodes};
414     }
415     }
416 wakaba 1.4
417     for (@$new_todos) {
418     $_->{inline} = 1;
419     $_->{strictly_inline} = 1;
420     }
421     return ($new_todos);
422 wakaba 1.1 }; # $HTMLStrictlyInlineChecker
423    
424     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
425     ## TODO: check significant content
426    
427 wakaba 1.4 ## Inline-level or strictly inline-kevek content
428     my $HTMLInlineOrStrictlyInlineChecker = sub {
429     my ($self, $todo) = @_;
430     my $el = $todo->{node};
431     my $new_todos = [];
432     my @nodes = (@{$el->child_nodes});
433    
434     while (@nodes) {
435     my $node = shift @nodes;
436     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
437    
438     my $nt = $node->node_type;
439     if ($nt == 1) {
440     my $node_ns = $node->namespace_uri;
441     $node_ns = '' unless defined $node_ns;
442     my $node_ln = $node->manakai_local_name;
443 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
444 wakaba 1.7 if ($todo->{strictly_inline}) {
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
447     } else {
448     $not_allowed = 1
449     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
450     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
451     }
452 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
453     if $not_allowed;
454 wakaba 1.4 my ($sib, $ch) = $self->_check_get_children ($node);
455     unshift @nodes, @$sib;
456     push @$new_todos, @$ch;
457     } elsif ($nt == 5) {
458     unshift @nodes, @{$node->child_nodes};
459     }
460     }
461    
462     for (@$new_todos) {
463     $_->{inline} = 1;
464     $_->{strictly_inline} = 1;
465     }
466     return ($new_todos);
467     }; # $HTMLInlineOrStrictlyInlineChecker
468    
469 wakaba 1.6 my $HTMLSignificantInlineOrStrictlyInlineChecker
470     = $HTMLInlineOrStrictlyInlineChecker;
471     ## TODO: check significant content
472    
473 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
474 wakaba 1.4 my ($self, $todo) = @_;
475     my $el = $todo->{node};
476     my $new_todos = [];
477 wakaba 1.1 my @nodes = (@{$el->child_nodes});
478    
479     my $content = 'block-or-inline'; # or 'block' or 'inline'
480     my @block_not_inline;
481     while (@nodes) {
482     my $node = shift @nodes;
483 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
484    
485 wakaba 1.1 my $nt = $node->node_type;
486     if ($nt == 1) {
487 wakaba 1.2 my $node_ns = $node->namespace_uri;
488     $node_ns = '' unless defined $node_ns;
489     my $node_ln = $node->manakai_local_name;
490 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
491 wakaba 1.1 if ($content eq 'block') {
492 wakaba 1.7 $not_allowed = 1
493     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
494 wakaba 1.1 } elsif ($content eq 'inline') {
495 wakaba 1.7 $not_allowed = 1
496     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
497     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
498 wakaba 1.1 } else {
499 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
500     my $is_inline
501     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
502     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
503 wakaba 1.1
504 wakaba 1.6 push @block_not_inline, $node
505     if $is_block and not $is_inline and not $not_allowed;
506 wakaba 1.1 unless ($is_block) {
507     $content = 'inline';
508     for (@block_not_inline) {
509 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
510 wakaba 1.1 }
511 wakaba 1.6 $not_allowed = 1 unless $is_inline;
512 wakaba 1.1 }
513     }
514 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
515     if $not_allowed;
516 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
517     unshift @nodes, @$sib;
518 wakaba 1.4 push @$new_todos, @$ch;
519 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
520     if ($node->data =~ /[^\x09-\x0D\x20]/) {
521     if ($content eq 'block') {
522 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
523 wakaba 1.1 } else {
524     $content = 'inline';
525     for (@block_not_inline) {
526 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
527 wakaba 1.1 }
528     }
529     }
530     } elsif ($nt == 5) {
531     unshift @nodes, @{$node->child_nodes};
532     }
533     }
534 wakaba 1.4
535     if ($content eq 'inline') {
536     for (@$new_todos) {
537     $_->{inline} = 1;
538     }
539     }
540     return ($new_todos);
541 wakaba 1.1 };
542    
543 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
544     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
545     my ($elnsuri, $ellname) = @_;
546     return sub {
547 wakaba 1.4 my ($self, $todo) = @_;
548     my $el = $todo->{node};
549     my $new_todos = [];
550 wakaba 1.2 my @nodes = (@{$el->child_nodes});
551    
552     my $has_non_style;
553     my $content = 'block-or-inline'; # or 'block' or 'inline'
554     my @block_not_inline;
555     while (@nodes) {
556     my $node = shift @nodes;
557     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
558    
559     my $nt = $node->node_type;
560     if ($nt == 1) {
561     my $node_ns = $node->namespace_uri;
562     $node_ns = '' unless defined $node_ns;
563     my $node_ln = $node->manakai_local_name;
564 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
565 wakaba 1.8 if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
566 wakaba 1.6 $not_allowed = 1 if $has_non_style;
567 wakaba 1.2 } elsif ($content eq 'block') {
568     $has_non_style = 1;
569 wakaba 1.7 $not_allowed = 1
570     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
571 wakaba 1.2 } elsif ($content eq 'inline') {
572     $has_non_style = 1;
573 wakaba 1.7 $not_allowed = 1
574     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
575     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
576 wakaba 1.2 } else {
577     $has_non_style = 1;
578 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
579     my $is_inline
580     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
581     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
582 wakaba 1.2
583 wakaba 1.6 push @block_not_inline, $node
584     if $is_block and not $is_inline and not $not_allowed;
585 wakaba 1.2 unless ($is_block) {
586     $content = 'inline';
587     for (@block_not_inline) {
588     $self->{onerror}->(node => $_, type => 'element not allowed');
589     }
590 wakaba 1.6 $not_allowed = 1 unless $is_inline;
591 wakaba 1.1 }
592     }
593 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
594     if $not_allowed;
595 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
596     unshift @nodes, @$sib;
597 wakaba 1.4 push @$new_todos, @$ch;
598 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
599     if ($node->data =~ /[^\x09-\x0D\x20]/) {
600     $has_non_style = 1;
601     if ($content eq 'block') {
602     $self->{onerror}->(node => $node, type => 'character not allowed');
603     } else {
604     $content = 'inline';
605     for (@block_not_inline) {
606     $self->{onerror}->(node => $_, type => 'element not allowed');
607     }
608 wakaba 1.1 }
609     }
610 wakaba 1.2 } elsif ($nt == 5) {
611     unshift @nodes, @{$node->child_nodes};
612 wakaba 1.1 }
613     }
614 wakaba 1.4
615     if ($content eq 'inline') {
616     for (@$new_todos) {
617     $_->{inline} = 1;
618     }
619     }
620     return ($new_todos);
621 wakaba 1.2 };
622     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
623 wakaba 1.1
624     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
625    
626 wakaba 1.10 my $GetHTMLEnumeratedAttrChecker = sub {
627     my $states = shift; # {value => conforming ? 1 : -1}
628     return sub {
629     my ($self, $attr) = @_;
630     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
631     if ($states->{$value} > 0) {
632     #
633     } elsif ($states->{$value}) {
634     $self->{onerror}->(node => $attr,
635     type => 'non-conforming enumerated attribute value');
636     } else {
637     $self->{onerror}->(node => $attr,
638     type => 'invalid enumerated attribute value');
639     }
640     };
641     }; # $GetHTMLEnumeratedAttrChecker
642 wakaba 1.9
643 wakaba 1.10 my $GetHTMLBooleanAttrChecker = sub {
644     my $local_name = shift;
645     return sub {
646     my ($self, $attr) = @_;
647     my $value = $attr->value;
648     unless ($value eq $local_name or $value eq '') {
649     $self->{onerror}->(node => $attr,
650     type => 'invalid boolean attribute value');
651     }
652     };
653     }; # $GetHTMLBooleanAttrChecker
654 wakaba 1.9
655 wakaba 1.10 my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
656     my ($self, $attr) = @_;
657     my %word;
658     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
659     unless ($word{$word}) {
660     $word{$word} = 1;
661 wakaba 1.9 } else {
662 wakaba 1.10 $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
663 wakaba 1.9 }
664     }
665 wakaba 1.10 }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
666    
667 wakaba 1.20 ## |rel| attribute (unordered set of space separated tokens,
668     ## whose allowed values are defined by the section on link types)
669     my $HTMLLinkTypesAttrChecker = sub {
670     my ($a_or_area, $self, $attr) = @_;
671     my %word;
672     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
673     unless ($word{$word}) {
674     $word{$word} = 1;
675     } else {
676     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
677     }
678     }
679     ## NOTE: Case sensitive match (since HTML5 spec does not say link
680     ## types are case-insensitive and it says "The value should not
681     ## be confusingly similar to any other defined value (e.g.
682     ## differing only in case).").
683     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
684     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
685     ## values to be used conformingly.
686     require Whatpm::_LinkTypeList;
687     our $LinkType;
688     for my $word (keys %word) {
689     my $def = $LinkType->{$word};
690     if (defined $def) {
691     if ($def->{status} eq 'accepted') {
692     if (defined $def->{effect}->[$a_or_area]) {
693     #
694     } else {
695     $self->{onerror}->(node => $attr,
696     type => 'link type bad context:'.$word);
697     }
698     } elsif ($def->{status} eq 'proposal') {
699     $self->{onerror}->(node => $attr,
700     type => 'proposed link type:'.$word);
701     } else { # rejected or synonym
702     $self->{onerror}->(node => $attr,
703     type => 'non-conforming link type:'.$word);
704     }
705     if ($def->{unique}) {
706     unless ($self->{has_link_type}->{$word}) {
707     $self->{has_link_type}->{$word} = 1;
708     } else {
709     $self->{onerror}->(node => $attr,
710     type => 'link with type not unique:'.$word);
711     }
712     }
713     } else {
714     $self->{onerror}->(node => $attr,
715     type => 'link type not supported:'.$word);
716     }
717     }
718     ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
719     ## says that using both X-Pingback: header field and HTML
720     ## <link rel=pingback> is deprecated and if both appears they
721     ## SHOULD contain exactly the same value.
722     ## ISSUE: Pingback 1.0 specification defines the exact representation
723     ## of its link element, which cannot be tested by the current arch.
724     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
725     ## include any string that matches to the pattern for the rel=pingback link,
726     ## which again inpossible to test.
727     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
728     }; # $HTMLLinkTypesAttrChecker
729    
730 wakaba 1.18 ## URI (or IRI)
731 wakaba 1.11 my $HTMLURIAttrChecker = sub {
732     my ($self, $attr) = @_;
733 wakaba 1.15 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
734 wakaba 1.18 my $value = $attr->value;
735     Whatpm::URIChecker->check_iri_reference ($value, sub {
736     my %opt = @_;
737 wakaba 1.22 $self->{onerror}->(node => $attr,
738     type => 'URI:'.$opt{level}.':'.
739     (defined $opt{position} ? $opt{position} : '').':'.
740     $opt{type});
741 wakaba 1.18 });
742 wakaba 1.11 }; # $HTMLURIAttrChecker
743    
744 wakaba 1.15 ## A space separated list of one or more URIs (or IRIs)
745     my $HTMLSpaceURIsAttrChecker = sub {
746     my ($self, $attr) = @_;
747     ## TODO: URI or IRI check
748     ## ISSUE: Relative references?
749     ## ISSUE: Leading or trailing white spaces are conformant?
750     ## ISSUE: A sequence of white space characters are conformant?
751     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
752     ## NOTE: Duplication seems not an error.
753     }; # $HTMLSpaceURIsAttrChecker
754    
755 wakaba 1.11 my $HTMLIntegerAttrChecker = sub {
756     my ($self, $attr) = @_;
757     my $value = $attr->value;
758     unless ($value =~ /\A-?[0-9]+\z/) {
759 wakaba 1.15 $self->{onerror}->(node => $attr, type => 'integer syntax error');
760 wakaba 1.11 }
761     }; # $HTMLIntegerAttrChecker
762    
763 wakaba 1.12 my $GetHTMLNonNegativeIntegerAttrChecker = sub {
764     my $range_check = shift;
765     return sub {
766     my ($self, $attr) = @_;
767     my $value = $attr->value;
768     if ($value =~ /\A[0-9]+\z/) {
769     unless ($range_check->($value + 0)) {
770     $self->{onerror}->(node => $attr, type => 'out of range');
771     }
772     } else {
773 wakaba 1.15 $self->{onerror}->(node => $attr,
774     type => 'non-negative integer syntax error');
775 wakaba 1.12 }
776     };
777     }; # $GetHTMLNonNegativeIntegerAttrChecker
778    
779 wakaba 1.11 my $GetHTMLFloatingPointNumberAttrChecker = sub {
780     my $range_check = shift;
781     return sub {
782     my ($self, $attr) = @_;
783     my $value = $attr->value;
784     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
785     unless ($range_check->($value + 0)) {
786     $self->{onerror}->(node => $attr, type => 'out of range');
787     }
788     } else {
789 wakaba 1.15 $self->{onerror}->(node => $attr,
790     type => 'floating point number syntax error');
791 wakaba 1.11 }
792     };
793     }; # $GetHTMLFloatingPointNumberAttrChecker
794    
795 wakaba 1.15 ## "A valid MIME type, optionally with parameters. [RFC 2046]"
796     ## ISSUE: RFC 2046 does not define syntax of media types.
797     ## ISSUE: The definition of "a valid MIME type" is unknown.
798     ## Syntactical correctness?
799     my $HTMLIMTAttrChecker = sub {
800     my ($self, $attr) = @_;
801     my $value = $attr->value;
802     ## ISSUE: RFC 2045 Content-Type header field allows insertion
803     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
804     ## ISSUE: RFC 2231 extension? Maybe no.
805     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
806     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
807     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
808 wakaba 1.19 if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
809     my @type = ($1, $2);
810     my $param = $3;
811     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
812     if (defined $2) {
813     push @type, $1 => $2;
814     } else {
815     my $n = $1;
816     my $v = $2;
817     $v =~ s/\\(.)/$1/gs;
818     push @type, $n => $v;
819     }
820     }
821     require Whatpm::IMTChecker;
822     Whatpm::IMTChecker->check_imt (sub {
823     my %opt = @_;
824     $self->{onerror}->(node => $attr,
825     type => 'IMT:'.$opt{level}.':'.$opt{type});
826     }, @type);
827     } else {
828 wakaba 1.15 $self->{onerror}->(node => $attr, type => 'IMT syntax error');
829     }
830     }; # $HTMLIMTAttrChecker
831    
832 wakaba 1.17 my $HTMLLanguageTagAttrChecker = sub {
833     my ($self, $attr) = @_;
834     if ($attr->value eq '') {
835     $self->{onerror}->(node => $attr, type => 'language tag syntax error');
836     }
837     ## TODO: RFC 3066 test
838     ## ISSUE: RFC 4646 (3066bis)?
839     }; # $HTMLLanguageTagAttrChecker
840    
841     ## "A valid media query [MQ]"
842     my $HTMLMQAttrChecker = sub {
843     ## ISSUE: What is "a valid media query"?
844     }; # $HTMLMQAttrChecker
845    
846 wakaba 1.16 my $HTMLEventHandlerAttrChecker = sub {
847     ## TODO: MUST contain valid ECMAScript code matching the
848     ## ECMAScript |FunctionBody| production. [ECMA262]
849     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
850     ## ISSUE: Automatic semicolon insertion does not apply?
851     ## ISSUE: Other script languages?
852     }; # $HTMLEventHandlerAttrChecker
853    
854 wakaba 1.17 my $HTMLUsemapAttrChecker = sub {
855     my ($self, $attr) = @_;
856     ## MUST be a valid hashed ID reference to a |map| element
857     my $value = $attr->value;
858     if ($value =~ s/^#//) {
859     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
860     push @{$self->{usemap}}, [$value => $attr];
861     } else {
862     $self->{onerror}->(node => $attr, type => 'hashed idref syntax error');
863     }
864 wakaba 1.20 ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
865 wakaba 1.17 }; # $HTMLUsemapAttrChecker
866    
867     my $HTMLTargetAttrChecker = sub {
868     my ($self, $attr) = @_;
869     my $value = $attr->value;
870     if ($value =~ /^_/) {
871     $value = lc $value; ## ISSUE: ASCII case-insentitive?
872     unless ({
873     _self => 1, _parent => 1, _top => 1,
874     }->{$value}) {
875     $self->{onerror}->(node => $attr,
876     type => 'reserved browsing context name');
877     }
878     } else {
879     #$ ISSUE: An empty string is conforming?
880     }
881     }; # $HTMLTargetAttrChecker
882    
883 wakaba 1.10 my $HTMLAttrChecker = {
884     id => sub {
885 wakaba 1.17 ## NOTE: |map| has its own variant of |id=""| checker
886 wakaba 1.10 my ($self, $attr) = @_;
887     my $value = $attr->value;
888 wakaba 1.17 if (length $value > 0) {
889 wakaba 1.10 if ($self->{id}->{$value}) {
890     $self->{onerror}->(node => $attr, type => 'duplicate ID');
891     } else {
892     $self->{id}->{$value} = 1;
893     }
894 wakaba 1.26 ## TODO: no space characters <http://html5.org/tools/web-apps-tracker?from=880&to=881>
895 wakaba 1.17 } else {
896     ## NOTE: MUST contain at least one character
897     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
898 wakaba 1.10 }
899     },
900     title => sub {}, ## NOTE: No conformance creteria
901     lang => sub {
902 wakaba 1.17 ## TODO: RFC 3066 or empty test
903 wakaba 1.10 ## ISSUE: RFC 4646 (3066bis)?
904     ## TODO: HTML vs XHTML
905     },
906     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
907     class => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
908     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
909     ## TODO: tabindex
910     };
911    
912 wakaba 1.16 for (qw/
913     onabort onbeforeunload onblur onchange onclick oncontextmenu
914     ondblclick ondrag ondragend ondragenter ondragleave ondragover
915     ondragstart ondrop onerror onfocus onkeydown onkeypress
916     onkeyup onload onmessage onmousedown onmousemove onmouseout
917     onmouseover onmouseup onmousewheel onresize onscroll onselect
918     onsubmit onunload
919     /) {
920     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
921     }
922    
923 wakaba 1.10 my $GetHTMLAttrsChecker = sub {
924     my $element_specific_checker = shift;
925     return sub {
926     my ($self, $todo) = @_;
927     for my $attr (@{$todo->{node}->attributes}) {
928     my $attr_ns = $attr->namespace_uri;
929     $attr_ns = '' unless defined $attr_ns;
930     my $attr_ln = $attr->manakai_local_name;
931     my $checker;
932     if ($attr_ns eq '') {
933     $checker = $element_specific_checker->{$attr_ln}
934     || $HTMLAttrChecker->{$attr_ln};
935     }
936     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
937     || $AttrChecker->{$attr_ns}->{''};
938     if ($checker) {
939     $checker->($self, $attr);
940     } else {
941     $self->{onerror}->(node => $attr, type => 'attribute not supported');
942     ## ISSUE: No comformance createria for unknown attributes in the spec
943     }
944     }
945     };
946     }; # $GetHTMLAttrsChecker
947 wakaba 1.9
948     $Element->{$HTML_NS}->{''} = {
949 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
950 wakaba 1.9 checker => $ElementDefault->{checker},
951     };
952    
953 wakaba 1.1 $Element->{$HTML_NS}->{html} = {
954 wakaba 1.24 is_root => 1,
955 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
956     xmlns => sub {
957     my ($self, $attr) = @_;
958     my $value = $attr->value;
959     unless ($value eq $HTML_NS) {
960     $self->{onerror}->(node => $attr, type => 'syntax error');
961     ## TODO: only in HTML documents
962     }
963     },
964     }),
965 wakaba 1.1 checker => sub {
966 wakaba 1.4 my ($self, $todo) = @_;
967     my $el = $todo->{node};
968     my $new_todos = [];
969 wakaba 1.1 my @nodes = (@{$el->child_nodes});
970    
971     my $phase = 'before head';
972     while (@nodes) {
973     my $node = shift @nodes;
974 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
975    
976 wakaba 1.1 my $nt = $node->node_type;
977     if ($nt == 1) {
978 wakaba 1.2 my $node_ns = $node->namespace_uri;
979     $node_ns = '' unless defined $node_ns;
980     my $node_ln = $node->manakai_local_name;
981 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
982 wakaba 1.1 if ($phase eq 'before head') {
983 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
984 wakaba 1.1 $phase = 'after head';
985 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
986     $self->{onerror}->(node => $node, type => 'ps element missing:head');
987 wakaba 1.1 $phase = 'after body';
988     } else {
989 wakaba 1.6 $not_allowed = 1;
990 wakaba 1.1 # before head
991     }
992     } elsif ($phase eq 'after head') {
993 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
994 wakaba 1.1 $phase = 'after body';
995     } else {
996 wakaba 1.6 $not_allowed = 1;
997 wakaba 1.1 # after head
998     }
999     } else { #elsif ($phase eq 'after body') {
1000 wakaba 1.6 $not_allowed = 1;
1001 wakaba 1.1 # after body
1002     }
1003 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1004     if $not_allowed;
1005 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1006     unshift @nodes, @$sib;
1007 wakaba 1.4 push @$new_todos, @$ch;
1008 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1009     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1010 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1011 wakaba 1.1 }
1012     } elsif ($nt == 5) {
1013     unshift @nodes, @{$node->child_nodes};
1014     }
1015     }
1016 wakaba 1.3
1017     if ($phase eq 'before head') {
1018     $self->{onerror}->(node => $el, type => 'child element missing:head');
1019     $self->{onerror}->(node => $el, type => 'child element missing:body');
1020     } elsif ($phase eq 'after head') {
1021     $self->{onerror}->(node => $el, type => 'child element missing:body');
1022     }
1023    
1024 wakaba 1.4 return ($new_todos);
1025 wakaba 1.1 },
1026     };
1027    
1028     $Element->{$HTML_NS}->{head} = {
1029 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1030 wakaba 1.1 checker => sub {
1031 wakaba 1.4 my ($self, $todo) = @_;
1032     my $el = $todo->{node};
1033     my $new_todos = [];
1034 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1035    
1036     my $has_title;
1037 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
1038 wakaba 1.1 while (@nodes) {
1039     my $node = shift @nodes;
1040 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1041    
1042 wakaba 1.1 my $nt = $node->node_type;
1043     if ($nt == 1) {
1044 wakaba 1.2 my $node_ns = $node->namespace_uri;
1045     $node_ns = '' unless defined $node_ns;
1046     my $node_ln = $node->manakai_local_name;
1047 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1048 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1049 wakaba 1.3 $phase = 'after base';
1050 wakaba 1.1 unless ($has_title) {
1051     $has_title = 1;
1052     } else {
1053 wakaba 1.6 $not_allowed = 1;
1054 wakaba 1.1 }
1055 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1056 wakaba 1.1 if ($node->has_attribute_ns (undef, 'charset')) {
1057 wakaba 1.3 if ($phase eq 'initial') {
1058     $phase = 'after charset';
1059 wakaba 1.1 } else {
1060 wakaba 1.6 $not_allowed = 1;
1061 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
1062 wakaba 1.1 }
1063     } else {
1064 wakaba 1.3 $phase = 'after base';
1065 wakaba 1.1 }
1066 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1067 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
1068     $phase = 'after base';
1069 wakaba 1.1 } else {
1070 wakaba 1.6 $not_allowed = 1;
1071 wakaba 1.1 }
1072 wakaba 1.7 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1073     $phase = 'after base';
1074 wakaba 1.1 } else {
1075 wakaba 1.7 $not_allowed = 1;
1076 wakaba 1.1 }
1077 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1078     if $not_allowed;
1079 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1080     unshift @nodes, @$sib;
1081 wakaba 1.4 push @$new_todos, @$ch;
1082 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1083     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1084 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1085 wakaba 1.1 }
1086     } elsif ($nt == 5) {
1087     unshift @nodes, @{$node->child_nodes};
1088     }
1089     }
1090     unless ($has_title) {
1091 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
1092 wakaba 1.1 }
1093 wakaba 1.4 return ($new_todos);
1094 wakaba 1.1 },
1095     };
1096    
1097     $Element->{$HTML_NS}->{title} = {
1098 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1099 wakaba 1.1 checker => $HTMLTextChecker,
1100     };
1101    
1102     $Element->{$HTML_NS}->{base} = {
1103 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1104 wakaba 1.11 href => $HTMLURIAttrChecker,
1105 wakaba 1.17 target => $HTMLTargetAttrChecker,
1106 wakaba 1.10 }),
1107 wakaba 1.1 checker => $HTMLEmptyChecker,
1108     };
1109    
1110     $Element->{$HTML_NS}->{link} = {
1111 wakaba 1.16 attrs_checker => sub {
1112     my ($self, $todo) = @_;
1113     $GetHTMLAttrsChecker->({
1114     href => $HTMLURIAttrChecker,
1115 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(0, @_) },
1116 wakaba 1.17 media => $HTMLMQAttrChecker,
1117     hreflang => $HTMLLanguageTagAttrChecker,
1118 wakaba 1.16 type => $HTMLIMTAttrChecker,
1119     ## NOTE: Though |title| has special semantics,
1120     ## syntactically same as the |title| as global attribute.
1121     })->($self, $todo);
1122     unless ($todo->{node}->has_attribute_ns (undef, 'href')) {
1123     $self->{onerror}->(node => $todo->{node},
1124     type => 'attribute missing:href');
1125     }
1126     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1127     $self->{onerror}->(node => $todo->{node},
1128     type => 'attribute missing:rel');
1129     }
1130     },
1131 wakaba 1.1 checker => $HTMLEmptyChecker,
1132     };
1133    
1134     $Element->{$HTML_NS}->{meta} = {
1135 wakaba 1.10 attrs_checker => sub {
1136     my ($self, $todo) = @_;
1137     my $name_attr;
1138     my $http_equiv_attr;
1139     my $charset_attr;
1140     my $content_attr;
1141     for my $attr (@{$todo->{node}->attributes}) {
1142     my $attr_ns = $attr->namespace_uri;
1143     $attr_ns = '' unless defined $attr_ns;
1144     my $attr_ln = $attr->manakai_local_name;
1145     my $checker;
1146     if ($attr_ns eq '') {
1147     if ($attr_ln eq 'content') {
1148     $content_attr = $attr;
1149     $checker = 1;
1150     } elsif ($attr_ln eq 'name') {
1151     $name_attr = $attr;
1152     $checker = 1;
1153     } elsif ($attr_ln eq 'http-equiv') {
1154     $http_equiv_attr = $attr;
1155     $checker = 1;
1156     } elsif ($attr_ln eq 'charset') {
1157     $charset_attr = $attr;
1158     $checker = 1;
1159     } else {
1160     $checker = $HTMLAttrChecker->{$attr_ln}
1161     || $AttrChecker->{$attr_ns}->{$attr_ln}
1162     || $AttrChecker->{$attr_ns}->{''};
1163     }
1164     } else {
1165     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1166     || $AttrChecker->{$attr_ns}->{''};
1167     }
1168     if ($checker) {
1169     $checker->($self, $attr) if ref $checker;
1170     } else {
1171     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1172     ## ISSUE: No comformance createria for unknown attributes in the spec
1173     }
1174     }
1175    
1176     if (defined $name_attr) {
1177     if (defined $http_equiv_attr) {
1178     $self->{onerror}->(node => $http_equiv_attr,
1179     type => 'attribute not allowed');
1180     } elsif (defined $charset_attr) {
1181     $self->{onerror}->(node => $charset_attr,
1182     type => 'attribute not allowed');
1183     }
1184     my $metadata_name = $name_attr->value;
1185     my $metadata_value;
1186     if (defined $content_attr) {
1187     $metadata_value = $content_attr->value;
1188     } else {
1189     $self->{onerror}->(node => $todo->{node},
1190     type => 'attribute missing:content');
1191     $metadata_value = '';
1192     }
1193     } elsif (defined $http_equiv_attr) {
1194     if (defined $charset_attr) {
1195     $self->{onerror}->(node => $charset_attr,
1196     type => 'attribute not allowed');
1197     }
1198     unless (defined $content_attr) {
1199     $self->{onerror}->(node => $todo->{node},
1200     type => 'attribute missing:content');
1201     }
1202     } elsif (defined $charset_attr) {
1203     if (defined $content_attr) {
1204     $self->{onerror}->(node => $content_attr,
1205     type => 'attribute not allowed');
1206     }
1207     ## TODO: Allowed only in HTML documents
1208     } else {
1209     if (defined $content_attr) {
1210     $self->{onerror}->(node => $content_attr,
1211     type => 'attribute not allowed');
1212     $self->{onerror}->(node => $todo->{node},
1213     type => 'attribute missing:name|http-equiv');
1214     } else {
1215     $self->{onerror}->(node => $todo->{node},
1216     type => 'attribute missing:name|http-equiv|charset');
1217     }
1218     }
1219    
1220     ## TODO: metadata conformance
1221    
1222     ## TODO: pragma conformance
1223     if (defined $http_equiv_attr) { ## An enumerated attribute
1224     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1225     if ({
1226     'refresh' => 1,
1227     'default-style' => 1,
1228     }->{$keyword}) {
1229     #
1230     } else {
1231     $self->{onerror}->(node => $http_equiv_attr,
1232     type => 'invalid enumerated attribute value');
1233     }
1234     }
1235    
1236     ## TODO: charset
1237     },
1238 wakaba 1.1 checker => $HTMLEmptyChecker,
1239     };
1240    
1241     ## NOTE: |html:style| has no conformance creteria on content model
1242 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
1243 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1244 wakaba 1.15 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1245 wakaba 1.17 media => $HTMLMQAttrChecker,
1246 wakaba 1.10 scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1247     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1248     ## not different
1249     }),
1250 wakaba 1.3 checker => $AnyChecker,
1251     };
1252 wakaba 1.1
1253     $Element->{$HTML_NS}->{body} = {
1254 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1255 wakaba 1.1 checker => $HTMLBlockChecker,
1256     };
1257    
1258     $Element->{$HTML_NS}->{section} = {
1259 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1260 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1261     };
1262    
1263     $Element->{$HTML_NS}->{nav} = {
1264 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1265 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1266     };
1267    
1268     $Element->{$HTML_NS}->{article} = {
1269 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1270 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1271     };
1272    
1273     $Element->{$HTML_NS}->{blockquote} = {
1274 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1275     cite => $HTMLURIAttrChecker,
1276     }),
1277 wakaba 1.1 checker => $HTMLBlockChecker,
1278     };
1279    
1280     $Element->{$HTML_NS}->{aside} = {
1281 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1282 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1283 wakaba 1.1 };
1284    
1285     $Element->{$HTML_NS}->{h1} = {
1286 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1287 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1288     };
1289    
1290     $Element->{$HTML_NS}->{h2} = {
1291 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1292 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1293     };
1294    
1295     $Element->{$HTML_NS}->{h3} = {
1296 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1297 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1298     };
1299    
1300     $Element->{$HTML_NS}->{h4} = {
1301 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1302 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1303     };
1304    
1305     $Element->{$HTML_NS}->{h5} = {
1306 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1307 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1308     };
1309    
1310     $Element->{$HTML_NS}->{h6} = {
1311 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1312 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1313     };
1314    
1315     ## TODO: header
1316    
1317 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
1318 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1319 wakaba 1.2 checker => sub { ## block -hn -header -footer -sectioning or inline
1320 wakaba 1.4 my ($self, $todo) = @_;
1321     my $el = $todo->{node};
1322     my $new_todos = [];
1323 wakaba 1.2 my @nodes = (@{$el->child_nodes});
1324    
1325     my $content = 'block-or-inline'; # or 'block' or 'inline'
1326     my @block_not_inline;
1327     while (@nodes) {
1328     my $node = shift @nodes;
1329     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1330    
1331     my $nt = $node->node_type;
1332     if ($nt == 1) {
1333     my $node_ns = $node->namespace_uri;
1334     $node_ns = '' unless defined $node_ns;
1335     my $node_ln = $node->manakai_local_name;
1336 wakaba 1.6 my $not_allowed;
1337 wakaba 1.2 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1338 wakaba 1.6 $not_allowed = 1;
1339 wakaba 1.2 } elsif ($node_ns eq $HTML_NS and
1340     {
1341     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1342     }->{$node_ln}) {
1343 wakaba 1.6 $not_allowed = 1;
1344 wakaba 1.2 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1345 wakaba 1.6 $not_allowed = 1;
1346 wakaba 1.2 }
1347     if ($content eq 'block') {
1348 wakaba 1.7 $not_allowed = 1
1349     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1350 wakaba 1.2 } elsif ($content eq 'inline') {
1351 wakaba 1.7 $not_allowed = 1
1352     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1353     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1354 wakaba 1.2 } else {
1355 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1356     my $is_inline
1357     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1358     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1359 wakaba 1.2
1360 wakaba 1.6 push @block_not_inline, $node
1361     if $is_block and not $is_inline and not $not_allowed;
1362 wakaba 1.2 unless ($is_block) {
1363     $content = 'inline';
1364     for (@block_not_inline) {
1365     $self->{onerror}->(node => $_, type => 'element not allowed');
1366     }
1367 wakaba 1.6 $not_allowed = 1 unless $is_inline;
1368 wakaba 1.2 }
1369     }
1370 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1371     if $not_allowed;
1372 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1373     unshift @nodes, @$sib;
1374 wakaba 1.4 push @$new_todos, @$ch;
1375 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
1376     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1377     if ($content eq 'block') {
1378     $self->{onerror}->(node => $node, type => 'character not allowed');
1379     } else {
1380     $content = 'inline';
1381     for (@block_not_inline) {
1382     $self->{onerror}->(node => $_, type => 'element not allowed');
1383     }
1384     }
1385     }
1386     } elsif ($nt == 5) {
1387     unshift @nodes, @{$node->child_nodes};
1388     }
1389     }
1390    
1391     my $end = $self->_add_minuses
1392     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1393     $HTMLSectioningElements);
1394 wakaba 1.4 push @$new_todos, $end;
1395 wakaba 1.2
1396 wakaba 1.4 if ($content eq 'inline') {
1397     for (@$new_todos) {
1398     $_->{inline} = 1;
1399     }
1400     }
1401    
1402     return ($new_todos);
1403 wakaba 1.2 },
1404     };
1405 wakaba 1.1
1406     $Element->{$HTML_NS}->{address} = {
1407 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1408 wakaba 1.1 checker => $HTMLInlineChecker,
1409     };
1410    
1411     $Element->{$HTML_NS}->{p} = {
1412 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1413 wakaba 1.1 checker => $HTMLSignificantInlineChecker,
1414     };
1415    
1416     $Element->{$HTML_NS}->{hr} = {
1417 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1418 wakaba 1.1 checker => $HTMLEmptyChecker,
1419     };
1420    
1421     $Element->{$HTML_NS}->{br} = {
1422 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1423 wakaba 1.1 checker => $HTMLEmptyChecker,
1424     };
1425    
1426     $Element->{$HTML_NS}->{dialog} = {
1427 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1428 wakaba 1.1 checker => sub {
1429 wakaba 1.4 my ($self, $todo) = @_;
1430     my $el = $todo->{node};
1431     my $new_todos = [];
1432 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1433    
1434     my $phase = 'before dt';
1435     while (@nodes) {
1436     my $node = shift @nodes;
1437 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1438    
1439 wakaba 1.1 my $nt = $node->node_type;
1440     if ($nt == 1) {
1441 wakaba 1.8 my $node_ns = $node->namespace_uri;
1442     $node_ns = '' unless defined $node_ns;
1443     my $node_ln = $node->manakai_local_name;
1444 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1445 wakaba 1.1 if ($phase eq 'before dt') {
1446 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1447 wakaba 1.1 $phase = 'before dd';
1448 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1449 wakaba 1.2 $self->{onerror}
1450 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1451 wakaba 1.1 $phase = 'before dt';
1452     } else {
1453 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1454 wakaba 1.1 }
1455     } else { # before dd
1456 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1457 wakaba 1.1 $phase = 'before dt';
1458 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1459 wakaba 1.2 $self->{onerror}
1460 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
1461 wakaba 1.1 $phase = 'before dd';
1462     } else {
1463 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1464 wakaba 1.1 }
1465     }
1466 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1467     unshift @nodes, @$sib;
1468 wakaba 1.4 push @$new_todos, @$ch;
1469 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1470     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1471 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1472 wakaba 1.1 }
1473     } elsif ($nt == 5) {
1474     unshift @nodes, @{$node->child_nodes};
1475     }
1476     }
1477     if ($phase eq 'before dd') {
1478 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1479 wakaba 1.1 }
1480 wakaba 1.4 return ($new_todos);
1481 wakaba 1.1 },
1482     };
1483    
1484     $Element->{$HTML_NS}->{pre} = {
1485 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1486 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1487     };
1488    
1489     $Element->{$HTML_NS}->{ol} = {
1490 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1491     start => $HTMLIntegerAttrChecker,
1492     }),
1493 wakaba 1.1 checker => sub {
1494 wakaba 1.4 my ($self, $todo) = @_;
1495     my $el = $todo->{node};
1496     my $new_todos = [];
1497 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1498    
1499     while (@nodes) {
1500     my $node = shift @nodes;
1501 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1502    
1503 wakaba 1.1 my $nt = $node->node_type;
1504     if ($nt == 1) {
1505 wakaba 1.8 my $node_ns = $node->namespace_uri;
1506     $node_ns = '' unless defined $node_ns;
1507     my $node_ln = $node->manakai_local_name;
1508 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1509 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1510 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1511 wakaba 1.1 }
1512 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1513     unshift @nodes, @$sib;
1514 wakaba 1.4 push @$new_todos, @$ch;
1515 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1516     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1517 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1518 wakaba 1.1 }
1519     } elsif ($nt == 5) {
1520     unshift @nodes, @{$node->child_nodes};
1521     }
1522     }
1523 wakaba 1.4
1524     if ($todo->{inline}) {
1525     for (@$new_todos) {
1526     $_->{inline} = 1;
1527     }
1528     }
1529     return ($new_todos);
1530 wakaba 1.1 },
1531     };
1532    
1533     $Element->{$HTML_NS}->{ul} = {
1534 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1535 wakaba 1.1 checker => $Element->{$HTML_NS}->{ol}->{checker},
1536     };
1537    
1538 wakaba 1.5
1539     $Element->{$HTML_NS}->{li} = {
1540 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1541     start => sub {
1542     my ($self, $attr) = @_;
1543     my $parent = $attr->owner_element->manakai_parent_element;
1544     if (defined $parent) {
1545     my $parent_ns = $parent->namespace_uri;
1546     $parent_ns = '' unless defined $parent_ns;
1547     my $parent_ln = $parent->manakai_local_name;
1548     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1549     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1550     }
1551     }
1552     $HTMLIntegerAttrChecker->($self, $attr);
1553     },
1554     }),
1555 wakaba 1.5 checker => sub {
1556     my ($self, $todo) = @_;
1557     if ($todo->{inline}) {
1558     return $HTMLInlineChecker->($self, $todo);
1559     } else {
1560     return $HTMLBlockOrInlineChecker->($self, $todo);
1561     }
1562     },
1563     };
1564 wakaba 1.1
1565     $Element->{$HTML_NS}->{dl} = {
1566 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1567 wakaba 1.1 checker => sub {
1568 wakaba 1.4 my ($self, $todo) = @_;
1569     my $el = $todo->{node};
1570     my $new_todos = [];
1571 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1572    
1573     my $phase = 'before dt';
1574     while (@nodes) {
1575     my $node = shift @nodes;
1576 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1577    
1578 wakaba 1.1 my $nt = $node->node_type;
1579     if ($nt == 1) {
1580 wakaba 1.8 my $node_ns = $node->namespace_uri;
1581     $node_ns = '' unless defined $node_ns;
1582     my $node_ln = $node->manakai_local_name;
1583 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1584 wakaba 1.1 if ($phase eq 'in dds') {
1585 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1586 wakaba 1.1 #$phase = 'in dds';
1587 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1588 wakaba 1.1 $phase = 'in dts';
1589     } else {
1590 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1591 wakaba 1.1 }
1592     } elsif ($phase eq 'in dts') {
1593 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1594 wakaba 1.1 #$phase = 'in dts';
1595 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1596 wakaba 1.1 $phase = 'in dds';
1597     } else {
1598 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1599 wakaba 1.1 }
1600     } else { # before dt
1601 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1602 wakaba 1.1 $phase = 'in dts';
1603 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1604 wakaba 1.2 $self->{onerror}
1605 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1606 wakaba 1.1 $phase = 'in dds';
1607     } else {
1608 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1609 wakaba 1.1 }
1610     }
1611 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1612     unshift @nodes, @$sib;
1613 wakaba 1.4 push @$new_todos, @$ch;
1614 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1615     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1616 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1617 wakaba 1.1 }
1618     } elsif ($nt == 5) {
1619     unshift @nodes, @{$node->child_nodes};
1620     }
1621     }
1622     if ($phase eq 'in dts') {
1623 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1624 wakaba 1.1 }
1625 wakaba 1.4
1626     if ($todo->{inline}) {
1627     for (@$new_todos) {
1628     $_->{inline} = 1;
1629     }
1630     }
1631     return ($new_todos);
1632 wakaba 1.1 },
1633     };
1634    
1635     $Element->{$HTML_NS}->{dt} = {
1636 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1637 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1638     };
1639    
1640 wakaba 1.4 $Element->{$HTML_NS}->{dd} = {
1641 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1642 wakaba 1.5 checker => $Element->{$HTML_NS}->{li}->{checker},
1643 wakaba 1.4 };
1644 wakaba 1.1
1645 wakaba 1.6 $Element->{$HTML_NS}->{a} = {
1646 wakaba 1.17 attrs_checker => sub {
1647     my ($self, $todo) = @_;
1648 wakaba 1.15 my %attr;
1649 wakaba 1.17 for my $attr (@{$todo->{node}->attributes}) {
1650     my $attr_ns = $attr->namespace_uri;
1651     $attr_ns = '' unless defined $attr_ns;
1652     my $attr_ln = $attr->manakai_local_name;
1653     my $checker;
1654     if ($attr_ns eq '') {
1655     $checker = {
1656     target => $HTMLTargetAttrChecker,
1657     href => $HTMLURIAttrChecker,
1658     ping => $HTMLSpaceURIsAttrChecker,
1659 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(1, @_) },
1660 wakaba 1.17 media => $HTMLMQAttrChecker,
1661     hreflang => $HTMLLanguageTagAttrChecker,
1662     type => $HTMLIMTAttrChecker,
1663     }->{$attr_ln};
1664     if ($checker) {
1665     $attr{$attr_ln} = $attr;
1666     } else {
1667     $checker = $HTMLAttrChecker->{$attr_ln};
1668     }
1669     }
1670     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1671     || $AttrChecker->{$attr_ns}->{''};
1672     if ($checker) {
1673     $checker->($self, $attr) if ref $checker;
1674     } else {
1675     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1676     ## ISSUE: No comformance createria for unknown attributes in the spec
1677     }
1678     }
1679    
1680     unless (defined $attr{href}) {
1681     for (qw/target ping rel media hreflang type/) {
1682     if (defined $attr{$_}) {
1683     $self->{onerror}->(node => $attr{$_},
1684     type => 'attribute not allowed');
1685 wakaba 1.15 }
1686     }
1687 wakaba 1.17 }
1688 wakaba 1.15 },
1689 wakaba 1.6 checker => sub {
1690     my ($self, $todo) = @_;
1691    
1692     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1693     my ($sib, $ch)
1694     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1695     push @$sib, $end;
1696     return ($sib, $ch);
1697     },
1698     };
1699 wakaba 1.1
1700 wakaba 1.4 $Element->{$HTML_NS}->{q} = {
1701 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1702     cite => $HTMLURIAttrChecker,
1703     }),
1704 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1705     };
1706 wakaba 1.1
1707     $Element->{$HTML_NS}->{cite} = {
1708 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1709 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1710     };
1711    
1712 wakaba 1.4 $Element->{$HTML_NS}->{em} = {
1713 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1714 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1715     };
1716    
1717     $Element->{$HTML_NS}->{strong} = {
1718 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1719 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1720     };
1721 wakaba 1.1
1722 wakaba 1.4 $Element->{$HTML_NS}->{small} = {
1723 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1724 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1725     };
1726    
1727     $Element->{$HTML_NS}->{m} = {
1728 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1729 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1730     };
1731    
1732 wakaba 1.11 $Element->{$HTML_NS}->{dfn} = { ## TODO: term duplication
1733 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1734 wakaba 1.4 checker => sub {
1735     my ($self, $todo) = @_;
1736    
1737     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1738     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1739     push @$sib, $end;
1740     return ($sib, $ch);
1741     },
1742     };
1743 wakaba 1.1
1744     $Element->{$HTML_NS}->{abbr} = {
1745 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1746     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1747     ## not different. The spec says that the |title| MAY be omitted
1748     ## if there is a |dfn| whose defining term is the abbreviation,
1749     ## but it does not prohibit |abbr| w/o |title| in other cases.
1750     }),
1751 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1752     };
1753    
1754 wakaba 1.11 $Element->{$HTML_NS}->{time} = { ## TODO: validate content
1755     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO: datetime
1756 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1757     };
1758    
1759 wakaba 1.11 $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1760     attrs_checker => $GetHTMLAttrsChecker->({
1761     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1762     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1763     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1764     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1765     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1766     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1767     }),
1768 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1769     };
1770    
1771 wakaba 1.11 $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1772     attrs_checker => $GetHTMLAttrsChecker->({
1773     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1774     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1775     }),
1776 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1777     };
1778    
1779 wakaba 1.4 $Element->{$HTML_NS}->{code} = {
1780 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1781 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1782     ## syntatically same as the |title| as global attribute.
1783 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1784     };
1785 wakaba 1.1
1786     $Element->{$HTML_NS}->{var} = {
1787 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1788 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1789     ## syntatically same as the |title| as global attribute.
1790 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1791     };
1792    
1793 wakaba 1.4 $Element->{$HTML_NS}->{samp} = {
1794 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1795 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1796     ## syntatically same as the |title| as global attribute.
1797 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1798     };
1799 wakaba 1.1
1800     $Element->{$HTML_NS}->{kbd} = {
1801 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1802 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1803     };
1804    
1805     $Element->{$HTML_NS}->{sub} = {
1806 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1807 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1808     };
1809    
1810     $Element->{$HTML_NS}->{sup} = {
1811 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1812 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1813     };
1814    
1815 wakaba 1.4 $Element->{$HTML_NS}->{span} = {
1816 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1817 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1818     ## syntatically same as the |title| as global attribute.
1819 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1820     };
1821 wakaba 1.1
1822     $Element->{$HTML_NS}->{i} = {
1823 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1824 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1825     ## syntatically same as the |title| as global attribute.
1826 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1827     };
1828    
1829     $Element->{$HTML_NS}->{b} = {
1830 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1831 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1832     };
1833    
1834     $Element->{$HTML_NS}->{bdo} = {
1835 wakaba 1.12 attrs_checker => sub {
1836     my ($self, $todo) = @_;
1837     $GetHTMLAttrsChecker->({})->($self, $todo);
1838     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
1839     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
1840     }
1841     },
1842     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1843 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1844     };
1845    
1846     $Element->{$HTML_NS}->{ins} = {
1847 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1848     cite => $HTMLURIAttrChecker,
1849     ## TODO: datetime
1850     }),
1851 wakaba 1.1 checker => $HTMLTransparentChecker,
1852     };
1853    
1854     $Element->{$HTML_NS}->{del} = {
1855 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1856     cite => $HTMLURIAttrChecker,
1857     ## TODO: datetime
1858     }),
1859 wakaba 1.1 checker => sub {
1860 wakaba 1.4 my ($self, $todo) = @_;
1861 wakaba 1.1
1862 wakaba 1.4 my $parent = $todo->{node}->manakai_parent_element;
1863 wakaba 1.1 if (defined $parent) {
1864     my $nsuri = $parent->namespace_uri;
1865     $nsuri = '' unless defined $nsuri;
1866     my $ln = $parent->manakai_local_name;
1867     my $eldef = $Element->{$nsuri}->{$ln} ||
1868     $Element->{$nsuri}->{''} ||
1869     $ElementDefault;
1870 wakaba 1.4 return $eldef->{checker}->($self, $todo);
1871 wakaba 1.1 } else {
1872 wakaba 1.4 return $HTMLBlockOrInlineChecker->($self, $todo);
1873 wakaba 1.1 }
1874     },
1875     };
1876    
1877     ## TODO: figure
1878    
1879     $Element->{$HTML_NS}->{img} = {
1880 wakaba 1.17 attrs_checker => sub {
1881     my ($self, $todo) = @_;
1882     $GetHTMLAttrsChecker->({
1883     alt => sub { }, ## NOTE: No syntactical requirement
1884     src => $HTMLURIAttrChecker,
1885     usemap => $HTMLUsemapAttrChecker,
1886     ismap => $GetHTMLBooleanAttrChecker->('ismap'), ## TODO: MUST ancestor <a>
1887     ## TODO: height
1888     ## TODO: width
1889     })->($self, $todo);
1890     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
1891     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
1892     }
1893     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
1894     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
1895     }
1896     },
1897 wakaba 1.1 checker => $HTMLEmptyChecker,
1898     };
1899    
1900     $Element->{$HTML_NS}->{iframe} = {
1901 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1902     src => $HTMLURIAttrChecker,
1903     }),
1904 wakaba 1.1 checker => $HTMLTextChecker,
1905     };
1906    
1907     $Element->{$HTML_NS}->{embed} = {
1908 wakaba 1.16 attrs_checker => sub {
1909     my ($self, $todo) = @_;
1910     my $has_src;
1911     for my $attr (@{$todo->{node}->attributes}) {
1912     my $attr_ns = $attr->namespace_uri;
1913     $attr_ns = '' unless defined $attr_ns;
1914     my $attr_ln = $attr->manakai_local_name;
1915     my $checker;
1916     if ($attr_ns eq '') {
1917     if ($attr_ln eq 'src') {
1918     $checker = $HTMLURIAttrChecker;
1919     $has_src = 1;
1920     } elsif ($attr_ln eq 'type') {
1921     $checker = $HTMLIMTAttrChecker;
1922     } else {
1923     ## TODO: height
1924     ## TODO: width
1925     $checker = $HTMLAttrChecker->{$attr_ln}
1926     || sub { }; ## NOTE: Any local attribute is ok.
1927     }
1928     }
1929     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1930     || $AttrChecker->{$attr_ns}->{''};
1931     if ($checker) {
1932     $checker->($self, $attr);
1933     } else {
1934     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1935     ## ISSUE: No comformance createria for global attributes in the spec
1936     }
1937     }
1938    
1939     unless ($has_src) {
1940     $self->{onerror}->(node => $todo->{node},
1941     type => 'attribute missing:src');
1942     }
1943     },
1944 wakaba 1.1 checker => $HTMLEmptyChecker,
1945     };
1946    
1947 wakaba 1.15 $Element->{$HTML_NS}->{object} = {
1948 wakaba 1.17 attrs_checker => sub {
1949     my ($self, $todo) = @_;
1950     $GetHTMLAttrsChecker->({
1951     data => $HTMLURIAttrChecker,
1952     type => $HTMLIMTAttrChecker,
1953     usemap => $HTMLUsemapAttrChecker,
1954     ## TODO: width
1955     ## TODO: height
1956     })->($self, $todo);
1957     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
1958     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
1959     $self->{onerror}->(node => $todo->{node},
1960     type => 'attribute missing:data|type');
1961     }
1962     }
1963     },
1964 wakaba 1.15 checker => $ElementDefault->{checker}, ## TODO
1965     };
1966    
1967 wakaba 1.1 $Element->{$HTML_NS}->{param} = {
1968 wakaba 1.12 attrs_checker => sub {
1969     my ($self, $todo) = @_;
1970     $GetHTMLAttrsChecker->({
1971     name => sub { },
1972     value => sub { },
1973     })->($self, $todo);
1974     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
1975     $self->{onerror}->(node => $todo->{node},
1976     type => 'attribute missing:name');
1977     }
1978     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
1979     $self->{onerror}->(node => $todo->{node},
1980     type => 'attribute missing:value');
1981     }
1982     },
1983 wakaba 1.1 checker => $HTMLEmptyChecker,
1984     };
1985    
1986 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
1987 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1988     src => $HTMLURIAttrChecker,
1989     ## TODO: start, loopstart, loopend, end
1990     ## ISSUE: they MUST be "value time offset"s. Value?
1991     ## ISSUE: loopcount has no conformance creteria
1992     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
1993     controls => $GetHTMLBooleanAttrChecker->('controls'),
1994     }),
1995 wakaba 1.2 checker => sub {
1996 wakaba 1.4 my ($self, $todo) = @_;
1997 wakaba 1.2
1998 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1999     return $HTMLBlockOrInlineChecker->($self, $todo);
2000 wakaba 1.2 } else {
2001     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2002 wakaba 1.4 ->($self, $todo);
2003 wakaba 1.2 }
2004     },
2005     };
2006    
2007     $Element->{$HTML_NS}->{audio} = {
2008 wakaba 1.12 attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2009     checker => $Element->{$HTML_NS}->{video}->{checker},
2010 wakaba 1.2 };
2011 wakaba 1.1
2012     $Element->{$HTML_NS}->{source} = {
2013 wakaba 1.17 attrs_checker => sub {
2014     my ($self, $todo) = @_;
2015     $GetHTMLAttrsChecker->({
2016     src => $HTMLURIAttrChecker,
2017     type => $HTMLIMTAttrChecker,
2018     media => $HTMLMQAttrChecker,
2019     })->($self, $todo);
2020     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2021     $self->{onerror}->(node => $todo->{node},
2022     type => 'attribute missing:src');
2023     }
2024     },
2025 wakaba 1.1 checker => $HTMLEmptyChecker,
2026     };
2027    
2028     $Element->{$HTML_NS}->{canvas} = {
2029 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2030     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2031     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2032     }),
2033 wakaba 1.1 checker => $HTMLInlineChecker,
2034     };
2035    
2036     $Element->{$HTML_NS}->{map} = {
2037 wakaba 1.17 attrs_checker => $GetHTMLAttrsChecker->({
2038     id => sub {
2039     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2040     my ($self, $attr) = @_;
2041     my $value = $attr->value;
2042     if (length $value > 0) {
2043     if ($self->{id}->{$value}) {
2044     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2045     } else {
2046     $self->{id}->{$value} = 1;
2047     }
2048     } else {
2049     ## NOTE: MUST contain at least one character
2050     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
2051     }
2052     $self->{map}->{$value} ||= $attr;
2053     },
2054     }),
2055 wakaba 1.1 checker => $HTMLBlockChecker,
2056     };
2057    
2058     $Element->{$HTML_NS}->{area} = {
2059 wakaba 1.15 attrs_checker => sub {
2060     my ($self, $todo) = @_;
2061     my %attr;
2062     my $coords;
2063     for my $attr (@{$todo->{node}->attributes}) {
2064     my $attr_ns = $attr->namespace_uri;
2065     $attr_ns = '' unless defined $attr_ns;
2066     my $attr_ln = $attr->manakai_local_name;
2067     my $checker;
2068     if ($attr_ns eq '') {
2069     $checker = {
2070     alt => sub { },
2071     ## NOTE: |alt| value has no conformance creteria.
2072     shape => $GetHTMLEnumeratedAttrChecker->({
2073     circ => -1, circle => 1,
2074     default => 1,
2075     poly => 1, polygon => -1,
2076     rect => 1, rectangle => -1,
2077     }),
2078     coords => sub {
2079     my ($self, $attr) = @_;
2080     my $value = $attr->value;
2081     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2082     $coords = [split /,/, $value];
2083     } else {
2084     $self->{onerror}->(node => $attr,
2085     type => 'syntax error');
2086     }
2087     },
2088 wakaba 1.17 target => $HTMLTargetAttrChecker,
2089 wakaba 1.15 href => $HTMLURIAttrChecker,
2090     ping => $HTMLSpaceURIsAttrChecker,
2091 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(1, @_) },
2092 wakaba 1.17 media => $HTMLMQAttrChecker,
2093     hreflang => $HTMLLanguageTagAttrChecker,
2094 wakaba 1.15 type => $HTMLIMTAttrChecker,
2095     }->{$attr_ln};
2096     if ($checker) {
2097     $attr{$attr_ln} = $attr;
2098     } else {
2099     $checker = $HTMLAttrChecker->{$attr_ln};
2100     }
2101     }
2102     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2103     || $AttrChecker->{$attr_ns}->{''};
2104     if ($checker) {
2105     $checker->($self, $attr) if ref $checker;
2106     } else {
2107     $self->{onerror}->(node => $attr, type => 'attribute not supported');
2108     ## ISSUE: No comformance createria for unknown attributes in the spec
2109     }
2110     }
2111    
2112     if (defined $attr{href}) {
2113     unless (defined $attr{alt}) {
2114     $self->{onerror}->(node => $todo->{node},
2115     type => 'attribute missing:alt');
2116     }
2117     } else {
2118     for (qw/target ping rel media hreflang type alt/) {
2119     if (defined $attr{$_}) {
2120     $self->{onerror}->(node => $attr{$_},
2121     type => 'attribute not allowed');
2122     }
2123     }
2124     }
2125    
2126     my $shape = 'rectangle';
2127     if (defined $attr{shape}) {
2128     $shape = {
2129     circ => 'circle', circle => 'circle',
2130     default => 'default',
2131     poly => 'polygon', polygon => 'polygon',
2132     rect => 'rectangle', rectangle => 'rectangle',
2133     }->{lc $attr{shape}->value} || 'rectangle';
2134     ## TODO: ASCII lowercase?
2135     }
2136    
2137     if ($shape eq 'circle') {
2138     if (defined $attr{coords}) {
2139     if (defined $coords) {
2140     if (@$coords == 3) {
2141     if ($coords->[2] < 0) {
2142     $self->{onerror}->(node => $attr{coords},
2143     type => 'out of range:2');
2144     }
2145     } else {
2146     $self->{onerror}->(node => $attr{coords},
2147     type => 'list item number:3:'.@$coords);
2148     }
2149     } else {
2150     ## NOTE: A syntax error has been reported.
2151     }
2152     } else {
2153     $self->{onerror}->(node => $todo->{node},
2154     type => 'attribute missing:coords');
2155     }
2156     } elsif ($shape eq 'default') {
2157     if (defined $attr{coords}) {
2158     $self->{onerror}->(node => $attr{coords},
2159     type => 'attribute not allowed');
2160     }
2161     } elsif ($shape eq 'polygon') {
2162     if (defined $attr{coords}) {
2163     if (defined $coords) {
2164     if (@$coords >= 6) {
2165     unless (@$coords % 2 == 0) {
2166     $self->{onerror}->(node => $attr{coords},
2167     type => 'list item number:even:'.@$coords);
2168     }
2169     } else {
2170     $self->{onerror}->(node => $attr{coords},
2171     type => 'list item number:>=6:'.@$coords);
2172     }
2173     } else {
2174     ## NOTE: A syntax error has been reported.
2175     }
2176     } else {
2177     $self->{onerror}->(node => $todo->{node},
2178     type => 'attribute missing:coords');
2179     }
2180     } elsif ($shape eq 'rectangle') {
2181     if (defined $attr{coords}) {
2182     if (defined $coords) {
2183     if (@$coords == 4) {
2184     unless ($coords->[0] < $coords->[2]) {
2185     $self->{onerror}->(node => $attr{coords},
2186     type => 'out of range:0');
2187     }
2188     unless ($coords->[1] < $coords->[3]) {
2189     $self->{onerror}->(node => $attr{coords},
2190     type => 'out of range:1');
2191     }
2192     } else {
2193     $self->{onerror}->(node => $attr{coords},
2194     type => 'list item number:4:'.@$coords);
2195     }
2196     } else {
2197     ## NOTE: A syntax error has been reported.
2198     }
2199     } else {
2200     $self->{onerror}->(node => $todo->{node},
2201     type => 'attribute missing:coords');
2202     }
2203     }
2204     },
2205 wakaba 1.1 checker => $HTMLEmptyChecker,
2206     };
2207     ## TODO: only in map
2208    
2209     $Element->{$HTML_NS}->{table} = {
2210 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2211 wakaba 1.1 checker => sub {
2212 wakaba 1.4 my ($self, $todo) = @_;
2213     my $el = $todo->{node};
2214     my $new_todos = [];
2215 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2216    
2217     my $phase = 'before caption';
2218     my $has_tfoot;
2219     while (@nodes) {
2220     my $node = shift @nodes;
2221 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2222    
2223 wakaba 1.1 my $nt = $node->node_type;
2224     if ($nt == 1) {
2225 wakaba 1.8 my $node_ns = $node->namespace_uri;
2226     $node_ns = '' unless defined $node_ns;
2227     my $node_ln = $node->manakai_local_name;
2228 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2229 wakaba 1.1 if ($phase eq 'in tbodys') {
2230 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2231 wakaba 1.1 #$phase = 'in tbodys';
2232     } elsif (not $has_tfoot and
2233 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2234 wakaba 1.1 $phase = 'after tfoot';
2235     $has_tfoot = 1;
2236     } else {
2237 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2238 wakaba 1.1 }
2239     } elsif ($phase eq 'in trs') {
2240 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2241 wakaba 1.1 #$phase = 'in trs';
2242     } elsif (not $has_tfoot and
2243 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2244 wakaba 1.1 $phase = 'after tfoot';
2245     $has_tfoot = 1;
2246     } else {
2247 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2248 wakaba 1.1 }
2249     } elsif ($phase eq 'after thead') {
2250 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2251 wakaba 1.1 $phase = 'in tbodys';
2252 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2253 wakaba 1.1 $phase = 'in trs';
2254 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2255 wakaba 1.1 $phase = 'in tbodys';
2256     $has_tfoot = 1;
2257     } else {
2258 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2259 wakaba 1.1 }
2260     } elsif ($phase eq 'in colgroup') {
2261 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2262 wakaba 1.1 $phase = 'in colgroup';
2263 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2264 wakaba 1.1 $phase = 'after thead';
2265 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2266 wakaba 1.1 $phase = 'in tbodys';
2267 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2268 wakaba 1.1 $phase = 'in trs';
2269 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2270 wakaba 1.1 $phase = 'in tbodys';
2271     $has_tfoot = 1;
2272     } else {
2273 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2274 wakaba 1.1 }
2275     } elsif ($phase eq 'before caption') {
2276 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2277 wakaba 1.1 $phase = 'in colgroup';
2278 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2279 wakaba 1.1 $phase = 'in colgroup';
2280 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2281 wakaba 1.1 $phase = 'after thead';
2282 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2283 wakaba 1.1 $phase = 'in tbodys';
2284 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2285 wakaba 1.1 $phase = 'in trs';
2286 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2287 wakaba 1.1 $phase = 'in tbodys';
2288     $has_tfoot = 1;
2289     } else {
2290 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2291 wakaba 1.1 }
2292     } else { # after tfoot
2293 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2294 wakaba 1.1 }
2295 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2296     unshift @nodes, @$sib;
2297 wakaba 1.4 push @$new_todos, @$ch;
2298 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2299     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2300 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2301 wakaba 1.1 }
2302     } elsif ($nt == 5) {
2303     unshift @nodes, @{$node->child_nodes};
2304     }
2305     }
2306 wakaba 1.21
2307     ## Table model errors
2308     require Whatpm::HTMLTable;
2309     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2310     my %opt = @_;
2311     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2312     });
2313    
2314 wakaba 1.4 return ($new_todos);
2315 wakaba 1.1 },
2316     };
2317    
2318     $Element->{$HTML_NS}->{caption} = {
2319 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2320 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
2321     };
2322    
2323     $Element->{$HTML_NS}->{colgroup} = {
2324 wakaba 1.17 attrs_checker => $GetHTMLAttrsChecker->({
2325     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2326     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2327     ## TODO: "attribute not supported" if |col|.
2328     ## ISSUE: MUST NOT if any |col|?
2329     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2330     }),
2331 wakaba 1.1 checker => sub {
2332 wakaba 1.4 my ($self, $todo) = @_;
2333     my $el = $todo->{node};
2334     my $new_todos = [];
2335 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2336    
2337     while (@nodes) {
2338     my $node = shift @nodes;
2339 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2340    
2341 wakaba 1.1 my $nt = $node->node_type;
2342     if ($nt == 1) {
2343 wakaba 1.8 my $node_ns = $node->namespace_uri;
2344     $node_ns = '' unless defined $node_ns;
2345     my $node_ln = $node->manakai_local_name;
2346 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2347 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
2348 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2349 wakaba 1.1 }
2350 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2351     unshift @nodes, @$sib;
2352 wakaba 1.4 push @$new_todos, @$ch;
2353 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2354     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2355 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2356 wakaba 1.1 }
2357     } elsif ($nt == 5) {
2358     unshift @nodes, @{$node->child_nodes};
2359     }
2360     }
2361 wakaba 1.4 return ($new_todos);
2362 wakaba 1.1 },
2363     };
2364    
2365     $Element->{$HTML_NS}->{col} = {
2366 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2367     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2368     }),
2369 wakaba 1.1 checker => $HTMLEmptyChecker,
2370     };
2371    
2372     $Element->{$HTML_NS}->{tbody} = {
2373 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2374 wakaba 1.1 checker => sub {
2375 wakaba 1.4 my ($self, $todo) = @_;
2376     my $el = $todo->{node};
2377     my $new_todos = [];
2378 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2379    
2380     my $has_tr;
2381     while (@nodes) {
2382     my $node = shift @nodes;
2383 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2384    
2385 wakaba 1.1 my $nt = $node->node_type;
2386     if ($nt == 1) {
2387 wakaba 1.8 my $node_ns = $node->namespace_uri;
2388     $node_ns = '' unless defined $node_ns;
2389     my $node_ln = $node->manakai_local_name;
2390 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2391 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2392 wakaba 1.1 $has_tr = 1;
2393     } else {
2394 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2395 wakaba 1.1 }
2396 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2397     unshift @nodes, @$sib;
2398 wakaba 1.4 push @$new_todos, @$ch;
2399 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2400     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2401 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2402 wakaba 1.1 }
2403     } elsif ($nt == 5) {
2404     unshift @nodes, @{$node->child_nodes};
2405     }
2406     }
2407     unless ($has_tr) {
2408 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
2409 wakaba 1.1 }
2410 wakaba 1.4 return ($new_todos);
2411 wakaba 1.1 },
2412     };
2413    
2414     $Element->{$HTML_NS}->{thead} = {
2415 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2416 wakaba 1.23 checker => $Element->{$HTML_NS}->{tbody}->{checker},
2417 wakaba 1.1 };
2418    
2419     $Element->{$HTML_NS}->{tfoot} = {
2420 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2421 wakaba 1.23 checker => $Element->{$HTML_NS}->{tbody}->{checker},
2422 wakaba 1.1 };
2423    
2424     $Element->{$HTML_NS}->{tr} = {
2425 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2426 wakaba 1.1 checker => sub {
2427 wakaba 1.4 my ($self, $todo) = @_;
2428     my $el = $todo->{node};
2429     my $new_todos = [];
2430 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2431    
2432     my $has_td;
2433     while (@nodes) {
2434     my $node = shift @nodes;
2435 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2436    
2437 wakaba 1.1 my $nt = $node->node_type;
2438     if ($nt == 1) {
2439 wakaba 1.8 my $node_ns = $node->namespace_uri;
2440     $node_ns = '' unless defined $node_ns;
2441     my $node_ln = $node->manakai_local_name;
2442 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2443 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
2444 wakaba 1.1 $has_td = 1;
2445     } else {
2446 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2447 wakaba 1.1 }
2448 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2449     unshift @nodes, @$sib;
2450 wakaba 1.4 push @$new_todos, @$ch;
2451 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2452     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2453 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2454 wakaba 1.1 }
2455     } elsif ($nt == 5) {
2456     unshift @nodes, @{$node->child_nodes};
2457     }
2458     }
2459     unless ($has_td) {
2460 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2461 wakaba 1.1 }
2462 wakaba 1.4 return ($new_todos);
2463 wakaba 1.1 },
2464     };
2465    
2466     $Element->{$HTML_NS}->{td} = {
2467 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2468     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2469     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2470     }),
2471 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
2472     };
2473    
2474     $Element->{$HTML_NS}->{th} = {
2475 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2476     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2477     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2478     scope => $GetHTMLEnumeratedAttrChecker
2479     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2480     }),
2481 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
2482     };
2483    
2484 wakaba 1.12 ## TODO: table model error checking
2485    
2486 wakaba 1.1 ## TODO: forms
2487    
2488 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
2489 wakaba 1.25 attrs_checker => sub {
2490     my ($self, $todo) = @_;
2491     $GetHTMLAttrsChecker->({
2492     src => $HTMLURIAttrChecker,
2493     defer => $GetHTMLBooleanAttrChecker->('defer'),
2494     async => $GetHTMLBooleanAttrChecker->('async'),
2495     type => $HTMLIMTAttrChecker,
2496     })->($self, $todo);
2497     if ($todo->{node}->has_attribute_ns (undef, 'defer')) {
2498     my $async_attr = $todo->{node}->get_attribute_node_ns (undef, 'async');
2499     if ($async_attr) {
2500     $self->{onerror}->(node => $async_attr,
2501     type => 'attribute not allowed'); # MUST NOT
2502     }
2503     }
2504     },
2505 wakaba 1.2 checker => sub {
2506 wakaba 1.4 my ($self, $todo) = @_;
2507 wakaba 1.2
2508 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2509     return $HTMLEmptyChecker->($self, $todo);
2510 wakaba 1.2 } else {
2511     ## NOTE: No content model conformance in HTML5 spec.
2512 wakaba 1.4 return $AnyChecker->($self, $todo);
2513 wakaba 1.2 }
2514     },
2515     };
2516    
2517     ## NOTE: When script is disabled.
2518     $Element->{$HTML_NS}->{noscript} = {
2519 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2520 wakaba 1.2 checker => sub {
2521 wakaba 1.4 my ($self, $todo) = @_;
2522 wakaba 1.1
2523 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2524 wakaba 1.4 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2525 wakaba 1.2 push @$sib, $end;
2526     return ($sib, $ch);
2527     },
2528     };
2529 wakaba 1.1
2530     $Element->{$HTML_NS}->{'event-source'} = {
2531 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2532     src => $HTMLURIAttrChecker,
2533     }),
2534 wakaba 1.1 checker => $HTMLEmptyChecker,
2535     };
2536    
2537     $Element->{$HTML_NS}->{details} = {
2538 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2539     open => $GetHTMLBooleanAttrChecker->('open'),
2540     }),
2541 wakaba 1.6 checker => sub {
2542     my ($self, $todo) = @_;
2543    
2544     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2545     my ($sib, $ch)
2546     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2547     ->($self, $todo);
2548     push @$sib, $end;
2549     return ($sib, $ch);
2550     },
2551 wakaba 1.1 };
2552    
2553     $Element->{$HTML_NS}->{datagrid} = {
2554 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2555     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2556     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2557     }),
2558 wakaba 1.6 checker => sub {
2559     my ($self, $todo) = @_;
2560    
2561     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2562     my ($sib, $ch) = $HTMLBlockChecker->($self, $todo);
2563 wakaba 1.25 ## TODO: (Block-table)+ | table | select | datalist
2564 wakaba 1.6 push @$sib, $end;
2565     return ($sib, $ch);
2566     },
2567 wakaba 1.1 };
2568    
2569     $Element->{$HTML_NS}->{command} = {
2570 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2571 wakaba 1.1 checker => $HTMLEmptyChecker,
2572     };
2573    
2574     $Element->{$HTML_NS}->{menu} = {
2575 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2576 wakaba 1.1 checker => sub {
2577 wakaba 1.4 my ($self, $todo) = @_;
2578     my $el = $todo->{node};
2579     my $new_todos = [];
2580 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2581    
2582     my $content = 'li or inline';
2583     while (@nodes) {
2584     my $node = shift @nodes;
2585 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2586    
2587 wakaba 1.1 my $nt = $node->node_type;
2588     if ($nt == 1) {
2589 wakaba 1.2 my $node_ns = $node->namespace_uri;
2590     $node_ns = '' unless defined $node_ns;
2591     my $node_ln = $node->manakai_local_name;
2592 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2593 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2594 wakaba 1.1 if ($content eq 'inline') {
2595 wakaba 1.6 $not_allowed = 1;
2596 wakaba 1.1 } elsif ($content eq 'li or inline') {
2597     $content = 'li';
2598     }
2599     } else {
2600 wakaba 1.7 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2601     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2602     $content = 'inline';
2603     } else {
2604 wakaba 1.6 $not_allowed = 1;
2605 wakaba 1.7 }
2606 wakaba 1.1 }
2607 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
2608     if $not_allowed;
2609 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2610     unshift @nodes, @$sib;
2611 wakaba 1.4 push @$new_todos, @$ch;
2612 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2613     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2614     if ($content eq 'li') {
2615 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2616 wakaba 1.1 } elsif ($content eq 'li or inline') {
2617     $content = 'inline';
2618     }
2619     }
2620     } elsif ($nt == 5) {
2621     unshift @nodes, @{$node->child_nodes};
2622     }
2623     }
2624 wakaba 1.4
2625     for (@$new_todos) {
2626     $_->{inline} = 1;
2627     }
2628     return ($new_todos);
2629 wakaba 1.1 },
2630     };
2631    
2632 wakaba 1.6 $Element->{$HTML_NS}->{legend} = {
2633 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2634 wakaba 1.6 checker => sub {
2635     my ($self, $todo) = @_;
2636    
2637     my $parent = $todo->{node}->manakai_parent_element;
2638     if (defined $parent) {
2639     my $nsuri = $parent->namespace_uri;
2640     $nsuri = '' unless defined $nsuri;
2641     my $ln = $parent->manakai_local_name;
2642     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
2643     return $HTMLInlineChecker->($self, $todo);
2644     } else {
2645     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
2646     }
2647     } else {
2648     return $HTMLInlineChecker->($self, $todo);
2649     }
2650    
2651     ## ISSUE: Content model is defined only for fieldset/legend,
2652     ## details/legend, and figure/legend.
2653     },
2654     };
2655 wakaba 1.1
2656     $Element->{$HTML_NS}->{div} = {
2657 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2658 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
2659 wakaba 1.1 };
2660    
2661     $Element->{$HTML_NS}->{font} = {
2662 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2663 wakaba 1.1 checker => $HTMLTransparentChecker,
2664     };
2665    
2666 wakaba 1.24 sub check_document ($$$) {
2667     my ($self, $doc, $onerror) = @_;
2668     $self = bless {}, $self unless ref $self;
2669     $self->{onerror} = $onerror;
2670    
2671     my $docel = $doc->document_element;
2672 wakaba 1.26 unless (defined $docel) {
2673     ## ISSUE: Should we check content of Document node?
2674     $onerror->(node => $doc, type => 'no document element');
2675     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
2676     return;
2677     }
2678    
2679     ## ISSUE: Unexpanded entity references and HTML5 conformance
2680    
2681 wakaba 1.24 my $docel_nsuri = $docel->namespace_uri;
2682     $docel_nsuri = '' unless defined $docel_nsuri;
2683     my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
2684     $Element->{$docel_nsuri}->{''} ||
2685     $ElementDefault;
2686     if ($docel_def->{is_root}) {
2687     #
2688     } else {
2689     $onerror->(node => $docel, type => 'element not allowed');
2690     }
2691    
2692     ## TODO: Check for other items other than document element
2693     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
2694    
2695     $self->check_element ($docel, $onerror);
2696     } # check_document
2697 wakaba 1.2
2698 wakaba 1.1 sub check_element ($$$) {
2699     my ($self, $el, $onerror) = @_;
2700 wakaba 1.24 $self = bless {}, $self unless ref $self;
2701     $self->{onerror} = $onerror;
2702 wakaba 1.1
2703 wakaba 1.2 $self->{minuses} = {};
2704 wakaba 1.10 $self->{id} = {};
2705 wakaba 1.17 $self->{usemap} = [];
2706     $self->{map} = {};
2707 wakaba 1.20 $self->{has_link_type} = {};
2708 wakaba 1.2
2709 wakaba 1.4 my @todo = ({type => 'element', node => $el});
2710     while (@todo) {
2711     my $todo = shift @todo;
2712     if ($todo->{type} eq 'element') {
2713 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2714     if (defined $prefix and $prefix eq 'xmlns') {
2715     $self->{onerror}
2716     ->(node => $todo->{node},
2717     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2718     }
2719 wakaba 1.4 my $nsuri = $todo->{node}->namespace_uri;
2720     $nsuri = '' unless defined $nsuri;
2721     my $ln = $todo->{node}->manakai_local_name;
2722     my $eldef = $Element->{$nsuri}->{$ln} ||
2723     $Element->{$nsuri}->{''} ||
2724     $ElementDefault;
2725 wakaba 1.9 $eldef->{attrs_checker}->($self, $todo);
2726 wakaba 1.4 my ($new_todos) = $eldef->{checker}->($self, $todo);
2727 wakaba 1.14 unshift @todo, @$new_todos;
2728 wakaba 1.9 } elsif ($todo->{type} eq 'element-attributes') {
2729 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2730     if (defined $prefix and $prefix eq 'xmlns') {
2731     $self->{onerror}
2732     ->(node => $todo->{node},
2733     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2734     }
2735 wakaba 1.9 my $nsuri = $todo->{node}->namespace_uri;
2736     $nsuri = '' unless defined $nsuri;
2737     my $ln = $todo->{node}->manakai_local_name;
2738     my $eldef = $Element->{$nsuri}->{$ln} ||
2739     $Element->{$nsuri}->{''} ||
2740     $ElementDefault;
2741     $eldef->{attrs_checker}->($self, $todo);
2742 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
2743     $self->_remove_minuses ($todo);
2744     }
2745 wakaba 1.1 }
2746 wakaba 1.17
2747     for (@{$self->{usemap}}) {
2748     unless ($self->{map}->{$_->[0]}) {
2749     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
2750     }
2751     }
2752    
2753     delete $self->{minuses};
2754     delete $self->{onerror};
2755     delete $self->{id};
2756     delete $self->{usemap};
2757     delete $self->{map};
2758 wakaba 1.1 } # check_element
2759    
2760 wakaba 1.2 sub _add_minuses ($@) {
2761     my $self = shift;
2762     my $r = {};
2763     for my $list (@_) {
2764     for my $ns (keys %$list) {
2765     for my $ln (keys %{$list->{$ns}}) {
2766     unless ($self->{minuses}->{$ns}->{$ln}) {
2767     $self->{minuses}->{$ns}->{$ln} = 1;
2768     $r->{$ns}->{$ln} = 1;
2769     }
2770     }
2771     }
2772     }
2773 wakaba 1.4 return {type => 'plus', list => $r};
2774 wakaba 1.2 } # _add_minuses
2775    
2776     sub _remove_minuses ($$) {
2777 wakaba 1.4 my ($self, $todo) = @_;
2778     for my $ns (keys %{$todo->{list}}) {
2779     for my $ln (keys %{$todo->{list}->{$ns}}) {
2780     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
2781 wakaba 1.2 }
2782     }
2783     1;
2784     } # _remove_minuses
2785    
2786     sub _check_get_children ($$) {
2787     my ($self, $node) = @_;
2788 wakaba 1.4 my $new_todos = [];
2789 wakaba 1.2 my $sib = [];
2790     TP: {
2791     my $node_ns = $node->namespace_uri;
2792     $node_ns = '' unless defined $node_ns;
2793     my $node_ln = $node->manakai_local_name;
2794     if ($node_ns eq $HTML_NS) {
2795     if ($node_ln eq 'noscript') {
2796     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
2797     push @$sib, $end;
2798     }
2799     }
2800 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
2801     unshift @$sib, @{$node->child_nodes};
2802 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2803 wakaba 1.7 last TP;
2804 wakaba 1.2 }
2805 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
2806 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
2807     unshift @$sib, @{$node->child_nodes};
2808 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2809 wakaba 1.2 last TP;
2810     } else {
2811     my @cn = @{$node->child_nodes};
2812     CN: while (@cn) {
2813     my $cn = shift @cn;
2814     my $cnt = $cn->node_type;
2815     if ($cnt == 1) {
2816 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
2817     $cn_nsuri = '' unless defined $cn_nsuri;
2818     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
2819 wakaba 1.2 #
2820     } else {
2821     last CN;
2822     }
2823     } elsif ($cnt == 3 or $cnt == 4) {
2824     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
2825     last CN;
2826     }
2827     }
2828     } # CN
2829     unshift @$sib, @cn;
2830     }
2831     }
2832 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
2833 wakaba 1.2 } # TP
2834 wakaba 1.4 return ($sib, $new_todos);
2835 wakaba 1.2 } # _check_get_children
2836    
2837 wakaba 1.1 1;
2838 wakaba 1.26 # $Date: 2007/06/05 00:56:42 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24