/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.24 - (hide annotations) (download)
Sun May 27 11:14:55 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.23: +26 -5 lines
++ whatpm/Whatpm/ChangeLog	27 May 2007 11:14:45 -0000
	* ContentChecker.pm (html): Set |is_root| (allowed
	as a document element) flag on.
	(new): Removed.
	(check_document): New method.

2007-05-27  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.18 require Whatpm::URIChecker;
5    
6 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
7     ## be applied to an in-memory representation (i.e. DOM)?
8    
9 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
10     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
11    
12     my $AttrChecker = {
13     $XML_NS => {
14 wakaba 1.13 space => sub {
15     my ($self, $attr) = @_;
16     my $value = $attr->value;
17     if ($value eq 'default' or $value eq 'preserve') {
18     #
19     } else {
20     ## NOTE: An XML "error"
21     $self->{onerror}->(node => $attr,
22     type => 'XML error:invalid xml:space value');
23     }
24     },
25     lang => sub {
26     ## NOTE: "The values of the attribute are language identifiers
27     ## as defined by [IETF RFC 3066], Tags for the Identification
28     ## of Languages, or its successor; in addition, the empty string
29     ## may be specified." ("may" in lower case)
30     ## TODO: xml:lang MUST NOT in HTML document
31     },
32     base => sub {
33     my ($self, $attr) = @_;
34     my $value = $attr->value;
35     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
36     $self->{onerror}->(node => $attr,
37     type => 'syntax error');
38     }
39 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
40     ## no author requirement on conformance in the XML Base specification.
41 wakaba 1.13 },
42     id => sub {
43     my ($self, $attr) = @_;
44     my $value = $attr->value;
45     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
46     $value =~ s/^\x20//;
47     $value =~ s/\x20$//;
48     ## TODO: NCName in XML 1.0 or 1.1
49     ## TODO: declared type is ID?
50     if ($self->{id}->{$value}) {
51     $self->{onerror}->(node => $attr, type => 'xml:id error:duplicate ID');
52     } else {
53     $self->{id}->{$value} = 1;
54     }
55     },
56 wakaba 1.9 },
57     $XMLNS_NS => {
58 wakaba 1.13 '' => sub {
59     my ($self, $attr) = @_;
60     my $ln = $attr->manakai_local_name;
61     my $value = $attr->value;
62     if ($value eq $XML_NS and $ln ne 'xml') {
63     $self->{onerror}
64     ->(node => $attr,
65     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
66     } elsif ($value eq $XMLNS_NS) {
67     $self->{onerror}
68     ->(node => $attr,
69     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
70     }
71     if ($ln eq 'xml' and $value ne $XML_NS) {
72     $self->{onerror}
73     ->(node => $attr,
74     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xml=');
75     } elsif ($ln eq 'xmlns') {
76     $self->{onerror}
77     ->(node => $attr,
78     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xmlns=');
79     }
80     ## TODO: If XML 1.0 and empty
81     },
82     xmlns => sub {
83     my ($self, $attr) = @_;
84     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
85     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
86 wakaba 1.18 ## TODO: relative references are deprecated
87 wakaba 1.13 my $value = $attr->value;
88     if ($value eq $XML_NS) {
89     $self->{onerror}
90     ->(node => $attr,
91     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
92     } elsif ($value eq $XMLNS_NS) {
93     $self->{onerror}
94     ->(node => $attr,
95     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
96     }
97     },
98 wakaba 1.9 },
99     };
100    
101 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
102 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
103     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
104     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
105     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
106    
107 wakaba 1.3 ## ANY
108     my $AnyChecker = sub {
109 wakaba 1.4 my ($self, $todo) = @_;
110     my $el = $todo->{node};
111     my $new_todos = [];
112 wakaba 1.3 my @nodes = (@{$el->child_nodes});
113     while (@nodes) {
114     my $node = shift @nodes;
115     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
116    
117     my $nt = $node->node_type;
118     if ($nt == 1) {
119     my $node_ns = $node->namespace_uri;
120     $node_ns = '' unless defined $node_ns;
121     my $node_ln = $node->manakai_local_name;
122     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
123     $self->{onerror}->(node => $node, type => 'element not allowed');
124     }
125 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
126 wakaba 1.3 } elsif ($nt == 5) {
127     unshift @nodes, @{$node->child_nodes};
128     }
129     }
130 wakaba 1.4 return ($new_todos);
131 wakaba 1.3 }; # $AnyChecker
132    
133 wakaba 1.1 my $ElementDefault = {
134     checker => sub {
135 wakaba 1.4 my ($self, $todo) = @_;
136     $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
137     return $AnyChecker->($self, $todo);
138 wakaba 1.1 },
139 wakaba 1.9 attrs_checker => sub {
140     my ($self, $todo) = @_;
141     for my $attr (@{$todo->{node}->attributes}) {
142     my $attr_ns = $attr->namespace_uri;
143     $attr_ns = '' unless defined $attr_ns;
144     my $attr_ln = $attr->manakai_local_name;
145     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
146     || $AttrChecker->{$attr_ns}->{''};
147     if ($checker) {
148     $checker->($self, $attr);
149 wakaba 1.17 } else {
150     $self->{onerror}->(node => $attr, type => 'attribute not supported');
151 wakaba 1.9 }
152     }
153     },
154 wakaba 1.1 };
155    
156     my $Element = {};
157    
158     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
159    
160 wakaba 1.7 my $HTMLMetadataElements = {
161     $HTML_NS => {
162     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1/,
163     },
164     };
165 wakaba 1.1
166 wakaba 1.2 my $HTMLSectioningElements = {
167     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
168     };
169 wakaba 1.1
170 wakaba 1.7 my $HTMLBlockLevelElements = {
171     $HTML_NS => {
172     qw/
173     section 1 nav 1 article 1 blockquote 1 aside 1
174     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
175     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
176     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
177     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
178     /,
179     },
180     };
181    
182     my $HTMLStrictlyInlineLevelElements = {
183     $HTML_NS => {
184     qw/
185     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
186     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
187     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
188     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
189     script 1 noscript 1 event-source 1 command 1 font 1
190     /,
191     },
192     };
193    
194     my $HTMLStructuredInlineLevelElements = {
195     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
196     };
197 wakaba 1.1
198 wakaba 1.6 my $HTMLInteractiveElements = {
199     $HTML_NS => {a => 1, details => 1, datagrid => 1},
200     };
201     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
202     ## of interactive elements
203 wakaba 1.1
204 wakaba 1.7 my $HTMLTransparentElements = {
205     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
206     ## NOTE: |html:noscript| is transparent if scripting is disabled.
207     };
208    
209     #my $HTMLSemiTransparentElements = {
210     # $HTML_NS => {qw/video 1 audio 1/},
211     #};
212    
213     my $HTMLEmbededElements = {
214     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
215     };
216 wakaba 1.1
217     ## Empty
218     my $HTMLEmptyChecker = sub {
219 wakaba 1.4 my ($self, $todo) = @_;
220     my $el = $todo->{node};
221     my $new_todos = [];
222 wakaba 1.1 my @nodes = (@{$el->child_nodes});
223    
224     while (@nodes) {
225     my $node = shift @nodes;
226 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
227    
228 wakaba 1.1 my $nt = $node->node_type;
229     if ($nt == 1) {
230 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
231     $self->{onerror}->(node => $node, type => 'element not allowed');
232     my ($sib, $ch) = $self->_check_get_children ($node);
233     unshift @nodes, @$sib;
234 wakaba 1.4 push @$new_todos, @$ch;
235 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
236 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
237     $self->{onerror}->(node => $node, type => 'character not allowed');
238     }
239 wakaba 1.1 } elsif ($nt == 5) {
240     unshift @nodes, @{$node->child_nodes};
241     }
242     }
243 wakaba 1.4 return ($new_todos);
244 wakaba 1.1 };
245    
246     ## Text
247     my $HTMLTextChecker = sub {
248 wakaba 1.4 my ($self, $todo) = @_;
249     my $el = $todo->{node};
250     my $new_todos = [];
251 wakaba 1.1 my @nodes = (@{$el->child_nodes});
252    
253     while (@nodes) {
254     my $node = shift @nodes;
255 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
256    
257 wakaba 1.1 my $nt = $node->node_type;
258     if ($nt == 1) {
259 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
260     $self->{onerror}->(node => $node, type => 'element not allowed');
261     my ($sib, $ch) = $self->_check_get_children ($node);
262     unshift @nodes, @$sib;
263 wakaba 1.4 push @$new_todos, @$ch;
264 wakaba 1.1 } elsif ($nt == 5) {
265     unshift @nodes, @{$node->child_nodes};
266     }
267     }
268 wakaba 1.4 return ($new_todos);
269 wakaba 1.1 };
270    
271     ## Zero or more |html:style| elements,
272     ## followed by zero or more block-level elements
273     my $HTMLStylableBlockChecker = sub {
274 wakaba 1.4 my ($self, $todo) = @_;
275     my $el = $todo->{node};
276     my $new_todos = [];
277 wakaba 1.1 my @nodes = (@{$el->child_nodes});
278    
279     my $has_non_style;
280     while (@nodes) {
281     my $node = shift @nodes;
282 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
283    
284 wakaba 1.1 my $nt = $node->node_type;
285     if ($nt == 1) {
286 wakaba 1.2 my $node_ns = $node->namespace_uri;
287     $node_ns = '' unless defined $node_ns;
288     my $node_ln = $node->manakai_local_name;
289 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
290 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
291 wakaba 1.6 $not_allowed = 1 if $has_non_style;
292 wakaba 1.7 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
293     $has_non_style = 1;
294 wakaba 1.1 } else {
295     $has_non_style = 1;
296 wakaba 1.7 $not_allowed = 1;
297 wakaba 1.1 }
298 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
299     if $not_allowed;
300 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
301     unshift @nodes, @$sib;
302 wakaba 1.4 push @$new_todos, @$ch;
303 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
304     if ($node->data =~ /[^\x09-\x0D\x20]/) {
305 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
306 wakaba 1.1 }
307     } elsif ($nt == 5) {
308     unshift @nodes, @{$node->child_nodes};
309     }
310     }
311 wakaba 1.4 return ($new_todos);
312 wakaba 1.1 }; # $HTMLStylableBlockChecker
313    
314     ## Zero or more block-level elements
315     my $HTMLBlockChecker = sub {
316 wakaba 1.4 my ($self, $todo) = @_;
317     my $el = $todo->{node};
318     my $new_todos = [];
319 wakaba 1.1 my @nodes = (@{$el->child_nodes});
320    
321     while (@nodes) {
322     my $node = shift @nodes;
323 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
324    
325 wakaba 1.1 my $nt = $node->node_type;
326     if ($nt == 1) {
327 wakaba 1.2 my $node_ns = $node->namespace_uri;
328     $node_ns = '' unless defined $node_ns;
329     my $node_ln = $node->manakai_local_name;
330 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
331 wakaba 1.7 $not_allowed = 1
332     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
333 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
334     if $not_allowed;
335 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
336     unshift @nodes, @$sib;
337 wakaba 1.4 push @$new_todos, @$ch;
338 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
339     if ($node->data =~ /[^\x09-\x0D\x20]/) {
340 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
341 wakaba 1.1 }
342     } elsif ($nt == 5) {
343     unshift @nodes, @{$node->child_nodes};
344     }
345     }
346 wakaba 1.4 return ($new_todos);
347 wakaba 1.1 }; # $HTMLBlockChecker
348    
349     ## Inline-level content
350     my $HTMLInlineChecker = sub {
351 wakaba 1.4 my ($self, $todo) = @_;
352     my $el = $todo->{node};
353     my $new_todos = [];
354 wakaba 1.1 my @nodes = (@{$el->child_nodes});
355    
356     while (@nodes) {
357     my $node = shift @nodes;
358 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
359    
360 wakaba 1.1 my $nt = $node->node_type;
361     if ($nt == 1) {
362 wakaba 1.2 my $node_ns = $node->namespace_uri;
363     $node_ns = '' unless defined $node_ns;
364     my $node_ln = $node->manakai_local_name;
365 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
366 wakaba 1.7 $not_allowed = 1
367     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
368     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
369 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
370     if $not_allowed;
371 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
372     unshift @nodes, @$sib;
373 wakaba 1.4 push @$new_todos, @$ch;
374 wakaba 1.1 } elsif ($nt == 5) {
375     unshift @nodes, @{$node->child_nodes};
376     }
377     }
378 wakaba 1.4
379     for (@$new_todos) {
380     $_->{inline} = 1;
381     }
382     return ($new_todos);
383     }; # $HTMLInlineChecker
384 wakaba 1.1
385     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
386     ## TODO: check significant content
387    
388     ## Strictly inline-level content
389     my $HTMLStrictlyInlineChecker = sub {
390 wakaba 1.4 my ($self, $todo) = @_;
391     my $el = $todo->{node};
392     my $new_todos = [];
393 wakaba 1.1 my @nodes = (@{$el->child_nodes});
394    
395     while (@nodes) {
396     my $node = shift @nodes;
397 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
398    
399 wakaba 1.1 my $nt = $node->node_type;
400     if ($nt == 1) {
401 wakaba 1.2 my $node_ns = $node->namespace_uri;
402     $node_ns = '' unless defined $node_ns;
403     my $node_ln = $node->manakai_local_name;
404 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
405 wakaba 1.7 $not_allowed = 1
406     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
407 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
408     if $not_allowed;
409 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
410     unshift @nodes, @$sib;
411 wakaba 1.4 push @$new_todos, @$ch;
412 wakaba 1.1 } elsif ($nt == 5) {
413     unshift @nodes, @{$node->child_nodes};
414     }
415     }
416 wakaba 1.4
417     for (@$new_todos) {
418     $_->{inline} = 1;
419     $_->{strictly_inline} = 1;
420     }
421     return ($new_todos);
422 wakaba 1.1 }; # $HTMLStrictlyInlineChecker
423    
424     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
425     ## TODO: check significant content
426    
427 wakaba 1.4 ## Inline-level or strictly inline-kevek content
428     my $HTMLInlineOrStrictlyInlineChecker = sub {
429     my ($self, $todo) = @_;
430     my $el = $todo->{node};
431     my $new_todos = [];
432     my @nodes = (@{$el->child_nodes});
433    
434     while (@nodes) {
435     my $node = shift @nodes;
436     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
437    
438     my $nt = $node->node_type;
439     if ($nt == 1) {
440     my $node_ns = $node->namespace_uri;
441     $node_ns = '' unless defined $node_ns;
442     my $node_ln = $node->manakai_local_name;
443 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
444 wakaba 1.7 if ($todo->{strictly_inline}) {
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
447     } else {
448     $not_allowed = 1
449     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
450     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
451     }
452 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
453     if $not_allowed;
454 wakaba 1.4 my ($sib, $ch) = $self->_check_get_children ($node);
455     unshift @nodes, @$sib;
456     push @$new_todos, @$ch;
457     } elsif ($nt == 5) {
458     unshift @nodes, @{$node->child_nodes};
459     }
460     }
461    
462     for (@$new_todos) {
463     $_->{inline} = 1;
464     $_->{strictly_inline} = 1;
465     }
466     return ($new_todos);
467     }; # $HTMLInlineOrStrictlyInlineChecker
468    
469 wakaba 1.6 my $HTMLSignificantInlineOrStrictlyInlineChecker
470     = $HTMLInlineOrStrictlyInlineChecker;
471     ## TODO: check significant content
472    
473 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
474 wakaba 1.4 my ($self, $todo) = @_;
475     my $el = $todo->{node};
476     my $new_todos = [];
477 wakaba 1.1 my @nodes = (@{$el->child_nodes});
478    
479     my $content = 'block-or-inline'; # or 'block' or 'inline'
480     my @block_not_inline;
481     while (@nodes) {
482     my $node = shift @nodes;
483 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
484    
485 wakaba 1.1 my $nt = $node->node_type;
486     if ($nt == 1) {
487 wakaba 1.2 my $node_ns = $node->namespace_uri;
488     $node_ns = '' unless defined $node_ns;
489     my $node_ln = $node->manakai_local_name;
490 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
491 wakaba 1.1 if ($content eq 'block') {
492 wakaba 1.7 $not_allowed = 1
493     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
494 wakaba 1.1 } elsif ($content eq 'inline') {
495 wakaba 1.7 $not_allowed = 1
496     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
497     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
498 wakaba 1.1 } else {
499 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
500     my $is_inline
501     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
502     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
503 wakaba 1.1
504 wakaba 1.6 push @block_not_inline, $node
505     if $is_block and not $is_inline and not $not_allowed;
506 wakaba 1.1 unless ($is_block) {
507     $content = 'inline';
508     for (@block_not_inline) {
509 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
510 wakaba 1.1 }
511 wakaba 1.6 $not_allowed = 1 unless $is_inline;
512 wakaba 1.1 }
513     }
514 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
515     if $not_allowed;
516 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
517     unshift @nodes, @$sib;
518 wakaba 1.4 push @$new_todos, @$ch;
519 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
520     if ($node->data =~ /[^\x09-\x0D\x20]/) {
521     if ($content eq 'block') {
522 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
523 wakaba 1.1 } else {
524     $content = 'inline';
525     for (@block_not_inline) {
526 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
527 wakaba 1.1 }
528     }
529     }
530     } elsif ($nt == 5) {
531     unshift @nodes, @{$node->child_nodes};
532     }
533     }
534 wakaba 1.4
535     if ($content eq 'inline') {
536     for (@$new_todos) {
537     $_->{inline} = 1;
538     }
539     }
540     return ($new_todos);
541 wakaba 1.1 };
542    
543 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
544     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
545     my ($elnsuri, $ellname) = @_;
546     return sub {
547 wakaba 1.4 my ($self, $todo) = @_;
548     my $el = $todo->{node};
549     my $new_todos = [];
550 wakaba 1.2 my @nodes = (@{$el->child_nodes});
551    
552     my $has_non_style;
553     my $content = 'block-or-inline'; # or 'block' or 'inline'
554     my @block_not_inline;
555     while (@nodes) {
556     my $node = shift @nodes;
557     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
558    
559     my $nt = $node->node_type;
560     if ($nt == 1) {
561     my $node_ns = $node->namespace_uri;
562     $node_ns = '' unless defined $node_ns;
563     my $node_ln = $node->manakai_local_name;
564 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
565 wakaba 1.8 if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
566 wakaba 1.6 $not_allowed = 1 if $has_non_style;
567 wakaba 1.2 } elsif ($content eq 'block') {
568     $has_non_style = 1;
569 wakaba 1.7 $not_allowed = 1
570     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
571 wakaba 1.2 } elsif ($content eq 'inline') {
572     $has_non_style = 1;
573 wakaba 1.7 $not_allowed = 1
574     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
575     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
576 wakaba 1.2 } else {
577     $has_non_style = 1;
578 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
579     my $is_inline
580     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
581     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
582 wakaba 1.2
583 wakaba 1.6 push @block_not_inline, $node
584     if $is_block and not $is_inline and not $not_allowed;
585 wakaba 1.2 unless ($is_block) {
586     $content = 'inline';
587     for (@block_not_inline) {
588     $self->{onerror}->(node => $_, type => 'element not allowed');
589     }
590 wakaba 1.6 $not_allowed = 1 unless $is_inline;
591 wakaba 1.1 }
592     }
593 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
594     if $not_allowed;
595 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
596     unshift @nodes, @$sib;
597 wakaba 1.4 push @$new_todos, @$ch;
598 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
599     if ($node->data =~ /[^\x09-\x0D\x20]/) {
600     $has_non_style = 1;
601     if ($content eq 'block') {
602     $self->{onerror}->(node => $node, type => 'character not allowed');
603     } else {
604     $content = 'inline';
605     for (@block_not_inline) {
606     $self->{onerror}->(node => $_, type => 'element not allowed');
607     }
608 wakaba 1.1 }
609     }
610 wakaba 1.2 } elsif ($nt == 5) {
611     unshift @nodes, @{$node->child_nodes};
612 wakaba 1.1 }
613     }
614 wakaba 1.4
615     if ($content eq 'inline') {
616     for (@$new_todos) {
617     $_->{inline} = 1;
618     }
619     }
620     return ($new_todos);
621 wakaba 1.2 };
622     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
623 wakaba 1.1
624     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
625    
626 wakaba 1.10 my $GetHTMLEnumeratedAttrChecker = sub {
627     my $states = shift; # {value => conforming ? 1 : -1}
628     return sub {
629     my ($self, $attr) = @_;
630     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
631     if ($states->{$value} > 0) {
632     #
633     } elsif ($states->{$value}) {
634     $self->{onerror}->(node => $attr,
635     type => 'non-conforming enumerated attribute value');
636     } else {
637     $self->{onerror}->(node => $attr,
638     type => 'invalid enumerated attribute value');
639     }
640     };
641     }; # $GetHTMLEnumeratedAttrChecker
642 wakaba 1.9
643 wakaba 1.10 my $GetHTMLBooleanAttrChecker = sub {
644     my $local_name = shift;
645     return sub {
646     my ($self, $attr) = @_;
647     my $value = $attr->value;
648     unless ($value eq $local_name or $value eq '') {
649     $self->{onerror}->(node => $attr,
650     type => 'invalid boolean attribute value');
651     }
652     };
653     }; # $GetHTMLBooleanAttrChecker
654 wakaba 1.9
655 wakaba 1.10 my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
656     my ($self, $attr) = @_;
657     my %word;
658     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
659     unless ($word{$word}) {
660     $word{$word} = 1;
661 wakaba 1.9 } else {
662 wakaba 1.10 $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
663 wakaba 1.9 }
664     }
665 wakaba 1.10 }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
666    
667 wakaba 1.20 ## |rel| attribute (unordered set of space separated tokens,
668     ## whose allowed values are defined by the section on link types)
669     my $HTMLLinkTypesAttrChecker = sub {
670     my ($a_or_area, $self, $attr) = @_;
671     my %word;
672     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
673     unless ($word{$word}) {
674     $word{$word} = 1;
675     } else {
676     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
677     }
678     }
679     ## NOTE: Case sensitive match (since HTML5 spec does not say link
680     ## types are case-insensitive and it says "The value should not
681     ## be confusingly similar to any other defined value (e.g.
682     ## differing only in case).").
683     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
684     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
685     ## values to be used conformingly.
686     require Whatpm::_LinkTypeList;
687     our $LinkType;
688     for my $word (keys %word) {
689     my $def = $LinkType->{$word};
690     if (defined $def) {
691     if ($def->{status} eq 'accepted') {
692     if (defined $def->{effect}->[$a_or_area]) {
693     #
694     } else {
695     $self->{onerror}->(node => $attr,
696     type => 'link type bad context:'.$word);
697     }
698     } elsif ($def->{status} eq 'proposal') {
699     $self->{onerror}->(node => $attr,
700     type => 'proposed link type:'.$word);
701     } else { # rejected or synonym
702     $self->{onerror}->(node => $attr,
703     type => 'non-conforming link type:'.$word);
704     }
705     if ($def->{unique}) {
706     unless ($self->{has_link_type}->{$word}) {
707     $self->{has_link_type}->{$word} = 1;
708     } else {
709     $self->{onerror}->(node => $attr,
710     type => 'link with type not unique:'.$word);
711     }
712     }
713     } else {
714     $self->{onerror}->(node => $attr,
715     type => 'link type not supported:'.$word);
716     }
717     }
718     ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
719     ## says that using both X-Pingback: header field and HTML
720     ## <link rel=pingback> is deprecated and if both appears they
721     ## SHOULD contain exactly the same value.
722     ## ISSUE: Pingback 1.0 specification defines the exact representation
723     ## of its link element, which cannot be tested by the current arch.
724     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
725     ## include any string that matches to the pattern for the rel=pingback link,
726     ## which again inpossible to test.
727     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
728     }; # $HTMLLinkTypesAttrChecker
729    
730 wakaba 1.18 ## URI (or IRI)
731 wakaba 1.11 my $HTMLURIAttrChecker = sub {
732     my ($self, $attr) = @_;
733 wakaba 1.15 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
734 wakaba 1.18 my $value = $attr->value;
735     Whatpm::URIChecker->check_iri_reference ($value, sub {
736     my %opt = @_;
737 wakaba 1.22 $self->{onerror}->(node => $attr,
738     type => 'URI:'.$opt{level}.':'.
739     (defined $opt{position} ? $opt{position} : '').':'.
740     $opt{type});
741 wakaba 1.18 });
742 wakaba 1.11 }; # $HTMLURIAttrChecker
743    
744 wakaba 1.15 ## A space separated list of one or more URIs (or IRIs)
745     my $HTMLSpaceURIsAttrChecker = sub {
746     my ($self, $attr) = @_;
747     ## TODO: URI or IRI check
748     ## ISSUE: Relative references?
749     ## ISSUE: Leading or trailing white spaces are conformant?
750     ## ISSUE: A sequence of white space characters are conformant?
751     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
752     ## NOTE: Duplication seems not an error.
753     }; # $HTMLSpaceURIsAttrChecker
754    
755 wakaba 1.11 my $HTMLIntegerAttrChecker = sub {
756     my ($self, $attr) = @_;
757     my $value = $attr->value;
758     unless ($value =~ /\A-?[0-9]+\z/) {
759 wakaba 1.15 $self->{onerror}->(node => $attr, type => 'integer syntax error');
760 wakaba 1.11 }
761     }; # $HTMLIntegerAttrChecker
762    
763 wakaba 1.12 my $GetHTMLNonNegativeIntegerAttrChecker = sub {
764     my $range_check = shift;
765     return sub {
766     my ($self, $attr) = @_;
767     my $value = $attr->value;
768     if ($value =~ /\A[0-9]+\z/) {
769     unless ($range_check->($value + 0)) {
770     $self->{onerror}->(node => $attr, type => 'out of range');
771     }
772     } else {
773 wakaba 1.15 $self->{onerror}->(node => $attr,
774     type => 'non-negative integer syntax error');
775 wakaba 1.12 }
776     };
777     }; # $GetHTMLNonNegativeIntegerAttrChecker
778    
779 wakaba 1.11 my $GetHTMLFloatingPointNumberAttrChecker = sub {
780     my $range_check = shift;
781     return sub {
782     my ($self, $attr) = @_;
783     my $value = $attr->value;
784     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
785     unless ($range_check->($value + 0)) {
786     $self->{onerror}->(node => $attr, type => 'out of range');
787     }
788     } else {
789 wakaba 1.15 $self->{onerror}->(node => $attr,
790     type => 'floating point number syntax error');
791 wakaba 1.11 }
792     };
793     }; # $GetHTMLFloatingPointNumberAttrChecker
794    
795 wakaba 1.15 ## "A valid MIME type, optionally with parameters. [RFC 2046]"
796     ## ISSUE: RFC 2046 does not define syntax of media types.
797     ## ISSUE: The definition of "a valid MIME type" is unknown.
798     ## Syntactical correctness?
799     my $HTMLIMTAttrChecker = sub {
800     my ($self, $attr) = @_;
801     my $value = $attr->value;
802     ## ISSUE: RFC 2045 Content-Type header field allows insertion
803     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
804     ## ISSUE: RFC 2231 extension? Maybe no.
805     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
806     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
807     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
808 wakaba 1.19 if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
809     my @type = ($1, $2);
810     my $param = $3;
811     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
812     if (defined $2) {
813     push @type, $1 => $2;
814     } else {
815     my $n = $1;
816     my $v = $2;
817     $v =~ s/\\(.)/$1/gs;
818     push @type, $n => $v;
819     }
820     }
821     require Whatpm::IMTChecker;
822     Whatpm::IMTChecker->check_imt (sub {
823     my %opt = @_;
824     $self->{onerror}->(node => $attr,
825     type => 'IMT:'.$opt{level}.':'.$opt{type});
826     }, @type);
827     } else {
828 wakaba 1.15 $self->{onerror}->(node => $attr, type => 'IMT syntax error');
829     }
830     }; # $HTMLIMTAttrChecker
831    
832 wakaba 1.17 my $HTMLLanguageTagAttrChecker = sub {
833     my ($self, $attr) = @_;
834     if ($attr->value eq '') {
835     $self->{onerror}->(node => $attr, type => 'language tag syntax error');
836     }
837     ## TODO: RFC 3066 test
838     ## ISSUE: RFC 4646 (3066bis)?
839     }; # $HTMLLanguageTagAttrChecker
840    
841     ## "A valid media query [MQ]"
842     my $HTMLMQAttrChecker = sub {
843     ## ISSUE: What is "a valid media query"?
844     }; # $HTMLMQAttrChecker
845    
846 wakaba 1.16 my $HTMLEventHandlerAttrChecker = sub {
847     ## TODO: MUST contain valid ECMAScript code matching the
848     ## ECMAScript |FunctionBody| production. [ECMA262]
849     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
850     ## ISSUE: Automatic semicolon insertion does not apply?
851     ## ISSUE: Other script languages?
852     }; # $HTMLEventHandlerAttrChecker
853    
854 wakaba 1.17 my $HTMLUsemapAttrChecker = sub {
855     my ($self, $attr) = @_;
856     ## MUST be a valid hashed ID reference to a |map| element
857     my $value = $attr->value;
858     if ($value =~ s/^#//) {
859     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
860     push @{$self->{usemap}}, [$value => $attr];
861     } else {
862     $self->{onerror}->(node => $attr, type => 'hashed idref syntax error');
863     }
864 wakaba 1.20 ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
865 wakaba 1.17 }; # $HTMLUsemapAttrChecker
866    
867     my $HTMLTargetAttrChecker = sub {
868     my ($self, $attr) = @_;
869     my $value = $attr->value;
870     if ($value =~ /^_/) {
871     $value = lc $value; ## ISSUE: ASCII case-insentitive?
872     unless ({
873     _self => 1, _parent => 1, _top => 1,
874     }->{$value}) {
875     $self->{onerror}->(node => $attr,
876     type => 'reserved browsing context name');
877     }
878     } else {
879     #$ ISSUE: An empty string is conforming?
880     }
881     }; # $HTMLTargetAttrChecker
882    
883 wakaba 1.10 my $HTMLAttrChecker = {
884     id => sub {
885 wakaba 1.17 ## NOTE: |map| has its own variant of |id=""| checker
886 wakaba 1.10 my ($self, $attr) = @_;
887     my $value = $attr->value;
888 wakaba 1.17 if (length $value > 0) {
889 wakaba 1.10 if ($self->{id}->{$value}) {
890     $self->{onerror}->(node => $attr, type => 'duplicate ID');
891     } else {
892     $self->{id}->{$value} = 1;
893     }
894 wakaba 1.17 } else {
895     ## NOTE: MUST contain at least one character
896     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
897 wakaba 1.10 }
898     },
899     title => sub {}, ## NOTE: No conformance creteria
900     lang => sub {
901 wakaba 1.17 ## TODO: RFC 3066 or empty test
902 wakaba 1.10 ## ISSUE: RFC 4646 (3066bis)?
903     ## TODO: HTML vs XHTML
904     },
905     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
906     class => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
907     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
908     ## TODO: tabindex
909     };
910    
911 wakaba 1.16 for (qw/
912     onabort onbeforeunload onblur onchange onclick oncontextmenu
913     ondblclick ondrag ondragend ondragenter ondragleave ondragover
914     ondragstart ondrop onerror onfocus onkeydown onkeypress
915     onkeyup onload onmessage onmousedown onmousemove onmouseout
916     onmouseover onmouseup onmousewheel onresize onscroll onselect
917     onsubmit onunload
918     /) {
919     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
920     }
921    
922 wakaba 1.10 my $GetHTMLAttrsChecker = sub {
923     my $element_specific_checker = shift;
924     return sub {
925     my ($self, $todo) = @_;
926     for my $attr (@{$todo->{node}->attributes}) {
927     my $attr_ns = $attr->namespace_uri;
928     $attr_ns = '' unless defined $attr_ns;
929     my $attr_ln = $attr->manakai_local_name;
930     my $checker;
931     if ($attr_ns eq '') {
932     $checker = $element_specific_checker->{$attr_ln}
933     || $HTMLAttrChecker->{$attr_ln};
934     }
935     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
936     || $AttrChecker->{$attr_ns}->{''};
937     if ($checker) {
938     $checker->($self, $attr);
939     } else {
940     $self->{onerror}->(node => $attr, type => 'attribute not supported');
941     ## ISSUE: No comformance createria for unknown attributes in the spec
942     }
943     }
944     };
945     }; # $GetHTMLAttrsChecker
946 wakaba 1.9
947     $Element->{$HTML_NS}->{''} = {
948 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
949 wakaba 1.9 checker => $ElementDefault->{checker},
950     };
951    
952 wakaba 1.1 $Element->{$HTML_NS}->{html} = {
953 wakaba 1.24 is_root => 1,
954 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
955     xmlns => sub {
956     my ($self, $attr) = @_;
957     my $value = $attr->value;
958     unless ($value eq $HTML_NS) {
959     $self->{onerror}->(node => $attr, type => 'syntax error');
960     ## TODO: only in HTML documents
961     }
962     },
963     }),
964 wakaba 1.1 checker => sub {
965 wakaba 1.4 my ($self, $todo) = @_;
966     my $el = $todo->{node};
967     my $new_todos = [];
968 wakaba 1.1 my @nodes = (@{$el->child_nodes});
969    
970     my $phase = 'before head';
971     while (@nodes) {
972     my $node = shift @nodes;
973 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
974    
975 wakaba 1.1 my $nt = $node->node_type;
976     if ($nt == 1) {
977 wakaba 1.2 my $node_ns = $node->namespace_uri;
978     $node_ns = '' unless defined $node_ns;
979     my $node_ln = $node->manakai_local_name;
980 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
981 wakaba 1.1 if ($phase eq 'before head') {
982 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
983 wakaba 1.1 $phase = 'after head';
984 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
985     $self->{onerror}->(node => $node, type => 'ps element missing:head');
986 wakaba 1.1 $phase = 'after body';
987     } else {
988 wakaba 1.6 $not_allowed = 1;
989 wakaba 1.1 # before head
990     }
991     } elsif ($phase eq 'after head') {
992 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
993 wakaba 1.1 $phase = 'after body';
994     } else {
995 wakaba 1.6 $not_allowed = 1;
996 wakaba 1.1 # after head
997     }
998     } else { #elsif ($phase eq 'after body') {
999 wakaba 1.6 $not_allowed = 1;
1000 wakaba 1.1 # after body
1001     }
1002 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1003     if $not_allowed;
1004 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1005     unshift @nodes, @$sib;
1006 wakaba 1.4 push @$new_todos, @$ch;
1007 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1008     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1009 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1010 wakaba 1.1 }
1011     } elsif ($nt == 5) {
1012     unshift @nodes, @{$node->child_nodes};
1013     }
1014     }
1015 wakaba 1.3
1016     if ($phase eq 'before head') {
1017     $self->{onerror}->(node => $el, type => 'child element missing:head');
1018     $self->{onerror}->(node => $el, type => 'child element missing:body');
1019     } elsif ($phase eq 'after head') {
1020     $self->{onerror}->(node => $el, type => 'child element missing:body');
1021     }
1022    
1023 wakaba 1.4 return ($new_todos);
1024 wakaba 1.1 },
1025     };
1026    
1027     $Element->{$HTML_NS}->{head} = {
1028 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1029 wakaba 1.1 checker => sub {
1030 wakaba 1.4 my ($self, $todo) = @_;
1031     my $el = $todo->{node};
1032     my $new_todos = [];
1033 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1034    
1035     my $has_title;
1036 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
1037 wakaba 1.1 while (@nodes) {
1038     my $node = shift @nodes;
1039 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1040    
1041 wakaba 1.1 my $nt = $node->node_type;
1042     if ($nt == 1) {
1043 wakaba 1.2 my $node_ns = $node->namespace_uri;
1044     $node_ns = '' unless defined $node_ns;
1045     my $node_ln = $node->manakai_local_name;
1046 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1047 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1048 wakaba 1.3 $phase = 'after base';
1049 wakaba 1.1 unless ($has_title) {
1050     $has_title = 1;
1051     } else {
1052 wakaba 1.6 $not_allowed = 1;
1053 wakaba 1.1 }
1054 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1055 wakaba 1.1 if ($node->has_attribute_ns (undef, 'charset')) {
1056 wakaba 1.3 if ($phase eq 'initial') {
1057     $phase = 'after charset';
1058 wakaba 1.1 } else {
1059 wakaba 1.6 $not_allowed = 1;
1060 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
1061 wakaba 1.1 }
1062     } else {
1063 wakaba 1.3 $phase = 'after base';
1064 wakaba 1.1 }
1065 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1066 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
1067     $phase = 'after base';
1068 wakaba 1.1 } else {
1069 wakaba 1.6 $not_allowed = 1;
1070 wakaba 1.1 }
1071 wakaba 1.7 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1072     $phase = 'after base';
1073 wakaba 1.1 } else {
1074 wakaba 1.7 $not_allowed = 1;
1075 wakaba 1.1 }
1076 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1077     if $not_allowed;
1078 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1079     unshift @nodes, @$sib;
1080 wakaba 1.4 push @$new_todos, @$ch;
1081 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1082     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1083 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1084 wakaba 1.1 }
1085     } elsif ($nt == 5) {
1086     unshift @nodes, @{$node->child_nodes};
1087     }
1088     }
1089     unless ($has_title) {
1090 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
1091 wakaba 1.1 }
1092 wakaba 1.4 return ($new_todos);
1093 wakaba 1.1 },
1094     };
1095    
1096     $Element->{$HTML_NS}->{title} = {
1097 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1098 wakaba 1.1 checker => $HTMLTextChecker,
1099     };
1100    
1101     $Element->{$HTML_NS}->{base} = {
1102 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1103 wakaba 1.11 href => $HTMLURIAttrChecker,
1104 wakaba 1.17 target => $HTMLTargetAttrChecker,
1105 wakaba 1.10 }),
1106 wakaba 1.1 checker => $HTMLEmptyChecker,
1107     };
1108    
1109     $Element->{$HTML_NS}->{link} = {
1110 wakaba 1.16 attrs_checker => sub {
1111     my ($self, $todo) = @_;
1112     $GetHTMLAttrsChecker->({
1113     href => $HTMLURIAttrChecker,
1114 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(0, @_) },
1115 wakaba 1.17 media => $HTMLMQAttrChecker,
1116     hreflang => $HTMLLanguageTagAttrChecker,
1117 wakaba 1.16 type => $HTMLIMTAttrChecker,
1118     ## NOTE: Though |title| has special semantics,
1119     ## syntactically same as the |title| as global attribute.
1120     })->($self, $todo);
1121     unless ($todo->{node}->has_attribute_ns (undef, 'href')) {
1122     $self->{onerror}->(node => $todo->{node},
1123     type => 'attribute missing:href');
1124     }
1125     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1126     $self->{onerror}->(node => $todo->{node},
1127     type => 'attribute missing:rel');
1128     }
1129     },
1130 wakaba 1.1 checker => $HTMLEmptyChecker,
1131     };
1132    
1133     $Element->{$HTML_NS}->{meta} = {
1134 wakaba 1.10 attrs_checker => sub {
1135     my ($self, $todo) = @_;
1136     my $name_attr;
1137     my $http_equiv_attr;
1138     my $charset_attr;
1139     my $content_attr;
1140     for my $attr (@{$todo->{node}->attributes}) {
1141     my $attr_ns = $attr->namespace_uri;
1142     $attr_ns = '' unless defined $attr_ns;
1143     my $attr_ln = $attr->manakai_local_name;
1144     my $checker;
1145     if ($attr_ns eq '') {
1146     if ($attr_ln eq 'content') {
1147     $content_attr = $attr;
1148     $checker = 1;
1149     } elsif ($attr_ln eq 'name') {
1150     $name_attr = $attr;
1151     $checker = 1;
1152     } elsif ($attr_ln eq 'http-equiv') {
1153     $http_equiv_attr = $attr;
1154     $checker = 1;
1155     } elsif ($attr_ln eq 'charset') {
1156     $charset_attr = $attr;
1157     $checker = 1;
1158     } else {
1159     $checker = $HTMLAttrChecker->{$attr_ln}
1160     || $AttrChecker->{$attr_ns}->{$attr_ln}
1161     || $AttrChecker->{$attr_ns}->{''};
1162     }
1163     } else {
1164     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1165     || $AttrChecker->{$attr_ns}->{''};
1166     }
1167     if ($checker) {
1168     $checker->($self, $attr) if ref $checker;
1169     } else {
1170     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1171     ## ISSUE: No comformance createria for unknown attributes in the spec
1172     }
1173     }
1174    
1175     if (defined $name_attr) {
1176     if (defined $http_equiv_attr) {
1177     $self->{onerror}->(node => $http_equiv_attr,
1178     type => 'attribute not allowed');
1179     } elsif (defined $charset_attr) {
1180     $self->{onerror}->(node => $charset_attr,
1181     type => 'attribute not allowed');
1182     }
1183     my $metadata_name = $name_attr->value;
1184     my $metadata_value;
1185     if (defined $content_attr) {
1186     $metadata_value = $content_attr->value;
1187     } else {
1188     $self->{onerror}->(node => $todo->{node},
1189     type => 'attribute missing:content');
1190     $metadata_value = '';
1191     }
1192     } elsif (defined $http_equiv_attr) {
1193     if (defined $charset_attr) {
1194     $self->{onerror}->(node => $charset_attr,
1195     type => 'attribute not allowed');
1196     }
1197     unless (defined $content_attr) {
1198     $self->{onerror}->(node => $todo->{node},
1199     type => 'attribute missing:content');
1200     }
1201     } elsif (defined $charset_attr) {
1202     if (defined $content_attr) {
1203     $self->{onerror}->(node => $content_attr,
1204     type => 'attribute not allowed');
1205     }
1206     ## TODO: Allowed only in HTML documents
1207     } else {
1208     if (defined $content_attr) {
1209     $self->{onerror}->(node => $content_attr,
1210     type => 'attribute not allowed');
1211     $self->{onerror}->(node => $todo->{node},
1212     type => 'attribute missing:name|http-equiv');
1213     } else {
1214     $self->{onerror}->(node => $todo->{node},
1215     type => 'attribute missing:name|http-equiv|charset');
1216     }
1217     }
1218    
1219     ## TODO: metadata conformance
1220    
1221     ## TODO: pragma conformance
1222     if (defined $http_equiv_attr) { ## An enumerated attribute
1223     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1224     if ({
1225     'refresh' => 1,
1226     'default-style' => 1,
1227     }->{$keyword}) {
1228     #
1229     } else {
1230     $self->{onerror}->(node => $http_equiv_attr,
1231     type => 'invalid enumerated attribute value');
1232     }
1233     }
1234    
1235     ## TODO: charset
1236     },
1237 wakaba 1.1 checker => $HTMLEmptyChecker,
1238     };
1239    
1240     ## NOTE: |html:style| has no conformance creteria on content model
1241 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
1242 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1243 wakaba 1.15 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1244 wakaba 1.17 media => $HTMLMQAttrChecker,
1245 wakaba 1.10 scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1246     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1247     ## not different
1248     }),
1249 wakaba 1.3 checker => $AnyChecker,
1250     };
1251 wakaba 1.1
1252     $Element->{$HTML_NS}->{body} = {
1253 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1254 wakaba 1.1 checker => $HTMLBlockChecker,
1255     };
1256    
1257     $Element->{$HTML_NS}->{section} = {
1258 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1259 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1260     };
1261    
1262     $Element->{$HTML_NS}->{nav} = {
1263 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1264 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1265     };
1266    
1267     $Element->{$HTML_NS}->{article} = {
1268 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1269 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1270     };
1271    
1272     $Element->{$HTML_NS}->{blockquote} = {
1273 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1274     cite => $HTMLURIAttrChecker,
1275     }),
1276 wakaba 1.1 checker => $HTMLBlockChecker,
1277     };
1278    
1279     $Element->{$HTML_NS}->{aside} = {
1280 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1281 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1282 wakaba 1.1 };
1283    
1284     $Element->{$HTML_NS}->{h1} = {
1285 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1286 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1287     };
1288    
1289     $Element->{$HTML_NS}->{h2} = {
1290 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1291 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1292     };
1293    
1294     $Element->{$HTML_NS}->{h3} = {
1295 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1296 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1297     };
1298    
1299     $Element->{$HTML_NS}->{h4} = {
1300 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1301 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1302     };
1303    
1304     $Element->{$HTML_NS}->{h5} = {
1305 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1306 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1307     };
1308    
1309     $Element->{$HTML_NS}->{h6} = {
1310 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1311 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1312     };
1313    
1314     ## TODO: header
1315    
1316 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
1317 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1318 wakaba 1.2 checker => sub { ## block -hn -header -footer -sectioning or inline
1319 wakaba 1.4 my ($self, $todo) = @_;
1320     my $el = $todo->{node};
1321     my $new_todos = [];
1322 wakaba 1.2 my @nodes = (@{$el->child_nodes});
1323    
1324     my $content = 'block-or-inline'; # or 'block' or 'inline'
1325     my @block_not_inline;
1326     while (@nodes) {
1327     my $node = shift @nodes;
1328     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1329    
1330     my $nt = $node->node_type;
1331     if ($nt == 1) {
1332     my $node_ns = $node->namespace_uri;
1333     $node_ns = '' unless defined $node_ns;
1334     my $node_ln = $node->manakai_local_name;
1335 wakaba 1.6 my $not_allowed;
1336 wakaba 1.2 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1337 wakaba 1.6 $not_allowed = 1;
1338 wakaba 1.2 } elsif ($node_ns eq $HTML_NS and
1339     {
1340     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1341     }->{$node_ln}) {
1342 wakaba 1.6 $not_allowed = 1;
1343 wakaba 1.2 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1344 wakaba 1.6 $not_allowed = 1;
1345 wakaba 1.2 }
1346     if ($content eq 'block') {
1347 wakaba 1.7 $not_allowed = 1
1348     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1349 wakaba 1.2 } elsif ($content eq 'inline') {
1350 wakaba 1.7 $not_allowed = 1
1351     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1352     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1353 wakaba 1.2 } else {
1354 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1355     my $is_inline
1356     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1357     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1358 wakaba 1.2
1359 wakaba 1.6 push @block_not_inline, $node
1360     if $is_block and not $is_inline and not $not_allowed;
1361 wakaba 1.2 unless ($is_block) {
1362     $content = 'inline';
1363     for (@block_not_inline) {
1364     $self->{onerror}->(node => $_, type => 'element not allowed');
1365     }
1366 wakaba 1.6 $not_allowed = 1 unless $is_inline;
1367 wakaba 1.2 }
1368     }
1369 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1370     if $not_allowed;
1371 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1372     unshift @nodes, @$sib;
1373 wakaba 1.4 push @$new_todos, @$ch;
1374 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
1375     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1376     if ($content eq 'block') {
1377     $self->{onerror}->(node => $node, type => 'character not allowed');
1378     } else {
1379     $content = 'inline';
1380     for (@block_not_inline) {
1381     $self->{onerror}->(node => $_, type => 'element not allowed');
1382     }
1383     }
1384     }
1385     } elsif ($nt == 5) {
1386     unshift @nodes, @{$node->child_nodes};
1387     }
1388     }
1389    
1390     my $end = $self->_add_minuses
1391     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1392     $HTMLSectioningElements);
1393 wakaba 1.4 push @$new_todos, $end;
1394 wakaba 1.2
1395 wakaba 1.4 if ($content eq 'inline') {
1396     for (@$new_todos) {
1397     $_->{inline} = 1;
1398     }
1399     }
1400    
1401     return ($new_todos);
1402 wakaba 1.2 },
1403     };
1404 wakaba 1.1
1405     $Element->{$HTML_NS}->{address} = {
1406 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1407 wakaba 1.1 checker => $HTMLInlineChecker,
1408     };
1409    
1410     $Element->{$HTML_NS}->{p} = {
1411 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1412 wakaba 1.1 checker => $HTMLSignificantInlineChecker,
1413     };
1414    
1415     $Element->{$HTML_NS}->{hr} = {
1416 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1417 wakaba 1.1 checker => $HTMLEmptyChecker,
1418     };
1419    
1420     $Element->{$HTML_NS}->{br} = {
1421 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1422 wakaba 1.1 checker => $HTMLEmptyChecker,
1423     };
1424    
1425     $Element->{$HTML_NS}->{dialog} = {
1426 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1427 wakaba 1.1 checker => sub {
1428 wakaba 1.4 my ($self, $todo) = @_;
1429     my $el = $todo->{node};
1430     my $new_todos = [];
1431 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1432    
1433     my $phase = 'before dt';
1434     while (@nodes) {
1435     my $node = shift @nodes;
1436 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1437    
1438 wakaba 1.1 my $nt = $node->node_type;
1439     if ($nt == 1) {
1440 wakaba 1.8 my $node_ns = $node->namespace_uri;
1441     $node_ns = '' unless defined $node_ns;
1442     my $node_ln = $node->manakai_local_name;
1443 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1444 wakaba 1.1 if ($phase eq 'before dt') {
1445 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1446 wakaba 1.1 $phase = 'before dd';
1447 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1448 wakaba 1.2 $self->{onerror}
1449 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1450 wakaba 1.1 $phase = 'before dt';
1451     } else {
1452 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1453 wakaba 1.1 }
1454     } else { # before dd
1455 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1456 wakaba 1.1 $phase = 'before dt';
1457 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1458 wakaba 1.2 $self->{onerror}
1459 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
1460 wakaba 1.1 $phase = 'before dd';
1461     } else {
1462 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1463 wakaba 1.1 }
1464     }
1465 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1466     unshift @nodes, @$sib;
1467 wakaba 1.4 push @$new_todos, @$ch;
1468 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1469     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1470 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1471 wakaba 1.1 }
1472     } elsif ($nt == 5) {
1473     unshift @nodes, @{$node->child_nodes};
1474     }
1475     }
1476     if ($phase eq 'before dd') {
1477 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1478 wakaba 1.1 }
1479 wakaba 1.4 return ($new_todos);
1480 wakaba 1.1 },
1481     };
1482    
1483     $Element->{$HTML_NS}->{pre} = {
1484 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1485 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1486     };
1487    
1488     $Element->{$HTML_NS}->{ol} = {
1489 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1490     start => $HTMLIntegerAttrChecker,
1491     }),
1492 wakaba 1.1 checker => sub {
1493 wakaba 1.4 my ($self, $todo) = @_;
1494     my $el = $todo->{node};
1495     my $new_todos = [];
1496 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1497    
1498     while (@nodes) {
1499     my $node = shift @nodes;
1500 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1501    
1502 wakaba 1.1 my $nt = $node->node_type;
1503     if ($nt == 1) {
1504 wakaba 1.8 my $node_ns = $node->namespace_uri;
1505     $node_ns = '' unless defined $node_ns;
1506     my $node_ln = $node->manakai_local_name;
1507 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1508 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1509 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1510 wakaba 1.1 }
1511 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1512     unshift @nodes, @$sib;
1513 wakaba 1.4 push @$new_todos, @$ch;
1514 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1515     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1516 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1517 wakaba 1.1 }
1518     } elsif ($nt == 5) {
1519     unshift @nodes, @{$node->child_nodes};
1520     }
1521     }
1522 wakaba 1.4
1523     if ($todo->{inline}) {
1524     for (@$new_todos) {
1525     $_->{inline} = 1;
1526     }
1527     }
1528     return ($new_todos);
1529 wakaba 1.1 },
1530     };
1531    
1532     $Element->{$HTML_NS}->{ul} = {
1533 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1534 wakaba 1.1 checker => $Element->{$HTML_NS}->{ol}->{checker},
1535     };
1536    
1537 wakaba 1.5
1538     $Element->{$HTML_NS}->{li} = {
1539 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1540     start => sub {
1541     my ($self, $attr) = @_;
1542     my $parent = $attr->owner_element->manakai_parent_element;
1543     if (defined $parent) {
1544     my $parent_ns = $parent->namespace_uri;
1545     $parent_ns = '' unless defined $parent_ns;
1546     my $parent_ln = $parent->manakai_local_name;
1547     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1548     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1549     }
1550     }
1551     $HTMLIntegerAttrChecker->($self, $attr);
1552     },
1553     }),
1554 wakaba 1.5 checker => sub {
1555     my ($self, $todo) = @_;
1556     if ($todo->{inline}) {
1557     return $HTMLInlineChecker->($self, $todo);
1558     } else {
1559     return $HTMLBlockOrInlineChecker->($self, $todo);
1560     }
1561     },
1562     };
1563 wakaba 1.1
1564     $Element->{$HTML_NS}->{dl} = {
1565 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1566 wakaba 1.1 checker => sub {
1567 wakaba 1.4 my ($self, $todo) = @_;
1568     my $el = $todo->{node};
1569     my $new_todos = [];
1570 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1571    
1572     my $phase = 'before dt';
1573     while (@nodes) {
1574     my $node = shift @nodes;
1575 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1576    
1577 wakaba 1.1 my $nt = $node->node_type;
1578     if ($nt == 1) {
1579 wakaba 1.8 my $node_ns = $node->namespace_uri;
1580     $node_ns = '' unless defined $node_ns;
1581     my $node_ln = $node->manakai_local_name;
1582 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1583 wakaba 1.1 if ($phase eq 'in dds') {
1584 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1585 wakaba 1.1 #$phase = 'in dds';
1586 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1587 wakaba 1.1 $phase = 'in dts';
1588     } else {
1589 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1590 wakaba 1.1 }
1591     } elsif ($phase eq 'in dts') {
1592 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1593 wakaba 1.1 #$phase = 'in dts';
1594 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1595 wakaba 1.1 $phase = 'in dds';
1596     } else {
1597 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1598 wakaba 1.1 }
1599     } else { # before dt
1600 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1601 wakaba 1.1 $phase = 'in dts';
1602 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1603 wakaba 1.2 $self->{onerror}
1604 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1605 wakaba 1.1 $phase = 'in dds';
1606     } else {
1607 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1608 wakaba 1.1 }
1609     }
1610 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1611     unshift @nodes, @$sib;
1612 wakaba 1.4 push @$new_todos, @$ch;
1613 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1614     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1615 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1616 wakaba 1.1 }
1617     } elsif ($nt == 5) {
1618     unshift @nodes, @{$node->child_nodes};
1619     }
1620     }
1621     if ($phase eq 'in dts') {
1622 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1623 wakaba 1.1 }
1624 wakaba 1.4
1625     if ($todo->{inline}) {
1626     for (@$new_todos) {
1627     $_->{inline} = 1;
1628     }
1629     }
1630     return ($new_todos);
1631 wakaba 1.1 },
1632     };
1633    
1634     $Element->{$HTML_NS}->{dt} = {
1635 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1636 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1637     };
1638    
1639 wakaba 1.4 $Element->{$HTML_NS}->{dd} = {
1640 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1641 wakaba 1.5 checker => $Element->{$HTML_NS}->{li}->{checker},
1642 wakaba 1.4 };
1643 wakaba 1.1
1644 wakaba 1.6 $Element->{$HTML_NS}->{a} = {
1645 wakaba 1.17 attrs_checker => sub {
1646     my ($self, $todo) = @_;
1647 wakaba 1.15 my %attr;
1648 wakaba 1.17 for my $attr (@{$todo->{node}->attributes}) {
1649     my $attr_ns = $attr->namespace_uri;
1650     $attr_ns = '' unless defined $attr_ns;
1651     my $attr_ln = $attr->manakai_local_name;
1652     my $checker;
1653     if ($attr_ns eq '') {
1654     $checker = {
1655     target => $HTMLTargetAttrChecker,
1656     href => $HTMLURIAttrChecker,
1657     ping => $HTMLSpaceURIsAttrChecker,
1658 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(1, @_) },
1659 wakaba 1.17 media => $HTMLMQAttrChecker,
1660     hreflang => $HTMLLanguageTagAttrChecker,
1661     type => $HTMLIMTAttrChecker,
1662     }->{$attr_ln};
1663     if ($checker) {
1664     $attr{$attr_ln} = $attr;
1665     } else {
1666     $checker = $HTMLAttrChecker->{$attr_ln};
1667     }
1668     }
1669     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1670     || $AttrChecker->{$attr_ns}->{''};
1671     if ($checker) {
1672     $checker->($self, $attr) if ref $checker;
1673     } else {
1674     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1675     ## ISSUE: No comformance createria for unknown attributes in the spec
1676     }
1677     }
1678    
1679     unless (defined $attr{href}) {
1680     for (qw/target ping rel media hreflang type/) {
1681     if (defined $attr{$_}) {
1682     $self->{onerror}->(node => $attr{$_},
1683     type => 'attribute not allowed');
1684 wakaba 1.15 }
1685     }
1686 wakaba 1.17 }
1687 wakaba 1.15 },
1688 wakaba 1.6 checker => sub {
1689     my ($self, $todo) = @_;
1690    
1691     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1692     my ($sib, $ch)
1693     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1694     push @$sib, $end;
1695     return ($sib, $ch);
1696     },
1697     };
1698 wakaba 1.1
1699 wakaba 1.4 $Element->{$HTML_NS}->{q} = {
1700 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1701     cite => $HTMLURIAttrChecker,
1702     }),
1703 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1704     };
1705 wakaba 1.1
1706     $Element->{$HTML_NS}->{cite} = {
1707 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1708 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1709     };
1710    
1711 wakaba 1.4 $Element->{$HTML_NS}->{em} = {
1712 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1713 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1714     };
1715    
1716     $Element->{$HTML_NS}->{strong} = {
1717 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1718 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1719     };
1720 wakaba 1.1
1721 wakaba 1.4 $Element->{$HTML_NS}->{small} = {
1722 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1723 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1724     };
1725    
1726     $Element->{$HTML_NS}->{m} = {
1727 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1728 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1729     };
1730    
1731 wakaba 1.11 $Element->{$HTML_NS}->{dfn} = { ## TODO: term duplication
1732 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1733 wakaba 1.4 checker => sub {
1734     my ($self, $todo) = @_;
1735    
1736     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1737     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1738     push @$sib, $end;
1739     return ($sib, $ch);
1740     },
1741     };
1742 wakaba 1.1
1743     $Element->{$HTML_NS}->{abbr} = {
1744 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1745     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1746     ## not different. The spec says that the |title| MAY be omitted
1747     ## if there is a |dfn| whose defining term is the abbreviation,
1748     ## but it does not prohibit |abbr| w/o |title| in other cases.
1749     }),
1750 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1751     };
1752    
1753 wakaba 1.11 $Element->{$HTML_NS}->{time} = { ## TODO: validate content
1754     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO: datetime
1755 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1756     };
1757    
1758 wakaba 1.11 $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1759     attrs_checker => $GetHTMLAttrsChecker->({
1760     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1761     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1762     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1763     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1764     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1765     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1766     }),
1767 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1768     };
1769    
1770 wakaba 1.11 $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1771     attrs_checker => $GetHTMLAttrsChecker->({
1772     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1773     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1774     }),
1775 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1776     };
1777    
1778 wakaba 1.4 $Element->{$HTML_NS}->{code} = {
1779 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1780 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1781     ## syntatically same as the |title| as global attribute.
1782 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1783     };
1784 wakaba 1.1
1785     $Element->{$HTML_NS}->{var} = {
1786 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1787 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1788     ## syntatically same as the |title| as global attribute.
1789 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1790     };
1791    
1792 wakaba 1.4 $Element->{$HTML_NS}->{samp} = {
1793 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1794 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1795     ## syntatically same as the |title| as global attribute.
1796 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1797     };
1798 wakaba 1.1
1799     $Element->{$HTML_NS}->{kbd} = {
1800 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1801 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1802     };
1803    
1804     $Element->{$HTML_NS}->{sub} = {
1805 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1806 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1807     };
1808    
1809     $Element->{$HTML_NS}->{sup} = {
1810 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1811 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1812     };
1813    
1814 wakaba 1.4 $Element->{$HTML_NS}->{span} = {
1815 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1816 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1817     ## syntatically same as the |title| as global attribute.
1818 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1819     };
1820 wakaba 1.1
1821     $Element->{$HTML_NS}->{i} = {
1822 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1823 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1824     ## syntatically same as the |title| as global attribute.
1825 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1826     };
1827    
1828     $Element->{$HTML_NS}->{b} = {
1829 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1830 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1831     };
1832    
1833     $Element->{$HTML_NS}->{bdo} = {
1834 wakaba 1.12 attrs_checker => sub {
1835     my ($self, $todo) = @_;
1836     $GetHTMLAttrsChecker->({})->($self, $todo);
1837     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
1838     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
1839     }
1840     },
1841     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1842 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1843     };
1844    
1845     $Element->{$HTML_NS}->{ins} = {
1846 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1847     cite => $HTMLURIAttrChecker,
1848     ## TODO: datetime
1849     }),
1850 wakaba 1.1 checker => $HTMLTransparentChecker,
1851     };
1852    
1853     $Element->{$HTML_NS}->{del} = {
1854 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1855     cite => $HTMLURIAttrChecker,
1856     ## TODO: datetime
1857     }),
1858 wakaba 1.1 checker => sub {
1859 wakaba 1.4 my ($self, $todo) = @_;
1860 wakaba 1.1
1861 wakaba 1.4 my $parent = $todo->{node}->manakai_parent_element;
1862 wakaba 1.1 if (defined $parent) {
1863     my $nsuri = $parent->namespace_uri;
1864     $nsuri = '' unless defined $nsuri;
1865     my $ln = $parent->manakai_local_name;
1866     my $eldef = $Element->{$nsuri}->{$ln} ||
1867     $Element->{$nsuri}->{''} ||
1868     $ElementDefault;
1869 wakaba 1.4 return $eldef->{checker}->($self, $todo);
1870 wakaba 1.1 } else {
1871 wakaba 1.4 return $HTMLBlockOrInlineChecker->($self, $todo);
1872 wakaba 1.1 }
1873     },
1874     };
1875    
1876     ## TODO: figure
1877    
1878     $Element->{$HTML_NS}->{img} = {
1879 wakaba 1.17 attrs_checker => sub {
1880     my ($self, $todo) = @_;
1881     $GetHTMLAttrsChecker->({
1882     alt => sub { }, ## NOTE: No syntactical requirement
1883     src => $HTMLURIAttrChecker,
1884     usemap => $HTMLUsemapAttrChecker,
1885     ismap => $GetHTMLBooleanAttrChecker->('ismap'), ## TODO: MUST ancestor <a>
1886     ## TODO: height
1887     ## TODO: width
1888     })->($self, $todo);
1889     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
1890     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
1891     }
1892     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
1893     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
1894     }
1895     },
1896 wakaba 1.1 checker => $HTMLEmptyChecker,
1897     };
1898    
1899     $Element->{$HTML_NS}->{iframe} = {
1900 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1901     src => $HTMLURIAttrChecker,
1902     }),
1903 wakaba 1.1 checker => $HTMLTextChecker,
1904     };
1905    
1906     $Element->{$HTML_NS}->{embed} = {
1907 wakaba 1.16 attrs_checker => sub {
1908     my ($self, $todo) = @_;
1909     my $has_src;
1910     for my $attr (@{$todo->{node}->attributes}) {
1911     my $attr_ns = $attr->namespace_uri;
1912     $attr_ns = '' unless defined $attr_ns;
1913     my $attr_ln = $attr->manakai_local_name;
1914     my $checker;
1915     if ($attr_ns eq '') {
1916     if ($attr_ln eq 'src') {
1917     $checker = $HTMLURIAttrChecker;
1918     $has_src = 1;
1919     } elsif ($attr_ln eq 'type') {
1920     $checker = $HTMLIMTAttrChecker;
1921     } else {
1922     ## TODO: height
1923     ## TODO: width
1924     $checker = $HTMLAttrChecker->{$attr_ln}
1925     || sub { }; ## NOTE: Any local attribute is ok.
1926     }
1927     }
1928     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1929     || $AttrChecker->{$attr_ns}->{''};
1930     if ($checker) {
1931     $checker->($self, $attr);
1932     } else {
1933     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1934     ## ISSUE: No comformance createria for global attributes in the spec
1935     }
1936     }
1937    
1938     unless ($has_src) {
1939     $self->{onerror}->(node => $todo->{node},
1940     type => 'attribute missing:src');
1941     }
1942     },
1943 wakaba 1.1 checker => $HTMLEmptyChecker,
1944     };
1945    
1946 wakaba 1.15 $Element->{$HTML_NS}->{object} = {
1947 wakaba 1.17 attrs_checker => sub {
1948     my ($self, $todo) = @_;
1949     $GetHTMLAttrsChecker->({
1950     data => $HTMLURIAttrChecker,
1951     type => $HTMLIMTAttrChecker,
1952     usemap => $HTMLUsemapAttrChecker,
1953     ## TODO: width
1954     ## TODO: height
1955     })->($self, $todo);
1956     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
1957     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
1958     $self->{onerror}->(node => $todo->{node},
1959     type => 'attribute missing:data|type');
1960     }
1961     }
1962     },
1963 wakaba 1.15 checker => $ElementDefault->{checker}, ## TODO
1964     };
1965    
1966 wakaba 1.1 $Element->{$HTML_NS}->{param} = {
1967 wakaba 1.12 attrs_checker => sub {
1968     my ($self, $todo) = @_;
1969     $GetHTMLAttrsChecker->({
1970     name => sub { },
1971     value => sub { },
1972     })->($self, $todo);
1973     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
1974     $self->{onerror}->(node => $todo->{node},
1975     type => 'attribute missing:name');
1976     }
1977     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
1978     $self->{onerror}->(node => $todo->{node},
1979     type => 'attribute missing:value');
1980     }
1981     },
1982 wakaba 1.1 checker => $HTMLEmptyChecker,
1983     };
1984    
1985 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
1986 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1987     src => $HTMLURIAttrChecker,
1988     ## TODO: start, loopstart, loopend, end
1989     ## ISSUE: they MUST be "value time offset"s. Value?
1990     ## ISSUE: loopcount has no conformance creteria
1991     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
1992     controls => $GetHTMLBooleanAttrChecker->('controls'),
1993     }),
1994 wakaba 1.2 checker => sub {
1995 wakaba 1.4 my ($self, $todo) = @_;
1996 wakaba 1.2
1997 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1998     return $HTMLBlockOrInlineChecker->($self, $todo);
1999 wakaba 1.2 } else {
2000     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2001 wakaba 1.4 ->($self, $todo);
2002 wakaba 1.2 }
2003     },
2004     };
2005    
2006     $Element->{$HTML_NS}->{audio} = {
2007 wakaba 1.12 attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2008     checker => $Element->{$HTML_NS}->{video}->{checker},
2009 wakaba 1.2 };
2010 wakaba 1.1
2011     $Element->{$HTML_NS}->{source} = {
2012 wakaba 1.17 attrs_checker => sub {
2013     my ($self, $todo) = @_;
2014     $GetHTMLAttrsChecker->({
2015     src => $HTMLURIAttrChecker,
2016     type => $HTMLIMTAttrChecker,
2017     media => $HTMLMQAttrChecker,
2018     })->($self, $todo);
2019     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2020     $self->{onerror}->(node => $todo->{node},
2021     type => 'attribute missing:src');
2022     }
2023     },
2024 wakaba 1.1 checker => $HTMLEmptyChecker,
2025     };
2026    
2027     $Element->{$HTML_NS}->{canvas} = {
2028 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2029     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2030     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2031     }),
2032 wakaba 1.1 checker => $HTMLInlineChecker,
2033     };
2034    
2035     $Element->{$HTML_NS}->{map} = {
2036 wakaba 1.17 attrs_checker => $GetHTMLAttrsChecker->({
2037     id => sub {
2038     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2039     my ($self, $attr) = @_;
2040     my $value = $attr->value;
2041     if (length $value > 0) {
2042     if ($self->{id}->{$value}) {
2043     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2044     } else {
2045     $self->{id}->{$value} = 1;
2046     }
2047     } else {
2048     ## NOTE: MUST contain at least one character
2049     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
2050     }
2051     $self->{map}->{$value} ||= $attr;
2052     },
2053     }),
2054 wakaba 1.1 checker => $HTMLBlockChecker,
2055     };
2056    
2057     $Element->{$HTML_NS}->{area} = {
2058 wakaba 1.15 attrs_checker => sub {
2059     my ($self, $todo) = @_;
2060     my %attr;
2061     my $coords;
2062     for my $attr (@{$todo->{node}->attributes}) {
2063     my $attr_ns = $attr->namespace_uri;
2064     $attr_ns = '' unless defined $attr_ns;
2065     my $attr_ln = $attr->manakai_local_name;
2066     my $checker;
2067     if ($attr_ns eq '') {
2068     $checker = {
2069     alt => sub { },
2070     ## NOTE: |alt| value has no conformance creteria.
2071     shape => $GetHTMLEnumeratedAttrChecker->({
2072     circ => -1, circle => 1,
2073     default => 1,
2074     poly => 1, polygon => -1,
2075     rect => 1, rectangle => -1,
2076     }),
2077     coords => sub {
2078     my ($self, $attr) = @_;
2079     my $value = $attr->value;
2080     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2081     $coords = [split /,/, $value];
2082     } else {
2083     $self->{onerror}->(node => $attr,
2084     type => 'syntax error');
2085     }
2086     },
2087 wakaba 1.17 target => $HTMLTargetAttrChecker,
2088 wakaba 1.15 href => $HTMLURIAttrChecker,
2089     ping => $HTMLSpaceURIsAttrChecker,
2090 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(1, @_) },
2091 wakaba 1.17 media => $HTMLMQAttrChecker,
2092     hreflang => $HTMLLanguageTagAttrChecker,
2093 wakaba 1.15 type => $HTMLIMTAttrChecker,
2094     }->{$attr_ln};
2095     if ($checker) {
2096     $attr{$attr_ln} = $attr;
2097     } else {
2098     $checker = $HTMLAttrChecker->{$attr_ln};
2099     }
2100     }
2101     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2102     || $AttrChecker->{$attr_ns}->{''};
2103     if ($checker) {
2104     $checker->($self, $attr) if ref $checker;
2105     } else {
2106     $self->{onerror}->(node => $attr, type => 'attribute not supported');
2107     ## ISSUE: No comformance createria for unknown attributes in the spec
2108     }
2109     }
2110    
2111     if (defined $attr{href}) {
2112     unless (defined $attr{alt}) {
2113     $self->{onerror}->(node => $todo->{node},
2114     type => 'attribute missing:alt');
2115     }
2116     } else {
2117     for (qw/target ping rel media hreflang type alt/) {
2118     if (defined $attr{$_}) {
2119     $self->{onerror}->(node => $attr{$_},
2120     type => 'attribute not allowed');
2121     }
2122     }
2123     }
2124    
2125     my $shape = 'rectangle';
2126     if (defined $attr{shape}) {
2127     $shape = {
2128     circ => 'circle', circle => 'circle',
2129     default => 'default',
2130     poly => 'polygon', polygon => 'polygon',
2131     rect => 'rectangle', rectangle => 'rectangle',
2132     }->{lc $attr{shape}->value} || 'rectangle';
2133     ## TODO: ASCII lowercase?
2134     }
2135    
2136     if ($shape eq 'circle') {
2137     if (defined $attr{coords}) {
2138     if (defined $coords) {
2139     if (@$coords == 3) {
2140     if ($coords->[2] < 0) {
2141     $self->{onerror}->(node => $attr{coords},
2142     type => 'out of range:2');
2143     }
2144     } else {
2145     $self->{onerror}->(node => $attr{coords},
2146     type => 'list item number:3:'.@$coords);
2147     }
2148     } else {
2149     ## NOTE: A syntax error has been reported.
2150     }
2151     } else {
2152     $self->{onerror}->(node => $todo->{node},
2153     type => 'attribute missing:coords');
2154     }
2155     } elsif ($shape eq 'default') {
2156     if (defined $attr{coords}) {
2157     $self->{onerror}->(node => $attr{coords},
2158     type => 'attribute not allowed');
2159     }
2160     } elsif ($shape eq 'polygon') {
2161     if (defined $attr{coords}) {
2162     if (defined $coords) {
2163     if (@$coords >= 6) {
2164     unless (@$coords % 2 == 0) {
2165     $self->{onerror}->(node => $attr{coords},
2166     type => 'list item number:even:'.@$coords);
2167     }
2168     } else {
2169     $self->{onerror}->(node => $attr{coords},
2170     type => 'list item number:>=6:'.@$coords);
2171     }
2172     } else {
2173     ## NOTE: A syntax error has been reported.
2174     }
2175     } else {
2176     $self->{onerror}->(node => $todo->{node},
2177     type => 'attribute missing:coords');
2178     }
2179     } elsif ($shape eq 'rectangle') {
2180     if (defined $attr{coords}) {
2181     if (defined $coords) {
2182     if (@$coords == 4) {
2183     unless ($coords->[0] < $coords->[2]) {
2184     $self->{onerror}->(node => $attr{coords},
2185     type => 'out of range:0');
2186     }
2187     unless ($coords->[1] < $coords->[3]) {
2188     $self->{onerror}->(node => $attr{coords},
2189     type => 'out of range:1');
2190     }
2191     } else {
2192     $self->{onerror}->(node => $attr{coords},
2193     type => 'list item number:4:'.@$coords);
2194     }
2195     } else {
2196     ## NOTE: A syntax error has been reported.
2197     }
2198     } else {
2199     $self->{onerror}->(node => $todo->{node},
2200     type => 'attribute missing:coords');
2201     }
2202     }
2203     },
2204 wakaba 1.1 checker => $HTMLEmptyChecker,
2205     };
2206     ## TODO: only in map
2207    
2208     $Element->{$HTML_NS}->{table} = {
2209 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2210 wakaba 1.1 checker => sub {
2211 wakaba 1.4 my ($self, $todo) = @_;
2212     my $el = $todo->{node};
2213     my $new_todos = [];
2214 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2215    
2216     my $phase = 'before caption';
2217     my $has_tfoot;
2218     while (@nodes) {
2219     my $node = shift @nodes;
2220 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2221    
2222 wakaba 1.1 my $nt = $node->node_type;
2223     if ($nt == 1) {
2224 wakaba 1.8 my $node_ns = $node->namespace_uri;
2225     $node_ns = '' unless defined $node_ns;
2226     my $node_ln = $node->manakai_local_name;
2227 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2228 wakaba 1.1 if ($phase eq 'in tbodys') {
2229 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2230 wakaba 1.1 #$phase = 'in tbodys';
2231     } elsif (not $has_tfoot and
2232 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2233 wakaba 1.1 $phase = 'after tfoot';
2234     $has_tfoot = 1;
2235     } else {
2236 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2237 wakaba 1.1 }
2238     } elsif ($phase eq 'in trs') {
2239 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2240 wakaba 1.1 #$phase = 'in trs';
2241     } elsif (not $has_tfoot and
2242 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2243 wakaba 1.1 $phase = 'after tfoot';
2244     $has_tfoot = 1;
2245     } else {
2246 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2247 wakaba 1.1 }
2248     } elsif ($phase eq 'after thead') {
2249 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2250 wakaba 1.1 $phase = 'in tbodys';
2251 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2252 wakaba 1.1 $phase = 'in trs';
2253 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2254 wakaba 1.1 $phase = 'in tbodys';
2255     $has_tfoot = 1;
2256     } else {
2257 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2258 wakaba 1.1 }
2259     } elsif ($phase eq 'in colgroup') {
2260 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2261 wakaba 1.1 $phase = 'in colgroup';
2262 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2263 wakaba 1.1 $phase = 'after thead';
2264 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2265 wakaba 1.1 $phase = 'in tbodys';
2266 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2267 wakaba 1.1 $phase = 'in trs';
2268 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2269 wakaba 1.1 $phase = 'in tbodys';
2270     $has_tfoot = 1;
2271     } else {
2272 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2273 wakaba 1.1 }
2274     } elsif ($phase eq 'before caption') {
2275 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2276 wakaba 1.1 $phase = 'in colgroup';
2277 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2278 wakaba 1.1 $phase = 'in colgroup';
2279 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2280 wakaba 1.1 $phase = 'after thead';
2281 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2282 wakaba 1.1 $phase = 'in tbodys';
2283 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2284 wakaba 1.1 $phase = 'in trs';
2285 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2286 wakaba 1.1 $phase = 'in tbodys';
2287     $has_tfoot = 1;
2288     } else {
2289 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2290 wakaba 1.1 }
2291     } else { # after tfoot
2292 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2293 wakaba 1.1 }
2294 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2295     unshift @nodes, @$sib;
2296 wakaba 1.4 push @$new_todos, @$ch;
2297 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2298     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2299 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2300 wakaba 1.1 }
2301     } elsif ($nt == 5) {
2302     unshift @nodes, @{$node->child_nodes};
2303     }
2304     }
2305 wakaba 1.21
2306     ## Table model errors
2307     require Whatpm::HTMLTable;
2308     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2309     my %opt = @_;
2310     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2311     });
2312    
2313 wakaba 1.4 return ($new_todos);
2314 wakaba 1.1 },
2315     };
2316    
2317     $Element->{$HTML_NS}->{caption} = {
2318 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2319 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
2320     };
2321    
2322     $Element->{$HTML_NS}->{colgroup} = {
2323 wakaba 1.17 attrs_checker => $GetHTMLAttrsChecker->({
2324     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2325     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2326     ## TODO: "attribute not supported" if |col|.
2327     ## ISSUE: MUST NOT if any |col|?
2328     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2329     }),
2330 wakaba 1.1 checker => sub {
2331 wakaba 1.4 my ($self, $todo) = @_;
2332     my $el = $todo->{node};
2333     my $new_todos = [];
2334 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2335    
2336     while (@nodes) {
2337     my $node = shift @nodes;
2338 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2339    
2340 wakaba 1.1 my $nt = $node->node_type;
2341     if ($nt == 1) {
2342 wakaba 1.8 my $node_ns = $node->namespace_uri;
2343     $node_ns = '' unless defined $node_ns;
2344     my $node_ln = $node->manakai_local_name;
2345 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2346 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
2347 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2348 wakaba 1.1 }
2349 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2350     unshift @nodes, @$sib;
2351 wakaba 1.4 push @$new_todos, @$ch;
2352 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2353     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2354 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2355 wakaba 1.1 }
2356     } elsif ($nt == 5) {
2357     unshift @nodes, @{$node->child_nodes};
2358     }
2359     }
2360 wakaba 1.4 return ($new_todos);
2361 wakaba 1.1 },
2362     };
2363    
2364     $Element->{$HTML_NS}->{col} = {
2365 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2366     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2367     }),
2368 wakaba 1.1 checker => $HTMLEmptyChecker,
2369     };
2370    
2371     $Element->{$HTML_NS}->{tbody} = {
2372 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2373 wakaba 1.1 checker => sub {
2374 wakaba 1.4 my ($self, $todo) = @_;
2375     my $el = $todo->{node};
2376     my $new_todos = [];
2377 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2378    
2379     my $has_tr;
2380     while (@nodes) {
2381     my $node = shift @nodes;
2382 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2383    
2384 wakaba 1.1 my $nt = $node->node_type;
2385     if ($nt == 1) {
2386 wakaba 1.8 my $node_ns = $node->namespace_uri;
2387     $node_ns = '' unless defined $node_ns;
2388     my $node_ln = $node->manakai_local_name;
2389 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2390 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2391 wakaba 1.1 $has_tr = 1;
2392     } else {
2393 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2394 wakaba 1.1 }
2395 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2396     unshift @nodes, @$sib;
2397 wakaba 1.4 push @$new_todos, @$ch;
2398 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2399     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2400 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2401 wakaba 1.1 }
2402     } elsif ($nt == 5) {
2403     unshift @nodes, @{$node->child_nodes};
2404     }
2405     }
2406     unless ($has_tr) {
2407 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
2408 wakaba 1.1 }
2409 wakaba 1.4 return ($new_todos);
2410 wakaba 1.1 },
2411     };
2412    
2413     $Element->{$HTML_NS}->{thead} = {
2414 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2415 wakaba 1.23 checker => $Element->{$HTML_NS}->{tbody}->{checker},
2416 wakaba 1.1 };
2417    
2418     $Element->{$HTML_NS}->{tfoot} = {
2419 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2420 wakaba 1.23 checker => $Element->{$HTML_NS}->{tbody}->{checker},
2421 wakaba 1.1 };
2422    
2423     $Element->{$HTML_NS}->{tr} = {
2424 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2425 wakaba 1.1 checker => sub {
2426 wakaba 1.4 my ($self, $todo) = @_;
2427     my $el = $todo->{node};
2428     my $new_todos = [];
2429 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2430    
2431     my $has_td;
2432     while (@nodes) {
2433     my $node = shift @nodes;
2434 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2435    
2436 wakaba 1.1 my $nt = $node->node_type;
2437     if ($nt == 1) {
2438 wakaba 1.8 my $node_ns = $node->namespace_uri;
2439     $node_ns = '' unless defined $node_ns;
2440     my $node_ln = $node->manakai_local_name;
2441 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2442 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
2443 wakaba 1.1 $has_td = 1;
2444     } else {
2445 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2446 wakaba 1.1 }
2447 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2448     unshift @nodes, @$sib;
2449 wakaba 1.4 push @$new_todos, @$ch;
2450 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2451     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2452 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2453 wakaba 1.1 }
2454     } elsif ($nt == 5) {
2455     unshift @nodes, @{$node->child_nodes};
2456     }
2457     }
2458     unless ($has_td) {
2459 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2460 wakaba 1.1 }
2461 wakaba 1.4 return ($new_todos);
2462 wakaba 1.1 },
2463     };
2464    
2465     $Element->{$HTML_NS}->{td} = {
2466 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2467     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2468     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2469     }),
2470 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
2471     };
2472    
2473     $Element->{$HTML_NS}->{th} = {
2474 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2475     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2476     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2477     scope => $GetHTMLEnumeratedAttrChecker
2478     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2479     }),
2480 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
2481     };
2482    
2483 wakaba 1.12 ## TODO: table model error checking
2484    
2485 wakaba 1.1 ## TODO: forms
2486    
2487 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
2488 wakaba 1.15 attrs_checker => $GetHTMLAttrsChecker->({
2489     src => $HTMLURIAttrChecker,
2490     defer => $GetHTMLBooleanAttrChecker->('defer'), ## TODO: if src ## ISSUE: no MUST NOT
2491     async => $GetHTMLBooleanAttrChecker->('async'), ## TODO: if src ## ISSUE: no MUST NOT
2492     type => $HTMLIMTAttrChecker,
2493     }),
2494 wakaba 1.2 checker => sub {
2495 wakaba 1.4 my ($self, $todo) = @_;
2496 wakaba 1.2
2497 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2498     return $HTMLEmptyChecker->($self, $todo);
2499 wakaba 1.2 } else {
2500     ## NOTE: No content model conformance in HTML5 spec.
2501 wakaba 1.4 return $AnyChecker->($self, $todo);
2502 wakaba 1.2 }
2503     },
2504     };
2505    
2506     ## NOTE: When script is disabled.
2507     $Element->{$HTML_NS}->{noscript} = {
2508 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2509 wakaba 1.2 checker => sub {
2510 wakaba 1.4 my ($self, $todo) = @_;
2511 wakaba 1.1
2512 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2513 wakaba 1.4 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2514 wakaba 1.2 push @$sib, $end;
2515     return ($sib, $ch);
2516     },
2517     };
2518 wakaba 1.1
2519     $Element->{$HTML_NS}->{'event-source'} = {
2520 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2521     src => $HTMLURIAttrChecker,
2522     }),
2523 wakaba 1.1 checker => $HTMLEmptyChecker,
2524     };
2525    
2526     $Element->{$HTML_NS}->{details} = {
2527 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2528     open => $GetHTMLBooleanAttrChecker->('open'),
2529     }),
2530 wakaba 1.6 checker => sub {
2531     my ($self, $todo) = @_;
2532    
2533     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2534     my ($sib, $ch)
2535     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2536     ->($self, $todo);
2537     push @$sib, $end;
2538     return ($sib, $ch);
2539     },
2540 wakaba 1.1 };
2541    
2542     $Element->{$HTML_NS}->{datagrid} = {
2543 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2544     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2545     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2546     }),
2547 wakaba 1.6 checker => sub {
2548     my ($self, $todo) = @_;
2549    
2550     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2551     my ($sib, $ch) = $HTMLBlockChecker->($self, $todo);
2552     push @$sib, $end;
2553     return ($sib, $ch);
2554     },
2555 wakaba 1.1 };
2556    
2557     $Element->{$HTML_NS}->{command} = {
2558 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2559 wakaba 1.1 checker => $HTMLEmptyChecker,
2560     };
2561    
2562     $Element->{$HTML_NS}->{menu} = {
2563 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2564 wakaba 1.1 checker => sub {
2565 wakaba 1.4 my ($self, $todo) = @_;
2566     my $el = $todo->{node};
2567     my $new_todos = [];
2568 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2569    
2570     my $content = 'li or inline';
2571     while (@nodes) {
2572     my $node = shift @nodes;
2573 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2574    
2575 wakaba 1.1 my $nt = $node->node_type;
2576     if ($nt == 1) {
2577 wakaba 1.2 my $node_ns = $node->namespace_uri;
2578     $node_ns = '' unless defined $node_ns;
2579     my $node_ln = $node->manakai_local_name;
2580 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2581 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2582 wakaba 1.1 if ($content eq 'inline') {
2583 wakaba 1.6 $not_allowed = 1;
2584 wakaba 1.1 } elsif ($content eq 'li or inline') {
2585     $content = 'li';
2586     }
2587     } else {
2588 wakaba 1.7 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2589     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2590     $content = 'inline';
2591     } else {
2592 wakaba 1.6 $not_allowed = 1;
2593 wakaba 1.7 }
2594 wakaba 1.1 }
2595 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
2596     if $not_allowed;
2597 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2598     unshift @nodes, @$sib;
2599 wakaba 1.4 push @$new_todos, @$ch;
2600 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2601     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2602     if ($content eq 'li') {
2603 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2604 wakaba 1.1 } elsif ($content eq 'li or inline') {
2605     $content = 'inline';
2606     }
2607     }
2608     } elsif ($nt == 5) {
2609     unshift @nodes, @{$node->child_nodes};
2610     }
2611     }
2612 wakaba 1.4
2613     for (@$new_todos) {
2614     $_->{inline} = 1;
2615     }
2616     return ($new_todos);
2617 wakaba 1.1 },
2618     };
2619    
2620 wakaba 1.6 $Element->{$HTML_NS}->{legend} = {
2621 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2622 wakaba 1.6 checker => sub {
2623     my ($self, $todo) = @_;
2624    
2625     my $parent = $todo->{node}->manakai_parent_element;
2626     if (defined $parent) {
2627     my $nsuri = $parent->namespace_uri;
2628     $nsuri = '' unless defined $nsuri;
2629     my $ln = $parent->manakai_local_name;
2630     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
2631     return $HTMLInlineChecker->($self, $todo);
2632     } else {
2633     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
2634     }
2635     } else {
2636     return $HTMLInlineChecker->($self, $todo);
2637     }
2638    
2639     ## ISSUE: Content model is defined only for fieldset/legend,
2640     ## details/legend, and figure/legend.
2641     },
2642     };
2643 wakaba 1.1
2644     $Element->{$HTML_NS}->{div} = {
2645 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2646 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
2647 wakaba 1.1 };
2648    
2649     $Element->{$HTML_NS}->{font} = {
2650 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2651 wakaba 1.1 checker => $HTMLTransparentChecker,
2652     };
2653    
2654 wakaba 1.24 sub check_document ($$$) {
2655     my ($self, $doc, $onerror) = @_;
2656     $self = bless {}, $self unless ref $self;
2657     $self->{onerror} = $onerror;
2658    
2659     my $docel = $doc->document_element;
2660     my $docel_nsuri = $docel->namespace_uri;
2661     $docel_nsuri = '' unless defined $docel_nsuri;
2662     my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
2663     $Element->{$docel_nsuri}->{''} ||
2664     $ElementDefault;
2665     if ($docel_def->{is_root}) {
2666     #
2667     } else {
2668     $onerror->(node => $docel, type => 'element not allowed');
2669     }
2670    
2671     ## TODO: Check for other items other than document element
2672     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
2673    
2674     $self->check_element ($docel, $onerror);
2675     } # check_document
2676 wakaba 1.2
2677 wakaba 1.1 sub check_element ($$$) {
2678     my ($self, $el, $onerror) = @_;
2679 wakaba 1.24 $self = bless {}, $self unless ref $self;
2680     $self->{onerror} = $onerror;
2681 wakaba 1.1
2682 wakaba 1.2 $self->{minuses} = {};
2683 wakaba 1.10 $self->{id} = {};
2684 wakaba 1.17 $self->{usemap} = [];
2685     $self->{map} = {};
2686 wakaba 1.20 $self->{has_link_type} = {};
2687 wakaba 1.2
2688 wakaba 1.4 my @todo = ({type => 'element', node => $el});
2689     while (@todo) {
2690     my $todo = shift @todo;
2691     if ($todo->{type} eq 'element') {
2692 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2693     if (defined $prefix and $prefix eq 'xmlns') {
2694     $self->{onerror}
2695     ->(node => $todo->{node},
2696     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2697     }
2698 wakaba 1.4 my $nsuri = $todo->{node}->namespace_uri;
2699     $nsuri = '' unless defined $nsuri;
2700     my $ln = $todo->{node}->manakai_local_name;
2701     my $eldef = $Element->{$nsuri}->{$ln} ||
2702     $Element->{$nsuri}->{''} ||
2703     $ElementDefault;
2704 wakaba 1.9 $eldef->{attrs_checker}->($self, $todo);
2705 wakaba 1.4 my ($new_todos) = $eldef->{checker}->($self, $todo);
2706 wakaba 1.14 unshift @todo, @$new_todos;
2707 wakaba 1.9 } elsif ($todo->{type} eq 'element-attributes') {
2708 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2709     if (defined $prefix and $prefix eq 'xmlns') {
2710     $self->{onerror}
2711     ->(node => $todo->{node},
2712     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2713     }
2714 wakaba 1.9 my $nsuri = $todo->{node}->namespace_uri;
2715     $nsuri = '' unless defined $nsuri;
2716     my $ln = $todo->{node}->manakai_local_name;
2717     my $eldef = $Element->{$nsuri}->{$ln} ||
2718     $Element->{$nsuri}->{''} ||
2719     $ElementDefault;
2720     $eldef->{attrs_checker}->($self, $todo);
2721 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
2722     $self->_remove_minuses ($todo);
2723     }
2724 wakaba 1.1 }
2725 wakaba 1.17
2726     for (@{$self->{usemap}}) {
2727     unless ($self->{map}->{$_->[0]}) {
2728     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
2729     }
2730     }
2731    
2732     delete $self->{minuses};
2733     delete $self->{onerror};
2734     delete $self->{id};
2735     delete $self->{usemap};
2736     delete $self->{map};
2737 wakaba 1.1 } # check_element
2738    
2739 wakaba 1.2 sub _add_minuses ($@) {
2740     my $self = shift;
2741     my $r = {};
2742     for my $list (@_) {
2743     for my $ns (keys %$list) {
2744     for my $ln (keys %{$list->{$ns}}) {
2745     unless ($self->{minuses}->{$ns}->{$ln}) {
2746     $self->{minuses}->{$ns}->{$ln} = 1;
2747     $r->{$ns}->{$ln} = 1;
2748     }
2749     }
2750     }
2751     }
2752 wakaba 1.4 return {type => 'plus', list => $r};
2753 wakaba 1.2 } # _add_minuses
2754    
2755     sub _remove_minuses ($$) {
2756 wakaba 1.4 my ($self, $todo) = @_;
2757     for my $ns (keys %{$todo->{list}}) {
2758     for my $ln (keys %{$todo->{list}->{$ns}}) {
2759     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
2760 wakaba 1.2 }
2761     }
2762     1;
2763     } # _remove_minuses
2764    
2765     sub _check_get_children ($$) {
2766     my ($self, $node) = @_;
2767 wakaba 1.4 my $new_todos = [];
2768 wakaba 1.2 my $sib = [];
2769     TP: {
2770     my $node_ns = $node->namespace_uri;
2771     $node_ns = '' unless defined $node_ns;
2772     my $node_ln = $node->manakai_local_name;
2773     if ($node_ns eq $HTML_NS) {
2774     if ($node_ln eq 'noscript') {
2775     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
2776     push @$sib, $end;
2777     }
2778     }
2779 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
2780     unshift @$sib, @{$node->child_nodes};
2781 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2782 wakaba 1.7 last TP;
2783 wakaba 1.2 }
2784 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
2785 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
2786     unshift @$sib, @{$node->child_nodes};
2787 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2788 wakaba 1.2 last TP;
2789     } else {
2790     my @cn = @{$node->child_nodes};
2791     CN: while (@cn) {
2792     my $cn = shift @cn;
2793     my $cnt = $cn->node_type;
2794     if ($cnt == 1) {
2795 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
2796     $cn_nsuri = '' unless defined $cn_nsuri;
2797     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
2798 wakaba 1.2 #
2799     } else {
2800     last CN;
2801     }
2802     } elsif ($cnt == 3 or $cnt == 4) {
2803     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
2804     last CN;
2805     }
2806     }
2807     } # CN
2808     unshift @$sib, @cn;
2809     }
2810     }
2811 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
2812 wakaba 1.2 } # TP
2813 wakaba 1.4 return ($sib, $new_todos);
2814 wakaba 1.2 } # _check_get_children
2815    
2816 wakaba 1.1 1;
2817 wakaba 1.24 # $Date: 2007/05/27 10:28:01 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24