/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.18 - (hide annotations) (download)
Fri May 25 14:46:54 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.17: +12 -3 lines
++ whatpm/t/ChangeLog	25 May 2007 14:46:11 -0000
	* content-model-2.dat: New test entries for URI attributes.

++ whatpm/Whatpm/ChangeLog	25 May 2007 14:25:11 -0000
	* ContentChecker.pm ($HTMLURIAttrChecker): Implemented.

2007-05-25  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.18 require Whatpm::URIChecker;
5    
6 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
7     ## be applied to an in-memory representation (i.e. DOM)?
8    
9 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
10     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
11    
12     my $AttrChecker = {
13     $XML_NS => {
14 wakaba 1.13 space => sub {
15     my ($self, $attr) = @_;
16     my $value = $attr->value;
17     if ($value eq 'default' or $value eq 'preserve') {
18     #
19     } else {
20     ## NOTE: An XML "error"
21     $self->{onerror}->(node => $attr,
22     type => 'XML error:invalid xml:space value');
23     }
24     },
25     lang => sub {
26     ## NOTE: "The values of the attribute are language identifiers
27     ## as defined by [IETF RFC 3066], Tags for the Identification
28     ## of Languages, or its successor; in addition, the empty string
29     ## may be specified." ("may" in lower case)
30     ## TODO: xml:lang MUST NOT in HTML document
31     },
32     base => sub {
33     my ($self, $attr) = @_;
34     my $value = $attr->value;
35     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
36     $self->{onerror}->(node => $attr,
37     type => 'syntax error');
38     }
39 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
40     ## no author requirement on conformance in the XML Base specification.
41 wakaba 1.13 },
42     id => sub {
43     my ($self, $attr) = @_;
44     my $value = $attr->value;
45     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
46     $value =~ s/^\x20//;
47     $value =~ s/\x20$//;
48     ## TODO: NCName in XML 1.0 or 1.1
49     ## TODO: declared type is ID?
50     if ($self->{id}->{$value}) {
51     $self->{onerror}->(node => $attr, type => 'xml:id error:duplicate ID');
52     } else {
53     $self->{id}->{$value} = 1;
54     }
55     },
56 wakaba 1.9 },
57     $XMLNS_NS => {
58 wakaba 1.13 '' => sub {
59     my ($self, $attr) = @_;
60     my $ln = $attr->manakai_local_name;
61     my $value = $attr->value;
62     if ($value eq $XML_NS and $ln ne 'xml') {
63     $self->{onerror}
64     ->(node => $attr,
65     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
66     } elsif ($value eq $XMLNS_NS) {
67     $self->{onerror}
68     ->(node => $attr,
69     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
70     }
71     if ($ln eq 'xml' and $value ne $XML_NS) {
72     $self->{onerror}
73     ->(node => $attr,
74     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xml=');
75     } elsif ($ln eq 'xmlns') {
76     $self->{onerror}
77     ->(node => $attr,
78     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xmlns=');
79     }
80     ## TODO: If XML 1.0 and empty
81     },
82     xmlns => sub {
83     my ($self, $attr) = @_;
84     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
85     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
86 wakaba 1.18 ## TODO: relative references are deprecated
87 wakaba 1.13 my $value = $attr->value;
88     if ($value eq $XML_NS) {
89     $self->{onerror}
90     ->(node => $attr,
91     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
92     } elsif ($value eq $XMLNS_NS) {
93     $self->{onerror}
94     ->(node => $attr,
95     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
96     }
97     },
98 wakaba 1.9 },
99     };
100    
101 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
102 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
103     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
104     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
105     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
106    
107 wakaba 1.3 ## ANY
108     my $AnyChecker = sub {
109 wakaba 1.4 my ($self, $todo) = @_;
110     my $el = $todo->{node};
111     my $new_todos = [];
112 wakaba 1.3 my @nodes = (@{$el->child_nodes});
113     while (@nodes) {
114     my $node = shift @nodes;
115     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
116    
117     my $nt = $node->node_type;
118     if ($nt == 1) {
119     my $node_ns = $node->namespace_uri;
120     $node_ns = '' unless defined $node_ns;
121     my $node_ln = $node->manakai_local_name;
122     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
123     $self->{onerror}->(node => $node, type => 'element not allowed');
124     }
125 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
126 wakaba 1.3 } elsif ($nt == 5) {
127     unshift @nodes, @{$node->child_nodes};
128     }
129     }
130 wakaba 1.4 return ($new_todos);
131 wakaba 1.3 }; # $AnyChecker
132    
133 wakaba 1.1 my $ElementDefault = {
134     checker => sub {
135 wakaba 1.4 my ($self, $todo) = @_;
136     $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
137     return $AnyChecker->($self, $todo);
138 wakaba 1.1 },
139 wakaba 1.9 attrs_checker => sub {
140     my ($self, $todo) = @_;
141     for my $attr (@{$todo->{node}->attributes}) {
142     my $attr_ns = $attr->namespace_uri;
143     $attr_ns = '' unless defined $attr_ns;
144     my $attr_ln = $attr->manakai_local_name;
145     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
146     || $AttrChecker->{$attr_ns}->{''};
147     if ($checker) {
148     $checker->($self, $attr);
149 wakaba 1.17 } else {
150     $self->{onerror}->(node => $attr, type => 'attribute not supported');
151 wakaba 1.9 }
152     }
153     },
154 wakaba 1.1 };
155    
156     my $Element = {};
157    
158     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
159    
160 wakaba 1.7 my $HTMLMetadataElements = {
161     $HTML_NS => {
162     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1/,
163     },
164     };
165 wakaba 1.1
166 wakaba 1.2 my $HTMLSectioningElements = {
167     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
168     };
169 wakaba 1.1
170 wakaba 1.7 my $HTMLBlockLevelElements = {
171     $HTML_NS => {
172     qw/
173     section 1 nav 1 article 1 blockquote 1 aside 1
174     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
175     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
176     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
177     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
178     /,
179     },
180     };
181    
182     my $HTMLStrictlyInlineLevelElements = {
183     $HTML_NS => {
184     qw/
185     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
186     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
187     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
188     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
189     script 1 noscript 1 event-source 1 command 1 font 1
190     /,
191     },
192     };
193    
194     my $HTMLStructuredInlineLevelElements = {
195     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
196     };
197 wakaba 1.1
198 wakaba 1.6 my $HTMLInteractiveElements = {
199     $HTML_NS => {a => 1, details => 1, datagrid => 1},
200     };
201     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
202     ## of interactive elements
203 wakaba 1.1
204 wakaba 1.7 my $HTMLTransparentElements = {
205     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
206     ## NOTE: |html:noscript| is transparent if scripting is disabled.
207     };
208    
209     #my $HTMLSemiTransparentElements = {
210     # $HTML_NS => {qw/video 1 audio 1/},
211     #};
212    
213     my $HTMLEmbededElements = {
214     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
215     };
216 wakaba 1.1
217     ## Empty
218     my $HTMLEmptyChecker = sub {
219 wakaba 1.4 my ($self, $todo) = @_;
220     my $el = $todo->{node};
221     my $new_todos = [];
222 wakaba 1.1 my @nodes = (@{$el->child_nodes});
223    
224     while (@nodes) {
225     my $node = shift @nodes;
226 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
227    
228 wakaba 1.1 my $nt = $node->node_type;
229     if ($nt == 1) {
230 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
231     $self->{onerror}->(node => $node, type => 'element not allowed');
232     my ($sib, $ch) = $self->_check_get_children ($node);
233     unshift @nodes, @$sib;
234 wakaba 1.4 push @$new_todos, @$ch;
235 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
236 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
237     $self->{onerror}->(node => $node, type => 'character not allowed');
238     }
239 wakaba 1.1 } elsif ($nt == 5) {
240     unshift @nodes, @{$node->child_nodes};
241     }
242     }
243 wakaba 1.4 return ($new_todos);
244 wakaba 1.1 };
245    
246     ## Text
247     my $HTMLTextChecker = sub {
248 wakaba 1.4 my ($self, $todo) = @_;
249     my $el = $todo->{node};
250     my $new_todos = [];
251 wakaba 1.1 my @nodes = (@{$el->child_nodes});
252    
253     while (@nodes) {
254     my $node = shift @nodes;
255 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
256    
257 wakaba 1.1 my $nt = $node->node_type;
258     if ($nt == 1) {
259 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
260     $self->{onerror}->(node => $node, type => 'element not allowed');
261     my ($sib, $ch) = $self->_check_get_children ($node);
262     unshift @nodes, @$sib;
263 wakaba 1.4 push @$new_todos, @$ch;
264 wakaba 1.1 } elsif ($nt == 5) {
265     unshift @nodes, @{$node->child_nodes};
266     }
267     }
268 wakaba 1.4 return ($new_todos);
269 wakaba 1.1 };
270    
271     ## Zero or more |html:style| elements,
272     ## followed by zero or more block-level elements
273     my $HTMLStylableBlockChecker = sub {
274 wakaba 1.4 my ($self, $todo) = @_;
275     my $el = $todo->{node};
276     my $new_todos = [];
277 wakaba 1.1 my @nodes = (@{$el->child_nodes});
278    
279     my $has_non_style;
280     while (@nodes) {
281     my $node = shift @nodes;
282 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
283    
284 wakaba 1.1 my $nt = $node->node_type;
285     if ($nt == 1) {
286 wakaba 1.2 my $node_ns = $node->namespace_uri;
287     $node_ns = '' unless defined $node_ns;
288     my $node_ln = $node->manakai_local_name;
289 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
290 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
291 wakaba 1.6 $not_allowed = 1 if $has_non_style;
292 wakaba 1.7 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
293     $has_non_style = 1;
294 wakaba 1.1 } else {
295     $has_non_style = 1;
296 wakaba 1.7 $not_allowed = 1;
297 wakaba 1.1 }
298 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
299     if $not_allowed;
300 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
301     unshift @nodes, @$sib;
302 wakaba 1.4 push @$new_todos, @$ch;
303 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
304     if ($node->data =~ /[^\x09-\x0D\x20]/) {
305 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
306 wakaba 1.1 }
307     } elsif ($nt == 5) {
308     unshift @nodes, @{$node->child_nodes};
309     }
310     }
311 wakaba 1.4 return ($new_todos);
312 wakaba 1.1 }; # $HTMLStylableBlockChecker
313    
314     ## Zero or more block-level elements
315     my $HTMLBlockChecker = sub {
316 wakaba 1.4 my ($self, $todo) = @_;
317     my $el = $todo->{node};
318     my $new_todos = [];
319 wakaba 1.1 my @nodes = (@{$el->child_nodes});
320    
321     while (@nodes) {
322     my $node = shift @nodes;
323 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
324    
325 wakaba 1.1 my $nt = $node->node_type;
326     if ($nt == 1) {
327 wakaba 1.2 my $node_ns = $node->namespace_uri;
328     $node_ns = '' unless defined $node_ns;
329     my $node_ln = $node->manakai_local_name;
330 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
331 wakaba 1.7 $not_allowed = 1
332     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
333 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
334     if $not_allowed;
335 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
336     unshift @nodes, @$sib;
337 wakaba 1.4 push @$new_todos, @$ch;
338 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
339     if ($node->data =~ /[^\x09-\x0D\x20]/) {
340 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
341 wakaba 1.1 }
342     } elsif ($nt == 5) {
343     unshift @nodes, @{$node->child_nodes};
344     }
345     }
346 wakaba 1.4 return ($new_todos);
347 wakaba 1.1 }; # $HTMLBlockChecker
348    
349     ## Inline-level content
350     my $HTMLInlineChecker = sub {
351 wakaba 1.4 my ($self, $todo) = @_;
352     my $el = $todo->{node};
353     my $new_todos = [];
354 wakaba 1.1 my @nodes = (@{$el->child_nodes});
355    
356     while (@nodes) {
357     my $node = shift @nodes;
358 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
359    
360 wakaba 1.1 my $nt = $node->node_type;
361     if ($nt == 1) {
362 wakaba 1.2 my $node_ns = $node->namespace_uri;
363     $node_ns = '' unless defined $node_ns;
364     my $node_ln = $node->manakai_local_name;
365 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
366 wakaba 1.7 $not_allowed = 1
367     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
368     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
369 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
370     if $not_allowed;
371 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
372     unshift @nodes, @$sib;
373 wakaba 1.4 push @$new_todos, @$ch;
374 wakaba 1.1 } elsif ($nt == 5) {
375     unshift @nodes, @{$node->child_nodes};
376     }
377     }
378 wakaba 1.4
379     for (@$new_todos) {
380     $_->{inline} = 1;
381     }
382     return ($new_todos);
383     }; # $HTMLInlineChecker
384 wakaba 1.1
385     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
386     ## TODO: check significant content
387    
388     ## Strictly inline-level content
389     my $HTMLStrictlyInlineChecker = sub {
390 wakaba 1.4 my ($self, $todo) = @_;
391     my $el = $todo->{node};
392     my $new_todos = [];
393 wakaba 1.1 my @nodes = (@{$el->child_nodes});
394    
395     while (@nodes) {
396     my $node = shift @nodes;
397 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
398    
399 wakaba 1.1 my $nt = $node->node_type;
400     if ($nt == 1) {
401 wakaba 1.2 my $node_ns = $node->namespace_uri;
402     $node_ns = '' unless defined $node_ns;
403     my $node_ln = $node->manakai_local_name;
404 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
405 wakaba 1.7 $not_allowed = 1
406     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
407 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
408     if $not_allowed;
409 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
410     unshift @nodes, @$sib;
411 wakaba 1.4 push @$new_todos, @$ch;
412 wakaba 1.1 } elsif ($nt == 5) {
413     unshift @nodes, @{$node->child_nodes};
414     }
415     }
416 wakaba 1.4
417     for (@$new_todos) {
418     $_->{inline} = 1;
419     $_->{strictly_inline} = 1;
420     }
421     return ($new_todos);
422 wakaba 1.1 }; # $HTMLStrictlyInlineChecker
423    
424     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
425     ## TODO: check significant content
426    
427 wakaba 1.4 ## Inline-level or strictly inline-kevek content
428     my $HTMLInlineOrStrictlyInlineChecker = sub {
429     my ($self, $todo) = @_;
430     my $el = $todo->{node};
431     my $new_todos = [];
432     my @nodes = (@{$el->child_nodes});
433    
434     while (@nodes) {
435     my $node = shift @nodes;
436     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
437    
438     my $nt = $node->node_type;
439     if ($nt == 1) {
440     my $node_ns = $node->namespace_uri;
441     $node_ns = '' unless defined $node_ns;
442     my $node_ln = $node->manakai_local_name;
443 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
444 wakaba 1.7 if ($todo->{strictly_inline}) {
445     $not_allowed = 1
446     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
447     } else {
448     $not_allowed = 1
449     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
450     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
451     }
452 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
453     if $not_allowed;
454 wakaba 1.4 my ($sib, $ch) = $self->_check_get_children ($node);
455     unshift @nodes, @$sib;
456     push @$new_todos, @$ch;
457     } elsif ($nt == 5) {
458     unshift @nodes, @{$node->child_nodes};
459     }
460     }
461    
462     for (@$new_todos) {
463     $_->{inline} = 1;
464     $_->{strictly_inline} = 1;
465     }
466     return ($new_todos);
467     }; # $HTMLInlineOrStrictlyInlineChecker
468    
469 wakaba 1.6 my $HTMLSignificantInlineOrStrictlyInlineChecker
470     = $HTMLInlineOrStrictlyInlineChecker;
471     ## TODO: check significant content
472    
473 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
474 wakaba 1.4 my ($self, $todo) = @_;
475     my $el = $todo->{node};
476     my $new_todos = [];
477 wakaba 1.1 my @nodes = (@{$el->child_nodes});
478    
479     my $content = 'block-or-inline'; # or 'block' or 'inline'
480     my @block_not_inline;
481     while (@nodes) {
482     my $node = shift @nodes;
483 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
484    
485 wakaba 1.1 my $nt = $node->node_type;
486     if ($nt == 1) {
487 wakaba 1.2 my $node_ns = $node->namespace_uri;
488     $node_ns = '' unless defined $node_ns;
489     my $node_ln = $node->manakai_local_name;
490 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
491 wakaba 1.1 if ($content eq 'block') {
492 wakaba 1.7 $not_allowed = 1
493     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
494 wakaba 1.1 } elsif ($content eq 'inline') {
495 wakaba 1.7 $not_allowed = 1
496     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
497     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
498 wakaba 1.1 } else {
499 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
500     my $is_inline
501     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
502     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
503 wakaba 1.1
504 wakaba 1.6 push @block_not_inline, $node
505     if $is_block and not $is_inline and not $not_allowed;
506 wakaba 1.1 unless ($is_block) {
507     $content = 'inline';
508     for (@block_not_inline) {
509 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
510 wakaba 1.1 }
511 wakaba 1.6 $not_allowed = 1 unless $is_inline;
512 wakaba 1.1 }
513     }
514 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
515     if $not_allowed;
516 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
517     unshift @nodes, @$sib;
518 wakaba 1.4 push @$new_todos, @$ch;
519 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
520     if ($node->data =~ /[^\x09-\x0D\x20]/) {
521     if ($content eq 'block') {
522 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
523 wakaba 1.1 } else {
524     $content = 'inline';
525     for (@block_not_inline) {
526 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
527 wakaba 1.1 }
528     }
529     }
530     } elsif ($nt == 5) {
531     unshift @nodes, @{$node->child_nodes};
532     }
533     }
534 wakaba 1.4
535     if ($content eq 'inline') {
536     for (@$new_todos) {
537     $_->{inline} = 1;
538     }
539     }
540     return ($new_todos);
541 wakaba 1.1 };
542    
543 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
544     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
545     my ($elnsuri, $ellname) = @_;
546     return sub {
547 wakaba 1.4 my ($self, $todo) = @_;
548     my $el = $todo->{node};
549     my $new_todos = [];
550 wakaba 1.2 my @nodes = (@{$el->child_nodes});
551    
552     my $has_non_style;
553     my $content = 'block-or-inline'; # or 'block' or 'inline'
554     my @block_not_inline;
555     while (@nodes) {
556     my $node = shift @nodes;
557     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
558    
559     my $nt = $node->node_type;
560     if ($nt == 1) {
561     my $node_ns = $node->namespace_uri;
562     $node_ns = '' unless defined $node_ns;
563     my $node_ln = $node->manakai_local_name;
564 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
565 wakaba 1.8 if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
566 wakaba 1.6 $not_allowed = 1 if $has_non_style;
567 wakaba 1.2 } elsif ($content eq 'block') {
568     $has_non_style = 1;
569 wakaba 1.7 $not_allowed = 1
570     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
571 wakaba 1.2 } elsif ($content eq 'inline') {
572     $has_non_style = 1;
573 wakaba 1.7 $not_allowed = 1
574     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
575     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
576 wakaba 1.2 } else {
577     $has_non_style = 1;
578 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
579     my $is_inline
580     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
581     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
582 wakaba 1.2
583 wakaba 1.6 push @block_not_inline, $node
584     if $is_block and not $is_inline and not $not_allowed;
585 wakaba 1.2 unless ($is_block) {
586     $content = 'inline';
587     for (@block_not_inline) {
588     $self->{onerror}->(node => $_, type => 'element not allowed');
589     }
590 wakaba 1.6 $not_allowed = 1 unless $is_inline;
591 wakaba 1.1 }
592     }
593 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
594     if $not_allowed;
595 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
596     unshift @nodes, @$sib;
597 wakaba 1.4 push @$new_todos, @$ch;
598 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
599     if ($node->data =~ /[^\x09-\x0D\x20]/) {
600     $has_non_style = 1;
601     if ($content eq 'block') {
602     $self->{onerror}->(node => $node, type => 'character not allowed');
603     } else {
604     $content = 'inline';
605     for (@block_not_inline) {
606     $self->{onerror}->(node => $_, type => 'element not allowed');
607     }
608 wakaba 1.1 }
609     }
610 wakaba 1.2 } elsif ($nt == 5) {
611     unshift @nodes, @{$node->child_nodes};
612 wakaba 1.1 }
613     }
614 wakaba 1.4
615     if ($content eq 'inline') {
616     for (@$new_todos) {
617     $_->{inline} = 1;
618     }
619     }
620     return ($new_todos);
621 wakaba 1.2 };
622     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
623 wakaba 1.1
624     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
625    
626 wakaba 1.10 my $GetHTMLEnumeratedAttrChecker = sub {
627     my $states = shift; # {value => conforming ? 1 : -1}
628     return sub {
629     my ($self, $attr) = @_;
630     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
631     if ($states->{$value} > 0) {
632     #
633     } elsif ($states->{$value}) {
634     $self->{onerror}->(node => $attr,
635     type => 'non-conforming enumerated attribute value');
636     } else {
637     $self->{onerror}->(node => $attr,
638     type => 'invalid enumerated attribute value');
639     }
640     };
641     }; # $GetHTMLEnumeratedAttrChecker
642 wakaba 1.9
643 wakaba 1.10 my $GetHTMLBooleanAttrChecker = sub {
644     my $local_name = shift;
645     return sub {
646     my ($self, $attr) = @_;
647     my $value = $attr->value;
648     unless ($value eq $local_name or $value eq '') {
649     $self->{onerror}->(node => $attr,
650     type => 'invalid boolean attribute value');
651     }
652     };
653     }; # $GetHTMLBooleanAttrChecker
654 wakaba 1.9
655 wakaba 1.10 my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
656     my ($self, $attr) = @_;
657     my %word;
658     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
659     unless ($word{$word}) {
660     $word{$word} = 1;
661 wakaba 1.9 } else {
662 wakaba 1.10 $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
663 wakaba 1.9 }
664     }
665 wakaba 1.10 }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
666    
667 wakaba 1.18 ## URI (or IRI)
668 wakaba 1.11 my $HTMLURIAttrChecker = sub {
669     my ($self, $attr) = @_;
670 wakaba 1.15 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
671 wakaba 1.18 my $value = $attr->value;
672     Whatpm::URIChecker->check_iri_reference ($value, sub {
673     my %opt = @_;
674     $self->{onerror}->(node => $attr, type => 'URI:'.$opt{level}.':'.$opt{type});
675     });
676 wakaba 1.11 }; # $HTMLURIAttrChecker
677    
678 wakaba 1.15 ## A space separated list of one or more URIs (or IRIs)
679     my $HTMLSpaceURIsAttrChecker = sub {
680     my ($self, $attr) = @_;
681     ## TODO: URI or IRI check
682     ## ISSUE: Relative references?
683     ## ISSUE: Leading or trailing white spaces are conformant?
684     ## ISSUE: A sequence of white space characters are conformant?
685     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
686     ## NOTE: Duplication seems not an error.
687     }; # $HTMLSpaceURIsAttrChecker
688    
689 wakaba 1.11 my $HTMLIntegerAttrChecker = sub {
690     my ($self, $attr) = @_;
691     my $value = $attr->value;
692     unless ($value =~ /\A-?[0-9]+\z/) {
693 wakaba 1.15 $self->{onerror}->(node => $attr, type => 'integer syntax error');
694 wakaba 1.11 }
695     }; # $HTMLIntegerAttrChecker
696    
697 wakaba 1.12 my $GetHTMLNonNegativeIntegerAttrChecker = sub {
698     my $range_check = shift;
699     return sub {
700     my ($self, $attr) = @_;
701     my $value = $attr->value;
702     if ($value =~ /\A[0-9]+\z/) {
703     unless ($range_check->($value + 0)) {
704     $self->{onerror}->(node => $attr, type => 'out of range');
705     }
706     } else {
707 wakaba 1.15 $self->{onerror}->(node => $attr,
708     type => 'non-negative integer syntax error');
709 wakaba 1.12 }
710     };
711     }; # $GetHTMLNonNegativeIntegerAttrChecker
712    
713 wakaba 1.11 my $GetHTMLFloatingPointNumberAttrChecker = sub {
714     my $range_check = shift;
715     return sub {
716     my ($self, $attr) = @_;
717     my $value = $attr->value;
718     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
719     unless ($range_check->($value + 0)) {
720     $self->{onerror}->(node => $attr, type => 'out of range');
721     }
722     } else {
723 wakaba 1.15 $self->{onerror}->(node => $attr,
724     type => 'floating point number syntax error');
725 wakaba 1.11 }
726     };
727     }; # $GetHTMLFloatingPointNumberAttrChecker
728    
729 wakaba 1.15 ## "A valid MIME type, optionally with parameters. [RFC 2046]"
730     ## ISSUE: RFC 2046 does not define syntax of media types.
731     ## ISSUE: The definition of "a valid MIME type" is unknown.
732     ## Syntactical correctness?
733     my $HTMLIMTAttrChecker = sub {
734     my ($self, $attr) = @_;
735     my $value = $attr->value;
736     ## ISSUE: RFC 2045 Content-Type header field allows insertion
737     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
738     ## ISSUE: RFC 2231 extension? Maybe no.
739     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
740     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
741     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
742     unless ($value =~ m#\A$lws0$token$lws0/$lws0$token$lws0(?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*\z#) {
743     $self->{onerror}->(node => $attr, type => 'IMT syntax error');
744     }
745     ## TODO: Warn unless registered
746     }; # $HTMLIMTAttrChecker
747    
748 wakaba 1.17 my $HTMLLanguageTagAttrChecker = sub {
749     my ($self, $attr) = @_;
750     if ($attr->value eq '') {
751     $self->{onerror}->(node => $attr, type => 'language tag syntax error');
752     }
753     ## TODO: RFC 3066 test
754     ## ISSUE: RFC 4646 (3066bis)?
755     }; # $HTMLLanguageTagAttrChecker
756    
757     ## "A valid media query [MQ]"
758     my $HTMLMQAttrChecker = sub {
759     ## ISSUE: What is "a valid media query"?
760     }; # $HTMLMQAttrChecker
761    
762 wakaba 1.16 my $HTMLEventHandlerAttrChecker = sub {
763     ## TODO: MUST contain valid ECMAScript code matching the
764     ## ECMAScript |FunctionBody| production. [ECMA262]
765     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
766     ## ISSUE: Automatic semicolon insertion does not apply?
767     ## ISSUE: Other script languages?
768     }; # $HTMLEventHandlerAttrChecker
769    
770 wakaba 1.17 my $HTMLUsemapAttrChecker = sub {
771     my ($self, $attr) = @_;
772     ## MUST be a valid hashed ID reference to a |map| element
773     my $value = $attr->value;
774     if ($value =~ s/^#//) {
775     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
776     push @{$self->{usemap}}, [$value => $attr];
777     } else {
778     $self->{onerror}->(node => $attr, type => 'hashed idref syntax error');
779     }
780     }; # $HTMLUsemapAttrChecker
781    
782     my $HTMLTargetAttrChecker = sub {
783     my ($self, $attr) = @_;
784     my $value = $attr->value;
785     if ($value =~ /^_/) {
786     $value = lc $value; ## ISSUE: ASCII case-insentitive?
787     unless ({
788     _self => 1, _parent => 1, _top => 1,
789     }->{$value}) {
790     $self->{onerror}->(node => $attr,
791     type => 'reserved browsing context name');
792     }
793     } else {
794     #$ ISSUE: An empty string is conforming?
795     }
796     }; # $HTMLTargetAttrChecker
797    
798 wakaba 1.10 my $HTMLAttrChecker = {
799     id => sub {
800 wakaba 1.17 ## NOTE: |map| has its own variant of |id=""| checker
801 wakaba 1.10 my ($self, $attr) = @_;
802     my $value = $attr->value;
803 wakaba 1.17 if (length $value > 0) {
804 wakaba 1.10 if ($self->{id}->{$value}) {
805     $self->{onerror}->(node => $attr, type => 'duplicate ID');
806     } else {
807     $self->{id}->{$value} = 1;
808     }
809 wakaba 1.17 } else {
810     ## NOTE: MUST contain at least one character
811     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
812 wakaba 1.10 }
813     },
814     title => sub {}, ## NOTE: No conformance creteria
815     lang => sub {
816 wakaba 1.17 ## TODO: RFC 3066 or empty test
817 wakaba 1.10 ## ISSUE: RFC 4646 (3066bis)?
818     ## TODO: HTML vs XHTML
819     },
820     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
821     class => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
822     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
823     ## TODO: tabindex
824     };
825    
826 wakaba 1.16 for (qw/
827     onabort onbeforeunload onblur onchange onclick oncontextmenu
828     ondblclick ondrag ondragend ondragenter ondragleave ondragover
829     ondragstart ondrop onerror onfocus onkeydown onkeypress
830     onkeyup onload onmessage onmousedown onmousemove onmouseout
831     onmouseover onmouseup onmousewheel onresize onscroll onselect
832     onsubmit onunload
833     /) {
834     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
835     }
836    
837 wakaba 1.10 my $GetHTMLAttrsChecker = sub {
838     my $element_specific_checker = shift;
839     return sub {
840     my ($self, $todo) = @_;
841     for my $attr (@{$todo->{node}->attributes}) {
842     my $attr_ns = $attr->namespace_uri;
843     $attr_ns = '' unless defined $attr_ns;
844     my $attr_ln = $attr->manakai_local_name;
845     my $checker;
846     if ($attr_ns eq '') {
847     $checker = $element_specific_checker->{$attr_ln}
848     || $HTMLAttrChecker->{$attr_ln};
849     }
850     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
851     || $AttrChecker->{$attr_ns}->{''};
852     if ($checker) {
853     $checker->($self, $attr);
854     } else {
855     $self->{onerror}->(node => $attr, type => 'attribute not supported');
856     ## ISSUE: No comformance createria for unknown attributes in the spec
857     }
858     }
859     };
860     }; # $GetHTMLAttrsChecker
861 wakaba 1.9
862     $Element->{$HTML_NS}->{''} = {
863 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
864 wakaba 1.9 checker => $ElementDefault->{checker},
865     };
866    
867 wakaba 1.1 $Element->{$HTML_NS}->{html} = {
868 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
869     xmlns => sub {
870     my ($self, $attr) = @_;
871     my $value = $attr->value;
872     unless ($value eq $HTML_NS) {
873     $self->{onerror}->(node => $attr, type => 'syntax error');
874     ## TODO: only in HTML documents
875     }
876     },
877     }),
878 wakaba 1.1 checker => sub {
879 wakaba 1.4 my ($self, $todo) = @_;
880     my $el = $todo->{node};
881     my $new_todos = [];
882 wakaba 1.1 my @nodes = (@{$el->child_nodes});
883    
884     my $phase = 'before head';
885     while (@nodes) {
886     my $node = shift @nodes;
887 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
888    
889 wakaba 1.1 my $nt = $node->node_type;
890     if ($nt == 1) {
891 wakaba 1.2 my $node_ns = $node->namespace_uri;
892     $node_ns = '' unless defined $node_ns;
893     my $node_ln = $node->manakai_local_name;
894 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
895 wakaba 1.1 if ($phase eq 'before head') {
896 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
897 wakaba 1.1 $phase = 'after head';
898 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
899     $self->{onerror}->(node => $node, type => 'ps element missing:head');
900 wakaba 1.1 $phase = 'after body';
901     } else {
902 wakaba 1.6 $not_allowed = 1;
903 wakaba 1.1 # before head
904     }
905     } elsif ($phase eq 'after head') {
906 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
907 wakaba 1.1 $phase = 'after body';
908     } else {
909 wakaba 1.6 $not_allowed = 1;
910 wakaba 1.1 # after head
911     }
912     } else { #elsif ($phase eq 'after body') {
913 wakaba 1.6 $not_allowed = 1;
914 wakaba 1.1 # after body
915     }
916 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
917     if $not_allowed;
918 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
919     unshift @nodes, @$sib;
920 wakaba 1.4 push @$new_todos, @$ch;
921 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
922     if ($node->data =~ /[^\x09-\x0D\x20]/) {
923 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
924 wakaba 1.1 }
925     } elsif ($nt == 5) {
926     unshift @nodes, @{$node->child_nodes};
927     }
928     }
929 wakaba 1.3
930     if ($phase eq 'before head') {
931     $self->{onerror}->(node => $el, type => 'child element missing:head');
932     $self->{onerror}->(node => $el, type => 'child element missing:body');
933     } elsif ($phase eq 'after head') {
934     $self->{onerror}->(node => $el, type => 'child element missing:body');
935     }
936    
937 wakaba 1.4 return ($new_todos);
938 wakaba 1.1 },
939     };
940    
941     $Element->{$HTML_NS}->{head} = {
942 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
943 wakaba 1.1 checker => sub {
944 wakaba 1.4 my ($self, $todo) = @_;
945     my $el = $todo->{node};
946     my $new_todos = [];
947 wakaba 1.1 my @nodes = (@{$el->child_nodes});
948    
949     my $has_title;
950 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
951 wakaba 1.1 while (@nodes) {
952     my $node = shift @nodes;
953 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
954    
955 wakaba 1.1 my $nt = $node->node_type;
956     if ($nt == 1) {
957 wakaba 1.2 my $node_ns = $node->namespace_uri;
958     $node_ns = '' unless defined $node_ns;
959     my $node_ln = $node->manakai_local_name;
960 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
961 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
962 wakaba 1.3 $phase = 'after base';
963 wakaba 1.1 unless ($has_title) {
964     $has_title = 1;
965     } else {
966 wakaba 1.6 $not_allowed = 1;
967 wakaba 1.1 }
968 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
969 wakaba 1.1 if ($node->has_attribute_ns (undef, 'charset')) {
970 wakaba 1.3 if ($phase eq 'initial') {
971     $phase = 'after charset';
972 wakaba 1.1 } else {
973 wakaba 1.6 $not_allowed = 1;
974 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
975 wakaba 1.1 }
976     } else {
977 wakaba 1.3 $phase = 'after base';
978 wakaba 1.1 }
979 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
980 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
981     $phase = 'after base';
982 wakaba 1.1 } else {
983 wakaba 1.6 $not_allowed = 1;
984 wakaba 1.1 }
985 wakaba 1.7 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
986     $phase = 'after base';
987 wakaba 1.1 } else {
988 wakaba 1.7 $not_allowed = 1;
989 wakaba 1.1 }
990 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
991     if $not_allowed;
992 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
993     unshift @nodes, @$sib;
994 wakaba 1.4 push @$new_todos, @$ch;
995 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
996     if ($node->data =~ /[^\x09-\x0D\x20]/) {
997 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
998 wakaba 1.1 }
999     } elsif ($nt == 5) {
1000     unshift @nodes, @{$node->child_nodes};
1001     }
1002     }
1003     unless ($has_title) {
1004 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
1005 wakaba 1.1 }
1006 wakaba 1.4 return ($new_todos);
1007 wakaba 1.1 },
1008     };
1009    
1010     $Element->{$HTML_NS}->{title} = {
1011 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1012 wakaba 1.1 checker => $HTMLTextChecker,
1013     };
1014    
1015     $Element->{$HTML_NS}->{base} = {
1016 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1017 wakaba 1.11 href => $HTMLURIAttrChecker,
1018 wakaba 1.17 target => $HTMLTargetAttrChecker,
1019 wakaba 1.10 }),
1020 wakaba 1.1 checker => $HTMLEmptyChecker,
1021     };
1022    
1023     $Element->{$HTML_NS}->{link} = {
1024 wakaba 1.16 attrs_checker => sub {
1025     my ($self, $todo) = @_;
1026     $GetHTMLAttrsChecker->({
1027     href => $HTMLURIAttrChecker,
1028     rel => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker, ## TODO: registered? check
1029 wakaba 1.17 media => $HTMLMQAttrChecker,
1030     hreflang => $HTMLLanguageTagAttrChecker,
1031 wakaba 1.16 type => $HTMLIMTAttrChecker,
1032     ## NOTE: Though |title| has special semantics,
1033     ## syntactically same as the |title| as global attribute.
1034     })->($self, $todo);
1035     unless ($todo->{node}->has_attribute_ns (undef, 'href')) {
1036     $self->{onerror}->(node => $todo->{node},
1037     type => 'attribute missing:href');
1038     }
1039     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1040     $self->{onerror}->(node => $todo->{node},
1041     type => 'attribute missing:rel');
1042     }
1043     },
1044 wakaba 1.1 checker => $HTMLEmptyChecker,
1045     };
1046    
1047     $Element->{$HTML_NS}->{meta} = {
1048 wakaba 1.10 attrs_checker => sub {
1049     my ($self, $todo) = @_;
1050     my $name_attr;
1051     my $http_equiv_attr;
1052     my $charset_attr;
1053     my $content_attr;
1054     for my $attr (@{$todo->{node}->attributes}) {
1055     my $attr_ns = $attr->namespace_uri;
1056     $attr_ns = '' unless defined $attr_ns;
1057     my $attr_ln = $attr->manakai_local_name;
1058     my $checker;
1059     if ($attr_ns eq '') {
1060     if ($attr_ln eq 'content') {
1061     $content_attr = $attr;
1062     $checker = 1;
1063     } elsif ($attr_ln eq 'name') {
1064     $name_attr = $attr;
1065     $checker = 1;
1066     } elsif ($attr_ln eq 'http-equiv') {
1067     $http_equiv_attr = $attr;
1068     $checker = 1;
1069     } elsif ($attr_ln eq 'charset') {
1070     $charset_attr = $attr;
1071     $checker = 1;
1072     } else {
1073     $checker = $HTMLAttrChecker->{$attr_ln}
1074     || $AttrChecker->{$attr_ns}->{$attr_ln}
1075     || $AttrChecker->{$attr_ns}->{''};
1076     }
1077     } else {
1078     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1079     || $AttrChecker->{$attr_ns}->{''};
1080     }
1081     if ($checker) {
1082     $checker->($self, $attr) if ref $checker;
1083     } else {
1084     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1085     ## ISSUE: No comformance createria for unknown attributes in the spec
1086     }
1087     }
1088    
1089     if (defined $name_attr) {
1090     if (defined $http_equiv_attr) {
1091     $self->{onerror}->(node => $http_equiv_attr,
1092     type => 'attribute not allowed');
1093     } elsif (defined $charset_attr) {
1094     $self->{onerror}->(node => $charset_attr,
1095     type => 'attribute not allowed');
1096     }
1097     my $metadata_name = $name_attr->value;
1098     my $metadata_value;
1099     if (defined $content_attr) {
1100     $metadata_value = $content_attr->value;
1101     } else {
1102     $self->{onerror}->(node => $todo->{node},
1103     type => 'attribute missing:content');
1104     $metadata_value = '';
1105     }
1106     } elsif (defined $http_equiv_attr) {
1107     if (defined $charset_attr) {
1108     $self->{onerror}->(node => $charset_attr,
1109     type => 'attribute not allowed');
1110     }
1111     unless (defined $content_attr) {
1112     $self->{onerror}->(node => $todo->{node},
1113     type => 'attribute missing:content');
1114     }
1115     } elsif (defined $charset_attr) {
1116     if (defined $content_attr) {
1117     $self->{onerror}->(node => $content_attr,
1118     type => 'attribute not allowed');
1119     }
1120     ## TODO: Allowed only in HTML documents
1121     } else {
1122     if (defined $content_attr) {
1123     $self->{onerror}->(node => $content_attr,
1124     type => 'attribute not allowed');
1125     $self->{onerror}->(node => $todo->{node},
1126     type => 'attribute missing:name|http-equiv');
1127     } else {
1128     $self->{onerror}->(node => $todo->{node},
1129     type => 'attribute missing:name|http-equiv|charset');
1130     }
1131     }
1132    
1133     ## TODO: metadata conformance
1134    
1135     ## TODO: pragma conformance
1136     if (defined $http_equiv_attr) { ## An enumerated attribute
1137     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1138     if ({
1139     'refresh' => 1,
1140     'default-style' => 1,
1141     }->{$keyword}) {
1142     #
1143     } else {
1144     $self->{onerror}->(node => $http_equiv_attr,
1145     type => 'invalid enumerated attribute value');
1146     }
1147     }
1148    
1149     ## TODO: charset
1150     },
1151 wakaba 1.1 checker => $HTMLEmptyChecker,
1152     };
1153    
1154     ## NOTE: |html:style| has no conformance creteria on content model
1155 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
1156 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1157 wakaba 1.15 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1158 wakaba 1.17 media => $HTMLMQAttrChecker,
1159 wakaba 1.10 scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1160     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1161     ## not different
1162     }),
1163 wakaba 1.3 checker => $AnyChecker,
1164     };
1165 wakaba 1.1
1166     $Element->{$HTML_NS}->{body} = {
1167 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1168 wakaba 1.1 checker => $HTMLBlockChecker,
1169     };
1170    
1171     $Element->{$HTML_NS}->{section} = {
1172 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1173 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1174     };
1175    
1176     $Element->{$HTML_NS}->{nav} = {
1177 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1178 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1179     };
1180    
1181     $Element->{$HTML_NS}->{article} = {
1182 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1183 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1184     };
1185    
1186     $Element->{$HTML_NS}->{blockquote} = {
1187 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1188     cite => $HTMLURIAttrChecker,
1189     }),
1190 wakaba 1.1 checker => $HTMLBlockChecker,
1191     };
1192    
1193     $Element->{$HTML_NS}->{aside} = {
1194 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1195 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1196 wakaba 1.1 };
1197    
1198     $Element->{$HTML_NS}->{h1} = {
1199 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1200 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1201     };
1202    
1203     $Element->{$HTML_NS}->{h2} = {
1204 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1205 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1206     };
1207    
1208     $Element->{$HTML_NS}->{h3} = {
1209 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1210 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1211     };
1212    
1213     $Element->{$HTML_NS}->{h4} = {
1214 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1215 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1216     };
1217    
1218     $Element->{$HTML_NS}->{h5} = {
1219 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1220 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1221     };
1222    
1223     $Element->{$HTML_NS}->{h6} = {
1224 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1225 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1226     };
1227    
1228     ## TODO: header
1229    
1230 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
1231 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1232 wakaba 1.2 checker => sub { ## block -hn -header -footer -sectioning or inline
1233 wakaba 1.4 my ($self, $todo) = @_;
1234     my $el = $todo->{node};
1235     my $new_todos = [];
1236 wakaba 1.2 my @nodes = (@{$el->child_nodes});
1237    
1238     my $content = 'block-or-inline'; # or 'block' or 'inline'
1239     my @block_not_inline;
1240     while (@nodes) {
1241     my $node = shift @nodes;
1242     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1243    
1244     my $nt = $node->node_type;
1245     if ($nt == 1) {
1246     my $node_ns = $node->namespace_uri;
1247     $node_ns = '' unless defined $node_ns;
1248     my $node_ln = $node->manakai_local_name;
1249 wakaba 1.6 my $not_allowed;
1250 wakaba 1.2 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1251 wakaba 1.6 $not_allowed = 1;
1252 wakaba 1.2 } elsif ($node_ns eq $HTML_NS and
1253     {
1254     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1255     }->{$node_ln}) {
1256 wakaba 1.6 $not_allowed = 1;
1257 wakaba 1.2 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1258 wakaba 1.6 $not_allowed = 1;
1259 wakaba 1.2 }
1260     if ($content eq 'block') {
1261 wakaba 1.7 $not_allowed = 1
1262     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1263 wakaba 1.2 } elsif ($content eq 'inline') {
1264 wakaba 1.7 $not_allowed = 1
1265     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1266     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1267 wakaba 1.2 } else {
1268 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1269     my $is_inline
1270     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1271     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1272 wakaba 1.2
1273 wakaba 1.6 push @block_not_inline, $node
1274     if $is_block and not $is_inline and not $not_allowed;
1275 wakaba 1.2 unless ($is_block) {
1276     $content = 'inline';
1277     for (@block_not_inline) {
1278     $self->{onerror}->(node => $_, type => 'element not allowed');
1279     }
1280 wakaba 1.6 $not_allowed = 1 unless $is_inline;
1281 wakaba 1.2 }
1282     }
1283 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1284     if $not_allowed;
1285 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1286     unshift @nodes, @$sib;
1287 wakaba 1.4 push @$new_todos, @$ch;
1288 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
1289     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1290     if ($content eq 'block') {
1291     $self->{onerror}->(node => $node, type => 'character not allowed');
1292     } else {
1293     $content = 'inline';
1294     for (@block_not_inline) {
1295     $self->{onerror}->(node => $_, type => 'element not allowed');
1296     }
1297     }
1298     }
1299     } elsif ($nt == 5) {
1300     unshift @nodes, @{$node->child_nodes};
1301     }
1302     }
1303    
1304     my $end = $self->_add_minuses
1305     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1306     $HTMLSectioningElements);
1307 wakaba 1.4 push @$new_todos, $end;
1308 wakaba 1.2
1309 wakaba 1.4 if ($content eq 'inline') {
1310     for (@$new_todos) {
1311     $_->{inline} = 1;
1312     }
1313     }
1314    
1315     return ($new_todos);
1316 wakaba 1.2 },
1317     };
1318 wakaba 1.1
1319     $Element->{$HTML_NS}->{address} = {
1320 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1321 wakaba 1.1 checker => $HTMLInlineChecker,
1322     };
1323    
1324     $Element->{$HTML_NS}->{p} = {
1325 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1326 wakaba 1.1 checker => $HTMLSignificantInlineChecker,
1327     };
1328    
1329     $Element->{$HTML_NS}->{hr} = {
1330 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1331 wakaba 1.1 checker => $HTMLEmptyChecker,
1332     };
1333    
1334     $Element->{$HTML_NS}->{br} = {
1335 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1336 wakaba 1.1 checker => $HTMLEmptyChecker,
1337     };
1338    
1339     $Element->{$HTML_NS}->{dialog} = {
1340 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1341 wakaba 1.1 checker => sub {
1342 wakaba 1.4 my ($self, $todo) = @_;
1343     my $el = $todo->{node};
1344     my $new_todos = [];
1345 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1346    
1347     my $phase = 'before dt';
1348     while (@nodes) {
1349     my $node = shift @nodes;
1350 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1351    
1352 wakaba 1.1 my $nt = $node->node_type;
1353     if ($nt == 1) {
1354 wakaba 1.8 my $node_ns = $node->namespace_uri;
1355     $node_ns = '' unless defined $node_ns;
1356     my $node_ln = $node->manakai_local_name;
1357 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1358 wakaba 1.1 if ($phase eq 'before dt') {
1359 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1360 wakaba 1.1 $phase = 'before dd';
1361 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1362 wakaba 1.2 $self->{onerror}
1363 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1364 wakaba 1.1 $phase = 'before dt';
1365     } else {
1366 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1367 wakaba 1.1 }
1368     } else { # before dd
1369 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1370 wakaba 1.1 $phase = 'before dt';
1371 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1372 wakaba 1.2 $self->{onerror}
1373 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
1374 wakaba 1.1 $phase = 'before dd';
1375     } else {
1376 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1377 wakaba 1.1 }
1378     }
1379 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1380     unshift @nodes, @$sib;
1381 wakaba 1.4 push @$new_todos, @$ch;
1382 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1383     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1384 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1385 wakaba 1.1 }
1386     } elsif ($nt == 5) {
1387     unshift @nodes, @{$node->child_nodes};
1388     }
1389     }
1390     if ($phase eq 'before dd') {
1391 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1392 wakaba 1.1 }
1393 wakaba 1.4 return ($new_todos);
1394 wakaba 1.1 },
1395     };
1396    
1397     $Element->{$HTML_NS}->{pre} = {
1398 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1399 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1400     };
1401    
1402     $Element->{$HTML_NS}->{ol} = {
1403 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1404     start => $HTMLIntegerAttrChecker,
1405     }),
1406 wakaba 1.1 checker => sub {
1407 wakaba 1.4 my ($self, $todo) = @_;
1408     my $el = $todo->{node};
1409     my $new_todos = [];
1410 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1411    
1412     while (@nodes) {
1413     my $node = shift @nodes;
1414 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1415    
1416 wakaba 1.1 my $nt = $node->node_type;
1417     if ($nt == 1) {
1418 wakaba 1.8 my $node_ns = $node->namespace_uri;
1419     $node_ns = '' unless defined $node_ns;
1420     my $node_ln = $node->manakai_local_name;
1421 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1422 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1423 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1424 wakaba 1.1 }
1425 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1426     unshift @nodes, @$sib;
1427 wakaba 1.4 push @$new_todos, @$ch;
1428 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1429     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1430 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1431 wakaba 1.1 }
1432     } elsif ($nt == 5) {
1433     unshift @nodes, @{$node->child_nodes};
1434     }
1435     }
1436 wakaba 1.4
1437     if ($todo->{inline}) {
1438     for (@$new_todos) {
1439     $_->{inline} = 1;
1440     }
1441     }
1442     return ($new_todos);
1443 wakaba 1.1 },
1444     };
1445    
1446     $Element->{$HTML_NS}->{ul} = {
1447 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1448 wakaba 1.1 checker => $Element->{$HTML_NS}->{ol}->{checker},
1449     };
1450    
1451 wakaba 1.5
1452     $Element->{$HTML_NS}->{li} = {
1453 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1454     start => sub {
1455     my ($self, $attr) = @_;
1456     my $parent = $attr->owner_element->manakai_parent_element;
1457     if (defined $parent) {
1458     my $parent_ns = $parent->namespace_uri;
1459     $parent_ns = '' unless defined $parent_ns;
1460     my $parent_ln = $parent->manakai_local_name;
1461     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1462     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1463     }
1464     }
1465     $HTMLIntegerAttrChecker->($self, $attr);
1466     },
1467     }),
1468 wakaba 1.5 checker => sub {
1469     my ($self, $todo) = @_;
1470     if ($todo->{inline}) {
1471     return $HTMLInlineChecker->($self, $todo);
1472     } else {
1473     return $HTMLBlockOrInlineChecker->($self, $todo);
1474     }
1475     },
1476     };
1477 wakaba 1.1
1478     $Element->{$HTML_NS}->{dl} = {
1479 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1480 wakaba 1.1 checker => sub {
1481 wakaba 1.4 my ($self, $todo) = @_;
1482     my $el = $todo->{node};
1483     my $new_todos = [];
1484 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1485    
1486     my $phase = 'before dt';
1487     while (@nodes) {
1488     my $node = shift @nodes;
1489 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1490    
1491 wakaba 1.1 my $nt = $node->node_type;
1492     if ($nt == 1) {
1493 wakaba 1.8 my $node_ns = $node->namespace_uri;
1494     $node_ns = '' unless defined $node_ns;
1495     my $node_ln = $node->manakai_local_name;
1496 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1497 wakaba 1.1 if ($phase eq 'in dds') {
1498 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1499 wakaba 1.1 #$phase = 'in dds';
1500 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1501 wakaba 1.1 $phase = 'in dts';
1502     } else {
1503 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1504 wakaba 1.1 }
1505     } elsif ($phase eq 'in dts') {
1506 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1507 wakaba 1.1 #$phase = 'in dts';
1508 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1509 wakaba 1.1 $phase = 'in dds';
1510     } else {
1511 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1512 wakaba 1.1 }
1513     } else { # before dt
1514 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1515 wakaba 1.1 $phase = 'in dts';
1516 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1517 wakaba 1.2 $self->{onerror}
1518 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1519 wakaba 1.1 $phase = 'in dds';
1520     } else {
1521 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1522 wakaba 1.1 }
1523     }
1524 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1525     unshift @nodes, @$sib;
1526 wakaba 1.4 push @$new_todos, @$ch;
1527 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1528     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1529 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1530 wakaba 1.1 }
1531     } elsif ($nt == 5) {
1532     unshift @nodes, @{$node->child_nodes};
1533     }
1534     }
1535     if ($phase eq 'in dts') {
1536 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1537 wakaba 1.1 }
1538 wakaba 1.4
1539     if ($todo->{inline}) {
1540     for (@$new_todos) {
1541     $_->{inline} = 1;
1542     }
1543     }
1544     return ($new_todos);
1545 wakaba 1.1 },
1546     };
1547    
1548     $Element->{$HTML_NS}->{dt} = {
1549 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1550 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1551     };
1552    
1553 wakaba 1.4 $Element->{$HTML_NS}->{dd} = {
1554 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1555 wakaba 1.5 checker => $Element->{$HTML_NS}->{li}->{checker},
1556 wakaba 1.4 };
1557 wakaba 1.1
1558 wakaba 1.6 $Element->{$HTML_NS}->{a} = {
1559 wakaba 1.17 attrs_checker => sub {
1560     my ($self, $todo) = @_;
1561 wakaba 1.15 my %attr;
1562 wakaba 1.17 for my $attr (@{$todo->{node}->attributes}) {
1563     my $attr_ns = $attr->namespace_uri;
1564     $attr_ns = '' unless defined $attr_ns;
1565     my $attr_ln = $attr->manakai_local_name;
1566     my $checker;
1567     if ($attr_ns eq '') {
1568     $checker = {
1569     target => $HTMLTargetAttrChecker,
1570     href => $HTMLURIAttrChecker,
1571     ping => $HTMLSpaceURIsAttrChecker,
1572     rel => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker, ## TODO: registered? check
1573     media => $HTMLMQAttrChecker,
1574     hreflang => $HTMLLanguageTagAttrChecker,
1575     type => $HTMLIMTAttrChecker,
1576     }->{$attr_ln};
1577     if ($checker) {
1578     $attr{$attr_ln} = $attr;
1579     } else {
1580     $checker = $HTMLAttrChecker->{$attr_ln};
1581     }
1582     }
1583     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1584     || $AttrChecker->{$attr_ns}->{''};
1585     if ($checker) {
1586     $checker->($self, $attr) if ref $checker;
1587     } else {
1588     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1589     ## ISSUE: No comformance createria for unknown attributes in the spec
1590     }
1591     }
1592    
1593     unless (defined $attr{href}) {
1594     for (qw/target ping rel media hreflang type/) {
1595     if (defined $attr{$_}) {
1596     $self->{onerror}->(node => $attr{$_},
1597     type => 'attribute not allowed');
1598 wakaba 1.15 }
1599     }
1600 wakaba 1.17 }
1601 wakaba 1.15 },
1602 wakaba 1.6 checker => sub {
1603     my ($self, $todo) = @_;
1604    
1605     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1606     my ($sib, $ch)
1607     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1608     push @$sib, $end;
1609     return ($sib, $ch);
1610     },
1611     };
1612 wakaba 1.1
1613 wakaba 1.4 $Element->{$HTML_NS}->{q} = {
1614 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1615     cite => $HTMLURIAttrChecker,
1616     }),
1617 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1618     };
1619 wakaba 1.1
1620     $Element->{$HTML_NS}->{cite} = {
1621 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1622 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1623     };
1624    
1625 wakaba 1.4 $Element->{$HTML_NS}->{em} = {
1626 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1627 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1628     };
1629    
1630     $Element->{$HTML_NS}->{strong} = {
1631 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1632 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1633     };
1634 wakaba 1.1
1635 wakaba 1.4 $Element->{$HTML_NS}->{small} = {
1636 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1637 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1638     };
1639    
1640     $Element->{$HTML_NS}->{m} = {
1641 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1642 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1643     };
1644    
1645 wakaba 1.11 $Element->{$HTML_NS}->{dfn} = { ## TODO: term duplication
1646 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1647 wakaba 1.4 checker => sub {
1648     my ($self, $todo) = @_;
1649    
1650     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1651     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1652     push @$sib, $end;
1653     return ($sib, $ch);
1654     },
1655     };
1656 wakaba 1.1
1657     $Element->{$HTML_NS}->{abbr} = {
1658 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1659     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1660     ## not different. The spec says that the |title| MAY be omitted
1661     ## if there is a |dfn| whose defining term is the abbreviation,
1662     ## but it does not prohibit |abbr| w/o |title| in other cases.
1663     }),
1664 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1665     };
1666    
1667 wakaba 1.11 $Element->{$HTML_NS}->{time} = { ## TODO: validate content
1668     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO: datetime
1669 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1670     };
1671    
1672 wakaba 1.11 $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1673     attrs_checker => $GetHTMLAttrsChecker->({
1674     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1675     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1676     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1677     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1678     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1679     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1680     }),
1681 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1682     };
1683    
1684 wakaba 1.11 $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1685     attrs_checker => $GetHTMLAttrsChecker->({
1686     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1687     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1688     }),
1689 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1690     };
1691    
1692 wakaba 1.4 $Element->{$HTML_NS}->{code} = {
1693 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1694 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1695     ## syntatically same as the |title| as global attribute.
1696 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1697     };
1698 wakaba 1.1
1699     $Element->{$HTML_NS}->{var} = {
1700 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1701 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1702     ## syntatically same as the |title| as global attribute.
1703 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1704     };
1705    
1706 wakaba 1.4 $Element->{$HTML_NS}->{samp} = {
1707 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1708 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1709     ## syntatically same as the |title| as global attribute.
1710 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1711     };
1712 wakaba 1.1
1713     $Element->{$HTML_NS}->{kbd} = {
1714 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1715 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1716     };
1717    
1718     $Element->{$HTML_NS}->{sub} = {
1719 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1720 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1721     };
1722    
1723     $Element->{$HTML_NS}->{sup} = {
1724 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1725 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1726     };
1727    
1728 wakaba 1.4 $Element->{$HTML_NS}->{span} = {
1729 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1730 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1731     ## syntatically same as the |title| as global attribute.
1732 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1733     };
1734 wakaba 1.1
1735     $Element->{$HTML_NS}->{i} = {
1736 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1737 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1738     ## syntatically same as the |title| as global attribute.
1739 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1740     };
1741    
1742     $Element->{$HTML_NS}->{b} = {
1743 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1744 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1745     };
1746    
1747     $Element->{$HTML_NS}->{bdo} = {
1748 wakaba 1.12 attrs_checker => sub {
1749     my ($self, $todo) = @_;
1750     $GetHTMLAttrsChecker->({})->($self, $todo);
1751     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
1752     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
1753     }
1754     },
1755     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1756 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1757     };
1758    
1759     $Element->{$HTML_NS}->{ins} = {
1760 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1761     cite => $HTMLURIAttrChecker,
1762     ## TODO: datetime
1763     }),
1764 wakaba 1.1 checker => $HTMLTransparentChecker,
1765     };
1766    
1767     $Element->{$HTML_NS}->{del} = {
1768 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1769     cite => $HTMLURIAttrChecker,
1770     ## TODO: datetime
1771     }),
1772 wakaba 1.1 checker => sub {
1773 wakaba 1.4 my ($self, $todo) = @_;
1774 wakaba 1.1
1775 wakaba 1.4 my $parent = $todo->{node}->manakai_parent_element;
1776 wakaba 1.1 if (defined $parent) {
1777     my $nsuri = $parent->namespace_uri;
1778     $nsuri = '' unless defined $nsuri;
1779     my $ln = $parent->manakai_local_name;
1780     my $eldef = $Element->{$nsuri}->{$ln} ||
1781     $Element->{$nsuri}->{''} ||
1782     $ElementDefault;
1783 wakaba 1.4 return $eldef->{checker}->($self, $todo);
1784 wakaba 1.1 } else {
1785 wakaba 1.4 return $HTMLBlockOrInlineChecker->($self, $todo);
1786 wakaba 1.1 }
1787     },
1788     };
1789    
1790     ## TODO: figure
1791    
1792     $Element->{$HTML_NS}->{img} = {
1793 wakaba 1.17 attrs_checker => sub {
1794     my ($self, $todo) = @_;
1795     $GetHTMLAttrsChecker->({
1796     alt => sub { }, ## NOTE: No syntactical requirement
1797     src => $HTMLURIAttrChecker,
1798     usemap => $HTMLUsemapAttrChecker,
1799     ismap => $GetHTMLBooleanAttrChecker->('ismap'), ## TODO: MUST ancestor <a>
1800     ## TODO: height
1801     ## TODO: width
1802     })->($self, $todo);
1803     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
1804     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
1805     }
1806     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
1807     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
1808     }
1809     },
1810 wakaba 1.1 checker => $HTMLEmptyChecker,
1811     };
1812    
1813     $Element->{$HTML_NS}->{iframe} = {
1814 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1815     src => $HTMLURIAttrChecker,
1816     }),
1817 wakaba 1.1 checker => $HTMLTextChecker,
1818     };
1819    
1820     $Element->{$HTML_NS}->{embed} = {
1821 wakaba 1.16 attrs_checker => sub {
1822     my ($self, $todo) = @_;
1823     my $has_src;
1824     for my $attr (@{$todo->{node}->attributes}) {
1825     my $attr_ns = $attr->namespace_uri;
1826     $attr_ns = '' unless defined $attr_ns;
1827     my $attr_ln = $attr->manakai_local_name;
1828     my $checker;
1829     if ($attr_ns eq '') {
1830     if ($attr_ln eq 'src') {
1831     $checker = $HTMLURIAttrChecker;
1832     $has_src = 1;
1833     } elsif ($attr_ln eq 'type') {
1834     $checker = $HTMLIMTAttrChecker;
1835     } else {
1836     ## TODO: height
1837     ## TODO: width
1838     $checker = $HTMLAttrChecker->{$attr_ln}
1839     || sub { }; ## NOTE: Any local attribute is ok.
1840     }
1841     }
1842     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1843     || $AttrChecker->{$attr_ns}->{''};
1844     if ($checker) {
1845     $checker->($self, $attr);
1846     } else {
1847     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1848     ## ISSUE: No comformance createria for global attributes in the spec
1849     }
1850     }
1851    
1852     unless ($has_src) {
1853     $self->{onerror}->(node => $todo->{node},
1854     type => 'attribute missing:src');
1855     }
1856     },
1857 wakaba 1.1 checker => $HTMLEmptyChecker,
1858     };
1859    
1860 wakaba 1.15 $Element->{$HTML_NS}->{object} = {
1861 wakaba 1.17 attrs_checker => sub {
1862     my ($self, $todo) = @_;
1863     $GetHTMLAttrsChecker->({
1864     data => $HTMLURIAttrChecker,
1865     type => $HTMLIMTAttrChecker,
1866     usemap => $HTMLUsemapAttrChecker,
1867     ## TODO: width
1868     ## TODO: height
1869     })->($self, $todo);
1870     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
1871     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
1872     $self->{onerror}->(node => $todo->{node},
1873     type => 'attribute missing:data|type');
1874     }
1875     }
1876     },
1877 wakaba 1.15 checker => $ElementDefault->{checker}, ## TODO
1878     };
1879    
1880 wakaba 1.1 $Element->{$HTML_NS}->{param} = {
1881 wakaba 1.12 attrs_checker => sub {
1882     my ($self, $todo) = @_;
1883     $GetHTMLAttrsChecker->({
1884     name => sub { },
1885     value => sub { },
1886     })->($self, $todo);
1887     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
1888     $self->{onerror}->(node => $todo->{node},
1889     type => 'attribute missing:name');
1890     }
1891     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
1892     $self->{onerror}->(node => $todo->{node},
1893     type => 'attribute missing:value');
1894     }
1895     },
1896 wakaba 1.1 checker => $HTMLEmptyChecker,
1897     };
1898    
1899 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
1900 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1901     src => $HTMLURIAttrChecker,
1902     ## TODO: start, loopstart, loopend, end
1903     ## ISSUE: they MUST be "value time offset"s. Value?
1904     ## ISSUE: loopcount has no conformance creteria
1905     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
1906     controls => $GetHTMLBooleanAttrChecker->('controls'),
1907     }),
1908 wakaba 1.2 checker => sub {
1909 wakaba 1.4 my ($self, $todo) = @_;
1910 wakaba 1.2
1911 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1912     return $HTMLBlockOrInlineChecker->($self, $todo);
1913 wakaba 1.2 } else {
1914     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
1915 wakaba 1.4 ->($self, $todo);
1916 wakaba 1.2 }
1917     },
1918     };
1919    
1920     $Element->{$HTML_NS}->{audio} = {
1921 wakaba 1.12 attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
1922     checker => $Element->{$HTML_NS}->{video}->{checker},
1923 wakaba 1.2 };
1924 wakaba 1.1
1925     $Element->{$HTML_NS}->{source} = {
1926 wakaba 1.17 attrs_checker => sub {
1927     my ($self, $todo) = @_;
1928     $GetHTMLAttrsChecker->({
1929     src => $HTMLURIAttrChecker,
1930     type => $HTMLIMTAttrChecker,
1931     media => $HTMLMQAttrChecker,
1932     })->($self, $todo);
1933     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
1934     $self->{onerror}->(node => $todo->{node},
1935     type => 'attribute missing:src');
1936     }
1937     },
1938 wakaba 1.1 checker => $HTMLEmptyChecker,
1939     };
1940    
1941     $Element->{$HTML_NS}->{canvas} = {
1942 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1943     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
1944     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
1945     }),
1946 wakaba 1.1 checker => $HTMLInlineChecker,
1947     };
1948    
1949     $Element->{$HTML_NS}->{map} = {
1950 wakaba 1.17 attrs_checker => $GetHTMLAttrsChecker->({
1951     id => sub {
1952     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
1953     my ($self, $attr) = @_;
1954     my $value = $attr->value;
1955     if (length $value > 0) {
1956     if ($self->{id}->{$value}) {
1957     $self->{onerror}->(node => $attr, type => 'duplicate ID');
1958     } else {
1959     $self->{id}->{$value} = 1;
1960     }
1961     } else {
1962     ## NOTE: MUST contain at least one character
1963     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
1964     }
1965     $self->{map}->{$value} ||= $attr;
1966     },
1967     }),
1968 wakaba 1.1 checker => $HTMLBlockChecker,
1969     };
1970    
1971     $Element->{$HTML_NS}->{area} = {
1972 wakaba 1.15 attrs_checker => sub {
1973     my ($self, $todo) = @_;
1974     my %attr;
1975     my $coords;
1976     for my $attr (@{$todo->{node}->attributes}) {
1977     my $attr_ns = $attr->namespace_uri;
1978     $attr_ns = '' unless defined $attr_ns;
1979     my $attr_ln = $attr->manakai_local_name;
1980     my $checker;
1981     if ($attr_ns eq '') {
1982     $checker = {
1983     alt => sub { },
1984     ## NOTE: |alt| value has no conformance creteria.
1985     shape => $GetHTMLEnumeratedAttrChecker->({
1986     circ => -1, circle => 1,
1987     default => 1,
1988     poly => 1, polygon => -1,
1989     rect => 1, rectangle => -1,
1990     }),
1991     coords => sub {
1992     my ($self, $attr) = @_;
1993     my $value = $attr->value;
1994     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
1995     $coords = [split /,/, $value];
1996     } else {
1997     $self->{onerror}->(node => $attr,
1998     type => 'syntax error');
1999     }
2000     },
2001 wakaba 1.17 target => $HTMLTargetAttrChecker,
2002 wakaba 1.15 href => $HTMLURIAttrChecker,
2003     ping => $HTMLSpaceURIsAttrChecker,
2004     rel => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker, ## TODO: registered? check
2005 wakaba 1.17 media => $HTMLMQAttrChecker,
2006     hreflang => $HTMLLanguageTagAttrChecker,
2007 wakaba 1.15 type => $HTMLIMTAttrChecker,
2008     }->{$attr_ln};
2009     if ($checker) {
2010     $attr{$attr_ln} = $attr;
2011     } else {
2012     $checker = $HTMLAttrChecker->{$attr_ln};
2013     }
2014     }
2015     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2016     || $AttrChecker->{$attr_ns}->{''};
2017     if ($checker) {
2018     $checker->($self, $attr) if ref $checker;
2019     } else {
2020     $self->{onerror}->(node => $attr, type => 'attribute not supported');
2021     ## ISSUE: No comformance createria for unknown attributes in the spec
2022     }
2023     }
2024    
2025     if (defined $attr{href}) {
2026     unless (defined $attr{alt}) {
2027     $self->{onerror}->(node => $todo->{node},
2028     type => 'attribute missing:alt');
2029     }
2030     } else {
2031     for (qw/target ping rel media hreflang type alt/) {
2032     if (defined $attr{$_}) {
2033     $self->{onerror}->(node => $attr{$_},
2034     type => 'attribute not allowed');
2035     }
2036     }
2037     }
2038    
2039     my $shape = 'rectangle';
2040     if (defined $attr{shape}) {
2041     $shape = {
2042     circ => 'circle', circle => 'circle',
2043     default => 'default',
2044     poly => 'polygon', polygon => 'polygon',
2045     rect => 'rectangle', rectangle => 'rectangle',
2046     }->{lc $attr{shape}->value} || 'rectangle';
2047     ## TODO: ASCII lowercase?
2048     }
2049    
2050     if ($shape eq 'circle') {
2051     if (defined $attr{coords}) {
2052     if (defined $coords) {
2053     if (@$coords == 3) {
2054     if ($coords->[2] < 0) {
2055     $self->{onerror}->(node => $attr{coords},
2056     type => 'out of range:2');
2057     }
2058     } else {
2059     $self->{onerror}->(node => $attr{coords},
2060     type => 'list item number:3:'.@$coords);
2061     }
2062     } else {
2063     ## NOTE: A syntax error has been reported.
2064     }
2065     } else {
2066     $self->{onerror}->(node => $todo->{node},
2067     type => 'attribute missing:coords');
2068     }
2069     } elsif ($shape eq 'default') {
2070     if (defined $attr{coords}) {
2071     $self->{onerror}->(node => $attr{coords},
2072     type => 'attribute not allowed');
2073     }
2074     } elsif ($shape eq 'polygon') {
2075     if (defined $attr{coords}) {
2076     if (defined $coords) {
2077     if (@$coords >= 6) {
2078     unless (@$coords % 2 == 0) {
2079     $self->{onerror}->(node => $attr{coords},
2080     type => 'list item number:even:'.@$coords);
2081     }
2082     } else {
2083     $self->{onerror}->(node => $attr{coords},
2084     type => 'list item number:>=6:'.@$coords);
2085     }
2086     } else {
2087     ## NOTE: A syntax error has been reported.
2088     }
2089     } else {
2090     $self->{onerror}->(node => $todo->{node},
2091     type => 'attribute missing:coords');
2092     }
2093     } elsif ($shape eq 'rectangle') {
2094     if (defined $attr{coords}) {
2095     if (defined $coords) {
2096     if (@$coords == 4) {
2097     unless ($coords->[0] < $coords->[2]) {
2098     $self->{onerror}->(node => $attr{coords},
2099     type => 'out of range:0');
2100     }
2101     unless ($coords->[1] < $coords->[3]) {
2102     $self->{onerror}->(node => $attr{coords},
2103     type => 'out of range:1');
2104     }
2105     } else {
2106     $self->{onerror}->(node => $attr{coords},
2107     type => 'list item number:4:'.@$coords);
2108     }
2109     } else {
2110     ## NOTE: A syntax error has been reported.
2111     }
2112     } else {
2113     $self->{onerror}->(node => $todo->{node},
2114     type => 'attribute missing:coords');
2115     }
2116     }
2117     },
2118 wakaba 1.1 checker => $HTMLEmptyChecker,
2119     };
2120     ## TODO: only in map
2121    
2122     $Element->{$HTML_NS}->{table} = {
2123 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2124 wakaba 1.1 checker => sub {
2125 wakaba 1.4 my ($self, $todo) = @_;
2126     my $el = $todo->{node};
2127     my $new_todos = [];
2128 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2129    
2130     my $phase = 'before caption';
2131     my $has_tfoot;
2132     while (@nodes) {
2133     my $node = shift @nodes;
2134 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2135    
2136 wakaba 1.1 my $nt = $node->node_type;
2137     if ($nt == 1) {
2138 wakaba 1.8 my $node_ns = $node->namespace_uri;
2139     $node_ns = '' unless defined $node_ns;
2140     my $node_ln = $node->manakai_local_name;
2141 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2142 wakaba 1.1 if ($phase eq 'in tbodys') {
2143 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2144 wakaba 1.1 #$phase = 'in tbodys';
2145     } elsif (not $has_tfoot and
2146 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2147 wakaba 1.1 $phase = 'after tfoot';
2148     $has_tfoot = 1;
2149     } else {
2150 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2151 wakaba 1.1 }
2152     } elsif ($phase eq 'in trs') {
2153 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2154 wakaba 1.1 #$phase = 'in trs';
2155     } elsif (not $has_tfoot and
2156 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2157 wakaba 1.1 $phase = 'after tfoot';
2158     $has_tfoot = 1;
2159     } else {
2160 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2161 wakaba 1.1 }
2162     } elsif ($phase eq 'after thead') {
2163 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2164 wakaba 1.1 $phase = 'in tbodys';
2165 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2166 wakaba 1.1 $phase = 'in trs';
2167 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2168 wakaba 1.1 $phase = 'in tbodys';
2169     $has_tfoot = 1;
2170     } else {
2171 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2172 wakaba 1.1 }
2173     } elsif ($phase eq 'in colgroup') {
2174 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2175 wakaba 1.1 $phase = 'in colgroup';
2176 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2177 wakaba 1.1 $phase = 'after thead';
2178 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2179 wakaba 1.1 $phase = 'in tbodys';
2180 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2181 wakaba 1.1 $phase = 'in trs';
2182 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2183 wakaba 1.1 $phase = 'in tbodys';
2184     $has_tfoot = 1;
2185     } else {
2186 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2187 wakaba 1.1 }
2188     } elsif ($phase eq 'before caption') {
2189 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2190 wakaba 1.1 $phase = 'in colgroup';
2191 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2192 wakaba 1.1 $phase = 'in colgroup';
2193 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2194 wakaba 1.1 $phase = 'after thead';
2195 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2196 wakaba 1.1 $phase = 'in tbodys';
2197 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2198 wakaba 1.1 $phase = 'in trs';
2199 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2200 wakaba 1.1 $phase = 'in tbodys';
2201     $has_tfoot = 1;
2202     } else {
2203 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2204 wakaba 1.1 }
2205     } else { # after tfoot
2206 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2207 wakaba 1.1 }
2208 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2209     unshift @nodes, @$sib;
2210 wakaba 1.4 push @$new_todos, @$ch;
2211 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2212     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2213 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2214 wakaba 1.1 }
2215     } elsif ($nt == 5) {
2216     unshift @nodes, @{$node->child_nodes};
2217     }
2218     }
2219 wakaba 1.4 return ($new_todos);
2220 wakaba 1.1 },
2221     };
2222    
2223     $Element->{$HTML_NS}->{caption} = {
2224 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2225 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
2226     };
2227    
2228     $Element->{$HTML_NS}->{colgroup} = {
2229 wakaba 1.17 attrs_checker => $GetHTMLAttrsChecker->({
2230     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2231     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2232     ## TODO: "attribute not supported" if |col|.
2233     ## ISSUE: MUST NOT if any |col|?
2234     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2235     }),
2236 wakaba 1.1 checker => sub {
2237 wakaba 1.4 my ($self, $todo) = @_;
2238     my $el = $todo->{node};
2239     my $new_todos = [];
2240 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2241    
2242     while (@nodes) {
2243     my $node = shift @nodes;
2244 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2245    
2246 wakaba 1.1 my $nt = $node->node_type;
2247     if ($nt == 1) {
2248 wakaba 1.8 my $node_ns = $node->namespace_uri;
2249     $node_ns = '' unless defined $node_ns;
2250     my $node_ln = $node->manakai_local_name;
2251 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2252 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
2253 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2254 wakaba 1.1 }
2255 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2256     unshift @nodes, @$sib;
2257 wakaba 1.4 push @$new_todos, @$ch;
2258 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2259     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2260 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2261 wakaba 1.1 }
2262     } elsif ($nt == 5) {
2263     unshift @nodes, @{$node->child_nodes};
2264     }
2265     }
2266 wakaba 1.4 return ($new_todos);
2267 wakaba 1.1 },
2268     };
2269    
2270     $Element->{$HTML_NS}->{col} = {
2271 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2272     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2273     }),
2274 wakaba 1.1 checker => $HTMLEmptyChecker,
2275     };
2276    
2277     $Element->{$HTML_NS}->{tbody} = {
2278 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2279 wakaba 1.1 checker => sub {
2280 wakaba 1.4 my ($self, $todo) = @_;
2281     my $el = $todo->{node};
2282     my $new_todos = [];
2283 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2284    
2285     my $has_tr;
2286     while (@nodes) {
2287     my $node = shift @nodes;
2288 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2289    
2290 wakaba 1.1 my $nt = $node->node_type;
2291     if ($nt == 1) {
2292 wakaba 1.8 my $node_ns = $node->namespace_uri;
2293     $node_ns = '' unless defined $node_ns;
2294     my $node_ln = $node->manakai_local_name;
2295 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2296 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2297 wakaba 1.1 $has_tr = 1;
2298     } else {
2299 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2300 wakaba 1.1 }
2301 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2302     unshift @nodes, @$sib;
2303 wakaba 1.4 push @$new_todos, @$ch;
2304 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2305     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2306 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2307 wakaba 1.1 }
2308     } elsif ($nt == 5) {
2309     unshift @nodes, @{$node->child_nodes};
2310     }
2311     }
2312     unless ($has_tr) {
2313 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
2314 wakaba 1.1 }
2315 wakaba 1.4 return ($new_todos);
2316 wakaba 1.1 },
2317     };
2318    
2319     $Element->{$HTML_NS}->{thead} = {
2320 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2321 wakaba 1.1 checker => $Element->{$HTML_NS}->{tbody},
2322     };
2323    
2324     $Element->{$HTML_NS}->{tfoot} = {
2325 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2326 wakaba 1.1 checker => $Element->{$HTML_NS}->{tbody},
2327     };
2328    
2329     $Element->{$HTML_NS}->{tr} = {
2330 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2331 wakaba 1.1 checker => sub {
2332 wakaba 1.4 my ($self, $todo) = @_;
2333     my $el = $todo->{node};
2334     my $new_todos = [];
2335 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2336    
2337     my $has_td;
2338     while (@nodes) {
2339     my $node = shift @nodes;
2340 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2341    
2342 wakaba 1.1 my $nt = $node->node_type;
2343     if ($nt == 1) {
2344 wakaba 1.8 my $node_ns = $node->namespace_uri;
2345     $node_ns = '' unless defined $node_ns;
2346     my $node_ln = $node->manakai_local_name;
2347 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2348 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
2349 wakaba 1.1 $has_td = 1;
2350     } else {
2351 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2352 wakaba 1.1 }
2353 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2354     unshift @nodes, @$sib;
2355 wakaba 1.4 push @$new_todos, @$ch;
2356 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2357     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2358 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2359 wakaba 1.1 }
2360     } elsif ($nt == 5) {
2361     unshift @nodes, @{$node->child_nodes};
2362     }
2363     }
2364     unless ($has_td) {
2365 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2366 wakaba 1.1 }
2367 wakaba 1.4 return ($new_todos);
2368 wakaba 1.1 },
2369     };
2370    
2371     $Element->{$HTML_NS}->{td} = {
2372 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2373     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2374     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2375     }),
2376 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
2377     };
2378    
2379     $Element->{$HTML_NS}->{th} = {
2380 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2381     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2382     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2383     scope => $GetHTMLEnumeratedAttrChecker
2384     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2385     }),
2386 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
2387     };
2388    
2389 wakaba 1.12 ## TODO: table model error checking
2390    
2391 wakaba 1.1 ## TODO: forms
2392    
2393 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
2394 wakaba 1.15 attrs_checker => $GetHTMLAttrsChecker->({
2395     src => $HTMLURIAttrChecker,
2396     defer => $GetHTMLBooleanAttrChecker->('defer'), ## TODO: if src ## ISSUE: no MUST NOT
2397     async => $GetHTMLBooleanAttrChecker->('async'), ## TODO: if src ## ISSUE: no MUST NOT
2398     type => $HTMLIMTAttrChecker,
2399     }),
2400 wakaba 1.2 checker => sub {
2401 wakaba 1.4 my ($self, $todo) = @_;
2402 wakaba 1.2
2403 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2404     return $HTMLEmptyChecker->($self, $todo);
2405 wakaba 1.2 } else {
2406     ## NOTE: No content model conformance in HTML5 spec.
2407 wakaba 1.4 return $AnyChecker->($self, $todo);
2408 wakaba 1.2 }
2409     },
2410     };
2411    
2412     ## NOTE: When script is disabled.
2413     $Element->{$HTML_NS}->{noscript} = {
2414 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2415 wakaba 1.2 checker => sub {
2416 wakaba 1.4 my ($self, $todo) = @_;
2417 wakaba 1.1
2418 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2419 wakaba 1.4 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2420 wakaba 1.2 push @$sib, $end;
2421     return ($sib, $ch);
2422     },
2423     };
2424 wakaba 1.1
2425     $Element->{$HTML_NS}->{'event-source'} = {
2426 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2427     src => $HTMLURIAttrChecker,
2428     }),
2429 wakaba 1.1 checker => $HTMLEmptyChecker,
2430     };
2431    
2432     $Element->{$HTML_NS}->{details} = {
2433 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2434     open => $GetHTMLBooleanAttrChecker->('open'),
2435     }),
2436 wakaba 1.6 checker => sub {
2437     my ($self, $todo) = @_;
2438    
2439     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2440     my ($sib, $ch)
2441     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2442     ->($self, $todo);
2443     push @$sib, $end;
2444     return ($sib, $ch);
2445     },
2446 wakaba 1.1 };
2447    
2448     $Element->{$HTML_NS}->{datagrid} = {
2449 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2450     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2451     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2452     }),
2453 wakaba 1.6 checker => sub {
2454     my ($self, $todo) = @_;
2455    
2456     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2457     my ($sib, $ch) = $HTMLBlockChecker->($self, $todo);
2458     push @$sib, $end;
2459     return ($sib, $ch);
2460     },
2461 wakaba 1.1 };
2462    
2463     $Element->{$HTML_NS}->{command} = {
2464 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2465 wakaba 1.1 checker => $HTMLEmptyChecker,
2466     };
2467    
2468     $Element->{$HTML_NS}->{menu} = {
2469 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2470 wakaba 1.1 checker => sub {
2471 wakaba 1.4 my ($self, $todo) = @_;
2472     my $el = $todo->{node};
2473     my $new_todos = [];
2474 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2475    
2476     my $content = 'li or inline';
2477     while (@nodes) {
2478     my $node = shift @nodes;
2479 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2480    
2481 wakaba 1.1 my $nt = $node->node_type;
2482     if ($nt == 1) {
2483 wakaba 1.2 my $node_ns = $node->namespace_uri;
2484     $node_ns = '' unless defined $node_ns;
2485     my $node_ln = $node->manakai_local_name;
2486 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2487 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2488 wakaba 1.1 if ($content eq 'inline') {
2489 wakaba 1.6 $not_allowed = 1;
2490 wakaba 1.1 } elsif ($content eq 'li or inline') {
2491     $content = 'li';
2492     }
2493     } else {
2494 wakaba 1.7 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2495     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2496     $content = 'inline';
2497     } else {
2498 wakaba 1.6 $not_allowed = 1;
2499 wakaba 1.7 }
2500 wakaba 1.1 }
2501 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
2502     if $not_allowed;
2503 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2504     unshift @nodes, @$sib;
2505 wakaba 1.4 push @$new_todos, @$ch;
2506 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2507     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2508     if ($content eq 'li') {
2509 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2510 wakaba 1.1 } elsif ($content eq 'li or inline') {
2511     $content = 'inline';
2512     }
2513     }
2514     } elsif ($nt == 5) {
2515     unshift @nodes, @{$node->child_nodes};
2516     }
2517     }
2518 wakaba 1.4
2519     for (@$new_todos) {
2520     $_->{inline} = 1;
2521     }
2522     return ($new_todos);
2523 wakaba 1.1 },
2524     };
2525    
2526 wakaba 1.6 $Element->{$HTML_NS}->{legend} = {
2527 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2528 wakaba 1.6 checker => sub {
2529     my ($self, $todo) = @_;
2530    
2531     my $parent = $todo->{node}->manakai_parent_element;
2532     if (defined $parent) {
2533     my $nsuri = $parent->namespace_uri;
2534     $nsuri = '' unless defined $nsuri;
2535     my $ln = $parent->manakai_local_name;
2536     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
2537     return $HTMLInlineChecker->($self, $todo);
2538     } else {
2539     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
2540     }
2541     } else {
2542     return $HTMLInlineChecker->($self, $todo);
2543     }
2544    
2545     ## ISSUE: Content model is defined only for fieldset/legend,
2546     ## details/legend, and figure/legend.
2547     },
2548     };
2549 wakaba 1.1
2550     $Element->{$HTML_NS}->{div} = {
2551 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2552 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
2553 wakaba 1.1 };
2554    
2555     $Element->{$HTML_NS}->{font} = {
2556 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2557 wakaba 1.1 checker => $HTMLTransparentChecker,
2558     };
2559    
2560 wakaba 1.2 sub new ($) {
2561     return bless {}, shift;
2562     } # new
2563    
2564 wakaba 1.1 sub check_element ($$$) {
2565     my ($self, $el, $onerror) = @_;
2566    
2567 wakaba 1.2 $self->{minuses} = {};
2568     $self->{onerror} = $onerror;
2569 wakaba 1.10 $self->{id} = {};
2570 wakaba 1.17 $self->{usemap} = [];
2571     $self->{map} = {};
2572 wakaba 1.2
2573 wakaba 1.4 my @todo = ({type => 'element', node => $el});
2574     while (@todo) {
2575     my $todo = shift @todo;
2576     if ($todo->{type} eq 'element') {
2577 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2578     if (defined $prefix and $prefix eq 'xmlns') {
2579     $self->{onerror}
2580     ->(node => $todo->{node},
2581     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2582     }
2583 wakaba 1.4 my $nsuri = $todo->{node}->namespace_uri;
2584     $nsuri = '' unless defined $nsuri;
2585     my $ln = $todo->{node}->manakai_local_name;
2586     my $eldef = $Element->{$nsuri}->{$ln} ||
2587     $Element->{$nsuri}->{''} ||
2588     $ElementDefault;
2589 wakaba 1.9 $eldef->{attrs_checker}->($self, $todo);
2590 wakaba 1.4 my ($new_todos) = $eldef->{checker}->($self, $todo);
2591 wakaba 1.14 unshift @todo, @$new_todos;
2592 wakaba 1.9 } elsif ($todo->{type} eq 'element-attributes') {
2593 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2594     if (defined $prefix and $prefix eq 'xmlns') {
2595     $self->{onerror}
2596     ->(node => $todo->{node},
2597     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2598     }
2599 wakaba 1.9 my $nsuri = $todo->{node}->namespace_uri;
2600     $nsuri = '' unless defined $nsuri;
2601     my $ln = $todo->{node}->manakai_local_name;
2602     my $eldef = $Element->{$nsuri}->{$ln} ||
2603     $Element->{$nsuri}->{''} ||
2604     $ElementDefault;
2605     $eldef->{attrs_checker}->($self, $todo);
2606 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
2607     $self->_remove_minuses ($todo);
2608     }
2609 wakaba 1.1 }
2610 wakaba 1.17
2611     for (@{$self->{usemap}}) {
2612     unless ($self->{map}->{$_->[0]}) {
2613     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
2614     }
2615     }
2616    
2617     delete $self->{minuses};
2618     delete $self->{onerror};
2619     delete $self->{id};
2620     delete $self->{usemap};
2621     delete $self->{map};
2622 wakaba 1.1 } # check_element
2623    
2624 wakaba 1.2 sub _add_minuses ($@) {
2625     my $self = shift;
2626     my $r = {};
2627     for my $list (@_) {
2628     for my $ns (keys %$list) {
2629     for my $ln (keys %{$list->{$ns}}) {
2630     unless ($self->{minuses}->{$ns}->{$ln}) {
2631     $self->{minuses}->{$ns}->{$ln} = 1;
2632     $r->{$ns}->{$ln} = 1;
2633     }
2634     }
2635     }
2636     }
2637 wakaba 1.4 return {type => 'plus', list => $r};
2638 wakaba 1.2 } # _add_minuses
2639    
2640     sub _remove_minuses ($$) {
2641 wakaba 1.4 my ($self, $todo) = @_;
2642     for my $ns (keys %{$todo->{list}}) {
2643     for my $ln (keys %{$todo->{list}->{$ns}}) {
2644     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
2645 wakaba 1.2 }
2646     }
2647     1;
2648     } # _remove_minuses
2649    
2650     sub _check_get_children ($$) {
2651     my ($self, $node) = @_;
2652 wakaba 1.4 my $new_todos = [];
2653 wakaba 1.2 my $sib = [];
2654     TP: {
2655     my $node_ns = $node->namespace_uri;
2656     $node_ns = '' unless defined $node_ns;
2657     my $node_ln = $node->manakai_local_name;
2658     if ($node_ns eq $HTML_NS) {
2659     if ($node_ln eq 'noscript') {
2660     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
2661     push @$sib, $end;
2662     }
2663     }
2664 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
2665     unshift @$sib, @{$node->child_nodes};
2666 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2667 wakaba 1.7 last TP;
2668 wakaba 1.2 }
2669 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
2670 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
2671     unshift @$sib, @{$node->child_nodes};
2672 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2673 wakaba 1.2 last TP;
2674     } else {
2675     my @cn = @{$node->child_nodes};
2676     CN: while (@cn) {
2677     my $cn = shift @cn;
2678     my $cnt = $cn->node_type;
2679     if ($cnt == 1) {
2680 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
2681     $cn_nsuri = '' unless defined $cn_nsuri;
2682     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
2683 wakaba 1.2 #
2684     } else {
2685     last CN;
2686     }
2687     } elsif ($cnt == 3 or $cnt == 4) {
2688     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
2689     last CN;
2690     }
2691     }
2692     } # CN
2693     unshift @$sib, @cn;
2694     }
2695     }
2696 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
2697 wakaba 1.2 } # TP
2698 wakaba 1.4 return ($sib, $new_todos);
2699 wakaba 1.2 } # _check_get_children
2700    
2701 wakaba 1.1 1;
2702 wakaba 1.18 # $Date: 2007/05/20 11:12:25 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24