/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.32 - (hide annotations) (download)
Mon Jun 25 12:39:11 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.31: +121 -12 lines
++ whatpm/t/ChangeLog	25 Jun 2007 12:31:07 -0000
	* content-model-2.dat: Tests for |<img ismap>| context
	are added.  Tests for |<command>|'s attributes are added.
	Tests for |<menu>|'s attributes are added.  Tests for |contextmenu|
	are added.

	* content-model-1.dat: Tests for |<datagrid>| contents
	are added.

2007-06-25  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	25 Jun 2007 12:31:24 -0000
	* ContentChecker.pm: |<img ismap>| not in |<a></a>|
	is now erred.  |<datalist>| is implemented.
	Attribute checker for |<command>| and |<menu>| are
	added.  Support for |contextmenu| global attribute
	is added.

2007-06-25  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.18 require Whatpm::URIChecker;
5    
6 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
7     ## be applied to an in-memory representation (i.e. DOM)?
8    
9 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
10     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
11    
12     my $AttrChecker = {
13     $XML_NS => {
14 wakaba 1.13 space => sub {
15     my ($self, $attr) = @_;
16     my $value = $attr->value;
17     if ($value eq 'default' or $value eq 'preserve') {
18     #
19     } else {
20     ## NOTE: An XML "error"
21     $self->{onerror}->(node => $attr,
22     type => 'XML error:invalid xml:space value');
23     }
24     },
25     lang => sub {
26     ## NOTE: "The values of the attribute are language identifiers
27     ## as defined by [IETF RFC 3066], Tags for the Identification
28     ## of Languages, or its successor; in addition, the empty string
29     ## may be specified." ("may" in lower case)
30     ## TODO: xml:lang MUST NOT in HTML document
31     },
32     base => sub {
33     my ($self, $attr) = @_;
34     my $value = $attr->value;
35     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
36     $self->{onerror}->(node => $attr,
37     type => 'syntax error');
38     }
39 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
40     ## no author requirement on conformance in the XML Base specification.
41 wakaba 1.13 },
42     id => sub {
43     my ($self, $attr) = @_;
44     my $value = $attr->value;
45     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
46     $value =~ s/^\x20//;
47     $value =~ s/\x20$//;
48     ## TODO: NCName in XML 1.0 or 1.1
49     ## TODO: declared type is ID?
50     if ($self->{id}->{$value}) {
51     $self->{onerror}->(node => $attr, type => 'xml:id error:duplicate ID');
52     } else {
53     $self->{id}->{$value} = 1;
54     }
55     },
56 wakaba 1.9 },
57     $XMLNS_NS => {
58 wakaba 1.13 '' => sub {
59     my ($self, $attr) = @_;
60     my $ln = $attr->manakai_local_name;
61     my $value = $attr->value;
62     if ($value eq $XML_NS and $ln ne 'xml') {
63     $self->{onerror}
64     ->(node => $attr,
65     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
66     } elsif ($value eq $XMLNS_NS) {
67     $self->{onerror}
68     ->(node => $attr,
69     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
70     }
71     if ($ln eq 'xml' and $value ne $XML_NS) {
72     $self->{onerror}
73     ->(node => $attr,
74     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xml=');
75     } elsif ($ln eq 'xmlns') {
76     $self->{onerror}
77     ->(node => $attr,
78     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xmlns=');
79     }
80     ## TODO: If XML 1.0 and empty
81     },
82     xmlns => sub {
83     my ($self, $attr) = @_;
84     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
85     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
86 wakaba 1.18 ## TODO: relative references are deprecated
87 wakaba 1.13 my $value = $attr->value;
88     if ($value eq $XML_NS) {
89     $self->{onerror}
90     ->(node => $attr,
91     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
92     } elsif ($value eq $XMLNS_NS) {
93     $self->{onerror}
94     ->(node => $attr,
95     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
96     }
97     },
98 wakaba 1.9 },
99     };
100    
101 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
102 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
103     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
104     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
105     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
106    
107 wakaba 1.3 ## ANY
108     my $AnyChecker = sub {
109 wakaba 1.4 my ($self, $todo) = @_;
110     my $el = $todo->{node};
111     my $new_todos = [];
112 wakaba 1.3 my @nodes = (@{$el->child_nodes});
113     while (@nodes) {
114     my $node = shift @nodes;
115     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
116    
117     my $nt = $node->node_type;
118     if ($nt == 1) {
119     my $node_ns = $node->namespace_uri;
120     $node_ns = '' unless defined $node_ns;
121     my $node_ln = $node->manakai_local_name;
122     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
123     $self->{onerror}->(node => $node, type => 'element not allowed');
124     }
125 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
126 wakaba 1.3 } elsif ($nt == 5) {
127     unshift @nodes, @{$node->child_nodes};
128     }
129     }
130 wakaba 1.4 return ($new_todos);
131 wakaba 1.3 }; # $AnyChecker
132    
133 wakaba 1.1 my $ElementDefault = {
134     checker => sub {
135 wakaba 1.4 my ($self, $todo) = @_;
136     $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
137     return $AnyChecker->($self, $todo);
138 wakaba 1.1 },
139 wakaba 1.9 attrs_checker => sub {
140     my ($self, $todo) = @_;
141     for my $attr (@{$todo->{node}->attributes}) {
142     my $attr_ns = $attr->namespace_uri;
143     $attr_ns = '' unless defined $attr_ns;
144     my $attr_ln = $attr->manakai_local_name;
145     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
146     || $AttrChecker->{$attr_ns}->{''};
147     if ($checker) {
148     $checker->($self, $attr);
149 wakaba 1.17 } else {
150     $self->{onerror}->(node => $attr, type => 'attribute not supported');
151 wakaba 1.9 }
152     }
153     },
154 wakaba 1.1 };
155    
156     my $Element = {};
157    
158     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
159    
160 wakaba 1.7 my $HTMLMetadataElements = {
161     $HTML_NS => {
162 wakaba 1.29 qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
163     noscript 1
164     /,
165 wakaba 1.7 },
166     };
167 wakaba 1.1
168 wakaba 1.2 my $HTMLSectioningElements = {
169     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
170     };
171 wakaba 1.1
172 wakaba 1.7 my $HTMLBlockLevelElements = {
173     $HTML_NS => {
174     qw/
175     section 1 nav 1 article 1 blockquote 1 aside 1
176     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
177     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
178     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
179     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
180     /,
181     },
182     };
183    
184     my $HTMLStrictlyInlineLevelElements = {
185     $HTML_NS => {
186     qw/
187     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
188     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
189     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
190     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
191     script 1 noscript 1 event-source 1 command 1 font 1
192     /,
193     },
194     };
195    
196     my $HTMLStructuredInlineLevelElements = {
197     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
198     };
199 wakaba 1.1
200 wakaba 1.6 my $HTMLInteractiveElements = {
201     $HTML_NS => {a => 1, details => 1, datagrid => 1},
202     };
203     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
204     ## of interactive elements
205 wakaba 1.1
206 wakaba 1.7 my $HTMLTransparentElements = {
207     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
208 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
209     ## and not in |head|.
210 wakaba 1.7 };
211    
212     #my $HTMLSemiTransparentElements = {
213     # $HTML_NS => {qw/video 1 audio 1/},
214     #};
215    
216     my $HTMLEmbededElements = {
217     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
218     };
219 wakaba 1.1
220     ## Empty
221     my $HTMLEmptyChecker = sub {
222 wakaba 1.4 my ($self, $todo) = @_;
223     my $el = $todo->{node};
224     my $new_todos = [];
225 wakaba 1.1 my @nodes = (@{$el->child_nodes});
226    
227     while (@nodes) {
228     my $node = shift @nodes;
229 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
230    
231 wakaba 1.1 my $nt = $node->node_type;
232     if ($nt == 1) {
233 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
234     $self->{onerror}->(node => $node, type => 'element not allowed');
235 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
236 wakaba 1.2 unshift @nodes, @$sib;
237 wakaba 1.4 push @$new_todos, @$ch;
238 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
239 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
240     $self->{onerror}->(node => $node, type => 'character not allowed');
241     }
242 wakaba 1.1 } elsif ($nt == 5) {
243     unshift @nodes, @{$node->child_nodes};
244     }
245     }
246 wakaba 1.4 return ($new_todos);
247 wakaba 1.1 };
248    
249     ## Text
250     my $HTMLTextChecker = sub {
251 wakaba 1.4 my ($self, $todo) = @_;
252     my $el = $todo->{node};
253     my $new_todos = [];
254 wakaba 1.1 my @nodes = (@{$el->child_nodes});
255    
256     while (@nodes) {
257     my $node = shift @nodes;
258 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
259    
260 wakaba 1.1 my $nt = $node->node_type;
261     if ($nt == 1) {
262 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
263     $self->{onerror}->(node => $node, type => 'element not allowed');
264 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
265 wakaba 1.2 unshift @nodes, @$sib;
266 wakaba 1.4 push @$new_todos, @$ch;
267 wakaba 1.1 } elsif ($nt == 5) {
268     unshift @nodes, @{$node->child_nodes};
269     }
270     }
271 wakaba 1.4 return ($new_todos);
272 wakaba 1.1 };
273    
274     ## Zero or more |html:style| elements,
275     ## followed by zero or more block-level elements
276     my $HTMLStylableBlockChecker = sub {
277 wakaba 1.4 my ($self, $todo) = @_;
278     my $el = $todo->{node};
279     my $new_todos = [];
280 wakaba 1.1 my @nodes = (@{$el->child_nodes});
281    
282     my $has_non_style;
283     while (@nodes) {
284     my $node = shift @nodes;
285 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
286    
287 wakaba 1.1 my $nt = $node->node_type;
288     if ($nt == 1) {
289 wakaba 1.2 my $node_ns = $node->namespace_uri;
290     $node_ns = '' unless defined $node_ns;
291     my $node_ln = $node->manakai_local_name;
292 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
293 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
294 wakaba 1.28 $not_allowed = 1 if $has_non_style or
295     not $node->has_attribute_ns (undef, 'scoped');
296 wakaba 1.7 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
297     $has_non_style = 1;
298 wakaba 1.1 } else {
299     $has_non_style = 1;
300 wakaba 1.7 $not_allowed = 1;
301 wakaba 1.1 }
302 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
303     if $not_allowed;
304 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
305 wakaba 1.2 unshift @nodes, @$sib;
306 wakaba 1.4 push @$new_todos, @$ch;
307 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
308     if ($node->data =~ /[^\x09-\x0D\x20]/) {
309 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
310 wakaba 1.1 }
311     } elsif ($nt == 5) {
312     unshift @nodes, @{$node->child_nodes};
313     }
314     }
315 wakaba 1.4 return ($new_todos);
316 wakaba 1.1 }; # $HTMLStylableBlockChecker
317    
318     ## Zero or more block-level elements
319     my $HTMLBlockChecker = sub {
320 wakaba 1.4 my ($self, $todo) = @_;
321     my $el = $todo->{node};
322     my $new_todos = [];
323 wakaba 1.1 my @nodes = (@{$el->child_nodes});
324    
325     while (@nodes) {
326     my $node = shift @nodes;
327 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
328    
329 wakaba 1.1 my $nt = $node->node_type;
330     if ($nt == 1) {
331 wakaba 1.2 my $node_ns = $node->namespace_uri;
332     $node_ns = '' unless defined $node_ns;
333     my $node_ln = $node->manakai_local_name;
334 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
335 wakaba 1.7 $not_allowed = 1
336     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
337 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
338     if $not_allowed;
339 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
340 wakaba 1.2 unshift @nodes, @$sib;
341 wakaba 1.4 push @$new_todos, @$ch;
342 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
343     if ($node->data =~ /[^\x09-\x0D\x20]/) {
344 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
345 wakaba 1.1 }
346     } elsif ($nt == 5) {
347     unshift @nodes, @{$node->child_nodes};
348     }
349     }
350 wakaba 1.4 return ($new_todos);
351 wakaba 1.1 }; # $HTMLBlockChecker
352    
353     ## Inline-level content
354     my $HTMLInlineChecker = sub {
355 wakaba 1.4 my ($self, $todo) = @_;
356     my $el = $todo->{node};
357     my $new_todos = [];
358 wakaba 1.1 my @nodes = (@{$el->child_nodes});
359    
360     while (@nodes) {
361     my $node = shift @nodes;
362 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
363    
364 wakaba 1.1 my $nt = $node->node_type;
365     if ($nt == 1) {
366 wakaba 1.2 my $node_ns = $node->namespace_uri;
367     $node_ns = '' unless defined $node_ns;
368     my $node_ln = $node->manakai_local_name;
369 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
370 wakaba 1.7 $not_allowed = 1
371     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
372     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
373 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
374     if $not_allowed;
375 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
376 wakaba 1.2 unshift @nodes, @$sib;
377 wakaba 1.4 push @$new_todos, @$ch;
378 wakaba 1.1 } elsif ($nt == 5) {
379     unshift @nodes, @{$node->child_nodes};
380     }
381     }
382 wakaba 1.4
383     for (@$new_todos) {
384     $_->{inline} = 1;
385     }
386     return ($new_todos);
387     }; # $HTMLInlineChecker
388 wakaba 1.1
389     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
390     ## TODO: check significant content
391    
392     ## Strictly inline-level content
393     my $HTMLStrictlyInlineChecker = sub {
394 wakaba 1.4 my ($self, $todo) = @_;
395     my $el = $todo->{node};
396     my $new_todos = [];
397 wakaba 1.1 my @nodes = (@{$el->child_nodes});
398    
399     while (@nodes) {
400     my $node = shift @nodes;
401 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
402    
403 wakaba 1.1 my $nt = $node->node_type;
404     if ($nt == 1) {
405 wakaba 1.2 my $node_ns = $node->namespace_uri;
406     $node_ns = '' unless defined $node_ns;
407     my $node_ln = $node->manakai_local_name;
408 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
409 wakaba 1.7 $not_allowed = 1
410     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
411 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
412     if $not_allowed;
413 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
414 wakaba 1.2 unshift @nodes, @$sib;
415 wakaba 1.4 push @$new_todos, @$ch;
416 wakaba 1.1 } elsif ($nt == 5) {
417     unshift @nodes, @{$node->child_nodes};
418     }
419     }
420 wakaba 1.4
421     for (@$new_todos) {
422     $_->{inline} = 1;
423     $_->{strictly_inline} = 1;
424     }
425     return ($new_todos);
426 wakaba 1.1 }; # $HTMLStrictlyInlineChecker
427    
428     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
429     ## TODO: check significant content
430    
431 wakaba 1.4 ## Inline-level or strictly inline-kevek content
432     my $HTMLInlineOrStrictlyInlineChecker = sub {
433     my ($self, $todo) = @_;
434     my $el = $todo->{node};
435     my $new_todos = [];
436     my @nodes = (@{$el->child_nodes});
437    
438     while (@nodes) {
439     my $node = shift @nodes;
440     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
441    
442     my $nt = $node->node_type;
443     if ($nt == 1) {
444     my $node_ns = $node->namespace_uri;
445     $node_ns = '' unless defined $node_ns;
446     my $node_ln = $node->manakai_local_name;
447 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
448 wakaba 1.7 if ($todo->{strictly_inline}) {
449     $not_allowed = 1
450     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
451     } else {
452     $not_allowed = 1
453     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455     }
456 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
457     if $not_allowed;
458 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
459 wakaba 1.4 unshift @nodes, @$sib;
460     push @$new_todos, @$ch;
461     } elsif ($nt == 5) {
462     unshift @nodes, @{$node->child_nodes};
463     }
464     }
465    
466     for (@$new_todos) {
467     $_->{inline} = 1;
468     $_->{strictly_inline} = 1;
469     }
470     return ($new_todos);
471     }; # $HTMLInlineOrStrictlyInlineChecker
472    
473 wakaba 1.6 my $HTMLSignificantInlineOrStrictlyInlineChecker
474     = $HTMLInlineOrStrictlyInlineChecker;
475     ## TODO: check significant content
476    
477 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
478 wakaba 1.4 my ($self, $todo) = @_;
479     my $el = $todo->{node};
480     my $new_todos = [];
481 wakaba 1.1 my @nodes = (@{$el->child_nodes});
482    
483     my $content = 'block-or-inline'; # or 'block' or 'inline'
484     my @block_not_inline;
485     while (@nodes) {
486     my $node = shift @nodes;
487 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
488    
489 wakaba 1.1 my $nt = $node->node_type;
490     if ($nt == 1) {
491 wakaba 1.2 my $node_ns = $node->namespace_uri;
492     $node_ns = '' unless defined $node_ns;
493     my $node_ln = $node->manakai_local_name;
494 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
495 wakaba 1.1 if ($content eq 'block') {
496 wakaba 1.7 $not_allowed = 1
497     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
498 wakaba 1.1 } elsif ($content eq 'inline') {
499 wakaba 1.7 $not_allowed = 1
500     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
501     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
502 wakaba 1.1 } else {
503 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
504     my $is_inline
505     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
506     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
507 wakaba 1.1
508 wakaba 1.6 push @block_not_inline, $node
509     if $is_block and not $is_inline and not $not_allowed;
510 wakaba 1.1 unless ($is_block) {
511     $content = 'inline';
512     for (@block_not_inline) {
513 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
514 wakaba 1.1 }
515 wakaba 1.6 $not_allowed = 1 unless $is_inline;
516 wakaba 1.1 }
517     }
518 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
519     if $not_allowed;
520 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
521 wakaba 1.2 unshift @nodes, @$sib;
522 wakaba 1.4 push @$new_todos, @$ch;
523 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
524     if ($node->data =~ /[^\x09-\x0D\x20]/) {
525     if ($content eq 'block') {
526 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
527 wakaba 1.1 } else {
528     $content = 'inline';
529     for (@block_not_inline) {
530 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
531 wakaba 1.1 }
532     }
533     }
534     } elsif ($nt == 5) {
535     unshift @nodes, @{$node->child_nodes};
536     }
537     }
538 wakaba 1.4
539     if ($content eq 'inline') {
540     for (@$new_todos) {
541     $_->{inline} = 1;
542     }
543     }
544     return ($new_todos);
545 wakaba 1.1 };
546    
547 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
548     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
549     my ($elnsuri, $ellname) = @_;
550     return sub {
551 wakaba 1.4 my ($self, $todo) = @_;
552     my $el = $todo->{node};
553     my $new_todos = [];
554 wakaba 1.2 my @nodes = (@{$el->child_nodes});
555    
556     my $has_non_style;
557     my $content = 'block-or-inline'; # or 'block' or 'inline'
558     my @block_not_inline;
559     while (@nodes) {
560     my $node = shift @nodes;
561     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
562    
563     my $nt = $node->node_type;
564     if ($nt == 1) {
565     my $node_ns = $node->namespace_uri;
566     $node_ns = '' unless defined $node_ns;
567     my $node_ln = $node->manakai_local_name;
568 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
569 wakaba 1.8 if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
570 wakaba 1.6 $not_allowed = 1 if $has_non_style;
571 wakaba 1.28 if ($ellname eq 'style' and
572     not $node->has_attribute_ns (undef, 'scoped')) {
573     $not_allowed = 1;
574     }
575 wakaba 1.2 } elsif ($content eq 'block') {
576     $has_non_style = 1;
577 wakaba 1.7 $not_allowed = 1
578     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
579 wakaba 1.2 } elsif ($content eq 'inline') {
580     $has_non_style = 1;
581 wakaba 1.7 $not_allowed = 1
582     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
583     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
584 wakaba 1.2 } else {
585     $has_non_style = 1;
586 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
587     my $is_inline
588     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
589     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
590 wakaba 1.2
591 wakaba 1.6 push @block_not_inline, $node
592     if $is_block and not $is_inline and not $not_allowed;
593 wakaba 1.2 unless ($is_block) {
594     $content = 'inline';
595     for (@block_not_inline) {
596     $self->{onerror}->(node => $_, type => 'element not allowed');
597     }
598 wakaba 1.6 $not_allowed = 1 unless $is_inline;
599 wakaba 1.1 }
600     }
601 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
602     if $not_allowed;
603 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
604 wakaba 1.2 unshift @nodes, @$sib;
605 wakaba 1.4 push @$new_todos, @$ch;
606 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
607     if ($node->data =~ /[^\x09-\x0D\x20]/) {
608     $has_non_style = 1;
609     if ($content eq 'block') {
610     $self->{onerror}->(node => $node, type => 'character not allowed');
611     } else {
612     $content = 'inline';
613     for (@block_not_inline) {
614     $self->{onerror}->(node => $_, type => 'element not allowed');
615     }
616 wakaba 1.1 }
617     }
618 wakaba 1.2 } elsif ($nt == 5) {
619     unshift @nodes, @{$node->child_nodes};
620 wakaba 1.1 }
621     }
622 wakaba 1.4
623     if ($content eq 'inline') {
624     for (@$new_todos) {
625     $_->{inline} = 1;
626     }
627     }
628     return ($new_todos);
629 wakaba 1.2 };
630     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
631 wakaba 1.1
632     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
633    
634 wakaba 1.10 my $GetHTMLEnumeratedAttrChecker = sub {
635     my $states = shift; # {value => conforming ? 1 : -1}
636     return sub {
637     my ($self, $attr) = @_;
638     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
639     if ($states->{$value} > 0) {
640     #
641     } elsif ($states->{$value}) {
642     $self->{onerror}->(node => $attr,
643     type => 'non-conforming enumerated attribute value');
644     } else {
645     $self->{onerror}->(node => $attr,
646     type => 'invalid enumerated attribute value');
647     }
648     };
649     }; # $GetHTMLEnumeratedAttrChecker
650 wakaba 1.9
651 wakaba 1.10 my $GetHTMLBooleanAttrChecker = sub {
652     my $local_name = shift;
653     return sub {
654     my ($self, $attr) = @_;
655     my $value = $attr->value;
656     unless ($value eq $local_name or $value eq '') {
657     $self->{onerror}->(node => $attr,
658     type => 'invalid boolean attribute value');
659     }
660     };
661     }; # $GetHTMLBooleanAttrChecker
662 wakaba 1.9
663 wakaba 1.10 my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
664     my ($self, $attr) = @_;
665     my %word;
666     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
667     unless ($word{$word}) {
668     $word{$word} = 1;
669 wakaba 1.9 } else {
670 wakaba 1.10 $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
671 wakaba 1.9 }
672     }
673 wakaba 1.10 }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
674    
675 wakaba 1.20 ## |rel| attribute (unordered set of space separated tokens,
676     ## whose allowed values are defined by the section on link types)
677     my $HTMLLinkTypesAttrChecker = sub {
678     my ($a_or_area, $self, $attr) = @_;
679     my %word;
680     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
681     unless ($word{$word}) {
682     $word{$word} = 1;
683     } else {
684     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
685     }
686     }
687     ## NOTE: Case sensitive match (since HTML5 spec does not say link
688     ## types are case-insensitive and it says "The value should not
689     ## be confusingly similar to any other defined value (e.g.
690     ## differing only in case).").
691     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
692     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
693     ## values to be used conformingly.
694     require Whatpm::_LinkTypeList;
695     our $LinkType;
696     for my $word (keys %word) {
697     my $def = $LinkType->{$word};
698     if (defined $def) {
699     if ($def->{status} eq 'accepted') {
700     if (defined $def->{effect}->[$a_or_area]) {
701     #
702     } else {
703     $self->{onerror}->(node => $attr,
704     type => 'link type bad context:'.$word);
705     }
706     } elsif ($def->{status} eq 'proposal') {
707     $self->{onerror}->(node => $attr,
708     type => 'proposed link type:'.$word);
709     } else { # rejected or synonym
710     $self->{onerror}->(node => $attr,
711     type => 'non-conforming link type:'.$word);
712     }
713     if ($def->{unique}) {
714     unless ($self->{has_link_type}->{$word}) {
715     $self->{has_link_type}->{$word} = 1;
716     } else {
717     $self->{onerror}->(node => $attr,
718     type => 'link with type not unique:'.$word);
719     }
720     }
721     } else {
722     $self->{onerror}->(node => $attr,
723     type => 'link type not supported:'.$word);
724     }
725     }
726     ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
727     ## says that using both X-Pingback: header field and HTML
728     ## <link rel=pingback> is deprecated and if both appears they
729     ## SHOULD contain exactly the same value.
730     ## ISSUE: Pingback 1.0 specification defines the exact representation
731     ## of its link element, which cannot be tested by the current arch.
732     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
733     ## include any string that matches to the pattern for the rel=pingback link,
734     ## which again inpossible to test.
735     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
736     }; # $HTMLLinkTypesAttrChecker
737    
738 wakaba 1.18 ## URI (or IRI)
739 wakaba 1.11 my $HTMLURIAttrChecker = sub {
740     my ($self, $attr) = @_;
741 wakaba 1.15 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
742 wakaba 1.18 my $value = $attr->value;
743     Whatpm::URIChecker->check_iri_reference ($value, sub {
744     my %opt = @_;
745 wakaba 1.22 $self->{onerror}->(node => $attr,
746     type => 'URI:'.$opt{level}.':'.
747     (defined $opt{position} ? $opt{position} : '').':'.
748     $opt{type});
749 wakaba 1.18 });
750 wakaba 1.11 }; # $HTMLURIAttrChecker
751    
752 wakaba 1.15 ## A space separated list of one or more URIs (or IRIs)
753     my $HTMLSpaceURIsAttrChecker = sub {
754     my ($self, $attr) = @_;
755 wakaba 1.30 my $i = 0;
756     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
757     Whatpm::URIChecker->check_iri_reference ($value, sub {
758     my %opt = @_;
759     $self->{onerror}->(node => $attr,
760     type => 'URI['.$i.']:'.$opt{level}.':'.
761     (defined $opt{position} ? $opt{position} : '').':'.
762     $opt{type});
763     });
764     $i++;
765     }
766 wakaba 1.15 ## ISSUE: Relative references?
767     ## ISSUE: Leading or trailing white spaces are conformant?
768     ## ISSUE: A sequence of white space characters are conformant?
769     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
770     ## NOTE: Duplication seems not an error.
771     }; # $HTMLSpaceURIsAttrChecker
772    
773 wakaba 1.30 my $HTMLDatetimeAttrChecker = sub {
774     my ($self, $attr) = @_;
775     my $value = $attr->value;
776     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
777     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
778     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
779     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
780     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
781     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
782     if $d < 1 or
783     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
784     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
785     if $M == 2 and $d == 29 and
786     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
787     } else {
788     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
789     }
790     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
791     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
792     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
793     if defined $s and $s > 59;
794     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
795     if $zh > 23;
796     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
797     if $zm > 59;
798     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
799     } else {
800     $self->{onerror}->(node => $attr, type => 'datetime syntax error');
801     }
802     }; # $HTMLDatetimeAttrChecker
803    
804 wakaba 1.11 my $HTMLIntegerAttrChecker = sub {
805     my ($self, $attr) = @_;
806     my $value = $attr->value;
807     unless ($value =~ /\A-?[0-9]+\z/) {
808 wakaba 1.15 $self->{onerror}->(node => $attr, type => 'integer syntax error');
809 wakaba 1.11 }
810     }; # $HTMLIntegerAttrChecker
811    
812 wakaba 1.12 my $GetHTMLNonNegativeIntegerAttrChecker = sub {
813     my $range_check = shift;
814     return sub {
815     my ($self, $attr) = @_;
816     my $value = $attr->value;
817     if ($value =~ /\A[0-9]+\z/) {
818     unless ($range_check->($value + 0)) {
819     $self->{onerror}->(node => $attr, type => 'out of range');
820     }
821     } else {
822 wakaba 1.15 $self->{onerror}->(node => $attr,
823     type => 'non-negative integer syntax error');
824 wakaba 1.12 }
825     };
826     }; # $GetHTMLNonNegativeIntegerAttrChecker
827    
828 wakaba 1.11 my $GetHTMLFloatingPointNumberAttrChecker = sub {
829     my $range_check = shift;
830     return sub {
831     my ($self, $attr) = @_;
832     my $value = $attr->value;
833     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
834     unless ($range_check->($value + 0)) {
835     $self->{onerror}->(node => $attr, type => 'out of range');
836     }
837     } else {
838 wakaba 1.15 $self->{onerror}->(node => $attr,
839     type => 'floating point number syntax error');
840 wakaba 1.11 }
841     };
842     }; # $GetHTMLFloatingPointNumberAttrChecker
843    
844 wakaba 1.15 ## "A valid MIME type, optionally with parameters. [RFC 2046]"
845     ## ISSUE: RFC 2046 does not define syntax of media types.
846     ## ISSUE: The definition of "a valid MIME type" is unknown.
847     ## Syntactical correctness?
848     my $HTMLIMTAttrChecker = sub {
849     my ($self, $attr) = @_;
850     my $value = $attr->value;
851     ## ISSUE: RFC 2045 Content-Type header field allows insertion
852     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
853     ## ISSUE: RFC 2231 extension? Maybe no.
854     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
855     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
856     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
857 wakaba 1.19 if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
858     my @type = ($1, $2);
859     my $param = $3;
860     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
861     if (defined $2) {
862     push @type, $1 => $2;
863     } else {
864     my $n = $1;
865     my $v = $2;
866     $v =~ s/\\(.)/$1/gs;
867     push @type, $n => $v;
868     }
869     }
870     require Whatpm::IMTChecker;
871     Whatpm::IMTChecker->check_imt (sub {
872     my %opt = @_;
873     $self->{onerror}->(node => $attr,
874     type => 'IMT:'.$opt{level}.':'.$opt{type});
875     }, @type);
876     } else {
877 wakaba 1.15 $self->{onerror}->(node => $attr, type => 'IMT syntax error');
878     }
879     }; # $HTMLIMTAttrChecker
880    
881 wakaba 1.17 my $HTMLLanguageTagAttrChecker = sub {
882     my ($self, $attr) = @_;
883     if ($attr->value eq '') {
884     $self->{onerror}->(node => $attr, type => 'language tag syntax error');
885     }
886     ## TODO: RFC 3066 test
887     ## ISSUE: RFC 4646 (3066bis)?
888     }; # $HTMLLanguageTagAttrChecker
889    
890     ## "A valid media query [MQ]"
891     my $HTMLMQAttrChecker = sub {
892     ## ISSUE: What is "a valid media query"?
893     }; # $HTMLMQAttrChecker
894    
895 wakaba 1.16 my $HTMLEventHandlerAttrChecker = sub {
896     ## TODO: MUST contain valid ECMAScript code matching the
897     ## ECMAScript |FunctionBody| production. [ECMA262]
898     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
899     ## ISSUE: Automatic semicolon insertion does not apply?
900     ## ISSUE: Other script languages?
901     }; # $HTMLEventHandlerAttrChecker
902    
903 wakaba 1.17 my $HTMLUsemapAttrChecker = sub {
904     my ($self, $attr) = @_;
905     ## MUST be a valid hashed ID reference to a |map| element
906     my $value = $attr->value;
907     if ($value =~ s/^#//) {
908     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
909     push @{$self->{usemap}}, [$value => $attr];
910     } else {
911     $self->{onerror}->(node => $attr, type => 'hashed idref syntax error');
912     }
913 wakaba 1.27 ## NOTE: Space characters in hashed ID references are conforming.
914 wakaba 1.20 ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
915 wakaba 1.17 }; # $HTMLUsemapAttrChecker
916    
917     my $HTMLTargetAttrChecker = sub {
918     my ($self, $attr) = @_;
919     my $value = $attr->value;
920     if ($value =~ /^_/) {
921     $value = lc $value; ## ISSUE: ASCII case-insentitive?
922     unless ({
923     _self => 1, _parent => 1, _top => 1,
924     }->{$value}) {
925     $self->{onerror}->(node => $attr,
926     type => 'reserved browsing context name');
927     }
928     } else {
929     #$ ISSUE: An empty string is conforming?
930     }
931     }; # $HTMLTargetAttrChecker
932    
933 wakaba 1.10 my $HTMLAttrChecker = {
934     id => sub {
935 wakaba 1.17 ## NOTE: |map| has its own variant of |id=""| checker
936 wakaba 1.10 my ($self, $attr) = @_;
937     my $value = $attr->value;
938 wakaba 1.17 if (length $value > 0) {
939 wakaba 1.10 if ($self->{id}->{$value}) {
940     $self->{onerror}->(node => $attr, type => 'duplicate ID');
941     } else {
942     $self->{id}->{$value} = 1;
943     }
944 wakaba 1.27 if ($value =~ /[\x09-\x0D\x20]/) {
945     $self->{onerror}->(node => $attr, type => 'space in ID');
946     }
947 wakaba 1.17 } else {
948     ## NOTE: MUST contain at least one character
949     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
950 wakaba 1.10 }
951     },
952     title => sub {}, ## NOTE: No conformance creteria
953     lang => sub {
954 wakaba 1.17 ## TODO: RFC 3066 or empty test
955 wakaba 1.10 ## ISSUE: RFC 4646 (3066bis)?
956     ## TODO: HTML vs XHTML
957     },
958     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
959     class => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
960 wakaba 1.32 contextmenu => sub {
961     my ($self, $attr) = @_;
962     my $value = $attr->value;
963     push @{$self->{contextmenu}}, [$value => $attr];
964     ## ISSUE: "The value must be the ID of a menu element in the DOM."
965     ## What is "in the DOM"? A menu Element node that is not part
966     ## of the Document tree is in the DOM? A menu Element node that
967     ## belong to another Document tree is in the DOM?
968     },
969 wakaba 1.10 irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
970 wakaba 1.30 tabindex => $HTMLIntegerAttrChecker,
971 wakaba 1.10 };
972    
973 wakaba 1.16 for (qw/
974     onabort onbeforeunload onblur onchange onclick oncontextmenu
975     ondblclick ondrag ondragend ondragenter ondragleave ondragover
976     ondragstart ondrop onerror onfocus onkeydown onkeypress
977     onkeyup onload onmessage onmousedown onmousemove onmouseout
978     onmouseover onmouseup onmousewheel onresize onscroll onselect
979     onsubmit onunload
980     /) {
981     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
982     }
983    
984 wakaba 1.10 my $GetHTMLAttrsChecker = sub {
985     my $element_specific_checker = shift;
986     return sub {
987     my ($self, $todo) = @_;
988     for my $attr (@{$todo->{node}->attributes}) {
989     my $attr_ns = $attr->namespace_uri;
990     $attr_ns = '' unless defined $attr_ns;
991     my $attr_ln = $attr->manakai_local_name;
992     my $checker;
993     if ($attr_ns eq '') {
994     $checker = $element_specific_checker->{$attr_ln}
995     || $HTMLAttrChecker->{$attr_ln};
996     }
997     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
998     || $AttrChecker->{$attr_ns}->{''};
999     if ($checker) {
1000 wakaba 1.32 $checker->($self, $attr, $todo);
1001 wakaba 1.10 } else {
1002     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1003     ## ISSUE: No comformance createria for unknown attributes in the spec
1004     }
1005     }
1006     };
1007     }; # $GetHTMLAttrsChecker
1008 wakaba 1.9
1009     $Element->{$HTML_NS}->{''} = {
1010 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1011 wakaba 1.9 checker => $ElementDefault->{checker},
1012     };
1013    
1014 wakaba 1.1 $Element->{$HTML_NS}->{html} = {
1015 wakaba 1.24 is_root => 1,
1016 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1017     xmlns => sub {
1018     my ($self, $attr) = @_;
1019     my $value = $attr->value;
1020     unless ($value eq $HTML_NS) {
1021     $self->{onerror}->(node => $attr, type => 'syntax error');
1022     ## TODO: only in HTML documents
1023     }
1024     },
1025     }),
1026 wakaba 1.1 checker => sub {
1027 wakaba 1.4 my ($self, $todo) = @_;
1028     my $el = $todo->{node};
1029     my $new_todos = [];
1030 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1031    
1032     my $phase = 'before head';
1033     while (@nodes) {
1034     my $node = shift @nodes;
1035 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1036    
1037 wakaba 1.1 my $nt = $node->node_type;
1038     if ($nt == 1) {
1039 wakaba 1.2 my $node_ns = $node->namespace_uri;
1040     $node_ns = '' unless defined $node_ns;
1041     my $node_ln = $node->manakai_local_name;
1042 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1043 wakaba 1.1 if ($phase eq 'before head') {
1044 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
1045 wakaba 1.1 $phase = 'after head';
1046 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1047     $self->{onerror}->(node => $node, type => 'ps element missing:head');
1048 wakaba 1.1 $phase = 'after body';
1049     } else {
1050 wakaba 1.6 $not_allowed = 1;
1051 wakaba 1.1 # before head
1052     }
1053     } elsif ($phase eq 'after head') {
1054 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1055 wakaba 1.1 $phase = 'after body';
1056     } else {
1057 wakaba 1.6 $not_allowed = 1;
1058 wakaba 1.1 # after head
1059     }
1060     } else { #elsif ($phase eq 'after body') {
1061 wakaba 1.6 $not_allowed = 1;
1062 wakaba 1.1 # after body
1063     }
1064 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1065     if $not_allowed;
1066 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1067 wakaba 1.2 unshift @nodes, @$sib;
1068 wakaba 1.4 push @$new_todos, @$ch;
1069 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1070     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1071 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1072 wakaba 1.1 }
1073     } elsif ($nt == 5) {
1074     unshift @nodes, @{$node->child_nodes};
1075     }
1076     }
1077 wakaba 1.3
1078     if ($phase eq 'before head') {
1079     $self->{onerror}->(node => $el, type => 'child element missing:head');
1080     $self->{onerror}->(node => $el, type => 'child element missing:body');
1081     } elsif ($phase eq 'after head') {
1082     $self->{onerror}->(node => $el, type => 'child element missing:body');
1083     }
1084    
1085 wakaba 1.4 return ($new_todos);
1086 wakaba 1.1 },
1087     };
1088    
1089     $Element->{$HTML_NS}->{head} = {
1090 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1091 wakaba 1.1 checker => sub {
1092 wakaba 1.4 my ($self, $todo) = @_;
1093     my $el = $todo->{node};
1094     my $new_todos = [];
1095 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1096    
1097     my $has_title;
1098 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
1099 wakaba 1.1 while (@nodes) {
1100     my $node = shift @nodes;
1101 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1102    
1103 wakaba 1.1 my $nt = $node->node_type;
1104     if ($nt == 1) {
1105 wakaba 1.2 my $node_ns = $node->namespace_uri;
1106     $node_ns = '' unless defined $node_ns;
1107     my $node_ln = $node->manakai_local_name;
1108 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1109 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1110 wakaba 1.3 $phase = 'after base';
1111 wakaba 1.1 unless ($has_title) {
1112     $has_title = 1;
1113     } else {
1114 wakaba 1.6 $not_allowed = 1;
1115 wakaba 1.1 }
1116 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1117 wakaba 1.1 if ($node->has_attribute_ns (undef, 'charset')) {
1118 wakaba 1.3 if ($phase eq 'initial') {
1119     $phase = 'after charset';
1120 wakaba 1.1 } else {
1121 wakaba 1.6 $not_allowed = 1;
1122 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
1123 wakaba 1.1 }
1124     } else {
1125 wakaba 1.3 $phase = 'after base';
1126 wakaba 1.1 }
1127 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1128 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
1129     $phase = 'after base';
1130 wakaba 1.1 } else {
1131 wakaba 1.6 $not_allowed = 1;
1132 wakaba 1.1 }
1133 wakaba 1.28 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1134     $phase = 'after base';
1135     if ($node->has_attribute_ns (undef, 'scoped')) {
1136     $not_allowed = 1;
1137     }
1138 wakaba 1.7 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1139     $phase = 'after base';
1140 wakaba 1.1 } else {
1141 wakaba 1.7 $not_allowed = 1;
1142 wakaba 1.1 }
1143 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1144     if $not_allowed;
1145 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1146 wakaba 1.2 unshift @nodes, @$sib;
1147 wakaba 1.4 push @$new_todos, @$ch;
1148 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1149     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1150 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1151 wakaba 1.1 }
1152     } elsif ($nt == 5) {
1153     unshift @nodes, @{$node->child_nodes};
1154     }
1155     }
1156     unless ($has_title) {
1157 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
1158 wakaba 1.1 }
1159 wakaba 1.4 return ($new_todos);
1160 wakaba 1.1 },
1161     };
1162    
1163     $Element->{$HTML_NS}->{title} = {
1164 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1165 wakaba 1.1 checker => $HTMLTextChecker,
1166     };
1167    
1168     $Element->{$HTML_NS}->{base} = {
1169 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1170 wakaba 1.11 href => $HTMLURIAttrChecker,
1171 wakaba 1.17 target => $HTMLTargetAttrChecker,
1172 wakaba 1.10 }),
1173 wakaba 1.1 checker => $HTMLEmptyChecker,
1174     };
1175    
1176     $Element->{$HTML_NS}->{link} = {
1177 wakaba 1.16 attrs_checker => sub {
1178     my ($self, $todo) = @_;
1179     $GetHTMLAttrsChecker->({
1180     href => $HTMLURIAttrChecker,
1181 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(0, @_) },
1182 wakaba 1.17 media => $HTMLMQAttrChecker,
1183     hreflang => $HTMLLanguageTagAttrChecker,
1184 wakaba 1.16 type => $HTMLIMTAttrChecker,
1185     ## NOTE: Though |title| has special semantics,
1186     ## syntactically same as the |title| as global attribute.
1187     })->($self, $todo);
1188     unless ($todo->{node}->has_attribute_ns (undef, 'href')) {
1189     $self->{onerror}->(node => $todo->{node},
1190     type => 'attribute missing:href');
1191     }
1192     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1193     $self->{onerror}->(node => $todo->{node},
1194     type => 'attribute missing:rel');
1195     }
1196     },
1197 wakaba 1.1 checker => $HTMLEmptyChecker,
1198     };
1199    
1200     $Element->{$HTML_NS}->{meta} = {
1201 wakaba 1.10 attrs_checker => sub {
1202     my ($self, $todo) = @_;
1203     my $name_attr;
1204     my $http_equiv_attr;
1205     my $charset_attr;
1206     my $content_attr;
1207     for my $attr (@{$todo->{node}->attributes}) {
1208     my $attr_ns = $attr->namespace_uri;
1209     $attr_ns = '' unless defined $attr_ns;
1210     my $attr_ln = $attr->manakai_local_name;
1211     my $checker;
1212     if ($attr_ns eq '') {
1213     if ($attr_ln eq 'content') {
1214     $content_attr = $attr;
1215     $checker = 1;
1216     } elsif ($attr_ln eq 'name') {
1217     $name_attr = $attr;
1218     $checker = 1;
1219     } elsif ($attr_ln eq 'http-equiv') {
1220     $http_equiv_attr = $attr;
1221     $checker = 1;
1222     } elsif ($attr_ln eq 'charset') {
1223     $charset_attr = $attr;
1224     $checker = 1;
1225     } else {
1226     $checker = $HTMLAttrChecker->{$attr_ln}
1227     || $AttrChecker->{$attr_ns}->{$attr_ln}
1228     || $AttrChecker->{$attr_ns}->{''};
1229     }
1230     } else {
1231     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1232     || $AttrChecker->{$attr_ns}->{''};
1233     }
1234     if ($checker) {
1235     $checker->($self, $attr) if ref $checker;
1236     } else {
1237     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1238     ## ISSUE: No comformance createria for unknown attributes in the spec
1239     }
1240     }
1241    
1242     if (defined $name_attr) {
1243     if (defined $http_equiv_attr) {
1244     $self->{onerror}->(node => $http_equiv_attr,
1245     type => 'attribute not allowed');
1246     } elsif (defined $charset_attr) {
1247     $self->{onerror}->(node => $charset_attr,
1248     type => 'attribute not allowed');
1249     }
1250     my $metadata_name = $name_attr->value;
1251     my $metadata_value;
1252     if (defined $content_attr) {
1253     $metadata_value = $content_attr->value;
1254     } else {
1255     $self->{onerror}->(node => $todo->{node},
1256     type => 'attribute missing:content');
1257     $metadata_value = '';
1258     }
1259     } elsif (defined $http_equiv_attr) {
1260     if (defined $charset_attr) {
1261     $self->{onerror}->(node => $charset_attr,
1262     type => 'attribute not allowed');
1263     }
1264     unless (defined $content_attr) {
1265     $self->{onerror}->(node => $todo->{node},
1266     type => 'attribute missing:content');
1267     }
1268     } elsif (defined $charset_attr) {
1269     if (defined $content_attr) {
1270     $self->{onerror}->(node => $content_attr,
1271     type => 'attribute not allowed');
1272     }
1273     ## TODO: Allowed only in HTML documents
1274     } else {
1275     if (defined $content_attr) {
1276     $self->{onerror}->(node => $content_attr,
1277     type => 'attribute not allowed');
1278     $self->{onerror}->(node => $todo->{node},
1279     type => 'attribute missing:name|http-equiv');
1280     } else {
1281     $self->{onerror}->(node => $todo->{node},
1282     type => 'attribute missing:name|http-equiv|charset');
1283     }
1284     }
1285    
1286     ## TODO: metadata conformance
1287    
1288     ## TODO: pragma conformance
1289     if (defined $http_equiv_attr) { ## An enumerated attribute
1290     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1291     if ({
1292     'refresh' => 1,
1293     'default-style' => 1,
1294     }->{$keyword}) {
1295     #
1296     } else {
1297     $self->{onerror}->(node => $http_equiv_attr,
1298     type => 'invalid enumerated attribute value');
1299     }
1300     }
1301    
1302     ## TODO: charset
1303     },
1304 wakaba 1.1 checker => $HTMLEmptyChecker,
1305     };
1306    
1307     ## NOTE: |html:style| has no conformance creteria on content model
1308 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
1309 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1310 wakaba 1.15 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1311 wakaba 1.17 media => $HTMLMQAttrChecker,
1312 wakaba 1.10 scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1313     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1314     ## not different
1315     }),
1316 wakaba 1.3 checker => $AnyChecker,
1317     };
1318 wakaba 1.1
1319     $Element->{$HTML_NS}->{body} = {
1320 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1321 wakaba 1.1 checker => $HTMLBlockChecker,
1322     };
1323    
1324     $Element->{$HTML_NS}->{section} = {
1325 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1326 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1327     };
1328    
1329     $Element->{$HTML_NS}->{nav} = {
1330 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1331 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1332     };
1333    
1334     $Element->{$HTML_NS}->{article} = {
1335 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1336 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1337     };
1338    
1339     $Element->{$HTML_NS}->{blockquote} = {
1340 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1341     cite => $HTMLURIAttrChecker,
1342     }),
1343 wakaba 1.1 checker => $HTMLBlockChecker,
1344     };
1345    
1346     $Element->{$HTML_NS}->{aside} = {
1347 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1348 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1349 wakaba 1.1 };
1350    
1351     $Element->{$HTML_NS}->{h1} = {
1352 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1353 wakaba 1.30 checker => sub {
1354     my ($self, $todo) = @_;
1355     $todo->{flag}->{has_heading}->[0] = 1;
1356     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1357     },
1358 wakaba 1.1 };
1359    
1360     $Element->{$HTML_NS}->{h2} = {
1361 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1362 wakaba 1.30 checker => $Element->{$HTML_NS}->{h1}->{checker},
1363 wakaba 1.1 };
1364    
1365     $Element->{$HTML_NS}->{h3} = {
1366 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1367 wakaba 1.30 checker => $Element->{$HTML_NS}->{h1}->{checker},
1368 wakaba 1.1 };
1369    
1370     $Element->{$HTML_NS}->{h4} = {
1371 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1372 wakaba 1.30 checker => $Element->{$HTML_NS}->{h1}->{checker},
1373 wakaba 1.1 };
1374    
1375     $Element->{$HTML_NS}->{h5} = {
1376 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1377 wakaba 1.30 checker => $Element->{$HTML_NS}->{h1}->{checker},
1378 wakaba 1.1 };
1379    
1380     $Element->{$HTML_NS}->{h6} = {
1381 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1382 wakaba 1.30 checker => $Element->{$HTML_NS}->{h1}->{checker},
1383 wakaba 1.1 };
1384    
1385 wakaba 1.30 $Element->{$HTML_NS}->{header} = {
1386     attrs_checker => $GetHTMLAttrsChecker->({}),
1387     checker => sub {
1388     my ($self, $todo) = @_;
1389     my $old_flag = $todo->{flag}->{has_heading} || [];
1390     my $new_flag = [];
1391     local $todo->{flag}->{has_heading} = $new_flag;
1392     my $node = $todo->{node};
1393    
1394     my $end = $self->_add_minuses
1395     ({$HTML_NS => {qw/header 1 footer 1/}},
1396     $HTMLSectioningElements);
1397     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1398     push @$new_todos, $end,
1399     {type => 'code', code => sub {
1400     if ($new_flag->[0]) {
1401     $old_flag->[0] = 1;
1402     } else {
1403     $self->{onerror}->(node => $node, type => 'element missing:hn');
1404     }
1405     }};
1406     return ($new_todos, $ch);
1407     },
1408     };
1409 wakaba 1.1
1410 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
1411 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1412 wakaba 1.2 checker => sub { ## block -hn -header -footer -sectioning or inline
1413 wakaba 1.4 my ($self, $todo) = @_;
1414     my $el = $todo->{node};
1415     my $new_todos = [];
1416 wakaba 1.2 my @nodes = (@{$el->child_nodes});
1417    
1418     my $content = 'block-or-inline'; # or 'block' or 'inline'
1419     my @block_not_inline;
1420     while (@nodes) {
1421     my $node = shift @nodes;
1422     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1423    
1424     my $nt = $node->node_type;
1425     if ($nt == 1) {
1426     my $node_ns = $node->namespace_uri;
1427     $node_ns = '' unless defined $node_ns;
1428     my $node_ln = $node->manakai_local_name;
1429 wakaba 1.6 my $not_allowed;
1430 wakaba 1.2 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1431 wakaba 1.6 $not_allowed = 1;
1432 wakaba 1.2 } elsif ($node_ns eq $HTML_NS and
1433     {
1434     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1435     }->{$node_ln}) {
1436 wakaba 1.6 $not_allowed = 1;
1437 wakaba 1.2 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1438 wakaba 1.6 $not_allowed = 1;
1439 wakaba 1.2 }
1440     if ($content eq 'block') {
1441 wakaba 1.7 $not_allowed = 1
1442     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1443 wakaba 1.2 } elsif ($content eq 'inline') {
1444 wakaba 1.7 $not_allowed = 1
1445     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1446     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1447 wakaba 1.2 } else {
1448 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1449     my $is_inline
1450     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1451     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1452 wakaba 1.2
1453 wakaba 1.6 push @block_not_inline, $node
1454     if $is_block and not $is_inline and not $not_allowed;
1455 wakaba 1.2 unless ($is_block) {
1456     $content = 'inline';
1457     for (@block_not_inline) {
1458     $self->{onerror}->(node => $_, type => 'element not allowed');
1459     }
1460 wakaba 1.6 $not_allowed = 1 unless $is_inline;
1461 wakaba 1.2 }
1462     }
1463 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1464     if $not_allowed;
1465 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1466 wakaba 1.2 unshift @nodes, @$sib;
1467 wakaba 1.4 push @$new_todos, @$ch;
1468 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
1469     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1470     if ($content eq 'block') {
1471     $self->{onerror}->(node => $node, type => 'character not allowed');
1472     } else {
1473     $content = 'inline';
1474     for (@block_not_inline) {
1475     $self->{onerror}->(node => $_, type => 'element not allowed');
1476     }
1477     }
1478     }
1479     } elsif ($nt == 5) {
1480     unshift @nodes, @{$node->child_nodes};
1481     }
1482     }
1483    
1484     my $end = $self->_add_minuses
1485     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1486     $HTMLSectioningElements);
1487 wakaba 1.4 push @$new_todos, $end;
1488 wakaba 1.2
1489 wakaba 1.4 if ($content eq 'inline') {
1490     for (@$new_todos) {
1491     $_->{inline} = 1;
1492     }
1493     }
1494    
1495     return ($new_todos);
1496 wakaba 1.2 },
1497     };
1498 wakaba 1.1
1499     $Element->{$HTML_NS}->{address} = {
1500 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1501 wakaba 1.1 checker => $HTMLInlineChecker,
1502     };
1503    
1504     $Element->{$HTML_NS}->{p} = {
1505 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1506 wakaba 1.1 checker => $HTMLSignificantInlineChecker,
1507     };
1508    
1509     $Element->{$HTML_NS}->{hr} = {
1510 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1511 wakaba 1.1 checker => $HTMLEmptyChecker,
1512     };
1513    
1514     $Element->{$HTML_NS}->{br} = {
1515 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1516 wakaba 1.1 checker => $HTMLEmptyChecker,
1517     };
1518    
1519     $Element->{$HTML_NS}->{dialog} = {
1520 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1521 wakaba 1.1 checker => sub {
1522 wakaba 1.4 my ($self, $todo) = @_;
1523     my $el = $todo->{node};
1524     my $new_todos = [];
1525 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1526    
1527     my $phase = 'before dt';
1528     while (@nodes) {
1529     my $node = shift @nodes;
1530 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1531    
1532 wakaba 1.1 my $nt = $node->node_type;
1533     if ($nt == 1) {
1534 wakaba 1.8 my $node_ns = $node->namespace_uri;
1535     $node_ns = '' unless defined $node_ns;
1536     my $node_ln = $node->manakai_local_name;
1537 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1538 wakaba 1.1 if ($phase eq 'before dt') {
1539 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1540 wakaba 1.1 $phase = 'before dd';
1541 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1542 wakaba 1.2 $self->{onerror}
1543 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1544 wakaba 1.1 $phase = 'before dt';
1545     } else {
1546 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1547 wakaba 1.1 }
1548     } else { # before dd
1549 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1550 wakaba 1.1 $phase = 'before dt';
1551 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1552 wakaba 1.2 $self->{onerror}
1553 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
1554 wakaba 1.1 $phase = 'before dd';
1555     } else {
1556 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1557 wakaba 1.1 }
1558     }
1559 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1560 wakaba 1.2 unshift @nodes, @$sib;
1561 wakaba 1.4 push @$new_todos, @$ch;
1562 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1563     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1564 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1565 wakaba 1.1 }
1566     } elsif ($nt == 5) {
1567     unshift @nodes, @{$node->child_nodes};
1568     }
1569     }
1570     if ($phase eq 'before dd') {
1571 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1572 wakaba 1.1 }
1573 wakaba 1.4 return ($new_todos);
1574 wakaba 1.1 },
1575     };
1576    
1577     $Element->{$HTML_NS}->{pre} = {
1578 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1579 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1580     };
1581    
1582     $Element->{$HTML_NS}->{ol} = {
1583 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1584     start => $HTMLIntegerAttrChecker,
1585     }),
1586 wakaba 1.1 checker => sub {
1587 wakaba 1.4 my ($self, $todo) = @_;
1588     my $el = $todo->{node};
1589     my $new_todos = [];
1590 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1591    
1592     while (@nodes) {
1593     my $node = shift @nodes;
1594 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1595    
1596 wakaba 1.1 my $nt = $node->node_type;
1597     if ($nt == 1) {
1598 wakaba 1.8 my $node_ns = $node->namespace_uri;
1599     $node_ns = '' unless defined $node_ns;
1600     my $node_ln = $node->manakai_local_name;
1601 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1602 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1603 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1604 wakaba 1.1 }
1605 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1606 wakaba 1.2 unshift @nodes, @$sib;
1607 wakaba 1.4 push @$new_todos, @$ch;
1608 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1609     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1610 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1611 wakaba 1.1 }
1612     } elsif ($nt == 5) {
1613     unshift @nodes, @{$node->child_nodes};
1614     }
1615     }
1616 wakaba 1.4
1617     if ($todo->{inline}) {
1618     for (@$new_todos) {
1619     $_->{inline} = 1;
1620     }
1621     }
1622     return ($new_todos);
1623 wakaba 1.1 },
1624     };
1625    
1626     $Element->{$HTML_NS}->{ul} = {
1627 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1628 wakaba 1.1 checker => $Element->{$HTML_NS}->{ol}->{checker},
1629     };
1630    
1631 wakaba 1.5
1632     $Element->{$HTML_NS}->{li} = {
1633 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1634     start => sub {
1635     my ($self, $attr) = @_;
1636     my $parent = $attr->owner_element->manakai_parent_element;
1637     if (defined $parent) {
1638     my $parent_ns = $parent->namespace_uri;
1639     $parent_ns = '' unless defined $parent_ns;
1640     my $parent_ln = $parent->manakai_local_name;
1641     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1642     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1643     }
1644     }
1645     $HTMLIntegerAttrChecker->($self, $attr);
1646     },
1647     }),
1648 wakaba 1.5 checker => sub {
1649     my ($self, $todo) = @_;
1650     if ($todo->{inline}) {
1651     return $HTMLInlineChecker->($self, $todo);
1652     } else {
1653     return $HTMLBlockOrInlineChecker->($self, $todo);
1654     }
1655     },
1656     };
1657 wakaba 1.1
1658     $Element->{$HTML_NS}->{dl} = {
1659 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1660 wakaba 1.1 checker => sub {
1661 wakaba 1.4 my ($self, $todo) = @_;
1662     my $el = $todo->{node};
1663     my $new_todos = [];
1664 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1665    
1666     my $phase = 'before dt';
1667     while (@nodes) {
1668     my $node = shift @nodes;
1669 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1670    
1671 wakaba 1.1 my $nt = $node->node_type;
1672     if ($nt == 1) {
1673 wakaba 1.8 my $node_ns = $node->namespace_uri;
1674     $node_ns = '' unless defined $node_ns;
1675     my $node_ln = $node->manakai_local_name;
1676 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1677 wakaba 1.1 if ($phase eq 'in dds') {
1678 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1679 wakaba 1.1 #$phase = 'in dds';
1680 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1681 wakaba 1.1 $phase = 'in dts';
1682     } else {
1683 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1684 wakaba 1.1 }
1685     } elsif ($phase eq 'in dts') {
1686 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1687 wakaba 1.1 #$phase = 'in dts';
1688 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1689 wakaba 1.1 $phase = 'in dds';
1690     } else {
1691 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1692 wakaba 1.1 }
1693     } else { # before dt
1694 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1695 wakaba 1.1 $phase = 'in dts';
1696 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1697 wakaba 1.2 $self->{onerror}
1698 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1699 wakaba 1.1 $phase = 'in dds';
1700     } else {
1701 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1702 wakaba 1.1 }
1703     }
1704 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1705 wakaba 1.2 unshift @nodes, @$sib;
1706 wakaba 1.4 push @$new_todos, @$ch;
1707 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1708     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1709 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1710 wakaba 1.1 }
1711     } elsif ($nt == 5) {
1712     unshift @nodes, @{$node->child_nodes};
1713     }
1714     }
1715     if ($phase eq 'in dts') {
1716 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1717 wakaba 1.1 }
1718 wakaba 1.4
1719     if ($todo->{inline}) {
1720     for (@$new_todos) {
1721     $_->{inline} = 1;
1722     }
1723     }
1724     return ($new_todos);
1725 wakaba 1.1 },
1726     };
1727    
1728     $Element->{$HTML_NS}->{dt} = {
1729 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1730 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1731     };
1732    
1733 wakaba 1.4 $Element->{$HTML_NS}->{dd} = {
1734 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1735 wakaba 1.5 checker => $Element->{$HTML_NS}->{li}->{checker},
1736 wakaba 1.4 };
1737 wakaba 1.1
1738 wakaba 1.6 $Element->{$HTML_NS}->{a} = {
1739 wakaba 1.17 attrs_checker => sub {
1740     my ($self, $todo) = @_;
1741 wakaba 1.15 my %attr;
1742 wakaba 1.17 for my $attr (@{$todo->{node}->attributes}) {
1743     my $attr_ns = $attr->namespace_uri;
1744     $attr_ns = '' unless defined $attr_ns;
1745     my $attr_ln = $attr->manakai_local_name;
1746     my $checker;
1747     if ($attr_ns eq '') {
1748     $checker = {
1749     target => $HTMLTargetAttrChecker,
1750     href => $HTMLURIAttrChecker,
1751     ping => $HTMLSpaceURIsAttrChecker,
1752 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(1, @_) },
1753 wakaba 1.17 media => $HTMLMQAttrChecker,
1754     hreflang => $HTMLLanguageTagAttrChecker,
1755     type => $HTMLIMTAttrChecker,
1756     }->{$attr_ln};
1757     if ($checker) {
1758     $attr{$attr_ln} = $attr;
1759     } else {
1760     $checker = $HTMLAttrChecker->{$attr_ln};
1761     }
1762     }
1763     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1764     || $AttrChecker->{$attr_ns}->{''};
1765     if ($checker) {
1766     $checker->($self, $attr) if ref $checker;
1767     } else {
1768     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1769     ## ISSUE: No comformance createria for unknown attributes in the spec
1770     }
1771     }
1772    
1773     unless (defined $attr{href}) {
1774     for (qw/target ping rel media hreflang type/) {
1775     if (defined $attr{$_}) {
1776     $self->{onerror}->(node => $attr{$_},
1777     type => 'attribute not allowed');
1778 wakaba 1.15 }
1779     }
1780 wakaba 1.17 }
1781 wakaba 1.15 },
1782 wakaba 1.6 checker => sub {
1783     my ($self, $todo) = @_;
1784    
1785     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1786 wakaba 1.32 my ($new_todos, $ch)
1787 wakaba 1.6 = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1788 wakaba 1.32 push @$new_todos, $end;
1789    
1790     $_->{flag}->{has_a} = 1 for @$new_todos;
1791    
1792     return ($new_todos, $ch);
1793 wakaba 1.6 },
1794     };
1795 wakaba 1.1
1796 wakaba 1.4 $Element->{$HTML_NS}->{q} = {
1797 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1798     cite => $HTMLURIAttrChecker,
1799     }),
1800 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1801     };
1802 wakaba 1.1
1803     $Element->{$HTML_NS}->{cite} = {
1804 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1805 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1806     };
1807    
1808 wakaba 1.4 $Element->{$HTML_NS}->{em} = {
1809 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1810 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1811     };
1812    
1813     $Element->{$HTML_NS}->{strong} = {
1814 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1815 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1816     };
1817 wakaba 1.1
1818 wakaba 1.4 $Element->{$HTML_NS}->{small} = {
1819 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1820 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1821     };
1822    
1823     $Element->{$HTML_NS}->{m} = {
1824 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1825 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1826     };
1827    
1828 wakaba 1.30 $Element->{$HTML_NS}->{dfn} = {
1829 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1830 wakaba 1.4 checker => sub {
1831     my ($self, $todo) = @_;
1832    
1833     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1834     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1835     push @$sib, $end;
1836 wakaba 1.30
1837     my $node = $todo->{node};
1838     my $term = $node->get_attribute_ns (undef, 'title');
1839     unless (defined $term) {
1840     for my $child (@{$node->child_nodes}) {
1841     if ($child->node_type == 1) { # ELEMENT_NODE
1842     if (defined $term) {
1843     undef $term;
1844     last;
1845     } elsif ($child->manakai_local_name eq 'abbr') {
1846     my $nsuri = $child->namespace_uri;
1847     if (defined $nsuri and $nsuri eq $HTML_NS) {
1848     my $attr = $child->get_attribute_node_ns (undef, 'title');
1849     if ($attr) {
1850     $term = $attr->value;
1851     }
1852     }
1853     }
1854     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1855     ## TEXT_NODE or CDATA_SECTION_NODE
1856     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1857     next;
1858     }
1859     undef $term;
1860     last;
1861     }
1862     }
1863     unless (defined $term) {
1864     $term = $node->text_content;
1865     }
1866     }
1867     if ($self->{term}->{$term}) {
1868     $self->{onerror}->(node => $node, type => 'duplicate term');
1869     } else {
1870     $self->{term}->{$term} = 1;
1871     }
1872    
1873 wakaba 1.4 return ($sib, $ch);
1874     },
1875     };
1876 wakaba 1.1
1877     $Element->{$HTML_NS}->{abbr} = {
1878 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1879     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1880     ## not different. The spec says that the |title| MAY be omitted
1881     ## if there is a |dfn| whose defining term is the abbreviation,
1882     ## but it does not prohibit |abbr| w/o |title| in other cases.
1883     }),
1884 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1885     };
1886    
1887 wakaba 1.11 $Element->{$HTML_NS}->{time} = { ## TODO: validate content
1888     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO: datetime
1889 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1890     };
1891    
1892 wakaba 1.11 $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1893     attrs_checker => $GetHTMLAttrsChecker->({
1894     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1895     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1896     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1897     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1898     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1899     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1900     }),
1901 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1902     };
1903    
1904 wakaba 1.11 $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1905     attrs_checker => $GetHTMLAttrsChecker->({
1906     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1907     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1908     }),
1909 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1910     };
1911    
1912 wakaba 1.4 $Element->{$HTML_NS}->{code} = {
1913 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1914 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1915     ## syntatically same as the |title| as global attribute.
1916 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1917     };
1918 wakaba 1.1
1919     $Element->{$HTML_NS}->{var} = {
1920 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1921 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1922     ## syntatically same as the |title| as global attribute.
1923 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1924     };
1925    
1926 wakaba 1.4 $Element->{$HTML_NS}->{samp} = {
1927 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1928 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1929     ## syntatically same as the |title| as global attribute.
1930 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1931     };
1932 wakaba 1.1
1933     $Element->{$HTML_NS}->{kbd} = {
1934 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1935 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1936     };
1937    
1938     $Element->{$HTML_NS}->{sub} = {
1939 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1940 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1941     };
1942    
1943     $Element->{$HTML_NS}->{sup} = {
1944 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1945 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1946     };
1947    
1948 wakaba 1.4 $Element->{$HTML_NS}->{span} = {
1949 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1950 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1951     ## syntatically same as the |title| as global attribute.
1952 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1953     };
1954 wakaba 1.1
1955     $Element->{$HTML_NS}->{i} = {
1956 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1957 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1958     ## syntatically same as the |title| as global attribute.
1959 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1960     };
1961    
1962     $Element->{$HTML_NS}->{b} = {
1963 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1964 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1965     };
1966    
1967     $Element->{$HTML_NS}->{bdo} = {
1968 wakaba 1.12 attrs_checker => sub {
1969     my ($self, $todo) = @_;
1970     $GetHTMLAttrsChecker->({})->($self, $todo);
1971     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
1972     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
1973     }
1974     },
1975     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1976 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1977     };
1978    
1979     $Element->{$HTML_NS}->{ins} = {
1980 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1981     cite => $HTMLURIAttrChecker,
1982 wakaba 1.30 datetime => $HTMLDatetimeAttrChecker,
1983 wakaba 1.12 }),
1984 wakaba 1.1 checker => $HTMLTransparentChecker,
1985     };
1986    
1987     $Element->{$HTML_NS}->{del} = {
1988 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1989     cite => $HTMLURIAttrChecker,
1990 wakaba 1.30 datetime => $HTMLDatetimeAttrChecker,
1991 wakaba 1.12 }),
1992 wakaba 1.1 checker => sub {
1993 wakaba 1.4 my ($self, $todo) = @_;
1994 wakaba 1.1
1995 wakaba 1.4 my $parent = $todo->{node}->manakai_parent_element;
1996 wakaba 1.1 if (defined $parent) {
1997     my $nsuri = $parent->namespace_uri;
1998     $nsuri = '' unless defined $nsuri;
1999     my $ln = $parent->manakai_local_name;
2000     my $eldef = $Element->{$nsuri}->{$ln} ||
2001     $Element->{$nsuri}->{''} ||
2002     $ElementDefault;
2003 wakaba 1.4 return $eldef->{checker}->($self, $todo);
2004 wakaba 1.1 } else {
2005 wakaba 1.4 return $HTMLBlockOrInlineChecker->($self, $todo);
2006 wakaba 1.1 }
2007     },
2008     };
2009    
2010     ## TODO: figure
2011    
2012     $Element->{$HTML_NS}->{img} = {
2013 wakaba 1.17 attrs_checker => sub {
2014     my ($self, $todo) = @_;
2015     $GetHTMLAttrsChecker->({
2016     alt => sub { }, ## NOTE: No syntactical requirement
2017     src => $HTMLURIAttrChecker,
2018     usemap => $HTMLUsemapAttrChecker,
2019 wakaba 1.32 ismap => sub {
2020     my ($self, $attr, $parent_todo) = @_;
2021     if (not $todo->{flag}->{has_a}) {
2022     $self->{onerror}->(node => $attr, type => 'attribute not allowed');
2023     }
2024     $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_todo);
2025     },
2026 wakaba 1.17 ## TODO: height
2027     ## TODO: width
2028     })->($self, $todo);
2029     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2030     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2031     }
2032     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2033     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2034     }
2035     },
2036 wakaba 1.1 checker => $HTMLEmptyChecker,
2037     };
2038    
2039     $Element->{$HTML_NS}->{iframe} = {
2040 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2041     src => $HTMLURIAttrChecker,
2042     }),
2043 wakaba 1.1 checker => $HTMLTextChecker,
2044     };
2045    
2046     $Element->{$HTML_NS}->{embed} = {
2047 wakaba 1.16 attrs_checker => sub {
2048     my ($self, $todo) = @_;
2049     my $has_src;
2050     for my $attr (@{$todo->{node}->attributes}) {
2051     my $attr_ns = $attr->namespace_uri;
2052     $attr_ns = '' unless defined $attr_ns;
2053     my $attr_ln = $attr->manakai_local_name;
2054     my $checker;
2055     if ($attr_ns eq '') {
2056     if ($attr_ln eq 'src') {
2057     $checker = $HTMLURIAttrChecker;
2058     $has_src = 1;
2059     } elsif ($attr_ln eq 'type') {
2060     $checker = $HTMLIMTAttrChecker;
2061     } else {
2062     ## TODO: height
2063     ## TODO: width
2064     $checker = $HTMLAttrChecker->{$attr_ln}
2065     || sub { }; ## NOTE: Any local attribute is ok.
2066     }
2067     }
2068     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2069     || $AttrChecker->{$attr_ns}->{''};
2070     if ($checker) {
2071     $checker->($self, $attr);
2072     } else {
2073     $self->{onerror}->(node => $attr, type => 'attribute not supported');
2074     ## ISSUE: No comformance createria for global attributes in the spec
2075     }
2076     }
2077    
2078     unless ($has_src) {
2079     $self->{onerror}->(node => $todo->{node},
2080     type => 'attribute missing:src');
2081     }
2082     },
2083 wakaba 1.1 checker => $HTMLEmptyChecker,
2084     };
2085    
2086 wakaba 1.15 $Element->{$HTML_NS}->{object} = {
2087 wakaba 1.17 attrs_checker => sub {
2088     my ($self, $todo) = @_;
2089     $GetHTMLAttrsChecker->({
2090     data => $HTMLURIAttrChecker,
2091     type => $HTMLIMTAttrChecker,
2092     usemap => $HTMLUsemapAttrChecker,
2093     ## TODO: width
2094     ## TODO: height
2095     })->($self, $todo);
2096     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2097     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2098     $self->{onerror}->(node => $todo->{node},
2099     type => 'attribute missing:data|type');
2100     }
2101     }
2102     },
2103 wakaba 1.15 checker => $ElementDefault->{checker}, ## TODO
2104     };
2105    
2106 wakaba 1.1 $Element->{$HTML_NS}->{param} = {
2107 wakaba 1.12 attrs_checker => sub {
2108     my ($self, $todo) = @_;
2109     $GetHTMLAttrsChecker->({
2110     name => sub { },
2111     value => sub { },
2112     })->($self, $todo);
2113     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2114     $self->{onerror}->(node => $todo->{node},
2115     type => 'attribute missing:name');
2116     }
2117     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2118     $self->{onerror}->(node => $todo->{node},
2119     type => 'attribute missing:value');
2120     }
2121     },
2122 wakaba 1.1 checker => $HTMLEmptyChecker,
2123     };
2124    
2125 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
2126 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2127     src => $HTMLURIAttrChecker,
2128     ## TODO: start, loopstart, loopend, end
2129     ## ISSUE: they MUST be "value time offset"s. Value?
2130     ## ISSUE: loopcount has no conformance creteria
2131     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2132     controls => $GetHTMLBooleanAttrChecker->('controls'),
2133     }),
2134 wakaba 1.2 checker => sub {
2135 wakaba 1.4 my ($self, $todo) = @_;
2136 wakaba 1.2
2137 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2138     return $HTMLBlockOrInlineChecker->($self, $todo);
2139 wakaba 1.2 } else {
2140     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2141 wakaba 1.4 ->($self, $todo);
2142 wakaba 1.2 }
2143     },
2144     };
2145    
2146     $Element->{$HTML_NS}->{audio} = {
2147 wakaba 1.12 attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2148     checker => $Element->{$HTML_NS}->{video}->{checker},
2149 wakaba 1.2 };
2150 wakaba 1.1
2151     $Element->{$HTML_NS}->{source} = {
2152 wakaba 1.17 attrs_checker => sub {
2153     my ($self, $todo) = @_;
2154     $GetHTMLAttrsChecker->({
2155     src => $HTMLURIAttrChecker,
2156     type => $HTMLIMTAttrChecker,
2157     media => $HTMLMQAttrChecker,
2158     })->($self, $todo);
2159     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2160     $self->{onerror}->(node => $todo->{node},
2161     type => 'attribute missing:src');
2162     }
2163     },
2164 wakaba 1.1 checker => $HTMLEmptyChecker,
2165     };
2166    
2167     $Element->{$HTML_NS}->{canvas} = {
2168 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2169     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2170     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2171     }),
2172 wakaba 1.1 checker => $HTMLInlineChecker,
2173     };
2174    
2175     $Element->{$HTML_NS}->{map} = {
2176 wakaba 1.17 attrs_checker => $GetHTMLAttrsChecker->({
2177     id => sub {
2178     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2179     my ($self, $attr) = @_;
2180     my $value = $attr->value;
2181     if (length $value > 0) {
2182     if ($self->{id}->{$value}) {
2183     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2184     } else {
2185     $self->{id}->{$value} = 1;
2186     }
2187     } else {
2188     ## NOTE: MUST contain at least one character
2189     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
2190     }
2191 wakaba 1.27 if ($value =~ /[\x09-\x0D\x20]/) {
2192     $self->{onerror}->(node => $attr, type => 'space in ID');
2193     }
2194 wakaba 1.17 $self->{map}->{$value} ||= $attr;
2195     },
2196     }),
2197 wakaba 1.1 checker => $HTMLBlockChecker,
2198     };
2199    
2200     $Element->{$HTML_NS}->{area} = {
2201 wakaba 1.15 attrs_checker => sub {
2202     my ($self, $todo) = @_;
2203     my %attr;
2204     my $coords;
2205     for my $attr (@{$todo->{node}->attributes}) {
2206     my $attr_ns = $attr->namespace_uri;
2207     $attr_ns = '' unless defined $attr_ns;
2208     my $attr_ln = $attr->manakai_local_name;
2209     my $checker;
2210     if ($attr_ns eq '') {
2211     $checker = {
2212     alt => sub { },
2213     ## NOTE: |alt| value has no conformance creteria.
2214     shape => $GetHTMLEnumeratedAttrChecker->({
2215     circ => -1, circle => 1,
2216     default => 1,
2217     poly => 1, polygon => -1,
2218     rect => 1, rectangle => -1,
2219     }),
2220     coords => sub {
2221     my ($self, $attr) = @_;
2222     my $value = $attr->value;
2223     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2224     $coords = [split /,/, $value];
2225     } else {
2226     $self->{onerror}->(node => $attr,
2227     type => 'syntax error');
2228     }
2229     },
2230 wakaba 1.17 target => $HTMLTargetAttrChecker,
2231 wakaba 1.15 href => $HTMLURIAttrChecker,
2232     ping => $HTMLSpaceURIsAttrChecker,
2233 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(1, @_) },
2234 wakaba 1.17 media => $HTMLMQAttrChecker,
2235     hreflang => $HTMLLanguageTagAttrChecker,
2236 wakaba 1.15 type => $HTMLIMTAttrChecker,
2237     }->{$attr_ln};
2238     if ($checker) {
2239     $attr{$attr_ln} = $attr;
2240     } else {
2241     $checker = $HTMLAttrChecker->{$attr_ln};
2242     }
2243     }
2244     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2245     || $AttrChecker->{$attr_ns}->{''};
2246     if ($checker) {
2247     $checker->($self, $attr) if ref $checker;
2248     } else {
2249     $self->{onerror}->(node => $attr, type => 'attribute not supported');
2250     ## ISSUE: No comformance createria for unknown attributes in the spec
2251     }
2252     }
2253    
2254     if (defined $attr{href}) {
2255     unless (defined $attr{alt}) {
2256     $self->{onerror}->(node => $todo->{node},
2257     type => 'attribute missing:alt');
2258     }
2259     } else {
2260     for (qw/target ping rel media hreflang type alt/) {
2261     if (defined $attr{$_}) {
2262     $self->{onerror}->(node => $attr{$_},
2263     type => 'attribute not allowed');
2264     }
2265     }
2266     }
2267    
2268     my $shape = 'rectangle';
2269     if (defined $attr{shape}) {
2270     $shape = {
2271     circ => 'circle', circle => 'circle',
2272     default => 'default',
2273     poly => 'polygon', polygon => 'polygon',
2274     rect => 'rectangle', rectangle => 'rectangle',
2275     }->{lc $attr{shape}->value} || 'rectangle';
2276     ## TODO: ASCII lowercase?
2277     }
2278    
2279     if ($shape eq 'circle') {
2280     if (defined $attr{coords}) {
2281     if (defined $coords) {
2282     if (@$coords == 3) {
2283     if ($coords->[2] < 0) {
2284     $self->{onerror}->(node => $attr{coords},
2285     type => 'out of range:2');
2286     }
2287     } else {
2288     $self->{onerror}->(node => $attr{coords},
2289     type => 'list item number:3:'.@$coords);
2290     }
2291     } else {
2292     ## NOTE: A syntax error has been reported.
2293     }
2294     } else {
2295     $self->{onerror}->(node => $todo->{node},
2296     type => 'attribute missing:coords');
2297     }
2298     } elsif ($shape eq 'default') {
2299     if (defined $attr{coords}) {
2300     $self->{onerror}->(node => $attr{coords},
2301     type => 'attribute not allowed');
2302     }
2303     } elsif ($shape eq 'polygon') {
2304     if (defined $attr{coords}) {
2305     if (defined $coords) {
2306     if (@$coords >= 6) {
2307     unless (@$coords % 2 == 0) {
2308     $self->{onerror}->(node => $attr{coords},
2309     type => 'list item number:even:'.@$coords);
2310     }
2311     } else {
2312     $self->{onerror}->(node => $attr{coords},
2313     type => 'list item number:>=6:'.@$coords);
2314     }
2315     } else {
2316     ## NOTE: A syntax error has been reported.
2317     }
2318     } else {
2319     $self->{onerror}->(node => $todo->{node},
2320     type => 'attribute missing:coords');
2321     }
2322     } elsif ($shape eq 'rectangle') {
2323     if (defined $attr{coords}) {
2324     if (defined $coords) {
2325     if (@$coords == 4) {
2326     unless ($coords->[0] < $coords->[2]) {
2327     $self->{onerror}->(node => $attr{coords},
2328     type => 'out of range:0');
2329     }
2330     unless ($coords->[1] < $coords->[3]) {
2331     $self->{onerror}->(node => $attr{coords},
2332     type => 'out of range:1');
2333     }
2334     } else {
2335     $self->{onerror}->(node => $attr{coords},
2336     type => 'list item number:4:'.@$coords);
2337     }
2338     } else {
2339     ## NOTE: A syntax error has been reported.
2340     }
2341     } else {
2342     $self->{onerror}->(node => $todo->{node},
2343     type => 'attribute missing:coords');
2344     }
2345     }
2346     },
2347 wakaba 1.1 checker => $HTMLEmptyChecker,
2348     };
2349     ## TODO: only in map
2350    
2351     $Element->{$HTML_NS}->{table} = {
2352 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2353 wakaba 1.1 checker => sub {
2354 wakaba 1.4 my ($self, $todo) = @_;
2355     my $el = $todo->{node};
2356     my $new_todos = [];
2357 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2358    
2359     my $phase = 'before caption';
2360     my $has_tfoot;
2361     while (@nodes) {
2362     my $node = shift @nodes;
2363 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2364    
2365 wakaba 1.1 my $nt = $node->node_type;
2366     if ($nt == 1) {
2367 wakaba 1.8 my $node_ns = $node->namespace_uri;
2368     $node_ns = '' unless defined $node_ns;
2369     my $node_ln = $node->manakai_local_name;
2370 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2371 wakaba 1.1 if ($phase eq 'in tbodys') {
2372 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2373 wakaba 1.1 #$phase = 'in tbodys';
2374     } elsif (not $has_tfoot and
2375 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2376 wakaba 1.1 $phase = 'after tfoot';
2377     $has_tfoot = 1;
2378     } else {
2379 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2380 wakaba 1.1 }
2381     } elsif ($phase eq 'in trs') {
2382 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2383 wakaba 1.1 #$phase = 'in trs';
2384     } elsif (not $has_tfoot and
2385 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2386 wakaba 1.1 $phase = 'after tfoot';
2387     $has_tfoot = 1;
2388     } else {
2389 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2390 wakaba 1.1 }
2391     } elsif ($phase eq 'after thead') {
2392 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2393 wakaba 1.1 $phase = 'in tbodys';
2394 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2395 wakaba 1.1 $phase = 'in trs';
2396 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2397 wakaba 1.1 $phase = 'in tbodys';
2398     $has_tfoot = 1;
2399     } else {
2400 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2401 wakaba 1.1 }
2402     } elsif ($phase eq 'in colgroup') {
2403 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2404 wakaba 1.1 $phase = 'in colgroup';
2405 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2406 wakaba 1.1 $phase = 'after thead';
2407 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2408 wakaba 1.1 $phase = 'in tbodys';
2409 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2410 wakaba 1.1 $phase = 'in trs';
2411 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2412 wakaba 1.1 $phase = 'in tbodys';
2413     $has_tfoot = 1;
2414     } else {
2415 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2416 wakaba 1.1 }
2417     } elsif ($phase eq 'before caption') {
2418 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2419 wakaba 1.1 $phase = 'in colgroup';
2420 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2421 wakaba 1.1 $phase = 'in colgroup';
2422 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2423 wakaba 1.1 $phase = 'after thead';
2424 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2425 wakaba 1.1 $phase = 'in tbodys';
2426 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2427 wakaba 1.1 $phase = 'in trs';
2428 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2429 wakaba 1.1 $phase = 'in tbodys';
2430     $has_tfoot = 1;
2431     } else {
2432 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2433 wakaba 1.1 }
2434     } else { # after tfoot
2435 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2436 wakaba 1.1 }
2437 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2438 wakaba 1.2 unshift @nodes, @$sib;
2439 wakaba 1.4 push @$new_todos, @$ch;
2440 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2441     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2442 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2443 wakaba 1.1 }
2444     } elsif ($nt == 5) {
2445     unshift @nodes, @{$node->child_nodes};
2446     }
2447     }
2448 wakaba 1.21
2449     ## Table model errors
2450     require Whatpm::HTMLTable;
2451     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2452     my %opt = @_;
2453     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2454     });
2455    
2456 wakaba 1.4 return ($new_todos);
2457 wakaba 1.1 },
2458     };
2459    
2460     $Element->{$HTML_NS}->{caption} = {
2461 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2462 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
2463     };
2464    
2465     $Element->{$HTML_NS}->{colgroup} = {
2466 wakaba 1.17 attrs_checker => $GetHTMLAttrsChecker->({
2467     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2468     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2469     ## TODO: "attribute not supported" if |col|.
2470     ## ISSUE: MUST NOT if any |col|?
2471     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2472     }),
2473 wakaba 1.1 checker => sub {
2474 wakaba 1.4 my ($self, $todo) = @_;
2475     my $el = $todo->{node};
2476     my $new_todos = [];
2477 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2478    
2479     while (@nodes) {
2480     my $node = shift @nodes;
2481 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2482    
2483 wakaba 1.1 my $nt = $node->node_type;
2484     if ($nt == 1) {
2485 wakaba 1.8 my $node_ns = $node->namespace_uri;
2486     $node_ns = '' unless defined $node_ns;
2487     my $node_ln = $node->manakai_local_name;
2488 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2489 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
2490 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2491 wakaba 1.1 }
2492 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2493 wakaba 1.2 unshift @nodes, @$sib;
2494 wakaba 1.4 push @$new_todos, @$ch;
2495 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2496     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2497 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2498 wakaba 1.1 }
2499     } elsif ($nt == 5) {
2500     unshift @nodes, @{$node->child_nodes};
2501     }
2502     }
2503 wakaba 1.4 return ($new_todos);
2504 wakaba 1.1 },
2505     };
2506    
2507     $Element->{$HTML_NS}->{col} = {
2508 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2509     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2510     }),
2511 wakaba 1.1 checker => $HTMLEmptyChecker,
2512     };
2513    
2514     $Element->{$HTML_NS}->{tbody} = {
2515 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2516 wakaba 1.1 checker => sub {
2517 wakaba 1.4 my ($self, $todo) = @_;
2518     my $el = $todo->{node};
2519     my $new_todos = [];
2520 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2521    
2522     my $has_tr;
2523     while (@nodes) {
2524     my $node = shift @nodes;
2525 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2526    
2527 wakaba 1.1 my $nt = $node->node_type;
2528     if ($nt == 1) {
2529 wakaba 1.8 my $node_ns = $node->namespace_uri;
2530     $node_ns = '' unless defined $node_ns;
2531     my $node_ln = $node->manakai_local_name;
2532 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2533 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2534 wakaba 1.1 $has_tr = 1;
2535     } else {
2536 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2537 wakaba 1.1 }
2538 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2539 wakaba 1.2 unshift @nodes, @$sib;
2540 wakaba 1.4 push @$new_todos, @$ch;
2541 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2542     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2543 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2544 wakaba 1.1 }
2545     } elsif ($nt == 5) {
2546     unshift @nodes, @{$node->child_nodes};
2547     }
2548     }
2549     unless ($has_tr) {
2550 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
2551 wakaba 1.1 }
2552 wakaba 1.4 return ($new_todos);
2553 wakaba 1.1 },
2554     };
2555    
2556     $Element->{$HTML_NS}->{thead} = {
2557 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2558 wakaba 1.23 checker => $Element->{$HTML_NS}->{tbody}->{checker},
2559 wakaba 1.1 };
2560    
2561     $Element->{$HTML_NS}->{tfoot} = {
2562 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2563 wakaba 1.23 checker => $Element->{$HTML_NS}->{tbody}->{checker},
2564 wakaba 1.1 };
2565    
2566     $Element->{$HTML_NS}->{tr} = {
2567 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2568 wakaba 1.1 checker => sub {
2569 wakaba 1.4 my ($self, $todo) = @_;
2570     my $el = $todo->{node};
2571     my $new_todos = [];
2572 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2573    
2574     my $has_td;
2575     while (@nodes) {
2576     my $node = shift @nodes;
2577 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2578    
2579 wakaba 1.1 my $nt = $node->node_type;
2580     if ($nt == 1) {
2581 wakaba 1.8 my $node_ns = $node->namespace_uri;
2582     $node_ns = '' unless defined $node_ns;
2583     my $node_ln = $node->manakai_local_name;
2584 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2585 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
2586 wakaba 1.1 $has_td = 1;
2587     } else {
2588 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2589 wakaba 1.1 }
2590 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2591 wakaba 1.2 unshift @nodes, @$sib;
2592 wakaba 1.4 push @$new_todos, @$ch;
2593 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2594     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2595 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2596 wakaba 1.1 }
2597     } elsif ($nt == 5) {
2598     unshift @nodes, @{$node->child_nodes};
2599     }
2600     }
2601     unless ($has_td) {
2602 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2603 wakaba 1.1 }
2604 wakaba 1.4 return ($new_todos);
2605 wakaba 1.1 },
2606     };
2607    
2608     $Element->{$HTML_NS}->{td} = {
2609 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2610     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2611     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2612     }),
2613 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
2614     };
2615    
2616     $Element->{$HTML_NS}->{th} = {
2617 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2618     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2619     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2620     scope => $GetHTMLEnumeratedAttrChecker
2621     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2622     }),
2623 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
2624     };
2625    
2626     ## TODO: forms
2627    
2628 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
2629 wakaba 1.25 attrs_checker => sub {
2630     my ($self, $todo) = @_;
2631     $GetHTMLAttrsChecker->({
2632     src => $HTMLURIAttrChecker,
2633     defer => $GetHTMLBooleanAttrChecker->('defer'),
2634     async => $GetHTMLBooleanAttrChecker->('async'),
2635     type => $HTMLIMTAttrChecker,
2636     })->($self, $todo);
2637     if ($todo->{node}->has_attribute_ns (undef, 'defer')) {
2638     my $async_attr = $todo->{node}->get_attribute_node_ns (undef, 'async');
2639     if ($async_attr) {
2640     $self->{onerror}->(node => $async_attr,
2641     type => 'attribute not allowed'); # MUST NOT
2642     }
2643     }
2644     },
2645 wakaba 1.2 checker => sub {
2646 wakaba 1.4 my ($self, $todo) = @_;
2647 wakaba 1.2
2648 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2649     return $HTMLEmptyChecker->($self, $todo);
2650 wakaba 1.2 } else {
2651     ## NOTE: No content model conformance in HTML5 spec.
2652 wakaba 1.4 return $AnyChecker->($self, $todo);
2653 wakaba 1.2 }
2654     },
2655     };
2656    
2657     ## NOTE: When script is disabled.
2658     $Element->{$HTML_NS}->{noscript} = {
2659 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2660 wakaba 1.2 checker => sub {
2661 wakaba 1.4 my ($self, $todo) = @_;
2662 wakaba 1.1
2663 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2664 wakaba 1.4 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2665 wakaba 1.2 push @$sib, $end;
2666     return ($sib, $ch);
2667     },
2668     };
2669 wakaba 1.29 ## TODO: noscript in head
2670 wakaba 1.1
2671     $Element->{$HTML_NS}->{'event-source'} = {
2672 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2673     src => $HTMLURIAttrChecker,
2674     }),
2675 wakaba 1.1 checker => $HTMLEmptyChecker,
2676     };
2677    
2678     $Element->{$HTML_NS}->{details} = {
2679 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2680     open => $GetHTMLBooleanAttrChecker->('open'),
2681     }),
2682 wakaba 1.6 checker => sub {
2683     my ($self, $todo) = @_;
2684    
2685     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2686     my ($sib, $ch)
2687     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2688     ->($self, $todo);
2689     push @$sib, $end;
2690     return ($sib, $ch);
2691     },
2692 wakaba 1.1 };
2693    
2694     $Element->{$HTML_NS}->{datagrid} = {
2695 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2696     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2697     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2698     }),
2699 wakaba 1.6 checker => sub {
2700     my ($self, $todo) = @_;
2701 wakaba 1.32 my $el = $todo->{node};
2702     my $new_todos = [];
2703     my @nodes = (@{$el->child_nodes});
2704 wakaba 1.6
2705     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2706 wakaba 1.32
2707     ## Block-table Block* | table | select | datalist | Empty
2708     my $mode = 'any';
2709     while (@nodes) {
2710     my $node = shift @nodes;
2711     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2712    
2713     my $nt = $node->node_type;
2714     if ($nt == 1) {
2715     my $node_ns = $node->namespace_uri;
2716     $node_ns = '' unless defined $node_ns;
2717     my $node_ln = $node->manakai_local_name;
2718     my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2719     if ($mode eq 'block') {
2720     $not_allowed = 1
2721     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
2722     } elsif ($mode eq 'any') {
2723     if ($node_ns eq $HTML_NS and
2724     {table => 1, select => 1, datalist => 1}->{$node_ln}) {
2725     $mode = 'none';
2726     } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
2727     $mode = 'block';
2728     } else {
2729     $not_allowed = 1;
2730     }
2731     } else {
2732     $not_allowed = 1;
2733     }
2734     $self->{onerror}->(node => $node, type => 'element not allowed')
2735     if $not_allowed;
2736     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2737     unshift @nodes, @$sib;
2738     push @$new_todos, @$ch;
2739     } elsif ($nt == 3 or $nt == 4) {
2740     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2741     $self->{onerror}->(node => $node, type => 'character not allowed');
2742     }
2743     } elsif ($nt == 5) {
2744     unshift @nodes, @{$node->child_nodes};
2745     }
2746     }
2747    
2748     push @$new_todos, $end;
2749     return ($new_todos);
2750 wakaba 1.6 },
2751 wakaba 1.1 };
2752    
2753     $Element->{$HTML_NS}->{command} = {
2754 wakaba 1.32 attrs_checker => $GetHTMLAttrsChecker->({
2755     checked => $GetHTMLBooleanAttrChecker->('checked'),
2756     default => $GetHTMLBooleanAttrChecker->('default'),
2757     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2758     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2759     icon => $HTMLURIAttrChecker,
2760     label => sub { }, ## NOTE: No conformance creteria
2761     radiogroup => sub { }, ## NOTE: No conformance creteria
2762     ## NOTE: |title| has special semantics, but no syntactical difference
2763     type => sub {
2764     my ($self, $attr) = @_;
2765     my $value = $attr->value;
2766     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2767     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2768     }
2769     },
2770     }),
2771 wakaba 1.1 checker => $HTMLEmptyChecker,
2772     };
2773    
2774     $Element->{$HTML_NS}->{menu} = {
2775 wakaba 1.32 attrs_checker => $GetHTMLAttrsChecker->({
2776     autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
2777     id => sub {
2778     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
2779     my ($self, $attr) = @_;
2780     my $value = $attr->value;
2781     if (length $value > 0) {
2782     if ($self->{id}->{$value}) {
2783     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2784     } else {
2785     $self->{id}->{$value} = 1;
2786     }
2787     } else {
2788     ## NOTE: MUST contain at least one character
2789     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
2790     }
2791     if ($value =~ /[\x09-\x0D\x20]/) {
2792     $self->{onerror}->(node => $attr, type => 'space in ID');
2793     }
2794     $self->{menu}->{$value} ||= $attr;
2795     ## ISSUE: <menu id=""><p contextmenu=""> match?
2796     },
2797     label => sub { }, ## NOTE: No conformance creteria
2798     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
2799     }),
2800 wakaba 1.1 checker => sub {
2801 wakaba 1.4 my ($self, $todo) = @_;
2802     my $el = $todo->{node};
2803     my $new_todos = [];
2804 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2805    
2806     my $content = 'li or inline';
2807     while (@nodes) {
2808     my $node = shift @nodes;
2809 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2810    
2811 wakaba 1.1 my $nt = $node->node_type;
2812     if ($nt == 1) {
2813 wakaba 1.2 my $node_ns = $node->namespace_uri;
2814     $node_ns = '' unless defined $node_ns;
2815     my $node_ln = $node->manakai_local_name;
2816 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2817 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2818 wakaba 1.1 if ($content eq 'inline') {
2819 wakaba 1.6 $not_allowed = 1;
2820 wakaba 1.1 } elsif ($content eq 'li or inline') {
2821     $content = 'li';
2822     }
2823     } else {
2824 wakaba 1.7 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2825     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2826     $content = 'inline';
2827     } else {
2828 wakaba 1.6 $not_allowed = 1;
2829 wakaba 1.7 }
2830 wakaba 1.1 }
2831 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
2832     if $not_allowed;
2833 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2834 wakaba 1.2 unshift @nodes, @$sib;
2835 wakaba 1.4 push @$new_todos, @$ch;
2836 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2837     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2838     if ($content eq 'li') {
2839 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2840 wakaba 1.1 } elsif ($content eq 'li or inline') {
2841     $content = 'inline';
2842     }
2843     }
2844     } elsif ($nt == 5) {
2845     unshift @nodes, @{$node->child_nodes};
2846     }
2847     }
2848 wakaba 1.4
2849     for (@$new_todos) {
2850     $_->{inline} = 1;
2851     }
2852     return ($new_todos);
2853 wakaba 1.1 },
2854     };
2855    
2856 wakaba 1.6 $Element->{$HTML_NS}->{legend} = {
2857 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2858 wakaba 1.6 checker => sub {
2859     my ($self, $todo) = @_;
2860    
2861     my $parent = $todo->{node}->manakai_parent_element;
2862     if (defined $parent) {
2863     my $nsuri = $parent->namespace_uri;
2864     $nsuri = '' unless defined $nsuri;
2865     my $ln = $parent->manakai_local_name;
2866     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
2867     return $HTMLInlineChecker->($self, $todo);
2868     } else {
2869     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
2870     }
2871     } else {
2872     return $HTMLInlineChecker->($self, $todo);
2873     }
2874    
2875     ## ISSUE: Content model is defined only for fieldset/legend,
2876     ## details/legend, and figure/legend.
2877     },
2878     };
2879 wakaba 1.1
2880     $Element->{$HTML_NS}->{div} = {
2881 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2882 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
2883 wakaba 1.1 };
2884    
2885     $Element->{$HTML_NS}->{font} = {
2886 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2887 wakaba 1.1 checker => $HTMLTransparentChecker,
2888     };
2889    
2890 wakaba 1.24 sub check_document ($$$) {
2891     my ($self, $doc, $onerror) = @_;
2892     $self = bless {}, $self unless ref $self;
2893     $self->{onerror} = $onerror;
2894    
2895     my $docel = $doc->document_element;
2896 wakaba 1.26 unless (defined $docel) {
2897     ## ISSUE: Should we check content of Document node?
2898     $onerror->(node => $doc, type => 'no document element');
2899     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
2900     return;
2901     }
2902    
2903     ## ISSUE: Unexpanded entity references and HTML5 conformance
2904    
2905 wakaba 1.24 my $docel_nsuri = $docel->namespace_uri;
2906     $docel_nsuri = '' unless defined $docel_nsuri;
2907     my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
2908     $Element->{$docel_nsuri}->{''} ||
2909     $ElementDefault;
2910     if ($docel_def->{is_root}) {
2911     #
2912     } else {
2913     $onerror->(node => $docel, type => 'element not allowed');
2914     }
2915    
2916     ## TODO: Check for other items other than document element
2917     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
2918    
2919     $self->check_element ($docel, $onerror);
2920     } # check_document
2921 wakaba 1.2
2922 wakaba 1.1 sub check_element ($$$) {
2923     my ($self, $el, $onerror) = @_;
2924 wakaba 1.24 $self = bless {}, $self unless ref $self;
2925     $self->{onerror} = $onerror;
2926 wakaba 1.1
2927 wakaba 1.2 $self->{minuses} = {};
2928 wakaba 1.10 $self->{id} = {};
2929 wakaba 1.30 $self->{term} = {};
2930 wakaba 1.17 $self->{usemap} = [];
2931 wakaba 1.32 $self->{contextmenu} = [];
2932 wakaba 1.17 $self->{map} = {};
2933 wakaba 1.32 $self->{menu} = {};
2934 wakaba 1.20 $self->{has_link_type} = {};
2935 wakaba 1.2
2936 wakaba 1.4 my @todo = ({type => 'element', node => $el});
2937     while (@todo) {
2938     my $todo = shift @todo;
2939     if ($todo->{type} eq 'element') {
2940 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2941     if (defined $prefix and $prefix eq 'xmlns') {
2942     $self->{onerror}
2943     ->(node => $todo->{node},
2944     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2945     }
2946 wakaba 1.4 my $nsuri = $todo->{node}->namespace_uri;
2947     $nsuri = '' unless defined $nsuri;
2948     my $ln = $todo->{node}->manakai_local_name;
2949     my $eldef = $Element->{$nsuri}->{$ln} ||
2950     $Element->{$nsuri}->{''} ||
2951     $ElementDefault;
2952 wakaba 1.9 $eldef->{attrs_checker}->($self, $todo);
2953 wakaba 1.4 my ($new_todos) = $eldef->{checker}->($self, $todo);
2954 wakaba 1.14 unshift @todo, @$new_todos;
2955 wakaba 1.9 } elsif ($todo->{type} eq 'element-attributes') {
2956 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2957     if (defined $prefix and $prefix eq 'xmlns') {
2958     $self->{onerror}
2959     ->(node => $todo->{node},
2960     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2961     }
2962 wakaba 1.9 my $nsuri = $todo->{node}->namespace_uri;
2963     $nsuri = '' unless defined $nsuri;
2964     my $ln = $todo->{node}->manakai_local_name;
2965     my $eldef = $Element->{$nsuri}->{$ln} ||
2966     $Element->{$nsuri}->{''} ||
2967     $ElementDefault;
2968     $eldef->{attrs_checker}->($self, $todo);
2969 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
2970     $self->_remove_minuses ($todo);
2971 wakaba 1.30 } elsif ($todo->{type} eq 'code') {
2972     $todo->{code}->();
2973     } else {
2974     die "$0: Internal error: Unsupported checking action type |$todo->{type}|";
2975 wakaba 1.4 }
2976 wakaba 1.1 }
2977 wakaba 1.17
2978     for (@{$self->{usemap}}) {
2979     unless ($self->{map}->{$_->[0]}) {
2980     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
2981     }
2982     }
2983    
2984 wakaba 1.32 for (@{$self->{contextmenu}}) {
2985     unless ($self->{menu}->{$_->[0]}) {
2986     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
2987     }
2988     }
2989    
2990 wakaba 1.17 delete $self->{minuses};
2991     delete $self->{onerror};
2992     delete $self->{id};
2993     delete $self->{usemap};
2994     delete $self->{map};
2995 wakaba 1.1 } # check_element
2996    
2997 wakaba 1.2 sub _add_minuses ($@) {
2998     my $self = shift;
2999     my $r = {};
3000     for my $list (@_) {
3001     for my $ns (keys %$list) {
3002     for my $ln (keys %{$list->{$ns}}) {
3003     unless ($self->{minuses}->{$ns}->{$ln}) {
3004     $self->{minuses}->{$ns}->{$ln} = 1;
3005     $r->{$ns}->{$ln} = 1;
3006     }
3007     }
3008     }
3009     }
3010 wakaba 1.4 return {type => 'plus', list => $r};
3011 wakaba 1.2 } # _add_minuses
3012    
3013     sub _remove_minuses ($$) {
3014 wakaba 1.4 my ($self, $todo) = @_;
3015     for my $ns (keys %{$todo->{list}}) {
3016     for my $ln (keys %{$todo->{list}->{$ns}}) {
3017     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
3018 wakaba 1.2 }
3019     }
3020     1;
3021     } # _remove_minuses
3022    
3023 wakaba 1.30 sub _check_get_children ($$$) {
3024     my ($self, $node, $parent_todo) = @_;
3025 wakaba 1.4 my $new_todos = [];
3026 wakaba 1.2 my $sib = [];
3027     TP: {
3028     my $node_ns = $node->namespace_uri;
3029     $node_ns = '' unless defined $node_ns;
3030     my $node_ln = $node->manakai_local_name;
3031     if ($node_ns eq $HTML_NS) {
3032     if ($node_ln eq 'noscript') {
3033     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
3034     push @$sib, $end;
3035     }
3036     }
3037 wakaba 1.31 ## TODO: |noscript| is not a transparent element in |head|.
3038 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
3039     unshift @$sib, @{$node->child_nodes};
3040 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
3041 wakaba 1.7 last TP;
3042 wakaba 1.2 }
3043 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
3044 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
3045     unshift @$sib, @{$node->child_nodes};
3046 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
3047 wakaba 1.2 last TP;
3048     } else {
3049     my @cn = @{$node->child_nodes};
3050     CN: while (@cn) {
3051     my $cn = shift @cn;
3052     my $cnt = $cn->node_type;
3053     if ($cnt == 1) {
3054 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
3055     $cn_nsuri = '' unless defined $cn_nsuri;
3056     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
3057 wakaba 1.2 #
3058     } else {
3059     last CN;
3060     }
3061     } elsif ($cnt == 3 or $cnt == 4) {
3062     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
3063     last CN;
3064     }
3065     }
3066     } # CN
3067     unshift @$sib, @cn;
3068     }
3069     }
3070 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
3071 wakaba 1.2 } # TP
3072 wakaba 1.30
3073     for my $new_todo (@$new_todos) {
3074     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
3075     }
3076    
3077 wakaba 1.4 return ($sib, $new_todos);
3078 wakaba 1.2 } # _check_get_children
3079    
3080 wakaba 1.1 1;
3081 wakaba 1.32 # $Date: 2007/06/25 00:14:39 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24