/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.14 - (hide annotations) (download)
Sat May 19 15:31:07 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.13: +3 -2 lines
++ whatpm/t/ChangeLog	19 May 2007 15:31:00 -0000
2007-05-20  Wakaba  <wakaba@suika.fam.cx>

	* content-model-2.dat: Tests for <a>/interactive
	elements non-nesting are added.

++ whatpm/Whatpm/ChangeLog	19 May 2007 15:30:33 -0000
2007-05-20  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm: Descendant checking was incorrect.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
5     ## be applied to an in-memory representation (i.e. DOM)?
6    
7 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
8     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
9    
10     my $AttrChecker = {
11     $XML_NS => {
12 wakaba 1.13 space => sub {
13     my ($self, $attr) = @_;
14     my $value = $attr->value;
15     if ($value eq 'default' or $value eq 'preserve') {
16     #
17     } else {
18     ## NOTE: An XML "error"
19     $self->{onerror}->(node => $attr,
20     type => 'XML error:invalid xml:space value');
21     }
22     },
23     lang => sub {
24     ## NOTE: "The values of the attribute are language identifiers
25     ## as defined by [IETF RFC 3066], Tags for the Identification
26     ## of Languages, or its successor; in addition, the empty string
27     ## may be specified." ("may" in lower case)
28     ## TODO: xml:lang MUST NOT in HTML document
29     },
30     base => sub {
31     my ($self, $attr) = @_;
32     my $value = $attr->value;
33     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
34     $self->{onerror}->(node => $attr,
35     type => 'syntax error');
36     }
37     ## NOTE: Conformance to URI standard is not checked.
38     },
39     id => sub {
40     my ($self, $attr) = @_;
41     my $value = $attr->value;
42     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
43     $value =~ s/^\x20//;
44     $value =~ s/\x20$//;
45     ## TODO: NCName in XML 1.0 or 1.1
46     ## TODO: declared type is ID?
47     if ($self->{id}->{$value}) {
48     $self->{onerror}->(node => $attr, type => 'xml:id error:duplicate ID');
49     } else {
50     $self->{id}->{$value} = 1;
51     }
52     },
53 wakaba 1.9 },
54     $XMLNS_NS => {
55 wakaba 1.13 '' => sub {
56     my ($self, $attr) = @_;
57     my $ln = $attr->manakai_local_name;
58     my $value = $attr->value;
59     if ($value eq $XML_NS and $ln ne 'xml') {
60     $self->{onerror}
61     ->(node => $attr,
62     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
63     } elsif ($value eq $XMLNS_NS) {
64     $self->{onerror}
65     ->(node => $attr,
66     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
67     }
68     if ($ln eq 'xml' and $value ne $XML_NS) {
69     $self->{onerror}
70     ->(node => $attr,
71     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xml=');
72     } elsif ($ln eq 'xmlns') {
73     $self->{onerror}
74     ->(node => $attr,
75     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xmlns=');
76     }
77     ## TODO: If XML 1.0 and empty
78     },
79     xmlns => sub {
80     my ($self, $attr) = @_;
81     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
82     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
83     my $value = $attr->value;
84     if ($value eq $XML_NS) {
85     $self->{onerror}
86     ->(node => $attr,
87     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
88     } elsif ($value eq $XMLNS_NS) {
89     $self->{onerror}
90     ->(node => $attr,
91     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
92     }
93     },
94 wakaba 1.9 },
95     };
96    
97 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
98 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
99     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
100     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
101     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
102    
103 wakaba 1.3 ## ANY
104     my $AnyChecker = sub {
105 wakaba 1.4 my ($self, $todo) = @_;
106     my $el = $todo->{node};
107     my $new_todos = [];
108 wakaba 1.3 my @nodes = (@{$el->child_nodes});
109     while (@nodes) {
110     my $node = shift @nodes;
111     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
112    
113     my $nt = $node->node_type;
114     if ($nt == 1) {
115     my $node_ns = $node->namespace_uri;
116     $node_ns = '' unless defined $node_ns;
117     my $node_ln = $node->manakai_local_name;
118     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
119     $self->{onerror}->(node => $node, type => 'element not allowed');
120     }
121 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
122 wakaba 1.3 } elsif ($nt == 5) {
123     unshift @nodes, @{$node->child_nodes};
124     }
125     }
126 wakaba 1.4 return ($new_todos);
127 wakaba 1.3 }; # $AnyChecker
128    
129 wakaba 1.1 my $ElementDefault = {
130     checker => sub {
131 wakaba 1.4 my ($self, $todo) = @_;
132     $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
133     return $AnyChecker->($self, $todo);
134 wakaba 1.1 },
135 wakaba 1.9 attrs_checker => sub {
136     my ($self, $todo) = @_;
137     for my $attr (@{$todo->{node}->attributes}) {
138     my $attr_ns = $attr->namespace_uri;
139     $attr_ns = '' unless defined $attr_ns;
140     my $attr_ln = $attr->manakai_local_name;
141     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
142     || $AttrChecker->{$attr_ns}->{''};
143     if ($checker) {
144     $checker->($self, $attr);
145     }
146     ## Don't check otherwise, since "element type not supported" warning
147     ## will be reported by the element checker.
148     }
149     },
150 wakaba 1.1 };
151    
152     my $Element = {};
153    
154     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
155    
156 wakaba 1.7 my $HTMLMetadataElements = {
157     $HTML_NS => {
158     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1/,
159     },
160     };
161 wakaba 1.1
162 wakaba 1.2 my $HTMLSectioningElements = {
163     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
164     };
165 wakaba 1.1
166 wakaba 1.7 my $HTMLBlockLevelElements = {
167     $HTML_NS => {
168     qw/
169     section 1 nav 1 article 1 blockquote 1 aside 1
170     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
171     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
172     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
173     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
174     /,
175     },
176     };
177    
178     my $HTMLStrictlyInlineLevelElements = {
179     $HTML_NS => {
180     qw/
181     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
182     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
183     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
184     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
185     script 1 noscript 1 event-source 1 command 1 font 1
186     /,
187     },
188     };
189    
190     my $HTMLStructuredInlineLevelElements = {
191     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
192     };
193 wakaba 1.1
194 wakaba 1.6 my $HTMLInteractiveElements = {
195     $HTML_NS => {a => 1, details => 1, datagrid => 1},
196     };
197     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
198     ## of interactive elements
199 wakaba 1.1
200 wakaba 1.7 my $HTMLTransparentElements = {
201     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
202     ## NOTE: |html:noscript| is transparent if scripting is disabled.
203     };
204    
205     #my $HTMLSemiTransparentElements = {
206     # $HTML_NS => {qw/video 1 audio 1/},
207     #};
208    
209     my $HTMLEmbededElements = {
210     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
211     };
212 wakaba 1.1
213     ## Empty
214     my $HTMLEmptyChecker = sub {
215 wakaba 1.4 my ($self, $todo) = @_;
216     my $el = $todo->{node};
217     my $new_todos = [];
218 wakaba 1.1 my @nodes = (@{$el->child_nodes});
219    
220     while (@nodes) {
221     my $node = shift @nodes;
222 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
223    
224 wakaba 1.1 my $nt = $node->node_type;
225     if ($nt == 1) {
226 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
227     $self->{onerror}->(node => $node, type => 'element not allowed');
228     my ($sib, $ch) = $self->_check_get_children ($node);
229     unshift @nodes, @$sib;
230 wakaba 1.4 push @$new_todos, @$ch;
231 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
232 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
233     $self->{onerror}->(node => $node, type => 'character not allowed');
234     }
235 wakaba 1.1 } elsif ($nt == 5) {
236     unshift @nodes, @{$node->child_nodes};
237     }
238     }
239 wakaba 1.4 return ($new_todos);
240 wakaba 1.1 };
241    
242     ## Text
243     my $HTMLTextChecker = sub {
244 wakaba 1.4 my ($self, $todo) = @_;
245     my $el = $todo->{node};
246     my $new_todos = [];
247 wakaba 1.1 my @nodes = (@{$el->child_nodes});
248    
249     while (@nodes) {
250     my $node = shift @nodes;
251 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
252    
253 wakaba 1.1 my $nt = $node->node_type;
254     if ($nt == 1) {
255 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
256     $self->{onerror}->(node => $node, type => 'element not allowed');
257     my ($sib, $ch) = $self->_check_get_children ($node);
258     unshift @nodes, @$sib;
259 wakaba 1.4 push @$new_todos, @$ch;
260 wakaba 1.1 } elsif ($nt == 5) {
261     unshift @nodes, @{$node->child_nodes};
262     }
263     }
264 wakaba 1.4 return ($new_todos);
265 wakaba 1.1 };
266    
267     ## Zero or more |html:style| elements,
268     ## followed by zero or more block-level elements
269     my $HTMLStylableBlockChecker = sub {
270 wakaba 1.4 my ($self, $todo) = @_;
271     my $el = $todo->{node};
272     my $new_todos = [];
273 wakaba 1.1 my @nodes = (@{$el->child_nodes});
274    
275     my $has_non_style;
276     while (@nodes) {
277     my $node = shift @nodes;
278 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
279    
280 wakaba 1.1 my $nt = $node->node_type;
281     if ($nt == 1) {
282 wakaba 1.2 my $node_ns = $node->namespace_uri;
283     $node_ns = '' unless defined $node_ns;
284     my $node_ln = $node->manakai_local_name;
285 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
286 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
287 wakaba 1.6 $not_allowed = 1 if $has_non_style;
288 wakaba 1.7 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
289     $has_non_style = 1;
290 wakaba 1.1 } else {
291     $has_non_style = 1;
292 wakaba 1.7 $not_allowed = 1;
293 wakaba 1.1 }
294 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
295     if $not_allowed;
296 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
297     unshift @nodes, @$sib;
298 wakaba 1.4 push @$new_todos, @$ch;
299 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
300     if ($node->data =~ /[^\x09-\x0D\x20]/) {
301 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
302 wakaba 1.1 }
303     } elsif ($nt == 5) {
304     unshift @nodes, @{$node->child_nodes};
305     }
306     }
307 wakaba 1.4 return ($new_todos);
308 wakaba 1.1 }; # $HTMLStylableBlockChecker
309    
310     ## Zero or more block-level elements
311     my $HTMLBlockChecker = sub {
312 wakaba 1.4 my ($self, $todo) = @_;
313     my $el = $todo->{node};
314     my $new_todos = [];
315 wakaba 1.1 my @nodes = (@{$el->child_nodes});
316    
317     while (@nodes) {
318     my $node = shift @nodes;
319 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
320    
321 wakaba 1.1 my $nt = $node->node_type;
322     if ($nt == 1) {
323 wakaba 1.2 my $node_ns = $node->namespace_uri;
324     $node_ns = '' unless defined $node_ns;
325     my $node_ln = $node->manakai_local_name;
326 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
327 wakaba 1.7 $not_allowed = 1
328     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
329 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
330     if $not_allowed;
331 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
332     unshift @nodes, @$sib;
333 wakaba 1.4 push @$new_todos, @$ch;
334 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
335     if ($node->data =~ /[^\x09-\x0D\x20]/) {
336 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
337 wakaba 1.1 }
338     } elsif ($nt == 5) {
339     unshift @nodes, @{$node->child_nodes};
340     }
341     }
342 wakaba 1.4 return ($new_todos);
343 wakaba 1.1 }; # $HTMLBlockChecker
344    
345     ## Inline-level content
346     my $HTMLInlineChecker = sub {
347 wakaba 1.4 my ($self, $todo) = @_;
348     my $el = $todo->{node};
349     my $new_todos = [];
350 wakaba 1.1 my @nodes = (@{$el->child_nodes});
351    
352     while (@nodes) {
353     my $node = shift @nodes;
354 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
355    
356 wakaba 1.1 my $nt = $node->node_type;
357     if ($nt == 1) {
358 wakaba 1.2 my $node_ns = $node->namespace_uri;
359     $node_ns = '' unless defined $node_ns;
360     my $node_ln = $node->manakai_local_name;
361 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
362 wakaba 1.7 $not_allowed = 1
363     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
364     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
365 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
366     if $not_allowed;
367 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
368     unshift @nodes, @$sib;
369 wakaba 1.4 push @$new_todos, @$ch;
370 wakaba 1.1 } elsif ($nt == 5) {
371     unshift @nodes, @{$node->child_nodes};
372     }
373     }
374 wakaba 1.4
375     for (@$new_todos) {
376     $_->{inline} = 1;
377     }
378     return ($new_todos);
379     }; # $HTMLInlineChecker
380 wakaba 1.1
381     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
382     ## TODO: check significant content
383    
384     ## Strictly inline-level content
385     my $HTMLStrictlyInlineChecker = sub {
386 wakaba 1.4 my ($self, $todo) = @_;
387     my $el = $todo->{node};
388     my $new_todos = [];
389 wakaba 1.1 my @nodes = (@{$el->child_nodes});
390    
391     while (@nodes) {
392     my $node = shift @nodes;
393 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
394    
395 wakaba 1.1 my $nt = $node->node_type;
396     if ($nt == 1) {
397 wakaba 1.2 my $node_ns = $node->namespace_uri;
398     $node_ns = '' unless defined $node_ns;
399     my $node_ln = $node->manakai_local_name;
400 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
401 wakaba 1.7 $not_allowed = 1
402     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
403 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
404     if $not_allowed;
405 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
406     unshift @nodes, @$sib;
407 wakaba 1.4 push @$new_todos, @$ch;
408 wakaba 1.1 } elsif ($nt == 5) {
409     unshift @nodes, @{$node->child_nodes};
410     }
411     }
412 wakaba 1.4
413     for (@$new_todos) {
414     $_->{inline} = 1;
415     $_->{strictly_inline} = 1;
416     }
417     return ($new_todos);
418 wakaba 1.1 }; # $HTMLStrictlyInlineChecker
419    
420     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
421     ## TODO: check significant content
422    
423 wakaba 1.4 ## Inline-level or strictly inline-kevek content
424     my $HTMLInlineOrStrictlyInlineChecker = sub {
425     my ($self, $todo) = @_;
426     my $el = $todo->{node};
427     my $new_todos = [];
428     my @nodes = (@{$el->child_nodes});
429    
430     while (@nodes) {
431     my $node = shift @nodes;
432     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
433    
434     my $nt = $node->node_type;
435     if ($nt == 1) {
436     my $node_ns = $node->namespace_uri;
437     $node_ns = '' unless defined $node_ns;
438     my $node_ln = $node->manakai_local_name;
439 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
440 wakaba 1.7 if ($todo->{strictly_inline}) {
441     $not_allowed = 1
442     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
443     } else {
444     $not_allowed = 1
445     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
446     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
447     }
448 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
449     if $not_allowed;
450 wakaba 1.4 my ($sib, $ch) = $self->_check_get_children ($node);
451     unshift @nodes, @$sib;
452     push @$new_todos, @$ch;
453     } elsif ($nt == 5) {
454     unshift @nodes, @{$node->child_nodes};
455     }
456     }
457    
458     for (@$new_todos) {
459     $_->{inline} = 1;
460     $_->{strictly_inline} = 1;
461     }
462     return ($new_todos);
463     }; # $HTMLInlineOrStrictlyInlineChecker
464    
465 wakaba 1.6 my $HTMLSignificantInlineOrStrictlyInlineChecker
466     = $HTMLInlineOrStrictlyInlineChecker;
467     ## TODO: check significant content
468    
469 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
470 wakaba 1.4 my ($self, $todo) = @_;
471     my $el = $todo->{node};
472     my $new_todos = [];
473 wakaba 1.1 my @nodes = (@{$el->child_nodes});
474    
475     my $content = 'block-or-inline'; # or 'block' or 'inline'
476     my @block_not_inline;
477     while (@nodes) {
478     my $node = shift @nodes;
479 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
480    
481 wakaba 1.1 my $nt = $node->node_type;
482     if ($nt == 1) {
483 wakaba 1.2 my $node_ns = $node->namespace_uri;
484     $node_ns = '' unless defined $node_ns;
485     my $node_ln = $node->manakai_local_name;
486 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
487 wakaba 1.1 if ($content eq 'block') {
488 wakaba 1.7 $not_allowed = 1
489     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
490 wakaba 1.1 } elsif ($content eq 'inline') {
491 wakaba 1.7 $not_allowed = 1
492     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
493     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
494 wakaba 1.1 } else {
495 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
496     my $is_inline
497     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
498     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
499 wakaba 1.1
500 wakaba 1.6 push @block_not_inline, $node
501     if $is_block and not $is_inline and not $not_allowed;
502 wakaba 1.1 unless ($is_block) {
503     $content = 'inline';
504     for (@block_not_inline) {
505 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
506 wakaba 1.1 }
507 wakaba 1.6 $not_allowed = 1 unless $is_inline;
508 wakaba 1.1 }
509     }
510 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
511     if $not_allowed;
512 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
513     unshift @nodes, @$sib;
514 wakaba 1.4 push @$new_todos, @$ch;
515 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
516     if ($node->data =~ /[^\x09-\x0D\x20]/) {
517     if ($content eq 'block') {
518 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
519 wakaba 1.1 } else {
520     $content = 'inline';
521     for (@block_not_inline) {
522 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
523 wakaba 1.1 }
524     }
525     }
526     } elsif ($nt == 5) {
527     unshift @nodes, @{$node->child_nodes};
528     }
529     }
530 wakaba 1.4
531     if ($content eq 'inline') {
532     for (@$new_todos) {
533     $_->{inline} = 1;
534     }
535     }
536     return ($new_todos);
537 wakaba 1.1 };
538    
539 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
540     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
541     my ($elnsuri, $ellname) = @_;
542     return sub {
543 wakaba 1.4 my ($self, $todo) = @_;
544     my $el = $todo->{node};
545     my $new_todos = [];
546 wakaba 1.2 my @nodes = (@{$el->child_nodes});
547    
548     my $has_non_style;
549     my $content = 'block-or-inline'; # or 'block' or 'inline'
550     my @block_not_inline;
551     while (@nodes) {
552     my $node = shift @nodes;
553     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
554    
555     my $nt = $node->node_type;
556     if ($nt == 1) {
557     my $node_ns = $node->namespace_uri;
558     $node_ns = '' unless defined $node_ns;
559     my $node_ln = $node->manakai_local_name;
560 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
561 wakaba 1.8 if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
562 wakaba 1.6 $not_allowed = 1 if $has_non_style;
563 wakaba 1.2 } elsif ($content eq 'block') {
564     $has_non_style = 1;
565 wakaba 1.7 $not_allowed = 1
566     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
567 wakaba 1.2 } elsif ($content eq 'inline') {
568     $has_non_style = 1;
569 wakaba 1.7 $not_allowed = 1
570     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
571     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
572 wakaba 1.2 } else {
573     $has_non_style = 1;
574 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
575     my $is_inline
576     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
577     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
578 wakaba 1.2
579 wakaba 1.6 push @block_not_inline, $node
580     if $is_block and not $is_inline and not $not_allowed;
581 wakaba 1.2 unless ($is_block) {
582     $content = 'inline';
583     for (@block_not_inline) {
584     $self->{onerror}->(node => $_, type => 'element not allowed');
585     }
586 wakaba 1.6 $not_allowed = 1 unless $is_inline;
587 wakaba 1.1 }
588     }
589 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
590     if $not_allowed;
591 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
592     unshift @nodes, @$sib;
593 wakaba 1.4 push @$new_todos, @$ch;
594 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
595     if ($node->data =~ /[^\x09-\x0D\x20]/) {
596     $has_non_style = 1;
597     if ($content eq 'block') {
598     $self->{onerror}->(node => $node, type => 'character not allowed');
599     } else {
600     $content = 'inline';
601     for (@block_not_inline) {
602     $self->{onerror}->(node => $_, type => 'element not allowed');
603     }
604 wakaba 1.1 }
605     }
606 wakaba 1.2 } elsif ($nt == 5) {
607     unshift @nodes, @{$node->child_nodes};
608 wakaba 1.1 }
609     }
610 wakaba 1.4
611     if ($content eq 'inline') {
612     for (@$new_todos) {
613     $_->{inline} = 1;
614     }
615     }
616     return ($new_todos);
617 wakaba 1.2 };
618     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
619 wakaba 1.1
620     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
621    
622 wakaba 1.10 my $GetHTMLEnumeratedAttrChecker = sub {
623     my $states = shift; # {value => conforming ? 1 : -1}
624     return sub {
625     my ($self, $attr) = @_;
626     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
627     if ($states->{$value} > 0) {
628     #
629     } elsif ($states->{$value}) {
630     $self->{onerror}->(node => $attr,
631     type => 'non-conforming enumerated attribute value');
632     } else {
633     $self->{onerror}->(node => $attr,
634     type => 'invalid enumerated attribute value');
635     }
636     };
637     }; # $GetHTMLEnumeratedAttrChecker
638 wakaba 1.9
639 wakaba 1.10 my $GetHTMLBooleanAttrChecker = sub {
640     my $local_name = shift;
641     return sub {
642     my ($self, $attr) = @_;
643     my $value = $attr->value;
644     unless ($value eq $local_name or $value eq '') {
645     $self->{onerror}->(node => $attr,
646     type => 'invalid boolean attribute value');
647     }
648     };
649     }; # $GetHTMLBooleanAttrChecker
650 wakaba 1.9
651 wakaba 1.10 my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
652     my ($self, $attr) = @_;
653     my %word;
654     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
655     unless ($word{$word}) {
656     $word{$word} = 1;
657 wakaba 1.9 } else {
658 wakaba 1.10 $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
659 wakaba 1.9 }
660     }
661 wakaba 1.10 }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
662    
663 wakaba 1.11 my $HTMLURIAttrChecker = sub {
664     my ($self, $attr) = @_;
665     ## TODO: URI or IRI check
666     }; # $HTMLURIAttrChecker
667    
668     my $HTMLIntegerAttrChecker = sub {
669     my ($self, $attr) = @_;
670     my $value = $attr->value;
671     unless ($value =~ /\A-?[0-9]+\z/) {
672     $self->{onerror}->(node => $attr, type => 'syntax error');
673     }
674     }; # $HTMLIntegerAttrChecker
675    
676 wakaba 1.12 my $GetHTMLNonNegativeIntegerAttrChecker = sub {
677     my $range_check = shift;
678     return sub {
679     my ($self, $attr) = @_;
680     my $value = $attr->value;
681     if ($value =~ /\A[0-9]+\z/) {
682     unless ($range_check->($value + 0)) {
683     $self->{onerror}->(node => $attr, type => 'out of range');
684     }
685     } else {
686     $self->{onerror}->(node => $attr, type => 'syntax error');
687     }
688     };
689     }; # $GetHTMLNonNegativeIntegerAttrChecker
690    
691 wakaba 1.11 my $GetHTMLFloatingPointNumberAttrChecker = sub {
692     my $range_check = shift;
693     return sub {
694     my ($self, $attr) = @_;
695     my $value = $attr->value;
696     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
697     unless ($range_check->($value + 0)) {
698     $self->{onerror}->(node => $attr, type => 'out of range');
699     }
700     } else {
701     $self->{onerror}->(node => $attr, type => 'syntax error');
702     }
703     };
704     }; # $GetHTMLFloatingPointNumberAttrChecker
705    
706 wakaba 1.10 my $HTMLAttrChecker = {
707     id => sub {
708     my ($self, $attr) = @_;
709     my $value = $attr->value;
710     unless (length $value > 0) {
711     ## NOTE: MUST contain at least one character
712     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
713     } else {
714     if ($self->{id}->{$value}) {
715     $self->{onerror}->(node => $attr, type => 'duplicate ID');
716     } else {
717     $self->{id}->{$value} = 1;
718     }
719     }
720     },
721     title => sub {}, ## NOTE: No conformance creteria
722     lang => sub {
723     ## TODO: RFC 3066 test
724     ## ISSUE: RFC 4646 (3066bis)?
725     ## TODO: HTML vs XHTML
726     },
727     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
728     class => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
729     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
730     ## TODO: tabindex
731     };
732    
733     my $GetHTMLAttrsChecker = sub {
734     my $element_specific_checker = shift;
735     return sub {
736     my ($self, $todo) = @_;
737     for my $attr (@{$todo->{node}->attributes}) {
738     my $attr_ns = $attr->namespace_uri;
739     $attr_ns = '' unless defined $attr_ns;
740     my $attr_ln = $attr->manakai_local_name;
741     my $checker;
742     if ($attr_ns eq '') {
743     $checker = $element_specific_checker->{$attr_ln}
744     || $HTMLAttrChecker->{$attr_ln};
745     }
746     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
747     || $AttrChecker->{$attr_ns}->{''};
748     if ($checker) {
749     $checker->($self, $attr);
750     } else {
751     $self->{onerror}->(node => $attr, type => 'attribute not supported');
752     ## ISSUE: No comformance createria for unknown attributes in the spec
753     }
754     }
755     };
756     }; # $GetHTMLAttrsChecker
757 wakaba 1.9
758     $Element->{$HTML_NS}->{''} = {
759 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
760 wakaba 1.9 checker => $ElementDefault->{checker},
761     };
762    
763 wakaba 1.1 $Element->{$HTML_NS}->{html} = {
764 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
765     xmlns => sub {
766     my ($self, $attr) = @_;
767     my $value = $attr->value;
768     unless ($value eq $HTML_NS) {
769     $self->{onerror}->(node => $attr, type => 'syntax error');
770     ## TODO: only in HTML documents
771     }
772     },
773     }),
774 wakaba 1.1 checker => sub {
775 wakaba 1.4 my ($self, $todo) = @_;
776     my $el = $todo->{node};
777     my $new_todos = [];
778 wakaba 1.1 my @nodes = (@{$el->child_nodes});
779    
780     my $phase = 'before head';
781     while (@nodes) {
782     my $node = shift @nodes;
783 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
784    
785 wakaba 1.1 my $nt = $node->node_type;
786     if ($nt == 1) {
787 wakaba 1.2 my $node_ns = $node->namespace_uri;
788     $node_ns = '' unless defined $node_ns;
789     my $node_ln = $node->manakai_local_name;
790 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
791 wakaba 1.1 if ($phase eq 'before head') {
792 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
793 wakaba 1.1 $phase = 'after head';
794 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
795     $self->{onerror}->(node => $node, type => 'ps element missing:head');
796 wakaba 1.1 $phase = 'after body';
797     } else {
798 wakaba 1.6 $not_allowed = 1;
799 wakaba 1.1 # before head
800     }
801     } elsif ($phase eq 'after head') {
802 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
803 wakaba 1.1 $phase = 'after body';
804     } else {
805 wakaba 1.6 $not_allowed = 1;
806 wakaba 1.1 # after head
807     }
808     } else { #elsif ($phase eq 'after body') {
809 wakaba 1.6 $not_allowed = 1;
810 wakaba 1.1 # after body
811     }
812 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
813     if $not_allowed;
814 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
815     unshift @nodes, @$sib;
816 wakaba 1.4 push @$new_todos, @$ch;
817 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
818     if ($node->data =~ /[^\x09-\x0D\x20]/) {
819 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
820 wakaba 1.1 }
821     } elsif ($nt == 5) {
822     unshift @nodes, @{$node->child_nodes};
823     }
824     }
825 wakaba 1.3
826     if ($phase eq 'before head') {
827     $self->{onerror}->(node => $el, type => 'child element missing:head');
828     $self->{onerror}->(node => $el, type => 'child element missing:body');
829     } elsif ($phase eq 'after head') {
830     $self->{onerror}->(node => $el, type => 'child element missing:body');
831     }
832    
833 wakaba 1.4 return ($new_todos);
834 wakaba 1.1 },
835     };
836    
837     $Element->{$HTML_NS}->{head} = {
838 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
839 wakaba 1.1 checker => sub {
840 wakaba 1.4 my ($self, $todo) = @_;
841     my $el = $todo->{node};
842     my $new_todos = [];
843 wakaba 1.1 my @nodes = (@{$el->child_nodes});
844    
845     my $has_title;
846 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
847 wakaba 1.1 while (@nodes) {
848     my $node = shift @nodes;
849 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
850    
851 wakaba 1.1 my $nt = $node->node_type;
852     if ($nt == 1) {
853 wakaba 1.2 my $node_ns = $node->namespace_uri;
854     $node_ns = '' unless defined $node_ns;
855     my $node_ln = $node->manakai_local_name;
856 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
857 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
858 wakaba 1.3 $phase = 'after base';
859 wakaba 1.1 unless ($has_title) {
860     $has_title = 1;
861     } else {
862 wakaba 1.6 $not_allowed = 1;
863 wakaba 1.1 }
864 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
865 wakaba 1.1 if ($node->has_attribute_ns (undef, 'charset')) {
866 wakaba 1.3 if ($phase eq 'initial') {
867     $phase = 'after charset';
868 wakaba 1.1 } else {
869 wakaba 1.6 $not_allowed = 1;
870 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
871 wakaba 1.1 }
872     } else {
873 wakaba 1.3 $phase = 'after base';
874 wakaba 1.1 }
875 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
876 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
877     $phase = 'after base';
878 wakaba 1.1 } else {
879 wakaba 1.6 $not_allowed = 1;
880 wakaba 1.1 }
881 wakaba 1.7 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
882     $phase = 'after base';
883 wakaba 1.1 } else {
884 wakaba 1.7 $not_allowed = 1;
885 wakaba 1.1 }
886 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
887     if $not_allowed;
888 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
889     unshift @nodes, @$sib;
890 wakaba 1.4 push @$new_todos, @$ch;
891 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
892     if ($node->data =~ /[^\x09-\x0D\x20]/) {
893 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
894 wakaba 1.1 }
895     } elsif ($nt == 5) {
896     unshift @nodes, @{$node->child_nodes};
897     }
898     }
899     unless ($has_title) {
900 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
901 wakaba 1.1 }
902 wakaba 1.4 return ($new_todos);
903 wakaba 1.1 },
904     };
905    
906     $Element->{$HTML_NS}->{title} = {
907 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
908 wakaba 1.1 checker => $HTMLTextChecker,
909     };
910    
911     $Element->{$HTML_NS}->{base} = {
912 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
913 wakaba 1.11 href => $HTMLURIAttrChecker,
914 wakaba 1.10 ## TODO: target
915     }),
916 wakaba 1.1 checker => $HTMLEmptyChecker,
917     };
918    
919     $Element->{$HTML_NS}->{link} = {
920 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
921 wakaba 1.1 checker => $HTMLEmptyChecker,
922     };
923    
924     $Element->{$HTML_NS}->{meta} = {
925 wakaba 1.10 attrs_checker => sub {
926     my ($self, $todo) = @_;
927     my $name_attr;
928     my $http_equiv_attr;
929     my $charset_attr;
930     my $content_attr;
931     for my $attr (@{$todo->{node}->attributes}) {
932     my $attr_ns = $attr->namespace_uri;
933     $attr_ns = '' unless defined $attr_ns;
934     my $attr_ln = $attr->manakai_local_name;
935     my $checker;
936     if ($attr_ns eq '') {
937     if ($attr_ln eq 'content') {
938     $content_attr = $attr;
939     $checker = 1;
940     } elsif ($attr_ln eq 'name') {
941     $name_attr = $attr;
942     $checker = 1;
943     } elsif ($attr_ln eq 'http-equiv') {
944     $http_equiv_attr = $attr;
945     $checker = 1;
946     } elsif ($attr_ln eq 'charset') {
947     $charset_attr = $attr;
948     $checker = 1;
949     } else {
950     $checker = $HTMLAttrChecker->{$attr_ln}
951     || $AttrChecker->{$attr_ns}->{$attr_ln}
952     || $AttrChecker->{$attr_ns}->{''};
953     }
954     } else {
955     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
956     || $AttrChecker->{$attr_ns}->{''};
957     }
958     if ($checker) {
959     $checker->($self, $attr) if ref $checker;
960     } else {
961     $self->{onerror}->(node => $attr, type => 'attribute not supported');
962     ## ISSUE: No comformance createria for unknown attributes in the spec
963     }
964     }
965    
966     if (defined $name_attr) {
967     if (defined $http_equiv_attr) {
968     $self->{onerror}->(node => $http_equiv_attr,
969     type => 'attribute not allowed');
970     } elsif (defined $charset_attr) {
971     $self->{onerror}->(node => $charset_attr,
972     type => 'attribute not allowed');
973     }
974     my $metadata_name = $name_attr->value;
975     my $metadata_value;
976     if (defined $content_attr) {
977     $metadata_value = $content_attr->value;
978     } else {
979     $self->{onerror}->(node => $todo->{node},
980     type => 'attribute missing:content');
981     $metadata_value = '';
982     }
983     } elsif (defined $http_equiv_attr) {
984     if (defined $charset_attr) {
985     $self->{onerror}->(node => $charset_attr,
986     type => 'attribute not allowed');
987     }
988     unless (defined $content_attr) {
989     $self->{onerror}->(node => $todo->{node},
990     type => 'attribute missing:content');
991     }
992     } elsif (defined $charset_attr) {
993     if (defined $content_attr) {
994     $self->{onerror}->(node => $content_attr,
995     type => 'attribute not allowed');
996     }
997     ## TODO: Allowed only in HTML documents
998     } else {
999     if (defined $content_attr) {
1000     $self->{onerror}->(node => $content_attr,
1001     type => 'attribute not allowed');
1002     $self->{onerror}->(node => $todo->{node},
1003     type => 'attribute missing:name|http-equiv');
1004     } else {
1005     $self->{onerror}->(node => $todo->{node},
1006     type => 'attribute missing:name|http-equiv|charset');
1007     }
1008     }
1009    
1010     ## TODO: metadata conformance
1011    
1012     ## TODO: pragma conformance
1013     if (defined $http_equiv_attr) { ## An enumerated attribute
1014     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1015     if ({
1016     'refresh' => 1,
1017     'default-style' => 1,
1018     }->{$keyword}) {
1019     #
1020     } else {
1021     $self->{onerror}->(node => $http_equiv_attr,
1022     type => 'invalid enumerated attribute value');
1023     }
1024     }
1025    
1026     ## TODO: charset
1027     },
1028 wakaba 1.1 checker => $HTMLEmptyChecker,
1029     };
1030    
1031     ## NOTE: |html:style| has no conformance creteria on content model
1032 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
1033 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1034     ## TODO: type
1035     ## TODO: media
1036     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1037     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1038     ## not different
1039     }),
1040 wakaba 1.3 checker => $AnyChecker,
1041     };
1042 wakaba 1.1
1043     $Element->{$HTML_NS}->{body} = {
1044 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1045 wakaba 1.1 checker => $HTMLBlockChecker,
1046     };
1047    
1048     $Element->{$HTML_NS}->{section} = {
1049 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1050 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1051     };
1052    
1053     $Element->{$HTML_NS}->{nav} = {
1054 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1055 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1056     };
1057    
1058     $Element->{$HTML_NS}->{article} = {
1059 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1060 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1061     };
1062    
1063     $Element->{$HTML_NS}->{blockquote} = {
1064 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1065     cite => $HTMLURIAttrChecker,
1066     }),
1067 wakaba 1.1 checker => $HTMLBlockChecker,
1068     };
1069    
1070     $Element->{$HTML_NS}->{aside} = {
1071 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1072 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1073 wakaba 1.1 };
1074    
1075     $Element->{$HTML_NS}->{h1} = {
1076 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1077 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1078     };
1079    
1080     $Element->{$HTML_NS}->{h2} = {
1081 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1082 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1083     };
1084    
1085     $Element->{$HTML_NS}->{h3} = {
1086 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1087 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1088     };
1089    
1090     $Element->{$HTML_NS}->{h4} = {
1091 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1092 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1093     };
1094    
1095     $Element->{$HTML_NS}->{h5} = {
1096 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1097 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1098     };
1099    
1100     $Element->{$HTML_NS}->{h6} = {
1101 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1102 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1103     };
1104    
1105     ## TODO: header
1106    
1107 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
1108 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1109 wakaba 1.2 checker => sub { ## block -hn -header -footer -sectioning or inline
1110 wakaba 1.4 my ($self, $todo) = @_;
1111     my $el = $todo->{node};
1112     my $new_todos = [];
1113 wakaba 1.2 my @nodes = (@{$el->child_nodes});
1114    
1115     my $content = 'block-or-inline'; # or 'block' or 'inline'
1116     my @block_not_inline;
1117     while (@nodes) {
1118     my $node = shift @nodes;
1119     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1120    
1121     my $nt = $node->node_type;
1122     if ($nt == 1) {
1123     my $node_ns = $node->namespace_uri;
1124     $node_ns = '' unless defined $node_ns;
1125     my $node_ln = $node->manakai_local_name;
1126 wakaba 1.6 my $not_allowed;
1127 wakaba 1.2 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1128 wakaba 1.6 $not_allowed = 1;
1129 wakaba 1.2 } elsif ($node_ns eq $HTML_NS and
1130     {
1131     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1132     }->{$node_ln}) {
1133 wakaba 1.6 $not_allowed = 1;
1134 wakaba 1.2 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1135 wakaba 1.6 $not_allowed = 1;
1136 wakaba 1.2 }
1137     if ($content eq 'block') {
1138 wakaba 1.7 $not_allowed = 1
1139     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1140 wakaba 1.2 } elsif ($content eq 'inline') {
1141 wakaba 1.7 $not_allowed = 1
1142     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1143     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1144 wakaba 1.2 } else {
1145 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1146     my $is_inline
1147     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1148     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1149 wakaba 1.2
1150 wakaba 1.6 push @block_not_inline, $node
1151     if $is_block and not $is_inline and not $not_allowed;
1152 wakaba 1.2 unless ($is_block) {
1153     $content = 'inline';
1154     for (@block_not_inline) {
1155     $self->{onerror}->(node => $_, type => 'element not allowed');
1156     }
1157 wakaba 1.6 $not_allowed = 1 unless $is_inline;
1158 wakaba 1.2 }
1159     }
1160 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1161     if $not_allowed;
1162 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1163     unshift @nodes, @$sib;
1164 wakaba 1.4 push @$new_todos, @$ch;
1165 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
1166     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1167     if ($content eq 'block') {
1168     $self->{onerror}->(node => $node, type => 'character not allowed');
1169     } else {
1170     $content = 'inline';
1171     for (@block_not_inline) {
1172     $self->{onerror}->(node => $_, type => 'element not allowed');
1173     }
1174     }
1175     }
1176     } elsif ($nt == 5) {
1177     unshift @nodes, @{$node->child_nodes};
1178     }
1179     }
1180    
1181     my $end = $self->_add_minuses
1182     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1183     $HTMLSectioningElements);
1184 wakaba 1.4 push @$new_todos, $end;
1185 wakaba 1.2
1186 wakaba 1.4 if ($content eq 'inline') {
1187     for (@$new_todos) {
1188     $_->{inline} = 1;
1189     }
1190     }
1191    
1192     return ($new_todos);
1193 wakaba 1.2 },
1194     };
1195 wakaba 1.1
1196     $Element->{$HTML_NS}->{address} = {
1197 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1198 wakaba 1.1 checker => $HTMLInlineChecker,
1199     };
1200    
1201     $Element->{$HTML_NS}->{p} = {
1202 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1203 wakaba 1.1 checker => $HTMLSignificantInlineChecker,
1204     };
1205    
1206     $Element->{$HTML_NS}->{hr} = {
1207 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1208 wakaba 1.1 checker => $HTMLEmptyChecker,
1209     };
1210    
1211     $Element->{$HTML_NS}->{br} = {
1212 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1213 wakaba 1.1 checker => $HTMLEmptyChecker,
1214     };
1215    
1216     $Element->{$HTML_NS}->{dialog} = {
1217 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1218 wakaba 1.1 checker => sub {
1219 wakaba 1.4 my ($self, $todo) = @_;
1220     my $el = $todo->{node};
1221     my $new_todos = [];
1222 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1223    
1224     my $phase = 'before dt';
1225     while (@nodes) {
1226     my $node = shift @nodes;
1227 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1228    
1229 wakaba 1.1 my $nt = $node->node_type;
1230     if ($nt == 1) {
1231 wakaba 1.8 my $node_ns = $node->namespace_uri;
1232     $node_ns = '' unless defined $node_ns;
1233     my $node_ln = $node->manakai_local_name;
1234 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1235 wakaba 1.1 if ($phase eq 'before dt') {
1236 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1237 wakaba 1.1 $phase = 'before dd';
1238 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1239 wakaba 1.2 $self->{onerror}
1240 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1241 wakaba 1.1 $phase = 'before dt';
1242     } else {
1243 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1244 wakaba 1.1 }
1245     } else { # before dd
1246 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1247 wakaba 1.1 $phase = 'before dt';
1248 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1249 wakaba 1.2 $self->{onerror}
1250 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
1251 wakaba 1.1 $phase = 'before dd';
1252     } else {
1253 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1254 wakaba 1.1 }
1255     }
1256 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1257     unshift @nodes, @$sib;
1258 wakaba 1.4 push @$new_todos, @$ch;
1259 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1260     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1261 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1262 wakaba 1.1 }
1263     } elsif ($nt == 5) {
1264     unshift @nodes, @{$node->child_nodes};
1265     }
1266     }
1267     if ($phase eq 'before dd') {
1268 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1269 wakaba 1.1 }
1270 wakaba 1.4 return ($new_todos);
1271 wakaba 1.1 },
1272     };
1273    
1274     $Element->{$HTML_NS}->{pre} = {
1275 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1276 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1277     };
1278    
1279     $Element->{$HTML_NS}->{ol} = {
1280 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1281     start => $HTMLIntegerAttrChecker,
1282     }),
1283 wakaba 1.1 checker => sub {
1284 wakaba 1.4 my ($self, $todo) = @_;
1285     my $el = $todo->{node};
1286     my $new_todos = [];
1287 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1288    
1289     while (@nodes) {
1290     my $node = shift @nodes;
1291 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1292    
1293 wakaba 1.1 my $nt = $node->node_type;
1294     if ($nt == 1) {
1295 wakaba 1.8 my $node_ns = $node->namespace_uri;
1296     $node_ns = '' unless defined $node_ns;
1297     my $node_ln = $node->manakai_local_name;
1298 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1299 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1300 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1301 wakaba 1.1 }
1302 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1303     unshift @nodes, @$sib;
1304 wakaba 1.4 push @$new_todos, @$ch;
1305 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1306     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1307 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1308 wakaba 1.1 }
1309     } elsif ($nt == 5) {
1310     unshift @nodes, @{$node->child_nodes};
1311     }
1312     }
1313 wakaba 1.4
1314     if ($todo->{inline}) {
1315     for (@$new_todos) {
1316     $_->{inline} = 1;
1317     }
1318     }
1319     return ($new_todos);
1320 wakaba 1.1 },
1321     };
1322    
1323     $Element->{$HTML_NS}->{ul} = {
1324 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1325 wakaba 1.1 checker => $Element->{$HTML_NS}->{ol}->{checker},
1326     };
1327    
1328 wakaba 1.5
1329     $Element->{$HTML_NS}->{li} = {
1330 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1331     start => sub {
1332     my ($self, $attr) = @_;
1333     my $parent = $attr->owner_element->manakai_parent_element;
1334     if (defined $parent) {
1335     my $parent_ns = $parent->namespace_uri;
1336     $parent_ns = '' unless defined $parent_ns;
1337     my $parent_ln = $parent->manakai_local_name;
1338     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1339     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1340     }
1341     }
1342     $HTMLIntegerAttrChecker->($self, $attr);
1343     },
1344     }),
1345 wakaba 1.5 checker => sub {
1346     my ($self, $todo) = @_;
1347     if ($todo->{inline}) {
1348     return $HTMLInlineChecker->($self, $todo);
1349     } else {
1350     return $HTMLBlockOrInlineChecker->($self, $todo);
1351     }
1352     },
1353     };
1354 wakaba 1.1
1355     $Element->{$HTML_NS}->{dl} = {
1356 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1357 wakaba 1.1 checker => sub {
1358 wakaba 1.4 my ($self, $todo) = @_;
1359     my $el = $todo->{node};
1360     my $new_todos = [];
1361 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1362    
1363     my $phase = 'before dt';
1364     while (@nodes) {
1365     my $node = shift @nodes;
1366 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1367    
1368 wakaba 1.1 my $nt = $node->node_type;
1369     if ($nt == 1) {
1370 wakaba 1.8 my $node_ns = $node->namespace_uri;
1371     $node_ns = '' unless defined $node_ns;
1372     my $node_ln = $node->manakai_local_name;
1373 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1374 wakaba 1.1 if ($phase eq 'in dds') {
1375 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1376 wakaba 1.1 #$phase = 'in dds';
1377 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1378 wakaba 1.1 $phase = 'in dts';
1379     } else {
1380 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1381 wakaba 1.1 }
1382     } elsif ($phase eq 'in dts') {
1383 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1384 wakaba 1.1 #$phase = 'in dts';
1385 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1386 wakaba 1.1 $phase = 'in dds';
1387     } else {
1388 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1389 wakaba 1.1 }
1390     } else { # before dt
1391 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1392 wakaba 1.1 $phase = 'in dts';
1393 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1394 wakaba 1.2 $self->{onerror}
1395 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1396 wakaba 1.1 $phase = 'in dds';
1397     } else {
1398 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1399 wakaba 1.1 }
1400     }
1401 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1402     unshift @nodes, @$sib;
1403 wakaba 1.4 push @$new_todos, @$ch;
1404 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1405     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1406 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1407 wakaba 1.1 }
1408     } elsif ($nt == 5) {
1409     unshift @nodes, @{$node->child_nodes};
1410     }
1411     }
1412     if ($phase eq 'in dts') {
1413 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1414 wakaba 1.1 }
1415 wakaba 1.4
1416     if ($todo->{inline}) {
1417     for (@$new_todos) {
1418     $_->{inline} = 1;
1419     }
1420     }
1421     return ($new_todos);
1422 wakaba 1.1 },
1423     };
1424    
1425     $Element->{$HTML_NS}->{dt} = {
1426 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1427 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1428     };
1429    
1430 wakaba 1.4 $Element->{$HTML_NS}->{dd} = {
1431 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1432 wakaba 1.5 checker => $Element->{$HTML_NS}->{li}->{checker},
1433 wakaba 1.4 };
1434 wakaba 1.1
1435 wakaba 1.6 $Element->{$HTML_NS}->{a} = {
1436 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1437 wakaba 1.6 checker => sub {
1438     my ($self, $todo) = @_;
1439    
1440     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1441     my ($sib, $ch)
1442     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1443     push @$sib, $end;
1444     return ($sib, $ch);
1445     },
1446     };
1447 wakaba 1.1
1448 wakaba 1.4 $Element->{$HTML_NS}->{q} = {
1449 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1450     cite => $HTMLURIAttrChecker,
1451     }),
1452 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1453     };
1454 wakaba 1.1
1455     $Element->{$HTML_NS}->{cite} = {
1456 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1457 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1458     };
1459    
1460 wakaba 1.4 $Element->{$HTML_NS}->{em} = {
1461 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1462 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1463     };
1464    
1465     $Element->{$HTML_NS}->{strong} = {
1466 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1467 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1468     };
1469 wakaba 1.1
1470 wakaba 1.4 $Element->{$HTML_NS}->{small} = {
1471 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1472 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1473     };
1474    
1475     $Element->{$HTML_NS}->{m} = {
1476 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1477 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1478     };
1479    
1480 wakaba 1.11 $Element->{$HTML_NS}->{dfn} = { ## TODO: term duplication
1481 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1482 wakaba 1.4 checker => sub {
1483     my ($self, $todo) = @_;
1484    
1485     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1486     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1487     push @$sib, $end;
1488     return ($sib, $ch);
1489     },
1490     };
1491 wakaba 1.1
1492     $Element->{$HTML_NS}->{abbr} = {
1493 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1494     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1495     ## not different. The spec says that the |title| MAY be omitted
1496     ## if there is a |dfn| whose defining term is the abbreviation,
1497     ## but it does not prohibit |abbr| w/o |title| in other cases.
1498     }),
1499 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1500     };
1501    
1502 wakaba 1.11 $Element->{$HTML_NS}->{time} = { ## TODO: validate content
1503     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO: datetime
1504 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1505     };
1506    
1507 wakaba 1.11 $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1508     attrs_checker => $GetHTMLAttrsChecker->({
1509     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1510     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1511     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1512     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1513     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1514     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1515     }),
1516 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1517     };
1518    
1519 wakaba 1.11 $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1520     attrs_checker => $GetHTMLAttrsChecker->({
1521     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1522     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1523     }),
1524 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1525     };
1526    
1527 wakaba 1.4 $Element->{$HTML_NS}->{code} = {
1528 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1529 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1530     ## syntatically same as the |title| as global attribute.
1531 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1532     };
1533 wakaba 1.1
1534     $Element->{$HTML_NS}->{var} = {
1535 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1536 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1537     ## syntatically same as the |title| as global attribute.
1538 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1539     };
1540    
1541 wakaba 1.4 $Element->{$HTML_NS}->{samp} = {
1542 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1543 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1544     ## syntatically same as the |title| as global attribute.
1545 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1546     };
1547 wakaba 1.1
1548     $Element->{$HTML_NS}->{kbd} = {
1549 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1550 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1551     };
1552    
1553     $Element->{$HTML_NS}->{sub} = {
1554 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1555 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1556     };
1557    
1558     $Element->{$HTML_NS}->{sup} = {
1559 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1560 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1561     };
1562    
1563 wakaba 1.4 $Element->{$HTML_NS}->{span} = {
1564 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1565 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1566     ## syntatically same as the |title| as global attribute.
1567 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1568     };
1569 wakaba 1.1
1570     $Element->{$HTML_NS}->{i} = {
1571 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1572 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1573     ## syntatically same as the |title| as global attribute.
1574 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1575     };
1576    
1577     $Element->{$HTML_NS}->{b} = {
1578 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1579 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1580     };
1581    
1582     $Element->{$HTML_NS}->{bdo} = {
1583 wakaba 1.12 attrs_checker => sub {
1584     my ($self, $todo) = @_;
1585     $GetHTMLAttrsChecker->({})->($self, $todo);
1586     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
1587     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
1588     }
1589     },
1590     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1591 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1592     };
1593    
1594     $Element->{$HTML_NS}->{ins} = {
1595 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1596     cite => $HTMLURIAttrChecker,
1597     ## TODO: datetime
1598     }),
1599 wakaba 1.1 checker => $HTMLTransparentChecker,
1600     };
1601    
1602     $Element->{$HTML_NS}->{del} = {
1603 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1604     cite => $HTMLURIAttrChecker,
1605     ## TODO: datetime
1606     }),
1607 wakaba 1.1 checker => sub {
1608 wakaba 1.4 my ($self, $todo) = @_;
1609 wakaba 1.1
1610 wakaba 1.4 my $parent = $todo->{node}->manakai_parent_element;
1611 wakaba 1.1 if (defined $parent) {
1612     my $nsuri = $parent->namespace_uri;
1613     $nsuri = '' unless defined $nsuri;
1614     my $ln = $parent->manakai_local_name;
1615     my $eldef = $Element->{$nsuri}->{$ln} ||
1616     $Element->{$nsuri}->{''} ||
1617     $ElementDefault;
1618 wakaba 1.4 return $eldef->{checker}->($self, $todo);
1619 wakaba 1.1 } else {
1620 wakaba 1.4 return $HTMLBlockOrInlineChecker->($self, $todo);
1621 wakaba 1.1 }
1622     },
1623     };
1624    
1625     ## TODO: figure
1626    
1627     $Element->{$HTML_NS}->{img} = {
1628 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1629 wakaba 1.1 checker => $HTMLEmptyChecker,
1630     };
1631    
1632     $Element->{$HTML_NS}->{iframe} = {
1633 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1634     src => $HTMLURIAttrChecker,
1635     }),
1636 wakaba 1.1 checker => $HTMLTextChecker,
1637     };
1638    
1639     $Element->{$HTML_NS}->{embed} = {
1640 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1641 wakaba 1.1 checker => $HTMLEmptyChecker,
1642     };
1643    
1644     $Element->{$HTML_NS}->{param} = {
1645 wakaba 1.12 attrs_checker => sub {
1646     my ($self, $todo) = @_;
1647     $GetHTMLAttrsChecker->({
1648     name => sub { },
1649     value => sub { },
1650     })->($self, $todo);
1651     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
1652     $self->{onerror}->(node => $todo->{node},
1653     type => 'attribute missing:name');
1654     }
1655     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
1656     $self->{onerror}->(node => $todo->{node},
1657     type => 'attribute missing:value');
1658     }
1659     },
1660 wakaba 1.1 checker => $HTMLEmptyChecker,
1661     };
1662    
1663     ## TODO: object
1664    
1665 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
1666 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1667     src => $HTMLURIAttrChecker,
1668     ## TODO: start, loopstart, loopend, end
1669     ## ISSUE: they MUST be "value time offset"s. Value?
1670     ## ISSUE: loopcount has no conformance creteria
1671     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
1672     controls => $GetHTMLBooleanAttrChecker->('controls'),
1673     }),
1674 wakaba 1.2 checker => sub {
1675 wakaba 1.4 my ($self, $todo) = @_;
1676 wakaba 1.2
1677 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1678     return $HTMLBlockOrInlineChecker->($self, $todo);
1679 wakaba 1.2 } else {
1680     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
1681 wakaba 1.4 ->($self, $todo);
1682 wakaba 1.2 }
1683     },
1684     };
1685    
1686     $Element->{$HTML_NS}->{audio} = {
1687 wakaba 1.12 attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
1688     checker => $Element->{$HTML_NS}->{video}->{checker},
1689 wakaba 1.2 };
1690 wakaba 1.1
1691     $Element->{$HTML_NS}->{source} = {
1692 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1693 wakaba 1.1 checker => $HTMLEmptyChecker,
1694     };
1695    
1696     $Element->{$HTML_NS}->{canvas} = {
1697 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1698     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
1699     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
1700     }),
1701 wakaba 1.1 checker => $HTMLInlineChecker,
1702     };
1703    
1704     $Element->{$HTML_NS}->{map} = {
1705 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1706 wakaba 1.1 checker => $HTMLBlockChecker,
1707     };
1708    
1709     $Element->{$HTML_NS}->{area} = {
1710 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1711 wakaba 1.1 checker => $HTMLEmptyChecker,
1712     };
1713     ## TODO: only in map
1714    
1715     $Element->{$HTML_NS}->{table} = {
1716 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1717 wakaba 1.1 checker => sub {
1718 wakaba 1.4 my ($self, $todo) = @_;
1719     my $el = $todo->{node};
1720     my $new_todos = [];
1721 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1722    
1723     my $phase = 'before caption';
1724     my $has_tfoot;
1725     while (@nodes) {
1726     my $node = shift @nodes;
1727 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1728    
1729 wakaba 1.1 my $nt = $node->node_type;
1730     if ($nt == 1) {
1731 wakaba 1.8 my $node_ns = $node->namespace_uri;
1732     $node_ns = '' unless defined $node_ns;
1733     my $node_ln = $node->manakai_local_name;
1734 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1735 wakaba 1.1 if ($phase eq 'in tbodys') {
1736 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1737 wakaba 1.1 #$phase = 'in tbodys';
1738     } elsif (not $has_tfoot and
1739 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1740 wakaba 1.1 $phase = 'after tfoot';
1741     $has_tfoot = 1;
1742     } else {
1743 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1744 wakaba 1.1 }
1745     } elsif ($phase eq 'in trs') {
1746 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1747 wakaba 1.1 #$phase = 'in trs';
1748     } elsif (not $has_tfoot and
1749 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1750 wakaba 1.1 $phase = 'after tfoot';
1751     $has_tfoot = 1;
1752     } else {
1753 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1754 wakaba 1.1 }
1755     } elsif ($phase eq 'after thead') {
1756 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1757 wakaba 1.1 $phase = 'in tbodys';
1758 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1759 wakaba 1.1 $phase = 'in trs';
1760 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1761 wakaba 1.1 $phase = 'in tbodys';
1762     $has_tfoot = 1;
1763     } else {
1764 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1765 wakaba 1.1 }
1766     } elsif ($phase eq 'in colgroup') {
1767 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
1768 wakaba 1.1 $phase = 'in colgroup';
1769 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
1770 wakaba 1.1 $phase = 'after thead';
1771 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1772 wakaba 1.1 $phase = 'in tbodys';
1773 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1774 wakaba 1.1 $phase = 'in trs';
1775 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1776 wakaba 1.1 $phase = 'in tbodys';
1777     $has_tfoot = 1;
1778     } else {
1779 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1780 wakaba 1.1 }
1781     } elsif ($phase eq 'before caption') {
1782 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
1783 wakaba 1.1 $phase = 'in colgroup';
1784 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
1785 wakaba 1.1 $phase = 'in colgroup';
1786 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
1787 wakaba 1.1 $phase = 'after thead';
1788 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1789 wakaba 1.1 $phase = 'in tbodys';
1790 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1791 wakaba 1.1 $phase = 'in trs';
1792 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1793 wakaba 1.1 $phase = 'in tbodys';
1794     $has_tfoot = 1;
1795     } else {
1796 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1797 wakaba 1.1 }
1798     } else { # after tfoot
1799 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1800 wakaba 1.1 }
1801 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1802     unshift @nodes, @$sib;
1803 wakaba 1.4 push @$new_todos, @$ch;
1804 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1805     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1806 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1807 wakaba 1.1 }
1808     } elsif ($nt == 5) {
1809     unshift @nodes, @{$node->child_nodes};
1810     }
1811     }
1812 wakaba 1.4 return ($new_todos);
1813 wakaba 1.1 },
1814     };
1815    
1816     $Element->{$HTML_NS}->{caption} = {
1817 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1818 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1819     };
1820    
1821     $Element->{$HTML_NS}->{colgroup} = {
1822 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1823 wakaba 1.1 checker => sub {
1824 wakaba 1.4 my ($self, $todo) = @_;
1825     my $el = $todo->{node};
1826     my $new_todos = [];
1827 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1828    
1829     while (@nodes) {
1830     my $node = shift @nodes;
1831 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1832    
1833 wakaba 1.1 my $nt = $node->node_type;
1834     if ($nt == 1) {
1835 wakaba 1.8 my $node_ns = $node->namespace_uri;
1836     $node_ns = '' unless defined $node_ns;
1837     my $node_ln = $node->manakai_local_name;
1838 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1839 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
1840 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1841 wakaba 1.1 }
1842 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1843     unshift @nodes, @$sib;
1844 wakaba 1.4 push @$new_todos, @$ch;
1845 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1846     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1847 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1848 wakaba 1.1 }
1849     } elsif ($nt == 5) {
1850     unshift @nodes, @{$node->child_nodes};
1851     }
1852     }
1853 wakaba 1.4 return ($new_todos);
1854 wakaba 1.1 },
1855     };
1856    
1857     $Element->{$HTML_NS}->{col} = {
1858 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1859     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
1860     }),
1861 wakaba 1.1 checker => $HTMLEmptyChecker,
1862     };
1863    
1864     $Element->{$HTML_NS}->{tbody} = {
1865 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1866 wakaba 1.1 checker => sub {
1867 wakaba 1.4 my ($self, $todo) = @_;
1868     my $el = $todo->{node};
1869     my $new_todos = [];
1870 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1871    
1872     my $has_tr;
1873     while (@nodes) {
1874     my $node = shift @nodes;
1875 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1876    
1877 wakaba 1.1 my $nt = $node->node_type;
1878     if ($nt == 1) {
1879 wakaba 1.8 my $node_ns = $node->namespace_uri;
1880     $node_ns = '' unless defined $node_ns;
1881     my $node_ln = $node->manakai_local_name;
1882 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1883 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1884 wakaba 1.1 $has_tr = 1;
1885     } else {
1886 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1887 wakaba 1.1 }
1888 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1889     unshift @nodes, @$sib;
1890 wakaba 1.4 push @$new_todos, @$ch;
1891 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1892     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1893 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1894 wakaba 1.1 }
1895     } elsif ($nt == 5) {
1896     unshift @nodes, @{$node->child_nodes};
1897     }
1898     }
1899     unless ($has_tr) {
1900 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
1901 wakaba 1.1 }
1902 wakaba 1.4 return ($new_todos);
1903 wakaba 1.1 },
1904     };
1905    
1906     $Element->{$HTML_NS}->{thead} = {
1907 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1908 wakaba 1.1 checker => $Element->{$HTML_NS}->{tbody},
1909     };
1910    
1911     $Element->{$HTML_NS}->{tfoot} = {
1912 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1913 wakaba 1.1 checker => $Element->{$HTML_NS}->{tbody},
1914     };
1915    
1916     $Element->{$HTML_NS}->{tr} = {
1917 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1918 wakaba 1.1 checker => sub {
1919 wakaba 1.4 my ($self, $todo) = @_;
1920     my $el = $todo->{node};
1921     my $new_todos = [];
1922 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1923    
1924     my $has_td;
1925     while (@nodes) {
1926     my $node = shift @nodes;
1927 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1928    
1929 wakaba 1.1 my $nt = $node->node_type;
1930     if ($nt == 1) {
1931 wakaba 1.8 my $node_ns = $node->namespace_uri;
1932     $node_ns = '' unless defined $node_ns;
1933     my $node_ln = $node->manakai_local_name;
1934 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1935 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
1936 wakaba 1.1 $has_td = 1;
1937     } else {
1938 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1939 wakaba 1.1 }
1940 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1941     unshift @nodes, @$sib;
1942 wakaba 1.4 push @$new_todos, @$ch;
1943 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1944     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1945 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1946 wakaba 1.1 }
1947     } elsif ($nt == 5) {
1948     unshift @nodes, @{$node->child_nodes};
1949     }
1950     }
1951     unless ($has_td) {
1952 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
1953 wakaba 1.1 }
1954 wakaba 1.4 return ($new_todos);
1955 wakaba 1.1 },
1956     };
1957    
1958     $Element->{$HTML_NS}->{td} = {
1959 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1960     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
1961     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
1962     }),
1963 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1964     };
1965    
1966     $Element->{$HTML_NS}->{th} = {
1967 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1968     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
1969     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
1970     scope => $GetHTMLEnumeratedAttrChecker
1971     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
1972     }),
1973 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1974     };
1975    
1976 wakaba 1.12 ## TODO: table model error checking
1977    
1978 wakaba 1.1 ## TODO: forms
1979    
1980 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
1981 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1982 wakaba 1.2 checker => sub {
1983 wakaba 1.4 my ($self, $todo) = @_;
1984 wakaba 1.2
1985 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1986     return $HTMLEmptyChecker->($self, $todo);
1987 wakaba 1.2 } else {
1988     ## NOTE: No content model conformance in HTML5 spec.
1989 wakaba 1.4 return $AnyChecker->($self, $todo);
1990 wakaba 1.2 }
1991     },
1992     };
1993    
1994     ## NOTE: When script is disabled.
1995     $Element->{$HTML_NS}->{noscript} = {
1996 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1997 wakaba 1.2 checker => sub {
1998 wakaba 1.4 my ($self, $todo) = @_;
1999 wakaba 1.1
2000 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2001 wakaba 1.4 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2002 wakaba 1.2 push @$sib, $end;
2003     return ($sib, $ch);
2004     },
2005     };
2006 wakaba 1.1
2007     $Element->{$HTML_NS}->{'event-source'} = {
2008 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2009     src => $HTMLURIAttrChecker,
2010     }),
2011 wakaba 1.1 checker => $HTMLEmptyChecker,
2012     };
2013    
2014     $Element->{$HTML_NS}->{details} = {
2015 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2016     open => $GetHTMLBooleanAttrChecker->('open'),
2017     }),
2018 wakaba 1.6 checker => sub {
2019     my ($self, $todo) = @_;
2020    
2021     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2022     my ($sib, $ch)
2023     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2024     ->($self, $todo);
2025     push @$sib, $end;
2026     return ($sib, $ch);
2027     },
2028 wakaba 1.1 };
2029    
2030     $Element->{$HTML_NS}->{datagrid} = {
2031 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2032     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2033     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2034     }),
2035 wakaba 1.6 checker => sub {
2036     my ($self, $todo) = @_;
2037    
2038     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2039     my ($sib, $ch) = $HTMLBlockChecker->($self, $todo);
2040     push @$sib, $end;
2041     return ($sib, $ch);
2042     },
2043 wakaba 1.1 };
2044    
2045     $Element->{$HTML_NS}->{command} = {
2046 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2047 wakaba 1.1 checker => $HTMLEmptyChecker,
2048     };
2049    
2050     $Element->{$HTML_NS}->{menu} = {
2051 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2052 wakaba 1.1 checker => sub {
2053 wakaba 1.4 my ($self, $todo) = @_;
2054     my $el = $todo->{node};
2055     my $new_todos = [];
2056 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2057    
2058     my $content = 'li or inline';
2059     while (@nodes) {
2060     my $node = shift @nodes;
2061 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2062    
2063 wakaba 1.1 my $nt = $node->node_type;
2064     if ($nt == 1) {
2065 wakaba 1.2 my $node_ns = $node->namespace_uri;
2066     $node_ns = '' unless defined $node_ns;
2067     my $node_ln = $node->manakai_local_name;
2068 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2069 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2070 wakaba 1.1 if ($content eq 'inline') {
2071 wakaba 1.6 $not_allowed = 1;
2072 wakaba 1.1 } elsif ($content eq 'li or inline') {
2073     $content = 'li';
2074     }
2075     } else {
2076 wakaba 1.7 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2077     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2078     $content = 'inline';
2079     } else {
2080 wakaba 1.6 $not_allowed = 1;
2081 wakaba 1.7 }
2082 wakaba 1.1 }
2083 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
2084     if $not_allowed;
2085 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2086     unshift @nodes, @$sib;
2087 wakaba 1.4 push @$new_todos, @$ch;
2088 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2089     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2090     if ($content eq 'li') {
2091 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2092 wakaba 1.1 } elsif ($content eq 'li or inline') {
2093     $content = 'inline';
2094     }
2095     }
2096     } elsif ($nt == 5) {
2097     unshift @nodes, @{$node->child_nodes};
2098     }
2099     }
2100 wakaba 1.4
2101     for (@$new_todos) {
2102     $_->{inline} = 1;
2103     }
2104     return ($new_todos);
2105 wakaba 1.1 },
2106     };
2107    
2108 wakaba 1.6 $Element->{$HTML_NS}->{legend} = {
2109 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2110 wakaba 1.6 checker => sub {
2111     my ($self, $todo) = @_;
2112    
2113     my $parent = $todo->{node}->manakai_parent_element;
2114     if (defined $parent) {
2115     my $nsuri = $parent->namespace_uri;
2116     $nsuri = '' unless defined $nsuri;
2117     my $ln = $parent->manakai_local_name;
2118     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
2119     return $HTMLInlineChecker->($self, $todo);
2120     } else {
2121     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
2122     }
2123     } else {
2124     return $HTMLInlineChecker->($self, $todo);
2125     }
2126    
2127     ## ISSUE: Content model is defined only for fieldset/legend,
2128     ## details/legend, and figure/legend.
2129     },
2130     };
2131 wakaba 1.1
2132     $Element->{$HTML_NS}->{div} = {
2133 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2134 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
2135 wakaba 1.1 };
2136    
2137     $Element->{$HTML_NS}->{font} = {
2138 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2139 wakaba 1.1 checker => $HTMLTransparentChecker,
2140     };
2141    
2142 wakaba 1.2 sub new ($) {
2143     return bless {}, shift;
2144     } # new
2145    
2146 wakaba 1.1 sub check_element ($$$) {
2147     my ($self, $el, $onerror) = @_;
2148    
2149 wakaba 1.2 $self->{minuses} = {};
2150     $self->{onerror} = $onerror;
2151 wakaba 1.10 $self->{id} = {};
2152 wakaba 1.2
2153 wakaba 1.4 my @todo = ({type => 'element', node => $el});
2154     while (@todo) {
2155     my $todo = shift @todo;
2156     if ($todo->{type} eq 'element') {
2157 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2158     if (defined $prefix and $prefix eq 'xmlns') {
2159     $self->{onerror}
2160     ->(node => $todo->{node},
2161     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2162     }
2163 wakaba 1.4 my $nsuri = $todo->{node}->namespace_uri;
2164     $nsuri = '' unless defined $nsuri;
2165     my $ln = $todo->{node}->manakai_local_name;
2166     my $eldef = $Element->{$nsuri}->{$ln} ||
2167     $Element->{$nsuri}->{''} ||
2168     $ElementDefault;
2169 wakaba 1.9 $eldef->{attrs_checker}->($self, $todo);
2170 wakaba 1.4 my ($new_todos) = $eldef->{checker}->($self, $todo);
2171 wakaba 1.14 unshift @todo, @$new_todos;
2172 wakaba 1.9 } elsif ($todo->{type} eq 'element-attributes') {
2173 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2174     if (defined $prefix and $prefix eq 'xmlns') {
2175     $self->{onerror}
2176     ->(node => $todo->{node},
2177     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2178     }
2179 wakaba 1.9 my $nsuri = $todo->{node}->namespace_uri;
2180     $nsuri = '' unless defined $nsuri;
2181     my $ln = $todo->{node}->manakai_local_name;
2182     my $eldef = $Element->{$nsuri}->{$ln} ||
2183     $Element->{$nsuri}->{''} ||
2184     $ElementDefault;
2185     $eldef->{attrs_checker}->($self, $todo);
2186 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
2187     $self->_remove_minuses ($todo);
2188     }
2189 wakaba 1.1 }
2190     } # check_element
2191    
2192 wakaba 1.2 sub _add_minuses ($@) {
2193     my $self = shift;
2194     my $r = {};
2195     for my $list (@_) {
2196     for my $ns (keys %$list) {
2197     for my $ln (keys %{$list->{$ns}}) {
2198     unless ($self->{minuses}->{$ns}->{$ln}) {
2199     $self->{minuses}->{$ns}->{$ln} = 1;
2200     $r->{$ns}->{$ln} = 1;
2201     }
2202     }
2203     }
2204     }
2205 wakaba 1.4 return {type => 'plus', list => $r};
2206 wakaba 1.2 } # _add_minuses
2207    
2208     sub _remove_minuses ($$) {
2209 wakaba 1.4 my ($self, $todo) = @_;
2210     for my $ns (keys %{$todo->{list}}) {
2211     for my $ln (keys %{$todo->{list}->{$ns}}) {
2212     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
2213 wakaba 1.2 }
2214     }
2215     1;
2216     } # _remove_minuses
2217    
2218     sub _check_get_children ($$) {
2219     my ($self, $node) = @_;
2220 wakaba 1.4 my $new_todos = [];
2221 wakaba 1.2 my $sib = [];
2222     TP: {
2223     my $node_ns = $node->namespace_uri;
2224     $node_ns = '' unless defined $node_ns;
2225     my $node_ln = $node->manakai_local_name;
2226     if ($node_ns eq $HTML_NS) {
2227     if ($node_ln eq 'noscript') {
2228     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
2229     push @$sib, $end;
2230     }
2231     }
2232 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
2233     unshift @$sib, @{$node->child_nodes};
2234 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2235 wakaba 1.7 last TP;
2236 wakaba 1.2 }
2237 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
2238 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
2239     unshift @$sib, @{$node->child_nodes};
2240 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2241 wakaba 1.2 last TP;
2242     } else {
2243     my @cn = @{$node->child_nodes};
2244     CN: while (@cn) {
2245     my $cn = shift @cn;
2246     my $cnt = $cn->node_type;
2247     if ($cnt == 1) {
2248 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
2249     $cn_nsuri = '' unless defined $cn_nsuri;
2250     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
2251 wakaba 1.2 #
2252     } else {
2253     last CN;
2254     }
2255     } elsif ($cnt == 3 or $cnt == 4) {
2256     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
2257     last CN;
2258     }
2259     }
2260     } # CN
2261     unshift @$sib, @cn;
2262     }
2263     }
2264 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
2265 wakaba 1.2 } # TP
2266 wakaba 1.4 return ($sib, $new_todos);
2267 wakaba 1.2 } # _check_get_children
2268    
2269 wakaba 1.1 1;
2270 wakaba 1.14 # $Date: 2007/05/19 14:29:09 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24