/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.13 - (hide annotations) (download)
Sat May 19 14:29:09 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.12: +101 -5 lines
++ whatpm/t/ChangeLog	19 May 2007 14:28:59 -0000
	* content-model-3.dat: New test.

	* ContentChecker.t (@FILES): |content-model-3.dat| added.

2007-05-19  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	19 May 2007 14:28:30 -0000
	* ContentChecker.pm: Support |xml:*| and |xmlns:*|
	attributes.  Report an error if |Element.prefix|
	is |xmlns|.

	* NanoDOM.pm (prefix): New attribute.

2007-05-19  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
5     ## be applied to an in-memory representation (i.e. DOM)?
6    
7 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
8     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
9    
10     my $AttrChecker = {
11     $XML_NS => {
12 wakaba 1.13 space => sub {
13     my ($self, $attr) = @_;
14     my $value = $attr->value;
15     if ($value eq 'default' or $value eq 'preserve') {
16     #
17     } else {
18     ## NOTE: An XML "error"
19     $self->{onerror}->(node => $attr,
20     type => 'XML error:invalid xml:space value');
21     }
22     },
23     lang => sub {
24     ## NOTE: "The values of the attribute are language identifiers
25     ## as defined by [IETF RFC 3066], Tags for the Identification
26     ## of Languages, or its successor; in addition, the empty string
27     ## may be specified." ("may" in lower case)
28     ## TODO: xml:lang MUST NOT in HTML document
29     },
30     base => sub {
31     my ($self, $attr) = @_;
32     my $value = $attr->value;
33     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
34     $self->{onerror}->(node => $attr,
35     type => 'syntax error');
36     }
37     ## NOTE: Conformance to URI standard is not checked.
38     },
39     id => sub {
40     my ($self, $attr) = @_;
41     my $value = $attr->value;
42     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
43     $value =~ s/^\x20//;
44     $value =~ s/\x20$//;
45     ## TODO: NCName in XML 1.0 or 1.1
46     ## TODO: declared type is ID?
47     if ($self->{id}->{$value}) {
48     $self->{onerror}->(node => $attr, type => 'xml:id error:duplicate ID');
49     } else {
50     $self->{id}->{$value} = 1;
51     }
52     },
53 wakaba 1.9 },
54     $XMLNS_NS => {
55 wakaba 1.13 '' => sub {
56     my ($self, $attr) = @_;
57     my $ln = $attr->manakai_local_name;
58     my $value = $attr->value;
59     if ($value eq $XML_NS and $ln ne 'xml') {
60     $self->{onerror}
61     ->(node => $attr,
62     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
63     } elsif ($value eq $XMLNS_NS) {
64     $self->{onerror}
65     ->(node => $attr,
66     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
67     }
68     if ($ln eq 'xml' and $value ne $XML_NS) {
69     $self->{onerror}
70     ->(node => $attr,
71     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xml=');
72     } elsif ($ln eq 'xmlns') {
73     $self->{onerror}
74     ->(node => $attr,
75     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xmlns=');
76     }
77     ## TODO: If XML 1.0 and empty
78     },
79     xmlns => sub {
80     my ($self, $attr) = @_;
81     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
82     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
83     my $value = $attr->value;
84     if ($value eq $XML_NS) {
85     $self->{onerror}
86     ->(node => $attr,
87     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
88     } elsif ($value eq $XMLNS_NS) {
89     $self->{onerror}
90     ->(node => $attr,
91     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
92     }
93     },
94 wakaba 1.9 },
95     };
96    
97 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
98     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
99     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
100     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
101    
102 wakaba 1.3 ## ANY
103     my $AnyChecker = sub {
104 wakaba 1.4 my ($self, $todo) = @_;
105     my $el = $todo->{node};
106     my $new_todos = [];
107 wakaba 1.3 my @nodes = (@{$el->child_nodes});
108     while (@nodes) {
109     my $node = shift @nodes;
110     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
111    
112     my $nt = $node->node_type;
113     if ($nt == 1) {
114     my $node_ns = $node->namespace_uri;
115     $node_ns = '' unless defined $node_ns;
116     my $node_ln = $node->manakai_local_name;
117     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
118     $self->{onerror}->(node => $node, type => 'element not allowed');
119     }
120 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
121 wakaba 1.3 } elsif ($nt == 5) {
122     unshift @nodes, @{$node->child_nodes};
123     }
124     }
125 wakaba 1.4 return ($new_todos);
126 wakaba 1.3 }; # $AnyChecker
127    
128 wakaba 1.1 my $ElementDefault = {
129     checker => sub {
130 wakaba 1.4 my ($self, $todo) = @_;
131     $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
132     return $AnyChecker->($self, $todo);
133 wakaba 1.1 },
134 wakaba 1.9 attrs_checker => sub {
135     my ($self, $todo) = @_;
136     for my $attr (@{$todo->{node}->attributes}) {
137     my $attr_ns = $attr->namespace_uri;
138     $attr_ns = '' unless defined $attr_ns;
139     my $attr_ln = $attr->manakai_local_name;
140     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
141     || $AttrChecker->{$attr_ns}->{''};
142     if ($checker) {
143     $checker->($self, $attr);
144     }
145     ## Don't check otherwise, since "element type not supported" warning
146     ## will be reported by the element checker.
147     }
148     },
149 wakaba 1.1 };
150    
151     my $Element = {};
152    
153     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
154    
155 wakaba 1.7 my $HTMLMetadataElements = {
156     $HTML_NS => {
157     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1/,
158     },
159     };
160 wakaba 1.1
161 wakaba 1.2 my $HTMLSectioningElements = {
162     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
163     };
164 wakaba 1.1
165 wakaba 1.7 my $HTMLBlockLevelElements = {
166     $HTML_NS => {
167     qw/
168     section 1 nav 1 article 1 blockquote 1 aside 1
169     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
170     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
171     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
172     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
173     /,
174     },
175     };
176    
177     my $HTMLStrictlyInlineLevelElements = {
178     $HTML_NS => {
179     qw/
180     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
181     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
182     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
183     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
184     script 1 noscript 1 event-source 1 command 1 font 1
185     /,
186     },
187     };
188    
189     my $HTMLStructuredInlineLevelElements = {
190     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
191     };
192 wakaba 1.1
193 wakaba 1.6 my $HTMLInteractiveElements = {
194     $HTML_NS => {a => 1, details => 1, datagrid => 1},
195     };
196     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
197     ## of interactive elements
198 wakaba 1.1
199 wakaba 1.7 my $HTMLTransparentElements = {
200     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
201     ## NOTE: |html:noscript| is transparent if scripting is disabled.
202     };
203    
204     #my $HTMLSemiTransparentElements = {
205     # $HTML_NS => {qw/video 1 audio 1/},
206     #};
207    
208     my $HTMLEmbededElements = {
209     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
210     };
211 wakaba 1.1
212     ## Empty
213     my $HTMLEmptyChecker = sub {
214 wakaba 1.4 my ($self, $todo) = @_;
215     my $el = $todo->{node};
216     my $new_todos = [];
217 wakaba 1.1 my @nodes = (@{$el->child_nodes});
218    
219     while (@nodes) {
220     my $node = shift @nodes;
221 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
222    
223 wakaba 1.1 my $nt = $node->node_type;
224     if ($nt == 1) {
225 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
226     $self->{onerror}->(node => $node, type => 'element not allowed');
227     my ($sib, $ch) = $self->_check_get_children ($node);
228     unshift @nodes, @$sib;
229 wakaba 1.4 push @$new_todos, @$ch;
230 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
231 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
232     $self->{onerror}->(node => $node, type => 'character not allowed');
233     }
234 wakaba 1.1 } elsif ($nt == 5) {
235     unshift @nodes, @{$node->child_nodes};
236     }
237     }
238 wakaba 1.4 return ($new_todos);
239 wakaba 1.1 };
240    
241     ## Text
242     my $HTMLTextChecker = sub {
243 wakaba 1.4 my ($self, $todo) = @_;
244     my $el = $todo->{node};
245     my $new_todos = [];
246 wakaba 1.1 my @nodes = (@{$el->child_nodes});
247    
248     while (@nodes) {
249     my $node = shift @nodes;
250 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
251    
252 wakaba 1.1 my $nt = $node->node_type;
253     if ($nt == 1) {
254 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
255     $self->{onerror}->(node => $node, type => 'element not allowed');
256     my ($sib, $ch) = $self->_check_get_children ($node);
257     unshift @nodes, @$sib;
258 wakaba 1.4 push @$new_todos, @$ch;
259 wakaba 1.1 } elsif ($nt == 5) {
260     unshift @nodes, @{$node->child_nodes};
261     }
262     }
263 wakaba 1.4 return ($new_todos);
264 wakaba 1.1 };
265    
266     ## Zero or more |html:style| elements,
267     ## followed by zero or more block-level elements
268     my $HTMLStylableBlockChecker = sub {
269 wakaba 1.4 my ($self, $todo) = @_;
270     my $el = $todo->{node};
271     my $new_todos = [];
272 wakaba 1.1 my @nodes = (@{$el->child_nodes});
273    
274     my $has_non_style;
275     while (@nodes) {
276     my $node = shift @nodes;
277 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
278    
279 wakaba 1.1 my $nt = $node->node_type;
280     if ($nt == 1) {
281 wakaba 1.2 my $node_ns = $node->namespace_uri;
282     $node_ns = '' unless defined $node_ns;
283     my $node_ln = $node->manakai_local_name;
284 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
285 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
286 wakaba 1.6 $not_allowed = 1 if $has_non_style;
287 wakaba 1.7 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
288     $has_non_style = 1;
289 wakaba 1.1 } else {
290     $has_non_style = 1;
291 wakaba 1.7 $not_allowed = 1;
292 wakaba 1.1 }
293 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
294     if $not_allowed;
295 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
296     unshift @nodes, @$sib;
297 wakaba 1.4 push @$new_todos, @$ch;
298 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
299     if ($node->data =~ /[^\x09-\x0D\x20]/) {
300 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
301 wakaba 1.1 }
302     } elsif ($nt == 5) {
303     unshift @nodes, @{$node->child_nodes};
304     }
305     }
306 wakaba 1.4 return ($new_todos);
307 wakaba 1.1 }; # $HTMLStylableBlockChecker
308    
309     ## Zero or more block-level elements
310     my $HTMLBlockChecker = sub {
311 wakaba 1.4 my ($self, $todo) = @_;
312     my $el = $todo->{node};
313     my $new_todos = [];
314 wakaba 1.1 my @nodes = (@{$el->child_nodes});
315    
316     while (@nodes) {
317     my $node = shift @nodes;
318 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
319    
320 wakaba 1.1 my $nt = $node->node_type;
321     if ($nt == 1) {
322 wakaba 1.2 my $node_ns = $node->namespace_uri;
323     $node_ns = '' unless defined $node_ns;
324     my $node_ln = $node->manakai_local_name;
325 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
326 wakaba 1.7 $not_allowed = 1
327     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
328 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
329     if $not_allowed;
330 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
331     unshift @nodes, @$sib;
332 wakaba 1.4 push @$new_todos, @$ch;
333 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
334     if ($node->data =~ /[^\x09-\x0D\x20]/) {
335 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
336 wakaba 1.1 }
337     } elsif ($nt == 5) {
338     unshift @nodes, @{$node->child_nodes};
339     }
340     }
341 wakaba 1.4 return ($new_todos);
342 wakaba 1.1 }; # $HTMLBlockChecker
343    
344     ## Inline-level content
345     my $HTMLInlineChecker = sub {
346 wakaba 1.4 my ($self, $todo) = @_;
347     my $el = $todo->{node};
348     my $new_todos = [];
349 wakaba 1.1 my @nodes = (@{$el->child_nodes});
350    
351     while (@nodes) {
352     my $node = shift @nodes;
353 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
354    
355 wakaba 1.1 my $nt = $node->node_type;
356     if ($nt == 1) {
357 wakaba 1.2 my $node_ns = $node->namespace_uri;
358     $node_ns = '' unless defined $node_ns;
359     my $node_ln = $node->manakai_local_name;
360 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
361 wakaba 1.7 $not_allowed = 1
362     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
363     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
364 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
365     if $not_allowed;
366 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
367     unshift @nodes, @$sib;
368 wakaba 1.4 push @$new_todos, @$ch;
369 wakaba 1.1 } elsif ($nt == 5) {
370     unshift @nodes, @{$node->child_nodes};
371     }
372     }
373 wakaba 1.4
374     for (@$new_todos) {
375     $_->{inline} = 1;
376     }
377     return ($new_todos);
378     }; # $HTMLInlineChecker
379 wakaba 1.1
380     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
381     ## TODO: check significant content
382    
383     ## Strictly inline-level content
384     my $HTMLStrictlyInlineChecker = sub {
385 wakaba 1.4 my ($self, $todo) = @_;
386     my $el = $todo->{node};
387     my $new_todos = [];
388 wakaba 1.1 my @nodes = (@{$el->child_nodes});
389    
390     while (@nodes) {
391     my $node = shift @nodes;
392 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
393    
394 wakaba 1.1 my $nt = $node->node_type;
395     if ($nt == 1) {
396 wakaba 1.2 my $node_ns = $node->namespace_uri;
397     $node_ns = '' unless defined $node_ns;
398     my $node_ln = $node->manakai_local_name;
399 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
400 wakaba 1.7 $not_allowed = 1
401     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
402 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
403     if $not_allowed;
404 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
405     unshift @nodes, @$sib;
406 wakaba 1.4 push @$new_todos, @$ch;
407 wakaba 1.1 } elsif ($nt == 5) {
408     unshift @nodes, @{$node->child_nodes};
409     }
410     }
411 wakaba 1.4
412     for (@$new_todos) {
413     $_->{inline} = 1;
414     $_->{strictly_inline} = 1;
415     }
416     return ($new_todos);
417 wakaba 1.1 }; # $HTMLStrictlyInlineChecker
418    
419     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
420     ## TODO: check significant content
421    
422 wakaba 1.4 ## Inline-level or strictly inline-kevek content
423     my $HTMLInlineOrStrictlyInlineChecker = sub {
424     my ($self, $todo) = @_;
425     my $el = $todo->{node};
426     my $new_todos = [];
427     my @nodes = (@{$el->child_nodes});
428    
429     while (@nodes) {
430     my $node = shift @nodes;
431     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
432    
433     my $nt = $node->node_type;
434     if ($nt == 1) {
435     my $node_ns = $node->namespace_uri;
436     $node_ns = '' unless defined $node_ns;
437     my $node_ln = $node->manakai_local_name;
438 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
439 wakaba 1.7 if ($todo->{strictly_inline}) {
440     $not_allowed = 1
441     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
442     } else {
443     $not_allowed = 1
444     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
445     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
446     }
447 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
448     if $not_allowed;
449 wakaba 1.4 my ($sib, $ch) = $self->_check_get_children ($node);
450     unshift @nodes, @$sib;
451     push @$new_todos, @$ch;
452     } elsif ($nt == 5) {
453     unshift @nodes, @{$node->child_nodes};
454     }
455     }
456    
457     for (@$new_todos) {
458     $_->{inline} = 1;
459     $_->{strictly_inline} = 1;
460     }
461     return ($new_todos);
462     }; # $HTMLInlineOrStrictlyInlineChecker
463    
464 wakaba 1.6 my $HTMLSignificantInlineOrStrictlyInlineChecker
465     = $HTMLInlineOrStrictlyInlineChecker;
466     ## TODO: check significant content
467    
468 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
469 wakaba 1.4 my ($self, $todo) = @_;
470     my $el = $todo->{node};
471     my $new_todos = [];
472 wakaba 1.1 my @nodes = (@{$el->child_nodes});
473    
474     my $content = 'block-or-inline'; # or 'block' or 'inline'
475     my @block_not_inline;
476     while (@nodes) {
477     my $node = shift @nodes;
478 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
479    
480 wakaba 1.1 my $nt = $node->node_type;
481     if ($nt == 1) {
482 wakaba 1.2 my $node_ns = $node->namespace_uri;
483     $node_ns = '' unless defined $node_ns;
484     my $node_ln = $node->manakai_local_name;
485 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
486 wakaba 1.1 if ($content eq 'block') {
487 wakaba 1.7 $not_allowed = 1
488     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
489 wakaba 1.1 } elsif ($content eq 'inline') {
490 wakaba 1.7 $not_allowed = 1
491     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
492     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
493 wakaba 1.1 } else {
494 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
495     my $is_inline
496     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
497     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
498 wakaba 1.1
499 wakaba 1.6 push @block_not_inline, $node
500     if $is_block and not $is_inline and not $not_allowed;
501 wakaba 1.1 unless ($is_block) {
502     $content = 'inline';
503     for (@block_not_inline) {
504 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
505 wakaba 1.1 }
506 wakaba 1.6 $not_allowed = 1 unless $is_inline;
507 wakaba 1.1 }
508     }
509 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
510     if $not_allowed;
511 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
512     unshift @nodes, @$sib;
513 wakaba 1.4 push @$new_todos, @$ch;
514 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
515     if ($node->data =~ /[^\x09-\x0D\x20]/) {
516     if ($content eq 'block') {
517 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
518 wakaba 1.1 } else {
519     $content = 'inline';
520     for (@block_not_inline) {
521 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
522 wakaba 1.1 }
523     }
524     }
525     } elsif ($nt == 5) {
526     unshift @nodes, @{$node->child_nodes};
527     }
528     }
529 wakaba 1.4
530     if ($content eq 'inline') {
531     for (@$new_todos) {
532     $_->{inline} = 1;
533     }
534     }
535     return ($new_todos);
536 wakaba 1.1 };
537    
538 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
539     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
540     my ($elnsuri, $ellname) = @_;
541     return sub {
542 wakaba 1.4 my ($self, $todo) = @_;
543     my $el = $todo->{node};
544     my $new_todos = [];
545 wakaba 1.2 my @nodes = (@{$el->child_nodes});
546    
547     my $has_non_style;
548     my $content = 'block-or-inline'; # or 'block' or 'inline'
549     my @block_not_inline;
550     while (@nodes) {
551     my $node = shift @nodes;
552     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
553    
554     my $nt = $node->node_type;
555     if ($nt == 1) {
556     my $node_ns = $node->namespace_uri;
557     $node_ns = '' unless defined $node_ns;
558     my $node_ln = $node->manakai_local_name;
559 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
560 wakaba 1.8 if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
561 wakaba 1.6 $not_allowed = 1 if $has_non_style;
562 wakaba 1.2 } elsif ($content eq 'block') {
563     $has_non_style = 1;
564 wakaba 1.7 $not_allowed = 1
565     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
566 wakaba 1.2 } elsif ($content eq 'inline') {
567     $has_non_style = 1;
568 wakaba 1.7 $not_allowed = 1
569     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
570     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
571 wakaba 1.2 } else {
572     $has_non_style = 1;
573 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
574     my $is_inline
575     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
576     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
577 wakaba 1.2
578 wakaba 1.6 push @block_not_inline, $node
579     if $is_block and not $is_inline and not $not_allowed;
580 wakaba 1.2 unless ($is_block) {
581     $content = 'inline';
582     for (@block_not_inline) {
583     $self->{onerror}->(node => $_, type => 'element not allowed');
584     }
585 wakaba 1.6 $not_allowed = 1 unless $is_inline;
586 wakaba 1.1 }
587     }
588 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
589     if $not_allowed;
590 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
591     unshift @nodes, @$sib;
592 wakaba 1.4 push @$new_todos, @$ch;
593 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
594     if ($node->data =~ /[^\x09-\x0D\x20]/) {
595     $has_non_style = 1;
596     if ($content eq 'block') {
597     $self->{onerror}->(node => $node, type => 'character not allowed');
598     } else {
599     $content = 'inline';
600     for (@block_not_inline) {
601     $self->{onerror}->(node => $_, type => 'element not allowed');
602     }
603 wakaba 1.1 }
604     }
605 wakaba 1.2 } elsif ($nt == 5) {
606     unshift @nodes, @{$node->child_nodes};
607 wakaba 1.1 }
608     }
609 wakaba 1.4
610     if ($content eq 'inline') {
611     for (@$new_todos) {
612     $_->{inline} = 1;
613     }
614     }
615     return ($new_todos);
616 wakaba 1.2 };
617     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
618 wakaba 1.1
619     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
620    
621 wakaba 1.10 my $GetHTMLEnumeratedAttrChecker = sub {
622     my $states = shift; # {value => conforming ? 1 : -1}
623     return sub {
624     my ($self, $attr) = @_;
625     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
626     if ($states->{$value} > 0) {
627     #
628     } elsif ($states->{$value}) {
629     $self->{onerror}->(node => $attr,
630     type => 'non-conforming enumerated attribute value');
631     } else {
632     $self->{onerror}->(node => $attr,
633     type => 'invalid enumerated attribute value');
634     }
635     };
636     }; # $GetHTMLEnumeratedAttrChecker
637 wakaba 1.9
638 wakaba 1.10 my $GetHTMLBooleanAttrChecker = sub {
639     my $local_name = shift;
640     return sub {
641     my ($self, $attr) = @_;
642     my $value = $attr->value;
643     unless ($value eq $local_name or $value eq '') {
644     $self->{onerror}->(node => $attr,
645     type => 'invalid boolean attribute value');
646     }
647     };
648     }; # $GetHTMLBooleanAttrChecker
649 wakaba 1.9
650 wakaba 1.10 my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
651     my ($self, $attr) = @_;
652     my %word;
653     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
654     unless ($word{$word}) {
655     $word{$word} = 1;
656 wakaba 1.9 } else {
657 wakaba 1.10 $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
658 wakaba 1.9 }
659     }
660 wakaba 1.10 }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
661    
662 wakaba 1.11 my $HTMLURIAttrChecker = sub {
663     my ($self, $attr) = @_;
664     ## TODO: URI or IRI check
665     }; # $HTMLURIAttrChecker
666    
667     my $HTMLIntegerAttrChecker = sub {
668     my ($self, $attr) = @_;
669     my $value = $attr->value;
670     unless ($value =~ /\A-?[0-9]+\z/) {
671     $self->{onerror}->(node => $attr, type => 'syntax error');
672     }
673     }; # $HTMLIntegerAttrChecker
674    
675 wakaba 1.12 my $GetHTMLNonNegativeIntegerAttrChecker = sub {
676     my $range_check = shift;
677     return sub {
678     my ($self, $attr) = @_;
679     my $value = $attr->value;
680     if ($value =~ /\A[0-9]+\z/) {
681     unless ($range_check->($value + 0)) {
682     $self->{onerror}->(node => $attr, type => 'out of range');
683     }
684     } else {
685     $self->{onerror}->(node => $attr, type => 'syntax error');
686     }
687     };
688     }; # $GetHTMLNonNegativeIntegerAttrChecker
689    
690 wakaba 1.11 my $GetHTMLFloatingPointNumberAttrChecker = sub {
691     my $range_check = shift;
692     return sub {
693     my ($self, $attr) = @_;
694     my $value = $attr->value;
695     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
696     unless ($range_check->($value + 0)) {
697     $self->{onerror}->(node => $attr, type => 'out of range');
698     }
699     } else {
700     $self->{onerror}->(node => $attr, type => 'syntax error');
701     }
702     };
703     }; # $GetHTMLFloatingPointNumberAttrChecker
704    
705 wakaba 1.10 my $HTMLAttrChecker = {
706     id => sub {
707     my ($self, $attr) = @_;
708     my $value = $attr->value;
709     unless (length $value > 0) {
710     ## NOTE: MUST contain at least one character
711     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
712     } else {
713     if ($self->{id}->{$value}) {
714     $self->{onerror}->(node => $attr, type => 'duplicate ID');
715     } else {
716     $self->{id}->{$value} = 1;
717     }
718     }
719     },
720     title => sub {}, ## NOTE: No conformance creteria
721     lang => sub {
722     ## TODO: RFC 3066 test
723     ## ISSUE: RFC 4646 (3066bis)?
724     ## TODO: HTML vs XHTML
725     },
726     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
727     class => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
728     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
729     ## TODO: tabindex
730     };
731    
732     my $GetHTMLAttrsChecker = sub {
733     my $element_specific_checker = shift;
734     return sub {
735     my ($self, $todo) = @_;
736     for my $attr (@{$todo->{node}->attributes}) {
737     my $attr_ns = $attr->namespace_uri;
738     $attr_ns = '' unless defined $attr_ns;
739     my $attr_ln = $attr->manakai_local_name;
740     my $checker;
741     if ($attr_ns eq '') {
742     $checker = $element_specific_checker->{$attr_ln}
743     || $HTMLAttrChecker->{$attr_ln};
744     }
745     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
746     || $AttrChecker->{$attr_ns}->{''};
747     if ($checker) {
748     $checker->($self, $attr);
749     } else {
750     $self->{onerror}->(node => $attr, type => 'attribute not supported');
751     ## ISSUE: No comformance createria for unknown attributes in the spec
752     }
753     }
754     };
755     }; # $GetHTMLAttrsChecker
756 wakaba 1.9
757     $Element->{$HTML_NS}->{''} = {
758 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
759 wakaba 1.9 checker => $ElementDefault->{checker},
760     };
761    
762 wakaba 1.1 $Element->{$HTML_NS}->{html} = {
763 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
764     xmlns => sub {
765     my ($self, $attr) = @_;
766     my $value = $attr->value;
767     unless ($value eq $HTML_NS) {
768     $self->{onerror}->(node => $attr, type => 'syntax error');
769     ## TODO: only in HTML documents
770     }
771     },
772     }),
773 wakaba 1.1 checker => sub {
774 wakaba 1.4 my ($self, $todo) = @_;
775     my $el = $todo->{node};
776     my $new_todos = [];
777 wakaba 1.1 my @nodes = (@{$el->child_nodes});
778    
779     my $phase = 'before head';
780     while (@nodes) {
781     my $node = shift @nodes;
782 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
783    
784 wakaba 1.1 my $nt = $node->node_type;
785     if ($nt == 1) {
786 wakaba 1.2 my $node_ns = $node->namespace_uri;
787     $node_ns = '' unless defined $node_ns;
788     my $node_ln = $node->manakai_local_name;
789 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
790 wakaba 1.1 if ($phase eq 'before head') {
791 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
792 wakaba 1.1 $phase = 'after head';
793 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
794     $self->{onerror}->(node => $node, type => 'ps element missing:head');
795 wakaba 1.1 $phase = 'after body';
796     } else {
797 wakaba 1.6 $not_allowed = 1;
798 wakaba 1.1 # before head
799     }
800     } elsif ($phase eq 'after head') {
801 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
802 wakaba 1.1 $phase = 'after body';
803     } else {
804 wakaba 1.6 $not_allowed = 1;
805 wakaba 1.1 # after head
806     }
807     } else { #elsif ($phase eq 'after body') {
808 wakaba 1.6 $not_allowed = 1;
809 wakaba 1.1 # after body
810     }
811 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
812     if $not_allowed;
813 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
814     unshift @nodes, @$sib;
815 wakaba 1.4 push @$new_todos, @$ch;
816 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
817     if ($node->data =~ /[^\x09-\x0D\x20]/) {
818 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
819 wakaba 1.1 }
820     } elsif ($nt == 5) {
821     unshift @nodes, @{$node->child_nodes};
822     }
823     }
824 wakaba 1.3
825     if ($phase eq 'before head') {
826     $self->{onerror}->(node => $el, type => 'child element missing:head');
827     $self->{onerror}->(node => $el, type => 'child element missing:body');
828     } elsif ($phase eq 'after head') {
829     $self->{onerror}->(node => $el, type => 'child element missing:body');
830     }
831    
832 wakaba 1.4 return ($new_todos);
833 wakaba 1.1 },
834     };
835    
836     $Element->{$HTML_NS}->{head} = {
837 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
838 wakaba 1.1 checker => sub {
839 wakaba 1.4 my ($self, $todo) = @_;
840     my $el = $todo->{node};
841     my $new_todos = [];
842 wakaba 1.1 my @nodes = (@{$el->child_nodes});
843    
844     my $has_title;
845 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
846 wakaba 1.1 while (@nodes) {
847     my $node = shift @nodes;
848 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
849    
850 wakaba 1.1 my $nt = $node->node_type;
851     if ($nt == 1) {
852 wakaba 1.2 my $node_ns = $node->namespace_uri;
853     $node_ns = '' unless defined $node_ns;
854     my $node_ln = $node->manakai_local_name;
855 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
856 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
857 wakaba 1.3 $phase = 'after base';
858 wakaba 1.1 unless ($has_title) {
859     $has_title = 1;
860     } else {
861 wakaba 1.6 $not_allowed = 1;
862 wakaba 1.1 }
863 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
864 wakaba 1.1 if ($node->has_attribute_ns (undef, 'charset')) {
865 wakaba 1.3 if ($phase eq 'initial') {
866     $phase = 'after charset';
867 wakaba 1.1 } else {
868 wakaba 1.6 $not_allowed = 1;
869 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
870 wakaba 1.1 }
871     } else {
872 wakaba 1.3 $phase = 'after base';
873 wakaba 1.1 }
874 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
875 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
876     $phase = 'after base';
877 wakaba 1.1 } else {
878 wakaba 1.6 $not_allowed = 1;
879 wakaba 1.1 }
880 wakaba 1.7 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
881     $phase = 'after base';
882 wakaba 1.1 } else {
883 wakaba 1.7 $not_allowed = 1;
884 wakaba 1.1 }
885 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
886     if $not_allowed;
887 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
888     unshift @nodes, @$sib;
889 wakaba 1.4 push @$new_todos, @$ch;
890 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
891     if ($node->data =~ /[^\x09-\x0D\x20]/) {
892 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
893 wakaba 1.1 }
894     } elsif ($nt == 5) {
895     unshift @nodes, @{$node->child_nodes};
896     }
897     }
898     unless ($has_title) {
899 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
900 wakaba 1.1 }
901 wakaba 1.4 return ($new_todos);
902 wakaba 1.1 },
903     };
904    
905     $Element->{$HTML_NS}->{title} = {
906 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
907 wakaba 1.1 checker => $HTMLTextChecker,
908     };
909    
910     $Element->{$HTML_NS}->{base} = {
911 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
912 wakaba 1.11 href => $HTMLURIAttrChecker,
913 wakaba 1.10 ## TODO: target
914     }),
915 wakaba 1.1 checker => $HTMLEmptyChecker,
916     };
917    
918     $Element->{$HTML_NS}->{link} = {
919 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
920 wakaba 1.1 checker => $HTMLEmptyChecker,
921     };
922    
923     $Element->{$HTML_NS}->{meta} = {
924 wakaba 1.10 attrs_checker => sub {
925     my ($self, $todo) = @_;
926     my $name_attr;
927     my $http_equiv_attr;
928     my $charset_attr;
929     my $content_attr;
930     for my $attr (@{$todo->{node}->attributes}) {
931     my $attr_ns = $attr->namespace_uri;
932     $attr_ns = '' unless defined $attr_ns;
933     my $attr_ln = $attr->manakai_local_name;
934     my $checker;
935     if ($attr_ns eq '') {
936     if ($attr_ln eq 'content') {
937     $content_attr = $attr;
938     $checker = 1;
939     } elsif ($attr_ln eq 'name') {
940     $name_attr = $attr;
941     $checker = 1;
942     } elsif ($attr_ln eq 'http-equiv') {
943     $http_equiv_attr = $attr;
944     $checker = 1;
945     } elsif ($attr_ln eq 'charset') {
946     $charset_attr = $attr;
947     $checker = 1;
948     } else {
949     $checker = $HTMLAttrChecker->{$attr_ln}
950     || $AttrChecker->{$attr_ns}->{$attr_ln}
951     || $AttrChecker->{$attr_ns}->{''};
952     }
953     } else {
954     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
955     || $AttrChecker->{$attr_ns}->{''};
956     }
957     if ($checker) {
958     $checker->($self, $attr) if ref $checker;
959     } else {
960     $self->{onerror}->(node => $attr, type => 'attribute not supported');
961     ## ISSUE: No comformance createria for unknown attributes in the spec
962     }
963     }
964    
965     if (defined $name_attr) {
966     if (defined $http_equiv_attr) {
967     $self->{onerror}->(node => $http_equiv_attr,
968     type => 'attribute not allowed');
969     } elsif (defined $charset_attr) {
970     $self->{onerror}->(node => $charset_attr,
971     type => 'attribute not allowed');
972     }
973     my $metadata_name = $name_attr->value;
974     my $metadata_value;
975     if (defined $content_attr) {
976     $metadata_value = $content_attr->value;
977     } else {
978     $self->{onerror}->(node => $todo->{node},
979     type => 'attribute missing:content');
980     $metadata_value = '';
981     }
982     } elsif (defined $http_equiv_attr) {
983     if (defined $charset_attr) {
984     $self->{onerror}->(node => $charset_attr,
985     type => 'attribute not allowed');
986     }
987     unless (defined $content_attr) {
988     $self->{onerror}->(node => $todo->{node},
989     type => 'attribute missing:content');
990     }
991     } elsif (defined $charset_attr) {
992     if (defined $content_attr) {
993     $self->{onerror}->(node => $content_attr,
994     type => 'attribute not allowed');
995     }
996     ## TODO: Allowed only in HTML documents
997     } else {
998     if (defined $content_attr) {
999     $self->{onerror}->(node => $content_attr,
1000     type => 'attribute not allowed');
1001     $self->{onerror}->(node => $todo->{node},
1002     type => 'attribute missing:name|http-equiv');
1003     } else {
1004     $self->{onerror}->(node => $todo->{node},
1005     type => 'attribute missing:name|http-equiv|charset');
1006     }
1007     }
1008    
1009     ## TODO: metadata conformance
1010    
1011     ## TODO: pragma conformance
1012     if (defined $http_equiv_attr) { ## An enumerated attribute
1013     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1014     if ({
1015     'refresh' => 1,
1016     'default-style' => 1,
1017     }->{$keyword}) {
1018     #
1019     } else {
1020     $self->{onerror}->(node => $http_equiv_attr,
1021     type => 'invalid enumerated attribute value');
1022     }
1023     }
1024    
1025     ## TODO: charset
1026     },
1027 wakaba 1.1 checker => $HTMLEmptyChecker,
1028     };
1029    
1030     ## NOTE: |html:style| has no conformance creteria on content model
1031 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
1032 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1033     ## TODO: type
1034     ## TODO: media
1035     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1036     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1037     ## not different
1038     }),
1039 wakaba 1.3 checker => $AnyChecker,
1040     };
1041 wakaba 1.1
1042     $Element->{$HTML_NS}->{body} = {
1043 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1044 wakaba 1.1 checker => $HTMLBlockChecker,
1045     };
1046    
1047     $Element->{$HTML_NS}->{section} = {
1048 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1049 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1050     };
1051    
1052     $Element->{$HTML_NS}->{nav} = {
1053 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1054 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1055     };
1056    
1057     $Element->{$HTML_NS}->{article} = {
1058 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1059 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1060     };
1061    
1062     $Element->{$HTML_NS}->{blockquote} = {
1063 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1064     cite => $HTMLURIAttrChecker,
1065     }),
1066 wakaba 1.1 checker => $HTMLBlockChecker,
1067     };
1068    
1069     $Element->{$HTML_NS}->{aside} = {
1070 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1071 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1072 wakaba 1.1 };
1073    
1074     $Element->{$HTML_NS}->{h1} = {
1075 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1076 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1077     };
1078    
1079     $Element->{$HTML_NS}->{h2} = {
1080 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1081 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1082     };
1083    
1084     $Element->{$HTML_NS}->{h3} = {
1085 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1086 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1087     };
1088    
1089     $Element->{$HTML_NS}->{h4} = {
1090 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1091 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1092     };
1093    
1094     $Element->{$HTML_NS}->{h5} = {
1095 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1096 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1097     };
1098    
1099     $Element->{$HTML_NS}->{h6} = {
1100 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1101 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1102     };
1103    
1104     ## TODO: header
1105    
1106 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
1107 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1108 wakaba 1.2 checker => sub { ## block -hn -header -footer -sectioning or inline
1109 wakaba 1.4 my ($self, $todo) = @_;
1110     my $el = $todo->{node};
1111     my $new_todos = [];
1112 wakaba 1.2 my @nodes = (@{$el->child_nodes});
1113    
1114     my $content = 'block-or-inline'; # or 'block' or 'inline'
1115     my @block_not_inline;
1116     while (@nodes) {
1117     my $node = shift @nodes;
1118     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1119    
1120     my $nt = $node->node_type;
1121     if ($nt == 1) {
1122     my $node_ns = $node->namespace_uri;
1123     $node_ns = '' unless defined $node_ns;
1124     my $node_ln = $node->manakai_local_name;
1125 wakaba 1.6 my $not_allowed;
1126 wakaba 1.2 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1127 wakaba 1.6 $not_allowed = 1;
1128 wakaba 1.2 } elsif ($node_ns eq $HTML_NS and
1129     {
1130     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1131     }->{$node_ln}) {
1132 wakaba 1.6 $not_allowed = 1;
1133 wakaba 1.2 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1134 wakaba 1.6 $not_allowed = 1;
1135 wakaba 1.2 }
1136     if ($content eq 'block') {
1137 wakaba 1.7 $not_allowed = 1
1138     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1139 wakaba 1.2 } elsif ($content eq 'inline') {
1140 wakaba 1.7 $not_allowed = 1
1141     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1142     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1143 wakaba 1.2 } else {
1144 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1145     my $is_inline
1146     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1147     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1148 wakaba 1.2
1149 wakaba 1.6 push @block_not_inline, $node
1150     if $is_block and not $is_inline and not $not_allowed;
1151 wakaba 1.2 unless ($is_block) {
1152     $content = 'inline';
1153     for (@block_not_inline) {
1154     $self->{onerror}->(node => $_, type => 'element not allowed');
1155     }
1156 wakaba 1.6 $not_allowed = 1 unless $is_inline;
1157 wakaba 1.2 }
1158     }
1159 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1160     if $not_allowed;
1161 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1162     unshift @nodes, @$sib;
1163 wakaba 1.4 push @$new_todos, @$ch;
1164 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
1165     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1166     if ($content eq 'block') {
1167     $self->{onerror}->(node => $node, type => 'character not allowed');
1168     } else {
1169     $content = 'inline';
1170     for (@block_not_inline) {
1171     $self->{onerror}->(node => $_, type => 'element not allowed');
1172     }
1173     }
1174     }
1175     } elsif ($nt == 5) {
1176     unshift @nodes, @{$node->child_nodes};
1177     }
1178     }
1179    
1180     my $end = $self->_add_minuses
1181     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1182     $HTMLSectioningElements);
1183 wakaba 1.4 push @$new_todos, $end;
1184 wakaba 1.2
1185 wakaba 1.4 if ($content eq 'inline') {
1186     for (@$new_todos) {
1187     $_->{inline} = 1;
1188     }
1189     }
1190    
1191     return ($new_todos);
1192 wakaba 1.2 },
1193     };
1194 wakaba 1.1
1195     $Element->{$HTML_NS}->{address} = {
1196 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1197 wakaba 1.1 checker => $HTMLInlineChecker,
1198     };
1199    
1200     $Element->{$HTML_NS}->{p} = {
1201 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1202 wakaba 1.1 checker => $HTMLSignificantInlineChecker,
1203     };
1204    
1205     $Element->{$HTML_NS}->{hr} = {
1206 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1207 wakaba 1.1 checker => $HTMLEmptyChecker,
1208     };
1209    
1210     $Element->{$HTML_NS}->{br} = {
1211 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1212 wakaba 1.1 checker => $HTMLEmptyChecker,
1213     };
1214    
1215     $Element->{$HTML_NS}->{dialog} = {
1216 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1217 wakaba 1.1 checker => sub {
1218 wakaba 1.4 my ($self, $todo) = @_;
1219     my $el = $todo->{node};
1220     my $new_todos = [];
1221 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1222    
1223     my $phase = 'before dt';
1224     while (@nodes) {
1225     my $node = shift @nodes;
1226 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1227    
1228 wakaba 1.1 my $nt = $node->node_type;
1229     if ($nt == 1) {
1230 wakaba 1.8 my $node_ns = $node->namespace_uri;
1231     $node_ns = '' unless defined $node_ns;
1232     my $node_ln = $node->manakai_local_name;
1233 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1234 wakaba 1.1 if ($phase eq 'before dt') {
1235 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1236 wakaba 1.1 $phase = 'before dd';
1237 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1238 wakaba 1.2 $self->{onerror}
1239 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1240 wakaba 1.1 $phase = 'before dt';
1241     } else {
1242 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1243 wakaba 1.1 }
1244     } else { # before dd
1245 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1246 wakaba 1.1 $phase = 'before dt';
1247 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1248 wakaba 1.2 $self->{onerror}
1249 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
1250 wakaba 1.1 $phase = 'before dd';
1251     } else {
1252 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1253 wakaba 1.1 }
1254     }
1255 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1256     unshift @nodes, @$sib;
1257 wakaba 1.4 push @$new_todos, @$ch;
1258 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1259     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1260 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1261 wakaba 1.1 }
1262     } elsif ($nt == 5) {
1263     unshift @nodes, @{$node->child_nodes};
1264     }
1265     }
1266     if ($phase eq 'before dd') {
1267 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1268 wakaba 1.1 }
1269 wakaba 1.4 return ($new_todos);
1270 wakaba 1.1 },
1271     };
1272    
1273     $Element->{$HTML_NS}->{pre} = {
1274 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1275 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1276     };
1277    
1278     $Element->{$HTML_NS}->{ol} = {
1279 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1280     start => $HTMLIntegerAttrChecker,
1281     }),
1282 wakaba 1.1 checker => sub {
1283 wakaba 1.4 my ($self, $todo) = @_;
1284     my $el = $todo->{node};
1285     my $new_todos = [];
1286 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1287    
1288     while (@nodes) {
1289     my $node = shift @nodes;
1290 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1291    
1292 wakaba 1.1 my $nt = $node->node_type;
1293     if ($nt == 1) {
1294 wakaba 1.8 my $node_ns = $node->namespace_uri;
1295     $node_ns = '' unless defined $node_ns;
1296     my $node_ln = $node->manakai_local_name;
1297 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1298 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1299 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1300 wakaba 1.1 }
1301 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1302     unshift @nodes, @$sib;
1303 wakaba 1.4 push @$new_todos, @$ch;
1304 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1305     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1306 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1307 wakaba 1.1 }
1308     } elsif ($nt == 5) {
1309     unshift @nodes, @{$node->child_nodes};
1310     }
1311     }
1312 wakaba 1.4
1313     if ($todo->{inline}) {
1314     for (@$new_todos) {
1315     $_->{inline} = 1;
1316     }
1317     }
1318     return ($new_todos);
1319 wakaba 1.1 },
1320     };
1321    
1322     $Element->{$HTML_NS}->{ul} = {
1323 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1324 wakaba 1.1 checker => $Element->{$HTML_NS}->{ol}->{checker},
1325     };
1326    
1327 wakaba 1.5
1328     $Element->{$HTML_NS}->{li} = {
1329 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1330     start => sub {
1331     my ($self, $attr) = @_;
1332     my $parent = $attr->owner_element->manakai_parent_element;
1333     if (defined $parent) {
1334     my $parent_ns = $parent->namespace_uri;
1335     $parent_ns = '' unless defined $parent_ns;
1336     my $parent_ln = $parent->manakai_local_name;
1337     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1338     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1339     }
1340     }
1341     $HTMLIntegerAttrChecker->($self, $attr);
1342     },
1343     }),
1344 wakaba 1.5 checker => sub {
1345     my ($self, $todo) = @_;
1346     if ($todo->{inline}) {
1347     return $HTMLInlineChecker->($self, $todo);
1348     } else {
1349     return $HTMLBlockOrInlineChecker->($self, $todo);
1350     }
1351     },
1352     };
1353 wakaba 1.1
1354     $Element->{$HTML_NS}->{dl} = {
1355 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1356 wakaba 1.1 checker => sub {
1357 wakaba 1.4 my ($self, $todo) = @_;
1358     my $el = $todo->{node};
1359     my $new_todos = [];
1360 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1361    
1362     my $phase = 'before dt';
1363     while (@nodes) {
1364     my $node = shift @nodes;
1365 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1366    
1367 wakaba 1.1 my $nt = $node->node_type;
1368     if ($nt == 1) {
1369 wakaba 1.8 my $node_ns = $node->namespace_uri;
1370     $node_ns = '' unless defined $node_ns;
1371     my $node_ln = $node->manakai_local_name;
1372 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1373 wakaba 1.1 if ($phase eq 'in dds') {
1374 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1375 wakaba 1.1 #$phase = 'in dds';
1376 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1377 wakaba 1.1 $phase = 'in dts';
1378     } else {
1379 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1380 wakaba 1.1 }
1381     } elsif ($phase eq 'in dts') {
1382 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1383 wakaba 1.1 #$phase = 'in dts';
1384 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1385 wakaba 1.1 $phase = 'in dds';
1386     } else {
1387 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1388 wakaba 1.1 }
1389     } else { # before dt
1390 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1391 wakaba 1.1 $phase = 'in dts';
1392 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1393 wakaba 1.2 $self->{onerror}
1394 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1395 wakaba 1.1 $phase = 'in dds';
1396     } else {
1397 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1398 wakaba 1.1 }
1399     }
1400 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1401     unshift @nodes, @$sib;
1402 wakaba 1.4 push @$new_todos, @$ch;
1403 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1404     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1405 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1406 wakaba 1.1 }
1407     } elsif ($nt == 5) {
1408     unshift @nodes, @{$node->child_nodes};
1409     }
1410     }
1411     if ($phase eq 'in dts') {
1412 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1413 wakaba 1.1 }
1414 wakaba 1.4
1415     if ($todo->{inline}) {
1416     for (@$new_todos) {
1417     $_->{inline} = 1;
1418     }
1419     }
1420     return ($new_todos);
1421 wakaba 1.1 },
1422     };
1423    
1424     $Element->{$HTML_NS}->{dt} = {
1425 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1426 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1427     };
1428    
1429 wakaba 1.4 $Element->{$HTML_NS}->{dd} = {
1430 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1431 wakaba 1.5 checker => $Element->{$HTML_NS}->{li}->{checker},
1432 wakaba 1.4 };
1433 wakaba 1.1
1434 wakaba 1.6 $Element->{$HTML_NS}->{a} = {
1435 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1436 wakaba 1.6 checker => sub {
1437     my ($self, $todo) = @_;
1438    
1439     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1440     my ($sib, $ch)
1441     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1442     push @$sib, $end;
1443     return ($sib, $ch);
1444     },
1445     };
1446 wakaba 1.1
1447 wakaba 1.4 $Element->{$HTML_NS}->{q} = {
1448 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1449     cite => $HTMLURIAttrChecker,
1450     }),
1451 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1452     };
1453 wakaba 1.1
1454     $Element->{$HTML_NS}->{cite} = {
1455 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1456 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1457     };
1458    
1459 wakaba 1.4 $Element->{$HTML_NS}->{em} = {
1460 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1461 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1462     };
1463    
1464     $Element->{$HTML_NS}->{strong} = {
1465 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1466 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1467     };
1468 wakaba 1.1
1469 wakaba 1.4 $Element->{$HTML_NS}->{small} = {
1470 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1471 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1472     };
1473    
1474     $Element->{$HTML_NS}->{m} = {
1475 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1476 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1477     };
1478    
1479 wakaba 1.11 $Element->{$HTML_NS}->{dfn} = { ## TODO: term duplication
1480 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1481 wakaba 1.4 checker => sub {
1482     my ($self, $todo) = @_;
1483    
1484     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1485     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1486     push @$sib, $end;
1487     return ($sib, $ch);
1488     },
1489     };
1490 wakaba 1.1
1491     $Element->{$HTML_NS}->{abbr} = {
1492 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1493     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1494     ## not different. The spec says that the |title| MAY be omitted
1495     ## if there is a |dfn| whose defining term is the abbreviation,
1496     ## but it does not prohibit |abbr| w/o |title| in other cases.
1497     }),
1498 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1499     };
1500    
1501 wakaba 1.11 $Element->{$HTML_NS}->{time} = { ## TODO: validate content
1502     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO: datetime
1503 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1504     };
1505    
1506 wakaba 1.11 $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1507     attrs_checker => $GetHTMLAttrsChecker->({
1508     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1509     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1510     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1511     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1512     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1513     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1514     }),
1515 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1516     };
1517    
1518 wakaba 1.11 $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1519     attrs_checker => $GetHTMLAttrsChecker->({
1520     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1521     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1522     }),
1523 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1524     };
1525    
1526 wakaba 1.4 $Element->{$HTML_NS}->{code} = {
1527 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1528 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1529     ## syntatically same as the |title| as global attribute.
1530 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1531     };
1532 wakaba 1.1
1533     $Element->{$HTML_NS}->{var} = {
1534 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1535 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1536     ## syntatically same as the |title| as global attribute.
1537 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1538     };
1539    
1540 wakaba 1.4 $Element->{$HTML_NS}->{samp} = {
1541 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1542 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1543     ## syntatically same as the |title| as global attribute.
1544 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1545     };
1546 wakaba 1.1
1547     $Element->{$HTML_NS}->{kbd} = {
1548 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1549 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1550     };
1551    
1552     $Element->{$HTML_NS}->{sub} = {
1553 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1554 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1555     };
1556    
1557     $Element->{$HTML_NS}->{sup} = {
1558 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1559 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1560     };
1561    
1562 wakaba 1.4 $Element->{$HTML_NS}->{span} = {
1563 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1564 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1565     ## syntatically same as the |title| as global attribute.
1566 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1567     };
1568 wakaba 1.1
1569     $Element->{$HTML_NS}->{i} = {
1570 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1571 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1572     ## syntatically same as the |title| as global attribute.
1573 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1574     };
1575    
1576     $Element->{$HTML_NS}->{b} = {
1577 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1578 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1579     };
1580    
1581     $Element->{$HTML_NS}->{bdo} = {
1582 wakaba 1.12 attrs_checker => sub {
1583     my ($self, $todo) = @_;
1584     $GetHTMLAttrsChecker->({})->($self, $todo);
1585     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
1586     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
1587     }
1588     },
1589     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1590 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1591     };
1592    
1593     $Element->{$HTML_NS}->{ins} = {
1594 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1595     cite => $HTMLURIAttrChecker,
1596     ## TODO: datetime
1597     }),
1598 wakaba 1.1 checker => $HTMLTransparentChecker,
1599     };
1600    
1601     $Element->{$HTML_NS}->{del} = {
1602 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1603     cite => $HTMLURIAttrChecker,
1604     ## TODO: datetime
1605     }),
1606 wakaba 1.1 checker => sub {
1607 wakaba 1.4 my ($self, $todo) = @_;
1608 wakaba 1.1
1609 wakaba 1.4 my $parent = $todo->{node}->manakai_parent_element;
1610 wakaba 1.1 if (defined $parent) {
1611     my $nsuri = $parent->namespace_uri;
1612     $nsuri = '' unless defined $nsuri;
1613     my $ln = $parent->manakai_local_name;
1614     my $eldef = $Element->{$nsuri}->{$ln} ||
1615     $Element->{$nsuri}->{''} ||
1616     $ElementDefault;
1617 wakaba 1.4 return $eldef->{checker}->($self, $todo);
1618 wakaba 1.1 } else {
1619 wakaba 1.4 return $HTMLBlockOrInlineChecker->($self, $todo);
1620 wakaba 1.1 }
1621     },
1622     };
1623    
1624     ## TODO: figure
1625    
1626     $Element->{$HTML_NS}->{img} = {
1627 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1628 wakaba 1.1 checker => $HTMLEmptyChecker,
1629     };
1630    
1631     $Element->{$HTML_NS}->{iframe} = {
1632 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1633     src => $HTMLURIAttrChecker,
1634     }),
1635 wakaba 1.1 checker => $HTMLTextChecker,
1636     };
1637    
1638     $Element->{$HTML_NS}->{embed} = {
1639 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1640 wakaba 1.1 checker => $HTMLEmptyChecker,
1641     };
1642    
1643     $Element->{$HTML_NS}->{param} = {
1644 wakaba 1.12 attrs_checker => sub {
1645     my ($self, $todo) = @_;
1646     $GetHTMLAttrsChecker->({
1647     name => sub { },
1648     value => sub { },
1649     })->($self, $todo);
1650     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
1651     $self->{onerror}->(node => $todo->{node},
1652     type => 'attribute missing:name');
1653     }
1654     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
1655     $self->{onerror}->(node => $todo->{node},
1656     type => 'attribute missing:value');
1657     }
1658     },
1659 wakaba 1.1 checker => $HTMLEmptyChecker,
1660     };
1661    
1662     ## TODO: object
1663    
1664 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
1665 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1666     src => $HTMLURIAttrChecker,
1667     ## TODO: start, loopstart, loopend, end
1668     ## ISSUE: they MUST be "value time offset"s. Value?
1669     ## ISSUE: loopcount has no conformance creteria
1670     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
1671     controls => $GetHTMLBooleanAttrChecker->('controls'),
1672     }),
1673 wakaba 1.2 checker => sub {
1674 wakaba 1.4 my ($self, $todo) = @_;
1675 wakaba 1.2
1676 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1677     return $HTMLBlockOrInlineChecker->($self, $todo);
1678 wakaba 1.2 } else {
1679     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
1680 wakaba 1.4 ->($self, $todo);
1681 wakaba 1.2 }
1682     },
1683     };
1684    
1685     $Element->{$HTML_NS}->{audio} = {
1686 wakaba 1.12 attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
1687     checker => $Element->{$HTML_NS}->{video}->{checker},
1688 wakaba 1.2 };
1689 wakaba 1.1
1690     $Element->{$HTML_NS}->{source} = {
1691 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1692 wakaba 1.1 checker => $HTMLEmptyChecker,
1693     };
1694    
1695     $Element->{$HTML_NS}->{canvas} = {
1696 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1697     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
1698     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
1699     }),
1700 wakaba 1.1 checker => $HTMLInlineChecker,
1701     };
1702    
1703     $Element->{$HTML_NS}->{map} = {
1704 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1705 wakaba 1.1 checker => $HTMLBlockChecker,
1706     };
1707    
1708     $Element->{$HTML_NS}->{area} = {
1709 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1710 wakaba 1.1 checker => $HTMLEmptyChecker,
1711     };
1712     ## TODO: only in map
1713    
1714     $Element->{$HTML_NS}->{table} = {
1715 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1716 wakaba 1.1 checker => sub {
1717 wakaba 1.4 my ($self, $todo) = @_;
1718     my $el = $todo->{node};
1719     my $new_todos = [];
1720 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1721    
1722     my $phase = 'before caption';
1723     my $has_tfoot;
1724     while (@nodes) {
1725     my $node = shift @nodes;
1726 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1727    
1728 wakaba 1.1 my $nt = $node->node_type;
1729     if ($nt == 1) {
1730 wakaba 1.8 my $node_ns = $node->namespace_uri;
1731     $node_ns = '' unless defined $node_ns;
1732     my $node_ln = $node->manakai_local_name;
1733 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1734 wakaba 1.1 if ($phase eq 'in tbodys') {
1735 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1736 wakaba 1.1 #$phase = 'in tbodys';
1737     } elsif (not $has_tfoot and
1738 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1739 wakaba 1.1 $phase = 'after tfoot';
1740     $has_tfoot = 1;
1741     } else {
1742 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1743 wakaba 1.1 }
1744     } elsif ($phase eq 'in trs') {
1745 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1746 wakaba 1.1 #$phase = 'in trs';
1747     } elsif (not $has_tfoot and
1748 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1749 wakaba 1.1 $phase = 'after tfoot';
1750     $has_tfoot = 1;
1751     } else {
1752 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1753 wakaba 1.1 }
1754     } elsif ($phase eq 'after thead') {
1755 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1756 wakaba 1.1 $phase = 'in tbodys';
1757 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1758 wakaba 1.1 $phase = 'in trs';
1759 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1760 wakaba 1.1 $phase = 'in tbodys';
1761     $has_tfoot = 1;
1762     } else {
1763 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1764 wakaba 1.1 }
1765     } elsif ($phase eq 'in colgroup') {
1766 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
1767 wakaba 1.1 $phase = 'in colgroup';
1768 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
1769 wakaba 1.1 $phase = 'after thead';
1770 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1771 wakaba 1.1 $phase = 'in tbodys';
1772 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1773 wakaba 1.1 $phase = 'in trs';
1774 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1775 wakaba 1.1 $phase = 'in tbodys';
1776     $has_tfoot = 1;
1777     } else {
1778 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1779 wakaba 1.1 }
1780     } elsif ($phase eq 'before caption') {
1781 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
1782 wakaba 1.1 $phase = 'in colgroup';
1783 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
1784 wakaba 1.1 $phase = 'in colgroup';
1785 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
1786 wakaba 1.1 $phase = 'after thead';
1787 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1788 wakaba 1.1 $phase = 'in tbodys';
1789 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1790 wakaba 1.1 $phase = 'in trs';
1791 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1792 wakaba 1.1 $phase = 'in tbodys';
1793     $has_tfoot = 1;
1794     } else {
1795 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1796 wakaba 1.1 }
1797     } else { # after tfoot
1798 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1799 wakaba 1.1 }
1800 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1801     unshift @nodes, @$sib;
1802 wakaba 1.4 push @$new_todos, @$ch;
1803 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1804     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1805 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1806 wakaba 1.1 }
1807     } elsif ($nt == 5) {
1808     unshift @nodes, @{$node->child_nodes};
1809     }
1810     }
1811 wakaba 1.4 return ($new_todos);
1812 wakaba 1.1 },
1813     };
1814    
1815     $Element->{$HTML_NS}->{caption} = {
1816 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1817 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1818     };
1819    
1820     $Element->{$HTML_NS}->{colgroup} = {
1821 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1822 wakaba 1.1 checker => sub {
1823 wakaba 1.4 my ($self, $todo) = @_;
1824     my $el = $todo->{node};
1825     my $new_todos = [];
1826 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1827    
1828     while (@nodes) {
1829     my $node = shift @nodes;
1830 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1831    
1832 wakaba 1.1 my $nt = $node->node_type;
1833     if ($nt == 1) {
1834 wakaba 1.8 my $node_ns = $node->namespace_uri;
1835     $node_ns = '' unless defined $node_ns;
1836     my $node_ln = $node->manakai_local_name;
1837 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1838 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
1839 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1840 wakaba 1.1 }
1841 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1842     unshift @nodes, @$sib;
1843 wakaba 1.4 push @$new_todos, @$ch;
1844 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1845     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1846 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1847 wakaba 1.1 }
1848     } elsif ($nt == 5) {
1849     unshift @nodes, @{$node->child_nodes};
1850     }
1851     }
1852 wakaba 1.4 return ($new_todos);
1853 wakaba 1.1 },
1854     };
1855    
1856     $Element->{$HTML_NS}->{col} = {
1857 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1858     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
1859     }),
1860 wakaba 1.1 checker => $HTMLEmptyChecker,
1861     };
1862    
1863     $Element->{$HTML_NS}->{tbody} = {
1864 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1865 wakaba 1.1 checker => sub {
1866 wakaba 1.4 my ($self, $todo) = @_;
1867     my $el = $todo->{node};
1868     my $new_todos = [];
1869 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1870    
1871     my $has_tr;
1872     while (@nodes) {
1873     my $node = shift @nodes;
1874 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1875    
1876 wakaba 1.1 my $nt = $node->node_type;
1877     if ($nt == 1) {
1878 wakaba 1.8 my $node_ns = $node->namespace_uri;
1879     $node_ns = '' unless defined $node_ns;
1880     my $node_ln = $node->manakai_local_name;
1881 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1882 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1883 wakaba 1.1 $has_tr = 1;
1884     } else {
1885 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1886 wakaba 1.1 }
1887 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1888     unshift @nodes, @$sib;
1889 wakaba 1.4 push @$new_todos, @$ch;
1890 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1891     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1892 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1893 wakaba 1.1 }
1894     } elsif ($nt == 5) {
1895     unshift @nodes, @{$node->child_nodes};
1896     }
1897     }
1898     unless ($has_tr) {
1899 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
1900 wakaba 1.1 }
1901 wakaba 1.4 return ($new_todos);
1902 wakaba 1.1 },
1903     };
1904    
1905     $Element->{$HTML_NS}->{thead} = {
1906 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1907 wakaba 1.1 checker => $Element->{$HTML_NS}->{tbody},
1908     };
1909    
1910     $Element->{$HTML_NS}->{tfoot} = {
1911 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1912 wakaba 1.1 checker => $Element->{$HTML_NS}->{tbody},
1913     };
1914    
1915     $Element->{$HTML_NS}->{tr} = {
1916 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1917 wakaba 1.1 checker => sub {
1918 wakaba 1.4 my ($self, $todo) = @_;
1919     my $el = $todo->{node};
1920     my $new_todos = [];
1921 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1922    
1923     my $has_td;
1924     while (@nodes) {
1925     my $node = shift @nodes;
1926 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1927    
1928 wakaba 1.1 my $nt = $node->node_type;
1929     if ($nt == 1) {
1930 wakaba 1.8 my $node_ns = $node->namespace_uri;
1931     $node_ns = '' unless defined $node_ns;
1932     my $node_ln = $node->manakai_local_name;
1933 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1934 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
1935 wakaba 1.1 $has_td = 1;
1936     } else {
1937 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1938 wakaba 1.1 }
1939 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1940     unshift @nodes, @$sib;
1941 wakaba 1.4 push @$new_todos, @$ch;
1942 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1943     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1944 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1945 wakaba 1.1 }
1946     } elsif ($nt == 5) {
1947     unshift @nodes, @{$node->child_nodes};
1948     }
1949     }
1950     unless ($has_td) {
1951 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
1952 wakaba 1.1 }
1953 wakaba 1.4 return ($new_todos);
1954 wakaba 1.1 },
1955     };
1956    
1957     $Element->{$HTML_NS}->{td} = {
1958 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1959     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
1960     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
1961     }),
1962 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1963     };
1964    
1965     $Element->{$HTML_NS}->{th} = {
1966 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1967     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
1968     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
1969     scope => $GetHTMLEnumeratedAttrChecker
1970     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
1971     }),
1972 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1973     };
1974    
1975 wakaba 1.12 ## TODO: table model error checking
1976    
1977 wakaba 1.1 ## TODO: forms
1978    
1979 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
1980 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
1981 wakaba 1.2 checker => sub {
1982 wakaba 1.4 my ($self, $todo) = @_;
1983 wakaba 1.2
1984 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1985     return $HTMLEmptyChecker->($self, $todo);
1986 wakaba 1.2 } else {
1987     ## NOTE: No content model conformance in HTML5 spec.
1988 wakaba 1.4 return $AnyChecker->($self, $todo);
1989 wakaba 1.2 }
1990     },
1991     };
1992    
1993     ## NOTE: When script is disabled.
1994     $Element->{$HTML_NS}->{noscript} = {
1995 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1996 wakaba 1.2 checker => sub {
1997 wakaba 1.4 my ($self, $todo) = @_;
1998 wakaba 1.1
1999 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2000 wakaba 1.4 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2001 wakaba 1.2 push @$sib, $end;
2002     return ($sib, $ch);
2003     },
2004     };
2005 wakaba 1.1
2006     $Element->{$HTML_NS}->{'event-source'} = {
2007 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2008     src => $HTMLURIAttrChecker,
2009     }),
2010 wakaba 1.1 checker => $HTMLEmptyChecker,
2011     };
2012    
2013     $Element->{$HTML_NS}->{details} = {
2014 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2015     open => $GetHTMLBooleanAttrChecker->('open'),
2016     }),
2017 wakaba 1.6 checker => sub {
2018     my ($self, $todo) = @_;
2019    
2020     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2021     my ($sib, $ch)
2022     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2023     ->($self, $todo);
2024     push @$sib, $end;
2025     return ($sib, $ch);
2026     },
2027 wakaba 1.1 };
2028    
2029     $Element->{$HTML_NS}->{datagrid} = {
2030 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2031     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2032     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2033     }),
2034 wakaba 1.6 checker => sub {
2035     my ($self, $todo) = @_;
2036    
2037     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2038     my ($sib, $ch) = $HTMLBlockChecker->($self, $todo);
2039     push @$sib, $end;
2040     return ($sib, $ch);
2041     },
2042 wakaba 1.1 };
2043    
2044     $Element->{$HTML_NS}->{command} = {
2045 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2046 wakaba 1.1 checker => $HTMLEmptyChecker,
2047     };
2048    
2049     $Element->{$HTML_NS}->{menu} = {
2050 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2051 wakaba 1.1 checker => sub {
2052 wakaba 1.4 my ($self, $todo) = @_;
2053     my $el = $todo->{node};
2054     my $new_todos = [];
2055 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2056    
2057     my $content = 'li or inline';
2058     while (@nodes) {
2059     my $node = shift @nodes;
2060 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2061    
2062 wakaba 1.1 my $nt = $node->node_type;
2063     if ($nt == 1) {
2064 wakaba 1.2 my $node_ns = $node->namespace_uri;
2065     $node_ns = '' unless defined $node_ns;
2066     my $node_ln = $node->manakai_local_name;
2067 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2068 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2069 wakaba 1.1 if ($content eq 'inline') {
2070 wakaba 1.6 $not_allowed = 1;
2071 wakaba 1.1 } elsif ($content eq 'li or inline') {
2072     $content = 'li';
2073     }
2074     } else {
2075 wakaba 1.7 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2076     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2077     $content = 'inline';
2078     } else {
2079 wakaba 1.6 $not_allowed = 1;
2080 wakaba 1.7 }
2081 wakaba 1.1 }
2082 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
2083     if $not_allowed;
2084 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
2085     unshift @nodes, @$sib;
2086 wakaba 1.4 push @$new_todos, @$ch;
2087 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2088     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2089     if ($content eq 'li') {
2090 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2091 wakaba 1.1 } elsif ($content eq 'li or inline') {
2092     $content = 'inline';
2093     }
2094     }
2095     } elsif ($nt == 5) {
2096     unshift @nodes, @{$node->child_nodes};
2097     }
2098     }
2099 wakaba 1.4
2100     for (@$new_todos) {
2101     $_->{inline} = 1;
2102     }
2103     return ($new_todos);
2104 wakaba 1.1 },
2105     };
2106    
2107 wakaba 1.6 $Element->{$HTML_NS}->{legend} = {
2108 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2109 wakaba 1.6 checker => sub {
2110     my ($self, $todo) = @_;
2111    
2112     my $parent = $todo->{node}->manakai_parent_element;
2113     if (defined $parent) {
2114     my $nsuri = $parent->namespace_uri;
2115     $nsuri = '' unless defined $nsuri;
2116     my $ln = $parent->manakai_local_name;
2117     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
2118     return $HTMLInlineChecker->($self, $todo);
2119     } else {
2120     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
2121     }
2122     } else {
2123     return $HTMLInlineChecker->($self, $todo);
2124     }
2125    
2126     ## ISSUE: Content model is defined only for fieldset/legend,
2127     ## details/legend, and figure/legend.
2128     },
2129     };
2130 wakaba 1.1
2131     $Element->{$HTML_NS}->{div} = {
2132 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2133 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
2134 wakaba 1.1 };
2135    
2136     $Element->{$HTML_NS}->{font} = {
2137 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2138 wakaba 1.1 checker => $HTMLTransparentChecker,
2139     };
2140    
2141 wakaba 1.2 sub new ($) {
2142     return bless {}, shift;
2143     } # new
2144    
2145 wakaba 1.1 sub check_element ($$$) {
2146     my ($self, $el, $onerror) = @_;
2147    
2148 wakaba 1.2 $self->{minuses} = {};
2149     $self->{onerror} = $onerror;
2150 wakaba 1.10 $self->{id} = {};
2151 wakaba 1.2
2152 wakaba 1.4 my @todo = ({type => 'element', node => $el});
2153     while (@todo) {
2154     my $todo = shift @todo;
2155     if ($todo->{type} eq 'element') {
2156 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2157     if (defined $prefix and $prefix eq 'xmlns') {
2158     $self->{onerror}
2159     ->(node => $todo->{node},
2160     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2161     }
2162 wakaba 1.4 my $nsuri = $todo->{node}->namespace_uri;
2163     $nsuri = '' unless defined $nsuri;
2164     my $ln = $todo->{node}->manakai_local_name;
2165     my $eldef = $Element->{$nsuri}->{$ln} ||
2166     $Element->{$nsuri}->{''} ||
2167     $ElementDefault;
2168 wakaba 1.9 $eldef->{attrs_checker}->($self, $todo);
2169 wakaba 1.4 my ($new_todos) = $eldef->{checker}->($self, $todo);
2170     push @todo, @$new_todos;
2171 wakaba 1.9 } elsif ($todo->{type} eq 'element-attributes') {
2172 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2173     if (defined $prefix and $prefix eq 'xmlns') {
2174     $self->{onerror}
2175     ->(node => $todo->{node},
2176     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2177     }
2178 wakaba 1.9 my $nsuri = $todo->{node}->namespace_uri;
2179     $nsuri = '' unless defined $nsuri;
2180     my $ln = $todo->{node}->manakai_local_name;
2181     my $eldef = $Element->{$nsuri}->{$ln} ||
2182     $Element->{$nsuri}->{''} ||
2183     $ElementDefault;
2184     $eldef->{attrs_checker}->($self, $todo);
2185 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
2186     $self->_remove_minuses ($todo);
2187     }
2188 wakaba 1.1 }
2189     } # check_element
2190    
2191 wakaba 1.2 sub _add_minuses ($@) {
2192     my $self = shift;
2193     my $r = {};
2194     for my $list (@_) {
2195     for my $ns (keys %$list) {
2196     for my $ln (keys %{$list->{$ns}}) {
2197     unless ($self->{minuses}->{$ns}->{$ln}) {
2198     $self->{minuses}->{$ns}->{$ln} = 1;
2199     $r->{$ns}->{$ln} = 1;
2200     }
2201     }
2202     }
2203     }
2204 wakaba 1.4 return {type => 'plus', list => $r};
2205 wakaba 1.2 } # _add_minuses
2206    
2207     sub _remove_minuses ($$) {
2208 wakaba 1.4 my ($self, $todo) = @_;
2209     for my $ns (keys %{$todo->{list}}) {
2210     for my $ln (keys %{$todo->{list}->{$ns}}) {
2211     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
2212 wakaba 1.2 }
2213     }
2214     1;
2215     } # _remove_minuses
2216    
2217     sub _check_get_children ($$) {
2218     my ($self, $node) = @_;
2219 wakaba 1.4 my $new_todos = [];
2220 wakaba 1.2 my $sib = [];
2221     TP: {
2222     my $node_ns = $node->namespace_uri;
2223     $node_ns = '' unless defined $node_ns;
2224     my $node_ln = $node->manakai_local_name;
2225     if ($node_ns eq $HTML_NS) {
2226     if ($node_ln eq 'noscript') {
2227     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
2228     push @$sib, $end;
2229     }
2230     }
2231 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
2232     unshift @$sib, @{$node->child_nodes};
2233 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2234 wakaba 1.7 last TP;
2235 wakaba 1.2 }
2236 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
2237 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
2238     unshift @$sib, @{$node->child_nodes};
2239 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2240 wakaba 1.2 last TP;
2241     } else {
2242     my @cn = @{$node->child_nodes};
2243     CN: while (@cn) {
2244     my $cn = shift @cn;
2245     my $cnt = $cn->node_type;
2246     if ($cnt == 1) {
2247 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
2248     $cn_nsuri = '' unless defined $cn_nsuri;
2249     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
2250 wakaba 1.2 #
2251     } else {
2252     last CN;
2253     }
2254     } elsif ($cnt == 3 or $cnt == 4) {
2255     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
2256     last CN;
2257     }
2258     }
2259     } # CN
2260     unshift @$sib, @cn;
2261     }
2262     }
2263 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
2264 wakaba 1.2 } # TP
2265 wakaba 1.4 return ($sib, $new_todos);
2266 wakaba 1.2 } # _check_get_children
2267    
2268 wakaba 1.1 1;
2269 wakaba 1.13 # $Date: 2007/05/19 10:11:32 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24