/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download)
Sat May 19 03:49:58 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.8: +153 -5 lines
++ whatpm/t/ChangeLog	19 May 2007 03:49:54 -0000
2007-05-19  Wakaba  <wakaba@suika.fam.cx>

	* content-model-2.dat: New test.

	* ContentChecker.t (@FILES): |content-model-2.dat| is added.
	(get_node_path): |ATTRIBUTE_NODE| support.

	* content-model-1.dat: New tests for unknown
	element in HTML namespace.

	* ContentChecker.t (manakai_element_type_match): Removed.
++ whatpm/Whatpm/ChangeLog	19 May 2007 03:48:44 -0000
2007-05-19  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm ($AttrChecker, $HTMLAttrChecker,
	$AnyChecker->{attr_checker}, $HTMLAttrsChecker,
	$Element->{$HTML_NS}->{''}): New.
	(check_element): Invoke attrs_checker for each element.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
5     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
6    
7     my $AttrChecker = {
8     $XML_NS => {
9     ## TODO: xml:space, xml:base, xml:lang, xml:id
10     },
11     $XMLNS_NS => {
12     '' => sub {}, ## TODO: implement
13     xmlns => sub {}, ## TODO: implement
14     },
15     };
16    
17 wakaba 1.3 ## ANY
18     my $AnyChecker = sub {
19 wakaba 1.4 my ($self, $todo) = @_;
20     my $el = $todo->{node};
21     my $new_todos = [];
22 wakaba 1.3 my @nodes = (@{$el->child_nodes});
23     while (@nodes) {
24     my $node = shift @nodes;
25     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
26    
27     my $nt = $node->node_type;
28     if ($nt == 1) {
29     my $node_ns = $node->namespace_uri;
30     $node_ns = '' unless defined $node_ns;
31     my $node_ln = $node->manakai_local_name;
32     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
33     $self->{onerror}->(node => $node, type => 'element not allowed');
34     }
35 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
36 wakaba 1.3 } elsif ($nt == 5) {
37     unshift @nodes, @{$node->child_nodes};
38     }
39     }
40 wakaba 1.4 return ($new_todos);
41 wakaba 1.3 }; # $AnyChecker
42    
43 wakaba 1.1 my $ElementDefault = {
44     checker => sub {
45 wakaba 1.4 my ($self, $todo) = @_;
46     $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
47     return $AnyChecker->($self, $todo);
48 wakaba 1.1 },
49 wakaba 1.9 attrs_checker => sub {
50     my ($self, $todo) = @_;
51     for my $attr (@{$todo->{node}->attributes}) {
52     my $attr_ns = $attr->namespace_uri;
53     $attr_ns = '' unless defined $attr_ns;
54     my $attr_ln = $attr->manakai_local_name;
55     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
56     || $AttrChecker->{$attr_ns}->{''};
57     if ($checker) {
58     $checker->($self, $attr);
59     }
60     ## Don't check otherwise, since "element type not supported" warning
61     ## will be reported by the element checker.
62     }
63     },
64 wakaba 1.1 };
65    
66     my $Element = {};
67    
68     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
69    
70 wakaba 1.7 my $HTMLMetadataElements = {
71     $HTML_NS => {
72     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1/,
73     },
74     };
75 wakaba 1.1
76 wakaba 1.2 my $HTMLSectioningElements = {
77     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
78     };
79 wakaba 1.1
80 wakaba 1.7 my $HTMLBlockLevelElements = {
81     $HTML_NS => {
82     qw/
83     section 1 nav 1 article 1 blockquote 1 aside 1
84     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
85     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
86     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
87     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
88     /,
89     },
90     };
91    
92     my $HTMLStrictlyInlineLevelElements = {
93     $HTML_NS => {
94     qw/
95     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
96     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
97     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
98     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
99     script 1 noscript 1 event-source 1 command 1 font 1
100     /,
101     },
102     };
103    
104     my $HTMLStructuredInlineLevelElements = {
105     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
106     };
107 wakaba 1.1
108 wakaba 1.6 my $HTMLInteractiveElements = {
109     $HTML_NS => {a => 1, details => 1, datagrid => 1},
110     };
111     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
112     ## of interactive elements
113 wakaba 1.1
114 wakaba 1.7 my $HTMLTransparentElements = {
115     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
116     ## NOTE: |html:noscript| is transparent if scripting is disabled.
117     };
118    
119     #my $HTMLSemiTransparentElements = {
120     # $HTML_NS => {qw/video 1 audio 1/},
121     #};
122    
123     my $HTMLEmbededElements = {
124     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
125     };
126 wakaba 1.1
127     ## Empty
128     my $HTMLEmptyChecker = sub {
129 wakaba 1.4 my ($self, $todo) = @_;
130     my $el = $todo->{node};
131     my $new_todos = [];
132 wakaba 1.1 my @nodes = (@{$el->child_nodes});
133    
134     while (@nodes) {
135     my $node = shift @nodes;
136 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
137    
138 wakaba 1.1 my $nt = $node->node_type;
139     if ($nt == 1) {
140 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
141     $self->{onerror}->(node => $node, type => 'element not allowed');
142     my ($sib, $ch) = $self->_check_get_children ($node);
143     unshift @nodes, @$sib;
144 wakaba 1.4 push @$new_todos, @$ch;
145 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
146 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
147     $self->{onerror}->(node => $node, type => 'character not allowed');
148     }
149 wakaba 1.1 } elsif ($nt == 5) {
150     unshift @nodes, @{$node->child_nodes};
151     }
152     }
153 wakaba 1.4 return ($new_todos);
154 wakaba 1.1 };
155    
156     ## Text
157     my $HTMLTextChecker = sub {
158 wakaba 1.4 my ($self, $todo) = @_;
159     my $el = $todo->{node};
160     my $new_todos = [];
161 wakaba 1.1 my @nodes = (@{$el->child_nodes});
162    
163     while (@nodes) {
164     my $node = shift @nodes;
165 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
166    
167 wakaba 1.1 my $nt = $node->node_type;
168     if ($nt == 1) {
169 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
170     $self->{onerror}->(node => $node, type => 'element not allowed');
171     my ($sib, $ch) = $self->_check_get_children ($node);
172     unshift @nodes, @$sib;
173 wakaba 1.4 push @$new_todos, @$ch;
174 wakaba 1.1 } elsif ($nt == 5) {
175     unshift @nodes, @{$node->child_nodes};
176     }
177     }
178 wakaba 1.4 return ($new_todos);
179 wakaba 1.1 };
180    
181     ## Zero or more |html:style| elements,
182     ## followed by zero or more block-level elements
183     my $HTMLStylableBlockChecker = sub {
184 wakaba 1.4 my ($self, $todo) = @_;
185     my $el = $todo->{node};
186     my $new_todos = [];
187 wakaba 1.1 my @nodes = (@{$el->child_nodes});
188    
189     my $has_non_style;
190     while (@nodes) {
191     my $node = shift @nodes;
192 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
193    
194 wakaba 1.1 my $nt = $node->node_type;
195     if ($nt == 1) {
196 wakaba 1.2 my $node_ns = $node->namespace_uri;
197     $node_ns = '' unless defined $node_ns;
198     my $node_ln = $node->manakai_local_name;
199 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
200 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
201 wakaba 1.6 $not_allowed = 1 if $has_non_style;
202 wakaba 1.7 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
203     $has_non_style = 1;
204 wakaba 1.1 } else {
205     $has_non_style = 1;
206 wakaba 1.7 $not_allowed = 1;
207 wakaba 1.1 }
208 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
209     if $not_allowed;
210 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
211     unshift @nodes, @$sib;
212 wakaba 1.4 push @$new_todos, @$ch;
213 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
214     if ($node->data =~ /[^\x09-\x0D\x20]/) {
215 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
216 wakaba 1.1 }
217     } elsif ($nt == 5) {
218     unshift @nodes, @{$node->child_nodes};
219     }
220     }
221 wakaba 1.4 return ($new_todos);
222 wakaba 1.1 }; # $HTMLStylableBlockChecker
223    
224     ## Zero or more block-level elements
225     my $HTMLBlockChecker = sub {
226 wakaba 1.4 my ($self, $todo) = @_;
227     my $el = $todo->{node};
228     my $new_todos = [];
229 wakaba 1.1 my @nodes = (@{$el->child_nodes});
230    
231     while (@nodes) {
232     my $node = shift @nodes;
233 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
234    
235 wakaba 1.1 my $nt = $node->node_type;
236     if ($nt == 1) {
237 wakaba 1.2 my $node_ns = $node->namespace_uri;
238     $node_ns = '' unless defined $node_ns;
239     my $node_ln = $node->manakai_local_name;
240 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
241 wakaba 1.7 $not_allowed = 1
242     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
243 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
244     if $not_allowed;
245 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
246     unshift @nodes, @$sib;
247 wakaba 1.4 push @$new_todos, @$ch;
248 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
249     if ($node->data =~ /[^\x09-\x0D\x20]/) {
250 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
251 wakaba 1.1 }
252     } elsif ($nt == 5) {
253     unshift @nodes, @{$node->child_nodes};
254     }
255     }
256 wakaba 1.4 return ($new_todos);
257 wakaba 1.1 }; # $HTMLBlockChecker
258    
259     ## Inline-level content
260     my $HTMLInlineChecker = sub {
261 wakaba 1.4 my ($self, $todo) = @_;
262     my $el = $todo->{node};
263     my $new_todos = [];
264 wakaba 1.1 my @nodes = (@{$el->child_nodes});
265    
266     while (@nodes) {
267     my $node = shift @nodes;
268 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
269    
270 wakaba 1.1 my $nt = $node->node_type;
271     if ($nt == 1) {
272 wakaba 1.2 my $node_ns = $node->namespace_uri;
273     $node_ns = '' unless defined $node_ns;
274     my $node_ln = $node->manakai_local_name;
275 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
276 wakaba 1.7 $not_allowed = 1
277     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
278     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
279 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
280     if $not_allowed;
281 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
282     unshift @nodes, @$sib;
283 wakaba 1.4 push @$new_todos, @$ch;
284 wakaba 1.1 } elsif ($nt == 5) {
285     unshift @nodes, @{$node->child_nodes};
286     }
287     }
288 wakaba 1.4
289     for (@$new_todos) {
290     $_->{inline} = 1;
291     }
292     return ($new_todos);
293     }; # $HTMLInlineChecker
294 wakaba 1.1
295     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
296     ## TODO: check significant content
297    
298     ## Strictly inline-level content
299     my $HTMLStrictlyInlineChecker = sub {
300 wakaba 1.4 my ($self, $todo) = @_;
301     my $el = $todo->{node};
302     my $new_todos = [];
303 wakaba 1.1 my @nodes = (@{$el->child_nodes});
304    
305     while (@nodes) {
306     my $node = shift @nodes;
307 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
308    
309 wakaba 1.1 my $nt = $node->node_type;
310     if ($nt == 1) {
311 wakaba 1.2 my $node_ns = $node->namespace_uri;
312     $node_ns = '' unless defined $node_ns;
313     my $node_ln = $node->manakai_local_name;
314 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
315 wakaba 1.7 $not_allowed = 1
316     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
317 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
318     if $not_allowed;
319 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
320     unshift @nodes, @$sib;
321 wakaba 1.4 push @$new_todos, @$ch;
322 wakaba 1.1 } elsif ($nt == 5) {
323     unshift @nodes, @{$node->child_nodes};
324     }
325     }
326 wakaba 1.4
327     for (@$new_todos) {
328     $_->{inline} = 1;
329     $_->{strictly_inline} = 1;
330     }
331     return ($new_todos);
332 wakaba 1.1 }; # $HTMLStrictlyInlineChecker
333    
334     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
335     ## TODO: check significant content
336    
337 wakaba 1.4 ## Inline-level or strictly inline-kevek content
338     my $HTMLInlineOrStrictlyInlineChecker = sub {
339     my ($self, $todo) = @_;
340     my $el = $todo->{node};
341     my $new_todos = [];
342     my @nodes = (@{$el->child_nodes});
343    
344     while (@nodes) {
345     my $node = shift @nodes;
346     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
347    
348     my $nt = $node->node_type;
349     if ($nt == 1) {
350     my $node_ns = $node->namespace_uri;
351     $node_ns = '' unless defined $node_ns;
352     my $node_ln = $node->manakai_local_name;
353 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
354 wakaba 1.7 if ($todo->{strictly_inline}) {
355     $not_allowed = 1
356     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
357     } else {
358     $not_allowed = 1
359     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
360     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
361     }
362 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
363     if $not_allowed;
364 wakaba 1.4 my ($sib, $ch) = $self->_check_get_children ($node);
365     unshift @nodes, @$sib;
366     push @$new_todos, @$ch;
367     } elsif ($nt == 5) {
368     unshift @nodes, @{$node->child_nodes};
369     }
370     }
371    
372     for (@$new_todos) {
373     $_->{inline} = 1;
374     $_->{strictly_inline} = 1;
375     }
376     return ($new_todos);
377     }; # $HTMLInlineOrStrictlyInlineChecker
378    
379 wakaba 1.6 my $HTMLSignificantInlineOrStrictlyInlineChecker
380     = $HTMLInlineOrStrictlyInlineChecker;
381     ## TODO: check significant content
382    
383 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
384 wakaba 1.4 my ($self, $todo) = @_;
385     my $el = $todo->{node};
386     my $new_todos = [];
387 wakaba 1.1 my @nodes = (@{$el->child_nodes});
388    
389     my $content = 'block-or-inline'; # or 'block' or 'inline'
390     my @block_not_inline;
391     while (@nodes) {
392     my $node = shift @nodes;
393 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
394    
395 wakaba 1.1 my $nt = $node->node_type;
396     if ($nt == 1) {
397 wakaba 1.2 my $node_ns = $node->namespace_uri;
398     $node_ns = '' unless defined $node_ns;
399     my $node_ln = $node->manakai_local_name;
400 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
401 wakaba 1.1 if ($content eq 'block') {
402 wakaba 1.7 $not_allowed = 1
403     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
404 wakaba 1.1 } elsif ($content eq 'inline') {
405 wakaba 1.7 $not_allowed = 1
406     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
407     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
408 wakaba 1.1 } else {
409 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
410     my $is_inline
411     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
412     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
413 wakaba 1.1
414 wakaba 1.6 push @block_not_inline, $node
415     if $is_block and not $is_inline and not $not_allowed;
416 wakaba 1.1 unless ($is_block) {
417     $content = 'inline';
418     for (@block_not_inline) {
419 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
420 wakaba 1.1 }
421 wakaba 1.6 $not_allowed = 1 unless $is_inline;
422 wakaba 1.1 }
423     }
424 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
425     if $not_allowed;
426 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
427     unshift @nodes, @$sib;
428 wakaba 1.4 push @$new_todos, @$ch;
429 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
430     if ($node->data =~ /[^\x09-\x0D\x20]/) {
431     if ($content eq 'block') {
432 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
433 wakaba 1.1 } else {
434     $content = 'inline';
435     for (@block_not_inline) {
436 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
437 wakaba 1.1 }
438     }
439     }
440     } elsif ($nt == 5) {
441     unshift @nodes, @{$node->child_nodes};
442     }
443     }
444 wakaba 1.4
445     if ($content eq 'inline') {
446     for (@$new_todos) {
447     $_->{inline} = 1;
448     }
449     }
450     return ($new_todos);
451 wakaba 1.1 };
452    
453 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
454     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
455     my ($elnsuri, $ellname) = @_;
456     return sub {
457 wakaba 1.4 my ($self, $todo) = @_;
458     my $el = $todo->{node};
459     my $new_todos = [];
460 wakaba 1.2 my @nodes = (@{$el->child_nodes});
461    
462     my $has_non_style;
463     my $content = 'block-or-inline'; # or 'block' or 'inline'
464     my @block_not_inline;
465     while (@nodes) {
466     my $node = shift @nodes;
467     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
468    
469     my $nt = $node->node_type;
470     if ($nt == 1) {
471     my $node_ns = $node->namespace_uri;
472     $node_ns = '' unless defined $node_ns;
473     my $node_ln = $node->manakai_local_name;
474 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
475 wakaba 1.8 if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
476 wakaba 1.6 $not_allowed = 1 if $has_non_style;
477 wakaba 1.2 } elsif ($content eq 'block') {
478     $has_non_style = 1;
479 wakaba 1.7 $not_allowed = 1
480     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
481 wakaba 1.2 } elsif ($content eq 'inline') {
482     $has_non_style = 1;
483 wakaba 1.7 $not_allowed = 1
484     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
485     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
486 wakaba 1.2 } else {
487     $has_non_style = 1;
488 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
489     my $is_inline
490     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
491     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
492 wakaba 1.2
493 wakaba 1.6 push @block_not_inline, $node
494     if $is_block and not $is_inline and not $not_allowed;
495 wakaba 1.2 unless ($is_block) {
496     $content = 'inline';
497     for (@block_not_inline) {
498     $self->{onerror}->(node => $_, type => 'element not allowed');
499     }
500 wakaba 1.6 $not_allowed = 1 unless $is_inline;
501 wakaba 1.1 }
502     }
503 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
504     if $not_allowed;
505 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
506     unshift @nodes, @$sib;
507 wakaba 1.4 push @$new_todos, @$ch;
508 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
509     if ($node->data =~ /[^\x09-\x0D\x20]/) {
510     $has_non_style = 1;
511     if ($content eq 'block') {
512     $self->{onerror}->(node => $node, type => 'character not allowed');
513     } else {
514     $content = 'inline';
515     for (@block_not_inline) {
516     $self->{onerror}->(node => $_, type => 'element not allowed');
517     }
518 wakaba 1.1 }
519     }
520 wakaba 1.2 } elsif ($nt == 5) {
521     unshift @nodes, @{$node->child_nodes};
522 wakaba 1.1 }
523     }
524 wakaba 1.4
525     if ($content eq 'inline') {
526     for (@$new_todos) {
527     $_->{inline} = 1;
528     }
529     }
530     return ($new_todos);
531 wakaba 1.2 };
532     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
533 wakaba 1.1
534     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
535    
536 wakaba 1.9 my $HTMLAttrChecker = {
537    
538     };
539    
540     my $HTMLAttrsChecker = sub {
541     my ($self, $todo) = @_;
542     for my $attr (@{$todo->{node}->attributes}) {
543     my $attr_ns = $attr->namespace_uri;
544     $attr_ns = '' unless defined $attr_ns;
545     my $attr_ln = $attr->manakai_local_name;
546     my $checker = $attr_ns eq '' ? $HTMLAttrChecker->{$attr_ln} : undef
547     || $AttrChecker->{$attr_ns}->{$attr_ln}
548     || $AttrChecker->{$attr_ns}->{''};
549     if ($checker) {
550     $checker->($self, $attr);
551     } else {
552     $self->{onerror}->(node => $attr, type => 'attribute not supported');
553     ## ISSUE: No comformance createria for unknown attributes in the spec
554     }
555     }
556     }; # $HTMLAttrsChecker
557    
558     $Element->{$HTML_NS}->{''} = {
559     attrs_checker => $HTMLAttrsChecker,
560     checker => $ElementDefault->{checker},
561     };
562    
563 wakaba 1.1 $Element->{$HTML_NS}->{html} = {
564 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
565 wakaba 1.1 checker => sub {
566 wakaba 1.4 my ($self, $todo) = @_;
567     my $el = $todo->{node};
568     my $new_todos = [];
569 wakaba 1.1 my @nodes = (@{$el->child_nodes});
570    
571     my $phase = 'before head';
572     while (@nodes) {
573     my $node = shift @nodes;
574 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
575    
576 wakaba 1.1 my $nt = $node->node_type;
577     if ($nt == 1) {
578 wakaba 1.2 my $node_ns = $node->namespace_uri;
579     $node_ns = '' unless defined $node_ns;
580     my $node_ln = $node->manakai_local_name;
581 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
582 wakaba 1.1 if ($phase eq 'before head') {
583 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
584 wakaba 1.1 $phase = 'after head';
585 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
586     $self->{onerror}->(node => $node, type => 'ps element missing:head');
587 wakaba 1.1 $phase = 'after body';
588     } else {
589 wakaba 1.6 $not_allowed = 1;
590 wakaba 1.1 # before head
591     }
592     } elsif ($phase eq 'after head') {
593 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
594 wakaba 1.1 $phase = 'after body';
595     } else {
596 wakaba 1.6 $not_allowed = 1;
597 wakaba 1.1 # after head
598     }
599     } else { #elsif ($phase eq 'after body') {
600 wakaba 1.6 $not_allowed = 1;
601 wakaba 1.1 # after body
602     }
603 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
604     if $not_allowed;
605 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
606     unshift @nodes, @$sib;
607 wakaba 1.4 push @$new_todos, @$ch;
608 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
609     if ($node->data =~ /[^\x09-\x0D\x20]/) {
610 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
611 wakaba 1.1 }
612     } elsif ($nt == 5) {
613     unshift @nodes, @{$node->child_nodes};
614     }
615     }
616 wakaba 1.3
617     if ($phase eq 'before head') {
618     $self->{onerror}->(node => $el, type => 'child element missing:head');
619     $self->{onerror}->(node => $el, type => 'child element missing:body');
620     } elsif ($phase eq 'after head') {
621     $self->{onerror}->(node => $el, type => 'child element missing:body');
622     }
623    
624 wakaba 1.4 return ($new_todos);
625 wakaba 1.1 },
626     };
627    
628     $Element->{$HTML_NS}->{head} = {
629 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
630 wakaba 1.1 checker => sub {
631 wakaba 1.4 my ($self, $todo) = @_;
632     my $el = $todo->{node};
633     my $new_todos = [];
634 wakaba 1.1 my @nodes = (@{$el->child_nodes});
635    
636     my $has_title;
637 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
638 wakaba 1.1 while (@nodes) {
639     my $node = shift @nodes;
640 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
641    
642 wakaba 1.1 my $nt = $node->node_type;
643     if ($nt == 1) {
644 wakaba 1.2 my $node_ns = $node->namespace_uri;
645     $node_ns = '' unless defined $node_ns;
646     my $node_ln = $node->manakai_local_name;
647 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
648 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
649 wakaba 1.3 $phase = 'after base';
650 wakaba 1.1 unless ($has_title) {
651     $has_title = 1;
652     } else {
653 wakaba 1.6 $not_allowed = 1;
654 wakaba 1.1 }
655 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
656 wakaba 1.1 if ($node->has_attribute_ns (undef, 'charset')) {
657 wakaba 1.3 if ($phase eq 'initial') {
658     $phase = 'after charset';
659 wakaba 1.1 } else {
660 wakaba 1.6 $not_allowed = 1;
661 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
662 wakaba 1.1 }
663     } else {
664 wakaba 1.3 $phase = 'after base';
665 wakaba 1.1 }
666 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
667 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
668     $phase = 'after base';
669 wakaba 1.1 } else {
670 wakaba 1.6 $not_allowed = 1;
671 wakaba 1.1 }
672 wakaba 1.7 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
673     $phase = 'after base';
674 wakaba 1.1 } else {
675 wakaba 1.7 $not_allowed = 1;
676 wakaba 1.1 }
677 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
678     if $not_allowed;
679 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
680     unshift @nodes, @$sib;
681 wakaba 1.4 push @$new_todos, @$ch;
682 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
683     if ($node->data =~ /[^\x09-\x0D\x20]/) {
684 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
685 wakaba 1.1 }
686     } elsif ($nt == 5) {
687     unshift @nodes, @{$node->child_nodes};
688     }
689     }
690     unless ($has_title) {
691 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
692 wakaba 1.1 }
693 wakaba 1.4 return ($new_todos);
694 wakaba 1.1 },
695     };
696    
697     $Element->{$HTML_NS}->{title} = {
698 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
699 wakaba 1.1 checker => $HTMLTextChecker,
700     };
701    
702     $Element->{$HTML_NS}->{base} = {
703 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
704 wakaba 1.1 checker => $HTMLEmptyChecker,
705     };
706    
707     $Element->{$HTML_NS}->{link} = {
708 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
709 wakaba 1.1 checker => $HTMLEmptyChecker,
710     };
711    
712     $Element->{$HTML_NS}->{meta} = {
713 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
714 wakaba 1.1 checker => $HTMLEmptyChecker,
715     };
716    
717     ## NOTE: |html:style| has no conformance creteria on content model
718 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
719 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
720 wakaba 1.3 checker => $AnyChecker,
721     };
722 wakaba 1.1
723     $Element->{$HTML_NS}->{body} = {
724 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
725 wakaba 1.1 checker => $HTMLBlockChecker,
726     };
727    
728     $Element->{$HTML_NS}->{section} = {
729 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
730 wakaba 1.1 checker => $HTMLStylableBlockChecker,
731     };
732    
733     $Element->{$HTML_NS}->{nav} = {
734 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
735 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
736     };
737    
738     $Element->{$HTML_NS}->{article} = {
739 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
740 wakaba 1.1 checker => $HTMLStylableBlockChecker,
741     };
742    
743     $Element->{$HTML_NS}->{blockquote} = {
744 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
745 wakaba 1.1 checker => $HTMLBlockChecker,
746     };
747    
748     $Element->{$HTML_NS}->{aside} = {
749 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
750 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
751 wakaba 1.1 };
752    
753     $Element->{$HTML_NS}->{h1} = {
754 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
755 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
756     };
757    
758     $Element->{$HTML_NS}->{h2} = {
759 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
760 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
761     };
762    
763     $Element->{$HTML_NS}->{h3} = {
764 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
765 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
766     };
767    
768     $Element->{$HTML_NS}->{h4} = {
769 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
770 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
771     };
772    
773     $Element->{$HTML_NS}->{h5} = {
774 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
775 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
776     };
777    
778     $Element->{$HTML_NS}->{h6} = {
779 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
780 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
781     };
782    
783     ## TODO: header
784    
785 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
786 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
787 wakaba 1.2 checker => sub { ## block -hn -header -footer -sectioning or inline
788 wakaba 1.4 my ($self, $todo) = @_;
789     my $el = $todo->{node};
790     my $new_todos = [];
791 wakaba 1.2 my @nodes = (@{$el->child_nodes});
792    
793     my $content = 'block-or-inline'; # or 'block' or 'inline'
794     my @block_not_inline;
795     while (@nodes) {
796     my $node = shift @nodes;
797     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
798    
799     my $nt = $node->node_type;
800     if ($nt == 1) {
801     my $node_ns = $node->namespace_uri;
802     $node_ns = '' unless defined $node_ns;
803     my $node_ln = $node->manakai_local_name;
804 wakaba 1.6 my $not_allowed;
805 wakaba 1.2 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
806 wakaba 1.6 $not_allowed = 1;
807 wakaba 1.2 } elsif ($node_ns eq $HTML_NS and
808     {
809     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
810     }->{$node_ln}) {
811 wakaba 1.6 $not_allowed = 1;
812 wakaba 1.2 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
813 wakaba 1.6 $not_allowed = 1;
814 wakaba 1.2 }
815     if ($content eq 'block') {
816 wakaba 1.7 $not_allowed = 1
817     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
818 wakaba 1.2 } elsif ($content eq 'inline') {
819 wakaba 1.7 $not_allowed = 1
820     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
821     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
822 wakaba 1.2 } else {
823 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
824     my $is_inline
825     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
826     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
827 wakaba 1.2
828 wakaba 1.6 push @block_not_inline, $node
829     if $is_block and not $is_inline and not $not_allowed;
830 wakaba 1.2 unless ($is_block) {
831     $content = 'inline';
832     for (@block_not_inline) {
833     $self->{onerror}->(node => $_, type => 'element not allowed');
834     }
835 wakaba 1.6 $not_allowed = 1 unless $is_inline;
836 wakaba 1.2 }
837     }
838 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
839     if $not_allowed;
840 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
841     unshift @nodes, @$sib;
842 wakaba 1.4 push @$new_todos, @$ch;
843 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
844     if ($node->data =~ /[^\x09-\x0D\x20]/) {
845     if ($content eq 'block') {
846     $self->{onerror}->(node => $node, type => 'character not allowed');
847     } else {
848     $content = 'inline';
849     for (@block_not_inline) {
850     $self->{onerror}->(node => $_, type => 'element not allowed');
851     }
852     }
853     }
854     } elsif ($nt == 5) {
855     unshift @nodes, @{$node->child_nodes};
856     }
857     }
858    
859     my $end = $self->_add_minuses
860     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
861     $HTMLSectioningElements);
862 wakaba 1.4 push @$new_todos, $end;
863 wakaba 1.2
864 wakaba 1.4 if ($content eq 'inline') {
865     for (@$new_todos) {
866     $_->{inline} = 1;
867     }
868     }
869    
870     return ($new_todos);
871 wakaba 1.2 },
872     };
873 wakaba 1.1
874     $Element->{$HTML_NS}->{address} = {
875 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
876 wakaba 1.1 checker => $HTMLInlineChecker,
877     };
878    
879     $Element->{$HTML_NS}->{p} = {
880 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
881 wakaba 1.1 checker => $HTMLSignificantInlineChecker,
882     };
883    
884     $Element->{$HTML_NS}->{hr} = {
885 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
886 wakaba 1.1 checker => $HTMLEmptyChecker,
887     };
888    
889     $Element->{$HTML_NS}->{br} = {
890 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
891 wakaba 1.1 checker => $HTMLEmptyChecker,
892     };
893    
894     $Element->{$HTML_NS}->{dialog} = {
895 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
896 wakaba 1.1 checker => sub {
897 wakaba 1.4 my ($self, $todo) = @_;
898     my $el = $todo->{node};
899     my $new_todos = [];
900 wakaba 1.1 my @nodes = (@{$el->child_nodes});
901    
902     my $phase = 'before dt';
903     while (@nodes) {
904     my $node = shift @nodes;
905 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
906    
907 wakaba 1.1 my $nt = $node->node_type;
908     if ($nt == 1) {
909 wakaba 1.8 my $node_ns = $node->namespace_uri;
910     $node_ns = '' unless defined $node_ns;
911     my $node_ln = $node->manakai_local_name;
912 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
913 wakaba 1.1 if ($phase eq 'before dt') {
914 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
915 wakaba 1.1 $phase = 'before dd';
916 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
917 wakaba 1.2 $self->{onerror}
918 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
919 wakaba 1.1 $phase = 'before dt';
920     } else {
921 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
922 wakaba 1.1 }
923     } else { # before dd
924 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
925 wakaba 1.1 $phase = 'before dt';
926 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
927 wakaba 1.2 $self->{onerror}
928 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
929 wakaba 1.1 $phase = 'before dd';
930     } else {
931 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
932 wakaba 1.1 }
933     }
934 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
935     unshift @nodes, @$sib;
936 wakaba 1.4 push @$new_todos, @$ch;
937 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
938     if ($node->data =~ /[^\x09-\x0D\x20]/) {
939 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
940 wakaba 1.1 }
941     } elsif ($nt == 5) {
942     unshift @nodes, @{$node->child_nodes};
943     }
944     }
945     if ($phase eq 'before dd') {
946 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
947 wakaba 1.1 }
948 wakaba 1.4 return ($new_todos);
949 wakaba 1.1 },
950     };
951    
952     $Element->{$HTML_NS}->{pre} = {
953 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
954 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
955     };
956    
957     $Element->{$HTML_NS}->{ol} = {
958 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
959 wakaba 1.1 checker => sub {
960 wakaba 1.4 my ($self, $todo) = @_;
961     my $el = $todo->{node};
962     my $new_todos = [];
963 wakaba 1.1 my @nodes = (@{$el->child_nodes});
964    
965     while (@nodes) {
966     my $node = shift @nodes;
967 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
968    
969 wakaba 1.1 my $nt = $node->node_type;
970     if ($nt == 1) {
971 wakaba 1.8 my $node_ns = $node->namespace_uri;
972     $node_ns = '' unless defined $node_ns;
973     my $node_ln = $node->manakai_local_name;
974 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
975 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
976 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
977 wakaba 1.1 }
978 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
979     unshift @nodes, @$sib;
980 wakaba 1.4 push @$new_todos, @$ch;
981 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
982     if ($node->data =~ /[^\x09-\x0D\x20]/) {
983 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
984 wakaba 1.1 }
985     } elsif ($nt == 5) {
986     unshift @nodes, @{$node->child_nodes};
987     }
988     }
989 wakaba 1.4
990     if ($todo->{inline}) {
991     for (@$new_todos) {
992     $_->{inline} = 1;
993     }
994     }
995     return ($new_todos);
996 wakaba 1.1 },
997     };
998    
999     $Element->{$HTML_NS}->{ul} = {
1000 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1001 wakaba 1.1 checker => $Element->{$HTML_NS}->{ol}->{checker},
1002     };
1003    
1004 wakaba 1.5
1005     $Element->{$HTML_NS}->{li} = {
1006 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1007 wakaba 1.5 checker => sub {
1008     my ($self, $todo) = @_;
1009     if ($todo->{inline}) {
1010     return $HTMLInlineChecker->($self, $todo);
1011     } else {
1012     return $HTMLBlockOrInlineChecker->($self, $todo);
1013     }
1014     },
1015     };
1016 wakaba 1.1
1017     $Element->{$HTML_NS}->{dl} = {
1018 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1019 wakaba 1.1 checker => sub {
1020 wakaba 1.4 my ($self, $todo) = @_;
1021     my $el = $todo->{node};
1022     my $new_todos = [];
1023 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1024    
1025     my $phase = 'before dt';
1026     while (@nodes) {
1027     my $node = shift @nodes;
1028 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1029    
1030 wakaba 1.1 my $nt = $node->node_type;
1031     if ($nt == 1) {
1032 wakaba 1.8 my $node_ns = $node->namespace_uri;
1033     $node_ns = '' unless defined $node_ns;
1034     my $node_ln = $node->manakai_local_name;
1035 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1036 wakaba 1.1 if ($phase eq 'in dds') {
1037 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1038 wakaba 1.1 #$phase = 'in dds';
1039 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1040 wakaba 1.1 $phase = 'in dts';
1041     } else {
1042 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1043 wakaba 1.1 }
1044     } elsif ($phase eq 'in dts') {
1045 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1046 wakaba 1.1 #$phase = 'in dts';
1047 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1048 wakaba 1.1 $phase = 'in dds';
1049     } else {
1050 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1051 wakaba 1.1 }
1052     } else { # before dt
1053 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1054 wakaba 1.1 $phase = 'in dts';
1055 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1056 wakaba 1.2 $self->{onerror}
1057 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1058 wakaba 1.1 $phase = 'in dds';
1059     } else {
1060 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1061 wakaba 1.1 }
1062     }
1063 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1064     unshift @nodes, @$sib;
1065 wakaba 1.4 push @$new_todos, @$ch;
1066 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1067     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1068 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1069 wakaba 1.1 }
1070     } elsif ($nt == 5) {
1071     unshift @nodes, @{$node->child_nodes};
1072     }
1073     }
1074     if ($phase eq 'in dts') {
1075 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1076 wakaba 1.1 }
1077 wakaba 1.4
1078     if ($todo->{inline}) {
1079     for (@$new_todos) {
1080     $_->{inline} = 1;
1081     }
1082     }
1083     return ($new_todos);
1084 wakaba 1.1 },
1085     };
1086    
1087     $Element->{$HTML_NS}->{dt} = {
1088 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1089 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1090     };
1091    
1092 wakaba 1.4 $Element->{$HTML_NS}->{dd} = {
1093 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1094 wakaba 1.5 checker => $Element->{$HTML_NS}->{li}->{checker},
1095 wakaba 1.4 };
1096 wakaba 1.1
1097 wakaba 1.6 $Element->{$HTML_NS}->{a} = {
1098 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1099 wakaba 1.6 checker => sub {
1100     my ($self, $todo) = @_;
1101    
1102     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1103     my ($sib, $ch)
1104     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1105     push @$sib, $end;
1106     return ($sib, $ch);
1107     },
1108     };
1109 wakaba 1.1
1110 wakaba 1.4 $Element->{$HTML_NS}->{q} = {
1111 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1112 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1113     };
1114 wakaba 1.1
1115     $Element->{$HTML_NS}->{cite} = {
1116 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1117 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1118     };
1119    
1120 wakaba 1.4 $Element->{$HTML_NS}->{em} = {
1121 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1122 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1123     };
1124    
1125     $Element->{$HTML_NS}->{strong} = {
1126 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1127 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1128     };
1129 wakaba 1.1
1130 wakaba 1.4 $Element->{$HTML_NS}->{small} = {
1131 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1132 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1133     };
1134    
1135     $Element->{$HTML_NS}->{m} = {
1136 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1137 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1138     };
1139    
1140     $Element->{$HTML_NS}->{dfn} = {
1141 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1142 wakaba 1.4 checker => sub {
1143     my ($self, $todo) = @_;
1144    
1145     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1146     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1147     push @$sib, $end;
1148     return ($sib, $ch);
1149     },
1150     };
1151 wakaba 1.1
1152     $Element->{$HTML_NS}->{abbr} = {
1153 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1154 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1155     };
1156    
1157     $Element->{$HTML_NS}->{time} = {
1158 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1159 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1160     };
1161    
1162     $Element->{$HTML_NS}->{meter} = {
1163 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1164 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1165     };
1166    
1167     $Element->{$HTML_NS}->{progress} = {
1168 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1169 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1170     };
1171    
1172 wakaba 1.4 $Element->{$HTML_NS}->{code} = {
1173 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1174 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1175     };
1176 wakaba 1.1
1177     $Element->{$HTML_NS}->{var} = {
1178 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1179 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1180     };
1181    
1182 wakaba 1.4 $Element->{$HTML_NS}->{samp} = {
1183 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1184 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1185     };
1186 wakaba 1.1
1187     $Element->{$HTML_NS}->{kbd} = {
1188 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1189 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1190     };
1191    
1192     $Element->{$HTML_NS}->{sub} = {
1193 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1194 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1195     };
1196    
1197     $Element->{$HTML_NS}->{sup} = {
1198 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1199 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1200     };
1201    
1202 wakaba 1.4 $Element->{$HTML_NS}->{span} = {
1203 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1204 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1205     };
1206 wakaba 1.1
1207     $Element->{$HTML_NS}->{i} = {
1208 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1209 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1210     };
1211    
1212     $Element->{$HTML_NS}->{b} = {
1213 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1214 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1215     };
1216    
1217     $Element->{$HTML_NS}->{bdo} = {
1218 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1219 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1220     };
1221    
1222     $Element->{$HTML_NS}->{ins} = {
1223 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1224 wakaba 1.1 checker => $HTMLTransparentChecker,
1225     };
1226    
1227     $Element->{$HTML_NS}->{del} = {
1228 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1229 wakaba 1.1 checker => sub {
1230 wakaba 1.4 my ($self, $todo) = @_;
1231 wakaba 1.1
1232 wakaba 1.4 my $parent = $todo->{node}->manakai_parent_element;
1233 wakaba 1.1 if (defined $parent) {
1234     my $nsuri = $parent->namespace_uri;
1235     $nsuri = '' unless defined $nsuri;
1236     my $ln = $parent->manakai_local_name;
1237     my $eldef = $Element->{$nsuri}->{$ln} ||
1238     $Element->{$nsuri}->{''} ||
1239     $ElementDefault;
1240 wakaba 1.4 return $eldef->{checker}->($self, $todo);
1241 wakaba 1.1 } else {
1242 wakaba 1.4 return $HTMLBlockOrInlineChecker->($self, $todo);
1243 wakaba 1.1 }
1244     },
1245     };
1246    
1247     ## TODO: figure
1248    
1249     $Element->{$HTML_NS}->{img} = {
1250 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1251 wakaba 1.1 checker => $HTMLEmptyChecker,
1252     };
1253    
1254     $Element->{$HTML_NS}->{iframe} = {
1255 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1256 wakaba 1.1 checker => $HTMLTextChecker,
1257     };
1258    
1259     $Element->{$HTML_NS}->{embed} = {
1260 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1261 wakaba 1.1 checker => $HTMLEmptyChecker,
1262     };
1263    
1264     $Element->{$HTML_NS}->{param} = {
1265 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1266 wakaba 1.1 checker => $HTMLEmptyChecker,
1267     };
1268    
1269     ## TODO: object
1270    
1271 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
1272 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1273 wakaba 1.2 checker => sub {
1274 wakaba 1.4 my ($self, $todo) = @_;
1275 wakaba 1.2
1276 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1277     return $HTMLBlockOrInlineChecker->($self, $todo);
1278 wakaba 1.2 } else {
1279     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
1280 wakaba 1.4 ->($self, $todo);
1281 wakaba 1.2 }
1282     },
1283     };
1284    
1285     $Element->{$HTML_NS}->{audio} = {
1286 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1287 wakaba 1.2 checker => $Element->{$HTML_NS}->{audio}->{checker},
1288     };
1289 wakaba 1.1
1290     $Element->{$HTML_NS}->{source} = {
1291 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1292 wakaba 1.1 checker => $HTMLEmptyChecker,
1293     };
1294    
1295     $Element->{$HTML_NS}->{canvas} = {
1296 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1297 wakaba 1.1 checker => $HTMLInlineChecker,
1298     };
1299    
1300     $Element->{$HTML_NS}->{map} = {
1301 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1302 wakaba 1.1 checker => $HTMLBlockChecker,
1303     };
1304    
1305     $Element->{$HTML_NS}->{area} = {
1306 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1307 wakaba 1.1 checker => $HTMLEmptyChecker,
1308     };
1309     ## TODO: only in map
1310    
1311     $Element->{$HTML_NS}->{table} = {
1312 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1313 wakaba 1.1 checker => sub {
1314 wakaba 1.4 my ($self, $todo) = @_;
1315     my $el = $todo->{node};
1316     my $new_todos = [];
1317 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1318    
1319     my $phase = 'before caption';
1320     my $has_tfoot;
1321     while (@nodes) {
1322     my $node = shift @nodes;
1323 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1324    
1325 wakaba 1.1 my $nt = $node->node_type;
1326     if ($nt == 1) {
1327 wakaba 1.8 my $node_ns = $node->namespace_uri;
1328     $node_ns = '' unless defined $node_ns;
1329     my $node_ln = $node->manakai_local_name;
1330 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1331 wakaba 1.1 if ($phase eq 'in tbodys') {
1332 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1333 wakaba 1.1 #$phase = 'in tbodys';
1334     } elsif (not $has_tfoot and
1335 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1336 wakaba 1.1 $phase = 'after tfoot';
1337     $has_tfoot = 1;
1338     } else {
1339 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1340 wakaba 1.1 }
1341     } elsif ($phase eq 'in trs') {
1342 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1343 wakaba 1.1 #$phase = 'in trs';
1344     } elsif (not $has_tfoot and
1345 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1346 wakaba 1.1 $phase = 'after tfoot';
1347     $has_tfoot = 1;
1348     } else {
1349 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1350 wakaba 1.1 }
1351     } elsif ($phase eq 'after thead') {
1352 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1353 wakaba 1.1 $phase = 'in tbodys';
1354 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1355 wakaba 1.1 $phase = 'in trs';
1356 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1357 wakaba 1.1 $phase = 'in tbodys';
1358     $has_tfoot = 1;
1359     } else {
1360 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1361 wakaba 1.1 }
1362     } elsif ($phase eq 'in colgroup') {
1363 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
1364 wakaba 1.1 $phase = 'in colgroup';
1365 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
1366 wakaba 1.1 $phase = 'after thead';
1367 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1368 wakaba 1.1 $phase = 'in tbodys';
1369 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1370 wakaba 1.1 $phase = 'in trs';
1371 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1372 wakaba 1.1 $phase = 'in tbodys';
1373     $has_tfoot = 1;
1374     } else {
1375 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1376 wakaba 1.1 }
1377     } elsif ($phase eq 'before caption') {
1378 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
1379 wakaba 1.1 $phase = 'in colgroup';
1380 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
1381 wakaba 1.1 $phase = 'in colgroup';
1382 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
1383 wakaba 1.1 $phase = 'after thead';
1384 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1385 wakaba 1.1 $phase = 'in tbodys';
1386 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1387 wakaba 1.1 $phase = 'in trs';
1388 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1389 wakaba 1.1 $phase = 'in tbodys';
1390     $has_tfoot = 1;
1391     } else {
1392 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1393 wakaba 1.1 }
1394     } else { # after tfoot
1395 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1396 wakaba 1.1 }
1397 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1398     unshift @nodes, @$sib;
1399 wakaba 1.4 push @$new_todos, @$ch;
1400 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1401     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1402 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1403 wakaba 1.1 }
1404     } elsif ($nt == 5) {
1405     unshift @nodes, @{$node->child_nodes};
1406     }
1407     }
1408 wakaba 1.4 return ($new_todos);
1409 wakaba 1.1 },
1410     };
1411    
1412     $Element->{$HTML_NS}->{caption} = {
1413 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1414 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1415     };
1416    
1417     $Element->{$HTML_NS}->{colgroup} = {
1418 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1419 wakaba 1.1 checker => sub {
1420 wakaba 1.4 my ($self, $todo) = @_;
1421     my $el = $todo->{node};
1422     my $new_todos = [];
1423 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1424    
1425     while (@nodes) {
1426     my $node = shift @nodes;
1427 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1428    
1429 wakaba 1.1 my $nt = $node->node_type;
1430     if ($nt == 1) {
1431 wakaba 1.8 my $node_ns = $node->namespace_uri;
1432     $node_ns = '' unless defined $node_ns;
1433     my $node_ln = $node->manakai_local_name;
1434 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1435 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
1436 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1437 wakaba 1.1 }
1438 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1439     unshift @nodes, @$sib;
1440 wakaba 1.4 push @$new_todos, @$ch;
1441 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1442     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1443 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1444 wakaba 1.1 }
1445     } elsif ($nt == 5) {
1446     unshift @nodes, @{$node->child_nodes};
1447     }
1448     }
1449 wakaba 1.4 return ($new_todos);
1450 wakaba 1.1 },
1451     };
1452    
1453     $Element->{$HTML_NS}->{col} = {
1454 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1455 wakaba 1.1 checker => $HTMLEmptyChecker,
1456     };
1457    
1458     $Element->{$HTML_NS}->{tbody} = {
1459 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1460 wakaba 1.1 checker => sub {
1461 wakaba 1.4 my ($self, $todo) = @_;
1462     my $el = $todo->{node};
1463     my $new_todos = [];
1464 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1465    
1466     my $has_tr;
1467     while (@nodes) {
1468     my $node = shift @nodes;
1469 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1470    
1471 wakaba 1.1 my $nt = $node->node_type;
1472     if ($nt == 1) {
1473 wakaba 1.8 my $node_ns = $node->namespace_uri;
1474     $node_ns = '' unless defined $node_ns;
1475     my $node_ln = $node->manakai_local_name;
1476 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1477 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1478 wakaba 1.1 $has_tr = 1;
1479     } else {
1480 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1481 wakaba 1.1 }
1482 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1483     unshift @nodes, @$sib;
1484 wakaba 1.4 push @$new_todos, @$ch;
1485 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1486     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1487 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1488 wakaba 1.1 }
1489     } elsif ($nt == 5) {
1490     unshift @nodes, @{$node->child_nodes};
1491     }
1492     }
1493     unless ($has_tr) {
1494 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
1495 wakaba 1.1 }
1496 wakaba 1.4 return ($new_todos);
1497 wakaba 1.1 },
1498     };
1499    
1500     $Element->{$HTML_NS}->{thead} = {
1501 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1502 wakaba 1.1 checker => $Element->{$HTML_NS}->{tbody},
1503     };
1504    
1505     $Element->{$HTML_NS}->{tfoot} = {
1506 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1507 wakaba 1.1 checker => $Element->{$HTML_NS}->{tbody},
1508     };
1509    
1510     $Element->{$HTML_NS}->{tr} = {
1511 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1512 wakaba 1.1 checker => sub {
1513 wakaba 1.4 my ($self, $todo) = @_;
1514     my $el = $todo->{node};
1515     my $new_todos = [];
1516 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1517    
1518     my $has_td;
1519     while (@nodes) {
1520     my $node = shift @nodes;
1521 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1522    
1523 wakaba 1.1 my $nt = $node->node_type;
1524     if ($nt == 1) {
1525 wakaba 1.8 my $node_ns = $node->namespace_uri;
1526     $node_ns = '' unless defined $node_ns;
1527     my $node_ln = $node->manakai_local_name;
1528 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1529 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
1530 wakaba 1.1 $has_td = 1;
1531     } else {
1532 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1533 wakaba 1.1 }
1534 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1535     unshift @nodes, @$sib;
1536 wakaba 1.4 push @$new_todos, @$ch;
1537 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1538     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1539 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1540 wakaba 1.1 }
1541     } elsif ($nt == 5) {
1542     unshift @nodes, @{$node->child_nodes};
1543     }
1544     }
1545     unless ($has_td) {
1546 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
1547 wakaba 1.1 }
1548 wakaba 1.4 return ($new_todos);
1549 wakaba 1.1 },
1550     };
1551    
1552     $Element->{$HTML_NS}->{td} = {
1553 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1554 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1555     };
1556    
1557     $Element->{$HTML_NS}->{th} = {
1558 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1559 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1560     };
1561    
1562     ## TODO: forms
1563    
1564 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
1565 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1566 wakaba 1.2 checker => sub {
1567 wakaba 1.4 my ($self, $todo) = @_;
1568 wakaba 1.2
1569 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1570     return $HTMLEmptyChecker->($self, $todo);
1571 wakaba 1.2 } else {
1572     ## NOTE: No content model conformance in HTML5 spec.
1573 wakaba 1.4 return $AnyChecker->($self, $todo);
1574 wakaba 1.2 }
1575     },
1576     };
1577    
1578     ## NOTE: When script is disabled.
1579     $Element->{$HTML_NS}->{noscript} = {
1580 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1581 wakaba 1.2 checker => sub {
1582 wakaba 1.4 my ($self, $todo) = @_;
1583 wakaba 1.1
1584 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
1585 wakaba 1.4 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
1586 wakaba 1.2 push @$sib, $end;
1587     return ($sib, $ch);
1588     },
1589     };
1590 wakaba 1.1
1591     $Element->{$HTML_NS}->{'event-source'} = {
1592 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1593 wakaba 1.1 checker => $HTMLEmptyChecker,
1594     };
1595    
1596     $Element->{$HTML_NS}->{details} = {
1597 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1598 wakaba 1.6 checker => sub {
1599     my ($self, $todo) = @_;
1600    
1601     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
1602     my ($sib, $ch)
1603     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
1604     ->($self, $todo);
1605     push @$sib, $end;
1606     return ($sib, $ch);
1607     },
1608 wakaba 1.1 };
1609    
1610     $Element->{$HTML_NS}->{datagrid} = {
1611 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1612 wakaba 1.6 checker => sub {
1613     my ($self, $todo) = @_;
1614    
1615     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
1616     my ($sib, $ch) = $HTMLBlockChecker->($self, $todo);
1617     push @$sib, $end;
1618     return ($sib, $ch);
1619     },
1620 wakaba 1.1 };
1621    
1622     $Element->{$HTML_NS}->{command} = {
1623 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1624 wakaba 1.1 checker => $HTMLEmptyChecker,
1625     };
1626    
1627     $Element->{$HTML_NS}->{menu} = {
1628 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1629 wakaba 1.1 checker => sub {
1630 wakaba 1.4 my ($self, $todo) = @_;
1631     my $el = $todo->{node};
1632     my $new_todos = [];
1633 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1634    
1635     my $content = 'li or inline';
1636     while (@nodes) {
1637     my $node = shift @nodes;
1638 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1639    
1640 wakaba 1.1 my $nt = $node->node_type;
1641     if ($nt == 1) {
1642 wakaba 1.2 my $node_ns = $node->namespace_uri;
1643     $node_ns = '' unless defined $node_ns;
1644     my $node_ln = $node->manakai_local_name;
1645 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1646 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1647 wakaba 1.1 if ($content eq 'inline') {
1648 wakaba 1.6 $not_allowed = 1;
1649 wakaba 1.1 } elsif ($content eq 'li or inline') {
1650     $content = 'li';
1651     }
1652     } else {
1653 wakaba 1.7 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1654     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
1655     $content = 'inline';
1656     } else {
1657 wakaba 1.6 $not_allowed = 1;
1658 wakaba 1.7 }
1659 wakaba 1.1 }
1660 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1661     if $not_allowed;
1662 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1663     unshift @nodes, @$sib;
1664 wakaba 1.4 push @$new_todos, @$ch;
1665 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1666     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1667     if ($content eq 'li') {
1668 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1669 wakaba 1.1 } elsif ($content eq 'li or inline') {
1670     $content = 'inline';
1671     }
1672     }
1673     } elsif ($nt == 5) {
1674     unshift @nodes, @{$node->child_nodes};
1675     }
1676     }
1677 wakaba 1.4
1678     for (@$new_todos) {
1679     $_->{inline} = 1;
1680     }
1681     return ($new_todos);
1682 wakaba 1.1 },
1683     };
1684    
1685 wakaba 1.6 $Element->{$HTML_NS}->{legend} = {
1686 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1687 wakaba 1.6 checker => sub {
1688     my ($self, $todo) = @_;
1689    
1690     my $parent = $todo->{node}->manakai_parent_element;
1691     if (defined $parent) {
1692     my $nsuri = $parent->namespace_uri;
1693     $nsuri = '' unless defined $nsuri;
1694     my $ln = $parent->manakai_local_name;
1695     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
1696     return $HTMLInlineChecker->($self, $todo);
1697     } else {
1698     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1699     }
1700     } else {
1701     return $HTMLInlineChecker->($self, $todo);
1702     }
1703    
1704     ## ISSUE: Content model is defined only for fieldset/legend,
1705     ## details/legend, and figure/legend.
1706     },
1707     };
1708 wakaba 1.1
1709     $Element->{$HTML_NS}->{div} = {
1710 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1711 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1712 wakaba 1.1 };
1713    
1714     $Element->{$HTML_NS}->{font} = {
1715 wakaba 1.9 attrs_checker => $HTMLAttrsChecker,
1716 wakaba 1.1 checker => $HTMLTransparentChecker,
1717     };
1718    
1719 wakaba 1.2 sub new ($) {
1720     return bless {}, shift;
1721     } # new
1722    
1723 wakaba 1.1 sub check_element ($$$) {
1724     my ($self, $el, $onerror) = @_;
1725    
1726 wakaba 1.2 $self->{minuses} = {};
1727     $self->{onerror} = $onerror;
1728    
1729 wakaba 1.4 my @todo = ({type => 'element', node => $el});
1730     while (@todo) {
1731     my $todo = shift @todo;
1732     if ($todo->{type} eq 'element') {
1733     my $nsuri = $todo->{node}->namespace_uri;
1734     $nsuri = '' unless defined $nsuri;
1735     my $ln = $todo->{node}->manakai_local_name;
1736     my $eldef = $Element->{$nsuri}->{$ln} ||
1737     $Element->{$nsuri}->{''} ||
1738     $ElementDefault;
1739 wakaba 1.9 $eldef->{attrs_checker}->($self, $todo);
1740 wakaba 1.4 my ($new_todos) = $eldef->{checker}->($self, $todo);
1741     push @todo, @$new_todos;
1742 wakaba 1.9 } elsif ($todo->{type} eq 'element-attributes') {
1743     my $nsuri = $todo->{node}->namespace_uri;
1744     $nsuri = '' unless defined $nsuri;
1745     my $ln = $todo->{node}->manakai_local_name;
1746     my $eldef = $Element->{$nsuri}->{$ln} ||
1747     $Element->{$nsuri}->{''} ||
1748     $ElementDefault;
1749     $eldef->{attrs_checker}->($self, $todo);
1750 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
1751     $self->_remove_minuses ($todo);
1752     }
1753 wakaba 1.1 }
1754     } # check_element
1755    
1756 wakaba 1.2 sub _add_minuses ($@) {
1757     my $self = shift;
1758     my $r = {};
1759     for my $list (@_) {
1760     for my $ns (keys %$list) {
1761     for my $ln (keys %{$list->{$ns}}) {
1762     unless ($self->{minuses}->{$ns}->{$ln}) {
1763     $self->{minuses}->{$ns}->{$ln} = 1;
1764     $r->{$ns}->{$ln} = 1;
1765     }
1766     }
1767     }
1768     }
1769 wakaba 1.4 return {type => 'plus', list => $r};
1770 wakaba 1.2 } # _add_minuses
1771    
1772     sub _remove_minuses ($$) {
1773 wakaba 1.4 my ($self, $todo) = @_;
1774     for my $ns (keys %{$todo->{list}}) {
1775     for my $ln (keys %{$todo->{list}->{$ns}}) {
1776     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
1777 wakaba 1.2 }
1778     }
1779     1;
1780     } # _remove_minuses
1781    
1782     sub _check_get_children ($$) {
1783     my ($self, $node) = @_;
1784 wakaba 1.4 my $new_todos = [];
1785 wakaba 1.2 my $sib = [];
1786     TP: {
1787     my $node_ns = $node->namespace_uri;
1788     $node_ns = '' unless defined $node_ns;
1789     my $node_ln = $node->manakai_local_name;
1790     if ($node_ns eq $HTML_NS) {
1791     if ($node_ln eq 'noscript') {
1792     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
1793     push @$sib, $end;
1794     }
1795     }
1796 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
1797     unshift @$sib, @{$node->child_nodes};
1798 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
1799 wakaba 1.7 last TP;
1800 wakaba 1.2 }
1801 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
1802 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
1803     unshift @$sib, @{$node->child_nodes};
1804 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
1805 wakaba 1.2 last TP;
1806     } else {
1807     my @cn = @{$node->child_nodes};
1808     CN: while (@cn) {
1809     my $cn = shift @cn;
1810     my $cnt = $cn->node_type;
1811     if ($cnt == 1) {
1812 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
1813     $cn_nsuri = '' unless defined $cn_nsuri;
1814     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
1815 wakaba 1.2 #
1816     } else {
1817     last CN;
1818     }
1819     } elsif ($cnt == 3 or $cnt == 4) {
1820     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
1821     last CN;
1822     }
1823     }
1824     } # CN
1825     unshift @$sib, @cn;
1826     }
1827     }
1828 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
1829 wakaba 1.2 } # TP
1830 wakaba 1.4 return ($sib, $new_todos);
1831 wakaba 1.2 } # _check_get_children
1832    
1833 wakaba 1.1 1;
1834 wakaba 1.9 # $Date: 2007/05/13 10:40:07 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24