/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations) (download)
Sun May 13 10:40:07 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.7: +68 -48 lines
++ whatpm/t/ChangeLog	13 May 2007 10:39:43 -0000
	* ContentChecker.pm (manakai_element_type_match): Removed.

2007-05-13  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	13 May 2007 10:40:03 -0000
	* ContentChecker.pm: Don't use |manakai_element_type_match|.

2007-05-13  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.3 ## ANY
5     my $AnyChecker = sub {
6 wakaba 1.4 my ($self, $todo) = @_;
7     my $el = $todo->{node};
8     my $new_todos = [];
9 wakaba 1.3 my @nodes = (@{$el->child_nodes});
10     while (@nodes) {
11     my $node = shift @nodes;
12     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
13    
14     my $nt = $node->node_type;
15     if ($nt == 1) {
16     my $node_ns = $node->namespace_uri;
17     $node_ns = '' unless defined $node_ns;
18     my $node_ln = $node->manakai_local_name;
19     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
20     $self->{onerror}->(node => $node, type => 'element not allowed');
21     }
22 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
23 wakaba 1.3 } elsif ($nt == 5) {
24     unshift @nodes, @{$node->child_nodes};
25     }
26     }
27 wakaba 1.4 return ($new_todos);
28 wakaba 1.3 }; # $AnyChecker
29    
30 wakaba 1.1 my $ElementDefault = {
31     checker => sub {
32 wakaba 1.4 my ($self, $todo) = @_;
33     $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
34     return $AnyChecker->($self, $todo);
35 wakaba 1.1 },
36     };
37    
38     my $Element = {};
39    
40     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
41    
42 wakaba 1.7 my $HTMLMetadataElements = {
43     $HTML_NS => {
44     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1/,
45     },
46     };
47 wakaba 1.1
48 wakaba 1.2 my $HTMLSectioningElements = {
49     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
50     };
51 wakaba 1.1
52 wakaba 1.7 my $HTMLBlockLevelElements = {
53     $HTML_NS => {
54     qw/
55     section 1 nav 1 article 1 blockquote 1 aside 1
56     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
57     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
58     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
59     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
60     /,
61     },
62     };
63    
64     my $HTMLStrictlyInlineLevelElements = {
65     $HTML_NS => {
66     qw/
67     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
68     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
69     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
70     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
71     script 1 noscript 1 event-source 1 command 1 font 1
72     /,
73     },
74     };
75    
76     my $HTMLStructuredInlineLevelElements = {
77     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
78     };
79 wakaba 1.1
80 wakaba 1.6 my $HTMLInteractiveElements = {
81     $HTML_NS => {a => 1, details => 1, datagrid => 1},
82     };
83     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
84     ## of interactive elements
85 wakaba 1.1
86 wakaba 1.7 my $HTMLTransparentElements = {
87     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
88     ## NOTE: |html:noscript| is transparent if scripting is disabled.
89     };
90    
91     #my $HTMLSemiTransparentElements = {
92     # $HTML_NS => {qw/video 1 audio 1/},
93     #};
94    
95     my $HTMLEmbededElements = {
96     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
97     };
98 wakaba 1.1
99     ## Empty
100     my $HTMLEmptyChecker = sub {
101 wakaba 1.4 my ($self, $todo) = @_;
102     my $el = $todo->{node};
103     my $new_todos = [];
104 wakaba 1.1 my @nodes = (@{$el->child_nodes});
105    
106     while (@nodes) {
107     my $node = shift @nodes;
108 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
109    
110 wakaba 1.1 my $nt = $node->node_type;
111     if ($nt == 1) {
112 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
113     $self->{onerror}->(node => $node, type => 'element not allowed');
114     my ($sib, $ch) = $self->_check_get_children ($node);
115     unshift @nodes, @$sib;
116 wakaba 1.4 push @$new_todos, @$ch;
117 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
118 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
119     $self->{onerror}->(node => $node, type => 'character not allowed');
120     }
121 wakaba 1.1 } elsif ($nt == 5) {
122     unshift @nodes, @{$node->child_nodes};
123     }
124     }
125 wakaba 1.4 return ($new_todos);
126 wakaba 1.1 };
127    
128     ## Text
129     my $HTMLTextChecker = sub {
130 wakaba 1.4 my ($self, $todo) = @_;
131     my $el = $todo->{node};
132     my $new_todos = [];
133 wakaba 1.1 my @nodes = (@{$el->child_nodes});
134    
135     while (@nodes) {
136     my $node = shift @nodes;
137 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
138    
139 wakaba 1.1 my $nt = $node->node_type;
140     if ($nt == 1) {
141 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
142     $self->{onerror}->(node => $node, type => 'element not allowed');
143     my ($sib, $ch) = $self->_check_get_children ($node);
144     unshift @nodes, @$sib;
145 wakaba 1.4 push @$new_todos, @$ch;
146 wakaba 1.1 } elsif ($nt == 5) {
147     unshift @nodes, @{$node->child_nodes};
148     }
149     }
150 wakaba 1.4 return ($new_todos);
151 wakaba 1.1 };
152    
153     ## Zero or more |html:style| elements,
154     ## followed by zero or more block-level elements
155     my $HTMLStylableBlockChecker = sub {
156 wakaba 1.4 my ($self, $todo) = @_;
157     my $el = $todo->{node};
158     my $new_todos = [];
159 wakaba 1.1 my @nodes = (@{$el->child_nodes});
160    
161     my $has_non_style;
162     while (@nodes) {
163     my $node = shift @nodes;
164 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
165    
166 wakaba 1.1 my $nt = $node->node_type;
167     if ($nt == 1) {
168 wakaba 1.2 my $node_ns = $node->namespace_uri;
169     $node_ns = '' unless defined $node_ns;
170     my $node_ln = $node->manakai_local_name;
171 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
172 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
173 wakaba 1.6 $not_allowed = 1 if $has_non_style;
174 wakaba 1.7 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
175     $has_non_style = 1;
176 wakaba 1.1 } else {
177     $has_non_style = 1;
178 wakaba 1.7 $not_allowed = 1;
179 wakaba 1.1 }
180 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
181     if $not_allowed;
182 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
183     unshift @nodes, @$sib;
184 wakaba 1.4 push @$new_todos, @$ch;
185 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
186     if ($node->data =~ /[^\x09-\x0D\x20]/) {
187 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
188 wakaba 1.1 }
189     } elsif ($nt == 5) {
190     unshift @nodes, @{$node->child_nodes};
191     }
192     }
193 wakaba 1.4 return ($new_todos);
194 wakaba 1.1 }; # $HTMLStylableBlockChecker
195    
196     ## Zero or more block-level elements
197     my $HTMLBlockChecker = sub {
198 wakaba 1.4 my ($self, $todo) = @_;
199     my $el = $todo->{node};
200     my $new_todos = [];
201 wakaba 1.1 my @nodes = (@{$el->child_nodes});
202    
203     while (@nodes) {
204     my $node = shift @nodes;
205 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
206    
207 wakaba 1.1 my $nt = $node->node_type;
208     if ($nt == 1) {
209 wakaba 1.2 my $node_ns = $node->namespace_uri;
210     $node_ns = '' unless defined $node_ns;
211     my $node_ln = $node->manakai_local_name;
212 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
213 wakaba 1.7 $not_allowed = 1
214     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
215 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
216     if $not_allowed;
217 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
218     unshift @nodes, @$sib;
219 wakaba 1.4 push @$new_todos, @$ch;
220 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
221     if ($node->data =~ /[^\x09-\x0D\x20]/) {
222 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
223 wakaba 1.1 }
224     } elsif ($nt == 5) {
225     unshift @nodes, @{$node->child_nodes};
226     }
227     }
228 wakaba 1.4 return ($new_todos);
229 wakaba 1.1 }; # $HTMLBlockChecker
230    
231     ## Inline-level content
232     my $HTMLInlineChecker = sub {
233 wakaba 1.4 my ($self, $todo) = @_;
234     my $el = $todo->{node};
235     my $new_todos = [];
236 wakaba 1.1 my @nodes = (@{$el->child_nodes});
237    
238     while (@nodes) {
239     my $node = shift @nodes;
240 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
241    
242 wakaba 1.1 my $nt = $node->node_type;
243     if ($nt == 1) {
244 wakaba 1.2 my $node_ns = $node->namespace_uri;
245     $node_ns = '' unless defined $node_ns;
246     my $node_ln = $node->manakai_local_name;
247 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
248 wakaba 1.7 $not_allowed = 1
249     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
250     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
251 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
252     if $not_allowed;
253 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
254     unshift @nodes, @$sib;
255 wakaba 1.4 push @$new_todos, @$ch;
256 wakaba 1.1 } elsif ($nt == 5) {
257     unshift @nodes, @{$node->child_nodes};
258     }
259     }
260 wakaba 1.4
261     for (@$new_todos) {
262     $_->{inline} = 1;
263     }
264     return ($new_todos);
265     }; # $HTMLInlineChecker
266 wakaba 1.1
267     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
268     ## TODO: check significant content
269    
270     ## Strictly inline-level content
271     my $HTMLStrictlyInlineChecker = sub {
272 wakaba 1.4 my ($self, $todo) = @_;
273     my $el = $todo->{node};
274     my $new_todos = [];
275 wakaba 1.1 my @nodes = (@{$el->child_nodes});
276    
277     while (@nodes) {
278     my $node = shift @nodes;
279 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
280    
281 wakaba 1.1 my $nt = $node->node_type;
282     if ($nt == 1) {
283 wakaba 1.2 my $node_ns = $node->namespace_uri;
284     $node_ns = '' unless defined $node_ns;
285     my $node_ln = $node->manakai_local_name;
286 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
287 wakaba 1.7 $not_allowed = 1
288     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
289 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
290     if $not_allowed;
291 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
292     unshift @nodes, @$sib;
293 wakaba 1.4 push @$new_todos, @$ch;
294 wakaba 1.1 } elsif ($nt == 5) {
295     unshift @nodes, @{$node->child_nodes};
296     }
297     }
298 wakaba 1.4
299     for (@$new_todos) {
300     $_->{inline} = 1;
301     $_->{strictly_inline} = 1;
302     }
303     return ($new_todos);
304 wakaba 1.1 }; # $HTMLStrictlyInlineChecker
305    
306     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
307     ## TODO: check significant content
308    
309 wakaba 1.4 ## Inline-level or strictly inline-kevek content
310     my $HTMLInlineOrStrictlyInlineChecker = sub {
311     my ($self, $todo) = @_;
312     my $el = $todo->{node};
313     my $new_todos = [];
314     my @nodes = (@{$el->child_nodes});
315    
316     while (@nodes) {
317     my $node = shift @nodes;
318     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
319    
320     my $nt = $node->node_type;
321     if ($nt == 1) {
322     my $node_ns = $node->namespace_uri;
323     $node_ns = '' unless defined $node_ns;
324     my $node_ln = $node->manakai_local_name;
325 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
326 wakaba 1.7 if ($todo->{strictly_inline}) {
327     $not_allowed = 1
328     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
329     } else {
330     $not_allowed = 1
331     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
332     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
333     }
334 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
335     if $not_allowed;
336 wakaba 1.4 my ($sib, $ch) = $self->_check_get_children ($node);
337     unshift @nodes, @$sib;
338     push @$new_todos, @$ch;
339     } elsif ($nt == 5) {
340     unshift @nodes, @{$node->child_nodes};
341     }
342     }
343    
344     for (@$new_todos) {
345     $_->{inline} = 1;
346     $_->{strictly_inline} = 1;
347     }
348     return ($new_todos);
349     }; # $HTMLInlineOrStrictlyInlineChecker
350    
351 wakaba 1.6 my $HTMLSignificantInlineOrStrictlyInlineChecker
352     = $HTMLInlineOrStrictlyInlineChecker;
353     ## TODO: check significant content
354    
355 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
356 wakaba 1.4 my ($self, $todo) = @_;
357     my $el = $todo->{node};
358     my $new_todos = [];
359 wakaba 1.1 my @nodes = (@{$el->child_nodes});
360    
361     my $content = 'block-or-inline'; # or 'block' or 'inline'
362     my @block_not_inline;
363     while (@nodes) {
364     my $node = shift @nodes;
365 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
366    
367 wakaba 1.1 my $nt = $node->node_type;
368     if ($nt == 1) {
369 wakaba 1.2 my $node_ns = $node->namespace_uri;
370     $node_ns = '' unless defined $node_ns;
371     my $node_ln = $node->manakai_local_name;
372 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
373 wakaba 1.1 if ($content eq 'block') {
374 wakaba 1.7 $not_allowed = 1
375     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
376 wakaba 1.1 } elsif ($content eq 'inline') {
377 wakaba 1.7 $not_allowed = 1
378     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
379     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
380 wakaba 1.1 } else {
381 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
382     my $is_inline
383     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
384     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
385 wakaba 1.1
386 wakaba 1.6 push @block_not_inline, $node
387     if $is_block and not $is_inline and not $not_allowed;
388 wakaba 1.1 unless ($is_block) {
389     $content = 'inline';
390     for (@block_not_inline) {
391 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
392 wakaba 1.1 }
393 wakaba 1.6 $not_allowed = 1 unless $is_inline;
394 wakaba 1.1 }
395     }
396 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
397     if $not_allowed;
398 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
399     unshift @nodes, @$sib;
400 wakaba 1.4 push @$new_todos, @$ch;
401 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
402     if ($node->data =~ /[^\x09-\x0D\x20]/) {
403     if ($content eq 'block') {
404 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
405 wakaba 1.1 } else {
406     $content = 'inline';
407     for (@block_not_inline) {
408 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
409 wakaba 1.1 }
410     }
411     }
412     } elsif ($nt == 5) {
413     unshift @nodes, @{$node->child_nodes};
414     }
415     }
416 wakaba 1.4
417     if ($content eq 'inline') {
418     for (@$new_todos) {
419     $_->{inline} = 1;
420     }
421     }
422     return ($new_todos);
423 wakaba 1.1 };
424    
425 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
426     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
427     my ($elnsuri, $ellname) = @_;
428     return sub {
429 wakaba 1.4 my ($self, $todo) = @_;
430     my $el = $todo->{node};
431     my $new_todos = [];
432 wakaba 1.2 my @nodes = (@{$el->child_nodes});
433    
434     my $has_non_style;
435     my $content = 'block-or-inline'; # or 'block' or 'inline'
436     my @block_not_inline;
437     while (@nodes) {
438     my $node = shift @nodes;
439     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
440    
441     my $nt = $node->node_type;
442     if ($nt == 1) {
443     my $node_ns = $node->namespace_uri;
444     $node_ns = '' unless defined $node_ns;
445     my $node_ln = $node->manakai_local_name;
446 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
447 wakaba 1.8 if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
448 wakaba 1.6 $not_allowed = 1 if $has_non_style;
449 wakaba 1.2 } elsif ($content eq 'block') {
450     $has_non_style = 1;
451 wakaba 1.7 $not_allowed = 1
452     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
453 wakaba 1.2 } elsif ($content eq 'inline') {
454     $has_non_style = 1;
455 wakaba 1.7 $not_allowed = 1
456     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
457     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
458 wakaba 1.2 } else {
459     $has_non_style = 1;
460 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
461     my $is_inline
462     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
463     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
464 wakaba 1.2
465 wakaba 1.6 push @block_not_inline, $node
466     if $is_block and not $is_inline and not $not_allowed;
467 wakaba 1.2 unless ($is_block) {
468     $content = 'inline';
469     for (@block_not_inline) {
470     $self->{onerror}->(node => $_, type => 'element not allowed');
471     }
472 wakaba 1.6 $not_allowed = 1 unless $is_inline;
473 wakaba 1.1 }
474     }
475 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
476     if $not_allowed;
477 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
478     unshift @nodes, @$sib;
479 wakaba 1.4 push @$new_todos, @$ch;
480 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
481     if ($node->data =~ /[^\x09-\x0D\x20]/) {
482     $has_non_style = 1;
483     if ($content eq 'block') {
484     $self->{onerror}->(node => $node, type => 'character not allowed');
485     } else {
486     $content = 'inline';
487     for (@block_not_inline) {
488     $self->{onerror}->(node => $_, type => 'element not allowed');
489     }
490 wakaba 1.1 }
491     }
492 wakaba 1.2 } elsif ($nt == 5) {
493     unshift @nodes, @{$node->child_nodes};
494 wakaba 1.1 }
495     }
496 wakaba 1.4
497     if ($content eq 'inline') {
498     for (@$new_todos) {
499     $_->{inline} = 1;
500     }
501     }
502     return ($new_todos);
503 wakaba 1.2 };
504     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
505 wakaba 1.1
506     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
507    
508     $Element->{$HTML_NS}->{html} = {
509     checker => sub {
510 wakaba 1.4 my ($self, $todo) = @_;
511     my $el = $todo->{node};
512     my $new_todos = [];
513 wakaba 1.1 my @nodes = (@{$el->child_nodes});
514    
515     my $phase = 'before head';
516     while (@nodes) {
517     my $node = shift @nodes;
518 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
519    
520 wakaba 1.1 my $nt = $node->node_type;
521     if ($nt == 1) {
522 wakaba 1.2 my $node_ns = $node->namespace_uri;
523     $node_ns = '' unless defined $node_ns;
524     my $node_ln = $node->manakai_local_name;
525 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
526 wakaba 1.1 if ($phase eq 'before head') {
527 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
528 wakaba 1.1 $phase = 'after head';
529 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
530     $self->{onerror}->(node => $node, type => 'ps element missing:head');
531 wakaba 1.1 $phase = 'after body';
532     } else {
533 wakaba 1.6 $not_allowed = 1;
534 wakaba 1.1 # before head
535     }
536     } elsif ($phase eq 'after head') {
537 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
538 wakaba 1.1 $phase = 'after body';
539     } else {
540 wakaba 1.6 $not_allowed = 1;
541 wakaba 1.1 # after head
542     }
543     } else { #elsif ($phase eq 'after body') {
544 wakaba 1.6 $not_allowed = 1;
545 wakaba 1.1 # after body
546     }
547 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
548     if $not_allowed;
549 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
550     unshift @nodes, @$sib;
551 wakaba 1.4 push @$new_todos, @$ch;
552 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
553     if ($node->data =~ /[^\x09-\x0D\x20]/) {
554 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
555 wakaba 1.1 }
556     } elsif ($nt == 5) {
557     unshift @nodes, @{$node->child_nodes};
558     }
559     }
560 wakaba 1.3
561     if ($phase eq 'before head') {
562     $self->{onerror}->(node => $el, type => 'child element missing:head');
563     $self->{onerror}->(node => $el, type => 'child element missing:body');
564     } elsif ($phase eq 'after head') {
565     $self->{onerror}->(node => $el, type => 'child element missing:body');
566     }
567    
568 wakaba 1.4 return ($new_todos);
569 wakaba 1.1 },
570     };
571    
572     $Element->{$HTML_NS}->{head} = {
573     checker => sub {
574 wakaba 1.4 my ($self, $todo) = @_;
575     my $el = $todo->{node};
576     my $new_todos = [];
577 wakaba 1.1 my @nodes = (@{$el->child_nodes});
578    
579     my $has_title;
580 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
581 wakaba 1.1 while (@nodes) {
582     my $node = shift @nodes;
583 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
584    
585 wakaba 1.1 my $nt = $node->node_type;
586     if ($nt == 1) {
587 wakaba 1.2 my $node_ns = $node->namespace_uri;
588     $node_ns = '' unless defined $node_ns;
589     my $node_ln = $node->manakai_local_name;
590 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
591 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
592 wakaba 1.3 $phase = 'after base';
593 wakaba 1.1 unless ($has_title) {
594     $has_title = 1;
595     } else {
596 wakaba 1.6 $not_allowed = 1;
597 wakaba 1.1 }
598 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
599 wakaba 1.1 if ($node->has_attribute_ns (undef, 'charset')) {
600 wakaba 1.3 if ($phase eq 'initial') {
601     $phase = 'after charset';
602 wakaba 1.1 } else {
603 wakaba 1.6 $not_allowed = 1;
604 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
605 wakaba 1.1 }
606     } else {
607 wakaba 1.3 $phase = 'after base';
608 wakaba 1.1 }
609 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
610 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
611     $phase = 'after base';
612 wakaba 1.1 } else {
613 wakaba 1.6 $not_allowed = 1;
614 wakaba 1.1 }
615 wakaba 1.7 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
616     $phase = 'after base';
617 wakaba 1.1 } else {
618 wakaba 1.7 $not_allowed = 1;
619 wakaba 1.1 }
620 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
621     if $not_allowed;
622 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
623     unshift @nodes, @$sib;
624 wakaba 1.4 push @$new_todos, @$ch;
625 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
626     if ($node->data =~ /[^\x09-\x0D\x20]/) {
627 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
628 wakaba 1.1 }
629     } elsif ($nt == 5) {
630     unshift @nodes, @{$node->child_nodes};
631     }
632     }
633     unless ($has_title) {
634 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
635 wakaba 1.1 }
636 wakaba 1.4 return ($new_todos);
637 wakaba 1.1 },
638     };
639    
640     $Element->{$HTML_NS}->{title} = {
641     checker => $HTMLTextChecker,
642     };
643    
644     $Element->{$HTML_NS}->{base} = {
645     checker => $HTMLEmptyChecker,
646     };
647    
648     $Element->{$HTML_NS}->{link} = {
649     checker => $HTMLEmptyChecker,
650     };
651    
652     $Element->{$HTML_NS}->{meta} = {
653     checker => $HTMLEmptyChecker,
654     };
655    
656     ## NOTE: |html:style| has no conformance creteria on content model
657 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
658     checker => $AnyChecker,
659     };
660 wakaba 1.1
661     $Element->{$HTML_NS}->{body} = {
662     checker => $HTMLBlockChecker,
663     };
664    
665     $Element->{$HTML_NS}->{section} = {
666     checker => $HTMLStylableBlockChecker,
667     };
668    
669     $Element->{$HTML_NS}->{nav} = {
670     checker => $HTMLBlockOrInlineChecker,
671     };
672    
673     $Element->{$HTML_NS}->{article} = {
674     checker => $HTMLStylableBlockChecker,
675     };
676    
677     $Element->{$HTML_NS}->{blockquote} = {
678     checker => $HTMLBlockChecker,
679     };
680    
681     $Element->{$HTML_NS}->{aside} = {
682 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
683 wakaba 1.1 };
684    
685     $Element->{$HTML_NS}->{h1} = {
686     checker => $HTMLSignificantStrictlyInlineChecker,
687     };
688    
689     $Element->{$HTML_NS}->{h2} = {
690     checker => $HTMLSignificantStrictlyInlineChecker,
691     };
692    
693     $Element->{$HTML_NS}->{h3} = {
694     checker => $HTMLSignificantStrictlyInlineChecker,
695     };
696    
697     $Element->{$HTML_NS}->{h4} = {
698     checker => $HTMLSignificantStrictlyInlineChecker,
699     };
700    
701     $Element->{$HTML_NS}->{h5} = {
702     checker => $HTMLSignificantStrictlyInlineChecker,
703     };
704    
705     $Element->{$HTML_NS}->{h6} = {
706     checker => $HTMLSignificantStrictlyInlineChecker,
707     };
708    
709     ## TODO: header
710    
711 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
712     checker => sub { ## block -hn -header -footer -sectioning or inline
713 wakaba 1.4 my ($self, $todo) = @_;
714     my $el = $todo->{node};
715     my $new_todos = [];
716 wakaba 1.2 my @nodes = (@{$el->child_nodes});
717    
718     my $content = 'block-or-inline'; # or 'block' or 'inline'
719     my @block_not_inline;
720     while (@nodes) {
721     my $node = shift @nodes;
722     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
723    
724     my $nt = $node->node_type;
725     if ($nt == 1) {
726     my $node_ns = $node->namespace_uri;
727     $node_ns = '' unless defined $node_ns;
728     my $node_ln = $node->manakai_local_name;
729 wakaba 1.6 my $not_allowed;
730 wakaba 1.2 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
731 wakaba 1.6 $not_allowed = 1;
732 wakaba 1.2 } elsif ($node_ns eq $HTML_NS and
733     {
734     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
735     }->{$node_ln}) {
736 wakaba 1.6 $not_allowed = 1;
737 wakaba 1.2 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
738 wakaba 1.6 $not_allowed = 1;
739 wakaba 1.2 }
740     if ($content eq 'block') {
741 wakaba 1.7 $not_allowed = 1
742     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
743 wakaba 1.2 } elsif ($content eq 'inline') {
744 wakaba 1.7 $not_allowed = 1
745     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
746     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
747 wakaba 1.2 } else {
748 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
749     my $is_inline
750     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
751     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
752 wakaba 1.2
753 wakaba 1.6 push @block_not_inline, $node
754     if $is_block and not $is_inline and not $not_allowed;
755 wakaba 1.2 unless ($is_block) {
756     $content = 'inline';
757     for (@block_not_inline) {
758     $self->{onerror}->(node => $_, type => 'element not allowed');
759     }
760 wakaba 1.6 $not_allowed = 1 unless $is_inline;
761 wakaba 1.2 }
762     }
763 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
764     if $not_allowed;
765 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
766     unshift @nodes, @$sib;
767 wakaba 1.4 push @$new_todos, @$ch;
768 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
769     if ($node->data =~ /[^\x09-\x0D\x20]/) {
770     if ($content eq 'block') {
771     $self->{onerror}->(node => $node, type => 'character not allowed');
772     } else {
773     $content = 'inline';
774     for (@block_not_inline) {
775     $self->{onerror}->(node => $_, type => 'element not allowed');
776     }
777     }
778     }
779     } elsif ($nt == 5) {
780     unshift @nodes, @{$node->child_nodes};
781     }
782     }
783    
784     my $end = $self->_add_minuses
785     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
786     $HTMLSectioningElements);
787 wakaba 1.4 push @$new_todos, $end;
788 wakaba 1.2
789 wakaba 1.4 if ($content eq 'inline') {
790     for (@$new_todos) {
791     $_->{inline} = 1;
792     }
793     }
794    
795     return ($new_todos);
796 wakaba 1.2 },
797     };
798 wakaba 1.1
799     $Element->{$HTML_NS}->{address} = {
800     checker => $HTMLInlineChecker,
801     };
802    
803     $Element->{$HTML_NS}->{p} = {
804     checker => $HTMLSignificantInlineChecker,
805     };
806    
807     $Element->{$HTML_NS}->{hr} = {
808     checker => $HTMLEmptyChecker,
809     };
810    
811     $Element->{$HTML_NS}->{br} = {
812     checker => $HTMLEmptyChecker,
813     };
814    
815     $Element->{$HTML_NS}->{dialog} = {
816     checker => sub {
817 wakaba 1.4 my ($self, $todo) = @_;
818     my $el = $todo->{node};
819     my $new_todos = [];
820 wakaba 1.1 my @nodes = (@{$el->child_nodes});
821    
822     my $phase = 'before dt';
823     while (@nodes) {
824     my $node = shift @nodes;
825 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
826    
827 wakaba 1.1 my $nt = $node->node_type;
828     if ($nt == 1) {
829 wakaba 1.8 my $node_ns = $node->namespace_uri;
830     $node_ns = '' unless defined $node_ns;
831     my $node_ln = $node->manakai_local_name;
832 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
833 wakaba 1.1 if ($phase eq 'before dt') {
834 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
835 wakaba 1.1 $phase = 'before dd';
836 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
837 wakaba 1.2 $self->{onerror}
838 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
839 wakaba 1.1 $phase = 'before dt';
840     } else {
841 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
842 wakaba 1.1 }
843     } else { # before dd
844 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
845 wakaba 1.1 $phase = 'before dt';
846 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
847 wakaba 1.2 $self->{onerror}
848 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
849 wakaba 1.1 $phase = 'before dd';
850     } else {
851 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
852 wakaba 1.1 }
853     }
854 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
855     unshift @nodes, @$sib;
856 wakaba 1.4 push @$new_todos, @$ch;
857 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
858     if ($node->data =~ /[^\x09-\x0D\x20]/) {
859 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
860 wakaba 1.1 }
861     } elsif ($nt == 5) {
862     unshift @nodes, @{$node->child_nodes};
863     }
864     }
865     if ($phase eq 'before dd') {
866 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
867 wakaba 1.1 }
868 wakaba 1.4 return ($new_todos);
869 wakaba 1.1 },
870     };
871    
872     $Element->{$HTML_NS}->{pre} = {
873     checker => $HTMLStrictlyInlineChecker,
874     };
875    
876     $Element->{$HTML_NS}->{ol} = {
877     checker => sub {
878 wakaba 1.4 my ($self, $todo) = @_;
879     my $el = $todo->{node};
880     my $new_todos = [];
881 wakaba 1.1 my @nodes = (@{$el->child_nodes});
882    
883     while (@nodes) {
884     my $node = shift @nodes;
885 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
886    
887 wakaba 1.1 my $nt = $node->node_type;
888     if ($nt == 1) {
889 wakaba 1.8 my $node_ns = $node->namespace_uri;
890     $node_ns = '' unless defined $node_ns;
891     my $node_ln = $node->manakai_local_name;
892 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
893 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
894 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
895 wakaba 1.1 }
896 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
897     unshift @nodes, @$sib;
898 wakaba 1.4 push @$new_todos, @$ch;
899 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
900     if ($node->data =~ /[^\x09-\x0D\x20]/) {
901 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
902 wakaba 1.1 }
903     } elsif ($nt == 5) {
904     unshift @nodes, @{$node->child_nodes};
905     }
906     }
907 wakaba 1.4
908     if ($todo->{inline}) {
909     for (@$new_todos) {
910     $_->{inline} = 1;
911     }
912     }
913     return ($new_todos);
914 wakaba 1.1 },
915     };
916    
917     $Element->{$HTML_NS}->{ul} = {
918     checker => $Element->{$HTML_NS}->{ol}->{checker},
919     };
920    
921 wakaba 1.5
922     $Element->{$HTML_NS}->{li} = {
923     checker => sub {
924     my ($self, $todo) = @_;
925     if ($todo->{inline}) {
926     return $HTMLInlineChecker->($self, $todo);
927     } else {
928     return $HTMLBlockOrInlineChecker->($self, $todo);
929     }
930     },
931     };
932 wakaba 1.1
933     $Element->{$HTML_NS}->{dl} = {
934     checker => sub {
935 wakaba 1.4 my ($self, $todo) = @_;
936     my $el = $todo->{node};
937     my $new_todos = [];
938 wakaba 1.1 my @nodes = (@{$el->child_nodes});
939    
940     my $phase = 'before dt';
941     while (@nodes) {
942     my $node = shift @nodes;
943 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
944    
945 wakaba 1.1 my $nt = $node->node_type;
946     if ($nt == 1) {
947 wakaba 1.8 my $node_ns = $node->namespace_uri;
948     $node_ns = '' unless defined $node_ns;
949     my $node_ln = $node->manakai_local_name;
950 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
951 wakaba 1.1 if ($phase eq 'in dds') {
952 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
953 wakaba 1.1 #$phase = 'in dds';
954 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
955 wakaba 1.1 $phase = 'in dts';
956     } else {
957 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
958 wakaba 1.1 }
959     } elsif ($phase eq 'in dts') {
960 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
961 wakaba 1.1 #$phase = 'in dts';
962 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
963 wakaba 1.1 $phase = 'in dds';
964     } else {
965 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
966 wakaba 1.1 }
967     } else { # before dt
968 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
969 wakaba 1.1 $phase = 'in dts';
970 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
971 wakaba 1.2 $self->{onerror}
972 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
973 wakaba 1.1 $phase = 'in dds';
974     } else {
975 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
976 wakaba 1.1 }
977     }
978 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
979     unshift @nodes, @$sib;
980 wakaba 1.4 push @$new_todos, @$ch;
981 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
982     if ($node->data =~ /[^\x09-\x0D\x20]/) {
983 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
984 wakaba 1.1 }
985     } elsif ($nt == 5) {
986     unshift @nodes, @{$node->child_nodes};
987     }
988     }
989     if ($phase eq 'in dts') {
990 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
991 wakaba 1.1 }
992 wakaba 1.4
993     if ($todo->{inline}) {
994     for (@$new_todos) {
995     $_->{inline} = 1;
996     }
997     }
998     return ($new_todos);
999 wakaba 1.1 },
1000     };
1001    
1002     $Element->{$HTML_NS}->{dt} = {
1003     checker => $HTMLStrictlyInlineChecker,
1004     };
1005    
1006 wakaba 1.4 $Element->{$HTML_NS}->{dd} = {
1007 wakaba 1.5 checker => $Element->{$HTML_NS}->{li}->{checker},
1008 wakaba 1.4 };
1009 wakaba 1.1
1010 wakaba 1.6 $Element->{$HTML_NS}->{a} = {
1011     checker => sub {
1012     my ($self, $todo) = @_;
1013    
1014     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1015     my ($sib, $ch)
1016     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1017     push @$sib, $end;
1018     return ($sib, $ch);
1019     },
1020     };
1021 wakaba 1.1
1022 wakaba 1.4 $Element->{$HTML_NS}->{q} = {
1023     checker => $HTMLInlineOrStrictlyInlineChecker,
1024     };
1025 wakaba 1.1
1026     $Element->{$HTML_NS}->{cite} = {
1027     checker => $HTMLStrictlyInlineChecker,
1028     };
1029    
1030 wakaba 1.4 $Element->{$HTML_NS}->{em} = {
1031     checker => $HTMLInlineOrStrictlyInlineChecker,
1032     };
1033    
1034     $Element->{$HTML_NS}->{strong} = {
1035     checker => $HTMLInlineOrStrictlyInlineChecker,
1036     };
1037 wakaba 1.1
1038 wakaba 1.4 $Element->{$HTML_NS}->{small} = {
1039     checker => $HTMLInlineOrStrictlyInlineChecker,
1040     };
1041    
1042     $Element->{$HTML_NS}->{m} = {
1043     checker => $HTMLInlineOrStrictlyInlineChecker,
1044     };
1045    
1046     $Element->{$HTML_NS}->{dfn} = {
1047     checker => sub {
1048     my ($self, $todo) = @_;
1049    
1050     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1051     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1052     push @$sib, $end;
1053     return ($sib, $ch);
1054     },
1055     };
1056 wakaba 1.1
1057     $Element->{$HTML_NS}->{abbr} = {
1058     checker => $HTMLStrictlyInlineChecker,
1059     };
1060    
1061     $Element->{$HTML_NS}->{time} = {
1062     checker => $HTMLStrictlyInlineChecker,
1063     };
1064    
1065     $Element->{$HTML_NS}->{meter} = {
1066     checker => $HTMLStrictlyInlineChecker,
1067     };
1068    
1069     $Element->{$HTML_NS}->{progress} = {
1070     checker => $HTMLStrictlyInlineChecker,
1071     };
1072    
1073 wakaba 1.4 $Element->{$HTML_NS}->{code} = {
1074     checker => $HTMLInlineOrStrictlyInlineChecker,
1075     };
1076 wakaba 1.1
1077     $Element->{$HTML_NS}->{var} = {
1078     checker => $HTMLStrictlyInlineChecker,
1079     };
1080    
1081 wakaba 1.4 $Element->{$HTML_NS}->{samp} = {
1082     checker => $HTMLInlineOrStrictlyInlineChecker,
1083     };
1084 wakaba 1.1
1085     $Element->{$HTML_NS}->{kbd} = {
1086     checker => $HTMLStrictlyInlineChecker,
1087     };
1088    
1089     $Element->{$HTML_NS}->{sub} = {
1090     checker => $HTMLStrictlyInlineChecker,
1091     };
1092    
1093     $Element->{$HTML_NS}->{sup} = {
1094     checker => $HTMLStrictlyInlineChecker,
1095     };
1096    
1097 wakaba 1.4 $Element->{$HTML_NS}->{span} = {
1098     checker => $HTMLInlineOrStrictlyInlineChecker,
1099     };
1100 wakaba 1.1
1101     $Element->{$HTML_NS}->{i} = {
1102     checker => $HTMLStrictlyInlineChecker,
1103     };
1104    
1105     $Element->{$HTML_NS}->{b} = {
1106     checker => $HTMLStrictlyInlineChecker,
1107     };
1108    
1109     $Element->{$HTML_NS}->{bdo} = {
1110     checker => $HTMLStrictlyInlineChecker,
1111     };
1112    
1113     $Element->{$HTML_NS}->{ins} = {
1114     checker => $HTMLTransparentChecker,
1115     };
1116    
1117     $Element->{$HTML_NS}->{del} = {
1118     checker => sub {
1119 wakaba 1.4 my ($self, $todo) = @_;
1120 wakaba 1.1
1121 wakaba 1.4 my $parent = $todo->{node}->manakai_parent_element;
1122 wakaba 1.1 if (defined $parent) {
1123     my $nsuri = $parent->namespace_uri;
1124     $nsuri = '' unless defined $nsuri;
1125     my $ln = $parent->manakai_local_name;
1126     my $eldef = $Element->{$nsuri}->{$ln} ||
1127     $Element->{$nsuri}->{''} ||
1128     $ElementDefault;
1129 wakaba 1.4 return $eldef->{checker}->($self, $todo);
1130 wakaba 1.1 } else {
1131 wakaba 1.4 return $HTMLBlockOrInlineChecker->($self, $todo);
1132 wakaba 1.1 }
1133     },
1134     };
1135    
1136     ## TODO: figure
1137    
1138     $Element->{$HTML_NS}->{img} = {
1139     checker => $HTMLEmptyChecker,
1140     };
1141    
1142     $Element->{$HTML_NS}->{iframe} = {
1143     checker => $HTMLTextChecker,
1144     };
1145    
1146     $Element->{$HTML_NS}->{embed} = {
1147     checker => $HTMLEmptyChecker,
1148     };
1149    
1150     $Element->{$HTML_NS}->{param} = {
1151     checker => $HTMLEmptyChecker,
1152     };
1153    
1154     ## TODO: object
1155    
1156 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
1157     checker => sub {
1158 wakaba 1.4 my ($self, $todo) = @_;
1159 wakaba 1.2
1160 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1161     return $HTMLBlockOrInlineChecker->($self, $todo);
1162 wakaba 1.2 } else {
1163     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
1164 wakaba 1.4 ->($self, $todo);
1165 wakaba 1.2 }
1166     },
1167     };
1168    
1169     $Element->{$HTML_NS}->{audio} = {
1170     checker => $Element->{$HTML_NS}->{audio}->{checker},
1171     };
1172 wakaba 1.1
1173     $Element->{$HTML_NS}->{source} = {
1174     checker => $HTMLEmptyChecker,
1175     };
1176    
1177     $Element->{$HTML_NS}->{canvas} = {
1178     checker => $HTMLInlineChecker,
1179     };
1180    
1181     $Element->{$HTML_NS}->{map} = {
1182     checker => $HTMLBlockChecker,
1183     };
1184    
1185     $Element->{$HTML_NS}->{area} = {
1186     checker => $HTMLEmptyChecker,
1187     };
1188     ## TODO: only in map
1189    
1190     $Element->{$HTML_NS}->{table} = {
1191     checker => sub {
1192 wakaba 1.4 my ($self, $todo) = @_;
1193     my $el = $todo->{node};
1194     my $new_todos = [];
1195 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1196    
1197     my $phase = 'before caption';
1198     my $has_tfoot;
1199     while (@nodes) {
1200     my $node = shift @nodes;
1201 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1202    
1203 wakaba 1.1 my $nt = $node->node_type;
1204     if ($nt == 1) {
1205 wakaba 1.8 my $node_ns = $node->namespace_uri;
1206     $node_ns = '' unless defined $node_ns;
1207     my $node_ln = $node->manakai_local_name;
1208 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1209 wakaba 1.1 if ($phase eq 'in tbodys') {
1210 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1211 wakaba 1.1 #$phase = 'in tbodys';
1212     } elsif (not $has_tfoot and
1213 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1214 wakaba 1.1 $phase = 'after tfoot';
1215     $has_tfoot = 1;
1216     } else {
1217 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1218 wakaba 1.1 }
1219     } elsif ($phase eq 'in trs') {
1220 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1221 wakaba 1.1 #$phase = 'in trs';
1222     } elsif (not $has_tfoot and
1223 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1224 wakaba 1.1 $phase = 'after tfoot';
1225     $has_tfoot = 1;
1226     } else {
1227 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1228 wakaba 1.1 }
1229     } elsif ($phase eq 'after thead') {
1230 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1231 wakaba 1.1 $phase = 'in tbodys';
1232 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1233 wakaba 1.1 $phase = 'in trs';
1234 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1235 wakaba 1.1 $phase = 'in tbodys';
1236     $has_tfoot = 1;
1237     } else {
1238 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1239 wakaba 1.1 }
1240     } elsif ($phase eq 'in colgroup') {
1241 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
1242 wakaba 1.1 $phase = 'in colgroup';
1243 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
1244 wakaba 1.1 $phase = 'after thead';
1245 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1246 wakaba 1.1 $phase = 'in tbodys';
1247 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1248 wakaba 1.1 $phase = 'in trs';
1249 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1250 wakaba 1.1 $phase = 'in tbodys';
1251     $has_tfoot = 1;
1252     } else {
1253 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1254 wakaba 1.1 }
1255     } elsif ($phase eq 'before caption') {
1256 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
1257 wakaba 1.1 $phase = 'in colgroup';
1258 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
1259 wakaba 1.1 $phase = 'in colgroup';
1260 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
1261 wakaba 1.1 $phase = 'after thead';
1262 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1263 wakaba 1.1 $phase = 'in tbodys';
1264 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1265 wakaba 1.1 $phase = 'in trs';
1266 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1267 wakaba 1.1 $phase = 'in tbodys';
1268     $has_tfoot = 1;
1269     } else {
1270 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1271 wakaba 1.1 }
1272     } else { # after tfoot
1273 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1274 wakaba 1.1 }
1275 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1276     unshift @nodes, @$sib;
1277 wakaba 1.4 push @$new_todos, @$ch;
1278 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1279     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1280 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1281 wakaba 1.1 }
1282     } elsif ($nt == 5) {
1283     unshift @nodes, @{$node->child_nodes};
1284     }
1285     }
1286 wakaba 1.4 return ($new_todos);
1287 wakaba 1.1 },
1288     };
1289    
1290     $Element->{$HTML_NS}->{caption} = {
1291     checker => $HTMLSignificantStrictlyInlineChecker,
1292     };
1293    
1294     $Element->{$HTML_NS}->{colgroup} = {
1295     checker => sub {
1296 wakaba 1.4 my ($self, $todo) = @_;
1297     my $el = $todo->{node};
1298     my $new_todos = [];
1299 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1300    
1301     while (@nodes) {
1302     my $node = shift @nodes;
1303 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1304    
1305 wakaba 1.1 my $nt = $node->node_type;
1306     if ($nt == 1) {
1307 wakaba 1.8 my $node_ns = $node->namespace_uri;
1308     $node_ns = '' unless defined $node_ns;
1309     my $node_ln = $node->manakai_local_name;
1310 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1311 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
1312 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1313 wakaba 1.1 }
1314 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1315     unshift @nodes, @$sib;
1316 wakaba 1.4 push @$new_todos, @$ch;
1317 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1318     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1319 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1320 wakaba 1.1 }
1321     } elsif ($nt == 5) {
1322     unshift @nodes, @{$node->child_nodes};
1323     }
1324     }
1325 wakaba 1.4 return ($new_todos);
1326 wakaba 1.1 },
1327     };
1328    
1329     $Element->{$HTML_NS}->{col} = {
1330     checker => $HTMLEmptyChecker,
1331     };
1332    
1333     $Element->{$HTML_NS}->{tbody} = {
1334     checker => sub {
1335 wakaba 1.4 my ($self, $todo) = @_;
1336     my $el = $todo->{node};
1337     my $new_todos = [];
1338 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1339    
1340     my $has_tr;
1341     while (@nodes) {
1342     my $node = shift @nodes;
1343 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1344    
1345 wakaba 1.1 my $nt = $node->node_type;
1346     if ($nt == 1) {
1347 wakaba 1.8 my $node_ns = $node->namespace_uri;
1348     $node_ns = '' unless defined $node_ns;
1349     my $node_ln = $node->manakai_local_name;
1350 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1351 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1352 wakaba 1.1 $has_tr = 1;
1353     } else {
1354 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1355 wakaba 1.1 }
1356 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1357     unshift @nodes, @$sib;
1358 wakaba 1.4 push @$new_todos, @$ch;
1359 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1360     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1361 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1362 wakaba 1.1 }
1363     } elsif ($nt == 5) {
1364     unshift @nodes, @{$node->child_nodes};
1365     }
1366     }
1367     unless ($has_tr) {
1368 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
1369 wakaba 1.1 }
1370 wakaba 1.4 return ($new_todos);
1371 wakaba 1.1 },
1372     };
1373    
1374     $Element->{$HTML_NS}->{thead} = {
1375     checker => $Element->{$HTML_NS}->{tbody},
1376     };
1377    
1378     $Element->{$HTML_NS}->{tfoot} = {
1379     checker => $Element->{$HTML_NS}->{tbody},
1380     };
1381    
1382     $Element->{$HTML_NS}->{tr} = {
1383     checker => sub {
1384 wakaba 1.4 my ($self, $todo) = @_;
1385     my $el = $todo->{node};
1386     my $new_todos = [];
1387 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1388    
1389     my $has_td;
1390     while (@nodes) {
1391     my $node = shift @nodes;
1392 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1393    
1394 wakaba 1.1 my $nt = $node->node_type;
1395     if ($nt == 1) {
1396 wakaba 1.8 my $node_ns = $node->namespace_uri;
1397     $node_ns = '' unless defined $node_ns;
1398     my $node_ln = $node->manakai_local_name;
1399 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1400 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
1401 wakaba 1.1 $has_td = 1;
1402     } else {
1403 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1404 wakaba 1.1 }
1405 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1406     unshift @nodes, @$sib;
1407 wakaba 1.4 push @$new_todos, @$ch;
1408 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1409     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1410 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1411 wakaba 1.1 }
1412     } elsif ($nt == 5) {
1413     unshift @nodes, @{$node->child_nodes};
1414     }
1415     }
1416     unless ($has_td) {
1417 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
1418 wakaba 1.1 }
1419 wakaba 1.4 return ($new_todos);
1420 wakaba 1.1 },
1421     };
1422    
1423     $Element->{$HTML_NS}->{td} = {
1424     checker => $HTMLBlockOrInlineChecker,
1425     };
1426    
1427     $Element->{$HTML_NS}->{th} = {
1428     checker => $HTMLBlockOrInlineChecker,
1429     };
1430    
1431     ## TODO: forms
1432    
1433 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
1434     checker => sub {
1435 wakaba 1.4 my ($self, $todo) = @_;
1436 wakaba 1.2
1437 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1438     return $HTMLEmptyChecker->($self, $todo);
1439 wakaba 1.2 } else {
1440     ## NOTE: No content model conformance in HTML5 spec.
1441 wakaba 1.4 return $AnyChecker->($self, $todo);
1442 wakaba 1.2 }
1443     },
1444     };
1445    
1446     ## NOTE: When script is disabled.
1447     $Element->{$HTML_NS}->{noscript} = {
1448     checker => sub {
1449 wakaba 1.4 my ($self, $todo) = @_;
1450 wakaba 1.1
1451 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
1452 wakaba 1.4 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
1453 wakaba 1.2 push @$sib, $end;
1454     return ($sib, $ch);
1455     },
1456     };
1457 wakaba 1.1
1458     $Element->{$HTML_NS}->{'event-source'} = {
1459     checker => $HTMLEmptyChecker,
1460     };
1461    
1462     $Element->{$HTML_NS}->{details} = {
1463 wakaba 1.6 checker => sub {
1464     my ($self, $todo) = @_;
1465    
1466     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
1467     my ($sib, $ch)
1468     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
1469     ->($self, $todo);
1470     push @$sib, $end;
1471     return ($sib, $ch);
1472     },
1473 wakaba 1.1 };
1474    
1475     $Element->{$HTML_NS}->{datagrid} = {
1476 wakaba 1.6 checker => sub {
1477     my ($self, $todo) = @_;
1478    
1479     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
1480     my ($sib, $ch) = $HTMLBlockChecker->($self, $todo);
1481     push @$sib, $end;
1482     return ($sib, $ch);
1483     },
1484 wakaba 1.1 };
1485    
1486     $Element->{$HTML_NS}->{command} = {
1487     checker => $HTMLEmptyChecker,
1488     };
1489    
1490     $Element->{$HTML_NS}->{menu} = {
1491     checker => sub {
1492 wakaba 1.4 my ($self, $todo) = @_;
1493     my $el = $todo->{node};
1494     my $new_todos = [];
1495 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1496    
1497     my $content = 'li or inline';
1498     while (@nodes) {
1499     my $node = shift @nodes;
1500 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1501    
1502 wakaba 1.1 my $nt = $node->node_type;
1503     if ($nt == 1) {
1504 wakaba 1.2 my $node_ns = $node->namespace_uri;
1505     $node_ns = '' unless defined $node_ns;
1506     my $node_ln = $node->manakai_local_name;
1507 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1508 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1509 wakaba 1.1 if ($content eq 'inline') {
1510 wakaba 1.6 $not_allowed = 1;
1511 wakaba 1.1 } elsif ($content eq 'li or inline') {
1512     $content = 'li';
1513     }
1514     } else {
1515 wakaba 1.7 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1516     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
1517     $content = 'inline';
1518     } else {
1519 wakaba 1.6 $not_allowed = 1;
1520 wakaba 1.7 }
1521 wakaba 1.1 }
1522 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1523     if $not_allowed;
1524 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1525     unshift @nodes, @$sib;
1526 wakaba 1.4 push @$new_todos, @$ch;
1527 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1528     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1529     if ($content eq 'li') {
1530 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1531 wakaba 1.1 } elsif ($content eq 'li or inline') {
1532     $content = 'inline';
1533     }
1534     }
1535     } elsif ($nt == 5) {
1536     unshift @nodes, @{$node->child_nodes};
1537     }
1538     }
1539 wakaba 1.4
1540     for (@$new_todos) {
1541     $_->{inline} = 1;
1542     }
1543     return ($new_todos);
1544 wakaba 1.1 },
1545     };
1546    
1547 wakaba 1.6 $Element->{$HTML_NS}->{legend} = {
1548     checker => sub {
1549     my ($self, $todo) = @_;
1550    
1551     my $parent = $todo->{node}->manakai_parent_element;
1552     if (defined $parent) {
1553     my $nsuri = $parent->namespace_uri;
1554     $nsuri = '' unless defined $nsuri;
1555     my $ln = $parent->manakai_local_name;
1556     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
1557     return $HTMLInlineChecker->($self, $todo);
1558     } else {
1559     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1560     }
1561     } else {
1562     return $HTMLInlineChecker->($self, $todo);
1563     }
1564    
1565     ## ISSUE: Content model is defined only for fieldset/legend,
1566     ## details/legend, and figure/legend.
1567     },
1568     };
1569 wakaba 1.1
1570     $Element->{$HTML_NS}->{div} = {
1571 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1572 wakaba 1.1 };
1573    
1574     $Element->{$HTML_NS}->{font} = {
1575     checker => $HTMLTransparentChecker,
1576     };
1577    
1578     my $Attr = {
1579    
1580     };
1581    
1582 wakaba 1.2 sub new ($) {
1583     return bless {}, shift;
1584     } # new
1585    
1586 wakaba 1.1 sub check_element ($$$) {
1587     my ($self, $el, $onerror) = @_;
1588    
1589 wakaba 1.2 $self->{minuses} = {};
1590     $self->{onerror} = $onerror;
1591    
1592 wakaba 1.4 my @todo = ({type => 'element', node => $el});
1593     while (@todo) {
1594     my $todo = shift @todo;
1595     if ($todo->{type} eq 'element') {
1596     my $nsuri = $todo->{node}->namespace_uri;
1597     $nsuri = '' unless defined $nsuri;
1598     my $ln = $todo->{node}->manakai_local_name;
1599     my $eldef = $Element->{$nsuri}->{$ln} ||
1600     $Element->{$nsuri}->{''} ||
1601     $ElementDefault;
1602     my ($new_todos) = $eldef->{checker}->($self, $todo);
1603     push @todo, @$new_todos;
1604     } elsif ($todo->{type} eq 'plus') {
1605     $self->_remove_minuses ($todo);
1606     }
1607 wakaba 1.1 }
1608     } # check_element
1609    
1610 wakaba 1.2 sub _add_minuses ($@) {
1611     my $self = shift;
1612     my $r = {};
1613     for my $list (@_) {
1614     for my $ns (keys %$list) {
1615     for my $ln (keys %{$list->{$ns}}) {
1616     unless ($self->{minuses}->{$ns}->{$ln}) {
1617     $self->{minuses}->{$ns}->{$ln} = 1;
1618     $r->{$ns}->{$ln} = 1;
1619     }
1620     }
1621     }
1622     }
1623 wakaba 1.4 return {type => 'plus', list => $r};
1624 wakaba 1.2 } # _add_minuses
1625    
1626     sub _remove_minuses ($$) {
1627 wakaba 1.4 my ($self, $todo) = @_;
1628     for my $ns (keys %{$todo->{list}}) {
1629     for my $ln (keys %{$todo->{list}->{$ns}}) {
1630     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
1631 wakaba 1.2 }
1632     }
1633     1;
1634     } # _remove_minuses
1635    
1636     sub _check_get_children ($$) {
1637     my ($self, $node) = @_;
1638 wakaba 1.4 my $new_todos = [];
1639 wakaba 1.2 my $sib = [];
1640     TP: {
1641     my $node_ns = $node->namespace_uri;
1642     $node_ns = '' unless defined $node_ns;
1643     my $node_ln = $node->manakai_local_name;
1644     if ($node_ns eq $HTML_NS) {
1645     if ($node_ln eq 'noscript') {
1646     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
1647     push @$sib, $end;
1648     }
1649     }
1650 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
1651     unshift @$sib, @{$node->child_nodes};
1652     last TP;
1653 wakaba 1.2 }
1654 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
1655 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
1656     unshift @$sib, @{$node->child_nodes};
1657     last TP;
1658     } else {
1659     my @cn = @{$node->child_nodes};
1660     CN: while (@cn) {
1661     my $cn = shift @cn;
1662     my $cnt = $cn->node_type;
1663     if ($cnt == 1) {
1664 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
1665     $cn_nsuri = '' unless defined $cn_nsuri;
1666     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
1667 wakaba 1.2 #
1668     } else {
1669     last CN;
1670     }
1671     } elsif ($cnt == 3 or $cnt == 4) {
1672     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
1673     last CN;
1674     }
1675     }
1676     } # CN
1677     unshift @$sib, @cn;
1678     }
1679     }
1680 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
1681 wakaba 1.2 } # TP
1682 wakaba 1.4 return ($sib, $new_todos);
1683 wakaba 1.2 } # _check_get_children
1684    
1685 wakaba 1.1 1;
1686 wakaba 1.8 # $Date: 2007/05/13 10:17:35 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24