/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download)
Sun May 13 05:35:22 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +62 -50 lines
++ whatpm/t/ChangeLog	13 May 2007 05:35:20 -0000
2007-05-13  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat: New test data.

	* ContentChecker.t: New test.

++ whatpm/Whatpm/ChangeLog	13 May 2007 05:34:38 -0000
2007-05-13  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm ($AnyChecker): Renamed
	from |$ElementDefault->{checker}|.
	($ElementDefault->{checker}): Throw an error that
	the element type is not supported by the checker.
	($HTMLMetadataElement): |html:base| was missing.
	($HTMLEmptyChecker): Don't throw an error
	for inter-element whitespace nodes.
	(html:html checker): Errors were not
	thrown even if |html:head| and/or |html:body|
	children were missing.
	(html:head checker): An error was not
	thrown if <meta charset> appered after other
	elements.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.3 ## ANY
5     my $AnyChecker = sub {
6     my ($self, $el) = @_;
7     my $children = [];
8     my @nodes = (@{$el->child_nodes});
9     while (@nodes) {
10     my $node = shift @nodes;
11     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
12    
13     my $nt = $node->node_type;
14     if ($nt == 1) {
15     my $node_ns = $node->namespace_uri;
16     $node_ns = '' unless defined $node_ns;
17     my $node_ln = $node->manakai_local_name;
18     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
19     $self->{onerror}->(node => $node, type => 'element not allowed');
20     }
21     push @$children, $node;
22     } elsif ($nt == 5) {
23     unshift @nodes, @{$node->child_nodes};
24     }
25     }
26     return ($children);
27     }; # $AnyChecker
28    
29 wakaba 1.1 my $ElementDefault = {
30     checker => sub {
31 wakaba 1.2 my ($self, $el) = @_;
32 wakaba 1.3 $self->{onerror}->(node => $el, type => 'element not supported');
33     return $AnyChecker->($self, $el);
34 wakaba 1.1 },
35     };
36    
37     my $Element = {};
38    
39     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
40    
41     my $HTMLMetadataElements = [
42     [$HTML_NS, 'link'],
43     [$HTML_NS, 'meta'],
44     [$HTML_NS, 'style'],
45     [$HTML_NS, 'script'],
46     [$HTML_NS, 'event-source'],
47     [$HTML_NS, 'command'],
48 wakaba 1.3 [$HTML_NS, 'base'],
49 wakaba 1.1 [$HTML_NS, 'title'],
50     ];
51    
52 wakaba 1.2 my $HTMLSectioningElements = {
53     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
54     };
55 wakaba 1.1
56     my $HTMLBlockLevelElements = [
57     [$HTML_NS, 'section'],
58     [$HTML_NS, 'nav'],
59     [$HTML_NS, 'article'],
60     [$HTML_NS, 'blockquote'],
61     [$HTML_NS, 'aside'],
62 wakaba 1.2 [$HTML_NS, 'h1'],
63     [$HTML_NS, 'h2'],
64     [$HTML_NS, 'h3'],
65     [$HTML_NS, 'h4'],
66     [$HTML_NS, 'h5'],
67     [$HTML_NS, 'h6'],
68 wakaba 1.1 [$HTML_NS, 'header'],
69     [$HTML_NS, 'footer'],
70     [$HTML_NS, 'address'],
71     [$HTML_NS, 'p'],
72     [$HTML_NS, 'hr'],
73     [$HTML_NS, 'dialog'],
74     [$HTML_NS, 'pre'],
75     [$HTML_NS, 'ol'],
76     [$HTML_NS, 'ul'],
77     [$HTML_NS, 'dl'],
78     [$HTML_NS, 'ins'],
79     [$HTML_NS, 'del'],
80     [$HTML_NS, 'figure'],
81     [$HTML_NS, 'map'],
82     [$HTML_NS, 'table'],
83     [$HTML_NS, 'script'],
84     [$HTML_NS, 'noscript'],
85     [$HTML_NS, 'event-source'],
86     [$HTML_NS, 'details'],
87     [$HTML_NS, 'datagrid'],
88     [$HTML_NS, 'menu'],
89     [$HTML_NS, 'div'],
90     [$HTML_NS, 'font'],
91     ];
92    
93     my $HTMLStrictlyInlineLevelElements = [
94     [$HTML_NS, 'br'],
95     [$HTML_NS, 'a'],
96     [$HTML_NS, 'q'],
97     [$HTML_NS, 'cite'],
98     [$HTML_NS, 'em'],
99     [$HTML_NS, 'strong'],
100     [$HTML_NS, 'small'],
101     [$HTML_NS, 'm'],
102     [$HTML_NS, 'dfn'],
103     [$HTML_NS, 'abbr'],
104     [$HTML_NS, 'time'],
105     [$HTML_NS, 'meter'],
106     [$HTML_NS, 'progress'],
107     [$HTML_NS, 'code'],
108     [$HTML_NS, 'var'],
109     [$HTML_NS, 'samp'],
110     [$HTML_NS, 'kbd'],
111     [$HTML_NS, 'sub'],
112     [$HTML_NS, 'sup'],
113     [$HTML_NS, 'span'],
114     [$HTML_NS, 'i'],
115     [$HTML_NS, 'b'],
116     [$HTML_NS, 'bdo'],
117     [$HTML_NS, 'ins'],
118     [$HTML_NS, 'del'],
119     [$HTML_NS, 'img'],
120     [$HTML_NS, 'iframe'],
121     [$HTML_NS, 'embed'],
122     [$HTML_NS, 'object'],
123     [$HTML_NS, 'video'],
124     [$HTML_NS, 'audio'],
125     [$HTML_NS, 'canvas'],
126     [$HTML_NS, 'area'],
127     [$HTML_NS, 'script'],
128     [$HTML_NS, 'noscript'],
129     [$HTML_NS, 'event-source'],
130     [$HTML_NS, 'command'],
131     [$HTML_NS, 'font'],
132     ];
133    
134     my $HTMLStructuredInlineLevelElements = [
135     [$HTML_NS, 'blockquote'],
136     [$HTML_NS, 'pre'],
137     [$HTML_NS, 'ol'],
138     [$HTML_NS, 'ul'],
139     [$HTML_NS, 'dl'],
140     [$HTML_NS, 'table'],
141     [$HTML_NS, 'menu'],
142     ];
143    
144     my $HTMLInteractiveElements = [
145     [$HTML_NS, 'a'],
146     [$HTML_NS, 'details'],
147     [$HTML_NS, 'datagrid'],
148     ];
149    
150     my $HTMLTransparentElements = [
151     [$HTML_NS, 'ins'],
152     [$HTML_NS, 'font'],
153 wakaba 1.2 [$HTML_NS, 'noscript'], ## NOTE: If scripting is disabled.
154 wakaba 1.1 ];
155    
156 wakaba 1.2 #my $HTMLSemiTransparentElements = [
157     # [$HTML_NS, 'video'],
158     # [$HTML_NS, 'audio'],
159     #];
160 wakaba 1.1
161     my $HTMLEmbededElements = [
162     [$HTML_NS, 'img'],
163     [$HTML_NS, 'iframe'],
164     [$HTML_NS, 'embed'],
165     [$HTML_NS, 'object'],
166     [$HTML_NS, 'video'],
167     [$HTML_NS, 'audio'],
168     [$HTML_NS, 'canvas'],
169     ];
170    
171     ## Empty
172     my $HTMLEmptyChecker = sub {
173 wakaba 1.2 my ($self, $el) = @_;
174 wakaba 1.1 my $children = [];
175     my @nodes = (@{$el->child_nodes});
176    
177     while (@nodes) {
178     my $node = shift @nodes;
179 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
180    
181 wakaba 1.1 my $nt = $node->node_type;
182     if ($nt == 1) {
183 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
184     $self->{onerror}->(node => $node, type => 'element not allowed');
185     my ($sib, $ch) = $self->_check_get_children ($node);
186     unshift @nodes, @$sib;
187     push @$children, @$ch;
188 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
189 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
190     $self->{onerror}->(node => $node, type => 'character not allowed');
191     }
192 wakaba 1.1 } elsif ($nt == 5) {
193     unshift @nodes, @{$node->child_nodes};
194     }
195     }
196     return ($children);
197     };
198    
199     ## Text
200     my $HTMLTextChecker = sub {
201 wakaba 1.2 my ($self, $el) = @_;
202 wakaba 1.1 my $children = [];
203     my @nodes = (@{$el->child_nodes});
204    
205     while (@nodes) {
206     my $node = shift @nodes;
207 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
208    
209 wakaba 1.1 my $nt = $node->node_type;
210     if ($nt == 1) {
211 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
212     $self->{onerror}->(node => $node, type => 'element not allowed');
213     my ($sib, $ch) = $self->_check_get_children ($node);
214     unshift @nodes, @$sib;
215     push @$children, @$ch;
216 wakaba 1.1 } elsif ($nt == 5) {
217     unshift @nodes, @{$node->child_nodes};
218     }
219     }
220     return ($children);
221     };
222    
223     ## Zero or more |html:style| elements,
224     ## followed by zero or more block-level elements
225     my $HTMLStylableBlockChecker = sub {
226 wakaba 1.2 my ($self, $el) = @_;
227 wakaba 1.1 my $children = [];
228     my @nodes = (@{$el->child_nodes});
229    
230     my $has_non_style;
231     while (@nodes) {
232     my $node = shift @nodes;
233 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
234    
235 wakaba 1.1 my $nt = $node->node_type;
236     if ($nt == 1) {
237 wakaba 1.2 my $node_ns = $node->namespace_uri;
238     $node_ns = '' unless defined $node_ns;
239     my $node_ln = $node->manakai_local_name;
240     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
241     $self->{onerror}->(node => $node, type => 'element not allowed');
242     }
243 wakaba 1.1 if ($node->manakai_element_type_match ($HTML_NS, 'style')) {
244     if ($has_non_style) {
245 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
246 wakaba 1.1 }
247     } else {
248     $has_non_style = 1;
249     CHK: {
250     for (@{$HTMLBlockLevelElements}) {
251     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
252     last CHK;
253     }
254     }
255 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
256 wakaba 1.1 } # CHK
257     }
258 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
259     unshift @nodes, @$sib;
260     push @$children, @$ch;
261 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
262     if ($node->data =~ /[^\x09-\x0D\x20]/) {
263 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
264 wakaba 1.1 }
265     } elsif ($nt == 5) {
266     unshift @nodes, @{$node->child_nodes};
267     }
268     }
269     return ($children);
270     }; # $HTMLStylableBlockChecker
271    
272     ## Zero or more block-level elements
273     my $HTMLBlockChecker = sub {
274 wakaba 1.2 my ($self, $el) = @_;
275 wakaba 1.1 my $children = [];
276     my @nodes = (@{$el->child_nodes});
277    
278     while (@nodes) {
279     my $node = shift @nodes;
280 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
281    
282 wakaba 1.1 my $nt = $node->node_type;
283     if ($nt == 1) {
284 wakaba 1.2 my $node_ns = $node->namespace_uri;
285     $node_ns = '' unless defined $node_ns;
286     my $node_ln = $node->manakai_local_name;
287     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
288     $self->{onerror}->(node => $node, type => 'element not allowed');
289     }
290 wakaba 1.1 CHK: {
291     for (@{$HTMLBlockLevelElements}) {
292     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
293     last CHK;
294     }
295     }
296 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
297 wakaba 1.1 } # CHK
298 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
299     unshift @nodes, @$sib;
300     push @$children, @$ch;
301 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
302     if ($node->data =~ /[^\x09-\x0D\x20]/) {
303 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
304 wakaba 1.1 }
305     } elsif ($nt == 5) {
306     unshift @nodes, @{$node->child_nodes};
307     }
308     }
309     return ($children);
310     }; # $HTMLBlockChecker
311    
312     ## Inline-level content
313     my $HTMLInlineChecker = sub {
314 wakaba 1.2 my ($self, $el) = @_;
315 wakaba 1.1 my $children = [];
316     my @nodes = (@{$el->child_nodes});
317    
318     while (@nodes) {
319     my $node = shift @nodes;
320 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
321    
322 wakaba 1.1 my $nt = $node->node_type;
323     if ($nt == 1) {
324 wakaba 1.2 my $node_ns = $node->namespace_uri;
325     $node_ns = '' unless defined $node_ns;
326     my $node_ln = $node->manakai_local_name;
327     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
328     $self->{onerror}->(node => $node, type => 'element not allowed');
329     }
330 wakaba 1.1 CHK: {
331     for (@{$HTMLStrictlyInlineLevelElements},
332     @{$HTMLStructuredInlineLevelElements}) {
333     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
334     last CHK;
335     }
336     }
337 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
338 wakaba 1.1 } # CHK
339 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
340     unshift @nodes, @$sib;
341     push @$children, @$ch;
342 wakaba 1.1 } elsif ($nt == 5) {
343     unshift @nodes, @{$node->child_nodes};
344     }
345     }
346     return ($children);
347     }; # $HTMLStrictlyInlineChecker
348    
349     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
350     ## TODO: check significant content
351    
352     ## Strictly inline-level content
353     my $HTMLStrictlyInlineChecker = sub {
354 wakaba 1.2 my ($self, $el) = @_;
355 wakaba 1.1 my $children = [];
356     my @nodes = (@{$el->child_nodes});
357    
358     while (@nodes) {
359     my $node = shift @nodes;
360 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
361    
362 wakaba 1.1 my $nt = $node->node_type;
363     if ($nt == 1) {
364 wakaba 1.2 my $node_ns = $node->namespace_uri;
365     $node_ns = '' unless defined $node_ns;
366     my $node_ln = $node->manakai_local_name;
367     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
368     $self->{onerror}->(node => $node, type => 'element not allowed');
369     }
370 wakaba 1.1 CHK: {
371     for (@{$HTMLStrictlyInlineLevelElements}) {
372     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
373     last CHK;
374     }
375     }
376 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
377 wakaba 1.1 } # CHK
378 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
379     unshift @nodes, @$sib;
380     push @$children, @$ch;
381 wakaba 1.1 } elsif ($nt == 5) {
382     unshift @nodes, @{$node->child_nodes};
383     }
384     }
385     return ($children);
386     }; # $HTMLStrictlyInlineChecker
387    
388     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
389     ## TODO: check significant content
390    
391     my $HTMLBlockOrInlineChecker = sub {
392 wakaba 1.2 my ($self, $el) = @_;
393 wakaba 1.1 my $children = [];
394     my @nodes = (@{$el->child_nodes});
395    
396     my $content = 'block-or-inline'; # or 'block' or 'inline'
397     my @block_not_inline;
398     while (@nodes) {
399     my $node = shift @nodes;
400 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
401    
402 wakaba 1.1 my $nt = $node->node_type;
403     if ($nt == 1) {
404 wakaba 1.2 my $node_ns = $node->namespace_uri;
405     $node_ns = '' unless defined $node_ns;
406     my $node_ln = $node->manakai_local_name;
407     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
408     $self->{onerror}->(node => $node, type => 'element not allowed');
409     }
410 wakaba 1.1 if ($content eq 'block') {
411     CHK: {
412     for (@{$HTMLBlockLevelElements}) {
413     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
414     last CHK;
415     }
416     }
417 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
418 wakaba 1.1 } # CHK
419     } elsif ($content eq 'inline') {
420     CHK: {
421     for (@{$HTMLStrictlyInlineLevelElements},
422     @{$HTMLStructuredInlineLevelElements}) {
423     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
424     last CHK;
425     }
426     }
427 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
428 wakaba 1.1 } # CHK
429     } else {
430     my $is_block;
431     my $is_inline;
432     for (@{$HTMLBlockLevelElements}) {
433     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
434     $is_block = 1;
435     last;
436     }
437     }
438    
439     for (@{$HTMLStrictlyInlineLevelElements},
440     @{$HTMLStructuredInlineLevelElements}) {
441     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
442     $is_inline = 1;
443     last;
444     }
445     }
446    
447     push @block_not_inline, $node if $is_block and not $is_inline;
448     unless ($is_block) {
449     $content = 'inline';
450     for (@block_not_inline) {
451 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
452 wakaba 1.1 }
453     unless ($is_inline) {
454 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
455 wakaba 1.1 }
456     }
457     }
458 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
459     unshift @nodes, @$sib;
460     push @$children, @$ch;
461 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
462     if ($node->data =~ /[^\x09-\x0D\x20]/) {
463     if ($content eq 'block') {
464 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
465 wakaba 1.1 } else {
466     $content = 'inline';
467     for (@block_not_inline) {
468 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
469 wakaba 1.1 }
470     }
471     }
472     } elsif ($nt == 5) {
473     unshift @nodes, @{$node->child_nodes};
474     }
475     }
476     return ($children);
477     };
478    
479 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
480     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
481     my ($elnsuri, $ellname) = @_;
482     return sub {
483     my ($self, $el) = @_;
484     my $children = [];
485     my @nodes = (@{$el->child_nodes});
486    
487     my $has_non_style;
488     my $content = 'block-or-inline'; # or 'block' or 'inline'
489     my @block_not_inline;
490     while (@nodes) {
491     my $node = shift @nodes;
492     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
493    
494     my $nt = $node->node_type;
495     if ($nt == 1) {
496     my $node_ns = $node->namespace_uri;
497     $node_ns = '' unless defined $node_ns;
498     my $node_ln = $node->manakai_local_name;
499     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
500     $self->{onerror}->(node => $node, type => 'element not allowed');
501     }
502     if ($node->manakai_element_type_match ($elnsuri, $ellname)) {
503     if ($has_non_style) {
504     $self->{onerror}->(node => $node, type => 'element not allowed');
505     }
506     } elsif ($content eq 'block') {
507     $has_non_style = 1;
508     CHK: {
509     for (@{$HTMLBlockLevelElements}) {
510     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
511     last CHK;
512     }
513     }
514     $self->{onerror}->(node => $node, type => 'element not allowed');
515     } # CHK
516     } elsif ($content eq 'inline') {
517     $has_non_style = 1;
518     CHK: {
519     for (@{$HTMLStrictlyInlineLevelElements},
520     @{$HTMLStructuredInlineLevelElements}) {
521     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
522     last CHK;
523     }
524     }
525     $self->{onerror}->(node => $node, type => 'element not allowed');
526     } # CHK
527     } else {
528     $has_non_style = 1;
529     my $is_block;
530     my $is_inline;
531 wakaba 1.1 for (@{$HTMLBlockLevelElements}) {
532     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
533 wakaba 1.2 $is_block = 1;
534     last;
535 wakaba 1.1 }
536     }
537 wakaba 1.2
538 wakaba 1.1 for (@{$HTMLStrictlyInlineLevelElements},
539     @{$HTMLStructuredInlineLevelElements}) {
540     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
541 wakaba 1.2 $is_inline = 1;
542     last;
543 wakaba 1.1 }
544     }
545 wakaba 1.2
546     push @block_not_inline, $node if $is_block and not $is_inline;
547     unless ($is_block) {
548     $content = 'inline';
549     for (@block_not_inline) {
550     $self->{onerror}->(node => $_, type => 'element not allowed');
551     }
552     unless ($is_inline) {
553     $self->{onerror}->(node => $node, type => 'element not allowed');
554     }
555 wakaba 1.1 }
556     }
557 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
558     unshift @nodes, @$sib;
559     push @$children, @$ch;
560     } elsif ($nt == 3 or $nt == 4) {
561     if ($node->data =~ /[^\x09-\x0D\x20]/) {
562     $has_non_style = 1;
563     if ($content eq 'block') {
564     $self->{onerror}->(node => $node, type => 'character not allowed');
565     } else {
566     $content = 'inline';
567     for (@block_not_inline) {
568     $self->{onerror}->(node => $_, type => 'element not allowed');
569     }
570 wakaba 1.1 }
571     }
572 wakaba 1.2 } elsif ($nt == 5) {
573     unshift @nodes, @{$node->child_nodes};
574 wakaba 1.1 }
575     }
576 wakaba 1.2 return ($children);
577     };
578     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
579 wakaba 1.1
580     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
581    
582     $Element->{$HTML_NS}->{html} = {
583     checker => sub {
584 wakaba 1.2 my ($self, $el) = @_;
585 wakaba 1.1 my $children = [];
586     my @nodes = (@{$el->child_nodes});
587    
588     my $phase = 'before head';
589     while (@nodes) {
590     my $node = shift @nodes;
591 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
592    
593 wakaba 1.1 my $nt = $node->node_type;
594     if ($nt == 1) {
595 wakaba 1.2 my $node_ns = $node->namespace_uri;
596     $node_ns = '' unless defined $node_ns;
597     my $node_ln = $node->manakai_local_name;
598     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
599     $self->{onerror}->(node => $node, type => 'element not allowed');
600     }
601 wakaba 1.1 if ($phase eq 'before head') {
602     if ($node->manakai_element_type_match ($HTML_NS, 'head')) {
603     $phase = 'after head';
604     } elsif ($node->manakai_element_type_match ($HTML_NS, 'body')) {
605 wakaba 1.2 $self->{onerror}
606 wakaba 1.3 ->(node => $node, type => 'ps element missing:head');
607 wakaba 1.1 $phase = 'after body';
608     } else {
609 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
610 wakaba 1.1 # before head
611     }
612     } elsif ($phase eq 'after head') {
613     if ($node->manakai_element_type_match ($HTML_NS, 'body')) {
614     $phase = 'after body';
615     } else {
616 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
617 wakaba 1.1 # after head
618     }
619     } else { #elsif ($phase eq 'after body') {
620 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
621 wakaba 1.1 # after body
622     }
623 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
624     unshift @nodes, @$sib;
625     push @$children, @$ch;
626 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
627     if ($node->data =~ /[^\x09-\x0D\x20]/) {
628 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
629 wakaba 1.1 }
630     } elsif ($nt == 5) {
631     unshift @nodes, @{$node->child_nodes};
632     }
633     }
634 wakaba 1.3
635     if ($phase eq 'before head') {
636     $self->{onerror}->(node => $el, type => 'child element missing:head');
637     $self->{onerror}->(node => $el, type => 'child element missing:body');
638     } elsif ($phase eq 'after head') {
639     $self->{onerror}->(node => $el, type => 'child element missing:body');
640     }
641    
642 wakaba 1.1 return ($children);
643     },
644     };
645    
646     $Element->{$HTML_NS}->{head} = {
647     checker => sub {
648 wakaba 1.2 my ($self, $el) = @_;
649 wakaba 1.1 my $children = [];
650     my @nodes = (@{$el->child_nodes});
651    
652     my $has_title;
653 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
654 wakaba 1.1 while (@nodes) {
655     my $node = shift @nodes;
656 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
657    
658 wakaba 1.1 my $nt = $node->node_type;
659     if ($nt == 1) {
660 wakaba 1.2 my $node_ns = $node->namespace_uri;
661     $node_ns = '' unless defined $node_ns;
662     my $node_ln = $node->manakai_local_name;
663     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
664     $self->{onerror}->(node => $node, type => 'element not allowed');
665     }
666 wakaba 1.1 if ($node->manakai_element_type_match ($HTML_NS, 'title')) {
667 wakaba 1.3 $phase = 'after base';
668 wakaba 1.1 unless ($has_title) {
669     $has_title = 1;
670     } else {
671 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
672 wakaba 1.1 }
673     } elsif ($node->manakai_element_type_match ($HTML_NS, 'meta')) {
674     if ($node->has_attribute_ns (undef, 'charset')) {
675 wakaba 1.3 if ($phase eq 'initial') {
676     $phase = 'after charset';
677 wakaba 1.1 } else {
678 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
679 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
680 wakaba 1.1 }
681     } else {
682 wakaba 1.3 $phase = 'after base';
683 wakaba 1.1 }
684     } elsif ($node->manakai_element_type_match ($HTML_NS, 'base')) {
685 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
686     $phase = 'after base';
687 wakaba 1.1 } else {
688 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
689 wakaba 1.1 }
690     } else {
691 wakaba 1.3 $phase = 'after base';
692 wakaba 1.1 CHK: {
693     for (@{$HTMLMetadataElements}) {
694     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
695     last CHK;
696     }
697     }
698 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
699 wakaba 1.1 } # CHK
700     }
701 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
702     unshift @nodes, @$sib;
703     push @$children, @$ch;
704 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
705     if ($node->data =~ /[^\x09-\x0D\x20]/) {
706 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
707 wakaba 1.1 }
708     } elsif ($nt == 5) {
709     unshift @nodes, @{$node->child_nodes};
710     }
711     }
712     unless ($has_title) {
713 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
714 wakaba 1.1 }
715     return ($children);
716     },
717     };
718    
719     $Element->{$HTML_NS}->{title} = {
720     checker => $HTMLTextChecker,
721     };
722    
723     $Element->{$HTML_NS}->{base} = {
724     checker => $HTMLEmptyChecker,
725     };
726    
727     $Element->{$HTML_NS}->{link} = {
728     checker => $HTMLEmptyChecker,
729     };
730    
731     $Element->{$HTML_NS}->{meta} = {
732     checker => $HTMLEmptyChecker,
733     };
734    
735     ## NOTE: |html:style| has no conformance creteria on content model
736 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
737     checker => $AnyChecker,
738     };
739 wakaba 1.1
740     $Element->{$HTML_NS}->{body} = {
741     checker => $HTMLBlockChecker,
742     };
743    
744     $Element->{$HTML_NS}->{section} = {
745     checker => $HTMLStylableBlockChecker,
746     };
747    
748     $Element->{$HTML_NS}->{nav} = {
749     checker => $HTMLBlockOrInlineChecker,
750     };
751    
752     $Element->{$HTML_NS}->{article} = {
753     checker => $HTMLStylableBlockChecker,
754     };
755    
756     $Element->{$HTML_NS}->{blockquote} = {
757     checker => $HTMLBlockChecker,
758     };
759    
760     $Element->{$HTML_NS}->{aside} = {
761 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
762 wakaba 1.1 };
763    
764     $Element->{$HTML_NS}->{h1} = {
765     checker => $HTMLSignificantStrictlyInlineChecker,
766     };
767    
768     $Element->{$HTML_NS}->{h2} = {
769     checker => $HTMLSignificantStrictlyInlineChecker,
770     };
771    
772     $Element->{$HTML_NS}->{h3} = {
773     checker => $HTMLSignificantStrictlyInlineChecker,
774     };
775    
776     $Element->{$HTML_NS}->{h4} = {
777     checker => $HTMLSignificantStrictlyInlineChecker,
778     };
779    
780     $Element->{$HTML_NS}->{h5} = {
781     checker => $HTMLSignificantStrictlyInlineChecker,
782     };
783    
784     $Element->{$HTML_NS}->{h6} = {
785     checker => $HTMLSignificantStrictlyInlineChecker,
786     };
787    
788     ## TODO: header
789    
790 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
791     checker => sub { ## block -hn -header -footer -sectioning or inline
792     my ($self, $el) = @_;
793     my $children = [];
794     my @nodes = (@{$el->child_nodes});
795    
796     my $content = 'block-or-inline'; # or 'block' or 'inline'
797     my @block_not_inline;
798     while (@nodes) {
799     my $node = shift @nodes;
800     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
801    
802     my $nt = $node->node_type;
803     if ($nt == 1) {
804     my $node_ns = $node->namespace_uri;
805     $node_ns = '' unless defined $node_ns;
806     my $node_ln = $node->manakai_local_name;
807     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
808     $self->{onerror}->(node => $node, type => 'element not allowed');
809     } elsif ($node_ns eq $HTML_NS and
810     {
811     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
812     }->{$node_ln}) {
813     $self->{onerror}->(node => $node, type => 'element not allowed');
814     } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
815     $self->{onerror}->(node => $node, type => 'element not allowed');
816     }
817     if ($content eq 'block') {
818     CHK: {
819     for (@{$HTMLBlockLevelElements}) {
820     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
821     last CHK;
822     }
823     }
824     $self->{onerror}->(node => $node, type => 'element not allowed');
825     } # CHK
826     } elsif ($content eq 'inline') {
827     CHK: {
828     for (@{$HTMLStrictlyInlineLevelElements},
829     @{$HTMLStructuredInlineLevelElements}) {
830     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
831     last CHK;
832     }
833     }
834     $self->{onerror}->(node => $node, type => 'element not allowed');
835     } # CHK
836     } else {
837     my $is_block;
838     my $is_inline;
839     for (@{$HTMLBlockLevelElements}) {
840     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
841     $is_block = 1;
842     last;
843     }
844     }
845    
846     for (@{$HTMLStrictlyInlineLevelElements},
847     @{$HTMLStructuredInlineLevelElements}) {
848     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
849     $is_inline = 1;
850     last;
851     }
852     }
853    
854     push @block_not_inline, $node if $is_block and not $is_inline;
855     unless ($is_block) {
856     $content = 'inline';
857     for (@block_not_inline) {
858     $self->{onerror}->(node => $_, type => 'element not allowed');
859     }
860     unless ($is_inline) {
861     $self->{onerror}->(node => $node, type => 'element not allowed');
862     }
863     }
864     }
865     my ($sib, $ch) = $self->_check_get_children ($node);
866     unshift @nodes, @$sib;
867     push @$children, @$ch;
868     } elsif ($nt == 3 or $nt == 4) {
869     if ($node->data =~ /[^\x09-\x0D\x20]/) {
870     if ($content eq 'block') {
871     $self->{onerror}->(node => $node, type => 'character not allowed');
872     } else {
873     $content = 'inline';
874     for (@block_not_inline) {
875     $self->{onerror}->(node => $_, type => 'element not allowed');
876     }
877     }
878     }
879     } elsif ($nt == 5) {
880     unshift @nodes, @{$node->child_nodes};
881     }
882     }
883    
884     my $end = $self->_add_minuses
885     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
886     $HTMLSectioningElements);
887     push @$children, $end;
888    
889     return ($children);
890     },
891     };
892 wakaba 1.1
893     $Element->{$HTML_NS}->{address} = {
894     checker => $HTMLInlineChecker,
895     };
896    
897     $Element->{$HTML_NS}->{p} = {
898     checker => $HTMLSignificantInlineChecker,
899     };
900    
901     $Element->{$HTML_NS}->{hr} = {
902     checker => $HTMLEmptyChecker,
903     };
904    
905     $Element->{$HTML_NS}->{br} = {
906     checker => $HTMLEmptyChecker,
907     };
908    
909     $Element->{$HTML_NS}->{dialog} = {
910     checker => sub {
911 wakaba 1.2 my ($self, $el) = @_;
912 wakaba 1.1 my $children = [];
913     my @nodes = (@{$el->child_nodes});
914    
915     my $phase = 'before dt';
916     while (@nodes) {
917     my $node = shift @nodes;
918 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
919    
920 wakaba 1.1 my $nt = $node->node_type;
921     if ($nt == 1) {
922 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
923 wakaba 1.1 if ($phase eq 'before dt') {
924     if ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
925     $phase = 'before dd';
926     } elsif ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
927 wakaba 1.2 $self->{onerror}
928 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
929 wakaba 1.1 $phase = 'before dt';
930     } else {
931 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
932 wakaba 1.1 }
933     } else { # before dd
934     if ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
935     $phase = 'before dt';
936     } elsif ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
937 wakaba 1.2 $self->{onerror}
938 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
939 wakaba 1.1 $phase = 'before dd';
940     } else {
941 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
942 wakaba 1.1 }
943     }
944 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
945     unshift @nodes, @$sib;
946     push @$children, @$ch;
947 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
948     if ($node->data =~ /[^\x09-\x0D\x20]/) {
949 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
950 wakaba 1.1 }
951     } elsif ($nt == 5) {
952     unshift @nodes, @{$node->child_nodes};
953     }
954     }
955     if ($phase eq 'before dd') {
956 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
957 wakaba 1.1 }
958     return ($children);
959     },
960     };
961    
962     $Element->{$HTML_NS}->{pre} = {
963     checker => $HTMLStrictlyInlineChecker,
964     };
965    
966     $Element->{$HTML_NS}->{ol} = {
967     checker => sub {
968 wakaba 1.2 my ($self, $el) = @_;
969 wakaba 1.1 my $children = [];
970     my @nodes = (@{$el->child_nodes});
971    
972     while (@nodes) {
973     my $node = shift @nodes;
974 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
975    
976 wakaba 1.1 my $nt = $node->node_type;
977     if ($nt == 1) {
978 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
979 wakaba 1.1 unless ($node->manakai_element_type_match ($HTML_NS, 'li')) {
980 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
981 wakaba 1.1 }
982 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
983     unshift @nodes, @$sib;
984     push @$children, @$ch;
985 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
986     if ($node->data =~ /[^\x09-\x0D\x20]/) {
987 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
988 wakaba 1.1 }
989     } elsif ($nt == 5) {
990     unshift @nodes, @{$node->child_nodes};
991     }
992     }
993     return ($children);
994     },
995     };
996    
997     $Element->{$HTML_NS}->{ul} = {
998     checker => $Element->{$HTML_NS}->{ol}->{checker},
999     };
1000    
1001     ## TODO: li
1002    
1003     $Element->{$HTML_NS}->{dl} = {
1004     checker => sub {
1005 wakaba 1.2 my ($self, $el) = @_;
1006 wakaba 1.1 my $children = [];
1007     my @nodes = (@{$el->child_nodes});
1008    
1009     my $phase = 'before dt';
1010     while (@nodes) {
1011     my $node = shift @nodes;
1012 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1013    
1014 wakaba 1.1 my $nt = $node->node_type;
1015     if ($nt == 1) {
1016 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1017 wakaba 1.1 if ($phase eq 'in dds') {
1018     if ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
1019     #$phase = 'in dds';
1020     } elsif ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
1021     $phase = 'in dts';
1022     } else {
1023 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1024 wakaba 1.1 }
1025     } elsif ($phase eq 'in dts') {
1026     if ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
1027     #$phase = 'in dts';
1028     } elsif ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
1029     $phase = 'in dds';
1030     } else {
1031 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1032 wakaba 1.1 }
1033     } else { # before dt
1034     if ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
1035     $phase = 'in dts';
1036     } elsif ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
1037 wakaba 1.2 $self->{onerror}
1038 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1039 wakaba 1.1 $phase = 'in dds';
1040     } else {
1041 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1042 wakaba 1.1 }
1043     }
1044 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1045     unshift @nodes, @$sib;
1046     push @$children, @$ch;
1047 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1048     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1049 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1050 wakaba 1.1 }
1051     } elsif ($nt == 5) {
1052     unshift @nodes, @{$node->child_nodes};
1053     }
1054     }
1055     if ($phase eq 'in dts') {
1056 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1057 wakaba 1.1 }
1058     return ($children);
1059     },
1060     };
1061    
1062     $Element->{$HTML_NS}->{dt} = {
1063     checker => $HTMLStrictlyInlineChecker,
1064     };
1065    
1066     ## TODO: dd
1067    
1068     ## TODO: a
1069    
1070     ## TODO: q
1071    
1072     $Element->{$HTML_NS}->{cite} = {
1073     checker => $HTMLStrictlyInlineChecker,
1074     };
1075    
1076     ## TODO: em
1077    
1078     ## TODO: strong, small, m, dfn
1079    
1080     $Element->{$HTML_NS}->{abbr} = {
1081     checker => $HTMLStrictlyInlineChecker,
1082     };
1083    
1084     $Element->{$HTML_NS}->{time} = {
1085     checker => $HTMLStrictlyInlineChecker,
1086     };
1087    
1088     $Element->{$HTML_NS}->{meter} = {
1089     checker => $HTMLStrictlyInlineChecker,
1090     };
1091    
1092     $Element->{$HTML_NS}->{progress} = {
1093     checker => $HTMLStrictlyInlineChecker,
1094     };
1095    
1096     ## TODO: code
1097    
1098     $Element->{$HTML_NS}->{var} = {
1099     checker => $HTMLStrictlyInlineChecker,
1100     };
1101    
1102     ## TODO: samp
1103    
1104     $Element->{$HTML_NS}->{kbd} = {
1105     checker => $HTMLStrictlyInlineChecker,
1106     };
1107    
1108     $Element->{$HTML_NS}->{sub} = {
1109     checker => $HTMLStrictlyInlineChecker,
1110     };
1111    
1112     $Element->{$HTML_NS}->{sup} = {
1113     checker => $HTMLStrictlyInlineChecker,
1114     };
1115    
1116     ## TODO: span
1117    
1118     $Element->{$HTML_NS}->{i} = {
1119     checker => $HTMLStrictlyInlineChecker,
1120     };
1121    
1122     $Element->{$HTML_NS}->{b} = {
1123     checker => $HTMLStrictlyInlineChecker,
1124     };
1125    
1126     $Element->{$HTML_NS}->{bdo} = {
1127     checker => $HTMLStrictlyInlineChecker,
1128     };
1129    
1130     $Element->{$HTML_NS}->{ins} = {
1131     checker => $HTMLTransparentChecker,
1132     };
1133    
1134     $Element->{$HTML_NS}->{del} = {
1135     checker => sub {
1136 wakaba 1.2 my ($self, $el) = @_;
1137 wakaba 1.1
1138     my $parent = $el->manakai_parent_element;
1139     if (defined $parent) {
1140     my $nsuri = $parent->namespace_uri;
1141     $nsuri = '' unless defined $nsuri;
1142     my $ln = $parent->manakai_local_name;
1143     my $eldef = $Element->{$nsuri}->{$ln} ||
1144     $Element->{$nsuri}->{''} ||
1145     $ElementDefault;
1146 wakaba 1.2 return $eldef->{checker}->($self, $el);
1147 wakaba 1.1 } else {
1148 wakaba 1.2 return $HTMLBlockOrInlineChecker->($self, $el);
1149 wakaba 1.1 }
1150     },
1151     };
1152    
1153     ## TODO: figure
1154    
1155     $Element->{$HTML_NS}->{img} = {
1156     checker => $HTMLEmptyChecker,
1157     };
1158    
1159     $Element->{$HTML_NS}->{iframe} = {
1160     checker => $HTMLTextChecker,
1161     };
1162    
1163     $Element->{$HTML_NS}->{embed} = {
1164     checker => $HTMLEmptyChecker,
1165     };
1166    
1167     $Element->{$HTML_NS}->{param} = {
1168     checker => $HTMLEmptyChecker,
1169     };
1170    
1171     ## TODO: object
1172    
1173 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
1174     checker => sub {
1175     my ($self, $el) = @_;
1176    
1177     if ($el->has_attribute_ns (undef, 'src')) {
1178     return $HTMLBlockOrInlineChecker->($self, $el);
1179     } else {
1180     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
1181     ->($self, $el);
1182     }
1183     },
1184     };
1185    
1186     $Element->{$HTML_NS}->{audio} = {
1187     checker => $Element->{$HTML_NS}->{audio}->{checker},
1188     };
1189 wakaba 1.1
1190     $Element->{$HTML_NS}->{source} = {
1191     checker => $HTMLEmptyChecker,
1192     };
1193    
1194     $Element->{$HTML_NS}->{canvas} = {
1195     checker => $HTMLInlineChecker,
1196     };
1197    
1198     $Element->{$HTML_NS}->{map} = {
1199     checker => $HTMLBlockChecker,
1200     };
1201    
1202     $Element->{$HTML_NS}->{area} = {
1203     checker => $HTMLEmptyChecker,
1204     };
1205     ## TODO: only in map
1206    
1207     $Element->{$HTML_NS}->{table} = {
1208     checker => sub {
1209 wakaba 1.2 my ($self, $el) = @_;
1210 wakaba 1.1 my $children = [];
1211     my @nodes = (@{$el->child_nodes});
1212    
1213     my $phase = 'before caption';
1214     my $has_tfoot;
1215     while (@nodes) {
1216     my $node = shift @nodes;
1217 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1218    
1219 wakaba 1.1 my $nt = $node->node_type;
1220     if ($nt == 1) {
1221 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1222 wakaba 1.1 if ($phase eq 'in tbodys') {
1223     if ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1224     #$phase = 'in tbodys';
1225     } elsif (not $has_tfoot and
1226     $node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1227     $phase = 'after tfoot';
1228     $has_tfoot = 1;
1229     } else {
1230 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1231 wakaba 1.1 }
1232     } elsif ($phase eq 'in trs') {
1233     if ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1234     #$phase = 'in trs';
1235     } elsif (not $has_tfoot and
1236     $node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1237     $phase = 'after tfoot';
1238     $has_tfoot = 1;
1239     } else {
1240 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1241 wakaba 1.1 }
1242     } elsif ($phase eq 'after thead') {
1243     if ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1244     $phase = 'in tbodys';
1245     } elsif ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1246     $phase = 'in trs';
1247     } elsif ($node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1248     $phase = 'in tbodys';
1249     $has_tfoot = 1;
1250     } else {
1251 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1252 wakaba 1.1 }
1253     } elsif ($phase eq 'in colgroup') {
1254     if ($node->manakai_element_type_match ($HTML_NS, 'colgroup')) {
1255     $phase = 'in colgroup';
1256     } elsif ($node->manakai_element_type_match ($HTML_NS, 'thead')) {
1257     $phase = 'after thead';
1258     } elsif ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1259     $phase = 'in tbodys';
1260     } elsif ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1261     $phase = 'in trs';
1262     } elsif ($node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1263     $phase = 'in tbodys';
1264     $has_tfoot = 1;
1265     } else {
1266 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1267 wakaba 1.1 }
1268     } elsif ($phase eq 'before caption') {
1269     if ($node->manakai_element_type_match ($HTML_NS, 'caption')) {
1270     $phase = 'in colgroup';
1271     } elsif ($node->manakai_element_type_match ($HTML_NS, 'colgroup')) {
1272     $phase = 'in colgroup';
1273     } elsif ($node->manakai_element_type_match ($HTML_NS, 'thead')) {
1274     $phase = 'after thead';
1275     } elsif ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1276     $phase = 'in tbodys';
1277     } elsif ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1278     $phase = 'in trs';
1279     } elsif ($node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1280     $phase = 'in tbodys';
1281     $has_tfoot = 1;
1282     } else {
1283 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1284 wakaba 1.1 }
1285     } else { # after tfoot
1286 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1287 wakaba 1.1 }
1288 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1289     unshift @nodes, @$sib;
1290     push @$children, @$ch;
1291 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1292     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1293 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1294 wakaba 1.1 }
1295     } elsif ($nt == 5) {
1296     unshift @nodes, @{$node->child_nodes};
1297     }
1298     }
1299     return ($children);
1300     },
1301     };
1302    
1303     $Element->{$HTML_NS}->{caption} = {
1304     checker => $HTMLSignificantStrictlyInlineChecker,
1305     };
1306    
1307     $Element->{$HTML_NS}->{colgroup} = {
1308     checker => sub {
1309 wakaba 1.2 my ($self, $el) = @_;
1310 wakaba 1.1 my $children = [];
1311     my @nodes = (@{$el->child_nodes});
1312    
1313     while (@nodes) {
1314     my $node = shift @nodes;
1315 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1316    
1317 wakaba 1.1 my $nt = $node->node_type;
1318     if ($nt == 1) {
1319 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1320 wakaba 1.1 unless ($node->manakai_element_type_match ($HTML_NS, 'col')) {
1321 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1322 wakaba 1.1 }
1323 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1324     unshift @nodes, @$sib;
1325     push @$children, @$ch;
1326 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1327     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1328 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1329 wakaba 1.1 }
1330     } elsif ($nt == 5) {
1331     unshift @nodes, @{$node->child_nodes};
1332     }
1333     }
1334     return ($children);
1335     },
1336     };
1337    
1338     $Element->{$HTML_NS}->{col} = {
1339     checker => $HTMLEmptyChecker,
1340     };
1341    
1342     $Element->{$HTML_NS}->{tbody} = {
1343     checker => sub {
1344 wakaba 1.2 my ($self, $el) = @_;
1345 wakaba 1.1 my $children = [];
1346     my @nodes = (@{$el->child_nodes});
1347    
1348     my $has_tr;
1349     while (@nodes) {
1350     my $node = shift @nodes;
1351 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1352    
1353 wakaba 1.1 my $nt = $node->node_type;
1354     if ($nt == 1) {
1355 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1356 wakaba 1.1 if ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1357     $has_tr = 1;
1358     } else {
1359 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1360 wakaba 1.1 }
1361 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1362     unshift @nodes, @$sib;
1363     push @$children, @$ch;
1364 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1365     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1366 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1367 wakaba 1.1 }
1368     } elsif ($nt == 5) {
1369     unshift @nodes, @{$node->child_nodes};
1370     }
1371     }
1372     unless ($has_tr) {
1373 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
1374 wakaba 1.1 }
1375     return ($children);
1376     },
1377     };
1378    
1379     $Element->{$HTML_NS}->{thead} = {
1380     checker => $Element->{$HTML_NS}->{tbody},
1381     };
1382    
1383     $Element->{$HTML_NS}->{tfoot} = {
1384     checker => $Element->{$HTML_NS}->{tbody},
1385     };
1386    
1387     $Element->{$HTML_NS}->{tr} = {
1388     checker => sub {
1389 wakaba 1.2 my ($self, $el) = @_;
1390 wakaba 1.1 my $children = [];
1391     my @nodes = (@{$el->child_nodes});
1392    
1393     my $has_td;
1394     while (@nodes) {
1395     my $node = shift @nodes;
1396 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1397    
1398 wakaba 1.1 my $nt = $node->node_type;
1399     if ($nt == 1) {
1400 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1401 wakaba 1.1 if ($node->manakai_element_type_match ($HTML_NS, 'td') or
1402     $node->manakai_element_type_match ($HTML_NS, 'th')) {
1403     $has_td = 1;
1404     } else {
1405 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1406 wakaba 1.1 }
1407 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1408     unshift @nodes, @$sib;
1409     push @$children, @$ch;
1410 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1411     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1412 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1413 wakaba 1.1 }
1414     } elsif ($nt == 5) {
1415     unshift @nodes, @{$node->child_nodes};
1416     }
1417     }
1418     unless ($has_td) {
1419 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
1420 wakaba 1.1 }
1421     return ($children);
1422     },
1423     };
1424    
1425     $Element->{$HTML_NS}->{td} = {
1426     checker => $HTMLBlockOrInlineChecker,
1427     };
1428    
1429     $Element->{$HTML_NS}->{th} = {
1430     checker => $HTMLBlockOrInlineChecker,
1431     };
1432    
1433     ## TODO: forms
1434    
1435 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
1436     checker => sub {
1437     my ($self, $el) = @_;
1438    
1439     if ($el->has_attribute_ns (undef, 'src')) {
1440     return $HTMLEmptyChecker->($self, $el);
1441     } else {
1442     ## NOTE: No content model conformance in HTML5 spec.
1443 wakaba 1.3 return $AnyChecker->($self, $el);
1444 wakaba 1.2 }
1445     },
1446     };
1447    
1448     ## NOTE: When script is disabled.
1449     $Element->{$HTML_NS}->{noscript} = {
1450     checker => sub {
1451     my ($self, $el) = @_;
1452 wakaba 1.1
1453 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
1454     my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $el);
1455     push @$sib, $end;
1456     return ($sib, $ch);
1457     },
1458     };
1459 wakaba 1.1 ## TODO: noscript
1460    
1461     $Element->{$HTML_NS}->{'event-source'} = {
1462     checker => $HTMLEmptyChecker,
1463     };
1464    
1465     $Element->{$HTML_NS}->{details} = {
1466 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend'),
1467 wakaba 1.1 };
1468    
1469     $Element->{$HTML_NS}->{datagrid} = {
1470     checker => $HTMLBlockChecker,
1471     };
1472    
1473     $Element->{$HTML_NS}->{command} = {
1474     checker => $HTMLEmptyChecker,
1475     };
1476    
1477     $Element->{$HTML_NS}->{menu} = {
1478     checker => sub {
1479 wakaba 1.2 my ($self, $el) = @_;
1480 wakaba 1.1 my $children = [];
1481     my @nodes = (@{$el->child_nodes});
1482    
1483     my $content = 'li or inline';
1484     while (@nodes) {
1485     my $node = shift @nodes;
1486 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1487    
1488 wakaba 1.1 my $nt = $node->node_type;
1489     if ($nt == 1) {
1490 wakaba 1.2 my $node_ns = $node->namespace_uri;
1491     $node_ns = '' unless defined $node_ns;
1492     my $node_ln = $node->manakai_local_name;
1493     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1494     $self->{onerror}->(node => $node, type => 'element not allowed');
1495     }
1496 wakaba 1.1 if ($node->manakai_element_type_match ($HTML_NS, 'li')) {
1497     if ($content eq 'inline') {
1498 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1499 wakaba 1.1 } elsif ($content eq 'li or inline') {
1500     $content = 'li';
1501     }
1502     } else {
1503     CHK: {
1504     for (@{$HTMLStrictlyInlineLevelElements},
1505     @{$HTMLStructuredInlineLevelElements}) {
1506     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
1507     $content = 'inline';
1508     last CHK;
1509     }
1510     }
1511 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1512 wakaba 1.1 } # CHK
1513     }
1514 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1515     unshift @nodes, @$sib;
1516     push @$children, @$ch;
1517 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1518     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1519     if ($content eq 'li') {
1520 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1521 wakaba 1.1 } elsif ($content eq 'li or inline') {
1522     $content = 'inline';
1523     }
1524     }
1525     } elsif ($nt == 5) {
1526     unshift @nodes, @{$node->child_nodes};
1527     }
1528     }
1529     return ($children);
1530     },
1531     };
1532    
1533     ## TODO: legend
1534    
1535     $Element->{$HTML_NS}->{div} = {
1536 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1537 wakaba 1.1 };
1538    
1539     $Element->{$HTML_NS}->{font} = {
1540     checker => $HTMLTransparentChecker,
1541     };
1542    
1543     my $Attr = {
1544    
1545     };
1546    
1547 wakaba 1.2 sub new ($) {
1548     return bless {}, shift;
1549     } # new
1550    
1551 wakaba 1.1 sub check_element ($$$) {
1552     my ($self, $el, $onerror) = @_;
1553    
1554 wakaba 1.2 $self->{minuses} = {};
1555     $self->{onerror} = $onerror;
1556    
1557 wakaba 1.1 my @nodes = ($el);
1558     while (@nodes) {
1559     my $node = shift @nodes;
1560 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1561    
1562 wakaba 1.1 my $nsuri = $node->namespace_uri;
1563     $nsuri = '' unless defined $nsuri;
1564     my $ln = $node->manakai_local_name;
1565     my $eldef = $Element->{$nsuri}->{$ln} ||
1566     $Element->{$nsuri}->{''} ||
1567     $ElementDefault;
1568 wakaba 1.2 my ($children) = $eldef->{checker}->($self, $node);
1569 wakaba 1.1 push @nodes, @$children;
1570     }
1571     } # check_element
1572    
1573 wakaba 1.2 sub _add_minuses ($@) {
1574     my $self = shift;
1575     my $r = {};
1576     for my $list (@_) {
1577     for my $ns (keys %$list) {
1578     for my $ln (keys %{$list->{$ns}}) {
1579     unless ($self->{minuses}->{$ns}->{$ln}) {
1580     $self->{minuses}->{$ns}->{$ln} = 1;
1581     $r->{$ns}->{$ln} = 1;
1582     }
1583     }
1584     }
1585     }
1586     return $r;
1587     } # _add_minuses
1588    
1589     sub _remove_minuses ($$) {
1590     my ($self, $list) = @_;
1591     for my $ns (keys %{$list}) {
1592     for my $ln (keys %{$list->{$ns}}) {
1593     delete $self->{minuses}->{$ns}->{$ln} if $list->{$ns}->{$ln};
1594     }
1595     }
1596     1;
1597     } # _remove_minuses
1598    
1599     sub _check_get_children ($$) {
1600     my ($self, $node) = @_;
1601     my $ch = [];
1602     my $sib = [];
1603     TP: {
1604     my $node_ns = $node->namespace_uri;
1605     $node_ns = '' unless defined $node_ns;
1606     my $node_ln = $node->manakai_local_name;
1607     if ($node_ns eq $HTML_NS) {
1608     if ($node_ln eq 'noscript') {
1609     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
1610     push @$sib, $end;
1611     }
1612     }
1613     for (@{$HTMLTransparentElements}) {
1614     if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
1615     unshift @$sib, @{$node->child_nodes};
1616     last TP;
1617     }
1618     }
1619     if ($node->manakai_element_type_match ($HTML_NS, 'video') or
1620     $node->manakai_element_type_match ($HTML_NS, 'audio')) {
1621     if ($node->has_attribute_ns (undef, 'src')) {
1622     unshift @$sib, @{$node->child_nodes};
1623     last TP;
1624     } else {
1625     my @cn = @{$node->child_nodes};
1626     CN: while (@cn) {
1627     my $cn = shift @cn;
1628     my $cnt = $cn->node_type;
1629     if ($cnt == 1) {
1630     if ($cn->manakai_element_type_match ($HTML_NS, 'source')) {
1631     #
1632     } else {
1633     last CN;
1634     }
1635     } elsif ($cnt == 3 or $cnt == 4) {
1636     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
1637     last CN;
1638     }
1639     }
1640     } # CN
1641     unshift @$sib, @cn;
1642     }
1643     }
1644     push @$ch, $node;
1645     } # TP
1646     return ($sib, $ch);
1647     } # _check_get_children
1648    
1649 wakaba 1.1 1;
1650 wakaba 1.3 # $Date: 2007/05/05 06:51:06 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24