/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (show annotations) (download)
Sun May 13 08:09:15 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +276 -129 lines
++ whatpm/t/ChangeLog	13 May 2007 08:08:56 -0000
	* content-model-1.dat: Tests for |dd| content model are added.
	Tests for |em| content model (inline-level content
	or stricly inline-level content) are added.
	Tests for |dfn| content model are added.

2007-05-13  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	13 May 2007 08:07:15 -0000
	* ContentChecker.pm ($HTMLInlineOrStriclyInlineChecker): New
	checker.
	(html:dd checker): New checker.
	(html:q, html:em, html:strong, html:small,
	html:m, html:dfn, html:code, html:samp, html:span): New checkers.

2007-05-13  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::ContentChecker;
2 use strict;
3
4 ## ANY
5 my $AnyChecker = sub {
6 my ($self, $todo) = @_;
7 my $el = $todo->{node};
8 my $new_todos = [];
9 my @nodes = (@{$el->child_nodes});
10 while (@nodes) {
11 my $node = shift @nodes;
12 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
13
14 my $nt = $node->node_type;
15 if ($nt == 1) {
16 my $node_ns = $node->namespace_uri;
17 $node_ns = '' unless defined $node_ns;
18 my $node_ln = $node->manakai_local_name;
19 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
20 $self->{onerror}->(node => $node, type => 'element not allowed');
21 }
22 push @$new_todos, {type => 'element', node => $node};
23 } elsif ($nt == 5) {
24 unshift @nodes, @{$node->child_nodes};
25 }
26 }
27 return ($new_todos);
28 }; # $AnyChecker
29
30 my $ElementDefault = {
31 checker => sub {
32 my ($self, $todo) = @_;
33 $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
34 return $AnyChecker->($self, $todo);
35 },
36 };
37
38 my $Element = {};
39
40 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
41
42 my $HTMLMetadataElements = [
43 [$HTML_NS, 'link'],
44 [$HTML_NS, 'meta'],
45 [$HTML_NS, 'style'],
46 [$HTML_NS, 'script'],
47 [$HTML_NS, 'event-source'],
48 [$HTML_NS, 'command'],
49 [$HTML_NS, 'base'],
50 [$HTML_NS, 'title'],
51 ];
52
53 my $HTMLSectioningElements = {
54 $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
55 };
56
57 my $HTMLBlockLevelElements = [
58 [$HTML_NS, 'section'],
59 [$HTML_NS, 'nav'],
60 [$HTML_NS, 'article'],
61 [$HTML_NS, 'blockquote'],
62 [$HTML_NS, 'aside'],
63 [$HTML_NS, 'h1'],
64 [$HTML_NS, 'h2'],
65 [$HTML_NS, 'h3'],
66 [$HTML_NS, 'h4'],
67 [$HTML_NS, 'h5'],
68 [$HTML_NS, 'h6'],
69 [$HTML_NS, 'header'],
70 [$HTML_NS, 'footer'],
71 [$HTML_NS, 'address'],
72 [$HTML_NS, 'p'],
73 [$HTML_NS, 'hr'],
74 [$HTML_NS, 'dialog'],
75 [$HTML_NS, 'pre'],
76 [$HTML_NS, 'ol'],
77 [$HTML_NS, 'ul'],
78 [$HTML_NS, 'dl'],
79 [$HTML_NS, 'ins'],
80 [$HTML_NS, 'del'],
81 [$HTML_NS, 'figure'],
82 [$HTML_NS, 'map'],
83 [$HTML_NS, 'table'],
84 [$HTML_NS, 'script'],
85 [$HTML_NS, 'noscript'],
86 [$HTML_NS, 'event-source'],
87 [$HTML_NS, 'details'],
88 [$HTML_NS, 'datagrid'],
89 [$HTML_NS, 'menu'],
90 [$HTML_NS, 'div'],
91 [$HTML_NS, 'font'],
92 ];
93
94 my $HTMLStrictlyInlineLevelElements = [
95 [$HTML_NS, 'br'],
96 [$HTML_NS, 'a'],
97 [$HTML_NS, 'q'],
98 [$HTML_NS, 'cite'],
99 [$HTML_NS, 'em'],
100 [$HTML_NS, 'strong'],
101 [$HTML_NS, 'small'],
102 [$HTML_NS, 'm'],
103 [$HTML_NS, 'dfn'],
104 [$HTML_NS, 'abbr'],
105 [$HTML_NS, 'time'],
106 [$HTML_NS, 'meter'],
107 [$HTML_NS, 'progress'],
108 [$HTML_NS, 'code'],
109 [$HTML_NS, 'var'],
110 [$HTML_NS, 'samp'],
111 [$HTML_NS, 'kbd'],
112 [$HTML_NS, 'sub'],
113 [$HTML_NS, 'sup'],
114 [$HTML_NS, 'span'],
115 [$HTML_NS, 'i'],
116 [$HTML_NS, 'b'],
117 [$HTML_NS, 'bdo'],
118 [$HTML_NS, 'ins'],
119 [$HTML_NS, 'del'],
120 [$HTML_NS, 'img'],
121 [$HTML_NS, 'iframe'],
122 [$HTML_NS, 'embed'],
123 [$HTML_NS, 'object'],
124 [$HTML_NS, 'video'],
125 [$HTML_NS, 'audio'],
126 [$HTML_NS, 'canvas'],
127 [$HTML_NS, 'area'],
128 [$HTML_NS, 'script'],
129 [$HTML_NS, 'noscript'],
130 [$HTML_NS, 'event-source'],
131 [$HTML_NS, 'command'],
132 [$HTML_NS, 'font'],
133 ];
134
135 my $HTMLStructuredInlineLevelElements = [
136 [$HTML_NS, 'blockquote'],
137 [$HTML_NS, 'pre'],
138 [$HTML_NS, 'ol'],
139 [$HTML_NS, 'ul'],
140 [$HTML_NS, 'dl'],
141 [$HTML_NS, 'table'],
142 [$HTML_NS, 'menu'],
143 ];
144
145 my $HTMLInteractiveElements = [
146 [$HTML_NS, 'a'],
147 [$HTML_NS, 'details'],
148 [$HTML_NS, 'datagrid'],
149 ];
150
151 my $HTMLTransparentElements = [
152 [$HTML_NS, 'ins'],
153 [$HTML_NS, 'font'],
154 [$HTML_NS, 'noscript'], ## NOTE: If scripting is disabled.
155 ];
156
157 #my $HTMLSemiTransparentElements = [
158 # [$HTML_NS, 'video'],
159 # [$HTML_NS, 'audio'],
160 #];
161
162 my $HTMLEmbededElements = [
163 [$HTML_NS, 'img'],
164 [$HTML_NS, 'iframe'],
165 [$HTML_NS, 'embed'],
166 [$HTML_NS, 'object'],
167 [$HTML_NS, 'video'],
168 [$HTML_NS, 'audio'],
169 [$HTML_NS, 'canvas'],
170 ];
171
172 ## Empty
173 my $HTMLEmptyChecker = sub {
174 my ($self, $todo) = @_;
175 my $el = $todo->{node};
176 my $new_todos = [];
177 my @nodes = (@{$el->child_nodes});
178
179 while (@nodes) {
180 my $node = shift @nodes;
181 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
182
183 my $nt = $node->node_type;
184 if ($nt == 1) {
185 ## NOTE: |minuses| list is not checked since redundant
186 $self->{onerror}->(node => $node, type => 'element not allowed');
187 my ($sib, $ch) = $self->_check_get_children ($node);
188 unshift @nodes, @$sib;
189 push @$new_todos, @$ch;
190 } elsif ($nt == 3 or $nt == 4) {
191 if ($node->data =~ /[^\x09-\x0D\x20]/) {
192 $self->{onerror}->(node => $node, type => 'character not allowed');
193 }
194 } elsif ($nt == 5) {
195 unshift @nodes, @{$node->child_nodes};
196 }
197 }
198 return ($new_todos);
199 };
200
201 ## Text
202 my $HTMLTextChecker = sub {
203 my ($self, $todo) = @_;
204 my $el = $todo->{node};
205 my $new_todos = [];
206 my @nodes = (@{$el->child_nodes});
207
208 while (@nodes) {
209 my $node = shift @nodes;
210 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
211
212 my $nt = $node->node_type;
213 if ($nt == 1) {
214 ## NOTE: |minuses| list is not checked since redundant
215 $self->{onerror}->(node => $node, type => 'element not allowed');
216 my ($sib, $ch) = $self->_check_get_children ($node);
217 unshift @nodes, @$sib;
218 push @$new_todos, @$ch;
219 } elsif ($nt == 5) {
220 unshift @nodes, @{$node->child_nodes};
221 }
222 }
223 return ($new_todos);
224 };
225
226 ## Zero or more |html:style| elements,
227 ## followed by zero or more block-level elements
228 my $HTMLStylableBlockChecker = sub {
229 my ($self, $todo) = @_;
230 my $el = $todo->{node};
231 my $new_todos = [];
232 my @nodes = (@{$el->child_nodes});
233
234 my $has_non_style;
235 while (@nodes) {
236 my $node = shift @nodes;
237 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
238
239 my $nt = $node->node_type;
240 if ($nt == 1) {
241 my $node_ns = $node->namespace_uri;
242 $node_ns = '' unless defined $node_ns;
243 my $node_ln = $node->manakai_local_name;
244 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
245 $self->{onerror}->(node => $node, type => 'element not allowed');
246 }
247 if ($node->manakai_element_type_match ($HTML_NS, 'style')) {
248 if ($has_non_style) {
249 $self->{onerror}->(node => $node, type => 'element not allowed');
250 }
251 } else {
252 $has_non_style = 1;
253 CHK: {
254 for (@{$HTMLBlockLevelElements}) {
255 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
256 last CHK;
257 }
258 }
259 $self->{onerror}->(node => $node, type => 'element not allowed');
260 } # CHK
261 }
262 my ($sib, $ch) = $self->_check_get_children ($node);
263 unshift @nodes, @$sib;
264 push @$new_todos, @$ch;
265 } elsif ($nt == 3 or $nt == 4) {
266 if ($node->data =~ /[^\x09-\x0D\x20]/) {
267 $self->{onerror}->(node => $node, type => 'character not allowed');
268 }
269 } elsif ($nt == 5) {
270 unshift @nodes, @{$node->child_nodes};
271 }
272 }
273 return ($new_todos);
274 }; # $HTMLStylableBlockChecker
275
276 ## Zero or more block-level elements
277 my $HTMLBlockChecker = sub {
278 my ($self, $todo) = @_;
279 my $el = $todo->{node};
280 my $new_todos = [];
281 my @nodes = (@{$el->child_nodes});
282
283 while (@nodes) {
284 my $node = shift @nodes;
285 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
286
287 my $nt = $node->node_type;
288 if ($nt == 1) {
289 my $node_ns = $node->namespace_uri;
290 $node_ns = '' unless defined $node_ns;
291 my $node_ln = $node->manakai_local_name;
292 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
293 $self->{onerror}->(node => $node, type => 'element not allowed');
294 }
295 CHK: {
296 for (@{$HTMLBlockLevelElements}) {
297 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
298 last CHK;
299 }
300 }
301 $self->{onerror}->(node => $node, type => 'element not allowed');
302 } # CHK
303 my ($sib, $ch) = $self->_check_get_children ($node);
304 unshift @nodes, @$sib;
305 push @$new_todos, @$ch;
306 } elsif ($nt == 3 or $nt == 4) {
307 if ($node->data =~ /[^\x09-\x0D\x20]/) {
308 $self->{onerror}->(node => $node, type => 'character not allowed');
309 }
310 } elsif ($nt == 5) {
311 unshift @nodes, @{$node->child_nodes};
312 }
313 }
314 return ($new_todos);
315 }; # $HTMLBlockChecker
316
317 ## Inline-level content
318 my $HTMLInlineChecker = sub {
319 my ($self, $todo) = @_;
320 my $el = $todo->{node};
321 my $new_todos = [];
322 my @nodes = (@{$el->child_nodes});
323
324 while (@nodes) {
325 my $node = shift @nodes;
326 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
327
328 my $nt = $node->node_type;
329 if ($nt == 1) {
330 my $node_ns = $node->namespace_uri;
331 $node_ns = '' unless defined $node_ns;
332 my $node_ln = $node->manakai_local_name;
333 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
334 $self->{onerror}->(node => $node, type => 'element not allowed');
335 }
336 CHK: {
337 for (@{$HTMLStrictlyInlineLevelElements},
338 @{$HTMLStructuredInlineLevelElements}) {
339 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
340 last CHK;
341 }
342 }
343 $self->{onerror}->(node => $node, type => 'element not allowed');
344 } # CHK
345 my ($sib, $ch) = $self->_check_get_children ($node);
346 unshift @nodes, @$sib;
347 push @$new_todos, @$ch;
348 } elsif ($nt == 5) {
349 unshift @nodes, @{$node->child_nodes};
350 }
351 }
352
353 for (@$new_todos) {
354 $_->{inline} = 1;
355 }
356 return ($new_todos);
357 }; # $HTMLInlineChecker
358
359 my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
360 ## TODO: check significant content
361
362 ## Strictly inline-level content
363 my $HTMLStrictlyInlineChecker = sub {
364 my ($self, $todo) = @_;
365 my $el = $todo->{node};
366 my $new_todos = [];
367 my @nodes = (@{$el->child_nodes});
368
369 while (@nodes) {
370 my $node = shift @nodes;
371 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
372
373 my $nt = $node->node_type;
374 if ($nt == 1) {
375 my $node_ns = $node->namespace_uri;
376 $node_ns = '' unless defined $node_ns;
377 my $node_ln = $node->manakai_local_name;
378 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
379 $self->{onerror}->(node => $node, type => 'element not allowed');
380 }
381 CHK: {
382 for (@{$HTMLStrictlyInlineLevelElements}) {
383 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
384 last CHK;
385 }
386 }
387 $self->{onerror}->(node => $node, type => 'element not allowed');
388 } # CHK
389 my ($sib, $ch) = $self->_check_get_children ($node);
390 unshift @nodes, @$sib;
391 push @$new_todos, @$ch;
392 } elsif ($nt == 5) {
393 unshift @nodes, @{$node->child_nodes};
394 }
395 }
396
397 for (@$new_todos) {
398 $_->{inline} = 1;
399 $_->{strictly_inline} = 1;
400 }
401 return ($new_todos);
402 }; # $HTMLStrictlyInlineChecker
403
404 my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
405 ## TODO: check significant content
406
407 ## Inline-level or strictly inline-kevek content
408 my $HTMLInlineOrStrictlyInlineChecker = sub {
409 my ($self, $todo) = @_;
410 my $el = $todo->{node};
411 my $new_todos = [];
412 my @nodes = (@{$el->child_nodes});
413
414 while (@nodes) {
415 my $node = shift @nodes;
416 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
417
418 my $nt = $node->node_type;
419 if ($nt == 1) {
420 my $node_ns = $node->namespace_uri;
421 $node_ns = '' unless defined $node_ns;
422 my $node_ln = $node->manakai_local_name;
423 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
424 $self->{onerror}->(node => $node, type => 'element not allowed');
425 }
426 CHK: {
427 for (@{$HTMLStrictlyInlineLevelElements},
428 $todo->{strictly_inline} ? () : @{$HTMLStructuredInlineLevelElements}) {
429 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
430 last CHK;
431 }
432 }
433 $self->{onerror}->(node => $node, type => 'element not allowed');
434 } # CHK
435 my ($sib, $ch) = $self->_check_get_children ($node);
436 unshift @nodes, @$sib;
437 push @$new_todos, @$ch;
438 } elsif ($nt == 5) {
439 unshift @nodes, @{$node->child_nodes};
440 }
441 }
442
443 for (@$new_todos) {
444 $_->{inline} = 1;
445 $_->{strictly_inline} = 1;
446 }
447 return ($new_todos);
448 }; # $HTMLInlineOrStrictlyInlineChecker
449
450 my $HTMLBlockOrInlineChecker = sub {
451 my ($self, $todo) = @_;
452 my $el = $todo->{node};
453 my $new_todos = [];
454 my @nodes = (@{$el->child_nodes});
455
456 my $content = 'block-or-inline'; # or 'block' or 'inline'
457 my @block_not_inline;
458 while (@nodes) {
459 my $node = shift @nodes;
460 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
461
462 my $nt = $node->node_type;
463 if ($nt == 1) {
464 my $node_ns = $node->namespace_uri;
465 $node_ns = '' unless defined $node_ns;
466 my $node_ln = $node->manakai_local_name;
467 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
468 $self->{onerror}->(node => $node, type => 'element not allowed');
469 }
470 if ($content eq 'block') {
471 CHK: {
472 for (@{$HTMLBlockLevelElements}) {
473 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
474 last CHK;
475 }
476 }
477 $self->{onerror}->(node => $node, type => 'element not allowed');
478 } # CHK
479 } elsif ($content eq 'inline') {
480 CHK: {
481 for (@{$HTMLStrictlyInlineLevelElements},
482 @{$HTMLStructuredInlineLevelElements}) {
483 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
484 last CHK;
485 }
486 }
487 $self->{onerror}->(node => $node, type => 'element not allowed');
488 } # CHK
489 } else {
490 my $is_block;
491 my $is_inline;
492 for (@{$HTMLBlockLevelElements}) {
493 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
494 $is_block = 1;
495 last;
496 }
497 }
498
499 for (@{$HTMLStrictlyInlineLevelElements},
500 @{$HTMLStructuredInlineLevelElements}) {
501 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
502 $is_inline = 1;
503 last;
504 }
505 }
506
507 push @block_not_inline, $node if $is_block and not $is_inline;
508 unless ($is_block) {
509 $content = 'inline';
510 for (@block_not_inline) {
511 $self->{onerror}->(node => $_, type => 'element not allowed');
512 }
513 unless ($is_inline) {
514 $self->{onerror}->(node => $node, type => 'element not allowed');
515 }
516 }
517 }
518 my ($sib, $ch) = $self->_check_get_children ($node);
519 unshift @nodes, @$sib;
520 push @$new_todos, @$ch;
521 } elsif ($nt == 3 or $nt == 4) {
522 if ($node->data =~ /[^\x09-\x0D\x20]/) {
523 if ($content eq 'block') {
524 $self->{onerror}->(node => $node, type => 'character not allowed');
525 } else {
526 $content = 'inline';
527 for (@block_not_inline) {
528 $self->{onerror}->(node => $_, type => 'element not allowed');
529 }
530 }
531 }
532 } elsif ($nt == 5) {
533 unshift @nodes, @{$node->child_nodes};
534 }
535 }
536
537 if ($content eq 'inline') {
538 for (@$new_todos) {
539 $_->{inline} = 1;
540 }
541 }
542 return ($new_todos);
543 };
544
545 ## Zero or more XXX element, then either block-level or inline-level
546 my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
547 my ($elnsuri, $ellname) = @_;
548 return sub {
549 my ($self, $todo) = @_;
550 my $el = $todo->{node};
551 my $new_todos = [];
552 my @nodes = (@{$el->child_nodes});
553
554 my $has_non_style;
555 my $content = 'block-or-inline'; # or 'block' or 'inline'
556 my @block_not_inline;
557 while (@nodes) {
558 my $node = shift @nodes;
559 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
560
561 my $nt = $node->node_type;
562 if ($nt == 1) {
563 my $node_ns = $node->namespace_uri;
564 $node_ns = '' unless defined $node_ns;
565 my $node_ln = $node->manakai_local_name;
566 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
567 $self->{onerror}->(node => $node, type => 'element not allowed');
568 }
569 if ($node->manakai_element_type_match ($elnsuri, $ellname)) {
570 if ($has_non_style) {
571 $self->{onerror}->(node => $node, type => 'element not allowed');
572 }
573 } elsif ($content eq 'block') {
574 $has_non_style = 1;
575 CHK: {
576 for (@{$HTMLBlockLevelElements}) {
577 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
578 last CHK;
579 }
580 }
581 $self->{onerror}->(node => $node, type => 'element not allowed');
582 } # CHK
583 } elsif ($content eq 'inline') {
584 $has_non_style = 1;
585 CHK: {
586 for (@{$HTMLStrictlyInlineLevelElements},
587 @{$HTMLStructuredInlineLevelElements}) {
588 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
589 last CHK;
590 }
591 }
592 $self->{onerror}->(node => $node, type => 'element not allowed');
593 } # CHK
594 } else {
595 $has_non_style = 1;
596 my $is_block;
597 my $is_inline;
598 for (@{$HTMLBlockLevelElements}) {
599 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
600 $is_block = 1;
601 last;
602 }
603 }
604
605 for (@{$HTMLStrictlyInlineLevelElements},
606 @{$HTMLStructuredInlineLevelElements}) {
607 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
608 $is_inline = 1;
609 last;
610 }
611 }
612
613 push @block_not_inline, $node if $is_block and not $is_inline;
614 unless ($is_block) {
615 $content = 'inline';
616 for (@block_not_inline) {
617 $self->{onerror}->(node => $_, type => 'element not allowed');
618 }
619 unless ($is_inline) {
620 $self->{onerror}->(node => $node, type => 'element not allowed');
621 }
622 }
623 }
624 my ($sib, $ch) = $self->_check_get_children ($node);
625 unshift @nodes, @$sib;
626 push @$new_todos, @$ch;
627 } elsif ($nt == 3 or $nt == 4) {
628 if ($node->data =~ /[^\x09-\x0D\x20]/) {
629 $has_non_style = 1;
630 if ($content eq 'block') {
631 $self->{onerror}->(node => $node, type => 'character not allowed');
632 } else {
633 $content = 'inline';
634 for (@block_not_inline) {
635 $self->{onerror}->(node => $_, type => 'element not allowed');
636 }
637 }
638 }
639 } elsif ($nt == 5) {
640 unshift @nodes, @{$node->child_nodes};
641 }
642 }
643
644 if ($content eq 'inline') {
645 for (@$new_todos) {
646 $_->{inline} = 1;
647 }
648 }
649 return ($new_todos);
650 };
651 }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
652
653 my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
654
655 $Element->{$HTML_NS}->{html} = {
656 checker => sub {
657 my ($self, $todo) = @_;
658 my $el = $todo->{node};
659 my $new_todos = [];
660 my @nodes = (@{$el->child_nodes});
661
662 my $phase = 'before head';
663 while (@nodes) {
664 my $node = shift @nodes;
665 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
666
667 my $nt = $node->node_type;
668 if ($nt == 1) {
669 my $node_ns = $node->namespace_uri;
670 $node_ns = '' unless defined $node_ns;
671 my $node_ln = $node->manakai_local_name;
672 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
673 $self->{onerror}->(node => $node, type => 'element not allowed');
674 }
675 if ($phase eq 'before head') {
676 if ($node->manakai_element_type_match ($HTML_NS, 'head')) {
677 $phase = 'after head';
678 } elsif ($node->manakai_element_type_match ($HTML_NS, 'body')) {
679 $self->{onerror}
680 ->(node => $node, type => 'ps element missing:head');
681 $phase = 'after body';
682 } else {
683 $self->{onerror}->(node => $node, type => 'element not allowed');
684 # before head
685 }
686 } elsif ($phase eq 'after head') {
687 if ($node->manakai_element_type_match ($HTML_NS, 'body')) {
688 $phase = 'after body';
689 } else {
690 $self->{onerror}->(node => $node, type => 'element not allowed');
691 # after head
692 }
693 } else { #elsif ($phase eq 'after body') {
694 $self->{onerror}->(node => $node, type => 'element not allowed');
695 # after body
696 }
697 my ($sib, $ch) = $self->_check_get_children ($node);
698 unshift @nodes, @$sib;
699 push @$new_todos, @$ch;
700 } elsif ($nt == 3 or $nt == 4) {
701 if ($node->data =~ /[^\x09-\x0D\x20]/) {
702 $self->{onerror}->(node => $node, type => 'character not allowed');
703 }
704 } elsif ($nt == 5) {
705 unshift @nodes, @{$node->child_nodes};
706 }
707 }
708
709 if ($phase eq 'before head') {
710 $self->{onerror}->(node => $el, type => 'child element missing:head');
711 $self->{onerror}->(node => $el, type => 'child element missing:body');
712 } elsif ($phase eq 'after head') {
713 $self->{onerror}->(node => $el, type => 'child element missing:body');
714 }
715
716 return ($new_todos);
717 },
718 };
719
720 $Element->{$HTML_NS}->{head} = {
721 checker => sub {
722 my ($self, $todo) = @_;
723 my $el = $todo->{node};
724 my $new_todos = [];
725 my @nodes = (@{$el->child_nodes});
726
727 my $has_title;
728 my $phase = 'initial'; # 'after charset', 'after base'
729 while (@nodes) {
730 my $node = shift @nodes;
731 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
732
733 my $nt = $node->node_type;
734 if ($nt == 1) {
735 my $node_ns = $node->namespace_uri;
736 $node_ns = '' unless defined $node_ns;
737 my $node_ln = $node->manakai_local_name;
738 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
739 $self->{onerror}->(node => $node, type => 'element not allowed');
740 }
741 if ($node->manakai_element_type_match ($HTML_NS, 'title')) {
742 $phase = 'after base';
743 unless ($has_title) {
744 $has_title = 1;
745 } else {
746 $self->{onerror}->(node => $node, type => 'element not allowed');
747 }
748 } elsif ($node->manakai_element_type_match ($HTML_NS, 'meta')) {
749 if ($node->has_attribute_ns (undef, 'charset')) {
750 if ($phase eq 'initial') {
751 $phase = 'after charset';
752 } else {
753 $self->{onerror}->(node => $node, type => 'element not allowed');
754 ## NOTE: See also |base|'s "contexts" field in the spec
755 }
756 } else {
757 $phase = 'after base';
758 }
759 } elsif ($node->manakai_element_type_match ($HTML_NS, 'base')) {
760 if ($phase eq 'initial' or $phase eq 'after charset') {
761 $phase = 'after base';
762 } else {
763 $self->{onerror}->(node => $node, type => 'element not allowed');
764 }
765 } else {
766 $phase = 'after base';
767 CHK: {
768 for (@{$HTMLMetadataElements}) {
769 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
770 last CHK;
771 }
772 }
773 $self->{onerror}->(node => $node, type => 'element not allowed');
774 } # CHK
775 }
776 my ($sib, $ch) = $self->_check_get_children ($node);
777 unshift @nodes, @$sib;
778 push @$new_todos, @$ch;
779 } elsif ($nt == 3 or $nt == 4) {
780 if ($node->data =~ /[^\x09-\x0D\x20]/) {
781 $self->{onerror}->(node => $node, type => 'character not allowed');
782 }
783 } elsif ($nt == 5) {
784 unshift @nodes, @{$node->child_nodes};
785 }
786 }
787 unless ($has_title) {
788 $self->{onerror}->(node => $el, type => 'child element missing:title');
789 }
790 return ($new_todos);
791 },
792 };
793
794 $Element->{$HTML_NS}->{title} = {
795 checker => $HTMLTextChecker,
796 };
797
798 $Element->{$HTML_NS}->{base} = {
799 checker => $HTMLEmptyChecker,
800 };
801
802 $Element->{$HTML_NS}->{link} = {
803 checker => $HTMLEmptyChecker,
804 };
805
806 $Element->{$HTML_NS}->{meta} = {
807 checker => $HTMLEmptyChecker,
808 };
809
810 ## NOTE: |html:style| has no conformance creteria on content model
811 $Element->{$HTML_NS}->{style} = {
812 checker => $AnyChecker,
813 };
814
815 $Element->{$HTML_NS}->{body} = {
816 checker => $HTMLBlockChecker,
817 };
818
819 $Element->{$HTML_NS}->{section} = {
820 checker => $HTMLStylableBlockChecker,
821 };
822
823 $Element->{$HTML_NS}->{nav} = {
824 checker => $HTMLBlockOrInlineChecker,
825 };
826
827 $Element->{$HTML_NS}->{article} = {
828 checker => $HTMLStylableBlockChecker,
829 };
830
831 $Element->{$HTML_NS}->{blockquote} = {
832 checker => $HTMLBlockChecker,
833 };
834
835 $Element->{$HTML_NS}->{aside} = {
836 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
837 };
838
839 $Element->{$HTML_NS}->{h1} = {
840 checker => $HTMLSignificantStrictlyInlineChecker,
841 };
842
843 $Element->{$HTML_NS}->{h2} = {
844 checker => $HTMLSignificantStrictlyInlineChecker,
845 };
846
847 $Element->{$HTML_NS}->{h3} = {
848 checker => $HTMLSignificantStrictlyInlineChecker,
849 };
850
851 $Element->{$HTML_NS}->{h4} = {
852 checker => $HTMLSignificantStrictlyInlineChecker,
853 };
854
855 $Element->{$HTML_NS}->{h5} = {
856 checker => $HTMLSignificantStrictlyInlineChecker,
857 };
858
859 $Element->{$HTML_NS}->{h6} = {
860 checker => $HTMLSignificantStrictlyInlineChecker,
861 };
862
863 ## TODO: header
864
865 $Element->{$HTML_NS}->{footer} = {
866 checker => sub { ## block -hn -header -footer -sectioning or inline
867 my ($self, $todo) = @_;
868 my $el = $todo->{node};
869 my $new_todos = [];
870 my @nodes = (@{$el->child_nodes});
871
872 my $content = 'block-or-inline'; # or 'block' or 'inline'
873 my @block_not_inline;
874 while (@nodes) {
875 my $node = shift @nodes;
876 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
877
878 my $nt = $node->node_type;
879 if ($nt == 1) {
880 my $node_ns = $node->namespace_uri;
881 $node_ns = '' unless defined $node_ns;
882 my $node_ln = $node->manakai_local_name;
883 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
884 $self->{onerror}->(node => $node, type => 'element not allowed');
885 } elsif ($node_ns eq $HTML_NS and
886 {
887 qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
888 }->{$node_ln}) {
889 $self->{onerror}->(node => $node, type => 'element not allowed');
890 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
891 $self->{onerror}->(node => $node, type => 'element not allowed');
892 }
893 if ($content eq 'block') {
894 CHK: {
895 for (@{$HTMLBlockLevelElements}) {
896 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
897 last CHK;
898 }
899 }
900 $self->{onerror}->(node => $node, type => 'element not allowed');
901 } # CHK
902 } elsif ($content eq 'inline') {
903 CHK: {
904 for (@{$HTMLStrictlyInlineLevelElements},
905 @{$HTMLStructuredInlineLevelElements}) {
906 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
907 last CHK;
908 }
909 }
910 $self->{onerror}->(node => $node, type => 'element not allowed');
911 } # CHK
912 } else {
913 my $is_block;
914 my $is_inline;
915 for (@{$HTMLBlockLevelElements}) {
916 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
917 $is_block = 1;
918 last;
919 }
920 }
921
922 for (@{$HTMLStrictlyInlineLevelElements},
923 @{$HTMLStructuredInlineLevelElements}) {
924 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
925 $is_inline = 1;
926 last;
927 }
928 }
929
930 push @block_not_inline, $node if $is_block and not $is_inline;
931 unless ($is_block) {
932 $content = 'inline';
933 for (@block_not_inline) {
934 $self->{onerror}->(node => $_, type => 'element not allowed');
935 }
936 unless ($is_inline) {
937 $self->{onerror}->(node => $node, type => 'element not allowed');
938 }
939 }
940 }
941 my ($sib, $ch) = $self->_check_get_children ($node);
942 unshift @nodes, @$sib;
943 push @$new_todos, @$ch;
944 } elsif ($nt == 3 or $nt == 4) {
945 if ($node->data =~ /[^\x09-\x0D\x20]/) {
946 if ($content eq 'block') {
947 $self->{onerror}->(node => $node, type => 'character not allowed');
948 } else {
949 $content = 'inline';
950 for (@block_not_inline) {
951 $self->{onerror}->(node => $_, type => 'element not allowed');
952 }
953 }
954 }
955 } elsif ($nt == 5) {
956 unshift @nodes, @{$node->child_nodes};
957 }
958 }
959
960 my $end = $self->_add_minuses
961 ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
962 $HTMLSectioningElements);
963 push @$new_todos, $end;
964
965 if ($content eq 'inline') {
966 for (@$new_todos) {
967 $_->{inline} = 1;
968 }
969 }
970
971 return ($new_todos);
972 },
973 };
974
975 $Element->{$HTML_NS}->{address} = {
976 checker => $HTMLInlineChecker,
977 };
978
979 $Element->{$HTML_NS}->{p} = {
980 checker => $HTMLSignificantInlineChecker,
981 };
982
983 $Element->{$HTML_NS}->{hr} = {
984 checker => $HTMLEmptyChecker,
985 };
986
987 $Element->{$HTML_NS}->{br} = {
988 checker => $HTMLEmptyChecker,
989 };
990
991 $Element->{$HTML_NS}->{dialog} = {
992 checker => sub {
993 my ($self, $todo) = @_;
994 my $el = $todo->{node};
995 my $new_todos = [];
996 my @nodes = (@{$el->child_nodes});
997
998 my $phase = 'before dt';
999 while (@nodes) {
1000 my $node = shift @nodes;
1001 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1002
1003 my $nt = $node->node_type;
1004 if ($nt == 1) {
1005 ## NOTE: |minuses| list is not checked since redundant
1006 if ($phase eq 'before dt') {
1007 if ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
1008 $phase = 'before dd';
1009 } elsif ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
1010 $self->{onerror}
1011 ->(node => $node, type => 'ps element missing:dt');
1012 $phase = 'before dt';
1013 } else {
1014 $self->{onerror}->(node => $node, type => 'element not allowed');
1015 }
1016 } else { # before dd
1017 if ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
1018 $phase = 'before dt';
1019 } elsif ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
1020 $self->{onerror}
1021 ->(node => $node, type => 'ps element missing:dd');
1022 $phase = 'before dd';
1023 } else {
1024 $self->{onerror}->(node => $node, type => 'element not allowed');
1025 }
1026 }
1027 my ($sib, $ch) = $self->_check_get_children ($node);
1028 unshift @nodes, @$sib;
1029 push @$new_todos, @$ch;
1030 } elsif ($nt == 3 or $nt == 4) {
1031 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1032 $self->{onerror}->(node => $node, type => 'character not allowed');
1033 }
1034 } elsif ($nt == 5) {
1035 unshift @nodes, @{$node->child_nodes};
1036 }
1037 }
1038 if ($phase eq 'before dd') {
1039 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1040 }
1041 return ($new_todos);
1042 },
1043 };
1044
1045 $Element->{$HTML_NS}->{pre} = {
1046 checker => $HTMLStrictlyInlineChecker,
1047 };
1048
1049 $Element->{$HTML_NS}->{ol} = {
1050 checker => sub {
1051 my ($self, $todo) = @_;
1052 my $el = $todo->{node};
1053 my $new_todos = [];
1054 my @nodes = (@{$el->child_nodes});
1055
1056 while (@nodes) {
1057 my $node = shift @nodes;
1058 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1059
1060 my $nt = $node->node_type;
1061 if ($nt == 1) {
1062 ## NOTE: |minuses| list is not checked since redundant
1063 unless ($node->manakai_element_type_match ($HTML_NS, 'li')) {
1064 $self->{onerror}->(node => $node, type => 'element not allowed');
1065 }
1066 my ($sib, $ch) = $self->_check_get_children ($node);
1067 unshift @nodes, @$sib;
1068 push @$new_todos, @$ch;
1069 } elsif ($nt == 3 or $nt == 4) {
1070 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1071 $self->{onerror}->(node => $node, type => 'character not allowed');
1072 }
1073 } elsif ($nt == 5) {
1074 unshift @nodes, @{$node->child_nodes};
1075 }
1076 }
1077
1078 if ($todo->{inline}) {
1079 for (@$new_todos) {
1080 $_->{inline} = 1;
1081 }
1082 }
1083 return ($new_todos);
1084 },
1085 };
1086
1087 $Element->{$HTML_NS}->{ul} = {
1088 checker => $Element->{$HTML_NS}->{ol}->{checker},
1089 };
1090
1091 ## TODO: li
1092
1093 $Element->{$HTML_NS}->{dl} = {
1094 checker => sub {
1095 my ($self, $todo) = @_;
1096 my $el = $todo->{node};
1097 my $new_todos = [];
1098 my @nodes = (@{$el->child_nodes});
1099
1100 my $phase = 'before dt';
1101 while (@nodes) {
1102 my $node = shift @nodes;
1103 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1104
1105 my $nt = $node->node_type;
1106 if ($nt == 1) {
1107 ## NOTE: |minuses| list is not checked since redundant
1108 if ($phase eq 'in dds') {
1109 if ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
1110 #$phase = 'in dds';
1111 } elsif ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
1112 $phase = 'in dts';
1113 } else {
1114 $self->{onerror}->(node => $node, type => 'element not allowed');
1115 }
1116 } elsif ($phase eq 'in dts') {
1117 if ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
1118 #$phase = 'in dts';
1119 } elsif ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
1120 $phase = 'in dds';
1121 } else {
1122 $self->{onerror}->(node => $node, type => 'element not allowed');
1123 }
1124 } else { # before dt
1125 if ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
1126 $phase = 'in dts';
1127 } elsif ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
1128 $self->{onerror}
1129 ->(node => $node, type => 'ps element missing:dt');
1130 $phase = 'in dds';
1131 } else {
1132 $self->{onerror}->(node => $node, type => 'element not allowed');
1133 }
1134 }
1135 my ($sib, $ch) = $self->_check_get_children ($node);
1136 unshift @nodes, @$sib;
1137 push @$new_todos, @$ch;
1138 } elsif ($nt == 3 or $nt == 4) {
1139 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1140 $self->{onerror}->(node => $node, type => 'character not allowed');
1141 }
1142 } elsif ($nt == 5) {
1143 unshift @nodes, @{$node->child_nodes};
1144 }
1145 }
1146 if ($phase eq 'in dts') {
1147 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1148 }
1149
1150 if ($todo->{inline}) {
1151 for (@$new_todos) {
1152 $_->{inline} = 1;
1153 }
1154 }
1155 return ($new_todos);
1156 },
1157 };
1158
1159 $Element->{$HTML_NS}->{dt} = {
1160 checker => $HTMLStrictlyInlineChecker,
1161 };
1162
1163 $Element->{$HTML_NS}->{dd} = {
1164 checker => sub {
1165 my ($self, $todo) = @_;
1166 if ($todo->{inline}) {
1167 return $HTMLInlineChecker->($self, $todo);
1168 } else {
1169 return $HTMLBlockOrInlineChecker->($self, $todo);
1170 }
1171 },
1172 };
1173
1174 ## TODO: a
1175
1176 $Element->{$HTML_NS}->{q} = {
1177 checker => $HTMLInlineOrStrictlyInlineChecker,
1178 };
1179
1180 $Element->{$HTML_NS}->{cite} = {
1181 checker => $HTMLStrictlyInlineChecker,
1182 };
1183
1184 $Element->{$HTML_NS}->{em} = {
1185 checker => $HTMLInlineOrStrictlyInlineChecker,
1186 };
1187
1188 $Element->{$HTML_NS}->{strong} = {
1189 checker => $HTMLInlineOrStrictlyInlineChecker,
1190 };
1191
1192 $Element->{$HTML_NS}->{small} = {
1193 checker => $HTMLInlineOrStrictlyInlineChecker,
1194 };
1195
1196 $Element->{$HTML_NS}->{m} = {
1197 checker => $HTMLInlineOrStrictlyInlineChecker,
1198 };
1199
1200 $Element->{$HTML_NS}->{dfn} = {
1201 checker => sub {
1202 my ($self, $todo) = @_;
1203
1204 my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1205 my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1206 push @$sib, $end;
1207 return ($sib, $ch);
1208 },
1209 };
1210
1211 $Element->{$HTML_NS}->{abbr} = {
1212 checker => $HTMLStrictlyInlineChecker,
1213 };
1214
1215 $Element->{$HTML_NS}->{time} = {
1216 checker => $HTMLStrictlyInlineChecker,
1217 };
1218
1219 $Element->{$HTML_NS}->{meter} = {
1220 checker => $HTMLStrictlyInlineChecker,
1221 };
1222
1223 $Element->{$HTML_NS}->{progress} = {
1224 checker => $HTMLStrictlyInlineChecker,
1225 };
1226
1227 $Element->{$HTML_NS}->{code} = {
1228 checker => $HTMLInlineOrStrictlyInlineChecker,
1229 };
1230
1231 $Element->{$HTML_NS}->{var} = {
1232 checker => $HTMLStrictlyInlineChecker,
1233 };
1234
1235 $Element->{$HTML_NS}->{samp} = {
1236 checker => $HTMLInlineOrStrictlyInlineChecker,
1237 };
1238
1239 $Element->{$HTML_NS}->{kbd} = {
1240 checker => $HTMLStrictlyInlineChecker,
1241 };
1242
1243 $Element->{$HTML_NS}->{sub} = {
1244 checker => $HTMLStrictlyInlineChecker,
1245 };
1246
1247 $Element->{$HTML_NS}->{sup} = {
1248 checker => $HTMLStrictlyInlineChecker,
1249 };
1250
1251 $Element->{$HTML_NS}->{span} = {
1252 checker => $HTMLInlineOrStrictlyInlineChecker,
1253 };
1254
1255 $Element->{$HTML_NS}->{i} = {
1256 checker => $HTMLStrictlyInlineChecker,
1257 };
1258
1259 $Element->{$HTML_NS}->{b} = {
1260 checker => $HTMLStrictlyInlineChecker,
1261 };
1262
1263 $Element->{$HTML_NS}->{bdo} = {
1264 checker => $HTMLStrictlyInlineChecker,
1265 };
1266
1267 $Element->{$HTML_NS}->{ins} = {
1268 checker => $HTMLTransparentChecker,
1269 };
1270
1271 $Element->{$HTML_NS}->{del} = {
1272 checker => sub {
1273 my ($self, $todo) = @_;
1274
1275 my $parent = $todo->{node}->manakai_parent_element;
1276 if (defined $parent) {
1277 my $nsuri = $parent->namespace_uri;
1278 $nsuri = '' unless defined $nsuri;
1279 my $ln = $parent->manakai_local_name;
1280 my $eldef = $Element->{$nsuri}->{$ln} ||
1281 $Element->{$nsuri}->{''} ||
1282 $ElementDefault;
1283 return $eldef->{checker}->($self, $todo);
1284 } else {
1285 return $HTMLBlockOrInlineChecker->($self, $todo);
1286 }
1287 },
1288 };
1289
1290 ## TODO: figure
1291
1292 $Element->{$HTML_NS}->{img} = {
1293 checker => $HTMLEmptyChecker,
1294 };
1295
1296 $Element->{$HTML_NS}->{iframe} = {
1297 checker => $HTMLTextChecker,
1298 };
1299
1300 $Element->{$HTML_NS}->{embed} = {
1301 checker => $HTMLEmptyChecker,
1302 };
1303
1304 $Element->{$HTML_NS}->{param} = {
1305 checker => $HTMLEmptyChecker,
1306 };
1307
1308 ## TODO: object
1309
1310 $Element->{$HTML_NS}->{video} = {
1311 checker => sub {
1312 my ($self, $todo) = @_;
1313
1314 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1315 return $HTMLBlockOrInlineChecker->($self, $todo);
1316 } else {
1317 return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
1318 ->($self, $todo);
1319 }
1320 },
1321 };
1322
1323 $Element->{$HTML_NS}->{audio} = {
1324 checker => $Element->{$HTML_NS}->{audio}->{checker},
1325 };
1326
1327 $Element->{$HTML_NS}->{source} = {
1328 checker => $HTMLEmptyChecker,
1329 };
1330
1331 $Element->{$HTML_NS}->{canvas} = {
1332 checker => $HTMLInlineChecker,
1333 };
1334
1335 $Element->{$HTML_NS}->{map} = {
1336 checker => $HTMLBlockChecker,
1337 };
1338
1339 $Element->{$HTML_NS}->{area} = {
1340 checker => $HTMLEmptyChecker,
1341 };
1342 ## TODO: only in map
1343
1344 $Element->{$HTML_NS}->{table} = {
1345 checker => sub {
1346 my ($self, $todo) = @_;
1347 my $el = $todo->{node};
1348 my $new_todos = [];
1349 my @nodes = (@{$el->child_nodes});
1350
1351 my $phase = 'before caption';
1352 my $has_tfoot;
1353 while (@nodes) {
1354 my $node = shift @nodes;
1355 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1356
1357 my $nt = $node->node_type;
1358 if ($nt == 1) {
1359 ## NOTE: |minuses| list is not checked since redundant
1360 if ($phase eq 'in tbodys') {
1361 if ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1362 #$phase = 'in tbodys';
1363 } elsif (not $has_tfoot and
1364 $node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1365 $phase = 'after tfoot';
1366 $has_tfoot = 1;
1367 } else {
1368 $self->{onerror}->(node => $node, type => 'element not allowed');
1369 }
1370 } elsif ($phase eq 'in trs') {
1371 if ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1372 #$phase = 'in trs';
1373 } elsif (not $has_tfoot and
1374 $node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1375 $phase = 'after tfoot';
1376 $has_tfoot = 1;
1377 } else {
1378 $self->{onerror}->(node => $node, type => 'element not allowed');
1379 }
1380 } elsif ($phase eq 'after thead') {
1381 if ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1382 $phase = 'in tbodys';
1383 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1384 $phase = 'in trs';
1385 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1386 $phase = 'in tbodys';
1387 $has_tfoot = 1;
1388 } else {
1389 $self->{onerror}->(node => $node, type => 'element not allowed');
1390 }
1391 } elsif ($phase eq 'in colgroup') {
1392 if ($node->manakai_element_type_match ($HTML_NS, 'colgroup')) {
1393 $phase = 'in colgroup';
1394 } elsif ($node->manakai_element_type_match ($HTML_NS, 'thead')) {
1395 $phase = 'after thead';
1396 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1397 $phase = 'in tbodys';
1398 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1399 $phase = 'in trs';
1400 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1401 $phase = 'in tbodys';
1402 $has_tfoot = 1;
1403 } else {
1404 $self->{onerror}->(node => $node, type => 'element not allowed');
1405 }
1406 } elsif ($phase eq 'before caption') {
1407 if ($node->manakai_element_type_match ($HTML_NS, 'caption')) {
1408 $phase = 'in colgroup';
1409 } elsif ($node->manakai_element_type_match ($HTML_NS, 'colgroup')) {
1410 $phase = 'in colgroup';
1411 } elsif ($node->manakai_element_type_match ($HTML_NS, 'thead')) {
1412 $phase = 'after thead';
1413 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1414 $phase = 'in tbodys';
1415 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1416 $phase = 'in trs';
1417 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1418 $phase = 'in tbodys';
1419 $has_tfoot = 1;
1420 } else {
1421 $self->{onerror}->(node => $node, type => 'element not allowed');
1422 }
1423 } else { # after tfoot
1424 $self->{onerror}->(node => $node, type => 'element not allowed');
1425 }
1426 my ($sib, $ch) = $self->_check_get_children ($node);
1427 unshift @nodes, @$sib;
1428 push @$new_todos, @$ch;
1429 } elsif ($nt == 3 or $nt == 4) {
1430 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1431 $self->{onerror}->(node => $node, type => 'character not allowed');
1432 }
1433 } elsif ($nt == 5) {
1434 unshift @nodes, @{$node->child_nodes};
1435 }
1436 }
1437 return ($new_todos);
1438 },
1439 };
1440
1441 $Element->{$HTML_NS}->{caption} = {
1442 checker => $HTMLSignificantStrictlyInlineChecker,
1443 };
1444
1445 $Element->{$HTML_NS}->{colgroup} = {
1446 checker => sub {
1447 my ($self, $todo) = @_;
1448 my $el = $todo->{node};
1449 my $new_todos = [];
1450 my @nodes = (@{$el->child_nodes});
1451
1452 while (@nodes) {
1453 my $node = shift @nodes;
1454 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1455
1456 my $nt = $node->node_type;
1457 if ($nt == 1) {
1458 ## NOTE: |minuses| list is not checked since redundant
1459 unless ($node->manakai_element_type_match ($HTML_NS, 'col')) {
1460 $self->{onerror}->(node => $node, type => 'element not allowed');
1461 }
1462 my ($sib, $ch) = $self->_check_get_children ($node);
1463 unshift @nodes, @$sib;
1464 push @$new_todos, @$ch;
1465 } elsif ($nt == 3 or $nt == 4) {
1466 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1467 $self->{onerror}->(node => $node, type => 'character not allowed');
1468 }
1469 } elsif ($nt == 5) {
1470 unshift @nodes, @{$node->child_nodes};
1471 }
1472 }
1473 return ($new_todos);
1474 },
1475 };
1476
1477 $Element->{$HTML_NS}->{col} = {
1478 checker => $HTMLEmptyChecker,
1479 };
1480
1481 $Element->{$HTML_NS}->{tbody} = {
1482 checker => sub {
1483 my ($self, $todo) = @_;
1484 my $el = $todo->{node};
1485 my $new_todos = [];
1486 my @nodes = (@{$el->child_nodes});
1487
1488 my $has_tr;
1489 while (@nodes) {
1490 my $node = shift @nodes;
1491 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1492
1493 my $nt = $node->node_type;
1494 if ($nt == 1) {
1495 ## NOTE: |minuses| list is not checked since redundant
1496 if ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1497 $has_tr = 1;
1498 } else {
1499 $self->{onerror}->(node => $node, type => 'element not allowed');
1500 }
1501 my ($sib, $ch) = $self->_check_get_children ($node);
1502 unshift @nodes, @$sib;
1503 push @$new_todos, @$ch;
1504 } elsif ($nt == 3 or $nt == 4) {
1505 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1506 $self->{onerror}->(node => $node, type => 'character not allowed');
1507 }
1508 } elsif ($nt == 5) {
1509 unshift @nodes, @{$node->child_nodes};
1510 }
1511 }
1512 unless ($has_tr) {
1513 $self->{onerror}->(node => $el, type => 'child element missing:tr');
1514 }
1515 return ($new_todos);
1516 },
1517 };
1518
1519 $Element->{$HTML_NS}->{thead} = {
1520 checker => $Element->{$HTML_NS}->{tbody},
1521 };
1522
1523 $Element->{$HTML_NS}->{tfoot} = {
1524 checker => $Element->{$HTML_NS}->{tbody},
1525 };
1526
1527 $Element->{$HTML_NS}->{tr} = {
1528 checker => sub {
1529 my ($self, $todo) = @_;
1530 my $el = $todo->{node};
1531 my $new_todos = [];
1532 my @nodes = (@{$el->child_nodes});
1533
1534 my $has_td;
1535 while (@nodes) {
1536 my $node = shift @nodes;
1537 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1538
1539 my $nt = $node->node_type;
1540 if ($nt == 1) {
1541 ## NOTE: |minuses| list is not checked since redundant
1542 if ($node->manakai_element_type_match ($HTML_NS, 'td') or
1543 $node->manakai_element_type_match ($HTML_NS, 'th')) {
1544 $has_td = 1;
1545 } else {
1546 $self->{onerror}->(node => $node, type => 'element not allowed');
1547 }
1548 my ($sib, $ch) = $self->_check_get_children ($node);
1549 unshift @nodes, @$sib;
1550 push @$new_todos, @$ch;
1551 } elsif ($nt == 3 or $nt == 4) {
1552 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1553 $self->{onerror}->(node => $node, type => 'character not allowed');
1554 }
1555 } elsif ($nt == 5) {
1556 unshift @nodes, @{$node->child_nodes};
1557 }
1558 }
1559 unless ($has_td) {
1560 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
1561 }
1562 return ($new_todos);
1563 },
1564 };
1565
1566 $Element->{$HTML_NS}->{td} = {
1567 checker => $HTMLBlockOrInlineChecker,
1568 };
1569
1570 $Element->{$HTML_NS}->{th} = {
1571 checker => $HTMLBlockOrInlineChecker,
1572 };
1573
1574 ## TODO: forms
1575
1576 $Element->{$HTML_NS}->{script} = {
1577 checker => sub {
1578 my ($self, $todo) = @_;
1579
1580 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1581 return $HTMLEmptyChecker->($self, $todo);
1582 } else {
1583 ## NOTE: No content model conformance in HTML5 spec.
1584 return $AnyChecker->($self, $todo);
1585 }
1586 },
1587 };
1588
1589 ## NOTE: When script is disabled.
1590 $Element->{$HTML_NS}->{noscript} = {
1591 checker => sub {
1592 my ($self, $todo) = @_;
1593
1594 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
1595 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
1596 push @$sib, $end;
1597 return ($sib, $ch);
1598 },
1599 };
1600
1601 $Element->{$HTML_NS}->{'event-source'} = {
1602 checker => $HTMLEmptyChecker,
1603 };
1604
1605 $Element->{$HTML_NS}->{details} = {
1606 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend'),
1607 };
1608
1609 $Element->{$HTML_NS}->{datagrid} = {
1610 checker => $HTMLBlockChecker,
1611 };
1612
1613 $Element->{$HTML_NS}->{command} = {
1614 checker => $HTMLEmptyChecker,
1615 };
1616
1617 $Element->{$HTML_NS}->{menu} = {
1618 checker => sub {
1619 my ($self, $todo) = @_;
1620 my $el = $todo->{node};
1621 my $new_todos = [];
1622 my @nodes = (@{$el->child_nodes});
1623
1624 my $content = 'li or inline';
1625 while (@nodes) {
1626 my $node = shift @nodes;
1627 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1628
1629 my $nt = $node->node_type;
1630 if ($nt == 1) {
1631 my $node_ns = $node->namespace_uri;
1632 $node_ns = '' unless defined $node_ns;
1633 my $node_ln = $node->manakai_local_name;
1634 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1635 $self->{onerror}->(node => $node, type => 'element not allowed');
1636 }
1637 if ($node->manakai_element_type_match ($HTML_NS, 'li')) {
1638 if ($content eq 'inline') {
1639 $self->{onerror}->(node => $node, type => 'element not allowed');
1640 } elsif ($content eq 'li or inline') {
1641 $content = 'li';
1642 }
1643 } else {
1644 CHK: {
1645 for (@{$HTMLStrictlyInlineLevelElements},
1646 @{$HTMLStructuredInlineLevelElements}) {
1647 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
1648 $content = 'inline';
1649 last CHK;
1650 }
1651 }
1652 $self->{onerror}->(node => $node, type => 'element not allowed');
1653 } # CHK
1654 }
1655 my ($sib, $ch) = $self->_check_get_children ($node);
1656 unshift @nodes, @$sib;
1657 push @$new_todos, @$ch;
1658 } elsif ($nt == 3 or $nt == 4) {
1659 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1660 if ($content eq 'li') {
1661 $self->{onerror}->(node => $node, type => 'character not allowed');
1662 } elsif ($content eq 'li or inline') {
1663 $content = 'inline';
1664 }
1665 }
1666 } elsif ($nt == 5) {
1667 unshift @nodes, @{$node->child_nodes};
1668 }
1669 }
1670
1671 for (@$new_todos) {
1672 $_->{inline} = 1;
1673 }
1674 return ($new_todos);
1675 },
1676 };
1677
1678 ## TODO: legend
1679
1680 $Element->{$HTML_NS}->{div} = {
1681 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1682 };
1683
1684 $Element->{$HTML_NS}->{font} = {
1685 checker => $HTMLTransparentChecker,
1686 };
1687
1688 my $Attr = {
1689
1690 };
1691
1692 sub new ($) {
1693 return bless {}, shift;
1694 } # new
1695
1696 sub check_element ($$$) {
1697 my ($self, $el, $onerror) = @_;
1698
1699 $self->{minuses} = {};
1700 $self->{onerror} = $onerror;
1701
1702 my @todo = ({type => 'element', node => $el});
1703 while (@todo) {
1704 my $todo = shift @todo;
1705 if ($todo->{type} eq 'element') {
1706 my $nsuri = $todo->{node}->namespace_uri;
1707 $nsuri = '' unless defined $nsuri;
1708 my $ln = $todo->{node}->manakai_local_name;
1709 my $eldef = $Element->{$nsuri}->{$ln} ||
1710 $Element->{$nsuri}->{''} ||
1711 $ElementDefault;
1712 my ($new_todos) = $eldef->{checker}->($self, $todo);
1713 push @todo, @$new_todos;
1714 } elsif ($todo->{type} eq 'plus') {
1715 $self->_remove_minuses ($todo);
1716 }
1717 }
1718 } # check_element
1719
1720 sub _add_minuses ($@) {
1721 my $self = shift;
1722 my $r = {};
1723 for my $list (@_) {
1724 for my $ns (keys %$list) {
1725 for my $ln (keys %{$list->{$ns}}) {
1726 unless ($self->{minuses}->{$ns}->{$ln}) {
1727 $self->{minuses}->{$ns}->{$ln} = 1;
1728 $r->{$ns}->{$ln} = 1;
1729 }
1730 }
1731 }
1732 }
1733 return {type => 'plus', list => $r};
1734 } # _add_minuses
1735
1736 sub _remove_minuses ($$) {
1737 my ($self, $todo) = @_;
1738 for my $ns (keys %{$todo->{list}}) {
1739 for my $ln (keys %{$todo->{list}->{$ns}}) {
1740 delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
1741 }
1742 }
1743 1;
1744 } # _remove_minuses
1745
1746 sub _check_get_children ($$) {
1747 my ($self, $node) = @_;
1748 my $new_todos = [];
1749 my $sib = [];
1750 TP: {
1751 my $node_ns = $node->namespace_uri;
1752 $node_ns = '' unless defined $node_ns;
1753 my $node_ln = $node->manakai_local_name;
1754 if ($node_ns eq $HTML_NS) {
1755 if ($node_ln eq 'noscript') {
1756 my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
1757 push @$sib, $end;
1758 }
1759 }
1760 for (@{$HTMLTransparentElements}) {
1761 if ($node->manakai_element_type_match ($_->[0], $_->[1])) {
1762 unshift @$sib, @{$node->child_nodes};
1763 last TP;
1764 }
1765 }
1766 if ($node->manakai_element_type_match ($HTML_NS, 'video') or
1767 $node->manakai_element_type_match ($HTML_NS, 'audio')) {
1768 if ($node->has_attribute_ns (undef, 'src')) {
1769 unshift @$sib, @{$node->child_nodes};
1770 last TP;
1771 } else {
1772 my @cn = @{$node->child_nodes};
1773 CN: while (@cn) {
1774 my $cn = shift @cn;
1775 my $cnt = $cn->node_type;
1776 if ($cnt == 1) {
1777 if ($cn->manakai_element_type_match ($HTML_NS, 'source')) {
1778 #
1779 } else {
1780 last CN;
1781 }
1782 } elsif ($cnt == 3 or $cnt == 4) {
1783 if ($cn->data =~ /[^\x09-\x0D\x20]/) {
1784 last CN;
1785 }
1786 }
1787 } # CN
1788 unshift @$sib, @cn;
1789 }
1790 }
1791 push @$new_todos, {type => 'element', node => $node};
1792 } # TP
1793 return ($sib, $new_todos);
1794 } # _check_get_children
1795
1796 1;
1797 # $Date: 2007/05/13 05:35:22 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24