/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (show annotations) (download)
Sun May 13 10:17:35 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.6: +101 -282 lines
++ whatpm/Whatpm/ChangeLog	13 May 2007 10:17:32 -0000
	* ContentChecker.pm: Use hashs rather than lists for
	element type testings.

2007-05-13  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::ContentChecker;
2 use strict;
3
4 ## ANY
5 my $AnyChecker = sub {
6 my ($self, $todo) = @_;
7 my $el = $todo->{node};
8 my $new_todos = [];
9 my @nodes = (@{$el->child_nodes});
10 while (@nodes) {
11 my $node = shift @nodes;
12 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
13
14 my $nt = $node->node_type;
15 if ($nt == 1) {
16 my $node_ns = $node->namespace_uri;
17 $node_ns = '' unless defined $node_ns;
18 my $node_ln = $node->manakai_local_name;
19 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
20 $self->{onerror}->(node => $node, type => 'element not allowed');
21 }
22 push @$new_todos, {type => 'element', node => $node};
23 } elsif ($nt == 5) {
24 unshift @nodes, @{$node->child_nodes};
25 }
26 }
27 return ($new_todos);
28 }; # $AnyChecker
29
30 my $ElementDefault = {
31 checker => sub {
32 my ($self, $todo) = @_;
33 $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
34 return $AnyChecker->($self, $todo);
35 },
36 };
37
38 my $Element = {};
39
40 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
41
42 my $HTMLMetadataElements = {
43 $HTML_NS => {
44 qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1/,
45 },
46 };
47
48 my $HTMLSectioningElements = {
49 $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
50 };
51
52 my $HTMLBlockLevelElements = {
53 $HTML_NS => {
54 qw/
55 section 1 nav 1 article 1 blockquote 1 aside 1
56 h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
57 address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
58 ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
59 event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
60 /,
61 },
62 };
63
64 my $HTMLStrictlyInlineLevelElements = {
65 $HTML_NS => {
66 qw/
67 br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
68 time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
69 sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
70 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
71 script 1 noscript 1 event-source 1 command 1 font 1
72 /,
73 },
74 };
75
76 my $HTMLStructuredInlineLevelElements = {
77 $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
78 };
79
80 my $HTMLInteractiveElements = {
81 $HTML_NS => {a => 1, details => 1, datagrid => 1},
82 };
83 ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
84 ## of interactive elements
85
86 my $HTMLTransparentElements = {
87 $HTML_NS => {qw/ins 1 font 1 noscript 1/},
88 ## NOTE: |html:noscript| is transparent if scripting is disabled.
89 };
90
91 #my $HTMLSemiTransparentElements = {
92 # $HTML_NS => {qw/video 1 audio 1/},
93 #};
94
95 my $HTMLEmbededElements = {
96 $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
97 };
98
99 ## Empty
100 my $HTMLEmptyChecker = sub {
101 my ($self, $todo) = @_;
102 my $el = $todo->{node};
103 my $new_todos = [];
104 my @nodes = (@{$el->child_nodes});
105
106 while (@nodes) {
107 my $node = shift @nodes;
108 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
109
110 my $nt = $node->node_type;
111 if ($nt == 1) {
112 ## NOTE: |minuses| list is not checked since redundant
113 $self->{onerror}->(node => $node, type => 'element not allowed');
114 my ($sib, $ch) = $self->_check_get_children ($node);
115 unshift @nodes, @$sib;
116 push @$new_todos, @$ch;
117 } elsif ($nt == 3 or $nt == 4) {
118 if ($node->data =~ /[^\x09-\x0D\x20]/) {
119 $self->{onerror}->(node => $node, type => 'character not allowed');
120 }
121 } elsif ($nt == 5) {
122 unshift @nodes, @{$node->child_nodes};
123 }
124 }
125 return ($new_todos);
126 };
127
128 ## Text
129 my $HTMLTextChecker = sub {
130 my ($self, $todo) = @_;
131 my $el = $todo->{node};
132 my $new_todos = [];
133 my @nodes = (@{$el->child_nodes});
134
135 while (@nodes) {
136 my $node = shift @nodes;
137 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
138
139 my $nt = $node->node_type;
140 if ($nt == 1) {
141 ## NOTE: |minuses| list is not checked since redundant
142 $self->{onerror}->(node => $node, type => 'element not allowed');
143 my ($sib, $ch) = $self->_check_get_children ($node);
144 unshift @nodes, @$sib;
145 push @$new_todos, @$ch;
146 } elsif ($nt == 5) {
147 unshift @nodes, @{$node->child_nodes};
148 }
149 }
150 return ($new_todos);
151 };
152
153 ## Zero or more |html:style| elements,
154 ## followed by zero or more block-level elements
155 my $HTMLStylableBlockChecker = sub {
156 my ($self, $todo) = @_;
157 my $el = $todo->{node};
158 my $new_todos = [];
159 my @nodes = (@{$el->child_nodes});
160
161 my $has_non_style;
162 while (@nodes) {
163 my $node = shift @nodes;
164 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
165
166 my $nt = $node->node_type;
167 if ($nt == 1) {
168 my $node_ns = $node->namespace_uri;
169 $node_ns = '' unless defined $node_ns;
170 my $node_ln = $node->manakai_local_name;
171 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
172 if ($node->manakai_element_type_match ($HTML_NS, 'style')) {
173 $not_allowed = 1 if $has_non_style;
174 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
175 $has_non_style = 1;
176 } else {
177 $has_non_style = 1;
178 $not_allowed = 1;
179 }
180 $self->{onerror}->(node => $node, type => 'element not allowed')
181 if $not_allowed;
182 my ($sib, $ch) = $self->_check_get_children ($node);
183 unshift @nodes, @$sib;
184 push @$new_todos, @$ch;
185 } elsif ($nt == 3 or $nt == 4) {
186 if ($node->data =~ /[^\x09-\x0D\x20]/) {
187 $self->{onerror}->(node => $node, type => 'character not allowed');
188 }
189 } elsif ($nt == 5) {
190 unshift @nodes, @{$node->child_nodes};
191 }
192 }
193 return ($new_todos);
194 }; # $HTMLStylableBlockChecker
195
196 ## Zero or more block-level elements
197 my $HTMLBlockChecker = sub {
198 my ($self, $todo) = @_;
199 my $el = $todo->{node};
200 my $new_todos = [];
201 my @nodes = (@{$el->child_nodes});
202
203 while (@nodes) {
204 my $node = shift @nodes;
205 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
206
207 my $nt = $node->node_type;
208 if ($nt == 1) {
209 my $node_ns = $node->namespace_uri;
210 $node_ns = '' unless defined $node_ns;
211 my $node_ln = $node->manakai_local_name;
212 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
213 $not_allowed = 1
214 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
215 $self->{onerror}->(node => $node, type => 'element not allowed')
216 if $not_allowed;
217 my ($sib, $ch) = $self->_check_get_children ($node);
218 unshift @nodes, @$sib;
219 push @$new_todos, @$ch;
220 } elsif ($nt == 3 or $nt == 4) {
221 if ($node->data =~ /[^\x09-\x0D\x20]/) {
222 $self->{onerror}->(node => $node, type => 'character not allowed');
223 }
224 } elsif ($nt == 5) {
225 unshift @nodes, @{$node->child_nodes};
226 }
227 }
228 return ($new_todos);
229 }; # $HTMLBlockChecker
230
231 ## Inline-level content
232 my $HTMLInlineChecker = sub {
233 my ($self, $todo) = @_;
234 my $el = $todo->{node};
235 my $new_todos = [];
236 my @nodes = (@{$el->child_nodes});
237
238 while (@nodes) {
239 my $node = shift @nodes;
240 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
241
242 my $nt = $node->node_type;
243 if ($nt == 1) {
244 my $node_ns = $node->namespace_uri;
245 $node_ns = '' unless defined $node_ns;
246 my $node_ln = $node->manakai_local_name;
247 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
248 $not_allowed = 1
249 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
250 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
251 $self->{onerror}->(node => $node, type => 'element not allowed')
252 if $not_allowed;
253 my ($sib, $ch) = $self->_check_get_children ($node);
254 unshift @nodes, @$sib;
255 push @$new_todos, @$ch;
256 } elsif ($nt == 5) {
257 unshift @nodes, @{$node->child_nodes};
258 }
259 }
260
261 for (@$new_todos) {
262 $_->{inline} = 1;
263 }
264 return ($new_todos);
265 }; # $HTMLInlineChecker
266
267 my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
268 ## TODO: check significant content
269
270 ## Strictly inline-level content
271 my $HTMLStrictlyInlineChecker = sub {
272 my ($self, $todo) = @_;
273 my $el = $todo->{node};
274 my $new_todos = [];
275 my @nodes = (@{$el->child_nodes});
276
277 while (@nodes) {
278 my $node = shift @nodes;
279 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
280
281 my $nt = $node->node_type;
282 if ($nt == 1) {
283 my $node_ns = $node->namespace_uri;
284 $node_ns = '' unless defined $node_ns;
285 my $node_ln = $node->manakai_local_name;
286 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
287 $not_allowed = 1
288 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
289 $self->{onerror}->(node => $node, type => 'element not allowed')
290 if $not_allowed;
291 my ($sib, $ch) = $self->_check_get_children ($node);
292 unshift @nodes, @$sib;
293 push @$new_todos, @$ch;
294 } elsif ($nt == 5) {
295 unshift @nodes, @{$node->child_nodes};
296 }
297 }
298
299 for (@$new_todos) {
300 $_->{inline} = 1;
301 $_->{strictly_inline} = 1;
302 }
303 return ($new_todos);
304 }; # $HTMLStrictlyInlineChecker
305
306 my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
307 ## TODO: check significant content
308
309 ## Inline-level or strictly inline-kevek content
310 my $HTMLInlineOrStrictlyInlineChecker = sub {
311 my ($self, $todo) = @_;
312 my $el = $todo->{node};
313 my $new_todos = [];
314 my @nodes = (@{$el->child_nodes});
315
316 while (@nodes) {
317 my $node = shift @nodes;
318 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
319
320 my $nt = $node->node_type;
321 if ($nt == 1) {
322 my $node_ns = $node->namespace_uri;
323 $node_ns = '' unless defined $node_ns;
324 my $node_ln = $node->manakai_local_name;
325 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
326 if ($todo->{strictly_inline}) {
327 $not_allowed = 1
328 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
329 } else {
330 $not_allowed = 1
331 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
332 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
333 }
334 $self->{onerror}->(node => $node, type => 'element not allowed')
335 if $not_allowed;
336 my ($sib, $ch) = $self->_check_get_children ($node);
337 unshift @nodes, @$sib;
338 push @$new_todos, @$ch;
339 } elsif ($nt == 5) {
340 unshift @nodes, @{$node->child_nodes};
341 }
342 }
343
344 for (@$new_todos) {
345 $_->{inline} = 1;
346 $_->{strictly_inline} = 1;
347 }
348 return ($new_todos);
349 }; # $HTMLInlineOrStrictlyInlineChecker
350
351 my $HTMLSignificantInlineOrStrictlyInlineChecker
352 = $HTMLInlineOrStrictlyInlineChecker;
353 ## TODO: check significant content
354
355 my $HTMLBlockOrInlineChecker = sub {
356 my ($self, $todo) = @_;
357 my $el = $todo->{node};
358 my $new_todos = [];
359 my @nodes = (@{$el->child_nodes});
360
361 my $content = 'block-or-inline'; # or 'block' or 'inline'
362 my @block_not_inline;
363 while (@nodes) {
364 my $node = shift @nodes;
365 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
366
367 my $nt = $node->node_type;
368 if ($nt == 1) {
369 my $node_ns = $node->namespace_uri;
370 $node_ns = '' unless defined $node_ns;
371 my $node_ln = $node->manakai_local_name;
372 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
373 if ($content eq 'block') {
374 $not_allowed = 1
375 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
376 } elsif ($content eq 'inline') {
377 $not_allowed = 1
378 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
379 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
380 } else {
381 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
382 my $is_inline
383 = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
384 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
385
386 push @block_not_inline, $node
387 if $is_block and not $is_inline and not $not_allowed;
388 unless ($is_block) {
389 $content = 'inline';
390 for (@block_not_inline) {
391 $self->{onerror}->(node => $_, type => 'element not allowed');
392 }
393 $not_allowed = 1 unless $is_inline;
394 }
395 }
396 $self->{onerror}->(node => $node, type => 'element not allowed')
397 if $not_allowed;
398 my ($sib, $ch) = $self->_check_get_children ($node);
399 unshift @nodes, @$sib;
400 push @$new_todos, @$ch;
401 } elsif ($nt == 3 or $nt == 4) {
402 if ($node->data =~ /[^\x09-\x0D\x20]/) {
403 if ($content eq 'block') {
404 $self->{onerror}->(node => $node, type => 'character not allowed');
405 } else {
406 $content = 'inline';
407 for (@block_not_inline) {
408 $self->{onerror}->(node => $_, type => 'element not allowed');
409 }
410 }
411 }
412 } elsif ($nt == 5) {
413 unshift @nodes, @{$node->child_nodes};
414 }
415 }
416
417 if ($content eq 'inline') {
418 for (@$new_todos) {
419 $_->{inline} = 1;
420 }
421 }
422 return ($new_todos);
423 };
424
425 ## Zero or more XXX element, then either block-level or inline-level
426 my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
427 my ($elnsuri, $ellname) = @_;
428 return sub {
429 my ($self, $todo) = @_;
430 my $el = $todo->{node};
431 my $new_todos = [];
432 my @nodes = (@{$el->child_nodes});
433
434 my $has_non_style;
435 my $content = 'block-or-inline'; # or 'block' or 'inline'
436 my @block_not_inline;
437 while (@nodes) {
438 my $node = shift @nodes;
439 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
440
441 my $nt = $node->node_type;
442 if ($nt == 1) {
443 my $node_ns = $node->namespace_uri;
444 $node_ns = '' unless defined $node_ns;
445 my $node_ln = $node->manakai_local_name;
446 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
447 if ($node->manakai_element_type_match ($elnsuri, $ellname)) {
448 $not_allowed = 1 if $has_non_style;
449 } elsif ($content eq 'block') {
450 $has_non_style = 1;
451 $not_allowed = 1
452 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
453 } elsif ($content eq 'inline') {
454 $has_non_style = 1;
455 $not_allowed = 1
456 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
457 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
458 } else {
459 $has_non_style = 1;
460 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
461 my $is_inline
462 = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
463 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
464
465 push @block_not_inline, $node
466 if $is_block and not $is_inline and not $not_allowed;
467 unless ($is_block) {
468 $content = 'inline';
469 for (@block_not_inline) {
470 $self->{onerror}->(node => $_, type => 'element not allowed');
471 }
472 $not_allowed = 1 unless $is_inline;
473 }
474 }
475 $self->{onerror}->(node => $node, type => 'element not allowed')
476 if $not_allowed;
477 my ($sib, $ch) = $self->_check_get_children ($node);
478 unshift @nodes, @$sib;
479 push @$new_todos, @$ch;
480 } elsif ($nt == 3 or $nt == 4) {
481 if ($node->data =~ /[^\x09-\x0D\x20]/) {
482 $has_non_style = 1;
483 if ($content eq 'block') {
484 $self->{onerror}->(node => $node, type => 'character not allowed');
485 } else {
486 $content = 'inline';
487 for (@block_not_inline) {
488 $self->{onerror}->(node => $_, type => 'element not allowed');
489 }
490 }
491 }
492 } elsif ($nt == 5) {
493 unshift @nodes, @{$node->child_nodes};
494 }
495 }
496
497 if ($content eq 'inline') {
498 for (@$new_todos) {
499 $_->{inline} = 1;
500 }
501 }
502 return ($new_todos);
503 };
504 }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
505
506 my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
507
508 $Element->{$HTML_NS}->{html} = {
509 checker => sub {
510 my ($self, $todo) = @_;
511 my $el = $todo->{node};
512 my $new_todos = [];
513 my @nodes = (@{$el->child_nodes});
514
515 my $phase = 'before head';
516 while (@nodes) {
517 my $node = shift @nodes;
518 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
519
520 my $nt = $node->node_type;
521 if ($nt == 1) {
522 my $node_ns = $node->namespace_uri;
523 $node_ns = '' unless defined $node_ns;
524 my $node_ln = $node->manakai_local_name;
525 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
526 if ($phase eq 'before head') {
527 if ($node->manakai_element_type_match ($HTML_NS, 'head')) {
528 $phase = 'after head';
529 } elsif ($node->manakai_element_type_match ($HTML_NS, 'body')) {
530 $self->{onerror}
531 ->(node => $node, type => 'ps element missing:head');
532 $phase = 'after body';
533 } else {
534 $not_allowed = 1;
535 # before head
536 }
537 } elsif ($phase eq 'after head') {
538 if ($node->manakai_element_type_match ($HTML_NS, 'body')) {
539 $phase = 'after body';
540 } else {
541 $not_allowed = 1;
542 # after head
543 }
544 } else { #elsif ($phase eq 'after body') {
545 $not_allowed = 1;
546 # after body
547 }
548 $self->{onerror}->(node => $node, type => 'element not allowed')
549 if $not_allowed;
550 my ($sib, $ch) = $self->_check_get_children ($node);
551 unshift @nodes, @$sib;
552 push @$new_todos, @$ch;
553 } elsif ($nt == 3 or $nt == 4) {
554 if ($node->data =~ /[^\x09-\x0D\x20]/) {
555 $self->{onerror}->(node => $node, type => 'character not allowed');
556 }
557 } elsif ($nt == 5) {
558 unshift @nodes, @{$node->child_nodes};
559 }
560 }
561
562 if ($phase eq 'before head') {
563 $self->{onerror}->(node => $el, type => 'child element missing:head');
564 $self->{onerror}->(node => $el, type => 'child element missing:body');
565 } elsif ($phase eq 'after head') {
566 $self->{onerror}->(node => $el, type => 'child element missing:body');
567 }
568
569 return ($new_todos);
570 },
571 };
572
573 $Element->{$HTML_NS}->{head} = {
574 checker => sub {
575 my ($self, $todo) = @_;
576 my $el = $todo->{node};
577 my $new_todos = [];
578 my @nodes = (@{$el->child_nodes});
579
580 my $has_title;
581 my $phase = 'initial'; # 'after charset', 'after base'
582 while (@nodes) {
583 my $node = shift @nodes;
584 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
585
586 my $nt = $node->node_type;
587 if ($nt == 1) {
588 my $node_ns = $node->namespace_uri;
589 $node_ns = '' unless defined $node_ns;
590 my $node_ln = $node->manakai_local_name;
591 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
592 if ($node->manakai_element_type_match ($HTML_NS, 'title')) {
593 $phase = 'after base';
594 unless ($has_title) {
595 $has_title = 1;
596 } else {
597 $not_allowed = 1;
598 }
599 } elsif ($node->manakai_element_type_match ($HTML_NS, 'meta')) {
600 if ($node->has_attribute_ns (undef, 'charset')) {
601 if ($phase eq 'initial') {
602 $phase = 'after charset';
603 } else {
604 $not_allowed = 1;
605 ## NOTE: See also |base|'s "contexts" field in the spec
606 }
607 } else {
608 $phase = 'after base';
609 }
610 } elsif ($node->manakai_element_type_match ($HTML_NS, 'base')) {
611 if ($phase eq 'initial' or $phase eq 'after charset') {
612 $phase = 'after base';
613 } else {
614 $not_allowed = 1;
615 }
616 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
617 $phase = 'after base';
618 } else {
619 $not_allowed = 1;
620 }
621 $self->{onerror}->(node => $node, type => 'element not allowed')
622 if $not_allowed;
623 my ($sib, $ch) = $self->_check_get_children ($node);
624 unshift @nodes, @$sib;
625 push @$new_todos, @$ch;
626 } elsif ($nt == 3 or $nt == 4) {
627 if ($node->data =~ /[^\x09-\x0D\x20]/) {
628 $self->{onerror}->(node => $node, type => 'character not allowed');
629 }
630 } elsif ($nt == 5) {
631 unshift @nodes, @{$node->child_nodes};
632 }
633 }
634 unless ($has_title) {
635 $self->{onerror}->(node => $el, type => 'child element missing:title');
636 }
637 return ($new_todos);
638 },
639 };
640
641 $Element->{$HTML_NS}->{title} = {
642 checker => $HTMLTextChecker,
643 };
644
645 $Element->{$HTML_NS}->{base} = {
646 checker => $HTMLEmptyChecker,
647 };
648
649 $Element->{$HTML_NS}->{link} = {
650 checker => $HTMLEmptyChecker,
651 };
652
653 $Element->{$HTML_NS}->{meta} = {
654 checker => $HTMLEmptyChecker,
655 };
656
657 ## NOTE: |html:style| has no conformance creteria on content model
658 $Element->{$HTML_NS}->{style} = {
659 checker => $AnyChecker,
660 };
661
662 $Element->{$HTML_NS}->{body} = {
663 checker => $HTMLBlockChecker,
664 };
665
666 $Element->{$HTML_NS}->{section} = {
667 checker => $HTMLStylableBlockChecker,
668 };
669
670 $Element->{$HTML_NS}->{nav} = {
671 checker => $HTMLBlockOrInlineChecker,
672 };
673
674 $Element->{$HTML_NS}->{article} = {
675 checker => $HTMLStylableBlockChecker,
676 };
677
678 $Element->{$HTML_NS}->{blockquote} = {
679 checker => $HTMLBlockChecker,
680 };
681
682 $Element->{$HTML_NS}->{aside} = {
683 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
684 };
685
686 $Element->{$HTML_NS}->{h1} = {
687 checker => $HTMLSignificantStrictlyInlineChecker,
688 };
689
690 $Element->{$HTML_NS}->{h2} = {
691 checker => $HTMLSignificantStrictlyInlineChecker,
692 };
693
694 $Element->{$HTML_NS}->{h3} = {
695 checker => $HTMLSignificantStrictlyInlineChecker,
696 };
697
698 $Element->{$HTML_NS}->{h4} = {
699 checker => $HTMLSignificantStrictlyInlineChecker,
700 };
701
702 $Element->{$HTML_NS}->{h5} = {
703 checker => $HTMLSignificantStrictlyInlineChecker,
704 };
705
706 $Element->{$HTML_NS}->{h6} = {
707 checker => $HTMLSignificantStrictlyInlineChecker,
708 };
709
710 ## TODO: header
711
712 $Element->{$HTML_NS}->{footer} = {
713 checker => sub { ## block -hn -header -footer -sectioning or inline
714 my ($self, $todo) = @_;
715 my $el = $todo->{node};
716 my $new_todos = [];
717 my @nodes = (@{$el->child_nodes});
718
719 my $content = 'block-or-inline'; # or 'block' or 'inline'
720 my @block_not_inline;
721 while (@nodes) {
722 my $node = shift @nodes;
723 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
724
725 my $nt = $node->node_type;
726 if ($nt == 1) {
727 my $node_ns = $node->namespace_uri;
728 $node_ns = '' unless defined $node_ns;
729 my $node_ln = $node->manakai_local_name;
730 my $not_allowed;
731 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
732 $not_allowed = 1;
733 } elsif ($node_ns eq $HTML_NS and
734 {
735 qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
736 }->{$node_ln}) {
737 $not_allowed = 1;
738 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
739 $not_allowed = 1;
740 }
741 if ($content eq 'block') {
742 $not_allowed = 1
743 unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
744 } elsif ($content eq 'inline') {
745 $not_allowed = 1
746 unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
747 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
748 } else {
749 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
750 my $is_inline
751 = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
752 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
753
754 push @block_not_inline, $node
755 if $is_block and not $is_inline and not $not_allowed;
756 unless ($is_block) {
757 $content = 'inline';
758 for (@block_not_inline) {
759 $self->{onerror}->(node => $_, type => 'element not allowed');
760 }
761 $not_allowed = 1 unless $is_inline;
762 }
763 }
764 $self->{onerror}->(node => $node, type => 'element not allowed')
765 if $not_allowed;
766 my ($sib, $ch) = $self->_check_get_children ($node);
767 unshift @nodes, @$sib;
768 push @$new_todos, @$ch;
769 } elsif ($nt == 3 or $nt == 4) {
770 if ($node->data =~ /[^\x09-\x0D\x20]/) {
771 if ($content eq 'block') {
772 $self->{onerror}->(node => $node, type => 'character not allowed');
773 } else {
774 $content = 'inline';
775 for (@block_not_inline) {
776 $self->{onerror}->(node => $_, type => 'element not allowed');
777 }
778 }
779 }
780 } elsif ($nt == 5) {
781 unshift @nodes, @{$node->child_nodes};
782 }
783 }
784
785 my $end = $self->_add_minuses
786 ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
787 $HTMLSectioningElements);
788 push @$new_todos, $end;
789
790 if ($content eq 'inline') {
791 for (@$new_todos) {
792 $_->{inline} = 1;
793 }
794 }
795
796 return ($new_todos);
797 },
798 };
799
800 $Element->{$HTML_NS}->{address} = {
801 checker => $HTMLInlineChecker,
802 };
803
804 $Element->{$HTML_NS}->{p} = {
805 checker => $HTMLSignificantInlineChecker,
806 };
807
808 $Element->{$HTML_NS}->{hr} = {
809 checker => $HTMLEmptyChecker,
810 };
811
812 $Element->{$HTML_NS}->{br} = {
813 checker => $HTMLEmptyChecker,
814 };
815
816 $Element->{$HTML_NS}->{dialog} = {
817 checker => sub {
818 my ($self, $todo) = @_;
819 my $el = $todo->{node};
820 my $new_todos = [];
821 my @nodes = (@{$el->child_nodes});
822
823 my $phase = 'before dt';
824 while (@nodes) {
825 my $node = shift @nodes;
826 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
827
828 my $nt = $node->node_type;
829 if ($nt == 1) {
830 ## NOTE: |minuses| list is not checked since redundant
831 if ($phase eq 'before dt') {
832 if ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
833 $phase = 'before dd';
834 } elsif ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
835 $self->{onerror}
836 ->(node => $node, type => 'ps element missing:dt');
837 $phase = 'before dt';
838 } else {
839 $self->{onerror}->(node => $node, type => 'element not allowed');
840 }
841 } else { # before dd
842 if ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
843 $phase = 'before dt';
844 } elsif ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
845 $self->{onerror}
846 ->(node => $node, type => 'ps element missing:dd');
847 $phase = 'before dd';
848 } else {
849 $self->{onerror}->(node => $node, type => 'element not allowed');
850 }
851 }
852 my ($sib, $ch) = $self->_check_get_children ($node);
853 unshift @nodes, @$sib;
854 push @$new_todos, @$ch;
855 } elsif ($nt == 3 or $nt == 4) {
856 if ($node->data =~ /[^\x09-\x0D\x20]/) {
857 $self->{onerror}->(node => $node, type => 'character not allowed');
858 }
859 } elsif ($nt == 5) {
860 unshift @nodes, @{$node->child_nodes};
861 }
862 }
863 if ($phase eq 'before dd') {
864 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
865 }
866 return ($new_todos);
867 },
868 };
869
870 $Element->{$HTML_NS}->{pre} = {
871 checker => $HTMLStrictlyInlineChecker,
872 };
873
874 $Element->{$HTML_NS}->{ol} = {
875 checker => sub {
876 my ($self, $todo) = @_;
877 my $el = $todo->{node};
878 my $new_todos = [];
879 my @nodes = (@{$el->child_nodes});
880
881 while (@nodes) {
882 my $node = shift @nodes;
883 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
884
885 my $nt = $node->node_type;
886 if ($nt == 1) {
887 ## NOTE: |minuses| list is not checked since redundant
888 unless ($node->manakai_element_type_match ($HTML_NS, 'li')) {
889 $self->{onerror}->(node => $node, type => 'element not allowed');
890 }
891 my ($sib, $ch) = $self->_check_get_children ($node);
892 unshift @nodes, @$sib;
893 push @$new_todos, @$ch;
894 } elsif ($nt == 3 or $nt == 4) {
895 if ($node->data =~ /[^\x09-\x0D\x20]/) {
896 $self->{onerror}->(node => $node, type => 'character not allowed');
897 }
898 } elsif ($nt == 5) {
899 unshift @nodes, @{$node->child_nodes};
900 }
901 }
902
903 if ($todo->{inline}) {
904 for (@$new_todos) {
905 $_->{inline} = 1;
906 }
907 }
908 return ($new_todos);
909 },
910 };
911
912 $Element->{$HTML_NS}->{ul} = {
913 checker => $Element->{$HTML_NS}->{ol}->{checker},
914 };
915
916
917 $Element->{$HTML_NS}->{li} = {
918 checker => sub {
919 my ($self, $todo) = @_;
920 if ($todo->{inline}) {
921 return $HTMLInlineChecker->($self, $todo);
922 } else {
923 return $HTMLBlockOrInlineChecker->($self, $todo);
924 }
925 },
926 };
927
928 $Element->{$HTML_NS}->{dl} = {
929 checker => sub {
930 my ($self, $todo) = @_;
931 my $el = $todo->{node};
932 my $new_todos = [];
933 my @nodes = (@{$el->child_nodes});
934
935 my $phase = 'before dt';
936 while (@nodes) {
937 my $node = shift @nodes;
938 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
939
940 my $nt = $node->node_type;
941 if ($nt == 1) {
942 ## NOTE: |minuses| list is not checked since redundant
943 if ($phase eq 'in dds') {
944 if ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
945 #$phase = 'in dds';
946 } elsif ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
947 $phase = 'in dts';
948 } else {
949 $self->{onerror}->(node => $node, type => 'element not allowed');
950 }
951 } elsif ($phase eq 'in dts') {
952 if ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
953 #$phase = 'in dts';
954 } elsif ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
955 $phase = 'in dds';
956 } else {
957 $self->{onerror}->(node => $node, type => 'element not allowed');
958 }
959 } else { # before dt
960 if ($node->manakai_element_type_match ($HTML_NS, 'dt')) {
961 $phase = 'in dts';
962 } elsif ($node->manakai_element_type_match ($HTML_NS, 'dd')) {
963 $self->{onerror}
964 ->(node => $node, type => 'ps element missing:dt');
965 $phase = 'in dds';
966 } else {
967 $self->{onerror}->(node => $node, type => 'element not allowed');
968 }
969 }
970 my ($sib, $ch) = $self->_check_get_children ($node);
971 unshift @nodes, @$sib;
972 push @$new_todos, @$ch;
973 } elsif ($nt == 3 or $nt == 4) {
974 if ($node->data =~ /[^\x09-\x0D\x20]/) {
975 $self->{onerror}->(node => $node, type => 'character not allowed');
976 }
977 } elsif ($nt == 5) {
978 unshift @nodes, @{$node->child_nodes};
979 }
980 }
981 if ($phase eq 'in dts') {
982 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
983 }
984
985 if ($todo->{inline}) {
986 for (@$new_todos) {
987 $_->{inline} = 1;
988 }
989 }
990 return ($new_todos);
991 },
992 };
993
994 $Element->{$HTML_NS}->{dt} = {
995 checker => $HTMLStrictlyInlineChecker,
996 };
997
998 $Element->{$HTML_NS}->{dd} = {
999 checker => $Element->{$HTML_NS}->{li}->{checker},
1000 };
1001
1002 $Element->{$HTML_NS}->{a} = {
1003 checker => sub {
1004 my ($self, $todo) = @_;
1005
1006 my $end = $self->_add_minuses ($HTMLInteractiveElements);
1007 my ($sib, $ch)
1008 = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1009 push @$sib, $end;
1010 return ($sib, $ch);
1011 },
1012 };
1013
1014 $Element->{$HTML_NS}->{q} = {
1015 checker => $HTMLInlineOrStrictlyInlineChecker,
1016 };
1017
1018 $Element->{$HTML_NS}->{cite} = {
1019 checker => $HTMLStrictlyInlineChecker,
1020 };
1021
1022 $Element->{$HTML_NS}->{em} = {
1023 checker => $HTMLInlineOrStrictlyInlineChecker,
1024 };
1025
1026 $Element->{$HTML_NS}->{strong} = {
1027 checker => $HTMLInlineOrStrictlyInlineChecker,
1028 };
1029
1030 $Element->{$HTML_NS}->{small} = {
1031 checker => $HTMLInlineOrStrictlyInlineChecker,
1032 };
1033
1034 $Element->{$HTML_NS}->{m} = {
1035 checker => $HTMLInlineOrStrictlyInlineChecker,
1036 };
1037
1038 $Element->{$HTML_NS}->{dfn} = {
1039 checker => sub {
1040 my ($self, $todo) = @_;
1041
1042 my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1043 my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1044 push @$sib, $end;
1045 return ($sib, $ch);
1046 },
1047 };
1048
1049 $Element->{$HTML_NS}->{abbr} = {
1050 checker => $HTMLStrictlyInlineChecker,
1051 };
1052
1053 $Element->{$HTML_NS}->{time} = {
1054 checker => $HTMLStrictlyInlineChecker,
1055 };
1056
1057 $Element->{$HTML_NS}->{meter} = {
1058 checker => $HTMLStrictlyInlineChecker,
1059 };
1060
1061 $Element->{$HTML_NS}->{progress} = {
1062 checker => $HTMLStrictlyInlineChecker,
1063 };
1064
1065 $Element->{$HTML_NS}->{code} = {
1066 checker => $HTMLInlineOrStrictlyInlineChecker,
1067 };
1068
1069 $Element->{$HTML_NS}->{var} = {
1070 checker => $HTMLStrictlyInlineChecker,
1071 };
1072
1073 $Element->{$HTML_NS}->{samp} = {
1074 checker => $HTMLInlineOrStrictlyInlineChecker,
1075 };
1076
1077 $Element->{$HTML_NS}->{kbd} = {
1078 checker => $HTMLStrictlyInlineChecker,
1079 };
1080
1081 $Element->{$HTML_NS}->{sub} = {
1082 checker => $HTMLStrictlyInlineChecker,
1083 };
1084
1085 $Element->{$HTML_NS}->{sup} = {
1086 checker => $HTMLStrictlyInlineChecker,
1087 };
1088
1089 $Element->{$HTML_NS}->{span} = {
1090 checker => $HTMLInlineOrStrictlyInlineChecker,
1091 };
1092
1093 $Element->{$HTML_NS}->{i} = {
1094 checker => $HTMLStrictlyInlineChecker,
1095 };
1096
1097 $Element->{$HTML_NS}->{b} = {
1098 checker => $HTMLStrictlyInlineChecker,
1099 };
1100
1101 $Element->{$HTML_NS}->{bdo} = {
1102 checker => $HTMLStrictlyInlineChecker,
1103 };
1104
1105 $Element->{$HTML_NS}->{ins} = {
1106 checker => $HTMLTransparentChecker,
1107 };
1108
1109 $Element->{$HTML_NS}->{del} = {
1110 checker => sub {
1111 my ($self, $todo) = @_;
1112
1113 my $parent = $todo->{node}->manakai_parent_element;
1114 if (defined $parent) {
1115 my $nsuri = $parent->namespace_uri;
1116 $nsuri = '' unless defined $nsuri;
1117 my $ln = $parent->manakai_local_name;
1118 my $eldef = $Element->{$nsuri}->{$ln} ||
1119 $Element->{$nsuri}->{''} ||
1120 $ElementDefault;
1121 return $eldef->{checker}->($self, $todo);
1122 } else {
1123 return $HTMLBlockOrInlineChecker->($self, $todo);
1124 }
1125 },
1126 };
1127
1128 ## TODO: figure
1129
1130 $Element->{$HTML_NS}->{img} = {
1131 checker => $HTMLEmptyChecker,
1132 };
1133
1134 $Element->{$HTML_NS}->{iframe} = {
1135 checker => $HTMLTextChecker,
1136 };
1137
1138 $Element->{$HTML_NS}->{embed} = {
1139 checker => $HTMLEmptyChecker,
1140 };
1141
1142 $Element->{$HTML_NS}->{param} = {
1143 checker => $HTMLEmptyChecker,
1144 };
1145
1146 ## TODO: object
1147
1148 $Element->{$HTML_NS}->{video} = {
1149 checker => sub {
1150 my ($self, $todo) = @_;
1151
1152 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1153 return $HTMLBlockOrInlineChecker->($self, $todo);
1154 } else {
1155 return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
1156 ->($self, $todo);
1157 }
1158 },
1159 };
1160
1161 $Element->{$HTML_NS}->{audio} = {
1162 checker => $Element->{$HTML_NS}->{audio}->{checker},
1163 };
1164
1165 $Element->{$HTML_NS}->{source} = {
1166 checker => $HTMLEmptyChecker,
1167 };
1168
1169 $Element->{$HTML_NS}->{canvas} = {
1170 checker => $HTMLInlineChecker,
1171 };
1172
1173 $Element->{$HTML_NS}->{map} = {
1174 checker => $HTMLBlockChecker,
1175 };
1176
1177 $Element->{$HTML_NS}->{area} = {
1178 checker => $HTMLEmptyChecker,
1179 };
1180 ## TODO: only in map
1181
1182 $Element->{$HTML_NS}->{table} = {
1183 checker => sub {
1184 my ($self, $todo) = @_;
1185 my $el = $todo->{node};
1186 my $new_todos = [];
1187 my @nodes = (@{$el->child_nodes});
1188
1189 my $phase = 'before caption';
1190 my $has_tfoot;
1191 while (@nodes) {
1192 my $node = shift @nodes;
1193 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1194
1195 my $nt = $node->node_type;
1196 if ($nt == 1) {
1197 ## NOTE: |minuses| list is not checked since redundant
1198 if ($phase eq 'in tbodys') {
1199 if ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1200 #$phase = 'in tbodys';
1201 } elsif (not $has_tfoot and
1202 $node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1203 $phase = 'after tfoot';
1204 $has_tfoot = 1;
1205 } else {
1206 $self->{onerror}->(node => $node, type => 'element not allowed');
1207 }
1208 } elsif ($phase eq 'in trs') {
1209 if ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1210 #$phase = 'in trs';
1211 } elsif (not $has_tfoot and
1212 $node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1213 $phase = 'after tfoot';
1214 $has_tfoot = 1;
1215 } else {
1216 $self->{onerror}->(node => $node, type => 'element not allowed');
1217 }
1218 } elsif ($phase eq 'after thead') {
1219 if ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1220 $phase = 'in tbodys';
1221 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1222 $phase = 'in trs';
1223 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1224 $phase = 'in tbodys';
1225 $has_tfoot = 1;
1226 } else {
1227 $self->{onerror}->(node => $node, type => 'element not allowed');
1228 }
1229 } elsif ($phase eq 'in colgroup') {
1230 if ($node->manakai_element_type_match ($HTML_NS, 'colgroup')) {
1231 $phase = 'in colgroup';
1232 } elsif ($node->manakai_element_type_match ($HTML_NS, 'thead')) {
1233 $phase = 'after thead';
1234 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1235 $phase = 'in tbodys';
1236 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1237 $phase = 'in trs';
1238 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1239 $phase = 'in tbodys';
1240 $has_tfoot = 1;
1241 } else {
1242 $self->{onerror}->(node => $node, type => 'element not allowed');
1243 }
1244 } elsif ($phase eq 'before caption') {
1245 if ($node->manakai_element_type_match ($HTML_NS, 'caption')) {
1246 $phase = 'in colgroup';
1247 } elsif ($node->manakai_element_type_match ($HTML_NS, 'colgroup')) {
1248 $phase = 'in colgroup';
1249 } elsif ($node->manakai_element_type_match ($HTML_NS, 'thead')) {
1250 $phase = 'after thead';
1251 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tbody')) {
1252 $phase = 'in tbodys';
1253 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1254 $phase = 'in trs';
1255 } elsif ($node->manakai_element_type_match ($HTML_NS, 'tfoot')) {
1256 $phase = 'in tbodys';
1257 $has_tfoot = 1;
1258 } else {
1259 $self->{onerror}->(node => $node, type => 'element not allowed');
1260 }
1261 } else { # after tfoot
1262 $self->{onerror}->(node => $node, type => 'element not allowed');
1263 }
1264 my ($sib, $ch) = $self->_check_get_children ($node);
1265 unshift @nodes, @$sib;
1266 push @$new_todos, @$ch;
1267 } elsif ($nt == 3 or $nt == 4) {
1268 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1269 $self->{onerror}->(node => $node, type => 'character not allowed');
1270 }
1271 } elsif ($nt == 5) {
1272 unshift @nodes, @{$node->child_nodes};
1273 }
1274 }
1275 return ($new_todos);
1276 },
1277 };
1278
1279 $Element->{$HTML_NS}->{caption} = {
1280 checker => $HTMLSignificantStrictlyInlineChecker,
1281 };
1282
1283 $Element->{$HTML_NS}->{colgroup} = {
1284 checker => sub {
1285 my ($self, $todo) = @_;
1286 my $el = $todo->{node};
1287 my $new_todos = [];
1288 my @nodes = (@{$el->child_nodes});
1289
1290 while (@nodes) {
1291 my $node = shift @nodes;
1292 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1293
1294 my $nt = $node->node_type;
1295 if ($nt == 1) {
1296 ## NOTE: |minuses| list is not checked since redundant
1297 unless ($node->manakai_element_type_match ($HTML_NS, 'col')) {
1298 $self->{onerror}->(node => $node, type => 'element not allowed');
1299 }
1300 my ($sib, $ch) = $self->_check_get_children ($node);
1301 unshift @nodes, @$sib;
1302 push @$new_todos, @$ch;
1303 } elsif ($nt == 3 or $nt == 4) {
1304 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1305 $self->{onerror}->(node => $node, type => 'character not allowed');
1306 }
1307 } elsif ($nt == 5) {
1308 unshift @nodes, @{$node->child_nodes};
1309 }
1310 }
1311 return ($new_todos);
1312 },
1313 };
1314
1315 $Element->{$HTML_NS}->{col} = {
1316 checker => $HTMLEmptyChecker,
1317 };
1318
1319 $Element->{$HTML_NS}->{tbody} = {
1320 checker => sub {
1321 my ($self, $todo) = @_;
1322 my $el = $todo->{node};
1323 my $new_todos = [];
1324 my @nodes = (@{$el->child_nodes});
1325
1326 my $has_tr;
1327 while (@nodes) {
1328 my $node = shift @nodes;
1329 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1330
1331 my $nt = $node->node_type;
1332 if ($nt == 1) {
1333 ## NOTE: |minuses| list is not checked since redundant
1334 if ($node->manakai_element_type_match ($HTML_NS, 'tr')) {
1335 $has_tr = 1;
1336 } else {
1337 $self->{onerror}->(node => $node, type => 'element not allowed');
1338 }
1339 my ($sib, $ch) = $self->_check_get_children ($node);
1340 unshift @nodes, @$sib;
1341 push @$new_todos, @$ch;
1342 } elsif ($nt == 3 or $nt == 4) {
1343 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1344 $self->{onerror}->(node => $node, type => 'character not allowed');
1345 }
1346 } elsif ($nt == 5) {
1347 unshift @nodes, @{$node->child_nodes};
1348 }
1349 }
1350 unless ($has_tr) {
1351 $self->{onerror}->(node => $el, type => 'child element missing:tr');
1352 }
1353 return ($new_todos);
1354 },
1355 };
1356
1357 $Element->{$HTML_NS}->{thead} = {
1358 checker => $Element->{$HTML_NS}->{tbody},
1359 };
1360
1361 $Element->{$HTML_NS}->{tfoot} = {
1362 checker => $Element->{$HTML_NS}->{tbody},
1363 };
1364
1365 $Element->{$HTML_NS}->{tr} = {
1366 checker => sub {
1367 my ($self, $todo) = @_;
1368 my $el = $todo->{node};
1369 my $new_todos = [];
1370 my @nodes = (@{$el->child_nodes});
1371
1372 my $has_td;
1373 while (@nodes) {
1374 my $node = shift @nodes;
1375 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1376
1377 my $nt = $node->node_type;
1378 if ($nt == 1) {
1379 ## NOTE: |minuses| list is not checked since redundant
1380 if ($node->manakai_element_type_match ($HTML_NS, 'td') or
1381 $node->manakai_element_type_match ($HTML_NS, 'th')) {
1382 $has_td = 1;
1383 } else {
1384 $self->{onerror}->(node => $node, type => 'element not allowed');
1385 }
1386 my ($sib, $ch) = $self->_check_get_children ($node);
1387 unshift @nodes, @$sib;
1388 push @$new_todos, @$ch;
1389 } elsif ($nt == 3 or $nt == 4) {
1390 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1391 $self->{onerror}->(node => $node, type => 'character not allowed');
1392 }
1393 } elsif ($nt == 5) {
1394 unshift @nodes, @{$node->child_nodes};
1395 }
1396 }
1397 unless ($has_td) {
1398 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
1399 }
1400 return ($new_todos);
1401 },
1402 };
1403
1404 $Element->{$HTML_NS}->{td} = {
1405 checker => $HTMLBlockOrInlineChecker,
1406 };
1407
1408 $Element->{$HTML_NS}->{th} = {
1409 checker => $HTMLBlockOrInlineChecker,
1410 };
1411
1412 ## TODO: forms
1413
1414 $Element->{$HTML_NS}->{script} = {
1415 checker => sub {
1416 my ($self, $todo) = @_;
1417
1418 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1419 return $HTMLEmptyChecker->($self, $todo);
1420 } else {
1421 ## NOTE: No content model conformance in HTML5 spec.
1422 return $AnyChecker->($self, $todo);
1423 }
1424 },
1425 };
1426
1427 ## NOTE: When script is disabled.
1428 $Element->{$HTML_NS}->{noscript} = {
1429 checker => sub {
1430 my ($self, $todo) = @_;
1431
1432 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
1433 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
1434 push @$sib, $end;
1435 return ($sib, $ch);
1436 },
1437 };
1438
1439 $Element->{$HTML_NS}->{'event-source'} = {
1440 checker => $HTMLEmptyChecker,
1441 };
1442
1443 $Element->{$HTML_NS}->{details} = {
1444 checker => sub {
1445 my ($self, $todo) = @_;
1446
1447 my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
1448 my ($sib, $ch)
1449 = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
1450 ->($self, $todo);
1451 push @$sib, $end;
1452 return ($sib, $ch);
1453 },
1454 };
1455
1456 $Element->{$HTML_NS}->{datagrid} = {
1457 checker => sub {
1458 my ($self, $todo) = @_;
1459
1460 my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
1461 my ($sib, $ch) = $HTMLBlockChecker->($self, $todo);
1462 push @$sib, $end;
1463 return ($sib, $ch);
1464 },
1465 };
1466
1467 $Element->{$HTML_NS}->{command} = {
1468 checker => $HTMLEmptyChecker,
1469 };
1470
1471 $Element->{$HTML_NS}->{menu} = {
1472 checker => sub {
1473 my ($self, $todo) = @_;
1474 my $el = $todo->{node};
1475 my $new_todos = [];
1476 my @nodes = (@{$el->child_nodes});
1477
1478 my $content = 'li or inline';
1479 while (@nodes) {
1480 my $node = shift @nodes;
1481 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1482
1483 my $nt = $node->node_type;
1484 if ($nt == 1) {
1485 my $node_ns = $node->namespace_uri;
1486 $node_ns = '' unless defined $node_ns;
1487 my $node_ln = $node->manakai_local_name;
1488 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1489 if ($node->manakai_element_type_match ($HTML_NS, 'li')) {
1490 if ($content eq 'inline') {
1491 $not_allowed = 1;
1492 } elsif ($content eq 'li or inline') {
1493 $content = 'li';
1494 }
1495 } else {
1496 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1497 $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
1498 $content = 'inline';
1499 } else {
1500 $not_allowed = 1;
1501 }
1502 }
1503 $self->{onerror}->(node => $node, type => 'element not allowed')
1504 if $not_allowed;
1505 my ($sib, $ch) = $self->_check_get_children ($node);
1506 unshift @nodes, @$sib;
1507 push @$new_todos, @$ch;
1508 } elsif ($nt == 3 or $nt == 4) {
1509 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1510 if ($content eq 'li') {
1511 $self->{onerror}->(node => $node, type => 'character not allowed');
1512 } elsif ($content eq 'li or inline') {
1513 $content = 'inline';
1514 }
1515 }
1516 } elsif ($nt == 5) {
1517 unshift @nodes, @{$node->child_nodes};
1518 }
1519 }
1520
1521 for (@$new_todos) {
1522 $_->{inline} = 1;
1523 }
1524 return ($new_todos);
1525 },
1526 };
1527
1528 $Element->{$HTML_NS}->{legend} = {
1529 checker => sub {
1530 my ($self, $todo) = @_;
1531
1532 my $parent = $todo->{node}->manakai_parent_element;
1533 if (defined $parent) {
1534 my $nsuri = $parent->namespace_uri;
1535 $nsuri = '' unless defined $nsuri;
1536 my $ln = $parent->manakai_local_name;
1537 if ($nsuri eq $HTML_NS and $ln eq 'figure') {
1538 return $HTMLInlineChecker->($self, $todo);
1539 } else {
1540 return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1541 }
1542 } else {
1543 return $HTMLInlineChecker->($self, $todo);
1544 }
1545
1546 ## ISSUE: Content model is defined only for fieldset/legend,
1547 ## details/legend, and figure/legend.
1548 },
1549 };
1550
1551 $Element->{$HTML_NS}->{div} = {
1552 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1553 };
1554
1555 $Element->{$HTML_NS}->{font} = {
1556 checker => $HTMLTransparentChecker,
1557 };
1558
1559 my $Attr = {
1560
1561 };
1562
1563 sub new ($) {
1564 return bless {}, shift;
1565 } # new
1566
1567 sub check_element ($$$) {
1568 my ($self, $el, $onerror) = @_;
1569
1570 $self->{minuses} = {};
1571 $self->{onerror} = $onerror;
1572
1573 my @todo = ({type => 'element', node => $el});
1574 while (@todo) {
1575 my $todo = shift @todo;
1576 if ($todo->{type} eq 'element') {
1577 my $nsuri = $todo->{node}->namespace_uri;
1578 $nsuri = '' unless defined $nsuri;
1579 my $ln = $todo->{node}->manakai_local_name;
1580 my $eldef = $Element->{$nsuri}->{$ln} ||
1581 $Element->{$nsuri}->{''} ||
1582 $ElementDefault;
1583 my ($new_todos) = $eldef->{checker}->($self, $todo);
1584 push @todo, @$new_todos;
1585 } elsif ($todo->{type} eq 'plus') {
1586 $self->_remove_minuses ($todo);
1587 }
1588 }
1589 } # check_element
1590
1591 sub _add_minuses ($@) {
1592 my $self = shift;
1593 my $r = {};
1594 for my $list (@_) {
1595 for my $ns (keys %$list) {
1596 for my $ln (keys %{$list->{$ns}}) {
1597 unless ($self->{minuses}->{$ns}->{$ln}) {
1598 $self->{minuses}->{$ns}->{$ln} = 1;
1599 $r->{$ns}->{$ln} = 1;
1600 }
1601 }
1602 }
1603 }
1604 return {type => 'plus', list => $r};
1605 } # _add_minuses
1606
1607 sub _remove_minuses ($$) {
1608 my ($self, $todo) = @_;
1609 for my $ns (keys %{$todo->{list}}) {
1610 for my $ln (keys %{$todo->{list}->{$ns}}) {
1611 delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
1612 }
1613 }
1614 1;
1615 } # _remove_minuses
1616
1617 sub _check_get_children ($$) {
1618 my ($self, $node) = @_;
1619 my $new_todos = [];
1620 my $sib = [];
1621 TP: {
1622 my $node_ns = $node->namespace_uri;
1623 $node_ns = '' unless defined $node_ns;
1624 my $node_ln = $node->manakai_local_name;
1625 if ($node_ns eq $HTML_NS) {
1626 if ($node_ln eq 'noscript') {
1627 my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
1628 push @$sib, $end;
1629 }
1630 }
1631 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
1632 unshift @$sib, @{$node->child_nodes};
1633 last TP;
1634 }
1635 if ($node->manakai_element_type_match ($HTML_NS, 'video') or
1636 $node->manakai_element_type_match ($HTML_NS, 'audio')) {
1637 if ($node->has_attribute_ns (undef, 'src')) {
1638 unshift @$sib, @{$node->child_nodes};
1639 last TP;
1640 } else {
1641 my @cn = @{$node->child_nodes};
1642 CN: while (@cn) {
1643 my $cn = shift @cn;
1644 my $cnt = $cn->node_type;
1645 if ($cnt == 1) {
1646 if ($cn->manakai_element_type_match ($HTML_NS, 'source')) {
1647 #
1648 } else {
1649 last CN;
1650 }
1651 } elsif ($cnt == 3 or $cnt == 4) {
1652 if ($cn->data =~ /[^\x09-\x0D\x20]/) {
1653 last CN;
1654 }
1655 }
1656 } # CN
1657 unshift @$sib, @cn;
1658 }
1659 }
1660 push @$new_todos, {type => 'element', node => $node};
1661 } # TP
1662 return ($sib, $new_todos);
1663 } # _check_get_children
1664
1665 1;
1666 # $Date: 2007/05/13 09:52:08 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24