/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (hide annotations) (download)
Sat May 19 06:02:36 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.9: +297 -104 lines
++ whatpm/t/ChangeLog	19 May 2007 06:02:30 -0000
	* content-model-2.dat: Tests for global attributes, |html|, |head|,
	|base|, |meta|, and |style|.

	* ContentChecker.t: Support for |#data html| (HTML parsing
	mode).

2007-05-19  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	19 May 2007 06:01:57 -0000
	* ContentChecker.pm: Attribute checkers
	for global attributes, |html|, |base|, |style|, and |meta|.

	* NanoDOM.pm (insert_before): Weaken reference
	to the parent node.
	(Attr::new): Set |owner_element| attribute.
	(namespace_uri, manakai_local_name): New attribute implementations.
	(owner_element): New attribute.

2007-05-19  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
5     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
6    
7     my $AttrChecker = {
8     $XML_NS => {
9     ## TODO: xml:space, xml:base, xml:lang, xml:id
10 wakaba 1.10 ## TODO: xml:lang MUST NOT in HTML document
11 wakaba 1.9 },
12     $XMLNS_NS => {
13     '' => sub {}, ## TODO: implement
14     xmlns => sub {}, ## TODO: implement
15     },
16     };
17    
18 wakaba 1.3 ## ANY
19     my $AnyChecker = sub {
20 wakaba 1.4 my ($self, $todo) = @_;
21     my $el = $todo->{node};
22     my $new_todos = [];
23 wakaba 1.3 my @nodes = (@{$el->child_nodes});
24     while (@nodes) {
25     my $node = shift @nodes;
26     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
27    
28     my $nt = $node->node_type;
29     if ($nt == 1) {
30     my $node_ns = $node->namespace_uri;
31     $node_ns = '' unless defined $node_ns;
32     my $node_ln = $node->manakai_local_name;
33     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
34     $self->{onerror}->(node => $node, type => 'element not allowed');
35     }
36 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
37 wakaba 1.3 } elsif ($nt == 5) {
38     unshift @nodes, @{$node->child_nodes};
39     }
40     }
41 wakaba 1.4 return ($new_todos);
42 wakaba 1.3 }; # $AnyChecker
43    
44 wakaba 1.1 my $ElementDefault = {
45     checker => sub {
46 wakaba 1.4 my ($self, $todo) = @_;
47     $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
48     return $AnyChecker->($self, $todo);
49 wakaba 1.1 },
50 wakaba 1.9 attrs_checker => sub {
51     my ($self, $todo) = @_;
52     for my $attr (@{$todo->{node}->attributes}) {
53     my $attr_ns = $attr->namespace_uri;
54     $attr_ns = '' unless defined $attr_ns;
55     my $attr_ln = $attr->manakai_local_name;
56     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
57     || $AttrChecker->{$attr_ns}->{''};
58     if ($checker) {
59     $checker->($self, $attr);
60     }
61     ## Don't check otherwise, since "element type not supported" warning
62     ## will be reported by the element checker.
63     }
64     },
65 wakaba 1.1 };
66    
67     my $Element = {};
68    
69     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
70    
71 wakaba 1.7 my $HTMLMetadataElements = {
72     $HTML_NS => {
73     qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1/,
74     },
75     };
76 wakaba 1.1
77 wakaba 1.2 my $HTMLSectioningElements = {
78     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
79     };
80 wakaba 1.1
81 wakaba 1.7 my $HTMLBlockLevelElements = {
82     $HTML_NS => {
83     qw/
84     section 1 nav 1 article 1 blockquote 1 aside 1
85     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
86     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
87     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
88     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
89     /,
90     },
91     };
92    
93     my $HTMLStrictlyInlineLevelElements = {
94     $HTML_NS => {
95     qw/
96     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
97     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
98     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
99     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
100     script 1 noscript 1 event-source 1 command 1 font 1
101     /,
102     },
103     };
104    
105     my $HTMLStructuredInlineLevelElements = {
106     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
107     };
108 wakaba 1.1
109 wakaba 1.6 my $HTMLInteractiveElements = {
110     $HTML_NS => {a => 1, details => 1, datagrid => 1},
111     };
112     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
113     ## of interactive elements
114 wakaba 1.1
115 wakaba 1.7 my $HTMLTransparentElements = {
116     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
117     ## NOTE: |html:noscript| is transparent if scripting is disabled.
118     };
119    
120     #my $HTMLSemiTransparentElements = {
121     # $HTML_NS => {qw/video 1 audio 1/},
122     #};
123    
124     my $HTMLEmbededElements = {
125     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
126     };
127 wakaba 1.1
128     ## Empty
129     my $HTMLEmptyChecker = sub {
130 wakaba 1.4 my ($self, $todo) = @_;
131     my $el = $todo->{node};
132     my $new_todos = [];
133 wakaba 1.1 my @nodes = (@{$el->child_nodes});
134    
135     while (@nodes) {
136     my $node = shift @nodes;
137 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
138    
139 wakaba 1.1 my $nt = $node->node_type;
140     if ($nt == 1) {
141 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
142     $self->{onerror}->(node => $node, type => 'element not allowed');
143     my ($sib, $ch) = $self->_check_get_children ($node);
144     unshift @nodes, @$sib;
145 wakaba 1.4 push @$new_todos, @$ch;
146 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
147 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
148     $self->{onerror}->(node => $node, type => 'character not allowed');
149     }
150 wakaba 1.1 } elsif ($nt == 5) {
151     unshift @nodes, @{$node->child_nodes};
152     }
153     }
154 wakaba 1.4 return ($new_todos);
155 wakaba 1.1 };
156    
157     ## Text
158     my $HTMLTextChecker = sub {
159 wakaba 1.4 my ($self, $todo) = @_;
160     my $el = $todo->{node};
161     my $new_todos = [];
162 wakaba 1.1 my @nodes = (@{$el->child_nodes});
163    
164     while (@nodes) {
165     my $node = shift @nodes;
166 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
167    
168 wakaba 1.1 my $nt = $node->node_type;
169     if ($nt == 1) {
170 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
171     $self->{onerror}->(node => $node, type => 'element not allowed');
172     my ($sib, $ch) = $self->_check_get_children ($node);
173     unshift @nodes, @$sib;
174 wakaba 1.4 push @$new_todos, @$ch;
175 wakaba 1.1 } elsif ($nt == 5) {
176     unshift @nodes, @{$node->child_nodes};
177     }
178     }
179 wakaba 1.4 return ($new_todos);
180 wakaba 1.1 };
181    
182     ## Zero or more |html:style| elements,
183     ## followed by zero or more block-level elements
184     my $HTMLStylableBlockChecker = sub {
185 wakaba 1.4 my ($self, $todo) = @_;
186     my $el = $todo->{node};
187     my $new_todos = [];
188 wakaba 1.1 my @nodes = (@{$el->child_nodes});
189    
190     my $has_non_style;
191     while (@nodes) {
192     my $node = shift @nodes;
193 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
194    
195 wakaba 1.1 my $nt = $node->node_type;
196     if ($nt == 1) {
197 wakaba 1.2 my $node_ns = $node->namespace_uri;
198     $node_ns = '' unless defined $node_ns;
199     my $node_ln = $node->manakai_local_name;
200 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
201 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
202 wakaba 1.6 $not_allowed = 1 if $has_non_style;
203 wakaba 1.7 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
204     $has_non_style = 1;
205 wakaba 1.1 } else {
206     $has_non_style = 1;
207 wakaba 1.7 $not_allowed = 1;
208 wakaba 1.1 }
209 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
210     if $not_allowed;
211 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
212     unshift @nodes, @$sib;
213 wakaba 1.4 push @$new_todos, @$ch;
214 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
215     if ($node->data =~ /[^\x09-\x0D\x20]/) {
216 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
217 wakaba 1.1 }
218     } elsif ($nt == 5) {
219     unshift @nodes, @{$node->child_nodes};
220     }
221     }
222 wakaba 1.4 return ($new_todos);
223 wakaba 1.1 }; # $HTMLStylableBlockChecker
224    
225     ## Zero or more block-level elements
226     my $HTMLBlockChecker = sub {
227 wakaba 1.4 my ($self, $todo) = @_;
228     my $el = $todo->{node};
229     my $new_todos = [];
230 wakaba 1.1 my @nodes = (@{$el->child_nodes});
231    
232     while (@nodes) {
233     my $node = shift @nodes;
234 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
235    
236 wakaba 1.1 my $nt = $node->node_type;
237     if ($nt == 1) {
238 wakaba 1.2 my $node_ns = $node->namespace_uri;
239     $node_ns = '' unless defined $node_ns;
240     my $node_ln = $node->manakai_local_name;
241 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
242 wakaba 1.7 $not_allowed = 1
243     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
244 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
245     if $not_allowed;
246 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
247     unshift @nodes, @$sib;
248 wakaba 1.4 push @$new_todos, @$ch;
249 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
250     if ($node->data =~ /[^\x09-\x0D\x20]/) {
251 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
252 wakaba 1.1 }
253     } elsif ($nt == 5) {
254     unshift @nodes, @{$node->child_nodes};
255     }
256     }
257 wakaba 1.4 return ($new_todos);
258 wakaba 1.1 }; # $HTMLBlockChecker
259    
260     ## Inline-level content
261     my $HTMLInlineChecker = sub {
262 wakaba 1.4 my ($self, $todo) = @_;
263     my $el = $todo->{node};
264     my $new_todos = [];
265 wakaba 1.1 my @nodes = (@{$el->child_nodes});
266    
267     while (@nodes) {
268     my $node = shift @nodes;
269 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
270    
271 wakaba 1.1 my $nt = $node->node_type;
272     if ($nt == 1) {
273 wakaba 1.2 my $node_ns = $node->namespace_uri;
274     $node_ns = '' unless defined $node_ns;
275     my $node_ln = $node->manakai_local_name;
276 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
277 wakaba 1.7 $not_allowed = 1
278     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
279     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
280 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
281     if $not_allowed;
282 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
283     unshift @nodes, @$sib;
284 wakaba 1.4 push @$new_todos, @$ch;
285 wakaba 1.1 } elsif ($nt == 5) {
286     unshift @nodes, @{$node->child_nodes};
287     }
288     }
289 wakaba 1.4
290     for (@$new_todos) {
291     $_->{inline} = 1;
292     }
293     return ($new_todos);
294     }; # $HTMLInlineChecker
295 wakaba 1.1
296     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
297     ## TODO: check significant content
298    
299     ## Strictly inline-level content
300     my $HTMLStrictlyInlineChecker = sub {
301 wakaba 1.4 my ($self, $todo) = @_;
302     my $el = $todo->{node};
303     my $new_todos = [];
304 wakaba 1.1 my @nodes = (@{$el->child_nodes});
305    
306     while (@nodes) {
307     my $node = shift @nodes;
308 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
309    
310 wakaba 1.1 my $nt = $node->node_type;
311     if ($nt == 1) {
312 wakaba 1.2 my $node_ns = $node->namespace_uri;
313     $node_ns = '' unless defined $node_ns;
314     my $node_ln = $node->manakai_local_name;
315 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
316 wakaba 1.7 $not_allowed = 1
317     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
318 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
319     if $not_allowed;
320 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
321     unshift @nodes, @$sib;
322 wakaba 1.4 push @$new_todos, @$ch;
323 wakaba 1.1 } elsif ($nt == 5) {
324     unshift @nodes, @{$node->child_nodes};
325     }
326     }
327 wakaba 1.4
328     for (@$new_todos) {
329     $_->{inline} = 1;
330     $_->{strictly_inline} = 1;
331     }
332     return ($new_todos);
333 wakaba 1.1 }; # $HTMLStrictlyInlineChecker
334    
335     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
336     ## TODO: check significant content
337    
338 wakaba 1.4 ## Inline-level or strictly inline-kevek content
339     my $HTMLInlineOrStrictlyInlineChecker = sub {
340     my ($self, $todo) = @_;
341     my $el = $todo->{node};
342     my $new_todos = [];
343     my @nodes = (@{$el->child_nodes});
344    
345     while (@nodes) {
346     my $node = shift @nodes;
347     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
348    
349     my $nt = $node->node_type;
350     if ($nt == 1) {
351     my $node_ns = $node->namespace_uri;
352     $node_ns = '' unless defined $node_ns;
353     my $node_ln = $node->manakai_local_name;
354 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
355 wakaba 1.7 if ($todo->{strictly_inline}) {
356     $not_allowed = 1
357     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
358     } else {
359     $not_allowed = 1
360     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
361     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
362     }
363 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
364     if $not_allowed;
365 wakaba 1.4 my ($sib, $ch) = $self->_check_get_children ($node);
366     unshift @nodes, @$sib;
367     push @$new_todos, @$ch;
368     } elsif ($nt == 5) {
369     unshift @nodes, @{$node->child_nodes};
370     }
371     }
372    
373     for (@$new_todos) {
374     $_->{inline} = 1;
375     $_->{strictly_inline} = 1;
376     }
377     return ($new_todos);
378     }; # $HTMLInlineOrStrictlyInlineChecker
379    
380 wakaba 1.6 my $HTMLSignificantInlineOrStrictlyInlineChecker
381     = $HTMLInlineOrStrictlyInlineChecker;
382     ## TODO: check significant content
383    
384 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
385 wakaba 1.4 my ($self, $todo) = @_;
386     my $el = $todo->{node};
387     my $new_todos = [];
388 wakaba 1.1 my @nodes = (@{$el->child_nodes});
389    
390     my $content = 'block-or-inline'; # or 'block' or 'inline'
391     my @block_not_inline;
392     while (@nodes) {
393     my $node = shift @nodes;
394 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
395    
396 wakaba 1.1 my $nt = $node->node_type;
397     if ($nt == 1) {
398 wakaba 1.2 my $node_ns = $node->namespace_uri;
399     $node_ns = '' unless defined $node_ns;
400     my $node_ln = $node->manakai_local_name;
401 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
402 wakaba 1.1 if ($content eq 'block') {
403 wakaba 1.7 $not_allowed = 1
404     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
405 wakaba 1.1 } elsif ($content eq 'inline') {
406 wakaba 1.7 $not_allowed = 1
407     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
408     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
409 wakaba 1.1 } else {
410 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
411     my $is_inline
412     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
413     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
414 wakaba 1.1
415 wakaba 1.6 push @block_not_inline, $node
416     if $is_block and not $is_inline and not $not_allowed;
417 wakaba 1.1 unless ($is_block) {
418     $content = 'inline';
419     for (@block_not_inline) {
420 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
421 wakaba 1.1 }
422 wakaba 1.6 $not_allowed = 1 unless $is_inline;
423 wakaba 1.1 }
424     }
425 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
426     if $not_allowed;
427 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
428     unshift @nodes, @$sib;
429 wakaba 1.4 push @$new_todos, @$ch;
430 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
431     if ($node->data =~ /[^\x09-\x0D\x20]/) {
432     if ($content eq 'block') {
433 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
434 wakaba 1.1 } else {
435     $content = 'inline';
436     for (@block_not_inline) {
437 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
438 wakaba 1.1 }
439     }
440     }
441     } elsif ($nt == 5) {
442     unshift @nodes, @{$node->child_nodes};
443     }
444     }
445 wakaba 1.4
446     if ($content eq 'inline') {
447     for (@$new_todos) {
448     $_->{inline} = 1;
449     }
450     }
451     return ($new_todos);
452 wakaba 1.1 };
453    
454 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
455     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
456     my ($elnsuri, $ellname) = @_;
457     return sub {
458 wakaba 1.4 my ($self, $todo) = @_;
459     my $el = $todo->{node};
460     my $new_todos = [];
461 wakaba 1.2 my @nodes = (@{$el->child_nodes});
462    
463     my $has_non_style;
464     my $content = 'block-or-inline'; # or 'block' or 'inline'
465     my @block_not_inline;
466     while (@nodes) {
467     my $node = shift @nodes;
468     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
469    
470     my $nt = $node->node_type;
471     if ($nt == 1) {
472     my $node_ns = $node->namespace_uri;
473     $node_ns = '' unless defined $node_ns;
474     my $node_ln = $node->manakai_local_name;
475 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
476 wakaba 1.8 if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
477 wakaba 1.6 $not_allowed = 1 if $has_non_style;
478 wakaba 1.2 } elsif ($content eq 'block') {
479     $has_non_style = 1;
480 wakaba 1.7 $not_allowed = 1
481     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
482 wakaba 1.2 } elsif ($content eq 'inline') {
483     $has_non_style = 1;
484 wakaba 1.7 $not_allowed = 1
485     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
486     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
487 wakaba 1.2 } else {
488     $has_non_style = 1;
489 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
490     my $is_inline
491     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
492     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
493 wakaba 1.2
494 wakaba 1.6 push @block_not_inline, $node
495     if $is_block and not $is_inline and not $not_allowed;
496 wakaba 1.2 unless ($is_block) {
497     $content = 'inline';
498     for (@block_not_inline) {
499     $self->{onerror}->(node => $_, type => 'element not allowed');
500     }
501 wakaba 1.6 $not_allowed = 1 unless $is_inline;
502 wakaba 1.1 }
503     }
504 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
505     if $not_allowed;
506 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
507     unshift @nodes, @$sib;
508 wakaba 1.4 push @$new_todos, @$ch;
509 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
510     if ($node->data =~ /[^\x09-\x0D\x20]/) {
511     $has_non_style = 1;
512     if ($content eq 'block') {
513     $self->{onerror}->(node => $node, type => 'character not allowed');
514     } else {
515     $content = 'inline';
516     for (@block_not_inline) {
517     $self->{onerror}->(node => $_, type => 'element not allowed');
518     }
519 wakaba 1.1 }
520     }
521 wakaba 1.2 } elsif ($nt == 5) {
522     unshift @nodes, @{$node->child_nodes};
523 wakaba 1.1 }
524     }
525 wakaba 1.4
526     if ($content eq 'inline') {
527     for (@$new_todos) {
528     $_->{inline} = 1;
529     }
530     }
531     return ($new_todos);
532 wakaba 1.2 };
533     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
534 wakaba 1.1
535     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
536    
537 wakaba 1.10 my $GetHTMLEnumeratedAttrChecker = sub {
538     my $states = shift; # {value => conforming ? 1 : -1}
539     return sub {
540     my ($self, $attr) = @_;
541     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
542     if ($states->{$value} > 0) {
543     #
544     } elsif ($states->{$value}) {
545     $self->{onerror}->(node => $attr,
546     type => 'non-conforming enumerated attribute value');
547     } else {
548     $self->{onerror}->(node => $attr,
549     type => 'invalid enumerated attribute value');
550     }
551     };
552     }; # $GetHTMLEnumeratedAttrChecker
553 wakaba 1.9
554 wakaba 1.10 my $GetHTMLBooleanAttrChecker = sub {
555     my $local_name = shift;
556     return sub {
557     my ($self, $attr) = @_;
558     my $value = $attr->value;
559     unless ($value eq $local_name or $value eq '') {
560     $self->{onerror}->(node => $attr,
561     type => 'invalid boolean attribute value');
562     }
563     };
564     }; # $GetHTMLBooleanAttrChecker
565 wakaba 1.9
566 wakaba 1.10 my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
567     my ($self, $attr) = @_;
568     my %word;
569     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
570     unless ($word{$word}) {
571     $word{$word} = 1;
572 wakaba 1.9 } else {
573 wakaba 1.10 $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
574 wakaba 1.9 }
575     }
576 wakaba 1.10 }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
577    
578     my $HTMLAttrChecker = {
579     id => sub {
580     my ($self, $attr) = @_;
581     my $value = $attr->value;
582     unless (length $value > 0) {
583     ## NOTE: MUST contain at least one character
584     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
585     } else {
586     if ($self->{id}->{$value}) {
587     $self->{onerror}->(node => $attr, type => 'duplicate ID');
588     } else {
589     $self->{id}->{$value} = 1;
590     }
591     }
592     },
593     title => sub {}, ## NOTE: No conformance creteria
594     lang => sub {
595     ## TODO: RFC 3066 test
596     ## ISSUE: RFC 4646 (3066bis)?
597     ## TODO: HTML vs XHTML
598     },
599     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
600     class => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
601     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
602     ## TODO: tabindex
603     };
604    
605     my $GetHTMLAttrsChecker = sub {
606     my $element_specific_checker = shift;
607     return sub {
608     my ($self, $todo) = @_;
609     for my $attr (@{$todo->{node}->attributes}) {
610     my $attr_ns = $attr->namespace_uri;
611     $attr_ns = '' unless defined $attr_ns;
612     my $attr_ln = $attr->manakai_local_name;
613     my $checker;
614     if ($attr_ns eq '') {
615     $checker = $element_specific_checker->{$attr_ln}
616     || $HTMLAttrChecker->{$attr_ln};
617     }
618     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
619     || $AttrChecker->{$attr_ns}->{''};
620     if ($checker) {
621     $checker->($self, $attr);
622     } else {
623     $self->{onerror}->(node => $attr, type => 'attribute not supported');
624     ## ISSUE: No comformance createria for unknown attributes in the spec
625     }
626     }
627     };
628     }; # $GetHTMLAttrsChecker
629 wakaba 1.9
630     $Element->{$HTML_NS}->{''} = {
631 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
632 wakaba 1.9 checker => $ElementDefault->{checker},
633     };
634    
635 wakaba 1.1 $Element->{$HTML_NS}->{html} = {
636 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
637     xmlns => sub {
638     my ($self, $attr) = @_;
639     my $value = $attr->value;
640     unless ($value eq $HTML_NS) {
641     $self->{onerror}->(node => $attr, type => 'syntax error');
642     ## TODO: only in HTML documents
643     }
644     },
645     }),
646 wakaba 1.1 checker => sub {
647 wakaba 1.4 my ($self, $todo) = @_;
648     my $el = $todo->{node};
649     my $new_todos = [];
650 wakaba 1.1 my @nodes = (@{$el->child_nodes});
651    
652     my $phase = 'before head';
653     while (@nodes) {
654     my $node = shift @nodes;
655 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
656    
657 wakaba 1.1 my $nt = $node->node_type;
658     if ($nt == 1) {
659 wakaba 1.2 my $node_ns = $node->namespace_uri;
660     $node_ns = '' unless defined $node_ns;
661     my $node_ln = $node->manakai_local_name;
662 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
663 wakaba 1.1 if ($phase eq 'before head') {
664 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
665 wakaba 1.1 $phase = 'after head';
666 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
667     $self->{onerror}->(node => $node, type => 'ps element missing:head');
668 wakaba 1.1 $phase = 'after body';
669     } else {
670 wakaba 1.6 $not_allowed = 1;
671 wakaba 1.1 # before head
672     }
673     } elsif ($phase eq 'after head') {
674 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
675 wakaba 1.1 $phase = 'after body';
676     } else {
677 wakaba 1.6 $not_allowed = 1;
678 wakaba 1.1 # after head
679     }
680     } else { #elsif ($phase eq 'after body') {
681 wakaba 1.6 $not_allowed = 1;
682 wakaba 1.1 # after body
683     }
684 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
685     if $not_allowed;
686 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
687     unshift @nodes, @$sib;
688 wakaba 1.4 push @$new_todos, @$ch;
689 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
690     if ($node->data =~ /[^\x09-\x0D\x20]/) {
691 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
692 wakaba 1.1 }
693     } elsif ($nt == 5) {
694     unshift @nodes, @{$node->child_nodes};
695     }
696     }
697 wakaba 1.3
698     if ($phase eq 'before head') {
699     $self->{onerror}->(node => $el, type => 'child element missing:head');
700     $self->{onerror}->(node => $el, type => 'child element missing:body');
701     } elsif ($phase eq 'after head') {
702     $self->{onerror}->(node => $el, type => 'child element missing:body');
703     }
704    
705 wakaba 1.4 return ($new_todos);
706 wakaba 1.1 },
707     };
708    
709     $Element->{$HTML_NS}->{head} = {
710 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
711 wakaba 1.1 checker => sub {
712 wakaba 1.4 my ($self, $todo) = @_;
713     my $el = $todo->{node};
714     my $new_todos = [];
715 wakaba 1.1 my @nodes = (@{$el->child_nodes});
716    
717     my $has_title;
718 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
719 wakaba 1.1 while (@nodes) {
720     my $node = shift @nodes;
721 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
722    
723 wakaba 1.1 my $nt = $node->node_type;
724     if ($nt == 1) {
725 wakaba 1.2 my $node_ns = $node->namespace_uri;
726     $node_ns = '' unless defined $node_ns;
727     my $node_ln = $node->manakai_local_name;
728 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
729 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
730 wakaba 1.3 $phase = 'after base';
731 wakaba 1.1 unless ($has_title) {
732     $has_title = 1;
733     } else {
734 wakaba 1.6 $not_allowed = 1;
735 wakaba 1.1 }
736 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
737 wakaba 1.1 if ($node->has_attribute_ns (undef, 'charset')) {
738 wakaba 1.3 if ($phase eq 'initial') {
739     $phase = 'after charset';
740 wakaba 1.1 } else {
741 wakaba 1.6 $not_allowed = 1;
742 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
743 wakaba 1.1 }
744     } else {
745 wakaba 1.3 $phase = 'after base';
746 wakaba 1.1 }
747 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
748 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
749     $phase = 'after base';
750 wakaba 1.1 } else {
751 wakaba 1.6 $not_allowed = 1;
752 wakaba 1.1 }
753 wakaba 1.7 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
754     $phase = 'after base';
755 wakaba 1.1 } else {
756 wakaba 1.7 $not_allowed = 1;
757 wakaba 1.1 }
758 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
759     if $not_allowed;
760 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
761     unshift @nodes, @$sib;
762 wakaba 1.4 push @$new_todos, @$ch;
763 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
764     if ($node->data =~ /[^\x09-\x0D\x20]/) {
765 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
766 wakaba 1.1 }
767     } elsif ($nt == 5) {
768     unshift @nodes, @{$node->child_nodes};
769     }
770     }
771     unless ($has_title) {
772 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
773 wakaba 1.1 }
774 wakaba 1.4 return ($new_todos);
775 wakaba 1.1 },
776     };
777    
778     $Element->{$HTML_NS}->{title} = {
779 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
780 wakaba 1.1 checker => $HTMLTextChecker,
781     };
782    
783     $Element->{$HTML_NS}->{base} = {
784 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
785     href => sub {}, ## TODO: IRI
786     ## TODO: target
787     }),
788 wakaba 1.1 checker => $HTMLEmptyChecker,
789     };
790    
791     $Element->{$HTML_NS}->{link} = {
792 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
793 wakaba 1.1 checker => $HTMLEmptyChecker,
794     };
795    
796     $Element->{$HTML_NS}->{meta} = {
797 wakaba 1.10 attrs_checker => sub {
798     my ($self, $todo) = @_;
799     my $name_attr;
800     my $http_equiv_attr;
801     my $charset_attr;
802     my $content_attr;
803     for my $attr (@{$todo->{node}->attributes}) {
804     my $attr_ns = $attr->namespace_uri;
805     $attr_ns = '' unless defined $attr_ns;
806     my $attr_ln = $attr->manakai_local_name;
807     my $checker;
808     if ($attr_ns eq '') {
809     if ($attr_ln eq 'content') {
810     $content_attr = $attr;
811     $checker = 1;
812     } elsif ($attr_ln eq 'name') {
813     $name_attr = $attr;
814     $checker = 1;
815     } elsif ($attr_ln eq 'http-equiv') {
816     $http_equiv_attr = $attr;
817     $checker = 1;
818     } elsif ($attr_ln eq 'charset') {
819     $charset_attr = $attr;
820     $checker = 1;
821     } else {
822     $checker = $HTMLAttrChecker->{$attr_ln}
823     || $AttrChecker->{$attr_ns}->{$attr_ln}
824     || $AttrChecker->{$attr_ns}->{''};
825     }
826     } else {
827     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
828     || $AttrChecker->{$attr_ns}->{''};
829     }
830     if ($checker) {
831     $checker->($self, $attr) if ref $checker;
832     } else {
833     $self->{onerror}->(node => $attr, type => 'attribute not supported');
834     ## ISSUE: No comformance createria for unknown attributes in the spec
835     }
836     }
837    
838     if (defined $name_attr) {
839     if (defined $http_equiv_attr) {
840     $self->{onerror}->(node => $http_equiv_attr,
841     type => 'attribute not allowed');
842     } elsif (defined $charset_attr) {
843     $self->{onerror}->(node => $charset_attr,
844     type => 'attribute not allowed');
845     }
846     my $metadata_name = $name_attr->value;
847     my $metadata_value;
848     if (defined $content_attr) {
849     $metadata_value = $content_attr->value;
850     } else {
851     $self->{onerror}->(node => $todo->{node},
852     type => 'attribute missing:content');
853     $metadata_value = '';
854     }
855     } elsif (defined $http_equiv_attr) {
856     if (defined $charset_attr) {
857     $self->{onerror}->(node => $charset_attr,
858     type => 'attribute not allowed');
859     }
860     unless (defined $content_attr) {
861     $self->{onerror}->(node => $todo->{node},
862     type => 'attribute missing:content');
863     }
864     } elsif (defined $charset_attr) {
865     if (defined $content_attr) {
866     $self->{onerror}->(node => $content_attr,
867     type => 'attribute not allowed');
868     }
869     ## TODO: Allowed only in HTML documents
870     } else {
871     if (defined $content_attr) {
872     $self->{onerror}->(node => $content_attr,
873     type => 'attribute not allowed');
874     $self->{onerror}->(node => $todo->{node},
875     type => 'attribute missing:name|http-equiv');
876     } else {
877     $self->{onerror}->(node => $todo->{node},
878     type => 'attribute missing:name|http-equiv|charset');
879     }
880     }
881    
882     ## TODO: metadata conformance
883    
884     ## TODO: pragma conformance
885     if (defined $http_equiv_attr) { ## An enumerated attribute
886     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
887     if ({
888     'refresh' => 1,
889     'default-style' => 1,
890     }->{$keyword}) {
891     #
892     } else {
893     $self->{onerror}->(node => $http_equiv_attr,
894     type => 'invalid enumerated attribute value');
895     }
896     }
897    
898     ## TODO: charset
899     },
900 wakaba 1.1 checker => $HTMLEmptyChecker,
901     };
902    
903     ## NOTE: |html:style| has no conformance creteria on content model
904 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
905 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
906     ## TODO: type
907     ## TODO: media
908     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
909     ## NOTE: |title| has special semantics for |style|s, but is syntactically
910     ## not different
911     }),
912 wakaba 1.3 checker => $AnyChecker,
913     };
914 wakaba 1.1
915     $Element->{$HTML_NS}->{body} = {
916 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
917 wakaba 1.1 checker => $HTMLBlockChecker,
918     };
919    
920     $Element->{$HTML_NS}->{section} = {
921 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
922 wakaba 1.1 checker => $HTMLStylableBlockChecker,
923     };
924    
925     $Element->{$HTML_NS}->{nav} = {
926 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
927 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
928     };
929    
930     $Element->{$HTML_NS}->{article} = {
931 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
932 wakaba 1.1 checker => $HTMLStylableBlockChecker,
933     };
934    
935     $Element->{$HTML_NS}->{blockquote} = {
936 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
937 wakaba 1.1 checker => $HTMLBlockChecker,
938     };
939    
940     $Element->{$HTML_NS}->{aside} = {
941 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
942 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
943 wakaba 1.1 };
944    
945     $Element->{$HTML_NS}->{h1} = {
946 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
947 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
948     };
949    
950     $Element->{$HTML_NS}->{h2} = {
951 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
952 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
953     };
954    
955     $Element->{$HTML_NS}->{h3} = {
956 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
957 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
958     };
959    
960     $Element->{$HTML_NS}->{h4} = {
961 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
962 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
963     };
964    
965     $Element->{$HTML_NS}->{h5} = {
966 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
967 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
968     };
969    
970     $Element->{$HTML_NS}->{h6} = {
971 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
972 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
973     };
974    
975     ## TODO: header
976    
977 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
978 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
979 wakaba 1.2 checker => sub { ## block -hn -header -footer -sectioning or inline
980 wakaba 1.4 my ($self, $todo) = @_;
981     my $el = $todo->{node};
982     my $new_todos = [];
983 wakaba 1.2 my @nodes = (@{$el->child_nodes});
984    
985     my $content = 'block-or-inline'; # or 'block' or 'inline'
986     my @block_not_inline;
987     while (@nodes) {
988     my $node = shift @nodes;
989     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
990    
991     my $nt = $node->node_type;
992     if ($nt == 1) {
993     my $node_ns = $node->namespace_uri;
994     $node_ns = '' unless defined $node_ns;
995     my $node_ln = $node->manakai_local_name;
996 wakaba 1.6 my $not_allowed;
997 wakaba 1.2 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
998 wakaba 1.6 $not_allowed = 1;
999 wakaba 1.2 } elsif ($node_ns eq $HTML_NS and
1000     {
1001     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1002     }->{$node_ln}) {
1003 wakaba 1.6 $not_allowed = 1;
1004 wakaba 1.2 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1005 wakaba 1.6 $not_allowed = 1;
1006 wakaba 1.2 }
1007     if ($content eq 'block') {
1008 wakaba 1.7 $not_allowed = 1
1009     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1010 wakaba 1.2 } elsif ($content eq 'inline') {
1011 wakaba 1.7 $not_allowed = 1
1012     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1013     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1014 wakaba 1.2 } else {
1015 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1016     my $is_inline
1017     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1018     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1019 wakaba 1.2
1020 wakaba 1.6 push @block_not_inline, $node
1021     if $is_block and not $is_inline and not $not_allowed;
1022 wakaba 1.2 unless ($is_block) {
1023     $content = 'inline';
1024     for (@block_not_inline) {
1025     $self->{onerror}->(node => $_, type => 'element not allowed');
1026     }
1027 wakaba 1.6 $not_allowed = 1 unless $is_inline;
1028 wakaba 1.2 }
1029     }
1030 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1031     if $not_allowed;
1032 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1033     unshift @nodes, @$sib;
1034 wakaba 1.4 push @$new_todos, @$ch;
1035 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
1036     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1037     if ($content eq 'block') {
1038     $self->{onerror}->(node => $node, type => 'character not allowed');
1039     } else {
1040     $content = 'inline';
1041     for (@block_not_inline) {
1042     $self->{onerror}->(node => $_, type => 'element not allowed');
1043     }
1044     }
1045     }
1046     } elsif ($nt == 5) {
1047     unshift @nodes, @{$node->child_nodes};
1048     }
1049     }
1050    
1051     my $end = $self->_add_minuses
1052     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1053     $HTMLSectioningElements);
1054 wakaba 1.4 push @$new_todos, $end;
1055 wakaba 1.2
1056 wakaba 1.4 if ($content eq 'inline') {
1057     for (@$new_todos) {
1058     $_->{inline} = 1;
1059     }
1060     }
1061    
1062     return ($new_todos);
1063 wakaba 1.2 },
1064     };
1065 wakaba 1.1
1066     $Element->{$HTML_NS}->{address} = {
1067 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1068 wakaba 1.1 checker => $HTMLInlineChecker,
1069     };
1070    
1071     $Element->{$HTML_NS}->{p} = {
1072 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1073 wakaba 1.1 checker => $HTMLSignificantInlineChecker,
1074     };
1075    
1076     $Element->{$HTML_NS}->{hr} = {
1077 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1078 wakaba 1.1 checker => $HTMLEmptyChecker,
1079     };
1080    
1081     $Element->{$HTML_NS}->{br} = {
1082 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1083 wakaba 1.1 checker => $HTMLEmptyChecker,
1084     };
1085    
1086     $Element->{$HTML_NS}->{dialog} = {
1087 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1088 wakaba 1.1 checker => sub {
1089 wakaba 1.4 my ($self, $todo) = @_;
1090     my $el = $todo->{node};
1091     my $new_todos = [];
1092 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1093    
1094     my $phase = 'before dt';
1095     while (@nodes) {
1096     my $node = shift @nodes;
1097 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1098    
1099 wakaba 1.1 my $nt = $node->node_type;
1100     if ($nt == 1) {
1101 wakaba 1.8 my $node_ns = $node->namespace_uri;
1102     $node_ns = '' unless defined $node_ns;
1103     my $node_ln = $node->manakai_local_name;
1104 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1105 wakaba 1.1 if ($phase eq 'before dt') {
1106 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1107 wakaba 1.1 $phase = 'before dd';
1108 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1109 wakaba 1.2 $self->{onerror}
1110 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1111 wakaba 1.1 $phase = 'before dt';
1112     } else {
1113 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1114 wakaba 1.1 }
1115     } else { # before dd
1116 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1117 wakaba 1.1 $phase = 'before dt';
1118 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1119 wakaba 1.2 $self->{onerror}
1120 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
1121 wakaba 1.1 $phase = 'before dd';
1122     } else {
1123 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1124 wakaba 1.1 }
1125     }
1126 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1127     unshift @nodes, @$sib;
1128 wakaba 1.4 push @$new_todos, @$ch;
1129 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1130     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1131 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1132 wakaba 1.1 }
1133     } elsif ($nt == 5) {
1134     unshift @nodes, @{$node->child_nodes};
1135     }
1136     }
1137     if ($phase eq 'before dd') {
1138 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1139 wakaba 1.1 }
1140 wakaba 1.4 return ($new_todos);
1141 wakaba 1.1 },
1142     };
1143    
1144     $Element->{$HTML_NS}->{pre} = {
1145 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1146 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1147     };
1148    
1149     $Element->{$HTML_NS}->{ol} = {
1150 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1151 wakaba 1.1 checker => sub {
1152 wakaba 1.4 my ($self, $todo) = @_;
1153     my $el = $todo->{node};
1154     my $new_todos = [];
1155 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1156    
1157     while (@nodes) {
1158     my $node = shift @nodes;
1159 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1160    
1161 wakaba 1.1 my $nt = $node->node_type;
1162     if ($nt == 1) {
1163 wakaba 1.8 my $node_ns = $node->namespace_uri;
1164     $node_ns = '' unless defined $node_ns;
1165     my $node_ln = $node->manakai_local_name;
1166 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1167 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1168 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1169 wakaba 1.1 }
1170 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1171     unshift @nodes, @$sib;
1172 wakaba 1.4 push @$new_todos, @$ch;
1173 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1174     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1175 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1176 wakaba 1.1 }
1177     } elsif ($nt == 5) {
1178     unshift @nodes, @{$node->child_nodes};
1179     }
1180     }
1181 wakaba 1.4
1182     if ($todo->{inline}) {
1183     for (@$new_todos) {
1184     $_->{inline} = 1;
1185     }
1186     }
1187     return ($new_todos);
1188 wakaba 1.1 },
1189     };
1190    
1191     $Element->{$HTML_NS}->{ul} = {
1192 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1193 wakaba 1.1 checker => $Element->{$HTML_NS}->{ol}->{checker},
1194     };
1195    
1196 wakaba 1.5
1197     $Element->{$HTML_NS}->{li} = {
1198 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1199 wakaba 1.5 checker => sub {
1200     my ($self, $todo) = @_;
1201     if ($todo->{inline}) {
1202     return $HTMLInlineChecker->($self, $todo);
1203     } else {
1204     return $HTMLBlockOrInlineChecker->($self, $todo);
1205     }
1206     },
1207     };
1208 wakaba 1.1
1209     $Element->{$HTML_NS}->{dl} = {
1210 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1211 wakaba 1.1 checker => sub {
1212 wakaba 1.4 my ($self, $todo) = @_;
1213     my $el = $todo->{node};
1214     my $new_todos = [];
1215 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1216    
1217     my $phase = 'before dt';
1218     while (@nodes) {
1219     my $node = shift @nodes;
1220 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1221    
1222 wakaba 1.1 my $nt = $node->node_type;
1223     if ($nt == 1) {
1224 wakaba 1.8 my $node_ns = $node->namespace_uri;
1225     $node_ns = '' unless defined $node_ns;
1226     my $node_ln = $node->manakai_local_name;
1227 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1228 wakaba 1.1 if ($phase eq 'in dds') {
1229 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1230 wakaba 1.1 #$phase = 'in dds';
1231 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1232 wakaba 1.1 $phase = 'in dts';
1233     } else {
1234 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1235 wakaba 1.1 }
1236     } elsif ($phase eq 'in dts') {
1237 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1238 wakaba 1.1 #$phase = 'in dts';
1239 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1240 wakaba 1.1 $phase = 'in dds';
1241     } else {
1242 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1243 wakaba 1.1 }
1244     } else { # before dt
1245 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1246 wakaba 1.1 $phase = 'in dts';
1247 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1248 wakaba 1.2 $self->{onerror}
1249 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1250 wakaba 1.1 $phase = 'in dds';
1251     } else {
1252 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1253 wakaba 1.1 }
1254     }
1255 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1256     unshift @nodes, @$sib;
1257 wakaba 1.4 push @$new_todos, @$ch;
1258 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1259     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1260 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1261 wakaba 1.1 }
1262     } elsif ($nt == 5) {
1263     unshift @nodes, @{$node->child_nodes};
1264     }
1265     }
1266     if ($phase eq 'in dts') {
1267 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1268 wakaba 1.1 }
1269 wakaba 1.4
1270     if ($todo->{inline}) {
1271     for (@$new_todos) {
1272     $_->{inline} = 1;
1273     }
1274     }
1275     return ($new_todos);
1276 wakaba 1.1 },
1277     };
1278    
1279     $Element->{$HTML_NS}->{dt} = {
1280 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1281 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1282     };
1283    
1284 wakaba 1.4 $Element->{$HTML_NS}->{dd} = {
1285 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1286 wakaba 1.5 checker => $Element->{$HTML_NS}->{li}->{checker},
1287 wakaba 1.4 };
1288 wakaba 1.1
1289 wakaba 1.6 $Element->{$HTML_NS}->{a} = {
1290 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1291 wakaba 1.6 checker => sub {
1292     my ($self, $todo) = @_;
1293    
1294     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1295     my ($sib, $ch)
1296     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1297     push @$sib, $end;
1298     return ($sib, $ch);
1299     },
1300     };
1301 wakaba 1.1
1302 wakaba 1.4 $Element->{$HTML_NS}->{q} = {
1303 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1304 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1305     };
1306 wakaba 1.1
1307     $Element->{$HTML_NS}->{cite} = {
1308 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1309 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1310     };
1311    
1312 wakaba 1.4 $Element->{$HTML_NS}->{em} = {
1313 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1314 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1315     };
1316    
1317     $Element->{$HTML_NS}->{strong} = {
1318 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1319 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1320     };
1321 wakaba 1.1
1322 wakaba 1.4 $Element->{$HTML_NS}->{small} = {
1323 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1324 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1325     };
1326    
1327     $Element->{$HTML_NS}->{m} = {
1328 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1329 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1330     };
1331    
1332     $Element->{$HTML_NS}->{dfn} = {
1333 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1334 wakaba 1.4 checker => sub {
1335     my ($self, $todo) = @_;
1336    
1337     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1338     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1339     push @$sib, $end;
1340     return ($sib, $ch);
1341     },
1342     };
1343 wakaba 1.1
1344     $Element->{$HTML_NS}->{abbr} = {
1345 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1346 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1347     };
1348    
1349     $Element->{$HTML_NS}->{time} = {
1350 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1351 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1352     };
1353    
1354     $Element->{$HTML_NS}->{meter} = {
1355 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1356 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1357     };
1358    
1359     $Element->{$HTML_NS}->{progress} = {
1360 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1361 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1362     };
1363    
1364 wakaba 1.4 $Element->{$HTML_NS}->{code} = {
1365 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1366 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1367     };
1368 wakaba 1.1
1369     $Element->{$HTML_NS}->{var} = {
1370 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1371 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1372     };
1373    
1374 wakaba 1.4 $Element->{$HTML_NS}->{samp} = {
1375 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1376 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1377     };
1378 wakaba 1.1
1379     $Element->{$HTML_NS}->{kbd} = {
1380 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1381 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1382     };
1383    
1384     $Element->{$HTML_NS}->{sub} = {
1385 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1386 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1387     };
1388    
1389     $Element->{$HTML_NS}->{sup} = {
1390 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1391 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1392     };
1393    
1394 wakaba 1.4 $Element->{$HTML_NS}->{span} = {
1395 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1396 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1397     };
1398 wakaba 1.1
1399     $Element->{$HTML_NS}->{i} = {
1400 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1401 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1402     };
1403    
1404     $Element->{$HTML_NS}->{b} = {
1405 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1406 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1407     };
1408    
1409     $Element->{$HTML_NS}->{bdo} = {
1410 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1411 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1412     };
1413    
1414     $Element->{$HTML_NS}->{ins} = {
1415 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1416 wakaba 1.1 checker => $HTMLTransparentChecker,
1417     };
1418    
1419     $Element->{$HTML_NS}->{del} = {
1420 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1421 wakaba 1.1 checker => sub {
1422 wakaba 1.4 my ($self, $todo) = @_;
1423 wakaba 1.1
1424 wakaba 1.4 my $parent = $todo->{node}->manakai_parent_element;
1425 wakaba 1.1 if (defined $parent) {
1426     my $nsuri = $parent->namespace_uri;
1427     $nsuri = '' unless defined $nsuri;
1428     my $ln = $parent->manakai_local_name;
1429     my $eldef = $Element->{$nsuri}->{$ln} ||
1430     $Element->{$nsuri}->{''} ||
1431     $ElementDefault;
1432 wakaba 1.4 return $eldef->{checker}->($self, $todo);
1433 wakaba 1.1 } else {
1434 wakaba 1.4 return $HTMLBlockOrInlineChecker->($self, $todo);
1435 wakaba 1.1 }
1436     },
1437     };
1438    
1439     ## TODO: figure
1440    
1441     $Element->{$HTML_NS}->{img} = {
1442 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1443 wakaba 1.1 checker => $HTMLEmptyChecker,
1444     };
1445    
1446     $Element->{$HTML_NS}->{iframe} = {
1447 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1448 wakaba 1.1 checker => $HTMLTextChecker,
1449     };
1450    
1451     $Element->{$HTML_NS}->{embed} = {
1452 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1453 wakaba 1.1 checker => $HTMLEmptyChecker,
1454     };
1455    
1456     $Element->{$HTML_NS}->{param} = {
1457 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1458 wakaba 1.1 checker => $HTMLEmptyChecker,
1459     };
1460    
1461     ## TODO: object
1462    
1463 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
1464 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1465 wakaba 1.2 checker => sub {
1466 wakaba 1.4 my ($self, $todo) = @_;
1467 wakaba 1.2
1468 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1469     return $HTMLBlockOrInlineChecker->($self, $todo);
1470 wakaba 1.2 } else {
1471     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
1472 wakaba 1.4 ->($self, $todo);
1473 wakaba 1.2 }
1474     },
1475     };
1476    
1477     $Element->{$HTML_NS}->{audio} = {
1478 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1479 wakaba 1.2 checker => $Element->{$HTML_NS}->{audio}->{checker},
1480     };
1481 wakaba 1.1
1482     $Element->{$HTML_NS}->{source} = {
1483 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1484 wakaba 1.1 checker => $HTMLEmptyChecker,
1485     };
1486    
1487     $Element->{$HTML_NS}->{canvas} = {
1488 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1489 wakaba 1.1 checker => $HTMLInlineChecker,
1490     };
1491    
1492     $Element->{$HTML_NS}->{map} = {
1493 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1494 wakaba 1.1 checker => $HTMLBlockChecker,
1495     };
1496    
1497     $Element->{$HTML_NS}->{area} = {
1498 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1499 wakaba 1.1 checker => $HTMLEmptyChecker,
1500     };
1501     ## TODO: only in map
1502    
1503     $Element->{$HTML_NS}->{table} = {
1504 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1505 wakaba 1.1 checker => sub {
1506 wakaba 1.4 my ($self, $todo) = @_;
1507     my $el = $todo->{node};
1508     my $new_todos = [];
1509 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1510    
1511     my $phase = 'before caption';
1512     my $has_tfoot;
1513     while (@nodes) {
1514     my $node = shift @nodes;
1515 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1516    
1517 wakaba 1.1 my $nt = $node->node_type;
1518     if ($nt == 1) {
1519 wakaba 1.8 my $node_ns = $node->namespace_uri;
1520     $node_ns = '' unless defined $node_ns;
1521     my $node_ln = $node->manakai_local_name;
1522 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1523 wakaba 1.1 if ($phase eq 'in tbodys') {
1524 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1525 wakaba 1.1 #$phase = 'in tbodys';
1526     } elsif (not $has_tfoot and
1527 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1528 wakaba 1.1 $phase = 'after tfoot';
1529     $has_tfoot = 1;
1530     } else {
1531 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1532 wakaba 1.1 }
1533     } elsif ($phase eq 'in trs') {
1534 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1535 wakaba 1.1 #$phase = 'in trs';
1536     } elsif (not $has_tfoot and
1537 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1538 wakaba 1.1 $phase = 'after tfoot';
1539     $has_tfoot = 1;
1540     } else {
1541 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1542 wakaba 1.1 }
1543     } elsif ($phase eq 'after thead') {
1544 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1545 wakaba 1.1 $phase = 'in tbodys';
1546 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1547 wakaba 1.1 $phase = 'in trs';
1548 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1549 wakaba 1.1 $phase = 'in tbodys';
1550     $has_tfoot = 1;
1551     } else {
1552 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1553 wakaba 1.1 }
1554     } elsif ($phase eq 'in colgroup') {
1555 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
1556 wakaba 1.1 $phase = 'in colgroup';
1557 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
1558 wakaba 1.1 $phase = 'after thead';
1559 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1560 wakaba 1.1 $phase = 'in tbodys';
1561 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1562 wakaba 1.1 $phase = 'in trs';
1563 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1564 wakaba 1.1 $phase = 'in tbodys';
1565     $has_tfoot = 1;
1566     } else {
1567 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1568 wakaba 1.1 }
1569     } elsif ($phase eq 'before caption') {
1570 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
1571 wakaba 1.1 $phase = 'in colgroup';
1572 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
1573 wakaba 1.1 $phase = 'in colgroup';
1574 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
1575 wakaba 1.1 $phase = 'after thead';
1576 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
1577 wakaba 1.1 $phase = 'in tbodys';
1578 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1579 wakaba 1.1 $phase = 'in trs';
1580 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
1581 wakaba 1.1 $phase = 'in tbodys';
1582     $has_tfoot = 1;
1583     } else {
1584 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1585 wakaba 1.1 }
1586     } else { # after tfoot
1587 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1588 wakaba 1.1 }
1589 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1590     unshift @nodes, @$sib;
1591 wakaba 1.4 push @$new_todos, @$ch;
1592 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1593     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1594 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1595 wakaba 1.1 }
1596     } elsif ($nt == 5) {
1597     unshift @nodes, @{$node->child_nodes};
1598     }
1599     }
1600 wakaba 1.4 return ($new_todos);
1601 wakaba 1.1 },
1602     };
1603    
1604     $Element->{$HTML_NS}->{caption} = {
1605 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1606 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
1607     };
1608    
1609     $Element->{$HTML_NS}->{colgroup} = {
1610 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1611 wakaba 1.1 checker => sub {
1612 wakaba 1.4 my ($self, $todo) = @_;
1613     my $el = $todo->{node};
1614     my $new_todos = [];
1615 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1616    
1617     while (@nodes) {
1618     my $node = shift @nodes;
1619 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1620    
1621 wakaba 1.1 my $nt = $node->node_type;
1622     if ($nt == 1) {
1623 wakaba 1.8 my $node_ns = $node->namespace_uri;
1624     $node_ns = '' unless defined $node_ns;
1625     my $node_ln = $node->manakai_local_name;
1626 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1627 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
1628 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1629 wakaba 1.1 }
1630 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1631     unshift @nodes, @$sib;
1632 wakaba 1.4 push @$new_todos, @$ch;
1633 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1634     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1635 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1636 wakaba 1.1 }
1637     } elsif ($nt == 5) {
1638     unshift @nodes, @{$node->child_nodes};
1639     }
1640     }
1641 wakaba 1.4 return ($new_todos);
1642 wakaba 1.1 },
1643     };
1644    
1645     $Element->{$HTML_NS}->{col} = {
1646 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1647 wakaba 1.1 checker => $HTMLEmptyChecker,
1648     };
1649    
1650     $Element->{$HTML_NS}->{tbody} = {
1651 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1652 wakaba 1.1 checker => sub {
1653 wakaba 1.4 my ($self, $todo) = @_;
1654     my $el = $todo->{node};
1655     my $new_todos = [];
1656 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1657    
1658     my $has_tr;
1659     while (@nodes) {
1660     my $node = shift @nodes;
1661 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1662    
1663 wakaba 1.1 my $nt = $node->node_type;
1664     if ($nt == 1) {
1665 wakaba 1.8 my $node_ns = $node->namespace_uri;
1666     $node_ns = '' unless defined $node_ns;
1667     my $node_ln = $node->manakai_local_name;
1668 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1669 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
1670 wakaba 1.1 $has_tr = 1;
1671     } else {
1672 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1673 wakaba 1.1 }
1674 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1675     unshift @nodes, @$sib;
1676 wakaba 1.4 push @$new_todos, @$ch;
1677 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1678     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1679 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1680 wakaba 1.1 }
1681     } elsif ($nt == 5) {
1682     unshift @nodes, @{$node->child_nodes};
1683     }
1684     }
1685     unless ($has_tr) {
1686 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
1687 wakaba 1.1 }
1688 wakaba 1.4 return ($new_todos);
1689 wakaba 1.1 },
1690     };
1691    
1692     $Element->{$HTML_NS}->{thead} = {
1693 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1694 wakaba 1.1 checker => $Element->{$HTML_NS}->{tbody},
1695     };
1696    
1697     $Element->{$HTML_NS}->{tfoot} = {
1698 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1699 wakaba 1.1 checker => $Element->{$HTML_NS}->{tbody},
1700     };
1701    
1702     $Element->{$HTML_NS}->{tr} = {
1703 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1704 wakaba 1.1 checker => sub {
1705 wakaba 1.4 my ($self, $todo) = @_;
1706     my $el = $todo->{node};
1707     my $new_todos = [];
1708 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1709    
1710     my $has_td;
1711     while (@nodes) {
1712     my $node = shift @nodes;
1713 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1714    
1715 wakaba 1.1 my $nt = $node->node_type;
1716     if ($nt == 1) {
1717 wakaba 1.8 my $node_ns = $node->namespace_uri;
1718     $node_ns = '' unless defined $node_ns;
1719     my $node_ln = $node->manakai_local_name;
1720 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1721 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
1722 wakaba 1.1 $has_td = 1;
1723     } else {
1724 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1725 wakaba 1.1 }
1726 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1727     unshift @nodes, @$sib;
1728 wakaba 1.4 push @$new_todos, @$ch;
1729 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1730     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1731 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1732 wakaba 1.1 }
1733     } elsif ($nt == 5) {
1734     unshift @nodes, @{$node->child_nodes};
1735     }
1736     }
1737     unless ($has_td) {
1738 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
1739 wakaba 1.1 }
1740 wakaba 1.4 return ($new_todos);
1741 wakaba 1.1 },
1742     };
1743    
1744     $Element->{$HTML_NS}->{td} = {
1745 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1746 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1747     };
1748    
1749     $Element->{$HTML_NS}->{th} = {
1750 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1751 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1752     };
1753    
1754     ## TODO: forms
1755    
1756 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
1757 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1758 wakaba 1.2 checker => sub {
1759 wakaba 1.4 my ($self, $todo) = @_;
1760 wakaba 1.2
1761 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
1762     return $HTMLEmptyChecker->($self, $todo);
1763 wakaba 1.2 } else {
1764     ## NOTE: No content model conformance in HTML5 spec.
1765 wakaba 1.4 return $AnyChecker->($self, $todo);
1766 wakaba 1.2 }
1767     },
1768     };
1769    
1770     ## NOTE: When script is disabled.
1771     $Element->{$HTML_NS}->{noscript} = {
1772 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1773 wakaba 1.2 checker => sub {
1774 wakaba 1.4 my ($self, $todo) = @_;
1775 wakaba 1.1
1776 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
1777 wakaba 1.4 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
1778 wakaba 1.2 push @$sib, $end;
1779     return ($sib, $ch);
1780     },
1781     };
1782 wakaba 1.1
1783     $Element->{$HTML_NS}->{'event-source'} = {
1784 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1785 wakaba 1.1 checker => $HTMLEmptyChecker,
1786     };
1787    
1788     $Element->{$HTML_NS}->{details} = {
1789 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1790 wakaba 1.6 checker => sub {
1791     my ($self, $todo) = @_;
1792    
1793     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
1794     my ($sib, $ch)
1795     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
1796     ->($self, $todo);
1797     push @$sib, $end;
1798     return ($sib, $ch);
1799     },
1800 wakaba 1.1 };
1801    
1802     $Element->{$HTML_NS}->{datagrid} = {
1803 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1804 wakaba 1.6 checker => sub {
1805     my ($self, $todo) = @_;
1806    
1807     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
1808     my ($sib, $ch) = $HTMLBlockChecker->($self, $todo);
1809     push @$sib, $end;
1810     return ($sib, $ch);
1811     },
1812 wakaba 1.1 };
1813    
1814     $Element->{$HTML_NS}->{command} = {
1815 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1816 wakaba 1.1 checker => $HTMLEmptyChecker,
1817     };
1818    
1819     $Element->{$HTML_NS}->{menu} = {
1820 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1821 wakaba 1.1 checker => sub {
1822 wakaba 1.4 my ($self, $todo) = @_;
1823     my $el = $todo->{node};
1824     my $new_todos = [];
1825 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1826    
1827     my $content = 'li or inline';
1828     while (@nodes) {
1829     my $node = shift @nodes;
1830 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1831    
1832 wakaba 1.1 my $nt = $node->node_type;
1833     if ($nt == 1) {
1834 wakaba 1.2 my $node_ns = $node->namespace_uri;
1835     $node_ns = '' unless defined $node_ns;
1836     my $node_ln = $node->manakai_local_name;
1837 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1838 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1839 wakaba 1.1 if ($content eq 'inline') {
1840 wakaba 1.6 $not_allowed = 1;
1841 wakaba 1.1 } elsif ($content eq 'li or inline') {
1842     $content = 'li';
1843     }
1844     } else {
1845 wakaba 1.7 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1846     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
1847     $content = 'inline';
1848     } else {
1849 wakaba 1.6 $not_allowed = 1;
1850 wakaba 1.7 }
1851 wakaba 1.1 }
1852 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1853     if $not_allowed;
1854 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node);
1855     unshift @nodes, @$sib;
1856 wakaba 1.4 push @$new_todos, @$ch;
1857 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1858     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1859     if ($content eq 'li') {
1860 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1861 wakaba 1.1 } elsif ($content eq 'li or inline') {
1862     $content = 'inline';
1863     }
1864     }
1865     } elsif ($nt == 5) {
1866     unshift @nodes, @{$node->child_nodes};
1867     }
1868     }
1869 wakaba 1.4
1870     for (@$new_todos) {
1871     $_->{inline} = 1;
1872     }
1873     return ($new_todos);
1874 wakaba 1.1 },
1875     };
1876    
1877 wakaba 1.6 $Element->{$HTML_NS}->{legend} = {
1878 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1879 wakaba 1.6 checker => sub {
1880     my ($self, $todo) = @_;
1881    
1882     my $parent = $todo->{node}->manakai_parent_element;
1883     if (defined $parent) {
1884     my $nsuri = $parent->namespace_uri;
1885     $nsuri = '' unless defined $nsuri;
1886     my $ln = $parent->manakai_local_name;
1887     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
1888     return $HTMLInlineChecker->($self, $todo);
1889     } else {
1890     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1891     }
1892     } else {
1893     return $HTMLInlineChecker->($self, $todo);
1894     }
1895    
1896     ## ISSUE: Content model is defined only for fieldset/legend,
1897     ## details/legend, and figure/legend.
1898     },
1899     };
1900 wakaba 1.1
1901     $Element->{$HTML_NS}->{div} = {
1902 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1903 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1904 wakaba 1.1 };
1905    
1906     $Element->{$HTML_NS}->{font} = {
1907 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1908 wakaba 1.1 checker => $HTMLTransparentChecker,
1909     };
1910    
1911 wakaba 1.2 sub new ($) {
1912     return bless {}, shift;
1913     } # new
1914    
1915 wakaba 1.1 sub check_element ($$$) {
1916     my ($self, $el, $onerror) = @_;
1917    
1918 wakaba 1.2 $self->{minuses} = {};
1919     $self->{onerror} = $onerror;
1920 wakaba 1.10 $self->{id} = {};
1921 wakaba 1.2
1922 wakaba 1.4 my @todo = ({type => 'element', node => $el});
1923     while (@todo) {
1924     my $todo = shift @todo;
1925     if ($todo->{type} eq 'element') {
1926     my $nsuri = $todo->{node}->namespace_uri;
1927     $nsuri = '' unless defined $nsuri;
1928     my $ln = $todo->{node}->manakai_local_name;
1929     my $eldef = $Element->{$nsuri}->{$ln} ||
1930     $Element->{$nsuri}->{''} ||
1931     $ElementDefault;
1932 wakaba 1.9 $eldef->{attrs_checker}->($self, $todo);
1933 wakaba 1.4 my ($new_todos) = $eldef->{checker}->($self, $todo);
1934     push @todo, @$new_todos;
1935 wakaba 1.9 } elsif ($todo->{type} eq 'element-attributes') {
1936     my $nsuri = $todo->{node}->namespace_uri;
1937     $nsuri = '' unless defined $nsuri;
1938     my $ln = $todo->{node}->manakai_local_name;
1939     my $eldef = $Element->{$nsuri}->{$ln} ||
1940     $Element->{$nsuri}->{''} ||
1941     $ElementDefault;
1942     $eldef->{attrs_checker}->($self, $todo);
1943 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
1944     $self->_remove_minuses ($todo);
1945     }
1946 wakaba 1.1 }
1947     } # check_element
1948    
1949 wakaba 1.2 sub _add_minuses ($@) {
1950     my $self = shift;
1951     my $r = {};
1952     for my $list (@_) {
1953     for my $ns (keys %$list) {
1954     for my $ln (keys %{$list->{$ns}}) {
1955     unless ($self->{minuses}->{$ns}->{$ln}) {
1956     $self->{minuses}->{$ns}->{$ln} = 1;
1957     $r->{$ns}->{$ln} = 1;
1958     }
1959     }
1960     }
1961     }
1962 wakaba 1.4 return {type => 'plus', list => $r};
1963 wakaba 1.2 } # _add_minuses
1964    
1965     sub _remove_minuses ($$) {
1966 wakaba 1.4 my ($self, $todo) = @_;
1967     for my $ns (keys %{$todo->{list}}) {
1968     for my $ln (keys %{$todo->{list}->{$ns}}) {
1969     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
1970 wakaba 1.2 }
1971     }
1972     1;
1973     } # _remove_minuses
1974    
1975     sub _check_get_children ($$) {
1976     my ($self, $node) = @_;
1977 wakaba 1.4 my $new_todos = [];
1978 wakaba 1.2 my $sib = [];
1979     TP: {
1980     my $node_ns = $node->namespace_uri;
1981     $node_ns = '' unless defined $node_ns;
1982     my $node_ln = $node->manakai_local_name;
1983     if ($node_ns eq $HTML_NS) {
1984     if ($node_ln eq 'noscript') {
1985     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
1986     push @$sib, $end;
1987     }
1988     }
1989 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
1990     unshift @$sib, @{$node->child_nodes};
1991 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
1992 wakaba 1.7 last TP;
1993 wakaba 1.2 }
1994 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
1995 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
1996     unshift @$sib, @{$node->child_nodes};
1997 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
1998 wakaba 1.2 last TP;
1999     } else {
2000     my @cn = @{$node->child_nodes};
2001     CN: while (@cn) {
2002     my $cn = shift @cn;
2003     my $cnt = $cn->node_type;
2004     if ($cnt == 1) {
2005 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
2006     $cn_nsuri = '' unless defined $cn_nsuri;
2007     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
2008 wakaba 1.2 #
2009     } else {
2010     last CN;
2011     }
2012     } elsif ($cnt == 3 or $cnt == 4) {
2013     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
2014     last CN;
2015     }
2016     }
2017     } # CN
2018     unshift @$sib, @cn;
2019     }
2020     }
2021 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
2022 wakaba 1.2 } # TP
2023 wakaba 1.4 return ($sib, $new_todos);
2024 wakaba 1.2 } # _check_get_children
2025    
2026 wakaba 1.1 1;
2027 wakaba 1.10 # $Date: 2007/05/19 03:49:58 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24