/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.30 - (hide annotations) (download)
Sun Jun 24 14:24:21 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.29: +150 -37 lines
++ whatpm/t/ChangeLog	24 Jun 2007 14:19:51 -0000
	* content-model-1.dat: Tests for |footer|
	content model are added.

	* content-model-2.dat: Tests for |ping|
	and |tabindex| attributes are added.  Tests for |datetime|
	attribute of |ins| and |del| elements are added.

	* content-model-4.dat: New test data.

	* ContentChecker.t: |content-model-4.dat| is added.

2007-06-24  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	24 Jun 2007 14:20:06 -0000
	* URIChecker.pm (check_iri_reference): A |decode| method name was
	incorrect.

	* ContentChecker.pm: Support for the |footer| element.
	Check URI syntax for space-separated URI attributes.
	Support for the |tabindex| attribute.  Support
	for |datetime| attribute.

2007-06-24  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.18 require Whatpm::URIChecker;
5    
6 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
7     ## be applied to an in-memory representation (i.e. DOM)?
8    
9 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
10     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
11    
12     my $AttrChecker = {
13     $XML_NS => {
14 wakaba 1.13 space => sub {
15     my ($self, $attr) = @_;
16     my $value = $attr->value;
17     if ($value eq 'default' or $value eq 'preserve') {
18     #
19     } else {
20     ## NOTE: An XML "error"
21     $self->{onerror}->(node => $attr,
22     type => 'XML error:invalid xml:space value');
23     }
24     },
25     lang => sub {
26     ## NOTE: "The values of the attribute are language identifiers
27     ## as defined by [IETF RFC 3066], Tags for the Identification
28     ## of Languages, or its successor; in addition, the empty string
29     ## may be specified." ("may" in lower case)
30     ## TODO: xml:lang MUST NOT in HTML document
31     },
32     base => sub {
33     my ($self, $attr) = @_;
34     my $value = $attr->value;
35     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
36     $self->{onerror}->(node => $attr,
37     type => 'syntax error');
38     }
39 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
40     ## no author requirement on conformance in the XML Base specification.
41 wakaba 1.13 },
42     id => sub {
43     my ($self, $attr) = @_;
44     my $value = $attr->value;
45     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
46     $value =~ s/^\x20//;
47     $value =~ s/\x20$//;
48     ## TODO: NCName in XML 1.0 or 1.1
49     ## TODO: declared type is ID?
50     if ($self->{id}->{$value}) {
51     $self->{onerror}->(node => $attr, type => 'xml:id error:duplicate ID');
52     } else {
53     $self->{id}->{$value} = 1;
54     }
55     },
56 wakaba 1.9 },
57     $XMLNS_NS => {
58 wakaba 1.13 '' => sub {
59     my ($self, $attr) = @_;
60     my $ln = $attr->manakai_local_name;
61     my $value = $attr->value;
62     if ($value eq $XML_NS and $ln ne 'xml') {
63     $self->{onerror}
64     ->(node => $attr,
65     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
66     } elsif ($value eq $XMLNS_NS) {
67     $self->{onerror}
68     ->(node => $attr,
69     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
70     }
71     if ($ln eq 'xml' and $value ne $XML_NS) {
72     $self->{onerror}
73     ->(node => $attr,
74     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xml=');
75     } elsif ($ln eq 'xmlns') {
76     $self->{onerror}
77     ->(node => $attr,
78     type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xmlns=');
79     }
80     ## TODO: If XML 1.0 and empty
81     },
82     xmlns => sub {
83     my ($self, $attr) = @_;
84     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
85     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
86 wakaba 1.18 ## TODO: relative references are deprecated
87 wakaba 1.13 my $value = $attr->value;
88     if ($value eq $XML_NS) {
89     $self->{onerror}
90     ->(node => $attr,
91     type => 'NC:Reserved Prefixes and Namespace Names:=xml');
92     } elsif ($value eq $XMLNS_NS) {
93     $self->{onerror}
94     ->(node => $attr,
95     type => 'NC:Reserved Prefixes and Namespace Names:=xmlns');
96     }
97     },
98 wakaba 1.9 },
99     };
100    
101 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
102 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
103     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
104     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
105     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
106    
107 wakaba 1.3 ## ANY
108     my $AnyChecker = sub {
109 wakaba 1.4 my ($self, $todo) = @_;
110     my $el = $todo->{node};
111     my $new_todos = [];
112 wakaba 1.3 my @nodes = (@{$el->child_nodes});
113     while (@nodes) {
114     my $node = shift @nodes;
115     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
116    
117     my $nt = $node->node_type;
118     if ($nt == 1) {
119     my $node_ns = $node->namespace_uri;
120     $node_ns = '' unless defined $node_ns;
121     my $node_ln = $node->manakai_local_name;
122     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
123     $self->{onerror}->(node => $node, type => 'element not allowed');
124     }
125 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
126 wakaba 1.3 } elsif ($nt == 5) {
127     unshift @nodes, @{$node->child_nodes};
128     }
129     }
130 wakaba 1.4 return ($new_todos);
131 wakaba 1.3 }; # $AnyChecker
132    
133 wakaba 1.1 my $ElementDefault = {
134     checker => sub {
135 wakaba 1.4 my ($self, $todo) = @_;
136     $self->{onerror}->(node => $todo->{node}, type => 'element not supported');
137     return $AnyChecker->($self, $todo);
138 wakaba 1.1 },
139 wakaba 1.9 attrs_checker => sub {
140     my ($self, $todo) = @_;
141     for my $attr (@{$todo->{node}->attributes}) {
142     my $attr_ns = $attr->namespace_uri;
143     $attr_ns = '' unless defined $attr_ns;
144     my $attr_ln = $attr->manakai_local_name;
145     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
146     || $AttrChecker->{$attr_ns}->{''};
147     if ($checker) {
148     $checker->($self, $attr);
149 wakaba 1.17 } else {
150     $self->{onerror}->(node => $attr, type => 'attribute not supported');
151 wakaba 1.9 }
152     }
153     },
154 wakaba 1.1 };
155    
156     my $Element = {};
157    
158     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
159    
160 wakaba 1.7 my $HTMLMetadataElements = {
161     $HTML_NS => {
162 wakaba 1.29 qw/link 1 meta 1 style 1 script 1 event-source 1 command 1 base 1 title 1
163     noscript 1
164     /,
165 wakaba 1.7 },
166     };
167 wakaba 1.1
168 wakaba 1.2 my $HTMLSectioningElements = {
169     $HTML_NS => {qw/body 1 section 1 nav 1 article 1 blockquote 1 aside 1/},
170     };
171 wakaba 1.1
172 wakaba 1.7 my $HTMLBlockLevelElements = {
173     $HTML_NS => {
174     qw/
175     section 1 nav 1 article 1 blockquote 1 aside 1
176     h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1
177     address 1 p 1 hr 1 dialog 1 pre 1 ol 1 ul 1 dl 1
178     ins 1 del 1 figure 1 map 1 table 1 script 1 noscript 1
179     event-source 1 details 1 datagrid 1 menu 1 div 1 font 1
180     /,
181     },
182     };
183    
184     my $HTMLStrictlyInlineLevelElements = {
185     $HTML_NS => {
186     qw/
187     br 1 a 1 q 1 cite 1 em 1 strong 1 small 1 m 1 dfn 1 abbr 1
188     time 1 meter 1 progress 1 code 1 var 1 samp 1 kbd 1
189     sub 1 sup 1 span 1 i 1 b 1 bdo 1 ins 1 del 1 img 1
190     iframe 1 embed 1 object 1 video 1 audio 1 canvas 1 area 1
191     script 1 noscript 1 event-source 1 command 1 font 1
192     /,
193     },
194     };
195    
196     my $HTMLStructuredInlineLevelElements = {
197     $HTML_NS => {qw/blockquote 1 pre 1 ol 1 ul 1 dl 1 table 1 menu 1/},
198     };
199 wakaba 1.1
200 wakaba 1.6 my $HTMLInteractiveElements = {
201     $HTML_NS => {a => 1, details => 1, datagrid => 1},
202     };
203     ## NOTE: |html:a| and |html:datagrid| are not allowed as a descendant
204     ## of interactive elements
205 wakaba 1.1
206 wakaba 1.7 my $HTMLTransparentElements = {
207     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
208 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
209     ## and not in |head|.
210 wakaba 1.7 };
211    
212     #my $HTMLSemiTransparentElements = {
213     # $HTML_NS => {qw/video 1 audio 1/},
214     #};
215    
216     my $HTMLEmbededElements = {
217     $HTML_NS => {qw/img 1 iframe 1 embed 1 object 1 video 1 audio 1 canvas 1/},
218     };
219 wakaba 1.1
220     ## Empty
221     my $HTMLEmptyChecker = sub {
222 wakaba 1.4 my ($self, $todo) = @_;
223     my $el = $todo->{node};
224     my $new_todos = [];
225 wakaba 1.1 my @nodes = (@{$el->child_nodes});
226    
227     while (@nodes) {
228     my $node = shift @nodes;
229 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
230    
231 wakaba 1.1 my $nt = $node->node_type;
232     if ($nt == 1) {
233 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
234     $self->{onerror}->(node => $node, type => 'element not allowed');
235 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
236 wakaba 1.2 unshift @nodes, @$sib;
237 wakaba 1.4 push @$new_todos, @$ch;
238 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
239 wakaba 1.3 if ($node->data =~ /[^\x09-\x0D\x20]/) {
240     $self->{onerror}->(node => $node, type => 'character not allowed');
241     }
242 wakaba 1.1 } elsif ($nt == 5) {
243     unshift @nodes, @{$node->child_nodes};
244     }
245     }
246 wakaba 1.4 return ($new_todos);
247 wakaba 1.1 };
248    
249     ## Text
250     my $HTMLTextChecker = sub {
251 wakaba 1.4 my ($self, $todo) = @_;
252     my $el = $todo->{node};
253     my $new_todos = [];
254 wakaba 1.1 my @nodes = (@{$el->child_nodes});
255    
256     while (@nodes) {
257     my $node = shift @nodes;
258 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
259    
260 wakaba 1.1 my $nt = $node->node_type;
261     if ($nt == 1) {
262 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
263     $self->{onerror}->(node => $node, type => 'element not allowed');
264 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
265 wakaba 1.2 unshift @nodes, @$sib;
266 wakaba 1.4 push @$new_todos, @$ch;
267 wakaba 1.1 } elsif ($nt == 5) {
268     unshift @nodes, @{$node->child_nodes};
269     }
270     }
271 wakaba 1.4 return ($new_todos);
272 wakaba 1.1 };
273    
274     ## Zero or more |html:style| elements,
275     ## followed by zero or more block-level elements
276     my $HTMLStylableBlockChecker = sub {
277 wakaba 1.4 my ($self, $todo) = @_;
278     my $el = $todo->{node};
279     my $new_todos = [];
280 wakaba 1.1 my @nodes = (@{$el->child_nodes});
281    
282     my $has_non_style;
283     while (@nodes) {
284     my $node = shift @nodes;
285 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
286    
287 wakaba 1.1 my $nt = $node->node_type;
288     if ($nt == 1) {
289 wakaba 1.2 my $node_ns = $node->namespace_uri;
290     $node_ns = '' unless defined $node_ns;
291     my $node_ln = $node->manakai_local_name;
292 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
293 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'style') {
294 wakaba 1.28 $not_allowed = 1 if $has_non_style or
295     not $node->has_attribute_ns (undef, 'scoped');
296 wakaba 1.7 } elsif ($HTMLBlockLevelElements->{$node_ns}->{$node_ln}) {
297     $has_non_style = 1;
298 wakaba 1.1 } else {
299     $has_non_style = 1;
300 wakaba 1.7 $not_allowed = 1;
301 wakaba 1.1 }
302 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
303     if $not_allowed;
304 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
305 wakaba 1.2 unshift @nodes, @$sib;
306 wakaba 1.4 push @$new_todos, @$ch;
307 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
308     if ($node->data =~ /[^\x09-\x0D\x20]/) {
309 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
310 wakaba 1.1 }
311     } elsif ($nt == 5) {
312     unshift @nodes, @{$node->child_nodes};
313     }
314     }
315 wakaba 1.4 return ($new_todos);
316 wakaba 1.1 }; # $HTMLStylableBlockChecker
317    
318     ## Zero or more block-level elements
319     my $HTMLBlockChecker = sub {
320 wakaba 1.4 my ($self, $todo) = @_;
321     my $el = $todo->{node};
322     my $new_todos = [];
323 wakaba 1.1 my @nodes = (@{$el->child_nodes});
324    
325     while (@nodes) {
326     my $node = shift @nodes;
327 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
328    
329 wakaba 1.1 my $nt = $node->node_type;
330     if ($nt == 1) {
331 wakaba 1.2 my $node_ns = $node->namespace_uri;
332     $node_ns = '' unless defined $node_ns;
333     my $node_ln = $node->manakai_local_name;
334 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
335 wakaba 1.7 $not_allowed = 1
336     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
337 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
338     if $not_allowed;
339 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
340 wakaba 1.2 unshift @nodes, @$sib;
341 wakaba 1.4 push @$new_todos, @$ch;
342 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
343     if ($node->data =~ /[^\x09-\x0D\x20]/) {
344 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
345 wakaba 1.1 }
346     } elsif ($nt == 5) {
347     unshift @nodes, @{$node->child_nodes};
348     }
349     }
350 wakaba 1.4 return ($new_todos);
351 wakaba 1.1 }; # $HTMLBlockChecker
352    
353     ## Inline-level content
354     my $HTMLInlineChecker = sub {
355 wakaba 1.4 my ($self, $todo) = @_;
356     my $el = $todo->{node};
357     my $new_todos = [];
358 wakaba 1.1 my @nodes = (@{$el->child_nodes});
359    
360     while (@nodes) {
361     my $node = shift @nodes;
362 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
363    
364 wakaba 1.1 my $nt = $node->node_type;
365     if ($nt == 1) {
366 wakaba 1.2 my $node_ns = $node->namespace_uri;
367     $node_ns = '' unless defined $node_ns;
368     my $node_ln = $node->manakai_local_name;
369 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
370 wakaba 1.7 $not_allowed = 1
371     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
372     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
373 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
374     if $not_allowed;
375 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
376 wakaba 1.2 unshift @nodes, @$sib;
377 wakaba 1.4 push @$new_todos, @$ch;
378 wakaba 1.1 } elsif ($nt == 5) {
379     unshift @nodes, @{$node->child_nodes};
380     }
381     }
382 wakaba 1.4
383     for (@$new_todos) {
384     $_->{inline} = 1;
385     }
386     return ($new_todos);
387     }; # $HTMLInlineChecker
388 wakaba 1.1
389     my $HTMLSignificantInlineChecker = $HTMLInlineChecker;
390     ## TODO: check significant content
391    
392     ## Strictly inline-level content
393     my $HTMLStrictlyInlineChecker = sub {
394 wakaba 1.4 my ($self, $todo) = @_;
395     my $el = $todo->{node};
396     my $new_todos = [];
397 wakaba 1.1 my @nodes = (@{$el->child_nodes});
398    
399     while (@nodes) {
400     my $node = shift @nodes;
401 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
402    
403 wakaba 1.1 my $nt = $node->node_type;
404     if ($nt == 1) {
405 wakaba 1.2 my $node_ns = $node->namespace_uri;
406     $node_ns = '' unless defined $node_ns;
407     my $node_ln = $node->manakai_local_name;
408 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
409 wakaba 1.7 $not_allowed = 1
410     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
411 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
412     if $not_allowed;
413 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
414 wakaba 1.2 unshift @nodes, @$sib;
415 wakaba 1.4 push @$new_todos, @$ch;
416 wakaba 1.1 } elsif ($nt == 5) {
417     unshift @nodes, @{$node->child_nodes};
418     }
419     }
420 wakaba 1.4
421     for (@$new_todos) {
422     $_->{inline} = 1;
423     $_->{strictly_inline} = 1;
424     }
425     return ($new_todos);
426 wakaba 1.1 }; # $HTMLStrictlyInlineChecker
427    
428     my $HTMLSignificantStrictlyInlineChecker = $HTMLStrictlyInlineChecker;
429     ## TODO: check significant content
430    
431 wakaba 1.4 ## Inline-level or strictly inline-kevek content
432     my $HTMLInlineOrStrictlyInlineChecker = sub {
433     my ($self, $todo) = @_;
434     my $el = $todo->{node};
435     my $new_todos = [];
436     my @nodes = (@{$el->child_nodes});
437    
438     while (@nodes) {
439     my $node = shift @nodes;
440     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
441    
442     my $nt = $node->node_type;
443     if ($nt == 1) {
444     my $node_ns = $node->namespace_uri;
445     $node_ns = '' unless defined $node_ns;
446     my $node_ln = $node->manakai_local_name;
447 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
448 wakaba 1.7 if ($todo->{strictly_inline}) {
449     $not_allowed = 1
450     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln};
451     } else {
452     $not_allowed = 1
453     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
454     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
455     }
456 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
457     if $not_allowed;
458 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
459 wakaba 1.4 unshift @nodes, @$sib;
460     push @$new_todos, @$ch;
461     } elsif ($nt == 5) {
462     unshift @nodes, @{$node->child_nodes};
463     }
464     }
465    
466     for (@$new_todos) {
467     $_->{inline} = 1;
468     $_->{strictly_inline} = 1;
469     }
470     return ($new_todos);
471     }; # $HTMLInlineOrStrictlyInlineChecker
472    
473 wakaba 1.6 my $HTMLSignificantInlineOrStrictlyInlineChecker
474     = $HTMLInlineOrStrictlyInlineChecker;
475     ## TODO: check significant content
476    
477 wakaba 1.1 my $HTMLBlockOrInlineChecker = sub {
478 wakaba 1.4 my ($self, $todo) = @_;
479     my $el = $todo->{node};
480     my $new_todos = [];
481 wakaba 1.1 my @nodes = (@{$el->child_nodes});
482    
483     my $content = 'block-or-inline'; # or 'block' or 'inline'
484     my @block_not_inline;
485     while (@nodes) {
486     my $node = shift @nodes;
487 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
488    
489 wakaba 1.1 my $nt = $node->node_type;
490     if ($nt == 1) {
491 wakaba 1.2 my $node_ns = $node->namespace_uri;
492     $node_ns = '' unless defined $node_ns;
493     my $node_ln = $node->manakai_local_name;
494 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
495 wakaba 1.1 if ($content eq 'block') {
496 wakaba 1.7 $not_allowed = 1
497     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
498 wakaba 1.1 } elsif ($content eq 'inline') {
499 wakaba 1.7 $not_allowed = 1
500     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
501     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
502 wakaba 1.1 } else {
503 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
504     my $is_inline
505     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
506     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
507 wakaba 1.1
508 wakaba 1.6 push @block_not_inline, $node
509     if $is_block and not $is_inline and not $not_allowed;
510 wakaba 1.1 unless ($is_block) {
511     $content = 'inline';
512     for (@block_not_inline) {
513 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
514 wakaba 1.1 }
515 wakaba 1.6 $not_allowed = 1 unless $is_inline;
516 wakaba 1.1 }
517     }
518 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
519     if $not_allowed;
520 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
521 wakaba 1.2 unshift @nodes, @$sib;
522 wakaba 1.4 push @$new_todos, @$ch;
523 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
524     if ($node->data =~ /[^\x09-\x0D\x20]/) {
525     if ($content eq 'block') {
526 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
527 wakaba 1.1 } else {
528     $content = 'inline';
529     for (@block_not_inline) {
530 wakaba 1.2 $self->{onerror}->(node => $_, type => 'element not allowed');
531 wakaba 1.1 }
532     }
533     }
534     } elsif ($nt == 5) {
535     unshift @nodes, @{$node->child_nodes};
536     }
537     }
538 wakaba 1.4
539     if ($content eq 'inline') {
540     for (@$new_todos) {
541     $_->{inline} = 1;
542     }
543     }
544     return ($new_todos);
545 wakaba 1.1 };
546    
547 wakaba 1.2 ## Zero or more XXX element, then either block-level or inline-level
548     my $GetHTMLZeroOrMoreThenBlockOrInlineChecker = sub ($$) {
549     my ($elnsuri, $ellname) = @_;
550     return sub {
551 wakaba 1.4 my ($self, $todo) = @_;
552     my $el = $todo->{node};
553     my $new_todos = [];
554 wakaba 1.2 my @nodes = (@{$el->child_nodes});
555    
556     my $has_non_style;
557     my $content = 'block-or-inline'; # or 'block' or 'inline'
558     my @block_not_inline;
559     while (@nodes) {
560     my $node = shift @nodes;
561     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
562    
563     my $nt = $node->node_type;
564     if ($nt == 1) {
565     my $node_ns = $node->namespace_uri;
566     $node_ns = '' unless defined $node_ns;
567     my $node_ln = $node->manakai_local_name;
568 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
569 wakaba 1.8 if ($node_ns eq $elnsuri and $node_ln eq $ellname) {
570 wakaba 1.6 $not_allowed = 1 if $has_non_style;
571 wakaba 1.28 if ($ellname eq 'style' and
572     not $node->has_attribute_ns (undef, 'scoped')) {
573     $not_allowed = 1;
574     }
575 wakaba 1.2 } elsif ($content eq 'block') {
576     $has_non_style = 1;
577 wakaba 1.7 $not_allowed = 1
578     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
579 wakaba 1.2 } elsif ($content eq 'inline') {
580     $has_non_style = 1;
581 wakaba 1.7 $not_allowed = 1
582     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
583     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
584 wakaba 1.2 } else {
585     $has_non_style = 1;
586 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
587     my $is_inline
588     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
589     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
590 wakaba 1.2
591 wakaba 1.6 push @block_not_inline, $node
592     if $is_block and not $is_inline and not $not_allowed;
593 wakaba 1.2 unless ($is_block) {
594     $content = 'inline';
595     for (@block_not_inline) {
596     $self->{onerror}->(node => $_, type => 'element not allowed');
597     }
598 wakaba 1.6 $not_allowed = 1 unless $is_inline;
599 wakaba 1.1 }
600     }
601 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
602     if $not_allowed;
603 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
604 wakaba 1.2 unshift @nodes, @$sib;
605 wakaba 1.4 push @$new_todos, @$ch;
606 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
607     if ($node->data =~ /[^\x09-\x0D\x20]/) {
608     $has_non_style = 1;
609     if ($content eq 'block') {
610     $self->{onerror}->(node => $node, type => 'character not allowed');
611     } else {
612     $content = 'inline';
613     for (@block_not_inline) {
614     $self->{onerror}->(node => $_, type => 'element not allowed');
615     }
616 wakaba 1.1 }
617     }
618 wakaba 1.2 } elsif ($nt == 5) {
619     unshift @nodes, @{$node->child_nodes};
620 wakaba 1.1 }
621     }
622 wakaba 1.4
623     if ($content eq 'inline') {
624     for (@$new_todos) {
625     $_->{inline} = 1;
626     }
627     }
628     return ($new_todos);
629 wakaba 1.2 };
630     }; # $GetHTMLZeroOrMoreThenBlockOrInlineChecker
631 wakaba 1.1
632     my $HTMLTransparentChecker = $HTMLBlockOrInlineChecker;
633    
634 wakaba 1.10 my $GetHTMLEnumeratedAttrChecker = sub {
635     my $states = shift; # {value => conforming ? 1 : -1}
636     return sub {
637     my ($self, $attr) = @_;
638     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
639     if ($states->{$value} > 0) {
640     #
641     } elsif ($states->{$value}) {
642     $self->{onerror}->(node => $attr,
643     type => 'non-conforming enumerated attribute value');
644     } else {
645     $self->{onerror}->(node => $attr,
646     type => 'invalid enumerated attribute value');
647     }
648     };
649     }; # $GetHTMLEnumeratedAttrChecker
650 wakaba 1.9
651 wakaba 1.10 my $GetHTMLBooleanAttrChecker = sub {
652     my $local_name = shift;
653     return sub {
654     my ($self, $attr) = @_;
655     my $value = $attr->value;
656     unless ($value eq $local_name or $value eq '') {
657     $self->{onerror}->(node => $attr,
658     type => 'invalid boolean attribute value');
659     }
660     };
661     }; # $GetHTMLBooleanAttrChecker
662 wakaba 1.9
663 wakaba 1.10 my $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker = sub {
664     my ($self, $attr) = @_;
665     my %word;
666     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
667     unless ($word{$word}) {
668     $word{$word} = 1;
669 wakaba 1.9 } else {
670 wakaba 1.10 $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
671 wakaba 1.9 }
672     }
673 wakaba 1.10 }; # $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker
674    
675 wakaba 1.20 ## |rel| attribute (unordered set of space separated tokens,
676     ## whose allowed values are defined by the section on link types)
677     my $HTMLLinkTypesAttrChecker = sub {
678     my ($a_or_area, $self, $attr) = @_;
679     my %word;
680     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
681     unless ($word{$word}) {
682     $word{$word} = 1;
683     } else {
684     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
685     }
686     }
687     ## NOTE: Case sensitive match (since HTML5 spec does not say link
688     ## types are case-insensitive and it says "The value should not
689     ## be confusingly similar to any other defined value (e.g.
690     ## differing only in case).").
691     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
692     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
693     ## values to be used conformingly.
694     require Whatpm::_LinkTypeList;
695     our $LinkType;
696     for my $word (keys %word) {
697     my $def = $LinkType->{$word};
698     if (defined $def) {
699     if ($def->{status} eq 'accepted') {
700     if (defined $def->{effect}->[$a_or_area]) {
701     #
702     } else {
703     $self->{onerror}->(node => $attr,
704     type => 'link type bad context:'.$word);
705     }
706     } elsif ($def->{status} eq 'proposal') {
707     $self->{onerror}->(node => $attr,
708     type => 'proposed link type:'.$word);
709     } else { # rejected or synonym
710     $self->{onerror}->(node => $attr,
711     type => 'non-conforming link type:'.$word);
712     }
713     if ($def->{unique}) {
714     unless ($self->{has_link_type}->{$word}) {
715     $self->{has_link_type}->{$word} = 1;
716     } else {
717     $self->{onerror}->(node => $attr,
718     type => 'link with type not unique:'.$word);
719     }
720     }
721     } else {
722     $self->{onerror}->(node => $attr,
723     type => 'link type not supported:'.$word);
724     }
725     }
726     ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
727     ## says that using both X-Pingback: header field and HTML
728     ## <link rel=pingback> is deprecated and if both appears they
729     ## SHOULD contain exactly the same value.
730     ## ISSUE: Pingback 1.0 specification defines the exact representation
731     ## of its link element, which cannot be tested by the current arch.
732     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
733     ## include any string that matches to the pattern for the rel=pingback link,
734     ## which again inpossible to test.
735     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
736     }; # $HTMLLinkTypesAttrChecker
737    
738 wakaba 1.18 ## URI (or IRI)
739 wakaba 1.11 my $HTMLURIAttrChecker = sub {
740     my ($self, $attr) = @_;
741 wakaba 1.15 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
742 wakaba 1.18 my $value = $attr->value;
743     Whatpm::URIChecker->check_iri_reference ($value, sub {
744     my %opt = @_;
745 wakaba 1.22 $self->{onerror}->(node => $attr,
746     type => 'URI:'.$opt{level}.':'.
747     (defined $opt{position} ? $opt{position} : '').':'.
748     $opt{type});
749 wakaba 1.18 });
750 wakaba 1.11 }; # $HTMLURIAttrChecker
751    
752 wakaba 1.15 ## A space separated list of one or more URIs (or IRIs)
753     my $HTMLSpaceURIsAttrChecker = sub {
754     my ($self, $attr) = @_;
755 wakaba 1.30 my $i = 0;
756     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
757     Whatpm::URIChecker->check_iri_reference ($value, sub {
758     my %opt = @_;
759     $self->{onerror}->(node => $attr,
760     type => 'URI['.$i.']:'.$opt{level}.':'.
761     (defined $opt{position} ? $opt{position} : '').':'.
762     $opt{type});
763     });
764     $i++;
765     }
766 wakaba 1.15 ## ISSUE: Relative references?
767     ## ISSUE: Leading or trailing white spaces are conformant?
768     ## ISSUE: A sequence of white space characters are conformant?
769     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
770     ## NOTE: Duplication seems not an error.
771     }; # $HTMLSpaceURIsAttrChecker
772    
773 wakaba 1.30 my $HTMLDatetimeAttrChecker = sub {
774     my ($self, $attr) = @_;
775     my $value = $attr->value;
776     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
777     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
778     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
779     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
780     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
781     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
782     if $d < 1 or
783     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
784     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
785     if $M == 2 and $d == 29 and
786     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
787     } else {
788     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
789     }
790     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
791     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
792     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
793     if defined $s and $s > 59;
794     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
795     if $zh > 23;
796     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
797     if $zm > 59;
798     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
799     } else {
800     $self->{onerror}->(node => $attr, type => 'datetime syntax error');
801     }
802     }; # $HTMLDatetimeAttrChecker
803    
804 wakaba 1.11 my $HTMLIntegerAttrChecker = sub {
805     my ($self, $attr) = @_;
806     my $value = $attr->value;
807     unless ($value =~ /\A-?[0-9]+\z/) {
808 wakaba 1.15 $self->{onerror}->(node => $attr, type => 'integer syntax error');
809 wakaba 1.11 }
810     }; # $HTMLIntegerAttrChecker
811    
812 wakaba 1.12 my $GetHTMLNonNegativeIntegerAttrChecker = sub {
813     my $range_check = shift;
814     return sub {
815     my ($self, $attr) = @_;
816     my $value = $attr->value;
817     if ($value =~ /\A[0-9]+\z/) {
818     unless ($range_check->($value + 0)) {
819     $self->{onerror}->(node => $attr, type => 'out of range');
820     }
821     } else {
822 wakaba 1.15 $self->{onerror}->(node => $attr,
823     type => 'non-negative integer syntax error');
824 wakaba 1.12 }
825     };
826     }; # $GetHTMLNonNegativeIntegerAttrChecker
827    
828 wakaba 1.11 my $GetHTMLFloatingPointNumberAttrChecker = sub {
829     my $range_check = shift;
830     return sub {
831     my ($self, $attr) = @_;
832     my $value = $attr->value;
833     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
834     unless ($range_check->($value + 0)) {
835     $self->{onerror}->(node => $attr, type => 'out of range');
836     }
837     } else {
838 wakaba 1.15 $self->{onerror}->(node => $attr,
839     type => 'floating point number syntax error');
840 wakaba 1.11 }
841     };
842     }; # $GetHTMLFloatingPointNumberAttrChecker
843    
844 wakaba 1.15 ## "A valid MIME type, optionally with parameters. [RFC 2046]"
845     ## ISSUE: RFC 2046 does not define syntax of media types.
846     ## ISSUE: The definition of "a valid MIME type" is unknown.
847     ## Syntactical correctness?
848     my $HTMLIMTAttrChecker = sub {
849     my ($self, $attr) = @_;
850     my $value = $attr->value;
851     ## ISSUE: RFC 2045 Content-Type header field allows insertion
852     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
853     ## ISSUE: RFC 2231 extension? Maybe no.
854     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
855     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
856     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
857 wakaba 1.19 if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
858     my @type = ($1, $2);
859     my $param = $3;
860     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
861     if (defined $2) {
862     push @type, $1 => $2;
863     } else {
864     my $n = $1;
865     my $v = $2;
866     $v =~ s/\\(.)/$1/gs;
867     push @type, $n => $v;
868     }
869     }
870     require Whatpm::IMTChecker;
871     Whatpm::IMTChecker->check_imt (sub {
872     my %opt = @_;
873     $self->{onerror}->(node => $attr,
874     type => 'IMT:'.$opt{level}.':'.$opt{type});
875     }, @type);
876     } else {
877 wakaba 1.15 $self->{onerror}->(node => $attr, type => 'IMT syntax error');
878     }
879     }; # $HTMLIMTAttrChecker
880    
881 wakaba 1.17 my $HTMLLanguageTagAttrChecker = sub {
882     my ($self, $attr) = @_;
883     if ($attr->value eq '') {
884     $self->{onerror}->(node => $attr, type => 'language tag syntax error');
885     }
886     ## TODO: RFC 3066 test
887     ## ISSUE: RFC 4646 (3066bis)?
888     }; # $HTMLLanguageTagAttrChecker
889    
890     ## "A valid media query [MQ]"
891     my $HTMLMQAttrChecker = sub {
892     ## ISSUE: What is "a valid media query"?
893     }; # $HTMLMQAttrChecker
894    
895 wakaba 1.16 my $HTMLEventHandlerAttrChecker = sub {
896     ## TODO: MUST contain valid ECMAScript code matching the
897     ## ECMAScript |FunctionBody| production. [ECMA262]
898     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
899     ## ISSUE: Automatic semicolon insertion does not apply?
900     ## ISSUE: Other script languages?
901     }; # $HTMLEventHandlerAttrChecker
902    
903 wakaba 1.17 my $HTMLUsemapAttrChecker = sub {
904     my ($self, $attr) = @_;
905     ## MUST be a valid hashed ID reference to a |map| element
906     my $value = $attr->value;
907     if ($value =~ s/^#//) {
908     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
909     push @{$self->{usemap}}, [$value => $attr];
910     } else {
911     $self->{onerror}->(node => $attr, type => 'hashed idref syntax error');
912     }
913 wakaba 1.27 ## NOTE: Space characters in hashed ID references are conforming.
914 wakaba 1.20 ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
915 wakaba 1.17 }; # $HTMLUsemapAttrChecker
916    
917     my $HTMLTargetAttrChecker = sub {
918     my ($self, $attr) = @_;
919     my $value = $attr->value;
920     if ($value =~ /^_/) {
921     $value = lc $value; ## ISSUE: ASCII case-insentitive?
922     unless ({
923     _self => 1, _parent => 1, _top => 1,
924     }->{$value}) {
925     $self->{onerror}->(node => $attr,
926     type => 'reserved browsing context name');
927     }
928     } else {
929     #$ ISSUE: An empty string is conforming?
930     }
931     }; # $HTMLTargetAttrChecker
932    
933 wakaba 1.10 my $HTMLAttrChecker = {
934     id => sub {
935 wakaba 1.17 ## NOTE: |map| has its own variant of |id=""| checker
936 wakaba 1.10 my ($self, $attr) = @_;
937     my $value = $attr->value;
938 wakaba 1.17 if (length $value > 0) {
939 wakaba 1.10 if ($self->{id}->{$value}) {
940     $self->{onerror}->(node => $attr, type => 'duplicate ID');
941     } else {
942     $self->{id}->{$value} = 1;
943     }
944 wakaba 1.27 if ($value =~ /[\x09-\x0D\x20]/) {
945     $self->{onerror}->(node => $attr, type => 'space in ID');
946     }
947 wakaba 1.17 } else {
948     ## NOTE: MUST contain at least one character
949     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
950 wakaba 1.10 }
951     },
952     title => sub {}, ## NOTE: No conformance creteria
953     lang => sub {
954 wakaba 1.17 ## TODO: RFC 3066 or empty test
955 wakaba 1.10 ## ISSUE: RFC 4646 (3066bis)?
956     ## TODO: HTML vs XHTML
957     },
958     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
959     class => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker,
960     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
961 wakaba 1.30 tabindex => $HTMLIntegerAttrChecker,
962 wakaba 1.10 };
963    
964 wakaba 1.16 for (qw/
965     onabort onbeforeunload onblur onchange onclick oncontextmenu
966     ondblclick ondrag ondragend ondragenter ondragleave ondragover
967     ondragstart ondrop onerror onfocus onkeydown onkeypress
968     onkeyup onload onmessage onmousedown onmousemove onmouseout
969     onmouseover onmouseup onmousewheel onresize onscroll onselect
970     onsubmit onunload
971     /) {
972     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
973     }
974    
975 wakaba 1.10 my $GetHTMLAttrsChecker = sub {
976     my $element_specific_checker = shift;
977     return sub {
978     my ($self, $todo) = @_;
979     for my $attr (@{$todo->{node}->attributes}) {
980     my $attr_ns = $attr->namespace_uri;
981     $attr_ns = '' unless defined $attr_ns;
982     my $attr_ln = $attr->manakai_local_name;
983     my $checker;
984     if ($attr_ns eq '') {
985     $checker = $element_specific_checker->{$attr_ln}
986     || $HTMLAttrChecker->{$attr_ln};
987     }
988     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
989     || $AttrChecker->{$attr_ns}->{''};
990     if ($checker) {
991     $checker->($self, $attr);
992     } else {
993     $self->{onerror}->(node => $attr, type => 'attribute not supported');
994     ## ISSUE: No comformance createria for unknown attributes in the spec
995     }
996     }
997     };
998     }; # $GetHTMLAttrsChecker
999 wakaba 1.9
1000     $Element->{$HTML_NS}->{''} = {
1001 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1002 wakaba 1.9 checker => $ElementDefault->{checker},
1003     };
1004    
1005 wakaba 1.1 $Element->{$HTML_NS}->{html} = {
1006 wakaba 1.24 is_root => 1,
1007 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1008     xmlns => sub {
1009     my ($self, $attr) = @_;
1010     my $value = $attr->value;
1011     unless ($value eq $HTML_NS) {
1012     $self->{onerror}->(node => $attr, type => 'syntax error');
1013     ## TODO: only in HTML documents
1014     }
1015     },
1016     }),
1017 wakaba 1.1 checker => sub {
1018 wakaba 1.4 my ($self, $todo) = @_;
1019     my $el = $todo->{node};
1020     my $new_todos = [];
1021 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1022    
1023     my $phase = 'before head';
1024     while (@nodes) {
1025     my $node = shift @nodes;
1026 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1027    
1028 wakaba 1.1 my $nt = $node->node_type;
1029     if ($nt == 1) {
1030 wakaba 1.2 my $node_ns = $node->namespace_uri;
1031     $node_ns = '' unless defined $node_ns;
1032     my $node_ln = $node->manakai_local_name;
1033 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1034 wakaba 1.1 if ($phase eq 'before head') {
1035 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'head') {
1036 wakaba 1.1 $phase = 'after head';
1037 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1038     $self->{onerror}->(node => $node, type => 'ps element missing:head');
1039 wakaba 1.1 $phase = 'after body';
1040     } else {
1041 wakaba 1.6 $not_allowed = 1;
1042 wakaba 1.1 # before head
1043     }
1044     } elsif ($phase eq 'after head') {
1045 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'body') {
1046 wakaba 1.1 $phase = 'after body';
1047     } else {
1048 wakaba 1.6 $not_allowed = 1;
1049 wakaba 1.1 # after head
1050     }
1051     } else { #elsif ($phase eq 'after body') {
1052 wakaba 1.6 $not_allowed = 1;
1053 wakaba 1.1 # after body
1054     }
1055 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1056     if $not_allowed;
1057 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1058 wakaba 1.2 unshift @nodes, @$sib;
1059 wakaba 1.4 push @$new_todos, @$ch;
1060 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1061     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1062 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1063 wakaba 1.1 }
1064     } elsif ($nt == 5) {
1065     unshift @nodes, @{$node->child_nodes};
1066     }
1067     }
1068 wakaba 1.3
1069     if ($phase eq 'before head') {
1070     $self->{onerror}->(node => $el, type => 'child element missing:head');
1071     $self->{onerror}->(node => $el, type => 'child element missing:body');
1072     } elsif ($phase eq 'after head') {
1073     $self->{onerror}->(node => $el, type => 'child element missing:body');
1074     }
1075    
1076 wakaba 1.4 return ($new_todos);
1077 wakaba 1.1 },
1078     };
1079    
1080     $Element->{$HTML_NS}->{head} = {
1081 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1082 wakaba 1.1 checker => sub {
1083 wakaba 1.4 my ($self, $todo) = @_;
1084     my $el = $todo->{node};
1085     my $new_todos = [];
1086 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1087    
1088     my $has_title;
1089 wakaba 1.3 my $phase = 'initial'; # 'after charset', 'after base'
1090 wakaba 1.1 while (@nodes) {
1091     my $node = shift @nodes;
1092 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1093    
1094 wakaba 1.1 my $nt = $node->node_type;
1095     if ($nt == 1) {
1096 wakaba 1.2 my $node_ns = $node->namespace_uri;
1097     $node_ns = '' unless defined $node_ns;
1098     my $node_ln = $node->manakai_local_name;
1099 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
1100 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'title') {
1101 wakaba 1.3 $phase = 'after base';
1102 wakaba 1.1 unless ($has_title) {
1103     $has_title = 1;
1104     } else {
1105 wakaba 1.6 $not_allowed = 1;
1106 wakaba 1.1 }
1107 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'meta') {
1108 wakaba 1.1 if ($node->has_attribute_ns (undef, 'charset')) {
1109 wakaba 1.3 if ($phase eq 'initial') {
1110     $phase = 'after charset';
1111 wakaba 1.1 } else {
1112 wakaba 1.6 $not_allowed = 1;
1113 wakaba 1.3 ## NOTE: See also |base|'s "contexts" field in the spec
1114 wakaba 1.1 }
1115     } else {
1116 wakaba 1.3 $phase = 'after base';
1117 wakaba 1.1 }
1118 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'base') {
1119 wakaba 1.3 if ($phase eq 'initial' or $phase eq 'after charset') {
1120     $phase = 'after base';
1121 wakaba 1.1 } else {
1122 wakaba 1.6 $not_allowed = 1;
1123 wakaba 1.1 }
1124 wakaba 1.28 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
1125     $phase = 'after base';
1126     if ($node->has_attribute_ns (undef, 'scoped')) {
1127     $not_allowed = 1;
1128     }
1129 wakaba 1.7 } elsif ($HTMLMetadataElements->{$node_ns}->{$node_ln}) {
1130     $phase = 'after base';
1131 wakaba 1.1 } else {
1132 wakaba 1.7 $not_allowed = 1;
1133 wakaba 1.1 }
1134 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1135     if $not_allowed;
1136 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1137 wakaba 1.2 unshift @nodes, @$sib;
1138 wakaba 1.4 push @$new_todos, @$ch;
1139 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1140     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1141 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1142 wakaba 1.1 }
1143     } elsif ($nt == 5) {
1144     unshift @nodes, @{$node->child_nodes};
1145     }
1146     }
1147     unless ($has_title) {
1148 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:title');
1149 wakaba 1.1 }
1150 wakaba 1.4 return ($new_todos);
1151 wakaba 1.1 },
1152     };
1153    
1154     $Element->{$HTML_NS}->{title} = {
1155 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1156 wakaba 1.1 checker => $HTMLTextChecker,
1157     };
1158    
1159     $Element->{$HTML_NS}->{base} = {
1160 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1161 wakaba 1.11 href => $HTMLURIAttrChecker,
1162 wakaba 1.17 target => $HTMLTargetAttrChecker,
1163 wakaba 1.10 }),
1164 wakaba 1.1 checker => $HTMLEmptyChecker,
1165     };
1166    
1167     $Element->{$HTML_NS}->{link} = {
1168 wakaba 1.16 attrs_checker => sub {
1169     my ($self, $todo) = @_;
1170     $GetHTMLAttrsChecker->({
1171     href => $HTMLURIAttrChecker,
1172 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(0, @_) },
1173 wakaba 1.17 media => $HTMLMQAttrChecker,
1174     hreflang => $HTMLLanguageTagAttrChecker,
1175 wakaba 1.16 type => $HTMLIMTAttrChecker,
1176     ## NOTE: Though |title| has special semantics,
1177     ## syntactically same as the |title| as global attribute.
1178     })->($self, $todo);
1179     unless ($todo->{node}->has_attribute_ns (undef, 'href')) {
1180     $self->{onerror}->(node => $todo->{node},
1181     type => 'attribute missing:href');
1182     }
1183     unless ($todo->{node}->has_attribute_ns (undef, 'rel')) {
1184     $self->{onerror}->(node => $todo->{node},
1185     type => 'attribute missing:rel');
1186     }
1187     },
1188 wakaba 1.1 checker => $HTMLEmptyChecker,
1189     };
1190    
1191     $Element->{$HTML_NS}->{meta} = {
1192 wakaba 1.10 attrs_checker => sub {
1193     my ($self, $todo) = @_;
1194     my $name_attr;
1195     my $http_equiv_attr;
1196     my $charset_attr;
1197     my $content_attr;
1198     for my $attr (@{$todo->{node}->attributes}) {
1199     my $attr_ns = $attr->namespace_uri;
1200     $attr_ns = '' unless defined $attr_ns;
1201     my $attr_ln = $attr->manakai_local_name;
1202     my $checker;
1203     if ($attr_ns eq '') {
1204     if ($attr_ln eq 'content') {
1205     $content_attr = $attr;
1206     $checker = 1;
1207     } elsif ($attr_ln eq 'name') {
1208     $name_attr = $attr;
1209     $checker = 1;
1210     } elsif ($attr_ln eq 'http-equiv') {
1211     $http_equiv_attr = $attr;
1212     $checker = 1;
1213     } elsif ($attr_ln eq 'charset') {
1214     $charset_attr = $attr;
1215     $checker = 1;
1216     } else {
1217     $checker = $HTMLAttrChecker->{$attr_ln}
1218     || $AttrChecker->{$attr_ns}->{$attr_ln}
1219     || $AttrChecker->{$attr_ns}->{''};
1220     }
1221     } else {
1222     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1223     || $AttrChecker->{$attr_ns}->{''};
1224     }
1225     if ($checker) {
1226     $checker->($self, $attr) if ref $checker;
1227     } else {
1228     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1229     ## ISSUE: No comformance createria for unknown attributes in the spec
1230     }
1231     }
1232    
1233     if (defined $name_attr) {
1234     if (defined $http_equiv_attr) {
1235     $self->{onerror}->(node => $http_equiv_attr,
1236     type => 'attribute not allowed');
1237     } elsif (defined $charset_attr) {
1238     $self->{onerror}->(node => $charset_attr,
1239     type => 'attribute not allowed');
1240     }
1241     my $metadata_name = $name_attr->value;
1242     my $metadata_value;
1243     if (defined $content_attr) {
1244     $metadata_value = $content_attr->value;
1245     } else {
1246     $self->{onerror}->(node => $todo->{node},
1247     type => 'attribute missing:content');
1248     $metadata_value = '';
1249     }
1250     } elsif (defined $http_equiv_attr) {
1251     if (defined $charset_attr) {
1252     $self->{onerror}->(node => $charset_attr,
1253     type => 'attribute not allowed');
1254     }
1255     unless (defined $content_attr) {
1256     $self->{onerror}->(node => $todo->{node},
1257     type => 'attribute missing:content');
1258     }
1259     } elsif (defined $charset_attr) {
1260     if (defined $content_attr) {
1261     $self->{onerror}->(node => $content_attr,
1262     type => 'attribute not allowed');
1263     }
1264     ## TODO: Allowed only in HTML documents
1265     } else {
1266     if (defined $content_attr) {
1267     $self->{onerror}->(node => $content_attr,
1268     type => 'attribute not allowed');
1269     $self->{onerror}->(node => $todo->{node},
1270     type => 'attribute missing:name|http-equiv');
1271     } else {
1272     $self->{onerror}->(node => $todo->{node},
1273     type => 'attribute missing:name|http-equiv|charset');
1274     }
1275     }
1276    
1277     ## TODO: metadata conformance
1278    
1279     ## TODO: pragma conformance
1280     if (defined $http_equiv_attr) { ## An enumerated attribute
1281     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1282     if ({
1283     'refresh' => 1,
1284     'default-style' => 1,
1285     }->{$keyword}) {
1286     #
1287     } else {
1288     $self->{onerror}->(node => $http_equiv_attr,
1289     type => 'invalid enumerated attribute value');
1290     }
1291     }
1292    
1293     ## TODO: charset
1294     },
1295 wakaba 1.1 checker => $HTMLEmptyChecker,
1296     };
1297    
1298     ## NOTE: |html:style| has no conformance creteria on content model
1299 wakaba 1.3 $Element->{$HTML_NS}->{style} = {
1300 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({
1301 wakaba 1.15 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1302 wakaba 1.17 media => $HTMLMQAttrChecker,
1303 wakaba 1.10 scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1304     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1305     ## not different
1306     }),
1307 wakaba 1.3 checker => $AnyChecker,
1308     };
1309 wakaba 1.1
1310     $Element->{$HTML_NS}->{body} = {
1311 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1312 wakaba 1.1 checker => $HTMLBlockChecker,
1313     };
1314    
1315     $Element->{$HTML_NS}->{section} = {
1316 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1317 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1318     };
1319    
1320     $Element->{$HTML_NS}->{nav} = {
1321 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1322 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
1323     };
1324    
1325     $Element->{$HTML_NS}->{article} = {
1326 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1327 wakaba 1.1 checker => $HTMLStylableBlockChecker,
1328     };
1329    
1330     $Element->{$HTML_NS}->{blockquote} = {
1331 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1332     cite => $HTMLURIAttrChecker,
1333     }),
1334 wakaba 1.1 checker => $HTMLBlockChecker,
1335     };
1336    
1337     $Element->{$HTML_NS}->{aside} = {
1338 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1339 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
1340 wakaba 1.1 };
1341    
1342     $Element->{$HTML_NS}->{h1} = {
1343 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1344 wakaba 1.30 checker => sub {
1345     my ($self, $todo) = @_;
1346     $todo->{flag}->{has_heading}->[0] = 1;
1347     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
1348     },
1349 wakaba 1.1 };
1350    
1351     $Element->{$HTML_NS}->{h2} = {
1352 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1353 wakaba 1.30 checker => $Element->{$HTML_NS}->{h1}->{checker},
1354 wakaba 1.1 };
1355    
1356     $Element->{$HTML_NS}->{h3} = {
1357 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1358 wakaba 1.30 checker => $Element->{$HTML_NS}->{h1}->{checker},
1359 wakaba 1.1 };
1360    
1361     $Element->{$HTML_NS}->{h4} = {
1362 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1363 wakaba 1.30 checker => $Element->{$HTML_NS}->{h1}->{checker},
1364 wakaba 1.1 };
1365    
1366     $Element->{$HTML_NS}->{h5} = {
1367 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1368 wakaba 1.30 checker => $Element->{$HTML_NS}->{h1}->{checker},
1369 wakaba 1.1 };
1370    
1371     $Element->{$HTML_NS}->{h6} = {
1372 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1373 wakaba 1.30 checker => $Element->{$HTML_NS}->{h1}->{checker},
1374 wakaba 1.1 };
1375    
1376 wakaba 1.30 $Element->{$HTML_NS}->{header} = {
1377     attrs_checker => $GetHTMLAttrsChecker->({}),
1378     checker => sub {
1379     my ($self, $todo) = @_;
1380     my $old_flag = $todo->{flag}->{has_heading} || [];
1381     my $new_flag = [];
1382     local $todo->{flag}->{has_heading} = $new_flag;
1383     my $node = $todo->{node};
1384    
1385     my $end = $self->_add_minuses
1386     ({$HTML_NS => {qw/header 1 footer 1/}},
1387     $HTMLSectioningElements);
1388     my ($new_todos, $ch) = $HTMLBlockChecker->($self, $todo);
1389     push @$new_todos, $end,
1390     {type => 'code', code => sub {
1391     if ($new_flag->[0]) {
1392     $old_flag->[0] = 1;
1393     } else {
1394     $self->{onerror}->(node => $node, type => 'element missing:hn');
1395     }
1396     }};
1397     return ($new_todos, $ch);
1398     },
1399     };
1400 wakaba 1.1
1401 wakaba 1.2 $Element->{$HTML_NS}->{footer} = {
1402 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1403 wakaba 1.2 checker => sub { ## block -hn -header -footer -sectioning or inline
1404 wakaba 1.4 my ($self, $todo) = @_;
1405     my $el = $todo->{node};
1406     my $new_todos = [];
1407 wakaba 1.2 my @nodes = (@{$el->child_nodes});
1408    
1409     my $content = 'block-or-inline'; # or 'block' or 'inline'
1410     my @block_not_inline;
1411     while (@nodes) {
1412     my $node = shift @nodes;
1413     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1414    
1415     my $nt = $node->node_type;
1416     if ($nt == 1) {
1417     my $node_ns = $node->namespace_uri;
1418     $node_ns = '' unless defined $node_ns;
1419     my $node_ln = $node->manakai_local_name;
1420 wakaba 1.6 my $not_allowed;
1421 wakaba 1.2 if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1422 wakaba 1.6 $not_allowed = 1;
1423 wakaba 1.2 } elsif ($node_ns eq $HTML_NS and
1424     {
1425     qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1 header 1 footer 1/
1426     }->{$node_ln}) {
1427 wakaba 1.6 $not_allowed = 1;
1428 wakaba 1.2 } elsif ($HTMLSectioningElements->{$node_ns}->{$node_ln}) {
1429 wakaba 1.6 $not_allowed = 1;
1430 wakaba 1.2 }
1431     if ($content eq 'block') {
1432 wakaba 1.7 $not_allowed = 1
1433     unless $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1434 wakaba 1.2 } elsif ($content eq 'inline') {
1435 wakaba 1.7 $not_allowed = 1
1436     unless $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
1437     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1438 wakaba 1.2 } else {
1439 wakaba 1.7 my $is_block = $HTMLBlockLevelElements->{$node_ns}->{$node_ln};
1440     my $is_inline
1441     = $HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} ||
1442     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln};
1443 wakaba 1.2
1444 wakaba 1.6 push @block_not_inline, $node
1445     if $is_block and not $is_inline and not $not_allowed;
1446 wakaba 1.2 unless ($is_block) {
1447     $content = 'inline';
1448     for (@block_not_inline) {
1449     $self->{onerror}->(node => $_, type => 'element not allowed');
1450     }
1451 wakaba 1.6 $not_allowed = 1 unless $is_inline;
1452 wakaba 1.2 }
1453     }
1454 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
1455     if $not_allowed;
1456 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1457 wakaba 1.2 unshift @nodes, @$sib;
1458 wakaba 1.4 push @$new_todos, @$ch;
1459 wakaba 1.2 } elsif ($nt == 3 or $nt == 4) {
1460     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1461     if ($content eq 'block') {
1462     $self->{onerror}->(node => $node, type => 'character not allowed');
1463     } else {
1464     $content = 'inline';
1465     for (@block_not_inline) {
1466     $self->{onerror}->(node => $_, type => 'element not allowed');
1467     }
1468     }
1469     }
1470     } elsif ($nt == 5) {
1471     unshift @nodes, @{$node->child_nodes};
1472     }
1473     }
1474    
1475     my $end = $self->_add_minuses
1476     ({$HTML_NS => {qw/h1 1 h2 1 h3 1 h4 1 h5 1 h6 1/}},
1477     $HTMLSectioningElements);
1478 wakaba 1.4 push @$new_todos, $end;
1479 wakaba 1.2
1480 wakaba 1.4 if ($content eq 'inline') {
1481     for (@$new_todos) {
1482     $_->{inline} = 1;
1483     }
1484     }
1485    
1486     return ($new_todos);
1487 wakaba 1.2 },
1488     };
1489 wakaba 1.1
1490     $Element->{$HTML_NS}->{address} = {
1491 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1492 wakaba 1.1 checker => $HTMLInlineChecker,
1493     };
1494    
1495     $Element->{$HTML_NS}->{p} = {
1496 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1497 wakaba 1.1 checker => $HTMLSignificantInlineChecker,
1498     };
1499    
1500     $Element->{$HTML_NS}->{hr} = {
1501 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1502 wakaba 1.1 checker => $HTMLEmptyChecker,
1503     };
1504    
1505     $Element->{$HTML_NS}->{br} = {
1506 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1507 wakaba 1.1 checker => $HTMLEmptyChecker,
1508     };
1509    
1510     $Element->{$HTML_NS}->{dialog} = {
1511 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1512 wakaba 1.1 checker => sub {
1513 wakaba 1.4 my ($self, $todo) = @_;
1514     my $el = $todo->{node};
1515     my $new_todos = [];
1516 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1517    
1518     my $phase = 'before dt';
1519     while (@nodes) {
1520     my $node = shift @nodes;
1521 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1522    
1523 wakaba 1.1 my $nt = $node->node_type;
1524     if ($nt == 1) {
1525 wakaba 1.8 my $node_ns = $node->namespace_uri;
1526     $node_ns = '' unless defined $node_ns;
1527     my $node_ln = $node->manakai_local_name;
1528 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1529 wakaba 1.1 if ($phase eq 'before dt') {
1530 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1531 wakaba 1.1 $phase = 'before dd';
1532 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1533 wakaba 1.2 $self->{onerror}
1534 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1535 wakaba 1.1 $phase = 'before dt';
1536     } else {
1537 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1538 wakaba 1.1 }
1539     } else { # before dd
1540 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1541 wakaba 1.1 $phase = 'before dt';
1542 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1543 wakaba 1.2 $self->{onerror}
1544 wakaba 1.3 ->(node => $node, type => 'ps element missing:dd');
1545 wakaba 1.1 $phase = 'before dd';
1546     } else {
1547 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1548 wakaba 1.1 }
1549     }
1550 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1551 wakaba 1.2 unshift @nodes, @$sib;
1552 wakaba 1.4 push @$new_todos, @$ch;
1553 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1554     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1555 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1556 wakaba 1.1 }
1557     } elsif ($nt == 5) {
1558     unshift @nodes, @{$node->child_nodes};
1559     }
1560     }
1561     if ($phase eq 'before dd') {
1562 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1563 wakaba 1.1 }
1564 wakaba 1.4 return ($new_todos);
1565 wakaba 1.1 },
1566     };
1567    
1568     $Element->{$HTML_NS}->{pre} = {
1569 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1570 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1571     };
1572    
1573     $Element->{$HTML_NS}->{ol} = {
1574 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1575     start => $HTMLIntegerAttrChecker,
1576     }),
1577 wakaba 1.1 checker => sub {
1578 wakaba 1.4 my ($self, $todo) = @_;
1579     my $el = $todo->{node};
1580     my $new_todos = [];
1581 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1582    
1583     while (@nodes) {
1584     my $node = shift @nodes;
1585 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1586    
1587 wakaba 1.1 my $nt = $node->node_type;
1588     if ($nt == 1) {
1589 wakaba 1.8 my $node_ns = $node->namespace_uri;
1590     $node_ns = '' unless defined $node_ns;
1591     my $node_ln = $node->manakai_local_name;
1592 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1593 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'li') {
1594 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1595 wakaba 1.1 }
1596 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1597 wakaba 1.2 unshift @nodes, @$sib;
1598 wakaba 1.4 push @$new_todos, @$ch;
1599 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1600     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1601 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1602 wakaba 1.1 }
1603     } elsif ($nt == 5) {
1604     unshift @nodes, @{$node->child_nodes};
1605     }
1606     }
1607 wakaba 1.4
1608     if ($todo->{inline}) {
1609     for (@$new_todos) {
1610     $_->{inline} = 1;
1611     }
1612     }
1613     return ($new_todos);
1614 wakaba 1.1 },
1615     };
1616    
1617     $Element->{$HTML_NS}->{ul} = {
1618 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1619 wakaba 1.1 checker => $Element->{$HTML_NS}->{ol}->{checker},
1620     };
1621    
1622 wakaba 1.5
1623     $Element->{$HTML_NS}->{li} = {
1624 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1625     start => sub {
1626     my ($self, $attr) = @_;
1627     my $parent = $attr->owner_element->manakai_parent_element;
1628     if (defined $parent) {
1629     my $parent_ns = $parent->namespace_uri;
1630     $parent_ns = '' unless defined $parent_ns;
1631     my $parent_ln = $parent->manakai_local_name;
1632     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1633     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1634     }
1635     }
1636     $HTMLIntegerAttrChecker->($self, $attr);
1637     },
1638     }),
1639 wakaba 1.5 checker => sub {
1640     my ($self, $todo) = @_;
1641     if ($todo->{inline}) {
1642     return $HTMLInlineChecker->($self, $todo);
1643     } else {
1644     return $HTMLBlockOrInlineChecker->($self, $todo);
1645     }
1646     },
1647     };
1648 wakaba 1.1
1649     $Element->{$HTML_NS}->{dl} = {
1650 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1651 wakaba 1.1 checker => sub {
1652 wakaba 1.4 my ($self, $todo) = @_;
1653     my $el = $todo->{node};
1654     my $new_todos = [];
1655 wakaba 1.1 my @nodes = (@{$el->child_nodes});
1656    
1657     my $phase = 'before dt';
1658     while (@nodes) {
1659     my $node = shift @nodes;
1660 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1661    
1662 wakaba 1.1 my $nt = $node->node_type;
1663     if ($nt == 1) {
1664 wakaba 1.8 my $node_ns = $node->namespace_uri;
1665     $node_ns = '' unless defined $node_ns;
1666     my $node_ln = $node->manakai_local_name;
1667 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
1668 wakaba 1.1 if ($phase eq 'in dds') {
1669 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1670 wakaba 1.1 #$phase = 'in dds';
1671 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1672 wakaba 1.1 $phase = 'in dts';
1673     } else {
1674 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1675 wakaba 1.1 }
1676     } elsif ($phase eq 'in dts') {
1677 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1678 wakaba 1.1 #$phase = 'in dts';
1679 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1680 wakaba 1.1 $phase = 'in dds';
1681     } else {
1682 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1683 wakaba 1.1 }
1684     } else { # before dt
1685 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'dt') {
1686 wakaba 1.1 $phase = 'in dts';
1687 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'dd') {
1688 wakaba 1.2 $self->{onerror}
1689 wakaba 1.3 ->(node => $node, type => 'ps element missing:dt');
1690 wakaba 1.1 $phase = 'in dds';
1691     } else {
1692 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
1693 wakaba 1.1 }
1694     }
1695 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1696 wakaba 1.2 unshift @nodes, @$sib;
1697 wakaba 1.4 push @$new_todos, @$ch;
1698 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
1699     if ($node->data =~ /[^\x09-\x0D\x20]/) {
1700 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
1701 wakaba 1.1 }
1702     } elsif ($nt == 5) {
1703     unshift @nodes, @{$node->child_nodes};
1704     }
1705     }
1706     if ($phase eq 'in dts') {
1707 wakaba 1.3 $self->{onerror}->(node => $el, type => 'ps element missing:dd');
1708 wakaba 1.1 }
1709 wakaba 1.4
1710     if ($todo->{inline}) {
1711     for (@$new_todos) {
1712     $_->{inline} = 1;
1713     }
1714     }
1715     return ($new_todos);
1716 wakaba 1.1 },
1717     };
1718    
1719     $Element->{$HTML_NS}->{dt} = {
1720 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1721 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1722     };
1723    
1724 wakaba 1.4 $Element->{$HTML_NS}->{dd} = {
1725 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1726 wakaba 1.5 checker => $Element->{$HTML_NS}->{li}->{checker},
1727 wakaba 1.4 };
1728 wakaba 1.1
1729 wakaba 1.6 $Element->{$HTML_NS}->{a} = {
1730 wakaba 1.17 attrs_checker => sub {
1731     my ($self, $todo) = @_;
1732 wakaba 1.15 my %attr;
1733 wakaba 1.17 for my $attr (@{$todo->{node}->attributes}) {
1734     my $attr_ns = $attr->namespace_uri;
1735     $attr_ns = '' unless defined $attr_ns;
1736     my $attr_ln = $attr->manakai_local_name;
1737     my $checker;
1738     if ($attr_ns eq '') {
1739     $checker = {
1740     target => $HTMLTargetAttrChecker,
1741     href => $HTMLURIAttrChecker,
1742     ping => $HTMLSpaceURIsAttrChecker,
1743 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(1, @_) },
1744 wakaba 1.17 media => $HTMLMQAttrChecker,
1745     hreflang => $HTMLLanguageTagAttrChecker,
1746     type => $HTMLIMTAttrChecker,
1747     }->{$attr_ln};
1748     if ($checker) {
1749     $attr{$attr_ln} = $attr;
1750     } else {
1751     $checker = $HTMLAttrChecker->{$attr_ln};
1752     }
1753     }
1754     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1755     || $AttrChecker->{$attr_ns}->{''};
1756     if ($checker) {
1757     $checker->($self, $attr) if ref $checker;
1758     } else {
1759     $self->{onerror}->(node => $attr, type => 'attribute not supported');
1760     ## ISSUE: No comformance createria for unknown attributes in the spec
1761     }
1762     }
1763    
1764     unless (defined $attr{href}) {
1765     for (qw/target ping rel media hreflang type/) {
1766     if (defined $attr{$_}) {
1767     $self->{onerror}->(node => $attr{$_},
1768     type => 'attribute not allowed');
1769 wakaba 1.15 }
1770     }
1771 wakaba 1.17 }
1772 wakaba 1.15 },
1773 wakaba 1.6 checker => sub {
1774     my ($self, $todo) = @_;
1775    
1776     my $end = $self->_add_minuses ($HTMLInteractiveElements);
1777     my ($sib, $ch)
1778     = $HTMLSignificantInlineOrStrictlyInlineChecker->($self, $todo);
1779     push @$sib, $end;
1780     return ($sib, $ch);
1781     },
1782     };
1783 wakaba 1.1
1784 wakaba 1.4 $Element->{$HTML_NS}->{q} = {
1785 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1786     cite => $HTMLURIAttrChecker,
1787     }),
1788 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1789     };
1790 wakaba 1.1
1791     $Element->{$HTML_NS}->{cite} = {
1792 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1793 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1794     };
1795    
1796 wakaba 1.4 $Element->{$HTML_NS}->{em} = {
1797 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1798 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1799     };
1800    
1801     $Element->{$HTML_NS}->{strong} = {
1802 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1803 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1804     };
1805 wakaba 1.1
1806 wakaba 1.4 $Element->{$HTML_NS}->{small} = {
1807 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1808 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1809     };
1810    
1811     $Element->{$HTML_NS}->{m} = {
1812 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1813 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1814     };
1815    
1816 wakaba 1.30 $Element->{$HTML_NS}->{dfn} = {
1817 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1818 wakaba 1.4 checker => sub {
1819     my ($self, $todo) = @_;
1820    
1821     my $end = $self->_add_minuses ({$HTML_NS => {dfn => 1}});
1822     my ($sib, $ch) = $HTMLStrictlyInlineChecker->($self, $todo);
1823     push @$sib, $end;
1824 wakaba 1.30
1825     my $node = $todo->{node};
1826     my $term = $node->get_attribute_ns (undef, 'title');
1827     unless (defined $term) {
1828     for my $child (@{$node->child_nodes}) {
1829     if ($child->node_type == 1) { # ELEMENT_NODE
1830     if (defined $term) {
1831     undef $term;
1832     last;
1833     } elsif ($child->manakai_local_name eq 'abbr') {
1834     my $nsuri = $child->namespace_uri;
1835     if (defined $nsuri and $nsuri eq $HTML_NS) {
1836     my $attr = $child->get_attribute_node_ns (undef, 'title');
1837     if ($attr) {
1838     $term = $attr->value;
1839     }
1840     }
1841     }
1842     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1843     ## TEXT_NODE or CDATA_SECTION_NODE
1844     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1845     next;
1846     }
1847     undef $term;
1848     last;
1849     }
1850     }
1851     unless (defined $term) {
1852     $term = $node->text_content;
1853     }
1854     }
1855     if ($self->{term}->{$term}) {
1856     $self->{onerror}->(node => $node, type => 'duplicate term');
1857     } else {
1858     $self->{term}->{$term} = 1;
1859     }
1860    
1861 wakaba 1.4 return ($sib, $ch);
1862     },
1863     };
1864 wakaba 1.1
1865     $Element->{$HTML_NS}->{abbr} = {
1866 wakaba 1.11 attrs_checker => $GetHTMLAttrsChecker->({
1867     ## NOTE: |title| has special semantics for |abbr|s, but is syntactically
1868     ## not different. The spec says that the |title| MAY be omitted
1869     ## if there is a |dfn| whose defining term is the abbreviation,
1870     ## but it does not prohibit |abbr| w/o |title| in other cases.
1871     }),
1872 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1873     };
1874    
1875 wakaba 1.11 $Element->{$HTML_NS}->{time} = { ## TODO: validate content
1876     attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO: datetime
1877 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1878     };
1879    
1880 wakaba 1.11 $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1881     attrs_checker => $GetHTMLAttrsChecker->({
1882     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1883     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1884     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1885     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1886     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1887     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1888     }),
1889 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1890     };
1891    
1892 wakaba 1.11 $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1893     attrs_checker => $GetHTMLAttrsChecker->({
1894     value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1895     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1896     }),
1897 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1898     };
1899    
1900 wakaba 1.4 $Element->{$HTML_NS}->{code} = {
1901 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1902 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1903     ## syntatically same as the |title| as global attribute.
1904 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1905     };
1906 wakaba 1.1
1907     $Element->{$HTML_NS}->{var} = {
1908 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1909 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1910     ## syntatically same as the |title| as global attribute.
1911 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1912     };
1913    
1914 wakaba 1.4 $Element->{$HTML_NS}->{samp} = {
1915 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1916 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1917     ## syntatically same as the |title| as global attribute.
1918 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1919     };
1920 wakaba 1.1
1921     $Element->{$HTML_NS}->{kbd} = {
1922 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1923 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1924     };
1925    
1926     $Element->{$HTML_NS}->{sub} = {
1927 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1928 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1929     };
1930    
1931     $Element->{$HTML_NS}->{sup} = {
1932 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1933 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1934     };
1935    
1936 wakaba 1.4 $Element->{$HTML_NS}->{span} = {
1937 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1938 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1939     ## syntatically same as the |title| as global attribute.
1940 wakaba 1.4 checker => $HTMLInlineOrStrictlyInlineChecker,
1941     };
1942 wakaba 1.1
1943     $Element->{$HTML_NS}->{i} = {
1944 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1945 wakaba 1.12 ## NOTE: Though |title| has special semantics,
1946     ## syntatically same as the |title| as global attribute.
1947 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1948     };
1949    
1950     $Element->{$HTML_NS}->{b} = {
1951 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
1952 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1953     };
1954    
1955     $Element->{$HTML_NS}->{bdo} = {
1956 wakaba 1.12 attrs_checker => sub {
1957     my ($self, $todo) = @_;
1958     $GetHTMLAttrsChecker->({})->($self, $todo);
1959     unless ($todo->{node}->has_attribute_ns (undef, 'dir')) {
1960     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:dir');
1961     }
1962     },
1963     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1964 wakaba 1.1 checker => $HTMLStrictlyInlineChecker,
1965     };
1966    
1967     $Element->{$HTML_NS}->{ins} = {
1968 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1969     cite => $HTMLURIAttrChecker,
1970 wakaba 1.30 datetime => $HTMLDatetimeAttrChecker,
1971 wakaba 1.12 }),
1972 wakaba 1.1 checker => $HTMLTransparentChecker,
1973     };
1974    
1975     $Element->{$HTML_NS}->{del} = {
1976 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
1977     cite => $HTMLURIAttrChecker,
1978 wakaba 1.30 datetime => $HTMLDatetimeAttrChecker,
1979 wakaba 1.12 }),
1980 wakaba 1.1 checker => sub {
1981 wakaba 1.4 my ($self, $todo) = @_;
1982 wakaba 1.1
1983 wakaba 1.4 my $parent = $todo->{node}->manakai_parent_element;
1984 wakaba 1.1 if (defined $parent) {
1985     my $nsuri = $parent->namespace_uri;
1986     $nsuri = '' unless defined $nsuri;
1987     my $ln = $parent->manakai_local_name;
1988     my $eldef = $Element->{$nsuri}->{$ln} ||
1989     $Element->{$nsuri}->{''} ||
1990     $ElementDefault;
1991 wakaba 1.4 return $eldef->{checker}->($self, $todo);
1992 wakaba 1.1 } else {
1993 wakaba 1.4 return $HTMLBlockOrInlineChecker->($self, $todo);
1994 wakaba 1.1 }
1995     },
1996     };
1997    
1998     ## TODO: figure
1999    
2000     $Element->{$HTML_NS}->{img} = {
2001 wakaba 1.17 attrs_checker => sub {
2002     my ($self, $todo) = @_;
2003     $GetHTMLAttrsChecker->({
2004     alt => sub { }, ## NOTE: No syntactical requirement
2005     src => $HTMLURIAttrChecker,
2006     usemap => $HTMLUsemapAttrChecker,
2007     ismap => $GetHTMLBooleanAttrChecker->('ismap'), ## TODO: MUST ancestor <a>
2008     ## TODO: height
2009     ## TODO: width
2010     })->($self, $todo);
2011     unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
2012     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
2013     }
2014     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2015     $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
2016     }
2017     },
2018 wakaba 1.1 checker => $HTMLEmptyChecker,
2019     };
2020    
2021     $Element->{$HTML_NS}->{iframe} = {
2022 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2023     src => $HTMLURIAttrChecker,
2024     }),
2025 wakaba 1.1 checker => $HTMLTextChecker,
2026     };
2027    
2028     $Element->{$HTML_NS}->{embed} = {
2029 wakaba 1.16 attrs_checker => sub {
2030     my ($self, $todo) = @_;
2031     my $has_src;
2032     for my $attr (@{$todo->{node}->attributes}) {
2033     my $attr_ns = $attr->namespace_uri;
2034     $attr_ns = '' unless defined $attr_ns;
2035     my $attr_ln = $attr->manakai_local_name;
2036     my $checker;
2037     if ($attr_ns eq '') {
2038     if ($attr_ln eq 'src') {
2039     $checker = $HTMLURIAttrChecker;
2040     $has_src = 1;
2041     } elsif ($attr_ln eq 'type') {
2042     $checker = $HTMLIMTAttrChecker;
2043     } else {
2044     ## TODO: height
2045     ## TODO: width
2046     $checker = $HTMLAttrChecker->{$attr_ln}
2047     || sub { }; ## NOTE: Any local attribute is ok.
2048     }
2049     }
2050     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2051     || $AttrChecker->{$attr_ns}->{''};
2052     if ($checker) {
2053     $checker->($self, $attr);
2054     } else {
2055     $self->{onerror}->(node => $attr, type => 'attribute not supported');
2056     ## ISSUE: No comformance createria for global attributes in the spec
2057     }
2058     }
2059    
2060     unless ($has_src) {
2061     $self->{onerror}->(node => $todo->{node},
2062     type => 'attribute missing:src');
2063     }
2064     },
2065 wakaba 1.1 checker => $HTMLEmptyChecker,
2066     };
2067    
2068 wakaba 1.15 $Element->{$HTML_NS}->{object} = {
2069 wakaba 1.17 attrs_checker => sub {
2070     my ($self, $todo) = @_;
2071     $GetHTMLAttrsChecker->({
2072     data => $HTMLURIAttrChecker,
2073     type => $HTMLIMTAttrChecker,
2074     usemap => $HTMLUsemapAttrChecker,
2075     ## TODO: width
2076     ## TODO: height
2077     })->($self, $todo);
2078     unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
2079     unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
2080     $self->{onerror}->(node => $todo->{node},
2081     type => 'attribute missing:data|type');
2082     }
2083     }
2084     },
2085 wakaba 1.15 checker => $ElementDefault->{checker}, ## TODO
2086     };
2087    
2088 wakaba 1.1 $Element->{$HTML_NS}->{param} = {
2089 wakaba 1.12 attrs_checker => sub {
2090     my ($self, $todo) = @_;
2091     $GetHTMLAttrsChecker->({
2092     name => sub { },
2093     value => sub { },
2094     })->($self, $todo);
2095     unless ($todo->{node}->has_attribute_ns (undef, 'name')) {
2096     $self->{onerror}->(node => $todo->{node},
2097     type => 'attribute missing:name');
2098     }
2099     unless ($todo->{node}->has_attribute_ns (undef, 'value')) {
2100     $self->{onerror}->(node => $todo->{node},
2101     type => 'attribute missing:value');
2102     }
2103     },
2104 wakaba 1.1 checker => $HTMLEmptyChecker,
2105     };
2106    
2107 wakaba 1.2 $Element->{$HTML_NS}->{video} = {
2108 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2109     src => $HTMLURIAttrChecker,
2110     ## TODO: start, loopstart, loopend, end
2111     ## ISSUE: they MUST be "value time offset"s. Value?
2112     ## ISSUE: loopcount has no conformance creteria
2113     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2114     controls => $GetHTMLBooleanAttrChecker->('controls'),
2115     }),
2116 wakaba 1.2 checker => sub {
2117 wakaba 1.4 my ($self, $todo) = @_;
2118 wakaba 1.2
2119 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2120     return $HTMLBlockOrInlineChecker->($self, $todo);
2121 wakaba 1.2 } else {
2122     return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2123 wakaba 1.4 ->($self, $todo);
2124 wakaba 1.2 }
2125     },
2126     };
2127    
2128     $Element->{$HTML_NS}->{audio} = {
2129 wakaba 1.12 attrs_checker => $Element->{$HTML_NS}->{video}->{attrs_checker},
2130     checker => $Element->{$HTML_NS}->{video}->{checker},
2131 wakaba 1.2 };
2132 wakaba 1.1
2133     $Element->{$HTML_NS}->{source} = {
2134 wakaba 1.17 attrs_checker => sub {
2135     my ($self, $todo) = @_;
2136     $GetHTMLAttrsChecker->({
2137     src => $HTMLURIAttrChecker,
2138     type => $HTMLIMTAttrChecker,
2139     media => $HTMLMQAttrChecker,
2140     })->($self, $todo);
2141     unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
2142     $self->{onerror}->(node => $todo->{node},
2143     type => 'attribute missing:src');
2144     }
2145     },
2146 wakaba 1.1 checker => $HTMLEmptyChecker,
2147     };
2148    
2149     $Element->{$HTML_NS}->{canvas} = {
2150 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2151     height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2152     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2153     }),
2154 wakaba 1.1 checker => $HTMLInlineChecker,
2155     };
2156    
2157     $Element->{$HTML_NS}->{map} = {
2158 wakaba 1.17 attrs_checker => $GetHTMLAttrsChecker->({
2159     id => sub {
2160     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2161     my ($self, $attr) = @_;
2162     my $value = $attr->value;
2163     if (length $value > 0) {
2164     if ($self->{id}->{$value}) {
2165     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2166     } else {
2167     $self->{id}->{$value} = 1;
2168     }
2169     } else {
2170     ## NOTE: MUST contain at least one character
2171     $self->{onerror}->(node => $attr, type => 'attribute value is empty');
2172     }
2173 wakaba 1.27 if ($value =~ /[\x09-\x0D\x20]/) {
2174     $self->{onerror}->(node => $attr, type => 'space in ID');
2175     }
2176 wakaba 1.17 $self->{map}->{$value} ||= $attr;
2177     },
2178     }),
2179 wakaba 1.1 checker => $HTMLBlockChecker,
2180     };
2181    
2182     $Element->{$HTML_NS}->{area} = {
2183 wakaba 1.15 attrs_checker => sub {
2184     my ($self, $todo) = @_;
2185     my %attr;
2186     my $coords;
2187     for my $attr (@{$todo->{node}->attributes}) {
2188     my $attr_ns = $attr->namespace_uri;
2189     $attr_ns = '' unless defined $attr_ns;
2190     my $attr_ln = $attr->manakai_local_name;
2191     my $checker;
2192     if ($attr_ns eq '') {
2193     $checker = {
2194     alt => sub { },
2195     ## NOTE: |alt| value has no conformance creteria.
2196     shape => $GetHTMLEnumeratedAttrChecker->({
2197     circ => -1, circle => 1,
2198     default => 1,
2199     poly => 1, polygon => -1,
2200     rect => 1, rectangle => -1,
2201     }),
2202     coords => sub {
2203     my ($self, $attr) = @_;
2204     my $value = $attr->value;
2205     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2206     $coords = [split /,/, $value];
2207     } else {
2208     $self->{onerror}->(node => $attr,
2209     type => 'syntax error');
2210     }
2211     },
2212 wakaba 1.17 target => $HTMLTargetAttrChecker,
2213 wakaba 1.15 href => $HTMLURIAttrChecker,
2214     ping => $HTMLSpaceURIsAttrChecker,
2215 wakaba 1.20 rel => sub { $HTMLLinkTypesAttrChecker->(1, @_) },
2216 wakaba 1.17 media => $HTMLMQAttrChecker,
2217     hreflang => $HTMLLanguageTagAttrChecker,
2218 wakaba 1.15 type => $HTMLIMTAttrChecker,
2219     }->{$attr_ln};
2220     if ($checker) {
2221     $attr{$attr_ln} = $attr;
2222     } else {
2223     $checker = $HTMLAttrChecker->{$attr_ln};
2224     }
2225     }
2226     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2227     || $AttrChecker->{$attr_ns}->{''};
2228     if ($checker) {
2229     $checker->($self, $attr) if ref $checker;
2230     } else {
2231     $self->{onerror}->(node => $attr, type => 'attribute not supported');
2232     ## ISSUE: No comformance createria for unknown attributes in the spec
2233     }
2234     }
2235    
2236     if (defined $attr{href}) {
2237     unless (defined $attr{alt}) {
2238     $self->{onerror}->(node => $todo->{node},
2239     type => 'attribute missing:alt');
2240     }
2241     } else {
2242     for (qw/target ping rel media hreflang type alt/) {
2243     if (defined $attr{$_}) {
2244     $self->{onerror}->(node => $attr{$_},
2245     type => 'attribute not allowed');
2246     }
2247     }
2248     }
2249    
2250     my $shape = 'rectangle';
2251     if (defined $attr{shape}) {
2252     $shape = {
2253     circ => 'circle', circle => 'circle',
2254     default => 'default',
2255     poly => 'polygon', polygon => 'polygon',
2256     rect => 'rectangle', rectangle => 'rectangle',
2257     }->{lc $attr{shape}->value} || 'rectangle';
2258     ## TODO: ASCII lowercase?
2259     }
2260    
2261     if ($shape eq 'circle') {
2262     if (defined $attr{coords}) {
2263     if (defined $coords) {
2264     if (@$coords == 3) {
2265     if ($coords->[2] < 0) {
2266     $self->{onerror}->(node => $attr{coords},
2267     type => 'out of range:2');
2268     }
2269     } else {
2270     $self->{onerror}->(node => $attr{coords},
2271     type => 'list item number:3:'.@$coords);
2272     }
2273     } else {
2274     ## NOTE: A syntax error has been reported.
2275     }
2276     } else {
2277     $self->{onerror}->(node => $todo->{node},
2278     type => 'attribute missing:coords');
2279     }
2280     } elsif ($shape eq 'default') {
2281     if (defined $attr{coords}) {
2282     $self->{onerror}->(node => $attr{coords},
2283     type => 'attribute not allowed');
2284     }
2285     } elsif ($shape eq 'polygon') {
2286     if (defined $attr{coords}) {
2287     if (defined $coords) {
2288     if (@$coords >= 6) {
2289     unless (@$coords % 2 == 0) {
2290     $self->{onerror}->(node => $attr{coords},
2291     type => 'list item number:even:'.@$coords);
2292     }
2293     } else {
2294     $self->{onerror}->(node => $attr{coords},
2295     type => 'list item number:>=6:'.@$coords);
2296     }
2297     } else {
2298     ## NOTE: A syntax error has been reported.
2299     }
2300     } else {
2301     $self->{onerror}->(node => $todo->{node},
2302     type => 'attribute missing:coords');
2303     }
2304     } elsif ($shape eq 'rectangle') {
2305     if (defined $attr{coords}) {
2306     if (defined $coords) {
2307     if (@$coords == 4) {
2308     unless ($coords->[0] < $coords->[2]) {
2309     $self->{onerror}->(node => $attr{coords},
2310     type => 'out of range:0');
2311     }
2312     unless ($coords->[1] < $coords->[3]) {
2313     $self->{onerror}->(node => $attr{coords},
2314     type => 'out of range:1');
2315     }
2316     } else {
2317     $self->{onerror}->(node => $attr{coords},
2318     type => 'list item number:4:'.@$coords);
2319     }
2320     } else {
2321     ## NOTE: A syntax error has been reported.
2322     }
2323     } else {
2324     $self->{onerror}->(node => $todo->{node},
2325     type => 'attribute missing:coords');
2326     }
2327     }
2328     },
2329 wakaba 1.1 checker => $HTMLEmptyChecker,
2330     };
2331     ## TODO: only in map
2332    
2333     $Element->{$HTML_NS}->{table} = {
2334 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2335 wakaba 1.1 checker => sub {
2336 wakaba 1.4 my ($self, $todo) = @_;
2337     my $el = $todo->{node};
2338     my $new_todos = [];
2339 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2340    
2341     my $phase = 'before caption';
2342     my $has_tfoot;
2343     while (@nodes) {
2344     my $node = shift @nodes;
2345 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2346    
2347 wakaba 1.1 my $nt = $node->node_type;
2348     if ($nt == 1) {
2349 wakaba 1.8 my $node_ns = $node->namespace_uri;
2350     $node_ns = '' unless defined $node_ns;
2351     my $node_ln = $node->manakai_local_name;
2352 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2353 wakaba 1.1 if ($phase eq 'in tbodys') {
2354 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2355 wakaba 1.1 #$phase = 'in tbodys';
2356     } elsif (not $has_tfoot and
2357 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2358 wakaba 1.1 $phase = 'after tfoot';
2359     $has_tfoot = 1;
2360     } else {
2361 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2362 wakaba 1.1 }
2363     } elsif ($phase eq 'in trs') {
2364 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2365 wakaba 1.1 #$phase = 'in trs';
2366     } elsif (not $has_tfoot and
2367 wakaba 1.8 $node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2368 wakaba 1.1 $phase = 'after tfoot';
2369     $has_tfoot = 1;
2370     } else {
2371 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2372 wakaba 1.1 }
2373     } elsif ($phase eq 'after thead') {
2374 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2375 wakaba 1.1 $phase = 'in tbodys';
2376 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2377 wakaba 1.1 $phase = 'in trs';
2378 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2379 wakaba 1.1 $phase = 'in tbodys';
2380     $has_tfoot = 1;
2381     } else {
2382 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2383 wakaba 1.1 }
2384     } elsif ($phase eq 'in colgroup') {
2385 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2386 wakaba 1.1 $phase = 'in colgroup';
2387 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2388 wakaba 1.1 $phase = 'after thead';
2389 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2390 wakaba 1.1 $phase = 'in tbodys';
2391 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2392 wakaba 1.1 $phase = 'in trs';
2393 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2394 wakaba 1.1 $phase = 'in tbodys';
2395     $has_tfoot = 1;
2396     } else {
2397 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2398 wakaba 1.1 }
2399     } elsif ($phase eq 'before caption') {
2400 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'caption') {
2401 wakaba 1.1 $phase = 'in colgroup';
2402 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'colgroup') {
2403 wakaba 1.1 $phase = 'in colgroup';
2404 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'thead') {
2405 wakaba 1.1 $phase = 'after thead';
2406 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tbody') {
2407 wakaba 1.1 $phase = 'in tbodys';
2408 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2409 wakaba 1.1 $phase = 'in trs';
2410 wakaba 1.8 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'tfoot') {
2411 wakaba 1.1 $phase = 'in tbodys';
2412     $has_tfoot = 1;
2413     } else {
2414 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2415 wakaba 1.1 }
2416     } else { # after tfoot
2417 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2418 wakaba 1.1 }
2419 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2420 wakaba 1.2 unshift @nodes, @$sib;
2421 wakaba 1.4 push @$new_todos, @$ch;
2422 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2423     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2424 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2425 wakaba 1.1 }
2426     } elsif ($nt == 5) {
2427     unshift @nodes, @{$node->child_nodes};
2428     }
2429     }
2430 wakaba 1.21
2431     ## Table model errors
2432     require Whatpm::HTMLTable;
2433     Whatpm::HTMLTable->form_table ($todo->{node}, sub {
2434     my %opt = @_;
2435     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2436     });
2437    
2438 wakaba 1.4 return ($new_todos);
2439 wakaba 1.1 },
2440     };
2441    
2442     $Element->{$HTML_NS}->{caption} = {
2443 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2444 wakaba 1.1 checker => $HTMLSignificantStrictlyInlineChecker,
2445     };
2446    
2447     $Element->{$HTML_NS}->{colgroup} = {
2448 wakaba 1.17 attrs_checker => $GetHTMLAttrsChecker->({
2449     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2450     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2451     ## TODO: "attribute not supported" if |col|.
2452     ## ISSUE: MUST NOT if any |col|?
2453     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2454     }),
2455 wakaba 1.1 checker => sub {
2456 wakaba 1.4 my ($self, $todo) = @_;
2457     my $el = $todo->{node};
2458     my $new_todos = [];
2459 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2460    
2461     while (@nodes) {
2462     my $node = shift @nodes;
2463 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2464    
2465 wakaba 1.1 my $nt = $node->node_type;
2466     if ($nt == 1) {
2467 wakaba 1.8 my $node_ns = $node->namespace_uri;
2468     $node_ns = '' unless defined $node_ns;
2469     my $node_ln = $node->manakai_local_name;
2470 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2471 wakaba 1.8 unless ($node_ns eq $HTML_NS and $node_ln eq 'col') {
2472 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2473 wakaba 1.1 }
2474 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2475 wakaba 1.2 unshift @nodes, @$sib;
2476 wakaba 1.4 push @$new_todos, @$ch;
2477 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2478     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2479 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2480 wakaba 1.1 }
2481     } elsif ($nt == 5) {
2482     unshift @nodes, @{$node->child_nodes};
2483     }
2484     }
2485 wakaba 1.4 return ($new_todos);
2486 wakaba 1.1 },
2487     };
2488    
2489     $Element->{$HTML_NS}->{col} = {
2490 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2491     span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2492     }),
2493 wakaba 1.1 checker => $HTMLEmptyChecker,
2494     };
2495    
2496     $Element->{$HTML_NS}->{tbody} = {
2497 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2498 wakaba 1.1 checker => sub {
2499 wakaba 1.4 my ($self, $todo) = @_;
2500     my $el = $todo->{node};
2501     my $new_todos = [];
2502 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2503    
2504     my $has_tr;
2505     while (@nodes) {
2506     my $node = shift @nodes;
2507 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2508    
2509 wakaba 1.1 my $nt = $node->node_type;
2510     if ($nt == 1) {
2511 wakaba 1.8 my $node_ns = $node->namespace_uri;
2512     $node_ns = '' unless defined $node_ns;
2513     my $node_ln = $node->manakai_local_name;
2514 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2515 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'tr') {
2516 wakaba 1.1 $has_tr = 1;
2517     } else {
2518 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2519 wakaba 1.1 }
2520 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2521 wakaba 1.2 unshift @nodes, @$sib;
2522 wakaba 1.4 push @$new_todos, @$ch;
2523 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2524     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2525 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2526 wakaba 1.1 }
2527     } elsif ($nt == 5) {
2528     unshift @nodes, @{$node->child_nodes};
2529     }
2530     }
2531     unless ($has_tr) {
2532 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:tr');
2533 wakaba 1.1 }
2534 wakaba 1.4 return ($new_todos);
2535 wakaba 1.1 },
2536     };
2537    
2538     $Element->{$HTML_NS}->{thead} = {
2539 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2540 wakaba 1.23 checker => $Element->{$HTML_NS}->{tbody}->{checker},
2541 wakaba 1.1 };
2542    
2543     $Element->{$HTML_NS}->{tfoot} = {
2544 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2545 wakaba 1.23 checker => $Element->{$HTML_NS}->{tbody}->{checker},
2546 wakaba 1.1 };
2547    
2548     $Element->{$HTML_NS}->{tr} = {
2549 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2550 wakaba 1.1 checker => sub {
2551 wakaba 1.4 my ($self, $todo) = @_;
2552     my $el = $todo->{node};
2553     my $new_todos = [];
2554 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2555    
2556     my $has_td;
2557     while (@nodes) {
2558     my $node = shift @nodes;
2559 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2560    
2561 wakaba 1.1 my $nt = $node->node_type;
2562     if ($nt == 1) {
2563 wakaba 1.8 my $node_ns = $node->namespace_uri;
2564     $node_ns = '' unless defined $node_ns;
2565     my $node_ln = $node->manakai_local_name;
2566 wakaba 1.2 ## NOTE: |minuses| list is not checked since redundant
2567 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'td' or $node_ln eq 'th')) {
2568 wakaba 1.1 $has_td = 1;
2569     } else {
2570 wakaba 1.2 $self->{onerror}->(node => $node, type => 'element not allowed');
2571 wakaba 1.1 }
2572 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2573 wakaba 1.2 unshift @nodes, @$sib;
2574 wakaba 1.4 push @$new_todos, @$ch;
2575 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2576     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2577 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2578 wakaba 1.1 }
2579     } elsif ($nt == 5) {
2580     unshift @nodes, @{$node->child_nodes};
2581     }
2582     }
2583     unless ($has_td) {
2584 wakaba 1.3 $self->{onerror}->(node => $el, type => 'child element missing:td|th');
2585 wakaba 1.1 }
2586 wakaba 1.4 return ($new_todos);
2587 wakaba 1.1 },
2588     };
2589    
2590     $Element->{$HTML_NS}->{td} = {
2591 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2592     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2593     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2594     }),
2595 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
2596     };
2597    
2598     $Element->{$HTML_NS}->{th} = {
2599 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2600     colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2601     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2602     scope => $GetHTMLEnumeratedAttrChecker
2603     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2604     }),
2605 wakaba 1.1 checker => $HTMLBlockOrInlineChecker,
2606     };
2607    
2608     ## TODO: forms
2609    
2610 wakaba 1.2 $Element->{$HTML_NS}->{script} = {
2611 wakaba 1.25 attrs_checker => sub {
2612     my ($self, $todo) = @_;
2613     $GetHTMLAttrsChecker->({
2614     src => $HTMLURIAttrChecker,
2615     defer => $GetHTMLBooleanAttrChecker->('defer'),
2616     async => $GetHTMLBooleanAttrChecker->('async'),
2617     type => $HTMLIMTAttrChecker,
2618     })->($self, $todo);
2619     if ($todo->{node}->has_attribute_ns (undef, 'defer')) {
2620     my $async_attr = $todo->{node}->get_attribute_node_ns (undef, 'async');
2621     if ($async_attr) {
2622     $self->{onerror}->(node => $async_attr,
2623     type => 'attribute not allowed'); # MUST NOT
2624     }
2625     }
2626     },
2627 wakaba 1.2 checker => sub {
2628 wakaba 1.4 my ($self, $todo) = @_;
2629 wakaba 1.2
2630 wakaba 1.4 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2631     return $HTMLEmptyChecker->($self, $todo);
2632 wakaba 1.2 } else {
2633     ## NOTE: No content model conformance in HTML5 spec.
2634 wakaba 1.4 return $AnyChecker->($self, $todo);
2635 wakaba 1.2 }
2636     },
2637     };
2638    
2639     ## NOTE: When script is disabled.
2640     $Element->{$HTML_NS}->{noscript} = {
2641 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2642 wakaba 1.2 checker => sub {
2643 wakaba 1.4 my ($self, $todo) = @_;
2644 wakaba 1.1
2645 wakaba 1.2 my $end = $self->_add_minuses ({$HTML_NS => {noscript => 1}});
2646 wakaba 1.4 my ($sib, $ch) = $HTMLBlockOrInlineChecker->($self, $todo);
2647 wakaba 1.2 push @$sib, $end;
2648     return ($sib, $ch);
2649     },
2650     };
2651 wakaba 1.29 ## TODO: noscript in head
2652 wakaba 1.1
2653     $Element->{$HTML_NS}->{'event-source'} = {
2654 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2655     src => $HTMLURIAttrChecker,
2656     }),
2657 wakaba 1.1 checker => $HTMLEmptyChecker,
2658     };
2659    
2660     $Element->{$HTML_NS}->{details} = {
2661 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2662     open => $GetHTMLBooleanAttrChecker->('open'),
2663     }),
2664 wakaba 1.6 checker => sub {
2665     my ($self, $todo) = @_;
2666    
2667     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2668     my ($sib, $ch)
2669     = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2670     ->($self, $todo);
2671     push @$sib, $end;
2672     return ($sib, $ch);
2673     },
2674 wakaba 1.1 };
2675    
2676     $Element->{$HTML_NS}->{datagrid} = {
2677 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({
2678     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2679     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2680     }),
2681 wakaba 1.6 checker => sub {
2682     my ($self, $todo) = @_;
2683    
2684     my $end = $self->_add_minuses ({$HTML_NS => {a => 1, datagrid => 1}});
2685     my ($sib, $ch) = $HTMLBlockChecker->($self, $todo);
2686 wakaba 1.25 ## TODO: (Block-table)+ | table | select | datalist
2687 wakaba 1.6 push @$sib, $end;
2688     return ($sib, $ch);
2689     },
2690 wakaba 1.1 };
2691    
2692     $Element->{$HTML_NS}->{command} = {
2693 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2694 wakaba 1.1 checker => $HTMLEmptyChecker,
2695     };
2696    
2697     $Element->{$HTML_NS}->{menu} = {
2698 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2699 wakaba 1.1 checker => sub {
2700 wakaba 1.4 my ($self, $todo) = @_;
2701     my $el = $todo->{node};
2702     my $new_todos = [];
2703 wakaba 1.1 my @nodes = (@{$el->child_nodes});
2704    
2705     my $content = 'li or inline';
2706     while (@nodes) {
2707     my $node = shift @nodes;
2708 wakaba 1.2 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2709    
2710 wakaba 1.1 my $nt = $node->node_type;
2711     if ($nt == 1) {
2712 wakaba 1.2 my $node_ns = $node->namespace_uri;
2713     $node_ns = '' unless defined $node_ns;
2714     my $node_ln = $node->manakai_local_name;
2715 wakaba 1.6 my $not_allowed = $self->{minuses}->{$node_ns}->{$node_ln};
2716 wakaba 1.8 if ($node_ns eq $HTML_NS and $node_ln eq 'li') {
2717 wakaba 1.1 if ($content eq 'inline') {
2718 wakaba 1.6 $not_allowed = 1;
2719 wakaba 1.1 } elsif ($content eq 'li or inline') {
2720     $content = 'li';
2721     }
2722     } else {
2723 wakaba 1.7 if ($HTMLStrictlyInlineLevelElements->{$node_ns}->{$node_ln} or
2724     $HTMLStructuredInlineLevelElements->{$node_ns}->{$node_ln}) {
2725     $content = 'inline';
2726     } else {
2727 wakaba 1.6 $not_allowed = 1;
2728 wakaba 1.7 }
2729 wakaba 1.1 }
2730 wakaba 1.6 $self->{onerror}->(node => $node, type => 'element not allowed')
2731     if $not_allowed;
2732 wakaba 1.30 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2733 wakaba 1.2 unshift @nodes, @$sib;
2734 wakaba 1.4 push @$new_todos, @$ch;
2735 wakaba 1.1 } elsif ($nt == 3 or $nt == 4) {
2736     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2737     if ($content eq 'li') {
2738 wakaba 1.2 $self->{onerror}->(node => $node, type => 'character not allowed');
2739 wakaba 1.1 } elsif ($content eq 'li or inline') {
2740     $content = 'inline';
2741     }
2742     }
2743     } elsif ($nt == 5) {
2744     unshift @nodes, @{$node->child_nodes};
2745     }
2746     }
2747 wakaba 1.4
2748     for (@$new_todos) {
2749     $_->{inline} = 1;
2750     }
2751     return ($new_todos);
2752 wakaba 1.1 },
2753     };
2754    
2755 wakaba 1.6 $Element->{$HTML_NS}->{legend} = {
2756 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2757 wakaba 1.6 checker => sub {
2758     my ($self, $todo) = @_;
2759    
2760     my $parent = $todo->{node}->manakai_parent_element;
2761     if (defined $parent) {
2762     my $nsuri = $parent->namespace_uri;
2763     $nsuri = '' unless defined $nsuri;
2764     my $ln = $parent->manakai_local_name;
2765     if ($nsuri eq $HTML_NS and $ln eq 'figure') {
2766     return $HTMLInlineChecker->($self, $todo);
2767     } else {
2768     return $HTMLSignificantStrictlyInlineChecker->($self, $todo);
2769     }
2770     } else {
2771     return $HTMLInlineChecker->($self, $todo);
2772     }
2773    
2774     ## ISSUE: Content model is defined only for fieldset/legend,
2775     ## details/legend, and figure/legend.
2776     },
2777     };
2778 wakaba 1.1
2779     $Element->{$HTML_NS}->{div} = {
2780 wakaba 1.10 attrs_checker => $GetHTMLAttrsChecker->({}),
2781 wakaba 1.2 checker => $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'style'),
2782 wakaba 1.1 };
2783    
2784     $Element->{$HTML_NS}->{font} = {
2785 wakaba 1.12 attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO
2786 wakaba 1.1 checker => $HTMLTransparentChecker,
2787     };
2788    
2789 wakaba 1.24 sub check_document ($$$) {
2790     my ($self, $doc, $onerror) = @_;
2791     $self = bless {}, $self unless ref $self;
2792     $self->{onerror} = $onerror;
2793    
2794     my $docel = $doc->document_element;
2795 wakaba 1.26 unless (defined $docel) {
2796     ## ISSUE: Should we check content of Document node?
2797     $onerror->(node => $doc, type => 'no document element');
2798     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
2799     return;
2800     }
2801    
2802     ## ISSUE: Unexpanded entity references and HTML5 conformance
2803    
2804 wakaba 1.24 my $docel_nsuri = $docel->namespace_uri;
2805     $docel_nsuri = '' unless defined $docel_nsuri;
2806     my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
2807     $Element->{$docel_nsuri}->{''} ||
2808     $ElementDefault;
2809     if ($docel_def->{is_root}) {
2810     #
2811     } else {
2812     $onerror->(node => $docel, type => 'element not allowed');
2813     }
2814    
2815     ## TODO: Check for other items other than document element
2816     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
2817    
2818     $self->check_element ($docel, $onerror);
2819     } # check_document
2820 wakaba 1.2
2821 wakaba 1.1 sub check_element ($$$) {
2822     my ($self, $el, $onerror) = @_;
2823 wakaba 1.24 $self = bless {}, $self unless ref $self;
2824     $self->{onerror} = $onerror;
2825 wakaba 1.1
2826 wakaba 1.2 $self->{minuses} = {};
2827 wakaba 1.10 $self->{id} = {};
2828 wakaba 1.30 $self->{term} = {};
2829 wakaba 1.17 $self->{usemap} = [];
2830     $self->{map} = {};
2831 wakaba 1.20 $self->{has_link_type} = {};
2832 wakaba 1.2
2833 wakaba 1.4 my @todo = ({type => 'element', node => $el});
2834     while (@todo) {
2835     my $todo = shift @todo;
2836     if ($todo->{type} eq 'element') {
2837 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2838     if (defined $prefix and $prefix eq 'xmlns') {
2839     $self->{onerror}
2840     ->(node => $todo->{node},
2841     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2842     }
2843 wakaba 1.4 my $nsuri = $todo->{node}->namespace_uri;
2844     $nsuri = '' unless defined $nsuri;
2845     my $ln = $todo->{node}->manakai_local_name;
2846     my $eldef = $Element->{$nsuri}->{$ln} ||
2847     $Element->{$nsuri}->{''} ||
2848     $ElementDefault;
2849 wakaba 1.9 $eldef->{attrs_checker}->($self, $todo);
2850 wakaba 1.4 my ($new_todos) = $eldef->{checker}->($self, $todo);
2851 wakaba 1.14 unshift @todo, @$new_todos;
2852 wakaba 1.9 } elsif ($todo->{type} eq 'element-attributes') {
2853 wakaba 1.13 my $prefix = $todo->{node}->prefix;
2854     if (defined $prefix and $prefix eq 'xmlns') {
2855     $self->{onerror}
2856     ->(node => $todo->{node},
2857     type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>');
2858     }
2859 wakaba 1.9 my $nsuri = $todo->{node}->namespace_uri;
2860     $nsuri = '' unless defined $nsuri;
2861     my $ln = $todo->{node}->manakai_local_name;
2862     my $eldef = $Element->{$nsuri}->{$ln} ||
2863     $Element->{$nsuri}->{''} ||
2864     $ElementDefault;
2865     $eldef->{attrs_checker}->($self, $todo);
2866 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
2867     $self->_remove_minuses ($todo);
2868 wakaba 1.30 } elsif ($todo->{type} eq 'code') {
2869     $todo->{code}->();
2870     } else {
2871     die "$0: Internal error: Unsupported checking action type |$todo->{type}|";
2872 wakaba 1.4 }
2873 wakaba 1.1 }
2874 wakaba 1.17
2875     for (@{$self->{usemap}}) {
2876     unless ($self->{map}->{$_->[0]}) {
2877     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
2878     }
2879     }
2880    
2881     delete $self->{minuses};
2882     delete $self->{onerror};
2883     delete $self->{id};
2884     delete $self->{usemap};
2885     delete $self->{map};
2886 wakaba 1.1 } # check_element
2887    
2888 wakaba 1.2 sub _add_minuses ($@) {
2889     my $self = shift;
2890     my $r = {};
2891     for my $list (@_) {
2892     for my $ns (keys %$list) {
2893     for my $ln (keys %{$list->{$ns}}) {
2894     unless ($self->{minuses}->{$ns}->{$ln}) {
2895     $self->{minuses}->{$ns}->{$ln} = 1;
2896     $r->{$ns}->{$ln} = 1;
2897     }
2898     }
2899     }
2900     }
2901 wakaba 1.4 return {type => 'plus', list => $r};
2902 wakaba 1.2 } # _add_minuses
2903    
2904     sub _remove_minuses ($$) {
2905 wakaba 1.4 my ($self, $todo) = @_;
2906     for my $ns (keys %{$todo->{list}}) {
2907     for my $ln (keys %{$todo->{list}->{$ns}}) {
2908     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
2909 wakaba 1.2 }
2910     }
2911     1;
2912     } # _remove_minuses
2913    
2914 wakaba 1.30 sub _check_get_children ($$$) {
2915     my ($self, $node, $parent_todo) = @_;
2916 wakaba 1.4 my $new_todos = [];
2917 wakaba 1.2 my $sib = [];
2918     TP: {
2919     my $node_ns = $node->namespace_uri;
2920     $node_ns = '' unless defined $node_ns;
2921     my $node_ln = $node->manakai_local_name;
2922     if ($node_ns eq $HTML_NS) {
2923     if ($node_ln eq 'noscript') {
2924     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
2925     push @$sib, $end;
2926     }
2927     }
2928 wakaba 1.29 ## TODO: |script| is not a transparent element in |head|.
2929 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
2930     unshift @$sib, @{$node->child_nodes};
2931 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2932 wakaba 1.7 last TP;
2933 wakaba 1.2 }
2934 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
2935 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
2936     unshift @$sib, @{$node->child_nodes};
2937 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
2938 wakaba 1.2 last TP;
2939     } else {
2940     my @cn = @{$node->child_nodes};
2941     CN: while (@cn) {
2942     my $cn = shift @cn;
2943     my $cnt = $cn->node_type;
2944     if ($cnt == 1) {
2945 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
2946     $cn_nsuri = '' unless defined $cn_nsuri;
2947     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
2948 wakaba 1.2 #
2949     } else {
2950     last CN;
2951     }
2952     } elsif ($cnt == 3 or $cnt == 4) {
2953     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
2954     last CN;
2955     }
2956     }
2957     } # CN
2958     unshift @$sib, @cn;
2959     }
2960     }
2961 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
2962 wakaba 1.2 } # TP
2963 wakaba 1.30
2964     for my $new_todo (@$new_todos) {
2965     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
2966     }
2967    
2968 wakaba 1.4 return ($sib, $new_todos);
2969 wakaba 1.2 } # _check_get_children
2970    
2971 wakaba 1.1 1;
2972 wakaba 1.30 # $Date: 2007/06/24 05:12:11 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24