/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.111 - (hide annotations) (download)
Sun Aug 30 03:40:50 2009 UTC (15 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.110: +6 -6 lines
++ whatpm/t/dom-conformance/ChangeLog	30 Aug 2009 03:35:02 -0000
2009-08-30  Wakaba  <wakaba@suika.fam.cx>

	* xml-global.dat: Added a test of |xml:lang| attribute on a
	foreign element in an HTML document (HTML5 revision 3697).

++ whatpm/Whatpm/ChangeLog	30 Aug 2009 03:40:20 -0000
2009-08-30  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm: Allow |xml:lang| in HTML document if the
	owner element is not an HTML element (HTML5 revision 3697).

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.111 our $VERSION=do{my @r=(q$Revision: 1.110 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
12 wakaba 1.70 ## Stability
13 wakaba 1.67 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14     sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15     sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16     sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17    
18 wakaba 1.70 ## Deprecated
19     sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20     sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21    
22     ## Conformance
23     sub FEATURE_ALLOWED () { 0b10000 }
24    
25 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28    
29 wakaba 1.42 my $Namespace = {
30 wakaba 1.79 '' => {loaded => 1},
31 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
32 wakaba 1.72 q<http://purl.org/syndication/history/1.0>
33     => {module => 'Whatpm::ContentChecker::Atom'},
34     q<http://purl.org/syndication/threading/1.0>
35     => {module => 'Whatpm::ContentChecker::Atom'},
36 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
37     $XML_NS => {loaded => 1},
38     $XMLNS_NS => {loaded => 1},
39 wakaba 1.73 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
40 wakaba 1.42 };
41    
42 wakaba 1.79 sub load_ns_module ($) {
43     my $nsuri = shift; # namespace URI or ''
44     unless ($Namespace->{$nsuri}->{loaded}) {
45     if ($Namespace->{$nsuri}->{module}) {
46     eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
47     } else {
48     $Namespace->{$nsuri}->{loaded} = 1;
49     }
50     }
51     } # load_ns_module
52    
53 wakaba 1.42 our $AttrChecker = {
54 wakaba 1.9 $XML_NS => {
55 wakaba 1.13 space => sub {
56     my ($self, $attr) = @_;
57     my $value = $attr->value;
58     if ($value eq 'default' or $value eq 'preserve') {
59     #
60     } else {
61     ## NOTE: An XML "error"
62 wakaba 1.83 $self->{onerror}->(node => $attr, level => $self->{level}->{xml_error},
63 wakaba 1.33 type => 'invalid attribute value');
64 wakaba 1.13 }
65     },
66     lang => sub {
67 wakaba 1.35 my ($self, $attr) = @_;
68 wakaba 1.47 my $value = $attr->value;
69     if ($value eq '') {
70     #
71     } else {
72     require Whatpm::LangTag;
73     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
74 wakaba 1.83 $self->{onerror}->(@_, node => $attr);
75 wakaba 1.85 }, $self->{level});
76 wakaba 1.47 }
77    
78 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
79     ## as defined by [IETF RFC 3066], Tags for the Identification
80     ## of Languages, or its successor; in addition, the empty string
81     ## may be specified." ("may" in lower case)
82 wakaba 1.94 ## NOTE: Is an RFC 3066-valid (but RFC 4646-invalid) language tag
83 wakaba 1.47 ## allowed today?
84    
85     ## TODO: test data
86    
87 wakaba 1.89 my $nsuri = $attr->owner_element->namespace_uri;
88     if (defined $nsuri and $nsuri eq $HTML_NS) {
89     my $lang_attr = $attr->owner_element->get_attribute_node_ns
90     (undef, 'lang');
91     if ($lang_attr) {
92     my $lang_attr_value = $lang_attr->value;
93     $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
94     my $value = $value;
95     $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
96     if ($lang_attr_value ne $value) {
97     ## NOTE: HTML5 Section "The |lang| and |xml:lang| attributes"
98     $self->{onerror}->(node => $attr,
99     type => 'xml:lang ne lang',
100     level => $self->{level}->{must});
101     }
102     }
103    
104 wakaba 1.111 if ($attr->owner_document->manakai_is_html) { # MUST NOT
105     $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang',
106     level => $self->{level}->{must});
107 wakaba 1.35 ## TODO: Test data...
108 wakaba 1.111 }
109 wakaba 1.35 }
110 wakaba 1.13 },
111     base => sub {
112     my ($self, $attr) = @_;
113     my $value = $attr->value;
114     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
115     $self->{onerror}->(node => $attr,
116 wakaba 1.83 type => 'invalid attribute value',
117     level => $self->{level}->{fact}, ## TODO: correct?
118     );
119 wakaba 1.13 }
120 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
121     ## no author requirement on conformance in the XML Base specification.
122 wakaba 1.13 },
123     id => sub {
124 wakaba 1.98 my ($self, $attr, $item, $element_state) = @_;
125 wakaba 1.13 my $value = $attr->value;
126     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
127     $value =~ s/^\x20//;
128     $value =~ s/\x20$//;
129     ## TODO: NCName in XML 1.0 or 1.1
130     ## TODO: declared type is ID?
131 wakaba 1.83 if ($self->{id}->{$value}) {
132     $self->{onerror}->(node => $attr,
133     type => 'duplicate ID',
134     level => $self->{level}->{xml_id_error});
135 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
136 wakaba 1.13 } else {
137 wakaba 1.37 $self->{id}->{$value} = [$attr];
138 wakaba 1.98 $self->{id_type}->{$value} = $element_state->{id_type} || '';
139 wakaba 1.13 }
140     },
141 wakaba 1.9 },
142     $XMLNS_NS => {
143 wakaba 1.13 '' => sub {
144     my ($self, $attr) = @_;
145     my $ln = $attr->manakai_local_name;
146     my $value = $attr->value;
147     if ($value eq $XML_NS and $ln ne 'xml') {
148     $self->{onerror}
149 wakaba 1.83 ->(node => $attr,
150     type => 'Reserved Prefixes and Namespace Names:Name',
151     text => $value,
152     level => $self->{level}->{nc});
153 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
154     $self->{onerror}
155 wakaba 1.83 ->(node => $attr,
156     type => 'Reserved Prefixes and Namespace Names:Name',
157     text => $value,
158     level => $self->{level}->{nc});
159 wakaba 1.13 }
160     if ($ln eq 'xml' and $value ne $XML_NS) {
161     $self->{onerror}
162 wakaba 1.83 ->(node => $attr,
163     type => 'Reserved Prefixes and Namespace Names:Prefix',
164     text => $ln,
165     level => $self->{level}->{nc});
166 wakaba 1.13 } elsif ($ln eq 'xmlns') {
167     $self->{onerror}
168 wakaba 1.83 ->(node => $attr,
169     type => 'Reserved Prefixes and Namespace Names:Prefix',
170     text => $ln,
171     level => $self->{level}->{nc});
172 wakaba 1.13 }
173     ## TODO: If XML 1.0 and empty
174     },
175     xmlns => sub {
176     my ($self, $attr) = @_;
177     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
178     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
179 wakaba 1.18 ## TODO: relative references are deprecated
180 wakaba 1.13 my $value = $attr->value;
181     if ($value eq $XML_NS) {
182     $self->{onerror}
183 wakaba 1.83 ->(node => $attr,
184     type => 'Reserved Prefixes and Namespace Names:Name',
185     text => $value,
186     level => $self->{level}->{nc});
187 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
188     $self->{onerror}
189 wakaba 1.83 ->(node => $attr,
190     type => 'Reserved Prefixes and Namespace Names:Name',
191     text => $value,
192     level => $self->{level}->{nc});
193 wakaba 1.13 }
194     },
195 wakaba 1.9 },
196     };
197    
198 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
199 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
200     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
201 wakaba 1.89 ## NOTE: Checker for (null, "xml:lang") attribute is shadowed for
202     ## HTML elements in Whatpm::ContentChecker::HTML.
203 wakaba 1.13 $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
204     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
205    
206 wakaba 1.79 our $AttrStatus;
207    
208     for (qw/space lang base id/) {
209     $AttrStatus->{$XML_NS}->{$_} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
210     $AttrStatus->{''}->{"xml:$_"} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
211     ## XML 1.0: FEATURE_STATUS_CR
212     ## XML 1.1: FEATURE_STATUS_REC
213     ## XML Namespaces 1.0: FEATURE_STATUS_CR
214     ## XML Namespaces 1.1: FEATURE_STATUS_REC
215     ## XML Base: FEATURE_STATUS_REC
216     ## xml:id: FEATURE_STATUS_REC
217     }
218    
219     $AttrStatus->{$XMLNS_NS}->{''} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
220    
221     ## TODO: xsi:schemaLocation for XHTML2 support (very, very low priority)
222    
223 wakaba 1.60 our %AnyChecker = (
224 wakaba 1.99 ## NOTE: |check_start| is invoked before anything on the element's
225     ## attributes and contents is checked.
226 wakaba 1.60 check_start => sub { },
227 wakaba 1.105 ## NOTE: |check_attrs| and |check_attrs2| are invoked after
228     ## |check_start| and before anything on the element's contents is
229     ## checked. |check_attrs| is invoked immediately before
230     ## |check_attrs2|.
231 wakaba 1.60 check_attrs => sub {
232     my ($self, $item, $element_state) = @_;
233     for my $attr (@{$item->{node}->attributes}) {
234 wakaba 1.9 my $attr_ns = $attr->namespace_uri;
235 wakaba 1.92 if (defined $attr_ns) {
236     load_ns_module ($attr_ns);
237     } else {
238     $attr_ns = '';
239     }
240 wakaba 1.9 my $attr_ln = $attr->manakai_local_name;
241 wakaba 1.79
242 wakaba 1.9 my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
243 wakaba 1.60 || $AttrChecker->{$attr_ns}->{''};
244 wakaba 1.79 my $status = $AttrStatus->{$attr_ns}->{$attr_ln}
245     || $AttrStatus->{$attr_ns}->{''};
246     if (not defined $status) {
247     $status = FEATURE_ALLOWED;
248     ## NOTE: FEATURE_ALLOWED for all attributes, since the element
249     ## is not supported and therefore "attribute not defined" error
250     ## should not raised (too verbose) and global attributes should be
251     ## allowed anyway (if a global attribute has its specified creteria
252     ## for where it may be specified, then it should be checked in it's
253     ## checker function).
254     }
255 wakaba 1.9 if ($checker) {
256     $checker->($self, $attr);
257 wakaba 1.17 } else {
258 wakaba 1.83 $self->{onerror}->(node => $attr,
259     type => 'unknown attribute',
260     level => $self->{level}->{uncertain});
261 wakaba 1.9 }
262 wakaba 1.79 $self->_attr_status_info ($attr, $status);
263 wakaba 1.9 }
264     },
265 wakaba 1.105 check_attrs2 => sub { },
266 wakaba 1.99 ## NOTE: |check_child_element| is invoked for each occurence of
267     ## child elements. It is invoked after |check_attrs| and before
268     ## |check_end|. |check_child_element| and |check_child_text| are
269     ## invoked for each child elements and text nodes in tree order.
270 wakaba 1.60 check_child_element => sub {
271     my ($self, $item, $child_el, $child_nsuri, $child_ln,
272     $child_is_transparent, $element_state) = @_;
273     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
274     $self->{onerror}->(node => $child_el,
275     type => 'element not allowed:minus',
276 wakaba 1.83 level => $self->{level}->{must});
277 wakaba 1.60 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
278     #
279     } else {
280     #
281     }
282     },
283 wakaba 1.99 ## NOTE: |check_child_text| is invoked for each occurence of child
284     ## text nodes. It is invoked after |check_attrs| and before
285     ## |check_end|. |check_child_element| and |check_child_text| are
286     ## invoked for each child elements and text nodes in tree order.
287 wakaba 1.60 check_child_text => sub { },
288 wakaba 1.99 ## NOTE: |check_end| is invoked after everything on the element's
289     ## attributes and contents are checked.
290 wakaba 1.60 check_end => sub {
291     my ($self, $item, $element_state) = @_;
292 wakaba 1.82 ## NOTE: There is a modified copy of the code below for |html:ruby|.
293 wakaba 1.60 if ($element_state->{has_significant}) {
294 wakaba 1.66 $item->{real_parent_state}->{has_significant} = 1;
295 wakaba 1.60 }
296     },
297     );
298    
299     our $ElementDefault = {
300     %AnyChecker,
301 wakaba 1.70 status => FEATURE_ALLOWED,
302     ## NOTE: No "element not defined" error - it is not supported anyway.
303 wakaba 1.60 check_start => sub {
304     my ($self, $item, $element_state) = @_;
305 wakaba 1.83 $self->{onerror}->(node => $item->{node},
306     type => 'unknown element',
307     level => $self->{level}->{uncertain});
308 wakaba 1.60 },
309 wakaba 1.1 };
310    
311 wakaba 1.60 our $HTMLEmbeddedContent = {
312     ## NOTE: All embedded content is also phrasing content.
313     $HTML_NS => {
314     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
315     canvas => 1,
316     },
317     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
318     q<http://www.w3.org/2000/svg> => {svg => 1},
319     ## NOTE: Foreign elements with content (but no metadata) are
320     ## embedded content.
321     };
322    
323 wakaba 1.95 our $IsInHTMLInteractiveContent = sub {
324     my ($el, $nsuri, $ln) = @_;
325    
326     ## NOTE: This CODE returns whether an element that is conditionally
327     ## categorizzed as an interactive content is currently in that
328     ## condition or not. See $HTMLInteractiveContent list defined in
329     ## Whatpm::ContentChecler::HTML for the list of all (conditionally
330     ## or permanently) interactive content.
331    
332 wakaba 1.108 ## The variable name is not good, since this method also returns
333     ## true for non-interactive content as long as the element cannot be
334     ## interactive content.
335    
336 wakaba 1.110 if ($nsuri eq $HTML_NS and $ln eq 'input') {
337     my $value = $el->get_attribute_ns (undef, 'type');
338     $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
339     return ($value ne 'hidden');
340     } elsif ($nsuri eq $HTML_NS and ($ln eq 'img' or $ln eq 'object')) {
341     return $el->has_attribute_ns (undef, 'usemap');
342     } elsif ($nsuri eq $HTML_NS and ($ln eq 'video' or $ln eq 'audio')) {
343 wakaba 1.103 return $el->has_attribute_ns (undef, 'controls');
344 wakaba 1.95 } elsif ($nsuri eq $HTML_NS and $ln eq 'menu') {
345 wakaba 1.103 my $value = $el->get_attribute_ns (undef, 'type');
346 wakaba 1.110 $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
347 wakaba 1.95 return ($value eq 'toolbar');
348     } else {
349     return 1;
350     }
351     }; # $IsInHTMLInteractiveContent
352    
353 wakaba 1.7 my $HTMLTransparentElements = {
354 wakaba 1.109 $HTML_NS => {
355     ins => 1, del => 1,
356     font => 1, ## dropped from the spec
357     noscript => 1,
358     ## NOTE: |html:noscript| is transparent if scripting is disabled
359     ## and not in |head|.
360     canvas => 1,
361     a => 1,
362     map => 1,
363     },
364     }; # $HTMLTransparentElements
365 wakaba 1.7
366 wakaba 1.109 ## NOTE: Now that the term "semi-transparent content model" is dropped
367     ## from the spec, but the concept is not.
368 wakaba 1.61 my $HTMLSemiTransparentElements = {
369     $HTML_NS => {object => 1, video => 1, audio => 1},
370 wakaba 1.109 }; # $HTMLSemiTransparentElements
371 wakaba 1.57
372 wakaba 1.42 our $Element = {};
373 wakaba 1.7
374 wakaba 1.73 $Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
375     %AnyChecker,
376     status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
377     is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
378     check_start => sub {
379     my ($self, $item, $element_state) = @_;
380     my $triple = [];
381     push @{$self->{return}->{rdf}}, [$item->{node}, $triple];
382     require Whatpm::RDFXML;
383     my $rdf = Whatpm::RDFXML->new;
384 wakaba 1.75 ## TODO: Should we make bnodeid unique in a document?
385 wakaba 1.73 $rdf->{onerror} = $self->{onerror};
386 wakaba 1.84 $rdf->{level} = $self->{level};
387 wakaba 1.73 $rdf->{ontriple} = sub {
388     my %opt = @_;
389     push @$triple,
390     [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
391 wakaba 1.74 if (defined $opt{id}) {
392     push @$triple,
393     [$opt{node},
394     $opt{id},
395     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>},
396     $opt{subject}];
397     push @$triple,
398     [$opt{node},
399     $opt{id},
400     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>},
401     $opt{predicate}];
402     push @$triple,
403     [$opt{node},
404     $opt{id},
405     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>},
406     $opt{object}];
407     push @$triple,
408     [$opt{node},
409     $opt{id},
410     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>},
411     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}];
412     }
413 wakaba 1.73 };
414     $rdf->convert_rdf_element ($item->{node});
415     },
416     };
417    
418 wakaba 1.83 my $default_error_level = {
419     must => 'm',
420     should => 's',
421     warn => 'w',
422     good => 'w',
423 wakaba 1.93 undefined => 'w',
424 wakaba 1.83 info => 'i',
425 wakaba 1.85
426 wakaba 1.83 uncertain => 'u',
427    
428 wakaba 1.84 html4_fact => 'm',
429 wakaba 1.89 html5_no_may => 'm',
430 wakaba 1.101 html5_fact => 'm',
431 wakaba 1.89
432 wakaba 1.83 xml_error => 'm', ## TODO: correct?
433 wakaba 1.88 xml_id_error => 'm', ## TODO: ?
434 wakaba 1.83 nc => 'm', ## XML Namespace Constraints ## TODO: correct?
435 wakaba 1.84
436 wakaba 1.91 ## |Whatpm::URIChecker|
437 wakaba 1.85 uri_syntax => 'm',
438     uri_fact => 'm',
439     uri_lc_must => 'm',
440     uri_lc_should => 'w',
441    
442 wakaba 1.91 ## |Whatpm::IMTChecker|
443 wakaba 1.87 mime_must => 'm', # lowercase "must"
444     mime_fact => 'm',
445     mime_strongly_discouraged => 'w',
446     mime_discouraged => 'w',
447    
448 wakaba 1.91 ## |Whatpm::LangTag|
449 wakaba 1.85 langtag_fact => 'm',
450    
451 wakaba 1.91 ## |Whatpm::RDFXML|
452 wakaba 1.84 rdf_fact => 'm',
453     rdf_grammer => 'm',
454     rdf_lc_must => 'm',
455 wakaba 1.91
456     ## |Message::Charset::Info| and |Whatpm::Charset::DecodeHandle|
457     charset_variant => 'm',
458     ## An error caused by use of a variant charset that is not conforming
459     ## to the original charset (e.g. use of 0x80 in an ISO-8859-1 document
460     ## which is interpreted as a Windows-1252 document instead).
461     charset_fact => 'm',
462     iso_shall => 'm',
463 wakaba 1.83 };
464    
465 wakaba 1.56 sub check_document ($$$;$) {
466     my ($self, $doc, $onerror, $onsubdoc) = @_;
467 wakaba 1.42 $self = bless {}, $self unless ref $self;
468     $self->{onerror} = $onerror;
469 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
470     warn "A subdocument is not conformance-checked";
471     };
472 wakaba 1.1
473 wakaba 1.83 $self->{level} ||= $default_error_level;
474 wakaba 1.48
475 wakaba 1.73 ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.
476    
477 wakaba 1.42 my $docel = $doc->document_element;
478     unless (defined $docel) {
479     ## ISSUE: Should we check content of Document node?
480 wakaba 1.83 $onerror->(node => $doc, type => 'no document element',
481     level => $self->{level}->{must});
482 wakaba 1.42 ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
483     return {
484     class => {},
485     id => {}, table => [], term => {},
486     };
487 wakaba 1.1 }
488    
489 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
490 wakaba 1.1
491 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
492 wakaba 1.92 if (defined $docel_nsuri) {
493     load_ns_module ($docel_nsuri);
494     } else {
495     $docel_nsuri = '';
496     }
497 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
498     $Element->{$docel_nsuri}->{''} ||
499     $ElementDefault;
500     if ($docel_def->{is_root}) {
501     #
502 wakaba 1.50 } elsif ($docel_def->{is_xml_root}) {
503     unless ($doc->manakai_is_html) {
504     #
505     } else {
506 wakaba 1.83 $onerror->(node => $docel, type => 'element not allowed:root:xml',
507     level => $self->{level}->{must});
508 wakaba 1.50 }
509 wakaba 1.42 } else {
510 wakaba 1.83 $onerror->(node => $docel, type => 'element not allowed:root',
511     level => $self->{level}->{must});
512 wakaba 1.1 }
513    
514 wakaba 1.42 ## TODO: Check for other items other than document element
515     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
516 wakaba 1.2
517 wakaba 1.56 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
518 wakaba 1.51
519 wakaba 1.52 ## TODO: Test for these checks are necessary.
520 wakaba 1.51 my $charset_name = $doc->input_encoding;
521     if (defined $charset_name) {
522     require Message::Charset::Info;
523     my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
524    
525 wakaba 1.71 if ($doc->manakai_is_html) {
526     if (not $doc->manakai_has_bom and
527     not defined $doc->manakai_charset) {
528 wakaba 1.102 unless ($charset->{category}
529     & Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ()) {
530 wakaba 1.86 $onerror->(node => $doc,
531     level => $self->{level}->{must},
532 wakaba 1.83 type => 'non ascii superset',
533     text => $charset_name);
534 wakaba 1.71 }
535    
536     if (not $self->{has_charset} and ## TODO: This does not work now.
537     not $charset->{iana_names}->{'us-ascii'}) {
538 wakaba 1.86 $onerror->(node => $doc,
539     level => $self->{level}->{must},
540 wakaba 1.83 type => 'no character encoding declaration',
541     text => $charset_name);
542 wakaba 1.71 }
543 wakaba 1.51 }
544 wakaba 1.71
545     if ($charset->{iana_names}->{'utf-8'}) {
546     #
547     } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
548     $charset->{iana_names}->{'x-jis0208'} or
549     $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
550 wakaba 1.91 ($charset->{category} & Message::Charset::Info::CHARSET_CATEGORY_EBCDIC ())) {
551 wakaba 1.71 $onerror->(node => $doc,
552 wakaba 1.83 type => 'bad character encoding',
553     text => $charset_name,
554     level => $self->{level}->{should},
555     layer => 'encode');
556 wakaba 1.71 } elsif ($charset->{iana_names}->{'cesu-8'} or
557 wakaba 1.102 $charset->{iana_names}->{'utf-7'} or ## ISSUE: UNICODE-1-1-UTF-7?
558 wakaba 1.71 $charset->{iana_names}->{'bocu-1'} or
559     $charset->{iana_names}->{'scsu'}) {
560     $onerror->(node => $doc,
561 wakaba 1.83 type => 'disallowed character encoding',
562     text => $charset_name,
563     level => $self->{level}->{must},
564     layer => 'encode');
565 wakaba 1.71 } else {
566     $onerror->(node => $doc,
567 wakaba 1.83 type => 'non-utf-8 character encoding',
568     text => $charset_name,
569     level => $self->{level}->{good},
570     layer => 'encode');
571 wakaba 1.51 }
572     }
573 wakaba 1.52 } elsif ($doc->manakai_is_html) {
574     ## NOTE: MUST and SHOULD requirements above cannot be tested,
575     ## since the document has no input charset encoding information.
576     $onerror->(node => $doc,
577 wakaba 1.83 type => 'character encoding unchecked',
578     level => $self->{level}->{info},
579     layer => 'encode');
580 wakaba 1.51 }
581    
582     return $return;
583 wakaba 1.42 } # check_document
584 wakaba 1.1
585 wakaba 1.81 ## Check an element. The element is checked as if it is an orphan node (i.e.
586     ## an element without a parent node).
587 wakaba 1.56 sub check_element ($$$;$) {
588     my ($self, $el, $onerror, $onsubdoc) = @_;
589 wakaba 1.42 $self = bless {}, $self unless ref $self;
590     $self->{onerror} = $onerror;
591 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
592     warn "A subdocument is not conformance-checked";
593     };
594 wakaba 1.2
595 wakaba 1.83 $self->{level} ||= $default_error_level;
596 wakaba 1.48
597 wakaba 1.61 $self->{plus_elements} = {};
598     $self->{minus_elements} = {};
599 wakaba 1.42 $self->{id} = {};
600 wakaba 1.99 $self->{id_type} = {}; # 'form' / 'labelable' / 'menu'
601     $self->{form} = {}; # form/@name
602 wakaba 1.106 #$self->{has_autofocus};
603 wakaba 1.100 $self->{idref} = []; # @form, @for, @contextmenu
604 wakaba 1.42 $self->{term} = {};
605     $self->{usemap} = [];
606 wakaba 1.78 $self->{ref} = []; # datetemplate data references
607     $self->{template} = []; # datatemplate template references
608 wakaba 1.42 $self->{map} = {};
609     $self->{has_link_type} = {};
610 wakaba 1.60 $self->{flag} = {};
611 wakaba 1.46 #$self->{has_uri_attr};
612     #$self->{has_hyperlink_element};
613 wakaba 1.51 #$self->{has_charset};
614 wakaba 1.57 #$self->{has_base};
615 wakaba 1.42 $self->{return} = {
616     class => {},
617 wakaba 1.80 id => $self->{id},
618     table => [], # table objects returned by Whatpm::HTMLTable
619     term => $self->{term},
620 wakaba 1.76 uri => {}, # URIs other than those in RDF triples
621     ## TODO: xmlns="", SYSTEM "", atom:* src="", xml:base=""
622 wakaba 1.73 rdf => [],
623 wakaba 1.42 };
624 wakaba 1.4
625 wakaba 1.60 my @item = ({type => 'element', node => $el, parent_state => {}});
626 wakaba 1.66 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
627 wakaba 1.60 while (@item) {
628     my $item = shift @item;
629     if (ref $item eq 'ARRAY') {
630     my $code = shift @$item;
631     next unless $code;## TODO: temp.
632     $code->(@$item);
633     } elsif ($item->{type} eq 'element') {
634     my $el_nsuri = $item->{node}->namespace_uri;
635 wakaba 1.92 if (defined $el_nsuri) {
636     load_ns_module ($el_nsuri);
637     } else {
638     $el_nsuri = '';
639     }
640 wakaba 1.60 my $el_ln = $item->{node}->manakai_local_name;
641 wakaba 1.92
642 wakaba 1.63 my $element_state = {};
643 wakaba 1.60 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
644     $Element->{$el_nsuri}->{''} ||
645 wakaba 1.42 $ElementDefault;
646 wakaba 1.61 my $content_def = $item->{transparent}
647     ? $item->{parent_def} || $eldef : $eldef;
648 wakaba 1.63 my $content_state = $item->{transparent}
649 wakaba 1.65 ? $item->{parent_def}
650     ? $item->{parent_state} || $element_state : $element_state
651     : $element_state;
652 wakaba 1.60
653 wakaba 1.67 unless ($eldef->{status} & FEATURE_STATUS_REC) {
654     my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
655     $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
656     $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
657     $self->{onerror}->(node => $item->{node},
658     type => 'status:'.$status.':element',
659 wakaba 1.83 level => $self->{level}->{info});
660 wakaba 1.67 }
661 wakaba 1.70 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
662     $self->{onerror}->(node => $item->{node},
663     type => 'element not defined',
664 wakaba 1.83 level => $self->{level}->{must});
665 wakaba 1.70 } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
666     $self->{onerror}->(node => $item->{node},
667     type => 'deprecated:element',
668 wakaba 1.83 level => $self->{level}->{should});
669 wakaba 1.70 } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
670     $self->{onerror}->(node => $item->{node},
671     type => 'deprecated:element',
672 wakaba 1.83 level => $self->{level}->{info});
673 wakaba 1.70 }
674 wakaba 1.67
675 wakaba 1.60 my @new_item;
676     push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
677     push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
678 wakaba 1.105 push @new_item, [$eldef->{check_attrs2}, $self, $item, $element_state];
679 wakaba 1.61
680 wakaba 1.60 my @child = @{$item->{node}->child_nodes};
681     while (@child) {
682     my $child = shift @child;
683     my $child_nt = $child->node_type;
684     if ($child_nt == 1) { # ELEMENT_NODE
685     my $child_nsuri = $child->namespace_uri;
686     $child_nsuri = '' unless defined $child_nsuri;
687     my $child_ln = $child->manakai_local_name;
688     if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
689     not (($self->{flag}->{in_head} or
690 wakaba 1.61 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
691     $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
692 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
693     $self, $item, $child,
694 wakaba 1.66 $child_nsuri, $child_ln, 1,
695     $content_state, $element_state];
696 wakaba 1.60 push @new_item, {type => 'element', node => $child,
697 wakaba 1.65 parent_state => $content_state,
698 wakaba 1.61 parent_def => $content_def,
699 wakaba 1.66 real_parent_state => $element_state,
700 wakaba 1.60 transparent => 1};
701     } else {
702 wakaba 1.65 if ($item->{parent_def} and # has parent
703     $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
704 wakaba 1.61 if ($el_ln eq 'object') {
705     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
706     #
707     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
708     #
709     } else {
710 wakaba 1.62 $content_def = $item->{parent_def} || $content_def;
711 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
712 wakaba 1.62 }
713     } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
714     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
715     #
716     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
717     $element_state->{has_source} = 1;
718     } else {
719     $content_def = $item->{parent_def} || $content_def;
720 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
721 wakaba 1.61 }
722     }
723     }
724    
725 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
726     $self, $item, $child,
727 wakaba 1.64 $child_nsuri, $child_ln,
728     $HTMLSemiTransparentElements
729     ->{$child_nsuri}->{$child_ln},
730 wakaba 1.66 $content_state, $element_state];
731 wakaba 1.60 push @new_item, {type => 'element', node => $child,
732 wakaba 1.65 parent_def => $content_def,
733 wakaba 1.66 real_parent_state => $element_state,
734 wakaba 1.65 parent_state => $content_state};
735 wakaba 1.60 }
736    
737     if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
738     $element_state->{has_significant} = 1;
739     }
740     } elsif ($child_nt == 3 or # TEXT_NODE
741     $child_nt == 4) { # CDATA_SECTION_NODE
742 wakaba 1.96 my $has_significant = ($child->data =~ /[^\x09\x0A\x0C\x0D\x20]/);
743 wakaba 1.60 push @new_item, [$content_def->{check_child_text},
744     $self, $item, $child, $has_significant,
745 wakaba 1.66 $content_state, $element_state];
746     $element_state->{has_significant} ||= $has_significant;
747 wakaba 1.61 if ($has_significant and
748     $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
749     $content_def = $item->{parent_def} || $content_def;
750     }
751 wakaba 1.60 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
752     push @child, @{$child->child_nodes};
753 wakaba 1.1 }
754 wakaba 1.60 ## TODO: PI_NODE
755     ## TODO: Unknown node type
756 wakaba 1.1 }
757 wakaba 1.60
758     push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
759    
760     unshift @item, @new_item;
761 wakaba 1.30 } else {
762 wakaba 1.60 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
763 wakaba 1.4 }
764 wakaba 1.1 }
765 wakaba 1.17
766 wakaba 1.78 for (@{$self->{template}}) {
767     ## TODO: If the document is an XML document, ...
768     ## NOTE: If the document is an HTML document:
769     ## ISSUE: We need to percent-decode?
770     F: {
771     if ($self->{id}->{$_->[0]}) {
772     my $el = $self->{id}->{$_->[0]}->[0]->owner_element;
773     if ($el->node_type == 1 and # ELEMENT_NODE
774     $el->manakai_local_name eq 'datatemplate') {
775     my $nsuri = $el->namespace_uri;
776     if (defined $nsuri and $nsuri eq $HTML_NS) {
777     if ($el eq $_->[1]->owner_element) {
778     $self->{onerror}->(node => $_->[1],
779     type => 'fragment points itself',
780 wakaba 1.83 level => $self->{level}->{must});
781 wakaba 1.78 }
782    
783     last F;
784     }
785     }
786     }
787     ## TODO: Should we raise a "fragment points nothing" error instead
788     ## if the fragment identifier identifies no element?
789    
790     $self->{onerror}->(node => $_->[1], type => 'template:not template',
791 wakaba 1.83 level => $self->{level}->{must});
792 wakaba 1.78 } # F
793     }
794    
795     for (@{$self->{ref}}) {
796     ## TOOD: If XML
797     ## NOTE: If it is an HTML document:
798     if ($_->[0] eq '') {
799     ## NOTE: It points the top of the document.
800     } elsif ($self->{id}->{$_->[0]}) {
801     if ($self->{id}->{$_->[0]}->[0]->owner_element
802     eq $_->[1]->owner_element) {
803     $self->{onerror}->(node => $_->[1], type => 'fragment points itself',
804 wakaba 1.83 level => $self->{level}->{must});
805 wakaba 1.78 }
806     } else {
807     $self->{onerror}->(node => $_->[1], type => 'fragment points nothing',
808 wakaba 1.83 level => $self->{level}->{must});
809 wakaba 1.78 }
810     }
811    
812     ## TODO: Maybe we should have $document->manakai_get_by_fragment or something
813    
814 wakaba 1.17 for (@{$self->{usemap}}) {
815     unless ($self->{map}->{$_->[0]}) {
816 wakaba 1.83 $self->{onerror}->(node => $_->[1], type => 'no referenced map',
817     level => $self->{level}->{must});
818 wakaba 1.17 }
819     }
820    
821 wakaba 1.100 for (@{$self->{idref}}) {
822 wakaba 1.106 if ($self->{id}->{$_->[1]} and $self->{id_type}->{$_->[1]} eq $_->[0]) {
823     #
824     } elsif ($_->[0] eq 'any' and $self->{id}->{$_->[1]}) {
825 wakaba 1.99 #
826     } else {
827 wakaba 1.100 $self->{onerror}->(node => $_->[2],
828     type => {
829 wakaba 1.106 any => 'no referenced element', ## TODOC: type
830 wakaba 1.100 form => 'no referenced form',
831     labelable => 'no referenced control',
832     menu => 'no referenced menu',
833 wakaba 1.104 datalist => 'no referenced datalist', ## TODOC: type
834 wakaba 1.100 }->{$_->[0]},
835 wakaba 1.106 value => $_->[1],
836 wakaba 1.83 level => $self->{level}->{must});
837 wakaba 1.32 }
838     }
839    
840 wakaba 1.61 delete $self->{plus_elements};
841     delete $self->{minus_elements};
842 wakaba 1.17 delete $self->{onerror};
843     delete $self->{id};
844 wakaba 1.98 delete $self->{id_type};
845 wakaba 1.97 delete $self->{form};
846 wakaba 1.106 delete $self->{has_autofocus};
847 wakaba 1.100 delete $self->{idref};
848 wakaba 1.17 delete $self->{usemap};
849 wakaba 1.78 delete $self->{ref};
850     delete $self->{template};
851 wakaba 1.17 delete $self->{map};
852 wakaba 1.33 return $self->{return};
853 wakaba 1.1 } # check_element
854    
855 wakaba 1.60 sub _add_minus_elements ($$@) {
856     my $self = shift;
857     my $element_state = shift;
858     for my $elements (@_) {
859     for my $nsuri (keys %$elements) {
860     for my $ln (keys %{$elements->{$nsuri}}) {
861     unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
862     $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
863     $self->{minus_elements}->{$nsuri}->{$ln} = 1;
864     }
865     }
866     }
867     }
868     } # _add_minus_elements
869    
870     sub _remove_minus_elements ($$) {
871     my $self = shift;
872     my $element_state = shift;
873     for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
874     for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
875     delete $self->{minus_elements}->{$nsuri}->{$ln};
876     }
877     }
878     } # _remove_minus_elements
879    
880     sub _add_plus_elements ($$@) {
881     my $self = shift;
882     my $element_state = shift;
883     for my $elements (@_) {
884     for my $nsuri (keys %$elements) {
885     for my $ln (keys %{$elements->{$nsuri}}) {
886     unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
887     $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
888     $self->{plus_elements}->{$nsuri}->{$ln} = 1;
889     }
890     }
891     }
892     }
893     } # _add_plus_elements
894    
895     sub _remove_plus_elements ($$) {
896     my $self = shift;
897     my $element_state = shift;
898     for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
899     for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
900     delete $self->{plus_elements}->{$nsuri}->{$ln};
901     }
902     }
903     } # _remove_plus_elements
904    
905 wakaba 1.68 sub _attr_status_info ($$$) {
906     my ($self, $attr, $status_code) = @_;
907 wakaba 1.70
908     if (not ($status_code & FEATURE_ALLOWED)) {
909     $self->{onerror}->(node => $attr,
910     type => 'attribute not defined',
911 wakaba 1.83 level => $self->{level}->{must});
912 wakaba 1.70 } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
913     $self->{onerror}->(node => $attr,
914     type => 'deprecated:attr',
915 wakaba 1.83 level => $self->{level}->{should});
916 wakaba 1.70 } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
917     $self->{onerror}->(node => $attr,
918     type => 'deprecated:attr',
919 wakaba 1.83 level => $self->{level}->{info});
920 wakaba 1.70 }
921    
922 wakaba 1.68 my $status;
923     if ($status_code & FEATURE_STATUS_REC) {
924     return;
925     } elsif ($status_code & FEATURE_STATUS_CR) {
926     $status = 'cr';
927     } elsif ($status_code & FEATURE_STATUS_LC) {
928     $status = 'lc';
929     } elsif ($status_code & FEATURE_STATUS_WD) {
930     $status = 'wd';
931     } else {
932     $status = 'non-standard';
933     }
934     $self->{onerror}->(node => $attr,
935     type => 'status:'.$status.':attr',
936 wakaba 1.83 level => $self->{level}->{info});
937 wakaba 1.68 } # _attr_status_info
938    
939 wakaba 1.2 sub _add_minuses ($@) {
940     my $self = shift;
941     my $r = {};
942     for my $list (@_) {
943     for my $ns (keys %$list) {
944     for my $ln (keys %{$list->{$ns}}) {
945     unless ($self->{minuses}->{$ns}->{$ln}) {
946     $self->{minuses}->{$ns}->{$ln} = 1;
947     $r->{$ns}->{$ln} = 1;
948     }
949     }
950     }
951     }
952 wakaba 1.4 return {type => 'plus', list => $r};
953 wakaba 1.2 } # _add_minuses
954    
955 wakaba 1.50 sub _add_pluses ($@) {
956     my $self = shift;
957     my $r = {};
958     for my $list (@_) {
959     for my $ns (keys %$list) {
960     for my $ln (keys %{$list->{$ns}}) {
961     unless ($self->{pluses}->{$ns}->{$ln}) {
962     $self->{pluses}->{$ns}->{$ln} = 1;
963     $r->{$ns}->{$ln} = 1;
964     }
965     }
966     }
967     }
968     return {type => 'minus', list => $r};
969     } # _add_pluses
970    
971 wakaba 1.2 sub _remove_minuses ($$) {
972 wakaba 1.4 my ($self, $todo) = @_;
973 wakaba 1.50 if ($todo->{type} eq 'minus') {
974     for my $ns (keys %{$todo->{list}}) {
975     for my $ln (keys %{$todo->{list}->{$ns}}) {
976     delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
977     }
978 wakaba 1.2 }
979 wakaba 1.50 } elsif ($todo->{type} eq 'plus') {
980     for my $ns (keys %{$todo->{list}}) {
981     for my $ln (keys %{$todo->{list}->{$ns}}) {
982     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
983     }
984     }
985     } else {
986     die "$0: Unknown +- type: $todo->{type}";
987 wakaba 1.2 }
988     1;
989     } # _remove_minuses
990    
991 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
992     ## undefined and implemented inconsistently; it is not a problem for
993     ## now, since no element belongs to both lists.
994    
995 wakaba 1.30 sub _check_get_children ($$$) {
996     my ($self, $node, $parent_todo) = @_;
997 wakaba 1.4 my $new_todos = [];
998 wakaba 1.2 my $sib = [];
999     TP: {
1000     my $node_ns = $node->namespace_uri;
1001     $node_ns = '' unless defined $node_ns;
1002     my $node_ln = $node->manakai_local_name;
1003 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
1004     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
1005     if ($parent_todo->{flag}->{in_head}) {
1006     #
1007     } else {
1008     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
1009     push @$sib, $end;
1010    
1011     unshift @$sib, @{$node->child_nodes};
1012     push @$new_todos, {type => 'element-attributes', node => $node};
1013     last TP;
1014     }
1015 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
1016     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
1017     unshift @$sib, @{$node->child_nodes};
1018     push @$new_todos, {type => 'element-attributes', node => $node};
1019     push @$new_todos,
1020     {type => 'code',
1021     code => sub {
1022     $parent_todo->{flag}->{has_descendant}->{significant} = 0
1023     if not $sig_flag;
1024     }};
1025     last TP;
1026 wakaba 1.45 } else {
1027     unshift @$sib, @{$node->child_nodes};
1028     push @$new_todos, {type => 'element-attributes', node => $node};
1029     last TP;
1030 wakaba 1.2 }
1031     }
1032 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
1033 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
1034     unshift @$sib, @{$node->child_nodes};
1035 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
1036 wakaba 1.2 last TP;
1037     } else {
1038     my @cn = @{$node->child_nodes};
1039     CN: while (@cn) {
1040     my $cn = shift @cn;
1041     my $cnt = $cn->node_type;
1042     if ($cnt == 1) {
1043 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
1044     $cn_nsuri = '' unless defined $cn_nsuri;
1045     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
1046 wakaba 1.2 #
1047     } else {
1048     last CN;
1049     }
1050     } elsif ($cnt == 3 or $cnt == 4) {
1051 wakaba 1.96 if ($cn->data =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1052 wakaba 1.2 last CN;
1053     }
1054     }
1055     } # CN
1056     unshift @$sib, @cn;
1057     }
1058 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
1059     my @cn = @{$node->child_nodes};
1060     CN: while (@cn) {
1061     my $cn = shift @cn;
1062     my $cnt = $cn->node_type;
1063     if ($cnt == 1) {
1064     my $cn_nsuri = $cn->namespace_uri;
1065     $cn_nsuri = '' unless defined $cn_nsuri;
1066     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
1067     #
1068     } else {
1069     last CN;
1070     }
1071     } elsif ($cnt == 3 or $cnt == 4) {
1072 wakaba 1.96 if ($cn->data =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1073 wakaba 1.57 last CN;
1074     }
1075     }
1076     } # CN
1077     unshift @$sib, @cn;
1078 wakaba 1.2 }
1079 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
1080 wakaba 1.2 } # TP
1081 wakaba 1.30
1082     for my $new_todo (@$new_todos) {
1083     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
1084     }
1085    
1086 wakaba 1.4 return ($sib, $new_todos);
1087 wakaba 1.2 } # _check_get_children
1088    
1089 wakaba 1.44 =head1 LICENSE
1090    
1091 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1092 wakaba 1.44
1093     This library is free software; you can redistribute it
1094     and/or modify it under the same terms as Perl itself.
1095    
1096     =cut
1097    
1098 wakaba 1.1 1;
1099 wakaba 1.111 # $Date: 2009/08/23 02:35:33 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24