/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.101 - (hide annotations) (download)
Tue Oct 7 11:41:41 2008 UTC (16 years ago) by wakaba
Branch: MAIN
Changes since 1.100: +3 -2 lines
++ whatpm/t/ChangeLog	7 Oct 2008 11:39:59 -0000
2008-10-07  Wakaba  <wakaba@suika.fam.cx>

	* content-model-2.dat: Repetition model test is moved to another
	file.  A wrong test result fixed.  Broken test entries fixed.
	<area rev> is now reported as "unknown attribute", since RDFa spec
	adds that attribute to all elements with the Common attribute set.
	Note that we have no plan to support RDFa, at the moment.

++ whatpm/t/dom-conformance/ChangeLog	7 Oct 2008 11:41:37 -0000
	* html-flows-1.dat: <li value> test results updated.

	* html-forms-1.dat: accesskey="" attribute is obsolete.

	* html-metadata-1.dat: Fix broken test results.

	* html-repetitions.dat: The repetition template feature is
	obsolete.  A test entry from ../content-model-2.dat is added.

2008-10-07  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	7 Oct 2008 11:36:17 -0000
2008-10-07  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm: New error level "html5_fact" added, which
	should be tentatively used until all of requirements are properly
	specced as RFC 2119 "MUST" in HTML5.

++ whatpm/Whatpm/ContentChecker/ChangeLog	7 Oct 2008 11:37:15 -0000
	* HTML.pm: Quoted-strings in media type specifications were not
	properly decoded.  Fixed support for <li value> with non-<ol>
	parent (or with no parent).

2008-10-07  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.101 our $VERSION=do{my @r=(q$Revision: 1.100 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
12 wakaba 1.70 ## Stability
13 wakaba 1.67 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14     sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15     sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16     sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17    
18 wakaba 1.70 ## Deprecated
19     sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20     sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21    
22     ## Conformance
23     sub FEATURE_ALLOWED () { 0b10000 }
24    
25 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28    
29 wakaba 1.42 my $Namespace = {
30 wakaba 1.79 '' => {loaded => 1},
31 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
32 wakaba 1.72 q<http://purl.org/syndication/history/1.0>
33     => {module => 'Whatpm::ContentChecker::Atom'},
34     q<http://purl.org/syndication/threading/1.0>
35     => {module => 'Whatpm::ContentChecker::Atom'},
36 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
37     $XML_NS => {loaded => 1},
38     $XMLNS_NS => {loaded => 1},
39 wakaba 1.73 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
40 wakaba 1.42 };
41    
42 wakaba 1.79 sub load_ns_module ($) {
43     my $nsuri = shift; # namespace URI or ''
44     unless ($Namespace->{$nsuri}->{loaded}) {
45     if ($Namespace->{$nsuri}->{module}) {
46     eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
47     } else {
48     $Namespace->{$nsuri}->{loaded} = 1;
49     }
50     }
51     } # load_ns_module
52    
53 wakaba 1.42 our $AttrChecker = {
54 wakaba 1.9 $XML_NS => {
55 wakaba 1.13 space => sub {
56     my ($self, $attr) = @_;
57     my $value = $attr->value;
58     if ($value eq 'default' or $value eq 'preserve') {
59     #
60     } else {
61     ## NOTE: An XML "error"
62 wakaba 1.83 $self->{onerror}->(node => $attr, level => $self->{level}->{xml_error},
63 wakaba 1.33 type => 'invalid attribute value');
64 wakaba 1.13 }
65     },
66     lang => sub {
67 wakaba 1.35 my ($self, $attr) = @_;
68 wakaba 1.47 my $value = $attr->value;
69     if ($value eq '') {
70     #
71     } else {
72     require Whatpm::LangTag;
73     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
74 wakaba 1.83 $self->{onerror}->(@_, node => $attr);
75 wakaba 1.85 }, $self->{level});
76 wakaba 1.47 }
77    
78 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
79     ## as defined by [IETF RFC 3066], Tags for the Identification
80     ## of Languages, or its successor; in addition, the empty string
81     ## may be specified." ("may" in lower case)
82 wakaba 1.94 ## NOTE: Is an RFC 3066-valid (but RFC 4646-invalid) language tag
83 wakaba 1.47 ## allowed today?
84    
85     ## TODO: test data
86    
87 wakaba 1.89 my $nsuri = $attr->owner_element->namespace_uri;
88     if (defined $nsuri and $nsuri eq $HTML_NS) {
89     my $lang_attr = $attr->owner_element->get_attribute_node_ns
90     (undef, 'lang');
91     if ($lang_attr) {
92     my $lang_attr_value = $lang_attr->value;
93     $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
94     my $value = $value;
95     $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
96     if ($lang_attr_value ne $value) {
97     ## NOTE: HTML5 Section "The |lang| and |xml:lang| attributes"
98     $self->{onerror}->(node => $attr,
99     type => 'xml:lang ne lang',
100     level => $self->{level}->{must});
101     }
102     }
103     }
104    
105 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
106 wakaba 1.83 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang',
107     level => $self->{level}->{must});
108 wakaba 1.35 ## TODO: Test data...
109     }
110 wakaba 1.13 },
111     base => sub {
112     my ($self, $attr) = @_;
113     my $value = $attr->value;
114     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
115     $self->{onerror}->(node => $attr,
116 wakaba 1.83 type => 'invalid attribute value',
117     level => $self->{level}->{fact}, ## TODO: correct?
118     );
119 wakaba 1.13 }
120 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
121     ## no author requirement on conformance in the XML Base specification.
122 wakaba 1.13 },
123     id => sub {
124 wakaba 1.98 my ($self, $attr, $item, $element_state) = @_;
125 wakaba 1.13 my $value = $attr->value;
126     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
127     $value =~ s/^\x20//;
128     $value =~ s/\x20$//;
129     ## TODO: NCName in XML 1.0 or 1.1
130     ## TODO: declared type is ID?
131 wakaba 1.83 if ($self->{id}->{$value}) {
132     $self->{onerror}->(node => $attr,
133     type => 'duplicate ID',
134     level => $self->{level}->{xml_id_error});
135 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
136 wakaba 1.13 } else {
137 wakaba 1.37 $self->{id}->{$value} = [$attr];
138 wakaba 1.98 $self->{id_type}->{$value} = $element_state->{id_type} || '';
139 wakaba 1.13 }
140     },
141 wakaba 1.9 },
142     $XMLNS_NS => {
143 wakaba 1.13 '' => sub {
144     my ($self, $attr) = @_;
145     my $ln = $attr->manakai_local_name;
146     my $value = $attr->value;
147     if ($value eq $XML_NS and $ln ne 'xml') {
148     $self->{onerror}
149 wakaba 1.83 ->(node => $attr,
150     type => 'Reserved Prefixes and Namespace Names:Name',
151     text => $value,
152     level => $self->{level}->{nc});
153 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
154     $self->{onerror}
155 wakaba 1.83 ->(node => $attr,
156     type => 'Reserved Prefixes and Namespace Names:Name',
157     text => $value,
158     level => $self->{level}->{nc});
159 wakaba 1.13 }
160     if ($ln eq 'xml' and $value ne $XML_NS) {
161     $self->{onerror}
162 wakaba 1.83 ->(node => $attr,
163     type => 'Reserved Prefixes and Namespace Names:Prefix',
164     text => $ln,
165     level => $self->{level}->{nc});
166 wakaba 1.13 } elsif ($ln eq 'xmlns') {
167     $self->{onerror}
168 wakaba 1.83 ->(node => $attr,
169     type => 'Reserved Prefixes and Namespace Names:Prefix',
170     text => $ln,
171     level => $self->{level}->{nc});
172 wakaba 1.13 }
173     ## TODO: If XML 1.0 and empty
174     },
175     xmlns => sub {
176     my ($self, $attr) = @_;
177     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
178     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
179 wakaba 1.18 ## TODO: relative references are deprecated
180 wakaba 1.13 my $value = $attr->value;
181     if ($value eq $XML_NS) {
182     $self->{onerror}
183 wakaba 1.83 ->(node => $attr,
184     type => 'Reserved Prefixes and Namespace Names:Name',
185     text => $value,
186     level => $self->{level}->{nc});
187 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
188     $self->{onerror}
189 wakaba 1.83 ->(node => $attr,
190     type => 'Reserved Prefixes and Namespace Names:Name',
191     text => $value,
192     level => $self->{level}->{nc});
193 wakaba 1.13 }
194     },
195 wakaba 1.9 },
196     };
197    
198 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
199 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
200     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
201 wakaba 1.89 ## NOTE: Checker for (null, "xml:lang") attribute is shadowed for
202     ## HTML elements in Whatpm::ContentChecker::HTML.
203 wakaba 1.13 $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
204     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
205    
206 wakaba 1.79 our $AttrStatus;
207    
208     for (qw/space lang base id/) {
209     $AttrStatus->{$XML_NS}->{$_} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
210     $AttrStatus->{''}->{"xml:$_"} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
211     ## XML 1.0: FEATURE_STATUS_CR
212     ## XML 1.1: FEATURE_STATUS_REC
213     ## XML Namespaces 1.0: FEATURE_STATUS_CR
214     ## XML Namespaces 1.1: FEATURE_STATUS_REC
215     ## XML Base: FEATURE_STATUS_REC
216     ## xml:id: FEATURE_STATUS_REC
217     }
218    
219     $AttrStatus->{$XMLNS_NS}->{''} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
220    
221     ## TODO: xsi:schemaLocation for XHTML2 support (very, very low priority)
222    
223 wakaba 1.60 our %AnyChecker = (
224 wakaba 1.99 ## NOTE: |check_start| is invoked before anything on the element's
225     ## attributes and contents is checked.
226 wakaba 1.60 check_start => sub { },
227 wakaba 1.99 ## NOTE: |check_attrs| is invoked after |check_start| and before
228     ## anything on the element's contents is checked.
229 wakaba 1.60 check_attrs => sub {
230     my ($self, $item, $element_state) = @_;
231     for my $attr (@{$item->{node}->attributes}) {
232 wakaba 1.9 my $attr_ns = $attr->namespace_uri;
233 wakaba 1.92 if (defined $attr_ns) {
234     load_ns_module ($attr_ns);
235     } else {
236     $attr_ns = '';
237     }
238 wakaba 1.9 my $attr_ln = $attr->manakai_local_name;
239 wakaba 1.79
240 wakaba 1.9 my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
241 wakaba 1.60 || $AttrChecker->{$attr_ns}->{''};
242 wakaba 1.79 my $status = $AttrStatus->{$attr_ns}->{$attr_ln}
243     || $AttrStatus->{$attr_ns}->{''};
244     if (not defined $status) {
245     $status = FEATURE_ALLOWED;
246     ## NOTE: FEATURE_ALLOWED for all attributes, since the element
247     ## is not supported and therefore "attribute not defined" error
248     ## should not raised (too verbose) and global attributes should be
249     ## allowed anyway (if a global attribute has its specified creteria
250     ## for where it may be specified, then it should be checked in it's
251     ## checker function).
252     }
253 wakaba 1.9 if ($checker) {
254     $checker->($self, $attr);
255 wakaba 1.17 } else {
256 wakaba 1.83 $self->{onerror}->(node => $attr,
257     type => 'unknown attribute',
258     level => $self->{level}->{uncertain});
259 wakaba 1.9 }
260 wakaba 1.79 $self->_attr_status_info ($attr, $status);
261 wakaba 1.9 }
262     },
263 wakaba 1.99 ## NOTE: |check_child_element| is invoked for each occurence of
264     ## child elements. It is invoked after |check_attrs| and before
265     ## |check_end|. |check_child_element| and |check_child_text| are
266     ## invoked for each child elements and text nodes in tree order.
267 wakaba 1.60 check_child_element => sub {
268     my ($self, $item, $child_el, $child_nsuri, $child_ln,
269     $child_is_transparent, $element_state) = @_;
270     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
271     $self->{onerror}->(node => $child_el,
272     type => 'element not allowed:minus',
273 wakaba 1.83 level => $self->{level}->{must});
274 wakaba 1.60 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
275     #
276     } else {
277     #
278     }
279     },
280 wakaba 1.99 ## NOTE: |check_child_text| is invoked for each occurence of child
281     ## text nodes. It is invoked after |check_attrs| and before
282     ## |check_end|. |check_child_element| and |check_child_text| are
283     ## invoked for each child elements and text nodes in tree order.
284 wakaba 1.60 check_child_text => sub { },
285 wakaba 1.99 ## NOTE: |check_end| is invoked after everything on the element's
286     ## attributes and contents are checked.
287 wakaba 1.60 check_end => sub {
288     my ($self, $item, $element_state) = @_;
289 wakaba 1.82 ## NOTE: There is a modified copy of the code below for |html:ruby|.
290 wakaba 1.60 if ($element_state->{has_significant}) {
291 wakaba 1.66 $item->{real_parent_state}->{has_significant} = 1;
292 wakaba 1.60 }
293     },
294     );
295    
296     our $ElementDefault = {
297     %AnyChecker,
298 wakaba 1.70 status => FEATURE_ALLOWED,
299     ## NOTE: No "element not defined" error - it is not supported anyway.
300 wakaba 1.60 check_start => sub {
301     my ($self, $item, $element_state) = @_;
302 wakaba 1.83 $self->{onerror}->(node => $item->{node},
303     type => 'unknown element',
304     level => $self->{level}->{uncertain});
305 wakaba 1.60 },
306 wakaba 1.1 };
307    
308 wakaba 1.60 our $HTMLEmbeddedContent = {
309     ## NOTE: All embedded content is also phrasing content.
310     $HTML_NS => {
311     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
312     canvas => 1,
313     },
314     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
315     q<http://www.w3.org/2000/svg> => {svg => 1},
316     ## NOTE: Foreign elements with content (but no metadata) are
317     ## embedded content.
318     };
319    
320 wakaba 1.95 our $IsInHTMLInteractiveContent = sub {
321     my ($el, $nsuri, $ln) = @_;
322    
323     ## NOTE: This CODE returns whether an element that is conditionally
324     ## categorizzed as an interactive content is currently in that
325     ## condition or not. See $HTMLInteractiveContent list defined in
326     ## Whatpm::ContentChecler::HTML for the list of all (conditionally
327     ## or permanently) interactive content.
328    
329     if ($nsuri eq $HTML_NS and ($ln eq 'video' or $ln eq 'audio')) {
330     return $el->has_attribute ('controls');
331     } elsif ($nsuri eq $HTML_NS and $ln eq 'menu') {
332     my $value = $el->get_attribute ('type');
333     $value =~ tr/A-Z/a-z/; # ASCII case-insensitive
334     return ($value eq 'toolbar');
335     } else {
336     return 1;
337     }
338     }; # $IsInHTMLInteractiveContent
339    
340 wakaba 1.7 my $HTMLTransparentElements = {
341 wakaba 1.90 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1 a 1/},
342 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
343     ## and not in |head|.
344 wakaba 1.7 };
345    
346 wakaba 1.61 my $HTMLSemiTransparentElements = {
347     $HTML_NS => {object => 1, video => 1, audio => 1},
348     };
349 wakaba 1.57
350 wakaba 1.42 our $Element = {};
351 wakaba 1.7
352 wakaba 1.73 $Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
353     %AnyChecker,
354     status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
355     is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
356     check_start => sub {
357     my ($self, $item, $element_state) = @_;
358     my $triple = [];
359     push @{$self->{return}->{rdf}}, [$item->{node}, $triple];
360     require Whatpm::RDFXML;
361     my $rdf = Whatpm::RDFXML->new;
362 wakaba 1.75 ## TODO: Should we make bnodeid unique in a document?
363 wakaba 1.73 $rdf->{onerror} = $self->{onerror};
364 wakaba 1.84 $rdf->{level} = $self->{level};
365 wakaba 1.73 $rdf->{ontriple} = sub {
366     my %opt = @_;
367     push @$triple,
368     [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
369 wakaba 1.74 if (defined $opt{id}) {
370     push @$triple,
371     [$opt{node},
372     $opt{id},
373     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>},
374     $opt{subject}];
375     push @$triple,
376     [$opt{node},
377     $opt{id},
378     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>},
379     $opt{predicate}];
380     push @$triple,
381     [$opt{node},
382     $opt{id},
383     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>},
384     $opt{object}];
385     push @$triple,
386     [$opt{node},
387     $opt{id},
388     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>},
389     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}];
390     }
391 wakaba 1.73 };
392     $rdf->convert_rdf_element ($item->{node});
393     },
394     };
395    
396 wakaba 1.83 my $default_error_level = {
397     must => 'm',
398     should => 's',
399     warn => 'w',
400     good => 'w',
401 wakaba 1.93 undefined => 'w',
402 wakaba 1.83 info => 'i',
403 wakaba 1.85
404 wakaba 1.83 uncertain => 'u',
405    
406 wakaba 1.84 html4_fact => 'm',
407 wakaba 1.89 html5_no_may => 'm',
408 wakaba 1.101 html5_fact => 'm',
409 wakaba 1.89
410 wakaba 1.83 xml_error => 'm', ## TODO: correct?
411 wakaba 1.88 xml_id_error => 'm', ## TODO: ?
412 wakaba 1.83 nc => 'm', ## XML Namespace Constraints ## TODO: correct?
413 wakaba 1.84
414 wakaba 1.91 ## |Whatpm::URIChecker|
415 wakaba 1.85 uri_syntax => 'm',
416     uri_fact => 'm',
417     uri_lc_must => 'm',
418     uri_lc_should => 'w',
419    
420 wakaba 1.91 ## |Whatpm::IMTChecker|
421 wakaba 1.87 mime_must => 'm', # lowercase "must"
422     mime_fact => 'm',
423     mime_strongly_discouraged => 'w',
424     mime_discouraged => 'w',
425    
426 wakaba 1.91 ## |Whatpm::LangTag|
427 wakaba 1.85 langtag_fact => 'm',
428    
429 wakaba 1.91 ## |Whatpm::RDFXML|
430 wakaba 1.84 rdf_fact => 'm',
431     rdf_grammer => 'm',
432     rdf_lc_must => 'm',
433 wakaba 1.91
434     ## |Message::Charset::Info| and |Whatpm::Charset::DecodeHandle|
435     charset_variant => 'm',
436     ## An error caused by use of a variant charset that is not conforming
437     ## to the original charset (e.g. use of 0x80 in an ISO-8859-1 document
438     ## which is interpreted as a Windows-1252 document instead).
439     charset_fact => 'm',
440     iso_shall => 'm',
441 wakaba 1.83 };
442    
443 wakaba 1.56 sub check_document ($$$;$) {
444     my ($self, $doc, $onerror, $onsubdoc) = @_;
445 wakaba 1.42 $self = bless {}, $self unless ref $self;
446     $self->{onerror} = $onerror;
447 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
448     warn "A subdocument is not conformance-checked";
449     };
450 wakaba 1.1
451 wakaba 1.83 $self->{level} ||= $default_error_level;
452 wakaba 1.48
453 wakaba 1.73 ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.
454    
455 wakaba 1.42 my $docel = $doc->document_element;
456     unless (defined $docel) {
457     ## ISSUE: Should we check content of Document node?
458 wakaba 1.83 $onerror->(node => $doc, type => 'no document element',
459     level => $self->{level}->{must});
460 wakaba 1.42 ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
461     return {
462     class => {},
463     id => {}, table => [], term => {},
464     };
465 wakaba 1.1 }
466    
467 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
468 wakaba 1.1
469 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
470 wakaba 1.92 if (defined $docel_nsuri) {
471     load_ns_module ($docel_nsuri);
472     } else {
473     $docel_nsuri = '';
474     }
475 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
476     $Element->{$docel_nsuri}->{''} ||
477     $ElementDefault;
478     if ($docel_def->{is_root}) {
479     #
480 wakaba 1.50 } elsif ($docel_def->{is_xml_root}) {
481     unless ($doc->manakai_is_html) {
482     #
483     } else {
484 wakaba 1.83 $onerror->(node => $docel, type => 'element not allowed:root:xml',
485     level => $self->{level}->{must});
486 wakaba 1.50 }
487 wakaba 1.42 } else {
488 wakaba 1.83 $onerror->(node => $docel, type => 'element not allowed:root',
489     level => $self->{level}->{must});
490 wakaba 1.1 }
491    
492 wakaba 1.42 ## TODO: Check for other items other than document element
493     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
494 wakaba 1.2
495 wakaba 1.56 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
496 wakaba 1.51
497 wakaba 1.52 ## TODO: Test for these checks are necessary.
498 wakaba 1.51 my $charset_name = $doc->input_encoding;
499     if (defined $charset_name) {
500     require Message::Charset::Info;
501     my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
502    
503 wakaba 1.71 if ($doc->manakai_is_html) {
504     if (not $doc->manakai_has_bom and
505     not defined $doc->manakai_charset) {
506     unless ($charset->{is_html_ascii_superset}) {
507 wakaba 1.86 $onerror->(node => $doc,
508     level => $self->{level}->{must},
509 wakaba 1.83 type => 'non ascii superset',
510     text => $charset_name);
511 wakaba 1.71 }
512    
513     if (not $self->{has_charset} and ## TODO: This does not work now.
514     not $charset->{iana_names}->{'us-ascii'}) {
515 wakaba 1.86 $onerror->(node => $doc,
516     level => $self->{level}->{must},
517 wakaba 1.83 type => 'no character encoding declaration',
518     text => $charset_name);
519 wakaba 1.71 }
520 wakaba 1.51 }
521 wakaba 1.71
522     if ($charset->{iana_names}->{'utf-8'}) {
523     #
524     } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
525     $charset->{iana_names}->{'x-jis0208'} or
526     $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
527 wakaba 1.91 ($charset->{category} & Message::Charset::Info::CHARSET_CATEGORY_EBCDIC ())) {
528 wakaba 1.71 $onerror->(node => $doc,
529 wakaba 1.83 type => 'bad character encoding',
530     text => $charset_name,
531     level => $self->{level}->{should},
532     layer => 'encode');
533 wakaba 1.71 } elsif ($charset->{iana_names}->{'cesu-8'} or
534     $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
535     $charset->{iana_names}->{'bocu-1'} or
536     $charset->{iana_names}->{'scsu'}) {
537     $onerror->(node => $doc,
538 wakaba 1.83 type => 'disallowed character encoding',
539     text => $charset_name,
540     level => $self->{level}->{must},
541     layer => 'encode');
542 wakaba 1.71 } else {
543     $onerror->(node => $doc,
544 wakaba 1.83 type => 'non-utf-8 character encoding',
545     text => $charset_name,
546     level => $self->{level}->{good},
547     layer => 'encode');
548 wakaba 1.51 }
549     }
550 wakaba 1.52 } elsif ($doc->manakai_is_html) {
551     ## NOTE: MUST and SHOULD requirements above cannot be tested,
552     ## since the document has no input charset encoding information.
553     $onerror->(node => $doc,
554 wakaba 1.83 type => 'character encoding unchecked',
555     level => $self->{level}->{info},
556     layer => 'encode');
557 wakaba 1.51 }
558    
559     return $return;
560 wakaba 1.42 } # check_document
561 wakaba 1.1
562 wakaba 1.81 ## Check an element. The element is checked as if it is an orphan node (i.e.
563     ## an element without a parent node).
564 wakaba 1.56 sub check_element ($$$;$) {
565     my ($self, $el, $onerror, $onsubdoc) = @_;
566 wakaba 1.42 $self = bless {}, $self unless ref $self;
567     $self->{onerror} = $onerror;
568 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
569     warn "A subdocument is not conformance-checked";
570     };
571 wakaba 1.2
572 wakaba 1.83 $self->{level} ||= $default_error_level;
573 wakaba 1.48
574 wakaba 1.61 $self->{plus_elements} = {};
575     $self->{minus_elements} = {};
576 wakaba 1.42 $self->{id} = {};
577 wakaba 1.99 $self->{id_type} = {}; # 'form' / 'labelable' / 'menu'
578     $self->{form} = {}; # form/@name
579 wakaba 1.100 $self->{idref} = []; # @form, @for, @contextmenu
580 wakaba 1.42 $self->{term} = {};
581     $self->{usemap} = [];
582 wakaba 1.78 $self->{ref} = []; # datetemplate data references
583     $self->{template} = []; # datatemplate template references
584 wakaba 1.42 $self->{map} = {};
585     $self->{has_link_type} = {};
586 wakaba 1.60 $self->{flag} = {};
587 wakaba 1.46 #$self->{has_uri_attr};
588     #$self->{has_hyperlink_element};
589 wakaba 1.51 #$self->{has_charset};
590 wakaba 1.57 #$self->{has_base};
591 wakaba 1.42 $self->{return} = {
592     class => {},
593 wakaba 1.80 id => $self->{id},
594     table => [], # table objects returned by Whatpm::HTMLTable
595     term => $self->{term},
596 wakaba 1.76 uri => {}, # URIs other than those in RDF triples
597     ## TODO: xmlns="", SYSTEM "", atom:* src="", xml:base=""
598 wakaba 1.73 rdf => [],
599 wakaba 1.42 };
600 wakaba 1.4
601 wakaba 1.60 my @item = ({type => 'element', node => $el, parent_state => {}});
602 wakaba 1.66 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
603 wakaba 1.60 while (@item) {
604     my $item = shift @item;
605     if (ref $item eq 'ARRAY') {
606     my $code = shift @$item;
607     next unless $code;## TODO: temp.
608     $code->(@$item);
609     } elsif ($item->{type} eq 'element') {
610     my $el_nsuri = $item->{node}->namespace_uri;
611 wakaba 1.92 if (defined $el_nsuri) {
612     load_ns_module ($el_nsuri);
613     } else {
614     $el_nsuri = '';
615     }
616 wakaba 1.60 my $el_ln = $item->{node}->manakai_local_name;
617 wakaba 1.92
618 wakaba 1.63 my $element_state = {};
619 wakaba 1.60 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
620     $Element->{$el_nsuri}->{''} ||
621 wakaba 1.42 $ElementDefault;
622 wakaba 1.61 my $content_def = $item->{transparent}
623     ? $item->{parent_def} || $eldef : $eldef;
624 wakaba 1.63 my $content_state = $item->{transparent}
625 wakaba 1.65 ? $item->{parent_def}
626     ? $item->{parent_state} || $element_state : $element_state
627     : $element_state;
628 wakaba 1.60
629 wakaba 1.67 unless ($eldef->{status} & FEATURE_STATUS_REC) {
630     my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
631     $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
632     $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
633     $self->{onerror}->(node => $item->{node},
634     type => 'status:'.$status.':element',
635 wakaba 1.83 level => $self->{level}->{info});
636 wakaba 1.67 }
637 wakaba 1.70 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
638     $self->{onerror}->(node => $item->{node},
639     type => 'element not defined',
640 wakaba 1.83 level => $self->{level}->{must});
641 wakaba 1.70 } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
642     $self->{onerror}->(node => $item->{node},
643     type => 'deprecated:element',
644 wakaba 1.83 level => $self->{level}->{should});
645 wakaba 1.70 } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
646     $self->{onerror}->(node => $item->{node},
647     type => 'deprecated:element',
648 wakaba 1.83 level => $self->{level}->{info});
649 wakaba 1.70 }
650 wakaba 1.67
651 wakaba 1.60 my @new_item;
652     push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
653     push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
654 wakaba 1.61
655 wakaba 1.60 my @child = @{$item->{node}->child_nodes};
656     while (@child) {
657     my $child = shift @child;
658     my $child_nt = $child->node_type;
659     if ($child_nt == 1) { # ELEMENT_NODE
660     my $child_nsuri = $child->namespace_uri;
661     $child_nsuri = '' unless defined $child_nsuri;
662     my $child_ln = $child->manakai_local_name;
663     if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
664     not (($self->{flag}->{in_head} or
665 wakaba 1.61 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
666     $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
667 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
668     $self, $item, $child,
669 wakaba 1.66 $child_nsuri, $child_ln, 1,
670     $content_state, $element_state];
671 wakaba 1.60 push @new_item, {type => 'element', node => $child,
672 wakaba 1.65 parent_state => $content_state,
673 wakaba 1.61 parent_def => $content_def,
674 wakaba 1.66 real_parent_state => $element_state,
675 wakaba 1.60 transparent => 1};
676     } else {
677 wakaba 1.65 if ($item->{parent_def} and # has parent
678     $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
679 wakaba 1.61 if ($el_ln eq 'object') {
680     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
681     #
682     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
683     #
684     } else {
685 wakaba 1.62 $content_def = $item->{parent_def} || $content_def;
686 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
687 wakaba 1.62 }
688     } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
689     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
690     #
691     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
692     $element_state->{has_source} = 1;
693     } else {
694     $content_def = $item->{parent_def} || $content_def;
695 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
696 wakaba 1.61 }
697     }
698     }
699    
700 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
701     $self, $item, $child,
702 wakaba 1.64 $child_nsuri, $child_ln,
703     $HTMLSemiTransparentElements
704     ->{$child_nsuri}->{$child_ln},
705 wakaba 1.66 $content_state, $element_state];
706 wakaba 1.60 push @new_item, {type => 'element', node => $child,
707 wakaba 1.65 parent_def => $content_def,
708 wakaba 1.66 real_parent_state => $element_state,
709 wakaba 1.65 parent_state => $content_state};
710 wakaba 1.60 }
711    
712     if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
713     $element_state->{has_significant} = 1;
714     }
715     } elsif ($child_nt == 3 or # TEXT_NODE
716     $child_nt == 4) { # CDATA_SECTION_NODE
717 wakaba 1.96 my $has_significant = ($child->data =~ /[^\x09\x0A\x0C\x0D\x20]/);
718 wakaba 1.60 push @new_item, [$content_def->{check_child_text},
719     $self, $item, $child, $has_significant,
720 wakaba 1.66 $content_state, $element_state];
721     $element_state->{has_significant} ||= $has_significant;
722 wakaba 1.61 if ($has_significant and
723     $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
724     $content_def = $item->{parent_def} || $content_def;
725     }
726 wakaba 1.60 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
727     push @child, @{$child->child_nodes};
728 wakaba 1.1 }
729 wakaba 1.60 ## TODO: PI_NODE
730     ## TODO: Unknown node type
731 wakaba 1.1 }
732 wakaba 1.60
733     push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
734    
735     unshift @item, @new_item;
736 wakaba 1.30 } else {
737 wakaba 1.60 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
738 wakaba 1.4 }
739 wakaba 1.1 }
740 wakaba 1.17
741 wakaba 1.78 for (@{$self->{template}}) {
742     ## TODO: If the document is an XML document, ...
743     ## NOTE: If the document is an HTML document:
744     ## ISSUE: We need to percent-decode?
745     F: {
746     if ($self->{id}->{$_->[0]}) {
747     my $el = $self->{id}->{$_->[0]}->[0]->owner_element;
748     if ($el->node_type == 1 and # ELEMENT_NODE
749     $el->manakai_local_name eq 'datatemplate') {
750     my $nsuri = $el->namespace_uri;
751     if (defined $nsuri and $nsuri eq $HTML_NS) {
752     if ($el eq $_->[1]->owner_element) {
753     $self->{onerror}->(node => $_->[1],
754     type => 'fragment points itself',
755 wakaba 1.83 level => $self->{level}->{must});
756 wakaba 1.78 }
757    
758     last F;
759     }
760     }
761     }
762     ## TODO: Should we raise a "fragment points nothing" error instead
763     ## if the fragment identifier identifies no element?
764    
765     $self->{onerror}->(node => $_->[1], type => 'template:not template',
766 wakaba 1.83 level => $self->{level}->{must});
767 wakaba 1.78 } # F
768     }
769    
770     for (@{$self->{ref}}) {
771     ## TOOD: If XML
772     ## NOTE: If it is an HTML document:
773     if ($_->[0] eq '') {
774     ## NOTE: It points the top of the document.
775     } elsif ($self->{id}->{$_->[0]}) {
776     if ($self->{id}->{$_->[0]}->[0]->owner_element
777     eq $_->[1]->owner_element) {
778     $self->{onerror}->(node => $_->[1], type => 'fragment points itself',
779 wakaba 1.83 level => $self->{level}->{must});
780 wakaba 1.78 }
781     } else {
782     $self->{onerror}->(node => $_->[1], type => 'fragment points nothing',
783 wakaba 1.83 level => $self->{level}->{must});
784 wakaba 1.78 }
785     }
786    
787     ## TODO: Maybe we should have $document->manakai_get_by_fragment or something
788    
789 wakaba 1.17 for (@{$self->{usemap}}) {
790     unless ($self->{map}->{$_->[0]}) {
791 wakaba 1.83 $self->{onerror}->(node => $_->[1], type => 'no referenced map',
792     level => $self->{level}->{must});
793 wakaba 1.17 }
794     }
795    
796 wakaba 1.100 for (@{$self->{idref}}) {
797     if ($self->{id}->{$_->[1]} and
798     $self->{id_type}->{$_->[1]} eq $_->[0]) {
799 wakaba 1.99 #
800     } else {
801 wakaba 1.100 $self->{onerror}->(node => $_->[2],
802     type => {
803     form => 'no referenced form',
804     labelable => 'no referenced control',
805     menu => 'no referenced menu',
806     }->{$_->[0]},
807 wakaba 1.83 level => $self->{level}->{must});
808 wakaba 1.32 }
809     }
810    
811 wakaba 1.61 delete $self->{plus_elements};
812     delete $self->{minus_elements};
813 wakaba 1.17 delete $self->{onerror};
814     delete $self->{id};
815 wakaba 1.98 delete $self->{id_type};
816 wakaba 1.97 delete $self->{form};
817 wakaba 1.100 delete $self->{idref};
818 wakaba 1.17 delete $self->{usemap};
819 wakaba 1.78 delete $self->{ref};
820     delete $self->{template};
821 wakaba 1.17 delete $self->{map};
822 wakaba 1.33 return $self->{return};
823 wakaba 1.1 } # check_element
824    
825 wakaba 1.60 sub _add_minus_elements ($$@) {
826     my $self = shift;
827     my $element_state = shift;
828     for my $elements (@_) {
829     for my $nsuri (keys %$elements) {
830     for my $ln (keys %{$elements->{$nsuri}}) {
831     unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
832     $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
833     $self->{minus_elements}->{$nsuri}->{$ln} = 1;
834     }
835     }
836     }
837     }
838     } # _add_minus_elements
839    
840     sub _remove_minus_elements ($$) {
841     my $self = shift;
842     my $element_state = shift;
843     for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
844     for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
845     delete $self->{minus_elements}->{$nsuri}->{$ln};
846     }
847     }
848     } # _remove_minus_elements
849    
850     sub _add_plus_elements ($$@) {
851     my $self = shift;
852     my $element_state = shift;
853     for my $elements (@_) {
854     for my $nsuri (keys %$elements) {
855     for my $ln (keys %{$elements->{$nsuri}}) {
856     unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
857     $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
858     $self->{plus_elements}->{$nsuri}->{$ln} = 1;
859     }
860     }
861     }
862     }
863     } # _add_plus_elements
864    
865     sub _remove_plus_elements ($$) {
866     my $self = shift;
867     my $element_state = shift;
868     for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
869     for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
870     delete $self->{plus_elements}->{$nsuri}->{$ln};
871     }
872     }
873     } # _remove_plus_elements
874    
875 wakaba 1.68 sub _attr_status_info ($$$) {
876     my ($self, $attr, $status_code) = @_;
877 wakaba 1.70
878     if (not ($status_code & FEATURE_ALLOWED)) {
879     $self->{onerror}->(node => $attr,
880     type => 'attribute not defined',
881 wakaba 1.83 level => $self->{level}->{must});
882 wakaba 1.70 } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
883     $self->{onerror}->(node => $attr,
884     type => 'deprecated:attr',
885 wakaba 1.83 level => $self->{level}->{should});
886 wakaba 1.70 } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
887     $self->{onerror}->(node => $attr,
888     type => 'deprecated:attr',
889 wakaba 1.83 level => $self->{level}->{info});
890 wakaba 1.70 }
891    
892 wakaba 1.68 my $status;
893     if ($status_code & FEATURE_STATUS_REC) {
894     return;
895     } elsif ($status_code & FEATURE_STATUS_CR) {
896     $status = 'cr';
897     } elsif ($status_code & FEATURE_STATUS_LC) {
898     $status = 'lc';
899     } elsif ($status_code & FEATURE_STATUS_WD) {
900     $status = 'wd';
901     } else {
902     $status = 'non-standard';
903     }
904     $self->{onerror}->(node => $attr,
905     type => 'status:'.$status.':attr',
906 wakaba 1.83 level => $self->{level}->{info});
907 wakaba 1.68 } # _attr_status_info
908    
909 wakaba 1.2 sub _add_minuses ($@) {
910     my $self = shift;
911     my $r = {};
912     for my $list (@_) {
913     for my $ns (keys %$list) {
914     for my $ln (keys %{$list->{$ns}}) {
915     unless ($self->{minuses}->{$ns}->{$ln}) {
916     $self->{minuses}->{$ns}->{$ln} = 1;
917     $r->{$ns}->{$ln} = 1;
918     }
919     }
920     }
921     }
922 wakaba 1.4 return {type => 'plus', list => $r};
923 wakaba 1.2 } # _add_minuses
924    
925 wakaba 1.50 sub _add_pluses ($@) {
926     my $self = shift;
927     my $r = {};
928     for my $list (@_) {
929     for my $ns (keys %$list) {
930     for my $ln (keys %{$list->{$ns}}) {
931     unless ($self->{pluses}->{$ns}->{$ln}) {
932     $self->{pluses}->{$ns}->{$ln} = 1;
933     $r->{$ns}->{$ln} = 1;
934     }
935     }
936     }
937     }
938     return {type => 'minus', list => $r};
939     } # _add_pluses
940    
941 wakaba 1.2 sub _remove_minuses ($$) {
942 wakaba 1.4 my ($self, $todo) = @_;
943 wakaba 1.50 if ($todo->{type} eq 'minus') {
944     for my $ns (keys %{$todo->{list}}) {
945     for my $ln (keys %{$todo->{list}->{$ns}}) {
946     delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
947     }
948 wakaba 1.2 }
949 wakaba 1.50 } elsif ($todo->{type} eq 'plus') {
950     for my $ns (keys %{$todo->{list}}) {
951     for my $ln (keys %{$todo->{list}->{$ns}}) {
952     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
953     }
954     }
955     } else {
956     die "$0: Unknown +- type: $todo->{type}";
957 wakaba 1.2 }
958     1;
959     } # _remove_minuses
960    
961 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
962     ## undefined and implemented inconsistently; it is not a problem for
963     ## now, since no element belongs to both lists.
964    
965 wakaba 1.30 sub _check_get_children ($$$) {
966     my ($self, $node, $parent_todo) = @_;
967 wakaba 1.4 my $new_todos = [];
968 wakaba 1.2 my $sib = [];
969     TP: {
970     my $node_ns = $node->namespace_uri;
971     $node_ns = '' unless defined $node_ns;
972     my $node_ln = $node->manakai_local_name;
973 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
974     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
975     if ($parent_todo->{flag}->{in_head}) {
976     #
977     } else {
978     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
979     push @$sib, $end;
980    
981     unshift @$sib, @{$node->child_nodes};
982     push @$new_todos, {type => 'element-attributes', node => $node};
983     last TP;
984     }
985 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
986     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
987     unshift @$sib, @{$node->child_nodes};
988     push @$new_todos, {type => 'element-attributes', node => $node};
989     push @$new_todos,
990     {type => 'code',
991     code => sub {
992     $parent_todo->{flag}->{has_descendant}->{significant} = 0
993     if not $sig_flag;
994     }};
995     last TP;
996 wakaba 1.45 } else {
997     unshift @$sib, @{$node->child_nodes};
998     push @$new_todos, {type => 'element-attributes', node => $node};
999     last TP;
1000 wakaba 1.2 }
1001     }
1002 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
1003 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
1004     unshift @$sib, @{$node->child_nodes};
1005 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
1006 wakaba 1.2 last TP;
1007     } else {
1008     my @cn = @{$node->child_nodes};
1009     CN: while (@cn) {
1010     my $cn = shift @cn;
1011     my $cnt = $cn->node_type;
1012     if ($cnt == 1) {
1013 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
1014     $cn_nsuri = '' unless defined $cn_nsuri;
1015     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
1016 wakaba 1.2 #
1017     } else {
1018     last CN;
1019     }
1020     } elsif ($cnt == 3 or $cnt == 4) {
1021 wakaba 1.96 if ($cn->data =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1022 wakaba 1.2 last CN;
1023     }
1024     }
1025     } # CN
1026     unshift @$sib, @cn;
1027     }
1028 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
1029     my @cn = @{$node->child_nodes};
1030     CN: while (@cn) {
1031     my $cn = shift @cn;
1032     my $cnt = $cn->node_type;
1033     if ($cnt == 1) {
1034     my $cn_nsuri = $cn->namespace_uri;
1035     $cn_nsuri = '' unless defined $cn_nsuri;
1036     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
1037     #
1038     } else {
1039     last CN;
1040     }
1041     } elsif ($cnt == 3 or $cnt == 4) {
1042 wakaba 1.96 if ($cn->data =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1043 wakaba 1.57 last CN;
1044     }
1045     }
1046     } # CN
1047     unshift @$sib, @cn;
1048 wakaba 1.2 }
1049 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
1050 wakaba 1.2 } # TP
1051 wakaba 1.30
1052     for my $new_todo (@$new_todos) {
1053     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
1054     }
1055    
1056 wakaba 1.4 return ($sib, $new_todos);
1057 wakaba 1.2 } # _check_get_children
1058    
1059 wakaba 1.44 =head1 LICENSE
1060    
1061 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1062 wakaba 1.44
1063     This library is free software; you can redistribute it
1064     and/or modify it under the same terms as Perl itself.
1065    
1066     =cut
1067    
1068 wakaba 1.1 1;
1069 wakaba 1.101 # $Date: 2008/09/22 06:48:03 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24