/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.77 - (hide annotations) (download)
Sat Apr 12 10:41:30 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.76: +2 -4 lines
++ whatpm/t/ChangeLog	12 Apr 2008 10:41:08 -0000
	* HTML-tokenizer.t: Remove "self-closing flag" if the start
	tag token is that of a slash permitted element (This is necessary
	to maintain compatibility with current test data, since in the
	new algorithm whether slash is permitted or not is decided in
	tree construction stage).

2008-04-12  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	12 Apr 2008 10:38:11 -0000
2008-04-12  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src, mkhtmlparser.pl: The way permitted slash errors
	are raised is changed (HTML5 revision 1404).

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.77 our $VERSION=do{my @r=(q$Revision: 1.76 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
12 wakaba 1.70 ## Stability
13 wakaba 1.67 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14     sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15     sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16     sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17    
18 wakaba 1.70 ## Deprecated
19     sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20     sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21    
22     ## Conformance
23     sub FEATURE_ALLOWED () { 0b10000 }
24    
25 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28    
29 wakaba 1.42 my $Namespace = {
30 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
31 wakaba 1.72 q<http://purl.org/syndication/history/1.0>
32     => {module => 'Whatpm::ContentChecker::Atom'},
33     q<http://purl.org/syndication/threading/1.0>
34     => {module => 'Whatpm::ContentChecker::Atom'},
35 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
36     $XML_NS => {loaded => 1},
37     $XMLNS_NS => {loaded => 1},
38 wakaba 1.73 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
39 wakaba 1.42 };
40    
41     our $AttrChecker = {
42 wakaba 1.9 $XML_NS => {
43 wakaba 1.13 space => sub {
44     my ($self, $attr) = @_;
45     my $value = $attr->value;
46     if ($value eq 'default' or $value eq 'preserve') {
47     #
48     } else {
49     ## NOTE: An XML "error"
50 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'error',
51     type => 'invalid attribute value');
52 wakaba 1.13 }
53     },
54     lang => sub {
55 wakaba 1.35 my ($self, $attr) = @_;
56 wakaba 1.47 my $value = $attr->value;
57     if ($value eq '') {
58     #
59     } else {
60     require Whatpm::LangTag;
61     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
62     my %opt = @_;
63     my $type = 'LangTag:'.$opt{type};
64     $type .= ':' . $opt{subtag} if defined $opt{subtag};
65     $self->{onerror}->(node => $attr, type => $type,
66     value => $opt{value}, level => $opt{level});
67     });
68     }
69    
70 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
71     ## as defined by [IETF RFC 3066], Tags for the Identification
72     ## of Languages, or its successor; in addition, the empty string
73     ## may be specified." ("may" in lower case)
74 wakaba 1.47 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
75     ## allowed today?
76    
77     ## TODO: test data
78    
79 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
80 wakaba 1.36 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
81 wakaba 1.35 ## TODO: Test data...
82     }
83 wakaba 1.13 },
84     base => sub {
85     my ($self, $attr) = @_;
86     my $value = $attr->value;
87     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
88     $self->{onerror}->(node => $attr,
89 wakaba 1.33 type => 'invalid attribute value');
90 wakaba 1.13 }
91 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
92     ## no author requirement on conformance in the XML Base specification.
93 wakaba 1.13 },
94     id => sub {
95     my ($self, $attr) = @_;
96     my $value = $attr->value;
97     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
98     $value =~ s/^\x20//;
99     $value =~ s/\x20$//;
100     ## TODO: NCName in XML 1.0 or 1.1
101     ## TODO: declared type is ID?
102 wakaba 1.33 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
103     $self->{onerror}->(node => $attr, level => 'error',
104     type => 'duplicate ID');
105 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
106 wakaba 1.13 } else {
107 wakaba 1.37 $self->{id}->{$value} = [$attr];
108 wakaba 1.13 }
109     },
110 wakaba 1.9 },
111     $XMLNS_NS => {
112 wakaba 1.13 '' => sub {
113     my ($self, $attr) = @_;
114     my $ln = $attr->manakai_local_name;
115     my $value = $attr->value;
116     if ($value eq $XML_NS and $ln ne 'xml') {
117     $self->{onerror}
118 wakaba 1.33 ->(node => $attr, level => 'NC',
119     type => 'Reserved Prefixes and Namespace Names:=xml');
120 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
121     $self->{onerror}
122 wakaba 1.33 ->(node => $attr, level => 'NC',
123     type => 'Reserved Prefixes and Namespace Names:=xmlns');
124 wakaba 1.13 }
125     if ($ln eq 'xml' and $value ne $XML_NS) {
126     $self->{onerror}
127 wakaba 1.33 ->(node => $attr, level => 'NC',
128     type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
129 wakaba 1.13 } elsif ($ln eq 'xmlns') {
130     $self->{onerror}
131 wakaba 1.33 ->(node => $attr, level => 'NC',
132     type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
133 wakaba 1.13 }
134     ## TODO: If XML 1.0 and empty
135     },
136     xmlns => sub {
137     my ($self, $attr) = @_;
138     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
139     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
140 wakaba 1.18 ## TODO: relative references are deprecated
141 wakaba 1.13 my $value = $attr->value;
142     if ($value eq $XML_NS) {
143     $self->{onerror}
144 wakaba 1.33 ->(node => $attr, level => 'NC',
145     type => 'Reserved Prefixes and Namespace Names:=xml');
146 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
147     $self->{onerror}
148 wakaba 1.33 ->(node => $attr, level => 'NC',
149     type => 'Reserved Prefixes and Namespace Names:=xmlns');
150 wakaba 1.13 }
151     },
152 wakaba 1.9 },
153     };
154    
155 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
156 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
157     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
158     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
159     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
160    
161 wakaba 1.60 our %AnyChecker = (
162     check_start => sub { },
163     check_attrs => sub {
164     my ($self, $item, $element_state) = @_;
165     for my $attr (@{$item->{node}->attributes}) {
166 wakaba 1.9 my $attr_ns = $attr->namespace_uri;
167     $attr_ns = '' unless defined $attr_ns;
168     my $attr_ln = $attr->manakai_local_name;
169     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
170 wakaba 1.60 || $AttrChecker->{$attr_ns}->{''};
171 wakaba 1.9 if ($checker) {
172     $checker->($self, $attr);
173 wakaba 1.17 } else {
174 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'unsupported',
175     type => 'attribute');
176 wakaba 1.9 }
177     }
178     },
179 wakaba 1.60 check_child_element => sub {
180     my ($self, $item, $child_el, $child_nsuri, $child_ln,
181     $child_is_transparent, $element_state) = @_;
182     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
183     $self->{onerror}->(node => $child_el,
184     type => 'element not allowed:minus',
185     level => $self->{must_level});
186     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
187     #
188     } else {
189     #
190     }
191     },
192     check_child_text => sub { },
193     check_end => sub {
194     my ($self, $item, $element_state) = @_;
195     if ($element_state->{has_significant}) {
196 wakaba 1.66 $item->{real_parent_state}->{has_significant} = 1;
197 wakaba 1.60 }
198     },
199     );
200    
201     our $ElementDefault = {
202     %AnyChecker,
203 wakaba 1.70 status => FEATURE_ALLOWED,
204     ## NOTE: No "element not defined" error - it is not supported anyway.
205 wakaba 1.60 check_start => sub {
206     my ($self, $item, $element_state) = @_;
207     $self->{onerror}->(node => $item->{node}, level => 'unsupported',
208     type => 'element');
209     },
210 wakaba 1.1 };
211    
212 wakaba 1.60 our $HTMLEmbeddedContent = {
213     ## NOTE: All embedded content is also phrasing content.
214     $HTML_NS => {
215     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
216     canvas => 1,
217     },
218     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
219     q<http://www.w3.org/2000/svg> => {svg => 1},
220     ## NOTE: Foreign elements with content (but no metadata) are
221     ## embedded content.
222     };
223    
224 wakaba 1.7 my $HTMLTransparentElements = {
225 wakaba 1.57 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
226 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
227     ## and not in |head|.
228 wakaba 1.7 };
229    
230 wakaba 1.61 my $HTMLSemiTransparentElements = {
231     $HTML_NS => {object => 1, video => 1, audio => 1},
232     };
233 wakaba 1.57
234 wakaba 1.42 our $Element = {};
235 wakaba 1.7
236 wakaba 1.73 $Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
237     %AnyChecker,
238     status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
239     is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
240     check_start => sub {
241     my ($self, $item, $element_state) = @_;
242     my $triple = [];
243     push @{$self->{return}->{rdf}}, [$item->{node}, $triple];
244     require Whatpm::RDFXML;
245     my $rdf = Whatpm::RDFXML->new;
246 wakaba 1.75 ## TODO: Should we make bnodeid unique in a document?
247 wakaba 1.73 $rdf->{onerror} = $self->{onerror};
248     $rdf->{ontriple} = sub {
249     my %opt = @_;
250     push @$triple,
251     [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
252 wakaba 1.74 if (defined $opt{id}) {
253     push @$triple,
254     [$opt{node},
255     $opt{id},
256     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>},
257     $opt{subject}];
258     push @$triple,
259     [$opt{node},
260     $opt{id},
261     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>},
262     $opt{predicate}];
263     push @$triple,
264     [$opt{node},
265     $opt{id},
266     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>},
267     $opt{object}];
268     push @$triple,
269     [$opt{node},
270     $opt{id},
271     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>},
272     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}];
273     }
274 wakaba 1.73 };
275     $rdf->convert_rdf_element ($item->{node});
276     },
277     };
278    
279 wakaba 1.56 sub check_document ($$$;$) {
280     my ($self, $doc, $onerror, $onsubdoc) = @_;
281 wakaba 1.42 $self = bless {}, $self unless ref $self;
282     $self->{onerror} = $onerror;
283 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
284     warn "A subdocument is not conformance-checked";
285     };
286 wakaba 1.1
287 wakaba 1.48 $self->{must_level} = 'm';
288 wakaba 1.76 $self->{fact_level} = 'm';
289 wakaba 1.48 $self->{should_level} = 's';
290 wakaba 1.51 $self->{good_level} = 'w';
291 wakaba 1.67 $self->{info_level} = 'i';
292 wakaba 1.71 $self->{unsupported_level} = 'u';
293 wakaba 1.48
294 wakaba 1.73 ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.
295    
296 wakaba 1.42 my $docel = $doc->document_element;
297     unless (defined $docel) {
298     ## ISSUE: Should we check content of Document node?
299     $onerror->(node => $doc, type => 'no document element');
300     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
301     return {
302     class => {},
303     id => {}, table => [], term => {},
304     };
305 wakaba 1.1 }
306    
307 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
308 wakaba 1.1
309 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
310     $docel_nsuri = '' unless defined $docel_nsuri;
311 wakaba 1.43 unless ($Namespace->{$docel_nsuri}->{loaded}) {
312     if ($Namespace->{$docel_nsuri}->{module}) {
313     eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
314     } else {
315     $Namespace->{$docel_nsuri}->{loaded} = 1;
316     }
317     }
318 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
319     $Element->{$docel_nsuri}->{''} ||
320     $ElementDefault;
321     if ($docel_def->{is_root}) {
322     #
323 wakaba 1.50 } elsif ($docel_def->{is_xml_root}) {
324     unless ($doc->manakai_is_html) {
325     #
326     } else {
327     $onerror->(node => $docel, type => 'element not allowed:root:xml');
328     }
329 wakaba 1.42 } else {
330 wakaba 1.49 $onerror->(node => $docel, type => 'element not allowed:root');
331 wakaba 1.1 }
332    
333 wakaba 1.42 ## TODO: Check for other items other than document element
334     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
335 wakaba 1.2
336 wakaba 1.56 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
337 wakaba 1.51
338 wakaba 1.52 ## TODO: Test for these checks are necessary.
339 wakaba 1.51 my $charset_name = $doc->input_encoding;
340     if (defined $charset_name) {
341     require Message::Charset::Info;
342     my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
343    
344 wakaba 1.71 if ($doc->manakai_is_html) {
345     if (not $doc->manakai_has_bom and
346     not defined $doc->manakai_charset) {
347     unless ($charset->{is_html_ascii_superset}) {
348     $onerror->(node => $doc, level => $self->{must_level},
349     type => 'non ascii superset:'.$charset_name);
350     }
351    
352     if (not $self->{has_charset} and ## TODO: This does not work now.
353     not $charset->{iana_names}->{'us-ascii'}) {
354     $onerror->(node => $doc, level => $self->{must_level},
355     type => 'no character encoding declaration:'.$charset_name);
356     }
357 wakaba 1.51 }
358 wakaba 1.71
359     if ($charset->{iana_names}->{'utf-8'}) {
360     #
361     } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
362     $charset->{iana_names}->{'x-jis0208'} or
363     $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
364     $charset->{is_ebcdic_based}) {
365     $onerror->(node => $doc,
366     type => 'character encoding:'.$charset_name,
367     level => $self->{should_level});
368     } elsif ($charset->{iana_names}->{'cesu-8'} or
369     $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
370     $charset->{iana_names}->{'bocu-1'} or
371     $charset->{iana_names}->{'scsu'}) {
372     $onerror->(node => $doc,
373     type => 'character encoding:'.$charset_name,
374     level => $self->{must_level});
375     } else {
376     $onerror->(node => $doc,
377     type => 'character encoding:'.$charset_name,
378     level => $self->{good_level});
379 wakaba 1.51 }
380     }
381 wakaba 1.52 } elsif ($doc->manakai_is_html) {
382     ## NOTE: MUST and SHOULD requirements above cannot be tested,
383     ## since the document has no input charset encoding information.
384     $onerror->(node => $doc,
385     type => 'character encoding:',
386     level => 'unsupported');
387 wakaba 1.51 }
388    
389     return $return;
390 wakaba 1.42 } # check_document
391 wakaba 1.1
392 wakaba 1.56 sub check_element ($$$;$) {
393     my ($self, $el, $onerror, $onsubdoc) = @_;
394 wakaba 1.42 $self = bless {}, $self unless ref $self;
395     $self->{onerror} = $onerror;
396 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
397     warn "A subdocument is not conformance-checked";
398     };
399 wakaba 1.2
400 wakaba 1.48 $self->{must_level} = 'm';
401 wakaba 1.76 $self->{fact_level} = 'm';
402 wakaba 1.48 $self->{should_level} = 's';
403 wakaba 1.51 $self->{good_level} = 'w';
404 wakaba 1.67 $self->{info_level} = 'i';
405 wakaba 1.71 $self->{unsupported_level} = 'u';
406 wakaba 1.48
407 wakaba 1.61 $self->{plus_elements} = {};
408     $self->{minus_elements} = {};
409 wakaba 1.42 $self->{id} = {};
410     $self->{term} = {};
411     $self->{usemap} = [];
412     $self->{contextmenu} = [];
413     $self->{map} = {};
414     $self->{menu} = {};
415     $self->{has_link_type} = {};
416 wakaba 1.60 $self->{flag} = {};
417 wakaba 1.46 #$self->{has_uri_attr};
418     #$self->{has_hyperlink_element};
419 wakaba 1.51 #$self->{has_charset};
420 wakaba 1.57 #$self->{has_base};
421 wakaba 1.42 $self->{return} = {
422     class => {},
423     id => $self->{id}, table => [], term => $self->{term},
424 wakaba 1.76 uri => {}, # URIs other than those in RDF triples
425     ## TODO: xmlns="", SYSTEM "", atom:* src="", xml:base=""
426 wakaba 1.73 rdf => [],
427 wakaba 1.42 };
428 wakaba 1.4
429 wakaba 1.60 my @item = ({type => 'element', node => $el, parent_state => {}});
430 wakaba 1.66 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
431 wakaba 1.60 while (@item) {
432     my $item = shift @item;
433     if (ref $item eq 'ARRAY') {
434     my $code = shift @$item;
435     next unless $code;## TODO: temp.
436     $code->(@$item);
437     } elsif ($item->{type} eq 'element') {
438     my $el_nsuri = $item->{node}->namespace_uri;
439     $el_nsuri = '' unless defined $el_nsuri;
440     my $el_ln = $item->{node}->manakai_local_name;
441    
442     unless ($Namespace->{$el_nsuri}->{loaded}) {
443     if ($Namespace->{$el_nsuri}->{module}) {
444     eval qq{ require $Namespace->{$el_nsuri}->{module} } or die $@;
445 wakaba 1.42 } else {
446 wakaba 1.60 $Namespace->{$el_nsuri}->{loaded} = 1;
447 wakaba 1.1 }
448     }
449 wakaba 1.63
450     my $element_state = {};
451 wakaba 1.60 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
452     $Element->{$el_nsuri}->{''} ||
453 wakaba 1.42 $ElementDefault;
454 wakaba 1.61 my $content_def = $item->{transparent}
455     ? $item->{parent_def} || $eldef : $eldef;
456 wakaba 1.63 my $content_state = $item->{transparent}
457 wakaba 1.65 ? $item->{parent_def}
458     ? $item->{parent_state} || $element_state : $element_state
459     : $element_state;
460 wakaba 1.60
461 wakaba 1.67 unless ($eldef->{status} & FEATURE_STATUS_REC) {
462     my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
463     $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
464     $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
465     $self->{onerror}->(node => $item->{node},
466     type => 'status:'.$status.':element',
467     level => $self->{info_level});
468     }
469 wakaba 1.70 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
470     $self->{onerror}->(node => $item->{node},
471     type => 'element not defined',
472     level => $self->{must_level});
473     } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
474     $self->{onerror}->(node => $item->{node},
475     type => 'deprecated:element',
476     level => $self->{should_level});
477     } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
478     $self->{onerror}->(node => $item->{node},
479     type => 'deprecated:element',
480     level => $self->{info_level});
481     }
482 wakaba 1.67
483 wakaba 1.60 my @new_item;
484     push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
485     push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
486 wakaba 1.61
487 wakaba 1.60 my @child = @{$item->{node}->child_nodes};
488     while (@child) {
489     my $child = shift @child;
490     my $child_nt = $child->node_type;
491     if ($child_nt == 1) { # ELEMENT_NODE
492     my $child_nsuri = $child->namespace_uri;
493     $child_nsuri = '' unless defined $child_nsuri;
494     my $child_ln = $child->manakai_local_name;
495     if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
496     not (($self->{flag}->{in_head} or
497 wakaba 1.61 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
498     $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
499 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
500     $self, $item, $child,
501 wakaba 1.66 $child_nsuri, $child_ln, 1,
502     $content_state, $element_state];
503 wakaba 1.60 push @new_item, {type => 'element', node => $child,
504 wakaba 1.65 parent_state => $content_state,
505 wakaba 1.61 parent_def => $content_def,
506 wakaba 1.66 real_parent_state => $element_state,
507 wakaba 1.60 transparent => 1};
508     } else {
509 wakaba 1.65 if ($item->{parent_def} and # has parent
510     $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
511 wakaba 1.61 if ($el_ln eq 'object') {
512     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
513     #
514     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
515     #
516     } else {
517 wakaba 1.62 $content_def = $item->{parent_def} || $content_def;
518 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
519 wakaba 1.62 }
520     } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
521     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
522     #
523     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
524     $element_state->{has_source} = 1;
525     } else {
526     $content_def = $item->{parent_def} || $content_def;
527 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
528 wakaba 1.61 }
529     }
530     }
531    
532 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
533     $self, $item, $child,
534 wakaba 1.64 $child_nsuri, $child_ln,
535     $HTMLSemiTransparentElements
536     ->{$child_nsuri}->{$child_ln},
537 wakaba 1.66 $content_state, $element_state];
538 wakaba 1.60 push @new_item, {type => 'element', node => $child,
539 wakaba 1.65 parent_def => $content_def,
540 wakaba 1.66 real_parent_state => $element_state,
541 wakaba 1.65 parent_state => $content_state};
542 wakaba 1.60 }
543    
544     if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
545     $element_state->{has_significant} = 1;
546     }
547     } elsif ($child_nt == 3 or # TEXT_NODE
548     $child_nt == 4) { # CDATA_SECTION_NODE
549     my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
550     push @new_item, [$content_def->{check_child_text},
551     $self, $item, $child, $has_significant,
552 wakaba 1.66 $content_state, $element_state];
553     $element_state->{has_significant} ||= $has_significant;
554 wakaba 1.61 if ($has_significant and
555     $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
556     $content_def = $item->{parent_def} || $content_def;
557     }
558 wakaba 1.60 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
559     push @child, @{$child->child_nodes};
560 wakaba 1.1 }
561 wakaba 1.60 ## TODO: PI_NODE
562     ## TODO: Unknown node type
563 wakaba 1.1 }
564 wakaba 1.60
565     push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
566    
567     unshift @item, @new_item;
568 wakaba 1.30 } else {
569 wakaba 1.60 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
570 wakaba 1.4 }
571 wakaba 1.1 }
572 wakaba 1.17
573     for (@{$self->{usemap}}) {
574     unless ($self->{map}->{$_->[0]}) {
575     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
576     }
577     }
578    
579 wakaba 1.32 for (@{$self->{contextmenu}}) {
580     unless ($self->{menu}->{$_->[0]}) {
581     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
582     }
583     }
584    
585 wakaba 1.61 delete $self->{plus_elements};
586     delete $self->{minus_elements};
587 wakaba 1.17 delete $self->{onerror};
588     delete $self->{id};
589     delete $self->{usemap};
590     delete $self->{map};
591 wakaba 1.33 return $self->{return};
592 wakaba 1.1 } # check_element
593    
594 wakaba 1.60 sub _add_minus_elements ($$@) {
595     my $self = shift;
596     my $element_state = shift;
597     for my $elements (@_) {
598     for my $nsuri (keys %$elements) {
599     for my $ln (keys %{$elements->{$nsuri}}) {
600     unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
601     $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
602     $self->{minus_elements}->{$nsuri}->{$ln} = 1;
603     }
604     }
605     }
606     }
607     } # _add_minus_elements
608    
609     sub _remove_minus_elements ($$) {
610     my $self = shift;
611     my $element_state = shift;
612     for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
613     for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
614     delete $self->{minus_elements}->{$nsuri}->{$ln};
615     }
616     }
617     } # _remove_minus_elements
618    
619     sub _add_plus_elements ($$@) {
620     my $self = shift;
621     my $element_state = shift;
622     for my $elements (@_) {
623     for my $nsuri (keys %$elements) {
624     for my $ln (keys %{$elements->{$nsuri}}) {
625     unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
626     $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
627     $self->{plus_elements}->{$nsuri}->{$ln} = 1;
628     }
629     }
630     }
631     }
632     } # _add_plus_elements
633    
634     sub _remove_plus_elements ($$) {
635     my $self = shift;
636     my $element_state = shift;
637     for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
638     for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
639     delete $self->{plus_elements}->{$nsuri}->{$ln};
640     }
641     }
642     } # _remove_plus_elements
643    
644 wakaba 1.68 sub _attr_status_info ($$$) {
645     my ($self, $attr, $status_code) = @_;
646 wakaba 1.70
647     if (not ($status_code & FEATURE_ALLOWED)) {
648     $self->{onerror}->(node => $attr,
649     type => 'attribute not defined',
650     level => $self->{must_level});
651     } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
652     $self->{onerror}->(node => $attr,
653     type => 'deprecated:attr',
654     level => $self->{should_level});
655     } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
656     $self->{onerror}->(node => $attr,
657     type => 'deprecated:attr',
658     level => $self->{info_level});
659     }
660    
661 wakaba 1.68 my $status;
662     if ($status_code & FEATURE_STATUS_REC) {
663     return;
664     } elsif ($status_code & FEATURE_STATUS_CR) {
665     $status = 'cr';
666     } elsif ($status_code & FEATURE_STATUS_LC) {
667     $status = 'lc';
668     } elsif ($status_code & FEATURE_STATUS_WD) {
669     $status = 'wd';
670     } else {
671     $status = 'non-standard';
672     }
673     $self->{onerror}->(node => $attr,
674     type => 'status:'.$status.':attr',
675     level => $self->{info_level});
676     } # _attr_status_info
677    
678 wakaba 1.2 sub _add_minuses ($@) {
679     my $self = shift;
680     my $r = {};
681     for my $list (@_) {
682     for my $ns (keys %$list) {
683     for my $ln (keys %{$list->{$ns}}) {
684     unless ($self->{minuses}->{$ns}->{$ln}) {
685     $self->{minuses}->{$ns}->{$ln} = 1;
686     $r->{$ns}->{$ln} = 1;
687     }
688     }
689     }
690     }
691 wakaba 1.4 return {type => 'plus', list => $r};
692 wakaba 1.2 } # _add_minuses
693    
694 wakaba 1.50 sub _add_pluses ($@) {
695     my $self = shift;
696     my $r = {};
697     for my $list (@_) {
698     for my $ns (keys %$list) {
699     for my $ln (keys %{$list->{$ns}}) {
700     unless ($self->{pluses}->{$ns}->{$ln}) {
701     $self->{pluses}->{$ns}->{$ln} = 1;
702     $r->{$ns}->{$ln} = 1;
703     }
704     }
705     }
706     }
707     return {type => 'minus', list => $r};
708     } # _add_pluses
709    
710 wakaba 1.2 sub _remove_minuses ($$) {
711 wakaba 1.4 my ($self, $todo) = @_;
712 wakaba 1.50 if ($todo->{type} eq 'minus') {
713     for my $ns (keys %{$todo->{list}}) {
714     for my $ln (keys %{$todo->{list}->{$ns}}) {
715     delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
716     }
717 wakaba 1.2 }
718 wakaba 1.50 } elsif ($todo->{type} eq 'plus') {
719     for my $ns (keys %{$todo->{list}}) {
720     for my $ln (keys %{$todo->{list}->{$ns}}) {
721     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
722     }
723     }
724     } else {
725     die "$0: Unknown +- type: $todo->{type}";
726 wakaba 1.2 }
727     1;
728     } # _remove_minuses
729    
730 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
731     ## undefined and implemented inconsistently; it is not a problem for
732     ## now, since no element belongs to both lists.
733    
734 wakaba 1.30 sub _check_get_children ($$$) {
735     my ($self, $node, $parent_todo) = @_;
736 wakaba 1.4 my $new_todos = [];
737 wakaba 1.2 my $sib = [];
738     TP: {
739     my $node_ns = $node->namespace_uri;
740     $node_ns = '' unless defined $node_ns;
741     my $node_ln = $node->manakai_local_name;
742 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
743     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
744     if ($parent_todo->{flag}->{in_head}) {
745     #
746     } else {
747     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
748     push @$sib, $end;
749    
750     unshift @$sib, @{$node->child_nodes};
751     push @$new_todos, {type => 'element-attributes', node => $node};
752     last TP;
753     }
754 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
755     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
756     unshift @$sib, @{$node->child_nodes};
757     push @$new_todos, {type => 'element-attributes', node => $node};
758     push @$new_todos,
759     {type => 'code',
760     code => sub {
761     $parent_todo->{flag}->{has_descendant}->{significant} = 0
762     if not $sig_flag;
763     }};
764     last TP;
765 wakaba 1.45 } else {
766     unshift @$sib, @{$node->child_nodes};
767     push @$new_todos, {type => 'element-attributes', node => $node};
768     last TP;
769 wakaba 1.2 }
770     }
771 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
772 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
773     unshift @$sib, @{$node->child_nodes};
774 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
775 wakaba 1.2 last TP;
776     } else {
777     my @cn = @{$node->child_nodes};
778     CN: while (@cn) {
779     my $cn = shift @cn;
780     my $cnt = $cn->node_type;
781     if ($cnt == 1) {
782 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
783     $cn_nsuri = '' unless defined $cn_nsuri;
784     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
785 wakaba 1.2 #
786     } else {
787     last CN;
788     }
789     } elsif ($cnt == 3 or $cnt == 4) {
790     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
791     last CN;
792     }
793     }
794     } # CN
795     unshift @$sib, @cn;
796     }
797 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
798     my @cn = @{$node->child_nodes};
799     CN: while (@cn) {
800     my $cn = shift @cn;
801     my $cnt = $cn->node_type;
802     if ($cnt == 1) {
803     my $cn_nsuri = $cn->namespace_uri;
804     $cn_nsuri = '' unless defined $cn_nsuri;
805     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
806     #
807     } else {
808     last CN;
809     }
810     } elsif ($cnt == 3 or $cnt == 4) {
811     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
812     last CN;
813     }
814     }
815     } # CN
816     unshift @$sib, @cn;
817 wakaba 1.2 }
818 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
819 wakaba 1.2 } # TP
820 wakaba 1.30
821     for my $new_todo (@$new_todos) {
822     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
823     }
824    
825 wakaba 1.4 return ($sib, $new_todos);
826 wakaba 1.2 } # _check_get_children
827    
828 wakaba 1.44 =head1 LICENSE
829    
830 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
831 wakaba 1.44
832     This library is free software; you can redistribute it
833     and/or modify it under the same terms as Perl itself.
834    
835     =cut
836    
837 wakaba 1.1 1;
838 wakaba 1.77 # $Date: 2008/03/22 03:07:06 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24