/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.70 - (hide annotations) (download)
Tue Feb 26 08:28:00 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.69: +40 -2 lines
++ whatpm/Whatpm/ChangeLog	26 Feb 2008 08:22:33 -0000
2008-02-26  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm: New status constants are added.
	($ElementDefault): |status| added.
	(check_element): Err for non-standard or deprecated elements.
	(_attr_status_info): For non-standard or deprecated attributes.

++ whatpm/Whatpm/ContentChecker/ChangeLog	26 Feb 2008 08:27:45 -0000
	* HTML.pm: HTML5 status constants are OR-ed with "allowed" status.
	Don't raise "not defined" errors; they are now raised according
	to status flags.  Status flags of li/@value, ol/@start, and menu
	are now non-deprecated, to avoid deprecated error message.
	area/@hreftype typo fixed.  |isindex| SHOULD NOT be used
	according to HTML4.

2008-02-26  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.70 our $VERSION=do{my @r=(q$Revision: 1.69 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
12 wakaba 1.70 ## Stability
13 wakaba 1.67 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14     sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15     sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16     sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17    
18 wakaba 1.70 ## Deprecated
19     sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20     sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21    
22     ## Conformance
23     sub FEATURE_ALLOWED () { 0b10000 }
24    
25 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28    
29 wakaba 1.42 my $Namespace = {
30 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
31 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
32     $XML_NS => {loaded => 1},
33     $XMLNS_NS => {loaded => 1},
34     };
35    
36     our $AttrChecker = {
37 wakaba 1.9 $XML_NS => {
38 wakaba 1.13 space => sub {
39     my ($self, $attr) = @_;
40     my $value = $attr->value;
41     if ($value eq 'default' or $value eq 'preserve') {
42     #
43     } else {
44     ## NOTE: An XML "error"
45 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'error',
46     type => 'invalid attribute value');
47 wakaba 1.13 }
48     },
49     lang => sub {
50 wakaba 1.35 my ($self, $attr) = @_;
51 wakaba 1.47 my $value = $attr->value;
52     if ($value eq '') {
53     #
54     } else {
55     require Whatpm::LangTag;
56     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
57     my %opt = @_;
58     my $type = 'LangTag:'.$opt{type};
59     $type .= ':' . $opt{subtag} if defined $opt{subtag};
60     $self->{onerror}->(node => $attr, type => $type,
61     value => $opt{value}, level => $opt{level});
62     });
63     }
64    
65 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
66     ## as defined by [IETF RFC 3066], Tags for the Identification
67     ## of Languages, or its successor; in addition, the empty string
68     ## may be specified." ("may" in lower case)
69 wakaba 1.47 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
70     ## allowed today?
71    
72     ## TODO: test data
73    
74 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
75 wakaba 1.36 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
76 wakaba 1.35 ## TODO: Test data...
77     }
78 wakaba 1.13 },
79     base => sub {
80     my ($self, $attr) = @_;
81     my $value = $attr->value;
82     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
83     $self->{onerror}->(node => $attr,
84 wakaba 1.33 type => 'invalid attribute value');
85 wakaba 1.13 }
86 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
87     ## no author requirement on conformance in the XML Base specification.
88 wakaba 1.13 },
89     id => sub {
90     my ($self, $attr) = @_;
91     my $value = $attr->value;
92     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
93     $value =~ s/^\x20//;
94     $value =~ s/\x20$//;
95     ## TODO: NCName in XML 1.0 or 1.1
96     ## TODO: declared type is ID?
97 wakaba 1.33 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
98     $self->{onerror}->(node => $attr, level => 'error',
99     type => 'duplicate ID');
100 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
101 wakaba 1.13 } else {
102 wakaba 1.37 $self->{id}->{$value} = [$attr];
103 wakaba 1.13 }
104     },
105 wakaba 1.9 },
106     $XMLNS_NS => {
107 wakaba 1.13 '' => sub {
108     my ($self, $attr) = @_;
109     my $ln = $attr->manakai_local_name;
110     my $value = $attr->value;
111     if ($value eq $XML_NS and $ln ne 'xml') {
112     $self->{onerror}
113 wakaba 1.33 ->(node => $attr, level => 'NC',
114     type => 'Reserved Prefixes and Namespace Names:=xml');
115 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
116     $self->{onerror}
117 wakaba 1.33 ->(node => $attr, level => 'NC',
118     type => 'Reserved Prefixes and Namespace Names:=xmlns');
119 wakaba 1.13 }
120     if ($ln eq 'xml' and $value ne $XML_NS) {
121     $self->{onerror}
122 wakaba 1.33 ->(node => $attr, level => 'NC',
123     type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
124 wakaba 1.13 } elsif ($ln eq 'xmlns') {
125     $self->{onerror}
126 wakaba 1.33 ->(node => $attr, level => 'NC',
127     type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
128 wakaba 1.13 }
129     ## TODO: If XML 1.0 and empty
130     },
131     xmlns => sub {
132     my ($self, $attr) = @_;
133     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
134     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
135 wakaba 1.18 ## TODO: relative references are deprecated
136 wakaba 1.13 my $value = $attr->value;
137     if ($value eq $XML_NS) {
138     $self->{onerror}
139 wakaba 1.33 ->(node => $attr, level => 'NC',
140     type => 'Reserved Prefixes and Namespace Names:=xml');
141 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
142     $self->{onerror}
143 wakaba 1.33 ->(node => $attr, level => 'NC',
144     type => 'Reserved Prefixes and Namespace Names:=xmlns');
145 wakaba 1.13 }
146     },
147 wakaba 1.9 },
148     };
149    
150 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
151 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
152     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
153     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
154     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
155    
156 wakaba 1.60 our %AnyChecker = (
157     check_start => sub { },
158     check_attrs => sub {
159     my ($self, $item, $element_state) = @_;
160     for my $attr (@{$item->{node}->attributes}) {
161 wakaba 1.9 my $attr_ns = $attr->namespace_uri;
162     $attr_ns = '' unless defined $attr_ns;
163     my $attr_ln = $attr->manakai_local_name;
164     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
165 wakaba 1.60 || $AttrChecker->{$attr_ns}->{''};
166 wakaba 1.9 if ($checker) {
167     $checker->($self, $attr);
168 wakaba 1.17 } else {
169 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'unsupported',
170     type => 'attribute');
171 wakaba 1.9 }
172     }
173     },
174 wakaba 1.60 check_child_element => sub {
175     my ($self, $item, $child_el, $child_nsuri, $child_ln,
176     $child_is_transparent, $element_state) = @_;
177     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
178     $self->{onerror}->(node => $child_el,
179     type => 'element not allowed:minus',
180     level => $self->{must_level});
181     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
182     #
183     } else {
184     #
185     }
186     },
187     check_child_text => sub { },
188     check_end => sub {
189     my ($self, $item, $element_state) = @_;
190     if ($element_state->{has_significant}) {
191 wakaba 1.66 $item->{real_parent_state}->{has_significant} = 1;
192 wakaba 1.60 }
193     },
194     );
195    
196     our $ElementDefault = {
197     %AnyChecker,
198 wakaba 1.70 status => FEATURE_ALLOWED,
199     ## NOTE: No "element not defined" error - it is not supported anyway.
200 wakaba 1.60 check_start => sub {
201     my ($self, $item, $element_state) = @_;
202     $self->{onerror}->(node => $item->{node}, level => 'unsupported',
203     type => 'element');
204     },
205 wakaba 1.1 };
206    
207 wakaba 1.60 our $HTMLEmbeddedContent = {
208     ## NOTE: All embedded content is also phrasing content.
209     $HTML_NS => {
210     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
211     canvas => 1,
212     },
213     ## NOTE: MathML is mentioned in the HTML5 spec.
214     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
215     ## NOTE: SVG is mentioned in the HTML5 spec.
216     q<http://www.w3.org/2000/svg> => {svg => 1},
217     ## NOTE: Foreign elements with content (but no metadata) are
218     ## embedded content.
219     };
220    
221 wakaba 1.7 my $HTMLTransparentElements = {
222 wakaba 1.57 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
223 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
224     ## and not in |head|.
225 wakaba 1.7 };
226    
227 wakaba 1.61 my $HTMLSemiTransparentElements = {
228     $HTML_NS => {object => 1, video => 1, audio => 1},
229     };
230 wakaba 1.57
231 wakaba 1.42 our $Element = {};
232 wakaba 1.7
233 wakaba 1.56 sub check_document ($$$;$) {
234     my ($self, $doc, $onerror, $onsubdoc) = @_;
235 wakaba 1.42 $self = bless {}, $self unless ref $self;
236     $self->{onerror} = $onerror;
237 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
238     warn "A subdocument is not conformance-checked";
239     };
240 wakaba 1.1
241 wakaba 1.48 $self->{must_level} = 'm';
242     $self->{fact_level} = 'f';
243     $self->{should_level} = 's';
244 wakaba 1.51 $self->{good_level} = 'w';
245 wakaba 1.67 $self->{info_level} = 'i';
246 wakaba 1.59 $self->{unsupported_lavel} = 'u';
247 wakaba 1.48
248 wakaba 1.42 my $docel = $doc->document_element;
249     unless (defined $docel) {
250     ## ISSUE: Should we check content of Document node?
251     $onerror->(node => $doc, type => 'no document element');
252     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
253     return {
254     class => {},
255     id => {}, table => [], term => {},
256     };
257 wakaba 1.1 }
258    
259 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
260 wakaba 1.1
261 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
262     $docel_nsuri = '' unless defined $docel_nsuri;
263 wakaba 1.43 unless ($Namespace->{$docel_nsuri}->{loaded}) {
264     if ($Namespace->{$docel_nsuri}->{module}) {
265     eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
266     } else {
267     $Namespace->{$docel_nsuri}->{loaded} = 1;
268     }
269     }
270 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
271     $Element->{$docel_nsuri}->{''} ||
272     $ElementDefault;
273     if ($docel_def->{is_root}) {
274     #
275 wakaba 1.50 } elsif ($docel_def->{is_xml_root}) {
276     unless ($doc->manakai_is_html) {
277     #
278     } else {
279     $onerror->(node => $docel, type => 'element not allowed:root:xml');
280     }
281 wakaba 1.42 } else {
282 wakaba 1.49 $onerror->(node => $docel, type => 'element not allowed:root');
283 wakaba 1.1 }
284    
285 wakaba 1.42 ## TODO: Check for other items other than document element
286     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
287 wakaba 1.2
288 wakaba 1.56 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
289 wakaba 1.51
290 wakaba 1.52 ## TODO: Test for these checks are necessary.
291 wakaba 1.51 my $charset_name = $doc->input_encoding;
292     if (defined $charset_name) {
293     require Message::Charset::Info;
294     my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
295    
296     if ($doc->manakai_is_html and
297     not $doc->manakai_has_bom and
298     not defined $doc->manakai_charset) {
299     unless ($charset->{is_html_ascii_superset}) {
300     $onerror->(node => $doc, level => $self->{must_level},
301     type => 'non ascii superset:'.$charset_name);
302     }
303    
304 wakaba 1.69 if (not $self->{has_charset} and ## TODO: This does not work now.
305 wakaba 1.51 not $charset->{iana_names}->{'us-ascii'}) {
306     $onerror->(node => $doc, level => $self->{must_level},
307     type => 'no character encoding declaration:'.$charset_name);
308     }
309     }
310    
311     if ($charset->{iana_names}->{'utf-8'}) {
312     #
313     } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
314     $charset->{iana_names}->{'x-jis0208'} or
315     $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
316     $charset->{is_ebcdic_based}) {
317     $onerror->(node => $doc,
318     type => 'character encoding:'.$charset_name,
319     level => $self->{should_level});
320     } elsif ($charset->{iana_names}->{'cesu-8'} or
321     $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
322     $charset->{iana_names}->{'bocu-1'} or
323     $charset->{iana_names}->{'scsu'}) {
324     $onerror->(node => $doc,
325     type => 'character encoding:'.$charset_name,
326     level => $self->{must_level});
327     } else {
328     $onerror->(node => $doc,
329     type => 'character encoding:'.$charset_name,
330     level => $self->{good_level});
331     }
332 wakaba 1.52 } elsif ($doc->manakai_is_html) {
333     ## NOTE: MUST and SHOULD requirements above cannot be tested,
334     ## since the document has no input charset encoding information.
335     $onerror->(node => $doc,
336     type => 'character encoding:',
337     level => 'unsupported');
338 wakaba 1.51 }
339    
340     return $return;
341 wakaba 1.42 } # check_document
342 wakaba 1.1
343 wakaba 1.56 sub check_element ($$$;$) {
344     my ($self, $el, $onerror, $onsubdoc) = @_;
345 wakaba 1.42 $self = bless {}, $self unless ref $self;
346     $self->{onerror} = $onerror;
347 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
348     warn "A subdocument is not conformance-checked";
349     };
350 wakaba 1.2
351 wakaba 1.48 $self->{must_level} = 'm';
352     $self->{fact_level} = 'f';
353     $self->{should_level} = 's';
354 wakaba 1.51 $self->{good_level} = 'w';
355 wakaba 1.67 $self->{info_level} = 'i';
356 wakaba 1.59 $self->{unsupported_lavel} = 'u';
357 wakaba 1.48
358 wakaba 1.61 $self->{plus_elements} = {};
359     $self->{minus_elements} = {};
360 wakaba 1.42 $self->{id} = {};
361     $self->{term} = {};
362     $self->{usemap} = [];
363     $self->{contextmenu} = [];
364     $self->{map} = {};
365     $self->{menu} = {};
366     $self->{has_link_type} = {};
367 wakaba 1.60 $self->{flag} = {};
368 wakaba 1.46 #$self->{has_uri_attr};
369     #$self->{has_hyperlink_element};
370 wakaba 1.51 #$self->{has_charset};
371 wakaba 1.57 #$self->{has_base};
372 wakaba 1.42 $self->{return} = {
373     class => {},
374     id => $self->{id}, table => [], term => $self->{term},
375     };
376 wakaba 1.4
377 wakaba 1.60 my @item = ({type => 'element', node => $el, parent_state => {}});
378 wakaba 1.66 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
379 wakaba 1.60 while (@item) {
380     my $item = shift @item;
381     if (ref $item eq 'ARRAY') {
382     my $code = shift @$item;
383     next unless $code;## TODO: temp.
384     $code->(@$item);
385     } elsif ($item->{type} eq 'element') {
386     my $el_nsuri = $item->{node}->namespace_uri;
387     $el_nsuri = '' unless defined $el_nsuri;
388     my $el_ln = $item->{node}->manakai_local_name;
389    
390     unless ($Namespace->{$el_nsuri}->{loaded}) {
391     if ($Namespace->{$el_nsuri}->{module}) {
392     eval qq{ require $Namespace->{$el_nsuri}->{module} } or die $@;
393 wakaba 1.42 } else {
394 wakaba 1.60 $Namespace->{$el_nsuri}->{loaded} = 1;
395 wakaba 1.1 }
396     }
397 wakaba 1.63
398     my $element_state = {};
399 wakaba 1.60 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
400     $Element->{$el_nsuri}->{''} ||
401 wakaba 1.42 $ElementDefault;
402 wakaba 1.61 my $content_def = $item->{transparent}
403     ? $item->{parent_def} || $eldef : $eldef;
404 wakaba 1.63 my $content_state = $item->{transparent}
405 wakaba 1.65 ? $item->{parent_def}
406     ? $item->{parent_state} || $element_state : $element_state
407     : $element_state;
408 wakaba 1.60
409 wakaba 1.67 unless ($eldef->{status} & FEATURE_STATUS_REC) {
410     my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
411     $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
412     $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
413     $self->{onerror}->(node => $item->{node},
414     type => 'status:'.$status.':element',
415     level => $self->{info_level});
416     }
417 wakaba 1.70 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
418     $self->{onerror}->(node => $item->{node},
419     type => 'element not defined',
420     level => $self->{must_level});
421     } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
422     $self->{onerror}->(node => $item->{node},
423     type => 'deprecated:element',
424     level => $self->{should_level});
425     } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
426     $self->{onerror}->(node => $item->{node},
427     type => 'deprecated:element',
428     level => $self->{info_level});
429     }
430 wakaba 1.67
431 wakaba 1.60 my @new_item;
432     push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
433     push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
434 wakaba 1.61
435 wakaba 1.60 my @child = @{$item->{node}->child_nodes};
436     while (@child) {
437     my $child = shift @child;
438     my $child_nt = $child->node_type;
439     if ($child_nt == 1) { # ELEMENT_NODE
440     my $child_nsuri = $child->namespace_uri;
441     $child_nsuri = '' unless defined $child_nsuri;
442     my $child_ln = $child->manakai_local_name;
443     if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
444     not (($self->{flag}->{in_head} or
445 wakaba 1.61 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
446     $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
447 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
448     $self, $item, $child,
449 wakaba 1.66 $child_nsuri, $child_ln, 1,
450     $content_state, $element_state];
451 wakaba 1.60 push @new_item, {type => 'element', node => $child,
452 wakaba 1.65 parent_state => $content_state,
453 wakaba 1.61 parent_def => $content_def,
454 wakaba 1.66 real_parent_state => $element_state,
455 wakaba 1.60 transparent => 1};
456     } else {
457 wakaba 1.65 if ($item->{parent_def} and # has parent
458     $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
459 wakaba 1.61 if ($el_ln eq 'object') {
460     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
461     #
462     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
463     #
464     } else {
465 wakaba 1.62 $content_def = $item->{parent_def} || $content_def;
466 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
467 wakaba 1.62 }
468     } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
469     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
470     #
471     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
472     $element_state->{has_source} = 1;
473     } else {
474     $content_def = $item->{parent_def} || $content_def;
475 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
476 wakaba 1.61 }
477     }
478     }
479    
480 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
481     $self, $item, $child,
482 wakaba 1.64 $child_nsuri, $child_ln,
483     $HTMLSemiTransparentElements
484     ->{$child_nsuri}->{$child_ln},
485 wakaba 1.66 $content_state, $element_state];
486 wakaba 1.60 push @new_item, {type => 'element', node => $child,
487 wakaba 1.65 parent_def => $content_def,
488 wakaba 1.66 real_parent_state => $element_state,
489 wakaba 1.65 parent_state => $content_state};
490 wakaba 1.60 }
491    
492     if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
493     $element_state->{has_significant} = 1;
494     }
495     } elsif ($child_nt == 3 or # TEXT_NODE
496     $child_nt == 4) { # CDATA_SECTION_NODE
497     my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
498     push @new_item, [$content_def->{check_child_text},
499     $self, $item, $child, $has_significant,
500 wakaba 1.66 $content_state, $element_state];
501     $element_state->{has_significant} ||= $has_significant;
502 wakaba 1.61 if ($has_significant and
503     $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
504     $content_def = $item->{parent_def} || $content_def;
505     }
506 wakaba 1.60 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
507     push @child, @{$child->child_nodes};
508 wakaba 1.1 }
509 wakaba 1.60 ## TODO: PI_NODE
510     ## TODO: Unknown node type
511 wakaba 1.1 }
512 wakaba 1.60
513     push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
514    
515     unshift @item, @new_item;
516 wakaba 1.30 } else {
517 wakaba 1.60 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
518 wakaba 1.4 }
519 wakaba 1.1 }
520 wakaba 1.17
521     for (@{$self->{usemap}}) {
522     unless ($self->{map}->{$_->[0]}) {
523     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
524     }
525     }
526    
527 wakaba 1.32 for (@{$self->{contextmenu}}) {
528     unless ($self->{menu}->{$_->[0]}) {
529     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
530     }
531     }
532    
533 wakaba 1.61 delete $self->{plus_elements};
534     delete $self->{minus_elements};
535 wakaba 1.17 delete $self->{onerror};
536     delete $self->{id};
537     delete $self->{usemap};
538     delete $self->{map};
539 wakaba 1.33 return $self->{return};
540 wakaba 1.1 } # check_element
541    
542 wakaba 1.60 sub _add_minus_elements ($$@) {
543     my $self = shift;
544     my $element_state = shift;
545     for my $elements (@_) {
546     for my $nsuri (keys %$elements) {
547     for my $ln (keys %{$elements->{$nsuri}}) {
548     unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
549     $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
550     $self->{minus_elements}->{$nsuri}->{$ln} = 1;
551     }
552     }
553     }
554     }
555     } # _add_minus_elements
556    
557     sub _remove_minus_elements ($$) {
558     my $self = shift;
559     my $element_state = shift;
560     for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
561     for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
562     delete $self->{minus_elements}->{$nsuri}->{$ln};
563     }
564     }
565     } # _remove_minus_elements
566    
567     sub _add_plus_elements ($$@) {
568     my $self = shift;
569     my $element_state = shift;
570     for my $elements (@_) {
571     for my $nsuri (keys %$elements) {
572     for my $ln (keys %{$elements->{$nsuri}}) {
573     unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
574     $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
575     $self->{plus_elements}->{$nsuri}->{$ln} = 1;
576     }
577     }
578     }
579     }
580     } # _add_plus_elements
581    
582     sub _remove_plus_elements ($$) {
583     my $self = shift;
584     my $element_state = shift;
585     for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
586     for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
587     delete $self->{plus_elements}->{$nsuri}->{$ln};
588     }
589     }
590     } # _remove_plus_elements
591    
592 wakaba 1.68 sub _attr_status_info ($$$) {
593     my ($self, $attr, $status_code) = @_;
594 wakaba 1.70
595     if (not ($status_code & FEATURE_ALLOWED)) {
596     $self->{onerror}->(node => $attr,
597     type => 'attribute not defined',
598     level => $self->{must_level});
599     } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
600     $self->{onerror}->(node => $attr,
601     type => 'deprecated:attr',
602     level => $self->{should_level});
603     } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
604     $self->{onerror}->(node => $attr,
605     type => 'deprecated:attr',
606     level => $self->{info_level});
607     }
608    
609 wakaba 1.68 my $status;
610     if ($status_code & FEATURE_STATUS_REC) {
611     return;
612     } elsif ($status_code & FEATURE_STATUS_CR) {
613     $status = 'cr';
614     } elsif ($status_code & FEATURE_STATUS_LC) {
615     $status = 'lc';
616     } elsif ($status_code & FEATURE_STATUS_WD) {
617     $status = 'wd';
618     } else {
619     $status = 'non-standard';
620     }
621     $self->{onerror}->(node => $attr,
622     type => 'status:'.$status.':attr',
623     level => $self->{info_level});
624     } # _attr_status_info
625    
626 wakaba 1.2 sub _add_minuses ($@) {
627     my $self = shift;
628     my $r = {};
629     for my $list (@_) {
630     for my $ns (keys %$list) {
631     for my $ln (keys %{$list->{$ns}}) {
632     unless ($self->{minuses}->{$ns}->{$ln}) {
633     $self->{minuses}->{$ns}->{$ln} = 1;
634     $r->{$ns}->{$ln} = 1;
635     }
636     }
637     }
638     }
639 wakaba 1.4 return {type => 'plus', list => $r};
640 wakaba 1.2 } # _add_minuses
641    
642 wakaba 1.50 sub _add_pluses ($@) {
643     my $self = shift;
644     my $r = {};
645     for my $list (@_) {
646     for my $ns (keys %$list) {
647     for my $ln (keys %{$list->{$ns}}) {
648     unless ($self->{pluses}->{$ns}->{$ln}) {
649     $self->{pluses}->{$ns}->{$ln} = 1;
650     $r->{$ns}->{$ln} = 1;
651     }
652     }
653     }
654     }
655     return {type => 'minus', list => $r};
656     } # _add_pluses
657    
658 wakaba 1.2 sub _remove_minuses ($$) {
659 wakaba 1.4 my ($self, $todo) = @_;
660 wakaba 1.50 if ($todo->{type} eq 'minus') {
661     for my $ns (keys %{$todo->{list}}) {
662     for my $ln (keys %{$todo->{list}->{$ns}}) {
663     delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
664     }
665 wakaba 1.2 }
666 wakaba 1.50 } elsif ($todo->{type} eq 'plus') {
667     for my $ns (keys %{$todo->{list}}) {
668     for my $ln (keys %{$todo->{list}->{$ns}}) {
669     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
670     }
671     }
672     } else {
673     die "$0: Unknown +- type: $todo->{type}";
674 wakaba 1.2 }
675     1;
676     } # _remove_minuses
677    
678 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
679     ## undefined and implemented inconsistently; it is not a problem for
680     ## now, since no element belongs to both lists.
681    
682 wakaba 1.30 sub _check_get_children ($$$) {
683     my ($self, $node, $parent_todo) = @_;
684 wakaba 1.4 my $new_todos = [];
685 wakaba 1.2 my $sib = [];
686     TP: {
687     my $node_ns = $node->namespace_uri;
688     $node_ns = '' unless defined $node_ns;
689     my $node_ln = $node->manakai_local_name;
690 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
691     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
692     if ($parent_todo->{flag}->{in_head}) {
693     #
694     } else {
695     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
696     push @$sib, $end;
697    
698     unshift @$sib, @{$node->child_nodes};
699     push @$new_todos, {type => 'element-attributes', node => $node};
700     last TP;
701     }
702 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
703     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
704     unshift @$sib, @{$node->child_nodes};
705     push @$new_todos, {type => 'element-attributes', node => $node};
706     push @$new_todos,
707     {type => 'code',
708     code => sub {
709     $parent_todo->{flag}->{has_descendant}->{significant} = 0
710     if not $sig_flag;
711     }};
712     last TP;
713 wakaba 1.45 } else {
714     unshift @$sib, @{$node->child_nodes};
715     push @$new_todos, {type => 'element-attributes', node => $node};
716     last TP;
717 wakaba 1.2 }
718     }
719 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
720 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
721     unshift @$sib, @{$node->child_nodes};
722 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
723 wakaba 1.2 last TP;
724     } else {
725     my @cn = @{$node->child_nodes};
726     CN: while (@cn) {
727     my $cn = shift @cn;
728     my $cnt = $cn->node_type;
729     if ($cnt == 1) {
730 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
731     $cn_nsuri = '' unless defined $cn_nsuri;
732     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
733 wakaba 1.2 #
734     } else {
735     last CN;
736     }
737     } elsif ($cnt == 3 or $cnt == 4) {
738     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
739     last CN;
740     }
741     }
742     } # CN
743     unshift @$sib, @cn;
744     }
745 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
746     my @cn = @{$node->child_nodes};
747     CN: while (@cn) {
748     my $cn = shift @cn;
749     my $cnt = $cn->node_type;
750     if ($cnt == 1) {
751     my $cn_nsuri = $cn->namespace_uri;
752     $cn_nsuri = '' unless defined $cn_nsuri;
753     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
754     #
755     } else {
756     last CN;
757     }
758     } elsif ($cnt == 3 or $cnt == 4) {
759     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
760     last CN;
761     }
762     }
763     } # CN
764     unshift @$sib, @cn;
765 wakaba 1.2 }
766 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
767 wakaba 1.2 } # TP
768 wakaba 1.30
769     for my $new_todo (@$new_todos) {
770     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
771     }
772    
773 wakaba 1.4 return ($sib, $new_todos);
774 wakaba 1.2 } # _check_get_children
775    
776 wakaba 1.44 =head1 LICENSE
777    
778 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
779 wakaba 1.44
780     This library is free software; you can redistribute it
781     and/or modify it under the same terms as Perl itself.
782    
783     =cut
784    
785 wakaba 1.1 1;
786 wakaba 1.70 # $Date: 2008/02/26 07:46:22 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24