/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.74 - (hide annotations) (download)
Fri Mar 21 09:18:40 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.73: +24 -2 lines
*** empty log message ***

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.74 our $VERSION=do{my @r=(q$Revision: 1.73 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
## Feature status flags.  Every constant occupies its own bit, so that
## several of them can be combined with |\|| in a |status| value and
## tested with |&|.

## Stability
sub FEATURE_STATUS_REC () { 1 << 0 } ## Interoperable standard
sub FEATURE_STATUS_CR () { 1 << 1 } ## Call for implementation
sub FEATURE_STATUS_LC () { 1 << 2 } ## Last call for comments
sub FEATURE_STATUS_WD () { 1 << 3 } ## Working or editor's draft

## Conformance
sub FEATURE_ALLOWED () { 1 << 4 }

## Deprecated
sub FEATURE_DEPRECATED_SHOULD () { 1 << 5 } ## SHOULD-level
sub FEATURE_DEPRECATED_INFO () { 1 << 6 } ## Does not affect conformance

24    
## Namespace URIs referred to by name in this module.
my ($HTML_NS, $XML_NS, $XMLNS_NS) = (
  q<http://www.w3.org/1999/xhtml>,
  q<http://www.w3.org/XML/1998/namespace>,
  q<http://www.w3.org/2000/xmlns/>,
);

## Loading information for each known namespace URI.  |module| is the
## name of the module that is |require|d when an element in the
## namespace is checked; |loaded| marks a namespace for which no module
## has to be loaded.
my $Namespace = {};
for my $atom_nsuri (q<http://www.w3.org/2005/Atom>,
                    q<http://purl.org/syndication/history/1.0>,
                    q<http://purl.org/syndication/threading/1.0>) {
  $Namespace->{$atom_nsuri} = {module => 'Whatpm::ContentChecker::Atom'};
}
$Namespace->{$HTML_NS} = {module => 'Whatpm::ContentChecker::HTML'};
for my $builtin_nsuri ($XML_NS, $XMLNS_NS,
                       q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>) {
  $Namespace->{$builtin_nsuri} = {loaded => 1};
}
40    
## Attribute checkers, indexed by namespace URI and then by local name.
## A checker is invoked as |$checker->($self, $attr)| and reports each
## problem it finds by calling |$self->{onerror}|.  The local name |''|
## holds the checker that |%AnyChecker| falls back to when no checker is
## registered for the actual local name.
our $AttrChecker = {
  $XML_NS => {
    ## |xml:space|: only |default| and |preserve| are accepted.
    space => sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      unless ($value eq 'default' or $value eq 'preserve') {
        ## NOTE: An XML "error"
        $self->{onerror}->(node => $attr, level => 'error',
                           type => 'invalid attribute value');
      }
    },

    ## |xml:lang|: a non-empty value is handed to |Whatpm::LangTag|;
    ## the attribute itself is reported when used in an HTML document.
    lang => sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      if ($value ne '') {
        require Whatpm::LangTag;
        my $report = sub {
          my %opt = @_;
          my $type = 'LangTag:'.$opt{type};
          if (defined $opt{subtag}) {
            $type .= ':' . $opt{subtag};
          }
          $self->{onerror}->(node => $attr, type => $type,
                             value => $opt{value}, level => $opt{level});
        };
        Whatpm::LangTag->check_rfc3066_language_tag ($value, $report);
      }

      ## NOTE: "The values of the attribute are language identifiers
      ## as defined by [IETF RFC 3066], Tags for the Identification
      ## of Languages, or its successor; in addition, the empty string
      ## may be specified." ("may" in lower case)
      ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
      ## allowed today?

      ## TODO: test data

      if ($attr->owner_document->manakai_is_html) { # MUST NOT
        $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
        ## TODO: Test data...
      }
    },

    ## |xml:base|: only the character range of the value is tested.
    base => sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      ## ISSUE: Should we disallow noncharacters?
      $self->{onerror}->(node => $attr,
                         type => 'invalid attribute value')
          if $value =~ /[^\x{0000}-\x{10FFFF}]/;
      ## NOTE: Conformance to URI standard is not checked since there is
      ## no author requirement on conformance in the XML Base specification.
    },

    ## |xml:id|: the value is whitespace-normalized and recorded in
    ## |$self->{id}|; a value that is already recorded is a duplicate.
    id => sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      for ($value) {
        s/[\x09\x0A\x0D\x20]+/ /g;
        s/^\x20//;
        s/\x20$//;
      }
      ## TODO: NCName in XML 1.0 or 1.1
      ## TODO: declared type is ID?
      my $known = $self->{id}->{$value};
      if ($known) { ## NOTE: An xml:id error
        $self->{onerror}->(node => $attr, level => 'error',
                           type => 'duplicate ID');
        push @$known, $attr;
      } else {
        $self->{id}->{$value} = [$attr];
      }
    },
  },

  $XMLNS_NS => {
    ## |xmlns:*|: reserved prefixes and reserved namespace names.
    '' => sub {
      my ($self, $attr) = @_;
      my $ln = $attr->manakai_local_name;
      my $value = $attr->value;
      my $nc = sub {
        $self->{onerror}->(node => $attr, level => 'NC', type => $_[0]);
      };

      ## A reserved namespace name bound to another prefix
      if ($value eq $XML_NS and $ln ne 'xml') {
        $nc->('Reserved Prefixes and Namespace Names:=xml');
      } elsif ($value eq $XMLNS_NS) {
        $nc->('Reserved Prefixes and Namespace Names:=xmlns');
      }

      ## A reserved prefix bound to another namespace name
      if ($ln eq 'xml' and $value ne $XML_NS) {
        $nc->('Reserved Prefixes and Namespace Names:xmlns:xml=');
      } elsif ($ln eq 'xmlns') {
        $nc->('Reserved Prefixes and Namespace Names:xmlns:xmlns=');
      }
      ## TODO: If XML 1.0 and empty
    },

    ## |xmlns|: the default namespace must not be a reserved one.
    xmlns => sub {
      my ($self, $attr) = @_;
      ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
      ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
      ## TODO: relative references are deprecated
      my $value = $attr->value;
      my $type = $value eq $XML_NS
          ? 'Reserved Prefixes and Namespace Names:=xml'
          : $value eq $XMLNS_NS
              ? 'Reserved Prefixes and Namespace Names:=xmlns'
              : undef;
      $self->{onerror}->(node => $attr, level => 'NC', type => $type)
          if defined $type;
    },
  },
};

## ISSUE: Should we really allow these attributes?
## Null-namespace attributes whose names look like |xml:*| share the
## checkers of the corresponding attributes in the XML namespace.
for my $xml_ln (qw/space lang base id/) {
  $AttrChecker->{''}->{'xml:' . $xml_ln} = $AttrChecker->{$XML_NS}->{$xml_ln};
}
160    
## Default set of checking callbacks.  Element definitions copy this
## hash and override the callbacks they need.
our %AnyChecker = (
  ## Nothing is checked at the start of the element.
  check_start => sub { },

  ## Runs the registered checker for each attribute of |$item->{node}|;
  ## an attribute with no checker is reported as unsupported.
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    for my $attr (@{$item->{node}->attributes}) {
      my $nsuri = $attr->namespace_uri;
      $nsuri = '' unless defined $nsuri;
      my $ln = $attr->manakai_local_name;
      my $checker = $AttrChecker->{$nsuri}->{$ln}
          || $AttrChecker->{$nsuri}->{''};
      unless ($checker) {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        next;
      }
      $checker->($self, $attr);
    }
  },

  ## Any child element is accepted, except one that is currently listed
  ## in |$self->{minus_elements}|.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed - nothing to report.
    } else {
      ## Anything else is allowed as well.
    }
  },

  ## Text is always accepted.
  check_child_text => sub { },

  ## Propagates the "has significant content" flag to the real parent.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $item->{real_parent_state}->{has_significant} = 1
        if $element_state->{has_significant};
  },
);
200    
## Definition used for an element that has no definition of its own:
## the |%AnyChecker| callbacks, except that the element is reported as
## unsupported when it starts.
our $ElementDefault = {%AnyChecker};
$ElementDefault->{status} = FEATURE_ALLOWED;
## NOTE: No "element not defined" error - it is not supported anyway.
$ElementDefault->{check_start} = sub {
  my ($self, $item, $element_state) = @_;
  my $node = $item->{node};
  $self->{onerror}->(node => $node, level => 'unsupported',
                     type => 'element');
};
211    
## Elements categorized as embedded content, by namespace URI and
## local name.
## NOTE: All embedded content is also phrasing content.
our $HTMLEmbeddedContent = {
  $HTML_NS => {
    map { $_ => 1 } qw/img iframe embed object video audio canvas/
  },
  ## NOTE: MathML is mentioned in the HTML5 spec.
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  ## NOTE: SVG is mentioned in the HTML5 spec.
  q<http://www.w3.org/2000/svg> => {svg => 1},
  ## NOTE: Foreign elements with content (but no metadata) are
  ## embedded content.
};

## Elements whose children are checked against the content model of
## the parent element.
my $HTMLTransparentElements = {
  $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
  ## NOTE: |html:noscript| is transparent if scripting is disabled
  ## and not in |head|.
};

## Elements that |check_element| treats as transparent only for part
## of their content.
my $HTMLSemiTransparentElements = {
  $HTML_NS => {map { $_ => 1 } qw/object video audio/},
};

## Element definitions, indexed by namespace URI and then by local name.
our $Element = {};
237 wakaba 1.7
## Definition of the |rdf:RDF| element.  When the element starts, it is
## converted by |Whatpm::RDFXML| and the resulting triples are collected
## in the |rdf| member of the result, as |[$element_node, $triples]|.
$Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
  %AnyChecker,
  status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
  is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $rdf_el = $item->{node};
    my $triples = [];
    push @{$self->{return}->{rdf}}, [$rdf_el, $triples];

    require Whatpm::RDFXML;
    my $converter = Whatpm::RDFXML->new;
    $converter->{onerror} = $self->{onerror};
    $converter->{ontriple} = sub {
      my %opt = @_;
      push @$triples,
          [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
      if (defined $opt{id}) {
        ## The triple has an ID: also record the four triples that
        ## describe it as an |rdf:Statement|, in this order.
        my @reified = (
          [q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>,
           $opt{subject}],
          [q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>,
           $opt{predicate}],
          [q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>,
           $opt{object}],
          [q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>,
           {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}],
        );
        push @$triples,
            map { [$opt{node}, $opt{id}, {uri => $_->[0]}, $_->[1]] }
            @reified;
      }
    };
    $converter->convert_rdf_element ($rdf_el);
  },
};
279    
## Checks the conformance of a document.
##
##   $self      Class name or checker object; a new object is created
##              when a class name is given.
##   $doc       Document node to check.
##   $onerror   Code reference called with named arguments (|node|,
##              |type| and, where given, |level|) for each problem found.
##   $onsubdoc  Optional code reference stored as |$self->{onsubdoc}|;
##              the default only emits a warning.
##
## Returns the hash reference returned by |check_element| for the
## document element.  If the document has no document element, an error
## is reported and a hash reference with empty members is returned.
sub check_document ($$$;$) {
  my ($self, $doc, $onerror, $onsubdoc) = @_;
  $self = bless {}, $self unless ref $self;
  $self->{onerror} = $onerror;
  $self->{onsubdoc} = $onsubdoc || sub {
    warn "A subdocument is not conformance-checked";
  };

  $self->{must_level} = 'm';
  $self->{fact_level} = 'f';
  $self->{should_level} = 's';
  $self->{good_level} = 'w';
  $self->{info_level} = 'i';
  $self->{unsupported_level} = 'u';

  ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.

  my $docel = $doc->document_element;
  unless (defined $docel) {
    ## ISSUE: Should we check content of Document node?
    $onerror->(node => $doc, type => 'no document element');
    ## ISSUE: Is this non-conforming (to what spec)?  Or just a warning?
    ## The members are the same as those of the result of
    ## |check_element|, including |rdf|.
    return {
      class => {},
      id => {}, table => [], term => {},
      rdf => [],
    };
  }

  ## ISSUE: Unexpanded entity references and HTML5 conformance

  ## Load the checker module for the namespace of the document element,
  ## if there is one, so that its element definitions are available.
  my $docel_nsuri = $docel->namespace_uri;
  $docel_nsuri = '' unless defined $docel_nsuri;
  unless ($Namespace->{$docel_nsuri}->{loaded}) {
    if ($Namespace->{$docel_nsuri}->{module}) {
      eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
    } else {
      $Namespace->{$docel_nsuri}->{loaded} = 1;
    }
  }

  ## Is the document element allowed as a root element?
  my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
      $Element->{$docel_nsuri}->{''} ||
      $ElementDefault;
  if ($docel_def->{is_root}) {
    #
  } elsif ($docel_def->{is_xml_root}) {
    ## Allowed as a root element in XML documents only.
    unless ($doc->manakai_is_html) {
      #
    } else {
      $onerror->(node => $docel, type => 'element not allowed:root:xml');
    }
  } else {
    $onerror->(node => $docel, type => 'element not allowed:root');
  }

  ## TODO: Check for other items other than document element
  ## (second (errorous) element, text nodes, PI nodes, doctype nodes)

  my $return = $self->check_element ($docel, $onerror, $onsubdoc);

  ## TODO: Test for these checks are necessary.
  my $charset_name = $doc->input_encoding;
  if (defined $charset_name) {
    require Message::Charset::Info;
    my $charset = $Message::Charset::Info::IANACharset->{$charset_name};

    if ($doc->manakai_is_html) {
      ## Requirements that apply only when the encoding is given
      ## neither by a BOM nor by |manakai_charset|.
      if (not $doc->manakai_has_bom and
          not defined $doc->manakai_charset) {
        unless ($charset->{is_html_ascii_superset}) {
          $onerror->(node => $doc, level => $self->{must_level},
                     type => 'non ascii superset:'.$charset_name);
        }

        if (not $self->{has_charset} and ## TODO: This does not work now.
            not $charset->{iana_names}->{'us-ascii'}) {
          $onerror->(node => $doc, level => $self->{must_level},
                     type => 'no character encoding declaration:'.$charset_name);
        }
      }

      if ($charset->{iana_names}->{'utf-8'}) {
        ## UTF-8 - nothing to report.
      } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
               $charset->{iana_names}->{'x-jis0208'} or
               $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
               $charset->{is_ebcdic_based}) {
        $onerror->(node => $doc,
                   type => 'character encoding:'.$charset_name,
                   level => $self->{should_level});
      } elsif ($charset->{iana_names}->{'cesu-8'} or
               ## UTF-7 is meant here: |utf-8| is already handled by
               ## the first branch and could never match at this point.
               $charset->{iana_names}->{'utf-7'} or ## ISSUE: UNICODE-1-1-UTF-7?
               $charset->{iana_names}->{'bocu-1'} or
               $charset->{iana_names}->{'scsu'}) {
        $onerror->(node => $doc,
                   type => 'character encoding:'.$charset_name,
                   level => $self->{must_level});
      } else {
        $onerror->(node => $doc,
                   type => 'character encoding:'.$charset_name,
                   level => $self->{good_level});
      }
    }
  } elsif ($doc->manakai_is_html) {
    ## NOTE: MUST and SHOULD requirements above cannot be tested,
    ## since the document has no input charset encoding information.
    $onerror->(node => $doc,
               type => 'character encoding:',
               level => 'unsupported');
  }

  return $return;
} # check_document
392 wakaba 1.1
## Checks the conformance of an element and of its descendants.
##
##   $self      Class name or checker object; a new object is created
##              when a class name is given.
##   $el        Element node to check.
##   $onerror   Code reference called with named arguments for each
##              problem found.
##   $onsubdoc  Optional code reference stored as |$self->{onsubdoc}|;
##              the default only emits a warning.
##
## Returns a hash reference with the members |class|, |id|, |table|,
## |term| and |rdf|.
##
## The tree is processed with a work queue.  An entry of the queue is
## either an array reference |[$code, @args]|, which is a callback to
## run, or a hash reference of type |element|, which is expanded into
## the callbacks for that element and the entries for its children.
sub check_element ($$$;$) {
  my ($self, $el, $onerror, $onsubdoc) = @_;
  $self = bless {}, $self unless ref $self;
  $self->{onerror} = $onerror;
  $self->{onsubdoc} = $onsubdoc || sub {
    warn "A subdocument is not conformance-checked";
  };

  ## Level codes passed to |onerror|
  @$self{qw/must_level fact_level should_level good_level info_level
            unsupported_level/} = qw/m f s w i u/;

  ## State of this checking run
  $self->{$_} = {} for qw/plus_elements minus_elements id term
                          map menu has_link_type flag/;
  $self->{$_} = [] for qw/usemap contextmenu/;
  #$self->{has_uri_attr};
  #$self->{has_hyperlink_element};
  #$self->{has_charset};
  #$self->{has_base};
  $self->{return} = {
    class => {},
    id => $self->{id}, table => [], term => $self->{term},
    rdf => [],
  };

  my $root_parent_state = {};
  my @queue = ({type => 'element', node => $el,
                parent_state => $root_parent_state,
                real_parent_state => $root_parent_state});
  while (@queue) {
    my $entry = shift @queue;

    ## A callback entry
    if (ref $entry eq 'ARRAY') {
      my $code = shift @$entry;
      next unless $code;## TODO: temp.
      $code->(@$entry);
      next;
    }

    die "$0: Internal error: Unsupported checking action type |$entry->{type}|"
        unless $entry->{type} eq 'element';

    ## An element entry
    my $node = $entry->{node};
    my $el_nsuri = $node->namespace_uri;
    $el_nsuri = '' unless defined $el_nsuri;
    my $el_ln = $node->manakai_local_name;

    ## Load the checker module of the namespace, if any.
    my $ns_info = $Namespace->{$el_nsuri} ||= {};
    unless ($ns_info->{loaded}) {
      my $module = $ns_info->{module};
      if ($module) {
        eval qq{ require $module } or die $@;
      } else {
        $ns_info->{loaded} = 1;
      }
    }

    my $element_state = {};
    my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
        $Element->{$el_nsuri}->{''} ||
        $ElementDefault;

    ## The children of a transparent element are checked against the
    ## definition and the state inherited from the parent, if any.
    my $content_def = $eldef;
    my $content_state = $element_state;
    if ($entry->{transparent}) {
      $content_def = $entry->{parent_def} || $eldef;
      if ($entry->{parent_def}) {
        $content_state = $entry->{parent_state} || $element_state;
      }
    }

    ## Standardization status of the element
    unless ($eldef->{status} & FEATURE_STATUS_REC) {
      my $status;
      if ($eldef->{status} & FEATURE_STATUS_CR) {
        $status = 'cr';
      } elsif ($eldef->{status} & FEATURE_STATUS_LC) {
        $status = 'lc';
      } elsif ($eldef->{status} & FEATURE_STATUS_WD) {
        $status = 'wd';
      } else {
        $status = 'non-standard';
      }
      $self->{onerror}->(node => $node,
                         type => 'status:'.$status.':element',
                         level => $self->{info_level});
    }

    ## Conformance and deprecation of the element
    if (not ($eldef->{status} & FEATURE_ALLOWED)) {
      $self->{onerror}->(node => $node,
                         type => 'element not defined',
                         level => $self->{must_level});
    } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
      $self->{onerror}->(node => $node,
                         type => 'deprecated:element',
                         level => $self->{should_level});
    } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
      $self->{onerror}->(node => $node,
                         type => 'deprecated:element',
                         level => $self->{info_level});
    }

    ## Entries for this element, in the order they are to be processed
    my @expansion = (
      [$eldef->{check_start}, $self, $entry, $element_state],
      [$eldef->{check_attrs}, $self, $entry, $element_state],
    );

    my @pending = @{$node->child_nodes};
    while (@pending) {
      my $child = shift @pending;
      my $child_nt = $child->node_type;

      if ($child_nt == 1) { # ELEMENT_NODE
        my $child_nsuri = $child->namespace_uri;
        $child_nsuri = '' unless defined $child_nsuri;
        my $child_ln = $child->manakai_local_name;

        ## |html:noscript| in |head| is not transparent.
        my $is_transparent
            = $HTMLTransparentElements->{$child_nsuri}->{$child_ln};
        if ($is_transparent and
            $child_nsuri eq $HTML_NS and $child_ln eq 'noscript' and
            ($self->{flag}->{in_head} or
             ($el_nsuri eq $HTML_NS and $el_ln eq 'head'))) {
          $is_transparent = 0;
        }

        if ($is_transparent) {
          push @expansion,
              [$content_def->{check_child_element},
               $self, $entry, $child,
               $child_nsuri, $child_ln, 1,
               $content_state, $element_state],
              {type => 'element', node => $child,
               parent_state => $content_state,
               parent_def => $content_def,
               real_parent_state => $element_state,
               transparent => 1};
        } else {
          ## $HTMLSemiTransparentElements: once a child other than the
          ## element's own |param| / |source| children (or a "plus"
          ## element) is seen, the rest is checked as content of the
          ## parent.
          if ($entry->{parent_def} and $el_nsuri eq $HTML_NS and
              ($el_ln eq 'object' or
               $el_ln eq 'video' or $el_ln eq 'audio')) {
            my $own_child_ln = $el_ln eq 'object' ? 'param' : 'source';
            if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
              #
            } elsif ($child_nsuri eq $HTML_NS and
                     $child_ln eq $own_child_ln) {
              $element_state->{has_source} = 1
                  if $own_child_ln eq 'source';
            } else {
              $content_def = $entry->{parent_def} || $content_def;
              $content_state = $entry->{parent_state} || $content_state;
            }
          }

          push @expansion,
              [$content_def->{check_child_element},
               $self, $entry, $child,
               $child_nsuri, $child_ln,
               $HTMLSemiTransparentElements
                   ->{$child_nsuri}->{$child_ln},
               $content_state, $element_state],
              {type => 'element', node => $child,
               parent_def => $content_def,
               real_parent_state => $element_state,
               parent_state => $content_state};
        }

        $element_state->{has_significant} = 1
            if $HTMLEmbeddedContent->{$child_nsuri}->{$child_ln};
      } elsif ($child_nt == 3 or # TEXT_NODE
               $child_nt == 4) { # CDATA_SECTION_NODE
        my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
        push @expansion, [$content_def->{check_child_text},
                          $self, $entry, $child, $has_significant,
                          $content_state, $element_state];
        $element_state->{has_significant} ||= $has_significant;
        if ($has_significant and
            $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
          $content_def = $entry->{parent_def} || $content_def;
        }
      } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
        ## The children of the entity reference are processed later,
        ## as if they were children of the element.
        push @pending, @{$child->child_nodes};
      }
      ## TODO: PI_NODE
      ## TODO: Unknown node type
    }

    push @expansion, [$eldef->{check_end}, $self, $entry, $element_state];

    ## Process this element (and its descendants) before anything else
    ## that is still in the queue.
    unshift @queue, @expansion;
  }

  ## References to |map| and |menu| elements that do not exist
  for my $usemap (@{$self->{usemap}}) {
    next if $self->{map}->{$usemap->[0]};
    $self->{onerror}->(node => $usemap->[1], type => 'no referenced map');
  }

  for my $contextmenu (@{$self->{contextmenu}}) {
    next if $self->{menu}->{$contextmenu->[0]};
    $self->{onerror}->(node => $contextmenu->[1],
                       type => 'no referenced menu');
  }

  delete @$self{qw/plus_elements minus_elements onerror id usemap map/};
  return $self->{return};
} # check_element
592    
## Adds elements to the set of currently disallowed ("minus") elements.
##
##   $self           Checker object; |$self->{minus_elements}| is updated.
##   $element_state  State of the element that adds the elements; the
##                   newly added names are recorded in its
##                   |minus_elements_original| member, so that
##                   |_remove_minus_elements| can remove them again.
##   @element_sets   Hash references of the form
##                   |{$nsuri => {$local_name => ...}}|.
##
## A name that is already in the set is neither added nor recorded.
sub _add_minus_elements ($$@) {
  my ($self, $element_state, @element_sets) = @_;
  for my $set (@element_sets) {
    for my $nsuri (keys %$set) {
      for my $ln (keys %{$set->{$nsuri}}) {
        next if $self->{minus_elements}->{$nsuri}->{$ln};
        $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
        $self->{minus_elements}->{$nsuri}->{$ln} = 1;
      }
    }
  }
} # _add_minus_elements
607    
## Removes, from the set of currently disallowed ("minus") elements,
## the names recorded in |$element_state->{minus_elements_original}|.
##
##   $self           Checker object; |$self->{minus_elements}| is updated.
##   $element_state  Element state that holds the recorded names.
sub _remove_minus_elements ($$) {
  my ($self, $element_state) = @_;
  my $recorded = $element_state->{minus_elements_original};
  for my $nsuri (keys %$recorded) {
    delete $self->{minus_elements}->{$nsuri}->{$_}
        for keys %{$recorded->{$nsuri}};
  }
} # _remove_minus_elements
617    
## Adds elements to the set of additionally allowed ("plus") elements.
##
##   $self           Checker object; |$self->{plus_elements}| is updated.
##   $element_state  State of the element that adds the elements; the
##                   newly added names are recorded in its
##                   |plus_elements_original| member, so that
##                   |_remove_plus_elements| can remove them again.
##   @element_sets   Hash references of the form
##                   |{$nsuri => {$local_name => ...}}|.
##
## A name that is already in the set is neither added nor recorded.
sub _add_plus_elements ($$@) {
  my ($self, $element_state, @element_sets) = @_;
  for my $set (@element_sets) {
    for my $nsuri (keys %$set) {
      for my $ln (keys %{$set->{$nsuri}}) {
        next if $self->{plus_elements}->{$nsuri}->{$ln};
        $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
        $self->{plus_elements}->{$nsuri}->{$ln} = 1;
      }
    }
  }
} # _add_plus_elements
632    
## Removes, from the set of additionally allowed ("plus") elements, the
## names recorded in |$element_state->{plus_elements_original}|.
##
##   $self           Checker object; |$self->{plus_elements}| is updated.
##   $element_state  Element state that holds the recorded names.
sub _remove_plus_elements ($$) {
  my ($self, $element_state) = @_;
  my $recorded = $element_state->{plus_elements_original};
  for my $nsuri (keys %$recorded) {
    delete $self->{plus_elements}->{$nsuri}->{$_}
        for keys %{$recorded->{$nsuri}};
  }
} # _remove_plus_elements
642    
## Reports the conformance, deprecation and standardization status of
## an attribute.
##
##   $self         Checker object; |$self->{onerror}| receives the reports.
##   $attr         Attribute node the reports are attached to.
##   $status_code  Combination of the |FEATURE_*| flags.
##
## At most one conformance or deprecation report is made, followed by a
## |status:...:attr| report unless the |FEATURE_STATUS_REC| flag is set.
sub _attr_status_info ($$$) {
  my ($self, $attr, $status_code) = @_;

  ## Conformance and deprecation; only the first match is reported.
  my ($type, $level_name);
  if (not ($status_code & FEATURE_ALLOWED)) {
    ($type, $level_name) = ('attribute not defined', 'must_level');
  } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
    ($type, $level_name) = ('deprecated:attr', 'should_level');
  } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
    ($type, $level_name) = ('deprecated:attr', 'info_level');
  }
  if (defined $type) {
    $self->{onerror}->(node => $attr,
                       type => $type,
                       level => $self->{$level_name});
  }

  ## Standardization status; nothing is reported for a REC feature.
  return if $status_code & FEATURE_STATUS_REC;
  my $status = $status_code & FEATURE_STATUS_CR ? 'cr'
      : $status_code & FEATURE_STATUS_LC ? 'lc'
      : $status_code & FEATURE_STATUS_WD ? 'wd'
      : 'non-standard';
  $self->{onerror}->(node => $attr,
                     type => 'status:'.$status.':attr',
                     level => $self->{info_level});
} # _attr_status_info
676    
## Adds the names in the given lists to |$self->{minuses}|.
##
##   $self   Checker object.
##   @lists  Hash references of the form |{$ns => {$local_name => ...}}|.
##
## Returns a hash reference |{type => 'plus', list => $added}| where
## |$added| holds the names that were not in |$self->{minuses}| before.
## It is the form that |_remove_minuses| accepts for removing those
## names from |$self->{minuses}| again.
sub _add_minuses ($@) {
  my ($self, @lists) = @_;
  my $added = {};
  for my $list (@lists) {
    for my $ns (keys %$list) {
      for my $ln (keys %{$list->{$ns}}) {
        next if $self->{minuses}->{$ns}->{$ln};
        $self->{minuses}->{$ns}->{$ln} = 1;
        $added->{$ns}->{$ln} = 1;
      }
    }
  }
  return {type => 'plus', list => $added};
} # _add_minuses
692    
## Adds the names in the given lists to |$self->{pluses}|.
##
##   $self   Checker object.
##   @lists  Hash references of the form |{$ns => {$local_name => ...}}|.
##
## Returns a hash reference |{type => 'minus', list => $added}| where
## |$added| holds the names that were not in |$self->{pluses}| before.
## It is the form that |_remove_minuses| accepts for removing those
## names from |$self->{pluses}| again.
sub _add_pluses ($@) {
  my ($self, @lists) = @_;
  my $added = {};
  for my $list (@lists) {
    for my $ns (keys %$list) {
      for my $ln (keys %{$list->{$ns}}) {
        next if $self->{pluses}->{$ns}->{$ln};
        $self->{pluses}->{$ns}->{$ln} = 1;
        $added->{$ns}->{$ln} = 1;
      }
    }
  }
  return {type => 'minus', list => $added};
} # _add_pluses
708    
## Undoes an addition made by |_add_minuses| or |_add_pluses|.
##
##   $self  Checker object.
##   $todo  Hash reference |{type => ..., list => ...}| as returned by
##          |_add_minuses| (type |plus|: the listed names are deleted
##          from |$self->{minuses}|) or by |_add_pluses| (type |minus|:
##          the listed names are deleted from |$self->{pluses}|).
##
## Only names whose value in the list is true are deleted.  Returns 1;
## dies if the type is neither |minus| nor |plus|.
sub _remove_minuses ($$) {
  my ($self, $todo) = @_;
  my $target = $todo->{type} eq 'minus' ? 'pluses'
      : $todo->{type} eq 'plus' ? 'minuses'
      : die "$0: Unknown +- type: $todo->{type}";
  for my $ns (keys %{$todo->{list}}) {
    for my $ln (keys %{$todo->{list}->{$ns}}) {
      next unless $todo->{list}->{$ns}->{$ln};
      delete $self->{$target}->{$ns}->{$ln};
    }
  }
  return 1;
} # _remove_minuses
728    
729 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
730     ## undefined and implemented inconsistently; it is not a problem for
731     ## now, since no element belongs to both lists.
732    
## Determines how an element node and its children are to be processed.
##
##   $self         Checker object.
##   $node         Element node.
##   $parent_todo  Todo item of the parent; its |flag| member is copied
##                 to every new todo item.
##
## Returns a list of two array references: the nodes (and end markers)
## to be processed as siblings, and the new todo items for |$node|.
sub _check_get_children ($$$) {
  my ($self, $node, $parent_todo) = @_;
  my $new_todos = [];
  my $sib = [];

  ## Returns the children of |$node| that remain after the leading
  ## |html:$_[0]| elements, white space text and other non-element
  ## nodes have been consumed.
  ## NOTE(review): The node that ends the scan has already been shifted
  ## off, so it is not part of the returned list - confirm that this is
  ## intended.
  my $rest_after_leading = sub {
    my ($leading_ln) = @_;
    my @cn = @{$node->child_nodes};
    CN: while (@cn) {
      my $cn = shift @cn;
      my $cnt = $cn->node_type;
      if ($cnt == 1) {
        my $cn_nsuri = $cn->namespace_uri;
        $cn_nsuri = '' unless defined $cn_nsuri;
        last CN unless $cn_nsuri eq $HTML_NS and
            $cn->manakai_local_name eq $leading_ln;
      } elsif ($cnt == 3 or $cnt == 4) {
        last CN if $cn->data =~ /[^\x09-\x0D\x20]/;
      }
    } # CN
    return @cn;
  };

  TP: {
    my $node_ns = $node->namespace_uri;
    $node_ns = '' unless defined $node_ns;
    my $node_ln = $node->manakai_local_name;
    my $is_html = $node_ns eq $HTML_NS;

    if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
      my $is_noscript = ($is_html and $node_ln eq 'noscript');
      ## |html:noscript| in |head| is handled as a normal element.
      unless ($is_noscript and $parent_todo->{flag}->{in_head}) {
        my @extra_todos;
        if ($is_noscript) {
          ## Nested |noscript| is disallowed until the end marker.
          my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
          push @$sib, $end;
        } elsif ($is_html and $node_ln eq 'del') {
          ## Content of |del| does not make the parent significant:
          ## the flag is reset afterwards unless it was set before.
          my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
          push @extra_todos,
              {type => 'code',
               code => sub {
                 $parent_todo->{flag}->{has_descendant}->{significant} = 0
                     if not $sig_flag;
               }};
        }

        ## The children are processed as siblings of the node; only
        ## the attributes of the node itself remain to be checked.
        unshift @$sib, @{$node->child_nodes};
        push @$new_todos, {type => 'element-attributes', node => $node},
            @extra_todos;
        last TP;
      }
    }

    if ($is_html and ($node_ln eq 'video' or $node_ln eq 'audio')) {
      if ($node->has_attribute_ns (undef, 'src')) {
        unshift @$sib, @{$node->child_nodes};
        push @$new_todos, {type => 'element-attributes', node => $node};
        last TP;
      }
      unshift @$sib, $rest_after_leading->('source');
    } elsif ($is_html and $node_ln eq 'object') {
      unshift @$sib, $rest_after_leading->('param');
    }
    push @$new_todos, {type => 'element', node => $node};
  } # TP

  ## Every new todo item gets its own copy of the flags of the parent.
  $_->{flag} = {%{$parent_todo->{flag} or {}}} for @$new_todos;

  return ($sib, $new_todos);
} # _check_get_children
826    
827 wakaba 1.44 =head1 LICENSE
828    
829 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
830 wakaba 1.44
831     This library is free software; you can redistribute it
832     and/or modify it under the same terms as Perl itself.
833    
834     =cut
835    
836 wakaba 1.1 1;
837 wakaba 1.74 # $Date: 2008/03/21 08:58:35 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24