/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.76 - (hide annotations) (download)
Sat Mar 22 03:07:06 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.75: +6 -4 lines
++ whatpm/t/ChangeLog	22 Mar 2008 02:51:31 -0000
2008-03-22  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat: Now we support RDF.

	* content-model-2.dat: Test data on |@accesskey| attribute
	are added.

++ whatpm/Whatpm/ChangeLog	22 Mar 2008 03:05:10 -0000
2008-03-22  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm: |fact_level| is now treated
	the same as |must_level|, i.e. level = |m|.
	(check_element): Make list of URIs in the DOM.

++ whatpm/Whatpm/ContentChecker/ChangeLog	22 Mar 2008 03:06:11 -0000
2008-03-22  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Add URIs to the list of URIs to be returned
	by the checker method. |accesskey| attribute is implemented.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.76 our $VERSION=do{my @r=(q$Revision: 1.75 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
## Feature status flags.  Every constant occupies its own bit, so that
## several of them can be OR-ed together into a single |status| value
## and tested individually with |&|.

## Stability
sub FEATURE_STATUS_REC () { 1 << 0 } ## Interoperable standard
sub FEATURE_STATUS_CR () { 1 << 1 } ## Call for implementation
sub FEATURE_STATUS_LC () { 1 << 2 } ## Last call for comments
sub FEATURE_STATUS_WD () { 1 << 3 } ## Working or editor's draft

## Conformance
sub FEATURE_ALLOWED () { 1 << 4 }

## Deprecated
sub FEATURE_DEPRECATED_SHOULD () { 1 << 5 } ## SHOULD-level
sub FEATURE_DEPRECATED_INFO () { 1 << 6 } ## Does not affect conformance
24    
## Namespace URIs that the rest of this module refers to by name.
25 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28    
## Per-namespace loading state, keyed by namespace URI.  If an entry
## has no true |loaded| value, |check_document| and |check_element|
## |require| the module named by |module|, or, when there is no
## |module|, simply set |loaded| to 1.
29 wakaba 1.42 my $Namespace = {
30 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
31 wakaba 1.72 q<http://purl.org/syndication/history/1.0>
32     => {module => 'Whatpm::ContentChecker::Atom'},
33     q<http://purl.org/syndication/threading/1.0>
34     => {module => 'Whatpm::ContentChecker::Atom'},
35 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
36     $XML_NS => {loaded => 1},
37     $XMLNS_NS => {loaded => 1},
38 wakaba 1.73 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
39 wakaba 1.42 };
40    
## Attribute checkers, looked up as
## |$AttrChecker->{$namespace_uri}->{$local_name}|.  Each checker is a
## code reference called as |$checker->($self, $attr)| and reports
## problems through |$self->{onerror}|.  An entry whose local name is
## the empty string is used by the |check_attrs| callback of
## |%AnyChecker| as the fallback for the other attributes of that
## namespace.
41     our $AttrChecker = {
42 wakaba 1.9 $XML_NS => {
## xml:space - only the values |default| and |preserve| are accepted.
43 wakaba 1.13 space => sub {
44     my ($self, $attr) = @_;
45     my $value = $attr->value;
46     if ($value eq 'default' or $value eq 'preserve') {
47     #
48     } else {
49     ## NOTE: An XML "error"
50 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'error',
51     type => 'invalid attribute value');
52 wakaba 1.13 }
53     },
## xml:lang - a non-empty value is passed to the RFC 3066 language tag
## checker; in addition, the attribute itself is reported when the
## owner document is an HTML document.
54     lang => sub {
55 wakaba 1.35 my ($self, $attr) = @_;
56 wakaba 1.47 my $value = $attr->value;
57     if ($value eq '') {
58     #
59     } else {
60     require Whatpm::LangTag;
61     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
62     my %opt = @_;
63     my $type = 'LangTag:'.$opt{type};
64     $type .= ':' . $opt{subtag} if defined $opt{subtag};
65     $self->{onerror}->(node => $attr, type => $type,
66     value => $opt{value}, level => $opt{level});
67     });
68     }
69    
70 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
71     ## as defined by [IETF RFC 3066], Tags for the Identification
72     ## of Languages, or its successor; in addition, the empty string
73     ## may be specified." ("may" in lower case)
74 wakaba 1.47 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
75     ## allowed today?
76    
77     ## TODO: test data
78    
79 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
80 wakaba 1.36 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
81 wakaba 1.35 ## TODO: Test data...
82     }
83 wakaba 1.13 },
## xml:base - only characters outside U+0000..U+10FFFF are rejected.
84     base => sub {
85     my ($self, $attr) = @_;
86     my $value = $attr->value;
87     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
88     $self->{onerror}->(node => $attr,
89 wakaba 1.33 type => 'invalid attribute value');
90 wakaba 1.13 }
91 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
92     ## no author requirement on conformance in the XML Base specification.
93 wakaba 1.13 },
## xml:id - the value is whitespace-normalized (runs of whitespace are
## collapsed to one space, then a leading and a trailing space are
## removed) and recorded in |$self->{id}|, which maps each value to the
## array of attribute nodes carrying it.  A value that is already
## recorded is reported as a duplicate.
94     id => sub {
95     my ($self, $attr) = @_;
96     my $value = $attr->value;
97     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
98     $value =~ s/^\x20//;
99     $value =~ s/\x20$//;
100     ## TODO: NCName in XML 1.0 or 1.1
101     ## TODO: declared type is ID?
102 wakaba 1.33 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
103     $self->{onerror}->(node => $attr, level => 'error',
104     type => 'duplicate ID');
105 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
106 wakaba 1.13 } else {
107 wakaba 1.37 $self->{id}->{$value} = [$attr];
108 wakaba 1.13 }
109     },
110 wakaba 1.9 },
111     $XMLNS_NS => {
## Fallback for attributes in the xmlns namespace other than |xmlns|
## itself.  The first |if| chain examines the attribute value (the
## namespace being bound), the second one the local name (the prefix
## being declared).
112 wakaba 1.13 '' => sub {
113     my ($self, $attr) = @_;
114     my $ln = $attr->manakai_local_name;
115     my $value = $attr->value;
116     if ($value eq $XML_NS and $ln ne 'xml') {
117     $self->{onerror}
118 wakaba 1.33 ->(node => $attr, level => 'NC',
119     type => 'Reserved Prefixes and Namespace Names:=xml');
120 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
121     $self->{onerror}
122 wakaba 1.33 ->(node => $attr, level => 'NC',
123     type => 'Reserved Prefixes and Namespace Names:=xmlns');
124 wakaba 1.13 }
125     if ($ln eq 'xml' and $value ne $XML_NS) {
126     $self->{onerror}
127 wakaba 1.33 ->(node => $attr, level => 'NC',
128     type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
129 wakaba 1.13 } elsif ($ln eq 'xmlns') {
130     $self->{onerror}
131 wakaba 1.33 ->(node => $attr, level => 'NC',
132     type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
133 wakaba 1.13 }
134     ## TODO: If XML 1.0 and empty
135     },
## The |xmlns| attribute: its value must be neither the XML nor the
## xmlns namespace URI.
136     xmlns => sub {
137     my ($self, $attr) = @_;
138     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
139     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
140 wakaba 1.18 ## TODO: relative references are deprecated
141 wakaba 1.13 my $value = $attr->value;
142     if ($value eq $XML_NS) {
143     $self->{onerror}
144 wakaba 1.33 ->(node => $attr, level => 'NC',
145     type => 'Reserved Prefixes and Namespace Names:=xml');
146 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
147     $self->{onerror}
148 wakaba 1.33 ->(node => $attr, level => 'NC',
149     type => 'Reserved Prefixes and Namespace Names:=xmlns');
150 wakaba 1.13 }
151     },
152 wakaba 1.9 },
153     };
154    
## Attributes in no namespace whose local names are literally
## |xml:space|, |xml:lang|, |xml:base| and |xml:id| share the checkers
## of the corresponding attributes in the XML namespace.
155 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
156 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
157     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
158     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
159     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
160    
## Default set of checker callbacks.  Element definitions include it
## with |%AnyChecker| and override individual callbacks as needed.
##
##   check_start         - No-op.
##   check_attrs         - Runs the attribute checker registered in
##                         |$AttrChecker| for each attribute; attributes
##                         without a checker are reported as unsupported.
##   check_child_element - Reports a child element that is currently in
##                         the |minus_elements| list.
##   check_child_text    - No-op.
##   check_end           - Propagates the |has_significant| flag to the
##                         state of the real parent element.
our %AnyChecker = (
  check_start => sub { },
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    for my $attr (@{$item->{node}->attributes}) {
      my $nsuri = $attr->namespace_uri;
      $nsuri = '' unless defined $nsuri;
      my $ln = $attr->manakai_local_name;

      ## A checker for the exact name takes precedence over the
      ## namespace-wide fallback registered under the empty name.
      my $checker = $AttrChecker->{$nsuri}->{$ln}
          || $AttrChecker->{$nsuri}->{''};
      unless ($checker) {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        next;
      }
      $checker->($self, $attr);
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed - nothing to report.
    }
    ## Any other child element is accepted as well.
  },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $item->{real_parent_state}->{has_significant} = 1
        if $element_state->{has_significant};
  },
);
200    
## Definition used for an element that has no definition of its own:
## the callbacks of |%AnyChecker|, except that |check_start| reports the
## element as unsupported.
our $ElementDefault = {
  %AnyChecker,
  ## NOTE: No "element not defined" error - it is not supported anyway.
  status => FEATURE_ALLOWED,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};
    $self->{onerror}->(node => $el, level => 'unsupported',
                       type => 'element');
  },
};
211    
## Elements treated as embedded content, as
## |{namespace URI => {local name => 1}}|.
our $HTMLEmbeddedContent = {
  ## NOTE: All embedded content is also phrasing content.
  $HTML_NS => {
    img => 1,
    iframe => 1,
    embed => 1,
    object => 1,
    video => 1,
    audio => 1,
    canvas => 1,
  },
  ## NOTE: MathML is mentioned in the HTML5 spec.
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  ## NOTE: SVG is mentioned in the HTML5 spec.
  q<http://www.w3.org/2000/svg> => {svg => 1},
  ## NOTE: Foreign elements with content (but no metadata) are
  ## embedded content.
};

## Elements whose children are checked against the content model of
## the parent element.
my $HTMLTransparentElements = {
  $HTML_NS => {
    qw/
      ins 1
      del 1
      font 1
      noscript 1
      canvas 1
    /,
  },
  ## NOTE: |html:noscript| is transparent if scripting is disabled
  ## and not in |head|.
};

## Elements that are transparent only for a part of their content.
my $HTMLSemiTransparentElements = {
  $HTML_NS => {
    object => 1,
    video => 1,
    audio => 1,
  },
};

## Element definitions, as |{namespace URI => {local name => definition}}|.
our $Element = {};
237 wakaba 1.7
## Definition of the |rdf:RDF| element.  When the element starts, its
## content is handed to |Whatpm::RDFXML| and the resulting triples are
## collected in the |rdf| member of the checker's return value, as a
## pair of the element node and the array of triples.
$Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
  %AnyChecker,
  status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
  is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $rdf_ns = q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>;

    my $triples = [];
    push @{$self->{return}->{rdf}}, [$item->{node}, $triples];

    require Whatpm::RDFXML;
    my $rdf = Whatpm::RDFXML->new;
    ## TODO: Should we make bnodeid unique in a document?
    $rdf->{onerror} = $self->{onerror};
    $rdf->{ontriple} = sub {
      my %opt = @_;
      push @$triples, [@opt{qw/node subject predicate object/}];
      if (defined $opt{id}) {
        ## The statement is reified: describe it with the
        ## |rdf:subject|, |rdf:predicate|, |rdf:object| and |rdf:type|
        ## properties of the node identified by |id|.
        for my $part (qw/subject predicate object/) {
          push @$triples,
              [$opt{node}, $opt{id}, {uri => $rdf_ns . $part}, $opt{$part}];
        }
        push @$triples,
            [$opt{node},
             $opt{id},
             {uri => $rdf_ns . 'type'},
             {uri => $rdf_ns . 'Statement'}];
      }
    };
    $rdf->convert_rdf_element ($item->{node});
  },
};
280    
## Checks the conformance of a document.
##
## Arguments:
##   $self     - Class name or checker object.  A class name is blessed
##               into a new, empty checker object.
##   $doc      - The document node.  It is asked for
##               |document_element|, |manakai_is_html|,
##               |input_encoding|, |manakai_has_bom| and
##               |manakai_charset|.
##   $onerror  - Code reference invoked with named arguments (|node|,
##               |type| and usually |level|) for each reported problem.
##   $onsubdoc - Optional code reference stored as
##               |$self->{onsubdoc}|.  The default handler only emits a
##               warning.
##
## Returns the hash reference returned by |check_element| for the
## document element.  If the document has no document element, an
## empty result with the same set of keys is returned.
sub check_document ($$$;$) {
  my ($self, $doc, $onerror, $onsubdoc) = @_;
  $self = bless {}, $self unless ref $self;
  $self->{onerror} = $onerror;
  $self->{onsubdoc} = $onsubdoc || sub {
    warn "A subdocument is not conformance-checked";
  };

  $self->{must_level} = 'm';
  $self->{fact_level} = 'm';
  $self->{should_level} = 's';
  $self->{good_level} = 'w';
  $self->{info_level} = 'i';
  $self->{unsupported_level} = 'u';

  ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.

  my $docel = $doc->document_element;
  unless (defined $docel) {
    ## ISSUE: Should we check content of Document node?
    $onerror->(node => $doc, type => 'no document element');
    ## ISSUE: Is this non-conforming (to what spec)?  Or just a warning?

    ## The result has the same keys as the one built by
    ## |check_element|, so that callers can read |uri| and |rdf|
    ## without testing for the "no document element" case first.
    return {
      class => {},
      id => {}, table => [], term => {},
      uri => {},
      rdf => [],
    };
  }

  ## ISSUE: Unexpanded entity references and HTML5 conformance

  my $docel_nsuri = $docel->namespace_uri;
  $docel_nsuri = '' unless defined $docel_nsuri;
  unless ($Namespace->{$docel_nsuri}->{loaded}) {
    if ($Namespace->{$docel_nsuri}->{module}) {
      eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
    } else {
      $Namespace->{$docel_nsuri}->{loaded} = 1;
    }
  }

  ## The definition of the document element: the one for its exact
  ## name, else the namespace-wide default, else |$ElementDefault|.
  my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
      $Element->{$docel_nsuri}->{''} ||
      $ElementDefault;
  if ($docel_def->{is_root}) {
    #
  } elsif ($docel_def->{is_xml_root}) {
    ## Allowed as the root element only if the document is not an
    ## HTML document.
    unless ($doc->manakai_is_html) {
      #
    } else {
      $onerror->(node => $docel, type => 'element not allowed:root:xml');
    }
  } else {
    $onerror->(node => $docel, type => 'element not allowed:root');
  }

  ## TODO: Check for other items other than document element
  ## (second (erroneous) element, text nodes, PI nodes, doctype nodes)

  my $return = $self->check_element ($docel, $onerror, $onsubdoc);

  ## TODO: Test for these checks are necessary.
  my $charset_name = $doc->input_encoding;
  if (defined $charset_name) {
    require Message::Charset::Info;
    my $charset = $Message::Charset::Info::IANACharset->{$charset_name};

    if ($doc->manakai_is_html) {
      if (not $doc->manakai_has_bom and
          not defined $doc->manakai_charset) {
        unless ($charset->{is_html_ascii_superset}) {
          $onerror->(node => $doc, level => $self->{must_level},
                     type => 'non ascii superset:'.$charset_name);
        }

        if (not $self->{has_charset} and ## TODO: This does not work now.
            not $charset->{iana_names}->{'us-ascii'}) {
          $onerror->(node => $doc, level => $self->{must_level},
                     type => 'no character encoding declaration:'.$charset_name);
        }
      }

      if ($charset->{iana_names}->{'utf-8'}) {
        #
      } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
               $charset->{iana_names}->{'x-jis0208'} or
               $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
               $charset->{is_ebcdic_based}) {
        $onerror->(node => $doc,
                   type => 'character encoding:'.$charset_name,
                   level => $self->{should_level});
      } elsif ($charset->{iana_names}->{'cesu-8'} or
               ## NOTE: This has to be UTF-7.  UTF-8 is already accepted
               ## by the first branch, so a test for |utf-8| here could
               ## never match and UTF-7 would only get the generic
               ## warning of the last branch.
               $charset->{iana_names}->{'utf-7'} or ## ISSUE: UNICODE-1-1-UTF-7?
               $charset->{iana_names}->{'bocu-1'} or
               $charset->{iana_names}->{'scsu'}) {
        $onerror->(node => $doc,
                   type => 'character encoding:'.$charset_name,
                   level => $self->{must_level});
      } else {
        $onerror->(node => $doc,
                   type => 'character encoding:'.$charset_name,
                   level => $self->{good_level});
      }
    }
  } elsif ($doc->manakai_is_html) {
    ## NOTE: MUST and SHOULD requirements above cannot be tested,
    ## since the document has no input charset encoding information.
    $onerror->(node => $doc,
               type => 'character encoding:',
               level => 'unsupported');
  }

  return $return;
} # check_document
393 wakaba 1.1
## Checks the element |$el| and its descendants.
##
## Arguments:
##   $self     - Class name or checker object (a class name is blessed
##               into a new, empty object).
##   $el       - The element node at which checking starts.
##   $onerror  - Code reference called with named arguments for each
##               reported problem.
##   $onsubdoc - Optional code reference stored as |$self->{onsubdoc}|;
##               the default handler only emits a warning.
##
## Returns |$self->{return}|, a hash reference with the keys |class|,
## |id|, |table|, |term|, |uri| and |rdf|.
##
## The tree is walked with an explicit work list (|@item|) instead of
## recursion.  A work item is either a hash reference describing an
## element to be examined or an array reference |[$code, @args]|
## holding a deferred checker callback.  The items generated for an
## element are put at the front of the list, so its callbacks and its
## children are processed before whatever was queued after it.
394 wakaba 1.56 sub check_element ($$$;$) {
395     my ($self, $el, $onerror, $onsubdoc) = @_;
396 wakaba 1.42 $self = bless {}, $self unless ref $self;
397     $self->{onerror} = $onerror;
398 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
399     warn "A subdocument is not conformance-checked";
400     };
401 wakaba 1.2
402 wakaba 1.48 $self->{must_level} = 'm';
403 wakaba 1.76 $self->{fact_level} = 'm';
404 wakaba 1.48 $self->{should_level} = 's';
405 wakaba 1.51 $self->{good_level} = 'w';
406 wakaba 1.67 $self->{info_level} = 'i';
407 wakaba 1.71 $self->{unsupported_level} = 'u';
408 wakaba 1.48
## Reset the per-run state.
409 wakaba 1.61 $self->{plus_elements} = {};
410     $self->{minus_elements} = {};
411 wakaba 1.42 $self->{id} = {};
412     $self->{term} = {};
413     $self->{usemap} = [];
414     $self->{contextmenu} = [];
415     $self->{map} = {};
416     $self->{menu} = {};
417     $self->{has_link_type} = {};
418 wakaba 1.60 $self->{flag} = {};
419 wakaba 1.46 #$self->{has_uri_attr};
420     #$self->{has_hyperlink_element};
421 wakaba 1.51 #$self->{has_charset};
422 wakaba 1.57 #$self->{has_base};
## The |id| and |term| members of the result are the very hashes that
## are filled in during the check.
423 wakaba 1.42 $self->{return} = {
424     class => {},
425     id => $self->{id}, table => [], term => $self->{term},
426 wakaba 1.76 uri => {}, # URIs other than those in RDF triples
427     ## TODO: xmlns="", SYSTEM "", atom:* src="", xml:base=""
428 wakaba 1.73 rdf => [],
429 wakaba 1.42 };
430 wakaba 1.4
431 wakaba 1.60 my @item = ({type => 'element', node => $el, parent_state => {}});
432 wakaba 1.66 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
433 wakaba 1.60 while (@item) {
434     my $item = shift @item;
435     if (ref $item eq 'ARRAY') {
## Deferred callback: the first member is the code reference, the
## remaining members are its arguments.
436     my $code = shift @$item;
437     next unless $code;## TODO: temp.
438     $code->(@$item);
439     } elsif ($item->{type} eq 'element') {
440     my $el_nsuri = $item->{node}->namespace_uri;
441     $el_nsuri = '' unless defined $el_nsuri;
442     my $el_ln = $item->{node}->manakai_local_name;
443    
444     unless ($Namespace->{$el_nsuri}->{loaded}) {
445     if ($Namespace->{$el_nsuri}->{module}) {
446     eval qq{ require $Namespace->{$el_nsuri}->{module} } or die $@;
447 wakaba 1.42 } else {
448 wakaba 1.60 $Namespace->{$el_nsuri}->{loaded} = 1;
449 wakaba 1.1 }
450     }
451 wakaba 1.63
452     my $element_state = {};
453 wakaba 1.60 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
454     $Element->{$el_nsuri}->{''} ||
455 wakaba 1.42 $ElementDefault;
## The children of a transparent element are checked with the
## definition and the state of the parent element, if there is one;
## otherwise the element's own definition and state are used.
456 wakaba 1.61 my $content_def = $item->{transparent}
457     ? $item->{parent_def} || $eldef : $eldef;
458 wakaba 1.63 my $content_state = $item->{transparent}
459 wakaba 1.65 ? $item->{parent_def}
460     ? $item->{parent_state} || $element_state : $element_state
461     : $element_state;
462 wakaba 1.60
## Report the standardization status unless the REC bit is set.
463 wakaba 1.67 unless ($eldef->{status} & FEATURE_STATUS_REC) {
464     my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
465     $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
466     $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
467     $self->{onerror}->(node => $item->{node},
468     type => 'status:'.$status.':element',
469     level => $self->{info_level});
470     }
471 wakaba 1.70 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
472     $self->{onerror}->(node => $item->{node},
473     type => 'element not defined',
474     level => $self->{must_level});
475     } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
476     $self->{onerror}->(node => $item->{node},
477     type => 'deprecated:element',
478     level => $self->{should_level});
479     } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
480     $self->{onerror}->(node => $item->{node},
481     type => 'deprecated:element',
482     level => $self->{info_level});
483     }
484 wakaba 1.67
## Work items for this element, in processing order: |check_start|,
## |check_attrs|, one callback (plus one element item) per child, and
## finally |check_end|.
485 wakaba 1.60 my @new_item;
486     push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
487     push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
488 wakaba 1.61
489 wakaba 1.60 my @child = @{$item->{node}->child_nodes};
490     while (@child) {
491     my $child = shift @child;
492     my $child_nt = $child->node_type;
493     if ($child_nt == 1) { # ELEMENT_NODE
494     my $child_nsuri = $child->namespace_uri;
495     $child_nsuri = '' unless defined $child_nsuri;
496     my $child_ln = $child->manakai_local_name;
## Transparent child, except for |html:noscript| within |head|.
497     if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
498     not (($self->{flag}->{in_head} or
499 wakaba 1.61 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
500     $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
501 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
502     $self, $item, $child,
503 wakaba 1.66 $child_nsuri, $child_ln, 1,
504     $content_state, $element_state];
505 wakaba 1.60 push @new_item, {type => 'element', node => $child,
506 wakaba 1.65 parent_state => $content_state,
507 wakaba 1.61 parent_def => $content_def,
508 wakaba 1.66 real_parent_state => $element_state,
509 wakaba 1.60 transparent => 1};
510     } else {
## Within |object|, |video| and |audio| that have a parent: once a
## child is seen that is neither in the plus list nor the element's
## own |param| / |source|, this and the following children are checked
## with the definition and the state of the parent.
511 wakaba 1.65 if ($item->{parent_def} and # has parent
512     $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
513 wakaba 1.61 if ($el_ln eq 'object') {
514     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
515     #
516     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
517     #
518     } else {
519 wakaba 1.62 $content_def = $item->{parent_def} || $content_def;
520 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
521 wakaba 1.62 }
522     } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
523     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
524     #
525     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
526     $element_state->{has_source} = 1;
527     } else {
528     $content_def = $item->{parent_def} || $content_def;
529 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
530 wakaba 1.61 }
531     }
532     }
533    
534 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
535     $self, $item, $child,
536 wakaba 1.64 $child_nsuri, $child_ln,
537     $HTMLSemiTransparentElements
538     ->{$child_nsuri}->{$child_ln},
539 wakaba 1.66 $content_state, $element_state];
540 wakaba 1.60 push @new_item, {type => 'element', node => $child,
541 wakaba 1.65 parent_def => $content_def,
542 wakaba 1.66 real_parent_state => $element_state,
543 wakaba 1.65 parent_state => $content_state};
544 wakaba 1.60 }
545    
546     if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
547     $element_state->{has_significant} = 1;
548     }
549     } elsif ($child_nt == 3 or # TEXT_NODE
550     $child_nt == 4) { # CDATA_SECTION_NODE
## Text is significant if it contains any character other than
## U+0009..U+000D and U+0020.
551     my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
552     push @new_item, [$content_def->{check_child_text},
553     $self, $item, $child, $has_significant,
554 wakaba 1.66 $content_state, $element_state];
555     $element_state->{has_significant} ||= $has_significant;
## NOTE(review): Unlike the element branch above, only |$content_def|
## is switched to the parent here; |$content_state| is left unchanged.
## Confirm whether that difference is intended.
556 wakaba 1.61 if ($has_significant and
557     $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
558     $content_def = $item->{parent_def} || $content_def;
559     }
560 wakaba 1.60 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
## The children of an entity reference are appended to the list of
## children still to be examined.
561     push @child, @{$child->child_nodes};
562 wakaba 1.1 }
563 wakaba 1.60 ## TODO: PI_NODE
564     ## TODO: Unknown node type
565 wakaba 1.1 }
566 wakaba 1.60
567     push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
568    
## Process the items of this element before the rest of the work list.
569     unshift @item, @new_item;
570 wakaba 1.30 } else {
571 wakaba 1.60 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
572 wakaba 1.4 }
573 wakaba 1.1 }
574 wakaba 1.17
## Each |usemap| / |contextmenu| entry is a pair: its first member is
## looked up in |$self->{map}| / |$self->{menu}| and its second member
## is the node reported when the lookup fails.
575     for (@{$self->{usemap}}) {
576     unless ($self->{map}->{$_->[0]}) {
577     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
578     }
579     }
580    
581 wakaba 1.32 for (@{$self->{contextmenu}}) {
582     unless ($self->{menu}->{$_->[0]}) {
583     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
584     }
585     }
586    
## Drop part of the per-run state before returning the result.
587 wakaba 1.61 delete $self->{plus_elements};
588     delete $self->{minus_elements};
589 wakaba 1.17 delete $self->{onerror};
590     delete $self->{id};
591     delete $self->{usemap};
592     delete $self->{map};
593 wakaba 1.33 return $self->{return};
594 wakaba 1.1 } # check_element
595    
## Adds elements to the list of excluded elements
## (|$self->{minus_elements}|).
##
## Arguments:
##   $self          - The checker object.
##   $element_state - State hash of the element adding the exclusions.
##                    Every name that was not excluded before is
##                    recorded under |minus_elements_original|, so that
##                    |_remove_minus_elements| can undo exactly these
##                    additions.
##   @_ (rest)      - Hash references of the form
##                    |{namespace URI => {local name => ...}}|; only
##                    the keys are looked at.
sub _add_minus_elements ($$@) {
  my ($self, $element_state, @lists) = @_;
  for my $list (@lists) {
    for my $nsuri (keys %$list) {
      for my $ln (keys %{$list->{$nsuri}}) {
        ## Already excluded by an ancestor - not ours to undo.
        next if $self->{minus_elements}->{$nsuri}->{$ln};
        $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
        $self->{minus_elements}->{$nsuri}->{$ln} = 1;
      }
    }
  }
} # _add_minus_elements
610    
## Undoes the additions made by |_add_minus_elements| for an element:
## every name recorded under |$element_state->{minus_elements_original}|
## is deleted from |$self->{minus_elements}|.
sub _remove_minus_elements ($$) {
  my ($self, $element_state) = @_;
  for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
    delete $self->{minus_elements}->{$nsuri}->{$_}
        for keys %{$element_state->{minus_elements_original}->{$nsuri}};
  }
} # _remove_minus_elements
620    
## Adds elements to the list of additionally allowed elements
## (|$self->{plus_elements}|).
##
## Arguments:
##   $self          - The checker object.
##   $element_state - State hash of the element adding the entries.
##                    Every name that was not in the list before is
##                    recorded under |plus_elements_original|, so that
##                    |_remove_plus_elements| can undo exactly these
##                    additions.
##   @_ (rest)      - Hash references of the form
##                    |{namespace URI => {local name => ...}}|; only
##                    the keys are looked at.
sub _add_plus_elements ($$@) {
  my ($self, $element_state, @lists) = @_;
  for my $list (@lists) {
    for my $nsuri (keys %$list) {
      for my $ln (keys %{$list->{$nsuri}}) {
        ## Already allowed by an ancestor - not ours to undo.
        next if $self->{plus_elements}->{$nsuri}->{$ln};
        $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
        $self->{plus_elements}->{$nsuri}->{$ln} = 1;
      }
    }
  }
} # _add_plus_elements
635    
## Undoes the additions made by |_add_plus_elements| for an element:
## every name recorded under |$element_state->{plus_elements_original}|
## is deleted from |$self->{plus_elements}|.
sub _remove_plus_elements ($$) {
  my ($self, $element_state) = @_;
  for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
    delete $self->{plus_elements}->{$nsuri}->{$_}
        for keys %{$element_state->{plus_elements_original}->{$nsuri}};
  }
} # _remove_plus_elements
645    
## Reports the conformance and standardization status of an attribute.
##
## Arguments:
##   $self        - The checker object; |onerror| and the |*_level|
##                  members are used.
##   $attr        - The attribute node that is reported.
##   $status_code - Bitwise OR of |FEATURE_*| flags.
##
## At most one conformance-related message is reported: "attribute not
## defined" if |FEATURE_ALLOWED| is not set, otherwise a deprecation
## message if one of the |FEATURE_DEPRECATED_*| flags is set.  After
## that, unless |FEATURE_STATUS_REC| is set, the standardization status
## is reported at the informational level.
sub _attr_status_info ($$$) {
  my ($self, $attr, $status_code) = @_;

  my $report = sub {
    my ($type, $level) = @_;
    $self->{onerror}->(node => $attr, type => $type, level => $level);
  };

  if (not ($status_code & FEATURE_ALLOWED)) {
    $report->('attribute not defined', $self->{must_level});
  } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
    $report->('deprecated:attr', $self->{should_level});
  } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
    $report->('deprecated:attr', $self->{info_level});
  }

  ## An interoperable standard needs no status message.
  return if $status_code & FEATURE_STATUS_REC;

  ## The most mature stage whose bit is set wins.
  my $status = 'non-standard';
  for my $stage ([FEATURE_STATUS_CR, 'cr'],
                 [FEATURE_STATUS_LC, 'lc'],
                 [FEATURE_STATUS_WD, 'wd']) {
    if ($status_code & $stage->[0]) {
      $status = $stage->[1];
      last;
    }
  }
  $report->('status:'.$status.':attr', $self->{info_level});
} # _attr_status_info
679    
## Adds the elements named by the given lists to |$self->{minuses}|.
##
## Each argument after |$self| is a hash reference of the form
## |{namespace URI => {local name => ...}}|; only the keys are used.
##
## Returns a hash reference |{type => 'plus', list => ...}| whose
## |list| holds only the names that were newly added.  Given to
## |_remove_minuses|, it removes exactly these names from
## |$self->{minuses}| again.
sub _add_minuses ($@) {
  my ($self, @lists) = @_;
  my %added;
  for my $list (@lists) {
    for my $ns (keys %$list) {
      for my $ln (keys %{$list->{$ns}}) {
        next if $self->{minuses}->{$ns}->{$ln};
        $self->{minuses}->{$ns}->{$ln} = 1;
        $added{$ns}->{$ln} = 1;
      }
    }
  }
  return {type => 'plus', list => \%added};
} # _add_minuses
695    
## Adds the elements named by the given lists to |$self->{pluses}|.
##
## Each argument after |$self| is a hash reference of the form
## |{namespace URI => {local name => ...}}|; only the keys are used.
##
## Returns a hash reference |{type => 'minus', list => ...}| whose
## |list| holds only the names that were newly added.  Given to
## |_remove_minuses|, it removes exactly these names from
## |$self->{pluses}| again.
sub _add_pluses ($@) {
  my ($self, @lists) = @_;
  my %added;
  for my $list (@lists) {
    for my $ns (keys %$list) {
      for my $ln (keys %{$list->{$ns}}) {
        next if $self->{pluses}->{$ns}->{$ln};
        $self->{pluses}->{$ns}->{$ln} = 1;
        $added{$ns}->{$ln} = 1;
      }
    }
  }
  return {type => 'minus', list => \%added};
} # _add_pluses
711    
## Undoes an earlier |_add_minuses| or |_add_pluses| call.
##
## |$todo| is the hash reference returned by one of those methods.  Its
## |type| selects the list to clean up: |plus| (returned by
## |_add_minuses|) deletes from |$self->{minuses}|, |minus| (returned
## by |_add_pluses|) deletes from |$self->{pluses}|.  Only names with a
## true value in |$todo->{list}| are deleted.
##
## Returns 1.  Dies if the type is neither |plus| nor |minus|.
sub _remove_minuses ($$) {
  my ($self, $todo) = @_;

  ## The type names the undo operation, i.e. the opposite list.
  my %target_of = (minus => 'pluses', plus => 'minuses');
  my $target = $target_of{$todo->{type}};
  die "$0: Unknown +- type: $todo->{type}" unless defined $target;

  for my $ns (keys %{$todo->{list}}) {
    for my $ln (keys %{$todo->{list}->{$ns}}) {
      delete $self->{$target}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
    }
  }
  return 1;
} # _remove_minuses
731    
732 wakaba 1.50 ## NOTE: The relative priority of "minuses" and "pluses" is currently
733     ## left undefined and is implemented inconsistently; this is not a
734     ## problem for now, since no element belongs to both lists.
735    
## Determines how |$node| and its children are to be processed.
##
## Arguments:
##   $self        - The checker object.
##   $node        - The element node being examined.
##   $parent_todo - Todo entry of the parent; its |flag| hash is read
##                  and a shallow copy of it is attached to every newly
##                  created todo entry.
##
## Returns a two-element list |($sib, $new_todos)|:
##   $sib       - Array reference of child nodes of |$node| (and, for
##                |noscript|, the marker returned by |_add_minuses|).
##                Presumably these are to be checked by the caller in
##                place of |$node| - confirm against the callers, which
##                are not part of this file section.
##   $new_todos - Array reference of todo entries for |$node| itself,
##                of type |element|, |element-attributes| or |code|.
736 wakaba 1.30 sub _check_get_children ($$$) {
737     my ($self, $node, $parent_todo) = @_;
738 wakaba 1.4 my $new_todos = [];
739 wakaba 1.2 my $sib = [];
740     TP: {
741     my $node_ns = $node->namespace_uri;
742     $node_ns = '' unless defined $node_ns;
743     my $node_ln = $node->manakai_local_name;
## Transparent elements: all children are put into |$sib| and only the
## attributes of the element itself are queued for checking.
744 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
745     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
746     if ($parent_todo->{flag}->{in_head}) {
## |noscript| in |head| is not handled as transparent; processing
## falls through to the generic code after this |if| block.
747     #
748     } else {
## Nested |noscript| is excluded; the undo marker is placed after the
## children in |$sib|.
749     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
750     push @$sib, $end;
751    
752     unshift @$sib, @{$node->child_nodes};
753     push @$new_todos, {type => 'element-attributes', node => $node};
754     last TP;
755     }
756 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
## The queued code resets the parent's "significant" flag to 0 if it
## was false at this point.
757     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
758     unshift @$sib, @{$node->child_nodes};
759     push @$new_todos, {type => 'element-attributes', node => $node};
760     push @$new_todos,
761     {type => 'code',
762     code => sub {
763     $parent_todo->{flag}->{has_descendant}->{significant} = 0
764     if not $sig_flag;
765     }};
766     last TP;
767 wakaba 1.45 } else {
768     unshift @$sib, @{$node->child_nodes};
769     push @$new_todos, {type => 'element-attributes', node => $node};
770     last TP;
771 wakaba 1.2 }
772     }
773 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
774 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
775     unshift @$sib, @{$node->child_nodes};
776 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
777 wakaba 1.2 last TP;
778     } else {
## Without |src|: leading |source| elements and text without
## significant characters are skipped; the scan stops at the first
## other element or at significant text.
## NOTE(review): The node that ends the scan has already been shifted
## off |@cn|, so it is not part of what is put into |$sib| - confirm
## whether dropping it is intended.  The same applies to the |object|
## branch below.
779     my @cn = @{$node->child_nodes};
780     CN: while (@cn) {
781     my $cn = shift @cn;
782     my $cnt = $cn->node_type;
783     if ($cnt == 1) {
784 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
785     $cn_nsuri = '' unless defined $cn_nsuri;
786     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
787 wakaba 1.2 #
788     } else {
789     last CN;
790     }
791     } elsif ($cnt == 3 or $cnt == 4) {
792     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
793     last CN;
794     }
795     }
796     } # CN
797     unshift @$sib, @cn;
798     }
799 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
## Same scan as for |video| / |audio|, with |param| in place of
## |source|.
800     my @cn = @{$node->child_nodes};
801     CN: while (@cn) {
802     my $cn = shift @cn;
803     my $cnt = $cn->node_type;
804     if ($cnt == 1) {
805     my $cn_nsuri = $cn->namespace_uri;
806     $cn_nsuri = '' unless defined $cn_nsuri;
807     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
808     #
809     } else {
810     last CN;
811     }
812     } elsif ($cnt == 3 or $cnt == 4) {
813     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
814     last CN;
815     }
816     }
817     } # CN
818     unshift @$sib, @cn;
819 wakaba 1.2 }
820 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
821 wakaba 1.2 } # TP
822 wakaba 1.30
## Every new todo entry gets its own shallow copy of the parent's flags.
823     for my $new_todo (@$new_todos) {
824     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
825     }
826    
827 wakaba 1.4 return ($sib, $new_todos);
828 wakaba 1.2 } # _check_get_children
829    
830 wakaba 1.44 =head1 LICENSE
831    
832 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
833 wakaba 1.44
834     This library is free software; you can redistribute it
835     and/or modify it under the same terms as Perl itself.
836    
837     =cut
838    
839 wakaba 1.1 1;
840 wakaba 1.76 # $Date: 2008/03/21 09:44:57 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24