/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.87 - (hide annotations) (download)
Fri Aug 29 13:34:36 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.86: +7 -2 lines
++ whatpm/Whatpm/ChangeLog	29 Aug 2008 13:33:31 -0000
2008-08-29  Wakaba  <wakaba@suika.fam.cx>

	* IMTChecker.pm: Updated for the new error reporting architecture.

	* ContentChecker.pm: Error levels for IMTs are added.

++ whatpm/Whatpm/ContentChecker/ChangeLog	29 Aug 2008 13:34:24 -0000
2008-08-29  Wakaba  <wakaba@suika.fam.cx>

	* Atom.pm, HTML.pm: Made {level} inherited to the IMT checker.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.87 our $VERSION=do{my @r=(q$Revision: 1.86 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
12 wakaba 1.70 ## Stability
13 wakaba 1.67 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14     sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15     sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16     sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17    
18 wakaba 1.70 ## Deprecated
19     sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20     sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21    
22     ## Conformance
23     sub FEATURE_ALLOWED () { 0b10000 }
24    
25 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28    
29 wakaba 1.42 my $Namespace = {
30 wakaba 1.79 '' => {loaded => 1},
31 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
32 wakaba 1.72 q<http://purl.org/syndication/history/1.0>
33     => {module => 'Whatpm::ContentChecker::Atom'},
34     q<http://purl.org/syndication/threading/1.0>
35     => {module => 'Whatpm::ContentChecker::Atom'},
36 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
37     $XML_NS => {loaded => 1},
38     $XMLNS_NS => {loaded => 1},
39 wakaba 1.73 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
40 wakaba 1.42 };
41    
42 wakaba 1.79 sub load_ns_module ($) {
43     my $nsuri = shift; # namespace URI or ''
44     unless ($Namespace->{$nsuri}->{loaded}) {
45     if ($Namespace->{$nsuri}->{module}) {
46     eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
47     } else {
48     $Namespace->{$nsuri}->{loaded} = 1;
49     }
50     }
51     } # load_ns_module
52    
53 wakaba 1.42 our $AttrChecker = {
54 wakaba 1.9 $XML_NS => {
55 wakaba 1.13 space => sub {
56     my ($self, $attr) = @_;
57     my $value = $attr->value;
58     if ($value eq 'default' or $value eq 'preserve') {
59     #
60     } else {
61     ## NOTE: An XML "error"
62 wakaba 1.83 $self->{onerror}->(node => $attr, level => $self->{level}->{xml_error},
63 wakaba 1.33 type => 'invalid attribute value');
64 wakaba 1.13 }
65     },
66     lang => sub {
67 wakaba 1.35 my ($self, $attr) = @_;
68 wakaba 1.47 my $value = $attr->value;
69     if ($value eq '') {
70     #
71     } else {
72     require Whatpm::LangTag;
73     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
74 wakaba 1.83 $self->{onerror}->(@_, node => $attr);
75 wakaba 1.85 }, $self->{level});
76 wakaba 1.47 }
77    
78 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
79     ## as defined by [IETF RFC 3066], Tags for the Identification
80     ## of Languages, or its successor; in addition, the empty string
81     ## may be specified." ("may" in lower case)
82 wakaba 1.47 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
83     ## allowed today?
84    
85     ## TODO: test data
86    
87 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
88 wakaba 1.83 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang',
89     level => $self->{level}->{must});
90 wakaba 1.35 ## TODO: Test data...
91     }
92 wakaba 1.13 },
93     base => sub {
94     my ($self, $attr) = @_;
95     my $value = $attr->value;
96     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
97     $self->{onerror}->(node => $attr,
98 wakaba 1.83 type => 'invalid attribute value',
99     level => $self->{level}->{fact}, ## TODO: correct?
100     );
101 wakaba 1.13 }
102 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
103     ## no author requirement on conformance in the XML Base specification.
104 wakaba 1.13 },
105     id => sub {
106     my ($self, $attr) = @_;
107     my $value = $attr->value;
108     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
109     $value =~ s/^\x20//;
110     $value =~ s/\x20$//;
111     ## TODO: NCName in XML 1.0 or 1.1
112     ## TODO: declared type is ID?
113 wakaba 1.83 if ($self->{id}->{$value}) {
114     $self->{onerror}->(node => $attr,
115     type => 'duplicate ID',
116     level => $self->{level}->{xml_id_error});
117 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
118 wakaba 1.13 } else {
119 wakaba 1.37 $self->{id}->{$value} = [$attr];
120 wakaba 1.13 }
121     },
122 wakaba 1.9 },
123     $XMLNS_NS => {
124 wakaba 1.13 '' => sub {
125     my ($self, $attr) = @_;
126     my $ln = $attr->manakai_local_name;
127     my $value = $attr->value;
128     if ($value eq $XML_NS and $ln ne 'xml') {
129     $self->{onerror}
130 wakaba 1.83 ->(node => $attr,
131     type => 'Reserved Prefixes and Namespace Names:Name',
132     text => $value,
133     level => $self->{level}->{nc});
134 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
135     $self->{onerror}
136 wakaba 1.83 ->(node => $attr,
137     type => 'Reserved Prefixes and Namespace Names:Name',
138     text => $value,
139     level => $self->{level}->{nc});
140 wakaba 1.13 }
141     if ($ln eq 'xml' and $value ne $XML_NS) {
142     $self->{onerror}
143 wakaba 1.83 ->(node => $attr,
144     type => 'Reserved Prefixes and Namespace Names:Prefix',
145     text => $ln,
146     level => $self->{level}->{nc});
147 wakaba 1.13 } elsif ($ln eq 'xmlns') {
148     $self->{onerror}
149 wakaba 1.83 ->(node => $attr,
150     type => 'Reserved Prefixes and Namespace Names:Prefix',
151     text => $ln,
152     level => $self->{level}->{nc});
153 wakaba 1.13 }
154     ## TODO: If XML 1.0 and empty
155     },
156     xmlns => sub {
157     my ($self, $attr) = @_;
158     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
159     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
160 wakaba 1.18 ## TODO: relative references are deprecated
161 wakaba 1.13 my $value = $attr->value;
162     if ($value eq $XML_NS) {
163     $self->{onerror}
164 wakaba 1.83 ->(node => $attr,
165     type => 'Reserved Prefixes and Namespace Names:Name',
166     text => $value,
167     level => $self->{level}->{nc});
168 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
169     $self->{onerror}
170 wakaba 1.83 ->(node => $attr,
171     type => 'Reserved Prefixes and Namespace Names:Name',
172     text => $value,
173     level => $self->{level}->{nc});
174 wakaba 1.13 }
175     },
176 wakaba 1.9 },
177     };
178    
179 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
180 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
181     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
182     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
183     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
184    
185 wakaba 1.79 our $AttrStatus;
186    
187     for (qw/space lang base id/) {
188     $AttrStatus->{$XML_NS}->{$_} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
189     $AttrStatus->{''}->{"xml:$_"} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
190     ## XML 1.0: FEATURE_STATUS_CR
191     ## XML 1.1: FEATURE_STATUS_REC
192     ## XML Namespaces 1.0: FEATURE_STATUS_CR
193     ## XML Namespaces 1.1: FEATURE_STATUS_REC
194     ## XML Base: FEATURE_STATUS_REC
195     ## xml:id: FEATURE_STATUS_REC
196     }
197    
198     $AttrStatus->{$XMLNS_NS}->{''} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
199    
200     ## TODO: xsi:schemaLocation for XHTML2 support (very, very low priority)
201    
202 wakaba 1.60 our %AnyChecker = (
203     check_start => sub { },
204     check_attrs => sub {
205     my ($self, $item, $element_state) = @_;
206     for my $attr (@{$item->{node}->attributes}) {
207 wakaba 1.9 my $attr_ns = $attr->namespace_uri;
208     $attr_ns = '' unless defined $attr_ns;
209     my $attr_ln = $attr->manakai_local_name;
210 wakaba 1.79
211     load_ns_module ($attr_ns);
212    
213 wakaba 1.9 my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
214 wakaba 1.60 || $AttrChecker->{$attr_ns}->{''};
215 wakaba 1.79 my $status = $AttrStatus->{$attr_ns}->{$attr_ln}
216     || $AttrStatus->{$attr_ns}->{''};
217     if (not defined $status) {
218     $status = FEATURE_ALLOWED;
219     ## NOTE: FEATURE_ALLOWED for all attributes, since the element
220     ## is not supported and therefore "attribute not defined" error
221     ## should not raised (too verbose) and global attributes should be
222     ## allowed anyway (if a global attribute has its specified creteria
223     ## for where it may be specified, then it should be checked in it's
224     ## checker function).
225     }
226 wakaba 1.9 if ($checker) {
227     $checker->($self, $attr);
228 wakaba 1.17 } else {
229 wakaba 1.83 $self->{onerror}->(node => $attr,
230     type => 'unknown attribute',
231     level => $self->{level}->{uncertain});
232 wakaba 1.9 }
233 wakaba 1.79 $self->_attr_status_info ($attr, $status);
234 wakaba 1.9 }
235     },
236 wakaba 1.60 check_child_element => sub {
237     my ($self, $item, $child_el, $child_nsuri, $child_ln,
238     $child_is_transparent, $element_state) = @_;
239     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
240     $self->{onerror}->(node => $child_el,
241     type => 'element not allowed:minus',
242 wakaba 1.83 level => $self->{level}->{must});
243 wakaba 1.60 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
244     #
245     } else {
246     #
247     }
248     },
249     check_child_text => sub { },
250     check_end => sub {
251     my ($self, $item, $element_state) = @_;
252 wakaba 1.82 ## NOTE: There is a modified copy of the code below for |html:ruby|.
253 wakaba 1.60 if ($element_state->{has_significant}) {
254 wakaba 1.66 $item->{real_parent_state}->{has_significant} = 1;
255 wakaba 1.60 }
256     },
257     );
258    
259     our $ElementDefault = {
260     %AnyChecker,
261 wakaba 1.70 status => FEATURE_ALLOWED,
262     ## NOTE: No "element not defined" error - it is not supported anyway.
263 wakaba 1.60 check_start => sub {
264     my ($self, $item, $element_state) = @_;
265 wakaba 1.83 $self->{onerror}->(node => $item->{node},
266     type => 'unknown element',
267     level => $self->{level}->{uncertain});
268 wakaba 1.60 },
269 wakaba 1.1 };
270    
271 wakaba 1.60 our $HTMLEmbeddedContent = {
272     ## NOTE: All embedded content is also phrasing content.
273     $HTML_NS => {
274     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
275     canvas => 1,
276     },
277     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
278     q<http://www.w3.org/2000/svg> => {svg => 1},
279     ## NOTE: Foreign elements with content (but no metadata) are
280     ## embedded content.
281     };
282    
283 wakaba 1.7 my $HTMLTransparentElements = {
284 wakaba 1.57 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
285 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
286     ## and not in |head|.
287 wakaba 1.7 };
288    
289 wakaba 1.61 my $HTMLSemiTransparentElements = {
290     $HTML_NS => {object => 1, video => 1, audio => 1},
291     };
292 wakaba 1.57
293 wakaba 1.42 our $Element = {};
294 wakaba 1.7
295 wakaba 1.73 $Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
296     %AnyChecker,
297     status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
298     is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
299     check_start => sub {
300     my ($self, $item, $element_state) = @_;
301     my $triple = [];
302     push @{$self->{return}->{rdf}}, [$item->{node}, $triple];
303     require Whatpm::RDFXML;
304     my $rdf = Whatpm::RDFXML->new;
305 wakaba 1.75 ## TODO: Should we make bnodeid unique in a document?
306 wakaba 1.73 $rdf->{onerror} = $self->{onerror};
307 wakaba 1.84 $rdf->{level} = $self->{level};
308 wakaba 1.73 $rdf->{ontriple} = sub {
309     my %opt = @_;
310     push @$triple,
311     [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
312 wakaba 1.74 if (defined $opt{id}) {
313     push @$triple,
314     [$opt{node},
315     $opt{id},
316     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>},
317     $opt{subject}];
318     push @$triple,
319     [$opt{node},
320     $opt{id},
321     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>},
322     $opt{predicate}];
323     push @$triple,
324     [$opt{node},
325     $opt{id},
326     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>},
327     $opt{object}];
328     push @$triple,
329     [$opt{node},
330     $opt{id},
331     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>},
332     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}];
333     }
334 wakaba 1.73 };
335     $rdf->convert_rdf_element ($item->{node});
336     },
337     };
338    
339 wakaba 1.83 my $default_error_level = {
340     must => 'm',
341     should => 's',
342     warn => 'w',
343     good => 'w',
344     info => 'i',
345 wakaba 1.85
346 wakaba 1.83 uncertain => 'u',
347    
348 wakaba 1.84 html4_fact => 'm',
349 wakaba 1.83 xml_error => 'm', ## TODO: correct?
350     nc => 'm', ## XML Namespace Constraints ## TODO: correct?
351 wakaba 1.84
352 wakaba 1.85 uri_syntax => 'm',
353     uri_fact => 'm',
354     uri_lc_must => 'm',
355     uri_lc_should => 'w',
356    
357 wakaba 1.87 mime_must => 'm', # lowercase "must"
358     mime_fact => 'm',
359     mime_strongly_discouraged => 'w',
360     mime_discouraged => 'w',
361    
362 wakaba 1.85 langtag_fact => 'm',
363    
364 wakaba 1.84 rdf_fact => 'm',
365     rdf_grammer => 'm',
366     rdf_lc_must => 'm',
367 wakaba 1.83 };
368    
369 wakaba 1.56 sub check_document ($$$;$) {
370     my ($self, $doc, $onerror, $onsubdoc) = @_;
371 wakaba 1.42 $self = bless {}, $self unless ref $self;
372     $self->{onerror} = $onerror;
373 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
374     warn "A subdocument is not conformance-checked";
375     };
376 wakaba 1.1
377 wakaba 1.83 $self->{level} ||= $default_error_level;
378 wakaba 1.48
379 wakaba 1.73 ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.
380    
381 wakaba 1.42 my $docel = $doc->document_element;
382     unless (defined $docel) {
383     ## ISSUE: Should we check content of Document node?
384 wakaba 1.83 $onerror->(node => $doc, type => 'no document element',
385     level => $self->{level}->{must});
386 wakaba 1.42 ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
387     return {
388     class => {},
389     id => {}, table => [], term => {},
390     };
391 wakaba 1.1 }
392    
393 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
394 wakaba 1.1
395 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
396     $docel_nsuri = '' unless defined $docel_nsuri;
397 wakaba 1.79 load_ns_module ($docel_nsuri);
398 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
399     $Element->{$docel_nsuri}->{''} ||
400     $ElementDefault;
401     if ($docel_def->{is_root}) {
402     #
403 wakaba 1.50 } elsif ($docel_def->{is_xml_root}) {
404     unless ($doc->manakai_is_html) {
405     #
406     } else {
407 wakaba 1.83 $onerror->(node => $docel, type => 'element not allowed:root:xml',
408     level => $self->{level}->{must});
409 wakaba 1.50 }
410 wakaba 1.42 } else {
411 wakaba 1.83 $onerror->(node => $docel, type => 'element not allowed:root',
412     level => $self->{level}->{must});
413 wakaba 1.1 }
414    
415 wakaba 1.42 ## TODO: Check for other items other than document element
416     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
417 wakaba 1.2
418 wakaba 1.56 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
419 wakaba 1.51
420 wakaba 1.52 ## TODO: Test for these checks are necessary.
421 wakaba 1.51 my $charset_name = $doc->input_encoding;
422     if (defined $charset_name) {
423     require Message::Charset::Info;
424     my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
425    
426 wakaba 1.71 if ($doc->manakai_is_html) {
427     if (not $doc->manakai_has_bom and
428     not defined $doc->manakai_charset) {
429     unless ($charset->{is_html_ascii_superset}) {
430 wakaba 1.86 $onerror->(node => $doc,
431     level => $self->{level}->{must},
432 wakaba 1.83 type => 'non ascii superset',
433     text => $charset_name);
434 wakaba 1.71 }
435    
436     if (not $self->{has_charset} and ## TODO: This does not work now.
437     not $charset->{iana_names}->{'us-ascii'}) {
438 wakaba 1.86 $onerror->(node => $doc,
439     level => $self->{level}->{must},
440 wakaba 1.83 type => 'no character encoding declaration',
441     text => $charset_name);
442 wakaba 1.71 }
443 wakaba 1.51 }
444 wakaba 1.71
445     if ($charset->{iana_names}->{'utf-8'}) {
446     #
447     } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
448     $charset->{iana_names}->{'x-jis0208'} or
449     $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
450     $charset->{is_ebcdic_based}) {
451     $onerror->(node => $doc,
452 wakaba 1.83 type => 'bad character encoding',
453     text => $charset_name,
454     level => $self->{level}->{should},
455     layer => 'encode');
456 wakaba 1.71 } elsif ($charset->{iana_names}->{'cesu-8'} or
457     $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
458     $charset->{iana_names}->{'bocu-1'} or
459     $charset->{iana_names}->{'scsu'}) {
460     $onerror->(node => $doc,
461 wakaba 1.83 type => 'disallowed character encoding',
462     text => $charset_name,
463     level => $self->{level}->{must},
464     layer => 'encode');
465 wakaba 1.71 } else {
466     $onerror->(node => $doc,
467 wakaba 1.83 type => 'non-utf-8 character encoding',
468     text => $charset_name,
469     level => $self->{level}->{good},
470     layer => 'encode');
471 wakaba 1.51 }
472     }
473 wakaba 1.52 } elsif ($doc->manakai_is_html) {
474     ## NOTE: MUST and SHOULD requirements above cannot be tested,
475     ## since the document has no input charset encoding information.
476     $onerror->(node => $doc,
477 wakaba 1.83 type => 'character encoding unchecked',
478     level => $self->{level}->{info},
479     layer => 'encode');
480 wakaba 1.51 }
481    
482     return $return;
483 wakaba 1.42 } # check_document
484 wakaba 1.1
485 wakaba 1.81 ## Check an element. The element is checked as if it is an orphan node (i.e.
486     ## an element without a parent node).
487 wakaba 1.56 sub check_element ($$$;$) {
488     my ($self, $el, $onerror, $onsubdoc) = @_;
489 wakaba 1.42 $self = bless {}, $self unless ref $self;
490     $self->{onerror} = $onerror;
491 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
492     warn "A subdocument is not conformance-checked";
493     };
494 wakaba 1.2
495 wakaba 1.83 $self->{level} ||= $default_error_level;
496 wakaba 1.48
497 wakaba 1.61 $self->{plus_elements} = {};
498     $self->{minus_elements} = {};
499 wakaba 1.42 $self->{id} = {};
500     $self->{term} = {};
501     $self->{usemap} = [];
502 wakaba 1.78 $self->{ref} = []; # datetemplate data references
503     $self->{template} = []; # datatemplate template references
504 wakaba 1.42 $self->{contextmenu} = [];
505     $self->{map} = {};
506     $self->{menu} = {};
507     $self->{has_link_type} = {};
508 wakaba 1.60 $self->{flag} = {};
509 wakaba 1.46 #$self->{has_uri_attr};
510     #$self->{has_hyperlink_element};
511 wakaba 1.51 #$self->{has_charset};
512 wakaba 1.57 #$self->{has_base};
513 wakaba 1.42 $self->{return} = {
514     class => {},
515 wakaba 1.80 id => $self->{id},
516     table => [], # table objects returned by Whatpm::HTMLTable
517     term => $self->{term},
518 wakaba 1.76 uri => {}, # URIs other than those in RDF triples
519     ## TODO: xmlns="", SYSTEM "", atom:* src="", xml:base=""
520 wakaba 1.73 rdf => [],
521 wakaba 1.42 };
522 wakaba 1.4
523 wakaba 1.60 my @item = ({type => 'element', node => $el, parent_state => {}});
524 wakaba 1.66 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
525 wakaba 1.60 while (@item) {
526     my $item = shift @item;
527     if (ref $item eq 'ARRAY') {
528     my $code = shift @$item;
529     next unless $code;## TODO: temp.
530     $code->(@$item);
531     } elsif ($item->{type} eq 'element') {
532     my $el_nsuri = $item->{node}->namespace_uri;
533     $el_nsuri = '' unless defined $el_nsuri;
534     my $el_ln = $item->{node}->manakai_local_name;
535 wakaba 1.79
536     load_ns_module ($el_nsuri);
537 wakaba 1.63
538     my $element_state = {};
539 wakaba 1.60 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
540     $Element->{$el_nsuri}->{''} ||
541 wakaba 1.42 $ElementDefault;
542 wakaba 1.61 my $content_def = $item->{transparent}
543     ? $item->{parent_def} || $eldef : $eldef;
544 wakaba 1.63 my $content_state = $item->{transparent}
545 wakaba 1.65 ? $item->{parent_def}
546     ? $item->{parent_state} || $element_state : $element_state
547     : $element_state;
548 wakaba 1.60
549 wakaba 1.67 unless ($eldef->{status} & FEATURE_STATUS_REC) {
550     my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
551     $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
552     $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
553     $self->{onerror}->(node => $item->{node},
554     type => 'status:'.$status.':element',
555 wakaba 1.83 level => $self->{level}->{info});
556 wakaba 1.67 }
557 wakaba 1.70 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
558     $self->{onerror}->(node => $item->{node},
559     type => 'element not defined',
560 wakaba 1.83 level => $self->{level}->{must});
561 wakaba 1.70 } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
562     $self->{onerror}->(node => $item->{node},
563     type => 'deprecated:element',
564 wakaba 1.83 level => $self->{level}->{should});
565 wakaba 1.70 } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
566     $self->{onerror}->(node => $item->{node},
567     type => 'deprecated:element',
568 wakaba 1.83 level => $self->{level}->{info});
569 wakaba 1.70 }
570 wakaba 1.67
571 wakaba 1.60 my @new_item;
572     push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
573     push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
574 wakaba 1.61
575 wakaba 1.60 my @child = @{$item->{node}->child_nodes};
576     while (@child) {
577     my $child = shift @child;
578     my $child_nt = $child->node_type;
579     if ($child_nt == 1) { # ELEMENT_NODE
580     my $child_nsuri = $child->namespace_uri;
581     $child_nsuri = '' unless defined $child_nsuri;
582     my $child_ln = $child->manakai_local_name;
583     if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
584     not (($self->{flag}->{in_head} or
585 wakaba 1.61 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
586     $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
587 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
588     $self, $item, $child,
589 wakaba 1.66 $child_nsuri, $child_ln, 1,
590     $content_state, $element_state];
591 wakaba 1.60 push @new_item, {type => 'element', node => $child,
592 wakaba 1.65 parent_state => $content_state,
593 wakaba 1.61 parent_def => $content_def,
594 wakaba 1.66 real_parent_state => $element_state,
595 wakaba 1.60 transparent => 1};
596     } else {
597 wakaba 1.65 if ($item->{parent_def} and # has parent
598     $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
599 wakaba 1.61 if ($el_ln eq 'object') {
600     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
601     #
602     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
603     #
604     } else {
605 wakaba 1.62 $content_def = $item->{parent_def} || $content_def;
606 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
607 wakaba 1.62 }
608     } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
609     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
610     #
611     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
612     $element_state->{has_source} = 1;
613     } else {
614     $content_def = $item->{parent_def} || $content_def;
615 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
616 wakaba 1.61 }
617     }
618     }
619    
620 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
621     $self, $item, $child,
622 wakaba 1.64 $child_nsuri, $child_ln,
623     $HTMLSemiTransparentElements
624     ->{$child_nsuri}->{$child_ln},
625 wakaba 1.66 $content_state, $element_state];
626 wakaba 1.60 push @new_item, {type => 'element', node => $child,
627 wakaba 1.65 parent_def => $content_def,
628 wakaba 1.66 real_parent_state => $element_state,
629 wakaba 1.65 parent_state => $content_state};
630 wakaba 1.60 }
631    
632     if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
633     $element_state->{has_significant} = 1;
634     }
635     } elsif ($child_nt == 3 or # TEXT_NODE
636     $child_nt == 4) { # CDATA_SECTION_NODE
637     my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
638     push @new_item, [$content_def->{check_child_text},
639     $self, $item, $child, $has_significant,
640 wakaba 1.66 $content_state, $element_state];
641     $element_state->{has_significant} ||= $has_significant;
642 wakaba 1.61 if ($has_significant and
643     $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
644     $content_def = $item->{parent_def} || $content_def;
645     }
646 wakaba 1.60 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
647     push @child, @{$child->child_nodes};
648 wakaba 1.1 }
649 wakaba 1.60 ## TODO: PI_NODE
650     ## TODO: Unknown node type
651 wakaba 1.1 }
652 wakaba 1.60
653     push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
654    
655     unshift @item, @new_item;
656 wakaba 1.30 } else {
657 wakaba 1.60 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
658 wakaba 1.4 }
659 wakaba 1.1 }
660 wakaba 1.17
661 wakaba 1.78 for (@{$self->{template}}) {
662     ## TODO: If the document is an XML document, ...
663     ## NOTE: If the document is an HTML document:
664     ## ISSUE: We need to percent-decode?
665     F: {
666     if ($self->{id}->{$_->[0]}) {
667     my $el = $self->{id}->{$_->[0]}->[0]->owner_element;
668     if ($el->node_type == 1 and # ELEMENT_NODE
669     $el->manakai_local_name eq 'datatemplate') {
670     my $nsuri = $el->namespace_uri;
671     if (defined $nsuri and $nsuri eq $HTML_NS) {
672     if ($el eq $_->[1]->owner_element) {
673     $self->{onerror}->(node => $_->[1],
674     type => 'fragment points itself',
675 wakaba 1.83 level => $self->{level}->{must});
676 wakaba 1.78 }
677    
678     last F;
679     }
680     }
681     }
682     ## TODO: Should we raise a "fragment points nothing" error instead
683     ## if the fragment identifier identifies no element?
684    
685     $self->{onerror}->(node => $_->[1], type => 'template:not template',
686 wakaba 1.83 level => $self->{level}->{must});
687 wakaba 1.78 } # F
688     }
689    
690     for (@{$self->{ref}}) {
691     ## TOOD: If XML
692     ## NOTE: If it is an HTML document:
693     if ($_->[0] eq '') {
694     ## NOTE: It points the top of the document.
695     } elsif ($self->{id}->{$_->[0]}) {
696     if ($self->{id}->{$_->[0]}->[0]->owner_element
697     eq $_->[1]->owner_element) {
698     $self->{onerror}->(node => $_->[1], type => 'fragment points itself',
699 wakaba 1.83 level => $self->{level}->{must});
700 wakaba 1.78 }
701     } else {
702     $self->{onerror}->(node => $_->[1], type => 'fragment points nothing',
703 wakaba 1.83 level => $self->{level}->{must});
704 wakaba 1.78 }
705     }
706    
707     ## TODO: Maybe we should have $document->manakai_get_by_fragment or something
708    
709 wakaba 1.17 for (@{$self->{usemap}}) {
710     unless ($self->{map}->{$_->[0]}) {
711 wakaba 1.83 $self->{onerror}->(node => $_->[1], type => 'no referenced map',
712     level => $self->{level}->{must});
713 wakaba 1.17 }
714     }
715    
716 wakaba 1.32 for (@{$self->{contextmenu}}) {
717     unless ($self->{menu}->{$_->[0]}) {
718 wakaba 1.83 $self->{onerror}->(node => $_->[1], type => 'no referenced menu',
719     level => $self->{level}->{must});
720 wakaba 1.32 }
721     }
722    
723 wakaba 1.61 delete $self->{plus_elements};
724     delete $self->{minus_elements};
725 wakaba 1.17 delete $self->{onerror};
726     delete $self->{id};
727     delete $self->{usemap};
728 wakaba 1.78 delete $self->{ref};
729     delete $self->{template};
730 wakaba 1.17 delete $self->{map};
731 wakaba 1.33 return $self->{return};
732 wakaba 1.1 } # check_element
733    
734 wakaba 1.60 sub _add_minus_elements ($$@) {
735     my $self = shift;
736     my $element_state = shift;
737     for my $elements (@_) {
738     for my $nsuri (keys %$elements) {
739     for my $ln (keys %{$elements->{$nsuri}}) {
740     unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
741     $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
742     $self->{minus_elements}->{$nsuri}->{$ln} = 1;
743     }
744     }
745     }
746     }
747     } # _add_minus_elements
748    
749     sub _remove_minus_elements ($$) {
750     my $self = shift;
751     my $element_state = shift;
752     for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
753     for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
754     delete $self->{minus_elements}->{$nsuri}->{$ln};
755     }
756     }
757     } # _remove_minus_elements
758    
759     sub _add_plus_elements ($$@) {
760     my $self = shift;
761     my $element_state = shift;
762     for my $elements (@_) {
763     for my $nsuri (keys %$elements) {
764     for my $ln (keys %{$elements->{$nsuri}}) {
765     unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
766     $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
767     $self->{plus_elements}->{$nsuri}->{$ln} = 1;
768     }
769     }
770     }
771     }
772     } # _add_plus_elements
773    
774     sub _remove_plus_elements ($$) {
775     my $self = shift;
776     my $element_state = shift;
777     for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
778     for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
779     delete $self->{plus_elements}->{$nsuri}->{$ln};
780     }
781     }
782     } # _remove_plus_elements
783    
784 wakaba 1.68 sub _attr_status_info ($$$) {
785     my ($self, $attr, $status_code) = @_;
786 wakaba 1.70
787     if (not ($status_code & FEATURE_ALLOWED)) {
788     $self->{onerror}->(node => $attr,
789     type => 'attribute not defined',
790 wakaba 1.83 level => $self->{level}->{must});
791 wakaba 1.70 } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
792     $self->{onerror}->(node => $attr,
793     type => 'deprecated:attr',
794 wakaba 1.83 level => $self->{level}->{should});
795 wakaba 1.70 } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
796     $self->{onerror}->(node => $attr,
797     type => 'deprecated:attr',
798 wakaba 1.83 level => $self->{level}->{info});
799 wakaba 1.70 }
800    
801 wakaba 1.68 my $status;
802     if ($status_code & FEATURE_STATUS_REC) {
803     return;
804     } elsif ($status_code & FEATURE_STATUS_CR) {
805     $status = 'cr';
806     } elsif ($status_code & FEATURE_STATUS_LC) {
807     $status = 'lc';
808     } elsif ($status_code & FEATURE_STATUS_WD) {
809     $status = 'wd';
810     } else {
811     $status = 'non-standard';
812     }
813     $self->{onerror}->(node => $attr,
814     type => 'status:'.$status.':attr',
815 wakaba 1.83 level => $self->{level}->{info});
816 wakaba 1.68 } # _attr_status_info
817    
818 wakaba 1.2 sub _add_minuses ($@) {
819     my $self = shift;
820     my $r = {};
821     for my $list (@_) {
822     for my $ns (keys %$list) {
823     for my $ln (keys %{$list->{$ns}}) {
824     unless ($self->{minuses}->{$ns}->{$ln}) {
825     $self->{minuses}->{$ns}->{$ln} = 1;
826     $r->{$ns}->{$ln} = 1;
827     }
828     }
829     }
830     }
831 wakaba 1.4 return {type => 'plus', list => $r};
832 wakaba 1.2 } # _add_minuses
833    
834 wakaba 1.50 sub _add_pluses ($@) {
835     my $self = shift;
836     my $r = {};
837     for my $list (@_) {
838     for my $ns (keys %$list) {
839     for my $ln (keys %{$list->{$ns}}) {
840     unless ($self->{pluses}->{$ns}->{$ln}) {
841     $self->{pluses}->{$ns}->{$ln} = 1;
842     $r->{$ns}->{$ln} = 1;
843     }
844     }
845     }
846     }
847     return {type => 'minus', list => $r};
848     } # _add_pluses
849    
850 wakaba 1.2 sub _remove_minuses ($$) {
851 wakaba 1.4 my ($self, $todo) = @_;
852 wakaba 1.50 if ($todo->{type} eq 'minus') {
853     for my $ns (keys %{$todo->{list}}) {
854     for my $ln (keys %{$todo->{list}->{$ns}}) {
855     delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
856     }
857 wakaba 1.2 }
858 wakaba 1.50 } elsif ($todo->{type} eq 'plus') {
859     for my $ns (keys %{$todo->{list}}) {
860     for my $ln (keys %{$todo->{list}->{$ns}}) {
861     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
862     }
863     }
864     } else {
865     die "$0: Unknown +- type: $todo->{type}";
866 wakaba 1.2 }
867     1;
868     } # _remove_minuses
869    
870 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
871     ## undefined and implemented inconsistently; it is not a problem for
872     ## now, since no element belongs to both lists.
873    
874 wakaba 1.30 sub _check_get_children ($$$) {
875     my ($self, $node, $parent_todo) = @_;
876 wakaba 1.4 my $new_todos = [];
877 wakaba 1.2 my $sib = [];
878     TP: {
879     my $node_ns = $node->namespace_uri;
880     $node_ns = '' unless defined $node_ns;
881     my $node_ln = $node->manakai_local_name;
882 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
883     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
884     if ($parent_todo->{flag}->{in_head}) {
885     #
886     } else {
887     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
888     push @$sib, $end;
889    
890     unshift @$sib, @{$node->child_nodes};
891     push @$new_todos, {type => 'element-attributes', node => $node};
892     last TP;
893     }
894 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
895     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
896     unshift @$sib, @{$node->child_nodes};
897     push @$new_todos, {type => 'element-attributes', node => $node};
898     push @$new_todos,
899     {type => 'code',
900     code => sub {
901     $parent_todo->{flag}->{has_descendant}->{significant} = 0
902     if not $sig_flag;
903     }};
904     last TP;
905 wakaba 1.45 } else {
906     unshift @$sib, @{$node->child_nodes};
907     push @$new_todos, {type => 'element-attributes', node => $node};
908     last TP;
909 wakaba 1.2 }
910     }
911 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
912 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
913     unshift @$sib, @{$node->child_nodes};
914 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
915 wakaba 1.2 last TP;
916     } else {
917     my @cn = @{$node->child_nodes};
918     CN: while (@cn) {
919     my $cn = shift @cn;
920     my $cnt = $cn->node_type;
921     if ($cnt == 1) {
922 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
923     $cn_nsuri = '' unless defined $cn_nsuri;
924     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
925 wakaba 1.2 #
926     } else {
927     last CN;
928     }
929     } elsif ($cnt == 3 or $cnt == 4) {
930     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
931     last CN;
932     }
933     }
934     } # CN
935     unshift @$sib, @cn;
936     }
937 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
938     my @cn = @{$node->child_nodes};
939     CN: while (@cn) {
940     my $cn = shift @cn;
941     my $cnt = $cn->node_type;
942     if ($cnt == 1) {
943     my $cn_nsuri = $cn->namespace_uri;
944     $cn_nsuri = '' unless defined $cn_nsuri;
945     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
946     #
947     } else {
948     last CN;
949     }
950     } elsif ($cnt == 3 or $cnt == 4) {
951     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
952     last CN;
953     }
954     }
955     } # CN
956     unshift @$sib, @cn;
957 wakaba 1.2 }
958 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
959 wakaba 1.2 } # TP
960 wakaba 1.30
961     for my $new_todo (@$new_todos) {
962     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
963     }
964    
965 wakaba 1.4 return ($sib, $new_todos);
966 wakaba 1.2 } # _check_get_children
967    
968 wakaba 1.44 =head1 LICENSE
969    
970 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
971 wakaba 1.44
972     This library is free software; you can redistribute it
973     and/or modify it under the same terms as Perl itself.
974    
975     =cut
976    
977 wakaba 1.1 1;
978 wakaba 1.87 # $Date: 2008/08/16 07:35:23 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24