/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.78 - (hide annotations) (download)
Sat May 3 08:00:16 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.77: +54 -2 lines
++ whatpm/Whatpm/ChangeLog	3 May 2008 07:58:56 -0000
2008-05-03  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm (check_element): Support for |template=""|
	and |ref=""| attribute (referent element type checking).

++ whatpm/Whatpm/ContentChecker/ChangeLog	3 May 2008 07:58:22 -0000
2008-05-03  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Support for |template=""|, |ref=""|, and
	|registrationmark=""| attributes.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.78 our $VERSION=do{my @r=(q$Revision: 1.77 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
12 wakaba 1.70 ## Stability
13 wakaba 1.67 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14     sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15     sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16     sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17    
18 wakaba 1.70 ## Deprecated
19     sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20     sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21    
22     ## Conformance
23     sub FEATURE_ALLOWED () { 0b10000 }
24    
25 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28    
29 wakaba 1.42 my $Namespace = {
30 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
31 wakaba 1.72 q<http://purl.org/syndication/history/1.0>
32     => {module => 'Whatpm::ContentChecker::Atom'},
33     q<http://purl.org/syndication/threading/1.0>
34     => {module => 'Whatpm::ContentChecker::Atom'},
35 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
36     $XML_NS => {loaded => 1},
37     $XMLNS_NS => {loaded => 1},
38 wakaba 1.73 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
39 wakaba 1.42 };
40    
41     our $AttrChecker = {
42 wakaba 1.9 $XML_NS => {
43 wakaba 1.13 space => sub {
44     my ($self, $attr) = @_;
45     my $value = $attr->value;
46     if ($value eq 'default' or $value eq 'preserve') {
47     #
48     } else {
49     ## NOTE: An XML "error"
50 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'error',
51     type => 'invalid attribute value');
52 wakaba 1.13 }
53     },
54     lang => sub {
55 wakaba 1.35 my ($self, $attr) = @_;
56 wakaba 1.47 my $value = $attr->value;
57     if ($value eq '') {
58     #
59     } else {
60     require Whatpm::LangTag;
61     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
62     my %opt = @_;
63     my $type = 'LangTag:'.$opt{type};
64     $type .= ':' . $opt{subtag} if defined $opt{subtag};
65     $self->{onerror}->(node => $attr, type => $type,
66     value => $opt{value}, level => $opt{level});
67     });
68     }
69    
70 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
71     ## as defined by [IETF RFC 3066], Tags for the Identification
72     ## of Languages, or its successor; in addition, the empty string
73     ## may be specified." ("may" in lower case)
74 wakaba 1.47 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
75     ## allowed today?
76    
77     ## TODO: test data
78    
79 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
80 wakaba 1.36 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
81 wakaba 1.35 ## TODO: Test data...
82     }
83 wakaba 1.13 },
84     base => sub {
85     my ($self, $attr) = @_;
86     my $value = $attr->value;
87     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
88     $self->{onerror}->(node => $attr,
89 wakaba 1.33 type => 'invalid attribute value');
90 wakaba 1.13 }
91 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
92     ## no author requirement on conformance in the XML Base specification.
93 wakaba 1.13 },
94     id => sub {
95     my ($self, $attr) = @_;
96     my $value = $attr->value;
97     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
98     $value =~ s/^\x20//;
99     $value =~ s/\x20$//;
100     ## TODO: NCName in XML 1.0 or 1.1
101     ## TODO: declared type is ID?
102 wakaba 1.33 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
103     $self->{onerror}->(node => $attr, level => 'error',
104     type => 'duplicate ID');
105 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
106 wakaba 1.13 } else {
107 wakaba 1.37 $self->{id}->{$value} = [$attr];
108 wakaba 1.13 }
109     },
110 wakaba 1.9 },
111     $XMLNS_NS => {
112 wakaba 1.13 '' => sub {
113     my ($self, $attr) = @_;
114     my $ln = $attr->manakai_local_name;
115     my $value = $attr->value;
116     if ($value eq $XML_NS and $ln ne 'xml') {
117     $self->{onerror}
118 wakaba 1.33 ->(node => $attr, level => 'NC',
119     type => 'Reserved Prefixes and Namespace Names:=xml');
120 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
121     $self->{onerror}
122 wakaba 1.33 ->(node => $attr, level => 'NC',
123     type => 'Reserved Prefixes and Namespace Names:=xmlns');
124 wakaba 1.13 }
125     if ($ln eq 'xml' and $value ne $XML_NS) {
126     $self->{onerror}
127 wakaba 1.33 ->(node => $attr, level => 'NC',
128     type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
129 wakaba 1.13 } elsif ($ln eq 'xmlns') {
130     $self->{onerror}
131 wakaba 1.33 ->(node => $attr, level => 'NC',
132     type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
133 wakaba 1.13 }
134     ## TODO: If XML 1.0 and empty
135     },
136     xmlns => sub {
137     my ($self, $attr) = @_;
138     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
139     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
140 wakaba 1.18 ## TODO: relative references are deprecated
141 wakaba 1.13 my $value = $attr->value;
142     if ($value eq $XML_NS) {
143     $self->{onerror}
144 wakaba 1.33 ->(node => $attr, level => 'NC',
145     type => 'Reserved Prefixes and Namespace Names:=xml');
146 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
147     $self->{onerror}
148 wakaba 1.33 ->(node => $attr, level => 'NC',
149     type => 'Reserved Prefixes and Namespace Names:=xmlns');
150 wakaba 1.13 }
151     },
152 wakaba 1.9 },
153     };
154    
155 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
156 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
157     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
158     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
159     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
160    
161 wakaba 1.60 our %AnyChecker = (
162     check_start => sub { },
163     check_attrs => sub {
164     my ($self, $item, $element_state) = @_;
165     for my $attr (@{$item->{node}->attributes}) {
166 wakaba 1.9 my $attr_ns = $attr->namespace_uri;
167     $attr_ns = '' unless defined $attr_ns;
168     my $attr_ln = $attr->manakai_local_name;
169     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
170 wakaba 1.60 || $AttrChecker->{$attr_ns}->{''};
171 wakaba 1.9 if ($checker) {
172     $checker->($self, $attr);
173 wakaba 1.17 } else {
174 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'unsupported',
175     type => 'attribute');
176 wakaba 1.9 }
177     }
178     },
179 wakaba 1.60 check_child_element => sub {
180     my ($self, $item, $child_el, $child_nsuri, $child_ln,
181     $child_is_transparent, $element_state) = @_;
182     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
183     $self->{onerror}->(node => $child_el,
184     type => 'element not allowed:minus',
185     level => $self->{must_level});
186     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
187     #
188     } else {
189     #
190     }
191     },
192     check_child_text => sub { },
193     check_end => sub {
194     my ($self, $item, $element_state) = @_;
195     if ($element_state->{has_significant}) {
196 wakaba 1.66 $item->{real_parent_state}->{has_significant} = 1;
197 wakaba 1.60 }
198     },
199     );
200    
201     our $ElementDefault = {
202     %AnyChecker,
203 wakaba 1.70 status => FEATURE_ALLOWED,
204     ## NOTE: No "element not defined" error - it is not supported anyway.
205 wakaba 1.60 check_start => sub {
206     my ($self, $item, $element_state) = @_;
207     $self->{onerror}->(node => $item->{node}, level => 'unsupported',
208     type => 'element');
209     },
210 wakaba 1.1 };
211    
212 wakaba 1.60 our $HTMLEmbeddedContent = {
213     ## NOTE: All embedded content is also phrasing content.
214     $HTML_NS => {
215     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
216     canvas => 1,
217     },
218     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
219     q<http://www.w3.org/2000/svg> => {svg => 1},
220     ## NOTE: Foreign elements with content (but no metadata) are
221     ## embedded content.
222     };
223    
224 wakaba 1.7 my $HTMLTransparentElements = {
225 wakaba 1.57 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
226 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
227     ## and not in |head|.
228 wakaba 1.7 };
229    
230 wakaba 1.61 my $HTMLSemiTransparentElements = {
231     $HTML_NS => {object => 1, video => 1, audio => 1},
232     };
233 wakaba 1.57
234 wakaba 1.42 our $Element = {};
235 wakaba 1.7
236 wakaba 1.73 $Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
237     %AnyChecker,
238     status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
239     is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
240     check_start => sub {
241     my ($self, $item, $element_state) = @_;
242     my $triple = [];
243     push @{$self->{return}->{rdf}}, [$item->{node}, $triple];
244     require Whatpm::RDFXML;
245     my $rdf = Whatpm::RDFXML->new;
246 wakaba 1.75 ## TODO: Should we make bnodeid unique in a document?
247 wakaba 1.73 $rdf->{onerror} = $self->{onerror};
248     $rdf->{ontriple} = sub {
249     my %opt = @_;
250     push @$triple,
251     [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
252 wakaba 1.74 if (defined $opt{id}) {
253     push @$triple,
254     [$opt{node},
255     $opt{id},
256     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>},
257     $opt{subject}];
258     push @$triple,
259     [$opt{node},
260     $opt{id},
261     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>},
262     $opt{predicate}];
263     push @$triple,
264     [$opt{node},
265     $opt{id},
266     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>},
267     $opt{object}];
268     push @$triple,
269     [$opt{node},
270     $opt{id},
271     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>},
272     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}];
273     }
274 wakaba 1.73 };
275     $rdf->convert_rdf_element ($item->{node});
276     },
277     };
278    
279 wakaba 1.56 sub check_document ($$$;$) {
280     my ($self, $doc, $onerror, $onsubdoc) = @_;
281 wakaba 1.42 $self = bless {}, $self unless ref $self;
282     $self->{onerror} = $onerror;
283 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
284     warn "A subdocument is not conformance-checked";
285     };
286 wakaba 1.1
287 wakaba 1.48 $self->{must_level} = 'm';
288 wakaba 1.76 $self->{fact_level} = 'm';
289 wakaba 1.48 $self->{should_level} = 's';
290 wakaba 1.51 $self->{good_level} = 'w';
291 wakaba 1.67 $self->{info_level} = 'i';
292 wakaba 1.71 $self->{unsupported_level} = 'u';
293 wakaba 1.48
294 wakaba 1.73 ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.
295    
296 wakaba 1.42 my $docel = $doc->document_element;
297     unless (defined $docel) {
298     ## ISSUE: Should we check content of Document node?
299     $onerror->(node => $doc, type => 'no document element');
300     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
301     return {
302     class => {},
303     id => {}, table => [], term => {},
304     };
305 wakaba 1.1 }
306    
307 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
308 wakaba 1.1
309 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
310     $docel_nsuri = '' unless defined $docel_nsuri;
311 wakaba 1.43 unless ($Namespace->{$docel_nsuri}->{loaded}) {
312     if ($Namespace->{$docel_nsuri}->{module}) {
313     eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
314     } else {
315     $Namespace->{$docel_nsuri}->{loaded} = 1;
316     }
317     }
318 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
319     $Element->{$docel_nsuri}->{''} ||
320     $ElementDefault;
321     if ($docel_def->{is_root}) {
322     #
323 wakaba 1.50 } elsif ($docel_def->{is_xml_root}) {
324     unless ($doc->manakai_is_html) {
325     #
326     } else {
327     $onerror->(node => $docel, type => 'element not allowed:root:xml');
328     }
329 wakaba 1.42 } else {
330 wakaba 1.49 $onerror->(node => $docel, type => 'element not allowed:root');
331 wakaba 1.1 }
332    
333 wakaba 1.42 ## TODO: Check for other items other than document element
334     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
335 wakaba 1.2
336 wakaba 1.56 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
337 wakaba 1.51
338 wakaba 1.52 ## TODO: Test for these checks are necessary.
339 wakaba 1.51 my $charset_name = $doc->input_encoding;
340     if (defined $charset_name) {
341     require Message::Charset::Info;
342     my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
343    
344 wakaba 1.71 if ($doc->manakai_is_html) {
345     if (not $doc->manakai_has_bom and
346     not defined $doc->manakai_charset) {
347     unless ($charset->{is_html_ascii_superset}) {
348     $onerror->(node => $doc, level => $self->{must_level},
349     type => 'non ascii superset:'.$charset_name);
350     }
351    
352     if (not $self->{has_charset} and ## TODO: This does not work now.
353     not $charset->{iana_names}->{'us-ascii'}) {
354     $onerror->(node => $doc, level => $self->{must_level},
355     type => 'no character encoding declaration:'.$charset_name);
356     }
357 wakaba 1.51 }
358 wakaba 1.71
359     if ($charset->{iana_names}->{'utf-8'}) {
360     #
361     } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
362     $charset->{iana_names}->{'x-jis0208'} or
363     $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
364     $charset->{is_ebcdic_based}) {
365     $onerror->(node => $doc,
366     type => 'character encoding:'.$charset_name,
367     level => $self->{should_level});
368     } elsif ($charset->{iana_names}->{'cesu-8'} or
369     $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
370     $charset->{iana_names}->{'bocu-1'} or
371     $charset->{iana_names}->{'scsu'}) {
372     $onerror->(node => $doc,
373     type => 'character encoding:'.$charset_name,
374     level => $self->{must_level});
375     } else {
376     $onerror->(node => $doc,
377     type => 'character encoding:'.$charset_name,
378     level => $self->{good_level});
379 wakaba 1.51 }
380     }
381 wakaba 1.52 } elsif ($doc->manakai_is_html) {
382     ## NOTE: MUST and SHOULD requirements above cannot be tested,
383     ## since the document has no input charset encoding information.
384     $onerror->(node => $doc,
385     type => 'character encoding:',
386     level => 'unsupported');
387 wakaba 1.51 }
388    
389     return $return;
390 wakaba 1.42 } # check_document
391 wakaba 1.1
392 wakaba 1.56 sub check_element ($$$;$) {
393     my ($self, $el, $onerror, $onsubdoc) = @_;
394 wakaba 1.42 $self = bless {}, $self unless ref $self;
395     $self->{onerror} = $onerror;
396 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
397     warn "A subdocument is not conformance-checked";
398     };
399 wakaba 1.2
400 wakaba 1.48 $self->{must_level} = 'm';
401 wakaba 1.76 $self->{fact_level} = 'm';
402 wakaba 1.48 $self->{should_level} = 's';
403 wakaba 1.51 $self->{good_level} = 'w';
404 wakaba 1.67 $self->{info_level} = 'i';
405 wakaba 1.71 $self->{unsupported_level} = 'u';
406 wakaba 1.48
407 wakaba 1.61 $self->{plus_elements} = {};
408     $self->{minus_elements} = {};
409 wakaba 1.42 $self->{id} = {};
410     $self->{term} = {};
411     $self->{usemap} = [];
412 wakaba 1.78 $self->{ref} = []; # datetemplate data references
413     $self->{template} = []; # datatemplate template references
414 wakaba 1.42 $self->{contextmenu} = [];
415     $self->{map} = {};
416     $self->{menu} = {};
417     $self->{has_link_type} = {};
418 wakaba 1.60 $self->{flag} = {};
419 wakaba 1.46 #$self->{has_uri_attr};
420     #$self->{has_hyperlink_element};
421 wakaba 1.51 #$self->{has_charset};
422 wakaba 1.57 #$self->{has_base};
423 wakaba 1.42 $self->{return} = {
424     class => {},
425     id => $self->{id}, table => [], term => $self->{term},
426 wakaba 1.76 uri => {}, # URIs other than those in RDF triples
427     ## TODO: xmlns="", SYSTEM "", atom:* src="", xml:base=""
428 wakaba 1.73 rdf => [],
429 wakaba 1.42 };
430 wakaba 1.4
431 wakaba 1.60 my @item = ({type => 'element', node => $el, parent_state => {}});
432 wakaba 1.66 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
433 wakaba 1.60 while (@item) {
434     my $item = shift @item;
435     if (ref $item eq 'ARRAY') {
436     my $code = shift @$item;
437     next unless $code;## TODO: temp.
438     $code->(@$item);
439     } elsif ($item->{type} eq 'element') {
440     my $el_nsuri = $item->{node}->namespace_uri;
441     $el_nsuri = '' unless defined $el_nsuri;
442     my $el_ln = $item->{node}->manakai_local_name;
443    
444     unless ($Namespace->{$el_nsuri}->{loaded}) {
445     if ($Namespace->{$el_nsuri}->{module}) {
446     eval qq{ require $Namespace->{$el_nsuri}->{module} } or die $@;
447 wakaba 1.42 } else {
448 wakaba 1.60 $Namespace->{$el_nsuri}->{loaded} = 1;
449 wakaba 1.1 }
450     }
451 wakaba 1.63
452     my $element_state = {};
453 wakaba 1.60 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
454     $Element->{$el_nsuri}->{''} ||
455 wakaba 1.42 $ElementDefault;
456 wakaba 1.61 my $content_def = $item->{transparent}
457     ? $item->{parent_def} || $eldef : $eldef;
458 wakaba 1.63 my $content_state = $item->{transparent}
459 wakaba 1.65 ? $item->{parent_def}
460     ? $item->{parent_state} || $element_state : $element_state
461     : $element_state;
462 wakaba 1.60
463 wakaba 1.67 unless ($eldef->{status} & FEATURE_STATUS_REC) {
464     my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
465     $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
466     $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
467     $self->{onerror}->(node => $item->{node},
468     type => 'status:'.$status.':element',
469     level => $self->{info_level});
470     }
471 wakaba 1.70 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
472     $self->{onerror}->(node => $item->{node},
473     type => 'element not defined',
474     level => $self->{must_level});
475     } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
476     $self->{onerror}->(node => $item->{node},
477     type => 'deprecated:element',
478     level => $self->{should_level});
479     } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
480     $self->{onerror}->(node => $item->{node},
481     type => 'deprecated:element',
482     level => $self->{info_level});
483     }
484 wakaba 1.67
485 wakaba 1.60 my @new_item;
486     push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
487     push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
488 wakaba 1.61
489 wakaba 1.60 my @child = @{$item->{node}->child_nodes};
490     while (@child) {
491     my $child = shift @child;
492     my $child_nt = $child->node_type;
493     if ($child_nt == 1) { # ELEMENT_NODE
494     my $child_nsuri = $child->namespace_uri;
495     $child_nsuri = '' unless defined $child_nsuri;
496     my $child_ln = $child->manakai_local_name;
497     if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
498     not (($self->{flag}->{in_head} or
499 wakaba 1.61 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
500     $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
501 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
502     $self, $item, $child,
503 wakaba 1.66 $child_nsuri, $child_ln, 1,
504     $content_state, $element_state];
505 wakaba 1.60 push @new_item, {type => 'element', node => $child,
506 wakaba 1.65 parent_state => $content_state,
507 wakaba 1.61 parent_def => $content_def,
508 wakaba 1.66 real_parent_state => $element_state,
509 wakaba 1.60 transparent => 1};
510     } else {
511 wakaba 1.65 if ($item->{parent_def} and # has parent
512     $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
513 wakaba 1.61 if ($el_ln eq 'object') {
514     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
515     #
516     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
517     #
518     } else {
519 wakaba 1.62 $content_def = $item->{parent_def} || $content_def;
520 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
521 wakaba 1.62 }
522     } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
523     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
524     #
525     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
526     $element_state->{has_source} = 1;
527     } else {
528     $content_def = $item->{parent_def} || $content_def;
529 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
530 wakaba 1.61 }
531     }
532     }
533    
534 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
535     $self, $item, $child,
536 wakaba 1.64 $child_nsuri, $child_ln,
537     $HTMLSemiTransparentElements
538     ->{$child_nsuri}->{$child_ln},
539 wakaba 1.66 $content_state, $element_state];
540 wakaba 1.60 push @new_item, {type => 'element', node => $child,
541 wakaba 1.65 parent_def => $content_def,
542 wakaba 1.66 real_parent_state => $element_state,
543 wakaba 1.65 parent_state => $content_state};
544 wakaba 1.60 }
545    
546     if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
547     $element_state->{has_significant} = 1;
548     }
549     } elsif ($child_nt == 3 or # TEXT_NODE
550     $child_nt == 4) { # CDATA_SECTION_NODE
551     my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
552     push @new_item, [$content_def->{check_child_text},
553     $self, $item, $child, $has_significant,
554 wakaba 1.66 $content_state, $element_state];
555     $element_state->{has_significant} ||= $has_significant;
556 wakaba 1.61 if ($has_significant and
557     $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
558     $content_def = $item->{parent_def} || $content_def;
559     }
560 wakaba 1.60 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
561     push @child, @{$child->child_nodes};
562 wakaba 1.1 }
563 wakaba 1.60 ## TODO: PI_NODE
564     ## TODO: Unknown node type
565 wakaba 1.1 }
566 wakaba 1.60
567     push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
568    
569     unshift @item, @new_item;
570 wakaba 1.30 } else {
571 wakaba 1.60 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
572 wakaba 1.4 }
573 wakaba 1.1 }
574 wakaba 1.17
575 wakaba 1.78 for (@{$self->{template}}) {
576     ## TODO: If the document is an XML document, ...
577     ## NOTE: If the document is an HTML document:
578     ## ISSUE: We need to percent-decode?
579     F: {
580     if ($self->{id}->{$_->[0]}) {
581     my $el = $self->{id}->{$_->[0]}->[0]->owner_element;
582     if ($el->node_type == 1 and # ELEMENT_NODE
583     $el->manakai_local_name eq 'datatemplate') {
584     my $nsuri = $el->namespace_uri;
585     if (defined $nsuri and $nsuri eq $HTML_NS) {
586     if ($el eq $_->[1]->owner_element) {
587     $self->{onerror}->(node => $_->[1],
588     type => 'fragment points itself',
589     level => $self->{must_level});
590     }
591    
592     last F;
593     }
594     }
595     }
596     ## TODO: Should we raise a "fragment points nothing" error instead
597     ## if the fragment identifier identifies no element?
598    
599     $self->{onerror}->(node => $_->[1], type => 'template:not template',
600     level => $self->{must_level});
601     } # F
602     }
603    
604     for (@{$self->{ref}}) {
605     ## TOOD: If XML
606     ## NOTE: If it is an HTML document:
607     if ($_->[0] eq '') {
608     ## NOTE: It points the top of the document.
609     } elsif ($self->{id}->{$_->[0]}) {
610     if ($self->{id}->{$_->[0]}->[0]->owner_element
611     eq $_->[1]->owner_element) {
612     $self->{onerror}->(node => $_->[1], type => 'fragment points itself',
613     level => $self->{must_level});
614     }
615     } else {
616     $self->{onerror}->(node => $_->[1], type => 'fragment points nothing',
617     level => $self->{must_level});
618     }
619     }
620    
621     ## TODO: Maybe we should have $document->manakai_get_by_fragment or something
622    
623 wakaba 1.17 for (@{$self->{usemap}}) {
624     unless ($self->{map}->{$_->[0]}) {
625     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
626     }
627     }
628    
629 wakaba 1.32 for (@{$self->{contextmenu}}) {
630     unless ($self->{menu}->{$_->[0]}) {
631     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
632     }
633     }
634    
635 wakaba 1.61 delete $self->{plus_elements};
636     delete $self->{minus_elements};
637 wakaba 1.17 delete $self->{onerror};
638     delete $self->{id};
639     delete $self->{usemap};
640 wakaba 1.78 delete $self->{ref};
641     delete $self->{template};
642 wakaba 1.17 delete $self->{map};
643 wakaba 1.33 return $self->{return};
644 wakaba 1.1 } # check_element
645    
646 wakaba 1.60 sub _add_minus_elements ($$@) {
647     my $self = shift;
648     my $element_state = shift;
649     for my $elements (@_) {
650     for my $nsuri (keys %$elements) {
651     for my $ln (keys %{$elements->{$nsuri}}) {
652     unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
653     $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
654     $self->{minus_elements}->{$nsuri}->{$ln} = 1;
655     }
656     }
657     }
658     }
659     } # _add_minus_elements
660    
661     sub _remove_minus_elements ($$) {
662     my $self = shift;
663     my $element_state = shift;
664     for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
665     for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
666     delete $self->{minus_elements}->{$nsuri}->{$ln};
667     }
668     }
669     } # _remove_minus_elements
670    
671     sub _add_plus_elements ($$@) {
672     my $self = shift;
673     my $element_state = shift;
674     for my $elements (@_) {
675     for my $nsuri (keys %$elements) {
676     for my $ln (keys %{$elements->{$nsuri}}) {
677     unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
678     $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
679     $self->{plus_elements}->{$nsuri}->{$ln} = 1;
680     }
681     }
682     }
683     }
684     } # _add_plus_elements
685    
686     sub _remove_plus_elements ($$) {
687     my $self = shift;
688     my $element_state = shift;
689     for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
690     for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
691     delete $self->{plus_elements}->{$nsuri}->{$ln};
692     }
693     }
694     } # _remove_plus_elements
695    
696 wakaba 1.68 sub _attr_status_info ($$$) {
697     my ($self, $attr, $status_code) = @_;
698 wakaba 1.70
699     if (not ($status_code & FEATURE_ALLOWED)) {
700     $self->{onerror}->(node => $attr,
701     type => 'attribute not defined',
702     level => $self->{must_level});
703     } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
704     $self->{onerror}->(node => $attr,
705     type => 'deprecated:attr',
706     level => $self->{should_level});
707     } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
708     $self->{onerror}->(node => $attr,
709     type => 'deprecated:attr',
710     level => $self->{info_level});
711     }
712    
713 wakaba 1.68 my $status;
714     if ($status_code & FEATURE_STATUS_REC) {
715     return;
716     } elsif ($status_code & FEATURE_STATUS_CR) {
717     $status = 'cr';
718     } elsif ($status_code & FEATURE_STATUS_LC) {
719     $status = 'lc';
720     } elsif ($status_code & FEATURE_STATUS_WD) {
721     $status = 'wd';
722     } else {
723     $status = 'non-standard';
724     }
725     $self->{onerror}->(node => $attr,
726     type => 'status:'.$status.':attr',
727     level => $self->{info_level});
728     } # _attr_status_info
729    
730 wakaba 1.2 sub _add_minuses ($@) {
731     my $self = shift;
732     my $r = {};
733     for my $list (@_) {
734     for my $ns (keys %$list) {
735     for my $ln (keys %{$list->{$ns}}) {
736     unless ($self->{minuses}->{$ns}->{$ln}) {
737     $self->{minuses}->{$ns}->{$ln} = 1;
738     $r->{$ns}->{$ln} = 1;
739     }
740     }
741     }
742     }
743 wakaba 1.4 return {type => 'plus', list => $r};
744 wakaba 1.2 } # _add_minuses
745    
746 wakaba 1.50 sub _add_pluses ($@) {
747     my $self = shift;
748     my $r = {};
749     for my $list (@_) {
750     for my $ns (keys %$list) {
751     for my $ln (keys %{$list->{$ns}}) {
752     unless ($self->{pluses}->{$ns}->{$ln}) {
753     $self->{pluses}->{$ns}->{$ln} = 1;
754     $r->{$ns}->{$ln} = 1;
755     }
756     }
757     }
758     }
759     return {type => 'minus', list => $r};
760     } # _add_pluses
761    
762 wakaba 1.2 sub _remove_minuses ($$) {
763 wakaba 1.4 my ($self, $todo) = @_;
764 wakaba 1.50 if ($todo->{type} eq 'minus') {
765     for my $ns (keys %{$todo->{list}}) {
766     for my $ln (keys %{$todo->{list}->{$ns}}) {
767     delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
768     }
769 wakaba 1.2 }
770 wakaba 1.50 } elsif ($todo->{type} eq 'plus') {
771     for my $ns (keys %{$todo->{list}}) {
772     for my $ln (keys %{$todo->{list}->{$ns}}) {
773     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
774     }
775     }
776     } else {
777     die "$0: Unknown +- type: $todo->{type}";
778 wakaba 1.2 }
779     1;
780     } # _remove_minuses
781    
782 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
783     ## undefined and implemented inconsistently; it is not a problem for
784     ## now, since no element belongs to both lists.
785    
786 wakaba 1.30 sub _check_get_children ($$$) {
787     my ($self, $node, $parent_todo) = @_;
788 wakaba 1.4 my $new_todos = [];
789 wakaba 1.2 my $sib = [];
790     TP: {
791     my $node_ns = $node->namespace_uri;
792     $node_ns = '' unless defined $node_ns;
793     my $node_ln = $node->manakai_local_name;
794 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
795     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
796     if ($parent_todo->{flag}->{in_head}) {
797     #
798     } else {
799     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
800     push @$sib, $end;
801    
802     unshift @$sib, @{$node->child_nodes};
803     push @$new_todos, {type => 'element-attributes', node => $node};
804     last TP;
805     }
806 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
807     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
808     unshift @$sib, @{$node->child_nodes};
809     push @$new_todos, {type => 'element-attributes', node => $node};
810     push @$new_todos,
811     {type => 'code',
812     code => sub {
813     $parent_todo->{flag}->{has_descendant}->{significant} = 0
814     if not $sig_flag;
815     }};
816     last TP;
817 wakaba 1.45 } else {
818     unshift @$sib, @{$node->child_nodes};
819     push @$new_todos, {type => 'element-attributes', node => $node};
820     last TP;
821 wakaba 1.2 }
822     }
823 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
824 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
825     unshift @$sib, @{$node->child_nodes};
826 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
827 wakaba 1.2 last TP;
828     } else {
829     my @cn = @{$node->child_nodes};
830     CN: while (@cn) {
831     my $cn = shift @cn;
832     my $cnt = $cn->node_type;
833     if ($cnt == 1) {
834 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
835     $cn_nsuri = '' unless defined $cn_nsuri;
836     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
837 wakaba 1.2 #
838     } else {
839     last CN;
840     }
841     } elsif ($cnt == 3 or $cnt == 4) {
842     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
843     last CN;
844     }
845     }
846     } # CN
847     unshift @$sib, @cn;
848     }
849 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
850     my @cn = @{$node->child_nodes};
851     CN: while (@cn) {
852     my $cn = shift @cn;
853     my $cnt = $cn->node_type;
854     if ($cnt == 1) {
855     my $cn_nsuri = $cn->namespace_uri;
856     $cn_nsuri = '' unless defined $cn_nsuri;
857     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
858     #
859     } else {
860     last CN;
861     }
862     } elsif ($cnt == 3 or $cnt == 4) {
863     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
864     last CN;
865     }
866     }
867     } # CN
868     unshift @$sib, @cn;
869 wakaba 1.2 }
870 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
871 wakaba 1.2 } # TP
872 wakaba 1.30
873     for my $new_todo (@$new_todos) {
874     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
875     }
876    
877 wakaba 1.4 return ($sib, $new_todos);
878 wakaba 1.2 } # _check_get_children
879    
880 wakaba 1.44 =head1 LICENSE
881    
882 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
883 wakaba 1.44
884     This library is free software; you can redistribute it
885     and/or modify it under the same terms as Perl itself.
886    
887     =cut
888    
889 wakaba 1.1 1;
890 wakaba 1.78 # $Date: 2008/04/12 10:41:30 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24