/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.82 - (hide annotations) (download)
Sun Jun 8 12:22:54 2008 UTC (16 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.81: +3 -2 lines
++ whatpm/t/ChangeLog	8 Jun 2008 12:20:25 -0000
	* content-model-1.dat: Test data for ruby are added (HTML5 revision
	1704).

2008-06-08  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	8 Jun 2008 12:22:16 -0000
2008-06-08  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Support for ruby, rt, and rp elements (HTML5
	revision 1704).  The |href| attribute is also extended
	as a common attribute by RDFa Last Call Working Draft.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.82 our $VERSION=do{my @r=(q$Revision: 1.81 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
12 wakaba 1.70 ## Stability
13 wakaba 1.67 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14     sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15     sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16     sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17    
18 wakaba 1.70 ## Deprecated
19     sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20     sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21    
22     ## Conformance
23     sub FEATURE_ALLOWED () { 0b10000 }
24    
25 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28    
29 wakaba 1.42 my $Namespace = {
30 wakaba 1.79 '' => {loaded => 1},
31 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
32 wakaba 1.72 q<http://purl.org/syndication/history/1.0>
33     => {module => 'Whatpm::ContentChecker::Atom'},
34     q<http://purl.org/syndication/threading/1.0>
35     => {module => 'Whatpm::ContentChecker::Atom'},
36 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
37     $XML_NS => {loaded => 1},
38     $XMLNS_NS => {loaded => 1},
39 wakaba 1.73 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
40 wakaba 1.42 };
41    
42 wakaba 1.79 sub load_ns_module ($) {
43     my $nsuri = shift; # namespace URI or ''
44     unless ($Namespace->{$nsuri}->{loaded}) {
45     if ($Namespace->{$nsuri}->{module}) {
46     eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
47     } else {
48     $Namespace->{$nsuri}->{loaded} = 1;
49     }
50     }
51     } # load_ns_module
52    
53 wakaba 1.42 our $AttrChecker = {
54 wakaba 1.9 $XML_NS => {
55 wakaba 1.13 space => sub {
56     my ($self, $attr) = @_;
57     my $value = $attr->value;
58     if ($value eq 'default' or $value eq 'preserve') {
59     #
60     } else {
61     ## NOTE: An XML "error"
62 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'error',
63     type => 'invalid attribute value');
64 wakaba 1.13 }
65     },
66     lang => sub {
67 wakaba 1.35 my ($self, $attr) = @_;
68 wakaba 1.47 my $value = $attr->value;
69     if ($value eq '') {
70     #
71     } else {
72     require Whatpm::LangTag;
73     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
74     my %opt = @_;
75     my $type = 'LangTag:'.$opt{type};
76     $type .= ':' . $opt{subtag} if defined $opt{subtag};
77     $self->{onerror}->(node => $attr, type => $type,
78     value => $opt{value}, level => $opt{level});
79     });
80     }
81    
82 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
83     ## as defined by [IETF RFC 3066], Tags for the Identification
84     ## of Languages, or its successor; in addition, the empty string
85     ## may be specified." ("may" in lower case)
86 wakaba 1.47 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
87     ## allowed today?
88    
89     ## TODO: test data
90    
91 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
92 wakaba 1.36 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
93 wakaba 1.35 ## TODO: Test data...
94     }
95 wakaba 1.13 },
96     base => sub {
97     my ($self, $attr) = @_;
98     my $value = $attr->value;
99     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
100     $self->{onerror}->(node => $attr,
101 wakaba 1.33 type => 'invalid attribute value');
102 wakaba 1.13 }
103 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
104     ## no author requirement on conformance in the XML Base specification.
105 wakaba 1.13 },
106     id => sub {
107     my ($self, $attr) = @_;
108     my $value = $attr->value;
109     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
110     $value =~ s/^\x20//;
111     $value =~ s/\x20$//;
112     ## TODO: NCName in XML 1.0 or 1.1
113     ## TODO: declared type is ID?
114 wakaba 1.33 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
115     $self->{onerror}->(node => $attr, level => 'error',
116     type => 'duplicate ID');
117 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
118 wakaba 1.13 } else {
119 wakaba 1.37 $self->{id}->{$value} = [$attr];
120 wakaba 1.13 }
121     },
122 wakaba 1.9 },
123     $XMLNS_NS => {
124 wakaba 1.13 '' => sub {
125     my ($self, $attr) = @_;
126     my $ln = $attr->manakai_local_name;
127     my $value = $attr->value;
128     if ($value eq $XML_NS and $ln ne 'xml') {
129     $self->{onerror}
130 wakaba 1.33 ->(node => $attr, level => 'NC',
131     type => 'Reserved Prefixes and Namespace Names:=xml');
132 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
133     $self->{onerror}
134 wakaba 1.33 ->(node => $attr, level => 'NC',
135     type => 'Reserved Prefixes and Namespace Names:=xmlns');
136 wakaba 1.13 }
137     if ($ln eq 'xml' and $value ne $XML_NS) {
138     $self->{onerror}
139 wakaba 1.33 ->(node => $attr, level => 'NC',
140     type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
141 wakaba 1.13 } elsif ($ln eq 'xmlns') {
142     $self->{onerror}
143 wakaba 1.33 ->(node => $attr, level => 'NC',
144     type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
145 wakaba 1.13 }
146     ## TODO: If XML 1.0 and empty
147     },
148     xmlns => sub {
149     my ($self, $attr) = @_;
150     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
151     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
152 wakaba 1.18 ## TODO: relative references are deprecated
153 wakaba 1.13 my $value = $attr->value;
154     if ($value eq $XML_NS) {
155     $self->{onerror}
156 wakaba 1.33 ->(node => $attr, level => 'NC',
157     type => 'Reserved Prefixes and Namespace Names:=xml');
158 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
159     $self->{onerror}
160 wakaba 1.33 ->(node => $attr, level => 'NC',
161     type => 'Reserved Prefixes and Namespace Names:=xmlns');
162 wakaba 1.13 }
163     },
164 wakaba 1.9 },
165     };
166    
167 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
168 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
169     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
170     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
171     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
172    
173 wakaba 1.79 our $AttrStatus;
174    
175     for (qw/space lang base id/) {
176     $AttrStatus->{$XML_NS}->{$_} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
177     $AttrStatus->{''}->{"xml:$_"} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
178     ## XML 1.0: FEATURE_STATUS_CR
179     ## XML 1.1: FEATURE_STATUS_REC
180     ## XML Namespaces 1.0: FEATURE_STATUS_CR
181     ## XML Namespaces 1.1: FEATURE_STATUS_REC
182     ## XML Base: FEATURE_STATUS_REC
183     ## xml:id: FEATURE_STATUS_REC
184     }
185    
186     $AttrStatus->{$XMLNS_NS}->{''} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
187    
188     ## TODO: xsi:schemaLocation for XHTML2 support (very, very low priority)
189    
190 wakaba 1.60 our %AnyChecker = (
191     check_start => sub { },
192     check_attrs => sub {
193     my ($self, $item, $element_state) = @_;
194     for my $attr (@{$item->{node}->attributes}) {
195 wakaba 1.9 my $attr_ns = $attr->namespace_uri;
196     $attr_ns = '' unless defined $attr_ns;
197     my $attr_ln = $attr->manakai_local_name;
198 wakaba 1.79
199     load_ns_module ($attr_ns);
200    
201 wakaba 1.9 my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
202 wakaba 1.60 || $AttrChecker->{$attr_ns}->{''};
203 wakaba 1.79 my $status = $AttrStatus->{$attr_ns}->{$attr_ln}
204     || $AttrStatus->{$attr_ns}->{''};
205     if (not defined $status) {
206     $status = FEATURE_ALLOWED;
207     ## NOTE: FEATURE_ALLOWED for all attributes, since the element
208     ## is not supported and therefore "attribute not defined" error
209     ## should not raised (too verbose) and global attributes should be
210     ## allowed anyway (if a global attribute has its specified creteria
211     ## for where it may be specified, then it should be checked in it's
212     ## checker function).
213     }
214 wakaba 1.9 if ($checker) {
215     $checker->($self, $attr);
216 wakaba 1.17 } else {
217 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'unsupported',
218     type => 'attribute');
219 wakaba 1.9 }
220 wakaba 1.79 $self->_attr_status_info ($attr, $status);
221 wakaba 1.9 }
222     },
223 wakaba 1.60 check_child_element => sub {
224     my ($self, $item, $child_el, $child_nsuri, $child_ln,
225     $child_is_transparent, $element_state) = @_;
226     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
227     $self->{onerror}->(node => $child_el,
228     type => 'element not allowed:minus',
229     level => $self->{must_level});
230     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
231     #
232     } else {
233     #
234     }
235     },
236     check_child_text => sub { },
237     check_end => sub {
238     my ($self, $item, $element_state) = @_;
239 wakaba 1.82 ## NOTE: There is a modified copy of the code below for |html:ruby|.
240 wakaba 1.60 if ($element_state->{has_significant}) {
241 wakaba 1.66 $item->{real_parent_state}->{has_significant} = 1;
242 wakaba 1.60 }
243     },
244     );
245    
246     our $ElementDefault = {
247     %AnyChecker,
248 wakaba 1.70 status => FEATURE_ALLOWED,
249     ## NOTE: No "element not defined" error - it is not supported anyway.
250 wakaba 1.60 check_start => sub {
251     my ($self, $item, $element_state) = @_;
252     $self->{onerror}->(node => $item->{node}, level => 'unsupported',
253     type => 'element');
254     },
255 wakaba 1.1 };
256    
257 wakaba 1.60 our $HTMLEmbeddedContent = {
258     ## NOTE: All embedded content is also phrasing content.
259     $HTML_NS => {
260     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
261     canvas => 1,
262     },
263     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
264     q<http://www.w3.org/2000/svg> => {svg => 1},
265     ## NOTE: Foreign elements with content (but no metadata) are
266     ## embedded content.
267     };
268    
269 wakaba 1.7 my $HTMLTransparentElements = {
270 wakaba 1.57 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
271 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
272     ## and not in |head|.
273 wakaba 1.7 };
274    
275 wakaba 1.61 my $HTMLSemiTransparentElements = {
276     $HTML_NS => {object => 1, video => 1, audio => 1},
277     };
278 wakaba 1.57
279 wakaba 1.42 our $Element = {};
280 wakaba 1.7
281 wakaba 1.73 $Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
282     %AnyChecker,
283     status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
284     is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
285     check_start => sub {
286     my ($self, $item, $element_state) = @_;
287     my $triple = [];
288     push @{$self->{return}->{rdf}}, [$item->{node}, $triple];
289     require Whatpm::RDFXML;
290     my $rdf = Whatpm::RDFXML->new;
291 wakaba 1.75 ## TODO: Should we make bnodeid unique in a document?
292 wakaba 1.73 $rdf->{onerror} = $self->{onerror};
293     $rdf->{ontriple} = sub {
294     my %opt = @_;
295     push @$triple,
296     [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
297 wakaba 1.74 if (defined $opt{id}) {
298     push @$triple,
299     [$opt{node},
300     $opt{id},
301     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>},
302     $opt{subject}];
303     push @$triple,
304     [$opt{node},
305     $opt{id},
306     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>},
307     $opt{predicate}];
308     push @$triple,
309     [$opt{node},
310     $opt{id},
311     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>},
312     $opt{object}];
313     push @$triple,
314     [$opt{node},
315     $opt{id},
316     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>},
317     {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}];
318     }
319 wakaba 1.73 };
320     $rdf->convert_rdf_element ($item->{node});
321     },
322     };
323    
324 wakaba 1.56 sub check_document ($$$;$) {
325     my ($self, $doc, $onerror, $onsubdoc) = @_;
326 wakaba 1.42 $self = bless {}, $self unless ref $self;
327     $self->{onerror} = $onerror;
328 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
329     warn "A subdocument is not conformance-checked";
330     };
331 wakaba 1.1
332 wakaba 1.48 $self->{must_level} = 'm';
333 wakaba 1.76 $self->{fact_level} = 'm';
334 wakaba 1.48 $self->{should_level} = 's';
335 wakaba 1.51 $self->{good_level} = 'w';
336 wakaba 1.67 $self->{info_level} = 'i';
337 wakaba 1.71 $self->{unsupported_level} = 'u';
338 wakaba 1.48
339 wakaba 1.73 ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.
340    
341 wakaba 1.42 my $docel = $doc->document_element;
342     unless (defined $docel) {
343     ## ISSUE: Should we check content of Document node?
344     $onerror->(node => $doc, type => 'no document element');
345     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
346     return {
347     class => {},
348     id => {}, table => [], term => {},
349     };
350 wakaba 1.1 }
351    
352 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
353 wakaba 1.1
354 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
355     $docel_nsuri = '' unless defined $docel_nsuri;
356 wakaba 1.79 load_ns_module ($docel_nsuri);
357 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
358     $Element->{$docel_nsuri}->{''} ||
359     $ElementDefault;
360     if ($docel_def->{is_root}) {
361     #
362 wakaba 1.50 } elsif ($docel_def->{is_xml_root}) {
363     unless ($doc->manakai_is_html) {
364     #
365     } else {
366     $onerror->(node => $docel, type => 'element not allowed:root:xml');
367     }
368 wakaba 1.42 } else {
369 wakaba 1.49 $onerror->(node => $docel, type => 'element not allowed:root');
370 wakaba 1.1 }
371    
372 wakaba 1.42 ## TODO: Check for other items other than document element
373     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
374 wakaba 1.2
375 wakaba 1.56 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
376 wakaba 1.51
377 wakaba 1.52 ## TODO: Test for these checks are necessary.
378 wakaba 1.51 my $charset_name = $doc->input_encoding;
379     if (defined $charset_name) {
380     require Message::Charset::Info;
381     my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
382    
383 wakaba 1.71 if ($doc->manakai_is_html) {
384     if (not $doc->manakai_has_bom and
385     not defined $doc->manakai_charset) {
386     unless ($charset->{is_html_ascii_superset}) {
387     $onerror->(node => $doc, level => $self->{must_level},
388     type => 'non ascii superset:'.$charset_name);
389     }
390    
391     if (not $self->{has_charset} and ## TODO: This does not work now.
392     not $charset->{iana_names}->{'us-ascii'}) {
393     $onerror->(node => $doc, level => $self->{must_level},
394     type => 'no character encoding declaration:'.$charset_name);
395     }
396 wakaba 1.51 }
397 wakaba 1.71
398     if ($charset->{iana_names}->{'utf-8'}) {
399     #
400     } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
401     $charset->{iana_names}->{'x-jis0208'} or
402     $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
403     $charset->{is_ebcdic_based}) {
404     $onerror->(node => $doc,
405     type => 'character encoding:'.$charset_name,
406     level => $self->{should_level});
407     } elsif ($charset->{iana_names}->{'cesu-8'} or
408     $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
409     $charset->{iana_names}->{'bocu-1'} or
410     $charset->{iana_names}->{'scsu'}) {
411     $onerror->(node => $doc,
412     type => 'character encoding:'.$charset_name,
413     level => $self->{must_level});
414     } else {
415     $onerror->(node => $doc,
416     type => 'character encoding:'.$charset_name,
417     level => $self->{good_level});
418 wakaba 1.51 }
419     }
420 wakaba 1.52 } elsif ($doc->manakai_is_html) {
421     ## NOTE: MUST and SHOULD requirements above cannot be tested,
422     ## since the document has no input charset encoding information.
423     $onerror->(node => $doc,
424     type => 'character encoding:',
425     level => 'unsupported');
426 wakaba 1.51 }
427    
428     return $return;
429 wakaba 1.42 } # check_document
430 wakaba 1.1
431 wakaba 1.81 ## Check an element. The element is checked as if it is an orphan node (i.e.
432     ## an element without a parent node).
433 wakaba 1.56 sub check_element ($$$;$) {
434     my ($self, $el, $onerror, $onsubdoc) = @_;
435 wakaba 1.42 $self = bless {}, $self unless ref $self;
436     $self->{onerror} = $onerror;
437 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
438     warn "A subdocument is not conformance-checked";
439     };
440 wakaba 1.2
441 wakaba 1.48 $self->{must_level} = 'm';
442 wakaba 1.76 $self->{fact_level} = 'm';
443 wakaba 1.48 $self->{should_level} = 's';
444 wakaba 1.51 $self->{good_level} = 'w';
445 wakaba 1.67 $self->{info_level} = 'i';
446 wakaba 1.71 $self->{unsupported_level} = 'u';
447 wakaba 1.48
448 wakaba 1.61 $self->{plus_elements} = {};
449     $self->{minus_elements} = {};
450 wakaba 1.42 $self->{id} = {};
451     $self->{term} = {};
452     $self->{usemap} = [];
453 wakaba 1.78 $self->{ref} = []; # datetemplate data references
454     $self->{template} = []; # datatemplate template references
455 wakaba 1.42 $self->{contextmenu} = [];
456     $self->{map} = {};
457     $self->{menu} = {};
458     $self->{has_link_type} = {};
459 wakaba 1.60 $self->{flag} = {};
460 wakaba 1.46 #$self->{has_uri_attr};
461     #$self->{has_hyperlink_element};
462 wakaba 1.51 #$self->{has_charset};
463 wakaba 1.57 #$self->{has_base};
464 wakaba 1.42 $self->{return} = {
465     class => {},
466 wakaba 1.80 id => $self->{id},
467     table => [], # table objects returned by Whatpm::HTMLTable
468     term => $self->{term},
469 wakaba 1.76 uri => {}, # URIs other than those in RDF triples
470     ## TODO: xmlns="", SYSTEM "", atom:* src="", xml:base=""
471 wakaba 1.73 rdf => [],
472 wakaba 1.42 };
473 wakaba 1.4
474 wakaba 1.60 my @item = ({type => 'element', node => $el, parent_state => {}});
475 wakaba 1.66 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
476 wakaba 1.60 while (@item) {
477     my $item = shift @item;
478     if (ref $item eq 'ARRAY') {
479     my $code = shift @$item;
480     next unless $code;## TODO: temp.
481     $code->(@$item);
482     } elsif ($item->{type} eq 'element') {
483     my $el_nsuri = $item->{node}->namespace_uri;
484     $el_nsuri = '' unless defined $el_nsuri;
485     my $el_ln = $item->{node}->manakai_local_name;
486 wakaba 1.79
487     load_ns_module ($el_nsuri);
488 wakaba 1.63
489     my $element_state = {};
490 wakaba 1.60 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
491     $Element->{$el_nsuri}->{''} ||
492 wakaba 1.42 $ElementDefault;
493 wakaba 1.61 my $content_def = $item->{transparent}
494     ? $item->{parent_def} || $eldef : $eldef;
495 wakaba 1.63 my $content_state = $item->{transparent}
496 wakaba 1.65 ? $item->{parent_def}
497     ? $item->{parent_state} || $element_state : $element_state
498     : $element_state;
499 wakaba 1.60
500 wakaba 1.67 unless ($eldef->{status} & FEATURE_STATUS_REC) {
501     my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
502     $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
503     $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
504     $self->{onerror}->(node => $item->{node},
505     type => 'status:'.$status.':element',
506     level => $self->{info_level});
507     }
508 wakaba 1.70 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
509     $self->{onerror}->(node => $item->{node},
510     type => 'element not defined',
511     level => $self->{must_level});
512     } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
513     $self->{onerror}->(node => $item->{node},
514     type => 'deprecated:element',
515     level => $self->{should_level});
516     } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
517     $self->{onerror}->(node => $item->{node},
518     type => 'deprecated:element',
519     level => $self->{info_level});
520     }
521 wakaba 1.67
522 wakaba 1.60 my @new_item;
523     push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
524     push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
525 wakaba 1.61
526 wakaba 1.60 my @child = @{$item->{node}->child_nodes};
527     while (@child) {
528     my $child = shift @child;
529     my $child_nt = $child->node_type;
530     if ($child_nt == 1) { # ELEMENT_NODE
531     my $child_nsuri = $child->namespace_uri;
532     $child_nsuri = '' unless defined $child_nsuri;
533     my $child_ln = $child->manakai_local_name;
534     if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
535     not (($self->{flag}->{in_head} or
536 wakaba 1.61 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
537     $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
538 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
539     $self, $item, $child,
540 wakaba 1.66 $child_nsuri, $child_ln, 1,
541     $content_state, $element_state];
542 wakaba 1.60 push @new_item, {type => 'element', node => $child,
543 wakaba 1.65 parent_state => $content_state,
544 wakaba 1.61 parent_def => $content_def,
545 wakaba 1.66 real_parent_state => $element_state,
546 wakaba 1.60 transparent => 1};
547     } else {
548 wakaba 1.65 if ($item->{parent_def} and # has parent
549     $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
550 wakaba 1.61 if ($el_ln eq 'object') {
551     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
552     #
553     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
554     #
555     } else {
556 wakaba 1.62 $content_def = $item->{parent_def} || $content_def;
557 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
558 wakaba 1.62 }
559     } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
560     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
561     #
562     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
563     $element_state->{has_source} = 1;
564     } else {
565     $content_def = $item->{parent_def} || $content_def;
566 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
567 wakaba 1.61 }
568     }
569     }
570    
571 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
572     $self, $item, $child,
573 wakaba 1.64 $child_nsuri, $child_ln,
574     $HTMLSemiTransparentElements
575     ->{$child_nsuri}->{$child_ln},
576 wakaba 1.66 $content_state, $element_state];
577 wakaba 1.60 push @new_item, {type => 'element', node => $child,
578 wakaba 1.65 parent_def => $content_def,
579 wakaba 1.66 real_parent_state => $element_state,
580 wakaba 1.65 parent_state => $content_state};
581 wakaba 1.60 }
582    
583     if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
584     $element_state->{has_significant} = 1;
585     }
586     } elsif ($child_nt == 3 or # TEXT_NODE
587     $child_nt == 4) { # CDATA_SECTION_NODE
588     my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
589     push @new_item, [$content_def->{check_child_text},
590     $self, $item, $child, $has_significant,
591 wakaba 1.66 $content_state, $element_state];
592     $element_state->{has_significant} ||= $has_significant;
593 wakaba 1.61 if ($has_significant and
594     $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
595     $content_def = $item->{parent_def} || $content_def;
596     }
597 wakaba 1.60 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
598     push @child, @{$child->child_nodes};
599 wakaba 1.1 }
600 wakaba 1.60 ## TODO: PI_NODE
601     ## TODO: Unknown node type
602 wakaba 1.1 }
603 wakaba 1.60
604     push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
605    
606     unshift @item, @new_item;
607 wakaba 1.30 } else {
608 wakaba 1.60 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
609 wakaba 1.4 }
610 wakaba 1.1 }
611 wakaba 1.17
612 wakaba 1.78 for (@{$self->{template}}) {
613     ## TODO: If the document is an XML document, ...
614     ## NOTE: If the document is an HTML document:
615     ## ISSUE: We need to percent-decode?
616     F: {
617     if ($self->{id}->{$_->[0]}) {
618     my $el = $self->{id}->{$_->[0]}->[0]->owner_element;
619     if ($el->node_type == 1 and # ELEMENT_NODE
620     $el->manakai_local_name eq 'datatemplate') {
621     my $nsuri = $el->namespace_uri;
622     if (defined $nsuri and $nsuri eq $HTML_NS) {
623     if ($el eq $_->[1]->owner_element) {
624     $self->{onerror}->(node => $_->[1],
625     type => 'fragment points itself',
626     level => $self->{must_level});
627     }
628    
629     last F;
630     }
631     }
632     }
633     ## TODO: Should we raise a "fragment points nothing" error instead
634     ## if the fragment identifier identifies no element?
635    
636     $self->{onerror}->(node => $_->[1], type => 'template:not template',
637     level => $self->{must_level});
638     } # F
639     }
640    
641     for (@{$self->{ref}}) {
642     ## TOOD: If XML
643     ## NOTE: If it is an HTML document:
644     if ($_->[0] eq '') {
645     ## NOTE: It points the top of the document.
646     } elsif ($self->{id}->{$_->[0]}) {
647     if ($self->{id}->{$_->[0]}->[0]->owner_element
648     eq $_->[1]->owner_element) {
649     $self->{onerror}->(node => $_->[1], type => 'fragment points itself',
650     level => $self->{must_level});
651     }
652     } else {
653     $self->{onerror}->(node => $_->[1], type => 'fragment points nothing',
654     level => $self->{must_level});
655     }
656     }
657    
658     ## TODO: Maybe we should have $document->manakai_get_by_fragment or something
659    
660 wakaba 1.17 for (@{$self->{usemap}}) {
661     unless ($self->{map}->{$_->[0]}) {
662     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
663     }
664     }
665    
666 wakaba 1.32 for (@{$self->{contextmenu}}) {
667     unless ($self->{menu}->{$_->[0]}) {
668     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
669     }
670     }
671    
672 wakaba 1.61 delete $self->{plus_elements};
673     delete $self->{minus_elements};
674 wakaba 1.17 delete $self->{onerror};
675     delete $self->{id};
676     delete $self->{usemap};
677 wakaba 1.78 delete $self->{ref};
678     delete $self->{template};
679 wakaba 1.17 delete $self->{map};
680 wakaba 1.33 return $self->{return};
681 wakaba 1.1 } # check_element
682    
683 wakaba 1.60 sub _add_minus_elements ($$@) {
684     my $self = shift;
685     my $element_state = shift;
686     for my $elements (@_) {
687     for my $nsuri (keys %$elements) {
688     for my $ln (keys %{$elements->{$nsuri}}) {
689     unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
690     $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
691     $self->{minus_elements}->{$nsuri}->{$ln} = 1;
692     }
693     }
694     }
695     }
696     } # _add_minus_elements
697    
698     sub _remove_minus_elements ($$) {
699     my $self = shift;
700     my $element_state = shift;
701     for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
702     for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
703     delete $self->{minus_elements}->{$nsuri}->{$ln};
704     }
705     }
706     } # _remove_minus_elements
707    
708     sub _add_plus_elements ($$@) {
709     my $self = shift;
710     my $element_state = shift;
711     for my $elements (@_) {
712     for my $nsuri (keys %$elements) {
713     for my $ln (keys %{$elements->{$nsuri}}) {
714     unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
715     $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
716     $self->{plus_elements}->{$nsuri}->{$ln} = 1;
717     }
718     }
719     }
720     }
721     } # _add_plus_elements
722    
723     sub _remove_plus_elements ($$) {
724     my $self = shift;
725     my $element_state = shift;
726     for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
727     for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
728     delete $self->{plus_elements}->{$nsuri}->{$ln};
729     }
730     }
731     } # _remove_plus_elements
732    
733 wakaba 1.68 sub _attr_status_info ($$$) {
734     my ($self, $attr, $status_code) = @_;
735 wakaba 1.70
736     if (not ($status_code & FEATURE_ALLOWED)) {
737     $self->{onerror}->(node => $attr,
738     type => 'attribute not defined',
739     level => $self->{must_level});
740     } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
741     $self->{onerror}->(node => $attr,
742     type => 'deprecated:attr',
743     level => $self->{should_level});
744     } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
745     $self->{onerror}->(node => $attr,
746     type => 'deprecated:attr',
747     level => $self->{info_level});
748     }
749    
750 wakaba 1.68 my $status;
751     if ($status_code & FEATURE_STATUS_REC) {
752     return;
753     } elsif ($status_code & FEATURE_STATUS_CR) {
754     $status = 'cr';
755     } elsif ($status_code & FEATURE_STATUS_LC) {
756     $status = 'lc';
757     } elsif ($status_code & FEATURE_STATUS_WD) {
758     $status = 'wd';
759     } else {
760     $status = 'non-standard';
761     }
762     $self->{onerror}->(node => $attr,
763     type => 'status:'.$status.':attr',
764     level => $self->{info_level});
765     } # _attr_status_info
766    
767 wakaba 1.2 sub _add_minuses ($@) {
768     my $self = shift;
769     my $r = {};
770     for my $list (@_) {
771     for my $ns (keys %$list) {
772     for my $ln (keys %{$list->{$ns}}) {
773     unless ($self->{minuses}->{$ns}->{$ln}) {
774     $self->{minuses}->{$ns}->{$ln} = 1;
775     $r->{$ns}->{$ln} = 1;
776     }
777     }
778     }
779     }
780 wakaba 1.4 return {type => 'plus', list => $r};
781 wakaba 1.2 } # _add_minuses
782    
783 wakaba 1.50 sub _add_pluses ($@) {
784     my $self = shift;
785     my $r = {};
786     for my $list (@_) {
787     for my $ns (keys %$list) {
788     for my $ln (keys %{$list->{$ns}}) {
789     unless ($self->{pluses}->{$ns}->{$ln}) {
790     $self->{pluses}->{$ns}->{$ln} = 1;
791     $r->{$ns}->{$ln} = 1;
792     }
793     }
794     }
795     }
796     return {type => 'minus', list => $r};
797     } # _add_pluses
798    
799 wakaba 1.2 sub _remove_minuses ($$) {
800 wakaba 1.4 my ($self, $todo) = @_;
801 wakaba 1.50 if ($todo->{type} eq 'minus') {
802     for my $ns (keys %{$todo->{list}}) {
803     for my $ln (keys %{$todo->{list}->{$ns}}) {
804     delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
805     }
806 wakaba 1.2 }
807 wakaba 1.50 } elsif ($todo->{type} eq 'plus') {
808     for my $ns (keys %{$todo->{list}}) {
809     for my $ln (keys %{$todo->{list}->{$ns}}) {
810     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
811     }
812     }
813     } else {
814     die "$0: Unknown +- type: $todo->{type}";
815 wakaba 1.2 }
816     1;
817     } # _remove_minuses
818    
819 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
820     ## undefined and implemented inconsistently; it is not a problem for
821     ## now, since no element belongs to both lists.
822    
823 wakaba 1.30 sub _check_get_children ($$$) {
824     my ($self, $node, $parent_todo) = @_;
825 wakaba 1.4 my $new_todos = [];
826 wakaba 1.2 my $sib = [];
827     TP: {
828     my $node_ns = $node->namespace_uri;
829     $node_ns = '' unless defined $node_ns;
830     my $node_ln = $node->manakai_local_name;
831 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
832     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
833     if ($parent_todo->{flag}->{in_head}) {
834     #
835     } else {
836     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
837     push @$sib, $end;
838    
839     unshift @$sib, @{$node->child_nodes};
840     push @$new_todos, {type => 'element-attributes', node => $node};
841     last TP;
842     }
843 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
844     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
845     unshift @$sib, @{$node->child_nodes};
846     push @$new_todos, {type => 'element-attributes', node => $node};
847     push @$new_todos,
848     {type => 'code',
849     code => sub {
850     $parent_todo->{flag}->{has_descendant}->{significant} = 0
851     if not $sig_flag;
852     }};
853     last TP;
854 wakaba 1.45 } else {
855     unshift @$sib, @{$node->child_nodes};
856     push @$new_todos, {type => 'element-attributes', node => $node};
857     last TP;
858 wakaba 1.2 }
859     }
860 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
861 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
862     unshift @$sib, @{$node->child_nodes};
863 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
864 wakaba 1.2 last TP;
865     } else {
866     my @cn = @{$node->child_nodes};
867     CN: while (@cn) {
868     my $cn = shift @cn;
869     my $cnt = $cn->node_type;
870     if ($cnt == 1) {
871 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
872     $cn_nsuri = '' unless defined $cn_nsuri;
873     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
874 wakaba 1.2 #
875     } else {
876     last CN;
877     }
878     } elsif ($cnt == 3 or $cnt == 4) {
879     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
880     last CN;
881     }
882     }
883     } # CN
884     unshift @$sib, @cn;
885     }
886 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
887     my @cn = @{$node->child_nodes};
888     CN: while (@cn) {
889     my $cn = shift @cn;
890     my $cnt = $cn->node_type;
891     if ($cnt == 1) {
892     my $cn_nsuri = $cn->namespace_uri;
893     $cn_nsuri = '' unless defined $cn_nsuri;
894     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
895     #
896     } else {
897     last CN;
898     }
899     } elsif ($cnt == 3 or $cnt == 4) {
900     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
901     last CN;
902     }
903     }
904     } # CN
905     unshift @$sib, @cn;
906 wakaba 1.2 }
907 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
908 wakaba 1.2 } # TP
909 wakaba 1.30
910     for my $new_todo (@$new_todos) {
911     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
912     }
913    
914 wakaba 1.4 return ($sib, $new_todos);
915 wakaba 1.2 } # _check_get_children
916    
917 wakaba 1.44 =head1 LICENSE
918    
919 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
920 wakaba 1.44
921     This library is free software; you can redistribute it
922     and/or modify it under the same terms as Perl itself.
923    
924     =cut
925    
926 wakaba 1.1 1;
927 wakaba 1.82 # $Date: 2008/05/10 06:04:39 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24