/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.46 - (hide annotations) (download)
Fri Aug 17 11:53:52 2007 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.45: +4 -2 lines
++ whatpm/t/ChangeLog	17 Aug 2007 07:08:23 -0000
	* content-model-2.dat: New tests for |base|
	following URI or hyperlink are added.

2007-08-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	17 Aug 2007 07:44:01 -0000
	* CSS/: New directory.

2007-08-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/CSS/ChangeLog	17 Aug 2007 11:53:38 -0000
2007-08-17  Wakaba  <wakaba@suika.fam.cx>

	* Tokenizer.pm: New module.

	* ChangeLog: New file.

++ whatpm/Whatpm/ContentChecker/ChangeLog	17 Aug 2007 07:08:56 -0000
	* HTML.pm: Raise new errors if |base| is following
	URI attributes or hyperlink attributes.

2007-08-17  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.46 our $VERSION=do{my @r=(q$Revision: 1.45 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
11 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
12     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
13    
14 wakaba 1.42 my $Namespace = {
15 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
16 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
17     $XML_NS => {loaded => 1},
18     $XMLNS_NS => {loaded => 1},
19     };
20    
21     our $AttrChecker = {
22 wakaba 1.9 $XML_NS => {
23 wakaba 1.13 space => sub {
24     my ($self, $attr) = @_;
25     my $value = $attr->value;
26     if ($value eq 'default' or $value eq 'preserve') {
27     #
28     } else {
29     ## NOTE: An XML "error"
30 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'error',
31     type => 'invalid attribute value');
32 wakaba 1.13 }
33     },
34     lang => sub {
35 wakaba 1.35 my ($self, $attr) = @_;
36 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
37     ## as defined by [IETF RFC 3066], Tags for the Identification
38     ## of Languages, or its successor; in addition, the empty string
39     ## may be specified." ("may" in lower case)
40 wakaba 1.36 $self->{onerror}->(node => $attr, level => 'unsupported',
41     type => 'language tag');
42 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
43 wakaba 1.36 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
44 wakaba 1.35 ## TODO: Test data...
45     }
46 wakaba 1.13 },
47     base => sub {
48     my ($self, $attr) = @_;
49     my $value = $attr->value;
50     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
51     $self->{onerror}->(node => $attr,
52 wakaba 1.33 type => 'invalid attribute value');
53 wakaba 1.13 }
54 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
55     ## no author requirement on conformance in the XML Base specification.
56 wakaba 1.13 },
57     id => sub {
58     my ($self, $attr) = @_;
59     my $value = $attr->value;
60     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
61     $value =~ s/^\x20//;
62     $value =~ s/\x20$//;
63     ## TODO: NCName in XML 1.0 or 1.1
64     ## TODO: declared type is ID?
65 wakaba 1.33 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
66     $self->{onerror}->(node => $attr, level => 'error',
67     type => 'duplicate ID');
68 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
69 wakaba 1.13 } else {
70 wakaba 1.37 $self->{id}->{$value} = [$attr];
71 wakaba 1.13 }
72     },
73 wakaba 1.9 },
74     $XMLNS_NS => {
75 wakaba 1.13 '' => sub {
76     my ($self, $attr) = @_;
77     my $ln = $attr->manakai_local_name;
78     my $value = $attr->value;
79     if ($value eq $XML_NS and $ln ne 'xml') {
80     $self->{onerror}
81 wakaba 1.33 ->(node => $attr, level => 'NC',
82     type => 'Reserved Prefixes and Namespace Names:=xml');
83 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
84     $self->{onerror}
85 wakaba 1.33 ->(node => $attr, level => 'NC',
86     type => 'Reserved Prefixes and Namespace Names:=xmlns');
87 wakaba 1.13 }
88     if ($ln eq 'xml' and $value ne $XML_NS) {
89     $self->{onerror}
90 wakaba 1.33 ->(node => $attr, level => 'NC',
91     type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
92 wakaba 1.13 } elsif ($ln eq 'xmlns') {
93     $self->{onerror}
94 wakaba 1.33 ->(node => $attr, level => 'NC',
95     type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
96 wakaba 1.13 }
97     ## TODO: If XML 1.0 and empty
98     },
99     xmlns => sub {
100     my ($self, $attr) = @_;
101     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
102     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
103 wakaba 1.18 ## TODO: relative references are deprecated
104 wakaba 1.13 my $value = $attr->value;
105     if ($value eq $XML_NS) {
106     $self->{onerror}
107 wakaba 1.33 ->(node => $attr, level => 'NC',
108     type => 'Reserved Prefixes and Namespace Names:=xml');
109 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
110     $self->{onerror}
111 wakaba 1.33 ->(node => $attr, level => 'NC',
112     type => 'Reserved Prefixes and Namespace Names:=xmlns');
113 wakaba 1.13 }
114     },
115 wakaba 1.9 },
116     };
117    
118 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
119 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
120     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
121     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
122     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
123    
124 wakaba 1.3 ## ANY
125 wakaba 1.42 our $AnyChecker = sub {
126 wakaba 1.4 my ($self, $todo) = @_;
127     my $el = $todo->{node};
128     my $new_todos = [];
129 wakaba 1.3 my @nodes = (@{$el->child_nodes});
130     while (@nodes) {
131     my $node = shift @nodes;
132     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
133    
134     my $nt = $node->node_type;
135     if ($nt == 1) {
136     my $node_ns = $node->namespace_uri;
137     $node_ns = '' unless defined $node_ns;
138     my $node_ln = $node->manakai_local_name;
139     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
140     $self->{onerror}->(node => $node, type => 'element not allowed');
141     }
142 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
143 wakaba 1.3 } elsif ($nt == 5) {
144     unshift @nodes, @{$node->child_nodes};
145     }
146     }
147 wakaba 1.4 return ($new_todos);
148 wakaba 1.3 }; # $AnyChecker
149    
150 wakaba 1.42 our $ElementDefault = {
151 wakaba 1.1 checker => sub {
152 wakaba 1.4 my ($self, $todo) = @_;
153 wakaba 1.33 $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
154     type => 'element');
155 wakaba 1.4 return $AnyChecker->($self, $todo);
156 wakaba 1.1 },
157 wakaba 1.9 attrs_checker => sub {
158     my ($self, $todo) = @_;
159     for my $attr (@{$todo->{node}->attributes}) {
160     my $attr_ns = $attr->namespace_uri;
161     $attr_ns = '' unless defined $attr_ns;
162     my $attr_ln = $attr->manakai_local_name;
163     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
164     || $AttrChecker->{$attr_ns}->{''};
165     if ($checker) {
166     $checker->($self, $attr);
167 wakaba 1.17 } else {
168 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'unsupported',
169     type => 'attribute');
170 wakaba 1.9 }
171     }
172     },
173 wakaba 1.1 };
174    
175 wakaba 1.7 my $HTMLTransparentElements = {
176     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
177 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
178     ## and not in |head|.
179 wakaba 1.7 };
180    
181 wakaba 1.42 our $Element = {};
182 wakaba 1.7
183 wakaba 1.42 sub check_document ($$$) {
184     my ($self, $doc, $onerror) = @_;
185     $self = bless {}, $self unless ref $self;
186     $self->{onerror} = $onerror;
187 wakaba 1.1
188 wakaba 1.42 my $docel = $doc->document_element;
189     unless (defined $docel) {
190     ## ISSUE: Should we check content of Document node?
191     $onerror->(node => $doc, type => 'no document element');
192     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
193     return {
194     class => {},
195     id => {}, table => [], term => {},
196     };
197 wakaba 1.1 }
198    
199 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
200 wakaba 1.1
201 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
202     $docel_nsuri = '' unless defined $docel_nsuri;
203 wakaba 1.43 unless ($Namespace->{$docel_nsuri}->{loaded}) {
204     if ($Namespace->{$docel_nsuri}->{module}) {
205     eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
206     } else {
207     $Namespace->{$docel_nsuri}->{loaded} = 1;
208     }
209     }
210 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
211     $Element->{$docel_nsuri}->{''} ||
212     $ElementDefault;
213     if ($docel_def->{is_root}) {
214     #
215     } else {
216     $onerror->(node => $docel, type => 'element not allowed');
217 wakaba 1.1 }
218    
219 wakaba 1.42 ## TODO: Check for other items other than document element
220     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
221 wakaba 1.2
222 wakaba 1.42 return $self->check_element ($docel, $onerror);
223     } # check_document
224 wakaba 1.1
225 wakaba 1.42 sub check_element ($$$) {
226     my ($self, $el, $onerror) = @_;
227     $self = bless {}, $self unless ref $self;
228     $self->{onerror} = $onerror;
229 wakaba 1.2
230 wakaba 1.42 $self->{minuses} = {};
231     $self->{id} = {};
232     $self->{term} = {};
233     $self->{usemap} = [];
234     $self->{contextmenu} = [];
235     $self->{map} = {};
236     $self->{menu} = {};
237     $self->{has_link_type} = {};
238 wakaba 1.46 #$self->{has_uri_attr};
239     #$self->{has_hyperlink_element};
240 wakaba 1.42 $self->{return} = {
241     class => {},
242     id => $self->{id}, table => [], term => $self->{term},
243     };
244 wakaba 1.4
245 wakaba 1.42 my @todo = ({type => 'element', node => $el});
246     while (@todo) {
247     my $todo = shift @todo;
248     if ($todo->{type} eq 'element') {
249     my $prefix = $todo->{node}->prefix;
250     if (defined $prefix and $prefix eq 'xmlns') {
251     $self->{onerror}
252     ->(node => $todo->{node}, level => 'NC',
253     type => 'Reserved Prefixes and Namespace Names:<xmlns:>');
254 wakaba 1.7 }
255 wakaba 1.42 my $nsuri = $todo->{node}->namespace_uri;
256     $nsuri = '' unless defined $nsuri;
257     unless ($Namespace->{$nsuri}->{loaded}) {
258     if ($Namespace->{$nsuri}->{module}) {
259     eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
260     } else {
261     $Namespace->{$nsuri}->{loaded} = 1;
262 wakaba 1.1 }
263     }
264 wakaba 1.42 my $ln = $todo->{node}->manakai_local_name;
265     my $eldef = $Element->{$nsuri}->{$ln} ||
266     $Element->{$nsuri}->{''} ||
267     $ElementDefault;
268     $eldef->{attrs_checker}->($self, $todo);
269     my ($new_todos) = $eldef->{checker}->($self, $todo);
270     unshift @todo, @$new_todos;
271     } elsif ($todo->{type} eq 'element-attributes') {
272     my $prefix = $todo->{node}->prefix;
273     if (defined $prefix and $prefix eq 'xmlns') {
274     $self->{onerror}
275     ->(node => $todo->{node}, level => 'NC',
276     type => 'Reserved Prefixes and Namespace Names:<xmlns:>');
277     }
278     my $nsuri = $todo->{node}->namespace_uri;
279     $nsuri = '' unless defined $nsuri;
280     unless ($Namespace->{$nsuri}->{loaded}) {
281     if ($Namespace->{$nsuri}->{module}) {
282     eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
283 wakaba 1.1 } else {
284 wakaba 1.42 $Namespace->{$nsuri}->{loaded} = 1;
285 wakaba 1.1 }
286     }
287 wakaba 1.9 my $ln = $todo->{node}->manakai_local_name;
288     my $eldef = $Element->{$nsuri}->{$ln} ||
289     $Element->{$nsuri}->{''} ||
290     $ElementDefault;
291     $eldef->{attrs_checker}->($self, $todo);
292 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
293     $self->_remove_minuses ($todo);
294 wakaba 1.30 } elsif ($todo->{type} eq 'code') {
295     $todo->{code}->();
296     } else {
297     die "$0: Internal error: Unsupported checking action type |$todo->{type}|";
298 wakaba 1.4 }
299 wakaba 1.1 }
300 wakaba 1.17
301     for (@{$self->{usemap}}) {
302     unless ($self->{map}->{$_->[0]}) {
303     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
304     }
305     }
306    
307 wakaba 1.32 for (@{$self->{contextmenu}}) {
308     unless ($self->{menu}->{$_->[0]}) {
309     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
310     }
311     }
312    
313 wakaba 1.17 delete $self->{minuses};
314     delete $self->{onerror};
315     delete $self->{id};
316     delete $self->{usemap};
317     delete $self->{map};
318 wakaba 1.33 return $self->{return};
319 wakaba 1.1 } # check_element
320    
321 wakaba 1.2 sub _add_minuses ($@) {
322     my $self = shift;
323     my $r = {};
324     for my $list (@_) {
325     for my $ns (keys %$list) {
326     for my $ln (keys %{$list->{$ns}}) {
327     unless ($self->{minuses}->{$ns}->{$ln}) {
328     $self->{minuses}->{$ns}->{$ln} = 1;
329     $r->{$ns}->{$ln} = 1;
330     }
331     }
332     }
333     }
334 wakaba 1.4 return {type => 'plus', list => $r};
335 wakaba 1.2 } # _add_minuses
336    
337     sub _remove_minuses ($$) {
338 wakaba 1.4 my ($self, $todo) = @_;
339     for my $ns (keys %{$todo->{list}}) {
340     for my $ln (keys %{$todo->{list}->{$ns}}) {
341     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
342 wakaba 1.2 }
343     }
344     1;
345     } # _remove_minuses
346    
347 wakaba 1.30 sub _check_get_children ($$$) {
348     my ($self, $node, $parent_todo) = @_;
349 wakaba 1.4 my $new_todos = [];
350 wakaba 1.2 my $sib = [];
351     TP: {
352     my $node_ns = $node->namespace_uri;
353     $node_ns = '' unless defined $node_ns;
354     my $node_ln = $node->manakai_local_name;
355 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
356     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
357     if ($parent_todo->{flag}->{in_head}) {
358     #
359     } else {
360     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
361     push @$sib, $end;
362    
363     unshift @$sib, @{$node->child_nodes};
364     push @$new_todos, {type => 'element-attributes', node => $node};
365     last TP;
366     }
367     } else {
368     unshift @$sib, @{$node->child_nodes};
369     push @$new_todos, {type => 'element-attributes', node => $node};
370     last TP;
371 wakaba 1.2 }
372     }
373 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
374 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
375     unshift @$sib, @{$node->child_nodes};
376 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
377 wakaba 1.2 last TP;
378     } else {
379     my @cn = @{$node->child_nodes};
380     CN: while (@cn) {
381     my $cn = shift @cn;
382     my $cnt = $cn->node_type;
383     if ($cnt == 1) {
384 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
385     $cn_nsuri = '' unless defined $cn_nsuri;
386     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
387 wakaba 1.2 #
388     } else {
389     last CN;
390     }
391     } elsif ($cnt == 3 or $cnt == 4) {
392     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
393     last CN;
394     }
395     }
396     } # CN
397     unshift @$sib, @cn;
398     }
399     }
400 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
401 wakaba 1.2 } # TP
402 wakaba 1.30
403     for my $new_todo (@$new_todos) {
404     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
405     }
406    
407 wakaba 1.4 return ($sib, $new_todos);
408 wakaba 1.2 } # _check_get_children
409    
410 wakaba 1.44 =head1 LICENSE
411    
412     Copyright 2007 Wakaba <w@suika.fam.cx>
413    
414     This library is free software; you can redistribute it
415     and/or modify it under the same terms as Perl itself.
416    
417     =cut
418    
419 wakaba 1.1 1;
420 wakaba 1.46 # $Date: 2007/08/17 05:55:44 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24