/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.43 - (hide annotations) (download)
Sun Aug 5 04:50:57 2007 UTC (17 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.42: +9 -1 lines
++ whatpm/Whatpm/ChangeLog	5 Aug 2007 04:50:52 -0000
	* ContentChecker.pm: Reference to the |Whatpm::ContentChecker::Atom|
	is added.
	(check_document): Load appropriate module before validation.

2007-08-04  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	5 Aug 2007 04:49:57 -0000
2007-08-05  Wakaba  <wakaba@suika.fam.cx>

	* Atom.pm: New Perl module.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3    
4 wakaba 1.18 require Whatpm::URIChecker;
5    
6 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
7     ## be applied to an in-memory representation (i.e. DOM)?
8    
9 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
10 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
11     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
12    
13 wakaba 1.42 my $Namespace = {
14 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
15 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
16     $XML_NS => {loaded => 1},
17     $XMLNS_NS => {loaded => 1},
18     };
19    
20     our $AttrChecker = {
21 wakaba 1.9 $XML_NS => {
22 wakaba 1.13 space => sub {
23     my ($self, $attr) = @_;
24     my $value = $attr->value;
25     if ($value eq 'default' or $value eq 'preserve') {
26     #
27     } else {
28     ## NOTE: An XML "error"
29 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'error',
30     type => 'invalid attribute value');
31 wakaba 1.13 }
32     },
33     lang => sub {
34 wakaba 1.35 my ($self, $attr) = @_;
35 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
36     ## as defined by [IETF RFC 3066], Tags for the Identification
37     ## of Languages, or its successor; in addition, the empty string
38     ## may be specified." ("may" in lower case)
39 wakaba 1.36 $self->{onerror}->(node => $attr, level => 'unsupported',
40     type => 'language tag');
41 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
42 wakaba 1.36 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
43 wakaba 1.35 ## TODO: Test data...
44     }
45 wakaba 1.13 },
46     base => sub {
47     my ($self, $attr) = @_;
48     my $value = $attr->value;
49     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
50     $self->{onerror}->(node => $attr,
51 wakaba 1.33 type => 'invalid attribute value');
52 wakaba 1.13 }
53 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
54     ## no author requirement on conformance in the XML Base specification.
55 wakaba 1.13 },
56     id => sub {
57     my ($self, $attr) = @_;
58     my $value = $attr->value;
59     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
60     $value =~ s/^\x20//;
61     $value =~ s/\x20$//;
62     ## TODO: NCName in XML 1.0 or 1.1
63     ## TODO: declared type is ID?
64 wakaba 1.33 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
65     $self->{onerror}->(node => $attr, level => 'error',
66     type => 'duplicate ID');
67 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
68 wakaba 1.13 } else {
69 wakaba 1.37 $self->{id}->{$value} = [$attr];
70 wakaba 1.13 }
71     },
72 wakaba 1.9 },
73     $XMLNS_NS => {
74 wakaba 1.13 '' => sub {
75     my ($self, $attr) = @_;
76     my $ln = $attr->manakai_local_name;
77     my $value = $attr->value;
78     if ($value eq $XML_NS and $ln ne 'xml') {
79     $self->{onerror}
80 wakaba 1.33 ->(node => $attr, level => 'NC',
81     type => 'Reserved Prefixes and Namespace Names:=xml');
82 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
83     $self->{onerror}
84 wakaba 1.33 ->(node => $attr, level => 'NC',
85     type => 'Reserved Prefixes and Namespace Names:=xmlns');
86 wakaba 1.13 }
87     if ($ln eq 'xml' and $value ne $XML_NS) {
88     $self->{onerror}
89 wakaba 1.33 ->(node => $attr, level => 'NC',
90     type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
91 wakaba 1.13 } elsif ($ln eq 'xmlns') {
92     $self->{onerror}
93 wakaba 1.33 ->(node => $attr, level => 'NC',
94     type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
95 wakaba 1.13 }
96     ## TODO: If XML 1.0 and empty
97     },
98     xmlns => sub {
99     my ($self, $attr) = @_;
100     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
101     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
102 wakaba 1.18 ## TODO: relative references are deprecated
103 wakaba 1.13 my $value = $attr->value;
104     if ($value eq $XML_NS) {
105     $self->{onerror}
106 wakaba 1.33 ->(node => $attr, level => 'NC',
107     type => 'Reserved Prefixes and Namespace Names:=xml');
108 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
109     $self->{onerror}
110 wakaba 1.33 ->(node => $attr, level => 'NC',
111     type => 'Reserved Prefixes and Namespace Names:=xmlns');
112 wakaba 1.13 }
113     },
114 wakaba 1.9 },
115     };
116    
117 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
118 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
119     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
120     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
121     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
122    
123 wakaba 1.3 ## ANY
124 wakaba 1.42 our $AnyChecker = sub {
125 wakaba 1.4 my ($self, $todo) = @_;
126     my $el = $todo->{node};
127     my $new_todos = [];
128 wakaba 1.3 my @nodes = (@{$el->child_nodes});
129     while (@nodes) {
130     my $node = shift @nodes;
131     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
132    
133     my $nt = $node->node_type;
134     if ($nt == 1) {
135     my $node_ns = $node->namespace_uri;
136     $node_ns = '' unless defined $node_ns;
137     my $node_ln = $node->manakai_local_name;
138     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
139     $self->{onerror}->(node => $node, type => 'element not allowed');
140     }
141 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
142 wakaba 1.3 } elsif ($nt == 5) {
143     unshift @nodes, @{$node->child_nodes};
144     }
145     }
146 wakaba 1.4 return ($new_todos);
147 wakaba 1.3 }; # $AnyChecker
148    
149 wakaba 1.42 our $ElementDefault = {
150 wakaba 1.1 checker => sub {
151 wakaba 1.4 my ($self, $todo) = @_;
152 wakaba 1.33 $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
153     type => 'element');
154 wakaba 1.4 return $AnyChecker->($self, $todo);
155 wakaba 1.1 },
156 wakaba 1.9 attrs_checker => sub {
157     my ($self, $todo) = @_;
158     for my $attr (@{$todo->{node}->attributes}) {
159     my $attr_ns = $attr->namespace_uri;
160     $attr_ns = '' unless defined $attr_ns;
161     my $attr_ln = $attr->manakai_local_name;
162     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
163     || $AttrChecker->{$attr_ns}->{''};
164     if ($checker) {
165     $checker->($self, $attr);
166 wakaba 1.17 } else {
167 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'unsupported',
168     type => 'attribute');
169 wakaba 1.9 }
170     }
171     },
172 wakaba 1.1 };
173    
174 wakaba 1.7 my $HTMLTransparentElements = {
175     $HTML_NS => {qw/ins 1 font 1 noscript 1/},
176 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
177     ## and not in |head|.
178 wakaba 1.7 };
179    
180 wakaba 1.42 our $Element = {};
181 wakaba 1.7
182 wakaba 1.42 sub check_document ($$$) {
183     my ($self, $doc, $onerror) = @_;
184     $self = bless {}, $self unless ref $self;
185     $self->{onerror} = $onerror;
186 wakaba 1.1
187 wakaba 1.42 my $docel = $doc->document_element;
188     unless (defined $docel) {
189     ## ISSUE: Should we check content of Document node?
190     $onerror->(node => $doc, type => 'no document element');
191     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
192     return {
193     class => {},
194     id => {}, table => [], term => {},
195     };
196 wakaba 1.1 }
197    
198 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
199 wakaba 1.1
200 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
201     $docel_nsuri = '' unless defined $docel_nsuri;
202 wakaba 1.43 unless ($Namespace->{$docel_nsuri}->{loaded}) {
203     if ($Namespace->{$docel_nsuri}->{module}) {
204     eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
205     } else {
206     $Namespace->{$docel_nsuri}->{loaded} = 1;
207     }
208     }
209 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
210     $Element->{$docel_nsuri}->{''} ||
211     $ElementDefault;
212     if ($docel_def->{is_root}) {
213     #
214     } else {
215     $onerror->(node => $docel, type => 'element not allowed');
216 wakaba 1.1 }
217    
218 wakaba 1.42 ## TODO: Check for other items other than document element
219     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
220 wakaba 1.2
221 wakaba 1.42 return $self->check_element ($docel, $onerror);
222     } # check_document
223 wakaba 1.1
224 wakaba 1.42 sub check_element ($$$) {
225     my ($self, $el, $onerror) = @_;
226     $self = bless {}, $self unless ref $self;
227     $self->{onerror} = $onerror;
228 wakaba 1.2
229 wakaba 1.42 $self->{minuses} = {};
230     $self->{id} = {};
231     $self->{term} = {};
232     $self->{usemap} = [];
233     $self->{contextmenu} = [];
234     $self->{map} = {};
235     $self->{menu} = {};
236     $self->{has_link_type} = {};
237     $self->{return} = {
238     class => {},
239     id => $self->{id}, table => [], term => $self->{term},
240     };
241 wakaba 1.4
242 wakaba 1.42 my @todo = ({type => 'element', node => $el});
243     while (@todo) {
244     my $todo = shift @todo;
245     if ($todo->{type} eq 'element') {
246     my $prefix = $todo->{node}->prefix;
247     if (defined $prefix and $prefix eq 'xmlns') {
248     $self->{onerror}
249     ->(node => $todo->{node}, level => 'NC',
250     type => 'Reserved Prefixes and Namespace Names:<xmlns:>');
251 wakaba 1.7 }
252 wakaba 1.42 my $nsuri = $todo->{node}->namespace_uri;
253     $nsuri = '' unless defined $nsuri;
254     unless ($Namespace->{$nsuri}->{loaded}) {
255     if ($Namespace->{$nsuri}->{module}) {
256     eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
257     } else {
258     $Namespace->{$nsuri}->{loaded} = 1;
259 wakaba 1.1 }
260     }
261 wakaba 1.42 my $ln = $todo->{node}->manakai_local_name;
262     my $eldef = $Element->{$nsuri}->{$ln} ||
263     $Element->{$nsuri}->{''} ||
264     $ElementDefault;
265     $eldef->{attrs_checker}->($self, $todo);
266     my ($new_todos) = $eldef->{checker}->($self, $todo);
267     unshift @todo, @$new_todos;
268     } elsif ($todo->{type} eq 'element-attributes') {
269     my $prefix = $todo->{node}->prefix;
270     if (defined $prefix and $prefix eq 'xmlns') {
271     $self->{onerror}
272     ->(node => $todo->{node}, level => 'NC',
273     type => 'Reserved Prefixes and Namespace Names:<xmlns:>');
274     }
275     my $nsuri = $todo->{node}->namespace_uri;
276     $nsuri = '' unless defined $nsuri;
277     unless ($Namespace->{$nsuri}->{loaded}) {
278     if ($Namespace->{$nsuri}->{module}) {
279     eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
280 wakaba 1.1 } else {
281 wakaba 1.42 $Namespace->{$nsuri}->{loaded} = 1;
282 wakaba 1.1 }
283     }
284 wakaba 1.9 my $ln = $todo->{node}->manakai_local_name;
285     my $eldef = $Element->{$nsuri}->{$ln} ||
286     $Element->{$nsuri}->{''} ||
287     $ElementDefault;
288     $eldef->{attrs_checker}->($self, $todo);
289 wakaba 1.4 } elsif ($todo->{type} eq 'plus') {
290     $self->_remove_minuses ($todo);
291 wakaba 1.30 } elsif ($todo->{type} eq 'code') {
292     $todo->{code}->();
293     } else {
294     die "$0: Internal error: Unsupported checking action type |$todo->{type}|";
295 wakaba 1.4 }
296 wakaba 1.1 }
297 wakaba 1.17
298     for (@{$self->{usemap}}) {
299     unless ($self->{map}->{$_->[0]}) {
300     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
301     }
302     }
303    
304 wakaba 1.32 for (@{$self->{contextmenu}}) {
305     unless ($self->{menu}->{$_->[0]}) {
306     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
307     }
308     }
309    
310 wakaba 1.17 delete $self->{minuses};
311     delete $self->{onerror};
312     delete $self->{id};
313     delete $self->{usemap};
314     delete $self->{map};
315 wakaba 1.33 return $self->{return};
316 wakaba 1.1 } # check_element
317    
318 wakaba 1.2 sub _add_minuses ($@) {
319     my $self = shift;
320     my $r = {};
321     for my $list (@_) {
322     for my $ns (keys %$list) {
323     for my $ln (keys %{$list->{$ns}}) {
324     unless ($self->{minuses}->{$ns}->{$ln}) {
325     $self->{minuses}->{$ns}->{$ln} = 1;
326     $r->{$ns}->{$ln} = 1;
327     }
328     }
329     }
330     }
331 wakaba 1.4 return {type => 'plus', list => $r};
332 wakaba 1.2 } # _add_minuses
333    
334     sub _remove_minuses ($$) {
335 wakaba 1.4 my ($self, $todo) = @_;
336     for my $ns (keys %{$todo->{list}}) {
337     for my $ln (keys %{$todo->{list}->{$ns}}) {
338     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
339 wakaba 1.2 }
340     }
341     1;
342     } # _remove_minuses
343    
344 wakaba 1.30 sub _check_get_children ($$$) {
345     my ($self, $node, $parent_todo) = @_;
346 wakaba 1.4 my $new_todos = [];
347 wakaba 1.2 my $sib = [];
348     TP: {
349     my $node_ns = $node->namespace_uri;
350     $node_ns = '' unless defined $node_ns;
351     my $node_ln = $node->manakai_local_name;
352     if ($node_ns eq $HTML_NS) {
353     if ($node_ln eq 'noscript') {
354     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
355     push @$sib, $end;
356     }
357     }
358 wakaba 1.31 ## TODO: |noscript| is not a transparent element in |head|.
359 wakaba 1.7 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
360     unshift @$sib, @{$node->child_nodes};
361 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
362 wakaba 1.7 last TP;
363 wakaba 1.2 }
364 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
365 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
366     unshift @$sib, @{$node->child_nodes};
367 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
368 wakaba 1.2 last TP;
369     } else {
370     my @cn = @{$node->child_nodes};
371     CN: while (@cn) {
372     my $cn = shift @cn;
373     my $cnt = $cn->node_type;
374     if ($cnt == 1) {
375 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
376     $cn_nsuri = '' unless defined $cn_nsuri;
377     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
378 wakaba 1.2 #
379     } else {
380     last CN;
381     }
382     } elsif ($cnt == 3 or $cnt == 4) {
383     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
384     last CN;
385     }
386     }
387     } # CN
388     unshift @$sib, @cn;
389     }
390     }
391 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
392 wakaba 1.2 } # TP
393 wakaba 1.30
394     for my $new_todo (@$new_todos) {
395     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
396     }
397    
398 wakaba 1.4 return ($sib, $new_todos);
399 wakaba 1.2 } # _check_get_children
400    
401 wakaba 1.1 1;
402 wakaba 1.43 # $Date: 2007/08/04 13:48:25 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24