/[suikacvs]/markup/html/whatpm/Whatpm/CacheManifest.pm
Suika

Contents of /markup/html/whatpm/Whatpm/CacheManifest.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (hide annotations) (download)
Fri May 16 13:56:16 2008 UTC (16 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.5: +4 -4 lines
++ whatpm/t/ChangeLog	16 May 2008 13:45:01 -0000
	* content-model-2.dat: Test data for charset="" attribute
	are added.

2008-05-16  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	16 May 2008 11:18:17 -0000
2008-05-16  Wakaba  <wakaba@suika.fam.cx>

	* CacheManifest.pm (_parse): Drop fragment identifiers from
	URIs in fallback section (HTML5 revision 1596).

++ whatpm/Whatpm/ContentChecker/ChangeLog	16 May 2008 13:45:47 -0000
	* HTML.pm: Fact out generic charset name checking code
	to $HTMLCharsetChecker.  Support for charset="" attributes
	on <a>, <link>, and <script> elements.

2008-05-16  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::CacheManifest;
2     use strict;
3 wakaba 1.6 our $VERSION=do{my @r=(q$Revision: 1.5 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1 require Message::URI::URIReference;
5    
6     sub parse_byte_string ($$$$$) {
7     require Encode;
8     my $s = Encode::decode ('utf-8', $_[1]);
9     return $_[0]->_parse (\$s, $_[2], $_[3], $_[4] || sub {
10     my %err = @_;
11     warn $err{type}, "\n";
12     });
13     } # parse_byte_string
14    
15     sub parse_char_string ($$$$$) {
16     return $_[0]->_parse (\($_[1]), $_[2], $_[3], $_[4] || sub {
17     my %err = @_;
18     warn $err{type}, "\n";
19     });
20     } # parse_char_string
21    
22     sub _parse ($$$$$) {
23     #my (undef, $input, $manifest_uri, $base_uri, $onerror) = @_;
24    
25     ## NOTE: A manifest MUST be labeled as text/cache-manifest. (This should
26     ## be checked in upper-level).
27     ## NOTE: No "MUST" for being UTF-8.
28     ## NOTE: A |text/cache-manifest| MUST be a cache manifest.
29     ## NOTE: Newlines MUST be CR/CRLF/LF. (We don't and can't check this.)
30    
31 wakaba 1.2 ## ISSUE: In RFC 2046: "The specification for any future subtypes of "text" must specify whether or not they will also utilize a "charset" parameter"
32    
33 wakaba 1.1 my $m_uri = Message::DOM::DOMImplementation->create_uri_reference ($_[2]);
34     my $m_scheme = $m_uri->uri_scheme;
35    
36     my $onerror = $_[4];
37     my $must_level = 'm';
38     my $warn_level = 'w';
39     my $line_number = 1;
40    
41 wakaba 1.4 ## Same origin with the manifest's URI
42     my $same_origin = sub {
43     ## NOTE: Step numbers in this function corresponds to those in the
44     ## algorithm for determining the origin of a URI specified in HTML5.
45 wakaba 1.1
46 wakaba 1.4 ## 1. and 2.
47 wakaba 1.1 my $u1 = shift;
48 wakaba 1.4 #my $m_uri = $m_uri;
49    
50     ## 3.
51     return 0 unless defined $u1->uri_authority;
52     return 0 unless defined $m_uri->uri_authority;
53     ## TODO: In addition, in the case of URIs with non-server-based authority
54     ## it must also return 0.
55 wakaba 1.1
56 wakaba 1.4 ## 4.
57 wakaba 1.3 unless (lc $u1->uri_scheme eq lc $m_scheme) { ## TODO: case
58 wakaba 1.1 return 0;
59     }
60 wakaba 1.4 ## TODO: Return if $u1->uri_scheme is not a supported scheme.
61     ## NOTE: $m_scheme is always a supported URI scheme, otherwise
62     ## the manifest itself cannot be retrieved.
63 wakaba 1.1
64 wakaba 1.4 ## 5., 6., and 7.
65 wakaba 1.1 return 0 unless $u1->uri_host eq $m_uri->uri_host;
66 wakaba 1.4 ## TODO: IDNA ToASCII
67    
68     ## 8.
69 wakaba 1.1 return 0 unless $u1->uri_port eq $m_uri->uri_port;
70 wakaba 1.4 ## TODO: default port
71 wakaba 1.1
72 wakaba 1.4 ## 9.
73 wakaba 1.1 return 1;
74 wakaba 1.4 }; # $same_origin
75 wakaba 1.1
76     ## Step 5
77     my $input = $_[1];
78    
79     ## Step 1: MUST bytes --UTF-8--> characters.
80 wakaba 1.5 ## NOTE: illegal(s) -> U+FFFD, #U+0000 -> U+FFFD (commented out in r1553).
81     #$$input =~ tr/\x00/\x{FFFD}/;
82 wakaba 1.1
83     ## Step 2
84     my $explicit_uris = [];
85    
86     ## Step 3
87     my $fallback_uris = {};
88    
89     ## Step 4
90     my $online_whitelist_uris = [];
91    
92     ## Step 6
93     pos ($$input) = 0;
94    
95     ## Step 7
96     ## Skip BOM ## NOTE: MAY in syntax.
97    
98     ## Step 8-10
99     unless ($$input =~ /^CACHE MANIFEST[\x20\x09]*(?![^\x0D\x0A])/gc) {
100     $onerror->(type => 'not manifest', level => $must_level,
101     line => $line_number, column => 1); ## NOTE: MUST in syntax.
102     return; ## Not a manifest.
103     }
104    
105     ## Step 11
106     ## This is a cache manifest.
107    
108     ## Step 12
109     my $mode = 'explicit';
110    
111     ## NOTE: MUST be (blank line|comment|section head|data for the current
112     ## section)*.
113    
114     ## Step 13
115     START_OF_LINE: while (pos $$input < length $$input) {
116     $$input =~ /([\x0A\x0D\x20\x09]+)/gc;
117     my $v = $1;
118 wakaba 1.4 $line_number++ for $v =~ /\x0D\x0A?|\x0A/g;
119 wakaba 1.1
120     ## Step 14
121     $$input =~ /([^\x0A\x0D]*)/gc;
122     my $line = $1;
123    
124     ## Step 15
125 wakaba 1.3 $line =~ s/[\x20\x09]+\z//;
126 wakaba 1.1
127 wakaba 1.3 ## Step 16-17
128     if ($line eq '' or $line =~ /^#/) {
129     next START_OF_LINE;
130     }
131 wakaba 1.1
132     if ($line eq 'CACHE:') {
133 wakaba 1.3 ## Step 18
134 wakaba 1.1 $mode = 'explicit';
135     next START_OF_LINE;
136     } elsif ($line eq 'FALLBACK:') {
137 wakaba 1.3 ## Step 19
138 wakaba 1.1 $mode = 'fallback';
139     next START_OF_LINE;
140     } elsif ($line eq 'NETWORK:') {
141 wakaba 1.3 ## Step 20
142 wakaba 1.1 $mode = 'online whitelist';
143     next START_OF_LINE;
144     }
145    
146     ## NOTE: "URIs that are to be fallback pages associated with
147     ## opportunistic caching namespaces, and those namespaces themselves,
148     ## MUST be given in fallback sections, with the namespace being the
149     ## first URI of the data line, and the corresponding fallback page
150     ## being the second URI. All the other pages to be cached MUST be
151     ## listed in explicit sections." in writing section can't be tested.
152     ## NOTE: "URIs that the user agent is to put into the online whitelist
153     ## MUST all be specified in online whitelist sections." in writing
154     ## section can't be tested.
155    
156     ## NOTE: "Relative URIs MUST be given relative to the manifest's own URI."
157     ## requirement in writing section can't be tested.
158    
159 wakaba 1.3 ## Step 21
160 wakaba 1.1 if ($mode eq 'explicit') {
161     my $uri = Message::DOM::DOMImplementation->create_uri_reference ($line);
162    
163     unless ($uri->is_iri_reference_3987) {
164     $onerror->(type => 'URI::syntax error:iriref3987',
165     level => $must_level, line => $line_number, column => 1,
166     value => $line);
167     next START_OF_LINE; ## NOTE: MUST in syntax.
168     }
169    
170     $uri = $uri->get_absolute_reference ($_[3]);
171    
172     if (defined $uri->uri_fragment) {
173     $uri->uri_fragment (undef);
174     $onerror->(type => 'URI fragment not allowed',
175     level => $must_level, line => $line_number, column => 1,
176     value => $line);
177     ## NOTE: MUST in writing section.
178     }
179    
180     my $scheme = $uri->uri_scheme;
181     unless (defined $scheme and $scheme eq $m_scheme) {
182     $onerror->(type => 'different scheme from manifest',
183     level => $warn_level, line => $line_number, column => 1,
184     value => $uri->uri_reference);
185     next START_OF_LINE;
186     }
187    
188     push @$explicit_uris, $uri->uri_reference;
189     } elsif ($mode eq 'fallback') {
190     my ($p1, $p2) = split /[\x20\x09]+/, $line, 2;
191    
192     unless (defined $p2) {
193     $onerror->(type => 'no fallback entry URI',
194     level => $must_level, line => $line_number, column => 1,
195     value => $line);
196     next START_OF_LINE; ## NOTE: MUST in syntax.
197     }
198    
199     my $u1 = Message::DOM::DOMImplementation->create_uri_reference ($p1);
200    
201     unless ($u1->is_iri_reference_3987) {
202     $onerror->(type => 'URI::syntax error:iriref3987',
203     level => $must_level, line => $line_number, column => 1,
204     index => 0, value => $p1);
205     next START_OF_LINE; ## NOTE: MUST in syntax.
206     }
207    
208     my $u2 = Message::DOM::DOMImplementation->create_uri_reference ($p2);
209    
210     unless ($u2->is_iri_reference_3987) {
211     $onerror->(type => 'URI::syntax error:iriref3987',
212     level => $must_level, line => $line_number, column => 1,
213     index => 1, value => $p2);
214     next START_OF_LINE; ## NOTE: MUST in syntax.
215     }
216    
217     if (defined $u1->uri_fragment) {
218 wakaba 1.6 $u1->uri_fragment (undef);
219 wakaba 1.1 $onerror->(type => 'URI fragment not allowed',
220     level => $must_level, line => $line_number, column => 1,
221     index => 0, value => $p1);
222     ## NOTE: MUST in writing section.
223     }
224    
225     if (defined $u2->uri_fragment) {
226 wakaba 1.6 $u2->uri_fragment (undef);
227 wakaba 1.1 $onerror->(type => 'URI fragment not allowed',
228     level => $must_level, line => $line_number, column => 1,
229     index => 1, value => $p2);
230     ## NOTE: MUST in writing section.
231     }
232    
233     $u1 = $u1->get_absolute_reference ($_[3]);
234     $u2 = $u2->get_absolute_reference ($_[3]);
235    
236     if (exists $fallback_uris->{$u1->uri_reference}) {
237     $onerror->(type => 'duplicate oc namespace',
238     level => $must_level, line => $line_number, column => 1,
239     index => 0, value => $u1->uri_reference);
240     next START_OF_LINE; ## NOTE: MUST in syntax.
241     }
242    
243 wakaba 1.4 unless ($same_origin->($u1)) {
244 wakaba 1.1 $onerror->(type => 'different shp from manifest',
245     level => $must_level, line => $line_number, column => 1,
246     index => 0, value => $u1->uri_reference);
247     next START_OF_LINE; ## NOTE: MUST in syntax.
248     }
249    
250     my $u2_scheme = $u2->uri_scheme;
251     unless (defined $u2_scheme and $u2_scheme eq $m_scheme) {
252     $onerror->(type => 'different scheme from manifest',
253     level => $warn_level, line => $line_number, column => 1,
254     index => 1, value => $u2->uri_reference);
255     next START_OF_LINE;
256     }
257    
258     $fallback_uris->{$u1->uri_reference} = $u2->uri_reference;
259     } elsif ($mode eq 'online whitelist') {
260     my $uri = Message::DOM::DOMImplementation->create_uri_reference ($line);
261    
262     unless ($uri->is_iri_reference_3987) {
263     $onerror->(type => 'URI::syntax error:iriref3987',
264     level => $must_level, line => $line_number, column => 1,
265     value => $line);
266     next START_OF_LINE; ## NOTE: MUST in syntax.
267     }
268    
269     $uri = $uri->get_absolute_reference ($_[3]);
270    
271     if (defined $uri->uri_fragment) {
272     $uri->uri_fragment (undef);
273     $onerror->(type => 'URI fragment not allowed',
274     level => $must_level, line => $line_number, column => 1,
275     value => $line);
276     ## NOTE: MUST in writing section.
277     }
278    
279     my $scheme = $uri->uri_scheme;
280     unless (defined $scheme and $scheme eq $m_scheme) {
281     $onerror->(type => 'different scheme from manifest',
282     level => $warn_level, line => $line_number, column => 1,
283     value => $uri->uri_reference);
284     next START_OF_LINE;
285     }
286    
287     push @$online_whitelist_uris, $uri->uri_reference;
288     }
289    
290 wakaba 1.3 ## Step 22
291 wakaba 1.1 #next START_OF_LINE;
292     } # START_OF_LINE
293    
294 wakaba 1.3 ## Step 23
295     return [$explicit_uris, $fallback_uris, $online_whitelist_uris,
296     $m_uri->uri_reference];
297 wakaba 1.1 } # _parse
298    
299     sub check_manifest ($$$) {
300     my (undef, $manifest, $onerror) = @_;
301    
302     my $listed = {};
303     my $must_level = 'm';
304    
305     require Whatpm::URIChecker;
306    
307 wakaba 1.3 my $i = 0;
308 wakaba 1.1 for my $uri (@{$manifest->[0]}) {
309     $listed->{$uri} = 1;
310    
311     Whatpm::URIChecker->check_iri_reference ($uri, sub {
312     my %opt = @_;
313     $onerror->(level => $opt{level}, value => $uri,
314 wakaba 1.3 index => $i,
315 wakaba 1.1 type => 'URI::'.$opt{type}.
316     (defined $opt{position} ? ':'.$opt{position} : ''));
317     });
318 wakaba 1.3
319     ## ISSUE: Literal equivalence, right?
320     if ($uri eq $manifest->[3]) {
321     $onerror->(level => $must_level, value => $uri,
322     index => $i,
323     type => 'manifest URI');
324     }
325    
326     $i++;
327 wakaba 1.1 }
328    
329 wakaba 1.3 for my $uri1 (sort {$a cmp $b} keys %{$manifest->[1]}) {
330     Whatpm::URIChecker->check_iri_reference ($uri1, sub {
331 wakaba 1.1 my %opt = @_;
332 wakaba 1.3 $onerror->(level => $opt{level}, index => 0, value => $uri1,
333 wakaba 1.1 type => 'URI::'.$opt{type}.
334     (defined $opt{position} ? ':'.$opt{position} : ''));
335     });
336    
337 wakaba 1.3 if ($uri1 eq $manifest->[3]) {
338     $onerror->(level => $must_level, value => $uri1,
339     index => $i,
340     type => 'manifest URI');
341     }
342    
343     $i++;
344    
345     my $uri2 = $manifest->[1]->{$uri1};
346     $listed->{$uri2} = 1;
347    
348     Whatpm::URIChecker->check_iri_reference ($uri2, sub {
349 wakaba 1.1 my %opt = @_;
350 wakaba 1.3 $onerror->(level => $opt{level}, index => 1, value => $uri2,
351     index => $i,
352 wakaba 1.1 type => 'URI::'.$opt{type}.
353     (defined $opt{position} ? ':'.$opt{position} : ''));
354     });
355 wakaba 1.3
356     if ($uri2 eq $manifest->[3]) {
357     $onerror->(level => $must_level, value => $uri2,
358     index => $i,
359     type => 'manifest URI');
360     }
361    
362     $i++;
363 wakaba 1.1 }
364    
365     for my $uri (@{$manifest->[2]}) {
366     if ($listed->{$uri}) {
367     $onerror->(type => 'both in entries and whitelist',
368 wakaba 1.3 index => $i,
369 wakaba 1.1 level => $must_level, value => $uri);
370     ## NOTE: MUST in writing section.
371     }
372    
373     Whatpm::URIChecker->check_iri_reference ($uri, sub {
374     my %opt = @_;
375     $onerror->(level => $opt{level}, value => $uri,
376 wakaba 1.3 index => $i,
377 wakaba 1.1 type => 'URI::'.$opt{type}.
378     (defined $opt{position} ? ':'.$opt{position} : ''));
379     });
380 wakaba 1.3
381     if ($uri eq $manifest->[3]) {
382     $onerror->(level => $must_level, value => $uri,
383     index => $i,
384     type => 'manifest URI');
385     }
386    
387     $i++;
388 wakaba 1.1 }
389     } # check_manifest
390    
391    
392     =head1 LICENSE
393    
394 wakaba 1.3 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
395 wakaba 1.1
396     This library is free software; you can redistribute it
397     and/or modify it under the same terms as Perl itself.
398    
399     =cut
400    
401     1;
402 wakaba 1.6 # $Date: 2008/05/10 06:04:39 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24