/[suikacvs]/markup/html/whatpm/Whatpm/CacheManifest.pm
Suika

Contents of /markup/html/whatpm/Whatpm/CacheManifest.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations) (download)
Sun Aug 31 13:27:33 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.7: +55 -8 lines
++ whatpm/Whatpm/ChangeLog	31 Aug 2008 13:23:51 -0000
	* CacheManifest.pm: Support for extensibility (HTML5 revision 2051).

2008-08-31  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::CacheManifest;
2     use strict;
3 wakaba 1.8 our $VERSION=do{my @r=(q$Revision: 1.7 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1 require Message::URI::URIReference;
5    
6 wakaba 1.7 sub parse_byte_string ($$$$;$$) {
7 wakaba 1.1 require Encode;
8     my $s = Encode::decode ('utf-8', $_[1]);
9     return $_[0]->_parse (\$s, $_[2], $_[3], $_[4] || sub {
10     my %err = @_;
11     warn $err{type}, "\n";
12 wakaba 1.7 }, $_[5]);
13 wakaba 1.1 } # parse_byte_string
14    
15 wakaba 1.7 sub parse_char_string ($$$$;$$) {
16 wakaba 1.1 return $_[0]->_parse (\($_[1]), $_[2], $_[3], $_[4] || sub {
17     my %err = @_;
18     warn $err{type}, "\n";
19 wakaba 1.7 }, $_[5]);
20 wakaba 1.1 } # parse_char_string
21    
22 wakaba 1.7 my $default_error_levels = {
23     must => 'm',
24     info => 'i',
25     };
26    
27     sub _parse ($$$$$$) {
28     #my (undef, $input, $manifest_uri, $base_uri, $onerror, $levels) = @_;
29 wakaba 1.1
30     ## NOTE: A manifest MUST be labeled as text/cache-manifest. (This should
31     ## be checked in upper-level).
32     ## NOTE: No "MUST" for being UTF-8.
33     ## NOTE: A |text/cache-manifest| MUST be a cache manifest.
34     ## NOTE: Newlines MUST be CR/CRLF/LF. (We don't and can't check this.)
35    
36 wakaba 1.2 ## ISSUE: In RFC 2046: "The specification for any future subtypes of "text" must specify whether or not they will also utilize a "charset" parameter"
37    
38 wakaba 1.1 my $m_uri = Message::DOM::DOMImplementation->create_uri_reference ($_[2]);
39     my $m_scheme = $m_uri->uri_scheme;
40    
41     my $onerror = $_[4];
42 wakaba 1.7 my $levels = $_[5] || $default_error_levels;
43    
44 wakaba 1.1 my $line_number = 1;
45    
46 wakaba 1.4 ## Same origin with the manifest's URI
47     my $same_origin = sub {
48     ## NOTE: Step numbers in this function corresponds to those in the
49     ## algorithm for determining the origin of a URI specified in HTML5.
50 wakaba 1.1
51 wakaba 1.4 ## 1. and 2.
52 wakaba 1.1 my $u1 = shift;
53 wakaba 1.4 #my $m_uri = $m_uri;
54    
55 wakaba 1.7
56 wakaba 1.4 ## 3.
57     return 0 unless defined $u1->uri_authority;
58     return 0 unless defined $m_uri->uri_authority;
59     ## TODO: In addition, in the case of URIs with non-server-based authority
60     ## it must also return 0.
61 wakaba 1.1
62 wakaba 1.4 ## 4.
63 wakaba 1.3 unless (lc $u1->uri_scheme eq lc $m_scheme) { ## TODO: case
64 wakaba 1.1 return 0;
65     }
66 wakaba 1.4 ## TODO: Return if $u1->uri_scheme is not a supported scheme.
67     ## NOTE: $m_scheme is always a supported URI scheme, otherwise
68     ## the manifest itself cannot be retrieved.
69 wakaba 1.1
70 wakaba 1.4 ## 5., 6., and 7.
71 wakaba 1.1 return 0 unless $u1->uri_host eq $m_uri->uri_host;
72 wakaba 1.4 ## TODO: IDNA ToASCII
73    
74     ## 8.
75 wakaba 1.1 return 0 unless $u1->uri_port eq $m_uri->uri_port;
76 wakaba 1.4 ## TODO: default port
77 wakaba 1.1
78 wakaba 1.4 ## 9.
79 wakaba 1.1 return 1;
80 wakaba 1.4 }; # $same_origin
81 wakaba 1.1
82     ## Step 5
83     my $input = $_[1];
84    
85     ## Step 1: MUST bytes --UTF-8--> characters.
86 wakaba 1.5 ## NOTE: illegal(s) -> U+FFFD, #U+0000 -> U+FFFD (commented out in r1553).
87     #$$input =~ tr/\x00/\x{FFFD}/;
88 wakaba 1.1
89     ## Step 2
90     my $explicit_uris = [];
91    
92     ## Step 3
93     my $fallback_uris = {};
94    
95     ## Step 4
96     my $online_whitelist_uris = [];
97    
98     ## Step 6
99     pos ($$input) = 0;
100    
101     ## Step 7
102     ## Skip BOM ## NOTE: MAY in syntax.
103    
104     ## Step 8-10
105     unless ($$input =~ /^CACHE MANIFEST[\x20\x09]*(?![^\x0D\x0A])/gc) {
106 wakaba 1.7 $onerror->(type => 'not manifest', level => $levels->{must},
107 wakaba 1.1 line => $line_number, column => 1); ## NOTE: MUST in syntax.
108     return; ## Not a manifest.
109     }
110    
111     ## Step 11
112     ## This is a cache manifest.
113    
114     ## Step 12
115     my $mode = 'explicit';
116    
117     ## NOTE: MUST be (blank line|comment|section head|data for the current
118     ## section)*.
119    
120     ## Step 13
121     START_OF_LINE: while (pos $$input < length $$input) {
122     $$input =~ /([\x0A\x0D\x20\x09]+)/gc;
123     my $v = $1;
124 wakaba 1.4 $line_number++ for $v =~ /\x0D\x0A?|\x0A/g;
125 wakaba 1.1
126     ## Step 14
127     $$input =~ /([^\x0A\x0D]*)/gc;
128     my $line = $1;
129    
130     ## Step 15
131 wakaba 1.3 $line =~ s/[\x20\x09]+\z//;
132 wakaba 1.1
133 wakaba 1.3 ## Step 16-17
134     if ($line eq '' or $line =~ /^#/) {
135     next START_OF_LINE;
136     }
137 wakaba 1.1
138     if ($line eq 'CACHE:') {
139 wakaba 1.3 ## Step 18
140 wakaba 1.1 $mode = 'explicit';
141     next START_OF_LINE;
142     } elsif ($line eq 'FALLBACK:') {
143 wakaba 1.3 ## Step 19
144 wakaba 1.1 $mode = 'fallback';
145     next START_OF_LINE;
146     } elsif ($line eq 'NETWORK:') {
147 wakaba 1.3 ## Step 20
148 wakaba 1.1 $mode = 'online whitelist';
149     next START_OF_LINE;
150 wakaba 1.8 } elsif ($line =~ /:\z/) {
151     ## Step 21
152     $mode = 'unknown';
153    
154     $onerror->(type => 'manifest:unknown section',
155     level => $levels->{must},
156     line => $line_number, column => 1,
157     value => $line);
158    
159     next START_OF_LINE;
160 wakaba 1.1 }
161    
162     ## NOTE: "URIs that are to be fallback pages associated with
163     ## opportunistic caching namespaces, and those namespaces themselves,
164     ## MUST be given in fallback sections, with the namespace being the
165     ## first URI of the data line, and the corresponding fallback page
166     ## being the second URI. All the other pages to be cached MUST be
167     ## listed in explicit sections." in writing section can't be tested.
168     ## NOTE: "URIs that the user agent is to put into the online whitelist
169     ## MUST all be specified in online whitelist sections." in writing
170     ## section can't be tested.
171    
172     ## NOTE: "Relative URIs MUST be given relative to the manifest's own URI."
173     ## requirement in writing section can't be tested.
174    
175 wakaba 1.8 ## Step 22
176     ## "This is either a data line or it is syntactically incorrect."
177    
178     ## Step 23-26
179     my $tokens = [split /[\x09\x20]+/, $line];
180     shift @$tokens if $tokens->[0] eq ''; # leading white space
181     ## NOTE: Now, @$tokens contains at least one non-empty string.
182    
183     ## Step 27
184 wakaba 1.1 if ($mode eq 'explicit') {
185 wakaba 1.8 if (@$tokens > 1) {
186     $onerror->(type => 'manifest:too many tokens',
187     level => $levels->{must},
188     line => $line_number, column => 1,
189     value => $tokens->[1]);
190     }
191    
192     my $uri = Message::DOM::DOMImplementation->create_uri_reference
193     ($tokens->[0]);
194 wakaba 1.1
195     unless ($uri->is_iri_reference_3987) {
196 wakaba 1.7 $onerror->(type => 'syntax error:iriref3987',
197     level => $levels->{must},
198     line => $line_number, column => 1,
199 wakaba 1.1 value => $line);
200     next START_OF_LINE; ## NOTE: MUST in syntax.
201     }
202    
203     $uri = $uri->get_absolute_reference ($_[3]);
204    
205     if (defined $uri->uri_fragment) {
206     $uri->uri_fragment (undef);
207 wakaba 1.7 $onerror->(type => 'URL fragment not allowed',
208     level => $levels->{must},
209     line => $line_number, column => 1,
210 wakaba 1.1 value => $line);
211     ## NOTE: MUST in writing section.
212     }
213    
214     my $scheme = $uri->uri_scheme;
215     unless (defined $scheme and $scheme eq $m_scheme) {
216     $onerror->(type => 'different scheme from manifest',
217 wakaba 1.7 level => $levels->{info},
218     line => $line_number, column => 1,
219 wakaba 1.1 value => $uri->uri_reference);
220     next START_OF_LINE;
221     }
222    
223     push @$explicit_uris, $uri->uri_reference;
224     } elsif ($mode eq 'fallback') {
225 wakaba 1.8 if (@$tokens > 2) {
226     $onerror->(type => 'manifest:too many tokens',
227     level => $levels->{must},
228     line => $line_number, column => 1,
229     value => $tokens->[2]);
230     }
231    
232     my ($p1, $p2) = (@$tokens);
233 wakaba 1.1
234     unless (defined $p2) {
235 wakaba 1.7 $onerror->(type => 'no fallback entry URL',
236     level => $levels->{must},
237     line => $line_number, column => 1,
238 wakaba 1.1 value => $line);
239 wakaba 1.8
240     ## ISSUE: The following is dropped in r2051 (error?)
241 wakaba 1.1 next START_OF_LINE; ## NOTE: MUST in syntax.
242     }
243    
244     my $u1 = Message::DOM::DOMImplementation->create_uri_reference ($p1);
245    
246     unless ($u1->is_iri_reference_3987) {
247 wakaba 1.7 $onerror->(type => 'syntax error:iriref3987',
248     level => $levels->{must},
249     line => $line_number, column => 1,
250 wakaba 1.1 index => 0, value => $p1);
251     next START_OF_LINE; ## NOTE: MUST in syntax.
252     }
253    
254     my $u2 = Message::DOM::DOMImplementation->create_uri_reference ($p2);
255    
256     unless ($u2->is_iri_reference_3987) {
257 wakaba 1.7 $onerror->(type => 'syntax error:iriref3987',
258     level => $levels->{must},
259     line => $line_number, column => 1,
260 wakaba 1.1 index => 1, value => $p2);
261     next START_OF_LINE; ## NOTE: MUST in syntax.
262     }
263    
264     if (defined $u1->uri_fragment) {
265 wakaba 1.6 $u1->uri_fragment (undef);
266 wakaba 1.7 $onerror->(type => 'URL fragment not allowed',
267     level => $levels->{must},
268     line => $line_number, column => 1,
269 wakaba 1.1 index => 0, value => $p1);
270     ## NOTE: MUST in writing section.
271     }
272    
273     if (defined $u2->uri_fragment) {
274 wakaba 1.6 $u2->uri_fragment (undef);
275 wakaba 1.7 $onerror->(type => 'URL fragment not allowed',
276     level => $levels->{must},
277     line => $line_number, column => 1,
278 wakaba 1.1 index => 1, value => $p2);
279     ## NOTE: MUST in writing section.
280     }
281    
282     $u1 = $u1->get_absolute_reference ($_[3]);
283     $u2 = $u2->get_absolute_reference ($_[3]);
284    
285     if (exists $fallback_uris->{$u1->uri_reference}) {
286     $onerror->(type => 'duplicate oc namespace',
287 wakaba 1.7 level => $levels->{must},
288     line => $line_number, column => 1,
289 wakaba 1.1 index => 0, value => $u1->uri_reference);
290     next START_OF_LINE; ## NOTE: MUST in syntax.
291     }
292    
293 wakaba 1.4 unless ($same_origin->($u1)) {
294 wakaba 1.7 $onerror->(type => 'different origin from manifest',
295     level => $levels->{must},
296     line => $line_number, column => 1,
297 wakaba 1.1 index => 0, value => $u1->uri_reference);
298     next START_OF_LINE; ## NOTE: MUST in syntax.
299     }
300    
301     my $u2_scheme = $u2->uri_scheme;
302     unless (defined $u2_scheme and $u2_scheme eq $m_scheme) {
303     $onerror->(type => 'different scheme from manifest',
304 wakaba 1.7 level => $levels->{info},
305     line => $line_number, column => 1,
306 wakaba 1.1 index => 1, value => $u2->uri_reference);
307     next START_OF_LINE;
308     }
309    
310     $fallback_uris->{$u1->uri_reference} = $u2->uri_reference;
311     } elsif ($mode eq 'online whitelist') {
312 wakaba 1.8 if (@$tokens > 1) {
313     $onerror->(type => 'manifest:too many tokens',
314     level => $levels->{must},
315     line => $line_number, column => 1,
316     value => $tokens->[1]);
317     }
318    
319     my $uri = Message::DOM::DOMImplementation->create_uri_reference
320     ($tokens->[0]);
321 wakaba 1.1
322     unless ($uri->is_iri_reference_3987) {
323 wakaba 1.7 $onerror->(type => 'syntax error:iriref3987',
324     level => $levels->{must},
325     line => $line_number, column => 1,
326 wakaba 1.1 value => $line);
327     next START_OF_LINE; ## NOTE: MUST in syntax.
328     }
329    
330     $uri = $uri->get_absolute_reference ($_[3]);
331    
332     if (defined $uri->uri_fragment) {
333     $uri->uri_fragment (undef);
334 wakaba 1.7 $onerror->(type => 'URL fragment not allowed',
335     level => $levels->{must},
336     line => $line_number, column => 1,
337 wakaba 1.1 value => $line);
338     ## NOTE: MUST in writing section.
339     }
340    
341     my $scheme = $uri->uri_scheme;
342     unless (defined $scheme and $scheme eq $m_scheme) {
343     $onerror->(type => 'different scheme from manifest',
344 wakaba 1.7 level => $levels->{info},
345     line => $line_number, column => 1,
346 wakaba 1.1 value => $uri->uri_reference);
347     next START_OF_LINE;
348     }
349    
350     push @$online_whitelist_uris, $uri->uri_reference;
351 wakaba 1.8 } elsif ($mode eq 'unknown') {
352     ## NOTE: Do nothing.
353     ## NOTE: No informational message is thrown, since when the mode
354     ## is switched to the "unknown" state an error is raised.
355 wakaba 1.1 }
356    
357 wakaba 1.8 ## Step 28
358 wakaba 1.1 #next START_OF_LINE;
359     } # START_OF_LINE
360    
361 wakaba 1.8 ## Step 29
362 wakaba 1.3 return [$explicit_uris, $fallback_uris, $online_whitelist_uris,
363     $m_uri->uri_reference];
364 wakaba 1.1 } # _parse
365    
366 wakaba 1.7 sub check_manifest ($$$;$) {
367     my (undef, $manifest, $onerror, $levels) = @_;
368 wakaba 1.1
369     my $listed = {};
370 wakaba 1.7
371     $levels ||= $default_error_levels;
372 wakaba 1.1
373     require Whatpm::URIChecker;
374    
375 wakaba 1.3 my $i = 0;
376 wakaba 1.1 for my $uri (@{$manifest->[0]}) {
377     $listed->{$uri} = 1;
378    
379     Whatpm::URIChecker->check_iri_reference ($uri, sub {
380 wakaba 1.7 $onerror->(value => $uri, @_, index => $i);
381 wakaba 1.1 });
382 wakaba 1.3
383     ## ISSUE: Literal equivalence, right?
384     if ($uri eq $manifest->[3]) {
385 wakaba 1.7 $onerror->(level => $levels->{must}, value => $uri,
386 wakaba 1.3 index => $i,
387 wakaba 1.7 type => 'same as manifest URL');
388 wakaba 1.3 }
389    
390     $i++;
391 wakaba 1.1 }
392    
393 wakaba 1.3 for my $uri1 (sort {$a cmp $b} keys %{$manifest->[1]}) {
394     Whatpm::URIChecker->check_iri_reference ($uri1, sub {
395 wakaba 1.7 $onerror->(value => $uri1, @_, index => 0);
396 wakaba 1.1 });
397    
398 wakaba 1.3 if ($uri1 eq $manifest->[3]) {
399 wakaba 1.7 $onerror->(level => $levels->{must}, value => $uri1,
400 wakaba 1.3 index => $i,
401 wakaba 1.7 type => 'same as manifest URL');
402 wakaba 1.3 }
403    
404     $i++;
405    
406     my $uri2 = $manifest->[1]->{$uri1};
407     $listed->{$uri2} = 1;
408    
409     Whatpm::URIChecker->check_iri_reference ($uri2, sub {
410 wakaba 1.7 $onerror->(value => $uri2, @_, index => 1);
411 wakaba 1.1 });
412 wakaba 1.3
413     if ($uri2 eq $manifest->[3]) {
414 wakaba 1.7 $onerror->(level => $levels->{must}, value => $uri2,
415 wakaba 1.3 index => $i,
416 wakaba 1.7 type => 'same as manifest URL');
417 wakaba 1.3 }
418    
419     $i++;
420 wakaba 1.1 }
421    
422     for my $uri (@{$manifest->[2]}) {
423     if ($listed->{$uri}) {
424     $onerror->(type => 'both in entries and whitelist',
425 wakaba 1.3 index => $i,
426 wakaba 1.7 level => $levels->{must}, value => $uri);
427 wakaba 1.1 ## NOTE: MUST in writing section.
428     }
429    
430     Whatpm::URIChecker->check_iri_reference ($uri, sub {
431 wakaba 1.7 $onerror->(value => $uri, @_, index => $i);
432 wakaba 1.1 });
433 wakaba 1.3
434     if ($uri eq $manifest->[3]) {
435 wakaba 1.7 $onerror->(level => $levels->{must}, value => $uri,
436 wakaba 1.3 index => $i,
437 wakaba 1.7 type => 'same as manifest URL');
438 wakaba 1.3 }
439    
440     $i++;
441 wakaba 1.1 }
442     } # check_manifest
443    
444    
445     =head1 LICENSE
446    
447 wakaba 1.3 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
448 wakaba 1.1
449     This library is free software; you can redistribute it
450     and/or modify it under the same terms as Perl itself.
451    
452     =cut
453    
454     1;
455 wakaba 1.8 # $Date: 2008/08/16 07:35:22 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24