/[suikacvs]/markup/html/whatpm/Whatpm/IMTChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/IMTChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations) (download)
Fri Aug 29 13:34:36 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +108 -42 lines
++ whatpm/Whatpm/ChangeLog	29 Aug 2008 13:33:31 -0000
2008-08-29  Wakaba  <wakaba@suika.fam.cx>

	* IMTChecker.pm: Updated for the new error reporting architecture.

	* ContentChecker.pm: Error levels for IMTs are added.

++ whatpm/Whatpm/ContentChecker/ChangeLog	29 Aug 2008 13:34:24 -0000
2008-08-29  Wakaba  <wakaba@suika.fam.cx>

	* Atom.pm, HTML.pm: Made {level} inherited to the IMT checker.

1 wakaba 1.1 package Whatpm::IMTChecker;
2     use strict;
3    
4 wakaba 1.4 ## NOTE: RFC 2046 sucks, it is a poorly written specification such that
5     ## what we should do is not entirely clear and it does define almost nothing
6     ## from the today's viewpoint... Suprisingly, it's even worse than
7     ## RFC 1521, the previous version of that specification, which does
8     ## contain BNF rules for parameter values at least.
9    
10     my $default_error_levels = {
11     must => 'm',
12     warn => 'w',
13     info => 'i',
14     uncertain => 'u',
15    
16     mime_must => 'm', # lowercase "must"
17     mime_fact => 'm',
18     mime_strongly_discouraged => 'w',
19     mime_discouraged => 'w',
20     };
21 wakaba 1.1
22     our $Type;
23 wakaba 1.2
24     my $application_xml_charset = { ## TODO: ...
25     syntax => 'token',
26 wakaba 1.3 registered => 1,
27 wakaba 1.2 };
28    
29     $Type->{application}->{registered} = 1;
30    
31 wakaba 1.3 $Type->{application}->{subtype}->{'atom+xml'} = { ## NOTE: RFC 4287
32     parameter => {
33     type => { ## NOTE: RFC 5023
34     ## TODO: "entry"|"feed" (case-insensitive)
35     registered => 1,
36     ## NOTE: SHOULD for Atom Entry Document.
37     },
38     },
39     registered => 1,
40     };
41    
42 wakaba 1.2 $Type->{application}->{subtype}->{'rdf+xml'} = { # RFC 3870
43     parameter => {
44     charset => $application_xml_charset,
45     },
46     registered => 1,
47     ## RECOMMENDED that an RDF document follows new RDF/XML spec
48     ## rather than 1999 spec - this is not testable in this layer.
49     };
50    
51     $Type->{application}->{subtype}->{'rss+xml'} = {
52     parameter => {
53     },
54     ## NOTE: Not registered
55     };
56    
57     $Type->{audio}->{registered} = 1;
58    
59     $Type->{image}->{registered} = 1;
60    
61     $Type->{message}->{registered} = 1;
62    
63     $Type->{model}->{registered} = 1;
64    
65     $Type->{multipart}->{registered} = 1;
66    
67     $Type->{text}->{registered} = 1;
68    
69 wakaba 1.1 $Type->{text}->{subtype}->{plain} = {
70     parameter => {
71 wakaba 1.3 charset => {syntax => 'token', registered => 1}, # RFC 2046 ## TODO: registered?
72     'charset-edition' => {registered => 1}, # RFC 1922
73     'charset-extension' => {syntax => 'token', registered => 1}, # RFC 1922 ## TODO: registered?
74 wakaba 1.1 },
75 wakaba 1.2 registered => 1,
76 wakaba 1.1 };
77     $Type->{text}->{subtype}->{html} = { # RFC 2854
78     parameter => {
79 wakaba 1.3 charset => {registered => 1}, ## TODO: UTF-8 is preferred ## TODO: strongly recommended that it always be present ## NOTE: Syntax and range are not defined.
80 wakaba 1.4 level => {obsolete => 'mime_fact'}, # RFC 1866
81     version => {obsolete => 'mime_fact'}, # HTML 3.0
82 wakaba 1.1 },
83 wakaba 1.2 registered => 1,
84 wakaba 1.1 };
85     $Type->{text}->{subtype}->{css} = { # RFC 2318
86     parameter => {
87 wakaba 1.3 charset => {registered => 1}, ## TODO: US-ASCII, iso-8859-X, utf-8 are recommended ## TODO: Any charset that is a superset of US-ASCII may be used ## NOTE: Syntax and range are not defined.
88 wakaba 1.1 },
89 wakaba 1.2 registered => 1,
90 wakaba 1.1 };
91     $Type->{text}->{subtype}->{javascript} = { # RFC 4329
92     parameter => {
93 wakaba 1.3 charset => {syntax => 'mime-charset', registered => 1}, ## TODO: SHOULD be registered
94 wakaba 1.1 e4x => {checker => sub { # HTML5 (but informative?)
95 wakaba 1.4 my ($self, $value, $onerror) = @_;
96 wakaba 1.1 unless ($value eq '1') {
97 wakaba 1.4 $onerror->(type => 'e4x:syntax error',
98     level => $self->{level}->{info},
99     value => $value);
100 wakaba 1.2 ## NOTE: Whether values other than "1" is non-conformant
101     ## or not is not defined actually...
102 wakaba 1.1 }
103     }},
104     },
105     obsolete => 1,
106 wakaba 1.2 registered => 1,
107 wakaba 1.1 };
108     $Type->{text}->{subtype}->{ecmascript} = { # RFC 4329
109     parameter => {
110     charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
111     },
112 wakaba 1.2 registered => 1,
113 wakaba 1.1 };
114     $Type->{audio}->{subtype}->{mpeg} = { # RFC 3003
115 wakaba 1.2 registered => 1,
116 wakaba 1.1 };
117     my $CodecsParameter = { # RFC 4281
118     ## TODO: syntax and value check
119 wakaba 1.3 registered => 1,
120 wakaba 1.1 };
121     $Type->{audio}->{subtype}->{'3gpp'} = {
122     parameter => {
123     codecs => $CodecsParameter, # RFC 4281
124     },
125 wakaba 1.2 registered => 1,
126 wakaba 1.1 };
127 wakaba 1.2
128     $Type->{video}->{registered} = 1;
129    
130 wakaba 1.1 $Type->{video}->{subtype}->{'3gpp'} = {
131     parameter => {
132     codecs => $CodecsParameter, # RFC 4281
133     },
134 wakaba 1.2 registered => 1,
135 wakaba 1.1 };
136     $Type->{audio}->{subtype}->{'3gpp2'} = { # RFC 4393
137     parameter => {
138     codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
139     },
140 wakaba 1.2 registered => 1,
141 wakaba 1.1 };
142     $Type->{video}->{subtype}->{'3gpp2'} = { # RFC 4393
143     parameter => {
144     codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
145     },
146 wakaba 1.2 registered => 1,
147 wakaba 1.1 };
148     $Type->{application}->{subtype}->{'octet-stream'} = {
149     parameter => {
150 wakaba 1.4 conversions => {obsolete => 'mime_fact',
151     registered => 1}, # RFC 1341 ## TODO: syntax
152     name => {obsolete => 'mime_fact', registered => 1}, # RFC 1341
153 wakaba 1.3 padding => {registered => 1}, # RFC 2046
154     type => {registered => 1}, # RFC 2046
155 wakaba 1.1 },
156 wakaba 1.2 registered => 1,
157 wakaba 1.1 };
158     $Type->{application}->{subtype}->{javascript} = { # RFC 4329
159     parameter => {
160     charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
161     },
162 wakaba 1.2 registered => 1,
163 wakaba 1.1 };
164     $Type->{application}->{subtype}->{ecmascript} = { # RFC 4329
165     parameter => {
166     charset => $Type->{text}->{subtype}->{ecmascript}->{parameter}->{charset},
167     },
168 wakaba 1.2 registered => 1,
169 wakaba 1.1 };
170     $Type->{multipart}->{parameter}->{boundary} = {
171     checker => sub {
172 wakaba 1.4 my ($self, $value, $onerror) = @_;
173 wakaba 1.1 if ($value !~ /\A[0-9A-Za-z'()+_,.\x2F:=?-]{0,69}[0-9A-Za-z'()+_,.\x2F:=?\x20-]\z/) {
174 wakaba 1.4 $onerror->(type => 'boundary:syntax error',
175     level => $self->{level}->{mime_fact}, # TODO: correct?
176     value => $value);
177 wakaba 1.1 }
178     },
179     required => 1,
180 wakaba 1.3 registered => 1,
181 wakaba 1.1 };
182     $Type->{message}->{subtype}->{partial} = {
183     parameter => {
184 wakaba 1.3 id => {required => 1, registered => 1}, # RFC 2046
185     number => {required => 1, registered => 1}, # RFC 2046
186     total => {registered => 1}, # RFC 2046 # required for the last fragment
187 wakaba 1.1 },
188 wakaba 1.2 registered => 1,
189 wakaba 1.1 };
190     $Type->{message}->{subtype}->{'external-body'} = {
191     parameter => {
192     'access-type' => {
193     required => 1,
194     syntax => 'token', ## TODO: registry?
195 wakaba 1.3 registered => 1,
196 wakaba 1.1 }, # RFC 2046
197 wakaba 1.3 expiration => {syntax => 'MIME date-time', registered => 1}, # RFC 2046
198     permission => {registered => 1}, # RFC 2046
199     size => {registered => 1}, # RFC 2046
200 wakaba 1.1 ## TODO: access-type dependent parameters
201     },
202 wakaba 1.2 registered => 1,
203 wakaba 1.1 };
204    
205 wakaba 1.4 sub new ($) {
206     my $self = bless {}, shift;
207    
208     $self->{level} = $default_error_levels;
209    
210     return $self;
211     } # new
212 wakaba 1.3
213 wakaba 1.1 sub check_imt ($$$$@) {
214 wakaba 1.4 my $self = ref $_[0] ? shift : shift->new;
215     my ($onerror, $type, $subtype, @parameter) = @_;
216 wakaba 1.1
217 wakaba 1.3 require Message::IMT::InternetMediaType;
218     my $dom = Message::DOM::DOMImplementation->new;
219 wakaba 1.1
220     my $imt = $dom->create_internet_media_type ($type, $subtype);
221     while (@parameter) {
222     $imt->add_parameter (shift @parameter => shift @parameter);
223     ## NOTE: Attribute duplication are not error, though its semantics
224     ## is not defined.
225     ## See <http://suika.fam.cx/gate/2005/sw/%E5%AA%92%E4%BD%93%E5%9E%8B/%E5%BC%95%E6%95%B0>.
226     }
227    
228     my $type = $imt->top_level_type;
229     my $subtype = $imt->subtype;
230    
231 wakaba 1.3 ## NOTE: RFC 2045 (MIME), RFC 2616 (HTTP/1.1), and RFC 4288 (IMT
232     ## registration) have different requirements on type and subtype names.
233     if ($type !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
234 wakaba 1.4 $onerror->(type => 'IMT:type syntax error',
235     level => $self->{level}->{must}, # RFC 4288 4.2.
236     value => $type);
237 wakaba 1.3 }
238     if ($subtype !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
239 wakaba 1.4 $onerror->(type => 'IMT:subtype syntax error',
240     level => $self->{level}->{must}, # RFC 4288 4.2.
241     value => $subtype);
242 wakaba 1.3 }
243    
244 wakaba 1.1 my $type_def = $Type->{$type};
245     my $has_param;
246 wakaba 1.2
247 wakaba 1.3 if ($type =~ /^x-/) {
248 wakaba 1.4 $onerror->(type => 'IMT:private type',
249     level => $self->{level}->{mime_strongly_discouraged},
250     value => $type); # RFC 2046 6.
251     ## NOTE: "discouraged" in RFC 4288 3.4.
252 wakaba 1.3 } elsif (not $type_def or not $type_def->{registered}) {
253     #} elsif ($type_def and not $type_def->{registered}) {
254     ## NOTE: Top-level type is seldom added.
255    
256     ## NOTE: RFC 2046 6. "Any format without a rigorous and public
257     ## definition must be named with an "X-" prefix" (strictly, this
258     ## is not an author requirement, but a requirement for media
259     ## type specfication author and it does not restrict use of
260     ## unregistered value).
261 wakaba 1.4 $onerror->(type => 'IMT:unregistered type',
262     level => $self->{level}->{mime_must},
263     value => $type);
264 wakaba 1.2 }
265    
266 wakaba 1.1 if ($type_def) {
267     my $subtype_def = $type_def->{subtype}->{$subtype};
268 wakaba 1.2
269     if ($subtype =~ /^x[-\.]/) {
270 wakaba 1.4 $onerror->(type => 'IMT:private subtype',
271     level => $self->{level}->{mime_discouraged},
272     value => $type . '/' . $subtype);
273     ## NOTE: "x." and "x-" are discouraged in RFC 4288 3.4.
274 wakaba 1.2 } elsif ($subtype_def and not $subtype_def->{registered}) {
275 wakaba 1.3 ## NOTE: RFC 2046 6. "Any format without a rigorous and public
276     ## definition must be named with an "X-" prefix" (strictly, this
277     ## is not an author requirement, but a requirement for media
278     ## type specfication author and it does not restrict use of
279     ## unregistered value).
280 wakaba 1.4 $onerror->(type => 'IMT:unregistered subtype',
281     level => $self->{level}->{mime_must},
282     value => $type . '/' . $subtype);
283 wakaba 1.2 }
284    
285 wakaba 1.1 if ($subtype_def) {
286 wakaba 1.4 ## NOTE: Semantics (including its relationship between conformance)
287     ## is not defined for the "intended usage" keywords of the IMT
288     ## registration template.
289 wakaba 1.3 if ($subtype_def->{obsolete}) {
290 wakaba 1.4 $onerror->(type => 'IMT:obsolete subtype',
291     level => $self->{level}->{warn},
292     value => $type . '/' . $subtype);
293     } elsif ($subtype_def->{limited_use}) {
294     $onerror->(type => 'IMT:limited use subtype',
295     level => $self->{level}->{warn},
296     value => $type . '/' . $subtype);
297 wakaba 1.3 }
298    
299 wakaba 1.1 for (0..$imt->parameter_length-1) {
300     my $attr = $imt->get_attribute ($_);
301     my $value = $imt->get_value ($_);
302 wakaba 1.3
303     if ($attr !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
304 wakaba 1.4 $onerror->(type => 'IMT:attribute syntax error',
305     level => $self->{level}->{mime_fact}, # RFC 4288 4.3.
306     value => $attr);
307 wakaba 1.3 }
308    
309 wakaba 1.1 $has_param->{$attr} = 1;
310     my $param_def = $subtype_def->{parameter}->{$attr}
311     || $type_def->{parameter}->{$attr};
312     if ($param_def) {
313     if (defined $param_def->{syntax}) {
314     if ($param_def->{syntax} eq 'mime-charset') { # RFC 2978
315 wakaba 1.3 ## TODO: ...
316 wakaba 1.1 if ($value =~ /[^A-Za-z0-9!#\x23%&'+^_`{}~-]/) {
317     $onerror->(type => 'value syntax error:'.$attr, level => 'm');
318     }
319     } elsif ($param_def->{syntax} eq 'token') { # RFC 2046
320 wakaba 1.3 ## TODO: ...
321 wakaba 1.1 if ($value =~ /[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/) {
322     $onerror->(type => 'value syntax error:'.$attr, level => 'm');
323     }
324     }
325     ## TODO: syntax |MIME date-time|
326 wakaba 1.4 } elsif ($param_def->{checker}) {
327     $param_def->{checker}->($self, $value, $onerror);
328     }
329    
330     if ($param_def->{obsolete}) {
331     $onerror->(type => 'IMT:obsolete parameter',
332     level => $self->{level}->{$param_def->{obsolete}},
333     value => $attr);
334     ## NOTE: The value of |$param_def->{obsolete}|, if it has a
335     ## true value, must be "mime_fact", which represents that
336     ## the parameter is defined in a previous version of the MIME
337     ## specification (or a related specification) and then
338     ## removed or marked as obsolete such that it seems that use of
339     ## that parameter is made non-conforming without using any
340     ## explicit statement on that fact.
341 wakaba 1.1 }
342 wakaba 1.3 }
343     if (not $param_def or not $param_def->{registered}) {
344     if ($subtype =~ /\./ or $subtype =~ /^x-/ or $type =~ /^x-/) {
345     ## NOTE: The parameter names SHOULD be fully specified for
346     ## personal or vendor tree subtype [RFC 4288]. Therefore, there
347     ## might be unknown parameters and still conforming.
348 wakaba 1.4 $onerror->(type => 'IMT:unknown parameter',
349     level => $self->{level}->{uncertain},
350     value => $attr);
351 wakaba 1.3 } else {
352     ## NOTE: The parameter names MUST be fully specified for
353     ## standard tree. Therefore, unknown parameter is non-conforming,
354     ## unless it is standardized later.
355 wakaba 1.4 $onerror->(type => 'IMT:parameter not allowed',
356     level => $self->{level}->{mime_fact},
357     value => $attr);
358 wakaba 1.3 }
359 wakaba 1.1 }
360     }
361    
362     for (keys %{$subtype_def->{parameter} or {}}) {
363     if ($subtype_def->{parameter}->{$_}->{required} and
364     not $has_param->{$_}) {
365 wakaba 1.4 $onerror->(type => 'IMT:parameter missing',
366     level => $self->{level}->{mime_fact},
367     text => $_,
368     value => $type . '/' . $subtype);
369 wakaba 1.1 }
370     }
371     } else {
372 wakaba 1.4 ## NOTE: Since subtypes are frequently added to the IANAREG and such
373     ## that our database might be out-of-date, we don't raise an error
374     ## for an unknown subtype, instead we report an "uncertain" status.
375     $onerror->(type => 'IMT:unknown subtype',
376     level => $self->{level}->{uncertain},
377     value => $type . '/' . $subtype);
378 wakaba 1.1 }
379    
380     for (keys %{$type_def->{parameter} or {}}) {
381     if ($type_def->{parameter}->{$_}->{required} and
382     not $has_param->{$_}) {
383 wakaba 1.4 $onerror->(type => 'IMT:parameter missing',
384     level => $self->{level}->{mime_fact},
385     text => $_,
386     value => $type . '/' . $subtype);
387 wakaba 1.1 }
388     }
389     }
390     } # check_imt
391    
392     1;
393 wakaba 1.4 ## $Date: 2007/11/23 14:47:49 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24