/[suikacvs]/markup/html/whatpm/Whatpm/IMTChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/IMTChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations) (download)
Fri Dec 12 05:05:20 2008 UTC (15 years, 10 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.4: +25 -1 lines
++ whatpm/t/ChangeLog	12 Dec 2008 05:03:53 -0000
2008-12-12  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.t: Added new test data file.

++ whatpm/t/dom-conformance/ChangeLog	12 Dec 2008 05:04:54 -0000
2008-12-12  Wakaba  <wakaba@suika.fam.cx>

	* html-form-input-1.dat, html-forms-1.dat: Added test data for
	@accept and @pattern.

	* html-form-textarea.dat: New test data file.

++ whatpm/Whatpm/ChangeLog	12 Dec 2008 05:00:26 -0000
2008-12-12  Wakaba  <wakaba@suika.fam.cx>

	* IMTChecker.pm: Added more definitions for subtypes.

++ whatpm/Whatpm/ContentChecker/ChangeLog	12 Dec 2008 05:03:26 -0000
2008-12-12  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Implemented <input accept> and <form accept>.  Raise a
	SHOULD-level error if @pattern but no @title.

1 wakaba 1.1 package Whatpm::IMTChecker;
2     use strict;
3    
4 wakaba 1.4 ## NOTE: RFC 2046 sucks, it is a poorly written specification such that
5     ## what we should do is not entirely clear and it does define almost nothing
6     ## from the today's viewpoint... Suprisingly, it's even worse than
7     ## RFC 1521, the previous version of that specification, which does
8     ## contain BNF rules for parameter values at least.
9    
10     my $default_error_levels = {
11     must => 'm',
12     warn => 'w',
13     info => 'i',
14     uncertain => 'u',
15    
16     mime_must => 'm', # lowercase "must"
17     mime_fact => 'm',
18     mime_strongly_discouraged => 'w',
19     mime_discouraged => 'w',
20     };
21 wakaba 1.1
22     our $Type;
23 wakaba 1.2
24     my $application_xml_charset = { ## TODO: ...
25     syntax => 'token',
26 wakaba 1.3 registered => 1,
27 wakaba 1.2 };
28    
29     $Type->{application}->{registered} = 1;
30    
31 wakaba 1.3 $Type->{application}->{subtype}->{'atom+xml'} = { ## NOTE: RFC 4287
32     parameter => {
33     type => { ## NOTE: RFC 5023
34     ## TODO: "entry"|"feed" (case-insensitive)
35     registered => 1,
36     ## NOTE: SHOULD for Atom Entry Document.
37     },
38     },
39     registered => 1,
40     };
41    
42 wakaba 1.2 $Type->{application}->{subtype}->{'rdf+xml'} = { # RFC 3870
43     parameter => {
44     charset => $application_xml_charset,
45     },
46     registered => 1,
47     ## RECOMMENDED that an RDF document follows new RDF/XML spec
48     ## rather than 1999 spec - this is not testable in this layer.
49     };
50    
51     $Type->{application}->{subtype}->{'rss+xml'} = {
52     parameter => {
53     },
54     ## NOTE: Not registered
55     };
56    
57 wakaba 1.5 $Type->{application}->{subtype}->{xml} = { ## TODO: check IANAREG
58     registered => 1,
59     };
60    
61 wakaba 1.2 $Type->{audio}->{registered} = 1;
62    
63 wakaba 1.5 $Type->{audio}->{subtype}->{basic} = { ## TODO: check IANAREG
64     registered => 1,
65     };
66    
67     $Type->{audio}->{subtype}->{mpeg} = { ## TODO: check IANAREG
68     registered => 1,
69     };
70    
71 wakaba 1.2 $Type->{image}->{registered} = 1;
72    
73 wakaba 1.5 $Type->{image}->{subtype}->{jpeg} = { ## TODO: check IANAREG
74     registered => 1,
75     };
76    
77     $Type->{image}->{subtype}->{png} = { ## TODO: check IANAREG
78     registered => 1,
79     };
80    
81     $Type->{image}->{subtype}->{'svg+xml'} = { ## TODO: check IANAREG
82     registered => 1,
83     };
84    
85 wakaba 1.2 $Type->{message}->{registered} = 1;
86    
87     $Type->{model}->{registered} = 1;
88    
89     $Type->{multipart}->{registered} = 1;
90    
91     $Type->{text}->{registered} = 1;
92    
93 wakaba 1.1 $Type->{text}->{subtype}->{plain} = {
94     parameter => {
95 wakaba 1.3 charset => {syntax => 'token', registered => 1}, # RFC 2046 ## TODO: registered?
96     'charset-edition' => {registered => 1}, # RFC 1922
97     'charset-extension' => {syntax => 'token', registered => 1}, # RFC 1922 ## TODO: registered?
98 wakaba 1.1 },
99 wakaba 1.2 registered => 1,
100 wakaba 1.1 };
101     $Type->{text}->{subtype}->{html} = { # RFC 2854
102     parameter => {
103 wakaba 1.3 charset => {registered => 1}, ## TODO: UTF-8 is preferred ## TODO: strongly recommended that it always be present ## NOTE: Syntax and range are not defined.
104 wakaba 1.4 level => {obsolete => 'mime_fact'}, # RFC 1866
105     version => {obsolete => 'mime_fact'}, # HTML 3.0
106 wakaba 1.1 },
107 wakaba 1.2 registered => 1,
108 wakaba 1.1 };
109     $Type->{text}->{subtype}->{css} = { # RFC 2318
110     parameter => {
111 wakaba 1.3 charset => {registered => 1}, ## TODO: US-ASCII, iso-8859-X, utf-8 are recommended ## TODO: Any charset that is a superset of US-ASCII may be used ## NOTE: Syntax and range are not defined.
112 wakaba 1.1 },
113 wakaba 1.2 registered => 1,
114 wakaba 1.1 };
115     $Type->{text}->{subtype}->{javascript} = { # RFC 4329
116     parameter => {
117 wakaba 1.3 charset => {syntax => 'mime-charset', registered => 1}, ## TODO: SHOULD be registered
118 wakaba 1.1 e4x => {checker => sub { # HTML5 (but informative?)
119 wakaba 1.4 my ($self, $value, $onerror) = @_;
120 wakaba 1.1 unless ($value eq '1') {
121 wakaba 1.4 $onerror->(type => 'e4x:syntax error',
122     level => $self->{level}->{info},
123     value => $value);
124 wakaba 1.2 ## NOTE: Whether values other than "1" is non-conformant
125     ## or not is not defined actually...
126 wakaba 1.1 }
127     }},
128     },
129     obsolete => 1,
130 wakaba 1.2 registered => 1,
131 wakaba 1.1 };
132     $Type->{text}->{subtype}->{ecmascript} = { # RFC 4329
133     parameter => {
134     charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
135     },
136 wakaba 1.2 registered => 1,
137 wakaba 1.1 };
138     $Type->{audio}->{subtype}->{mpeg} = { # RFC 3003
139 wakaba 1.2 registered => 1,
140 wakaba 1.1 };
141     my $CodecsParameter = { # RFC 4281
142     ## TODO: syntax and value check
143 wakaba 1.3 registered => 1,
144 wakaba 1.1 };
145     $Type->{audio}->{subtype}->{'3gpp'} = {
146     parameter => {
147     codecs => $CodecsParameter, # RFC 4281
148     },
149 wakaba 1.2 registered => 1,
150 wakaba 1.1 };
151 wakaba 1.2
152     $Type->{video}->{registered} = 1;
153    
154 wakaba 1.1 $Type->{video}->{subtype}->{'3gpp'} = {
155     parameter => {
156     codecs => $CodecsParameter, # RFC 4281
157     },
158 wakaba 1.2 registered => 1,
159 wakaba 1.1 };
160     $Type->{audio}->{subtype}->{'3gpp2'} = { # RFC 4393
161     parameter => {
162     codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
163     },
164 wakaba 1.2 registered => 1,
165 wakaba 1.1 };
166     $Type->{video}->{subtype}->{'3gpp2'} = { # RFC 4393
167     parameter => {
168     codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
169     },
170 wakaba 1.2 registered => 1,
171 wakaba 1.1 };
172     $Type->{application}->{subtype}->{'octet-stream'} = {
173     parameter => {
174 wakaba 1.4 conversions => {obsolete => 'mime_fact',
175     registered => 1}, # RFC 1341 ## TODO: syntax
176     name => {obsolete => 'mime_fact', registered => 1}, # RFC 1341
177 wakaba 1.3 padding => {registered => 1}, # RFC 2046
178     type => {registered => 1}, # RFC 2046
179 wakaba 1.1 },
180 wakaba 1.2 registered => 1,
181 wakaba 1.1 };
182     $Type->{application}->{subtype}->{javascript} = { # RFC 4329
183     parameter => {
184     charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
185     },
186 wakaba 1.2 registered => 1,
187 wakaba 1.1 };
188     $Type->{application}->{subtype}->{ecmascript} = { # RFC 4329
189     parameter => {
190     charset => $Type->{text}->{subtype}->{ecmascript}->{parameter}->{charset},
191     },
192 wakaba 1.2 registered => 1,
193 wakaba 1.1 };
194     $Type->{multipart}->{parameter}->{boundary} = {
195     checker => sub {
196 wakaba 1.4 my ($self, $value, $onerror) = @_;
197 wakaba 1.1 if ($value !~ /\A[0-9A-Za-z'()+_,.\x2F:=?-]{0,69}[0-9A-Za-z'()+_,.\x2F:=?\x20-]\z/) {
198 wakaba 1.4 $onerror->(type => 'boundary:syntax error',
199     level => $self->{level}->{mime_fact}, # TODO: correct?
200     value => $value);
201 wakaba 1.1 }
202     },
203     required => 1,
204 wakaba 1.3 registered => 1,
205 wakaba 1.1 };
206     $Type->{message}->{subtype}->{partial} = {
207     parameter => {
208 wakaba 1.3 id => {required => 1, registered => 1}, # RFC 2046
209     number => {required => 1, registered => 1}, # RFC 2046
210     total => {registered => 1}, # RFC 2046 # required for the last fragment
211 wakaba 1.1 },
212 wakaba 1.2 registered => 1,
213 wakaba 1.1 };
214     $Type->{message}->{subtype}->{'external-body'} = {
215     parameter => {
216     'access-type' => {
217     required => 1,
218     syntax => 'token', ## TODO: registry?
219 wakaba 1.3 registered => 1,
220 wakaba 1.1 }, # RFC 2046
221 wakaba 1.3 expiration => {syntax => 'MIME date-time', registered => 1}, # RFC 2046
222     permission => {registered => 1}, # RFC 2046
223     size => {registered => 1}, # RFC 2046
224 wakaba 1.1 ## TODO: access-type dependent parameters
225     },
226 wakaba 1.2 registered => 1,
227 wakaba 1.1 };
228    
229 wakaba 1.4 sub new ($) {
230     my $self = bless {}, shift;
231    
232     $self->{level} = $default_error_levels;
233    
234     return $self;
235     } # new
236 wakaba 1.3
237 wakaba 1.1 sub check_imt ($$$$@) {
238 wakaba 1.4 my $self = ref $_[0] ? shift : shift->new;
239     my ($onerror, $type, $subtype, @parameter) = @_;
240 wakaba 1.1
241 wakaba 1.3 require Message::IMT::InternetMediaType;
242     my $dom = Message::DOM::DOMImplementation->new;
243 wakaba 1.1
244     my $imt = $dom->create_internet_media_type ($type, $subtype);
245     while (@parameter) {
246     $imt->add_parameter (shift @parameter => shift @parameter);
247     ## NOTE: Attribute duplication are not error, though its semantics
248     ## is not defined.
249     ## See <http://suika.fam.cx/gate/2005/sw/%E5%AA%92%E4%BD%93%E5%9E%8B/%E5%BC%95%E6%95%B0>.
250     }
251    
252     my $type = $imt->top_level_type;
253     my $subtype = $imt->subtype;
254    
255 wakaba 1.3 ## NOTE: RFC 2045 (MIME), RFC 2616 (HTTP/1.1), and RFC 4288 (IMT
256     ## registration) have different requirements on type and subtype names.
257     if ($type !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
258 wakaba 1.4 $onerror->(type => 'IMT:type syntax error',
259     level => $self->{level}->{must}, # RFC 4288 4.2.
260     value => $type);
261 wakaba 1.3 }
262     if ($subtype !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
263 wakaba 1.4 $onerror->(type => 'IMT:subtype syntax error',
264     level => $self->{level}->{must}, # RFC 4288 4.2.
265     value => $subtype);
266 wakaba 1.3 }
267    
268 wakaba 1.1 my $type_def = $Type->{$type};
269     my $has_param;
270 wakaba 1.2
271 wakaba 1.3 if ($type =~ /^x-/) {
272 wakaba 1.4 $onerror->(type => 'IMT:private type',
273     level => $self->{level}->{mime_strongly_discouraged},
274     value => $type); # RFC 2046 6.
275     ## NOTE: "discouraged" in RFC 4288 3.4.
276 wakaba 1.3 } elsif (not $type_def or not $type_def->{registered}) {
277     #} elsif ($type_def and not $type_def->{registered}) {
278     ## NOTE: Top-level type is seldom added.
279    
280     ## NOTE: RFC 2046 6. "Any format without a rigorous and public
281     ## definition must be named with an "X-" prefix" (strictly, this
282     ## is not an author requirement, but a requirement for media
283     ## type specfication author and it does not restrict use of
284     ## unregistered value).
285 wakaba 1.4 $onerror->(type => 'IMT:unregistered type',
286     level => $self->{level}->{mime_must},
287     value => $type);
288 wakaba 1.2 }
289    
290 wakaba 1.1 if ($type_def) {
291     my $subtype_def = $type_def->{subtype}->{$subtype};
292 wakaba 1.2
293     if ($subtype =~ /^x[-\.]/) {
294 wakaba 1.4 $onerror->(type => 'IMT:private subtype',
295     level => $self->{level}->{mime_discouraged},
296     value => $type . '/' . $subtype);
297     ## NOTE: "x." and "x-" are discouraged in RFC 4288 3.4.
298 wakaba 1.2 } elsif ($subtype_def and not $subtype_def->{registered}) {
299 wakaba 1.3 ## NOTE: RFC 2046 6. "Any format without a rigorous and public
300     ## definition must be named with an "X-" prefix" (strictly, this
301     ## is not an author requirement, but a requirement for media
302     ## type specfication author and it does not restrict use of
303     ## unregistered value).
304 wakaba 1.4 $onerror->(type => 'IMT:unregistered subtype',
305     level => $self->{level}->{mime_must},
306     value => $type . '/' . $subtype);
307 wakaba 1.2 }
308    
309 wakaba 1.1 if ($subtype_def) {
310 wakaba 1.4 ## NOTE: Semantics (including its relationship between conformance)
311     ## is not defined for the "intended usage" keywords of the IMT
312     ## registration template.
313 wakaba 1.3 if ($subtype_def->{obsolete}) {
314 wakaba 1.4 $onerror->(type => 'IMT:obsolete subtype',
315     level => $self->{level}->{warn},
316     value => $type . '/' . $subtype);
317     } elsif ($subtype_def->{limited_use}) {
318     $onerror->(type => 'IMT:limited use subtype',
319     level => $self->{level}->{warn},
320     value => $type . '/' . $subtype);
321 wakaba 1.3 }
322    
323 wakaba 1.1 for (0..$imt->parameter_length-1) {
324     my $attr = $imt->get_attribute ($_);
325     my $value = $imt->get_value ($_);
326 wakaba 1.3
327     if ($attr !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
328 wakaba 1.4 $onerror->(type => 'IMT:attribute syntax error',
329     level => $self->{level}->{mime_fact}, # RFC 4288 4.3.
330     value => $attr);
331 wakaba 1.3 }
332    
333 wakaba 1.1 $has_param->{$attr} = 1;
334     my $param_def = $subtype_def->{parameter}->{$attr}
335     || $type_def->{parameter}->{$attr};
336     if ($param_def) {
337     if (defined $param_def->{syntax}) {
338     if ($param_def->{syntax} eq 'mime-charset') { # RFC 2978
339 wakaba 1.3 ## TODO: ...
340 wakaba 1.1 if ($value =~ /[^A-Za-z0-9!#\x23%&'+^_`{}~-]/) {
341     $onerror->(type => 'value syntax error:'.$attr, level => 'm');
342     }
343     } elsif ($param_def->{syntax} eq 'token') { # RFC 2046
344 wakaba 1.3 ## TODO: ...
345 wakaba 1.1 if ($value =~ /[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/) {
346     $onerror->(type => 'value syntax error:'.$attr, level => 'm');
347     }
348     }
349     ## TODO: syntax |MIME date-time|
350 wakaba 1.4 } elsif ($param_def->{checker}) {
351     $param_def->{checker}->($self, $value, $onerror);
352     }
353    
354     if ($param_def->{obsolete}) {
355     $onerror->(type => 'IMT:obsolete parameter',
356     level => $self->{level}->{$param_def->{obsolete}},
357     value => $attr);
358     ## NOTE: The value of |$param_def->{obsolete}|, if it has a
359     ## true value, must be "mime_fact", which represents that
360     ## the parameter is defined in a previous version of the MIME
361     ## specification (or a related specification) and then
362     ## removed or marked as obsolete such that it seems that use of
363     ## that parameter is made non-conforming without using any
364     ## explicit statement on that fact.
365 wakaba 1.1 }
366 wakaba 1.3 }
367     if (not $param_def or not $param_def->{registered}) {
368     if ($subtype =~ /\./ or $subtype =~ /^x-/ or $type =~ /^x-/) {
369     ## NOTE: The parameter names SHOULD be fully specified for
370     ## personal or vendor tree subtype [RFC 4288]. Therefore, there
371     ## might be unknown parameters and still conforming.
372 wakaba 1.4 $onerror->(type => 'IMT:unknown parameter',
373     level => $self->{level}->{uncertain},
374     value => $attr);
375 wakaba 1.3 } else {
376     ## NOTE: The parameter names MUST be fully specified for
377     ## standard tree. Therefore, unknown parameter is non-conforming,
378     ## unless it is standardized later.
379 wakaba 1.4 $onerror->(type => 'IMT:parameter not allowed',
380     level => $self->{level}->{mime_fact},
381     value => $attr);
382 wakaba 1.3 }
383 wakaba 1.1 }
384     }
385    
386     for (keys %{$subtype_def->{parameter} or {}}) {
387     if ($subtype_def->{parameter}->{$_}->{required} and
388     not $has_param->{$_}) {
389 wakaba 1.4 $onerror->(type => 'IMT:parameter missing',
390     level => $self->{level}->{mime_fact},
391     text => $_,
392     value => $type . '/' . $subtype);
393 wakaba 1.1 }
394     }
395     } else {
396 wakaba 1.4 ## NOTE: Since subtypes are frequently added to the IANAREG and such
397     ## that our database might be out-of-date, we don't raise an error
398     ## for an unknown subtype, instead we report an "uncertain" status.
399     $onerror->(type => 'IMT:unknown subtype',
400     level => $self->{level}->{uncertain},
401     value => $type . '/' . $subtype);
402 wakaba 1.1 }
403    
404     for (keys %{$type_def->{parameter} or {}}) {
405     if ($type_def->{parameter}->{$_}->{required} and
406     not $has_param->{$_}) {
407 wakaba 1.4 $onerror->(type => 'IMT:parameter missing',
408     level => $self->{level}->{mime_fact},
409     text => $_,
410     value => $type . '/' . $subtype);
411 wakaba 1.1 }
412     }
413     }
414     } # check_imt
415    
416     1;
417 wakaba 1.5 ## $Date: 2008/08/29 13:34:36 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24