/[suikacvs]/markup/html/whatpm/Whatpm/IMTChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/IMTChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download)
Fri Nov 23 14:47:49 2007 UTC (16 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +94 -32 lines
++ whatpm/t/ChangeLog	23 Nov 2007 14:31:20 -0000
	* content-model-2.dat: Media type tests are revised.

2007-11-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Nov 2007 14:32:47 -0000
	* IMTChecker.pm: Revised to raise errors and warnings as (poorly)
	specced in RFC 2046 and RFC 4288.
	(application/atom+xml): Definition added.

2007-11-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::IMTChecker;
2     use strict;
3    
4     ## ISSUE: RFC 2046 is so poorly written specification that
5     ## what we should do is unclear... It's even worse than
6     ## RFC 1521, which contains BNF rules for parameter values.
7    
8     our $Type;
9 wakaba 1.2
10     my $application_xml_charset = { ## TODO: ...
11     syntax => 'token',
12 wakaba 1.3 registered => 1,
13 wakaba 1.2 };
14    
15     $Type->{application}->{registered} = 1;
16    
17 wakaba 1.3 $Type->{application}->{subtype}->{'atom+xml'} = { ## NOTE: RFC 4287
18     parameter => {
19     type => { ## NOTE: RFC 5023
20     ## TODO: "entry"|"feed" (case-insensitive)
21     registered => 1,
22     ## NOTE: SHOULD for Atom Entry Document.
23     },
24     },
25     registered => 1,
26     };
27    
28 wakaba 1.2 $Type->{application}->{subtype}->{'rdf+xml'} = { # RFC 3870
29     parameter => {
30     charset => $application_xml_charset,
31     },
32     registered => 1,
33     ## RECOMMENDED that an RDF document follows new RDF/XML spec
34     ## rather than 1999 spec - this is not testable in this layer.
35     };
36    
37     $Type->{application}->{subtype}->{'rss+xml'} = {
38     parameter => {
39     },
40     ## NOTE: Not registered
41     };
42    
43     $Type->{audio}->{registered} = 1;
44    
45     $Type->{image}->{registered} = 1;
46    
47     $Type->{message}->{registered} = 1;
48    
49     $Type->{model}->{registered} = 1;
50    
51     $Type->{multipart}->{registered} = 1;
52    
53     $Type->{text}->{registered} = 1;
54    
55 wakaba 1.1 $Type->{text}->{subtype}->{plain} = {
56     parameter => {
57 wakaba 1.3 charset => {syntax => 'token', registered => 1}, # RFC 2046 ## TODO: registered?
58     'charset-edition' => {registered => 1}, # RFC 1922
59     'charset-extension' => {syntax => 'token', registered => 1}, # RFC 1922 ## TODO: registered?
60 wakaba 1.1 },
61 wakaba 1.2 registered => 1,
62 wakaba 1.1 };
63     $Type->{text}->{subtype}->{html} = { # RFC 2854
64     parameter => {
65 wakaba 1.3 charset => {registered => 1}, ## TODO: UTF-8 is preferred ## TODO: strongly recommended that it always be present ## NOTE: Syntax and range are not defined.
66     level => {obsolete => 1}, # RFC 1866
67 wakaba 1.1 version => {obsolete => 1}, # HTML 3.0
68     },
69 wakaba 1.2 registered => 1,
70 wakaba 1.1 };
71     $Type->{text}->{subtype}->{css} = { # RFC 2318
72     parameter => {
73 wakaba 1.3 charset => {registered => 1}, ## TODO: US-ASCII, iso-8859-X, utf-8 are recommended ## TODO: Any charset that is a superset of US-ASCII may be used ## NOTE: Syntax and range are not defined.
74 wakaba 1.1 },
75 wakaba 1.2 registered => 1,
76 wakaba 1.1 };
77     $Type->{text}->{subtype}->{javascript} = { # RFC 4329
78     parameter => {
79 wakaba 1.3 charset => {syntax => 'mime-charset', registered => 1}, ## TODO: SHOULD be registered
80 wakaba 1.1 e4x => {checker => sub { # HTML5 (but informative?)
81     my ($value, $onerror) = @_;
82     unless ($value eq '1') {
83     $onerror->(type => 'value syntax error:e4x', level => 'm');
84 wakaba 1.2 ## NOTE: Whether values other than "1" is non-conformant
85     ## or not is not defined actually...
86 wakaba 1.1 }
87     }},
88     },
89     obsolete => 1,
90 wakaba 1.2 registered => 1,
91 wakaba 1.1 };
92     $Type->{text}->{subtype}->{ecmascript} = { # RFC 4329
93     parameter => {
94     charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
95     },
96 wakaba 1.2 registered => 1,
97 wakaba 1.1 };
98     $Type->{audio}->{subtype}->{mpeg} = { # RFC 3003
99 wakaba 1.2 registered => 1,
100 wakaba 1.1 };
101     my $CodecsParameter = { # RFC 4281
102     ## TODO: syntax and value check
103 wakaba 1.3 registered => 1,
104 wakaba 1.1 };
105     $Type->{audio}->{subtype}->{'3gpp'} = {
106     parameter => {
107     codecs => $CodecsParameter, # RFC 4281
108     },
109 wakaba 1.2 registered => 1,
110 wakaba 1.1 };
111 wakaba 1.2
112     $Type->{video}->{registered} = 1;
113    
114 wakaba 1.1 $Type->{video}->{subtype}->{'3gpp'} = {
115     parameter => {
116     codecs => $CodecsParameter, # RFC 4281
117     },
118 wakaba 1.2 registered => 1,
119 wakaba 1.1 };
120     $Type->{audio}->{subtype}->{'3gpp2'} = { # RFC 4393
121     parameter => {
122     codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
123     },
124 wakaba 1.2 registered => 1,
125 wakaba 1.1 };
126     $Type->{video}->{subtype}->{'3gpp2'} = { # RFC 4393
127     parameter => {
128     codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
129     },
130 wakaba 1.2 registered => 1,
131 wakaba 1.1 };
132     $Type->{application}->{subtype}->{'octet-stream'} = {
133     parameter => {
134 wakaba 1.3 conversions => {obsolete => 1, registered => 1}, # RFC 1341 ## TODO: syntax
135     name => {obsolete => 1, registered => 1}, # RFC 1341
136     padding => {registered => 1}, # RFC 2046
137     type => {registered => 1}, # RFC 2046
138 wakaba 1.1 },
139 wakaba 1.2 registered => 1,
140 wakaba 1.1 };
141     $Type->{application}->{subtype}->{javascript} = { # RFC 4329
142     parameter => {
143     charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
144     },
145 wakaba 1.2 registered => 1,
146 wakaba 1.1 };
147     $Type->{application}->{subtype}->{ecmascript} = { # RFC 4329
148     parameter => {
149     charset => $Type->{text}->{subtype}->{ecmascript}->{parameter}->{charset},
150     },
151 wakaba 1.2 registered => 1,
152 wakaba 1.1 };
153     $Type->{multipart}->{parameter}->{boundary} = {
154     checker => sub {
155     my ($value, $onerror) = @_;
156     if ($value !~ /\A[0-9A-Za-z'()+_,.\x2F:=?-]{0,69}[0-9A-Za-z'()+_,.\x2F:=?\x20-]\z/) {
157     $onerror->(type => 'value syntax error:boundary', level => 'm');
158     }
159     },
160     required => 1,
161 wakaba 1.3 registered => 1,
162 wakaba 1.1 };
163     $Type->{message}->{subtype}->{partial} = {
164     parameter => {
165 wakaba 1.3 id => {required => 1, registered => 1}, # RFC 2046
166     number => {required => 1, registered => 1}, # RFC 2046
167     total => {registered => 1}, # RFC 2046 # required for the last fragment
168 wakaba 1.1 },
169 wakaba 1.2 registered => 1,
170 wakaba 1.1 };
171     $Type->{message}->{subtype}->{'external-body'} = {
172     parameter => {
173     'access-type' => {
174     required => 1,
175     syntax => 'token', ## TODO: registry?
176 wakaba 1.3 registered => 1,
177 wakaba 1.1 }, # RFC 2046
178 wakaba 1.3 expiration => {syntax => 'MIME date-time', registered => 1}, # RFC 2046
179     permission => {registered => 1}, # RFC 2046
180     size => {registered => 1}, # RFC 2046
181 wakaba 1.1 ## TODO: access-type dependent parameters
182     },
183 wakaba 1.2 registered => 1,
184 wakaba 1.1 };
185    
186 wakaba 1.3 our $MUSTLevel = 'm'; ## NOTE: RFC 2119 "MUST".
187     our $StronglyDiscouragedLevel = 's'; ## NOTE: "strongly discouraged".
188    
189 wakaba 1.1 sub check_imt ($$$$@) {
190     my (undef, $onerror, $type, $subtype, @parameter) = @_;
191    
192 wakaba 1.3 require Message::IMT::InternetMediaType;
193     my $dom = Message::DOM::DOMImplementation->new;
194 wakaba 1.1
195     local $Error::Depth = $Error::Depth + 1;
196    
197     my $imt = $dom->create_internet_media_type ($type, $subtype);
198     while (@parameter) {
199     $imt->add_parameter (shift @parameter => shift @parameter);
200     ## NOTE: Attribute duplication are not error, though its semantics
201     ## is not defined.
202     ## See <http://suika.fam.cx/gate/2005/sw/%E5%AA%92%E4%BD%93%E5%9E%8B/%E5%BC%95%E6%95%B0>.
203     }
204    
205     my $type = $imt->top_level_type;
206     my $subtype = $imt->subtype;
207    
208 wakaba 1.3 ## NOTE: RFC 2045 (MIME), RFC 2616 (HTTP/1.1), and RFC 4288 (IMT
209     ## registration) have different requirements on type and subtype names.
210     if ($type !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
211     $onerror->(type => 'type:syntax error:'.$type, level => $MUSTLevel);
212     }
213     if ($subtype !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
214     $onerror->(type => 'subtype:syntax error:'.$subtype, level => $MUSTLevel);
215     }
216    
217 wakaba 1.1 my $type_def = $Type->{$type};
218     my $has_param;
219 wakaba 1.2
220 wakaba 1.3 if ($type =~ /^x-/) {
221     $onerror->(type => 'private type', level => $StronglyDiscouragedLevel);
222     } elsif (not $type_def or not $type_def->{registered}) {
223     #} elsif ($type_def and not $type_def->{registered}) {
224     ## NOTE: Top-level type is seldom added.
225    
226     ## NOTE: RFC 2046 6. "Any format without a rigorous and public
227     ## definition must be named with an "X-" prefix" (strictly, this
228     ## is not an author requirement, but a requirement for media
229     ## type specfication author and it does not restrict use of
230     ## unregistered value).
231     $onerror->(type => 'unregistered type', level => 'w');
232 wakaba 1.2 }
233    
234 wakaba 1.1 if ($type_def) {
235     my $subtype_def = $type_def->{subtype}->{$subtype};
236 wakaba 1.2
237     if ($subtype =~ /^x[-\.]/) {
238 wakaba 1.3 $onerror->(type => 'private subtype', level => 'w');
239     ## NOTE: "x." is discouraged in RFC 4288.
240 wakaba 1.2 } elsif ($subtype_def and not $subtype_def->{registered}) {
241 wakaba 1.3 ## NOTE: RFC 2046 6. "Any format without a rigorous and public
242     ## definition must be named with an "X-" prefix" (strictly, this
243     ## is not an author requirement, but a requirement for media
244     ## type specfication author and it does not restrict use of
245     ## unregistered value).
246     $onerror->(type => 'unregistered subtype', level => 'w');
247 wakaba 1.2 }
248    
249 wakaba 1.1 if ($subtype_def) {
250 wakaba 1.3 if ($subtype_def->{obsolete}) {
251     $onerror->(type => 'obsolete subtype', level => 'w');
252     }
253    
254 wakaba 1.1 for (0..$imt->parameter_length-1) {
255     my $attr = $imt->get_attribute ($_);
256     my $value = $imt->get_value ($_);
257 wakaba 1.3
258     if ($attr !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
259     $onerror->(type => 'attribute:syntax error:'.$attr,
260     level => $MUSTLevel);
261     }
262    
263 wakaba 1.1 $has_param->{$attr} = 1;
264     my $param_def = $subtype_def->{parameter}->{$attr}
265     || $type_def->{parameter}->{$attr};
266     if ($param_def) {
267     if (defined $param_def->{syntax}) {
268     if ($param_def->{syntax} eq 'mime-charset') { # RFC 2978
269 wakaba 1.3 ## TODO: ...
270 wakaba 1.1 if ($value =~ /[^A-Za-z0-9!#\x23%&'+^_`{}~-]/) {
271     $onerror->(type => 'value syntax error:'.$attr, level => 'm');
272     }
273     } elsif ($param_def->{syntax} eq 'token') { # RFC 2046
274 wakaba 1.3 ## TODO: ...
275 wakaba 1.1 if ($value =~ /[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/) {
276     $onerror->(type => 'value syntax error:'.$attr, level => 'm');
277     }
278     }
279     ## TODO: syntax |MIME date-time|
280     if ($param_def->{checker}) {
281     $param_def->{checker}->($value, $onerror);
282     }
283     if ($param_def->{obsolete}) {
284 wakaba 1.3 ## TODO: error level
285 wakaba 1.1 $onerror->(type => 'obsolete parameter:'.$attr, level => 'm');
286     }
287     }
288 wakaba 1.3 }
289     if (not $param_def or not $param_def->{registered}) {
290     if ($subtype =~ /\./ or $subtype =~ /^x-/ or $type =~ /^x-/) {
291     ## NOTE: The parameter names SHOULD be fully specified for
292     ## personal or vendor tree subtype [RFC 4288]. Therefore, there
293     ## might be unknown parameters and still conforming.
294     $onerror->(type => 'parameter:'.$attr, level => 'unsupported');
295     } else {
296     ## NOTE: The parameter names MUST be fully specified for
297     ## standard tree. Therefore, unknown parameter is non-conforming,
298     ## unless it is standardized later.
299     $onerror->(type => 'parameter not allowed:'.$attr,
300     level => $MUSTLevel);
301     }
302 wakaba 1.1 }
303     }
304    
305     for (keys %{$subtype_def->{parameter} or {}}) {
306     if ($subtype_def->{parameter}->{$_}->{required} and
307     not $has_param->{$_}) {
308     $onerror->(type => 'parameter missing:'.$_, level => 'm');
309     }
310     }
311     } else {
312 wakaba 1.2 $onerror->(type => 'subtype', level => 'unsupported');
313 wakaba 1.1 }
314    
315     for (keys %{$type_def->{parameter} or {}}) {
316     if ($type_def->{parameter}->{$_}->{required} and
317     not $has_param->{$_}) {
318     $onerror->(type => 'parameter missing:'.$_, level => 'm');
319     }
320     }
321     } else {
322 wakaba 1.2 $onerror->(type => 'type', level => 'unsupported');
323 wakaba 1.1 }
324     } # check_imt
325    
326     1;
327 wakaba 1.3 ## $Date: 2007/06/30 13:12:33 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24