/[suikacvs]/markup/html/whatpm/Whatpm/IMTChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/IMTChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations) (download)
Fri Nov 23 14:47:49 2007 UTC (16 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +94 -32 lines
++ whatpm/t/ChangeLog	23 Nov 2007 14:31:20 -0000
	* content-model-2.dat: Media type tests are revised.

2007-11-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Nov 2007 14:32:47 -0000
	* IMTChecker.pm: Revised to raise errors and warnings as (poorly)
	specced in RFC 2046 and RFC 4288.
	(application/atom+xml): Definition added.

2007-11-23  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::IMTChecker;
2 use strict;
3
4 ## ISSUE: RFC 2046 is so poorly written specification that
5 ## what we should do is unclear... It's even worse than
6 ## RFC 1521, which contains BNF rules for parameter values.
7
8 our $Type;
9
10 my $application_xml_charset = { ## TODO: ...
11 syntax => 'token',
12 registered => 1,
13 };
14
15 $Type->{application}->{registered} = 1;
16
17 $Type->{application}->{subtype}->{'atom+xml'} = { ## NOTE: RFC 4287
18 parameter => {
19 type => { ## NOTE: RFC 5023
20 ## TODO: "entry"|"feed" (case-insensitive)
21 registered => 1,
22 ## NOTE: SHOULD for Atom Entry Document.
23 },
24 },
25 registered => 1,
26 };
27
28 $Type->{application}->{subtype}->{'rdf+xml'} = { # RFC 3870
29 parameter => {
30 charset => $application_xml_charset,
31 },
32 registered => 1,
33 ## RECOMMENDED that an RDF document follows new RDF/XML spec
34 ## rather than 1999 spec - this is not testable in this layer.
35 };
36
37 $Type->{application}->{subtype}->{'rss+xml'} = {
38 parameter => {
39 },
40 ## NOTE: Not registered
41 };
42
43 $Type->{audio}->{registered} = 1;
44
45 $Type->{image}->{registered} = 1;
46
47 $Type->{message}->{registered} = 1;
48
49 $Type->{model}->{registered} = 1;
50
51 $Type->{multipart}->{registered} = 1;
52
53 $Type->{text}->{registered} = 1;
54
55 $Type->{text}->{subtype}->{plain} = {
56 parameter => {
57 charset => {syntax => 'token', registered => 1}, # RFC 2046 ## TODO: registered?
58 'charset-edition' => {registered => 1}, # RFC 1922
59 'charset-extension' => {syntax => 'token', registered => 1}, # RFC 1922 ## TODO: registered?
60 },
61 registered => 1,
62 };
63 $Type->{text}->{subtype}->{html} = { # RFC 2854
64 parameter => {
65 charset => {registered => 1}, ## TODO: UTF-8 is preferred ## TODO: strongly recommended that it always be present ## NOTE: Syntax and range are not defined.
66 level => {obsolete => 1}, # RFC 1866
67 version => {obsolete => 1}, # HTML 3.0
68 },
69 registered => 1,
70 };
71 $Type->{text}->{subtype}->{css} = { # RFC 2318
72 parameter => {
73 charset => {registered => 1}, ## TODO: US-ASCII, iso-8859-X, utf-8 are recommended ## TODO: Any charset that is a superset of US-ASCII may be used ## NOTE: Syntax and range are not defined.
74 },
75 registered => 1,
76 };
77 $Type->{text}->{subtype}->{javascript} = { # RFC 4329
78 parameter => {
79 charset => {syntax => 'mime-charset', registered => 1}, ## TODO: SHOULD be registered
80 e4x => {checker => sub { # HTML5 (but informative?)
81 my ($value, $onerror) = @_;
82 unless ($value eq '1') {
83 $onerror->(type => 'value syntax error:e4x', level => 'm');
84 ## NOTE: Whether values other than "1" is non-conformant
85 ## or not is not defined actually...
86 }
87 }},
88 },
89 obsolete => 1,
90 registered => 1,
91 };
92 $Type->{text}->{subtype}->{ecmascript} = { # RFC 4329
93 parameter => {
94 charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
95 },
96 registered => 1,
97 };
98 $Type->{audio}->{subtype}->{mpeg} = { # RFC 3003
99 registered => 1,
100 };
101 my $CodecsParameter = { # RFC 4281
102 ## TODO: syntax and value check
103 registered => 1,
104 };
105 $Type->{audio}->{subtype}->{'3gpp'} = {
106 parameter => {
107 codecs => $CodecsParameter, # RFC 4281
108 },
109 registered => 1,
110 };
111
112 $Type->{video}->{registered} = 1;
113
114 $Type->{video}->{subtype}->{'3gpp'} = {
115 parameter => {
116 codecs => $CodecsParameter, # RFC 4281
117 },
118 registered => 1,
119 };
120 $Type->{audio}->{subtype}->{'3gpp2'} = { # RFC 4393
121 parameter => {
122 codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
123 },
124 registered => 1,
125 };
126 $Type->{video}->{subtype}->{'3gpp2'} = { # RFC 4393
127 parameter => {
128 codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
129 },
130 registered => 1,
131 };
132 $Type->{application}->{subtype}->{'octet-stream'} = {
133 parameter => {
134 conversions => {obsolete => 1, registered => 1}, # RFC 1341 ## TODO: syntax
135 name => {obsolete => 1, registered => 1}, # RFC 1341
136 padding => {registered => 1}, # RFC 2046
137 type => {registered => 1}, # RFC 2046
138 },
139 registered => 1,
140 };
141 $Type->{application}->{subtype}->{javascript} = { # RFC 4329
142 parameter => {
143 charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
144 },
145 registered => 1,
146 };
147 $Type->{application}->{subtype}->{ecmascript} = { # RFC 4329
148 parameter => {
149 charset => $Type->{text}->{subtype}->{ecmascript}->{parameter}->{charset},
150 },
151 registered => 1,
152 };
153 $Type->{multipart}->{parameter}->{boundary} = {
154 checker => sub {
155 my ($value, $onerror) = @_;
156 if ($value !~ /\A[0-9A-Za-z'()+_,.\x2F:=?-]{0,69}[0-9A-Za-z'()+_,.\x2F:=?\x20-]\z/) {
157 $onerror->(type => 'value syntax error:boundary', level => 'm');
158 }
159 },
160 required => 1,
161 registered => 1,
162 };
163 $Type->{message}->{subtype}->{partial} = {
164 parameter => {
165 id => {required => 1, registered => 1}, # RFC 2046
166 number => {required => 1, registered => 1}, # RFC 2046
167 total => {registered => 1}, # RFC 2046 # required for the last fragment
168 },
169 registered => 1,
170 };
171 $Type->{message}->{subtype}->{'external-body'} = {
172 parameter => {
173 'access-type' => {
174 required => 1,
175 syntax => 'token', ## TODO: registry?
176 registered => 1,
177 }, # RFC 2046
178 expiration => {syntax => 'MIME date-time', registered => 1}, # RFC 2046
179 permission => {registered => 1}, # RFC 2046
180 size => {registered => 1}, # RFC 2046
181 ## TODO: access-type dependent parameters
182 },
183 registered => 1,
184 };
185
186 our $MUSTLevel = 'm'; ## NOTE: RFC 2119 "MUST".
187 our $StronglyDiscouragedLevel = 's'; ## NOTE: "strongly discouraged".
188
189 sub check_imt ($$$$@) {
190 my (undef, $onerror, $type, $subtype, @parameter) = @_;
191
192 require Message::IMT::InternetMediaType;
193 my $dom = Message::DOM::DOMImplementation->new;
194
195 local $Error::Depth = $Error::Depth + 1;
196
197 my $imt = $dom->create_internet_media_type ($type, $subtype);
198 while (@parameter) {
199 $imt->add_parameter (shift @parameter => shift @parameter);
200 ## NOTE: Attribute duplication are not error, though its semantics
201 ## is not defined.
202 ## See <http://suika.fam.cx/gate/2005/sw/%E5%AA%92%E4%BD%93%E5%9E%8B/%E5%BC%95%E6%95%B0>.
203 }
204
205 my $type = $imt->top_level_type;
206 my $subtype = $imt->subtype;
207
208 ## NOTE: RFC 2045 (MIME), RFC 2616 (HTTP/1.1), and RFC 4288 (IMT
209 ## registration) have different requirements on type and subtype names.
210 if ($type !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
211 $onerror->(type => 'type:syntax error:'.$type, level => $MUSTLevel);
212 }
213 if ($subtype !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
214 $onerror->(type => 'subtype:syntax error:'.$subtype, level => $MUSTLevel);
215 }
216
217 my $type_def = $Type->{$type};
218 my $has_param;
219
220 if ($type =~ /^x-/) {
221 $onerror->(type => 'private type', level => $StronglyDiscouragedLevel);
222 } elsif (not $type_def or not $type_def->{registered}) {
223 #} elsif ($type_def and not $type_def->{registered}) {
224 ## NOTE: Top-level type is seldom added.
225
226 ## NOTE: RFC 2046 6. "Any format without a rigorous and public
227 ## definition must be named with an "X-" prefix" (strictly, this
228 ## is not an author requirement, but a requirement for media
229 ## type specfication author and it does not restrict use of
230 ## unregistered value).
231 $onerror->(type => 'unregistered type', level => 'w');
232 }
233
234 if ($type_def) {
235 my $subtype_def = $type_def->{subtype}->{$subtype};
236
237 if ($subtype =~ /^x[-\.]/) {
238 $onerror->(type => 'private subtype', level => 'w');
239 ## NOTE: "x." is discouraged in RFC 4288.
240 } elsif ($subtype_def and not $subtype_def->{registered}) {
241 ## NOTE: RFC 2046 6. "Any format without a rigorous and public
242 ## definition must be named with an "X-" prefix" (strictly, this
243 ## is not an author requirement, but a requirement for media
244 ## type specfication author and it does not restrict use of
245 ## unregistered value).
246 $onerror->(type => 'unregistered subtype', level => 'w');
247 }
248
249 if ($subtype_def) {
250 if ($subtype_def->{obsolete}) {
251 $onerror->(type => 'obsolete subtype', level => 'w');
252 }
253
254 for (0..$imt->parameter_length-1) {
255 my $attr = $imt->get_attribute ($_);
256 my $value = $imt->get_value ($_);
257
258 if ($attr !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
259 $onerror->(type => 'attribute:syntax error:'.$attr,
260 level => $MUSTLevel);
261 }
262
263 $has_param->{$attr} = 1;
264 my $param_def = $subtype_def->{parameter}->{$attr}
265 || $type_def->{parameter}->{$attr};
266 if ($param_def) {
267 if (defined $param_def->{syntax}) {
268 if ($param_def->{syntax} eq 'mime-charset') { # RFC 2978
269 ## TODO: ...
270 if ($value =~ /[^A-Za-z0-9!#\x23%&'+^_`{}~-]/) {
271 $onerror->(type => 'value syntax error:'.$attr, level => 'm');
272 }
273 } elsif ($param_def->{syntax} eq 'token') { # RFC 2046
274 ## TODO: ...
275 if ($value =~ /[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/) {
276 $onerror->(type => 'value syntax error:'.$attr, level => 'm');
277 }
278 }
279 ## TODO: syntax |MIME date-time|
280 if ($param_def->{checker}) {
281 $param_def->{checker}->($value, $onerror);
282 }
283 if ($param_def->{obsolete}) {
284 ## TODO: error level
285 $onerror->(type => 'obsolete parameter:'.$attr, level => 'm');
286 }
287 }
288 }
289 if (not $param_def or not $param_def->{registered}) {
290 if ($subtype =~ /\./ or $subtype =~ /^x-/ or $type =~ /^x-/) {
291 ## NOTE: The parameter names SHOULD be fully specified for
292 ## personal or vendor tree subtype [RFC 4288]. Therefore, there
293 ## might be unknown parameters and still conforming.
294 $onerror->(type => 'parameter:'.$attr, level => 'unsupported');
295 } else {
296 ## NOTE: The parameter names MUST be fully specified for
297 ## standard tree. Therefore, unknown parameter is non-conforming,
298 ## unless it is standardized later.
299 $onerror->(type => 'parameter not allowed:'.$attr,
300 level => $MUSTLevel);
301 }
302 }
303 }
304
305 for (keys %{$subtype_def->{parameter} or {}}) {
306 if ($subtype_def->{parameter}->{$_}->{required} and
307 not $has_param->{$_}) {
308 $onerror->(type => 'parameter missing:'.$_, level => 'm');
309 }
310 }
311 } else {
312 $onerror->(type => 'subtype', level => 'unsupported');
313 }
314
315 for (keys %{$type_def->{parameter} or {}}) {
316 if ($type_def->{parameter}->{$_}->{required} and
317 not $has_param->{$_}) {
318 $onerror->(type => 'parameter missing:'.$_, level => 'm');
319 }
320 }
321 } else {
322 $onerror->(type => 'type', level => 'unsupported');
323 }
324 } # check_imt
325
326 1;
327 ## $Date: 2007/06/30 13:12:33 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24