1 |
wakaba |
1.1 |
package Whatpm::IMTChecker; |
2 |
|
|
use strict; |
3 |
|
|
|
4 |
wakaba |
1.4 |
## NOTE: RFC 2046 sucks, it is a poorly written specification such that |
5 |
|
|
## what we should do is not entirely clear and it does define almost nothing |
6 |
|
|
## from the today's viewpoint... Suprisingly, it's even worse than |
7 |
|
|
## RFC 1521, the previous version of that specification, which does |
8 |
|
|
## contain BNF rules for parameter values at least. |
9 |
|
|
|
10 |
|
|
my $default_error_levels = { |
11 |
|
|
must => 'm', |
12 |
|
|
warn => 'w', |
13 |
|
|
info => 'i', |
14 |
|
|
uncertain => 'u', |
15 |
|
|
|
16 |
|
|
mime_must => 'm', # lowercase "must" |
17 |
|
|
mime_fact => 'm', |
18 |
|
|
mime_strongly_discouraged => 'w', |
19 |
|
|
mime_discouraged => 'w', |
20 |
|
|
}; |
21 |
wakaba |
1.1 |
|
22 |
|
|
our $Type; |
23 |
wakaba |
1.2 |
|
24 |
|
|
my $application_xml_charset = { ## TODO: ... |
25 |
|
|
syntax => 'token', |
26 |
wakaba |
1.3 |
registered => 1, |
27 |
wakaba |
1.2 |
}; |
28 |
|
|
|
29 |
|
|
$Type->{application}->{registered} = 1; |
30 |
|
|
|
31 |
wakaba |
1.3 |
$Type->{application}->{subtype}->{'atom+xml'} = { ## NOTE: RFC 4287 |
32 |
|
|
parameter => { |
33 |
|
|
type => { ## NOTE: RFC 5023 |
34 |
|
|
## TODO: "entry"|"feed" (case-insensitive) |
35 |
|
|
registered => 1, |
36 |
|
|
## NOTE: SHOULD for Atom Entry Document. |
37 |
|
|
}, |
38 |
|
|
}, |
39 |
|
|
registered => 1, |
40 |
|
|
}; |
41 |
|
|
|
42 |
wakaba |
1.2 |
$Type->{application}->{subtype}->{'rdf+xml'} = { # RFC 3870 |
43 |
|
|
parameter => { |
44 |
|
|
charset => $application_xml_charset, |
45 |
|
|
}, |
46 |
|
|
registered => 1, |
47 |
|
|
## RECOMMENDED that an RDF document follows new RDF/XML spec |
48 |
|
|
## rather than 1999 spec - this is not testable in this layer. |
49 |
|
|
}; |
50 |
|
|
|
51 |
|
|
$Type->{application}->{subtype}->{'rss+xml'} = { |
52 |
|
|
parameter => { |
53 |
|
|
}, |
54 |
|
|
## NOTE: Not registered |
55 |
|
|
}; |
56 |
|
|
|
57 |
|
|
$Type->{audio}->{registered} = 1; |
58 |
|
|
|
59 |
|
|
$Type->{image}->{registered} = 1; |
60 |
|
|
|
61 |
|
|
$Type->{message}->{registered} = 1; |
62 |
|
|
|
63 |
|
|
$Type->{model}->{registered} = 1; |
64 |
|
|
|
65 |
|
|
$Type->{multipart}->{registered} = 1; |
66 |
|
|
|
67 |
|
|
$Type->{text}->{registered} = 1; |
68 |
|
|
|
69 |
wakaba |
1.1 |
$Type->{text}->{subtype}->{plain} = { |
70 |
|
|
parameter => { |
71 |
wakaba |
1.3 |
charset => {syntax => 'token', registered => 1}, # RFC 2046 ## TODO: registered? |
72 |
|
|
'charset-edition' => {registered => 1}, # RFC 1922 |
73 |
|
|
'charset-extension' => {syntax => 'token', registered => 1}, # RFC 1922 ## TODO: registered? |
74 |
wakaba |
1.1 |
}, |
75 |
wakaba |
1.2 |
registered => 1, |
76 |
wakaba |
1.1 |
}; |
77 |
|
|
$Type->{text}->{subtype}->{html} = { # RFC 2854 |
78 |
|
|
parameter => { |
79 |
wakaba |
1.3 |
charset => {registered => 1}, ## TODO: UTF-8 is preferred ## TODO: strongly recommended that it always be present ## NOTE: Syntax and range are not defined. |
80 |
wakaba |
1.4 |
level => {obsolete => 'mime_fact'}, # RFC 1866 |
81 |
|
|
version => {obsolete => 'mime_fact'}, # HTML 3.0 |
82 |
wakaba |
1.1 |
}, |
83 |
wakaba |
1.2 |
registered => 1, |
84 |
wakaba |
1.1 |
}; |
85 |
|
|
$Type->{text}->{subtype}->{css} = { # RFC 2318 |
86 |
|
|
parameter => { |
87 |
wakaba |
1.3 |
charset => {registered => 1}, ## TODO: US-ASCII, iso-8859-X, utf-8 are recommended ## TODO: Any charset that is a superset of US-ASCII may be used ## NOTE: Syntax and range are not defined. |
88 |
wakaba |
1.1 |
}, |
89 |
wakaba |
1.2 |
registered => 1, |
90 |
wakaba |
1.1 |
}; |
91 |
|
|
$Type->{text}->{subtype}->{javascript} = { # RFC 4329 |
92 |
|
|
parameter => { |
93 |
wakaba |
1.3 |
charset => {syntax => 'mime-charset', registered => 1}, ## TODO: SHOULD be registered |
94 |
wakaba |
1.1 |
e4x => {checker => sub { # HTML5 (but informative?) |
95 |
wakaba |
1.4 |
my ($self, $value, $onerror) = @_; |
96 |
wakaba |
1.1 |
unless ($value eq '1') { |
97 |
wakaba |
1.4 |
$onerror->(type => 'e4x:syntax error', |
98 |
|
|
level => $self->{level}->{info}, |
99 |
|
|
value => $value); |
100 |
wakaba |
1.2 |
## NOTE: Whether values other than "1" is non-conformant |
101 |
|
|
## or not is not defined actually... |
102 |
wakaba |
1.1 |
} |
103 |
|
|
}}, |
104 |
|
|
}, |
105 |
|
|
obsolete => 1, |
106 |
wakaba |
1.2 |
registered => 1, |
107 |
wakaba |
1.1 |
}; |
108 |
|
|
$Type->{text}->{subtype}->{ecmascript} = { # RFC 4329 |
109 |
|
|
parameter => { |
110 |
|
|
charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset}, |
111 |
|
|
}, |
112 |
wakaba |
1.2 |
registered => 1, |
113 |
wakaba |
1.1 |
}; |
114 |
|
|
$Type->{audio}->{subtype}->{mpeg} = { # RFC 3003 |
115 |
wakaba |
1.2 |
registered => 1, |
116 |
wakaba |
1.1 |
}; |
117 |
|
|
my $CodecsParameter = { # RFC 4281 |
118 |
|
|
## TODO: syntax and value check |
119 |
wakaba |
1.3 |
registered => 1, |
120 |
wakaba |
1.1 |
}; |
121 |
|
|
$Type->{audio}->{subtype}->{'3gpp'} = { |
122 |
|
|
parameter => { |
123 |
|
|
codecs => $CodecsParameter, # RFC 4281 |
124 |
|
|
}, |
125 |
wakaba |
1.2 |
registered => 1, |
126 |
wakaba |
1.1 |
}; |
127 |
wakaba |
1.2 |
|
128 |
|
|
$Type->{video}->{registered} = 1; |
129 |
|
|
|
130 |
wakaba |
1.1 |
$Type->{video}->{subtype}->{'3gpp'} = { |
131 |
|
|
parameter => { |
132 |
|
|
codecs => $CodecsParameter, # RFC 4281 |
133 |
|
|
}, |
134 |
wakaba |
1.2 |
registered => 1, |
135 |
wakaba |
1.1 |
}; |
136 |
|
|
$Type->{audio}->{subtype}->{'3gpp2'} = { # RFC 4393 |
137 |
|
|
parameter => { |
138 |
|
|
codecs => $CodecsParameter, # RFC 4393 -> RFC 4281 |
139 |
|
|
}, |
140 |
wakaba |
1.2 |
registered => 1, |
141 |
wakaba |
1.1 |
}; |
142 |
|
|
$Type->{video}->{subtype}->{'3gpp2'} = { # RFC 4393 |
143 |
|
|
parameter => { |
144 |
|
|
codecs => $CodecsParameter, # RFC 4393 -> RFC 4281 |
145 |
|
|
}, |
146 |
wakaba |
1.2 |
registered => 1, |
147 |
wakaba |
1.1 |
}; |
148 |
|
|
$Type->{application}->{subtype}->{'octet-stream'} = { |
149 |
|
|
parameter => { |
150 |
wakaba |
1.4 |
conversions => {obsolete => 'mime_fact', |
151 |
|
|
registered => 1}, # RFC 1341 ## TODO: syntax |
152 |
|
|
name => {obsolete => 'mime_fact', registered => 1}, # RFC 1341 |
153 |
wakaba |
1.3 |
padding => {registered => 1}, # RFC 2046 |
154 |
|
|
type => {registered => 1}, # RFC 2046 |
155 |
wakaba |
1.1 |
}, |
156 |
wakaba |
1.2 |
registered => 1, |
157 |
wakaba |
1.1 |
}; |
158 |
|
|
$Type->{application}->{subtype}->{javascript} = { # RFC 4329 |
159 |
|
|
parameter => { |
160 |
|
|
charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset}, |
161 |
|
|
}, |
162 |
wakaba |
1.2 |
registered => 1, |
163 |
wakaba |
1.1 |
}; |
164 |
|
|
$Type->{application}->{subtype}->{ecmascript} = { # RFC 4329 |
165 |
|
|
parameter => { |
166 |
|
|
charset => $Type->{text}->{subtype}->{ecmascript}->{parameter}->{charset}, |
167 |
|
|
}, |
168 |
wakaba |
1.2 |
registered => 1, |
169 |
wakaba |
1.1 |
}; |
170 |
|
|
$Type->{multipart}->{parameter}->{boundary} = { |
171 |
|
|
checker => sub { |
172 |
wakaba |
1.4 |
my ($self, $value, $onerror) = @_; |
173 |
wakaba |
1.1 |
if ($value !~ /\A[0-9A-Za-z'()+_,.\x2F:=?-]{0,69}[0-9A-Za-z'()+_,.\x2F:=?\x20-]\z/) { |
174 |
wakaba |
1.4 |
$onerror->(type => 'boundary:syntax error', |
175 |
|
|
level => $self->{level}->{mime_fact}, # TODO: correct? |
176 |
|
|
value => $value); |
177 |
wakaba |
1.1 |
} |
178 |
|
|
}, |
179 |
|
|
required => 1, |
180 |
wakaba |
1.3 |
registered => 1, |
181 |
wakaba |
1.1 |
}; |
182 |
|
|
$Type->{message}->{subtype}->{partial} = { |
183 |
|
|
parameter => { |
184 |
wakaba |
1.3 |
id => {required => 1, registered => 1}, # RFC 2046 |
185 |
|
|
number => {required => 1, registered => 1}, # RFC 2046 |
186 |
|
|
total => {registered => 1}, # RFC 2046 # required for the last fragment |
187 |
wakaba |
1.1 |
}, |
188 |
wakaba |
1.2 |
registered => 1, |
189 |
wakaba |
1.1 |
}; |
190 |
|
|
$Type->{message}->{subtype}->{'external-body'} = { |
191 |
|
|
parameter => { |
192 |
|
|
'access-type' => { |
193 |
|
|
required => 1, |
194 |
|
|
syntax => 'token', ## TODO: registry? |
195 |
wakaba |
1.3 |
registered => 1, |
196 |
wakaba |
1.1 |
}, # RFC 2046 |
197 |
wakaba |
1.3 |
expiration => {syntax => 'MIME date-time', registered => 1}, # RFC 2046 |
198 |
|
|
permission => {registered => 1}, # RFC 2046 |
199 |
|
|
size => {registered => 1}, # RFC 2046 |
200 |
wakaba |
1.1 |
## TODO: access-type dependent parameters |
201 |
|
|
}, |
202 |
wakaba |
1.2 |
registered => 1, |
203 |
wakaba |
1.1 |
}; |
204 |
|
|
|
205 |
wakaba |
1.4 |
sub new ($) { |
206 |
|
|
my $self = bless {}, shift; |
207 |
|
|
|
208 |
|
|
$self->{level} = $default_error_levels; |
209 |
|
|
|
210 |
|
|
return $self; |
211 |
|
|
} # new |
212 |
wakaba |
1.3 |
|
213 |
wakaba |
1.1 |
sub check_imt ($$$$@) { |
214 |
wakaba |
1.4 |
my $self = ref $_[0] ? shift : shift->new; |
215 |
|
|
my ($onerror, $type, $subtype, @parameter) = @_; |
216 |
wakaba |
1.1 |
|
217 |
wakaba |
1.3 |
require Message::IMT::InternetMediaType; |
218 |
|
|
my $dom = Message::DOM::DOMImplementation->new; |
219 |
wakaba |
1.1 |
|
220 |
|
|
my $imt = $dom->create_internet_media_type ($type, $subtype); |
221 |
|
|
while (@parameter) { |
222 |
|
|
$imt->add_parameter (shift @parameter => shift @parameter); |
223 |
|
|
## NOTE: Attribute duplication are not error, though its semantics |
224 |
|
|
## is not defined. |
225 |
|
|
## See <http://suika.fam.cx/gate/2005/sw/%E5%AA%92%E4%BD%93%E5%9E%8B/%E5%BC%95%E6%95%B0>. |
226 |
|
|
} |
227 |
|
|
|
228 |
|
|
my $type = $imt->top_level_type; |
229 |
|
|
my $subtype = $imt->subtype; |
230 |
|
|
|
231 |
wakaba |
1.3 |
## NOTE: RFC 2045 (MIME), RFC 2616 (HTTP/1.1), and RFC 4288 (IMT |
232 |
|
|
## registration) have different requirements on type and subtype names. |
233 |
|
|
if ($type !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) { |
234 |
wakaba |
1.4 |
$onerror->(type => 'IMT:type syntax error', |
235 |
|
|
level => $self->{level}->{must}, # RFC 4288 4.2. |
236 |
|
|
value => $type); |
237 |
wakaba |
1.3 |
} |
238 |
|
|
if ($subtype !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) { |
239 |
wakaba |
1.4 |
$onerror->(type => 'IMT:subtype syntax error', |
240 |
|
|
level => $self->{level}->{must}, # RFC 4288 4.2. |
241 |
|
|
value => $subtype); |
242 |
wakaba |
1.3 |
} |
243 |
|
|
|
244 |
wakaba |
1.1 |
my $type_def = $Type->{$type}; |
245 |
|
|
my $has_param; |
246 |
wakaba |
1.2 |
|
247 |
wakaba |
1.3 |
if ($type =~ /^x-/) { |
248 |
wakaba |
1.4 |
$onerror->(type => 'IMT:private type', |
249 |
|
|
level => $self->{level}->{mime_strongly_discouraged}, |
250 |
|
|
value => $type); # RFC 2046 6. |
251 |
|
|
## NOTE: "discouraged" in RFC 4288 3.4. |
252 |
wakaba |
1.3 |
} elsif (not $type_def or not $type_def->{registered}) { |
253 |
|
|
#} elsif ($type_def and not $type_def->{registered}) { |
254 |
|
|
## NOTE: Top-level type is seldom added. |
255 |
|
|
|
256 |
|
|
## NOTE: RFC 2046 6. "Any format without a rigorous and public |
257 |
|
|
## definition must be named with an "X-" prefix" (strictly, this |
258 |
|
|
## is not an author requirement, but a requirement for media |
259 |
|
|
## type specfication author and it does not restrict use of |
260 |
|
|
## unregistered value). |
261 |
wakaba |
1.4 |
$onerror->(type => 'IMT:unregistered type', |
262 |
|
|
level => $self->{level}->{mime_must}, |
263 |
|
|
value => $type); |
264 |
wakaba |
1.2 |
} |
265 |
|
|
|
266 |
wakaba |
1.1 |
if ($type_def) { |
267 |
|
|
my $subtype_def = $type_def->{subtype}->{$subtype}; |
268 |
wakaba |
1.2 |
|
269 |
|
|
if ($subtype =~ /^x[-\.]/) { |
270 |
wakaba |
1.4 |
$onerror->(type => 'IMT:private subtype', |
271 |
|
|
level => $self->{level}->{mime_discouraged}, |
272 |
|
|
value => $type . '/' . $subtype); |
273 |
|
|
## NOTE: "x." and "x-" are discouraged in RFC 4288 3.4. |
274 |
wakaba |
1.2 |
} elsif ($subtype_def and not $subtype_def->{registered}) { |
275 |
wakaba |
1.3 |
## NOTE: RFC 2046 6. "Any format without a rigorous and public |
276 |
|
|
## definition must be named with an "X-" prefix" (strictly, this |
277 |
|
|
## is not an author requirement, but a requirement for media |
278 |
|
|
## type specfication author and it does not restrict use of |
279 |
|
|
## unregistered value). |
280 |
wakaba |
1.4 |
$onerror->(type => 'IMT:unregistered subtype', |
281 |
|
|
level => $self->{level}->{mime_must}, |
282 |
|
|
value => $type . '/' . $subtype); |
283 |
wakaba |
1.2 |
} |
284 |
|
|
|
285 |
wakaba |
1.1 |
if ($subtype_def) { |
286 |
wakaba |
1.4 |
## NOTE: Semantics (including its relationship between conformance) |
287 |
|
|
## is not defined for the "intended usage" keywords of the IMT |
288 |
|
|
## registration template. |
289 |
wakaba |
1.3 |
if ($subtype_def->{obsolete}) { |
290 |
wakaba |
1.4 |
$onerror->(type => 'IMT:obsolete subtype', |
291 |
|
|
level => $self->{level}->{warn}, |
292 |
|
|
value => $type . '/' . $subtype); |
293 |
|
|
} elsif ($subtype_def->{limited_use}) { |
294 |
|
|
$onerror->(type => 'IMT:limited use subtype', |
295 |
|
|
level => $self->{level}->{warn}, |
296 |
|
|
value => $type . '/' . $subtype); |
297 |
wakaba |
1.3 |
} |
298 |
|
|
|
299 |
wakaba |
1.1 |
for (0..$imt->parameter_length-1) { |
300 |
|
|
my $attr = $imt->get_attribute ($_); |
301 |
|
|
my $value = $imt->get_value ($_); |
302 |
wakaba |
1.3 |
|
303 |
|
|
if ($attr !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) { |
304 |
wakaba |
1.4 |
$onerror->(type => 'IMT:attribute syntax error', |
305 |
|
|
level => $self->{level}->{mime_fact}, # RFC 4288 4.3. |
306 |
|
|
value => $attr); |
307 |
wakaba |
1.3 |
} |
308 |
|
|
|
309 |
wakaba |
1.1 |
$has_param->{$attr} = 1; |
310 |
|
|
my $param_def = $subtype_def->{parameter}->{$attr} |
311 |
|
|
|| $type_def->{parameter}->{$attr}; |
312 |
|
|
if ($param_def) { |
313 |
|
|
if (defined $param_def->{syntax}) { |
314 |
|
|
if ($param_def->{syntax} eq 'mime-charset') { # RFC 2978 |
315 |
wakaba |
1.3 |
## TODO: ... |
316 |
wakaba |
1.1 |
if ($value =~ /[^A-Za-z0-9!#\x23%&'+^_`{}~-]/) { |
317 |
|
|
$onerror->(type => 'value syntax error:'.$attr, level => 'm'); |
318 |
|
|
} |
319 |
|
|
} elsif ($param_def->{syntax} eq 'token') { # RFC 2046 |
320 |
wakaba |
1.3 |
## TODO: ... |
321 |
wakaba |
1.1 |
if ($value =~ /[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/) { |
322 |
|
|
$onerror->(type => 'value syntax error:'.$attr, level => 'm'); |
323 |
|
|
} |
324 |
|
|
} |
325 |
|
|
## TODO: syntax |MIME date-time| |
326 |
wakaba |
1.4 |
} elsif ($param_def->{checker}) { |
327 |
|
|
$param_def->{checker}->($self, $value, $onerror); |
328 |
|
|
} |
329 |
|
|
|
330 |
|
|
if ($param_def->{obsolete}) { |
331 |
|
|
$onerror->(type => 'IMT:obsolete parameter', |
332 |
|
|
level => $self->{level}->{$param_def->{obsolete}}, |
333 |
|
|
value => $attr); |
334 |
|
|
## NOTE: The value of |$param_def->{obsolete}|, if it has a |
335 |
|
|
## true value, must be "mime_fact", which represents that |
336 |
|
|
## the parameter is defined in a previous version of the MIME |
337 |
|
|
## specification (or a related specification) and then |
338 |
|
|
## removed or marked as obsolete such that it seems that use of |
339 |
|
|
## that parameter is made non-conforming without using any |
340 |
|
|
## explicit statement on that fact. |
341 |
wakaba |
1.1 |
} |
342 |
wakaba |
1.3 |
} |
343 |
|
|
if (not $param_def or not $param_def->{registered}) { |
344 |
|
|
if ($subtype =~ /\./ or $subtype =~ /^x-/ or $type =~ /^x-/) { |
345 |
|
|
## NOTE: The parameter names SHOULD be fully specified for |
346 |
|
|
## personal or vendor tree subtype [RFC 4288]. Therefore, there |
347 |
|
|
## might be unknown parameters and still conforming. |
348 |
wakaba |
1.4 |
$onerror->(type => 'IMT:unknown parameter', |
349 |
|
|
level => $self->{level}->{uncertain}, |
350 |
|
|
value => $attr); |
351 |
wakaba |
1.3 |
} else { |
352 |
|
|
## NOTE: The parameter names MUST be fully specified for |
353 |
|
|
## standard tree. Therefore, unknown parameter is non-conforming, |
354 |
|
|
## unless it is standardized later. |
355 |
wakaba |
1.4 |
$onerror->(type => 'IMT:parameter not allowed', |
356 |
|
|
level => $self->{level}->{mime_fact}, |
357 |
|
|
value => $attr); |
358 |
wakaba |
1.3 |
} |
359 |
wakaba |
1.1 |
} |
360 |
|
|
} |
361 |
|
|
|
362 |
|
|
for (keys %{$subtype_def->{parameter} or {}}) { |
363 |
|
|
if ($subtype_def->{parameter}->{$_}->{required} and |
364 |
|
|
not $has_param->{$_}) { |
365 |
wakaba |
1.4 |
$onerror->(type => 'IMT:parameter missing', |
366 |
|
|
level => $self->{level}->{mime_fact}, |
367 |
|
|
text => $_, |
368 |
|
|
value => $type . '/' . $subtype); |
369 |
wakaba |
1.1 |
} |
370 |
|
|
} |
371 |
|
|
} else { |
372 |
wakaba |
1.4 |
## NOTE: Since subtypes are frequently added to the IANAREG and such |
373 |
|
|
## that our database might be out-of-date, we don't raise an error |
374 |
|
|
## for an unknown subtype, instead we report an "uncertain" status. |
375 |
|
|
$onerror->(type => 'IMT:unknown subtype', |
376 |
|
|
level => $self->{level}->{uncertain}, |
377 |
|
|
value => $type . '/' . $subtype); |
378 |
wakaba |
1.1 |
} |
379 |
|
|
|
380 |
|
|
for (keys %{$type_def->{parameter} or {}}) { |
381 |
|
|
if ($type_def->{parameter}->{$_}->{required} and |
382 |
|
|
not $has_param->{$_}) { |
383 |
wakaba |
1.4 |
$onerror->(type => 'IMT:parameter missing', |
384 |
|
|
level => $self->{level}->{mime_fact}, |
385 |
|
|
text => $_, |
386 |
|
|
value => $type . '/' . $subtype); |
387 |
wakaba |
1.1 |
} |
388 |
|
|
} |
389 |
|
|
} |
390 |
|
|
} # check_imt |
391 |
|
|
|
392 |
|
|
1; |
393 |
wakaba |
1.4 |
## $Date: 2007/11/23 14:47:49 $ |