/[suikacvs]/markup/html/whatpm/Whatpm/IMTChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/IMTChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (show annotations) (download)
Fri Dec 12 05:05:20 2008 UTC (15 years, 10 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.4: +25 -1 lines
++ whatpm/t/ChangeLog	12 Dec 2008 05:03:53 -0000
2008-12-12  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.t: Added new test data file.

++ whatpm/t/dom-conformance/ChangeLog	12 Dec 2008 05:04:54 -0000
2008-12-12  Wakaba  <wakaba@suika.fam.cx>

	* html-form-input-1.dat, html-forms-1.dat: Added test data for
	@accept and @pattern.

	* html-form-textarea.dat: New test data file.

++ whatpm/Whatpm/ChangeLog	12 Dec 2008 05:00:26 -0000
2008-12-12  Wakaba  <wakaba@suika.fam.cx>

	* IMTChecker.pm: Added more definitions for subtypes.

++ whatpm/Whatpm/ContentChecker/ChangeLog	12 Dec 2008 05:03:26 -0000
2008-12-12  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Implemented <input accept> and <form accept>.  Raise a
	SHOULD-level error if @pattern but no @title.

1 package Whatpm::IMTChecker;
2 use strict;
3
4 ## NOTE: RFC 2046 sucks, it is a poorly written specification such that
5 ## what we should do is not entirely clear and it does define almost nothing
6 ## from the today's viewpoint... Suprisingly, it's even worse than
7 ## RFC 1521, the previous version of that specification, which does
8 ## contain BNF rules for parameter values at least.
9
10 my $default_error_levels = {
11 must => 'm',
12 warn => 'w',
13 info => 'i',
14 uncertain => 'u',
15
16 mime_must => 'm', # lowercase "must"
17 mime_fact => 'm',
18 mime_strongly_discouraged => 'w',
19 mime_discouraged => 'w',
20 };
21
22 our $Type;
23
24 my $application_xml_charset = { ## TODO: ...
25 syntax => 'token',
26 registered => 1,
27 };
28
29 $Type->{application}->{registered} = 1;
30
31 $Type->{application}->{subtype}->{'atom+xml'} = { ## NOTE: RFC 4287
32 parameter => {
33 type => { ## NOTE: RFC 5023
34 ## TODO: "entry"|"feed" (case-insensitive)
35 registered => 1,
36 ## NOTE: SHOULD for Atom Entry Document.
37 },
38 },
39 registered => 1,
40 };
41
42 $Type->{application}->{subtype}->{'rdf+xml'} = { # RFC 3870
43 parameter => {
44 charset => $application_xml_charset,
45 },
46 registered => 1,
47 ## RECOMMENDED that an RDF document follows new RDF/XML spec
48 ## rather than 1999 spec - this is not testable in this layer.
49 };
50
51 $Type->{application}->{subtype}->{'rss+xml'} = {
52 parameter => {
53 },
54 ## NOTE: Not registered
55 };
56
57 $Type->{application}->{subtype}->{xml} = { ## TODO: check IANAREG
58 registered => 1,
59 };
60
61 $Type->{audio}->{registered} = 1;
62
63 $Type->{audio}->{subtype}->{basic} = { ## TODO: check IANAREG
64 registered => 1,
65 };
66
67 $Type->{audio}->{subtype}->{mpeg} = { ## TODO: check IANAREG
68 registered => 1,
69 };
70
71 $Type->{image}->{registered} = 1;
72
73 $Type->{image}->{subtype}->{jpeg} = { ## TODO: check IANAREG
74 registered => 1,
75 };
76
77 $Type->{image}->{subtype}->{png} = { ## TODO: check IANAREG
78 registered => 1,
79 };
80
81 $Type->{image}->{subtype}->{'svg+xml'} = { ## TODO: check IANAREG
82 registered => 1,
83 };
84
85 $Type->{message}->{registered} = 1;
86
87 $Type->{model}->{registered} = 1;
88
89 $Type->{multipart}->{registered} = 1;
90
91 $Type->{text}->{registered} = 1;
92
93 $Type->{text}->{subtype}->{plain} = {
94 parameter => {
95 charset => {syntax => 'token', registered => 1}, # RFC 2046 ## TODO: registered?
96 'charset-edition' => {registered => 1}, # RFC 1922
97 'charset-extension' => {syntax => 'token', registered => 1}, # RFC 1922 ## TODO: registered?
98 },
99 registered => 1,
100 };
101 $Type->{text}->{subtype}->{html} = { # RFC 2854
102 parameter => {
103 charset => {registered => 1}, ## TODO: UTF-8 is preferred ## TODO: strongly recommended that it always be present ## NOTE: Syntax and range are not defined.
104 level => {obsolete => 'mime_fact'}, # RFC 1866
105 version => {obsolete => 'mime_fact'}, # HTML 3.0
106 },
107 registered => 1,
108 };
109 $Type->{text}->{subtype}->{css} = { # RFC 2318
110 parameter => {
111 charset => {registered => 1}, ## TODO: US-ASCII, iso-8859-X, utf-8 are recommended ## TODO: Any charset that is a superset of US-ASCII may be used ## NOTE: Syntax and range are not defined.
112 },
113 registered => 1,
114 };
115 $Type->{text}->{subtype}->{javascript} = { # RFC 4329
116 parameter => {
117 charset => {syntax => 'mime-charset', registered => 1}, ## TODO: SHOULD be registered
118 e4x => {checker => sub { # HTML5 (but informative?)
119 my ($self, $value, $onerror) = @_;
120 unless ($value eq '1') {
121 $onerror->(type => 'e4x:syntax error',
122 level => $self->{level}->{info},
123 value => $value);
124 ## NOTE: Whether values other than "1" is non-conformant
125 ## or not is not defined actually...
126 }
127 }},
128 },
129 obsolete => 1,
130 registered => 1,
131 };
132 $Type->{text}->{subtype}->{ecmascript} = { # RFC 4329
133 parameter => {
134 charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
135 },
136 registered => 1,
137 };
138 $Type->{audio}->{subtype}->{mpeg} = { # RFC 3003
139 registered => 1,
140 };
141 my $CodecsParameter = { # RFC 4281
142 ## TODO: syntax and value check
143 registered => 1,
144 };
145 $Type->{audio}->{subtype}->{'3gpp'} = {
146 parameter => {
147 codecs => $CodecsParameter, # RFC 4281
148 },
149 registered => 1,
150 };
151
152 $Type->{video}->{registered} = 1;
153
154 $Type->{video}->{subtype}->{'3gpp'} = {
155 parameter => {
156 codecs => $CodecsParameter, # RFC 4281
157 },
158 registered => 1,
159 };
160 $Type->{audio}->{subtype}->{'3gpp2'} = { # RFC 4393
161 parameter => {
162 codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
163 },
164 registered => 1,
165 };
166 $Type->{video}->{subtype}->{'3gpp2'} = { # RFC 4393
167 parameter => {
168 codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
169 },
170 registered => 1,
171 };
172 $Type->{application}->{subtype}->{'octet-stream'} = {
173 parameter => {
174 conversions => {obsolete => 'mime_fact',
175 registered => 1}, # RFC 1341 ## TODO: syntax
176 name => {obsolete => 'mime_fact', registered => 1}, # RFC 1341
177 padding => {registered => 1}, # RFC 2046
178 type => {registered => 1}, # RFC 2046
179 },
180 registered => 1,
181 };
182 $Type->{application}->{subtype}->{javascript} = { # RFC 4329
183 parameter => {
184 charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
185 },
186 registered => 1,
187 };
188 $Type->{application}->{subtype}->{ecmascript} = { # RFC 4329
189 parameter => {
190 charset => $Type->{text}->{subtype}->{ecmascript}->{parameter}->{charset},
191 },
192 registered => 1,
193 };
194 $Type->{multipart}->{parameter}->{boundary} = {
195 checker => sub {
196 my ($self, $value, $onerror) = @_;
197 if ($value !~ /\A[0-9A-Za-z'()+_,.\x2F:=?-]{0,69}[0-9A-Za-z'()+_,.\x2F:=?\x20-]\z/) {
198 $onerror->(type => 'boundary:syntax error',
199 level => $self->{level}->{mime_fact}, # TODO: correct?
200 value => $value);
201 }
202 },
203 required => 1,
204 registered => 1,
205 };
206 $Type->{message}->{subtype}->{partial} = {
207 parameter => {
208 id => {required => 1, registered => 1}, # RFC 2046
209 number => {required => 1, registered => 1}, # RFC 2046
210 total => {registered => 1}, # RFC 2046 # required for the last fragment
211 },
212 registered => 1,
213 };
214 $Type->{message}->{subtype}->{'external-body'} = {
215 parameter => {
216 'access-type' => {
217 required => 1,
218 syntax => 'token', ## TODO: registry?
219 registered => 1,
220 }, # RFC 2046
221 expiration => {syntax => 'MIME date-time', registered => 1}, # RFC 2046
222 permission => {registered => 1}, # RFC 2046
223 size => {registered => 1}, # RFC 2046
224 ## TODO: access-type dependent parameters
225 },
226 registered => 1,
227 };
228
229 sub new ($) {
230 my $self = bless {}, shift;
231
232 $self->{level} = $default_error_levels;
233
234 return $self;
235 } # new
236
237 sub check_imt ($$$$@) {
238 my $self = ref $_[0] ? shift : shift->new;
239 my ($onerror, $type, $subtype, @parameter) = @_;
240
241 require Message::IMT::InternetMediaType;
242 my $dom = Message::DOM::DOMImplementation->new;
243
244 my $imt = $dom->create_internet_media_type ($type, $subtype);
245 while (@parameter) {
246 $imt->add_parameter (shift @parameter => shift @parameter);
247 ## NOTE: Attribute duplication are not error, though its semantics
248 ## is not defined.
249 ## See <http://suika.fam.cx/gate/2005/sw/%E5%AA%92%E4%BD%93%E5%9E%8B/%E5%BC%95%E6%95%B0>.
250 }
251
252 my $type = $imt->top_level_type;
253 my $subtype = $imt->subtype;
254
255 ## NOTE: RFC 2045 (MIME), RFC 2616 (HTTP/1.1), and RFC 4288 (IMT
256 ## registration) have different requirements on type and subtype names.
257 if ($type !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
258 $onerror->(type => 'IMT:type syntax error',
259 level => $self->{level}->{must}, # RFC 4288 4.2.
260 value => $type);
261 }
262 if ($subtype !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
263 $onerror->(type => 'IMT:subtype syntax error',
264 level => $self->{level}->{must}, # RFC 4288 4.2.
265 value => $subtype);
266 }
267
268 my $type_def = $Type->{$type};
269 my $has_param;
270
271 if ($type =~ /^x-/) {
272 $onerror->(type => 'IMT:private type',
273 level => $self->{level}->{mime_strongly_discouraged},
274 value => $type); # RFC 2046 6.
275 ## NOTE: "discouraged" in RFC 4288 3.4.
276 } elsif (not $type_def or not $type_def->{registered}) {
277 #} elsif ($type_def and not $type_def->{registered}) {
278 ## NOTE: Top-level type is seldom added.
279
280 ## NOTE: RFC 2046 6. "Any format without a rigorous and public
281 ## definition must be named with an "X-" prefix" (strictly, this
282 ## is not an author requirement, but a requirement for media
283 ## type specfication author and it does not restrict use of
284 ## unregistered value).
285 $onerror->(type => 'IMT:unregistered type',
286 level => $self->{level}->{mime_must},
287 value => $type);
288 }
289
290 if ($type_def) {
291 my $subtype_def = $type_def->{subtype}->{$subtype};
292
293 if ($subtype =~ /^x[-\.]/) {
294 $onerror->(type => 'IMT:private subtype',
295 level => $self->{level}->{mime_discouraged},
296 value => $type . '/' . $subtype);
297 ## NOTE: "x." and "x-" are discouraged in RFC 4288 3.4.
298 } elsif ($subtype_def and not $subtype_def->{registered}) {
299 ## NOTE: RFC 2046 6. "Any format without a rigorous and public
300 ## definition must be named with an "X-" prefix" (strictly, this
301 ## is not an author requirement, but a requirement for media
302 ## type specfication author and it does not restrict use of
303 ## unregistered value).
304 $onerror->(type => 'IMT:unregistered subtype',
305 level => $self->{level}->{mime_must},
306 value => $type . '/' . $subtype);
307 }
308
309 if ($subtype_def) {
310 ## NOTE: Semantics (including its relationship between conformance)
311 ## is not defined for the "intended usage" keywords of the IMT
312 ## registration template.
313 if ($subtype_def->{obsolete}) {
314 $onerror->(type => 'IMT:obsolete subtype',
315 level => $self->{level}->{warn},
316 value => $type . '/' . $subtype);
317 } elsif ($subtype_def->{limited_use}) {
318 $onerror->(type => 'IMT:limited use subtype',
319 level => $self->{level}->{warn},
320 value => $type . '/' . $subtype);
321 }
322
323 for (0..$imt->parameter_length-1) {
324 my $attr = $imt->get_attribute ($_);
325 my $value = $imt->get_value ($_);
326
327 if ($attr !~ /\A[A-Za-z0-9!#\$&.+^_-]{1,127}\z/) {
328 $onerror->(type => 'IMT:attribute syntax error',
329 level => $self->{level}->{mime_fact}, # RFC 4288 4.3.
330 value => $attr);
331 }
332
333 $has_param->{$attr} = 1;
334 my $param_def = $subtype_def->{parameter}->{$attr}
335 || $type_def->{parameter}->{$attr};
336 if ($param_def) {
337 if (defined $param_def->{syntax}) {
338 if ($param_def->{syntax} eq 'mime-charset') { # RFC 2978
339 ## TODO: ...
340 if ($value =~ /[^A-Za-z0-9!#\x23%&'+^_`{}~-]/) {
341 $onerror->(type => 'value syntax error:'.$attr, level => 'm');
342 }
343 } elsif ($param_def->{syntax} eq 'token') { # RFC 2046
344 ## TODO: ...
345 if ($value =~ /[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/) {
346 $onerror->(type => 'value syntax error:'.$attr, level => 'm');
347 }
348 }
349 ## TODO: syntax |MIME date-time|
350 } elsif ($param_def->{checker}) {
351 $param_def->{checker}->($self, $value, $onerror);
352 }
353
354 if ($param_def->{obsolete}) {
355 $onerror->(type => 'IMT:obsolete parameter',
356 level => $self->{level}->{$param_def->{obsolete}},
357 value => $attr);
358 ## NOTE: The value of |$param_def->{obsolete}|, if it has a
359 ## true value, must be "mime_fact", which represents that
360 ## the parameter is defined in a previous version of the MIME
361 ## specification (or a related specification) and then
362 ## removed or marked as obsolete such that it seems that use of
363 ## that parameter is made non-conforming without using any
364 ## explicit statement on that fact.
365 }
366 }
367 if (not $param_def or not $param_def->{registered}) {
368 if ($subtype =~ /\./ or $subtype =~ /^x-/ or $type =~ /^x-/) {
369 ## NOTE: The parameter names SHOULD be fully specified for
370 ## personal or vendor tree subtype [RFC 4288]. Therefore, there
371 ## might be unknown parameters and still conforming.
372 $onerror->(type => 'IMT:unknown parameter',
373 level => $self->{level}->{uncertain},
374 value => $attr);
375 } else {
376 ## NOTE: The parameter names MUST be fully specified for
377 ## standard tree. Therefore, unknown parameter is non-conforming,
378 ## unless it is standardized later.
379 $onerror->(type => 'IMT:parameter not allowed',
380 level => $self->{level}->{mime_fact},
381 value => $attr);
382 }
383 }
384 }
385
386 for (keys %{$subtype_def->{parameter} or {}}) {
387 if ($subtype_def->{parameter}->{$_}->{required} and
388 not $has_param->{$_}) {
389 $onerror->(type => 'IMT:parameter missing',
390 level => $self->{level}->{mime_fact},
391 text => $_,
392 value => $type . '/' . $subtype);
393 }
394 }
395 } else {
396 ## NOTE: Since subtypes are frequently added to the IANAREG and such
397 ## that our database might be out-of-date, we don't raise an error
398 ## for an unknown subtype, instead we report an "uncertain" status.
399 $onerror->(type => 'IMT:unknown subtype',
400 level => $self->{level}->{uncertain},
401 value => $type . '/' . $subtype);
402 }
403
404 for (keys %{$type_def->{parameter} or {}}) {
405 if ($type_def->{parameter}->{$_}->{required} and
406 not $has_param->{$_}) {
407 $onerror->(type => 'IMT:parameter missing',
408 level => $self->{level}->{mime_fact},
409 text => $_,
410 value => $type . '/' . $subtype);
411 }
412 }
413 }
414 } # check_imt
415
416 1;
417 ## $Date: 2008/08/29 13:34:36 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24