/[suikacvs]/markup/html/whatpm/Whatpm/IMTChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/IMTChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Sat Jun 30 13:12:33 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +76 -9 lines
++ whatpm/t/ChangeLog	30 Jun 2007 12:28:52 -0000
2007-06-30  Wakaba  <wakaba@suika.fam.cx>

	* URIChecker.t: Error level names in test results has
	been changed.

	* tokenizer-test-1.test: A test for bogus SYSTEM identifier
	is added.

	* content-model-1.dat, content-model-2.dat, content-model-3.dat,
	content-model-4.dat: Error messages has been changed.

	* ContentChecker.t: Appends error level to the error
	message if any.

++ whatpm/Whatpm/ChangeLog	30 Jun 2007 13:03:50 -0000
2007-06-30  Wakaba  <wakaba@suika.fam.cx>

	* IMTChecker.pm: Report warning for unregistered
	and private types/subtypes.

	* ContentChecker.pm, HTML.pm.src, IMTChecker.pm,
	URIChecker.pm, HTMLTable.pm: Error messages are now
	consistent; they are all listed in
	<http://suika.fam.cx/gate/2005/sw/Whatpm%20Error%20Types>.

1 wakaba 1.1 package Whatpm::IMTChecker;
2     use strict;
3    
4     ## ISSUE: RFC 2046 is so poorly written specification that
5     ## what we should do is unclear... It's even worse than
6     ## RFC 1521, which contains BNF rules for parameter values.
7    
8     our $Type;
9 wakaba 1.2
10     my $application_xml_charset = { ## TODO: ...
11     syntax => 'token',
12     };
13    
14     $Type->{application}->{registered} = 1;
15    
16     $Type->{application}->{subtype}->{'rdf+xml'} = { # RFC 3870
17     parameter => {
18     charset => $application_xml_charset,
19     },
20     registered => 1,
21    
22     ## RECOMMENDED that an RDF document follows new RDF/XML spec
23     ## rather than 1999 spec - this is not testable in this layer.
24     };
25    
26     $Type->{application}->{subtype}->{'rss+xml'} = {
27     parameter => {
28     },
29     ## NOTE: Not registered
30     };
31    
32     $Type->{audio}->{registered} = 1;
33    
34     $Type->{image}->{registered} = 1;
35    
36     $Type->{message}->{registered} = 1;
37    
38     $Type->{model}->{registered} = 1;
39    
40     $Type->{multipart}->{registered} = 1;
41    
42     $Type->{text}->{registered} = 1;
43    
44 wakaba 1.1 $Type->{text}->{subtype}->{plain} = {
45     parameter => {
46     charset => {syntax => 'token'}, # RFC 2046 ## TODO: registered?
47     'charset-edition' => {}, # RFC 1922
48     'charset-extension' => {syntax => 'token'}, # RFC 1922 ## TODO: registered?
49     },
50 wakaba 1.2 registered => 1,
51 wakaba 1.1 };
52     $Type->{text}->{subtype}->{html} = { # RFC 2854
53     parameter => {
54     charset => {}, ## TODO: UTF-8 is preferred ## TODO: strongly recommended that it always be present ## NOTE: Syntax and range are not defined.
55     level => {obsolete =>1}, # RFC 1866
56     version => {obsolete => 1}, # HTML 3.0
57     },
58 wakaba 1.2 registered => 1,
59 wakaba 1.1 };
60     $Type->{text}->{subtype}->{css} = { # RFC 2318
61     parameter => {
62     charset => {}, ## TODO: US-ASCII, iso-8859-X, utf-8 are recommended ## TODO: Any charset that is a superset of US-ASCII may be used ## NOTE: Syntax and range are not defined.
63     },
64 wakaba 1.2 registered => 1,
65 wakaba 1.1 };
66     $Type->{text}->{subtype}->{javascript} = { # RFC 4329
67     parameter => {
68     charset => {syntax => 'mime-charset'}, ## TODO: SHOULD be registered
69     e4x => {checker => sub { # HTML5 (but informative?)
70     my ($value, $onerror) = @_;
71     unless ($value eq '1') {
72     $onerror->(type => 'value syntax error:e4x', level => 'm');
73 wakaba 1.2 ## NOTE: Whether values other than "1" is non-conformant
74     ## or not is not defined actually...
75 wakaba 1.1 }
76     }},
77     },
78     obsolete => 1,
79 wakaba 1.2 registered => 1,
80 wakaba 1.1 };
81     $Type->{text}->{subtype}->{ecmascript} = { # RFC 4329
82     parameter => {
83     charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
84     },
85 wakaba 1.2 registered => 1,
86 wakaba 1.1 };
87     $Type->{audio}->{subtype}->{mpeg} = { # RFC 3003
88 wakaba 1.2 registered => 1,
89 wakaba 1.1 };
90     my $CodecsParameter = { # RFC 4281
91     ## TODO: syntax and value check
92     };
93     $Type->{audio}->{subtype}->{'3gpp'} = {
94     parameter => {
95     codecs => $CodecsParameter, # RFC 4281
96     },
97 wakaba 1.2 registered => 1,
98 wakaba 1.1 };
99 wakaba 1.2
100     $Type->{video}->{registered} = 1;
101    
102 wakaba 1.1 $Type->{video}->{subtype}->{'3gpp'} = {
103     parameter => {
104     codecs => $CodecsParameter, # RFC 4281
105     },
106 wakaba 1.2 registered => 1,
107 wakaba 1.1 };
108     $Type->{audio}->{subtype}->{'3gpp2'} = { # RFC 4393
109     parameter => {
110     codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
111     },
112 wakaba 1.2 registered => 1,
113 wakaba 1.1 };
114     $Type->{video}->{subtype}->{'3gpp2'} = { # RFC 4393
115     parameter => {
116     codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
117     },
118 wakaba 1.2 registered => 1,
119 wakaba 1.1 };
120     $Type->{application}->{subtype}->{'octet-stream'} = {
121     parameter => {
122     conversions => {obsolete => 1}, # RFC 1341 ## TODO: syntax
123     name => {obsolete => 1}, # RFC 1341
124     padding => {}, # RFC 2046
125     type => {}, # RFC 2046
126     },
127 wakaba 1.2 registered => 1,
128 wakaba 1.1 };
129     $Type->{application}->{subtype}->{javascript} = { # RFC 4329
130     parameter => {
131     charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
132     },
133 wakaba 1.2 registered => 1,
134 wakaba 1.1 };
135     $Type->{application}->{subtype}->{ecmascript} = { # RFC 4329
136     parameter => {
137     charset => $Type->{text}->{subtype}->{ecmascript}->{parameter}->{charset},
138     },
139 wakaba 1.2 registered => 1,
140 wakaba 1.1 };
141     $Type->{multipart}->{parameter}->{boundary} = {
142     checker => sub {
143     my ($value, $onerror) = @_;
144     if ($value !~ /\A[0-9A-Za-z'()+_,.\x2F:=?-]{0,69}[0-9A-Za-z'()+_,.\x2F:=?\x20-]\z/) {
145     $onerror->(type => 'value syntax error:boundary', level => 'm');
146     }
147     },
148     required => 1,
149     };
150     $Type->{message}->{subtype}->{partial} = {
151     parameter => {
152     id => {required => 1}, # RFC 2046
153     number => {required => 1}, # RFC 2046
154     total => {}, # RFC 2046 # required for the last fragment
155     },
156 wakaba 1.2 registered => 1,
157 wakaba 1.1 };
158     $Type->{message}->{subtype}->{'external-body'} = {
159     parameter => {
160     'access-type' => {
161     required => 1,
162     syntax => 'token', ## TODO: registry?
163     }, # RFC 2046
164     expiration => {syntax => 'MIME date-time'}, # RFC 2046
165     permission => {}, # RFC 2046
166     size => {}, # RFC 2046
167     ## TODO: access-type dependent parameters
168     },
169 wakaba 1.2 registered => 1,
170 wakaba 1.1 };
171    
172     sub check_imt ($$$$@) {
173     my (undef, $onerror, $type, $subtype, @parameter) = @_;
174    
175     require Message::IMT::InternetMediaType; ## From manakai
176     my $dom = 'Message::DOM::DOMImplementation'; ## ISSUE: This is not a formal way to instantiate it.
177    
178     local $Error::Depth = $Error::Depth + 1;
179    
180     my $imt = $dom->create_internet_media_type ($type, $subtype);
181     while (@parameter) {
182     $imt->add_parameter (shift @parameter => shift @parameter);
183     ## NOTE: Attribute duplication are not error, though its semantics
184     ## is not defined.
185     ## See <http://suika.fam.cx/gate/2005/sw/%E5%AA%92%E4%BD%93%E5%9E%8B/%E5%BC%95%E6%95%B0>.
186     }
187    
188     my $type = $imt->top_level_type;
189     my $subtype = $imt->subtype;
190    
191     my $type_def = $Type->{$type};
192     my $has_param;
193 wakaba 1.2
194     if ($type =~ /^x[-\.]/) { ## TODO: Is there x. tree?
195     $onerror->(type => 'private type', level => 's'); ## TODO: What level?
196     } elsif ($type_def and not $type_def->{registered}) {
197     $onerror->(type => 'unregistered type', level => 's'); ## TODO: What level?
198     }
199    
200 wakaba 1.1 if ($type_def) {
201     my $subtype_def = $type_def->{subtype}->{$subtype};
202 wakaba 1.2
203     if ($subtype =~ /^x[-\.]/) {
204     $onerror->(type => 'private subtype', level => 's'); ## TODO: What level?
205     } elsif ($subtype_def and not $subtype_def->{registered}) {
206     $onerror->(type => 'unregistered subtype', level => 's'); ## TODO: What level?
207     }
208     if ($subtype_def->{obsolete}) {
209     $onerror->(type => 'obsolete subtype', level => 's');
210     }
211    
212 wakaba 1.1 if ($subtype_def) {
213     for (0..$imt->parameter_length-1) {
214     my $attr = $imt->get_attribute ($_);
215     my $value = $imt->get_value ($_);
216     $has_param->{$attr} = 1;
217     my $param_def = $subtype_def->{parameter}->{$attr}
218     || $type_def->{parameter}->{$attr};
219     if ($param_def) {
220     if (defined $param_def->{syntax}) {
221     if ($param_def->{syntax} eq 'mime-charset') { # RFC 2978
222     if ($value =~ /[^A-Za-z0-9!#\x23%&'+^_`{}~-]/) {
223     $onerror->(type => 'value syntax error:'.$attr, level => 'm');
224     }
225     } elsif ($param_def->{syntax} eq 'token') { # RFC 2046
226     if ($value =~ /[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/) {
227     $onerror->(type => 'value syntax error:'.$attr, level => 'm');
228     }
229     }
230     ## TODO: syntax |MIME date-time|
231     if ($param_def->{checker}) {
232     $param_def->{checker}->($value, $onerror);
233     }
234     if ($param_def->{obsolete}) {
235     $onerror->(type => 'obsolete parameter:'.$attr, level => 'm');
236     }
237     }
238     } else {
239 wakaba 1.2 $onerror->(type => 'parameter:'.$attr, level => 'unsupported');
240 wakaba 1.1 }
241     }
242    
243     for (keys %{$subtype_def->{parameter} or {}}) {
244     if ($subtype_def->{parameter}->{$_}->{required} and
245     not $has_param->{$_}) {
246     $onerror->(type => 'parameter missing:'.$_, level => 'm');
247     }
248     }
249     } else {
250 wakaba 1.2 $onerror->(type => 'subtype', level => 'unsupported');
251 wakaba 1.1 }
252    
253     for (keys %{$type_def->{parameter} or {}}) {
254     if ($type_def->{parameter}->{$_}->{required} and
255     not $has_param->{$_}) {
256     $onerror->(type => 'parameter missing:'.$_, level => 'm');
257     }
258     }
259     } else {
260 wakaba 1.2 $onerror->(type => 'type', level => 'unsupported');
261 wakaba 1.1 }
262     } # check_imt
263    
264     1;
265 wakaba 1.2 ## $Date: 2007/05/26 08:12:34 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24