/[suikacvs]/markup/html/whatpm/Whatpm/IMTChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/IMTChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations) (download)
Sat May 26 08:12:34 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
++ whatpm/t/ChangeLog	26 May 2007 08:12:26 -0000
2007-05-26  Wakaba  <wakaba@suika.fam.cx>

	* content-model-2.dat: Errors on obsolete media
	type (i.e. |text/javascript|) are added to the expected results.

	* tree-test-1.dat: Tests for |style| elements' attributes
	are added.

++ whatpm/Whatpm/ChangeLog	26 May 2007 08:11:16 -0000
2007-05-26  Wakaba  <wakaba@suika.fam.cx>

	* IMTChecker.pm: New module.

	* ContentChecker.pm ($HTMLIMTAttrChecker): Call IMTChecker
	to test parameter value validity.

	* HTML.pm.src ($style_start_tag): Attributes were
	discarded.

1 package Whatpm::IMTChecker;
2 use strict;
3
4 ## ISSUE: RFC 2046 is so poorly written specification that
5 ## what we should do is unclear... It's even worse than
6 ## RFC 1521, which contains BNF rules for parameter values.
7
8 our $Type;
9 $Type->{text}->{subtype}->{plain} = {
10 parameter => {
11 charset => {syntax => 'token'}, # RFC 2046 ## TODO: registered?
12 'charset-edition' => {}, # RFC 1922
13 'charset-extension' => {syntax => 'token'}, # RFC 1922 ## TODO: registered?
14 },
15 };
16 $Type->{text}->{subtype}->{html} = { # RFC 2854
17 parameter => {
18 charset => {}, ## TODO: UTF-8 is preferred ## TODO: strongly recommended that it always be present ## NOTE: Syntax and range are not defined.
19 level => {obsolete =>1}, # RFC 1866
20 version => {obsolete => 1}, # HTML 3.0
21 },
22 };
23 $Type->{text}->{subtype}->{css} = { # RFC 2318
24 parameter => {
25 charset => {}, ## TODO: US-ASCII, iso-8859-X, utf-8 are recommended ## TODO: Any charset that is a superset of US-ASCII may be used ## NOTE: Syntax and range are not defined.
26 },
27 };
28 $Type->{text}->{subtype}->{javascript} = { # RFC 4329
29 parameter => {
30 charset => {syntax => 'mime-charset'}, ## TODO: SHOULD be registered
31 e4x => {checker => sub { # HTML5 (but informative?)
32 my ($value, $onerror) = @_;
33 unless ($value eq '1') {
34 $onerror->(type => 'value syntax error:e4x', level => 'm');
35 }
36 }},
37 },
38 obsolete => 1,
39 };
40 $Type->{text}->{subtype}->{ecmascript} = { # RFC 4329
41 parameter => {
42 charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
43 },
44 };
45 $Type->{audio}->{subtype}->{mpeg} = { # RFC 3003
46 };
47 my $CodecsParameter = { # RFC 4281
48 ## TODO: syntax and value check
49 };
50 $Type->{audio}->{subtype}->{'3gpp'} = {
51 parameter => {
52 codecs => $CodecsParameter, # RFC 4281
53 },
54 };
55 $Type->{video}->{subtype}->{'3gpp'} = {
56 parameter => {
57 codecs => $CodecsParameter, # RFC 4281
58 },
59 };
60 $Type->{audio}->{subtype}->{'3gpp2'} = { # RFC 4393
61 parameter => {
62 codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
63 },
64 };
65 $Type->{video}->{subtype}->{'3gpp2'} = { # RFC 4393
66 parameter => {
67 codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
68 },
69 };
70 $Type->{application}->{subtype}->{'octet-stream'} = {
71 parameter => {
72 conversions => {obsolete => 1}, # RFC 1341 ## TODO: syntax
73 name => {obsolete => 1}, # RFC 1341
74 padding => {}, # RFC 2046
75 type => {}, # RFC 2046
76 },
77 };
78 $Type->{application}->{subtype}->{javascript} = { # RFC 4329
79 parameter => {
80 charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
81 },
82 };
83 $Type->{application}->{subtype}->{ecmascript} = { # RFC 4329
84 parameter => {
85 charset => $Type->{text}->{subtype}->{ecmascript}->{parameter}->{charset},
86 },
87 };
88 $Type->{multipart}->{parameter}->{boundary} = {
89 checker => sub {
90 my ($value, $onerror) = @_;
91 if ($value !~ /\A[0-9A-Za-z'()+_,.\x2F:=?-]{0,69}[0-9A-Za-z'()+_,.\x2F:=?\x20-]\z/) {
92 $onerror->(type => 'value syntax error:boundary', level => 'm');
93 }
94 },
95 required => 1,
96 };
97 $Type->{message}->{subtype}->{partial} = {
98 parameter => {
99 id => {required => 1}, # RFC 2046
100 number => {required => 1}, # RFC 2046
101 total => {}, # RFC 2046 # required for the last fragment
102 },
103 };
104 $Type->{message}->{subtype}->{'external-body'} = {
105 parameter => {
106 'access-type' => {
107 required => 1,
108 syntax => 'token', ## TODO: registry?
109 }, # RFC 2046
110 expiration => {syntax => 'MIME date-time'}, # RFC 2046
111 permission => {}, # RFC 2046
112 size => {}, # RFC 2046
113 ## TODO: access-type dependent parameters
114 },
115 };
116
117 sub check_imt ($$$$@) {
118 my (undef, $onerror, $type, $subtype, @parameter) = @_;
119
120 require Message::IMT::InternetMediaType; ## From manakai
121 my $dom = 'Message::DOM::DOMImplementation'; ## ISSUE: This is not a formal way to instantiate it.
122
123 local $Error::Depth = $Error::Depth + 1;
124
125 my $imt = $dom->create_internet_media_type ($type, $subtype);
126 while (@parameter) {
127 $imt->add_parameter (shift @parameter => shift @parameter);
128 ## NOTE: Attribute duplication are not error, though its semantics
129 ## is not defined.
130 ## See <http://suika.fam.cx/gate/2005/sw/%E5%AA%92%E4%BD%93%E5%9E%8B/%E5%BC%95%E6%95%B0>.
131 }
132
133 my $type = $imt->top_level_type;
134 my $subtype = $imt->subtype;
135
136 my $type_def = $Type->{$type};
137 my $has_param;
138 if ($type_def) {
139 my $subtype_def = $type_def->{subtype}->{$subtype};
140 if ($subtype_def) {
141 for (0..$imt->parameter_length-1) {
142 my $attr = $imt->get_attribute ($_);
143 my $value = $imt->get_value ($_);
144 $has_param->{$attr} = 1;
145 my $param_def = $subtype_def->{parameter}->{$attr}
146 || $type_def->{parameter}->{$attr};
147 if ($param_def) {
148 if (defined $param_def->{syntax}) {
149 if ($param_def->{syntax} eq 'mime-charset') { # RFC 2978
150 if ($value =~ /[^A-Za-z0-9!#\x23%&'+^_`{}~-]/) {
151 $onerror->(type => 'value syntax error:'.$attr, level => 'm');
152 }
153 } elsif ($param_def->{syntax} eq 'token') { # RFC 2046
154 if ($value =~ /[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/) {
155 $onerror->(type => 'value syntax error:'.$attr, level => 'm');
156 }
157 }
158 ## TODO: syntax |MIME date-time|
159 if ($param_def->{checker}) {
160 $param_def->{checker}->($value, $onerror);
161 }
162 if ($param_def->{obsolete}) {
163 $onerror->(type => 'obsolete parameter:'.$attr, level => 'm');
164 }
165 }
166 } else {
167 $onerror->(type => 'parameter not supported:'.$attr, level => 'w');
168 }
169 }
170
171 for (keys %{$subtype_def->{parameter} or {}}) {
172 if ($subtype_def->{parameter}->{$_}->{required} and
173 not $has_param->{$_}) {
174 $onerror->(type => 'parameter missing:'.$_, level => 'm');
175 }
176 }
177
178 if ($subtype_def->{obsolete}) {
179 $onerror->(type => 'obsolete subtype', level => 's');
180 }
181 } else {
182 $onerror->(type => 'subtype not supported', level => 'w');
183 }
184
185 for (keys %{$type_def->{parameter} or {}}) {
186 if ($type_def->{parameter}->{$_}->{required} and
187 not $has_param->{$_}) {
188 $onerror->(type => 'parameter missing:'.$_, level => 'm');
189 }
190 }
191 } else {
192 $onerror->(type => 'type not supported', level => 'w');
193 }
194 ## TODO: registered?
195 } # check_imt
196
197 1;
198 ## $Date: 2007/05/25 14:16:29 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24