/[suikacvs]/markup/html/whatpm/Whatpm/IMTChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/IMTChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Sat May 26 08:12:34 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
++ whatpm/t/ChangeLog	26 May 2007 08:12:26 -0000
2007-05-26  Wakaba  <wakaba@suika.fam.cx>

	* content-model-2.dat: Errors on obsolete media
	type (i.e. |text/javascript|) are added to the expected results.

	* tree-test-1.dat: Tests for |style| elements' attributes
	are added.

++ whatpm/Whatpm/ChangeLog	26 May 2007 08:11:16 -0000
2007-05-26  Wakaba  <wakaba@suika.fam.cx>

	* IMTChecker.pm: New module.

	* ContentChecker.pm ($HTMLIMTAttrChecker): Call IMTChecker
	to test parameter value validity.

	* HTML.pm.src ($style_start_tag): Attributes were
	discarded.

1 wakaba 1.1 package Whatpm::IMTChecker;
2     use strict;
3    
4     ## ISSUE: RFC 2046 is so poorly written specification that
5     ## what we should do is unclear... It's even worse than
6     ## RFC 1521, which contains BNF rules for parameter values.
7    
8     our $Type;
9     $Type->{text}->{subtype}->{plain} = {
10     parameter => {
11     charset => {syntax => 'token'}, # RFC 2046 ## TODO: registered?
12     'charset-edition' => {}, # RFC 1922
13     'charset-extension' => {syntax => 'token'}, # RFC 1922 ## TODO: registered?
14     },
15     };
16     $Type->{text}->{subtype}->{html} = { # RFC 2854
17     parameter => {
18     charset => {}, ## TODO: UTF-8 is preferred ## TODO: strongly recommended that it always be present ## NOTE: Syntax and range are not defined.
19     level => {obsolete =>1}, # RFC 1866
20     version => {obsolete => 1}, # HTML 3.0
21     },
22     };
23     $Type->{text}->{subtype}->{css} = { # RFC 2318
24     parameter => {
25     charset => {}, ## TODO: US-ASCII, iso-8859-X, utf-8 are recommended ## TODO: Any charset that is a superset of US-ASCII may be used ## NOTE: Syntax and range are not defined.
26     },
27     };
28     $Type->{text}->{subtype}->{javascript} = { # RFC 4329
29     parameter => {
30     charset => {syntax => 'mime-charset'}, ## TODO: SHOULD be registered
31     e4x => {checker => sub { # HTML5 (but informative?)
32     my ($value, $onerror) = @_;
33     unless ($value eq '1') {
34     $onerror->(type => 'value syntax error:e4x', level => 'm');
35     }
36     }},
37     },
38     obsolete => 1,
39     };
40     $Type->{text}->{subtype}->{ecmascript} = { # RFC 4329
41     parameter => {
42     charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
43     },
44     };
45     $Type->{audio}->{subtype}->{mpeg} = { # RFC 3003
46     };
47     my $CodecsParameter = { # RFC 4281
48     ## TODO: syntax and value check
49     };
50     $Type->{audio}->{subtype}->{'3gpp'} = {
51     parameter => {
52     codecs => $CodecsParameter, # RFC 4281
53     },
54     };
55     $Type->{video}->{subtype}->{'3gpp'} = {
56     parameter => {
57     codecs => $CodecsParameter, # RFC 4281
58     },
59     };
60     $Type->{audio}->{subtype}->{'3gpp2'} = { # RFC 4393
61     parameter => {
62     codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
63     },
64     };
65     $Type->{video}->{subtype}->{'3gpp2'} = { # RFC 4393
66     parameter => {
67     codecs => $CodecsParameter, # RFC 4393 -> RFC 4281
68     },
69     };
70     $Type->{application}->{subtype}->{'octet-stream'} = {
71     parameter => {
72     conversions => {obsolete => 1}, # RFC 1341 ## TODO: syntax
73     name => {obsolete => 1}, # RFC 1341
74     padding => {}, # RFC 2046
75     type => {}, # RFC 2046
76     },
77     };
78     $Type->{application}->{subtype}->{javascript} = { # RFC 4329
79     parameter => {
80     charset => $Type->{text}->{subtype}->{javascript}->{parameter}->{charset},
81     },
82     };
83     $Type->{application}->{subtype}->{ecmascript} = { # RFC 4329
84     parameter => {
85     charset => $Type->{text}->{subtype}->{ecmascript}->{parameter}->{charset},
86     },
87     };
88     $Type->{multipart}->{parameter}->{boundary} = {
89     checker => sub {
90     my ($value, $onerror) = @_;
91     if ($value !~ /\A[0-9A-Za-z'()+_,.\x2F:=?-]{0,69}[0-9A-Za-z'()+_,.\x2F:=?\x20-]\z/) {
92     $onerror->(type => 'value syntax error:boundary', level => 'm');
93     }
94     },
95     required => 1,
96     };
97     $Type->{message}->{subtype}->{partial} = {
98     parameter => {
99     id => {required => 1}, # RFC 2046
100     number => {required => 1}, # RFC 2046
101     total => {}, # RFC 2046 # required for the last fragment
102     },
103     };
104     $Type->{message}->{subtype}->{'external-body'} = {
105     parameter => {
106     'access-type' => {
107     required => 1,
108     syntax => 'token', ## TODO: registry?
109     }, # RFC 2046
110     expiration => {syntax => 'MIME date-time'}, # RFC 2046
111     permission => {}, # RFC 2046
112     size => {}, # RFC 2046
113     ## TODO: access-type dependent parameters
114     },
115     };
116    
117     sub check_imt ($$$$@) {
118     my (undef, $onerror, $type, $subtype, @parameter) = @_;
119    
120     require Message::IMT::InternetMediaType; ## From manakai
121     my $dom = 'Message::DOM::DOMImplementation'; ## ISSUE: This is not a formal way to instantiate it.
122    
123     local $Error::Depth = $Error::Depth + 1;
124    
125     my $imt = $dom->create_internet_media_type ($type, $subtype);
126     while (@parameter) {
127     $imt->add_parameter (shift @parameter => shift @parameter);
128     ## NOTE: Attribute duplication are not error, though its semantics
129     ## is not defined.
130     ## See <http://suika.fam.cx/gate/2005/sw/%E5%AA%92%E4%BD%93%E5%9E%8B/%E5%BC%95%E6%95%B0>.
131     }
132    
133     my $type = $imt->top_level_type;
134     my $subtype = $imt->subtype;
135    
136     my $type_def = $Type->{$type};
137     my $has_param;
138     if ($type_def) {
139     my $subtype_def = $type_def->{subtype}->{$subtype};
140     if ($subtype_def) {
141     for (0..$imt->parameter_length-1) {
142     my $attr = $imt->get_attribute ($_);
143     my $value = $imt->get_value ($_);
144     $has_param->{$attr} = 1;
145     my $param_def = $subtype_def->{parameter}->{$attr}
146     || $type_def->{parameter}->{$attr};
147     if ($param_def) {
148     if (defined $param_def->{syntax}) {
149     if ($param_def->{syntax} eq 'mime-charset') { # RFC 2978
150     if ($value =~ /[^A-Za-z0-9!#\x23%&'+^_`{}~-]/) {
151     $onerror->(type => 'value syntax error:'.$attr, level => 'm');
152     }
153     } elsif ($param_def->{syntax} eq 'token') { # RFC 2046
154     if ($value =~ /[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/) {
155     $onerror->(type => 'value syntax error:'.$attr, level => 'm');
156     }
157     }
158     ## TODO: syntax |MIME date-time|
159     if ($param_def->{checker}) {
160     $param_def->{checker}->($value, $onerror);
161     }
162     if ($param_def->{obsolete}) {
163     $onerror->(type => 'obsolete parameter:'.$attr, level => 'm');
164     }
165     }
166     } else {
167     $onerror->(type => 'parameter not supported:'.$attr, level => 'w');
168     }
169     }
170    
171     for (keys %{$subtype_def->{parameter} or {}}) {
172     if ($subtype_def->{parameter}->{$_}->{required} and
173     not $has_param->{$_}) {
174     $onerror->(type => 'parameter missing:'.$_, level => 'm');
175     }
176     }
177    
178     if ($subtype_def->{obsolete}) {
179     $onerror->(type => 'obsolete subtype', level => 's');
180     }
181     } else {
182     $onerror->(type => 'subtype not supported', level => 'w');
183     }
184    
185     for (keys %{$type_def->{parameter} or {}}) {
186     if ($type_def->{parameter}->{$_}->{required} and
187     not $has_param->{$_}) {
188     $onerror->(type => 'parameter missing:'.$_, level => 'm');
189     }
190     }
191     } else {
192     $onerror->(type => 'type not supported', level => 'w');
193     }
194     ## TODO: registered?
195     } # check_imt
196    
197     1;
198     ## $Date: 2007/05/25 14:16:29 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24