1 |
wakaba |
1.1 |
package Whatpm::CSS::Parser; |
2 |
|
|
use strict; |
3 |
|
|
use Whatpm::CSS::Tokenizer qw(:token); |
4 |
|
|
require Whatpm::CSS::SelectorsParser; |
5 |
|
|
|
6 |
|
|
## Constructor.  Invoked as a class method; the invocant is the package
## name into which the new parser object is blessed.
##
## The returned object is a hash reference holding a no-op |onerror|
## handler and the default values of the three error-level settings.
sub new ($) {
  my $class = shift;

  my %default = (
    onerror => sub { },
    must_level => 'm',
    message_level => 'w',
    unsupported_level => 'unsupported',
  );

  return bless {%default}, $class;
} # new
13 |
|
|
|
14 |
|
|
## States of the parser loop in |parse_char_string|.
use constant {
  BEFORE_STATEMENT_STATE => 0,
  BEFORE_DECLARATION_STATE => 1,
  IGNORED_STATEMENT_STATE => 2,
  IGNORED_DECLARATION_STATE => 3,
};
18 |
|
|
|
19 |
wakaba |
1.5 |
## Lookup tables of property definitions.
our (
  $Prop, ## By CSS property name
  $Attr, ## By CSSOM attribute name
  $Key,  ## By internal key
);
22 |
|
|
|
23 |
wakaba |
1.1 |
## Parses a character string as a CSS style sheet.
##
## Arguments:
##   $_[0]  The parser object.  Its |onerror| handler is invoked for every
##          reported error with |line| and |column| name/value pairs
##          appended to the error description; its |must_level| and
##          |unsupported_level| values are used as the |level| of the
##          reported errors.
##   $_[1]  The string to parse.  The parser works on a copy of it.
##
## Returns the |Message::DOM::CSSStyleSheet| object created from the
## rules that were successfully parsed.  Statements and declarations
## that cannot be parsed are reported through |onerror| and skipped.
sub parse_char_string ($$) {
  my $self = $_[0];

  my $s = $_[1];
  pos ($s) = 0;
  my $line = 1;
  my $column = 0;

  ## Wrap the user's error handler such that the current position in
  ## the input is always reported.
  my $_onerror = $self->{onerror};
  my $onerror = sub {
    $_onerror->(@_, line => $line, column => $column);
  };

  my $tt = Whatpm::CSS::Tokenizer->new;
  $tt->{onerror} = $onerror;
  ## Returns the code point of the next character of the input, or -1
  ## at the end of the input, maintaining |$line| and |$column|.
  $tt->{get_char} = sub {
    if (pos $s < length $s) {
      my $c = ord substr $s, pos ($s)++, 1;
      if ($c == 0x000A) {
        $line++;
        $column = 0;
      } elsif ($c == 0x000D) {
        ## A CR immediately followed by LF is counted as an ordinary
        ## character; the following LF then starts the new line.
        unless (substr ($s, pos ($s), 1) eq "\x0A") {
          $line++;
          $column = 0;
        } else {
          $column++;
        }
      } else {
        $column++;
      }
      return $c;
    } else {
      return -1;
    }
  }; # $tt->{get_char}
  $tt->init;

  my $sp = Whatpm::CSS::SelectorsParser->new;
  $sp->{onerror} = $onerror;
  $sp->{must_level} = $self->{must_level};
  $sp->{pseudo_element} = $self->{pseudo_element};
  $sp->{pseudo_class} = $self->{pseudo_class};

  ## Namespace prefix ('' for the default namespace) -> namespace URI,
  ## filled by the |@namespace| rules of the style sheet.
  my $nsmap = {};
  $sp->{lookup_namespace_uri} = sub {
    return $nsmap->{$_[0]}; # $_[0] is '' (default namespace) or prefix
  }; # $sp->{lookup_namespace_uri}

  ## TODO: Supported pseudo classes and elements...

  require Message::DOM::CSSStyleSheet;
  require Message::DOM::CSSRule;
  require Message::DOM::CSSStyleDeclaration;

  my $state = BEFORE_STATEMENT_STATE;
  my $t = $tt->get_next_token;

  my $open_rules = [[]];
  my $current_rules = $open_rules->[-1];
  my $current_decls;
  ## Stack of the closing token types expected for the brackets opened
  ## while skipping an ignored statement or declaration.
  my $closing_tokens = [];
  my $charset_allowed = 1;
  my $namespace_allowed = 1;

  ## Token types at which the skipping in the ignored states pauses, so
  ## that the checks at the top of those states can examine them.
  my $skip_stop = {
    EOF_TOKEN, 1,
    RBRACE_TOKEN, 1,
    RBRACKET_TOKEN, 1,
    RPAREN_TOKEN, 1,
    SEMICOLON_TOKEN, 1,
  };

  S: {
    if ($state == BEFORE_STATEMENT_STATE) {
      $t = $tt->get_next_token
          while $t->{type} == S_TOKEN or
              $t->{type} == CDO_TOKEN or
              $t->{type} == CDC_TOKEN;

      if ($t->{type} == ATKEYWORD_TOKEN) {
        if (lc $t->{value} eq 'namespace') { ## TODO: case folding
          $t = $tt->get_next_token;
          $t = $tt->get_next_token while $t->{type} == S_TOKEN;

          my $prefix;
          if ($t->{type} == IDENT_TOKEN) {
            $prefix = lc $t->{value};
            ## TODO: Unicode lowercase

            $t = $tt->get_next_token;
            $t = $tt->get_next_token while $t->{type} == S_TOKEN;
          }

          if ($t->{type} == STRING_TOKEN or $t->{type} == URI_TOKEN) {
            my $uri = $t->{value};

            $t = $tt->get_next_token;
            $t = $tt->get_next_token while $t->{type} == S_TOKEN;

            ## ISSUE: On handling of empty namespace URI, Firefox 2 and
            ## Opera 9 work differently (See SuikaWiki:namespace).
            ## TODO: We need to check what we do once it is specced.

            if ($t->{type} == SEMICOLON_TOKEN) {
              if ($namespace_allowed) {
                $nsmap->{defined $prefix ? $prefix : ''} = $uri;
                push @$current_rules,
                    Message::DOM::CSSNamespaceRule->____new ($prefix, $uri);
                undef $charset_allowed;
                ## NOTE: |$namespace_allowed| is left untouched here:
                ## a style sheet may contain more than one |@namespace|
                ## rule (e.g. a default and a prefixed one), as long as
                ## they precede any other rule.
              } else {
                $onerror->(type => 'at:namespace:not allowed',
                           level => $self->{must_level},
                           token => $t);
              }

              $t = $tt->get_next_token;
              ## Stay in the state.
              redo S;
            } else {
              #
            }
          } else {
            #
          }

          $onerror->(type => 'syntax error:at:namespace',
                     level => $self->{must_level},
                     token => $t);
          #
        } elsif (lc $t->{value} eq 'charset') { ## TODO: case folding
          $t = $tt->get_next_token;
          $t = $tt->get_next_token while $t->{type} == S_TOKEN;

          if ($t->{type} == STRING_TOKEN) {
            my $encoding = $t->{value};

            $t = $tt->get_next_token;
            $t = $tt->get_next_token while $t->{type} == S_TOKEN;

            if ($t->{type} == SEMICOLON_TOKEN) {
              if ($charset_allowed) {
                push @$current_rules,
                    Message::DOM::CSSCharsetRule->____new ($encoding);
                undef $charset_allowed;
              } else {
                $onerror->(type => 'at:charset:not allowed',
                           level => $self->{must_level},
                           token => $t);
              }

              ## TODO: Detect the conformance errors for @charset...

              $t = $tt->get_next_token;
              ## Stay in the state.
              redo S;
            } else {
              #
            }
          } else {
            #
          }

          $onerror->(type => 'syntax error:at:charset',
                     level => $self->{must_level},
                     token => $t);
          #
        ## NOTE: When adding support for new at-rule, insert code
        ## "undef $charset_allowed" and "undef $namespace_allowed" as
        ## appropriate.
        } else {
          $onerror->(type => 'not supported:at:'.$t->{value},
                     level => $self->{unsupported_level},
                     token => $t);
        }

        ## The at-rule is unsupported or broken: skip the rest of it.
        $t = $tt->get_next_token;
        $state = IGNORED_STATEMENT_STATE;
        redo S;
      } elsif (@$open_rules > 1 and $t->{type} == RBRACE_TOKEN) {
        pop @$open_rules;
        ## Rules that follow belong to the enclosing rule list again.
        $current_rules = $open_rules->[-1];
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == EOF_TOKEN) {
        if (@$open_rules > 1) {
          $onerror->(type => 'syntax error:block not closed',
                     level => $self->{must_level},
                     token => $t);
        }

        last S;
      } else {
        ## A rule set.  Neither |@charset| nor |@namespace| is allowed
        ## after it.
        undef $charset_allowed;
        undef $namespace_allowed;

        ($t, my $selectors) = $sp->_parse_selectors_with_tokenizer
            ($tt, LBRACE_TOKEN, $t);

        $t = $tt->get_next_token
            while $t->{type} != LBRACE_TOKEN and $t->{type} != EOF_TOKEN;

        if ($t->{type} == LBRACE_TOKEN) {
          $current_decls = Message::DOM::CSSStyleDeclaration->____new;
          my $rs = Message::DOM::CSSStyleRule->____new
              ($selectors, $current_decls);
          ## If the selectors could not be parsed, the declarations are
          ## still parsed but the rule is not added to the style sheet.
          push @{$current_rules}, $rs if defined $selectors;

          $state = BEFORE_DECLARATION_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          $onerror->(type => 'syntax error:after selectors',
                     level => $self->{must_level},
                     token => $t);

          ## Stay in the state.
          $t = $tt->get_next_token;
          redo S;
        }
      }
    } elsif ($state == BEFORE_DECLARATION_STATE) {
      ## NOTE: DELIM? in declaration will be removed:
      ## <http://csswg.inkedblade.net/spec/css2.1?s=declaration%20delim#issue-2>.

      my $prop_def;
      my $prop_value;
      my $prop_flag;
      $t = $tt->get_next_token while $t->{type} == S_TOKEN;
      if ($t->{type} == IDENT_TOKEN) { # property
        my $prop_name = lc $t->{value}; ## TODO: case folding
        $t = $tt->get_next_token;
        if ($t->{type} == COLON_TOKEN) {
          $t = $tt->get_next_token;
          $t = $tt->get_next_token while $t->{type} == S_TOKEN;

          $prop_def = $Prop->{$prop_name};
          if ($prop_def) {
            ($t, $prop_value)
                = $prop_def->{parse}->($self, $prop_name, $tt, $t, $onerror);
            if ($prop_value) {
              ## NOTE: {parse} don't have to consume trailing spaces.
              $t = $tt->get_next_token while $t->{type} == S_TOKEN;

              if ($t->{type} == EXCLAMATION_TOKEN) {
                $t = $tt->get_next_token;
                $t = $tt->get_next_token while $t->{type} == S_TOKEN;
                if ($t->{type} == IDENT_TOKEN and
                    lc $t->{value} eq 'important') { ## TODO: case folding
                  $prop_flag = 'important';

                  $t = $tt->get_next_token;
                  $t = $tt->get_next_token while $t->{type} == S_TOKEN;

                  #
                } else {
                  $onerror->(type => 'syntax error:important',
                             level => $self->{must_level},
                             token => $t);

                  ## Reprocess.
                  $state = IGNORED_DECLARATION_STATE;
                  redo S;
                }
              }

              #
            } else {
              ## Syntax error.

              ## Reprocess.
              $state = IGNORED_DECLARATION_STATE;
              redo S;
            }
          } else {
            $onerror->(type => 'not supported:property',
                       level => $self->{unsupported_level},
                       token => $t, value => $prop_name);

            #
            $state = IGNORED_DECLARATION_STATE;
            redo S;
          }
        } else {
          $onerror->(type => 'syntax error:property colon',
                     level => $self->{must_level},
                     token => $t);

          #
          $state = IGNORED_DECLARATION_STATE;
          redo S;
        }
      }

      ## End of the declaration (or an empty declaration).
      if ($t->{type} == RBRACE_TOKEN) {
        $t = $tt->get_next_token;
        $state = BEFORE_STATEMENT_STATE;
        #redo S;
      } elsif ($t->{type} == SEMICOLON_TOKEN) {
        $t = $tt->get_next_token;
        ## Stay in the state.
        #redo S;
      } elsif ($t->{type} == EOF_TOKEN) {
        $onerror->(type => 'syntax error:ruleset not closed',
                   level => $self->{must_level},
                   token => $t);
        ## Reprocess.
        $state = BEFORE_STATEMENT_STATE;
        #redo S;
      } else {
        if ($prop_value) {
          $onerror->(type => 'syntax error:property semicolon',
                     level => $self->{must_level},
                     token => $t);
        } else {
          $onerror->(type => 'syntax error:property name',
                     level => $self->{must_level},
                     token => $t);
        }

        #
        $state = IGNORED_DECLARATION_STATE;
        redo S;
      }

      ## The declaration is well-formed; record it, if any.
      if ($prop_value) {
        $$current_decls->{$prop_def->{key}} = [$prop_value, $prop_flag];
      }
      redo S;
    } elsif ($state == IGNORED_STATEMENT_STATE or
             $state == IGNORED_DECLARATION_STATE) {
      if (@$closing_tokens) { ## Something is yet in opening state.
        if ($t->{type} == EOF_TOKEN) {
          @$closing_tokens = ();
          ## Reprocess.
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } elsif ($t->{type} == $closing_tokens->[-1]) {
          pop @$closing_tokens;
          if (@$closing_tokens == 0 and
              $t->{type} == RBRACE_TOKEN and
              $state == IGNORED_STATEMENT_STATE) {
            ## The block that ends the ignored statement is closed.
            $t = $tt->get_next_token;
            $state = BEFORE_STATEMENT_STATE;
            redo S;
          } else {
            $t = $tt->get_next_token;
            ## Stay in the state.
            redo S;
          }
        } else {
          #
        }
      } else {
        if ($t->{type} == SEMICOLON_TOKEN) {
          $t = $tt->get_next_token;
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } elsif ($state == IGNORED_DECLARATION_STATE and
                 $t->{type} == RBRACE_TOKEN) {
          $t = $tt->get_next_token;
          $state = BEFORE_STATEMENT_STATE;
          redo S;
        } elsif ($t->{type} == EOF_TOKEN) {
          ## Reprocess.
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } else {
          #
        }
      }

      ## Skip tokens up to the next token the checks above might be
      ## interested in, remembering any bracket opened on the way.
      ##
      ## The current token is always consumed.  It is never EOF here
      ## (EOF is handled above in both cases), but it might be a stray
      ## semicolon or closing bracket that none of the checks above
      ## handles (e.g. the ";" in "@foo { a; }" or the "]" in "@foo ]").
      ## If such a token were left in place, the parser would come back
      ## here with the same state and the same token forever.
      do {
        if ($t->{type} == LBRACE_TOKEN) {
          push @$closing_tokens, RBRACE_TOKEN;
        } elsif ($t->{type} == LBRACKET_TOKEN) {
          push @$closing_tokens, RBRACKET_TOKEN;
        } elsif ($t->{type} == LPAREN_TOKEN or $t->{type} == FUNCTION_TOKEN) {
          push @$closing_tokens, RPAREN_TOKEN;
        }

        $t = $tt->get_next_token;
      } until ($skip_stop->{$t->{type}});

      #
      ## Stay in the state.
      redo S;
    } else {
      die "$0: parse_char_string: Unknown state: $state";
    }
  } # S

  my $ss = Message::DOM::CSSStyleSheet->____new
      (css_rules => $open_rules->[0],
       ## TODO: href
       ## TODO: owner_node
       ## TODO: media
       type => 'text/css', ## TODO: OK?
       _parser => $self);
  return $ss;
} # parse_char_string
426 |
|
|
|
427 |
wakaba |
1.5 |
## Definition of the 'color' property.  The same definition is
## registered under its CSS property name, its CSSOM attribute name,
## and its internal key.
$Prop->{color} = {
  css => 'color',
  dom => 'color',
  key => 'color',
  ## Parses the property value, starting at the token |$t|.
  ##
  ## Returns the next unprocessed token and the parsed value, an array
  ## reference of the form |['RGBA', $red, $green, $blue, $alpha]|.
  ## If the value cannot be parsed, an error is reported through
  ## |$onerror| and |undef| is returned in place of the value.
  parse => sub {
    my ($self, $prop_name, $tt, $t, $onerror) = @_;

    if ($t->{type} == IDENT_TOKEN) {
      if (lc $t->{value} eq 'blue') { ## TODO: case folding
        $t = $tt->get_next_token;
        ## The keyword denotes an opaque color, so the alpha value is
        ## 1; an alpha value of 0 would be serialized as a fully
        ## transparent |rgba()| color.
        return ($t, ["RGBA", 0, 0, 255, 1]);
      } else {
        #
      }
    } else {
      #
    }

    $onerror->(type => 'syntax error:color',
               level => $self->{must_level},
               token => $t);

    return ($t, undef);
  },
  ## Returns the textual representation of a value returned by
  ## |parse|, or |undef| if the kind of the value is not known.
  serialize => sub {
    my ($self, $prop_name, $value) = @_;
    if ($value->[0] eq 'RGBA') { ## TODO: %d? %f?
      return sprintf 'rgba(%d, %d, %d, %f)', @$value[1, 2, 3, 4];
    } else {
      return undef;
    }
  },
};
$Attr->{color} = $Prop->{color};
$Key->{color} = $Prop->{color};
462 |
|
|
|
463 |
wakaba |
1.1 |
1; |
464 |
wakaba |
1.5 |
## $Date: 2007/12/23 15:47:09 $ |