1 |
wakaba |
1.1 |
package Whatpm::CSS::Parser; |
2 |
|
|
use strict; |
3 |
|
|
use Whatpm::CSS::Tokenizer qw(:token); |
4 |
|
|
require Whatpm::CSS::SelectorsParser; |
5 |
|
|
|
6 |
|
|
## Constructor.  Creates a new parser object blessed into the class
## given as the invocant.
##
## The object starts with the following defaults:
##   onerror           - a no-op error handler (code reference)
##   must_level        - 'm', the level reported for "must"-class errors
##   unsupported_level - 'unsupported', the level reported for
##                       features the parser does not support
##
## Returns the new parser object.
sub new ($) {
  my $class = shift;

  my %default = (
    onerror           => sub { },
    must_level        => 'm',
    unsupported_level => 'unsupported',
  );

  return bless {%default}, $class;
} # new
12 |
|
|
|
13 |
|
|
## States of the state machine in |parse_char_string|.
use constant {
  ## Expecting the start of a statement (an at-rule or a ruleset).
  BEFORE_STATEMENT_STATE => 0,
  ## Inside a declaration block, expecting a declaration or "}".
  BEFORE_DECLARATION_STATE => 1,
  ## Skipping the remainder of a statement that is being ignored.
  IGNORED_STATEMENT_STATE => 2,
  ## Skipping the remainder of a declaration that is being ignored.
  IGNORED_DECLARATION_STATE => 3,
};
17 |
|
|
|
18 |
|
|
## Parses a character string as a CSS style sheet.
##
## Arguments:
##   $_[0] - the parser object (|$self|)
##   $_[1] - the character string to parse
##
## Returns a |Message::DOM::CSSStyleSheet| object whose |css_rules|
## is the list of top-level rules collected while parsing.
##
## Errors are reported by calling |$self->{onerror}| with the error
## description (|type|, |level|, |token|) followed by |line| and
## |column|, which hold the position of the last character read from
## the input.
##
## The parser is a state machine driven by |redo S|: each pass
## inspects the current token |$t| in the current |$state| and either
## consumes it or switches state and reprocesses it.
sub parse_char_string ($$) {
  my $self = $_[0];

  my $s = $_[1];
  pos ($s) = 0;
  my $line = 1;
  my $column = 0;

  ## Wrap the user's handler so that every report carries the
  ## current input position.
  my $_onerror = $self->{onerror};
  my $onerror = sub {
    $_onerror->(@_, line => $line, column => $column);
  };

  my $tt = Whatpm::CSS::Tokenizer->new;
  $tt->{onerror} = $onerror;
  ## Character source for the tokenizer: returns the code point of
  ## the next character, or -1 at the end of the string.
  $tt->{get_char} = sub {
    if (pos ($s) < length $s) {
      my $c = ord substr $s, pos ($s)++, 1;
      if ($c == 0x000A) {
        $line++;
        $column = 0;
      } elsif ($c == 0x000D) {
        ## A CR immediately followed by LF is counted as an ordinary
        ## column; the LF will then start the new line.
        unless (substr ($s, pos ($s), 1) eq "\x0A") {
          $line++;
          $column = 0;
        } else {
          $column++;
        }
      } else {
        $column++;
      }
      return $c;
    } else {
      return -1;
    }
  }; # $tt->{get_char}
  $tt->init;

  my $sp = Whatpm::CSS::SelectorsParser->new;
  $sp->{onerror} = $onerror;
  $sp->{must_level} = $self->{must_level};
  $sp->{pseudo_element} = $self->{pseudo_element};
  $sp->{pseudo_class} = $self->{pseudo_class};

  ## TODO:
  #$sp->{lookup_namespace_uri} = ...;

  ## TODO: Supported pseudo classes and elements...

  require Message::DOM::CSSStyleSheet;
  require Message::DOM::CSSRule;
  require Message::DOM::CSSStyleDeclaration;

  ## Tokens at which the skipping loop of the ignored states stops.
  ## NOTE: The keys are constants, so "," (not "=>") is used.
  my $is_stop_token = {
    EOF_TOKEN, 1,
    RBRACE_TOKEN, 1,
    RBRACKET_TOKEN, 1,
    RPAREN_TOKEN, 1,
    SEMICOLON_TOKEN, 1,
  };

  my $state = BEFORE_STATEMENT_STATE;
  my $t = $tt->get_next_token;

  my $open_rules = [[]];
  my $current_rules = $open_rules->[-1];
  my $current_decls;
  ## Stack of the closing tokens expected for the blocks, brackets,
  ## and parentheses opened while ignoring a statement or declaration.
  my $closing_tokens = [];
  ## True until something that disallows a following |@charset| is
  ## seen.
  my $charset_allowed = 1;

  S: {
    if ($state == BEFORE_STATEMENT_STATE) {
      $t = $tt->get_next_token
          while $t->{type} == S_TOKEN or
              $t->{type} == CDO_TOKEN or
              $t->{type} == CDC_TOKEN;

      if ($t->{type} == ATKEYWORD_TOKEN) {
        if ($t->{value} eq 'charset') {
          $t = $tt->get_next_token;
          $t = $tt->get_next_token while $t->{type} == S_TOKEN;

          if ($t->{type} == STRING_TOKEN) {
            my $encoding = $t->{value};

            $t = $tt->get_next_token;
            $t = $tt->get_next_token while $t->{type} == S_TOKEN;

            if ($t->{type} == SEMICOLON_TOKEN) {
              if ($charset_allowed) {
                push @$current_rules,
                    Message::DOM::CSSCharsetRule->____new ($encoding);
                undef $charset_allowed;
              } else {
                $onerror->(type => 'at:charset:not allowed',
                           level => $self->{must_level},
                           token => $t);
              }

              ## TODO: Detect the conformance errors for @charset...

              $t = $tt->get_next_token;
              ## Stay in the state.
              redo S;
            } else {
              #
            }
          } else {
            #
          }

          $onerror->(type => 'syntax error:at:charset',
                     level => $self->{must_level},
                     token => $t);
          ## NOTE: When adding support for new at-rule, insert code
          ## "undef $charset_allowed" as appropriate.

          ## Reprocess the offending token in the ignored state.  It
          ## must not be discarded here: it might be the ";" that
          ## ends the rule or a "{" whose "}" has to be tracked.
          $state = IGNORED_STATEMENT_STATE;
          redo S;
        } else {
          $onerror->(type => 'not supported:at:'.$t->{value},
                     level => $self->{unsupported_level},
                     token => $t);
        }

        ## Consume the at-keyword itself and ignore the rest of the
        ## at-rule.
        $t = $tt->get_next_token;
        $state = IGNORED_STATEMENT_STATE;
        redo S;
      } elsif (@$open_rules > 1 and $t->{type} == RBRACE_TOKEN) {
        pop @$open_rules;
        ## Rules that follow belong to the enclosing rule list.
        $current_rules = $open_rules->[-1];
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == EOF_TOKEN) {
        if (@$open_rules > 1) {
          $onerror->(type => 'syntax error:block not closed',
                     level => $self->{must_level},
                     token => $t);
        }

        last S;
      } else {
        undef $charset_allowed;

        ($t, my $selectors) = $sp->_parse_selectors_with_tokenizer
            ($tt, LBRACE_TOKEN, $t);

        ## Skip whatever is left before the "{" (or the end of input).
        $t = $tt->get_next_token
            while $t->{type} != LBRACE_TOKEN and $t->{type} != EOF_TOKEN;

        if ($t->{type} == LBRACE_TOKEN) {
          $current_decls = Message::DOM::CSSStyleDeclaration->____new;
          my $rs = Message::DOM::CSSStyleRule->____new
              ($selectors, $current_decls);
          ## The rule is kept only if the selectors were parsed; the
          ## declaration block is processed either way.
          push @{$current_rules}, $rs if defined $selectors;

          $state = BEFORE_DECLARATION_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          $onerror->(type => 'syntax error:after selectors',
                     level => $self->{must_level},
                     token => $t);

          ## Stay in the state.
          $t = $tt->get_next_token;
          redo S;
        }
      }
    } elsif ($state == BEFORE_DECLARATION_STATE) {
      ## NOTE: DELIM? in declaration will be removed:
      ## <http://csswg.inkedblade.net/spec/css2.1?s=declaration%20delim#issue-2>.

      $t = $tt->get_next_token while $t->{type} == S_TOKEN;
      if ($t->{type} == IDENT_TOKEN) { # property
        ## TODO: If supported, ...

        $t = $tt->get_next_token;
        #
      } elsif ($t->{type} == RBRACE_TOKEN) {
        $t = $tt->get_next_token;
        $state = BEFORE_STATEMENT_STATE;
        redo S;
      } elsif ($t->{type} == EOF_TOKEN) {
        $onerror->(type => 'syntax error:ruleset not closed',
                   level => $self->{must_level},
                   token => $t);
        ## Reprocess.
        $state = BEFORE_STATEMENT_STATE;
        redo S;
      }

      ## No property is supported yet, so every declaration (and
      ## anything else found here) is ignored.
      $state = IGNORED_DECLARATION_STATE;
      redo S;
    } elsif ($state == IGNORED_STATEMENT_STATE or
             $state == IGNORED_DECLARATION_STATE) {
      if (@$closing_tokens) { ## Something is yet in opening state.
        if ($t->{type} == EOF_TOKEN) {
          @$closing_tokens = ();
          ## Reprocess.
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } elsif ($t->{type} == $closing_tokens->[-1]) {
          pop @$closing_tokens;
          if (@$closing_tokens == 0 and
              $t->{type} == RBRACE_TOKEN and
              $state == IGNORED_STATEMENT_STATE) {
            ## The block that ends the ignored statement is closed.
            $t = $tt->get_next_token;
            $state = BEFORE_STATEMENT_STATE;
            redo S;
          } else {
            $t = $tt->get_next_token;
            ## Stay in the state.
            redo S;
          }
        } else {
          #
        }
      } else {
        if ($t->{type} == SEMICOLON_TOKEN) {
          $t = $tt->get_next_token;
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } elsif ($state == IGNORED_DECLARATION_STATE and
                 $t->{type} == RBRACE_TOKEN) {
          $t = $tt->get_next_token;
          $state = BEFORE_STATEMENT_STATE;
          redo S;
        } elsif ($t->{type} == EOF_TOKEN) {
          ## Reprocess.
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } else {
          #
        }
      }

      ## The current token was not handled above.  If it is one of
      ## the tokens at which the loop below stops (a ";" nested in an
      ## open construct, or a closing token that matches nothing that
      ## is open), it has to be consumed here; otherwise the loop
      ## would not advance and |redo S| would see the same token
      ## forever.  (EOF never reaches here: both branches above
      ## reprocess it.)
      if ($t->{type} != EOF_TOKEN and $is_stop_token->{$t->{type}}) {
        $t = $tt->get_next_token;
      }

      ## Skip tokens up to the next stop token, remembering which
      ## closing token each opened construct is waiting for.
      while (not $is_stop_token->{$t->{type}}) {
        if ($t->{type} == LBRACE_TOKEN) {
          push @$closing_tokens, RBRACE_TOKEN;
        } elsif ($t->{type} == LBRACKET_TOKEN) {
          push @$closing_tokens, RBRACKET_TOKEN;
        } elsif ($t->{type} == LPAREN_TOKEN or $t->{type} == FUNCTION_TOKEN) {
          push @$closing_tokens, RPAREN_TOKEN;
        }

        $t = $tt->get_next_token;
      }

      #
      ## Stay in the state.
      redo S;
    } else {
      die "$0: parse_char_string: Unknown state: $state";
    }
  } # S

  my $ss = Message::DOM::CSSStyleSheet->____new
      (css_rules => $open_rules->[0],
       ## TODO: href
       ## TODO: owner_node
       ## TODO: media
       type => 'text/css', ## TODO: OK?
       _parser => $self);
  return $ss;
} # parse_char_string
283 |
|
|
|
284 |
|
|
1; |
285 |
wakaba |
1.3 |
## $Date: 2007/12/23 08:33:55 $ |