/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Parser.pm
Suika

Contents of /markup/html/whatpm/Whatpm/CSS/Parser.pm

Parent Directory | Revision Log


Revision 1.5 - (hide annotations) (download)
Mon Dec 31 03:00:42 2007 UTC (16 years, 10 months ago) by wakaba
Branch: MAIN
Changes since 1.4: +133 -10 lines
++ whatpm/Whatpm/CSS/ChangeLog	31 Dec 2007 03:00:38 -0000
2007-12-31  Wakaba  <wakaba@suika.fam.cx>

	* Parser.pm: At-keywords were not case-insensitive.
	Support for property value parsing.  Empty declarations
	were not supported.  Raise errors for broken declarations.
	Parser and serializer definition for |color| is added.

1 wakaba 1.1 package Whatpm::CSS::Parser;
2     use strict;
3     use Whatpm::CSS::Tokenizer qw(:token);
4     require Whatpm::CSS::SelectorsParser;
5    
## Constructor.  Creates a parser object carrying the default settings:
## a no-op |onerror| callback and the default values for |must_level|,
## |message_level| and |unsupported_level|.
##
## The only argument is the class name (or whatever value is to be used
## as the second argument to |bless|).
sub new ($) {
  my $class = shift;

  my %default = (
    onerror => sub { },
    must_level => 'm',
    message_level => 'w',
    unsupported_level => 'unsupported',
  );

  return bless {%default}, $class;
} # new
13    
## States of the parsing loop in |parse_char_string|.
use constant {
  BEFORE_STATEMENT_STATE => 0,    ## Expecting an at-rule or a rule set.
  BEFORE_DECLARATION_STATE => 1,  ## Expecting a declaration or "}".
  IGNORED_STATEMENT_STATE => 2,   ## Skipping a broken/unsupported statement.
  IGNORED_DECLARATION_STATE => 3, ## Skipping a broken/unsupported declaration.
};
18    
## Property definition registries.  Each one is used as a hash reference
## whose values are property definition hashes:
##   $Prop - by CSS property name
##   $Attr - by CSSOM attribute name
##   $Key  - by internal key
our ($Prop, $Attr, $Key);
22    
## Parse a character string as a CSS style sheet.
##
## Arguments:
##   $_[0] - The parser object.  Its |onerror|, |must_level|,
##           |unsupported_level|, |pseudo_element| and |pseudo_class|
##           members are read.
##   $_[1] - The source text of the style sheet.
##
## Returns the object created by |Message::DOM::CSSStyleSheet->____new|,
## whose |css_rules| is the list of top-level rules collected here.
##
## Errors are not thrown; they are reported by calling the |onerror|
## callback with |type|, |level| and |token| arguments, to which the
## current |line| and |column| are appended.
##
## The parser is a state machine driven by |redo S|:
##   BEFORE_STATEMENT_STATE    - an at-rule or a rule set is expected.
##   BEFORE_DECLARATION_STATE  - a declaration, ";" or "}" is expected.
##   IGNORED_STATEMENT_STATE   - the rest of a broken or unsupported
##                               statement is being skipped.
##   IGNORED_DECLARATION_STATE - the rest of a broken or unsupported
##                               declaration is being skipped.
sub parse_char_string ($$) {
  my $self = $_[0];

  my $s = $_[1];
  pos ($s) = 0;
  my $line = 1;
  my $column = 0;

  ## Wrap the user's error handler such that the current position is
  ## always reported.
  my $_onerror = $self->{onerror};
  my $onerror = sub {
    $_onerror->(@_, line => $line, column => $column);
  };

  my $tt = Whatpm::CSS::Tokenizer->new;
  $tt->{onerror} = $onerror;
  ## Character source for the tokenizer: returns the code point of the
  ## next character, or -1 at the end of the string.  LF, and CR not
  ## followed by LF, start a new line; CR followed by LF only advances
  ## the column such that CRLF is counted as one line break.
  $tt->{get_char} = sub {
    if (pos $s < length $s) {
      my $c = ord substr $s, pos ($s)++, 1;
      if ($c == 0x000A) {
        $line++;
        $column = 0;
      } elsif ($c == 0x000D) {
        unless (substr ($s, pos ($s), 1) eq "\x0A") {
          $line++;
          $column = 0;
        } else {
          $column++;
        }
      } else {
        $column++;
      }
      return $c;
    } else {
      return -1;
    }
  }; # $tt->{get_char}
  $tt->init;

  my $sp = Whatpm::CSS::SelectorsParser->new;
  $sp->{onerror} = $onerror;
  $sp->{must_level} = $self->{must_level};
  $sp->{pseudo_element} = $self->{pseudo_element};
  $sp->{pseudo_class} = $self->{pseudo_class};

  ## Namespace prefix -> URI map filled by |@namespace| rules.
  my $nsmap = {};
  $sp->{lookup_namespace_uri} = sub {
    return $nsmap->{$_[0]}; # $_[0] is '' (default namespace) or prefix
  }; # $sp->{lookup_namespace_uri}

  ## TODO: Supported pseudo classes and elements...

  require Message::DOM::CSSStyleSheet;
  require Message::DOM::CSSRule;
  require Message::DOM::CSSStyleDeclaration;

  my $state = BEFORE_STATEMENT_STATE;
  my $t = $tt->get_next_token;

  my $open_rules = [[]];
  my $current_rules = $open_rules->[-1];
  my $current_decls;
  ## Stack of closing token types for the brackets opened while skipping.
  my $closing_tokens = [];
  my $charset_allowed = 1;
  my $namespace_allowed = 1;

  S: {
    if ($state == BEFORE_STATEMENT_STATE) {
      $t = $tt->get_next_token
          while $t->{type} == S_TOKEN or
                $t->{type} == CDO_TOKEN or
                $t->{type} == CDC_TOKEN;

      if ($t->{type} == ATKEYWORD_TOKEN) {
        if (lc $t->{value} eq 'namespace') { ## TODO: case folding
          $t = $tt->get_next_token;
          $t = $tt->get_next_token while $t->{type} == S_TOKEN;

          ## The prefix is optional; no prefix means the default namespace.
          my $prefix;
          if ($t->{type} == IDENT_TOKEN) {
            $prefix = lc $t->{value};
            ## TODO: Unicode lowercase

            $t = $tt->get_next_token;
            $t = $tt->get_next_token while $t->{type} == S_TOKEN;
          }

          if ($t->{type} == STRING_TOKEN or $t->{type} == URI_TOKEN) {
            my $uri = $t->{value};

            $t = $tt->get_next_token;
            $t = $tt->get_next_token while $t->{type} == S_TOKEN;

            ## ISSUE: On handling of empty namespace URI, Firefox 2 and
            ## Opera 9 work differently (See SuikaWiki:namespace).
            ## TODO: We need to check what we do once it is specced.

            if ($t->{type} == SEMICOLON_TOKEN) {
              if ($namespace_allowed) {
                $nsmap->{defined $prefix ? $prefix : ''} = $uri;
                push @$current_rules,
                    Message::DOM::CSSNamespaceRule->____new ($prefix, $uri);
                undef $charset_allowed;
                undef $namespace_allowed;
              } else {
                $onerror->(type => 'at:namespace:not allowed',
                           level => $self->{must_level},
                           token => $t);
              }

              $t = $tt->get_next_token;
              ## Stay in the state.
              redo S;
            } else {
              ## Fall through to the syntax error below.
            }
          } else {
            ## Fall through to the syntax error below.
          }

          $onerror->(type => 'syntax error:at:namespace',
                     level => $self->{must_level},
                     token => $t);
          ## Fall through to ignore the rest of the statement.
        } elsif (lc $t->{value} eq 'charset') { ## TODO: case folding
          $t = $tt->get_next_token;
          $t = $tt->get_next_token while $t->{type} == S_TOKEN;

          if ($t->{type} == STRING_TOKEN) {
            my $encoding = $t->{value};

            $t = $tt->get_next_token;
            $t = $tt->get_next_token while $t->{type} == S_TOKEN;

            if ($t->{type} == SEMICOLON_TOKEN) {
              if ($charset_allowed) {
                push @$current_rules,
                    Message::DOM::CSSCharsetRule->____new ($encoding);
                undef $charset_allowed;
              } else {
                $onerror->(type => 'at:charset:not allowed',
                           level => $self->{must_level},
                           token => $t);
              }

              ## TODO: Detect the conformance errors for @charset...

              $t = $tt->get_next_token;
              ## Stay in the state.
              redo S;
            } else {
              ## Fall through to the syntax error below.
            }
          } else {
            ## Fall through to the syntax error below.
          }

          $onerror->(type => 'syntax error:at:charset',
                     level => $self->{must_level},
                     token => $t);
          ## Fall through to ignore the rest of the statement.

          ## NOTE: When adding support for new at-rule, insert code
          ## "undef $charset_allowed" and "undef $namespace_allowed" as
          ## appropriate.
        } else {
          $onerror->(type => 'not supported:at:'.$t->{value},
                     level => $self->{unsupported_level},
                     token => $t);
        }

        ## Broken or unsupported at-rule: skip the rest of it.
        $t = $tt->get_next_token;
        $state = IGNORED_STATEMENT_STATE;
        redo S;
      } elsif (@$open_rules > 1 and $t->{type} == RBRACE_TOKEN) {
        pop @$open_rules;
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == EOF_TOKEN) {
        if (@$open_rules > 1) {
          $onerror->(type => 'syntax error:block not closed',
                     level => $self->{must_level},
                     token => $t);
        }

        last S;
      } else {
        ## A rule set.  |@charset| and |@namespace| are no longer allowed
        ## after it.
        undef $charset_allowed;
        undef $namespace_allowed;

        ($t, my $selectors) = $sp->_parse_selectors_with_tokenizer
            ($tt, LBRACE_TOKEN, $t);

        ## Skip whatever the selectors parser has left before the "{".
        $t = $tt->get_next_token
            while $t->{type} != LBRACE_TOKEN and $t->{type} != EOF_TOKEN;

        if ($t->{type} == LBRACE_TOKEN) {
          $current_decls = Message::DOM::CSSStyleDeclaration->____new;
          my $rs = Message::DOM::CSSStyleRule->____new
              ($selectors, $current_decls);
          ## If the selectors are not available, the declaration block is
          ## still parsed but the rule is not added to the style sheet.
          push @{$current_rules}, $rs if defined $selectors;

          $state = BEFORE_DECLARATION_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          $onerror->(type => 'syntax error:after selectors',
                     level => $self->{must_level},
                     token => $t);

          ## Stay in the state.
          $t = $tt->get_next_token;
          redo S;
        }
      }
    } elsif ($state == BEFORE_DECLARATION_STATE) {
      ## NOTE: DELIM? in declaration will be removed:
      ## <http://csswg.inkedblade.net/spec/css2.1?s=declaration%20delim#issue-2>.

      my $prop_def;
      my $prop_value;
      my $prop_flag;
      $t = $tt->get_next_token while $t->{type} == S_TOKEN;
      if ($t->{type} == IDENT_TOKEN) { # property
        my $prop_name = lc $t->{value}; ## TODO: case folding
        $t = $tt->get_next_token;
        if ($t->{type} == COLON_TOKEN) {
          $t = $tt->get_next_token;
          $t = $tt->get_next_token while $t->{type} == S_TOKEN;

          $prop_def = $Prop->{$prop_name};
          if ($prop_def) {
            ## The property's own parser returns the next token and the
            ## parsed value (false if the value is broken).
            ($t, $prop_value)
                = $prop_def->{parse}->($self, $prop_name, $tt, $t, $onerror);
            if ($prop_value) {
              ## NOTE: {parse} don't have to consume trailing spaces.
              $t = $tt->get_next_token while $t->{type} == S_TOKEN;

              if ($t->{type} == EXCLAMATION_TOKEN) {
                $t = $tt->get_next_token;
                $t = $tt->get_next_token while $t->{type} == S_TOKEN;
                if ($t->{type} == IDENT_TOKEN and
                    lc $t->{value} eq 'important') { ## TODO: case folding
                  $prop_flag = 'important';

                  $t = $tt->get_next_token;
                  $t = $tt->get_next_token while $t->{type} == S_TOKEN;

                  ## Go on to the end-of-declaration check below.
                } else {
                  $onerror->(type => 'syntax error:important',
                             level => $self->{must_level},
                             token => $t);

                  ## Reprocess.
                  $state = IGNORED_DECLARATION_STATE;
                  redo S;
                }
              }

              ## Go on to the end-of-declaration check below.
            } else {
              ## Syntax error.

              ## Reprocess.
              $state = IGNORED_DECLARATION_STATE;
              redo S;
            }
          } else {
            $onerror->(type => 'not supported:property',
                       level => $self->{unsupported_level},
                       token => $t, value => $prop_name);

            $state = IGNORED_DECLARATION_STATE;
            redo S;
          }
        } else {
          $onerror->(type => 'syntax error:property colon',
                     level => $self->{must_level},
                     token => $t);

          $state = IGNORED_DECLARATION_STATE;
          redo S;
        }
      }

      ## End of the (possibly empty) declaration.
      if ($t->{type} == RBRACE_TOKEN) {
        $t = $tt->get_next_token;
        $state = BEFORE_STATEMENT_STATE;
        #redo S;
      } elsif ($t->{type} == SEMICOLON_TOKEN) {
        $t = $tt->get_next_token;
        ## Stay in the state.
        #redo S;
      } elsif ($t->{type} == EOF_TOKEN) {
        $onerror->(type => 'syntax error:ruleset not closed',
                   level => $self->{must_level},
                   token => $t);
        ## Reprocess.
        $state = BEFORE_STATEMENT_STATE;
        #redo S;
      } else {
        if ($prop_value) {
          $onerror->(type => 'syntax error:property semicolon',
                     level => $self->{must_level},
                     token => $t);
        } else {
          $onerror->(type => 'syntax error:property name',
                     level => $self->{must_level},
                     token => $t);
        }

        $state = IGNORED_DECLARATION_STATE;
        redo S;
      }

      ## The declaration is well-formed; record it under the property's
      ## internal key.
      if ($prop_value) {
        $$current_decls->{$prop_def->{key}} = [$prop_value, $prop_flag];
      }
      redo S;
    } elsif ($state == IGNORED_STATEMENT_STATE or
             $state == IGNORED_DECLARATION_STATE) {
      if (@$closing_tokens) { ## Something is yet in opening state.
        if ($t->{type} == EOF_TOKEN) {
          @$closing_tokens = ();
          ## Reprocess.
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } elsif ($t->{type} == $closing_tokens->[-1]) {
          pop @$closing_tokens;
          if (@$closing_tokens == 0 and
              $t->{type} == RBRACE_TOKEN and
              $state == IGNORED_STATEMENT_STATE) {
            ## The block that ends the ignored statement has been closed.
            $t = $tt->get_next_token;
            $state = BEFORE_STATEMENT_STATE;
            redo S;
          } else {
            $t = $tt->get_next_token;
            ## Stay in the state.
            redo S;
          }
        } else {
          ## Go on skipping below.
        }
      } else {
        if ($t->{type} == SEMICOLON_TOKEN) {
          $t = $tt->get_next_token;
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } elsif ($state == IGNORED_DECLARATION_STATE and
                 $t->{type} == RBRACE_TOKEN) {
          $t = $tt->get_next_token;
          $state = BEFORE_STATEMENT_STATE;
          redo S;
        } elsif ($t->{type} == EOF_TOKEN) {
          ## Reprocess.
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } else {
          ## Go on skipping below.
        }
      }

      ## Token types at which the skipping loop below stops.
      my $stop_token = {
        EOF_TOKEN, 1,
        RBRACE_TOKEN, 1,
        RBRACKET_TOKEN, 1,
        RPAREN_TOKEN, 1,
        SEMICOLON_TOKEN, 1,
      };

      ## If we get here with a ";", "}", "]" or ")", it is a token that
      ## does not terminate anything in the current context (a ";" or a
      ## non-matching closing token inside an open bracket, or a stray
      ## closing token while nothing is open).  It has to be consumed
      ## here; otherwise the loop below would stop at once without any
      ## progress and |redo S| would process the same token in the same
      ## state forever.
      if ($t->{type} != EOF_TOKEN and $stop_token->{$t->{type}}) {
        $t = $tt->get_next_token;
      }

      ## Skip tokens up to the next candidate terminator, remembering
      ## which closing token each opened bracket is waiting for.
      while (not $stop_token->{$t->{type}}) {
        if ($t->{type} == LBRACE_TOKEN) {
          push @$closing_tokens, RBRACE_TOKEN;
        } elsif ($t->{type} == LBRACKET_TOKEN) {
          push @$closing_tokens, RBRACKET_TOKEN;
        } elsif ($t->{type} == LPAREN_TOKEN or $t->{type} == FUNCTION_TOKEN) {
          push @$closing_tokens, RPAREN_TOKEN;
        }

        $t = $tt->get_next_token;
      }

      ## Stay in the state.
      redo S;
    } else {
      die "$0: parse_char_string: Unknown state: $state";
    }
  } # S

  my $ss = Message::DOM::CSSStyleSheet->____new
      (css_rules => $open_rules->[0],
       ## TODO: href
       ## TODO: owner_node
       ## TODO: media
       type => 'text/css', ## TODO: OK?
       _parser => $self);
  return $ss;
} # parse_char_string
426    
## Definition of the |color| property.
##
##   css, dom, key - The CSS property name, the CSSOM attribute name and
##                   the internal key of the property.
##   parse         - Called as ($self, $prop_name, $tt, $t, $onerror)
##                   with the first token of the value in |$t|.  Returns
##                   a list of the next token to be processed and the
##                   parsed value, or |undef| as the value after
##                   reporting a 'syntax error:color' error.
##   serialize     - Called as ($self, $prop_name, $value).  Returns the
##                   string representation of the value, or |undef| if
##                   the value is not an "RGBA" value.
##
## A parsed color is an array reference ["RGBA", $r, $g, $b, $a].  Only
## the keyword |blue| is recognized at the moment.
$Prop->{color} = {
  css => 'color',
  dom => 'color',
  key => 'color',
  parse => sub {
    my ($self, $prop_name, $tt, $t, $onerror) = @_;

    if ($t->{type} == IDENT_TOKEN) {
      if (lc $t->{value} eq 'blue') { ## TODO: case folding
        $t = $tt->get_next_token;
        ## The alpha component is 1: a color keyword is fully opaque
        ## (an alpha of 0 would make it fully transparent).
        return ($t, ["RGBA", 0, 0, 255, 1]);
      } else {
        ## Unknown keyword: fall through to the error below.
      }
    } else {
      ## Not a keyword: fall through to the error below.
    }

    $onerror->(type => 'syntax error:color',
               level => $self->{must_level},
               token => $t);

    return ($t, undef);
  },
  serialize => sub {
    my ($self, $prop_name, $value) = @_;
    if ($value->[0] eq 'RGBA') { ## TODO: %d? %f?
      return sprintf 'rgba(%d, %d, %d, %f)', @$value[1, 2, 3, 4];
    } else {
      return undef;
    }
  },
};
## The same definition is also registered by CSSOM attribute name and by
## internal key.
$Attr->{color} = $Prop->{color};
$Key->{color} = $Prop->{color};
462    
463 wakaba 1.1 1;
464 wakaba 1.5 ## $Date: 2007/12/23 15:47:09 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24