/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Parser.pm
Suika

Contents of /markup/html/whatpm/Whatpm/CSS/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (hide annotations) (download)
Mon Dec 31 07:26:35 2007 UTC (16 years, 10 months ago) by wakaba
Branch: MAIN
Changes since 1.5: +115 -3 lines
++ whatpm/Whatpm/CSS/ChangeLog	31 Dec 2007 07:26:28 -0000
	* Parser.pm: Ignore properties that are not supported by the
	application.  Add support for properties whose value is a
	single keyword.

2007-12-31  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::CSS::Parser;
2     use strict;
3     use Whatpm::CSS::Tokenizer qw(:token);
4     require Whatpm::CSS::SelectorsParser;
5    
6     sub new ($) {
7 wakaba 1.3 my $self = bless {onerror => sub { }, must_level => 'm',
8 wakaba 1.5 message_level => 'w',
9 wakaba 1.3 unsupported_level => 'unsupported'}, shift;
10 wakaba 1.1
11     return $self;
12     } # new
13    
14     sub BEFORE_STATEMENT_STATE () { 0 }
15     sub BEFORE_DECLARATION_STATE () { 1 }
16     sub IGNORED_STATEMENT_STATE () { 2 }
17     sub IGNORED_DECLARATION_STATE () { 3 }
18    
19 wakaba 1.5 our $Prop; ## By CSS property name
20     our $Attr; ## By CSSOM attribute name
21     our $Key; ## By internal key
22    
## Parse a CSS style sheet given as a character string.
##
## Arguments:
##   $_[0] - The parser object.  Its |onerror|, |must_level|,
##           |unsupported_level|, |pseudo_element|, |pseudo_class|,
##           and |prop| members are consulted.
##   $_[1] - The style sheet source text.
##
## Returns a style sheet object created by
## |Message::DOM::CSSStyleSheet->____new| that holds the top-level
## rules found in the source text.
##
## Errors are reported by calling |$self->{onerror}| with the error
## description followed by |line| and |column| of the character most
## recently read by the tokenizer.
sub parse_char_string ($$) {
  my $self = $_[0];

  my $s = $_[1];
  pos ($s) = 0;
  my $line = 1;
  my $column = 0;

  ## Wrap the application's error handler so that every report
  ## carries the current position.
  my $_onerror = $self->{onerror};
  my $onerror = sub {
    $_onerror->(@_, line => $line, column => $column);
  };

  my $tt = Whatpm::CSS::Tokenizer->new;
  $tt->{onerror} = $onerror;
  ## Character source for the tokenizer: returns the code point of
  ## the next character, or -1 at the end of the string, and keeps
  ## |$line| and |$column| up to date.
  $tt->{get_char} = sub {
    if (pos $s < length $s) {
      my $c = ord substr $s, pos ($s)++, 1;
      if ($c == 0x000A) {
        $line++;
        $column = 0;
      } elsif ($c == 0x000D) {
        ## A CR immediately followed by LF is not counted as a line
        ## break by itself; the LF will be.
        unless (substr ($s, pos ($s), 1) eq "\x0A") {
          $line++;
          $column = 0;
        } else {
          $column++;
        }
      } else {
        $column++;
      }
      return $c;
    } else {
      return -1;
    }
  }; # $tt->{get_char}
  $tt->init;

  my $sp = Whatpm::CSS::SelectorsParser->new;
  $sp->{onerror} = $onerror;
  $sp->{must_level} = $self->{must_level};
  $sp->{pseudo_element} = $self->{pseudo_element};
  $sp->{pseudo_class} = $self->{pseudo_class};

  ## Namespace prefix -> namespace URI, filled by |@namespace| rules.
  my $nsmap = {};
  $sp->{lookup_namespace_uri} = sub {
    return $nsmap->{$_[0]}; # $_[0] is '' (default namespace) or prefix
  }; # $sp->{lookup_namespace_uri}

  ## TODO: Supported pseudo classes and elements...

  require Message::DOM::CSSStyleSheet;
  require Message::DOM::CSSRule;
  require Message::DOM::CSSStyleDeclaration;

  my $state = BEFORE_STATEMENT_STATE;
  my $t = $tt->get_next_token;

  my $open_rules = [[]];
  my $current_rules = $open_rules->[-1];
  my $current_decls;
  ## Stack of closing token types expected while skipping ignored
  ## statements or declarations.
  my $closing_tokens = [];
  my $charset_allowed = 1;
  my $namespace_allowed = 1;

  S: {
    if ($state == BEFORE_STATEMENT_STATE) {
      $t = $tt->get_next_token
          while $t->{type} == S_TOKEN or
              $t->{type} == CDO_TOKEN or
              $t->{type} == CDC_TOKEN;

      if ($t->{type} == ATKEYWORD_TOKEN) {
        if (lc $t->{value} eq 'namespace') { ## TODO: case folding
          $t = $tt->get_next_token;
          $t = $tt->get_next_token while $t->{type} == S_TOKEN;

          my $prefix;
          if ($t->{type} == IDENT_TOKEN) {
            $prefix = lc $t->{value};
            ## TODO: Unicode lowercase

            $t = $tt->get_next_token;
            $t = $tt->get_next_token while $t->{type} == S_TOKEN;
          }

          if ($t->{type} == STRING_TOKEN or $t->{type} == URI_TOKEN) {
            my $uri = $t->{value};

            $t = $tt->get_next_token;
            $t = $tt->get_next_token while $t->{type} == S_TOKEN;

            ## ISSUE: On handling of empty namespace URI, Firefox 2 and
            ## Opera 9 work differently (See SuikaWiki:namespace).
            ## TODO: We need to check what we do once it is specced.

            if ($t->{type} == SEMICOLON_TOKEN) {
              if ($namespace_allowed) {
                $nsmap->{defined $prefix ? $prefix : ''} = $uri;
                push @$current_rules,
                    Message::DOM::CSSNamespaceRule->____new ($prefix, $uri);
                undef $charset_allowed;
                undef $namespace_allowed;
              } else {
                $onerror->(type => 'at:namespace:not allowed',
                           level => $self->{must_level},
                           token => $t);
              }

              $t = $tt->get_next_token;
              ## Stay in the state.
              redo S;
            } else {
              #
            }
          } else {
            #
          }

          $onerror->(type => 'syntax error:at:namespace',
                     level => $self->{must_level},
                     token => $t);
          #
        } elsif (lc $t->{value} eq 'charset') { ## TODO: case folding
          $t = $tt->get_next_token;
          $t = $tt->get_next_token while $t->{type} == S_TOKEN;

          if ($t->{type} == STRING_TOKEN) {
            my $encoding = $t->{value};

            $t = $tt->get_next_token;
            $t = $tt->get_next_token while $t->{type} == S_TOKEN;

            if ($t->{type} == SEMICOLON_TOKEN) {
              if ($charset_allowed) {
                push @$current_rules,
                    Message::DOM::CSSCharsetRule->____new ($encoding);
                undef $charset_allowed;
              } else {
                $onerror->(type => 'at:charset:not allowed',
                           level => $self->{must_level},
                           token => $t);
              }

              ## TODO: Detect the conformance errors for @charset...

              $t = $tt->get_next_token;
              ## Stay in the state.
              redo S;
            } else {
              #
            }
          } else {
            #
          }

          $onerror->(type => 'syntax error:at:charset',
                     level => $self->{must_level},
                     token => $t);
          #
          ## NOTE: When adding support for new at-rule, insert code
          ## "undef $charset_allowed" and "undef $namespace_allowed" as
          ## appropriate.
        } else {
          $onerror->(type => 'not supported:at:'.$t->{value},
                     level => $self->{unsupported_level},
                     token => $t);

          ## Discard the at-keyword itself; the remainder of the
          ## statement is skipped in the ignored statement state.
          $t = $tt->get_next_token;
        }

        ## Reprocess.  After a syntax error in |@namespace| or
        ## |@charset|, |$t| is the offending token, which has not been
        ## examined yet.  It must not be discarded here: if it is the
        ## ";" that terminates the broken at-rule (or a "{" opening
        ## its block), dropping it would make the ignored statement
        ## state swallow the following statement as well.
        $state = IGNORED_STATEMENT_STATE;
        redo S;
      } elsif (@$open_rules > 1 and $t->{type} == RBRACE_TOKEN) {
        pop @$open_rules;
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == EOF_TOKEN) {
        if (@$open_rules > 1) {
          $onerror->(type => 'syntax error:block not closed',
                     level => $self->{must_level},
                     token => $t);
        }

        last S;
      } else {
        ## Anything else starts a ruleset; |@charset| and |@namespace|
        ## are no longer allowed after it.
        undef $charset_allowed;
        undef $namespace_allowed;

        ($t, my $selectors) = $sp->_parse_selectors_with_tokenizer
            ($tt, LBRACE_TOKEN, $t);

        $t = $tt->get_next_token
            while $t->{type} != LBRACE_TOKEN and $t->{type} != EOF_TOKEN;

        if ($t->{type} == LBRACE_TOKEN) {
          $current_decls = Message::DOM::CSSStyleDeclaration->____new;
          my $rs = Message::DOM::CSSStyleRule->____new
              ($selectors, $current_decls);
          ## The rule is recorded only if the selectors were parsed
          ## successfully; its declarations are parsed either way.
          push @{$current_rules}, $rs if defined $selectors;

          $state = BEFORE_DECLARATION_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          $onerror->(type => 'syntax error:after selectors',
                     level => $self->{must_level},
                     token => $t);

          ## Stay in the state.
          $t = $tt->get_next_token;
          redo S;
        }
      }
    } elsif ($state == BEFORE_DECLARATION_STATE) {
      ## NOTE: DELIM? in declaration will be removed:
      ## <http://csswg.inkedblade.net/spec/css2.1?s=declaration%20delim#issue-2>.

      my $prop_def;
      my $prop_value;
      my $prop_flag;
      $t = $tt->get_next_token while $t->{type} == S_TOKEN;
      if ($t->{type} == IDENT_TOKEN) { # property
        my $prop_name = lc $t->{value}; ## TODO: case folding
        $t = $tt->get_next_token;
        if ($t->{type} == COLON_TOKEN) {
          $t = $tt->get_next_token;
          $t = $tt->get_next_token while $t->{type} == S_TOKEN;

          $prop_def = $Prop->{$prop_name};
          ## The property has to be both known to this module and
          ## enabled by the application.
          if ($prop_def and $self->{prop}->{$prop_name}) {
            ($t, $prop_value)
                = $prop_def->{parse}->($self, $prop_name, $tt, $t, $onerror);
            if ($prop_value) {
              ## NOTE: {parse} don't have to consume trailing spaces.
              $t = $tt->get_next_token while $t->{type} == S_TOKEN;

              if ($t->{type} == EXCLAMATION_TOKEN) {
                $t = $tt->get_next_token;
                $t = $tt->get_next_token while $t->{type} == S_TOKEN;
                if ($t->{type} == IDENT_TOKEN and
                    lc $t->{value} eq 'important') { ## TODO: case folding
                  $prop_flag = 'important';

                  $t = $tt->get_next_token;
                  $t = $tt->get_next_token while $t->{type} == S_TOKEN;

                  #
                } else {
                  $onerror->(type => 'syntax error:important',
                             level => $self->{must_level},
                             token => $t);

                  ## Reprocess.
                  $state = IGNORED_DECLARATION_STATE;
                  redo S;
                }
              }

              #
            } else {
              ## Syntax error.

              ## Reprocess.
              $state = IGNORED_DECLARATION_STATE;
              redo S;
            }
          } else {
            $onerror->(type => 'not supported:property',
                       level => $self->{unsupported_level},
                       token => $t, value => $prop_name);

            #
            $state = IGNORED_DECLARATION_STATE;
            redo S;
          }
        } else {
          $onerror->(type => 'syntax error:property colon',
                     level => $self->{must_level},
                     token => $t);

          #
          $state = IGNORED_DECLARATION_STATE;
          redo S;
        }
      }

      if ($t->{type} == RBRACE_TOKEN) {
        $t = $tt->get_next_token;
        $state = BEFORE_STATEMENT_STATE;
        #redo S;
      } elsif ($t->{type} == SEMICOLON_TOKEN) {
        $t = $tt->get_next_token;
        ## Stay in the state.
        #redo S;
      } elsif ($t->{type} == EOF_TOKEN) {
        $onerror->(type => 'syntax error:ruleset not closed',
                   level => $self->{must_level},
                   token => $t);
        ## Reprocess.
        $state = BEFORE_STATEMENT_STATE;
        #redo S;
      } else {
        if ($prop_value) {
          $onerror->(type => 'syntax error:property semicolon',
                     level => $self->{must_level},
                     token => $t);
        } else {
          $onerror->(type => 'syntax error:property name',
                     level => $self->{must_level},
                     token => $t);
        }

        #
        $state = IGNORED_DECLARATION_STATE;
        redo S;
      }

      ## The declaration was terminated properly (or by EOF); record
      ## the parsed value, if any, under the property's internal key.
      if ($prop_value) {
        $$current_decls->{$prop_def->{key}} = [$prop_value, $prop_flag];
      }
      redo S;
    } elsif ($state == IGNORED_STATEMENT_STATE or
             $state == IGNORED_DECLARATION_STATE) {
      if (@$closing_tokens) { ## Something is yet in opening state.
        if ($t->{type} == EOF_TOKEN) {
          @$closing_tokens = ();
          ## Reprocess.
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } elsif ($t->{type} == $closing_tokens->[-1]) {
          pop @$closing_tokens;
          if (@$closing_tokens == 0 and
              $t->{type} == RBRACE_TOKEN and
              $state == IGNORED_STATEMENT_STATE) {
            $t = $tt->get_next_token;
            $state = BEFORE_STATEMENT_STATE;
            redo S;
          } else {
            $t = $tt->get_next_token;
            ## Stay in the state.
            redo S;
          }
        } else {
          #
        }
      } else {
        if ($t->{type} == SEMICOLON_TOKEN) {
          $t = $tt->get_next_token;
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } elsif ($state == IGNORED_DECLARATION_STATE and
                 $t->{type} == RBRACE_TOKEN) {
          $t = $tt->get_next_token;
          $state = BEFORE_STATEMENT_STATE;
          redo S;
        } elsif ($t->{type} == EOF_TOKEN) {
          ## Reprocess.
          $state = $state == IGNORED_STATEMENT_STATE
              ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE;
          redo S;
        } else {
          #
        }
      }

      ## Token types at which the skipping loop below stops so that
      ## the branches above can examine them.
      my $stop_token = {
        EOF_TOKEN, 1,
        RBRACE_TOKEN, 1,
        RBRACKET_TOKEN, 1,
        RPAREN_TOKEN, 1,
        SEMICOLON_TOKEN, 1,
      };

      ## A closing or semicolon token that reaches this point was not
      ## accepted by any branch above (e.g. a ";" inside a nested
      ## block, a "}" while ")" is expected, or a stray ")" or "]").
      ## It has to be discarded here; otherwise the loop below would
      ## stop on it at once and the same token would be reprocessed
      ## forever.  (EOF is always handled above.)
      if ($stop_token->{$t->{type}} and $t->{type} != EOF_TOKEN) {
        $t = $tt->get_next_token;
      }

      ## Skip tokens up to the next token of interest, remembering
      ## which closing token each opening token requires.
      while (not $stop_token->{$t->{type}}) {
        if ($t->{type} == LBRACE_TOKEN) {
          push @$closing_tokens, RBRACE_TOKEN;
        } elsif ($t->{type} == LBRACKET_TOKEN) {
          push @$closing_tokens, RBRACKET_TOKEN;
        } elsif ($t->{type} == LPAREN_TOKEN or $t->{type} == FUNCTION_TOKEN) {
          push @$closing_tokens, RPAREN_TOKEN;
        }

        $t = $tt->get_next_token;
      }

      #
      ## Stay in the state.
      redo S;
    } else {
      die "$0: parse_char_string: Unknown state: $state";
    }
  } # S

  my $ss = Message::DOM::CSSStyleSheet->____new
      (css_rules => $open_rules->[0],
       ## TODO: href
       ## TODO: owner_node
       ## TODO: media
       type => 'text/css', ## TODO: OK?
       _parser => $self);
  return $ss;
} # parse_char_string
426    
## Definition of the 'color' property.
##
## |parse| receives the parser object, the property name, the
## tokenizer, the current token, and the error handler.  It returns
## the next token to process and the parsed value, or |undef| as the
## value when the input is not acceptable.  Only the keyword |blue|
## is recognized at the moment.
##
## |serialize| receives the parser object, the property name, and a
## parsed value, and returns its textual form, or |undef| if the
## value is not of a type it knows.
$Prop->{color} = {
  css => 'color',
  dom => 'color',
  key => 'color',
  parse => sub {
    my ($self, $prop_name, $tt, $t, $onerror) = @_;

    ## TODO: case folding
    if ($t->{type} == IDENT_TOKEN and lc $t->{value} eq 'blue') {
      my $next_token = $tt->get_next_token;
      return ($next_token, ["RGBA", 0, 0, 255, 1]);
    }

    $onerror->(type => 'syntax error:color',
               level => $self->{must_level},
               token => $t);

    return ($t, undef);
  },
  serialize => sub {
    my ($self, $prop_name, $value) = @_;

    return undef unless $value->[0] eq 'RGBA';

    ## TODO: %d? %f?
    return sprintf 'rgba(%d, %d, %d, %f)', @$value[1 .. 4];
  },
};
$Attr->{color} = $Key->{color} = $Prop->{color};
462    
## Parser shared by the properties whose value is a single keyword.
##
## Arguments: the parser object, the property name, the tokenizer,
## the current token, and the error handler.
##
## Returns the next token to process and the parsed value
## |["KEYWORD", $keyword]|.  A keyword is accepted if it is listed
## in the property's |keyword| table and enabled by the application
## in |$self->{prop_value}|, or if it is |inherit|.  Otherwise an
## error is reported and |undef| is returned as the value.
my $one_keyword_parser = sub {
  my ($self, $prop_name, $tt, $t, $onerror) = @_;

  if ($t->{type} == IDENT_TOKEN) {
    my $prop_value = lc $t->{value}; ## TODO: case folding
    if (($Prop->{$prop_name}->{keyword}->{$prop_value} and
         $self->{prop_value}->{$prop_name}->{$prop_value}) or
        $prop_value eq 'inherit') {
      ## The identifier is consumed only once it is known to be
      ## acceptable, so that the error below is reported against the
      ## offending keyword itself and not against the token after it.
      $t = $tt->get_next_token;
      return ($t, ["KEYWORD", $prop_value]);
    }
  }

  $onerror->(type => 'syntax error:keyword',
             level => $self->{must_level},
             token => $t);
  return ($t, undef);
};
482    
## Serializer shared by the properties whose value is a single
## keyword.  Receives the parser object, the property name, and a
## parsed value; returns the keyword of a |KEYWORD| value, or |undef|
## for a value of any other type.
my $one_keyword_serializer = sub {
  my ($self, $prop_name, $value) = @_;

  return $value->[0] eq 'KEYWORD' ? $value->[1] : undef;
};
491    
## Definition of the 'display' property: a single keyword out of the
## set listed in |keyword|.
$Prop->{display} = {
  css => 'display',
  dom => 'display',
  key => 'display',
  parse => $one_keyword_parser,
  serialize => $one_keyword_serializer,
  keyword => {
    map { ($_ => 1) } qw(
      block inline inline-block inline-table list-item none
      table table-caption table-cell table-column table-column-group
      table-header-group table-footer-group table-row table-row-group
    )
  },
};
$Attr->{display} = $Key->{display} = $Prop->{display};
508    
## Definition of the 'position' property: a single keyword out of
## the set listed in |keyword|.
$Prop->{position} = {
  css => 'position',
  dom => 'position',
  key => 'position',
  parse => $one_keyword_parser,
  serialize => $one_keyword_serializer,
  keyword => {
    map { ($_ => 1) } qw(static relative absolute fixed)
  },
};
$Attr->{position} = $Key->{position} = $Prop->{position};
521    
## Definition of the 'float' property: a single keyword out of the
## set listed in |keyword|.  It is registered under two CSSOM
## attribute names.
$Prop->{float} = {
  css => 'float',
  dom => 'css_float',
  key => 'float',
  parse => $one_keyword_parser,
  serialize => $one_keyword_serializer,
  keyword => {
    map { ($_ => 1) } qw(left right none)
  },
};
$Attr->{$_} = $Prop->{float} for qw(css_float style_float); ## NOTE: |style_float| is an IEism
$Key->{float} = $Prop->{float};
535    
## Definition of the 'clear' property: a single keyword out of the
## set listed in |keyword|.
$Prop->{clear} = {
  css => 'clear',
  dom => 'clear',
  key => 'clear',
  parse => $one_keyword_parser,
  serialize => $one_keyword_serializer,
  keyword => {
    map { ($_ => 1) } qw(left right none both)
  },
};
$Attr->{clear} = $Key->{clear} = $Prop->{clear};
548    
## Definition of the 'direction' property: a single keyword out of
## the set listed in |keyword|.
$Prop->{direction} = {
  css => 'direction',
  dom => 'direction',
  key => 'direction',
  parse => $one_keyword_parser,
  serialize => $one_keyword_serializer,
  keyword => {
    map { ($_ => 1) } qw(ltr rtl)
  },
};
$Attr->{direction} = $Key->{direction} = $Prop->{direction};
561    
## Definition of the 'unicode-bidi' property: a single keyword out
## of the set listed in |keyword|.  The CSS name uses a hyphen while
## the CSSOM attribute name and the internal key use an underscore.
$Prop->{'unicode-bidi'} = {
  css => 'unicode-bidi',
  dom => 'unicode_bidi',
  key => 'unicode_bidi',
  parse => $one_keyword_parser,
  serialize => $one_keyword_serializer,
  keyword => {
    map { ($_ => 1) } qw(normal embed bidi-override)
  },
};
$Attr->{unicode_bidi} = $Key->{unicode_bidi} = $Prop->{'unicode-bidi'};
574    
575 wakaba 1.1 1;
576 wakaba 1.6 ## $Date: 2007/12/31 03:00:42 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24