/[suikacvs]/markup/html/whatpm/Whatpm/CSS/SelectorsParser.pm
Suika

Contents of /markup/html/whatpm/Whatpm/CSS/SelectorsParser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download)
Sun Jan 20 06:15:20 2008 UTC (18 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.8: +42 -2 lines
++ whatpm/Whatpm/CSS/ChangeLog	20 Jan 2008 06:15:14 -0000
	* Parser.pm, SelectorsParser.pm: |{href}| parameter added
	to all the onerror invocations.  The |{onerror}| function
	is no longer called with |{line}| and |{column}| parameters.

	* Tokenizer.pm: All tokens are now given |{line}| and |{column}|
	values.

2008-01-20  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::CSS::SelectorsParser;
2     use strict;
3 wakaba 1.9 our $VERSION=do{my @r=(q$Revision: 1.8 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     require Exporter;
6     push our @ISA, 'Exporter';
7    
8     use Whatpm::CSS::Tokenizer qw(:token);
9    
## Constructor.  Returns a new parser object blessed into the invocant
## class with three members: an |onerror| handler that does nothing, a
## |lookup_namespace_uri| resolver that answers |undef| for any prefix,
## and |must_level| set to 'm'.  The |href| member is not set here.
sub new ($) {
  my $class = shift;
  my %member = (
    onerror => sub { },
    lookup_namespace_uri => sub { return undef },
    must_level => 'm',
  );
  #$self->{href} = \(uri in which the selectors appears);
  return bless \%member, $class;
} # new
17    
## States of the state machine in |_parse_selectors_with_tokenizer|.
## Each name is a compile-time constant with an empty prototype; the
## numbers only need to be distinct from each other.
use constant {
  BEFORE_TYPE_SELECTOR_STATE => 1,
  AFTER_NAME_STATE => 2,
  BEFORE_LOCAL_NAME_STATE => 3,
  BEFORE_SIMPLE_SELECTOR_STATE => 4,
  BEFORE_CLASS_NAME_STATE => 5,
  AFTER_COLON_STATE => 6,
  AFTER_DOUBLE_COLON_STATE => 7,
  AFTER_LBRACKET_STATE => 8,
  AFTER_ATTR_NAME_STATE => 9,
  BEFORE_ATTR_LOCAL_NAME_STATE => 10,
  BEFORE_MATCH_STATE => 11,
  BEFORE_VALUE_STATE => 12,
  AFTER_VALUE_STATE => 13,
  BEFORE_COMBINATOR_STATE => 14,
  COMBINATOR_STATE => 15,
  BEFORE_LANG_TAG_STATE => 16,
  AFTER_LANG_TAG_STATE => 17,
  BEFORE_AN_STATE => 18,
  AFTER_AN_STATE => 19,
  BEFORE_B_STATE => 20,
  AFTER_B_STATE => 21,
  AFTER_NEGATION_SIMPLE_SELECTOR_STATE => 22,
  BEFORE_CONTAINS_STRING_STATE => 23,
};
41 wakaba 1.1
## Kinds of simple selector.  The kind is stored as the first item of
## each simple selector array built by the parser.
use constant {
  NAMESPACE_SELECTOR => 1,
  LOCAL_NAME_SELECTOR => 2,
  ID_SELECTOR => 3,
  CLASS_SELECTOR => 4,
  PSEUDO_CLASS_SELECTOR => 5,
  PSEUDO_ELEMENT_SELECTOR => 6,
  ATTRIBUTE_SELECTOR => 7,
};
49    
## Combinators.  Each combinator is represented by the type of the
## token that spells it (imported from |Whatpm::CSS::Tokenizer|).
use constant {
  DESCENDANT_COMBINATOR => S_TOKEN,
  CHILD_COMBINATOR => GREATER_TOKEN,
  ADJACENT_SIBLING_COMBINATOR => PLUS_TOKEN,
  GENERAL_SIBLING_COMBINATOR => TILDE_TOKEN,
};
54    
## Attribute selector match types.  |EXISTS_MATCH| is 0; every other
## match type is represented by the type of the token that spells the
## operator (imported from |Whatpm::CSS::Tokenizer|).
use constant {
  EXISTS_MATCH => 0,
  EQUALS_MATCH => MATCH_TOKEN,
  INCLUDES_MATCH => INCLUDES_TOKEN,
  DASH_MATCH => DASHMATCH_TOKEN,
  PREFIX_MATCH => PREFIXMATCH_TOKEN,
  SUFFIX_MATCH => SUFFIXMATCH_TOKEN,
  SUBSTRING_MATCH => SUBSTRINGMATCH_TOKEN,
};
62    
## Export lists for |Exporter|.  Every constant can be imported by
## name, or by group with the |:selector|, |:combinator|, and |:match|
## tags.  Both lists are built from the same three name lists.
my @selector_names = qw(NAMESPACE_SELECTOR LOCAL_NAME_SELECTOR ID_SELECTOR
                        CLASS_SELECTOR PSEUDO_CLASS_SELECTOR
                        PSEUDO_ELEMENT_SELECTOR ATTRIBUTE_SELECTOR);
my @combinator_names = qw(DESCENDANT_COMBINATOR CHILD_COMBINATOR
                          ADJACENT_SIBLING_COMBINATOR
                          GENERAL_SIBLING_COMBINATOR);
my @match_names = qw(EXISTS_MATCH EQUALS_MATCH INCLUDES_MATCH DASH_MATCH
                     PREFIX_MATCH SUFFIX_MATCH SUBSTRING_MATCH);

our @EXPORT_OK = (@selector_names, @combinator_names, @match_names);

our %EXPORT_TAGS = (
  selector => [@selector_names],
  combinator => [@combinator_names],
  match => [@match_names],
);
80    
## Parses the string given as the second argument as a group of
## selectors.  A new |Whatpm::CSS::Tokenizer| is created, given this
## parser's |onerror| handler, and fed the string one character at a
## time; the tokens are then parsed by
## |_parse_selectors_with_tokenizer| with |EOF_TOKEN| as the end token.
## The value of that call is the value of this method.
sub parse_string ($$) {
  my ($self, $input) = @_;
  my $length = length $input;
  my $offset = 0;

  my $tokenizer = Whatpm::CSS::Tokenizer->new;
  $tokenizer->{onerror} = $self->{onerror};
  $tokenizer->{get_char} = sub {
    ## -1 is returned once all the characters have been consumed.
    return -1 unless $offset < $length;
    return ord substr $input, $offset++, 1;
  }; # $tokenizer->{get_char}
  $tokenizer->init;

  return $self->_parse_selectors_with_tokenizer ($tokenizer, EOF_TOKEN);
} # parse_string
100    
## Parses a group of selectors from the tokens supplied by a tokenizer.
##
## Arguments:
##   $_[0] : The parser object.  Its |lookup_namespace_uri|, |onerror|,
##           |must_level|, |href|, |pseudo_class|, and |pseudo_element|
##           members are consulted.
##   $_[1] : The tokenizer; only its |get_next_token| method is used.
##   $_[2] : End token (other than EOF_TOKEN - may be EOF_TOKEN if no other).
##   $_[3] : The first token, or undef (then it is read from the tokenizer).
##
## Returns the list |($t, $selectors)|, where |$t| is the token at which
## parsing stopped.  On success |$selectors| is a reference to an array
## of selectors; each selector is an array whose items alternate between
## a combinator (starting with |DESCENDANT_COMBINATOR|) and a reference
## to an array of simple selectors |[KIND, ...]|.  On error the
## |onerror| handler is invoked and |$selectors| is |undef|.
sub _parse_selectors_with_tokenizer ($$$;$) {
  my $self = $_[0];
  my $tt = $_[1];
  my $end_type = $_[2];

  my $default_namespace = $self->{lookup_namespace_uri}->('');

  ## ISSUE: The Selectors spec only poorly defines how tokens are mapped
  ## to each component of selectors.  In addition, it does not well define
  ## where spaces and comments are able to be inserted.

  my $selectors = [];
  my $selector = [DESCENDANT_COMBINATOR];
  my $sss = []; # the sequence of simple selectors being built
  my $simple_selector;
  my $has_pseudo_element;
  ## False outside of |:not()|.  Inside, 1 = no simple selector has
  ## been seen yet, 2 = a type/universal selector is being processed,
  ## larger = the single allowed simple selector has been read.
  my $in_negation;

  my $state = BEFORE_TYPE_SELECTOR_STATE;
  my $t = $_[3] || $tt->get_next_token;
  my $name;

  ## Reports an error at the current token and yields the value to be
  ## returned from this method.  Any additional arguments are appended
  ## to the arguments of the |onerror| handler.
  my $fail = sub {
    my ($type, @extra) = @_;
    $self->{onerror}->(type => $type,
                       level => $self->{must_level},
                       uri => \$self->{href},
                       token => $t, @extra);
    return ($t, undef);
  }; # $fail

  S: {
    if ($state == BEFORE_TYPE_SELECTOR_STATE) {
      $in_negation = 2 if $in_negation;

      if ($t->{type} == IDENT_TOKEN) { ## element type or namespace prefix
        $name = $t->{value};
        $state = AFTER_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == STAR_TOKEN) { ## universal selector or prefix
        undef $name;
        $state = AFTER_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == VBAR_TOKEN) { ## null namespace
        undef $name;
        push @$sss, [NAMESPACE_SELECTOR, undef];

        $state = BEFORE_LOCAL_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } elsif ({
                DOT_TOKEN, 1,
                COLON_TOKEN, 1,
                HASH_TOKEN, 1,
                LBRACKET_TOKEN, 1,
                RPAREN_TOKEN, 1, # :not(a ->> ) <<-
               }->{$t->{type}}) {
        $in_negation = 1 if $in_negation;
        push @$sss, [NAMESPACE_SELECTOR, $default_namespace]
            if defined $default_namespace;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        # Reprocess.
        redo S;
      } else {
        return $fail->('syntax error:before type selector');
      }
    } elsif ($state == BEFORE_SIMPLE_SELECTOR_STATE) {
      if ($in_negation and $in_negation++ == 2) {
        $state = AFTER_NEGATION_SIMPLE_SELECTOR_STATE;
        ## Reprocess.
        redo S;
      }

      if ($t->{type} == DOT_TOKEN) { ## class selector
        return $fail->('syntax error:after pseudo element')
            if $has_pseudo_element;
        $state = BEFORE_CLASS_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == HASH_TOKEN) { ## ID selector
        return $fail->('syntax error:after pseudo element')
            if $has_pseudo_element;
        push @$sss, [ID_SELECTOR, $t->{value}];
        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == COLON_TOKEN) { ## pseudo-class or pseudo-element
        return $fail->('syntax error:after pseudo element')
            if $has_pseudo_element;
        $state = AFTER_COLON_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == LBRACKET_TOKEN) { ## attribute selector
        return $fail->('syntax error:after pseudo element')
            if $has_pseudo_element;
        $state = AFTER_LBRACKET_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        $state = BEFORE_COMBINATOR_STATE;
        ## Reprocess.
        redo S;
      }
    } elsif ($state == AFTER_NAME_STATE) {
      if ($t->{type} == VBAR_TOKEN) {
        $state = BEFORE_LOCAL_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else { ## Type or universal selector w/o namespace prefix
        push @$sss, [NAMESPACE_SELECTOR, $default_namespace]
            if defined $default_namespace;
        push @$sss, [LOCAL_NAME_SELECTOR, $name] if defined $name;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        ## reprocess.
        redo S;
      }
    } elsif ($state == BEFORE_LOCAL_NAME_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        if (defined $name) { ## Prefix is neither empty nor "*"
          my $uri = $self->{lookup_namespace_uri}->($name);
          return $fail->('namespace prefix:not declared')
              unless defined $uri;
          push @$sss, [NAMESPACE_SELECTOR, $uri];
        }
        push @$sss, [LOCAL_NAME_SELECTOR, $t->{value}];

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == STAR_TOKEN) {
        if (defined $name) { ## Prefix is neither empty nor "*"
          my $uri = $self->{lookup_namespace_uri}->($name);
          return $fail->('namespace prefix:not declared')
              unless defined $uri;
          push @$sss, [NAMESPACE_SELECTOR, $uri];
        }
        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else { ## "|" not followed by type or universal selector
        return $fail->('syntax error:after namespace prefix');
      }
    } elsif ($state == BEFORE_CLASS_NAME_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        push @$sss, [CLASS_SELECTOR, $t->{value}];

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:before class name');
      }
    } elsif ($state == BEFORE_COMBINATOR_STATE) {
      ## The sequence of simple selectors is complete.
      push @$selector, $sss;
      $sss = [];

      if ($t->{type} == S_TOKEN) {
        $state = COMBINATOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ({
                GREATER_TOKEN, 1,
                PLUS_TOKEN, 1,
                TILDE_TOKEN, 1,
                COMMA_TOKEN, 1,
                EOF_TOKEN, 1,
                $end_type, 1,
               }->{$t->{type}}) {
        $state = COMBINATOR_STATE;
        ## Reprocess.
        redo S;
      } else {
        return $fail->('syntax error:before combinator');
      }
    } elsif ($state == COMBINATOR_STATE) {
      ## NOTE: The token type, not the state, has to be tested here;
      ## comparing |$state| with |S_TOKEN| made this branch dead code.
      if ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } elsif ({
                GREATER_TOKEN, 1,
                PLUS_TOKEN, 1,
                TILDE_TOKEN, 1,
               }->{$t->{type}}) {
        push @$selector, $t->{type};

        $state = BEFORE_TYPE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == EOF_TOKEN or $t->{type} == $end_type) {
        push @$selectors, $selector;
        return ($t, $selectors);
      } elsif ($t->{type} == COMMA_TOKEN) {
        push @$selectors, $selector;
        $selector = [DESCENDANT_COMBINATOR];
        undef $has_pseudo_element;

        $state = BEFORE_TYPE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        ## White space followed by something that is not a combinator:
        ## the white space was a descendant combinator.
        push @$selector, S_TOKEN;

        $state = BEFORE_TYPE_SELECTOR_STATE;
        ## Reprocess.
        redo S;
      }
    } elsif ($state == AFTER_COLON_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        my $class = $t->{value};
        $class =~ tr/A-Z/a-z/; ## TODO: ASCII case-insensitivity ok?
        if ($self->{pseudo_class}->{$class} and
            {
             active => 1,
             checked => 1,
             '-manakai-current' => 1,
             disabled => 1,
             empty => 1,
             enabled => 1,
             'first-child' => 1,
             'first-of-type' => 1,
             focus => 1,
             hover => 1,
             indeterminate => 1, ## NOTE: Reserved in Selectors Level 3
             'last-child' => 1,
             'last-of-type' => 1,
             link => 1,
             'only-child' => 1,
             'only-of-type' => 1,
             root => 1,
             target => 1,
             visited => 1,
            }->{$class}) {
          push @$sss, [PSEUDO_CLASS_SELECTOR, $class];
        } elsif ($self->{pseudo_element}->{$class} and
                 {'first-letter' => 1, 'first-line' => 1,
                  before => 1, after => 1}->{$class}) {
          ## Pseudo-elements that may be written with a single colon.
          push @$sss, [PSEUDO_ELEMENT_SELECTOR, $class];
          $has_pseudo_element = 1;
        } else {
          ## TODO: Should we raise a different kind of error
          ## if a pseudo class is known but not supported?
          return $fail->('pseudo class:not allowed', value => $class);
        }

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == FUNCTION_TOKEN) {
        my $class = $t->{value};
        $class =~ tr/A-Z/a-z/; ## TODO: Is ASCII case-insensitivity OK?

        if ($class eq 'lang' and $self->{pseudo_class}->{$class}) {
          $state = BEFORE_LANG_TAG_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ($class eq 'not' and $self->{pseudo_class}->{$class} and
                 not $in_negation) {
          $in_negation = 1;

          ## A non-reference marker; everything pushed after it belongs
          ## to the argument of |:not()|.
          push @$sss, '';
          $state = BEFORE_TYPE_SELECTOR_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ({
                  'nth-child' => 1,
                  'nth-last-child' => 1,
                  'nth-of-type' => 1,
                  'nth-last-of-type' => 1,
                 }->{$class} and $self->{pseudo_class}->{$class}) {
          $name = $class;

          $state = BEFORE_AN_STATE;
          $t = $tt->get_next_token;
          ## TODO: syntax of value in the spec is vague; need to reverse
          ## engineer what Opera 9.5 does.
          redo S;
        } elsif ($class eq '-manakai-contains' and
                 $self->{pseudo_class}->{$class}) {
          $state = BEFORE_CONTAINS_STRING_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          return $fail->('pseudo class:not allowed', value => $class);
        }
      } elsif ($t->{type} == COLON_TOKEN and
               not $in_negation) { ## Pseudo-element
        $state = AFTER_DOUBLE_COLON_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:after colon');
      }
    } elsif ($state == AFTER_LBRACKET_STATE) { ## Attribute selector
      $simple_selector = [ATTRIBUTE_SELECTOR];
      if ($t->{type} == IDENT_TOKEN) {
        $name = $t->{value};

        $state = AFTER_ATTR_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == VBAR_TOKEN) {
        $simple_selector->[1] = ''; # null namespace

        $state = BEFORE_ATTR_LOCAL_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == STAR_TOKEN) {
        $name = undef;

        $state = AFTER_ATTR_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:before attr name');
      }
    } elsif ($state == AFTER_ATTR_NAME_STATE) {
      if ($t->{type} == VBAR_TOKEN) {
        if (defined $name) {
          my $uri = $self->{lookup_namespace_uri}->($name);
          return $fail->('namespace prefix:not declared')
              unless defined $uri;
          $simple_selector->[1] = $uri;
        }

        $state = BEFORE_ATTR_LOCAL_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:after attr star')
            unless defined $name; ## [*]
        $simple_selector->[1] = ''; # null namespace
        $simple_selector->[2] = $name;

        $state = BEFORE_MATCH_STATE;
        ## Reprocess.
        redo S;
      }
    } elsif ($state == BEFORE_ATTR_LOCAL_NAME_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        $simple_selector->[2] = $t->{value};

        $state = BEFORE_MATCH_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:before attr local name');
      }
    } elsif ($state == BEFORE_MATCH_STATE) {
      if ({
           MATCH_TOKEN, 1,
           INCLUDES_TOKEN, 1,
           DASHMATCH_TOKEN, 1,
           PREFIXMATCH_TOKEN, 1,
           SUFFIXMATCH_TOKEN, 1,
           SUBSTRINGMATCH_TOKEN, 1,
          }->{$t->{type}}) {
        $simple_selector->[3] = $t->{type};

        $state = BEFORE_VALUE_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == RBRACKET_TOKEN) {
        push @$sss, $simple_selector;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:before match');
      }
    } elsif ($state == BEFORE_VALUE_STATE) {
      if ($t->{type} == IDENT_TOKEN or $t->{type} == STRING_TOKEN) {
        $simple_selector->[4] = $t->{value};
        push @$sss, $simple_selector;

        $state = AFTER_VALUE_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:before attr value');
      }
    } elsif ($state == AFTER_VALUE_STATE) {
      if ($t->{type} == RBRACKET_TOKEN) {
        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:after attr value');
      }
    } elsif ($state == AFTER_DOUBLE_COLON_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        my $pe = $t->{value};
        $pe =~ tr/A-Z/a-z/; ## TODO: Is ASCII case-insensitive OK?
        if ($self->{pseudo_element}->{$pe} and
            {'first-letter' => 1, 'first-line' => 1,
             after => 1, before => 1}->{$pe}) {
          push @$sss, [PSEUDO_ELEMENT_SELECTOR, $pe];
          $has_pseudo_element = 1;

          $state = BEFORE_SIMPLE_SELECTOR_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          return $fail->('pseudo element:not allowed', value => $pe);
        }
      } else {
        return $fail->('syntax error:after double colon');
      }
    } elsif ($state == BEFORE_LANG_TAG_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        push @$sss, [PSEUDO_CLASS_SELECTOR, 'lang', $t->{value}];

        $state = AFTER_LANG_TAG_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:before lang tag');
      }
    } elsif ($state == AFTER_LANG_TAG_STATE) {
      if ($t->{type} == RPAREN_TOKEN) {
        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:after lang tag');
      }
    } elsif ($state == BEFORE_AN_STATE) {
      ## |$name| holds the name of the |:nth-*()| pseudo-class.  The
      ## simple selector is |[PSEUDO_CLASS_SELECTOR, $name, a, b]|.
      if ($t->{type} == DIMENSION_TOKEN) {
        if (int $t->{number} == $t->{number}) {
          my $n = $t->{value};
          $n =~ tr/A-Z/a-z/; ## TODO: ascii ?
          if ($n eq 'n') {
            $simple_selector = [PSEUDO_CLASS_SELECTOR, $name,
                                0+$t->{number}, 0];

            $state = AFTER_AN_STATE;
            $t = $tt->get_next_token;
            redo S;
          } elsif ($n =~ /\An-([0-9]+)\z/) {
            push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 0+$t->{number}, 0-$1];

            $state = AFTER_B_STATE;
            $t = $tt->get_next_token;
            redo S;
          } else {
            return $fail->('syntax error:an+b');
          }
        } else {
          return $fail->('syntax error:an+b');
        }
      } elsif ($t->{type} == NUMBER_TOKEN) {
        if (int $t->{number} == $t->{number}) {
          push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 0, 0+$t->{number}];

          $state = AFTER_B_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else { ## ISSUE: Is :nth-child(0.0) disallowed?
          return $fail->('not integer', value => $t->{number});
        }
      } elsif ($t->{type} == IDENT_TOKEN) {
        my $value = $t->{value};
        $value =~ tr/A-Z/a-z/; ## TODO: ASCII case-insensitive?
        if ($value eq 'odd') {
          push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 2, 1];

          $state = AFTER_B_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ($value eq 'even') {
          push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 2, 0];

          $state = AFTER_B_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ($value eq 'n' or $value eq '-n') {
          ## ISSUE: :nth-child(-n) is not explicitly allowed, but appears
          ## in an example in the spec.
          $simple_selector = [PSEUDO_CLASS_SELECTOR, $name,
                              $value eq 'n' ? 1 : -1, 0];

          $state = AFTER_AN_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ($value =~ /\A(-?)n-([0-9]+)\z/) {
          push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 0+($1.'1'), -$2];

          $state = AFTER_B_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          return $fail->('syntax error:an+b');
        }
      } elsif ($t->{type} == MINUS_TOKEN) {
        ## ISSUE: Is :nth-child(- 1) allowed?
        ## ISSUE: Is :nth-child(n-/**/6) or (-n-/**/6) allowed?
        $t = $tt->get_next_token;
        if ($t->{type} == DIMENSION_TOKEN || $t->{type} == IDENT_TOKEN) {
          my $num = $t->{type} == IDENT_TOKEN ? 1 : $t->{number};
          ## NOTE: :nth-child(-/**/n)
          if (int $num == $num) {
            my $n = $t->{value};
            $n =~ tr/A-Z/a-z/; ## TODO: ASCII?
            if ($n eq 'n') {
              $simple_selector = [PSEUDO_CLASS_SELECTOR, $name, -$num, 0];

              $state = AFTER_AN_STATE;
              $t = $tt->get_next_token;
              redo S;
            } elsif ($n =~ /\An-([0-9]+)\z/) {
              ## Both a and b are known at this point, so the simple
              ## selector is completed here and only ")" may follow,
              ## as in the other "an-b" branches of this state.
              push @$sss, [PSEUDO_CLASS_SELECTOR, $name, -$num, -$1];

              $state = AFTER_B_STATE;
              $t = $tt->get_next_token;
              redo S;
            } else {
              return $fail->('syntax error:an+b');
            }
          } else {
            return $fail->('syntax error:an+b');
          }
        } elsif ($t->{type} == NUMBER_TOKEN) {
          if (int $t->{number} == $t->{number}) {
            push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 0, -$t->{number}];

            $state = AFTER_B_STATE;
            $t = $tt->get_next_token;
            redo S;
          } else {
            return $fail->('syntax error:an+b');
          }
        } else {
          return $fail->('syntax error:an+b');
        }
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:an+b');
      }
    } elsif ($state == AFTER_AN_STATE) {
      ## ISSUE: :nth-child(1n +2) is allowed.
      ## :nth-child(1n /**/ +2) and :nth-child(1n -2) are allowed?
      if ($t->{type} == PLUS_TOKEN) {
        ## Only the sign of b is known so far.
        $simple_selector->[3] = +1;

        $state = BEFORE_B_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == MINUS_TOKEN) {
        $simple_selector->[3] = -1;

        $state = BEFORE_B_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == RPAREN_TOKEN) {
        push @$sss, $simple_selector;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:an+b');
      }
    } elsif ($state == BEFORE_B_STATE) {
      ## ISSUE: Is S allowed?
      if ($t->{type} == NUMBER_TOKEN) {
        if (int $t->{number} == $t->{number}) {
          ## Combine the sign stored in the AFTER_AN_STATE with the number.
          $simple_selector->[3] *= $t->{number};
          push @$sss, $simple_selector;

          $state = AFTER_B_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          return $fail->('syntax error:an+b');
        }
      } else {
        return $fail->('syntax error:an+b');
      }
    } elsif ($state == AFTER_B_STATE) {
      if ($t->{type} == RPAREN_TOKEN) {
        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:after an+b');
      }
    } elsif ($state == AFTER_NEGATION_SIMPLE_SELECTOR_STATE) {
      if ($t->{type} == RPAREN_TOKEN) {
        undef $in_negation;
        ## Move everything pushed after the marker into a single
        ## |[PSEUDO_CLASS_SELECTOR, 'not', ...]| simple selector.
        my $negation = [];
        unshift @$negation, pop @$sss while ref $sss->[-1];
        pop @$sss; # dummy
        unshift @$negation, 'not';
        unshift @$negation, PSEUDO_CLASS_SELECTOR;
        push @$sss, $negation;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:after not simple selector');
      }
    } elsif ($state == BEFORE_CONTAINS_STRING_STATE) {
      if ($t->{type} == STRING_TOKEN or $t->{type} == IDENT_TOKEN) {
        push @$sss, [PSEUDO_CLASS_SELECTOR, '-manakai-contains', $t->{value}];

        ## What may follow is the same as after a language tag.
        $state = AFTER_LANG_TAG_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('syntax error:before contains string');
      }
    } else {
      die "$0: Selectors Parser: $state: Unknown state";
    }
  } # S
} # _parse_selectors_with_tokenizer
922    
## NOTE: Specificity in CSS 2.1 and Selectors 3 are incompatible.
## What is implemented by this method is CSS 2.1's one.
## (With Selectors 3 terminology and with Selectors 3 additions.)
##
## Computes the specificity of a selector.  The second argument is a
## selector as found in the array returned by the parser: an array
## whose items are either combinators (non-references, ignored here) or
## references to arrays of simple selectors.  Returns a reference to an
## array of four numbers, |[a, b, c, d]|.
sub get_selector_specificity ($$) {
  my (undef, $selector) = @_;

  my $r = [0, 0, 0, 0]; # a, b, c, d

  ## a = 1 iff style="" attribute
  ## b += 1 for ID attribute selectors
  ## c += 1 for attribute, class, and pseudo-class selectors
  ## d += 1 for type selectors and pseudo-elements

  for my $sss (@$selector) {
    next unless ref $sss; # combinator
    my @sss = @$sss;
    while (@sss) {
      my $ss = shift @sss;
      if ($ss->[0] == LOCAL_NAME_SELECTOR or
          $ss->[0] == PSEUDO_ELEMENT_SELECTOR) {
        $r->[3]++;
      } elsif ($ss->[0] == PSEUDO_CLASS_SELECTOR) {
        if ($ss->[1] eq 'not') {
          ## The parser represents negation as
          ## |[PSEUDO_CLASS_SELECTOR, 'not', simple selectors...]|.
          ## The negation itself does not count; its argument is
          ## queued so that it is counted like any other selector.
          push @sss, @$ss[2..$#$ss];
        } else {
          $r->[2]++;
        }
      } elsif ($ss->[0] == ATTRIBUTE_SELECTOR or
               $ss->[0] == CLASS_SELECTOR) {
        ## NOTE: A class selector is always counted, even if the class
        ## name happens to be "not".
        $r->[2]++;
      } elsif ($ss->[0] == ID_SELECTOR) {
        $r->[1]++;
      }
      ## Namespace selectors do not contribute to the specificity.
    }
  }

  return $r;
} # get_selector_specificity
961    
962 wakaba 1.5 =head1 LICENSE
963    
964     Copyright 2007 Wakaba <w@suika.fam.cx>
965    
966     This library is free software; you can redistribute it
967     and/or modify it under the same terms as Perl itself.
968    
969     =cut
970    
971 wakaba 1.1 1;
972 wakaba 1.9 # $Date: 2008/01/01 02:54:35 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24