/[suikacvs]/markup/html/whatpm/Whatpm/CSS/SelectorsParser.pm
Suika

Contents of /markup/html/whatpm/Whatpm/CSS/SelectorsParser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (hide annotations) (download)
Sat Aug 16 07:35:23 2008 UTC (17 years, 7 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.11: +56 -49 lines
++ whatpm/Whatpm/ChangeLog	16 Aug 2008 07:34:18 -0000
	* CacheManifest.pm: Support for new style of error
	reports.

	* HTML.pm.src: Set line=1, column=1 to the document node.

2008-08-16  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/CSS/ChangeLog	16 Aug 2008 07:34:47 -0000
2008-08-16  Wakaba  <wakaba@suika.fam.cx>

	* MediaQueryParser.pm, SelectorsParser.pm, Parser.pm: Support
	for new style of error reports.

1 wakaba 1.1 package Whatpm::CSS::SelectorsParser;
2     use strict;
3 wakaba 1.12 our $VERSION=do{my @r=(q$Revision: 1.11 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     require Exporter;
6     push our @ISA, 'Exporter';
7    
8     use Whatpm::CSS::Tokenizer qw(:token);
9    
## Create a new selectors parser.
##
## The object starts with a no-op |onerror| handler, a
## |lookup_namespace_uri| handler that reports every prefix (and the
## default namespace) as undeclared, and the default mapping from error
## level names to level codes.
sub new ($) {
  my $class = shift;

  my %parser = (
    ## Invoked with name/value pairs for each reported error.
    onerror => sub { },
    ## Invoked with a namespace prefix ('' for the default namespace).
    lookup_namespace_uri => sub {
      return undef;
    },
    level => {
      must => 'm',
      uncertain => 'u',
    },
  );
  #$parser{href} = \(uri in which the selectors appears);

  return bless \%parser, $class;
} # new
24    
## States of the selectors parser (see |_parse_selectors_with_tokenizer|).
## The values only need to be distinct; they are compared with |==|.

## Type selector, namespace prefix, and other simple selectors
sub BEFORE_TYPE_SELECTOR_STATE           () {  1 }
sub AFTER_NAME_STATE                     () {  2 }
sub BEFORE_LOCAL_NAME_STATE              () {  3 }
sub BEFORE_SIMPLE_SELECTOR_STATE         () {  4 }
sub BEFORE_CLASS_NAME_STATE              () {  5 }
sub AFTER_COLON_STATE                    () {  6 }
sub AFTER_DOUBLE_COLON_STATE             () {  7 }

## Attribute selectors
sub AFTER_LBRACKET_STATE                 () {  8 }
sub AFTER_ATTR_NAME_STATE                () {  9 }
sub BEFORE_ATTR_LOCAL_NAME_STATE         () { 10 }
sub BEFORE_MATCH_STATE                   () { 11 }
sub BEFORE_VALUE_STATE                   () { 12 }
sub AFTER_VALUE_STATE                    () { 13 }

## Combinators
sub BEFORE_COMBINATOR_STATE              () { 14 }
sub COMBINATOR_STATE                     () { 15 }

## Functional pseudo-classes
sub BEFORE_LANG_TAG_STATE                () { 16 }
sub AFTER_LANG_TAG_STATE                 () { 17 }
sub BEFORE_AN_STATE                      () { 18 }
sub AFTER_AN_STATE                       () { 19 }
sub BEFORE_B_STATE                       () { 20 }
sub AFTER_B_STATE                        () { 21 }
sub AFTER_NEGATION_SIMPLE_SELECTOR_STATE () { 22 }
sub BEFORE_CONTAINS_STRING_STATE         () { 23 }
48 wakaba 1.1
## Types of simple selectors.  A simple selector is represented as an
## array reference whose first item is one of these values.
sub NAMESPACE_SELECTOR      () { 1 }
sub LOCAL_NAME_SELECTOR     () { 2 }
sub ID_SELECTOR             () { 3 }
sub CLASS_SELECTOR          () { 4 }
sub PSEUDO_CLASS_SELECTOR   () { 5 }
sub PSEUDO_ELEMENT_SELECTOR () { 6 }
sub ATTRIBUTE_SELECTOR      () { 7 }
56    
## Combinators.  Each combinator is represented by the type of the
## tokenizer token that denotes it.
sub DESCENDANT_COMBINATOR       () { S_TOKEN }
sub CHILD_COMBINATOR            () { GREATER_TOKEN }
sub ADJACENT_SIBLING_COMBINATOR () { PLUS_TOKEN }
sub GENERAL_SIBLING_COMBINATOR  () { TILDE_TOKEN }

## Match types of attribute selectors.  Except for |EXISTS_MATCH|
## (|[attr]|, no operator), each is represented by the type of the
## tokenizer token of the operator.
sub EXISTS_MATCH    () { 0 }
sub EQUALS_MATCH    () { MATCH_TOKEN }
sub INCLUDES_MATCH  () { INCLUDES_TOKEN }
sub DASH_MATCH      () { DASHMATCH_TOKEN }
sub PREFIX_MATCH    () { PREFIXMATCH_TOKEN }
sub SUFFIX_MATCH    () { SUFFIXMATCH_TOKEN }
sub SUBSTRING_MATCH () { SUBSTRINGMATCH_TOKEN }
69    
## Export tags: |:selector|, |:combinator|, and |:match|.
our %EXPORT_TAGS = (
  selector => [qw(
    NAMESPACE_SELECTOR LOCAL_NAME_SELECTOR ID_SELECTOR
    CLASS_SELECTOR PSEUDO_CLASS_SELECTOR PSEUDO_ELEMENT_SELECTOR
    ATTRIBUTE_SELECTOR
  )],
  combinator => [qw(
    DESCENDANT_COMBINATOR CHILD_COMBINATOR
    ADJACENT_SIBLING_COMBINATOR GENERAL_SIBLING_COMBINATOR
  )],
  match => [qw(
    EXISTS_MATCH EQUALS_MATCH INCLUDES_MATCH DASH_MATCH
    PREFIX_MATCH SUFFIX_MATCH SUBSTRING_MATCH
  )],
);

## Everything that appears in a tag can also be imported by name.
our @EXPORT_OK = map { @{$EXPORT_TAGS{$_}} } qw(selector combinator match);
87    
## Parse a string as a group of selectors.
##
##   $_[0] : The parser object.
##   $_[1] : The string to parse.
##
## The string is fed to a new |Whatpm::CSS::Tokenizer| one character at
## a time; errors found by the tokenizer are reported to the parser's
## own |onerror| handler.  Returns whatever
## |_parse_selectors_with_tokenizer| returns when asked to parse until
## the end of the input.
sub parse_string ($$) {
  my $self = $_[0];
  my $input = $_[1];

  my $next_index = 0;

  my $tokenizer = Whatpm::CSS::Tokenizer->new;
  $tokenizer->{onerror} = $self->{onerror};
  $tokenizer->{get_char} = sub {
    ## -1 signals the end of the input to the tokenizer.
    return -1 unless $next_index < length $input;
    return ord substr $input, $next_index++, 1;
  }; # $tokenizer->{get_char}
  $tokenizer->init;

  return $self->_parse_selectors_with_tokenizer ($tokenizer, EOF_TOKEN);
} # parse_string
107    
## Parse a group of selectors from a stream of tokens.
##
##   $_[0] : The parser object.
##   $_[1] : The tokenizer.  Only its |get_next_token| method is used.
##   $_[2] : End token (other than EOF_TOKEN - may be EOF_TOKEN if no
##           other).  Parsing stops successfully when a token of this
##           type, or an EOF_TOKEN, appears where a combinator is
##           expected.
##   $_[3] : The first token, or undef (in which case the first token
##           is read from the tokenizer).
##
## Returns a list of two items: the token at which the parser stopped,
## and either a reference to the array of parsed selectors or |undef|
## if the input is in error.  Every error is reported through the
## |onerror| handler before the method returns.
##
## Result data structure:
##
##   selectors       = [selector, selector, ...]
##   selector        = [DESCENDANT_COMBINATOR, sss, combinator, sss, ...]
##   sss             = [simple selector, simple selector, ...]
##   simple selector = [type, argument, ...]  (|type| is one of the
##                     |*_SELECTOR| constants)
##
## The negation pseudo-class is represented as
## |[PSEUDO_CLASS_SELECTOR, 'not', simple selector, ...]|.
sub _parse_selectors_with_tokenizer ($$$;$) {
  my $self = $_[0];
  my $tt = $_[1];
  my $end_type = $_[2];

  my $default_namespace = $self->{lookup_namespace_uri}->('');

  ## ISSUE: The Selectors spec only poorly defines how tokens are mapped
  ## to each component of selectors.  In addition, it does not well define
  ## where spaces and comments are able to be inserted.

  my $selectors = [];
  my $selector = [DESCENDANT_COMBINATOR];
  my $sss = [];
  my $simple_selector; # Attribute or an+b selector under construction
  my $has_pseudo_element;

  ## False outside of |:not()|.  Set to 1 when |:not(| is seen.  The
  ## "before type selector" state sets it to 2, and back to 1 if there
  ## is no type selector.  The "before simple selector" state increments
  ## it and leaves for the "after negation" state once it has reached 2,
  ## i.e. after the argument of |:not()| has been read.
  my $in_negation;

  my $state = BEFORE_TYPE_SELECTOR_STATE;
  my $t = $_[3] || $tt->get_next_token;
  my $name;   # Possible namespace prefix, or name of an+b pseudo-class
  my $name_t; # Token from which |$name| was taken

  ## Report an error and produce the return value for the failure case.
  ## Options: |level| (key of |$self->{level}|, defaults to |must|),
  ## |token| (defaults to the current token), and |value|.
  my $fail = sub {
    my ($type, %opt) = @_;
    $self->{onerror}->(type => $type,
                       level => $self->{level}->{$opt{level} || 'must'},
                       uri => \$self->{href},
                       token => $opt{token} || $t,
                       (exists $opt{value} ? (value => $opt{value}) : ()));
    return ($t, undef);
  }; # $fail

  ## Tokens that start a simple selector other than type/universal ones.
  my %starts_simple_selector = map { ($_ => 1) } (
    DOT_TOKEN, COLON_TOKEN, HASH_TOKEN, LBRACKET_TOKEN,
    RPAREN_TOKEN, # :not(a ->> ) <<-
  );
  ## Tokens (other than S_TOKEN) allowed after a sequence of simple
  ## selectors.
  my %ends_sss = map { ($_ => 1) } (
    GREATER_TOKEN, PLUS_TOKEN, TILDE_TOKEN, COMMA_TOKEN, EOF_TOKEN,
    $end_type,
  );
  my %is_explicit_combinator = map { ($_ => 1) } (
    GREATER_TOKEN, PLUS_TOKEN, TILDE_TOKEN,
  );
  my %is_match_operator = map { ($_ => 1) } (
    MATCH_TOKEN, INCLUDES_TOKEN, DASHMATCH_TOKEN,
    PREFIXMATCH_TOKEN, SUFFIXMATCH_TOKEN, SUBSTRINGMATCH_TOKEN,
  );
  ## Pseudo-classes without argument known to the parser.
  my %is_plain_pseudo_class = map { ($_ => 1) } qw(
    active checked -manakai-current disabled empty enabled
    first-child first-of-type focus hover
    indeterminate
    last-child last-of-type link only-child only-of-type
    root target visited
  ); ## NOTE: |indeterminate| is reserved in Selectors Level 3
  my %is_an_b_pseudo_class = map { ($_ => 1) } qw(
    nth-child nth-last-child nth-of-type nth-last-of-type
  );
  my %is_known_pseudo_element = map { ($_ => 1) } qw(
    first-letter first-line before after
  );

  S: {
    if ($state == BEFORE_TYPE_SELECTOR_STATE) {
      $in_negation = 2 if $in_negation;

      if ($t->{type} == IDENT_TOKEN) { ## element type or namespace prefix
        $name = $t->{value};
        $name_t = $t;
        $state = AFTER_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == STAR_TOKEN) { ## universal selector or prefix
        undef $name;
        $state = AFTER_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == VBAR_TOKEN) { ## null namespace
        undef $name;
        push @$sss, [NAMESPACE_SELECTOR, undef];

        $state = BEFORE_LOCAL_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } elsif ($starts_simple_selector{$t->{type}}) {
        $in_negation = 1 if $in_negation;
        push @$sss, [NAMESPACE_SELECTOR, $default_namespace]
            if defined $default_namespace;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        ## Reprocess.
        redo S;
      } else {
        return $fail->('no sss');
      }
    } elsif ($state == BEFORE_SIMPLE_SELECTOR_STATE) {
      if ($in_negation and $in_negation++ == 2) {
        $state = AFTER_NEGATION_SIMPLE_SELECTOR_STATE;
        ## Reprocess.
        redo S;
      }

      ## NOTE(review): The attribute selector case below reports
      ## 'ss after pseudo-element' (with a hyphen) while the other three
      ## cases report 'ss after pseudo element'.  The strings are kept as
      ## they were; confirm which spelling the error consumers expect.
      if ($t->{type} == DOT_TOKEN) { ## class selector
        return $fail->('ss after pseudo element') if $has_pseudo_element;

        $state = BEFORE_CLASS_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == HASH_TOKEN) { ## ID selector
        return $fail->('ss after pseudo element') if $has_pseudo_element;

        push @$sss, [ID_SELECTOR, $t->{value}];
        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == COLON_TOKEN) { ## pseudo-class or pseudo-element
        return $fail->('ss after pseudo element') if $has_pseudo_element;

        $state = AFTER_COLON_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == LBRACKET_TOKEN) { ## attribute selector
        return $fail->('ss after pseudo-element') if $has_pseudo_element;

        $state = AFTER_LBRACKET_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        $state = BEFORE_COMBINATOR_STATE;
        ## Reprocess.
        redo S;
      }
    } elsif ($state == AFTER_NAME_STATE) {
      if ($t->{type} == VBAR_TOKEN) {
        $state = BEFORE_LOCAL_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else { ## Type or universal selector w/o namespace prefix
        push @$sss, [NAMESPACE_SELECTOR, $default_namespace]
            if defined $default_namespace;
        push @$sss, [LOCAL_NAME_SELECTOR, $name] if defined $name;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        ## Reprocess.
        redo S;
      }
    } elsif ($state == BEFORE_LOCAL_NAME_STATE) {
      if ($t->{type} == IDENT_TOKEN or $t->{type} == STAR_TOKEN) {
        if (defined $name) { ## Prefix is neither empty nor "*"
          my $uri = $self->{lookup_namespace_uri}->($name);
          return $fail->('namespace prefix:not declared',
                         token => $name_t, value => $name)
              unless defined $uri;
          push @$sss, [NAMESPACE_SELECTOR, $uri];
        }
        ## The universal selector "*" adds no local name constraint.
        push @$sss, [LOCAL_NAME_SELECTOR, $t->{value}]
            if $t->{type} == IDENT_TOKEN;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else { ## "|" not followed by type or universal selector
        return $fail->('no local name selector');
      }
    } elsif ($state == BEFORE_CLASS_NAME_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        push @$sss, [CLASS_SELECTOR, $t->{value}];

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('no class name');
      }
    } elsif ($state == BEFORE_COMBINATOR_STATE) {
      ## The sequence of simple selectors is complete.
      push @$selector, $sss;
      $sss = [];

      if ($t->{type} == S_TOKEN) {
        $state = COMBINATOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($ends_sss{$t->{type}}) {
        $state = COMBINATOR_STATE;
        ## Reprocess.
        redo S;
      } else {
        return $fail->('no combinator');
      }
    } elsif ($state == COMBINATOR_STATE) {
      if ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } elsif ($is_explicit_combinator{$t->{type}}) {
        push @$selector, $t->{type};

        $state = BEFORE_TYPE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == EOF_TOKEN or $t->{type} == $end_type) {
        push @$selectors, $selector;
        return ($t, $selectors);
      } elsif ($t->{type} == COMMA_TOKEN) {
        push @$selectors, $selector;
        $selector = [DESCENDANT_COMBINATOR];
        undef $has_pseudo_element;

        $state = BEFORE_TYPE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        ## Only white space was found: descendant combinator.
        push @$selector, S_TOKEN;

        $state = BEFORE_TYPE_SELECTOR_STATE;
        ## Reprocess.
        redo S;
      }
    } elsif ($state == AFTER_COLON_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        my $class = $t->{value};
        $class =~ tr/A-Z/a-z/; ## TODO: ASCII case-insensitivity ok?
        if ($self->{pseudo_class}->{$class} and
            $is_plain_pseudo_class{$class}) {
          push @$sss, [PSEUDO_CLASS_SELECTOR, $class];
        } elsif ($self->{pseudo_element}->{$class} and
                 $is_known_pseudo_element{$class}) {
          ## Pseudo-element in the single colon syntax
          push @$sss, [PSEUDO_ELEMENT_SELECTOR, $class];
          $has_pseudo_element = 1;
        } else {
          ## TODO: Should we raise a different kind of error
          ## if a pseudo class is known but not supported?
          ## TODO: Maybe we should raise different type of error
          ## for at least pseudo-classes which requires arguments.
          return $fail->('unknown pseudo-class',
                         level => 'uncertain', value => $class);
        }

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == FUNCTION_TOKEN) {
        my $class = $t->{value};
        $class =~ tr/A-Z/a-z/; ## TODO: Is ASCII case-insensitivity OK?

        if ($class eq 'lang' and $self->{pseudo_class}->{$class}) {
          $state = BEFORE_LANG_TAG_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ($class eq 'not' and $self->{pseudo_class}->{$class} and
                 not $in_negation) {
          $in_negation = 1;

          ## Non-reference dummy item marking where the argument of
          ## |:not()| starts in |@$sss|.
          push @$sss, '';
          $state = BEFORE_TYPE_SELECTOR_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ($is_an_b_pseudo_class{$class} and
                 $self->{pseudo_class}->{$class}) {
          $name = $class;

          $state = BEFORE_AN_STATE;
          $t = $tt->get_next_token;
          ## TODO: syntax of value in the spec is vague; need to reverse
          ## engineer what Opera 9.5 does.
          redo S;
        } elsif ($class eq '-manakai-contains' and
                 $self->{pseudo_class}->{$class}) {
          $state = BEFORE_CONTAINS_STRING_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          return $fail->('unknown pseudo-class',
                         level => 'uncertain', value => $class);
        }
      } elsif ($t->{type} == COLON_TOKEN and
               not $in_negation) { ## Pseudo-element
        $state = AFTER_DOUBLE_COLON_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('no pseudo-class name');
      }
    } elsif ($state == AFTER_LBRACKET_STATE) { ## Attribute selector
      $simple_selector = [ATTRIBUTE_SELECTOR];
      if ($t->{type} == IDENT_TOKEN) {
        $name = $t->{value};
        $name_t = $t;

        $state = AFTER_ATTR_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == VBAR_TOKEN) {
        $simple_selector->[1] = ''; # null namespace

        $state = BEFORE_ATTR_LOCAL_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == STAR_TOKEN) {
        $name = undef;
        $name_t = undef;

        $state = AFTER_ATTR_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('no attr name');
      }
    } elsif ($state == AFTER_ATTR_NAME_STATE) {
      if ($t->{type} == VBAR_TOKEN) {
        if (defined $name) {
          my $uri = $self->{lookup_namespace_uri}->($name);
          return $fail->('namespace prefix:not declared',
                         token => $name_t, value => $name)
              unless defined $uri;
          $simple_selector->[1] = $uri;
        }

        $state = BEFORE_ATTR_LOCAL_NAME_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('no attr namespace separator')
            unless defined $name; ## [*]

        $simple_selector->[1] = ''; # null namespace
        $simple_selector->[2] = $name;

        $state = BEFORE_MATCH_STATE;
        ## Reprocess.
        redo S;
      }
    } elsif ($state == BEFORE_ATTR_LOCAL_NAME_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        $simple_selector->[2] = $t->{value};

        $state = BEFORE_MATCH_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('no attr local name');
      }
    } elsif ($state == BEFORE_MATCH_STATE) {
      if ($is_match_operator{$t->{type}}) {
        $simple_selector->[3] = $t->{type};

        $state = BEFORE_VALUE_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == RBRACKET_TOKEN) {
        push @$sss, $simple_selector;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('no attr match');
      }
    } elsif ($state == BEFORE_VALUE_STATE) {
      if ($t->{type} == IDENT_TOKEN or $t->{type} == STRING_TOKEN) {
        $simple_selector->[4] = $t->{value};
        push @$sss, $simple_selector;

        $state = AFTER_VALUE_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('no attr value');
      }
    } elsif ($state == AFTER_VALUE_STATE) {
      if ($t->{type} == RBRACKET_TOKEN) {
        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('attr selector not closed');
      }
    } elsif ($state == AFTER_DOUBLE_COLON_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        my $pe = $t->{value};
        $pe =~ tr/A-Z/a-z/; ## TODO: Is ASCII case-insensitive OK?
        if ($self->{pseudo_element}->{$pe} and
            $is_known_pseudo_element{$pe}) {
          push @$sss, [PSEUDO_ELEMENT_SELECTOR, $pe];
          $has_pseudo_element = 1;

          $state = BEFORE_SIMPLE_SELECTOR_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          return $fail->('unknown pseudo-element',
                         level => 'uncertain', value => $pe);
        }
      } else {
        return $fail->('no pseudo-element name');
      }
    } elsif ($state == BEFORE_LANG_TAG_STATE) {
      if ($t->{type} == IDENT_TOKEN) {
        push @$sss, [PSEUDO_CLASS_SELECTOR, 'lang', $t->{value}];

        $state = AFTER_LANG_TAG_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('no lang tag');
      }
    } elsif ($state == AFTER_LANG_TAG_STATE) {
      ## NOTE: This state is also used after the argument of
      ## |:-manakai-contains()|.
      if ($t->{type} == RPAREN_TOKEN) {
        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('lang selector not closed');
      }
    } elsif ($state == BEFORE_AN_STATE) {
      ## The argument is stored as |[..., $name, a, b]| (the "an+b"
      ## notation).
      if ($t->{type} == DIMENSION_TOKEN) {
        return $fail->('an+b syntax error')
            unless int ($t->{number}) == $t->{number};

        my $n = $t->{value};
        $n =~ tr/A-Z/a-z/; ## TODO: ascii ?
        if ($n eq 'n') {
          $simple_selector = [PSEUDO_CLASS_SELECTOR, $name,
                              0+$t->{number}, 0];

          $state = AFTER_AN_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ($n =~ /\An-([0-9]+)\z/) {
          push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 0+$t->{number}, 0-$1];

          $state = AFTER_B_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          return $fail->('an+b syntax error');
        }
      } elsif ($t->{type} == NUMBER_TOKEN) {
        ## ISSUE: Is :nth-child(0.0) disallowed?
        return $fail->('an+b not integer', value => $t->{number})
            unless int ($t->{number}) == $t->{number};

        push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 0, 0+$t->{number}];

        $state = AFTER_B_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == IDENT_TOKEN) {
        my $value = $t->{value};
        $value =~ tr/A-Z/a-z/; ## TODO: ASCII case-insensitive?
        if ($value eq 'odd') {
          push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 2, 1];

          $state = AFTER_B_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ($value eq 'even') {
          push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 2, 0];

          $state = AFTER_B_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ($value eq 'n' or $value eq '-n') {
          ## ISSUE: :nth-child(-n) is not explicitly allowed, but appears
          ## in an example in the spec.
          $simple_selector = [PSEUDO_CLASS_SELECTOR, $name,
                              $value eq 'n' ? 1 : -1, 0];

          $state = AFTER_AN_STATE;
          $t = $tt->get_next_token;
          redo S;
        } elsif ($value =~ /\A(-?)n-([0-9]+)\z/) {
          push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 0+($1.'1'), -$2];

          $state = AFTER_B_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          return $fail->('an+b syntax error');
        }
      } elsif ($t->{type} == MINUS_TOKEN) {
        ## ISSUE: Is :nth-child(- 1) allowed?
        ## ISSUE: Is :nth-child(n-/**/6) or (-n-/**/6) allowed?
        $t = $tt->get_next_token;
        if ($t->{type} == DIMENSION_TOKEN || $t->{type} == IDENT_TOKEN) {
          my $num = $t->{type} == IDENT_TOKEN ? 1 : $t->{number};
          ## NOTE: :nth-child(-/**/n)
          return $fail->('an+b syntax error') unless int ($num) == $num;

          my $n = $t->{value};
          $n =~ tr/A-Z/a-z/; ## TODO: ASCII?
          if ($n eq 'n') {
            $simple_selector = [PSEUDO_CLASS_SELECTOR, $name, -$num, 0];

            $state = AFTER_AN_STATE;
            $t = $tt->get_next_token;
            redo S;
          } elsif ($n =~ /\An-([0-9]+)\z/) {
            ## Both a and b are already known here, so only the closing
            ## parenthesis may follow, as in the cases without the
            ## leading "-".  (Going to the "after an" state instead
            ## would accept a second b, as in |-2n-3+4|, and silently
            ## discard the first one.)
            push @$sss, [PSEUDO_CLASS_SELECTOR, $name, -$num, -$1];

            $state = AFTER_B_STATE;
            $t = $tt->get_next_token;
            redo S;
          } else {
            return $fail->('an+b syntax error');
          }
        } elsif ($t->{type} == NUMBER_TOKEN) {
          return $fail->('an+b syntax error')
              unless int ($t->{number}) == $t->{number};

          push @$sss, [PSEUDO_CLASS_SELECTOR, $name, 0, -$t->{number}];

          $state = AFTER_B_STATE;
          $t = $tt->get_next_token;
          redo S;
        } else {
          return $fail->('an+b syntax error');
        }
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('an+b syntax error');
      }
    } elsif ($state == AFTER_AN_STATE) {
      ## ISSUE: :nth-child(1n +2) is allowed.
      ## :nth-child(1n /**/ +2) and :nth-child(1n -2) are allowed?
      if ($t->{type} == PLUS_TOKEN) {
        ## Sign of b; multiplied by the number in the next state.
        $simple_selector->[3] = +1;

        $state = BEFORE_B_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == MINUS_TOKEN) {
        $simple_selector->[3] = -1;

        $state = BEFORE_B_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == RPAREN_TOKEN) {
        push @$sss, $simple_selector;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('an+b syntax error');
      }
    } elsif ($state == BEFORE_B_STATE) {
      ## ISSUE: Is S allowed?
      if ($t->{type} == NUMBER_TOKEN) {
        return $fail->('an+b syntax error')
            unless int ($t->{number}) == $t->{number};

        $simple_selector->[3] *= $t->{number};
        push @$sss, $simple_selector;

        $state = AFTER_B_STATE;
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('an+b syntax error');
      }
    } elsif ($state == AFTER_B_STATE) {
      if ($t->{type} == RPAREN_TOKEN) {
        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('an+b not closed');
      }
    } elsif ($state == AFTER_NEGATION_SIMPLE_SELECTOR_STATE) {
      if ($t->{type} == RPAREN_TOKEN) {
        undef $in_negation;

        ## Move the simple selectors parsed since |:not(| (everything
        ## after the non-reference dummy item) into the negation
        ## pseudo-class selector, keeping their order.
        my $negation = [];
        unshift @$negation, pop @$sss while ref $sss->[-1];
        pop @$sss; # dummy
        unshift @$negation, PSEUDO_CLASS_SELECTOR, 'not';
        push @$sss, $negation;

        $state = BEFORE_SIMPLE_SELECTOR_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('not not closed');
      }
    } elsif ($state == BEFORE_CONTAINS_STRING_STATE) {
      if ($t->{type} == STRING_TOKEN or $t->{type} == IDENT_TOKEN) {
        push @$sss, [PSEUDO_CLASS_SELECTOR, '-manakai-contains', $t->{value}];

        ## The rest (optional white space, then ")") is the same as for
        ## |:lang()|.
        $state = AFTER_LANG_TAG_STATE;
        $t = $tt->get_next_token;
        redo S;
      } elsif ($t->{type} == S_TOKEN) {
        ## Stay in the state.
        $t = $tt->get_next_token;
        redo S;
      } else {
        return $fail->('no contains string');
      }
    } else {
      die "$0: Selectors Parser: $state: Unknown state";
    }
  } # S
} # _parse_selectors_with_tokenizer
938    
## Compute the specificity of a selector.
##
##   $_[0] : The parser object or class (not used).
##   $_[1] : A selector, i.e. one item of the array returned by the
##           parser: an array of sequences of simple selectors (array
##           references) and combinators (non-references).
##
## Returns a reference to the array |[a, b, c, d]|.
##
## NOTE: Specificity in CSS 2.1 and Selectors 3 are incompatible.
## What is implemented by this method is CSS 2.1's one.
## (With Selectors 3 terminology and with Selectors 3 additions.)
sub get_selector_specificity ($$) {
  my (undef, $selector) = @_;

  my $r = [0, 0, 0, 0]; # a, b, c, d

  ## a = 1 iff style="" attribute (never set by this method)
  ## b += 1 for ID attribute selectors
  ## c += 1 for attribute, class, and pseudo-class selectors
  ## d += 1 for type selectors and pseudo-elements
  ##
  ## The negation pseudo-class itself is not counted; the simple
  ## selectors in its argument are counted like any other.  Namespace
  ## selectors are not counted.

  for my $sss (@$selector) {
    next unless ref $sss; # combinator
    my @sss = @$sss;
    while (@sss) {
      my $ss = shift @sss;
      if ($ss->[0] == LOCAL_NAME_SELECTOR or
          $ss->[0] == PSEUDO_ELEMENT_SELECTOR) {
        $r->[3]++;
      } elsif ($ss->[0] == ATTRIBUTE_SELECTOR or
               $ss->[0] == CLASS_SELECTOR) {
        $r->[2]++;
      } elsif ($ss->[0] == PSEUDO_CLASS_SELECTOR) {
        if ($ss->[1] eq 'not') {
          ## |[PSEUDO_CLASS_SELECTOR, 'not', simple selector, ...]|:
          ## queue the argument's simple selectors for counting.
          push @sss, @$ss[2..$#$ss];
        } else {
          $r->[2]++;
        }
      } elsif ($ss->[0] == ID_SELECTOR) {
        $r->[1]++;
      }
    }
  }

  return $r;
} # get_selector_specificity
977    
978 wakaba 1.5 =head1 LICENSE
979    
980 wakaba 1.12 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
981 wakaba 1.5
982     This library is free software; you can redistribute it
983     and/or modify it under the same terms as Perl itself.
984    
985     =cut
986    
987 wakaba 1.1 1;
988 wakaba 1.12 # $Date: 2008/02/10 07:34:10 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24