/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.12 by wakaba, Sat Sep 8 15:43:12 2007 UTC | revision 1.14 by wakaba, Sat Sep 22 12:16:33 2007 UTC
# Line 1  Line 1 
1  package Whatpm::CSS::Tokenizer;  package Whatpm::CSS::Tokenizer;
2  use strict;  use strict;
3    
4    require Exporter;
5    push our @ISA, 'Exporter';
6    
7  sub BEFORE_TOKEN_STATE () { 0 }  sub BEFORE_TOKEN_STATE () { 0 }
8  sub BEFORE_NMSTART_STATE () { 1 }  sub BEFORE_NMSTART_STATE () { 1 }
9  sub NAME_STATE () { 2 }  sub NAME_STATE () { 2 }
# Line 59  sub CDC_TOKEN () { 35 } Line 62  sub CDC_TOKEN () { 35 }
62  sub COMMENT_TOKEN () { 36 }  sub COMMENT_TOKEN () { 36 }
63  sub COMMENT_INVALID_TOKEN () { 37 }  sub COMMENT_INVALID_TOKEN () { 37 }
64  sub EOF_TOKEN () { 38 }  sub EOF_TOKEN () { 38 }
65    sub MINUS_TOKEN () { 39 }
66    sub STAR_TOKEN () { 40 }
67    sub VBAR_TOKEN () { 41 }
68    sub DOT_TOKEN () { 42 }
69    sub COLON_TOKEN () { 43 }
70    sub MATCH_TOKEN () { 44 }
71    sub EXCLAMATION_TOKEN () { 45 }
72    
73  our @TokenName = qw(  our @TokenName = qw(
74    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID
# Line 66  our @TokenName = qw( Line 76  our @TokenName = qw(
76    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH
77    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON
78    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT
79    COMMENT_INVALID EOF    COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION
80    );
81    
82    our @EXPORT_OK = qw(
83      IDENT_TOKEN ATKEYWORD_TOKEN HASH_TOKEN FUNCTION_TOKEN URI_TOKEN
84      URI_INVALID_TOKEN URI_PREFIX_TOKEN URI_PREFIX_INVALID_TOKEN
85      STRING_TOKEN INVALID_TOKEN NUMBER_TOKEN DIMENSION_TOKEN PERCENTAGE_TOKEN
86      UNICODE_RANGE_TOKEN DELIM_TOKEN PLUS_TOKEN GREATER_TOKEN COMMA_TOKEN
87      TILDE_TOKEN DASHMATCH_TOKEN PREFIXMATCH_TOKEN SUFFIXMATCH_TOKEN
88      SUBSTRINGMATCH_TOKEN INCLUDES_TOKEN SEMICOLON_TOKEN LBRACE_TOKEN
89      RBRACE_TOKEN LPAREN_TOKEN RPAREN_TOKEN LBRACKET_TOKEN RBRACKET_TOKEN
90      S_TOKEN CDO_TOKEN CDC_TOKEN COMMENT_TOKEN COMMENT_INVALID_TOKEN EOF_TOKEN
91      MINUS_TOKEN STAR_TOKEN VBAR_TOKEN DOT_TOKEN COLON_TOKEN MATCH_TOKEN
92      EXCLAMATION_TOKEN
93  );  );
94    
95    our %EXPORT_TAGS = ('token' => [@EXPORT_OK]);
96    
97  sub new ($) {  sub new ($) {
98    my $self = bless {token => [], get_char => sub { -1 },    my $self = bless {token => [], get_char => sub { -1 },
99                      onerror => sub { }}, shift;                      onerror => sub { }}, shift;
# Line 267  sub get_next_token ($) { Line 292  sub get_next_token ($) {
292                return {type => CDO_TOKEN};                return {type => CDO_TOKEN};
293                #redo A;                #redo A;
294              } else {              } else {
295                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};                unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
296                ## NOTE: |-| in |ident| in |IDENT|                ## NOTE: |-| in |ident| in |IDENT|
297                $self->{t} = {type => IDENT_TOKEN, value => '-'};                $self->{t} = {type => IDENT_TOKEN, value => '-'};
298                $self->{state} = BEFORE_NMSTART_STATE;                $self->{state} = BEFORE_NMSTART_STATE;
# Line 276  sub get_next_token ($) { Line 301  sub get_next_token ($) {
301                #redo A;                #redo A;
302              }              }
303            } else {            } else {
304              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};              unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
305              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
306              #reprocess              #reprocess
307              return {type => DELIM_TOKEN, value => '<'};              return {type => DELIM_TOKEN, value => '<'};
# Line 289  sub get_next_token ($) { Line 314  sub get_next_token ($) {
314            #redo A;            #redo A;
315          }          }
316        } elsif (my $t = {        } elsif (my $t = {
317                  0x003B => SEMICOLON_TOKEN, # ;                          0x0021 => EXCLAMATION_TOKEN, # !
318                  0x007B => LBRACE_TOKEN, # {                          0x002D => MINUS_TOKEN, # -
319                  0x007D => RBRACE_TOKEN, # }                          0x002E => DOT_TOKEN, # .
320                  0x0028 => LPAREN_TOKEN, # (                          0x003A => COLON_TOKEN, # :
321                  0x0029 => RPAREN_TOKEN, # )                          0x003B => SEMICOLON_TOKEN, # ;
322                  0x005B => LBRACKET_TOKEN, # [                          0x003D => MATCH_TOKEN, # =
323                  0x005D => RBRACKET_TOKEN, # ]                          0x007B => LBRACE_TOKEN, # {
324                            0x007D => RBRACE_TOKEN, # }
325                            0x0028 => LPAREN_TOKEN, # (
326                            0x0029 => RPAREN_TOKEN, # )
327                            0x005B => LBRACKET_TOKEN, # [
328                            0x005D => RBRACKET_TOKEN, # ]
329                 }->{$self->{c}}) {                 }->{$self->{c}}) {
330          # stay in the state          # stay in the state
331          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 348  sub get_next_token ($) { Line 378  sub get_next_token ($) {
378            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
379            return {type => $v};            return {type => $v};
380            #redo A;            #redo A;
381            } elsif ($v = {
382                           0x002A => STAR_TOKEN, # *
383                           0x007C => VBAR_TOKEN, # |
384                          }->{$c}) {
385              # stay in the state.
386              # reprocess
387              return {type => $v};
388              #redo A;
389          } else {          } else {
390            # stay in the state            # stay in the state
391            # reprocess            # reprocess
# Line 424  sub get_next_token ($) { Line 462  sub get_next_token ($) {
462              #$self->{t} = {type => IDENT_TOKEN, value => '-'};              #$self->{t} = {type => IDENT_TOKEN, value => '-'};
463              # stay in the state              # stay in the state
464              # reconsume              # reconsume
465              return {type => DELIM_TOKEN, value => '-'};              return {type => MINUS_TOKEN};
466              #redo A;              #redo A;
467            }            }
468          } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {          } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {
# Line 443  sub get_next_token ($) { Line 481  sub get_next_token ($) {
481              $t->{type} = NUMBER_TOKEN;              $t->{type} = NUMBER_TOKEN;
482              $t->{value} = '';              $t->{value} = '';
483              $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};              $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
484              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};              unshift @{$self->{token}}, {type => MINUS_TOKEN};
485              # stay in the state              # stay in the state
486              # reconsume              # reconsume
487              return $t;              return $t;
# Line 458  sub get_next_token ($) { Line 496  sub get_next_token ($) {
496                
497        if ($self->{t}->{type} == DIMENSION_TOKEN) {        if ($self->{t}->{type} == DIMENSION_TOKEN) {
498          ## NOTE: |-| after |NUMBER|.          ## NOTE: |-| after |NUMBER|.
499          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
500          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
501          # reprocess          # reprocess
502          $self->{t}->{type} = NUMBER_TOKEN;          $self->{t}->{type} = NUMBER_TOKEN;
# Line 468  sub get_next_token ($) { Line 506  sub get_next_token ($) {
506          ## NOTE: |-| not followed by |nmstart|.          ## NOTE: |-| not followed by |nmstart|.
507          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
508          # reprocess          # reprocess
509          return {type => DELIM_TOKEN, value => '-'};          return {type => MINUS_TOKEN};
510        }        }
511      } elsif ($self->{state} == AFTER_AT_STATE) {      } elsif ($self->{state} == AFTER_AT_STATE) {
512        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
# Line 511  sub get_next_token ($) { Line 549  sub get_next_token ($) {
549            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
550            #redo A;            #redo A;
551          } else {          } else {
552            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => MINUS_TOKEN};
553            $self->{t} = {type => IDENT_TOKEN, value => '-'};            $self->{t} = {type => IDENT_TOKEN, value => '-'};
554            $self->{state} = BEFORE_NMSTART_STATE;            $self->{state} = BEFORE_NMSTART_STATE;
555            # reprocess            # reprocess
# Line 524  sub get_next_token ($) { Line 562  sub get_next_token ($) {
562          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
563          redo A;          redo A;
564        } else {        } else {
565          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
566          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
567          # reprocess          # reprocess
568          return {type => DELIM_TOKEN, value => '@'};          return {type => DELIM_TOKEN, value => '@'};
# Line 860  sub get_next_token ($) { Line 898  sub get_next_token ($) {
898              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
899              # reprocess              # reprocess
900              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
901              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};              unshift @{$self->{token}}, {type => MINUS_TOKEN};
902              $self->{t}->{type} = NUMBER_TOKEN;              $self->{t}->{type} = NUMBER_TOKEN;
903              $self->{t}->{value} = '';              $self->{t}->{value} = '';
904              return $self->{t};              return $self->{t};
# Line 885  sub get_next_token ($) { Line 923  sub get_next_token ($) {
923              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
924              # reprocess              # reprocess
925              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
926              return {type => DELIM_TOKEN, value => '-'};              return {type => MINUS_TOKEN};
927              #redo A;              #redo A;
928            } elsif (length $self->{t}->{value}) {            } elsif (length $self->{t}->{value}) {
929              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
# Line 1057  sub get_next_token ($) { Line 1095  sub get_next_token ($) {
1095          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1096          redo A;          redo A;
1097        } else {        } else {
1098          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '.'};          unshift @{$self->{token}}, {type => DOT_TOKEN};
1099          $self->{t}->{number} = $self->{t}->{value};          $self->{t}->{number} = $self->{t}->{value};
1100          $self->{t}->{value} = '';          $self->{t}->{value} = '';
1101          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
# Line 1075  sub get_next_token ($) { Line 1113  sub get_next_token ($) {
1113        } else {        } else {
1114          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1115          # reprocess          # reprocess
1116          return {type => DELIM_TOKEN, value => '.'};          return {type => DOT_TOKEN};
1117          #redo A;          #redo A;
1118        }        }
1119      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {

Legend:
Removed from v.1.12  
changed lines
  Added in v.1.14

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24