/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.10 by wakaba, Sat Sep 8 13:43:58 2007 UTC | revision 1.13 by wakaba, Sat Sep 8 17:43:41 2007 UTC
# Line 59  sub CDC_TOKEN () { 35 } Line 59  sub CDC_TOKEN () { 35 }
59  sub COMMENT_TOKEN () { 36 }  sub COMMENT_TOKEN () { 36 }
60  sub COMMENT_INVALID_TOKEN () { 37 }  sub COMMENT_INVALID_TOKEN () { 37 }
61  sub EOF_TOKEN () { 38 }  sub EOF_TOKEN () { 38 }
62    sub MINUS_TOKEN () { 39 }
63    sub STAR_TOKEN () { 40 }
64    sub VBAR_TOKEN () { 41 }
65    sub DOT_TOKEN () { 42 }
66    sub COLON_TOKEN () { 43 }
67    sub MATCH_TOKEN () { 44 }
68    sub EXCLAMATION_TOKEN () { 45 }
69    
70  our @TokenName = qw(  our @TokenName = qw(
71    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID
# Line 66  our @TokenName = qw( Line 73  our @TokenName = qw(
73    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH
74    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON
75    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT
76    COMMENT_INVALID EOF    COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION
77  );  );
78    
79  sub new ($) {  sub new ($) {
# Line 115  sub get_next_token ($) { Line 122  sub get_next_token ($) {
122                (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F                (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
123                (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f                (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
124                $self->{c} == 0x003F) { # ?                $self->{c} == 0x003F) { # ?
125              $self->{t}->{value} .= '+' . chr $self->{c};              $self->{t}->{value} = chr $self->{c};
126              $self->{t}->{type} = UNICODE_RANGE_TOKEN;              $self->{t}->{type} = UNICODE_RANGE_TOKEN;
127              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->();
128              C: for (2..6) {              C: for (2..6) {
# Line 267  sub get_next_token ($) { Line 274  sub get_next_token ($) {
274                return {type => CDO_TOKEN};                return {type => CDO_TOKEN};
275                #redo A;                #redo A;
276              } else {              } else {
277                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};                unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
278                ## NOTE: |-| in |ident| in |IDENT|                ## NOTE: |-| in |ident| in |IDENT|
279                $self->{t} = {type => IDENT_TOKEN, value => '-'};                $self->{t} = {type => IDENT_TOKEN, value => '-'};
280                $self->{state} = BEFORE_NMSTART_STATE;                $self->{state} = BEFORE_NMSTART_STATE;
# Line 276  sub get_next_token ($) { Line 283  sub get_next_token ($) {
283                #redo A;                #redo A;
284              }              }
285            } else {            } else {
286              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};              unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
287              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
288              #reprocess              #reprocess
289              return {type => DELIM_TOKEN, value => '<'};              return {type => DELIM_TOKEN, value => '<'};
# Line 289  sub get_next_token ($) { Line 296  sub get_next_token ($) {
296            #redo A;            #redo A;
297          }          }
298        } elsif (my $t = {        } elsif (my $t = {
299                  0x003B => SEMICOLON_TOKEN, # ;                          0x0021 => EXCLAMATION_TOKEN, # !
300                  0x007B => LBRACE_TOKEN, # {                          0x002D => MINUS_TOKEN, # -
301                  0x007D => RBRACE_TOKEN, # }                          0x002E => DOT_TOKEN, # .
302                  0x0028 => LPAREN_TOKEN, # (                          0x003A => COLON_TOKEN, # :
303                  0x0029 => RPAREN_TOKEN, # )                          0x003B => SEMICOLON_TOKEN, # ;
304                  0x005B => LBRACKET_TOKEN, # [                          0x003D => MATCH_TOKEN, # =
305                  0x005D => RBRACKET_TOKEN, # ]                          0x007B => LBRACE_TOKEN, # {
306                            0x007D => RBRACE_TOKEN, # }
307                            0x0028 => LPAREN_TOKEN, # (
308                            0x0029 => RPAREN_TOKEN, # )
309                            0x005B => LBRACKET_TOKEN, # [
310                            0x005D => RBRACKET_TOKEN, # ]
311                 }->{$self->{c}}) {                 }->{$self->{c}}) {
312          # stay in the state          # stay in the state
313          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 348  sub get_next_token ($) { Line 360  sub get_next_token ($) {
360            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
361            return {type => $v};            return {type => $v};
362            #redo A;            #redo A;
363            } elsif ($v = {
364                           0x002A => STAR_TOKEN, # *
365                           0x007C => VBAR_TOKEN, # |
366                          }->{$c}) {
367              # stay in the state.
368              # reprocess
369              return {type => $v};
370              #redo A;
371          } else {          } else {
372            # stay in the state            # stay in the state
373            # reprocess            # reprocess
# Line 424  sub get_next_token ($) { Line 444  sub get_next_token ($) {
444              #$self->{t} = {type => IDENT_TOKEN, value => '-'};              #$self->{t} = {type => IDENT_TOKEN, value => '-'};
445              # stay in the state              # stay in the state
446              # reconsume              # reconsume
447              return {type => DELIM_TOKEN, value => '-'};              return {type => MINUS_TOKEN};
448              #redo A;              #redo A;
449            }            }
450          } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {          } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {
# Line 443  sub get_next_token ($) { Line 463  sub get_next_token ($) {
463              $t->{type} = NUMBER_TOKEN;              $t->{type} = NUMBER_TOKEN;
464              $t->{value} = '';              $t->{value} = '';
465              $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};              $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
466              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};              unshift @{$self->{token}}, {type => MINUS_TOKEN};
467              # stay in the state              # stay in the state
468              # reconsume              # reconsume
469              return $t;              return $t;
# Line 458  sub get_next_token ($) { Line 478  sub get_next_token ($) {
478                
479        if ($self->{t}->{type} == DIMENSION_TOKEN) {        if ($self->{t}->{type} == DIMENSION_TOKEN) {
480          ## NOTE: |-| after |NUMBER|.          ## NOTE: |-| after |NUMBER|.
481          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
482          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
483          # reprocess          # reprocess
484          $self->{t}->{type} = NUMBER_TOKEN;          $self->{t}->{type} = NUMBER_TOKEN;
# Line 468  sub get_next_token ($) { Line 488  sub get_next_token ($) {
488          ## NOTE: |-| not followed by |nmstart|.          ## NOTE: |-| not followed by |nmstart|.
489          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
490          # reprocess          # reprocess
491          return {type => DELIM_TOKEN, value => '-'};          return {type => MINUS_TOKEN};
492        }        }
493      } elsif ($self->{state} == AFTER_AT_STATE) {      } elsif ($self->{state} == AFTER_AT_STATE) {
494        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
# Line 511  sub get_next_token ($) { Line 531  sub get_next_token ($) {
531            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
532            #redo A;            #redo A;
533          } else {          } else {
534            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => MINUS_TOKEN};
535            $self->{t} = {type => IDENT_TOKEN, value => '-'};            $self->{t} = {type => IDENT_TOKEN, value => '-'};
536            $self->{state} = BEFORE_NMSTART_STATE;            $self->{state} = BEFORE_NMSTART_STATE;
537            # reprocess            # reprocess
# Line 524  sub get_next_token ($) { Line 544  sub get_next_token ($) {
544          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
545          redo A;          redo A;
546        } else {        } else {
547          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
548          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
549          # reprocess          # reprocess
550          return {type => DELIM_TOKEN, value => '@'};          return {type => DELIM_TOKEN, value => '@'};
# Line 860  sub get_next_token ($) { Line 880  sub get_next_token ($) {
880              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
881              # reprocess              # reprocess
882              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
883              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};              unshift @{$self->{token}}, {type => MINUS_TOKEN};
884              $self->{t}->{type} = NUMBER_TOKEN;              $self->{t}->{type} = NUMBER_TOKEN;
885              $self->{t}->{value} = '';              $self->{t}->{value} = '';
886              return $self->{t};              return $self->{t};
# Line 885  sub get_next_token ($) { Line 905  sub get_next_token ($) {
905              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
906              # reprocess              # reprocess
907              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
908              return {type => DELIM_TOKEN, value => '-'};              return {type => MINUS_TOKEN};
909              #redo A;              #redo A;
910            } elsif (length $self->{t}->{value}) {            } elsif (length $self->{t}->{value}) {
911              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
# Line 1013  sub get_next_token ($) { Line 1033  sub get_next_token ($) {
1033                 $self->{c} == 0x000D or # \r                 $self->{c} == 0x000D or # \r
1034                 $self->{c} == 0x000C or # \f                 $self->{c} == 0x000C or # \f
1035                 $self->{c} == -1) {                 $self->{c} == -1) {
1036          $self->{t}->{type} = INVALID_TOKEN;          $self->{t}->{type} = {
1037              STRING_TOKEN, INVALID_TOKEN,
1038              INVALID_TOKEN, INVALID_TOKEN,
1039              URI_TOKEN, URI_INVALID_TOKEN,
1040              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
1041              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
1042              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
1043            }->{$self->{t}->{type}};
1044          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1045          # reconsume          # reconsume
1046          return $self->{t};          return $self->{t};
# Line 1050  sub get_next_token ($) { Line 1077  sub get_next_token ($) {
1077          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1078          redo A;          redo A;
1079        } else {        } else {
1080          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '.'};          unshift @{$self->{token}}, {type => DOT_TOKEN};
1081          $self->{t}->{number} = $self->{t}->{value};          $self->{t}->{number} = $self->{t}->{value};
1082          $self->{t}->{value} = '';          $self->{t}->{value} = '';
1083          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
# Line 1068  sub get_next_token ($) { Line 1095  sub get_next_token ($) {
1095        } else {        } else {
1096          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1097          # reprocess          # reprocess
1098          return {type => DELIM_TOKEN, value => '.'};          return {type => DOT_TOKEN};
1099          #redo A;          #redo A;
1100        }        }
1101      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {

Legend:
Removed from v.1.10
Changed lines
Added in v.1.13

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24