/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

-revision 1.4 by wakaba,
Sat Sep  8 02:58:24 2007 UTC
+revision 1.19 by wakaba,
Sat Jan 26 09:30:47 2008 UTC
 Line 1
  package Whatpm::CSS::Tokenizer;
  use strict;
+ our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
+ require Exporter;
+ push our @ISA, 'Exporter';
  sub BEFORE_TOKEN_STATE () { 0 }
  sub BEFORE_NMSTART_STATE () { 1 }
-Line 36 
 sub NUMBER_TOKEN () { 11 }
+Line 40 
 sub NUMBER_TOKEN () { 11 }
  sub DIMENSION_TOKEN () { 12 }
  sub PERCENTAGE_TOKEN () { 13 }
  sub UNICODE_RANGE_TOKEN () { 14 }
- sub UNICODE_RANGE_INVALID_TOKEN () { 15 }
  sub DELIM_TOKEN () { 16 }
  sub PLUS_TOKEN () { 17 }
  sub GREATER_TOKEN () { 18 }
-Line 60 
 sub CDC_TOKEN () { 35 }
+Line 63 
 sub CDC_TOKEN () { 35 }
  sub COMMENT_TOKEN () { 36 }
  sub COMMENT_INVALID_TOKEN () { 37 }
  sub EOF_TOKEN () { 38 }
+ sub MINUS_TOKEN () { 39 }
+ sub STAR_TOKEN () { 40 }
+ sub VBAR_TOKEN () { 41 }
+ sub DOT_TOKEN () { 42 }
+ sub COLON_TOKEN () { 43 }
+ sub MATCH_TOKEN () { 44 }
+ sub EXCLAMATION_TOKEN () { 45 }
  our @TokenName = qw(
 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID
    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE
-   UNICODE_RANGE_INVALID DELIM PLUS GREATER COMMA TILDE DASHMATCH
+DELIM PLUS GREATER COMMA TILDE DASHMATCH
    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON
    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT
-   COMMENT_INVALID EOF
+   COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION
+ );
+ our @EXPORT_OK = qw(
+   IDENT_TOKEN ATKEYWORD_TOKEN HASH_TOKEN FUNCTION_TOKEN URI_TOKEN
+   URI_INVALID_TOKEN URI_PREFIX_TOKEN URI_PREFIX_INVALID_TOKEN
+   STRING_TOKEN INVALID_TOKEN NUMBER_TOKEN DIMENSION_TOKEN PERCENTAGE_TOKEN
+   UNICODE_RANGE_TOKEN DELIM_TOKEN PLUS_TOKEN GREATER_TOKEN COMMA_TOKEN
+   TILDE_TOKEN DASHMATCH_TOKEN PREFIXMATCH_TOKEN SUFFIXMATCH_TOKEN
+   SUBSTRINGMATCH_TOKEN INCLUDES_TOKEN SEMICOLON_TOKEN LBRACE_TOKEN
+   RBRACE_TOKEN LPAREN_TOKEN RPAREN_TOKEN LBRACKET_TOKEN RBRACKET_TOKEN
+   S_TOKEN CDO_TOKEN CDC_TOKEN COMMENT_TOKEN COMMENT_INVALID_TOKEN EOF_TOKEN
+   MINUS_TOKEN STAR_TOKEN VBAR_TOKEN DOT_TOKEN COLON_TOKEN MATCH_TOKEN
+   EXCLAMATION_TOKEN
  );
+ our %EXPORT_TAGS = ('token' => [@EXPORT_OK]);
  ## Constructor.  Blesses a fresh hash into the invocant (taken from |@_|
  ## via |shift|) and returns it.  The hash starts with:
  ##   |token|    - an empty array; |get_next_token| shifts already-queued
  ##                tokens off this array before scanning further input.
  ##   |get_char| - a default callback that always returns -1, the value
  ##                the tokenizer reports as |EOF_TOKEN|.
  ## NOTE: The diff below shows revision 1.19 dropping the default
  ## |onerror| callback that revision 1.4 installed.
  ## NOTE: The |($)| prototype has no effect when the sub is invoked as a
  ## method.
  sub new ($) {
-   my $self = bless {token => [], get_char => sub { -1 },
+   my $self = bless {token => [], get_char => sub { -1 }}, shift;
-                     onerror => sub { }}, shift;
    return $self;
  } # new
  ## Prepares the tokenizer for scanning: resets the state machine to
  ## |BEFORE_TOKEN_STATE| and primes |$self->{c}| with the first character
  ## obtained from the |get_char| callback.
  ## NOTE: As of revision 1.19 the callback is passed the tokenizer object
  ## itself as its only argument (revision 1.4 called it with no arguments).
  ## NOTE: The commented-out line sketches the shape of |$self->{t}|, the
  ## token currently being built by |get_next_token|.
  sub init ($) {
    my $self = shift;
    $self->{state} = BEFORE_TOKEN_STATE;
-   $self->{c} = $self->{get_char}->();
+   $self->{c} = $self->{get_char}->($self);
+   #$self->{t} = {type => token-type, value => value, number => number};
  } # init
  sub get_next_token ($) {
-Line 88 
 sub get_next_token ($) {
+Line 113 
 sub get_next_token ($) {
      return shift @{$self->{token}};
    }
-   my $current_token;
    my $char;
    my $num; # |{num}|, if any.
    my $i; # |$i + 1|th character in |unicode| in |escape|.
-Line 103 
 sub get_next_token ($) {
+Line 127 
 sub get_next_token ($) {
      if ($self->{state} == BEFORE_TOKEN_STATE) {
        if ($self->{c} == 0x002D) { # -
          ## NOTE: |-| in |ident| in |IDENT|
-         $current_token = {type => IDENT_TOKEN, value => '-'};
+         $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1,
+                       line => $self->{line}, column => $self->{column}};
          $self->{state} = BEFORE_NMSTART_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
+       } elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u
+         $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c},
+                       line => $self->{line}, column => $self->{column}};
+         $self->{c} = $self->{get_char}->($self);
+         if ($self->{c} == 0x002B) { # +
+           my ($l, $c) = ($self->{line}, $self->{column});
+           $self->{c} = $self->{get_char}->($self);
+           if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
+               (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
+               (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
+               $self->{c} == 0x003F) { # ?
+             $self->{t}->{value} = chr $self->{c};
+             $self->{t}->{type} = UNICODE_RANGE_TOKEN;
+             $self->{c} = $self->{get_char}->($self);
+             C: for (2..6) {
+               if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
+                   (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
+                   (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
+                   $self->{c} == 0x003F) { # ?
+                 $self->{t}->{value} .= chr $self->{c};
+                 $self->{c} = $self->{get_char}->($self);
+               } else {
+                 last C;
+               }
+             } # C
+             if ($self->{c} == 0x002D) { # -
+               $self->{c} = $self->{get_char}->($self);
+               if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
+                   (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
+                   (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
+                 $self->{t}->{value} .= '-' . chr $self->{c};
+                 $self->{c} = $self->{get_char}->($self);
+                 C: for (2..6) {
+                   if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
+                       (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
+                       (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
+                     $self->{t}->{value} .= chr $self->{c};
+                     $self->{c} = $self->{get_char}->($self);
+                   } else {
+                     last C;
+                   }
+                 } # C
+                 #
+               } else {
+                 my $token = $self->{t};
+                 $self->{t} = {type => IDENT_TOKEN, value => '-',
+                               line => $self->{line},
+                               column => $self->{column}};
+                 $self->{state} = BEFORE_NMSTART_STATE;
+                 # reprocess
+                 return $token;
+                 #redo A;
+               }
+             }
+             $self->{state} = BEFORE_TOKEN_STATE;
+             # reprocess
+             return $self->{t};
+             #redo A;
+           } else {
+             unshift @{$self->{token}},
+                 {type => PLUS_TOKEN, line => $l, column => $c};
+             $self->{state} = BEFORE_TOKEN_STATE;
+             # reprocess
+             return $self->{t};
+             #redo A;
+           }
+         } else {
+           $self->{state} = NAME_STATE;
+           # reprocess
+           redo A;
+         }
        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
                 $self->{c} == 0x005F or # _
                 $self->{c} > 0x007F) { # nonascii
          ## NOTE: |nmstart| in |ident| in |IDENT|
-         $current_token = {type => IDENT_TOKEN, value => chr $self->{c}};
+         $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c},
+                       line => $self->{line}, column => $self->{column}};
          $self->{state} = NAME_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x005C) { # \
          ## NOTE: |nmstart| in |ident| in |IDENT|
-         $current_token = {type => IDENT_TOKEN, value => ''};
+         $self->{t} = {type => IDENT_TOKEN, value => '',
+                       line => $self->{line}, column => $self->{column}};
          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x0040) { # @
          ## NOTE: |@| in |ATKEYWORD|
-         $current_token = {type => ATKEYWORD_TOKEN, value => ''};
+         $self->{t} = {type => ATKEYWORD_TOKEN, value => '',
+                       line => $self->{line}, column => $self->{column}};
          $self->{state} = AFTER_AT_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
-         $current_token = {type => STRING_TOKEN, value => ''};
+         $self->{t} = {type => STRING_TOKEN, value => '',
+                       line => $self->{line}, column => $self->{column}};
          $self->{state} = STRING_STATE; $q = $self->{c};
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x0023) { # #
          ## NOTE: |#| in |HASH|.
-         $current_token = {type => HASH_TOKEN, value => ''};
+         $self->{t} = {type => HASH_TOKEN, value => '',
+                       line => $self->{line}, column => $self->{column}};
          $self->{state} = HASH_OPEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
          ## NOTE: |num|.
-         $current_token = {type => NUMBER_TOKEN, value => chr $self->{c}};
+         $self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c},
+                       line => $self->{line}, column => $self->{column}};
          $self->{state} = NUMBER_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x002E) { # .
          ## NOTE: |num|.
-         $current_token = {type => NUMBER_TOKEN, value => '0'};
+         $self->{t} = {type => NUMBER_TOKEN, value => '0',
+                       line => $self->{line}, column => $self->{column}};
          $self->{state} = NUMBER_FRACTION_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x002F) { # /
-         $self->{c} = $self->{get_char}->();
+         my ($l, $c) = ($self->{line}, $self->{column});
+         $self->{c} = $self->{get_char}->($self);
          if ($self->{c} == 0x002A) { # *
            C: {
-             $self->{c} = $self->{get_char}->();
+             $self->{c} = $self->{get_char}->($self);
              if ($self->{c} == 0x002A) { # *
                D: {
-                 $self->{c} = $self->{get_char}->();
+                 $self->{c} = $self->{get_char}->($self);
                  if ($self->{c} == 0x002F) { # /
                    #
                  } elsif ($self->{c} == 0x002A) { # *
-Line 178 
 sub get_next_token ($) {
+Line 285 
 sub get_next_token ($) {
            } # C
            # stay in the state.
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
            redo A;
          } else {
            # stay in the state.
            # reprocess
-           return {type => DELIM_STATE, value => '/'};
+           return {type => DELIM_TOKEN, value => '/', line => $l, column => $c};
            #redo A;
          }
        } elsif ($self->{c} == 0x003C) { # <
+         my ($l, $c) = ($self->{line}, $self->{column});
          ## NOTE: |CDO|
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          if ($self->{c} == 0x0021) { # !
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
-           if ($self->{c} == 0x002C) { # -
+           if ($self->{c} == 0x002D) { # -
-             $self->{c} = $self->{get_char}->();
+             $self->{c} = $self->{get_char}->($self);
-             if ($self->{c} == 0x002C) { # -
+             if ($self->{c} == 0x002D) { # -
                $self->{state} = BEFORE_TOKEN_STATE;
-               $self->{c} = $self->{get_char}->();
+               $self->{c} = $self->{get_char}->($self);
-               return {type => CDO_TOKEN};
+               return {type => CDO_TOKEN, line => $l, column => $c};
                #redo A;
              } else {
-               unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
+               unshift @{$self->{token}},
+                   {type => EXCLAMATION_TOKEN, line => $l, column => $c + 1};
                ## NOTE: |-| in |ident| in |IDENT|
-               $current_token = {type => IDENT_TOKEN, value => '-'};
+               $self->{t} = {type => IDENT_TOKEN, value => '-',
+                             line => $l, column => $c + 2};
                $self->{state} = BEFORE_NMSTART_STATE;
                #reprocess
-               return {type => DELIM_TOKEN, value => '<'};
+               return {type => DELIM_TOKEN, value => '<',
+                       line => $l, column => $c};
                #redo A;
              }
            } else {
-             unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
+             unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN,
+                                         line => $l, column => $c + 1};
              $self->{state} = BEFORE_TOKEN_STATE;
              #reprocess
-             return {type => DELIM_TOKEN, value => '<'};
+             return {type => DELIM_TOKEN, value => '<',
+                     line => $l, column => $c};
              #redo A;
            }
          } else {
            $self->{state} = BEFORE_TOKEN_STATE;
            #reprocess
-           return {type => DELIM_TOKEN, value => '<'};
+           return {type => DELIM_TOKEN, value => '<',
+                   line => $l, column => $c};
            #redo A;
          }
        } elsif (my $t = {
-x003B => SEMICOLON_TOKEN, # ;
+x0021 => EXCLAMATION_TOKEN, # !
-x007B => LBRACE_TOKEN, # {
+x002D => MINUS_TOKEN, # -
-x007D => RBRACE_TOKEN, # }
+x002E => DOT_TOKEN, # .
-x0028 => LPAREN_TOKEN, # (
+x003A => COLON_TOKEN, # :
-x0029 => RPAREN_TOKEN, # )
+x003B => SEMICOLON_TOKEN, # ;
-x005B => LBRACKET_TOKEN, # [
+x003D => MATCH_TOKEN, # =
-x005D => RBRACKET_TOKEN, # ]
+x007B => LBRACE_TOKEN, # {
+x007D => RBRACE_TOKEN, # }
+x0028 => LPAREN_TOKEN, # (
+x0029 => RPAREN_TOKEN, # )
+x005B => LBRACKET_TOKEN, # [
+x005D => RBRACKET_TOKEN, # ]
                 }->{$self->{c}}) {
+         my ($l, $c) = ($self->{line}, $self->{column});
          # stay in the state
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return {type => $t};
+         return {type => $t, line => $l, column => $c};
          # redo A;
        } elsif ({
 x0020 => 1, # SP
-Line 240 
 sub get_next_token ($) {
+Line 360 
 sub get_next_token ($) {
 x000A => 1, # \n
 x000C => 1, # \f
                 }->{$self->{c}}) {
+         my ($l, $c) = ($self->{line}, $self->{column});
          W: {
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
            if ({
 x0020 => 1, # SP
 x0009 => 1, # \t
-Line 256 
 sub get_next_token ($) {
+Line 377 
 sub get_next_token ($) {
 x002C => COMMA_TOKEN, # ,
 x007E => TILDE_TOKEN, # ~
                             }->{$self->{c}}) {
+             my ($l, $c) = ($self->{line}, $self->{column});
              # stay in the state
-             $self->{c} = $self->{get_char}->();
+             $self->{c} = $self->{get_char}->($self);
-             return {type => $v};
+             return {type => $v, line => $l, column => $c};
              #redo A;
            } else {
              # stay in the state
              # reprocess
-             return {type => S_TOKEN};
+             return {type => S_TOKEN, line => $l, column => $c};
              #redo A;
            }
          } # W
-Line 273 
 sub get_next_token ($) {
+Line 395 
 sub get_next_token ($) {
 x0024 => SUFFIXMATCH_TOKEN, # $
 x002A => SUBSTRINGMATCH_TOKEN, # *
                         }->{$self->{c}}) {
+         my ($line, $column) = ($self->{line}, $self->{column});
          my $c = $self->{c};
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          if ($self->{c} == 0x003D) { # =
            # stay in the state
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
-           return {type => $v};
+           return {type => $v, line => $line, column => $column};
+           #redo A;
+         } elsif ($v = {
+x002A => STAR_TOKEN, # *
+x007C => VBAR_TOKEN, # |
+                       }->{$c}) {
+           # stay in the state.
+           # reprocess
+           return {type => $v, line => $line, column => $column};
            #redo A;
          } else {
            # stay in the state
            # reprocess
-           return {type => DELIM_TOKEN, value => chr $c};
+           return {type => DELIM_TOKEN, value => chr $c,
+                   line => $line, column => $column};
            #redo A;
          }
        } elsif ($self->{c} == 0x002B) { # +
+         my ($l, $c) = ($self->{line}, $self->{column});
          # stay in the state
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return {type => PLUS_TOKEN};
+         return {type => PLUS_TOKEN, line => $l, column => $c};
          #redo A;
        } elsif ($self->{c} == 0x003E) { # >
+         my ($l, $c) = ($self->{line}, $self->{column});
          # stay in the state
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return {type => GREATER_TOKEN};
+         return {type => GREATER_TOKEN, line => $l, column => $c};
          #redo A;
        } elsif ($self->{c} == 0x002C) { # ,
+         my ($l, $c) = ($self->{line}, $self->{column});
          # stay in the state
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return {type => COMMA_TOKEN};
+         return {type => COMMA_TOKEN, line => $l, column => $c};
          #redo A;
        } elsif ($self->{c} == 0x007E) { # ~
-         $self->{c} = $self->{get_char}->();
+         my ($l, $c) = ($self->{line}, $self->{column});
+         $self->{c} = $self->{get_char}->($self);
          if ($self->{c} == 0x003D) { # =
            # stay in the state
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
-           return {type => INCLUDES_TOKEN};
+           return {type => INCLUDES_TOKEN, line => $l, column => $c};
            #redo A;
          } else {
            # stay in the state
            # reprocess
-           return {type => TILDE_TOKEN};
+           return {type => TILDE_TOKEN, line => $l, column => $c};
            #redo A;
          }
        } elsif ($self->{c} == -1) {
          # stay in the state
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return {type => EOF_TOKEN};
+         return {type => EOF_TOKEN,
+                 line => $self->{line}, column => $self->{column}};
          #redo A;
        } else {
          # stay in the state
-         $current_token = {type => DELIM_TOKEN, value => chr $self->{c}};
+         $self->{t} = {type => DELIM_TOKEN, value => chr $self->{c},
-         $self->{c} = $self->{get_char}->();
+                       line => $self->{line}, column => $self->{column}};
-         return $current_token;
+         $self->{c} = $self->{get_char}->($self);
+         return $self->{t};
          #redo A;
        }
      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {
-Line 333 
 sub get_next_token ($) {
+Line 471 
 sub get_next_token ($) {
            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
            $self->{c} == 0x005F or # _
            $self->{c} > 0x007F) { # nonascii
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
-         $current_token->{type} = DIMENSION_TOKEN
+         $self->{t}->{type} = DIMENSION_TOKEN
-             if $current_token->{type} == NUMBER_TOKEN;
+             if $self->{t}->{type} == NUMBER_TOKEN;
          $self->{state} = NAME_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x005C) { # \
- ## TODO: 12-\X, 12-\{nl}
          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
-       } elsif ($self->{c} == 0x002D and # -
+       } elsif ($self->{c} == 0x002D) { # -
-                $current_token->{type} == IDENT_TOKEN) {
+         if ($self->{t}->{type} == IDENT_TOKEN) {
-         $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
-         if ($self->{c} == 0x003E) { # >
+           if ($self->{c} == 0x003E) { # >
-           $self->{state} = BEFORE_TOKEN_STATE;
+             $self->{state} = BEFORE_TOKEN_STATE;
-           $self->{c} = $self->{get_char}->();
+             $self->{c} = $self->{get_char}->($self);
-           return {type => CDC_TOKEN};
+             return {type => CDC_TOKEN,
-           #redo A;
+                     line => $self->{t}->{line},
+                     column => $self->{t}->{column}};
+             #redo A;
+           } else {
+             ## NOTE: |-|, |-|, $self->{c}
+             #$self->{t} = {type => IDENT_TOKEN, value => '-'};
+             $self->{t}->{column}++;
+             # stay in the state
+             # reconsume
+             return {type => MINUS_TOKEN,
+                     line => $self->{t}->{line},
+                     column => $self->{t}->{column} - 1};
+             #redo A;
+           }
+         } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {
+           my ($l, $c) = ($self->{line}, $self->{column}); # second '-'
+           $self->{c} = $self->{get_char}->($self);
+           if ($self->{c} == 0x003E) { # >
+             unshift @{$self->{token}}, {type => CDC_TOKEN};
+             $self->{t}->{type} = NUMBER_TOKEN;
+             $self->{t}->{value} = '';
+             $self->{state} = BEFORE_TOKEN_STATE;
+             $self->{c} = $self->{get_char}->($self);
+             return $self->{t};
+             #redo A;
+           } else {
+             ## NOTE: NUMBER, |-|, |-|, $self->{c}
+             my $t = $self->{t};
+             $t->{type} = NUMBER_TOKEN;
+             $t->{value} = '';
+             $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1,
+                           line => $l, column => $c};
+             unshift @{$self->{token}}, {type => MINUS_TOKEN,
+                                         line => $l, column => $c - 1};
+             # stay in the state
+             # reconsume
+             return $t;
+             #redo A;
+           }
          } else {
-           ## NOTE: |-|, |-|, $self->{c}
+           #
-           #$current_token = {type => IDENT_TOKEN, value => '-'};
-           # stay in the state
-           # reconsume
-           return {type => DELIM_TOKEN, value => '-'};
-           #redo A;
          }
        } else {
-         if ($current_token->{type} == NUMBER_TOKEN) {
+         #
-           ## NOTE: |-| after |NUMBER|.
+       }
-           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
-           $self->{state} = BEFORE_TOKEN_STATE;
+       if ($self->{t}->{type} == DIMENSION_TOKEN) {
-           # reconsume
+         ## NOTE: |-| after |NUMBER|.
-           $current_token->{value} = $current_token->{number};
+         unshift @{$self->{token}}, {type => MINUS_TOKEN,
-           delete $current_token->{number};
+                                     line => $self->{line},
-           return $current_token;
+                                     column => $self->{column} - 1};
-         } else {
+         ## BUG: column might be wrong if on the line boundary.
-           ## NOTE: |-| not followed by |nmstart|.
+         $self->{state} = BEFORE_TOKEN_STATE;
-           $self->{state} = BEFORE_TOKEN_STATE;
+         # reprocess
-           $self->{c} = $self->{get_char}->();
+         $self->{t}->{type} = NUMBER_TOKEN;
-           return {type => DELIM_TOKEN, value => '-'};
+         $self->{t}->{value} = '';
-         }
+         return $self->{t};
+       } else {
+         ## NOTE: |-| not followed by |nmstart|.
+         $self->{state} = BEFORE_TOKEN_STATE;
+         # reprocess
+         return {type => MINUS_TOKEN,
+                 line => $self->{line}, column => $self->{column} - 1};
+         ## BUG: column might be wrong if on the line boundary.
        }
      } elsif ($self->{state} == AFTER_AT_STATE) {
        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
            $self->{c} == 0x005F or # _
            $self->{c} > 0x007F) { # nonascii
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
          $self->{state} = NAME_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x002D) { # -
-         $current_token->{value} .= '-';
+         $self->{t}->{value} .= '-';
          $self->{state} = AFTER_AT_HYPHEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x005C) { # \
          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
          $self->{state} = BEFORE_TOKEN_STATE;
          # reprocess
-         return {type => DELIM_TOKEN, value => '@'};
+         return {type => DELIM_TOKEN, value => '@',
+                 line => $self->{t}->{line},
+                 column => $self->{t}->{column}};
        }
      } elsif ($self->{state} == AFTER_AT_HYPHEN_STATE) {
        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
            $self->{c} == 0x005F or # _
            $self->{c} > 0x007F) { # nonascii
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
          $self->{state} = NAME_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x002D) { # -
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          if ($self->{c} == 0x003E) { # >
            unshift @{$self->{token}}, {type => CDC_TOKEN};
            $self->{state} = BEFORE_TOKEN_STATE;
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
            return {type => DELIM_TOKEN, value => '@'};
            #redo A;
          } else {
-           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
+           unshift @{$self->{token}}, {type => MINUS_TOKEN};
-           $current_token = {type => IDENT_TOKEN, value => '-'};
+           $self->{t} = {type => IDENT_TOKEN, value => '-'};
            $self->{state} = BEFORE_NMSTART_STATE;
            # reprocess
            return {type => DELIM_TOKEN, value => '@'};
-Line 427 
 sub get_next_token ($) {
+Line 606 
 sub get_next_token ($) {
        } elsif ($self->{c} == 0x005C) { # \
          ## TODO: @-\{nl}
          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
-         unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
+         unshift @{$self->{token}}, {type => MINUS_TOKEN};
          $self->{state} = BEFORE_TOKEN_STATE;
          # reprocess
          return {type => DELIM_TOKEN, value => '@'};
-Line 438 
 sub get_next_token ($) {
+Line 617 
 sub get_next_token ($) {
      } elsif ($self->{state} == AFTER_NUMBER_STATE) {
        if ($self->{c} == 0x002D) { # -
          ## NOTE: |-| in |ident|.
-         $current_token->{value} = '-';
+         $self->{t}->{hyphen} = 1;
+         $self->{t}->{value} = '-';
+         $self->{t}->{type} = DIMENSION_TOKEN;
          $self->{state} = BEFORE_NMSTART_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
                 $self->{c} == 0x005F or # _
                 $self->{c} > 0x007F) { # nonascii
          ## NOTE: |nmstart| in |ident|.
-         $current_token->{value} = chr $self->{c};
+         $self->{t}->{value} = chr $self->{c};
-         $current_token->{type} = DIMENSION_TOKEN;
+         $self->{t}->{type} = DIMENSION_TOKEN;
          $self->{state} = NAME_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x005C) { # \
          ## NOTE: |nmstart| in |ident| in |IDENT|
-         $current_token->{value} = '';
+         $self->{t}->{value} = '';
+         $self->{t}->{type} = DIMENSION_TOKEN;
          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x0025) { # %
-         $current_token->{type} = PERCENTAGE_TOKEN;
+         $self->{t}->{type} = PERCENTAGE_TOKEN;
          $self->{state} = BEFORE_TOKEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return $current_token;
+         return $self->{t};
          #redo A;
        } else {
          $self->{state} = BEFORE_TOKEN_STATE;
          # reprocess
-         return $current_token;
+         return $self->{t};
          #redo A;
        }
      } elsif ($self->{state} == HASH_OPEN_STATE) {
-Line 478 
 sub get_next_token ($) {
+Line 660 
 sub get_next_token ($) {
            $self->{c} == 0x002D or # -
            $self->{c} == 0x005F or # _
            $self->{c} > 0x007F) { # nonascii
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
          $self->{state} = NAME_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x005C) { # \
          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
          $self->{state} = BEFORE_TOKEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         # reprocess
-         return {type => DELIM_TOKEN, value => '#'};
+         return {type => DELIM_TOKEN, value => '#',
+                 line => $self->{t}->{line},
+                 column => $self->{t}->{column}};
          #redo A;
        }
      } elsif ($self->{state} == NAME_STATE) {
-Line 500 
 sub get_next_token ($) {
+Line 684 
 sub get_next_token ($) {
            $self->{c} == 0x005F or # _
            $self->{c} == 0x002D or # -
            $self->{c} > 0x007F) { # nonascii
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
          # stay in the state
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x005C) { # \
          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x0028 and # (
-                $current_token->{type} == IDENT_TOKEN) { # (
+                $self->{t}->{type} == IDENT_TOKEN) { # (
-         my $func_name = $current_token->{value};
+         my $func_name = $self->{t}->{value};
          $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?
          if ($func_name eq 'url' or $func_name eq 'url-prefix') {
-           if ($current_token->{has_escape}) {
+           if ($self->{t}->{has_escape}) {
              ## TODO: warn
            }
-           $current_token->{type}
+           $self->{t}->{type}
                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;
-           $current_token->{value} = '';
+           $self->{t}->{value} = '';
            $self->{state} = URI_BEFORE_WSP_STATE;
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
            redo A;
          } else {
-           $current_token->{type} = FUNCTION_TOKEN;
+           $self->{t}->{type} = FUNCTION_TOKEN;
            $self->{state} = BEFORE_TOKEN_STATE;
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
-           return $current_token;
+           return $self->{t};
            #redo A;
          }
        } else {
          $self->{state} = BEFORE_TOKEN_STATE;
          # reconsume
-         return $current_token;
+         return $self->{t};
          #redo A;
        }
      } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {
-Line 543 
 sub get_next_token ($) {
+Line 727 
 sub get_next_token ($) {
 x000A => 1, # \n
 x000C => 1, # \f
               }->{$self->{c}}) {
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
        }
        if ($self->{c} == -1) {
-         $current_token->{type} = {
+         $self->{t}->{type} = {
              URI_TOKEN, URI_INVALID_TOKEN,
              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
-         }->{$current_token->{type}};
+         }->{$self->{t}->{type}};
          $self->{state} = BEFORE_TOKEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return $current_token;
+         return $self->{t};
          #redo A;
        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (
          ## TODO: Should we consider matches of "(" and ")"?
-         $current_token->{type} = {
+         $self->{t}->{type} = {
              URI_TOKEN, URI_INVALID_TOKEN,
              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
-         }->{$current_token->{type}};
+         }->{$self->{t}->{type}};
          $self->{state} = URI_UNQUOTED_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
          $self->{state} = STRING_STATE; $q = $self->{c};
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x0029) { # )
          $self->{state} = BEFORE_TOKEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return $current_token;
+         return $self->{t};
          #redo A;
        } elsif ($self->{c} == 0x005C) { # \
          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
          $self->{state} = URI_UNQUOTED_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        }
      } elsif ($self->{state} == URI_UNQUOTED_STATE) {
-Line 595 
 sub get_next_token ($) {
+Line 779 
 sub get_next_token ($) {
 x000C => 1, # \f
            }->{$self->{c}}) {
          $self->{state} = URI_AFTER_WSP_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == -1) {
-         $current_token->{type} = {
+         $self->{t}->{type} = {
              URI_TOKEN, URI_INVALID_TOKEN,
              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
-         }->{$current_token->{type}};
+         }->{$self->{t}->{type}};
          $self->{state} = BEFORE_TOKEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return $current_token;
+         return $self->{t};
          #redo A;
        } elsif ($self->{c} < 0x0020 or {
 x0022 => 1, # "
-Line 614 
 sub get_next_token ($) {
+Line 798 
 sub get_next_token ($) {
 x0028 => 1, # (
        }->{$self->{c}}) { # C0 or (
          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
-         $current_token->{type} = {
+         $self->{t}->{type} = {
              URI_TOKEN, URI_INVALID_TOKEN,
              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
-         }->{$current_token->{type}};
+         }->{$self->{t}->{type}};
          # stay in the state.
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x0029) { # )
          $self->{state} = BEFORE_TOKEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return $current_token;
+         return $self->{t};
          #redo A;
        } elsif ($self->{c} == 0x005C) { # \
          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
          # stay in the state.
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        }
      } elsif ($self->{state} == URI_AFTER_WSP_STATE) {
-Line 647 
 sub get_next_token ($) {
+Line 831 
 sub get_next_token ($) {
 x000C => 1, # \f
            }->{$self->{c}}) {
          # stay in the state.
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == -1) {
-         $current_token->{type} = {
+         $self->{t}->{type} = {
              URI_TOKEN, URI_INVALID_TOKEN,
              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
-         }->{$current_token->{type}};
+         }->{$self->{t}->{type}};
          $self->{state} = BEFORE_TOKEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return $current_token;
+         return $self->{t};
          #redo A;
        } elsif ($self->{c} == 0x0029) { # )
          $self->{state} = BEFORE_TOKEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
-         return $current_token;
+         return $self->{t};
          #redo A;
        } elsif ($self->{c} == 0x005C) { # \
          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
-         $current_token->{type} = {
+         $self->{t}->{type} = {
              URI_TOKEN, URI_INVALID_TOKEN,
              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
-         }->{$current_token->{type}};
+         }->{$self->{t}->{type}};
          # stay in the state.
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        }
      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {
-       $current_token->{has_escape} = 1;
+       $self->{t}->{has_escape} = 1;
        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
          ## NOTE: second character of |unicode| in |escape|.
          $char = $self->{c} - 0x0030;
          $self->{state} = ESCAPE_STATE; $i = 2;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif (0x0041 <= $self->{c} and $self->{c} <= 0x0046) { # A..F
          ## NOTE: second character of |unicode| in |escape|.
          $char = $self->{c} - 0x0041 + 0xA;
          $self->{state} = ESCAPE_STATE; $i = 2;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
          ## NOTE: second character of |unicode| in |escape|.
-         $char = $self->{c} - 0x0061 - 0xA;
+         $char = $self->{c} - 0x0061 + 0xA;
          $self->{state} = ESCAPE_STATE; $i = 2;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x000A or # \n
                 $self->{c} == 0x000C) { # \f
          if ($q == 0) {
-           ## NOTE: In |escape| in ... in |ident|.
+           #
-           $self->{state} = BEFORE_TOKEN_STATE;
-           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
-           return $current_token;
-           # reconsume
-           #redo A;
          } elsif ($q == 1) {
            ## NOTE: In |escape| in |URI|.
-           $current_token->{type} = {
+           $self->{t}->{type} = {
                URI_TOKEN, URI_INVALID_TOKEN,
                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
-           }->{$current_token->{type}};
+           }->{$self->{t}->{type}};
-           $current_token->{value} .= chr $self->{c};
+           $self->{t}->{value} .= chr $self->{c};
            $self->{state} = URI_UNQUOTED_STATE;
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
            redo A;
          } else {
            ## Note: In |nl| in ... in |string| or |ident|.
-           $current_token->{value} .= chr $self->{c};
            $self->{state} = STRING_STATE;
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
            redo A;
          }
        } elsif ($self->{c} == 0x000D) { # \r
          if ($q == 0) {
-           ## NOTE: In |escape| in ... in |ident|.
+           #
-           $self->{state} = BEFORE_TOKEN_STATE;
-           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
-           return $current_token;
-           # reconsume
-           #redo A;
          } elsif ($q == 1) {
-           $current_token->{type} = {
+           ## NOTE: In |escape| in |URI|.
+           $self->{t}->{type} = {
                URI_TOKEN, URI_INVALID_TOKEN,
                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
-           }->{$current_token->{type}};
+           }->{$self->{t}->{type}};
-           $current_token->{value} .= "\x0D\x0A";
+           $self->{state} = ESCAPE_BEFORE_LF_STATE;
-           $self->{state} = URI_UNQUOTED_STATE;
+           $self->{c} = $self->{get_char}->($self);
-           $self->{c} = $self->{get_char}->();
            redo A;
          } else {
            ## Note: In |nl| in ... in |string| or |ident|.
-           $current_token->{value} .= "\x0D\x0A";
            $self->{state} = ESCAPE_BEFORE_LF_STATE;
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
            redo A;
          }
+       } elsif ($self->{c} == -1) {
+         #
        } else {
          ## NOTE: second character of |escape|.
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
          $self->{state} = $q == 0 ? NAME_STATE :
              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        }
+       if ($q == 0) {
+         if ($self->{t}->{type} == DIMENSION_TOKEN) {
+           if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
+             $self->{state} = BEFORE_TOKEN_STATE;
+             # reprocess
+             unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
+                                         line => $self->{line},
+                                         column => $self->{column} - 2};
+             unshift @{$self->{token}}, {type => MINUS_TOKEN,
+                                         line => $self->{line},
+                                         column => $self->{column} - 1};
+             ## BUG: line and column might be wrong if they are on the
+             ## line boundary.
+             $self->{t}->{type} = NUMBER_TOKEN;
+             $self->{t}->{value} = '';
+             return $self->{t};
+             #redo A;
+           } elsif (length $self->{t}->{value}) {
+             $self->{state} = BEFORE_TOKEN_STATE;
+             # reprocess
+             unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
+                                         line => $self->{line},
+                                         column => $self->{column} - 1};
+             ## BUG: line and column might be wrong if they are on the
+             ## line boundary.
+             return $self->{t};
+             #redo A;
+           } else {
+             $self->{state} = BEFORE_TOKEN_STATE;
+             # reprocess
+             unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
+                                         line => $self->{line},
+                                         column => $self->{column} - 1};
+             ## BUG: line and column might be wrong if they are on the
+             ## line boundary.
+             $self->{t}->{type} = NUMBER_TOKEN;
+             $self->{t}->{value} = '';
+             return $self->{t};
+             #redo A;
+           }
+         } else {
+           if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
+             $self->{state} = BEFORE_TOKEN_STATE;
+             # reprocess
+             unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
+                                         line => $self->{line},
+                                         column => $self->{column} - 2};
+             return {type => MINUS_TOKEN,
+                     line => $self->{line},
+                     column => $self->{column} - 1};
+             ## BUG: line and column might be wrong if they are on the
+             ## line boundary.
+             #redo A;
+           } elsif (length $self->{t}->{value}) {
+             $self->{state} = BEFORE_TOKEN_STATE;
+             # reprocess
+             unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
+                                         line => $self->{line},
+                                         column => $self->{column} - 1};
+             ## BUG: line and column might be wrong if they are on the
+             ## line boundary.
+             return $self->{t};
+             #redo A;
+           } else {
+             $self->{state} = BEFORE_TOKEN_STATE;
+             # reprocess
+             return {type => DELIM_TOKEN, value => '\\',
+                     line => $self->{line},
+                     column => $self->{column} - 1};
+             ## BUG: line and column might be wrong if they are on the
+             ## line boundary.
+             #redo A;
+           }
+         }
+       } elsif ($q == 1) {
+         $self->{state} = URI_UNQUOTED_STATE;
+         $self->{c} = $self->{get_char}->($self);
+         redo A;
+       } else {
+         unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
+                                     line => $self->{line},
+                                     column => $self->{column} - 1};
+         ## BUG: line and column might be wrong if they are on the
+         ## line boundary.
+         $self->{t}->{type} = {
+           STRING_TOKEN, INVALID_TOKEN,
+           URI_TOKEN, URI_INVALID_TOKEN,
+           URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
+         }->{$self->{t}->{type}} || $self->{t}->{type};
+         $self->{state} = BEFORE_TOKEN_STATE;
+         # reprocess
+         return $self->{t};
+         #redo A;
+       }
      } elsif ($self->{state} == ESCAPE_STATE) {
        ## NOTE: third..seventh character of |unicode| in |escape|.
        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
          $char = $char * 0x10 + $self->{c} - 0x0030;
          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif (0x0041 <= $self->{c} and $self->{c} <= 0x0046) { # A..F
          $char = $char * 0x10 + $self->{c} - 0x0041 + 0xA;
          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
-         $char = $char * 0x10 + $self->{c} - 0x0061 - 0xA;
+         $char = $char * 0x10 + $self->{c} - 0x0061 + 0xA;
          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x0020 or # SP
                 $self->{c} == 0x000A or # \n
                 $self->{c} == 0x0009 or # \t
                 $self->{c} == 0x000C) { # \f
-         $current_token->{value} .= chr $char;
+         $self->{t}->{value} .= chr $char;
          $self->{state} = $q == 0 ? NAME_STATE :
              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x000D) { # \r
          $self->{state} = ESCAPE_BEFORE_LF_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
-         $current_token->{value} .= chr $char;
+         $self->{t}->{value} .= chr $char;
          $self->{state} = $q == 0 ? NAME_STATE :
              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
          # reconsume
-Line 806 
 sub get_next_token ($) {
+Line 1075 
 sub get_next_token ($) {
            $self->{c} == 0x000A or # \n
            $self->{c} == 0x0009 or # \t
            $self->{c} == 0x000C) { # \f
-         $current_token->{value} .= chr $char;
+         $self->{t}->{value} .= chr $char;
          $self->{state} = $q == 0 ? NAME_STATE :
              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x000D) { # \r
          $self->{state} = ESCAPE_BEFORE_NL_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
-         $current_token->{value} .= chr $char;
+         $self->{t}->{value} .= chr $char;
          $self->{state} = $q == 0 ? NAME_STATE :
              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
          # reconsume
          redo A;
        }
      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
-       ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
+       ## NOTE: |\n| in |\r\n| in |nl| in |escape|.
        if ($self->{c} == 0x000A) { # \n
-         $current_token->{value} .= chr $char;
          $self->{state} = $q == 0 ? NAME_STATE :
              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
-         $current_token->{value} .= chr $char;
          $self->{state} = $q == 0 ? NAME_STATE :
              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
-         # reconsume
+         # reprocess
          redo A;
        }
      } elsif ($self->{state} == STRING_STATE) {
-Line 844 
 sub get_next_token ($) {
+Line 1111 
 sub get_next_token ($) {
        ## Or, in |URI|.
        if ($self->{c} == 0x005C) { # \
          $self->{state} = ESCAPE_OPEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == $q) { # " | '
-         if ($current_token->{type} == STRING_TOKEN) {
+         if ($self->{t}->{type} == STRING_TOKEN) {
            $self->{state} = BEFORE_TOKEN_STATE;
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
-           return $current_token;
+           return $self->{t};
            #redo A;
          } else {
            $self->{state} = URI_AFTER_WSP_STATE;
-           $self->{c} = $self->{get_char}->();
+           $self->{c} = $self->{get_char}->($self);
            redo A;
          }
        } elsif ($self->{c} == 0x000A or # \n
                 $self->{c} == 0x000D or # \r
                 $self->{c} == 0x000C or # \f
                 $self->{c} == -1) {
-         $current_token->{type} = INVALID_TOKEN;
+         $self->{t}->{type} = {
+           STRING_TOKEN, INVALID_TOKEN,
+           INVALID_TOKEN, INVALID_TOKEN,
+           URI_TOKEN, URI_INVALID_TOKEN,
+           URI_INVALID_TOKEN, URI_INVALID_TOKEN,
+           URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
+           URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
+         }->{$self->{t}->{type}};
          $self->{state} = BEFORE_TOKEN_STATE;
          # reconsume
-         return $current_token;
+         return $self->{t};
          #redo A;
        } else {
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
          # stay in the state
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        }
      } elsif ($self->{state} == NUMBER_STATE) {
        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.
        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
          # stay in the state
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } elsif ($self->{c} == 0x002E) { # .
          $self->{state} = NUMBER_DOT_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
-         $current_token->{number} = $current_token->{value};
+         $self->{t}->{number} = $self->{t}->{value};
-         $current_token->{value} = '';
+         $self->{t}->{value} = '';
          $self->{state} = AFTER_NUMBER_STATE;
          # reprocess
          redo A;
-Line 893 
 sub get_next_token ($) {
+Line 1167 
 sub get_next_token ($) {
      } elsif ($self->{state} == NUMBER_DOT_STATE) {
        ## NOTE: The character immediately following |.| in |num|.
        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
-         $current_token->{value} .= '.' . chr $self->{c};
+         $self->{t}->{value} .= '.' . chr $self->{c};
          $self->{state} = NUMBER_DOT_NUMBER_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
-         unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};
+         unshift @{$self->{token}}, {type => DOT_TOKEN};
-         $current_token->{number} = $current_token->{value};
+         $self->{t}->{number} = $self->{t}->{value};
-         $current_token->{value} = '';
+         $self->{t}->{value} = '';
          $self->{state} = BEFORE_TOKEN_STATE;
          # reprocess
-         return $current_token;
+         return $self->{t};
          #redo A;
        }
      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {
        ## NOTE: The character immediately following |.| at the beginning of |num|.
        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
-         $current_token->{value} .= '.' . chr $self->{c};
+         $self->{t}->{value} .= '.' . chr $self->{c};
          $self->{state} = NUMBER_DOT_NUMBER_STATE;
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
          $self->{state} = BEFORE_TOKEN_STATE;
-         $self->{c} = $self->{get_char}->();
+         # reprocess
-         return {type => DELIM_TOKEN, value => '.'};
+         return {type => DOT_TOKEN,
+                 line => $self->{line}, column => $self->{column} - 1};
+         ## BUG: line and column might be wrong if they are on the
+         ## line boundary.
          #redo A;
        }
      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {
        ## NOTE: |[0-9]| in |num| after |.|.
        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
-         $current_token->{value} .= chr $self->{c};
+         $self->{t}->{value} .= chr $self->{c};
          # stay in the state
-         $self->{c} = $self->{get_char}->();
+         $self->{c} = $self->{get_char}->($self);
          redo A;
        } else {
-         $current_token->{number} = $current_token->{value};
+         $self->{t}->{number} = $self->{t}->{value};
-         $current_token->{value} = '';
+         $self->{t}->{value} = '';
          $self->{state} = AFTER_NUMBER_STATE;
          # reprocess
          redo A;
-Line 937 
 sub get_next_token ($) {
+Line 1214 
 sub get_next_token ($) {
        die "$0: Unknown state |$self->{state}|";
      }
    } # A
+ } # get_next_token
-   ## TODO: |URI|, |UNICODE-RANGE|, |COMMENT|
+ ## Returns a human-readable string form of the token |$t|.
+ ##
+ ## Invoked as a method; the invocant is discarded.  |$t| is a hash
+ ## reference with a |type| key holding one of the |*_TOKEN| constants
+ ## and, depending on the type, a |value| and/or |number| key.
+ ##
+ ## Token types that have no dedicated branch are rendered as |{TYPE}|,
+ ## where TYPE is the numeric type code.
+ sub serialize_token ($$) {
+   shift;
+   my $t = shift;
- } # get_next_token
+   ## NOTE: This function is not intended for roundtrip-able serialization.
+   ## (Strings are always written with |"|, comments lose their content,
+   ## and no escaping is applied to |value|.)
+   if ($t->{type} == IDENT_TOKEN) {
+     return $t->{value};
+   } elsif ($t->{type} == ATKEYWORD_TOKEN) {
+     return '@' . $t->{value};
+   } elsif ($t->{type} == HASH_TOKEN) {
+     return '#' . $t->{value};
+   } elsif ($t->{type} == FUNCTION_TOKEN) {
+     return $t->{value} . '(';
+   } elsif ($t->{type} == URI_TOKEN) {
+     return 'url(' . $t->{value} . ')';
+   } elsif ($t->{type} == URI_INVALID_TOKEN) {
+     return 'url(' . $t->{value};
+   } elsif ($t->{type} == URI_PREFIX_TOKEN) {
+     return 'url-prefix(' . $t->{value} . ')';
+   } elsif ($t->{type} == URI_PREFIX_INVALID_TOKEN) {
+     return 'url-prefix(' . $t->{value};
+   } elsif ($t->{type} == STRING_TOKEN) {
+     return '"' . $t->{value} . '"';
+   } elsif ($t->{type} == INVALID_TOKEN) {
+     return '"' . $t->{value};
+   } elsif ($t->{type} == NUMBER_TOKEN) {
+     return $t->{number};
+   } elsif ($t->{type} == DIMENSION_TOKEN) {
+     return $t->{number} . $t->{value};
+   } elsif ($t->{type} == PERCENTAGE_TOKEN) {
+     return $t->{number} . '%';
+   } elsif ($t->{type} == UNICODE_RANGE_TOKEN) {
+     return 'U+' . $t->{value};
+   } elsif ($t->{type} == DELIM_TOKEN) {
+     return $t->{value};
+   } elsif ($t->{type} == PLUS_TOKEN) {
+     return '+';
+   } elsif ($t->{type} == GREATER_TOKEN) {
+     return '>';
+   } elsif ($t->{type} == COMMA_TOKEN) {
+     return ',';
+   } elsif ($t->{type} == TILDE_TOKEN) {
+     return '~';
+   } elsif ($t->{type} == DASHMATCH_TOKEN) {
+     return '|=';
+   } elsif ($t->{type} == PREFIXMATCH_TOKEN) {
+     return '^=';
+   } elsif ($t->{type} == SUFFIXMATCH_TOKEN) {
+     return '$=';
+   } elsif ($t->{type} == SUBSTRINGMATCH_TOKEN) {
+     return '*=';
+   } elsif ($t->{type} == INCLUDES_TOKEN) {
+     return '~=';
+   } elsif ($t->{type} == SEMICOLON_TOKEN) {
+     return ';';
+   } elsif ($t->{type} == LBRACE_TOKEN) {
+     return '{';
+   } elsif ($t->{type} == RBRACE_TOKEN) {
+     return '}';
+   } elsif ($t->{type} == LPAREN_TOKEN) {
+     return '(';
+   } elsif ($t->{type} == RPAREN_TOKEN) {
+     return ')';
+   } elsif ($t->{type} == LBRACKET_TOKEN) {
+     return '[';
+   } elsif ($t->{type} == RBRACKET_TOKEN) {
+     return ']';
+   } elsif ($t->{type} == S_TOKEN) {
+     return ' ';
+   } elsif ($t->{type} == CDO_TOKEN) {
+     return '<!--';
+   } elsif ($t->{type} == CDC_TOKEN) {
+     return '-->';
+   } elsif ($t->{type} == COMMENT_TOKEN) {
+     return '/**/';
+   } elsif ($t->{type} == COMMENT_INVALID_TOKEN) {
+     return '/*';
+   } elsif ($t->{type} == EOF_TOKEN) {
+     return '{EOF}';
+   } elsif ($t->{type} == MINUS_TOKEN) {
+     return '-';
+   } elsif ($t->{type} == STAR_TOKEN) {
+     return '*';
+   } elsif ($t->{type} == VBAR_TOKEN) {
+     return '|';
+   } elsif ($t->{type} == DOT_TOKEN) {
+     ## The tokenizer emits |DOT_TOKEN| for a |.| that does not start a
+     ## number; without this branch it would be serialized as |{42}|.
+     return '.';
+   } elsif ($t->{type} == COLON_TOKEN) {
+     return ':';
+   } elsif ($t->{type} == MATCH_TOKEN) {
+     return '=';
+   } elsif ($t->{type} == EXCLAMATION_TOKEN) {
+     return '!';
+   } else {
+     ## Unknown or unsupported type: expose the raw type code.
+     return '{'.$t->{type}.'}';
+   }
+ } # serialize_token
+ =head1 LICENSE
+ Copyright 2007 Wakaba <w@suika.fam.cx>
+ This library is free software; you can redistribute it
+ and/or modify it under the same terms as Perl itself.
+ =cut
 ;
  # $Date$

 Legend:



Removed from v.1.4
 


changed lines


 
Added in v.1.19
 Legend:



Removed from v.1.4
 


changed lines


 
Added in v.1.19
-Removed from v.1.4
+Added in v.1.19

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24