/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.6 by wakaba, Sat Sep 8 05:57:05 2007 UTC | revision 1.11 by wakaba, Sat Sep 8 15:20:41 2007 UTC
# Line 102  sub get_next_token ($) { Line 102  sub get_next_token ($) {
102      if ($self->{state} == BEFORE_TOKEN_STATE) {      if ($self->{state} == BEFORE_TOKEN_STATE) {
103        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
104          ## NOTE: |-| in |ident| in |IDENT|          ## NOTE: |-| in |ident| in |IDENT|
105          $self->{t} = {type => IDENT_TOKEN, value => '-'};          $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
106          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
107          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
108          redo A;          redo A;
# Line 251  sub get_next_token ($) { Line 251  sub get_next_token ($) {
251          } else {          } else {
252            # stay in the state.            # stay in the state.
253            # reprocess            # reprocess
254            return {type => DELIM_STATE, value => '/'};            return {type => DELIM_TOKEN, value => '/'};
255            #redo A;            #redo A;
256          }                  }        
257        } elsif ($self->{c} == 0x003C) { # <        } elsif ($self->{c} == 0x003C) { # <
# Line 259  sub get_next_token ($) { Line 259  sub get_next_token ($) {
259          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
260          if ($self->{c} == 0x0021) { # !          if ($self->{c} == 0x0021) { # !
261            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
262            if ($self->{c} == 0x002C) { # -            if ($self->{c} == 0x002D) { # -
263              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->();
264              if ($self->{c} == 0x002C) { # -              if ($self->{c} == 0x002D) { # -
265                $self->{state} = BEFORE_TOKEN_STATE;                $self->{state} = BEFORE_TOKEN_STATE;
266                $self->{c} = $self->{get_char}->();                $self->{c} = $self->{get_char}->();
267                return {type => CDO_TOKEN};                return {type => CDO_TOKEN};
# Line 408  sub get_next_token ($) { Line 408  sub get_next_token ($) {
408          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
409          redo A;          redo A;
410        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
 ## TODO: 12-\X, 12-\{nl}  
411          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
412          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
413          redo A;          redo A;
414        } elsif ($self->{c} == 0x002D and # -        } elsif ($self->{c} == 0x002D) { # -
415                 $self->{t}->{type} == IDENT_TOKEN) {          if ($self->{t}->{type} == IDENT_TOKEN) {
         $self->{c} = $self->{get_char}->();  
         if ($self->{c} == 0x003E) { # >  
           $self->{state} = BEFORE_TOKEN_STATE;  
416            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
417            return {type => CDC_TOKEN};            if ($self->{c} == 0x003E) { # >
418            #redo A;              $self->{state} = BEFORE_TOKEN_STATE;
419                $self->{c} = $self->{get_char}->();
420                return {type => CDC_TOKEN};
421                #redo A;
422              } else {
423                ## NOTE: |-|, |-|, $self->{c}
424                #$self->{t} = {type => IDENT_TOKEN, value => '-'};
425                # stay in the state
426                # reconsume
427                return {type => DELIM_TOKEN, value => '-'};
428                #redo A;
429              }
430            } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {
431              $self->{c} = $self->{get_char}->();
432              if ($self->{c} == 0x003E) { # >
433                unshift @{$self->{token}}, {type => CDC_TOKEN};
434                $self->{t}->{type} = NUMBER_TOKEN;
435                $self->{t}->{value} = '';
436                $self->{state} = BEFORE_TOKEN_STATE;
437                $self->{c} = $self->{get_char}->();
438                return $self->{t};
439                #redo A;
440              } else {
441                ## NOTE: |-|, |-|, $self->{c}
442                my $t = $self->{t};
443                $t->{type} = NUMBER_TOKEN;
444                $t->{value} = '';
445                $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
446                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
447                # stay in the state
448                # reconsume
449                return $t;
450                #redo A;
451              }
452          } else {          } else {
453            ## NOTE: |-|, |-|, $self->{c}            #
           #$self->{t} = {type => IDENT_TOKEN, value => '-'};  
           # stay in the state  
           # reconsume  
           return {type => DELIM_TOKEN, value => '-'};  
           #redo A;  
454          }          }
455        } else {        } else {
456          if ($self->{t}->{type} == NUMBER_TOKEN) {          #
457            ## NOTE: |-| after |NUMBER|.        }
458            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};        
459            $self->{state} = BEFORE_TOKEN_STATE;        if ($self->{t}->{type} == DIMENSION_TOKEN) {
460            # reconsume          ## NOTE: |-| after |NUMBER|.
461            $self->{t}->{value} = $self->{t}->{number};          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
462            delete $self->{t}->{number};          $self->{state} = BEFORE_TOKEN_STATE;
463            return $self->{t};          # reprocess
464          } else {          $self->{t}->{type} = NUMBER_TOKEN;
465            ## NOTE: |-| not followed by |nmstart|.          $self->{t}->{value} = '';
466            $self->{state} = BEFORE_TOKEN_STATE;          return $self->{t};
467            $self->{c} = $self->{get_char}->();        } else {
468            return {type => DELIM_TOKEN, value => '-'};          ## NOTE: |-| not followed by |nmstart|.
469          }          $self->{state} = BEFORE_TOKEN_STATE;
470            # reprocess
471            return {type => DELIM_TOKEN, value => '-'};
472        }        }
473      } elsif ($self->{state} == AFTER_AT_STATE) {      } elsif ($self->{state} == AFTER_AT_STATE) {
474        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
# Line 506  sub get_next_token ($) { Line 532  sub get_next_token ($) {
532      } elsif ($self->{state} == AFTER_NUMBER_STATE) {      } elsif ($self->{state} == AFTER_NUMBER_STATE) {
533        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
534          ## NOTE: |-| in |ident|.          ## NOTE: |-| in |ident|.
535            $self->{t}->{hyphen} = 1;
536          $self->{t}->{value} = '-';          $self->{t}->{value} = '-';
537            $self->{t}->{type} = DIMENSION_TOKEN;
538          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
539          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
540          redo A;          redo A;
# Line 523  sub get_next_token ($) { Line 551  sub get_next_token ($) {
551        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
552          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
553          $self->{t}->{value} = '';          $self->{t}->{value} = '';
554            $self->{t}->{type} = DIMENSION_TOKEN;
555          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
556          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
557          redo A;          redo A;
# Line 556  sub get_next_token ($) { Line 585  sub get_next_token ($) {
585          redo A;          redo A;
586        } else {        } else {
587          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
588          $self->{c} = $self->{get_char}->();          # reprocess
589          return {type => DELIM_TOKEN, value => '#'};          return {type => DELIM_TOKEN, value => '#'};
590          #redo A;          #redo A;
591        }        }
# Line 765  sub get_next_token ($) { Line 794  sub get_next_token ($) {
794          redo A;          redo A;
795        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
796          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
797          $char = $self->{c} - 0x0061 - 0xA;          $char = $self->{c} - 0x0061 + 0xA;
798          $self->{state} = ESCAPE_STATE; $i = 2;          $self->{state} = ESCAPE_STATE; $i = 2;
799          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
800          redo A;          redo A;
801        } elsif ($self->{c} == 0x000A or # \n        } elsif ($self->{c} == 0x000A or # \n
802                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
803          if ($q == 0) {          if ($q == 0) {
804            ## NOTE: In |escape| in ... in |ident|.            #
           $self->{state} = BEFORE_TOKEN_STATE;  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};  
           return $self->{t};  
           # reconsume  
           #redo A;  
805          } elsif ($q == 1) {          } elsif ($q == 1) {
806            ## NOTE: In |escape| in |URI|.            ## NOTE: In |escape| in |URI|.
807            $self->{t}->{type} = {            $self->{t}->{type} = {
# Line 799  sub get_next_token ($) { Line 823  sub get_next_token ($) {
823          }          }
824        } elsif ($self->{c} == 0x000D) { # \r        } elsif ($self->{c} == 0x000D) { # \r
825          if ($q == 0) {          if ($q == 0) {
826            ## NOTE: In |escape| in ... in |ident|.            #
           $self->{state} = BEFORE_TOKEN_STATE;  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};  
           return $self->{t};  
           # reconsume  
           #redo A;  
827          } elsif ($q == 1) {          } elsif ($q == 1) {
828              ## NOTE: In |escape| in |URI|.
829            $self->{t}->{type} = {            $self->{t}->{type} = {
830                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
831                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
832                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
833                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
834            }->{$self->{t}->{type}};            }->{$self->{t}->{type}};
835            $self->{t}->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D";
836            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
837            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
838            redo A;            redo A;
839          } else {          } else {
840            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
841            $self->{t}->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D";
842            $self->{state} = ESCAPE_BEFORE_LF_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
843            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
844            redo A;            redo A;
845          }          }
846          } elsif ($self->{c} == -1) {
847            #
848        } else {        } else {
849          ## NOTE: second character of |escape|.          ## NOTE: second character of |escape|.
850          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
# Line 831  sub get_next_token ($) { Line 853  sub get_next_token ($) {
853          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
854          redo A;          redo A;
855        }        }
856    
857          if ($q == 0) {
858            if ($self->{t}->{type} == DIMENSION_TOKEN) {
859              if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
860                $self->{state} = BEFORE_TOKEN_STATE;
861                # reprocess
862                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
863                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
864                $self->{t}->{type} = NUMBER_TOKEN;
865                $self->{t}->{value} = '';
866                return $self->{t};
867                #redo A;
868              } elsif (length $self->{t}->{value}) {
869                $self->{state} = BEFORE_TOKEN_STATE;
870                # reprocess
871                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
872                return $self->{t};
873                #redo A;
874              } else {
875                $self->{state} = BEFORE_TOKEN_STATE;
876                # reprocess
877                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
878                $self->{t}->{type} = NUMBER_TOKEN;
879                $self->{t}->{value} = '';
880                return $self->{t};
881                #redo A;
882              }
883            } else {
884              if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
885                $self->{state} = BEFORE_TOKEN_STATE;
886                # reprocess
887                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
888                return {type => DELIM_TOKEN, value => '-'};
889                #redo A;
890              } elsif (length $self->{t}->{value}) {
891                $self->{state} = BEFORE_TOKEN_STATE;
892                # reprocess
893                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
894                return $self->{t};
895                #redo A;
896              } else {
897                $self->{state} = BEFORE_TOKEN_STATE;
898                # reprocess
899                return {type => DELIM_TOKEN, value => '\\'};
900                #redo A;
901              }
902            }
903          } elsif ($q == 1) {
904            $self->{state} = URI_UNQUOTED_STATE;
905            $self->{c} = $self->{get_char}->();
906            redo A;
907          } else {
908            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
909            $self->{t}->{type} = {
910              STRING_TOKEN, INVALID_TOKEN,
911              URI_TOKEN, URI_INVALID_TOKEN,
912              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
913            }->{$self->{t}->{type}} || $self->{t}->{type};
914            $self->{state} = BEFORE_TOKEN_STATE;
915            # reprocess
916            return $self->{t};
917            #redo A;
918          }
919      } elsif ($self->{state} == ESCAPE_STATE) {      } elsif ($self->{state} == ESCAPE_STATE) {
920        ## NOTE: third..seventh character of |unicode| in |escape|.        ## NOTE: third..seventh character of |unicode| in |escape|.
921        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
# Line 844  sub get_next_token ($) { Line 929  sub get_next_token ($) {
929          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
930          redo A;          redo A;
931        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
932          $char = $char * 0x10 + $self->{c} - 0x0061 - 0xA;          $char = $char * 0x10 + $self->{c} - 0x0061 + 0xA;
933          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
934          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
935          redo A;          redo A;
# Line 893  sub get_next_token ($) { Line 978  sub get_next_token ($) {
978      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
979        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
980        if ($self->{c} == 0x000A) { # \n        if ($self->{c} == 0x000A) { # \n
981          $self->{t}->{value} .= chr $char;          $self->{t}->{value} .= chr $self->{c};
982          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
983              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
984          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
985          redo A;          redo A;
986        } else {        } else {
         $self->{t}->{value} .= chr $char;  
987          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
988              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
989          # reconsume          # reprocess
990          redo A;          redo A;
991        }        }
992      } elsif ($self->{state} == STRING_STATE) {      } elsif ($self->{state} == STRING_STATE) {
# Line 929  sub get_next_token ($) { Line 1013  sub get_next_token ($) {
1013                 $self->{c} == 0x000D or # \r                 $self->{c} == 0x000D or # \r
1014                 $self->{c} == 0x000C or # \f                 $self->{c} == 0x000C or # \f
1015                 $self->{c} == -1) {                 $self->{c} == -1) {
1016          $self->{t}->{type} = INVALID_TOKEN;          $self->{t}->{type} = {
1017              STRING_TOKEN, INVALID_TOKEN,
1018              INVALID_TOKEN, INVALID_TOKEN,
1019              URI_TOKEN, URI_INVALID_TOKEN,
1020              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
1021              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
1022              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
1023            }->{$self->{t}->{type}};
1024          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1025          # reconsume          # reconsume
1026          return $self->{t};          return $self->{t};
# Line 966  sub get_next_token ($) { Line 1057  sub get_next_token ($) {
1057          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1058          redo A;          redo A;
1059        } else {        } else {
1060          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '.'};
1061          $self->{t}->{number} = $self->{t}->{value};          $self->{t}->{number} = $self->{t}->{value};
1062          $self->{t}->{value} = '';          $self->{t}->{value} = '';
1063          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
# Line 983  sub get_next_token ($) { Line 1074  sub get_next_token ($) {
1074          redo A;          redo A;
1075        } else {        } else {
1076          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1077          $self->{c} = $self->{get_char}->();          # reprocess
1078          return {type => DELIM_TOKEN, value => '.'};          return {type => DELIM_TOKEN, value => '.'};
1079          #redo A;          #redo A;
1080        }        }

Legend:
Removed from v.1.6  
Changed lines
  Added in v.1.11

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24