/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.18 by wakaba, Sun Oct 19 06:14:57 2008 UTC | revision 1.19 by wakaba, Sun Oct 19 07:19:00 2008 UTC
# Line 183  sub AFTER_NDATA_STATE () { 87 } Line 183  sub AFTER_NDATA_STATE () { 87 }
183  sub BEFORE_NOTATION_NAME_STATE () { 88 }  sub BEFORE_NOTATION_NAME_STATE () { 88 }
184  sub NOTATION_NAME_STATE () { 89 }  sub NOTATION_NAME_STATE () { 89 }
185  sub AFTER_NOTATION_NAME_STATE () { 90 }  sub AFTER_NOTATION_NAME_STATE () { 90 }
186  sub BOGUS_MD_STATE () { 91 }  sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 91 }
187    sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 92 }
188    sub ENTITY_VALUE_ENTITY_STATE () { 93 }
189    sub BOGUS_MD_STATE () { 94 }
190    
191  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
192  ## list and descriptions)  ## list and descriptions)
# Line 2262  sub _get_next_token ($) { Line 2265  sub _get_next_token ($) {
2265          $self->{kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2266          !!!next-input-character;          !!!next-input-character;
2267          redo A;          redo A;
2268  ## TODO: " and ' for ENTITY        } elsif ($self->{nc} == 0x0022 and # "
2269                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
2270                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
2271            !!!cp (167.21);
2272            $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
2273            $self->{ct}->{value} = ''; # ENTITY
2274            !!!next-input-character;
2275            redo A;
2276          } elsif ($self->{nc} == 0x0027 and # '
2277                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
2278                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
2279            !!!cp (167.22);
2280            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
2281            $self->{ct}->{value} = ''; # ENTITY
2282            !!!next-input-character;
2283            redo A;
2284        } elsif ($self->{is_xml} and        } elsif ($self->{is_xml} and
2285                 $self->{ct}->{type} == DOCTYPE_TOKEN and                 $self->{ct}->{type} == DOCTYPE_TOKEN and
2286                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
# Line 4669  sub _get_next_token ($) { Line 4687  sub _get_next_token ($) {
4687          !!!next-input-character;          !!!next-input-character;
4688          redo A;          redo A;
4689        }        }
4690        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
4691          if ($self->{nc} == 0x0022) { # "
4692            $self->{state} = AFTER_NOTATION_NAME_STATE;
4693            !!!next-input-character;
4694            redo A;
4695          } elsif ($self->{nc} == 0x0026) { # &
4696            $self->{prev_state} = $self->{state};
4697            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
4698            $self->{entity_add} = 0x0022; # "
4699            !!!next-input-character;
4700            redo A;
4701    ## TODO: %
4702          } elsif ($self->{nc} == -1) {
4703            !!!parse-error (type => 'unclosed entity value'); ## TODO: type
4704            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4705            ## Reconsume.
4706            !!!emit ($self->{ct}); # ENTITY
4707            redo A;
4708          } else {
4709            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
4710            !!!next-input-character;
4711            redo A;
4712          }
4713        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
4714          if ($self->{nc} == 0x0027) { # '
4715            $self->{state} = AFTER_NOTATION_NAME_STATE;
4716            !!!next-input-character;
4717            redo A;
4718          } elsif ($self->{nc} == 0x0026) { # &
4719            $self->{prev_state} = $self->{state};
4720            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
4721            $self->{entity_add} = 0x0027; # '
4722            !!!next-input-character;
4723            redo A;
4724    ## TODO: %
4725          } elsif ($self->{nc} == -1) {
4726            !!!parse-error (type => 'unclosed entity value'); ## TODO: type
4727            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4728            ## Reconsume.
4729            !!!emit ($self->{ct}); # ENTITY
4730            redo A;
4731          } else {
4732            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
4733            !!!next-input-character;
4734            redo A;
4735          }
4736        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
4737          ## TODO: XMLize
4738    
4739          if ($is_space->{$self->{nc}} or
4740              {
4741                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4742                $self->{entity_add} => 1,
4743              }->{$self->{nc}}) {
4744            ## Don't consume
4745            ## No error
4746            ## Return nothing.
4747            #
4748          } elsif ($self->{nc} == 0x0023) { # #
4749            $self->{ca} = $self->{ct};
4750            $self->{state} = ENTITY_HASH_STATE;
4751            $self->{kwd} = '#';
4752            !!!next-input-character;
4753            redo A;
4754          } elsif ((0x0041 <= $self->{nc} and
4755                    $self->{nc} <= 0x005A) or # A..Z
4756                   (0x0061 <= $self->{nc} and
4757                    $self->{nc} <= 0x007A)) { # a..z
4758            #
4759          } else {
4760            !!!parse-error (type => 'bare ero');
4761            ## Return nothing.
4762            #
4763          }
4764    
4765          $self->{ct}->{value} .= '&';
4766          $self->{state} = $self->{prev_state};
4767          ## Reconsume.
4768          redo A;
4769      } elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) {      } elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) {
4770        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4771          ## Stay in the state.          ## Stay in the state.
# Line 4691  sub _get_next_token ($) { Line 4788  sub _get_next_token ($) {
4788          ## Reconsume.          ## Reconsume.
4789          redo A;          redo A;
4790        }        }
   
   
4791      } elsif ($self->{state} == BOGUS_MD_STATE) {      } elsif ($self->{state} == BOGUS_MD_STATE) {
4792        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
4793          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.19

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24