/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.164 by wakaba, Sat Sep 13 06:33:39 2008 UTC revision 1.165 by wakaba, Sat Sep 13 07:51:33 2008 UTC
# Line 803  sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STAT Line 803  sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STAT
803  sub BOGUS_DOCTYPE_STATE () { 32 }  sub BOGUS_DOCTYPE_STATE () { 32 }
804  sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }  sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
805  sub SELF_CLOSING_START_TAG_STATE () { 34 }  sub SELF_CLOSING_START_TAG_STATE () { 34 }
806  sub CDATA_BLOCK_STATE () { 35 }  sub CDATA_SECTION_STATE () { 35 }
807  sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec  sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec
808  sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec  sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec
809  sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec  sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec
810  sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec  sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec
811    sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec
812    sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec
813    
814  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 }
815  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
# Line 862  sub _initialize_tokenizer ($) { Line 864  sub _initialize_tokenizer ($) {
864    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
865    #$self->{state_keyword}; # initialized when used    #$self->{state_keyword}; # initialized when used
866    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
867    undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE    undef $self->{current_token};
868    undef $self->{current_attribute};    undef $self->{current_attribute};
869    undef $self->{last_emitted_start_tag_name};    undef $self->{last_emitted_start_tag_name};
870    undef $self->{last_attribute_value_state};    undef $self->{last_attribute_value_state};
# Line 2151  sub _get_next_token ($) { Line 2153  sub _get_next_token ($) {
2153        } elsif ($self->{state_keyword} eq '[CDATA' and        } elsif ($self->{state_keyword} eq '[CDATA' and
2154                 $self->{next_char} == 0x005B) { # [                 $self->{next_char} == 0x005B) { # [
2155          !!!cp (135.2);          !!!cp (135.2);
2156          $self->{state} = CDATA_BLOCK_STATE;          $self->{current_token} = {type => CHARACTER_TOKEN,
2157                                      data => '',
2158                                      line => $self->{line_prev},
2159                                      column => $self->{column_prev} - 7};
2160            $self->{state} = CDATA_SECTION_STATE;
2161          !!!next-input-character;          !!!next-input-character;
2162          redo A;          redo A;
2163        } else {        } else {
# Line 2876  sub _get_next_token ($) { Line 2882  sub _get_next_token ($) {
2882          !!!next-input-character;          !!!next-input-character;
2883          redo A;          redo A;
2884        }        }
2885      } elsif ($self->{state} == CDATA_BLOCK_STATE) {      } elsif ($self->{state} == CDATA_SECTION_STATE) {
2886        my $s = '';        ## NOTE: "CDATA section state" in the spec is jointly implemented
2887          ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
2888          ## and |CDATA_SECTION_MSE2_STATE|.
2889                
2890        my ($l, $c) = ($self->{line}, $self->{column});        if ($self->{next_char} == 0x005D) { # ]
2891            !!!cp (221.1);
2892        CS: while ($self->{next_char} != -1) {          $self->{state} = CDATA_SECTION_MSE1_STATE;
2893          if ($self->{next_char} == 0x005D) { # ]          !!!next-input-character;
2894            !!!next-input-character;          redo A;
2895            if ($self->{next_char} == 0x005D) { # ]        } elsif ($self->{next_char} == -1) {
2896              !!!next-input-character;          $self->{state} = DATA_STATE;
2897              MDC: {          !!!next-input-character;
2898                if ($self->{next_char} == 0x003E) { # >          if (length $self->{current_token}->{data}) { # character
2899                  !!!cp (221.1);            !!!cp (221.2);
2900                  !!!next-input-character;            !!!emit ($self->{current_token}); # character
                 last CS;  
               } elsif ($self->{next_char} == 0x005D) { # ]  
                 !!!cp (221.2);  
                 $s .= ']';  
                 !!!next-input-character;  
                 redo MDC;  
               } else {  
                 !!!cp (221.3);  
                 $s .= ']]';  
                 #  
               }  
             } # MDC  
           } else {  
             !!!cp (221.4);  
             $s .= ']';  
             #  
           }  
2901          } else {          } else {
2902            !!!cp (221.5);            !!!cp (221.3);
2903            #            ## No token to emit. $self->{current_token} is discarded.
2904          }          }        
2905          $s .= chr $self->{next_char};          redo A;
2906          } else {
2907            !!!cp (221.4);
2908            $self->{current_token}->{data} .= chr $self->{next_char};
2909            ## Stay in the state.
2910          !!!next-input-character;          !!!next-input-character;
2911        } # CS          redo A;
2912          }
       $self->{state} = DATA_STATE;  
       ## next-input-character done or EOF, which is reconsumed.  
2913    
2914        if (length $s) {        ## ISSUE: "text tokens" in spec.
2915        } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
2916          if ($self->{next_char} == 0x005D) { # ]
2917            !!!cp (221.5);
2918            $self->{state} = CDATA_SECTION_MSE2_STATE;
2919            !!!next-input-character;
2920            redo A;
2921          } else {
2922          !!!cp (221.6);          !!!cp (221.6);
2923          !!!emit ({type => CHARACTER_TOKEN, data => $s,          $self->{current_token}->{data} .= ']';
2924                    line => $l, column => $c});          $self->{state} = CDATA_SECTION_STATE;
2925            ## Reconsume.
2926            redo A;
2927          }
2928        } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
2929          if ($self->{next_char} == 0x003E) { # >
2930            $self->{state} = DATA_STATE;
2931            !!!next-input-character;
2932            if (length $self->{current_token}->{data}) { # character
2933              !!!cp (221.7);
2934              !!!emit ($self->{current_token}); # character
2935            } else {
2936              !!!cp (221.8);
2937              ## No token to emit. $self->{current_token} is discarded.
2938            }
2939            redo A;
2940          } elsif ($self->{next_char} == 0x005D) { # ]
2941            !!!cp (221.9); # character
2942            $self->{current_token}->{data} .= ']'; ## Add first "]" of "]]]".
2943            ## Stay in the state.
2944            !!!next-input-character;
2945            redo A;
2946        } else {        } else {
2947          !!!cp (221.7);          !!!cp (221.11);
2948            $self->{current_token}->{data} .= ']]'; # character
2949            $self->{state} = CDATA_SECTION_STATE;
2950            ## Reconsume.
2951            redo A;
2952        }        }
   
       redo A;  
   
       ## ISSUE: "text tokens" in spec.  
       ## TODO: Streaming support  
2953      } else {      } else {
2954        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
2955      }      }

Legend:
Removed from v.1.164  
changed lines
  Added in v.1.165

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24