/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.7 by wakaba, Tue Oct 14 15:25:50 2008 UTC | revision 1.8 by wakaba, Wed Oct 15 04:38:22 2008 UTC
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 114  sub HEXREF_HEX_STATE () { 48 }
114  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
115  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
116    
117    ## XML states
118    sub PI_STATE () { 51 }
119    sub PI_TARGET_STATE () { 52 }
120    sub PI_TARGET_AFTER_STATE () { 53 }
121    sub PI_DATA_STATE () { 54 }
122    sub PI_AFTER_STATE () { 55 }
123    sub PI_DATA_AFTER_STATE () { 56 }
124    
125  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
126  ## list and descriptions)  ## list and descriptions)
127    
# Line 500  sub _get_next_token ($) { Line 508  sub _get_next_token ($) {
508    
509            redo A;            redo A;
510          } elsif ($self->{nc} == 0x003F) { # ?          } elsif ($self->{nc} == 0x003F) { # ?
511            !!!cp (22);            if ($self->{is_xml}) {
512            !!!parse-error (type => 'pio',              !!!cp (22.1);
513                            line => $self->{line_prev},              $self->{state} = PI_STATE;
514                            column => $self->{column_prev});              !!!next-input-character;
515            $self->{state} = BOGUS_COMMENT_STATE;              redo A;
516            $self->{ct} = {type => COMMENT_TOKEN, data => '',            } else {
517                                      line => $self->{line_prev},              !!!cp (22);
518                                      column => $self->{column_prev},              !!!parse-error (type => 'pio',
519                                     };                              line => $self->{line_prev},
520            ## $self->{nc} is intentionally left as is                              column => $self->{column_prev});
521            redo A;              $self->{state} = BOGUS_COMMENT_STATE;
522                $self->{ct} = {type => COMMENT_TOKEN, data => '',
523                               line => $self->{line_prev},
524                               column => $self->{column_prev},
525                              };
526                ## $self->{nc} is intentionally left as is
527                redo A;
528              }
529          } else {          } else {
530            !!!cp (23);            !!!cp (23);
531            !!!parse-error (type => 'bare stago',            !!!parse-error (type => 'bare stago',
# Line 1558  sub _get_next_token ($) { Line 1573  sub _get_next_token ($) {
1573          redo A;          redo A;
1574        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{s_kwd} eq '[CDATA' and
1575                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
         !!!cp (135.2);  
   
1576          if ($self->{is_xml} and          if ($self->{is_xml} and
1577              not $self->{tainted} and              not $self->{tainted} and
1578              @{$self->{open_elements} or []} == 0) {              @{$self->{open_elements} or []} == 0) {
1579              !!!cp (135.2);
1580            !!!parse-error (type => 'cdata outside of root element',            !!!parse-error (type => 'cdata outside of root element',
1581                            line => $self->{line_prev},                            line => $self->{line_prev},
1582                            column => $self->{column_prev} - 7);                            column => $self->{column_prev} - 7);
1583            $self->{tainted} = 1;            $self->{tainted} = 1;
1584            } else {
1585              !!!cp (135.21);
1586          }          }
1587    
1588          $self->{ct} = {type => CHARACTER_TOKEN,          $self->{ct} = {type => CHARACTER_TOKEN,
# Line 2343  sub _get_next_token ($) { Line 2359  sub _get_next_token ($) {
2359          redo A;          redo A;
2360        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2361          if ($self->{is_xml}) {          if ($self->{is_xml}) {
2362              !!!cp (221.11);
2363            !!!parse-error (type => 'no mse'); ## TODO: type            !!!parse-error (type => 'no mse'); ## TODO: type
2364            } else {
2365              !!!cp (221.12);
2366          }          }
2367    
2368          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 2780  sub _get_next_token ($) { Line 2799  sub _get_next_token ($) {
2799          ## Reconsume.          ## Reconsume.
2800          redo A;          redo A;
2801        }        }
2802    
2803        ## XML-only states
2804    
2805        } elsif ($self->{state} == PI_STATE) {
2806          if ($is_space->{$self->{nc}} or
2807              $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"
2808              $self->{nc} == -1) {
2809            !!!parse-error (type => 'bare pio', ## TODO: type
2810                            line => $self->{line_prev},
2811                            column => $self->{column_prev}
2812                                - 1 * ($self->{nc} != -1));
2813            $self->{state} = BOGUS_COMMENT_STATE;
2814            ## Reconsume.
2815            $self->{ct} = {type => COMMENT_TOKEN,
2816                           data => '?',
2817                           line => $self->{line_prev},
2818                           column => $self->{column_prev}
2819                               - 1 * ($self->{nc} != -1),
2820                          };
2821            redo A;
2822          } else {
2823            $self->{ct} = {type => PI_TOKEN,
2824                           target => chr $self->{nc},
2825                           data => '',
2826                           line => $self->{line_prev},
2827                           column => $self->{column_prev} - 1,
2828                          };
2829            $self->{state} = PI_TARGET_STATE;
2830            !!!next-input-character;
2831            redo A;
2832          }
2833        } elsif ($self->{state} == PI_TARGET_STATE) {
2834          if ($is_space->{$self->{nc}}) {
2835            $self->{state} = PI_TARGET_AFTER_STATE;
2836            !!!next-input-character;
2837            redo A;
2838          } elsif ($self->{nc} == -1) {
2839            !!!parse-error (type => 'no pic'); ## TODO: type
2840            $self->{state} = DATA_STATE;
2841            $self->{s_kwd} = '';
2842            ## Reconsume.
2843            !!!emit ($self->{ct}); # pi
2844            redo A;
2845          } elsif ($self->{nc} == 0x003F) { # ?
2846            $self->{state} = PI_AFTER_STATE;
2847            !!!next-input-character;
2848            redo A;
2849          } else {
2850            ## XML5: typo ("tag name" -> "target")
2851            $self->{ct}->{target} .= chr $self->{nc}; # pi
2852            !!!next-input-character;
2853            redo A;
2854          }
2855        } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
2856          if ($is_space->{$self->{nc}}) {
2857            ## Stay in the state.
2858            !!!next-input-character;
2859            redo A;
2860          } else {
2861            $self->{state} = PI_DATA_STATE;
2862            ## Reprocess.
2863            redo A;
2864          }
2865        } elsif ($self->{state} == PI_DATA_STATE) {
2866          if ($self->{nc} == 0x003F) { # ?
2867            $self->{state} = PI_DATA_AFTER_STATE;
2868            !!!next-input-character;
2869            redo A;
2870          } elsif ($self->{nc} == -1) {
2871            !!!parse-error (type => 'no pic'); ## TODO: type
2872            $self->{state} = DATA_STATE;
2873            $self->{s_kwd} = '';
2874            ## Reprocess.
2875            !!!emit ($self->{ct}); # pi
2876            redo A;
2877          } else {
2878            $self->{ct}->{data} .= chr $self->{nc}; # pi
2879            $self->{read_until}->($self->{ct}->{data}, q[?],
2880                                  length $self->{ct}->{data});
2881            ## Stay in the state.
2882            !!!next-input-character;
2883            ## Reprocess.
2884            redo A;
2885          }
2886        } elsif ($self->{state} == PI_AFTER_STATE) {
2887          if ($self->{nc} == 0x003E) { # >
2888            $self->{state} = DATA_STATE;
2889            $self->{s_kwd} = '';
2890            !!!next-input-character;
2891            !!!emit ($self->{ct}); # pi
2892            redo A;
2893          } elsif ($self->{nc} == 0x003F) { # ?
2894            !!!parse-error (type => 'no s after target', ## TODO: type
2895                            line => $self->{line_prev},
2896                            column => $self->{column_prev}); ## XML5: no error
2897            $self->{ct}->{data} .= '?';
2898            $self->{state} = PI_DATA_AFTER_STATE;
2899            !!!next-input-character;
2900            redo A;
2901          } else {
2902            !!!parse-error (type => 'no s after target', ## TODO: type
2903                            line => $self->{line_prev},
2904                            column => $self->{column_prev}
2905                                + 1 * ($self->{nc} == -1)); ## XML5: no error
2906            $self->{ct}->{data} .= '?'; ## XML5: not appended
2907            $self->{state} = PI_DATA_STATE;
2908            ## Reprocess.
2909            redo A;
2910          }
2911        } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
2912          ## XML5: Same as "pi after state" in XML5
2913          if ($self->{nc} == 0x003E) { # >
2914            $self->{state} = DATA_STATE;
2915            $self->{s_kwd} = '';
2916            !!!next-input-character;
2917            !!!emit ($self->{ct}); # pi
2918            redo A;
2919          } elsif ($self->{nc} == 0x003F) { # ?
2920            $self->{ct}->{data} .= '?';
2921            ## Stay in the state.
2922            !!!next-input-character;
2923            redo A;
2924          } else {
2925            $self->{ct}->{data} .= '?'; ## XML5: not appended
2926            $self->{state} = PI_DATA_STATE;
2927            ## Reprocess.
2928            redo A;
2929          }
2930            
2931      } else {      } else {
2932        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
2933      }      }

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.8

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24