/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.7 by wakaba, Tue Oct 14 15:25:50 2008 UTC | revision 1.10 by wakaba, Wed Oct 15 08:51:02 2008 UTC
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 114  sub HEXREF_HEX_STATE () { 48 }
114  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
115  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
116    
117    ## XML states
118    sub PI_STATE () { 51 }
119    sub PI_TARGET_STATE () { 52 }
120    sub PI_TARGET_AFTER_STATE () { 53 }
121    sub PI_DATA_STATE () { 54 }
122    sub PI_AFTER_STATE () { 55 }
123    sub PI_DATA_AFTER_STATE () { 56 }
124    
125  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
126  ## list and descriptions)  ## list and descriptions)
127    
# Line 499  sub _get_next_token ($) { Line 507  sub _get_next_token ($) {
507        return  ($token);        return  ($token);
508        redo A;        redo A;
509      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
510          ## XML5: "tag state".
511    
512        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
513          if ($self->{nc} == 0x002F) { # /          if ($self->{nc} == 0x002F) { # /
514                        
# Line 630  sub _get_next_token ($) { Line 640  sub _get_next_token ($) {
640    
641            redo A;            redo A;
642          } elsif ($self->{nc} == 0x003F) { # ?          } elsif ($self->{nc} == 0x003F) { # ?
643                        if ($self->{is_xml}) {
644            $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',              
645                            line => $self->{line_prev},              $self->{state} = PI_STATE;
646                            column => $self->{column_prev});              
647            $self->{state} = BOGUS_COMMENT_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
648            $self->{ct} = {type => COMMENT_TOKEN, data => '',        $self->{line_prev} = $self->{line};
649                                      line => $self->{line_prev},        $self->{column_prev} = $self->{column};
650                                      column => $self->{column_prev},        $self->{column}++;
651                                     };        $self->{nc}
652            ## $self->{nc} is intentionally left as is            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
653            redo A;      } else {
654          } else {        $self->{set_nc}->($self);
655        }
656      
657                redo A;
658              } else {
659                
660                $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',
661                                line => $self->{line_prev},
662                                column => $self->{column_prev});
663                $self->{state} = BOGUS_COMMENT_STATE;
664                $self->{ct} = {type => COMMENT_TOKEN, data => '',
665                               line => $self->{line_prev},
666                               column => $self->{column_prev},
667                              };
668                ## $self->{nc} is intentionally left as is
669                redo A;
670              }
671            } elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) {
672                        
673            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',
674                            line => $self->{line_prev},                            line => $self->{line_prev},
# Line 656  sub _get_next_token ($) { Line 683  sub _get_next_token ($) {
683                     });                     });
684    
685            redo A;            redo A;
686            } else {
687              ## XML5: "<:" is a parse error.
688              
689              $self->{ct} = {type => START_TAG_TOKEN,
690                                        tag_name => chr ($self->{nc}),
691                                        line => $self->{line_prev},
692                                        column => $self->{column_prev}};
693              $self->{state} = TAG_NAME_STATE;
694              
695        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
696          $self->{line_prev} = $self->{line};
697          $self->{column_prev} = $self->{column};
698          $self->{column}++;
699          $self->{nc}
700              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
701        } else {
702          $self->{set_nc}->($self);
703        }
704      
705              redo A;
706          }          }
707        } else {        } else {
708          die "$0: $self->{content_model} in tag open";          die "$0: $self->{content_model} in tag open";
# Line 664  sub _get_next_token ($) { Line 711  sub _get_next_token ($) {
711        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
712        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
713    
714          ## XML5: "end tag state".
715    
716        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
717        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
718          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
# Line 725  sub _get_next_token ($) { Line 774  sub _get_next_token ($) {
774        
775          redo A;          redo A;
776        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
777          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',
778                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
779                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
780          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
781          $self->{s_kwd} = '';          $self->{s_kwd} = '';
782                    if ($self->{is_xml}) {
783              
784              ## XML5: No parse error.
785              
786              ## NOTE: This parser raises a parse error, since it supports
787              ## XML1, not XML5.
788    
789              ## NOTE: A short end tag token.
790              my $ct = {type => END_TAG_TOKEN,
791                        tag_name => '',
792                        line => $self->{line_prev},
793                        column => $self->{column_prev} - 1,
794                       };
795              
796      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
797        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
798        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 742  sub _get_next_token ($) { Line 803  sub _get_next_token ($) {
803        $self->{set_nc}->($self);        $self->{set_nc}->($self);
804      }      }
805        
806              return  ($ct);
807            } else {
808              
809              
810        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
811          $self->{line_prev} = $self->{line};
812          $self->{column_prev} = $self->{column};
813          $self->{column}++;
814          $self->{nc}
815              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
816        } else {
817          $self->{set_nc}->($self);
818        }
819      
820            }
821          redo A;          redo A;
822        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
823                    
# Line 755  sub _get_next_token ($) { Line 831  sub _get_next_token ($) {
831                   });                   });
832    
833          redo A;          redo A;
834        } else {        } elsif (not $self->{is_xml} or
835                   $is_space->{$self->{nc}}) {
836                    
837          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag',
838                            line => $self->{line_prev}, # "<" of "</"
839                            column => $self->{column_prev} - 1);
840          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
841          $self->{ct} = {type => COMMENT_TOKEN, data => '',          $self->{ct} = {type => COMMENT_TOKEN, data => '',
842                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
# Line 770  sub _get_next_token ($) { Line 849  sub _get_next_token ($) {
849          ## generated from the bogus end tag, as defined in the          ## generated from the bogus end tag, as defined in the
850          ## "bogus comment state" entry.          ## "bogus comment state" entry.
851          redo A;          redo A;
852          } else {
853            ## XML5: "</:" is a parse error.
854            
855            $self->{ct} = {type => END_TAG_TOKEN,
856                           tag_name => chr ($self->{nc}),
857                           line => $l, column => $c};
858            $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
859            
860        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
861          $self->{line_prev} = $self->{line};
862          $self->{column_prev} = $self->{column};
863          $self->{column}++;
864          $self->{nc}
865              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
866        } else {
867          $self->{set_nc}->($self);
868        }
869      
870            redo A;
871        }        }
872      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
873        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;
# Line 2107  sub _get_next_token ($) { Line 2205  sub _get_next_token ($) {
2205                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2206                                    column => $self->{column_prev} - 2,                                    column => $self->{column_prev} - 2,
2207                                   };                                   };
2208          $self->{state} = COMMENT_START_STATE;          $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
2209                    
2210      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2211        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2170  sub _get_next_token ($) { Line 2268  sub _get_next_token ($) {
2268        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{s_kwd}) == 6 and
2269                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2270                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2271                    if ($self->{s_kwd} ne 'DOCTYP') {
2272              
2273              ## XML5: case-sensitive.
2274              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
2275                              text => 'DOCTYPE',
2276                              line => $self->{line_prev},
2277                              column => $self->{column_prev} - 5);
2278            } else {
2279              
2280            }
2281          $self->{state} = DOCTYPE_STATE;          $self->{state} = DOCTYPE_STATE;
2282          $self->{ct} = {type => DOCTYPE_TOKEN,          $self->{ct} = {type => DOCTYPE_TOKEN,
2283                                    quirks => 1,                                    quirks => 1,
# Line 2228  sub _get_next_token ($) { Line 2335  sub _get_next_token ($) {
2335          redo A;          redo A;
2336        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{s_kwd} eq '[CDATA' and
2337                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
           
   
2338          if ($self->{is_xml} and          if ($self->{is_xml} and
2339              not $self->{tainted} and              not $self->{tainted} and
2340              @{$self->{open_elements} or []} == 0) {              @{$self->{open_elements} or []} == 0) {
2341              
2342            $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',            $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',
2343                            line => $self->{line_prev},                            line => $self->{line_prev},
2344                            column => $self->{column_prev} - 7);                            column => $self->{column_prev} - 7);
2345            $self->{tainted} = 1;            $self->{tainted} = 1;
2346            } else {
2347              
2348          }          }
2349    
2350          $self->{ct} = {type => CHARACTER_TOKEN,          $self->{ct} = {type => CHARACTER_TOKEN,
# Line 2446  sub _get_next_token ($) { Line 2554  sub _get_next_token ($) {
2554          redo A;          redo A;
2555        }        }
2556      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2557          ## XML5: "comment dash state".
2558    
2559        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2560                    
2561          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
# Line 2511  sub _get_next_token ($) { Line 2621  sub _get_next_token ($) {
2621          redo A;          redo A;
2622        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2623                    
2624            ## XML5: Not a parse error.
2625          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2626                          line => $self->{line_prev},                          line => $self->{line_prev},
2627                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2540  sub _get_next_token ($) { Line 2651  sub _get_next_token ($) {
2651          redo A;          redo A;
2652        } else {        } else {
2653                    
2654            ## XML5: Not a parse error.
2655          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2656                          line => $self->{line_prev},                          line => $self->{line_prev},
2657                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 3625  sub _get_next_token ($) { Line 3737  sub _get_next_token ($) {
3737        ## NOTE: "CDATA section state" in the state is jointly implemented        ## NOTE: "CDATA section state" in the state is jointly implemented
3738        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
3739        ## and |CDATA_SECTION_MSE2_STATE|.        ## and |CDATA_SECTION_MSE2_STATE|.
3740    
3741          ## XML5: "CDATA state".
3742                
3743        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
3744                    
# Line 3643  sub _get_next_token ($) { Line 3757  sub _get_next_token ($) {
3757          redo A;          redo A;
3758        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3759          if ($self->{is_xml}) {          if ($self->{is_xml}) {
3760              
3761            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type
3762            } else {
3763              
3764          }          }
3765    
3766          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3767          $self->{s_kwd} = '';          $self->{s_kwd} = '';
3768                    ## Reconsume.
     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {  
       $self->{line_prev} = $self->{line};  
       $self->{column_prev} = $self->{column};  
       $self->{column}++;  
       $self->{nc}  
           = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);  
     } else {  
       $self->{set_nc}->($self);  
     }  
     
3769          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
3770                        
3771            return  ($self->{ct}); # character            return  ($self->{ct}); # character
# Line 3691  sub _get_next_token ($) { Line 3798  sub _get_next_token ($) {
3798    
3799        ## ISSUE: "text tokens" in spec.        ## ISSUE: "text tokens" in spec.
3800      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
3801          ## XML5: "CDATA bracket state".
3802    
3803        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
3804                    
3805          $self->{state} = CDATA_SECTION_MSE2_STATE;          $self->{state} = CDATA_SECTION_MSE2_STATE;
# Line 3708  sub _get_next_token ($) { Line 3817  sub _get_next_token ($) {
3817          redo A;          redo A;
3818        } else {        } else {
3819                    
3820            ## XML5: If EOF, "]" is not appended and changed to the data state.
3821          $self->{ct}->{data} .= ']';          $self->{ct}->{data} .= ']';
3822          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
3823          ## Reconsume.          ## Reconsume.
3824          redo A;          redo A;
3825        }        }
3826      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
3827          ## XML5: "CDATA end state".
3828    
3829        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
3830          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3831          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 3756  sub _get_next_token ($) { Line 3868  sub _get_next_token ($) {
3868                    
3869          $self->{ct}->{data} .= ']]'; # character          $self->{ct}->{data} .= ']]'; # character
3870          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE;
3871          ## Reconsume.          ## Reconsume. ## XML5: Emit.
3872          redo A;          redo A;
3873        }        }
3874      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
# Line 4260  sub _get_next_token ($) { Line 4372  sub _get_next_token ($) {
4372          ## Reconsume.          ## Reconsume.
4373          redo A;          redo A;
4374        }        }
4375    
4376        ## XML-only states
4377    
4378        } elsif ($self->{state} == PI_STATE) {
4379          if ($is_space->{$self->{nc}} or
4380              $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"
4381              $self->{nc} == -1) {
4382            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
4383                            line => $self->{line_prev},
4384                            column => $self->{column_prev}
4385                                - 1 * ($self->{nc} != -1));
4386            $self->{state} = BOGUS_COMMENT_STATE;
4387            ## Reconsume.
4388            $self->{ct} = {type => COMMENT_TOKEN,
4389                           data => '?',
4390                           line => $self->{line_prev},
4391                           column => $self->{column_prev}
4392                               - 1 * ($self->{nc} != -1),
4393                          };
4394            redo A;
4395          } else {
4396            $self->{ct} = {type => PI_TOKEN,
4397                           target => chr $self->{nc},
4398                           data => '',
4399                           line => $self->{line_prev},
4400                           column => $self->{column_prev} - 1,
4401                          };
4402            $self->{state} = PI_TARGET_STATE;
4403            
4404        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4405          $self->{line_prev} = $self->{line};
4406          $self->{column_prev} = $self->{column};
4407          $self->{column}++;
4408          $self->{nc}
4409              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4410        } else {
4411          $self->{set_nc}->($self);
4412        }
4413      
4414            redo A;
4415          }
4416        } elsif ($self->{state} == PI_TARGET_STATE) {
4417          if ($is_space->{$self->{nc}}) {
4418            $self->{state} = PI_TARGET_AFTER_STATE;
4419            
4420        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4421          $self->{line_prev} = $self->{line};
4422          $self->{column_prev} = $self->{column};
4423          $self->{column}++;
4424          $self->{nc}
4425              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4426        } else {
4427          $self->{set_nc}->($self);
4428        }
4429      
4430            redo A;
4431          } elsif ($self->{nc} == -1) {
4432            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4433            $self->{state} = DATA_STATE;
4434            $self->{s_kwd} = '';
4435            ## Reconsume.
4436            return  ($self->{ct}); # pi
4437            redo A;
4438          } elsif ($self->{nc} == 0x003F) { # ?
4439            $self->{state} = PI_AFTER_STATE;
4440            
4441        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4442          $self->{line_prev} = $self->{line};
4443          $self->{column_prev} = $self->{column};
4444          $self->{column}++;
4445          $self->{nc}
4446              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4447        } else {
4448          $self->{set_nc}->($self);
4449        }
4450      
4451            redo A;
4452          } else {
4453            ## XML5: typo ("tag name" -> "target")
4454            $self->{ct}->{target} .= chr $self->{nc}; # pi
4455            
4456        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4457          $self->{line_prev} = $self->{line};
4458          $self->{column_prev} = $self->{column};
4459          $self->{column}++;
4460          $self->{nc}
4461              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4462        } else {
4463          $self->{set_nc}->($self);
4464        }
4465      
4466            redo A;
4467          }
4468        } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
4469          if ($is_space->{$self->{nc}}) {
4470            ## Stay in the state.
4471            
4472        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4473          $self->{line_prev} = $self->{line};
4474          $self->{column_prev} = $self->{column};
4475          $self->{column}++;
4476          $self->{nc}
4477              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4478        } else {
4479          $self->{set_nc}->($self);
4480        }
4481      
4482            redo A;
4483          } else {
4484            $self->{state} = PI_DATA_STATE;
4485            ## Reprocess.
4486            redo A;
4487          }
4488        } elsif ($self->{state} == PI_DATA_STATE) {
4489          if ($self->{nc} == 0x003F) { # ?
4490            $self->{state} = PI_DATA_AFTER_STATE;
4491            
4492        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4493          $self->{line_prev} = $self->{line};
4494          $self->{column_prev} = $self->{column};
4495          $self->{column}++;
4496          $self->{nc}
4497              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4498        } else {
4499          $self->{set_nc}->($self);
4500        }
4501      
4502            redo A;
4503          } elsif ($self->{nc} == -1) {
4504            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4505            $self->{state} = DATA_STATE;
4506            $self->{s_kwd} = '';
4507            ## Reprocess.
4508            return  ($self->{ct}); # pi
4509            redo A;
4510          } else {
4511            $self->{ct}->{data} .= chr $self->{nc}; # pi
4512            $self->{read_until}->($self->{ct}->{data}, q[?],
4513                                  length $self->{ct}->{data});
4514            ## Stay in the state.
4515            
4516        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4517          $self->{line_prev} = $self->{line};
4518          $self->{column_prev} = $self->{column};
4519          $self->{column}++;
4520          $self->{nc}
4521              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4522        } else {
4523          $self->{set_nc}->($self);
4524        }
4525      
4526            ## Reprocess.
4527            redo A;
4528          }
4529        } elsif ($self->{state} == PI_AFTER_STATE) {
4530          if ($self->{nc} == 0x003E) { # >
4531            $self->{state} = DATA_STATE;
4532            $self->{s_kwd} = '';
4533            
4534        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4535          $self->{line_prev} = $self->{line};
4536          $self->{column_prev} = $self->{column};
4537          $self->{column}++;
4538          $self->{nc}
4539              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4540        } else {
4541          $self->{set_nc}->($self);
4542        }
4543      
4544            return  ($self->{ct}); # pi
4545            redo A;
4546          } elsif ($self->{nc} == 0x003F) { # ?
4547            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
4548                            line => $self->{line_prev},
4549                            column => $self->{column_prev}); ## XML5: no error
4550            $self->{ct}->{data} .= '?';
4551            $self->{state} = PI_DATA_AFTER_STATE;
4552            
4553        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4554          $self->{line_prev} = $self->{line};
4555          $self->{column_prev} = $self->{column};
4556          $self->{column}++;
4557          $self->{nc}
4558              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4559        } else {
4560          $self->{set_nc}->($self);
4561        }
4562      
4563            redo A;
4564          } else {
4565            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
4566                            line => $self->{line_prev},
4567                            column => $self->{column_prev}
4568                                + 1 * ($self->{nc} == -1)); ## XML5: no error
4569            $self->{ct}->{data} .= '?'; ## XML5: not appended
4570            $self->{state} = PI_DATA_STATE;
4571            ## Reprocess.
4572            redo A;
4573          }
4574        } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
4575          ## XML5: Same as "pi after state" in XML5
4576          if ($self->{nc} == 0x003E) { # >
4577            $self->{state} = DATA_STATE;
4578            $self->{s_kwd} = '';
4579            
4580        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4581          $self->{line_prev} = $self->{line};
4582          $self->{column_prev} = $self->{column};
4583          $self->{column}++;
4584          $self->{nc}
4585              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4586        } else {
4587          $self->{set_nc}->($self);
4588        }
4589      
4590            return  ($self->{ct}); # pi
4591            redo A;
4592          } elsif ($self->{nc} == 0x003F) { # ?
4593            $self->{ct}->{data} .= '?';
4594            ## Stay in the state.
4595            
4596        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4597          $self->{line_prev} = $self->{line};
4598          $self->{column_prev} = $self->{column};
4599          $self->{column}++;
4600          $self->{nc}
4601              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4602        } else {
4603          $self->{set_nc}->($self);
4604        }
4605      
4606            redo A;
4607          } else {
4608            $self->{ct}->{data} .= '?'; ## XML5: not appended
4609            $self->{state} = PI_DATA_STATE;
4610            ## Reprocess.
4611            redo A;
4612          }
4613            
4614      } else {      } else {
4615        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
4616      }      }

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.10

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24