/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.18 by wakaba, Sun Oct 19 06:14:57 2008 UTC | revision 1.20 by wakaba, Sun Oct 19 08:20:29 2008 UTC
# Line 182  sub NDATA_STATE () { 86 } Line 182  sub NDATA_STATE () { 86 }
182  sub AFTER_NDATA_STATE () { 87 }  sub AFTER_NDATA_STATE () { 87 }
183  sub BEFORE_NOTATION_NAME_STATE () { 88 }  sub BEFORE_NOTATION_NAME_STATE () { 88 }
184  sub NOTATION_NAME_STATE () { 89 }  sub NOTATION_NAME_STATE () { 89 }
185  sub AFTER_NOTATION_NAME_STATE () { 90 }  sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
186  sub BOGUS_MD_STATE () { 91 }  sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
187    sub ENTITY_VALUE_ENTITY_STATE () { 92 }
188    sub AFTER_ELEMENT_NAME_STATE () { 93 }
189    sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
190    sub CONTENT_KEYWORD_STATE () { 95 }
191    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
192    sub CM_ELEMENT_NAME_STATE () { 97 }
193    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
194    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
195    sub AFTER_MD_DEF_STATE () { 100 }
196    sub BOGUS_MD_STATE () { 101 }
197    
198  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
199  ## list and descriptions)  ## list and descriptions)
# Line 3262  sub _get_next_token ($) { Line 3272  sub _get_next_token ($) {
3272      }      }
3273        
3274          redo A;          redo A;
3275  ## TODO: " and ' for ENTITY        } elsif ($self->{nc} == 0x0022 and # "
3276                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3277                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3278            
3279            $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
3280            $self->{ct}->{value} = ''; # ENTITY
3281            
3282        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3283          $self->{line_prev} = $self->{line};
3284          $self->{column_prev} = $self->{column};
3285          $self->{column}++;
3286          $self->{nc}
3287              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3288        } else {
3289          $self->{set_nc}->($self);
3290        }
3291      
3292            redo A;
3293          } elsif ($self->{nc} == 0x0027 and # '
3294                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3295                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3296            
3297            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
3298            $self->{ct}->{value} = ''; # ENTITY
3299            
3300        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3301          $self->{line_prev} = $self->{line};
3302          $self->{column_prev} = $self->{column};
3303          $self->{column}++;
3304          $self->{nc}
3305              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3306        } else {
3307          $self->{set_nc}->($self);
3308        }
3309      
3310            redo A;
3311        } elsif ($self->{is_xml} and        } elsif ($self->{is_xml} and
3312                 $self->{ct}->{type} == DOCTYPE_TOKEN and                 $self->{ct}->{type} == DOCTYPE_TOKEN and
3313                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
# Line 6145  sub _get_next_token ($) { Line 6190  sub _get_next_token ($) {
6190          if ($self->{ct}->{type} == ATTLIST_TOKEN) {          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6191            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6192          } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {          } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6193            ## TODO: ...            $self->{state} = AFTER_ELEMENT_NAME_STATE;
           $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;  
6194          } else { # ENTITY/NOTATION          } else { # ENTITY/NOTATION
6195            $self->{state} = AFTER_DOCTYPE_NAME_STATE;            $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6196          }          }
# Line 7629  sub _get_next_token ($) { Line 7673  sub _get_next_token ($) {
7673        }        }
7674      } elsif ($self->{state} == NOTATION_NAME_STATE) {      } elsif ($self->{state} == NOTATION_NAME_STATE) {
7675        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
7676          $self->{state} = AFTER_NOTATION_NAME_STATE;          $self->{state} = AFTER_MD_DEF_STATE;
7677                    
7678      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7679        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 7689  sub _get_next_token ($) { Line 7733  sub _get_next_token ($) {
7733        
7734          redo A;          redo A;
7735        }        }
7736      } elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
7737          if ($self->{nc} == 0x0022) { # "
7738            $self->{state} = AFTER_MD_DEF_STATE;
7739            
7740        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7741          $self->{line_prev} = $self->{line};
7742          $self->{column_prev} = $self->{column};
7743          $self->{column}++;
7744          $self->{nc}
7745              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7746        } else {
7747          $self->{set_nc}->($self);
7748        }
7749      
7750            redo A;
7751          } elsif ($self->{nc} == 0x0026) { # &
7752            $self->{prev_state} = $self->{state};
7753            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7754            $self->{entity_add} = 0x0022; # "
7755            
7756        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7757          $self->{line_prev} = $self->{line};
7758          $self->{column_prev} = $self->{column};
7759          $self->{column}++;
7760          $self->{nc}
7761              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7762        } else {
7763          $self->{set_nc}->($self);
7764        }
7765      
7766            redo A;
7767    ## TODO: %
7768          } elsif ($self->{nc} == -1) {
7769            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7770            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7771            ## Reconsume.
7772            return  ($self->{ct}); # ENTITY
7773            redo A;
7774          } else {
7775            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7776            
7777        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7778          $self->{line_prev} = $self->{line};
7779          $self->{column_prev} = $self->{column};
7780          $self->{column}++;
7781          $self->{nc}
7782              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7783        } else {
7784          $self->{set_nc}->($self);
7785        }
7786      
7787            redo A;
7788          }
7789        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
7790          if ($self->{nc} == 0x0027) { # '
7791            $self->{state} = AFTER_MD_DEF_STATE;
7792            
7793        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7794          $self->{line_prev} = $self->{line};
7795          $self->{column_prev} = $self->{column};
7796          $self->{column}++;
7797          $self->{nc}
7798              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7799        } else {
7800          $self->{set_nc}->($self);
7801        }
7802      
7803            redo A;
7804          } elsif ($self->{nc} == 0x0026) { # &
7805            $self->{prev_state} = $self->{state};
7806            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7807            $self->{entity_add} = 0x0027; # '
7808            
7809        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7810          $self->{line_prev} = $self->{line};
7811          $self->{column_prev} = $self->{column};
7812          $self->{column}++;
7813          $self->{nc}
7814              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7815        } else {
7816          $self->{set_nc}->($self);
7817        }
7818      
7819            redo A;
7820    ## TODO: %
7821          } elsif ($self->{nc} == -1) {
7822            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7823            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7824            ## Reconsume.
7825            return  ($self->{ct}); # ENTITY
7826            redo A;
7827          } else {
7828            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7829            
7830        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7831          $self->{line_prev} = $self->{line};
7832          $self->{column_prev} = $self->{column};
7833          $self->{column}++;
7834          $self->{nc}
7835              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7836        } else {
7837          $self->{set_nc}->($self);
7838        }
7839      
7840            redo A;
7841          }
7842        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
7843          ## TODO: XMLize
7844    
7845          if ($is_space->{$self->{nc}} or
7846              {
7847                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
7848                $self->{entity_add} => 1,
7849              }->{$self->{nc}}) {
7850            ## Don't consume
7851            ## No error
7852            ## Return nothing.
7853            #
7854          } elsif ($self->{nc} == 0x0023) { # #
7855            $self->{ca} = $self->{ct};
7856            $self->{state} = ENTITY_HASH_STATE;
7857            $self->{kwd} = '#';
7858            
7859        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7860          $self->{line_prev} = $self->{line};
7861          $self->{column_prev} = $self->{column};
7862          $self->{column}++;
7863          $self->{nc}
7864              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7865        } else {
7866          $self->{set_nc}->($self);
7867        }
7868      
7869            redo A;
7870          } elsif ((0x0041 <= $self->{nc} and
7871                    $self->{nc} <= 0x005A) or # A..Z
7872                   (0x0061 <= $self->{nc} and
7873                    $self->{nc} <= 0x007A)) { # a..z
7874            #
7875          } else {
7876            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero');
7877            ## Return nothing.
7878            #
7879          }
7880    
7881          $self->{ct}->{value} .= '&';
7882          $self->{state} = $self->{prev_state};
7883          ## Reconsume.
7884          redo A;
7885        } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
7886          if ($is_space->{$self->{nc}}) {
7887            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
7888            
7889        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7890          $self->{line_prev} = $self->{line};
7891          $self->{column_prev} = $self->{column};
7892          $self->{column}++;
7893          $self->{nc}
7894              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7895        } else {
7896          $self->{set_nc}->($self);
7897        }
7898      
7899            redo A;
7900          } elsif ($self->{nc} == 0x0028) { # (
7901            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
7902            $self->{ct}->{content} = ['('];
7903            $self->{group_depth} = 1;
7904            
7905        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7906          $self->{line_prev} = $self->{line};
7907          $self->{column_prev} = $self->{column};
7908          $self->{column}++;
7909          $self->{nc}
7910              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7911        } else {
7912          $self->{set_nc}->($self);
7913        }
7914      
7915            redo A;
7916          } elsif ($self->{nc} == 0x003E) { # >
7917            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
7918            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7919            
7920        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7921          $self->{line_prev} = $self->{line};
7922          $self->{column_prev} = $self->{column};
7923          $self->{column}++;
7924          $self->{nc}
7925              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7926        } else {
7927          $self->{set_nc}->($self);
7928        }
7929      
7930            return  ($self->{ct}); # ELEMENT
7931            redo A;
7932          } elsif ($self->{nc} == -1) {
7933            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7934            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7935            
7936        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7937          $self->{line_prev} = $self->{line};
7938          $self->{column_prev} = $self->{column};
7939          $self->{column}++;
7940          $self->{nc}
7941              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7942        } else {
7943          $self->{set_nc}->($self);
7944        }
7945      
7946            return  ($self->{ct}); # ELEMENT
7947            redo A;
7948          } else {
7949            $self->{ct}->{content} = [chr $self->{nc}];
7950            $self->{state} = CONTENT_KEYWORD_STATE;
7951            
7952        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7953          $self->{line_prev} = $self->{line};
7954          $self->{column_prev} = $self->{column};
7955          $self->{column}++;
7956          $self->{nc}
7957              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7958        } else {
7959          $self->{set_nc}->($self);
7960        }
7961      
7962            redo A;
7963          }
7964        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
7965          if ($is_space->{$self->{nc}}) {
7966            $self->{state} = AFTER_MD_DEF_STATE;
7967            
7968        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7969          $self->{line_prev} = $self->{line};
7970          $self->{column_prev} = $self->{column};
7971          $self->{column}++;
7972          $self->{nc}
7973              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7974        } else {
7975          $self->{set_nc}->($self);
7976        }
7977      
7978            redo A;
7979          } elsif ($self->{nc} == 0x003E) { # >
7980            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7981            
7982        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7983          $self->{line_prev} = $self->{line};
7984          $self->{column_prev} = $self->{column};
7985          $self->{column}++;
7986          $self->{nc}
7987              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7988        } else {
7989          $self->{set_nc}->($self);
7990        }
7991      
7992            return  ($self->{ct}); # ELEMENT
7993            redo A;
7994          } elsif ($self->{nc} == -1) {
7995            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7996            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7997            
7998        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7999          $self->{line_prev} = $self->{line};
8000          $self->{column_prev} = $self->{column};
8001          $self->{column}++;
8002          $self->{nc}
8003              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8004        } else {
8005          $self->{set_nc}->($self);
8006        }
8007      
8008            return  ($self->{ct}); # ELEMENT
8009            redo A;
8010          } else {
8011            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
8012            ## Stay in the state.
8013            
8014        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8015          $self->{line_prev} = $self->{line};
8016          $self->{column_prev} = $self->{column};
8017          $self->{column}++;
8018          $self->{nc}
8019              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8020        } else {
8021          $self->{set_nc}->($self);
8022        }
8023      
8024            redo A;
8025          }
8026        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
8027          if ($is_space->{$self->{nc}}) {
8028            ## Stay in the state.
8029            
8030        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8031          $self->{line_prev} = $self->{line};
8032          $self->{column_prev} = $self->{column};
8033          $self->{column}++;
8034          $self->{nc}
8035              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8036        } else {
8037          $self->{set_nc}->($self);
8038        }
8039      
8040            redo A;
8041          } elsif ($self->{nc} == 0x0028) { # (
8042            $self->{group_depth}++;
8043            push @{$self->{ct}->{content}}, chr $self->{nc};
8044            ## Stay in the state.
8045            
8046        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8047          $self->{line_prev} = $self->{line};
8048          $self->{column_prev} = $self->{column};
8049          $self->{column}++;
8050          $self->{nc}
8051              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8052        } else {
8053          $self->{set_nc}->($self);
8054        }
8055      
8056            redo A;
8057          } elsif ($self->{nc} == 0x007C or # |
8058                   $self->{nc} == 0x002C) { # ,
8059            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8060            ## Stay in the state.
8061            
8062        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8063          $self->{line_prev} = $self->{line};
8064          $self->{column_prev} = $self->{column};
8065          $self->{column}++;
8066          $self->{nc}
8067              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8068        } else {
8069          $self->{set_nc}->($self);
8070        }
8071      
8072            redo A;
8073          } elsif ($self->{nc} == 0x0029) { # )
8074            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8075            push @{$self->{ct}->{content}}, chr $self->{nc};
8076            $self->{group_depth}--;
8077            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8078            
8079        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8080          $self->{line_prev} = $self->{line};
8081          $self->{column_prev} = $self->{column};
8082          $self->{column}++;
8083          $self->{nc}
8084              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8085        } else {
8086          $self->{set_nc}->($self);
8087        }
8088      
8089            redo A;
8090          } elsif ($self->{nc} == 0x003E) { # >
8091            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8092            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8093            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8094            
8095        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8096          $self->{line_prev} = $self->{line};
8097          $self->{column_prev} = $self->{column};
8098          $self->{column}++;
8099          $self->{nc}
8100              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8101        } else {
8102          $self->{set_nc}->($self);
8103        }
8104      
8105            return  ($self->{ct}); # ELEMENT
8106            redo A;
8107          } elsif ($self->{nc} == -1) {
8108            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8109            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8110            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8111            
8112        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8113          $self->{line_prev} = $self->{line};
8114          $self->{column_prev} = $self->{column};
8115          $self->{column}++;
8116          $self->{nc}
8117              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8118        } else {
8119          $self->{set_nc}->($self);
8120        }
8121      
8122            return  ($self->{ct}); # ELEMENT
8123            redo A;
8124          } else {
8125            push @{$self->{ct}->{content}}, chr $self->{nc};
8126            $self->{state} = CM_ELEMENT_NAME_STATE;
8127            
8128        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8129          $self->{line_prev} = $self->{line};
8130          $self->{column_prev} = $self->{column};
8131          $self->{column}++;
8132          $self->{nc}
8133              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8134        } else {
8135          $self->{set_nc}->($self);
8136        }
8137      
8138            redo A;
8139          }
8140        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
8141          if ($is_space->{$self->{nc}}) {
8142            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8143            
8144        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8145          $self->{line_prev} = $self->{line};
8146          $self->{column_prev} = $self->{column};
8147          $self->{column}++;
8148          $self->{nc}
8149              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8150        } else {
8151          $self->{set_nc}->($self);
8152        }
8153      
8154            redo A;
8155          } elsif ($self->{nc} == 0x002A or # *
8156                   $self->{nc} == 0x002B or # +
8157                   $self->{nc} == 0x003F) { # ?
8158            push @{$self->{ct}->{content}}, chr $self->{nc};
8159            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8160            
8161        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8162          $self->{line_prev} = $self->{line};
8163          $self->{column_prev} = $self->{column};
8164          $self->{column}++;
8165          $self->{nc}
8166              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8167        } else {
8168          $self->{set_nc}->($self);
8169        }
8170      
8171            redo A;
8172          } elsif ($self->{nc} == 0x007C or # |
8173                   $self->{nc} == 0x002C) { # ,
8174            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8175            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8176            
8177        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8178          $self->{line_prev} = $self->{line};
8179          $self->{column_prev} = $self->{column};
8180          $self->{column}++;
8181          $self->{nc}
8182              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8183        } else {
8184          $self->{set_nc}->($self);
8185        }
8186      
8187            redo A;
8188          } elsif ($self->{nc} == 0x0029) { # )
8189            $self->{group_depth}--;
8190            push @{$self->{ct}->{content}}, chr $self->{nc};
8191            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8192            
8193        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8194          $self->{line_prev} = $self->{line};
8195          $self->{column_prev} = $self->{column};
8196          $self->{column}++;
8197          $self->{nc}
8198              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8199        } else {
8200          $self->{set_nc}->($self);
8201        }
8202      
8203            redo A;
8204          } elsif ($self->{nc} == 0x003E) { # >
8205            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8206            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8207            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8208            
8209        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8210          $self->{line_prev} = $self->{line};
8211          $self->{column_prev} = $self->{column};
8212          $self->{column}++;
8213          $self->{nc}
8214              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8215        } else {
8216          $self->{set_nc}->($self);
8217        }
8218      
8219            return  ($self->{ct}); # ELEMENT
8220            redo A;
8221          } elsif ($self->{nc} == -1) {
8222            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8223            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8224            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8225            
8226        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8227          $self->{line_prev} = $self->{line};
8228          $self->{column_prev} = $self->{column};
8229          $self->{column}++;
8230          $self->{nc}
8231              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8232        } else {
8233          $self->{set_nc}->($self);
8234        }
8235      
8236            return  ($self->{ct}); # ELEMENT
8237            redo A;
8238          } else {
8239            $self->{ct}->{content}->[-1] .= chr $self->{nc};
8240            ## Stay in the state.
8241            
8242        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8243          $self->{line_prev} = $self->{line};
8244          $self->{column_prev} = $self->{column};
8245          $self->{column}++;
8246          $self->{nc}
8247              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8248        } else {
8249          $self->{set_nc}->($self);
8250        }
8251      
8252            redo A;
8253          }
8254        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
8255          if ($is_space->{$self->{nc}}) {
8256            ## Stay in the state.
8257            
8258        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8259          $self->{line_prev} = $self->{line};
8260          $self->{column_prev} = $self->{column};
8261          $self->{column}++;
8262          $self->{nc}
8263              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8264        } else {
8265          $self->{set_nc}->($self);
8266        }
8267      
8268            redo A;
8269          } elsif ($self->{nc} == 0x007C or # |
8270                   $self->{nc} == 0x002C) { # ,
8271            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8272            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8273            
8274        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8275          $self->{line_prev} = $self->{line};
8276          $self->{column_prev} = $self->{column};
8277          $self->{column}++;
8278          $self->{nc}
8279              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8280        } else {
8281          $self->{set_nc}->($self);
8282        }
8283      
8284            redo A;
8285          } elsif ($self->{nc} == 0x0029) { # )
8286            $self->{group_depth}--;
8287            push @{$self->{ct}->{content}}, chr $self->{nc};
8288            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8289            
8290        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8291          $self->{line_prev} = $self->{line};
8292          $self->{column_prev} = $self->{column};
8293          $self->{column}++;
8294          $self->{nc}
8295              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8296        } else {
8297          $self->{set_nc}->($self);
8298        }
8299      
8300            redo A;
8301          } elsif ($self->{nc} == 0x003E) { # >
8302            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8303            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8304            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8305            
8306        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8307          $self->{line_prev} = $self->{line};
8308          $self->{column_prev} = $self->{column};
8309          $self->{column}++;
8310          $self->{nc}
8311              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8312        } else {
8313          $self->{set_nc}->($self);
8314        }
8315      
8316            return  ($self->{ct}); # ELEMENT
8317            redo A;
8318          } elsif ($self->{nc} == -1) {
8319            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8320            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8321            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8322            
8323        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8324          $self->{line_prev} = $self->{line};
8325          $self->{column_prev} = $self->{column};
8326          $self->{column}++;
8327          $self->{nc}
8328              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8329        } else {
8330          $self->{set_nc}->($self);
8331        }
8332      
8333            return  ($self->{ct}); # ELEMENT
8334            redo A;
8335          } else {
8336            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after element name'); ## TODO: type
8337            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8338            $self->{state} = BOGUS_MD_STATE;
8339            
8340        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8341          $self->{line_prev} = $self->{line};
8342          $self->{column_prev} = $self->{column};
8343          $self->{column}++;
8344          $self->{nc}
8345              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8346        } else {
8347          $self->{set_nc}->($self);
8348        }
8349      
8350            redo A;
8351          }
8352        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
8353          if ($is_space->{$self->{nc}}) {
8354            if ($self->{group_depth}) {
8355              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8356            } else {
8357              $self->{state} = AFTER_MD_DEF_STATE;
8358            }
8359            
8360        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8361          $self->{line_prev} = $self->{line};
8362          $self->{column_prev} = $self->{column};
8363          $self->{column}++;
8364          $self->{nc}
8365              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8366        } else {
8367          $self->{set_nc}->($self);
8368        }
8369      
8370            redo A;
8371          } elsif ($self->{nc} == 0x002A or # *
8372                   $self->{nc} == 0x002B or # +
8373                   $self->{nc} == 0x003F) { # ?
8374            push @{$self->{ct}->{content}}, chr $self->{nc};
8375            if ($self->{group_depth}) {
8376              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8377            } else {
8378              $self->{state} = AFTER_MD_DEF_STATE;
8379            }
8380            
8381        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8382          $self->{line_prev} = $self->{line};
8383          $self->{column_prev} = $self->{column};
8384          $self->{column}++;
8385          $self->{nc}
8386              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8387        } else {
8388          $self->{set_nc}->($self);
8389        }
8390      
8391            redo A;
8392          } elsif ($self->{nc} == 0x0029) { # )
8393            if ($self->{group_depth}) {
8394              $self->{group_depth}--;
8395              push @{$self->{ct}->{content}}, chr $self->{nc};
8396              ## Stay in the state.
8397              
8398        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8399          $self->{line_prev} = $self->{line};
8400          $self->{column_prev} = $self->{column};
8401          $self->{column}++;
8402          $self->{nc}
8403              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8404        } else {
8405          $self->{set_nc}->($self);
8406        }
8407      
8408              redo A;
8409            } else {
8410              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8411              $self->{state} = BOGUS_MD_STATE;
8412              ## Reconsume.
8413              redo A;
8414            }
8415          } elsif ($self->{nc} == 0x003E) { # >
8416            if ($self->{group_depth}) {
8417              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8418              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8419            }
8420            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8421            
8422        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8423          $self->{line_prev} = $self->{line};
8424          $self->{column_prev} = $self->{column};
8425          $self->{column}++;
8426          $self->{nc}
8427              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8428        } else {
8429          $self->{set_nc}->($self);
8430        }
8431      
8432            return  ($self->{ct}); # ELEMENT
8433            redo A;
8434          } elsif ($self->{nc} == -1) {
8435            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8436            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8437            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8438            
8439        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8440          $self->{line_prev} = $self->{line};
8441          $self->{column_prev} = $self->{column};
8442          $self->{column}++;
8443          $self->{nc}
8444              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8445        } else {
8446          $self->{set_nc}->($self);
8447        }
8448      
8449            return  ($self->{ct}); # ELEMENT
8450            redo A;
8451          } else {
8452            if ($self->{group_depth}) {
8453              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8454            } else {
8455              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8456              $self->{state} = BOGUS_MD_STATE;
8457            }
8458            ## Reconsume.
8459            redo A;
8460          }
8461        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
8462        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
8463          ## Stay in the state.          ## Stay in the state.
8464                    
# Line 7717  sub _get_next_token ($) { Line 8486  sub _get_next_token ($) {
8486        $self->{set_nc}->($self);        $self->{set_nc}->($self);
8487      }      }
8488        
8489          return  ($self->{ct}); # ENTITY          return  ($self->{ct}); # ENTITY/ELEMENT
8490          redo A;          redo A;
8491        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
8492          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
# Line 7733  sub _get_next_token ($) { Line 8502  sub _get_next_token ($) {
8502        $self->{set_nc}->($self);        $self->{set_nc}->($self);
8503      }      }
8504        
8505          return  ($self->{ct}); # ENTITY          return  ($self->{ct}); # ENTITY/ELEMENT
8506          redo A;          redo A;
8507        } else {        } else {
8508          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after notation name'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8509          $self->{state} = BOGUS_MD_STATE;          $self->{state} = BOGUS_MD_STATE;
8510          ## Reconsume.          ## Reconsume.
8511          redo A;          redo A;
8512        }        }
   
   
8513      } elsif ($self->{state} == BOGUS_MD_STATE) {      } elsif ($self->{state} == BOGUS_MD_STATE) {
8514        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
8515          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.20

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24