/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.19 by wakaba, Sun Oct 19 07:19:00 2008 UTC | revision 1.28 by wakaba, Sun Jul 5 04:38:45 2009 UTC
# Line 182  sub NDATA_STATE () { 86 } Line 182  sub NDATA_STATE () { 86 }
182  sub AFTER_NDATA_STATE () { 87 }  sub AFTER_NDATA_STATE () { 87 }
183  sub BEFORE_NOTATION_NAME_STATE () { 88 }  sub BEFORE_NOTATION_NAME_STATE () { 88 }
184  sub NOTATION_NAME_STATE () { 89 }  sub NOTATION_NAME_STATE () { 89 }
185  sub AFTER_NOTATION_NAME_STATE () { 90 }  sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
186  sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 91 }  sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
187  sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 92 }  sub ENTITY_VALUE_ENTITY_STATE () { 92 }
188  sub ENTITY_VALUE_ENTITY_STATE () { 93 }  sub AFTER_ELEMENT_NAME_STATE () { 93 }
189  sub BOGUS_MD_STATE () { 94 }  sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
190    sub CONTENT_KEYWORD_STATE () { 95 }
191    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
192    sub CM_ELEMENT_NAME_STATE () { 97 }
193    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
194    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
195    sub AFTER_MD_DEF_STATE () { 100 }
196    sub BOGUS_MD_STATE () { 101 }
197    
198  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
199  ## list and descriptions)  ## list and descriptions)
# Line 1733  sub _get_next_token ($) { Line 1740  sub _get_next_token ($) {
1740    
1741          redo A;          redo A;
1742        } else {        } else {
1743          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D or $self->{nc} == 0x003C) { # =, <
1744                        
1745            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1746            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
# Line 1809  sub _get_next_token ($) { Line 1816  sub _get_next_token ($) {
1816      }      }
1817        
1818          redo A;          redo A;
1819          } elsif ($self->{is_xml} and
1820                   $is_space->{$self->{nc}}) {
1821            
1822            $self->{ca}->{value} .= ' ';
1823            ## Stay in the state.
1824            
1825        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1826          $self->{line_prev} = $self->{line};
1827          $self->{column_prev} = $self->{column};
1828          $self->{column}++;
1829          $self->{nc}
1830              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1831        } else {
1832          $self->{set_nc}->($self);
1833        }
1834      
1835            redo A;
1836        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1837          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1838          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
# Line 1856  sub _get_next_token ($) { Line 1880  sub _get_next_token ($) {
1880          }          }
1881          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1882          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1883                                q["&<],                                qq["&<\x09\x0C\x20],
1884                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1885    
1886          ## Stay in the state          ## Stay in the state
# Line 1923  sub _get_next_token ($) { Line 1947  sub _get_next_token ($) {
1947      }      }
1948        
1949          redo A;          redo A;
1950          } elsif ($self->{is_xml} and
1951                   $is_space->{$self->{nc}}) {
1952            
1953            $self->{ca}->{value} .= ' ';
1954            ## Stay in the state.
1955            
1956        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1957          $self->{line_prev} = $self->{line};
1958          $self->{column_prev} = $self->{column};
1959          $self->{column}++;
1960          $self->{nc}
1961              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1962        } else {
1963          $self->{set_nc}->($self);
1964        }
1965      
1966            redo A;
1967        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1968          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1969          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
# Line 1970  sub _get_next_token ($) { Line 2011  sub _get_next_token ($) {
2011          }          }
2012          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2013          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2014                                q['&<],                                qq['&<\x09\x0C\x20],
2015                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2016    
2017          ## Stay in the state          ## Stay in the state
# Line 2142  sub _get_next_token ($) { Line 2183  sub _get_next_token ($) {
2183               0x0022 => 1, # "               0x0022 => 1, # "
2184               0x0027 => 1, # '               0x0027 => 1, # '
2185               0x003D => 1, # =               0x003D => 1, # =
2186                 0x003C => 1, # <
2187              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2188                        
2189            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 2151  sub _get_next_token ($) { Line 2193  sub _get_next_token ($) {
2193          }          }
2194          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2195          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2196                                q["'=& >],                                qq["'=& \x09\x0C>],
2197                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2198    
2199          ## Stay in the state          ## Stay in the state
# Line 2957  sub _get_next_token ($) { Line 2999  sub _get_next_token ($) {
2999          redo A;          redo A;
3000        } else {        } else {
3001                    
         ## XML5: Not a parse error.  
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',  
                         line => $self->{line_prev},  
                         column => $self->{column_prev});  
3002          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3003          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
3004                    
# Line 2992  sub _get_next_token ($) { Line 3030  sub _get_next_token ($) {
3030      }      }
3031        
3032          redo A;          redo A;
3033          } elsif ($self->{nc} == -1) {
3034            
3035            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3036            $self->{ct}->{quirks} = 1;
3037    
3038            $self->{state} = DATA_STATE;
3039            ## Reconsume.
3040            return  ($self->{ct}); # DOCTYPE (quirks)
3041    
3042            redo A;
3043        } else {        } else {
3044                    
3045          ## XML5: Unless EOF, swith to the bogus comment state.          ## XML5: Swith to the bogus comment state.
3046          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3047          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3048          ## reconsume          ## reconsume
# Line 4621  sub _get_next_token ($) { Line 4669  sub _get_next_token ($) {
4669              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4670              $self->{entity_add} => 1,              $self->{entity_add} => 1,
4671            }->{$self->{nc}}) {            }->{$self->{nc}}) {
4672                    if ($self->{is_xml}) {
4673              
4674              $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4675                              line => $self->{line_prev},
4676                              column => $self->{column_prev}
4677                                  + ($self->{nc} == -1 ? 1 : 0));
4678            } else {
4679              
4680              ## No error
4681            }
4682          ## Don't consume          ## Don't consume
         ## No error  
4683          ## Return nothing.          ## Return nothing.
4684          #          #
4685        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
# Line 4642  sub _get_next_token ($) { Line 4698  sub _get_next_token ($) {
4698      }      }
4699        
4700          redo A;          redo A;
4701        } elsif ((0x0041 <= $self->{nc} and        } elsif ($self->{is_xml} or
4702                   (0x0041 <= $self->{nc} and
4703                  $self->{nc} <= 0x005A) or # A..Z                  $self->{nc} <= 0x005A) or # A..Z
4704                 (0x0061 <= $self->{nc} and                 (0x0061 <= $self->{nc} and
4705                  $self->{nc} <= 0x007A)) { # a..z                  $self->{nc} <= 0x007A)) { # a..z
# Line 4696  sub _get_next_token ($) { Line 4753  sub _get_next_token ($) {
4753          redo A;          redo A;
4754        }        }
4755      } elsif ($self->{state} == ENTITY_HASH_STATE) {      } elsif ($self->{state} == ENTITY_HASH_STATE) {
4756        if ($self->{nc} == 0x0078 or # x        if ($self->{nc} == 0x0078) { # x
4757            $self->{nc} == 0x0058) { # X          
4758            $self->{state} = HEXREF_X_STATE;
4759            $self->{kwd} .= chr $self->{nc};
4760            
4761        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4762          $self->{line_prev} = $self->{line};
4763          $self->{column_prev} = $self->{column};
4764          $self->{column}++;
4765          $self->{nc}
4766              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4767        } else {
4768          $self->{set_nc}->($self);
4769        }
4770      
4771            redo A;
4772          } elsif ($self->{nc} == 0x0058) { # X
4773                    
4774            if ($self->{is_xml}) {
4775              $self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type
4776            }
4777          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4778          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4779                    
# Line 4803  sub _get_next_token ($) { Line 4878  sub _get_next_token ($) {
4878        my $code = $self->{kwd};        my $code = $self->{kwd};
4879        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4880        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4881        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
4882              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
4883              ($self->{is_xml} and $code == 0x0000)) {
4884                    
4885          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
4886                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4956  sub _get_next_token ($) { Line 5033  sub _get_next_token ($) {
5033        my $code = $self->{kwd};        my $code = $self->{kwd};
5034        my $l = $self->{line_prev};        my $l = $self->{line_prev};
5035        my $c = $self->{column_prev};        my $c = $self->{column_prev};
5036        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
5037              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
5038              ($self->{is_xml} and $code == 0x0000)) {
5039                    
5040          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
5041                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4990  sub _get_next_token ($) { Line 5069  sub _get_next_token ($) {
5069          redo A;          redo A;
5070        }        }
5071      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
5072        if (length $self->{kwd} < 30 and        if ((0x0041 <= $self->{nc} and # a
5073            ## NOTE: Some number greater than the maximum length of entity name             $self->{nc} <= 0x005A) or # x
5074            ((0x0041 <= $self->{nc} and # a            (0x0061 <= $self->{nc} and # a
5075              $self->{nc} <= 0x005A) or # x             $self->{nc} <= 0x007A) or # z
5076             (0x0061 <= $self->{nc} and # a            (0x0030 <= $self->{nc} and # 0
5077              $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x0039) or # 9
5078             (0x0030 <= $self->{nc} and # 0            $self->{nc} == 0x003B or # ;
5079              $self->{nc} <= 0x0039) or # 9            ($self->{is_xml} and
5080             $self->{nc} == 0x003B)) { # ;             not ($is_space->{$self->{nc}} or
5081                    {
5082                      0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
5083                      $self->{entity_add} => 1,
5084                    }->{$self->{nc}}))) {
5085          our $EntityChar;          our $EntityChar;
5086          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5087          if (defined $EntityChar->{$self->{kwd}}) {          if (defined $EntityChar->{$self->{kwd}} or
5088                $self->{ge}->{$self->{kwd}}) {
5089            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
5090                            if (defined $self->{ge}->{$self->{kwd}}) {
5091              $self->{entity__value} = $EntityChar->{$self->{kwd}};                if ($self->{ge}->{$self->{kwd}}->{only_text}) {
5092                    
5093                    $self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value};
5094                  } else {
5095                    if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
5096                      
5097                      $self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type
5098                                      value => $self->{kwd});
5099                    } else {
5100                      
5101                    }
5102                    $self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand
5103                  }
5104                } else {
5105                  if ($self->{is_xml}) {
5106                    
5107                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type
5108                                    value => $self->{kwd},
5109                                    level => {
5110                                              'amp;' => $self->{level}->{warn},
5111                                              'quot;' => $self->{level}->{warn},
5112                                              'lt;' => $self->{level}->{warn},
5113                                              'gt;' => $self->{level}->{warn},
5114                                              'apos;' => $self->{level}->{warn},
5115                                             }->{$self->{kwd}} ||
5116                                             $self->{level}->{must});
5117                  } else {
5118                    
5119                  }
5120                  $self->{entity__value} = $EntityChar->{$self->{kwd}};
5121                }
5122              $self->{entity__match} = 1;              $self->{entity__match} = 1;
5123                            
5124      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 5400  sub _get_next_token ($) { Line 5514  sub _get_next_token ($) {
5514          ## XML5: Not defined yet.          ## XML5: Not defined yet.
5515    
5516          ## TODO:          ## TODO:
5517    
5518            if (not $self->{stop_processing} and
5519                not $self->{document}->xml_standalone) {
5520              $self->{parse_error}->(level => $self->{level}->{must}, type => 'stop processing', ## TODO: type
5521                              level => $self->{level}->{info});
5522              $self->{stop_processing} = 1;
5523            }
5524    
5525                    
5526      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5527        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5834  sub _get_next_token ($) { Line 5956  sub _get_next_token ($) {
5956          }          }
5957          $self->{ct} = {type => ELEMENT_TOKEN, name => '',          $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5958                         line => $self->{line_prev},                         line => $self->{line_prev},
5959                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 7};
5960          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
5961                    
5962      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 5902  sub _get_next_token ($) { Line 6024  sub _get_next_token ($) {
6024          $self->{ct} = {type => ATTLIST_TOKEN, name => '',          $self->{ct} = {type => ATTLIST_TOKEN, name => '',
6025                         attrdefs => [],                         attrdefs => [],
6026                         line => $self->{line_prev},                         line => $self->{line_prev},
6027                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 7};
6028          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
6029                    
6030      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 5971  sub _get_next_token ($) { Line 6093  sub _get_next_token ($) {
6093          }          }
6094          $self->{ct} = {type => NOTATION_TOKEN, name => '',          $self->{ct} = {type => NOTATION_TOKEN, name => '',
6095                         line => $self->{line_prev},                         line => $self->{line_prev},
6096                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 8};
6097          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
6098                    
6099      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 6183  sub _get_next_token ($) { Line 6305  sub _get_next_token ($) {
6305          if ($self->{ct}->{type} == ATTLIST_TOKEN) {          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6306            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6307          } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {          } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6308            ## TODO: ...            $self->{state} = AFTER_ELEMENT_NAME_STATE;
           $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;  
6309          } else { # ENTITY/NOTATION          } else { # ENTITY/NOTATION
6310            $self->{state} = AFTER_DOCTYPE_NAME_STATE;            $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6311          }          }
# Line 7667  sub _get_next_token ($) { Line 7788  sub _get_next_token ($) {
7788        }        }
7789      } elsif ($self->{state} == NOTATION_NAME_STATE) {      } elsif ($self->{state} == NOTATION_NAME_STATE) {
7790        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
7791          $self->{state} = AFTER_NOTATION_NAME_STATE;          $self->{state} = AFTER_MD_DEF_STATE;
7792                    
7793      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7794        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 7729  sub _get_next_token ($) { Line 7850  sub _get_next_token ($) {
7850        }        }
7851      } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
7852        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
7853          $self->{state} = AFTER_NOTATION_NAME_STATE;          $self->{state} = AFTER_MD_DEF_STATE;
7854                    
7855      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7856        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 7782  sub _get_next_token ($) { Line 7903  sub _get_next_token ($) {
7903        }        }
7904      } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
7905        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
7906          $self->{state} = AFTER_NOTATION_NAME_STATE;          $self->{state} = AFTER_MD_DEF_STATE;
7907                    
7908      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7909        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 7834  sub _get_next_token ($) { Line 7955  sub _get_next_token ($) {
7955          redo A;          redo A;
7956        }        }
7957      } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
       ## TODO: XMLize  
   
7958        if ($is_space->{$self->{nc}} or        if ($is_space->{$self->{nc}} or
7959            {            {
7960              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
7961              $self->{entity_add} => 1,              $self->{entity_add} => 1,
7962            }->{$self->{nc}}) {            }->{$self->{nc}}) {
7963            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
7964                            line => $self->{line_prev},
7965                            column => $self->{column_prev}
7966                                + ($self->{nc} == -1 ? 1 : 0));
7967          ## Don't consume          ## Don't consume
         ## No error  
7968          ## Return nothing.          ## Return nothing.
7969          #          #
7970        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
# Line 7861  sub _get_next_token ($) { Line 7983  sub _get_next_token ($) {
7983      }      }
7984        
7985          redo A;          redo A;
       } elsif ((0x0041 <= $self->{nc} and  
                 $self->{nc} <= 0x005A) or # A..Z  
                (0x0061 <= $self->{nc} and  
                 $self->{nc} <= 0x007A)) { # a..z  
         #  
7986        } else {        } else {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero');  
         ## Return nothing.  
7987          #          #
7988        }        }
7989    
# Line 7876  sub _get_next_token ($) { Line 7991  sub _get_next_token ($) {
7991        $self->{state} = $self->{prev_state};        $self->{state} = $self->{prev_state};
7992        ## Reconsume.        ## Reconsume.
7993        redo A;        redo A;
7994      } elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) {      } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
7995        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
7996            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
7997            
7998        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7999          $self->{line_prev} = $self->{line};
8000          $self->{column_prev} = $self->{column};
8001          $self->{column}++;
8002          $self->{nc}
8003              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8004        } else {
8005          $self->{set_nc}->($self);
8006        }
8007      
8008            redo A;
8009          } elsif ($self->{nc} == 0x0028) { # (
8010            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8011            $self->{ct}->{content} = ['('];
8012            $self->{group_depth} = 1;
8013            
8014        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8015          $self->{line_prev} = $self->{line};
8016          $self->{column_prev} = $self->{column};
8017          $self->{column}++;
8018          $self->{nc}
8019              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8020        } else {
8021          $self->{set_nc}->($self);
8022        }
8023      
8024            redo A;
8025          } elsif ($self->{nc} == 0x003E) { # >
8026            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
8027            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8028            
8029        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8030          $self->{line_prev} = $self->{line};
8031          $self->{column_prev} = $self->{column};
8032          $self->{column}++;
8033          $self->{nc}
8034              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8035        } else {
8036          $self->{set_nc}->($self);
8037        }
8038      
8039            return  ($self->{ct}); # ELEMENT
8040            redo A;
8041          } elsif ($self->{nc} == -1) {
8042            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8043            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8044            
8045        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8046          $self->{line_prev} = $self->{line};
8047          $self->{column_prev} = $self->{column};
8048          $self->{column}++;
8049          $self->{nc}
8050              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8051        } else {
8052          $self->{set_nc}->($self);
8053        }
8054      
8055            return  ($self->{ct}); # ELEMENT
8056            redo A;
8057          } else {
8058            $self->{ct}->{content} = [chr $self->{nc}];
8059            $self->{state} = CONTENT_KEYWORD_STATE;
8060            
8061        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8062          $self->{line_prev} = $self->{line};
8063          $self->{column_prev} = $self->{column};
8064          $self->{column}++;
8065          $self->{nc}
8066              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8067        } else {
8068          $self->{set_nc}->($self);
8069        }
8070      
8071            redo A;
8072          }
8073        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
8074          if ($is_space->{$self->{nc}}) {
8075            $self->{state} = AFTER_MD_DEF_STATE;
8076            
8077        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8078          $self->{line_prev} = $self->{line};
8079          $self->{column_prev} = $self->{column};
8080          $self->{column}++;
8081          $self->{nc}
8082              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8083        } else {
8084          $self->{set_nc}->($self);
8085        }
8086      
8087            redo A;
8088          } elsif ($self->{nc} == 0x003E) { # >
8089            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8090            
8091        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8092          $self->{line_prev} = $self->{line};
8093          $self->{column_prev} = $self->{column};
8094          $self->{column}++;
8095          $self->{nc}
8096              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8097        } else {
8098          $self->{set_nc}->($self);
8099        }
8100      
8101            return  ($self->{ct}); # ELEMENT
8102            redo A;
8103          } elsif ($self->{nc} == -1) {
8104            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8105            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8106            
8107        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8108          $self->{line_prev} = $self->{line};
8109          $self->{column_prev} = $self->{column};
8110          $self->{column}++;
8111          $self->{nc}
8112              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8113        } else {
8114          $self->{set_nc}->($self);
8115        }
8116      
8117            return  ($self->{ct}); # ELEMENT
8118            redo A;
8119          } else {
8120            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
8121            ## Stay in the state.
8122            
8123        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8124          $self->{line_prev} = $self->{line};
8125          $self->{column_prev} = $self->{column};
8126          $self->{column}++;
8127          $self->{nc}
8128              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8129        } else {
8130          $self->{set_nc}->($self);
8131        }
8132      
8133            redo A;
8134          }
8135        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
8136          if ($is_space->{$self->{nc}}) {
8137            ## Stay in the state.
8138            
8139        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8140          $self->{line_prev} = $self->{line};
8141          $self->{column_prev} = $self->{column};
8142          $self->{column}++;
8143          $self->{nc}
8144              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8145        } else {
8146          $self->{set_nc}->($self);
8147        }
8148      
8149            redo A;
8150          } elsif ($self->{nc} == 0x0028) { # (
8151            $self->{group_depth}++;
8152            push @{$self->{ct}->{content}}, chr $self->{nc};
8153            ## Stay in the state.
8154            
8155        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8156          $self->{line_prev} = $self->{line};
8157          $self->{column_prev} = $self->{column};
8158          $self->{column}++;
8159          $self->{nc}
8160              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8161        } else {
8162          $self->{set_nc}->($self);
8163        }
8164      
8165            redo A;
8166          } elsif ($self->{nc} == 0x007C or # |
8167                   $self->{nc} == 0x002C) { # ,
8168            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8169          ## Stay in the state.          ## Stay in the state.
8170                    
8171      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 7891  sub _get_next_token ($) { Line 8179  sub _get_next_token ($) {
8179      }      }
8180        
8181          redo A;          redo A;
8182          } elsif ($self->{nc} == 0x0029) { # )
8183            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8184            push @{$self->{ct}->{content}}, chr $self->{nc};
8185            $self->{group_depth}--;
8186            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8187            
8188        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8189          $self->{line_prev} = $self->{line};
8190          $self->{column_prev} = $self->{column};
8191          $self->{column}++;
8192          $self->{nc}
8193              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8194        } else {
8195          $self->{set_nc}->($self);
8196        }
8197      
8198            redo A;
8199        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
8200            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8201            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8202          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8203                    
8204      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 7904  sub _get_next_token ($) { Line 8211  sub _get_next_token ($) {
8211        $self->{set_nc}->($self);        $self->{set_nc}->($self);
8212      }      }
8213        
8214          return  ($self->{ct}); # ENTITY          return  ($self->{ct}); # ELEMENT
8215          redo A;          redo A;
8216        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
8217          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8218            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8219          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8220                    
8221      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 7920  sub _get_next_token ($) { Line 8228  sub _get_next_token ($) {
8228        $self->{set_nc}->($self);        $self->{set_nc}->($self);
8229      }      }
8230        
8231          return  ($self->{ct}); # ENTITY          return  ($self->{ct}); # ELEMENT
8232            redo A;
8233          } else {
8234            push @{$self->{ct}->{content}}, chr $self->{nc};
8235            $self->{state} = CM_ELEMENT_NAME_STATE;
8236            
8237        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8238          $self->{line_prev} = $self->{line};
8239          $self->{column_prev} = $self->{column};
8240          $self->{column}++;
8241          $self->{nc}
8242              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8243        } else {
8244          $self->{set_nc}->($self);
8245        }
8246      
8247            redo A;
8248          }
8249        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
8250          if ($is_space->{$self->{nc}}) {
8251            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8252            
8253        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8254          $self->{line_prev} = $self->{line};
8255          $self->{column_prev} = $self->{column};
8256          $self->{column}++;
8257          $self->{nc}
8258              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8259        } else {
8260          $self->{set_nc}->($self);
8261        }
8262      
8263            redo A;
8264          } elsif ($self->{nc} == 0x002A or # *
8265                   $self->{nc} == 0x002B or # +
8266                   $self->{nc} == 0x003F) { # ?
8267            push @{$self->{ct}->{content}}, chr $self->{nc};
8268            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8269            
8270        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8271          $self->{line_prev} = $self->{line};
8272          $self->{column_prev} = $self->{column};
8273          $self->{column}++;
8274          $self->{nc}
8275              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8276        } else {
8277          $self->{set_nc}->($self);
8278        }
8279      
8280            redo A;
8281          } elsif ($self->{nc} == 0x007C or # |
8282                   $self->{nc} == 0x002C) { # ,
8283            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8284            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8285            
8286        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8287          $self->{line_prev} = $self->{line};
8288          $self->{column_prev} = $self->{column};
8289          $self->{column}++;
8290          $self->{nc}
8291              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8292        } else {
8293          $self->{set_nc}->($self);
8294        }
8295      
8296            redo A;
8297          } elsif ($self->{nc} == 0x0029) { # )
8298            $self->{group_depth}--;
8299            push @{$self->{ct}->{content}}, chr $self->{nc};
8300            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8301            
8302        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8303          $self->{line_prev} = $self->{line};
8304          $self->{column_prev} = $self->{column};
8305          $self->{column}++;
8306          $self->{nc}
8307              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8308        } else {
8309          $self->{set_nc}->($self);
8310        }
8311      
8312            redo A;
8313          } elsif ($self->{nc} == 0x003E) { # >
8314            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8315            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8316            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8317            
8318        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8319          $self->{line_prev} = $self->{line};
8320          $self->{column_prev} = $self->{column};
8321          $self->{column}++;
8322          $self->{nc}
8323              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8324        } else {
8325          $self->{set_nc}->($self);
8326        }
8327      
8328            return  ($self->{ct}); # ELEMENT
8329            redo A;
8330          } elsif ($self->{nc} == -1) {
8331            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8332            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8333            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8334            
8335        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8336          $self->{line_prev} = $self->{line};
8337          $self->{column_prev} = $self->{column};
8338          $self->{column}++;
8339          $self->{nc}
8340              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8341        } else {
8342          $self->{set_nc}->($self);
8343        }
8344      
8345            return  ($self->{ct}); # ELEMENT
8346            redo A;
8347          } else {
8348            $self->{ct}->{content}->[-1] .= chr $self->{nc};
8349            ## Stay in the state.
8350            
8351        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8352          $self->{line_prev} = $self->{line};
8353          $self->{column_prev} = $self->{column};
8354          $self->{column}++;
8355          $self->{nc}
8356              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8357        } else {
8358          $self->{set_nc}->($self);
8359        }
8360      
8361            redo A;
8362          }
8363        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
8364          if ($is_space->{$self->{nc}}) {
8365            ## Stay in the state.
8366            
8367        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8368          $self->{line_prev} = $self->{line};
8369          $self->{column_prev} = $self->{column};
8370          $self->{column}++;
8371          $self->{nc}
8372              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8373        } else {
8374          $self->{set_nc}->($self);
8375        }
8376      
8377            redo A;
8378          } elsif ($self->{nc} == 0x007C or # |
8379                   $self->{nc} == 0x002C) { # ,
8380            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8381            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8382            
8383        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8384          $self->{line_prev} = $self->{line};
8385          $self->{column_prev} = $self->{column};
8386          $self->{column}++;
8387          $self->{nc}
8388              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8389        } else {
8390          $self->{set_nc}->($self);
8391        }
8392      
8393            redo A;
8394          } elsif ($self->{nc} == 0x0029) { # )
8395            $self->{group_depth}--;
8396            push @{$self->{ct}->{content}}, chr $self->{nc};
8397            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8398            
8399        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8400          $self->{line_prev} = $self->{line};
8401          $self->{column_prev} = $self->{column};
8402          $self->{column}++;
8403          $self->{nc}
8404              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8405        } else {
8406          $self->{set_nc}->($self);
8407        }
8408      
8409            redo A;
8410          } elsif ($self->{nc} == 0x003E) { # >
8411            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8412            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8413            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8414            
8415        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8416          $self->{line_prev} = $self->{line};
8417          $self->{column_prev} = $self->{column};
8418          $self->{column}++;
8419          $self->{nc}
8420              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8421        } else {
8422          $self->{set_nc}->($self);
8423        }
8424      
8425            return  ($self->{ct}); # ELEMENT
8426            redo A;
8427          } elsif ($self->{nc} == -1) {
8428            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8429            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8430            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8431            
8432        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8433          $self->{line_prev} = $self->{line};
8434          $self->{column_prev} = $self->{column};
8435          $self->{column}++;
8436          $self->{nc}
8437              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8438        } else {
8439          $self->{set_nc}->($self);
8440        }
8441      
8442            return  ($self->{ct}); # ELEMENT
8443            redo A;
8444          } else {
8445            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after element name'); ## TODO: type
8446            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8447            $self->{state} = BOGUS_MD_STATE;
8448            
8449        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8450          $self->{line_prev} = $self->{line};
8451          $self->{column_prev} = $self->{column};
8452          $self->{column}++;
8453          $self->{nc}
8454              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8455        } else {
8456          $self->{set_nc}->($self);
8457        }
8458      
8459            redo A;
8460          }
8461        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
8462          if ($is_space->{$self->{nc}}) {
8463            if ($self->{group_depth}) {
8464              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8465            } else {
8466              $self->{state} = AFTER_MD_DEF_STATE;
8467            }
8468            
8469        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8470          $self->{line_prev} = $self->{line};
8471          $self->{column_prev} = $self->{column};
8472          $self->{column}++;
8473          $self->{nc}
8474              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8475        } else {
8476          $self->{set_nc}->($self);
8477        }
8478      
8479            redo A;
8480          } elsif ($self->{nc} == 0x002A or # *
8481                   $self->{nc} == 0x002B or # +
8482                   $self->{nc} == 0x003F) { # ?
8483            push @{$self->{ct}->{content}}, chr $self->{nc};
8484            if ($self->{group_depth}) {
8485              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8486            } else {
8487              $self->{state} = AFTER_MD_DEF_STATE;
8488            }
8489            
8490        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8491          $self->{line_prev} = $self->{line};
8492          $self->{column_prev} = $self->{column};
8493          $self->{column}++;
8494          $self->{nc}
8495              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8496        } else {
8497          $self->{set_nc}->($self);
8498        }
8499      
8500            redo A;
8501          } elsif ($self->{nc} == 0x0029) { # )
8502            if ($self->{group_depth}) {
8503              $self->{group_depth}--;
8504              push @{$self->{ct}->{content}}, chr $self->{nc};
8505              ## Stay in the state.
8506              
8507        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8508          $self->{line_prev} = $self->{line};
8509          $self->{column_prev} = $self->{column};
8510          $self->{column}++;
8511          $self->{nc}
8512              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8513        } else {
8514          $self->{set_nc}->($self);
8515        }
8516      
8517              redo A;
8518            } else {
8519              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8520              $self->{state} = BOGUS_MD_STATE;
8521              ## Reconsume.
8522              redo A;
8523            }
8524          } elsif ($self->{nc} == 0x003E) { # >
8525            if ($self->{group_depth}) {
8526              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8527              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8528            }
8529            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8530            
8531        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8532          $self->{line_prev} = $self->{line};
8533          $self->{column_prev} = $self->{column};
8534          $self->{column}++;
8535          $self->{nc}
8536              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8537        } else {
8538          $self->{set_nc}->($self);
8539        }
8540      
8541            return  ($self->{ct}); # ELEMENT
8542            redo A;
8543          } elsif ($self->{nc} == -1) {
8544            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8545            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8546            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8547            
8548        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8549          $self->{line_prev} = $self->{line};
8550          $self->{column_prev} = $self->{column};
8551          $self->{column}++;
8552          $self->{nc}
8553              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8554        } else {
8555          $self->{set_nc}->($self);
8556        }
8557      
8558            return  ($self->{ct}); # ELEMENT
8559            redo A;
8560          } else {
8561            if ($self->{group_depth}) {
8562              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8563            } else {
8564              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8565              $self->{state} = BOGUS_MD_STATE;
8566            }
8567            ## Reconsume.
8568            redo A;
8569          }
8570        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
8571          if ($is_space->{$self->{nc}}) {
8572            ## Stay in the state.
8573            
8574        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8575          $self->{line_prev} = $self->{line};
8576          $self->{column_prev} = $self->{column};
8577          $self->{column}++;
8578          $self->{nc}
8579              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8580        } else {
8581          $self->{set_nc}->($self);
8582        }
8583      
8584            redo A;
8585          } elsif ($self->{nc} == 0x003E) { # >
8586            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8587            
8588        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8589          $self->{line_prev} = $self->{line};
8590          $self->{column_prev} = $self->{column};
8591          $self->{column}++;
8592          $self->{nc}
8593              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8594        } else {
8595          $self->{set_nc}->($self);
8596        }
8597      
8598            return  ($self->{ct}); # ENTITY/ELEMENT
8599            redo A;
8600          } elsif ($self->{nc} == -1) {
8601            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8602            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8603            
8604        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8605          $self->{line_prev} = $self->{line};
8606          $self->{column_prev} = $self->{column};
8607          $self->{column}++;
8608          $self->{nc}
8609              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8610        } else {
8611          $self->{set_nc}->($self);
8612        }
8613      
8614            return  ($self->{ct}); # ENTITY/ELEMENT
8615          redo A;          redo A;
8616        } else {        } else {
8617          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after notation name'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8618          $self->{state} = BOGUS_MD_STATE;          $self->{state} = BOGUS_MD_STATE;
8619          ## Reconsume.          ## Reconsume.
8620          redo A;          redo A;

Legend:
Removed from v.1.19  
changed lines
  Added in v.1.28

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24