/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.18 by wakaba, Sun Oct 19 06:14:57 2008 UTC | revision 1.21 by wakaba, Sun Oct 19 09:25:21 2008 UTC
# Line 182  sub NDATA_STATE () { 86 } Line 182  sub NDATA_STATE () { 86 }
182  sub AFTER_NDATA_STATE () { 87 }  sub AFTER_NDATA_STATE () { 87 }
183  sub BEFORE_NOTATION_NAME_STATE () { 88 }  sub BEFORE_NOTATION_NAME_STATE () { 88 }
184  sub NOTATION_NAME_STATE () { 89 }  sub NOTATION_NAME_STATE () { 89 }
185  sub AFTER_NOTATION_NAME_STATE () { 90 }  sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
186  sub BOGUS_MD_STATE () { 91 }  sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
187    sub ENTITY_VALUE_ENTITY_STATE () { 92 }
188    sub AFTER_ELEMENT_NAME_STATE () { 93 }
189    sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
190    sub CONTENT_KEYWORD_STATE () { 95 }
191    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
192    sub CM_ELEMENT_NAME_STATE () { 97 }
193    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
194    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
195    sub AFTER_MD_DEF_STATE () { 100 }
196    sub BOGUS_MD_STATE () { 101 }
197    
198  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
199  ## list and descriptions)  ## list and descriptions)
# Line 2262  sub _get_next_token ($) { Line 2272  sub _get_next_token ($) {
2272          $self->{kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2273          !!!next-input-character;          !!!next-input-character;
2274          redo A;          redo A;
2275  ## TODO: " and ' for ENTITY        } elsif ($self->{nc} == 0x0022 and # "
2276                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
2277                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
2278            !!!cp (167.21);
2279            $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
2280            $self->{ct}->{value} = ''; # ENTITY
2281            !!!next-input-character;
2282            redo A;
2283          } elsif ($self->{nc} == 0x0027 and # '
2284                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
2285                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
2286            !!!cp (167.22);
2287            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
2288            $self->{ct}->{value} = ''; # ENTITY
2289            !!!next-input-character;
2290            redo A;
2291        } elsif ($self->{is_xml} and        } elsif ($self->{is_xml} and
2292                 $self->{ct}->{type} == DOCTYPE_TOKEN and                 $self->{ct}->{type} == DOCTYPE_TOKEN and
2293                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
# Line 3108  sub _get_next_token ($) { Line 3133  sub _get_next_token ($) {
3133          redo A;          redo A;
3134        }        }
3135      } elsif ($self->{state} == ENTITY_HASH_STATE) {      } elsif ($self->{state} == ENTITY_HASH_STATE) {
3136        if ($self->{nc} == 0x0078 or # x        if ($self->{nc} == 0x0078) { # x
           $self->{nc} == 0x0058) { # X  
3137          !!!cp (995);          !!!cp (995);
3138          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
3139          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3140          !!!next-input-character;          !!!next-input-character;
3141          redo A;          redo A;
3142          } elsif ($self->{nc} == 0x0058) { # X
3143            !!!cp (995.1);
3144            if ($self->{is_xml}) {
3145              !!!parse-error (type => 'uppercase hcro'); ## TODO: type
3146            }
3147            $self->{state} = HEXREF_X_STATE;
3148            $self->{kwd} .= chr $self->{nc};
3149            !!!next-input-character;
3150            redo A;
3151        } elsif (0x0030 <= $self->{nc} and        } elsif (0x0030 <= $self->{nc} and
3152                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
3153          !!!cp (994);          !!!cp (994);
# Line 3322  sub _get_next_token ($) { Line 3355  sub _get_next_token ($) {
3355          redo A;          redo A;
3356        }        }
3357      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
3358        if (length $self->{kwd} < 30 and        if ((0x0041 <= $self->{nc} and # a
3359            ## NOTE: Some number greater than the maximum length of entity name             $self->{nc} <= 0x005A) or # x
3360            ((0x0041 <= $self->{nc} and # a            (0x0061 <= $self->{nc} and # a
3361              $self->{nc} <= 0x005A) or # x             $self->{nc} <= 0x007A) or # z
3362             (0x0061 <= $self->{nc} and # a            (0x0030 <= $self->{nc} and # 0
3363              $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x0039) or # 9
3364             (0x0030 <= $self->{nc} and # 0            $self->{nc} == 0x003B) { # ;
             $self->{nc} <= 0x0039) or # 9  
            $self->{nc} == 0x003B)) { # ;  
3365          our $EntityChar;          our $EntityChar;
3366          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3367          if (defined $EntityChar->{$self->{kwd}}) {          if (defined $EntityChar->{$self->{kwd}} or
3368                $self->{ge}->{$self->{kwd}}) {
3369            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
3370              !!!cp (1020);              if (defined $self->{ge}->{$self->{kwd}}) {
3371              $self->{entity__value} = $EntityChar->{$self->{kwd}};                if ($self->{ge}->{$self->{kwd}}->{only_text}) {
3372                    !!!cp (1020.1);
3373                    $self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value};
3374                  } else {
3375                    if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
3376                      !!!cp (1020.2);
3377                      !!!parse-error (type => 'unparsed entity', ## TODO: type
3378                                      value => $self->{kwd});
3379                    } else {
3380                      !!!cp (1020.3);
3381                    }
3382                    $self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand
3383                  }
3384                } else {
3385                  if ($self->{is_xml}) {
3386                    !!!cp (1020.4);
3387                    !!!parse-error (type => 'entity not declared', ## TODO: type
3388                                    value => $self->{kwd},
3389                                    level => {
3390                                              'amp;' => $self->{level}->{warn},
3391                                              'quot;' => $self->{level}->{warn},
3392                                              'lt;' => $self->{level}->{warn},
3393                                              'gt;' => $self->{level}->{warn},
3394                                              'apos;' => $self->{level}->{warn},
3395                                             }->{$self->{kwd}} ||
3396                                             $self->{level}->{must});
3397                  } else {
3398                    !!!cp (1020);
3399                  }
3400                  $self->{entity__value} = $EntityChar->{$self->{kwd}};
3401                }
3402              $self->{entity__match} = 1;              $self->{entity__match} = 1;
3403              !!!next-input-character;              !!!next-input-character;
3404              #              #
# Line 4025  sub _get_next_token ($) { Line 4087  sub _get_next_token ($) {
4087          if ($self->{ct}->{type} == ATTLIST_TOKEN) {          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
4088            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
4089          } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {          } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
4090            ## TODO: ...            $self->{state} = AFTER_ELEMENT_NAME_STATE;
           $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;  
4091          } else { # ENTITY/NOTATION          } else { # ENTITY/NOTATION
4092            $self->{state} = AFTER_DOCTYPE_NAME_STATE;            $self->{state} = AFTER_DOCTYPE_NAME_STATE;
4093          }          }
# Line 4649  sub _get_next_token ($) { Line 4710  sub _get_next_token ($) {
4710        }        }
4711      } elsif ($self->{state} == NOTATION_NAME_STATE) {      } elsif ($self->{state} == NOTATION_NAME_STATE) {
4712        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4713          $self->{state} = AFTER_NOTATION_NAME_STATE;          $self->{state} = AFTER_MD_DEF_STATE;
4714          !!!next-input-character;          !!!next-input-character;
4715          redo A;          redo A;
4716        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
# Line 4669  sub _get_next_token ($) { Line 4730  sub _get_next_token ($) {
4730          !!!next-input-character;          !!!next-input-character;
4731          redo A;          redo A;
4732        }        }
4733      } elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
4734          if ($self->{nc} == 0x0022) { # "
4735            $self->{state} = AFTER_MD_DEF_STATE;
4736            !!!next-input-character;
4737            redo A;
4738          } elsif ($self->{nc} == 0x0026) { # &
4739            $self->{prev_state} = $self->{state};
4740            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
4741            $self->{entity_add} = 0x0022; # "
4742            !!!next-input-character;
4743            redo A;
4744    ## TODO: %
4745          } elsif ($self->{nc} == -1) {
4746            !!!parse-error (type => 'unclosed entity value'); ## TODO: type
4747            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4748            ## Reconsume.
4749            !!!emit ($self->{ct}); # ENTITY
4750            redo A;
4751          } else {
4752            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
4753            !!!next-input-character;
4754            redo A;
4755          }
4756        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
4757          if ($self->{nc} == 0x0027) { # '
4758            $self->{state} = AFTER_MD_DEF_STATE;
4759            !!!next-input-character;
4760            redo A;
4761          } elsif ($self->{nc} == 0x0026) { # &
4762            $self->{prev_state} = $self->{state};
4763            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
4764            $self->{entity_add} = 0x0027; # '
4765            !!!next-input-character;
4766            redo A;
4767    ## TODO: %
4768          } elsif ($self->{nc} == -1) {
4769            !!!parse-error (type => 'unclosed entity value'); ## TODO: type
4770            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4771            ## Reconsume.
4772            !!!emit ($self->{ct}); # ENTITY
4773            redo A;
4774          } else {
4775            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
4776            !!!next-input-character;
4777            redo A;
4778          }
4779        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
4780          ## TODO: XMLize
4781    
4782          if ($is_space->{$self->{nc}} or
4783              {
4784                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4785                $self->{entity_add} => 1,
4786              }->{$self->{nc}}) {
4787            ## Don't consume
4788            ## No error
4789            ## Return nothing.
4790            #
4791          } elsif ($self->{nc} == 0x0023) { # #
4792            $self->{ca} = $self->{ct};
4793            $self->{state} = ENTITY_HASH_STATE;
4794            $self->{kwd} = '#';
4795            !!!next-input-character;
4796            redo A;
4797          } elsif ((0x0041 <= $self->{nc} and
4798                    $self->{nc} <= 0x005A) or # A..Z
4799                   (0x0061 <= $self->{nc} and
4800                    $self->{nc} <= 0x007A)) { # a..z
4801            #
4802          } else {
4803            !!!parse-error (type => 'bare ero');
4804            ## Return nothing.
4805            #
4806          }
4807    
4808          $self->{ct}->{value} .= '&';
4809          $self->{state} = $self->{prev_state};
4810          ## Reconsume.
4811          redo A;
4812        } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
4813          if ($is_space->{$self->{nc}}) {
4814            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
4815            !!!next-input-character;
4816            redo A;
4817          } elsif ($self->{nc} == 0x0028) { # (
4818            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
4819            $self->{ct}->{content} = ['('];
4820            $self->{group_depth} = 1;
4821            !!!next-input-character;
4822            redo A;
4823          } elsif ($self->{nc} == 0x003E) { # >
4824            !!!parse-error (type => 'no md def'); ## TODO: type
4825            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4826            !!!next-input-character;
4827            !!!emit ($self->{ct}); # ELEMENT
4828            redo A;
4829          } elsif ($self->{nc} == -1) {
4830            !!!parse-error (type => 'unclosed md'); ## TODO: type
4831            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4832            !!!next-input-character;
4833            !!!emit ($self->{ct}); # ELEMENT
4834            redo A;
4835          } else {
4836            $self->{ct}->{content} = [chr $self->{nc}];
4837            $self->{state} = CONTENT_KEYWORD_STATE;
4838            !!!next-input-character;
4839            redo A;
4840          }
4841        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
4842          if ($is_space->{$self->{nc}}) {
4843            $self->{state} = AFTER_MD_DEF_STATE;
4844            !!!next-input-character;
4845            redo A;
4846          } elsif ($self->{nc} == 0x003E) { # >
4847            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4848            !!!next-input-character;
4849            !!!emit ($self->{ct}); # ELEMENT
4850            redo A;
4851          } elsif ($self->{nc} == -1) {
4852            !!!parse-error (type => 'unclosed md'); ## TODO: type
4853            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4854            !!!next-input-character;
4855            !!!emit ($self->{ct}); # ELEMENT
4856            redo A;
4857          } else {
4858            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
4859            ## Stay in the state.
4860            !!!next-input-character;
4861            redo A;
4862          }
4863        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
4864        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4865          ## Stay in the state.          ## Stay in the state.
4866          !!!next-input-character;          !!!next-input-character;
4867          redo A;          redo A;
4868          } elsif ($self->{nc} == 0x0028) { # (
4869            $self->{group_depth}++;
4870            push @{$self->{ct}->{content}}, chr $self->{nc};
4871            ## Stay in the state.
4872            !!!next-input-character;
4873            redo A;
4874          } elsif ($self->{nc} == 0x007C or # |
4875                   $self->{nc} == 0x002C) { # ,
4876            !!!parse-error (type => 'empty element name'); ## TODO: type
4877            ## Stay in the state.
4878            !!!next-input-character;
4879            redo A;
4880          } elsif ($self->{nc} == 0x0029) { # )
4881            !!!parse-error (type => 'empty element name'); ## TODO: type
4882            push @{$self->{ct}->{content}}, chr $self->{nc};
4883            $self->{group_depth}--;
4884            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
4885            !!!next-input-character;
4886            redo A;
4887        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4888            !!!parse-error (type => 'unclosed cm group'); ## TODO: type
4889            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
4890          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4891          !!!next-input-character;          !!!next-input-character;
4892          !!!emit ($self->{ct}); # ENTITY          !!!emit ($self->{ct}); # ELEMENT
4893          redo A;          redo A;
4894        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4895          !!!parse-error (type => 'unclosed md'); ## TODO: type          !!!parse-error (type => 'unclosed md'); ## TODO: type
4896            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
4897          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4898          !!!next-input-character;          !!!next-input-character;
4899          !!!emit ($self->{ct}); # ENTITY          !!!emit ($self->{ct}); # ELEMENT
4900            redo A;
4901          } else {
4902            push @{$self->{ct}->{content}}, chr $self->{nc};
4903            $self->{state} = CM_ELEMENT_NAME_STATE;
4904            !!!next-input-character;
4905            redo A;
4906          }
4907        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
4908          if ($is_space->{$self->{nc}}) {
4909            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
4910            !!!next-input-character;
4911            redo A;
4912          } elsif ($self->{nc} == 0x002A or # *
4913                   $self->{nc} == 0x002B or # +
4914                   $self->{nc} == 0x003F) { # ?
4915            push @{$self->{ct}->{content}}, chr $self->{nc};
4916            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
4917            !!!next-input-character;
4918            redo A;
4919          } elsif ($self->{nc} == 0x007C or # |
4920                   $self->{nc} == 0x002C) { # ,
4921            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
4922            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
4923            !!!next-input-character;
4924            redo A;
4925          } elsif ($self->{nc} == 0x0029) { # )
4926            $self->{group_depth}--;
4927            push @{$self->{ct}->{content}}, chr $self->{nc};
4928            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
4929            !!!next-input-character;
4930            redo A;
4931          } elsif ($self->{nc} == 0x003E) { # >
4932            !!!parse-error (type => 'unclosed cm group'); ## TODO: type
4933            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
4934            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4935            !!!next-input-character;
4936            !!!emit ($self->{ct}); # ELEMENT
4937            redo A;
4938          } elsif ($self->{nc} == -1) {
4939            !!!parse-error (type => 'unclosed md'); ## TODO: type
4940            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
4941            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4942            !!!next-input-character;
4943            !!!emit ($self->{ct}); # ELEMENT
4944            redo A;
4945          } else {
4946            $self->{ct}->{content}->[-1] .= chr $self->{nc};
4947            ## Stay in the state.
4948            !!!next-input-character;
4949            redo A;
4950          }
4951        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
4952          if ($is_space->{$self->{nc}}) {
4953            ## Stay in the state.
4954            !!!next-input-character;
4955            redo A;
4956          } elsif ($self->{nc} == 0x007C or # |
4957                   $self->{nc} == 0x002C) { # ,
4958            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
4959            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
4960            !!!next-input-character;
4961            redo A;
4962          } elsif ($self->{nc} == 0x0029) { # )
4963            $self->{group_depth}--;
4964            push @{$self->{ct}->{content}}, chr $self->{nc};
4965            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
4966            !!!next-input-character;
4967            redo A;
4968          } elsif ($self->{nc} == 0x003E) { # >
4969            !!!parse-error (type => 'unclosed cm group'); ## TODO: type
4970            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
4971            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4972            !!!next-input-character;
4973            !!!emit ($self->{ct}); # ELEMENT
4974            redo A;
4975          } elsif ($self->{nc} == -1) {
4976            !!!parse-error (type => 'unclosed md'); ## TODO: type
4977            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
4978            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4979            !!!next-input-character;
4980            !!!emit ($self->{ct}); # ELEMENT
4981          redo A;          redo A;
4982        } else {        } else {
4983          !!!parse-error (type => 'string after notation name'); ## TODO: type          !!!parse-error (type => 'after element name'); ## TODO: type
4984            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
4985            $self->{state} = BOGUS_MD_STATE;
4986            !!!next-input-character;
4987            redo A;
4988          }
4989        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
4990          if ($is_space->{$self->{nc}}) {
4991            if ($self->{group_depth}) {
4992              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
4993            } else {
4994              $self->{state} = AFTER_MD_DEF_STATE;
4995            }
4996            !!!next-input-character;
4997            redo A;
4998          } elsif ($self->{nc} == 0x002A or # *
4999                   $self->{nc} == 0x002B or # +
5000                   $self->{nc} == 0x003F) { # ?
5001            push @{$self->{ct}->{content}}, chr $self->{nc};
5002            if ($self->{group_depth}) {
5003              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
5004            } else {
5005              $self->{state} = AFTER_MD_DEF_STATE;
5006            }
5007            !!!next-input-character;
5008            redo A;
5009          } elsif ($self->{nc} == 0x0029) { # )
5010            if ($self->{group_depth}) {
5011              $self->{group_depth}--;
5012              push @{$self->{ct}->{content}}, chr $self->{nc};
5013              ## Stay in the state.
5014              !!!next-input-character;
5015              redo A;
5016            } else {
5017              !!!parse-error (type => 'string after md def'); ## TODO: type
5018              $self->{state} = BOGUS_MD_STATE;
5019              ## Reconsume.
5020              redo A;
5021            }
5022          } elsif ($self->{nc} == 0x003E) { # >
5023            if ($self->{group_depth}) {
5024              !!!parse-error (type => 'unclosed cm group'); ## TODO: type
5025              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
5026            }
5027            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5028            !!!next-input-character;
5029            !!!emit ($self->{ct}); # ELEMENT
5030            redo A;
5031          } elsif ($self->{nc} == -1) {
5032            !!!parse-error (type => 'unclosed md'); ## TODO: type
5033            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
5034            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5035            !!!next-input-character;
5036            !!!emit ($self->{ct}); # ELEMENT
5037            redo A;
5038          } else {
5039            if ($self->{group_depth}) {
5040              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
5041            } else {
5042              !!!parse-error (type => 'string after md def'); ## TODO: type
5043              $self->{state} = BOGUS_MD_STATE;
5044            }
5045            ## Reconsume.
5046            redo A;
5047          }
5048        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
5049          if ($is_space->{$self->{nc}}) {
5050            ## Stay in the state.
5051            !!!next-input-character;
5052            redo A;
5053          } elsif ($self->{nc} == 0x003E) { # >
5054            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5055            !!!next-input-character;
5056            !!!emit ($self->{ct}); # ENTITY/ELEMENT
5057            redo A;
5058          } elsif ($self->{nc} == -1) {
5059            !!!parse-error (type => 'unclosed md'); ## TODO: type
5060            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5061            !!!next-input-character;
5062            !!!emit ($self->{ct}); # ENTITY/ELEMENT
5063            redo A;
5064          } else {
5065            !!!parse-error (type => 'string after md def'); ## TODO: type
5066          $self->{state} = BOGUS_MD_STATE;          $self->{state} = BOGUS_MD_STATE;
5067          ## Reconsume.          ## Reconsume.
5068          redo A;          redo A;
5069        }        }
   
   
5070      } elsif ($self->{state} == BOGUS_MD_STATE) {      } elsif ($self->{state} == BOGUS_MD_STATE) {
5071        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5072          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.21

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24