/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.17 by wakaba, Sun Oct 19 04:39:25 2008 UTC | revision 1.18 by wakaba, Sun Oct 19 06:14:57 2008 UTC
# Line 177  sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATIO Line 177  sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATIO
177  sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }  sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178  sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }  sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179  sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }  sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180  sub BOGUS_MD_STATE () { 85 }  sub BEFORE_NDATA_STATE () { 85 }
181    sub NDATA_STATE () { 86 }
182    sub AFTER_NDATA_STATE () { 87 }
183    sub BEFORE_NOTATION_NAME_STATE () { 88 }
184    sub NOTATION_NAME_STATE () { 89 }
185    sub AFTER_NOTATION_NAME_STATE () { 90 }
186    sub BOGUS_MD_STATE () { 91 }
187    
188  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
189  ## list and descriptions)  ## list and descriptions)
# Line 2812  sub _get_next_token ($) { Line 2818  sub _get_next_token ($) {
2818        }        }
2819      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2820        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2821          !!!cp (215);          if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
2822          ## Stay in the state            !!!cp (215.1);
2823              $self->{state} = BEFORE_NDATA_STATE;
2824            } else {
2825              !!!cp (215);
2826              ## Stay in the state
2827            }
2828          !!!next-input-character;          !!!next-input-character;
2829          redo A;          redo A;
2830        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
# Line 2829  sub _get_next_token ($) { Line 2840  sub _get_next_token ($) {
2840          !!!next-input-character;          !!!next-input-character;
2841          !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION          !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
2842          redo A;          redo A;
2843  ## TODO: "NDATA"        } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
2844                   ($self->{nc} == 0x004E or # N
2845                    $self->{nc} == 0x006E)) { # n
2846            !!!cp (216.2);
2847            !!!parse-error (type => 'no space before NDATA'); ## TODO: type
2848            $self->{state} = NDATA_STATE;
2849            $self->{kwd} = chr $self->{nc};
2850            !!!next-input-character;
2851            redo A;
2852        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2853          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2854            !!!cp (217);            !!!cp (217);
# Line 2871  sub _get_next_token ($) { Line 2890  sub _get_next_token ($) {
2890          !!!next-input-character;          !!!next-input-character;
2891          redo A;          redo A;
2892        }        }
2893        } elsif ($self->{state} == BEFORE_NDATA_STATE) {
2894          if ($is_space->{$self->{nc}}) {
2895            !!!cp (218.3);
2896            ## Stay in the state.
2897            !!!next-input-character;
2898            redo A;
2899          } elsif ($self->{nc} == 0x003E) { # >
2900            !!!cp (218.4);
2901            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2902            !!!next-input-character;
2903            !!!emit ($self->{ct}); # ENTITY
2904            redo A;
2905          } elsif ($self->{nc} == 0x004E or # N
2906                   $self->{nc} == 0x006E) { # n
2907            !!!cp (218.5);
2908            $self->{state} = NDATA_STATE;
2909            $self->{kwd} = chr $self->{nc};
2910            !!!next-input-character;
2911            redo A;
2912          } elsif ($self->{nc} == -1) {
2913            !!!cp (218.6);
2914            !!!parse-error (type => 'unclosed md'); ## TODO: type
2915            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2916            ## reconsume
2917            !!!emit ($self->{ct}); # ENTITY
2918            redo A;
2919          } else {
2920            !!!cp (218.7);
2921            !!!parse-error (type => 'string after SYSTEM literal');
2922            $self->{state} = BOGUS_MD_STATE;
2923            !!!next-input-character;
2924            redo A;
2925          }
2926      } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {      } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2927        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2928          !!!cp (219);          !!!cp (219);
# Line 4502  sub _get_next_token ($) { Line 4554  sub _get_next_token ($) {
4554          ## Reconsume.          ## Reconsume.
4555          redo A;          redo A;
4556        }        }
4557        } elsif ($self->{state} == NDATA_STATE) {
4558          ## ASCII case-insensitive
4559          if ($self->{nc} == [
4560                undef,
4561                0x0044, # D
4562                0x0041, # A
4563                0x0054, # T
4564              ]->[length $self->{kwd}] or
4565              $self->{nc} == [
4566                undef,
4567                0x0064, # d
4568                0x0061, # a
4569                0x0074, # t
4570              ]->[length $self->{kwd}]) {
4571            !!!cp (172.2);
4572            ## Stay in the state.
4573            $self->{kwd} .= chr $self->{nc};
4574            !!!next-input-character;
4575            redo A;
4576          } elsif ((length $self->{kwd}) == 4 and
4577                   ($self->{nc} == 0x0041 or # A
4578                    $self->{nc} == 0x0061)) { # a
4579            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
4580              !!!cp (172.3);
4581              !!!parse-error (type => 'lowercase keyword', ## TODO: type
4582                              text => 'NDATA',
4583                              line => $self->{line_prev},
4584                              column => $self->{column_prev} - 4);
4585            } else {
4586              !!!cp (172.4);
4587            }
4588            $self->{state} = AFTER_NDATA_STATE;
4589            !!!next-input-character;
4590            redo A;
4591          } else {
4592            !!!parse-error (type => 'string after literal', ## TODO: type
4593                            line => $self->{line_prev},
4594                            column => $self->{column_prev} + 1
4595                                - length $self->{kwd});
4596            !!!cp (172.5);
4597            $self->{state} = BOGUS_MD_STATE;
4598            ## Reconsume.
4599            redo A;
4600          }
4601        } elsif ($self->{state} == AFTER_NDATA_STATE) {
4602          if ($is_space->{$self->{nc}}) {
4603            $self->{state} = BEFORE_NOTATION_NAME_STATE;
4604            !!!next-input-character;
4605            redo A;
4606          } elsif ($self->{nc} == 0x003E) { # >
4607            !!!parse-error (type => 'no notation name'); ## TODO: type
4608            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4609            !!!next-input-character;
4610            !!!emit ($self->{ct}); # ENTITY
4611            redo A;
4612          } elsif ($self->{nc} == -1) {
4613            !!!parse-error (type => 'unclosed md'); ## TODO: type
4614            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4615            !!!next-input-character;
4616            !!!emit ($self->{ct}); # ENTITY
4617            redo A;
4618          } else {
4619            !!!parse-error (type => 'string after literal', ## TODO: type
4620                            line => $self->{line_prev},
4621                            column => $self->{column_prev} + 1
4622                                - length $self->{kwd});
4623            $self->{state} = BOGUS_MD_STATE;
4624            ## Reconsume.
4625            redo A;
4626          }
4627        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
4628          if ($is_space->{$self->{nc}}) {
4629            ## Stay in the state.
4630            !!!next-input-character;
4631            redo A;
4632          } elsif ($self->{nc} == 0x003E) { # >
4633            !!!parse-error (type => 'no notation name'); ## TODO: type
4634            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4635            !!!next-input-character;
4636            !!!emit ($self->{ct}); # ENTITY
4637            redo A;
4638          } elsif ($self->{nc} == -1) {
4639            !!!parse-error (type => 'unclosed md'); ## TODO: type
4640            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4641            !!!next-input-character;
4642            !!!emit ($self->{ct}); # ENTITY
4643            redo A;
4644          } else {
4645            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
4646            $self->{state} = NOTATION_NAME_STATE;
4647            !!!next-input-character;
4648            redo A;
4649          }
4650        } elsif ($self->{state} == NOTATION_NAME_STATE) {
4651          if ($is_space->{$self->{nc}}) {
4652            $self->{state} = AFTER_NOTATION_NAME_STATE;
4653            !!!next-input-character;
4654            redo A;
4655          } elsif ($self->{nc} == 0x003E) { # >
4656            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4657            !!!next-input-character;
4658            !!!emit ($self->{ct}); # ENTITY
4659            redo A;
4660          } elsif ($self->{nc} == -1) {
4661            !!!parse-error (type => 'unclosed md'); ## TODO: type
4662            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4663            !!!next-input-character;
4664            !!!emit ($self->{ct}); # ENTITY
4665            redo A;
4666          } else {
4667            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
4668            ## Stay in the state.
4669            !!!next-input-character;
4670            redo A;
4671          }
4672        } elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) {
4673          if ($is_space->{$self->{nc}}) {
4674            ## Stay in the state.
4675            !!!next-input-character;
4676            redo A;
4677          } elsif ($self->{nc} == 0x003E) { # >
4678            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4679            !!!next-input-character;
4680            !!!emit ($self->{ct}); # ENTITY
4681            redo A;
4682          } elsif ($self->{nc} == -1) {
4683            !!!parse-error (type => 'unclosed md'); ## TODO: type
4684            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4685            !!!next-input-character;
4686            !!!emit ($self->{ct}); # ENTITY
4687            redo A;
4688          } else {
4689            !!!parse-error (type => 'string after notation name'); ## TODO: type
4690            $self->{state} = BOGUS_MD_STATE;
4691            ## Reconsume.
4692            redo A;
4693          }
4694    
4695    
4696      } elsif ($self->{state} == BOGUS_MD_STATE) {      } elsif ($self->{state} == BOGUS_MD_STATE) {
4697        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >

Legend:
Removed from v.1.17  
changed lines
  Added in v.1.18

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24