/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.13 by wakaba, Thu Oct 16 03:39:57 2008 UTC | revision 1.14 by wakaba, Fri Oct 17 07:14:29 2008 UTC
# Line 16  BEGIN { Line 16  BEGIN {
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18      END_OF_DOCTYPE_TOKEN      END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 29  BEGIN { Line 34  BEGIN {
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36        END_OF_DOCTYPE_TOKEN        END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
# Line 45  sub END_OF_FILE_TOKEN () { 5 } Line 55  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } ## NOTE: XML only.  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58  sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only  sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65  ## XML5: XML5 has "empty tag token".  In this implementation, it is  ## XML5: XML5 has "empty tag token".  In this implementation, it is
66  ## represented as a start tag token with $self->{self_closing} flag  ## represented as a start tag token with $self->{self_closing} flag
# Line 136  sub PI_AFTER_STATE () { 55 } Line 151  sub PI_AFTER_STATE () { 55 }
151  sub PI_DATA_AFTER_STATE () { 56 }  sub PI_DATA_AFTER_STATE () { 56 }
152  sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }  sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153  sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }  sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154  sub DOCTYPE_TAG_STATE () { 59 }  sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155  sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 60 }  sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    
168  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
169  ## list and descriptions)  ## list and descriptions)
# Line 1568  sub _get_next_token ($) { Line 1594  sub _get_next_token ($) {
1594          redo A;          redo A;
1595        }        }
1596      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1597          ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
1598    
1599        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
1600        ## consumes characters on a one-by-one basis.        ## consumes characters on a one-by-one basis.
1601                
# Line 1609  sub _get_next_token ($) { Line 1637  sub _get_next_token ($) {
1637          redo A;          redo A;
1638        }        }
1639      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1640        ## XML5: "Markup declaration state" and "DOCTYPE markup        ## XML5: "Markup declaration state".
       ## declaration state".  
1641                
1642        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
1643          !!!cp (133);          !!!cp (133);
# Line 1868  sub _get_next_token ($) { Line 1895  sub _get_next_token ($) {
1895          redo A;          redo A;
1896        }        }
1897      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
1898          ## XML5: "Comment state" and "DOCTYPE comment state".
1899    
1900        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
1901          !!!cp (145);          !!!cp (145);
1902          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 1900  sub _get_next_token ($) { Line 1929  sub _get_next_token ($) {
1929          redo A;          redo A;
1930        }        }
1931      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
1932        ## XML5: "comment dash state".        ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
1933    
1934        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
1935          !!!cp (148);          !!!cp (148);
# Line 1930  sub _get_next_token ($) { Line 1959  sub _get_next_token ($) {
1959          redo A;          redo A;
1960        }        }
1961      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
1962          ## XML5: "Comment end state" and "DOCTYPE comment end state".
1963    
1964        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
1965          if ($self->{in_subset}) {          if ($self->{in_subset}) {
1966            !!!cp (151.1);            !!!cp (151.1);
# Line 3130  sub _get_next_token ($) { Line 3161  sub _get_next_token ($) {
3161      ## XML-only states      ## XML-only states
3162    
3163      } elsif ($self->{state} == PI_STATE) {      } elsif ($self->{state} == PI_STATE) {
3164          ## XML5: "Pi state" and "DOCTYPE pi state".
3165    
3166        if ($is_space->{$self->{nc}} or        if ($is_space->{$self->{nc}} or
3167            $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"            $self->{nc} == 0x003F or # ?
3168            $self->{nc} == -1) {            $self->{nc} == -1) {
3169            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
3170            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
3171            ## "DOCTYPE pi state": Parse error, switch to the "data
3172            ## state".
3173          !!!parse-error (type => 'bare pio', ## TODO: type          !!!parse-error (type => 'bare pio', ## TODO: type
3174                          line => $self->{line_prev},                          line => $self->{line_prev},
3175                          column => $self->{column_prev}                          column => $self->{column_prev}
# Line 3147  sub _get_next_token ($) { Line 3184  sub _get_next_token ($) {
3184                        };                        };
3185          redo A;          redo A;
3186        } else {        } else {
3187            ## XML5: "DOCTYPE pi state": Stay in the state.
3188          $self->{ct} = {type => PI_TOKEN,          $self->{ct} = {type => PI_TOKEN,
3189                         target => chr $self->{nc},                         target => chr $self->{nc},
3190                         data => '',                         data => '',
# Line 3201  sub _get_next_token ($) { Line 3239  sub _get_next_token ($) {
3239        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3240          !!!parse-error (type => 'no pic'); ## TODO: type          !!!parse-error (type => 'no pic'); ## TODO: type
3241          if ($self->{in_subset}) {          if ($self->{in_subset}) {
3242            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
3243          } else {          } else {
3244            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
3245            $self->{s_kwd} = '';            $self->{s_kwd} = '';
# Line 3219  sub _get_next_token ($) { Line 3257  sub _get_next_token ($) {
3257          redo A;          redo A;
3258        }        }
3259      } elsif ($self->{state} == PI_AFTER_STATE) {      } elsif ($self->{state} == PI_AFTER_STATE) {
3260          ## XML5: Part of "Pi after state".
3261    
3262        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
3263          if ($self->{in_subset}) {          if ($self->{in_subset}) {
3264            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 3248  sub _get_next_token ($) { Line 3288  sub _get_next_token ($) {
3288          redo A;          redo A;
3289        }        }
3290      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
3291        ## XML5: Same as "pi after state" in XML5        ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
3292    
3293        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
3294          if ($self->{in_subset}) {          if ($self->{in_subset}) {
3295            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 3351  sub _get_next_token ($) { Line 3392  sub _get_next_token ($) {
3392        }        }
3393      } elsif ($self->{state} == DOCTYPE_TAG_STATE) {      } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
3394        if ($self->{nc} == 0x0021) { # !        if ($self->{nc} == 0x0021) { # !
3395          $self->{state} = MARKUP_DECLARATION_OPEN_STATE;          $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
3396          !!!next-input-character;          !!!next-input-character;
3397          redo A;          redo A;
3398        } elsif ($self->{nc} == 0x003F) { # ?        } elsif ($self->{nc} == 0x003F) { # ?
# Line 3375  sub _get_next_token ($) { Line 3416  sub _get_next_token ($) {
3416          !!!next-input-character;          !!!next-input-character;
3417          redo A;          redo A;
3418        }        }
3419                } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
3420          ## XML5: "DOCTYPE markup declaration state".
3421          
3422          if ($self->{nc} == 0x002D) { # -
3423            $self->{state} = MD_HYPHEN_STATE;
3424            !!!next-input-character;
3425            redo A;
3426          } elsif ($self->{nc} == 0x0045) { # E
3427            $self->{state} = MD_E_STATE;
3428            $self->{kwd} = chr $self->{nc};
3429            !!!next-input-character;
3430            redo A;
3431          } elsif ($self->{nc} == 0x0041) { # A
3432            $self->{state} = MD_ATTLIST_STATE;
3433            $self->{kwd} = chr $self->{nc};
3434            !!!next-input-character;
3435            redo A;
3436          } elsif ($self->{nc} == 0x004E) { # N
3437            $self->{state} = MD_NOTATION_STATE;
3438            $self->{kwd} = chr $self->{nc};
3439            !!!next-input-character;
3440            redo A;
3441          } else {
3442            #
3443          }
3444          
3445          ## XML5: No parse error.
3446          !!!parse-error (type => 'bogus comment',
3447                          line => $self->{line_prev},
3448                          column => $self->{column_prev} - 1);
3449          ## Reconsume.
3450          $self->{state} = BOGUS_COMMENT_STATE;
3451          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
3452          redo A;
3453        } elsif ($self->{state} == MD_E_STATE) {
3454          if ($self->{nc} == 0x004E) { # N
3455            $self->{state} = MD_ENTITY_STATE;
3456            $self->{kwd} .= chr $self->{nc};
3457            !!!next-input-character;
3458            redo A;
3459          } elsif ($self->{nc} == 0x004C) { # L
3460            ## XML5: <!ELEMENT> not supported.
3461            $self->{state} = MD_ELEMENT_STATE;
3462            $self->{kwd} .= chr $self->{nc};
3463            !!!next-input-character;
3464            redo A;
3465          } else {
3466            ## XML5: No parse error.
3467            !!!parse-error (type => 'bogus comment',
3468                            line => $self->{line_prev},
3469                            column => $self->{column_prev} - 2
3470                                + 1 * ($self->{nc} == -1));
3471            ## Reconsume.
3472            $self->{state} = BOGUS_COMMENT_STATE;
3473            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3474            redo A;
3475          }
3476        } elsif ($self->{state} == MD_ENTITY_STATE) {
3477          if ($self->{nc} == {
3478                'EN' => 0x0054, # T
3479                'ENT' => 0x0049, # I
3480                'ENTI' => 0x0054, # T
3481              }->{$self->{kwd}}) {
3482            ## Stay in the state.
3483            $self->{kwd} .= chr $self->{nc};
3484            !!!next-input-character;
3485            redo A;
3486          } elsif ($self->{kwd} eq 'ENTIT' and
3487                   $self->{nc} == 0x0059) { # Y
3488            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '', text => '',
3489                           line => $self->{line_prev},
3490                           column => $self->{column_prev} - 6};
3491            $self->{state} = DOCTYPE_MD_STATE;
3492            !!!next-input-character;
3493            redo A;
3494          } else {
3495            !!!parse-error (type => 'bogus comment',
3496                            line => $self->{line_prev},
3497                            column => $self->{column_prev} - 1
3498                                - (length $self->{kwd})
3499                                + 1 * ($self->{nc} == -1));
3500            $self->{state} = BOGUS_COMMENT_STATE;
3501            ## Reconsume.
3502            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3503            redo A;
3504          }
3505        } elsif ($self->{state} == MD_ELEMENT_STATE) {
3506          if ($self->{nc} == {
3507                'EL' => 0x0045, # E
3508                'ELE' => 0x004D, # M
3509                'ELEM' => 0x0045, # E
3510                'ELEME' => 0x004E, # N
3511              }->{$self->{kwd}}) {
3512            ## Stay in the state.
3513            $self->{kwd} .= chr $self->{nc};
3514            !!!next-input-character;
3515            redo A;
3516          } elsif ($self->{kwd} eq 'ELEMEN' and
3517                   $self->{nc} == 0x0054) { # T
3518            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
3519                           line => $self->{line_prev},
3520                           column => $self->{column_prev} - 6};
3521            $self->{state} = DOCTYPE_MD_STATE;
3522            !!!next-input-character;
3523            redo A;
3524          } else {
3525            !!!parse-error (type => 'bogus comment',
3526                            line => $self->{line_prev},
3527                            column => $self->{column_prev} - 1
3528                                - (length $self->{kwd})
3529                                + 1 * ($self->{nc} == -1));
3530            $self->{state} = BOGUS_COMMENT_STATE;
3531            ## Reconsume.
3532            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3533            redo A;
3534          }
3535        } elsif ($self->{state} == MD_ATTLIST_STATE) {
3536          if ($self->{nc} == {
3537                'A' => 0x0054, # T
3538                'AT' => 0x0054, # T
3539                'ATT' => 0x004C, # L
3540                'ATTL' => 0x0049, # I
3541                'ATTLI' => 0x0053, # S
3542              }->{$self->{kwd}}) {
3543            ## Stay in the state.
3544            $self->{kwd} .= chr $self->{nc};
3545            !!!next-input-character;
3546            redo A;
3547          } elsif ($self->{kwd} eq 'ATTLIS' and
3548                   $self->{nc} == 0x0054) { # T
3549            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
3550                           line => $self->{line_prev},
3551                           column => $self->{column_prev} - 6};
3552            $self->{state} = DOCTYPE_MD_STATE;
3553            !!!next-input-character;
3554            redo A;
3555          } else {
3556            !!!parse-error (type => 'bogus comment',
3557                            line => $self->{line_prev},
3558                            column => $self->{column_prev} - 1
3559                                 - (length $self->{kwd})
3560                                 + 1 * ($self->{nc} == -1));
3561            $self->{state} = BOGUS_COMMENT_STATE;
3562            ## Reconsume.
3563            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3564            redo A;
3565          }
3566        } elsif ($self->{state} == MD_NOTATION_STATE) {
3567          if ($self->{nc} == {
3568                'N' => 0x004F, # O
3569                'NO' => 0x0054, # T
3570                'NOT' => 0x0041, # A
3571                'NOTA' => 0x0054, # T
3572                'NOTAT' => 0x0049, # I
3573                'NOTATI' => 0x004F, # O
3574              }->{$self->{kwd}}) {
3575            ## Stay in the state.
3576            $self->{kwd} .= chr $self->{nc};
3577            !!!next-input-character;
3578            redo A;
3579          } elsif ($self->{kwd} eq 'NOTATIO' and
3580                   $self->{nc} == 0x004E) { # N
3581            $self->{ct} = {type => NOTATION_TOKEN, name => '',
3582                           line => $self->{line_prev},
3583                           column => $self->{column_prev} - 6};
3584            $self->{state} = DOCTYPE_MD_STATE;
3585            !!!next-input-character;
3586            redo A;
3587          } else {
3588            !!!parse-error (type => 'bogus comment',
3589                            line => $self->{line_prev},
3590                            column => $self->{column_prev} - 1
3591                                - (length $self->{kwd})
3592                                + 1 * ($self->{nc} == -1));
3593            $self->{state} = BOGUS_COMMENT_STATE;
3594            ## Reconsume.
3595            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3596            redo A;
3597          }
3598        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
3599          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
3600          ## "DOCTYPE NOTATION state".
3601    
3602          if ($is_space->{$self->{nc}}) {
3603            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
3604            $self->{state} = BEFORE_MD_NAME_STATE;
3605            !!!next-input-character;
3606            redo A;
3607          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
3608                   $self->{nc} == 0x0025) { # %
3609            ## XML5: Switch to the "DOCTYPE bogus comment state".
3610            !!!parse-error (type => 'no space before md name'); ## TODO: type
3611            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
3612            !!!next-input-character;
3613            redo A;
3614          } elsif ($self->{nc} == -1) {
3615            !!!parse-error (type => 'unclosed md'); ## TODO: type
3616            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3617            ## Reconsume.
3618            redo A;
3619          } elsif ($self->{nc} == 0x003E) { # >
3620            ## XML5: Switch to the "DOCTYPE bogus comment state".
3621            !!!parse-error (type => 'no md name'); ## TODO: type
3622            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3623            !!!next-input-character;
3624            redo A;
3625          } else {
3626            ## XML5: Switch to the "DOCTYPE bogus comment state".
3627            !!!parse-error (type => 'no space before md name'); ## TODO: type
3628            $self->{state} = BEFORE_MD_NAME_STATE;
3629            redo A;
3630          }
3631        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
3632          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
3633          ## before state", "DOCTYPE ATTLIST name before state".
3634    
3635          if ($is_space->{$self->{nc}}) {
3636            ## Stay in the state.
3637            !!!next-input-character;
3638            redo A;
3639          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
3640                   $self->{nc} == 0x0025) { # %
3641            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
3642            !!!next-input-character;
3643            redo A;
3644          } elsif ($self->{nc} == 0x003E) { # >
3645            ## XML5: Same as "Anything else".
3646            !!!parse-error (type => 'no md name'); ## TODO: type
3647            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3648            !!!next-input-character;
3649            redo A;
3650          } elsif ($self->{nc} == -1) {
3651            !!!parse-error (type => 'unclosed md'); ## TODO: type
3652            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3653            ## Reconsume.
3654            redo A;
3655          } else {
3656            ## XML5: [ATTLIST] Not defined yet.
3657            $self->{ct}->{name} .= chr $self->{nc};
3658            $self->{state} = MD_NAME_STATE;
3659            !!!next-input-character;
3660            redo A;
3661          }
3662        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
3663          if ($is_space->{$self->{nc}}) {
3664            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
3665            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
3666            $self->{state} = BEFORE_MD_NAME_STATE;
3667            !!!next-input-character;
3668            redo A;
3669          } elsif ($self->{nc} == 0x003E) { # >
3670            ## XML5: Same as "Anything else".
3671            !!!parse-error (type => 'no md name'); ## TODO: type
3672            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3673            !!!next-input-character;
3674            redo A;
3675          } elsif ($self->{nc} == -1) {
3676            !!!parse-error (type => 'unclosed md');
3677            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3678            ## Reconsume.
3679            redo A;
3680          } else {
3681            ## XML5: No parse error.
3682            !!!parse-error (type => 'no space after ENTITY percent'); ## TODO: type
3683            $self->{state} = BOGUS_COMMENT_STATE;
3684            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3685            ## Reconsume.
3686            redo A;
3687          }
3688        } elsif ($self->{state} == MD_NAME_STATE) {
3689          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
3690          
3691          if ($is_space->{$self->{nc}}) {
3692            ## TODO:
3693            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
3694            !!!next-input-character;
3695            redo A;
3696          } elsif ($self->{nc} == 0x003E) { # >
3697            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
3698              #
3699            } else {
3700              !!!parse-error (type => 'no md body'); ## TODO: type
3701            }
3702            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3703            !!!next-input-character;
3704            !!!emit ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
3705            redo A;
3706          } elsif ($self->{nc} == -1) {
3707            ## XML5: [ATTLIST] No parse error.
3708            !!!parse-error (type => 'unclosed md');
3709            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3710            ## Reconsume.
3711            !!!emit ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
3712            redo A;
3713          } else {
3714            ## XML5: [ATTLIST] Not defined yet.
3715            $self->{ct}->{name} .= chr $self->{nc};
3716            ## Stay in the state.
3717            !!!next-input-character;
3718            redo A;
3719          }
3720        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
3721          if ($is_space->{$self->{nc}}) {
3722            ## Stay in the state.
3723            !!!next-input-character;
3724            redo A;
3725          } elsif ($self->{nc} == 0x003E) { # >
3726            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3727            !!!next-input-character;
3728            !!!emit ($self->{ct}); # ATTLIST
3729            redo A;
3730          } elsif ($self->{nc} == -1) {
3731            ## XML5: No parse error.
3732            !!!parse-error (type => 'unclosed md'); ## TODO: type
3733            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3734            redo A;
3735          } else {
3736            ## XML5: Not defined yet.
3737    
3738            ## TODO: ...
3739    
3740            $self->{state} = BOGUS_COMMENT_STATE;
3741            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3742            ## Reconsume.
3743            redo A;
3744          }
3745    
3746      } else {      } else {
3747        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
3748      }      }

Legend:
Removed from v.1.13  
Changed lines
  Added in v.1.14

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24