/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.12 by wakaba, Wed Oct 15 12:49:49 2008 UTC | revision 1.13 by wakaba, Thu Oct 16 03:39:57 2008 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19    );    );
20        
21    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 28  BEGIN {
28        CHARACTER_TOKEN        CHARACTER_TOKEN
29        PI_TOKEN        PI_TOKEN
30        ABORT_TOKEN        ABORT_TOKEN
31          END_OF_DOCTYPE_TOKEN
32      )],      )],
33    );    );
34  }  }
# Line 43  sub END_OF_FILE_TOKEN () { 5 } Line 45  sub END_OF_FILE_TOKEN () { 5 }
45  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
46  sub PI_TOKEN () { 7 } ## NOTE: XML only.  sub PI_TOKEN () { 7 } ## NOTE: XML only.
47  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
48    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only
49    
50  ## XML5: XML5 has "empty tag token".  In this implementation, it is  ## XML5: XML5 has "empty tag token".  In this implementation, it is
51  ## represented as a start tag token with $self->{self_closing} flag  ## represented as a start tag token with $self->{self_closing} flag
# Line 133  sub PI_AFTER_STATE () { 55 } Line 136  sub PI_AFTER_STATE () { 55 }
136  sub PI_DATA_AFTER_STATE () { 56 }  sub PI_DATA_AFTER_STATE () { 56 }
137  sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }  sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
138  sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }  sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
139    sub DOCTYPE_TAG_STATE () { 59 }
140    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 60 }
141    
142  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
143  ## list and descriptions)  ## list and descriptions)
# Line 1563  sub _get_next_token ($) { Line 1568  sub _get_next_token ($) {
1568          redo A;          redo A;
1569        }        }
1570      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
       ## (only happen if PCDATA state)  
   
1571        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
1572        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
1573                
1574        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
1575          !!!cp (124);          if ($self->{in_subset}) {
1576          $self->{state} = DATA_STATE;            !!!cp (123);
1577          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1578            } else {
1579              !!!cp (124);
1580              $self->{state} = DATA_STATE;
1581              $self->{s_kwd} = '';
1582            }
1583          !!!next-input-character;          !!!next-input-character;
1584    
1585          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
1586          redo A;          redo A;
1587        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1588          !!!cp (125);          if ($self->{in_subset}) {
1589          $self->{state} = DATA_STATE;            !!!cp (125.1);
1590          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1591            } else {
1592              !!!cp (125);
1593              $self->{state} = DATA_STATE;
1594              $self->{s_kwd} = '';
1595            }
1596          ## reconsume          ## reconsume
1597    
1598          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1596  sub _get_next_token ($) { Line 1609  sub _get_next_token ($) {
1609          redo A;          redo A;
1610        }        }
1611      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1612        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state" and "DOCTYPE markup
1613          ## declaration state".
1614                
1615        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
1616          !!!cp (133);          !!!cp (133);
# Line 1772  sub _get_next_token ($) { Line 1786  sub _get_next_token ($) {
1786          !!!next-input-character;          !!!next-input-character;
1787          redo A;          redo A;
1788        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
         !!!cp (138);  
1789          !!!parse-error (type => 'bogus comment');          !!!parse-error (type => 'bogus comment');
1790          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
1791          $self->{s_kwd} = '';            !!!cp (138.1);
1792              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1793            } else {
1794              !!!cp (138);
1795              $self->{state} = DATA_STATE;
1796              $self->{s_kwd} = '';
1797            }
1798          !!!next-input-character;          !!!next-input-character;
1799    
1800          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
1801    
1802          redo A;          redo A;
1803        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!cp (139);  
1804          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1805          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
1806          $self->{s_kwd} = '';            !!!cp (139.1);
1807              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1808            } else {
1809              !!!cp (139);
1810              $self->{state} = DATA_STATE;
1811              $self->{s_kwd} = '';
1812            }
1813          ## reconsume          ## reconsume
1814    
1815          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1806  sub _get_next_token ($) { Line 1830  sub _get_next_token ($) {
1830          !!!next-input-character;          !!!next-input-character;
1831          redo A;          redo A;
1832        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
         !!!cp (142);  
1833          !!!parse-error (type => 'bogus comment');          !!!parse-error (type => 'bogus comment');
1834          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
1835          $self->{s_kwd} = '';            !!!cp (142.1);
1836              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1837            } else {
1838              !!!cp (142);
1839              $self->{state} = DATA_STATE;
1840              $self->{s_kwd} = '';
1841            }
1842          !!!next-input-character;          !!!next-input-character;
1843    
1844          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
1845    
1846          redo A;          redo A;
1847        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!cp (143);  
1848          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1849          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
1850          $self->{s_kwd} = '';            !!!cp (143.1);
1851              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1852            } else {
1853              !!!cp (143);
1854              $self->{state} = DATA_STATE;
1855              $self->{s_kwd} = '';
1856            }
1857          ## reconsume          ## reconsume
1858    
1859          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1840  sub _get_next_token ($) { Line 1874  sub _get_next_token ($) {
1874          !!!next-input-character;          !!!next-input-character;
1875          redo A;          redo A;
1876        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!cp (146);  
1877          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1878          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
1879          $self->{s_kwd} = '';            !!!cp (146.1);
1880              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1881            } else {
1882              !!!cp (146);
1883              $self->{state} = DATA_STATE;
1884              $self->{s_kwd} = '';
1885            }
1886          ## reconsume          ## reconsume
1887    
1888          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1869  sub _get_next_token ($) { Line 1908  sub _get_next_token ($) {
1908          !!!next-input-character;          !!!next-input-character;
1909          redo A;          redo A;
1910        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!cp (149);  
1911          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1912          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
1913          $self->{s_kwd} = '';            !!!cp (149.1);
1914              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1915            } else {
1916              !!!cp (149);
1917              $self->{state} = DATA_STATE;
1918              $self->{s_kwd} = '';
1919            }
1920          ## reconsume          ## reconsume
1921    
1922          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1887  sub _get_next_token ($) { Line 1931  sub _get_next_token ($) {
1931        }        }
1932      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
1933        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
1934          !!!cp (151);          if ($self->{in_subset}) {
1935          $self->{state} = DATA_STATE;            !!!cp (151.1);
1936          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1937            } else {
1938              !!!cp (151);
1939              $self->{state} = DATA_STATE;
1940              $self->{s_kwd} = '';
1941            }
1942          !!!next-input-character;          !!!next-input-character;
1943    
1944          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1906  sub _get_next_token ($) { Line 1955  sub _get_next_token ($) {
1955          !!!next-input-character;          !!!next-input-character;
1956          redo A;          redo A;
1957        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!cp (153);  
1958          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1959          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
1960          $self->{s_kwd} = '';            !!!cp (153.1);
1961              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1962            } else {
1963              !!!cp (153);
1964              $self->{state} = DATA_STATE;
1965              $self->{s_kwd} = '';
1966            }
1967          ## reconsume          ## reconsume
1968    
1969          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1973  sub _get_next_token ($) { Line 2027  sub _get_next_token ($) {
2027          !!!cp (159.1);          !!!cp (159.1);
2028          !!!parse-error (type => 'no DOCTYPE name');          !!!parse-error (type => 'no DOCTYPE name');
2029          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2030            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2031            $self->{in_subset} = 1;
2032          !!!next-input-character;          !!!next-input-character;
2033            !!!emit ($self->{ct}); # DOCTYPE
2034          redo A;          redo A;
2035        } else {        } else {
2036          !!!cp (160);          !!!cp (160);
# Line 2016  sub _get_next_token ($) { Line 2073  sub _get_next_token ($) {
2073        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2074          !!!cp (163.1);          !!!cp (163.1);
2075          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2076            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2077            $self->{in_subset} = 1;
2078          !!!next-input-character;          !!!next-input-character;
2079            !!!emit ($self->{ct}); # DOCTYPE
2080          redo A;          redo A;
2081        } else {        } else {
2082          !!!cp (164);          !!!cp (164);
# Line 2073  sub _get_next_token ($) { Line 2133  sub _get_next_token ($) {
2133          !!!cp (167.3);          !!!cp (167.3);
2134          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2135          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2136            $self->{in_subset} = 1;
2137          !!!next-input-character;          !!!next-input-character;
2138            !!!emit ($self->{ct}); # DOCTYPE
2139          redo A;          redo A;
2140        } else {        } else {
2141          !!!cp (180);          !!!cp (180);
# Line 2227  sub _get_next_token ($) { Line 2289  sub _get_next_token ($) {
2289          !!!parse-error (type => 'no PUBLIC literal');          !!!parse-error (type => 'no PUBLIC literal');
2290          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2291          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2292            $self->{in_subset} = 1;
2293          !!!next-input-character;          !!!next-input-character;
2294            !!!emit ($self->{ct}); # DOCTYPE
2295          redo A;          redo A;
2296        } else {        } else {
2297          !!!cp (186);          !!!cp (186);
# Line 2369  sub _get_next_token ($) { Line 2433  sub _get_next_token ($) {
2433          !!!parse-error (type => 'no SYSTEM literal');          !!!parse-error (type => 'no SYSTEM literal');
2434          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2435          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2436            $self->{in_subset} = 1;
2437          !!!next-input-character;          !!!next-input-character;
2438            !!!emit ($self->{ct}); # DOCTYPE
2439          redo A;          redo A;
2440        } else {        } else {
2441          !!!cp (200);          !!!cp (200);
# Line 2427  sub _get_next_token ($) { Line 2493  sub _get_next_token ($) {
2493    
2494          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2495          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2496            $self->{in_subset} = 1;
2497          !!!next-input-character;          !!!next-input-character;
2498            !!!emit ($self->{ct}); # DOCTYPE
2499          redo A;          redo A;
2500        } else {        } else {
2501          !!!cp (206);          !!!cp (206);
# Line 2550  sub _get_next_token ($) { Line 2618  sub _get_next_token ($) {
2618          !!!cp (218.1);          !!!cp (218.1);
2619          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2620          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2621            $self->{in_subset} = 1;
2622          !!!next-input-character;          !!!next-input-character;
2623            !!!emit ($self->{ct}); # DOCTYPE
2624          redo A;          redo A;
2625        } else {        } else {
2626          !!!cp (218);          !!!cp (218);
# Line 2572  sub _get_next_token ($) { Line 2642  sub _get_next_token ($) {
2642    
2643          redo A;          redo A;
2644        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2645          if ($self->{ct}->{has_internal_subset}) { # DOCTYPE          !!!cp (220.1);
2646            !!!cp (220.2);          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2647            ## Stay in the state.          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2648            !!!next-input-character;          $self->{in_subset} = 1;
2649            redo A;          !!!next-input-character;
2650          } else {          !!!emit ($self->{ct}); # DOCTYPE
2651            !!!cp (220.1);          redo A;
           $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;  
           $self->{ct}->{has_internal_subset} = 1; # DOCTYPE  
           !!!next-input-character;  
           redo A;  
         }  
2652        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2653          !!!cp (220);          !!!cp (220);
2654          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 3099  sub _get_next_token ($) { Line 3164  sub _get_next_token ($) {
3164          redo A;          redo A;
3165        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3166          !!!parse-error (type => 'no pic'); ## TODO: type          !!!parse-error (type => 'no pic'); ## TODO: type
3167          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
3168          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3169            } else {
3170              $self->{state} = DATA_STATE;
3171              $self->{s_kwd} = '';
3172            }
3173          ## Reconsume.          ## Reconsume.
3174          !!!emit ($self->{ct}); # pi          !!!emit ($self->{ct}); # pi
3175          redo A;          redo A;
# Line 3131  sub _get_next_token ($) { Line 3200  sub _get_next_token ($) {
3200          redo A;          redo A;
3201        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3202          !!!parse-error (type => 'no pic'); ## TODO: type          !!!parse-error (type => 'no pic'); ## TODO: type
3203          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
3204          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3205            } else {
3206              $self->{state} = DATA_STATE;
3207              $self->{s_kwd} = '';
3208            }
3209          ## Reprocess.          ## Reprocess.
3210          !!!emit ($self->{ct}); # pi          !!!emit ($self->{ct}); # pi
3211          redo A;          redo A;
# Line 3147  sub _get_next_token ($) { Line 3220  sub _get_next_token ($) {
3220        }        }
3221      } elsif ($self->{state} == PI_AFTER_STATE) {      } elsif ($self->{state} == PI_AFTER_STATE) {
3222        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
3223          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
3224          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3225            } else {
3226              $self->{state} = DATA_STATE;
3227              $self->{s_kwd} = '';
3228            }
3229          !!!next-input-character;          !!!next-input-character;
3230          !!!emit ($self->{ct}); # pi          !!!emit ($self->{ct}); # pi
3231          redo A;          redo A;
# Line 3173  sub _get_next_token ($) { Line 3250  sub _get_next_token ($) {
3250      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
3251        ## XML5: Same as "pi after state" in XML5        ## XML5: Same as "pi after state" in XML5
3252        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
3253          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
3254          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3255            } else {
3256              $self->{state} = DATA_STATE;
3257              $self->{s_kwd} = '';
3258            }
3259          !!!next-input-character;          !!!next-input-character;
3260          !!!emit ($self->{ct}); # pi          !!!emit ($self->{ct}); # pi
3261          redo A;          redo A;
# Line 3192  sub _get_next_token ($) { Line 3273  sub _get_next_token ($) {
3273    
3274      } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {      } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
3275        if ($self->{nc} == 0x003C) { # <        if ($self->{nc} == 0x003C) { # <
3276          ## TODO:          $self->{state} = DOCTYPE_TAG_STATE;
3277          !!!next-input-character;          !!!next-input-character;
3278          redo A;          redo A;
3279        } elsif ($self->{nc} == 0x0025) { # %        } elsif ($self->{nc} == 0x0025) { # %
# Line 3202  sub _get_next_token ($) { Line 3283  sub _get_next_token ($) {
3283          !!!next-input-character;          !!!next-input-character;
3284          redo A;          redo A;
3285        } elsif ($self->{nc} == 0x005D) { # ]        } elsif ($self->{nc} == 0x005D) { # ]
3286            delete $self->{in_subset};
3287          $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
3288          !!!next-input-character;          !!!next-input-character;
3289          redo A;          redo A;
# Line 3211  sub _get_next_token ($) { Line 3293  sub _get_next_token ($) {
3293          redo A;          redo A;
3294        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3295          !!!parse-error (type => 'unclosed internal subset'); ## TODO: type          !!!parse-error (type => 'unclosed internal subset'); ## TODO: type
3296            delete $self->{in_subset};
3297          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3298          $self->{s_kwd} = '';          $self->{s_kwd} = '';
3299          ## Reconsume.          ## Reconsume.
3300          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ({type => END_OF_DOCTYPE_TOKEN});
3301          redo A;          redo A;
3302        } else {        } else {
3303          unless ($self->{internal_subset_tainted}) {          unless ($self->{internal_subset_tainted}) {
# Line 3231  sub _get_next_token ($) { Line 3314  sub _get_next_token ($) {
3314          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3315          $self->{s_kwd} = '';          $self->{s_kwd} = '';
3316          !!!next-input-character;          !!!next-input-character;
3317          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ({type => END_OF_DOCTYPE_TOKEN});
3318          redo A;          redo A;
3319        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3320          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
3321          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3322          $self->{s_kwd} = '';          $self->{s_kwd} = '';
3323          ## Reconsume.          ## Reconsume.
3324          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ({type => END_OF_DOCTYPE_TOKEN});
3325          redo A;          redo A;
3326        } else {        } else {
3327          ## XML5: No parse error and stay in the state.          ## XML5: No parse error and stay in the state.
3328          !!!parse-error (type => 'string after internal subset'); ## TODO: type          !!!parse-error (type => 'string after internal subset'); ## TODO: type
3329    
3330          $self->{state} = BOGUS_DOCTYPE_STATE;          $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
3331            !!!next-input-character;
3332            redo A;
3333          }
3334        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
3335          if ($self->{nc} == 0x003E) { # >
3336            $self->{state} = DATA_STATE;
3337            $self->{s_kwd} = '';
3338            !!!next-input-character;
3339            !!!emit ({type => END_OF_DOCTYPE_TOKEN});
3340            redo A;
3341          } elsif ($self->{nc} == -1) {
3342            $self->{state} = DATA_STATE;
3343            $self->{s_kwd} = '';
3344            ## Reconsume.
3345            !!!emit ({type => END_OF_DOCTYPE_TOKEN});
3346            redo A;
3347          } else {
3348            ## Stay in the state.
3349            !!!next-input-character;
3350            redo A;
3351          }
3352        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
3353          if ($self->{nc} == 0x0021) { # !
3354            $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
3355            !!!next-input-character;
3356            redo A;
3357          } elsif ($self->{nc} == 0x003F) { # ?
3358            $self->{state} = PI_STATE;
3359            !!!next-input-character;
3360            redo A;
3361          } elsif ($self->{nc} == -1) {
3362            !!!parse-error (type => 'bare stago');
3363            $self->{state} = DATA_STATE;
3364            $self->{s_kwd} = '';
3365            ## Reconsume.
3366            redo A;
3367          } else {
3368            !!!parse-error (type => 'bare stago', ## XML5: Not a parse error.
3369                            line => $self->{line_prev},
3370                            column => $self->{column_prev});
3371            $self->{state} = BOGUS_COMMENT_STATE;
3372            $self->{ct} = {type => COMMENT_TOKEN,
3373                           data => '',
3374                          }; ## NOTE: Will be discarded.
3375          !!!next-input-character;          !!!next-input-character;
3376          redo A;          redo A;
3377        }        }

Legend:
Removed from v.1.12  
changed lines
  Added in v.1.13

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24