/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.14 by wakaba, Fri Oct 17 07:14:29 2008 UTC | revision 1.16 by wakaba, Sat Oct 18 11:34:49 2008 UTC
# Line 164  sub BEFORE_MD_NAME_STATE () { 68 } Line 164  sub BEFORE_MD_NAME_STATE () { 68 }
164  sub MD_NAME_STATE () { 69 }  sub MD_NAME_STATE () { 69 }
165  sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }  sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166  sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }  sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BOGUS_MD_STATE () { 85 }
181    
182  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
183  ## list and descriptions)  ## list and descriptions)
# Line 1257  sub _get_next_token ($) { Line 1271  sub _get_next_token ($) {
1271          redo A;          redo A;
1272        }        }
1273      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1274        ## XML5: "Tag attribute value double quoted state".        ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1275          ## ATTLIST attribute value double quoted state".
1276                
1277        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1278          !!!cp (95);          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1279          ## XML5: "Tag attribute name before state".            !!!cp (95.1);
1280          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1281              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1282              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1283            } else {
1284              !!!cp (95);
1285              ## XML5: "Tag attribute name before state".
1286              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1287            }
1288          !!!next-input-character;          !!!next-input-character;
1289          redo A;          redo A;
1290        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
# Line 1283  sub _get_next_token ($) { Line 1305  sub _get_next_token ($) {
1305          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1306            !!!cp (97);            !!!cp (97);
1307            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1308    
1309              $self->{state} = DATA_STATE;
1310              $self->{s_kwd} = '';
1311              ## reconsume
1312              !!!emit ($self->{ct}); # start tag
1313              redo A;
1314          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1315            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1316            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1292  sub _get_next_token ($) { Line 1320  sub _get_next_token ($) {
1320              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1321              !!!cp (99);              !!!cp (99);
1322            }            }
1323    
1324              $self->{state} = DATA_STATE;
1325              $self->{s_kwd} = '';
1326              ## reconsume
1327              !!!emit ($self->{ct}); # end tag
1328              redo A;
1329            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1330              ## XML5: No parse error above; not defined yet.
1331              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1332              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1333              ## Reconsume.
1334              !!!emit ($self->{ct}); # ATTLIST
1335              redo A;
1336          } else {          } else {
1337            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1338          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         !!!emit ($self->{ct}); # start tag or end tag  
   
         redo A;  
1339        } else {        } else {
1340            ## XML5 [ATTLIST]: Not defined yet.
1341          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1342            !!!cp (100);            !!!cp (100);
1343            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1320  sub _get_next_token ($) { Line 1355  sub _get_next_token ($) {
1355          redo A;          redo A;
1356        }        }
1357      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1358        ## XML5: "Tag attribute value single quoted state".        ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1359          ## ATTLIST attribute value single quoted state".
1360    
1361        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1362          !!!cp (101);          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1363          ## XML5: "Before attribute name state" (sic).            !!!cp (101.1);
1364          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1365              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1366              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1367            } else {
1368              !!!cp (101);
1369              ## XML5: "Before attribute name state" (sic).
1370              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1371            }
1372          !!!next-input-character;          !!!next-input-character;
1373          redo A;          redo A;
1374        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
# Line 1346  sub _get_next_token ($) { Line 1389  sub _get_next_token ($) {
1389          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1390            !!!cp (103);            !!!cp (103);
1391            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1392    
1393              $self->{state} = DATA_STATE;
1394              $self->{s_kwd} = '';
1395              ## reconsume
1396              !!!emit ($self->{ct}); # start tag
1397              redo A;
1398          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1399            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1400            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1355  sub _get_next_token ($) { Line 1404  sub _get_next_token ($) {
1404              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1405              !!!cp (105);              !!!cp (105);
1406            }            }
1407    
1408              $self->{state} = DATA_STATE;
1409              $self->{s_kwd} = '';
1410              ## reconsume
1411              !!!emit ($self->{ct}); # end tag
1412              redo A;
1413            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1414              ## XML5: No parse error above; not defined yet.
1415              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1416              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1417              ## Reconsume.
1418              !!!emit ($self->{ct}); # ATTLIST
1419              redo A;
1420          } else {          } else {
1421            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1422          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         !!!emit ($self->{ct}); # start tag or end tag  
   
         redo A;  
1423        } else {        } else {
1424            ## XML5 [ATTLIST]: Not defined yet.
1425          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1426            !!!cp (106);            !!!cp (106);
1427            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1386  sub _get_next_token ($) { Line 1442  sub _get_next_token ($) {
1442        ## XML5: "Tag attribute value unquoted state".        ## XML5: "Tag attribute value unquoted state".
1443    
1444        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1445          !!!cp (107);          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1446          ## XML5: "Tag attribute name before state".            !!!cp (107.1);
1447          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
1448              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
1449            } else {
1450              !!!cp (107);
1451              ## XML5: "Tag attribute name before state".
1452              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1453            }
1454          !!!next-input-character;          !!!next-input-character;
1455          redo A;          redo A;
1456        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
# Line 1409  sub _get_next_token ($) { Line 1471  sub _get_next_token ($) {
1471          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1472            !!!cp (109);            !!!cp (109);
1473            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1474    
1475              $self->{state} = DATA_STATE;
1476              $self->{s_kwd} = '';
1477              !!!next-input-character;
1478              !!!emit ($self->{ct}); # start tag
1479              redo A;
1480          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1481            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1482            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1418  sub _get_next_token ($) { Line 1486  sub _get_next_token ($) {
1486              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1487              !!!cp (111);              !!!cp (111);
1488            }            }
1489    
1490              $self->{state} = DATA_STATE;
1491              $self->{s_kwd} = '';
1492              !!!next-input-character;
1493              !!!emit ($self->{ct}); # end tag
1494              redo A;
1495            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1496              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1497              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1498              !!!next-input-character;
1499              !!!emit ($self->{ct}); # ATTLIST
1500              redo A;
1501          } else {          } else {
1502            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1503          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         !!!next-input-character;  
   
         !!!emit ($self->{ct}); # start tag or end tag  
   
         redo A;  
1504        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!parse-error (type => 'unclosed tag');  
1505          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1506            !!!cp (112);            !!!cp (112);
1507              !!!parse-error (type => 'unclosed tag');
1508            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1509    
1510              $self->{state} = DATA_STATE;
1511              $self->{s_kwd} = '';
1512              ## reconsume
1513              !!!emit ($self->{ct}); # start tag
1514              redo A;
1515          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1516              !!!parse-error (type => 'unclosed tag');
1517            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1518            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
1519              !!!cp (113);              !!!cp (113);
# Line 1442  sub _get_next_token ($) { Line 1522  sub _get_next_token ($) {
1522              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1523              !!!cp (114);              !!!cp (114);
1524            }            }
1525    
1526              $self->{state} = DATA_STATE;
1527              $self->{s_kwd} = '';
1528              ## reconsume
1529              !!!emit ($self->{ct}); # end tag
1530              redo A;
1531            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1532              !!!parse-error (type => 'unclosed md'); ## TODO: type
1533              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1534              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1535              ## Reconsume.
1536              !!!emit ($self->{ct}); # ATTLIST
1537              redo A;
1538          } else {          } else {
1539            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1540          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         !!!emit ($self->{ct}); # start tag or end tag  
   
         redo A;  
1541        } else {        } else {
1542          if ({          if ({
1543               0x0022 => 1, # "               0x0022 => 1, # "
# Line 2127  sub _get_next_token ($) { Line 2213  sub _get_next_token ($) {
2213          !!!next-input-character;          !!!next-input-character;
2214          redo A;          redo A;
2215        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
2216          !!!cp (166);          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2217          $self->{state} = DATA_STATE;            !!!cp (166);
2218          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
2219              $self->{s_kwd} = '';
2220            } else {
2221              !!!cp (166.1);
2222              !!!parse-error (type => 'no md def'); ## TODO: type
2223              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2224            }
2225            
2226          !!!next-input-character;          !!!next-input-character;
2227            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         !!!emit ($self->{ct}); # DOCTYPE  
   
2228          redo A;          redo A;
2229        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2230          !!!cp (167);          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2231          !!!parse-error (type => 'unclosed DOCTYPE');            !!!cp (167);
2232          $self->{state} = DATA_STATE;            !!!parse-error (type => 'unclosed DOCTYPE');
2233          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
2234          ## reconsume            $self->{s_kwd} = '';
2235              $self->{ct}->{quirks} = 1;
2236          $self->{ct}->{quirks} = 1;          } else {
2237          !!!emit ($self->{ct}); # DOCTYPE            !!!cp (167.12);
2238              !!!parse-error (type => 'unclosed md'); ## TODO: type
2239              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2240            }
2241            
2242            ## Reconsume.
2243            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
2244          redo A;          redo A;
2245        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
2246                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
# Line 2160  sub _get_next_token ($) { Line 2256  sub _get_next_token ($) {
2256          $self->{kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2257          !!!next-input-character;          !!!next-input-character;
2258          redo A;          redo A;
2259        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [  ## TODO: " and ' for ENTITY
2260          } elsif ($self->{is_xml} and
2261                   $self->{ct}->{type} == DOCTYPE_TOKEN and
2262                   $self->{nc} == 0x005B) { # [
2263          !!!cp (167.3);          !!!cp (167.3);
2264          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2265          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
# Line 2169  sub _get_next_token ($) { Line 2268  sub _get_next_token ($) {
2268          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
2269          redo A;          redo A;
2270        } else {        } else {
2271          !!!cp (180);          !!!parse-error (type => 'string after DOCTYPE name'); ## TODO: type
2272          !!!parse-error (type => 'string after DOCTYPE name');  
2273          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2274              !!!cp (180);
2275              $self->{ct}->{quirks} = 1;
2276              $self->{state} = BOGUS_DOCTYPE_STATE;
2277            } else {
2278              !!!cp (180.1);
2279              $self->{state} = BOGUS_MD_STATE;
2280            }
2281    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
2282          !!!next-input-character;          !!!next-input-character;
2283          redo A;          redo A;
2284        }        }
# Line 2215  sub _get_next_token ($) { Line 2320  sub _get_next_token ($) {
2320          !!!next-input-character;          !!!next-input-character;
2321          redo A;          redo A;
2322        } else {        } else {
2323          !!!cp (169);          !!!parse-error (type => 'string after DOCTYPE name', ## TODO: type
         !!!parse-error (type => 'string after DOCTYPE name',  
2324                          line => $self->{line_prev},                          line => $self->{line_prev},
2325                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
2326          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2327              !!!cp (169);
2328          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
2329              $self->{state} = BOGUS_DOCTYPE_STATE;
2330            } else {
2331              !!!cp (169.1);
2332              $self->{state} = BOGUS_MD_STATE;
2333            }
2334          ## Reconsume.          ## Reconsume.
2335          redo A;          redo A;
2336        }        }
# Line 2263  sub _get_next_token ($) { Line 2372  sub _get_next_token ($) {
2372          !!!next-input-character;          !!!next-input-character;
2373          redo A;          redo A;
2374        } else {        } else {
2375          !!!cp (172);          !!!parse-error (type => 'string after DOCTYPE name', ## TODO: type
         !!!parse-error (type => 'string after DOCTYPE name',  
2376                          line => $self->{line_prev},                          line => $self->{line_prev},
2377                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
2378          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2379              !!!cp (172);
2380          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
2381              $self->{state} = BOGUS_DOCTYPE_STATE;
2382            } else {
2383              !!!cp (172.1);
2384              $self->{state} = BOGUS_MD_STATE;
2385            }
2386          ## Reconsume.          ## Reconsume.
2387          redo A;          redo A;
2388        }        }
# Line 2292  sub _get_next_token ($) { Line 2405  sub _get_next_token ($) {
2405          !!!next-input-character;          !!!next-input-character;
2406          redo A;          redo A;
2407        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
         !!!cp (184);  
2408          !!!parse-error (type => 'no PUBLIC literal');          !!!parse-error (type => 'no PUBLIC literal');
2409            
2410          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2411          $self->{s_kwd} = '';            !!!cp (184);
2412              $self->{state} = DATA_STATE;
2413              $self->{s_kwd} = '';
2414              $self->{ct}->{quirks} = 1;
2415            } else {
2416              !!!cp (184.1);
2417              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2418            }
2419            
2420          !!!next-input-character;          !!!next-input-character;
2421            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         !!!emit ($self->{ct}); # DOCTYPE  
   
2422          redo A;          redo A;
2423        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2424          !!!cp (185);          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2425          !!!parse-error (type => 'unclosed DOCTYPE');            !!!cp (185);
2426              !!!parse-error (type => 'unclosed DOCTYPE');
2427          $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
2428          $self->{s_kwd} = '';            $self->{s_kwd} = '';
2429              $self->{ct}->{quirks} = 1;
2430            } else {
2431              !!!cp (185.1);
2432              !!!parse-error (type => 'unclosed md'); ## TODO: type
2433              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2434            }
2435            
2436          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
2437          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
   
2438          redo A;          redo A;
2439        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
2440                   $self->{ct}->{type} == DOCTYPE_TOKEN and
2441                   $self->{nc} == 0x005B) { # [
2442          !!!cp (186.1);          !!!cp (186.1);
2443          !!!parse-error (type => 'no PUBLIC literal');          !!!parse-error (type => 'no PUBLIC literal');
2444          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 2325  sub _get_next_token ($) { Line 2448  sub _get_next_token ($) {
2448          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
2449          redo A;          redo A;
2450        } else {        } else {
         !!!cp (186);  
2451          !!!parse-error (type => 'string after PUBLIC');          !!!parse-error (type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
2452    
2453          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2454              !!!cp (186);
2455              $self->{ct}->{quirks} = 1;
2456              $self->{state} = BOGUS_DOCTYPE_STATE;
2457            } else {
2458              !!!cp (186.2);
2459              $self->{state} = BOGUS_MD_STATE;
2460            }
2461    
2462          !!!next-input-character;          !!!next-input-character;
2463          redo A;          redo A;
2464        }        }
# Line 2340  sub _get_next_token ($) { Line 2469  sub _get_next_token ($) {
2469          !!!next-input-character;          !!!next-input-character;
2470          redo A;          redo A;
2471        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
         !!!cp (188);  
2472          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
2473    
2474          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2475          $self->{s_kwd} = '';            !!!cp (188);
2476          !!!next-input-character;            $self->{state} = DATA_STATE;
2477              $self->{s_kwd} = '';
2478          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
2479          !!!emit ($self->{ct}); # DOCTYPE          } else {
2480              !!!cp (188.1);
2481              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2482            }
2483    
2484            !!!next-input-character;
2485            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
2486          redo A;          redo A;
2487        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!cp (189);  
2488          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
2489    
2490          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2491          $self->{s_kwd} = '';            !!!cp (189);
2492          ## reconsume            $self->{state} = DATA_STATE;
2493              $self->{s_kwd} = '';
2494          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
2495            } else {
2496              !!!cp (189.1);
2497              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2498            }
2499            
2500            ## Reconsume.
2501          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
   
2502          redo A;          redo A;
2503        } else {        } else {
2504          !!!cp (190);          !!!cp (190);
2505          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
2506          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
2507                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
2508    
# Line 2381  sub _get_next_token ($) { Line 2517  sub _get_next_token ($) {
2517          !!!next-input-character;          !!!next-input-character;
2518          redo A;          redo A;
2519        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
         !!!cp (192);  
2520          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
2521    
2522          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2523          $self->{s_kwd} = '';            !!!cp (192);
2524          !!!next-input-character;            $self->{state} = DATA_STATE;
2525              $self->{s_kwd} = '';
2526          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
2527          !!!emit ($self->{ct}); # DOCTYPE          } else {
2528              !!!cp (192.1);
2529              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2530            }
2531    
2532            !!!next-input-character;
2533            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
2534          redo A;          redo A;
2535        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!cp (193);  
2536          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
2537    
2538          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2539          $self->{s_kwd} = '';            !!!cp (193);
2540              $self->{state} = DATA_STATE;
2541              $self->{s_kwd} = '';
2542              $self->{ct}->{quirks} = 1;
2543            } else {
2544              !!!cp (193.1);
2545              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2546            }
2547          
2548          ## reconsume          ## reconsume
2549            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         !!!emit ($self->{ct}); # DOCTYPE  
   
2550          redo A;          redo A;
2551        } else {        } else {
2552          !!!cp (194);          !!!cp (194);
2553          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
2554          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
2555                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
2556    
# Line 2423  sub _get_next_token ($) { Line 2566  sub _get_next_token ($) {
2566          redo A;          redo A;
2567        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
2568          !!!cp (196);          !!!cp (196);
2569          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
2570          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2571          !!!next-input-character;          !!!next-input-character;
2572          redo A;          redo A;
2573        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
2574          !!!cp (197);          !!!cp (197);
2575          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
2576          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2577          !!!next-input-character;          !!!next-input-character;
2578          redo A;          redo A;
2579        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
2580          if ($self->{is_xml}) {          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2581            !!!cp (198.1);            if ($self->{is_xml}) {
2582            !!!parse-error (type => 'no SYSTEM literal');              !!!cp (198.1);
2583                !!!parse-error (type => 'no SYSTEM literal');
2584              } else {
2585                !!!cp (198);
2586              }
2587              $self->{state} = DATA_STATE;
2588              $self->{s_kwd} = '';
2589          } else {          } else {
2590            !!!cp (198);            if ($self->{ct}->{type} == NOTATION_TOKEN) {
2591                !!!cp (198.2);
2592              } else {
2593                !!!cp (198.3);
2594                !!!parse-error (type => 'no SYSTEM literal');            
2595              }
2596              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2597          }          }
2598          $self->{state} = DATA_STATE;          
         $self->{s_kwd} = '';  
2599          !!!next-input-character;          !!!next-input-character;
2600            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         !!!emit ($self->{ct}); # DOCTYPE  
   
2601          redo A;          redo A;
2602        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2603          !!!cp (199);          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2604          !!!parse-error (type => 'unclosed DOCTYPE');            !!!cp (199);
2605              !!!parse-error (type => 'unclosed DOCTYPE');
2606          $self->{state} = DATA_STATE;            
2607          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
2608              $self->{s_kwd} = '';
2609              $self->{ct}->{quirks} = 1;
2610            } else {
2611              !!!parse-error (type => 'unclosed md'); ## TODO: type
2612              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2613            }
2614            
2615          ## reconsume          ## reconsume
2616            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         !!!emit ($self->{ct}); # DOCTYPE  
   
2617          redo A;          redo A;
2618        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
2619                   $self->{ct}->{type} == DOCTYPE_TOKEN and
2620                   $self->{nc} == 0x005B) { # [
2621          !!!cp (200.1);          !!!cp (200.1);
2622          !!!parse-error (type => 'no SYSTEM literal');          !!!parse-error (type => 'no SYSTEM literal');
2623          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 2469  sub _get_next_token ($) { Line 2627  sub _get_next_token ($) {
2627          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
2628          redo A;          redo A;
2629        } else {        } else {
         !!!cp (200);  
2630          !!!parse-error (type => 'string after PUBLIC literal');          !!!parse-error (type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
2631    
2632          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2633              !!!cp (200);
2634              $self->{ct}->{quirks} = 1;
2635              $self->{state} = BOGUS_DOCTYPE_STATE;
2636            } else {
2637              !!!cp (200.2);
2638              $self->{state} = BOGUS_MD_STATE;
2639            }
2640    
2641          !!!next-input-character;          !!!next-input-character;
2642          redo A;          redo A;
2643        }        }
# Line 2496  sub _get_next_token ($) { Line 2660  sub _get_next_token ($) {
2660          !!!next-input-character;          !!!next-input-character;
2661          redo A;          redo A;
2662        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
         !!!cp (204);  
2663          !!!parse-error (type => 'no SYSTEM literal');          !!!parse-error (type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
2664          !!!next-input-character;          !!!next-input-character;
2665    
2666          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2667          !!!emit ($self->{ct}); # DOCTYPE            !!!cp (204);
2668              $self->{state} = DATA_STATE;
2669              $self->{s_kwd} = '';
2670              $self->{ct}->{quirks} = 1;
2671            } else {
2672              !!!cp (204.1);
2673              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2674            }
2675    
2676            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
2677          redo A;          redo A;
2678        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2679          !!!cp (205);          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2680          !!!parse-error (type => 'unclosed DOCTYPE');            !!!cp (205);
2681              !!!parse-error (type => 'unclosed DOCTYPE');
2682          $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
2683          $self->{s_kwd} = '';            $self->{s_kwd} = '';
2684              $self->{ct}->{quirks} = 1;
2685            } else {
2686              !!!cp (205.1);
2687              !!!parse-error (type => 'unclosed md'); ## TODO: type
2688              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2689            }
2690            
2691          ## reconsume          ## reconsume
2692            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         !!!emit ($self->{ct}); # DOCTYPE  
   
2693          redo A;          redo A;
2694        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
2695                   $self->{ct}->{type} == DOCTYPE_TOKEN and
2696                   $self->{nc} == 0x005B) { # [
2697          !!!cp (206.1);          !!!cp (206.1);
2698          !!!parse-error (type => 'no SYSTEM literal');          !!!parse-error (type => 'no SYSTEM literal');
2699    
# Line 2529  sub _get_next_token ($) { Line 2704  sub _get_next_token ($) {
2704          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
2705          redo A;          redo A;
2706        } else {        } else {
         !!!cp (206);  
2707          !!!parse-error (type => 'string after SYSTEM');          !!!parse-error (type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
2708    
2709          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2710              !!!cp (206);          
2711              $self->{ct}->{quirks} = 1;
2712              $self->{state} = BOGUS_DOCTYPE_STATE;
2713            } else {
2714              !!!cp (206.2);
2715              $self->{state} = BOGUS_MD_STATE;
2716            }
2717    
2718          !!!next-input-character;          !!!next-input-character;
2719          redo A;          redo A;
2720        }        }
# Line 2544  sub _get_next_token ($) { Line 2725  sub _get_next_token ($) {
2725          !!!next-input-character;          !!!next-input-character;
2726          redo A;          redo A;
2727        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
         !!!cp (208);  
2728          !!!parse-error (type => 'unclosed SYSTEM literal');          !!!parse-error (type => 'unclosed SYSTEM literal');
2729    
2730          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2731          $self->{s_kwd} = '';            !!!cp (208);
2732              $self->{state} = DATA_STATE;
2733              $self->{s_kwd} = '';
2734              $self->{ct}->{quirks} = 1;
2735            } else {
2736              !!!cp (208.1);
2737              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2738            }
2739            
2740          !!!next-input-character;          !!!next-input-character;
2741            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         !!!emit ($self->{ct}); # DOCTYPE  
   
2742          redo A;          redo A;
2743        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!cp (209);  
2744          !!!parse-error (type => 'unclosed SYSTEM literal');          !!!parse-error (type => 'unclosed SYSTEM literal');
2745    
2746          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2747          $self->{s_kwd} = '';            !!!cp (209);
2748              $self->{state} = DATA_STATE;
2749              $self->{s_kwd} = '';
2750              $self->{ct}->{quirks} = 1;
2751            } else {
2752              !!!cp (209.1);
2753              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2754            }
2755            
2756          ## reconsume          ## reconsume
2757            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         !!!emit ($self->{ct}); # DOCTYPE  
   
2758          redo A;          redo A;
2759        } else {        } else {
2760          !!!cp (210);          !!!cp (210);
2761          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
2762          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
2763                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
2764    
# Line 2597  sub _get_next_token ($) { Line 2785  sub _get_next_token ($) {
2785    
2786          redo A;          redo A;
2787        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!cp (213);  
2788          !!!parse-error (type => 'unclosed SYSTEM literal');          !!!parse-error (type => 'unclosed SYSTEM literal');
2789    
2790          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2791          $self->{s_kwd} = '';            !!!cp (213);
2792          ## reconsume            $self->{state} = DATA_STATE;
2793              $self->{s_kwd} = '';
2794          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
2795          !!!emit ($self->{ct}); # DOCTYPE          } else {
2796              !!!cp (213.1);
2797              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2798            }
2799    
2800            ## reconsume
2801            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
2802          redo A;          redo A;
2803        } else {        } else {
2804          !!!cp (214);          !!!cp (214);
2805          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
2806          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
2807                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
2808    
# Line 2626  sub _get_next_token ($) { Line 2817  sub _get_next_token ($) {
2817          !!!next-input-character;          !!!next-input-character;
2818          redo A;          redo A;
2819        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
2820          !!!cp (216);          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2821          $self->{state} = DATA_STATE;            !!!cp (216);
2822          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
2823          !!!next-input-character;            $self->{s_kwd} = '';
2824            } else {
2825          !!!emit ($self->{ct}); # DOCTYPE            !!!cp (216.1);
2826              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2827            }
2828    
2829            !!!next-input-character;
2830            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
2831          redo A;          redo A;
2832    ## TODO: "NDATA"
2833        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2834          !!!cp (217);          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2835          !!!parse-error (type => 'unclosed DOCTYPE');            !!!cp (217);
2836          $self->{state} = DATA_STATE;            !!!parse-error (type => 'unclosed DOCTYPE');
2837          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
2838          ## reconsume            $self->{s_kwd} = '';
2839              $self->{ct}->{quirks} = 1;
2840          $self->{ct}->{quirks} = 1;          } else {
2841          !!!emit ($self->{ct}); # DOCTYPE            !!!cp (217.1);
2842              !!!parse-error (type => 'unclosed md'); ## TODO: type
2843              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2844            }
2845    
2846            ## reconsume
2847            !!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
2848          redo A;          redo A;
2849        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
2850                   $self->{ct}->{type} == DOCTYPE_TOKEN and
2851                   $self->{nc} == 0x005B) { # [
2852          !!!cp (218.1);          !!!cp (218.1);
2853          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2854          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
# Line 2654  sub _get_next_token ($) { Line 2857  sub _get_next_token ($) {
2857          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
2858          redo A;          redo A;
2859        } else {        } else {
         !!!cp (218);  
2860          !!!parse-error (type => 'string after SYSTEM literal');          !!!parse-error (type => 'string after SYSTEM literal');
         #$self->{ct}->{quirks} = 1;  
2861    
2862          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
2863              !!!cp (218);
2864              #$self->{ct}->{quirks} = 1;
2865              $self->{state} = BOGUS_DOCTYPE_STATE;
2866            } else {
2867              !!!cp (218.2);
2868              $self->{state} = BOGUS_MD_STATE;
2869            }
2870    
2871          !!!next-input-character;          !!!next-input-character;
2872          redo A;          redo A;
2873        }        }
# Line 3547  sub _get_next_token ($) { Line 3756  sub _get_next_token ($) {
3756        } elsif ($self->{kwd} eq 'ATTLIS' and        } elsif ($self->{kwd} eq 'ATTLIS' and
3757                 $self->{nc} == 0x0054) { # T                 $self->{nc} == 0x0054) { # T
3758          $self->{ct} = {type => ATTLIST_TOKEN, name => '',          $self->{ct} = {type => ATTLIST_TOKEN, name => '',
3759                           attrdefs => [],
3760                         line => $self->{line_prev},                         line => $self->{line_prev},
3761                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
3762          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
# Line 3689  sub _get_next_token ($) { Line 3899  sub _get_next_token ($) {
3899        ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".        ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
3900                
3901        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3902          ## TODO:          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
3903          $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
3904            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
3905              ## TODO: ...
3906              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
3907            } else { # ENTITY/NOTATION
3908              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
3909            }
3910          !!!next-input-character;          !!!next-input-character;
3911          redo A;          redo A;
3912        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3913          if ($self->{ct}->{type} == ATTLIST_TOKEN) {          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
3914            #            #
3915          } else {          } else {
3916            !!!parse-error (type => 'no md body'); ## TODO: type            !!!parse-error (type => 'no md def'); ## TODO: type
3917          }          }
3918          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3919          !!!next-input-character;          !!!next-input-character;
# Line 3731  sub _get_next_token ($) { Line 3947  sub _get_next_token ($) {
3947          ## XML5: No parse error.          ## XML5: No parse error.
3948          !!!parse-error (type => 'unclosed md'); ## TODO: type          !!!parse-error (type => 'unclosed md'); ## TODO: type
3949          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3950            !!!emit ($self->{ct});
3951          redo A;          redo A;
3952        } else {        } else {
3953          ## XML5: Not defined yet.          ## XML5: Not defined yet.
3954            $self->{ca} = {name => chr ($self->{nc}), # attrdef
3955          ## TODO: ...                         tokens => [],
3956                           line => $self->{line}, column => $self->{column}};
3957          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
3958          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded          !!!next-input-character;
3959            redo A;
3960          }
3961        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
3962          if ($is_space->{$self->{nc}}) {
3963            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
3964            !!!next-input-character;
3965            redo A;
3966          } elsif ($self->{nc} == 0x003E) { # >
3967            ## XML5: Same as "anything else".
3968            !!!parse-error (type => 'no attr type'); ## TODO: type
3969            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3970            !!!next-input-character;
3971            !!!emit ($self->{ct}); # ATTLIST
3972            redo A;
3973          } elsif ($self->{nc} == 0x0028) { # (
3974            ## XML5: Same as "anything else".
3975            !!!parse-error (type => 'no space before paren'); ## TODO: type
3976            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
3977            !!!next-input-character;
3978            redo A;
3979          } elsif ($self->{nc} == -1) {
3980            ## XML5: No parse error.
3981            !!!parse-error (type => 'unclosed md'); ## TODO: type
3982            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3983            !!!next-input-character;
3984            !!!emit ($self->{ct}); # ATTLIST
3985            redo A;
3986          } else {
3987            ## XML5: Not defined yet.
3988            $self->{ca}->{name} .= chr $self->{nc};
3989            ## Stay in the state.
3990            !!!next-input-character;
3991            redo A;
3992          }
3993        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
3994          if ($is_space->{$self->{nc}}) {
3995            ## Stay in the state.
3996            !!!next-input-character;
3997            redo A;
3998          } elsif ($self->{nc} == 0x003E) { # >
3999            ## XML5: Same as "anything else".
4000            !!!parse-error (type => 'no attr type'); ## TODO: type
4001            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4002            !!!next-input-character;
4003            !!!emit ($self->{ct}); # ATTLIST
4004            redo A;
4005          } elsif ($self->{nc} == 0x0028) { # (
4006            ## XML5: Same as "anything else".
4007            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
4008            !!!next-input-character;
4009            redo A;
4010          } elsif ($self->{nc} == -1) {
4011            ## XML5: No parse error.
4012            !!!parse-error (type => 'unclosed md'); ## TODO: type
4013            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4014            !!!next-input-character;
4015            !!!emit ($self->{ct});
4016            redo A;
4017          } else {
4018            ## XML5: Not defined yet.
4019            $self->{ca}->{type} = chr $self->{nc};
4020            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
4021            !!!next-input-character;
4022            redo A;
4023          }
4024        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
4025          if ($is_space->{$self->{nc}}) {
4026            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
4027            !!!next-input-character;
4028            redo A;
4029          } elsif ($self->{nc} == 0x0023) { # #
4030            ## XML5: Same as "anything else".
4031            !!!parse-error (type => 'no space before default value'); ## TODO: type
4032            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
4033            !!!next-input-character;
4034            redo A;
4035          } elsif ($self->{nc} == 0x0022) { # "
4036            ## XML5: Same as "anything else".
4037            !!!parse-error (type => 'no space before default value'); ## TODO: type
4038            $self->{ca}->{value} = '';
4039            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4040            !!!next-input-character;
4041            redo A;
4042          } elsif ($self->{nc} == 0x0027) { # '
4043            ## XML5: Same as "anything else".
4044            !!!parse-error (type => 'no space before default value'); ## TODO: type
4045            $self->{ca}->{value} = '';
4046            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4047            !!!next-input-character;
4048            redo A;
4049          } elsif ($self->{nc} == 0x003E) { # >
4050            ## XML5: Same as "anything else".
4051            !!!parse-error (type => 'no attr default'); ## TODO: type
4052            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4053            !!!next-input-character;
4054            !!!emit ($self->{ct}); # ATTLIST
4055            redo A;
4056          } elsif ($self->{nc} == 0x0028) { # (
4057            ## XML5: Same as "anything else".
4058            !!!parse-error (type => 'no space before paren'); ## TODO: type
4059            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
4060            !!!next-input-character;
4061            redo A;
4062          } elsif ($self->{nc} == -1) {
4063            ## XML5: No parse error.
4064            !!!parse-error (type => 'unclosed md'); ## TODO: type
4065            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4066            !!!next-input-character;
4067            !!!emit ($self->{ct});
4068            redo A;
4069          } else {
4070            ## XML5: Not defined yet.
4071            $self->{ca}->{type} .= chr $self->{nc};
4072            ## Stay in the state.
4073            !!!next-input-character;
4074            redo A;
4075          }
4076        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
4077          if ($is_space->{$self->{nc}}) {
4078            ## Stay in the state.
4079            !!!next-input-character;
4080            redo A;
4081          } elsif ($self->{nc} == 0x0028) { # (
4082            ## XML5: Same as "anything else".
4083            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
4084            !!!next-input-character;
4085            redo A;
4086          } elsif ($self->{nc} == 0x0023) { # #
4087            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
4088            !!!next-input-character;
4089            redo A;
4090          } elsif ($self->{nc} == 0x0022) { # "
4091            ## XML5: Same as "anything else".
4092            $self->{ca}->{value} = '';
4093            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4094            !!!next-input-character;
4095            redo A;
4096          } elsif ($self->{nc} == 0x0027) { # '
4097            ## XML5: Same as "anything else".
4098            $self->{ca}->{value} = '';
4099            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4100            !!!next-input-character;
4101            redo A;
4102          } elsif ($self->{nc} == 0x003E) { # >
4103            ## XML5: Same as "anything else".
4104            !!!parse-error (type => 'no attr default'); ## TODO: type
4105            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4106            !!!next-input-character;
4107            !!!emit ($self->{ct}); # ATTLIST
4108            redo A;
4109          } elsif ($self->{nc} == -1) {
4110            ## XML5: No parse error.
4111            !!!parse-error (type => 'unclosed md'); ## TODO: type
4112            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4113            !!!next-input-character;
4114            !!!emit ($self->{ct});
4115            redo A;
4116          } else {
4117            ## XML5: Switch to the "DOCTYPE bogus comment state".
4118            !!!parse-error (type => 'unquoted attr value'); ## TODO: type
4119            $self->{ca}->{value} = '';
4120            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
4121            ## Reconsume.
4122            redo A;
4123          }
4124        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
4125          if ($is_space->{$self->{nc}}) {
4126            ## Stay in the state.
4127            !!!next-input-character;
4128            redo A;
4129          } elsif ($self->{nc} == 0x007C) { # |
4130            !!!parse-error (type => 'empty allowed token'); ## TODO: type
4131            ## Stay in the state.
4132            !!!next-input-character;
4133            redo A;
4134          } elsif ($self->{nc} == 0x0029) { # )
4135            !!!parse-error (type => 'empty allowed token'); ## TODO: type
4136            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
4137            !!!next-input-character;
4138            redo A;
4139          } elsif ($self->{nc} == 0x003E) { # >
4140            !!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type
4141            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4142            !!!next-input-character;
4143            !!!emit ($self->{ct}); # ATTLIST
4144            redo A;
4145          } elsif ($self->{nc} == -1) {
4146            ## XML5: No parse error.
4147            !!!parse-error (type => 'unclosed md'); ## TODO: type
4148            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4149            !!!next-input-character;
4150            !!!emit ($self->{ct});
4151            redo A;
4152          } else {
4153            push @{$self->{ca}->{tokens}}, chr $self->{nc};
4154            $self->{state} = ALLOWED_TOKEN_STATE;
4155            !!!next-input-character;
4156            redo A;
4157          }
4158        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
4159          if ($is_space->{$self->{nc}}) {
4160            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
4161            !!!next-input-character;
4162            redo A;
4163          } elsif ($self->{nc} == 0x007C) { # |
4164            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
4165            !!!next-input-character;
4166            redo A;
4167          } elsif ($self->{nc} == 0x0029) { # )
4168            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
4169            !!!next-input-character;
4170            redo A;
4171          } elsif ($self->{nc} == 0x003E) { # >
4172            !!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type
4173            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4174            !!!next-input-character;
4175            !!!emit ($self->{ct}); # ATTLIST
4176            redo A;
4177          } elsif ($self->{nc} == -1) {
4178            ## XML5: No parse error.
4179            !!!parse-error (type => 'unclosed md'); ## TODO: type
4180            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4181            !!!next-input-character;
4182            !!!emit ($self->{ct});
4183            redo A;
4184          } else {
4185            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
4186            ## Stay in the state.
4187            !!!next-input-character;
4188            redo A;
4189          }
4190        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
4191          if ($is_space->{$self->{nc}}) {
4192            ## Stay in the state.
4193            !!!next-input-character;
4194            redo A;
4195          } elsif ($self->{nc} == 0x007C) { # |
4196            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
4197            !!!next-input-character;
4198            redo A;
4199          } elsif ($self->{nc} == 0x0029) { # )
4200            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
4201            !!!next-input-character;
4202            redo A;
4203          } elsif ($self->{nc} == 0x003E) { # >
4204            !!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type
4205            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4206            !!!next-input-character;
4207            !!!emit ($self->{ct}); # ATTLIST
4208            redo A;
4209          } elsif ($self->{nc} == -1) {
4210            ## XML5: No parse error.
4211            !!!parse-error (type => 'unclosed md'); ## TODO: type
4212            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4213            !!!next-input-character;
4214            !!!emit ($self->{ct});
4215            redo A;
4216          } else {
4217            !!!parse-error (type => 'space in allowed token', ## TODO: type
4218                            line => $self->{line_prev},
4219                            column => $self->{column_prev});
4220            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
4221            $self->{state} = ALLOWED_TOKEN_STATE;
4222            !!!next-input-character;
4223            redo A;
4224          }
4225        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
4226          if ($is_space->{$self->{nc}}) {
4227            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
4228            !!!next-input-character;
4229            redo A;
4230          } elsif ($self->{nc} == 0x0023) { # #
4231            !!!parse-error (type => 'no space before default value'); ## TODO: type
4232            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
4233            !!!next-input-character;
4234            redo A;
4235          } elsif ($self->{nc} == 0x0022) { # "
4236            !!!parse-error (type => 'no space before default value'); ## TODO: type
4237            $self->{ca}->{value} = '';
4238            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4239            !!!next-input-character;
4240            redo A;
4241          } elsif ($self->{nc} == 0x0027) { # '
4242            !!!parse-error (type => 'no space before default value'); ## TODO: type
4243            $self->{ca}->{value} = '';
4244            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4245            !!!next-input-character;
4246            redo A;
4247          } elsif ($self->{nc} == 0x003E) { # >
4248            !!!parse-error (type => 'no attr default'); ## TODO: type
4249            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4250            !!!next-input-character;
4251            !!!emit ($self->{ct}); # ATTLIST
4252            redo A;
4253          } elsif ($self->{nc} == -1) {
4254            !!!parse-error (type => 'unclosed md'); ## TODO: type
4255            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4256            !!!next-input-character;
4257            !!!emit ($self->{ct});
4258            redo A;
4259          } else {
4260            !!!parse-error (type => 'unquoted attr value'); ## TODO: type
4261            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
4262            ## Reconsume.
4263            redo A;
4264          }
4265        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
4266          if ($is_space->{$self->{nc}}) {
4267            ## Stay in the state.
4268            !!!next-input-character;
4269            redo A;
4270          } elsif ($self->{nc} == 0x0023) { # #
4271            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
4272            !!!next-input-character;
4273            redo A;
4274          } elsif ($self->{nc} == 0x0022) { # "
4275            $self->{ca}->{value} = '';
4276            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4277            !!!next-input-character;
4278            redo A;
4279          } elsif ($self->{nc} == 0x0027) { # '
4280            $self->{ca}->{value} = '';
4281            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4282            !!!next-input-character;
4283            redo A;
4284          } elsif ($self->{nc} == 0x003E) { # >
4285            !!!parse-error (type => 'no attr default'); ## TODO: type
4286            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4287            !!!next-input-character;
4288            !!!emit ($self->{ct}); # ATTLIST
4289            redo A;
4290          } elsif ($self->{nc} == -1) {
4291            !!!parse-error (type => 'unclosed md'); ## TODO: type
4292            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4293            !!!next-input-character;
4294            !!!emit ($self->{ct});
4295            redo A;
4296          } else {
4297            !!!parse-error (type => 'unquoted attr value'); ## TODO: type
4298            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
4299            ## Reconsume.
4300            redo A;
4301          }
4302        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
4303          if ($is_space->{$self->{nc}}) {
4304            ## XML5: No parse error.
4305            !!!parse-error (type => 'no default type'); ## TODO: type
4306            $self->{state} = BOGUS_MD_STATE;
4307            ## Reconsume.
4308            redo A;
4309          } elsif ($self->{nc} == 0x0022) { # "
4310            ## XML5: Same as "anything else".
4311            $self->{ca}->{value} = '';
4312            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4313            !!!next-input-character;
4314            redo A;
4315          } elsif ($self->{nc} == 0x0027) { # '
4316            ## XML5: Same as "anything else".
4317            $self->{ca}->{value} = '';
4318            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4319            !!!next-input-character;
4320            redo A;
4321          } elsif ($self->{nc} == 0x003E) { # >
4322            ## XML5: Same as "anything else".
4323            !!!parse-error (type => 'no attr default'); ## TODO: type
4324            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4325            !!!next-input-character;
4326            !!!emit ($self->{ct}); # ATTLIST
4327            redo A;
4328          } elsif ($self->{nc} == -1) {
4329            ## XML5: No parse error.
4330            !!!parse-error (type => 'unclosed md'); ## TODO: type
4331            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4332            !!!next-input-character;
4333            !!!emit ($self->{ct});
4334            redo A;
4335          } else {
4336            $self->{ca}->{default} = chr $self->{nc};
4337            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
4338            !!!next-input-character;
4339            redo A;
4340          }
4341        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
4342          if ($is_space->{$self->{nc}}) {
4343            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
4344            !!!next-input-character;
4345            redo A;
4346          } elsif ($self->{nc} == 0x0022) { # "
4347            ## XML5: Same as "anything else".
4348            !!!parse-error (type => 'no space before default value'); ## TODO: type
4349            $self->{ca}->{value} = '';
4350            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4351            !!!next-input-character;
4352            redo A;
4353          } elsif ($self->{nc} == 0x0027) { # '
4354            ## XML5: Same as "anything else".
4355            !!!parse-error (type => 'no space before default value'); ## TODO: type
4356            $self->{ca}->{value} = '';
4357            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4358            !!!next-input-character;
4359            redo A;
4360          } elsif ($self->{nc} == 0x003E) { # >
4361            ## XML5: Same as "anything else".
4362            push @{$self->{ct}->{attrdefs}}, $self->{ca};
4363            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4364            !!!next-input-character;
4365            !!!emit ($self->{ct}); # ATTLIST
4366            redo A;
4367          } elsif ($self->{nc} == -1) {
4368            ## XML5: No parse error.
4369            !!!parse-error (type => 'unclosed md'); ## TODO: type
4370            push @{$self->{ct}->{attrdefs}}, $self->{ca};
4371            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4372            !!!next-input-character;
4373            !!!emit ($self->{ct});
4374            redo A;
4375          } else {
4376            $self->{ca}->{default} .= chr $self->{nc};
4377            ## Stay in the state.
4378            !!!next-input-character;
4379            redo A;
4380          }
4381        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
4382          if ($is_space->{$self->{nc}}) {
4383            ## Stay in the state.
4384            !!!next-input-character;
4385            redo A;
4386          } elsif ($self->{nc} == 0x0022) { # "
4387            $self->{ca}->{value} = '';
4388            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4389            !!!next-input-character;
4390            redo A;
4391          } elsif ($self->{nc} == 0x0027) { # '
4392            $self->{ca}->{value} = '';
4393            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4394            !!!next-input-character;
4395            redo A;
4396          } elsif ($self->{nc} == 0x003E) { # >
4397            push @{$self->{ct}->{attrdefs}}, $self->{ca};
4398            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4399            !!!next-input-character;
4400            !!!emit ($self->{ct}); # ATTLIST
4401            redo A;
4402          } elsif ($self->{nc} == -1) {
4403            ## XML5: No parse error.
4404            !!!parse-error (type => 'unclosed md'); ## TODO: type
4405            push @{$self->{ct}->{attrdefs}}, $self->{ca};
4406            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4407            !!!next-input-character;
4408            !!!emit ($self->{ct});
4409            redo A;
4410          } else {
4411            ## XML5: Not defined yet.
4412            if ($self->{ca}->{default} eq 'FIXED') {
4413              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
4414            } else {
4415              push @{$self->{ct}->{attrdefs}}, $self->{ca};
4416              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
4417            }
4418            ## Reconsume.
4419            redo A;
4420          }
4421        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
4422          if ($is_space->{$self->{nc}} or
4423              $self->{nc} == -1 or
4424              $self->{nc} == 0x003E) { # >
4425            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
4426            ## Reconsume.
4427            redo A;
4428          } else {
4429            !!!parse-error (type => 'no space before attr name'); ## TODO: type
4430            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
4431          ## Reconsume.          ## Reconsume.
4432          redo A;          redo A;
4433        }        }
4434    
4435        } elsif ($self->{state} == BOGUS_MD_STATE) {
4436          if ($self->{nc} == 0x003E) { # >
4437            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4438            !!!next-input-character;
4439            !!!emit ($self->{ct}); # ATTLIST/ENTITY/NOTATION
4440            redo A;
4441          } elsif ($self->{nc} == -1) {
4442            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4443            ## Reconsume.
4444            !!!emit ($self->{ct}); # ATTLIST/ENTITY/NOTATION
4445            redo A;
4446          } else {
4447            ## Stay in the state.
4448            !!!next-input-character;
4449            redo A;
4450          }
4451      } else {      } else {
4452        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
4453      }      }
# Line 3753  sub _get_next_token ($) { Line 4458  sub _get_next_token ($) {
4458    
4459  1;  1;
4460  ## $Date$  ## $Date$
4461                                    

Legend:
Removed from v.1.14  
changed lines
  Added in v.1.16

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24