/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.14 by wakaba, Fri Oct 17 07:14:29 2008 UTC | revision 1.15 by wakaba, Sat Oct 18 08:05:29 2008 UTC
# Line 164  sub BEFORE_MD_NAME_STATE () { 68 } Line 164  sub BEFORE_MD_NAME_STATE () { 68 }
164  sub MD_NAME_STATE () { 69 }  sub MD_NAME_STATE () { 69 }
165  sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }  sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166  sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }  sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    
181  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
182  ## list and descriptions)  ## list and descriptions)
# Line 1257  sub _get_next_token ($) { Line 1270  sub _get_next_token ($) {
1270          redo A;          redo A;
1271        }        }
1272      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1273        ## XML5: "Tag attribute value double quoted state".        ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1274          ## ATTLIST attribute value double quoted state".
1275                
1276        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1277          !!!cp (95);          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1278          ## XML5: "Tag attribute name before state".            !!!cp (95.1);
1279          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1280              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1281              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1282            } else {
1283              !!!cp (95);
1284              ## XML5: "Tag attribute name before state".
1285              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1286            }
1287          !!!next-input-character;          !!!next-input-character;
1288          redo A;          redo A;
1289        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
# Line 1283  sub _get_next_token ($) { Line 1304  sub _get_next_token ($) {
1304          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1305            !!!cp (97);            !!!cp (97);
1306            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1307    
1308              $self->{state} = DATA_STATE;
1309              $self->{s_kwd} = '';
1310              ## reconsume
1311              !!!emit ($self->{ct}); # start tag
1312              redo A;
1313          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1314            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1315            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1292  sub _get_next_token ($) { Line 1319  sub _get_next_token ($) {
1319              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1320              !!!cp (99);              !!!cp (99);
1321            }            }
1322    
1323              $self->{state} = DATA_STATE;
1324              $self->{s_kwd} = '';
1325              ## reconsume
1326              !!!emit ($self->{ct}); # end tag
1327              redo A;
1328            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1329              ## XML5: No parse error above; not defined yet.
1330              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1331              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1332              ## Reconsume.
1333              !!!emit ($self->{ct}); # ATTLIST
1334              redo A;
1335          } else {          } else {
1336            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1337          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         !!!emit ($self->{ct}); # start tag or end tag  
   
         redo A;  
1338        } else {        } else {
1339            ## XML5 [ATTLIST]: Not defined yet.
1340          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1341            !!!cp (100);            !!!cp (100);
1342            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1320  sub _get_next_token ($) { Line 1354  sub _get_next_token ($) {
1354          redo A;          redo A;
1355        }        }
1356      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1357        ## XML5: "Tag attribute value single quoted state".        ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1358          ## ATTLIST attribute value single quoted state".
1359    
1360        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1361          !!!cp (101);          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1362          ## XML5: "Before attribute name state" (sic).            !!!cp (101.1);
1363          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1364              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1365              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1366            } else {
1367              !!!cp (101);
1368              ## XML5: "Before attribute name state" (sic).
1369              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1370            }
1371          !!!next-input-character;          !!!next-input-character;
1372          redo A;          redo A;
1373        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
# Line 1346  sub _get_next_token ($) { Line 1388  sub _get_next_token ($) {
1388          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1389            !!!cp (103);            !!!cp (103);
1390            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1391    
1392              $self->{state} = DATA_STATE;
1393              $self->{s_kwd} = '';
1394              ## reconsume
1395              !!!emit ($self->{ct}); # start tag
1396              redo A;
1397          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1398            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1399            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1355  sub _get_next_token ($) { Line 1403  sub _get_next_token ($) {
1403              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1404              !!!cp (105);              !!!cp (105);
1405            }            }
1406    
1407              $self->{state} = DATA_STATE;
1408              $self->{s_kwd} = '';
1409              ## reconsume
1410              !!!emit ($self->{ct}); # end tag
1411              redo A;
1412            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1413              ## XML5: No parse error above; not defined yet.
1414              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1415              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1416              ## Reconsume.
1417              !!!emit ($self->{ct}); # ATTLIST
1418              redo A;
1419          } else {          } else {
1420            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1421          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         !!!emit ($self->{ct}); # start tag or end tag  
   
         redo A;  
1422        } else {        } else {
1423            ## XML5 [ATTLIST]: Not defined yet.
1424          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1425            !!!cp (106);            !!!cp (106);
1426            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1386  sub _get_next_token ($) { Line 1441  sub _get_next_token ($) {
1441        ## XML5: "Tag attribute value unquoted state".        ## XML5: "Tag attribute value unquoted state".
1442    
1443        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1444          !!!cp (107);          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1445          ## XML5: "Tag attribute name before state".            !!!cp (107.1);
1446          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
1447              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
1448            } else {
1449              !!!cp (107);
1450              ## XML5: "Tag attribute name before state".
1451              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1452            }
1453          !!!next-input-character;          !!!next-input-character;
1454          redo A;          redo A;
1455        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
# Line 1409  sub _get_next_token ($) { Line 1470  sub _get_next_token ($) {
1470          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1471            !!!cp (109);            !!!cp (109);
1472            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1473    
1474              $self->{state} = DATA_STATE;
1475              $self->{s_kwd} = '';
1476              !!!next-input-character;
1477              !!!emit ($self->{ct}); # start tag
1478              redo A;
1479          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1480            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1481            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1418  sub _get_next_token ($) { Line 1485  sub _get_next_token ($) {
1485              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1486              !!!cp (111);              !!!cp (111);
1487            }            }
1488    
1489              $self->{state} = DATA_STATE;
1490              $self->{s_kwd} = '';
1491              !!!next-input-character;
1492              !!!emit ($self->{ct}); # end tag
1493              redo A;
1494            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1495              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1496              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1497              !!!next-input-character;
1498              !!!emit ($self->{ct}); # ATTLIST
1499              redo A;
1500          } else {          } else {
1501            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1502          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         !!!next-input-character;  
   
         !!!emit ($self->{ct}); # start tag or end tag  
   
         redo A;  
1503        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         !!!parse-error (type => 'unclosed tag');  
1504          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1505            !!!cp (112);            !!!cp (112);
1506              !!!parse-error (type => 'unclosed tag');
1507            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1508    
1509              $self->{state} = DATA_STATE;
1510              $self->{s_kwd} = '';
1511              ## reconsume
1512              !!!emit ($self->{ct}); # start tag
1513              redo A;
1514          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1515              !!!parse-error (type => 'unclosed tag');
1516            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1517            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
1518              !!!cp (113);              !!!cp (113);
# Line 1442  sub _get_next_token ($) { Line 1521  sub _get_next_token ($) {
1521              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1522              !!!cp (114);              !!!cp (114);
1523            }            }
1524    
1525              $self->{state} = DATA_STATE;
1526              $self->{s_kwd} = '';
1527              ## reconsume
1528              !!!emit ($self->{ct}); # end tag
1529              redo A;
1530            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1531              !!!parse-error (type => 'unclosed md'); ## TODO: type
1532              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1533              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1534              ## Reconsume.
1535              !!!emit ($self->{ct}); # ATTLIST
1536              redo A;
1537          } else {          } else {
1538            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1539          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         !!!emit ($self->{ct}); # start tag or end tag  
   
         redo A;  
1540        } else {        } else {
1541          if ({          if ({
1542               0x0022 => 1, # "               0x0022 => 1, # "
# Line 3547  sub _get_next_token ($) { Line 3632  sub _get_next_token ($) {
3632        } elsif ($self->{kwd} eq 'ATTLIS' and        } elsif ($self->{kwd} eq 'ATTLIS' and
3633                 $self->{nc} == 0x0054) { # T                 $self->{nc} == 0x0054) { # T
3634          $self->{ct} = {type => ATTLIST_TOKEN, name => '',          $self->{ct} = {type => ATTLIST_TOKEN, name => '',
3635                           attrdefs => [],
3636                         line => $self->{line_prev},                         line => $self->{line_prev},
3637                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
3638          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
# Line 3731  sub _get_next_token ($) { Line 3817  sub _get_next_token ($) {
3817          ## XML5: No parse error.          ## XML5: No parse error.
3818          !!!parse-error (type => 'unclosed md'); ## TODO: type          !!!parse-error (type => 'unclosed md'); ## TODO: type
3819          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3820            !!!emit ($self->{ct});
3821          redo A;          redo A;
3822        } else {        } else {
3823          ## XML5: Not defined yet.          ## XML5: Not defined yet.
3824            $self->{ca} = {name => chr ($self->{nc}), # attrdef
3825          ## TODO: ...                         tokens => [],
3826                           line => $self->{line}, column => $self->{column}};
3827            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
3828            !!!next-input-character;
3829            redo A;
3830          }
3831        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
3832          if ($is_space->{$self->{nc}}) {
3833            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
3834            !!!next-input-character;
3835            redo A;
3836          } elsif ($self->{nc} == 0x003E) { # >
3837            ## XML5: Same as "anything else".
3838            !!!parse-error (type => 'no attr type'); ## TODO: type
3839            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3840            !!!next-input-character;
3841            !!!emit ($self->{ct}); # ATTLIST
3842            redo A;
3843          } elsif ($self->{nc} == 0x0028) { # (
3844            ## XML5: Same as "anything else".
3845            !!!parse-error (type => 'no space before paren'); ## TODO: type
3846            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
3847            !!!next-input-character;
3848            redo A;
3849          } elsif ($self->{nc} == -1) {
3850            ## XML5: No parse error.
3851            !!!parse-error (type => 'unclosed md'); ## TODO: type
3852            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3853            !!!next-input-character;
3854            !!!emit ($self->{ct}); # ATTLIST
3855            redo A;
3856          } else {
3857            ## XML5: Not defined yet.
3858            $self->{ca}->{name} .= chr $self->{nc};
3859            ## Stay in the state.
3860            !!!next-input-character;
3861            redo A;
3862          }
3863        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
3864          if ($is_space->{$self->{nc}}) {
3865            ## Stay in the state.
3866            !!!next-input-character;
3867            redo A;
3868          } elsif ($self->{nc} == 0x003E) { # >
3869            ## XML5: Same as "anything else".
3870            !!!parse-error (type => 'no attr type'); ## TODO: type
3871            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3872            !!!next-input-character;
3873            !!!emit ($self->{ct}); # ATTLIST
3874            redo A;
3875          } elsif ($self->{nc} == 0x0028) { # (
3876            ## XML5: Same as "anything else".
3877            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
3878            !!!next-input-character;
3879            redo A;
3880          } elsif ($self->{nc} == -1) {
3881            ## XML5: No parse error.
3882            !!!parse-error (type => 'unclosed md'); ## TODO: type
3883            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3884            !!!next-input-character;
3885            !!!emit ($self->{ct});
3886            redo A;
3887          } else {
3888            ## XML5: Not defined yet.
3889            $self->{ca}->{type} = chr $self->{nc};
3890            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
3891            !!!next-input-character;
3892            redo A;
3893          }
3894        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
3895          if ($is_space->{$self->{nc}}) {
3896            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
3897            !!!next-input-character;
3898            redo A;
3899          } elsif ($self->{nc} == 0x0023) { # #
3900            ## XML5: Same as "anything else".
3901            !!!parse-error (type => 'no space before default value'); ## TODO: type
3902            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
3903            !!!next-input-character;
3904            redo A;
3905          } elsif ($self->{nc} == 0x0022) { # "
3906            ## XML5: Same as "anything else".
3907            !!!parse-error (type => 'no space before default value'); ## TODO: type
3908            $self->{ca}->{value} = '';
3909            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
3910            !!!next-input-character;
3911            redo A;
3912          } elsif ($self->{nc} == 0x0027) { # '
3913            ## XML5: Same as "anything else".
3914            !!!parse-error (type => 'no space before default value'); ## TODO: type
3915            $self->{ca}->{value} = '';
3916            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
3917            !!!next-input-character;
3918            redo A;
3919          } elsif ($self->{nc} == 0x003E) { # >
3920            ## XML5: Same as "anything else".
3921            !!!parse-error (type => 'no attr default'); ## TODO: type
3922            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3923            !!!next-input-character;
3924            !!!emit ($self->{ct}); # ATTLIST
3925            redo A;
3926          } elsif ($self->{nc} == 0x0028) { # (
3927            ## XML5: Same as "anything else".
3928            !!!parse-error (type => 'no space before paren'); ## TODO: type
3929            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
3930            !!!next-input-character;
3931            redo A;
3932          } elsif ($self->{nc} == -1) {
3933            ## XML5: No parse error.
3934            !!!parse-error (type => 'unclosed md'); ## TODO: type
3935            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3936            !!!next-input-character;
3937            !!!emit ($self->{ct});
3938            redo A;
3939          } else {
3940            ## XML5: Not defined yet.
3941            $self->{ca}->{type} .= chr $self->{nc};
3942            ## Stay in the state.
3943            !!!next-input-character;
3944            redo A;
3945          }
3946        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
3947          if ($is_space->{$self->{nc}}) {
3948            ## Stay in the state.
3949            !!!next-input-character;
3950            redo A;
3951          } elsif ($self->{nc} == 0x0028) { # (
3952            ## XML5: Same as "anything else".
3953            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
3954            !!!next-input-character;
3955            redo A;
3956          } elsif ($self->{nc} == 0x0023) { # #
3957            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
3958            !!!next-input-character;
3959            redo A;
3960          } elsif ($self->{nc} == 0x0022) { # "
3961            ## XML5: Same as "anything else".
3962            $self->{ca}->{value} = '';
3963            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
3964            !!!next-input-character;
3965            redo A;
3966          } elsif ($self->{nc} == 0x0027) { # '
3967            ## XML5: Same as "anything else".
3968            $self->{ca}->{value} = '';
3969            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
3970            !!!next-input-character;
3971            redo A;
3972          } elsif ($self->{nc} == 0x003E) { # >
3973            ## XML5: Same as "anything else".
3974            !!!parse-error (type => 'no attr default'); ## TODO: type
3975            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3976            !!!next-input-character;
3977            !!!emit ($self->{ct}); # ATTLIST
3978            redo A;
3979          } elsif ($self->{nc} == -1) {
3980            ## XML5: No parse error.
3981            !!!parse-error (type => 'unclosed md'); ## TODO: type
3982            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3983            !!!next-input-character;
3984            !!!emit ($self->{ct});
3985            redo A;
3986          } else {
3987            ## XML5: Switch to the "DOCTYPE bogus comment state".
3988            !!!parse-error (type => 'unquoted attr value'); ## TODO: type
3989            $self->{ca}->{value} = '';
3990            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
3991            ## Reconsume.
3992            redo A;
3993          }
3994        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
3995          if ($is_space->{$self->{nc}}) {
3996            ## Stay in the state.
3997            !!!next-input-character;
3998            redo A;
3999          } elsif ($self->{nc} == 0x007C) { # |
4000            !!!parse-error (type => 'empty allowed token'); ## TODO: type
4001            ## Stay in the state.
4002            !!!next-input-character;
4003            redo A;
4004          } elsif ($self->{nc} == 0x0029) { # )
4005            !!!parse-error (type => 'empty allowed token'); ## TODO: type
4006            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
4007            !!!next-input-character;
4008            redo A;
4009          } elsif ($self->{nc} == 0x003E) { # >
4010            !!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type
4011            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4012            !!!next-input-character;
4013            !!!emit ($self->{ct}); # ATTLIST
4014            redo A;
4015          } elsif ($self->{nc} == -1) {
4016            ## XML5: No parse error.
4017            !!!parse-error (type => 'unclosed md'); ## TODO: type
4018            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4019            !!!next-input-character;
4020            !!!emit ($self->{ct});
4021            redo A;
4022          } else {
4023            push @{$self->{ca}->{tokens}}, chr $self->{nc};
4024            $self->{state} = ALLOWED_TOKEN_STATE;
4025            !!!next-input-character;
4026            redo A;
4027          }
4028        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
4029          if ($is_space->{$self->{nc}}) {
4030            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
4031            !!!next-input-character;
4032            redo A;
4033          } elsif ($self->{nc} == 0x007C) { # |
4034            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
4035            !!!next-input-character;
4036            redo A;
4037          } elsif ($self->{nc} == 0x0029) { # )
4038            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
4039            !!!next-input-character;
4040            redo A;
4041          } elsif ($self->{nc} == 0x003E) { # >
4042            !!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type
4043            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4044            !!!next-input-character;
4045            !!!emit ($self->{ct}); # ATTLIST
4046            redo A;
4047          } elsif ($self->{nc} == -1) {
4048            ## XML5: No parse error.
4049            !!!parse-error (type => 'unclosed md'); ## TODO: type
4050            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4051            !!!next-input-character;
4052            !!!emit ($self->{ct});
4053            redo A;
4054          } else {
4055            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
4056            ## Stay in the state.
4057            !!!next-input-character;
4058            redo A;
4059          }
4060        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
4061          if ($is_space->{$self->{nc}}) {
4062            ## Stay in the state.
4063            !!!next-input-character;
4064            redo A;
4065          } elsif ($self->{nc} == 0x007C) { # |
4066            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
4067            !!!next-input-character;
4068            redo A;
4069          } elsif ($self->{nc} == 0x0029) { # )
4070            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
4071            !!!next-input-character;
4072            redo A;
4073          } elsif ($self->{nc} == 0x003E) { # >
4074            !!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type
4075            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4076            !!!next-input-character;
4077            !!!emit ($self->{ct}); # ATTLIST
4078            redo A;
4079          } elsif ($self->{nc} == -1) {
4080            ## XML5: No parse error.
4081            !!!parse-error (type => 'unclosed md'); ## TODO: type
4082            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4083            !!!next-input-character;
4084            !!!emit ($self->{ct});
4085            redo A;
4086          } else {
4087            !!!parse-error (type => 'space in allowed token', ## TODO: type
4088                            line => $self->{line_prev},
4089                            column => $self->{column_prev});
4090            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
4091            $self->{state} = ALLOWED_TOKEN_STATE;
4092            !!!next-input-character;
4093            redo A;
4094          }
4095        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
4096          if ($is_space->{$self->{nc}}) {
4097            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
4098            !!!next-input-character;
4099            redo A;
4100          } elsif ($self->{nc} == 0x0023) { # #
4101            !!!parse-error (type => 'no space before default value'); ## TODO: type
4102            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
4103            !!!next-input-character;
4104            redo A;
4105          } elsif ($self->{nc} == 0x0022) { # "
4106            !!!parse-error (type => 'no space before default value'); ## TODO: type
4107            $self->{ca}->{value} = '';
4108            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4109            !!!next-input-character;
4110            redo A;
4111          } elsif ($self->{nc} == 0x0027) { # '
4112            !!!parse-error (type => 'no space before default value'); ## TODO: type
4113            $self->{ca}->{value} = '';
4114            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4115            !!!next-input-character;
4116            redo A;
4117          } elsif ($self->{nc} == 0x003E) { # >
4118            !!!parse-error (type => 'no attr default'); ## TODO: type
4119            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4120            !!!next-input-character;
4121            !!!emit ($self->{ct}); # ATTLIST
4122            redo A;
4123          } elsif ($self->{nc} == -1) {
4124            !!!parse-error (type => 'unclosed md'); ## TODO: type
4125            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4126            !!!next-input-character;
4127            !!!emit ($self->{ct});
4128            redo A;
4129          } else {
4130            !!!parse-error (type => 'unquoted attr value'); ## TODO: type
4131            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
4132            ## Reconsume.
4133            redo A;
4134          }
4135        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
4136          if ($is_space->{$self->{nc}}) {
4137            ## Stay in the state.
4138            !!!next-input-character;
4139            redo A;
4140          } elsif ($self->{nc} == 0x0023) { # #
4141            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
4142            !!!next-input-character;
4143            redo A;
4144          } elsif ($self->{nc} == 0x0022) { # "
4145            $self->{ca}->{value} = '';
4146            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4147            !!!next-input-character;
4148            redo A;
4149          } elsif ($self->{nc} == 0x0027) { # '
4150            $self->{ca}->{value} = '';
4151            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4152            !!!next-input-character;
4153            redo A;
4154          } elsif ($self->{nc} == 0x003E) { # >
4155            !!!parse-error (type => 'no attr default'); ## TODO: type
4156            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4157            !!!next-input-character;
4158            !!!emit ($self->{ct}); # ATTLIST
4159            redo A;
4160          } elsif ($self->{nc} == -1) {
4161            !!!parse-error (type => 'unclosed md'); ## TODO: type
4162            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4163            !!!next-input-character;
4164            !!!emit ($self->{ct});
4165            redo A;
4166          } else {
4167            !!!parse-error (type => 'unquoted attr value'); ## TODO: type
4168            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
4169            ## Reconsume.
4170            redo A;
4171          }
4172        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
4173          if ($is_space->{$self->{nc}}) {
4174            ## XML5: No parse error.
4175            !!!parse-error (type => 'no default type'); ## TODO: type
4176          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
4177          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
4178          ## Reconsume.          ## Reconsume.
4179          redo A;          redo A;
4180          } elsif ($self->{nc} == 0x0022) { # "
4181            ## XML5: Same as "anything else".
4182            $self->{ca}->{value} = '';
4183            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4184            !!!next-input-character;
4185            redo A;
4186          } elsif ($self->{nc} == 0x0027) { # '
4187            ## XML5: Same as "anything else".
4188            $self->{ca}->{value} = '';
4189            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4190            !!!next-input-character;
4191            redo A;
4192          } elsif ($self->{nc} == 0x003E) { # >
4193            ## XML5: Same as "anything else".
4194            !!!parse-error (type => 'no attr default'); ## TODO: type
4195            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4196            !!!next-input-character;
4197            !!!emit ($self->{ct}); # ATTLIST
4198            redo A;
4199          } elsif ($self->{nc} == -1) {
4200            ## XML5: No parse error.
4201            !!!parse-error (type => 'unclosed md'); ## TODO: type
4202            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4203            !!!next-input-character;
4204            !!!emit ($self->{ct});
4205            redo A;
4206          } else {
4207            $self->{ca}->{default} = chr $self->{nc};
4208            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
4209            !!!next-input-character;
4210            redo A;
4211        }        }
4212        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
4213          if ($is_space->{$self->{nc}}) {
4214            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
4215            !!!next-input-character;
4216            redo A;
4217          } elsif ($self->{nc} == 0x0022) { # "
4218            ## XML5: Same as "anything else".
4219            !!!parse-error (type => 'no space before default value'); ## TODO: type
4220            $self->{ca}->{value} = '';
4221            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4222            !!!next-input-character;
4223            redo A;
4224          } elsif ($self->{nc} == 0x0027) { # '
4225            ## XML5: Same as "anything else".
4226            !!!parse-error (type => 'no space before default value'); ## TODO: type
4227            $self->{ca}->{value} = '';
4228            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4229            !!!next-input-character;
4230            redo A;
4231          } elsif ($self->{nc} == 0x003E) { # >
4232            ## XML5: Same as "anything else".
4233            push @{$self->{ct}->{attrdefs}}, $self->{ca};
4234            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4235            !!!next-input-character;
4236            !!!emit ($self->{ct}); # ATTLIST
4237            redo A;
4238          } elsif ($self->{nc} == -1) {
4239            ## XML5: No parse error.
4240            !!!parse-error (type => 'unclosed md'); ## TODO: type
4241            push @{$self->{ct}->{attrdefs}}, $self->{ca};
4242            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4243            !!!next-input-character;
4244            !!!emit ($self->{ct});
4245            redo A;
4246          } else {
4247            $self->{ca}->{default} .= chr $self->{nc};
4248            ## Stay in the state.
4249            !!!next-input-character;
4250            redo A;
4251          }
4252        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
4253          if ($is_space->{$self->{nc}}) {
4254            ## Stay in the state.
4255            !!!next-input-character;
4256            redo A;
4257          } elsif ($self->{nc} == 0x0022) { # "
4258            $self->{ca}->{value} = '';
4259            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4260            !!!next-input-character;
4261            redo A;
4262          } elsif ($self->{nc} == 0x0027) { # '
4263            $self->{ca}->{value} = '';
4264            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4265            !!!next-input-character;
4266            redo A;
4267          } elsif ($self->{nc} == 0x003E) { # >
4268            push @{$self->{ct}->{attrdefs}}, $self->{ca};
4269            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4270            !!!next-input-character;
4271            !!!emit ($self->{ct}); # ATTLIST
4272            redo A;
4273          } elsif ($self->{nc} == -1) {
4274            ## XML5: No parse error.
4275            !!!parse-error (type => 'unclosed md'); ## TODO: type
4276            push @{$self->{ct}->{attrdefs}}, $self->{ca};
4277            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4278            !!!next-input-character;
4279            !!!emit ($self->{ct});
4280            redo A;
4281          } else {
4282            ## XML5: Not defined yet.
4283            if ($self->{ca}->{default} eq 'FIXED') {
4284              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
4285            } else {
4286              push @{$self->{ct}->{attrdefs}}, $self->{ca};
4287              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
4288            }
4289            ## Reconsume.
4290            redo A;
4291          }
4292        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
4293          if ($is_space->{$self->{nc}} or
4294              $self->{nc} == -1 or
4295              $self->{nc} == 0x003E) { # >
4296            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
4297            ## Reconsume.
4298            redo A;
4299          } else {
4300            !!!parse-error (type => 'no space before attr name'); ## TODO: type
4301            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
4302            ## Reconsume.
4303            redo A;
4304          }      
4305      } else {      } else {
4306        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
4307      }      }
# Line 3753  sub _get_next_token ($) { Line 4312  sub _get_next_token ($) {
4312    
4313  1;  1;
4314  ## $Date$  ## $Date$
4315                                    

Legend:
Removed from v.1.14  
changed lines
  Added in v.1.15

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24