/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.70 by wakaba, Sat Mar 1 00:42:52 2008 UTC revision 1.75 by wakaba, Mon Mar 3 00:13:22 2008 UTC
# Line 8  use Error qw(:try); Line 8  use Error qw(:try);
8  ## doc.write ('');  ## doc.write ('');
9  ## alert (doc.compatMode);  ## alert (doc.compatMode);
10    
 ## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT  
 ## strip BOM and the HTML layer MUST ignore it.  Whether we can do it  
 ## is not yet clear.  
 ## "{U+FEFF}..." in UTF-16BE/UTF-16LE is three or four characters?  
 ## "{U+FEFF}..." in GB18030?  
   
11  ## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263)  ## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263)
12  ## TODO: 1252 parse error (revision 1264)  ## TODO: 1252 parse error (revision 1264)
13  ## TODO: 8859-11 = 874 (revision 1271)  ## TODO: 8859-11 = 874 (revision 1271)
# Line 24  my $permitted_slash_tag_name = { Line 18  my $permitted_slash_tag_name = {
18    meta => 1,    meta => 1,
19    hr => 1,    hr => 1,
20    br => 1,    br => 1,
21    img=> 1,    img => 1,
22    embed => 1,    embed => 1,
23    param => 1,    param => 1,
24    area => 1,    area => 1,
# Line 159  sub parse_byte_string ($$$$;$) { Line 153  sub parse_byte_string ($$$$;$) {
153    return $return;    return $return;
154  } # parse_byte_string  } # parse_byte_string
155    
156    ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
157    ## and the HTML layer MUST ignore it.  However, we do strip the BOM in
158    ## the encoding layer and the HTML layer does not ignore any U+FEFF,
159    ## because the core part of our HTML parser expects a string of characters,
160    ## not a string of bytes or code units or anything which might contain a BOM.
161    ## Therefore, any parser interface that accepts a string of bytes,
162    ## such as |parse_byte_string| in this module, must ensure that it does
163    ## strip the BOM and never strip any ZWNBSP.
164    
165  *parse_char_string = \&parse_string;  *parse_char_string = \&parse_string;
166    
167  sub parse_string ($$$;$) {  sub parse_string ($$$;$) {
# Line 283  sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUO Line 286  sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUO
286  sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }  sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
287  sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 }  sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 }
288  sub BOGUS_DOCTYPE_STATE () { 32 }  sub BOGUS_DOCTYPE_STATE () { 32 }
289    sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
290    
291  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 }
292  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
# Line 342  sub _initialize_tokenizer ($) { Line 346  sub _initialize_tokenizer ($) {
346  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
347  ##   ->{public_identifier} (DOCTYPE_TOKEN)  ##   ->{public_identifier} (DOCTYPE_TOKEN)
348  ##   ->{system_identifier} (DOCTYPE_TOKEN)  ##   ->{system_identifier} (DOCTYPE_TOKEN)
349  ##   ->{correct} == 1 or 0 (DOCTYPE_TOKEN)  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
350  ##   ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)  ##   ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
351  ##        ->{name}  ##        ->{name}
352  ##        ->{value}  ##        ->{value}
# Line 381  sub _get_next_token ($) { Line 385  sub _get_next_token ($) {
385    A: {    A: {
386      if ($self->{state} == DATA_STATE) {      if ($self->{state} == DATA_STATE) {
387        if ($self->{next_input_character} == 0x0026) { # &        if ($self->{next_input_character} == 0x0026) { # &
388          if ($self->{content_model} & CM_ENTITY) { # PCDATA | RCDATA          if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
389                not $self->{escape}) {
390            $self->{state} = ENTITY_DATA_STATE;            $self->{state} = ENTITY_DATA_STATE;
391            !!!next-input-character;            !!!next-input-character;
392            redo A;            redo A;
# Line 436  sub _get_next_token ($) { Line 441  sub _get_next_token ($) {
441      } elsif ($self->{state} == ENTITY_DATA_STATE) {      } elsif ($self->{state} == ENTITY_DATA_STATE) {
442        ## (cannot happen in CDATA state)        ## (cannot happen in CDATA state)
443                
444        my $token = $self->_tokenize_attempt_to_consume_an_entity (0);        my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
445    
446        $self->{state} = DATA_STATE;        $self->{state} = DATA_STATE;
447        # next-input-character is already done        # next-input-character is already done
# Line 739  sub _get_next_token ($) { Line 744  sub _get_next_token ($) {
744    
745          redo A;          redo A;
746        } else {        } else {
747            if ({
748                 0x0022 => 1, # "
749                 0x0027 => 1, # '
750                 0x003D => 1, # =
751                }->{$self->{next_input_character}}) {
752              !!!parse-error (type => 'bad attribute name');
753            }
754          $self->{current_attribute} = {name => chr ($self->{next_input_character}),          $self->{current_attribute} = {name => chr ($self->{next_input_character}),
755                                value => ''};                                value => ''};
756          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
# Line 833  sub _get_next_token ($) { Line 845  sub _get_next_token ($) {
845    
846          redo A;          redo A;
847        } else {        } else {
848            if ($self->{next_input_character} == 0x0022 or # "
849                $self->{next_input_character} == 0x0027) { # '
850              !!!parse-error (type => 'bad attribute name');
851            }
852          $self->{current_attribute}->{name} .= chr ($self->{next_input_character});          $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
853          ## Stay in the state          ## Stay in the state
854          !!!next-input-character;          !!!next-input-character;
# Line 979  sub _get_next_token ($) { Line 995  sub _get_next_token ($) {
995    
996          redo A;          redo A;
997        } else {        } else {
998            if ($self->{next_input_character} == 0x003D) { # =
999              !!!parse-error (type => 'bad attribute value');
1000            }
1001          $self->{current_attribute}->{value} .= chr ($self->{next_input_character});          $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1002          $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;          $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1003          !!!next-input-character;          !!!next-input-character;
# Line 986  sub _get_next_token ($) { Line 1005  sub _get_next_token ($) {
1005        }        }
1006      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1007        if ($self->{next_input_character} == 0x0022) { # "        if ($self->{next_input_character} == 0x0022) { # "
1008          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1009          !!!next-input-character;          !!!next-input-character;
1010          redo A;          redo A;
1011        } elsif ($self->{next_input_character} == 0x0026) { # &        } elsif ($self->{next_input_character} == 0x0026) { # &
# Line 1022  sub _get_next_token ($) { Line 1041  sub _get_next_token ($) {
1041        }        }
1042      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1043        if ($self->{next_input_character} == 0x0027) { # '        if ($self->{next_input_character} == 0x0027) { # '
1044          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1045          !!!next-input-character;          !!!next-input-character;
1046          redo A;          redo A;
1047        } elsif ($self->{next_input_character} == 0x0026) { # &        } elsif ($self->{next_input_character} == 0x0026) { # &
# Line 1110  sub _get_next_token ($) { Line 1129  sub _get_next_token ($) {
1129    
1130          redo A;          redo A;
1131        } else {        } else {
1132            if ({
1133                 0x0022 => 1, # "
1134                 0x0027 => 1, # '
1135                 0x003D => 1, # =
1136                }->{$self->{next_input_character}}) {
1137              !!!parse-error (type => 'bad attribute value');
1138            }
1139          $self->{current_attribute}->{value} .= chr ($self->{next_input_character});          $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1140          ## Stay in the state          ## Stay in the state
1141          !!!next-input-character;          !!!next-input-character;
1142          redo A;          redo A;
1143        }        }
1144      } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1145        my $token = $self->_tokenize_attempt_to_consume_an_entity (1);        my $token = $self->_tokenize_attempt_to_consume_an_entity
1146              (1,
1147               $self->{last_attribute_value_state}
1148                 == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1149               $self->{last_attribute_value_state}
1150                 == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1151               -1);
1152    
1153        unless (defined $token) {        unless (defined $token) {
1154          $self->{current_attribute}->{value} .= '&';          $self->{current_attribute}->{value} .= '&';
# Line 1129  sub _get_next_token ($) { Line 1161  sub _get_next_token ($) {
1161        $self->{state} = $self->{last_attribute_value_state};        $self->{state} = $self->{last_attribute_value_state};
1162        # next-input-character is already done        # next-input-character is already done
1163        redo A;        redo A;
1164        } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1165          if ($self->{next_input_character} == 0x0009 or # HT
1166              $self->{next_input_character} == 0x000A or # LF
1167              $self->{next_input_character} == 0x000B or # VT
1168              $self->{next_input_character} == 0x000C or # FF
1169              $self->{next_input_character} == 0x0020) { # SP
1170            $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1171            !!!next-input-character;
1172            redo A;
1173          } elsif ($self->{next_input_character} == 0x003E) { # >
1174            if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1175              $self->{current_token}->{first_start_tag}
1176                  = not defined $self->{last_emitted_start_tag_name};
1177              $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1178            } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1179              $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1180              if ($self->{current_token}->{attributes}) {
1181                !!!parse-error (type => 'end tag attribute');
1182              }
1183            } else {
1184              die "$0: $self->{current_token}->{type}: Unknown token type";
1185            }
1186            $self->{state} = DATA_STATE;
1187            !!!next-input-character;
1188    
1189            !!!emit ($self->{current_token}); # start tag or end tag
1190    
1191            redo A;
1192          } elsif ($self->{next_input_character} == 0x002F) { # /
1193            !!!next-input-character;
1194            if ($self->{next_input_character} == 0x003E and # >
1195                $self->{current_token}->{type} == START_TAG_TOKEN and
1196                $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1197              # permitted slash
1198              #
1199            } else {
1200              !!!parse-error (type => 'nestc');
1201            }
1202            $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1203            # next-input-character is already done
1204            redo A;
1205          } else {
1206            !!!parse-error (type => 'no space between attributes');
1207            $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1208            ## reconsume
1209            redo A;
1210          }
1211      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1212        ## (only happen if PCDATA state)        ## (only happen if PCDATA state)
1213                
# Line 1368  sub _get_next_token ($) { Line 1447  sub _get_next_token ($) {
1447          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1448          !!!next-input-character;          !!!next-input-character;
1449    
1450          !!!emit ({type => DOCTYPE_TOKEN}); # incorrect          !!!emit ({type => DOCTYPE_TOKEN, quirks => 1});
1451    
1452          redo A;          redo A;
1453        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_input_character} == -1) {
# Line 1376  sub _get_next_token ($) { Line 1455  sub _get_next_token ($) {
1455          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1456          ## reconsume          ## reconsume
1457    
1458          !!!emit ({type => DOCTYPE_TOKEN}); # incorrect          !!!emit ({type => DOCTYPE_TOKEN, quirks => 1});
1459    
1460          redo A;          redo A;
1461        } else {        } else {
1462          $self->{current_token}          $self->{current_token}
1463              = {type => DOCTYPE_TOKEN,              = {type => DOCTYPE_TOKEN,
1464                 name => chr ($self->{next_input_character}),                 name => chr ($self->{next_input_character}),
1465                 correct => 1};                 #quirks => 0,
1466                  };
1467  ## ISSUE: "Set the token's name name to the" in the spec  ## ISSUE: "Set the token's name name to the" in the spec
1468          $self->{state} = DOCTYPE_NAME_STATE;          $self->{state} = DOCTYPE_NAME_STATE;
1469          !!!next-input-character;          !!!next-input-character;
# Line 1411  sub _get_next_token ($) { Line 1491  sub _get_next_token ($) {
1491          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1492          ## reconsume          ## reconsume
1493    
1494          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1495          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1496    
1497          redo A;          redo A;
# Line 1443  sub _get_next_token ($) { Line 1523  sub _get_next_token ($) {
1523          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1524          ## reconsume          ## reconsume
1525    
1526          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1527          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1528    
1529          redo A;          redo A;
# Line 1507  sub _get_next_token ($) { Line 1587  sub _get_next_token ($) {
1587        }        }
1588    
1589        !!!parse-error (type => 'string after DOCTYPE name');        !!!parse-error (type => 'string after DOCTYPE name');
1590          $self->{current_token}->{quirks} = 1;
1591    
1592        $self->{state} = BOGUS_DOCTYPE_STATE;        $self->{state} = BOGUS_DOCTYPE_STATE;
1593        # next-input-character is already done        # next-input-character is already done
1594        redo A;        redo A;
# Line 1534  sub _get_next_token ($) { Line 1616  sub _get_next_token ($) {
1616          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1617          !!!next-input-character;          !!!next-input-character;
1618    
1619          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1620          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1621    
1622          redo A;          redo A;
# Line 1544  sub _get_next_token ($) { Line 1626  sub _get_next_token ($) {
1626          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1627          ## reconsume          ## reconsume
1628    
1629          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1630          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1631    
1632          redo A;          redo A;
1633        } else {        } else {
1634          !!!parse-error (type => 'string after PUBLIC');          !!!parse-error (type => 'string after PUBLIC');
1635            $self->{current_token}->{quirks} = 1;
1636    
1637          $self->{state} = BOGUS_DOCTYPE_STATE;          $self->{state} = BOGUS_DOCTYPE_STATE;
1638          !!!next-input-character;          !!!next-input-character;
1639          redo A;          redo A;
# Line 1565  sub _get_next_token ($) { Line 1649  sub _get_next_token ($) {
1649          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1650          !!!next-input-character;          !!!next-input-character;
1651    
1652          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1653          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1654    
1655          redo A;          redo A;
# Line 1575  sub _get_next_token ($) { Line 1659  sub _get_next_token ($) {
1659          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1660          ## reconsume          ## reconsume
1661    
1662          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1663          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1664    
1665          redo A;          redo A;
# Line 1597  sub _get_next_token ($) { Line 1681  sub _get_next_token ($) {
1681          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1682          !!!next-input-character;          !!!next-input-character;
1683    
1684          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1685          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1686    
1687          redo A;          redo A;
# Line 1607  sub _get_next_token ($) { Line 1691  sub _get_next_token ($) {
1691          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1692          ## reconsume          ## reconsume
1693    
1694          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1695          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1696    
1697          redo A;          redo A;
# Line 1649  sub _get_next_token ($) { Line 1733  sub _get_next_token ($) {
1733          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1734          ## reconsume          ## reconsume
1735    
1736          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1737          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1738    
1739          redo A;          redo A;
1740        } else {        } else {
1741          !!!parse-error (type => 'string after PUBLIC literal');          !!!parse-error (type => 'string after PUBLIC literal');
1742            $self->{current_token}->{quirks} = 1;
1743    
1744          $self->{state} = BOGUS_DOCTYPE_STATE;          $self->{state} = BOGUS_DOCTYPE_STATE;
1745          !!!next-input-character;          !!!next-input-character;
1746          redo A;          redo A;
# Line 1682  sub _get_next_token ($) { Line 1768  sub _get_next_token ($) {
1768          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1769          !!!next-input-character;          !!!next-input-character;
1770    
1771          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1772          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1773    
1774          redo A;          redo A;
# Line 1692  sub _get_next_token ($) { Line 1778  sub _get_next_token ($) {
1778          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1779          ## reconsume          ## reconsume
1780    
1781          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1782          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1783    
1784          redo A;          redo A;
1785        } else {        } else {
1786          !!!parse-error (type => 'string after SYSTEM');          !!!parse-error (type => 'string after SYSTEM');
1787            $self->{current_token}->{quirks} = 1;
1788    
1789          $self->{state} = BOGUS_DOCTYPE_STATE;          $self->{state} = BOGUS_DOCTYPE_STATE;
1790          !!!next-input-character;          !!!next-input-character;
1791          redo A;          redo A;
# Line 1713  sub _get_next_token ($) { Line 1801  sub _get_next_token ($) {
1801          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1802          !!!next-input-character;          !!!next-input-character;
1803    
1804          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1805          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1806    
1807          redo A;          redo A;
# Line 1723  sub _get_next_token ($) { Line 1811  sub _get_next_token ($) {
1811          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1812          ## reconsume          ## reconsume
1813    
1814          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1815          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1816    
1817          redo A;          redo A;
# Line 1745  sub _get_next_token ($) { Line 1833  sub _get_next_token ($) {
1833          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1834          !!!next-input-character;          !!!next-input-character;
1835    
1836          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1837          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1838    
1839          redo A;          redo A;
# Line 1755  sub _get_next_token ($) { Line 1843  sub _get_next_token ($) {
1843          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1844          ## reconsume          ## reconsume
1845    
1846          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1847          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1848    
1849          redo A;          redo A;
# Line 1787  sub _get_next_token ($) { Line 1875  sub _get_next_token ($) {
1875          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1876          ## reconsume          ## reconsume
1877    
1878          delete $self->{current_token}->{correct};          $self->{current_token}->{quirks} = 1;
1879          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1880    
1881          redo A;          redo A;
1882        } else {        } else {
1883          !!!parse-error (type => 'string after SYSTEM literal');          !!!parse-error (type => 'string after SYSTEM literal');
1884            #$self->{current_token}->{quirks} = 1;
1885    
1886          $self->{state} = BOGUS_DOCTYPE_STATE;          $self->{state} = BOGUS_DOCTYPE_STATE;
1887          !!!next-input-character;          !!!next-input-character;
1888          redo A;          redo A;
# Line 1802  sub _get_next_token ($) { Line 1892  sub _get_next_token ($) {
1892          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1893          !!!next-input-character;          !!!next-input-character;
1894    
         delete $self->{current_token}->{correct};  
1895          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1896    
1897          redo A;          redo A;
# Line 1811  sub _get_next_token ($) { Line 1900  sub _get_next_token ($) {
1900          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1901          ## reconsume          ## reconsume
1902    
         delete $self->{current_token}->{correct};  
1903          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1904    
1905          redo A;          redo A;
# Line 1828  sub _get_next_token ($) { Line 1916  sub _get_next_token ($) {
1916    die "$0: _get_next_token: unexpected case";    die "$0: _get_next_token: unexpected case";
1917  } # _get_next_token  } # _get_next_token
1918    
1919  sub _tokenize_attempt_to_consume_an_entity ($$) {  sub _tokenize_attempt_to_consume_an_entity ($$$) {
1920    my ($self, $in_attr) = @_;    my ($self, $in_attr, $additional) = @_;
1921    
1922    if ({    if ({
1923         0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,         0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
1924         0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR         0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
1925           $additional => 1,
1926        }->{$self->{next_input_character}}) {        }->{$self->{next_input_character}}) {
1927      ## Don't consume      ## Don't consume
1928      ## No error      ## No error
# Line 2066  sub _tree_construction_initial ($) { Line 2155  sub _tree_construction_initial ($) {
2155        ## ISSUE: internalSubset = null??        ## ISSUE: internalSubset = null??
2156        $self->{document}->append_child ($doctype);        $self->{document}->append_child ($doctype);
2157                
2158        if (not $token->{correct} or $doctype_name ne 'HTML') {        if ($token->{quirks} or $doctype_name ne 'HTML') {
2159          $self->{document}->manakai_compat_mode ('quirks');          $self->{document}->manakai_compat_mode ('quirks');
2160        } elsif (defined $token->{public_identifier}) {        } elsif (defined $token->{public_identifier}) {
2161          my $pubid = $token->{public_identifier};          my $pubid = $token->{public_identifier};
# Line 2120  sub _tree_construction_initial ($) { Line 2209  sub _tree_construction_initial ($) {
2209            "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,            "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
2210            "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,            "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
2211            "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,            "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
2212              "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1,
2213              "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1,
2214              "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1,
2215            "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,            "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
2216            "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,            "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
2217            "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,            "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
# Line 5464  sub set_inner_html ($$$) { Line 5556  sub set_inner_html ($$$) {
5556      $p->_initialize_tree_constructor;      $p->_initialize_tree_constructor;
5557    
5558      ## Step 2      ## Step 2
5559      my $node_ln = $node->local_name;      my $node_ln = $node->manakai_local_name;
5560      $p->{content_model} = {      $p->{content_model} = {
5561        title => RCDATA_CONTENT_MODEL,        title => RCDATA_CONTENT_MODEL,
5562        textarea => RCDATA_CONTENT_MODEL,        textarea => RCDATA_CONTENT_MODEL,
# Line 5504  sub set_inner_html ($$$) { Line 5596  sub set_inner_html ($$$) {
5596        if ($anode->node_type == 1) {        if ($anode->node_type == 1) {
5597          my $nsuri = $anode->namespace_uri;          my $nsuri = $anode->namespace_uri;
5598          if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {          if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
5599            if ($anode->local_name eq 'form') { ## TODO: case?            if ($anode->manakai_local_name eq 'form') {
5600              $p->{form_element} = $anode;              $p->{form_element} = $anode;
5601              last AN;              last AN;
5602            }            }

Legend:
Removed from v.1.70  
changed lines
  Added in v.1.75

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24