/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.139 by wakaba, Sat May 24 04:26:27 2008 UTC revision 1.152 by wakaba, Sun Jun 29 11:15:53 2008 UTC
# Line 8  use Error qw(:try); Line 8  use Error qw(:try);
8  ## doc.write ('');  ## doc.write ('');
9  ## alert (doc.compatMode);  ## alert (doc.compatMode);
10    
 ## TODO: 1252 parse error (revision 1264)  
 ## TODO: 8859-11 = 874 (revision 1271)  
   
11  require IO::Handle;  require IO::Handle;
12    
13  my $HTML_NS = q<http://www.w3.org/1999/xhtml>;  my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
# Line 48  sub MISC_SPECIAL_EL () { 0b1000000000000 Line 45  sub MISC_SPECIAL_EL () { 0b1000000000000
45  sub FOREIGN_EL () { 0b10000000000000000000000000 }  sub FOREIGN_EL () { 0b10000000000000000000000000 }
46  sub FOREIGN_FLOW_CONTENT_EL () { 0b100000000000000000000000000 }  sub FOREIGN_FLOW_CONTENT_EL () { 0b100000000000000000000000000 }
47  sub MML_AXML_EL () { 0b1000000000000000000000000000 }  sub MML_AXML_EL () { 0b1000000000000000000000000000 }
48    sub RUBY_EL () { 0b10000000000000000000000000000 }
49    sub RUBY_COMPONENT_EL () { 0b100000000000000000000000000000 }
50    
51  sub TABLE_ROWS_EL () {  sub TABLE_ROWS_EL () {
52    TABLE_EL |    TABLE_EL |
# Line 55  sub TABLE_ROWS_EL () { Line 54  sub TABLE_ROWS_EL () {
54    TABLE_ROW_GROUP_EL    TABLE_ROW_GROUP_EL
55  }  }
56    
57    ## NOTE: Used in "generate implied end tags" algorithm.
58    ## NOTE: There is code where a modified version of END_TAG_OPTIONAL_EL
59    ## is used in "generate implied end tags" implementation (search for the
60    ## function mae).
61  sub END_TAG_OPTIONAL_EL () {  sub END_TAG_OPTIONAL_EL () {
62    DD_EL |    DD_EL |
63    DT_EL |    DT_EL |
64    LI_EL |    LI_EL |
65    P_EL    P_EL |
66      RUBY_COMPONENT_EL
67  }  }
68    
69    ## NOTE: Used in </body> and EOF algorithms.
70  sub ALL_END_TAG_OPTIONAL_EL () {  sub ALL_END_TAG_OPTIONAL_EL () {
71    END_TAG_OPTIONAL_EL |    DD_EL |
72      DT_EL |
73      LI_EL |
74      P_EL |
75    
76    BODY_EL |    BODY_EL |
77    HTML_EL |    HTML_EL |
78    TABLE_CELL_EL |    TABLE_CELL_EL |
# Line 99  sub SPECIAL_EL () { Line 108  sub SPECIAL_EL () {
108    ADDRESS_EL |    ADDRESS_EL |
109    BODY_EL |    BODY_EL |
110    DIV_EL |    DIV_EL |
111    END_TAG_OPTIONAL_EL |  
112      DD_EL |
113      DT_EL |
114      LI_EL |
115      P_EL |
116    
117    FORM_EL |    FORM_EL |
118    FRAMESET_EL |    FRAMESET_EL |
119    HEADING_EL |    HEADING_EL |
# Line 173  my $el_category = { Line 187  my $el_category = {
187    param => MISC_SPECIAL_EL,    param => MISC_SPECIAL_EL,
188    plaintext => MISC_SPECIAL_EL,    plaintext => MISC_SPECIAL_EL,
189    pre => MISC_SPECIAL_EL,    pre => MISC_SPECIAL_EL,
190      rp => RUBY_COMPONENT_EL,
191      rt => RUBY_COMPONENT_EL,
192      ruby => RUBY_EL,
193    s => FORMATTING_EL,    s => FORMATTING_EL,
194    script => MISC_SPECIAL_EL,    script => MISC_SPECIAL_EL,
195    select => SELECT_EL,    select => SELECT_EL,
# Line 214  my $el_category_f = { Line 231  my $el_category_f = {
231  };  };
232    
233  my $svg_attr_name = {  my $svg_attr_name = {
234      attributename => 'attributeName',
235    attributetype => 'attributeType',    attributetype => 'attributeType',
236    basefrequency => 'baseFrequency',    basefrequency => 'baseFrequency',
237    baseprofile => 'baseProfile',    baseprofile => 'baseProfile',
# Line 224  my $svg_attr_name = { Line 242  my $svg_attr_name = {
242    diffuseconstant => 'diffuseConstant',    diffuseconstant => 'diffuseConstant',
243    edgemode => 'edgeMode',    edgemode => 'edgeMode',
244    externalresourcesrequired => 'externalResourcesRequired',    externalresourcesrequired => 'externalResourcesRequired',
   fecolormatrix => 'feColorMatrix',  
   fecomposite => 'feComposite',  
   fegaussianblur => 'feGaussianBlur',  
   femorphology => 'feMorphology',  
   fetile => 'feTile',  
245    filterres => 'filterRes',    filterres => 'filterRes',
246    filterunits => 'filterUnits',    filterunits => 'filterUnits',
247    glyphref => 'glyphRef',    glyphref => 'glyphRef',
# Line 262  my $svg_attr_name = { Line 275  my $svg_attr_name = {
275    repeatcount => 'repeatCount',    repeatcount => 'repeatCount',
276    repeatdur => 'repeatDur',    repeatdur => 'repeatDur',
277    requiredextensions => 'requiredExtensions',    requiredextensions => 'requiredExtensions',
278      requiredfeatures => 'requiredFeatures',
279    specularconstant => 'specularConstant',    specularconstant => 'specularConstant',
280    specularexponent => 'specularExponent',    specularexponent => 'specularExponent',
281    spreadmethod => 'spreadMethod',    spreadmethod => 'spreadMethod',
# Line 487  sub parse_byte_stream ($$$$;$) { Line 501  sub parse_byte_stream ($$$$;$) {
501        ## "Change the encoding" algorithm:        ## "Change the encoding" algorithm:
502    
503        ## Step 1            ## Step 1    
504        if ($charset->{iana_names}->{'utf-16'}) { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?        if ($charset->{category} &
505              Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
506          $charset = Message::Charset::Info->get_by_iana_name ('utf-8');          $charset = Message::Charset::Info->get_by_iana_name ('utf-8');
507          ($char_stream, $e_status) = $charset->get_decode_handle          ($char_stream, $e_status) = $charset->get_decode_handle
508              ($byte_stream,              ($byte_stream,
# Line 829  sub _initialize_tokenizer ($) { Line 844  sub _initialize_tokenizer ($) {
844  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
845  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
846    
 ## ISSUE: "When a DOCTYPE token is created, its  
 ## <i>self-closing flag</i> must be unset (its other state is that it  
 ## be set), and its attributes list must be empty.": Wrong subject?  
   
847  ## Emitted token MUST immediately be handled by the tree construction state.  ## Emitted token MUST immediately be handled by the tree construction state.
848    
849  ## Before each step, UA MAY check to see if either one of the scripts in  ## Before each step, UA MAY check to see if either one of the scripts in
# Line 1827  sub _get_next_token ($) { Line 1838  sub _get_next_token ($) {
1838          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
1839          !!!next-input-character;          !!!next-input-character;
1840          redo A;          redo A;
1841          } elsif ($self->{next_char} == -1) {
1842            !!!parse-error (type => 'unclosed tag');
1843            if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1844              !!!cp (122.3);
1845              $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1846            } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1847              if ($self->{current_token}->{attributes}) {
1848                !!!cp (122.1);
1849                !!!parse-error (type => 'end tag attribute');
1850              } else {
1851                ## NOTE: This state should never be reached.
1852                !!!cp (122.2);
1853              }
1854            } else {
1855              die "$0: $self->{current_token}->{type}: Unknown token type";
1856            }
1857            $self->{state} = DATA_STATE;
1858            ## Reconsume.
1859            !!!emit ($self->{current_token}); # start tag or end tag
1860            redo A;
1861        } else {        } else {
1862          !!!cp ('124.1');          !!!cp ('124.1');
1863          !!!parse-error (type => 'no space between attributes');          !!!parse-error (type => 'no space between attributes');
# Line 1859  sub _get_next_token ($) { Line 1890  sub _get_next_token ($) {
1890          !!!emit ($self->{current_token}); # start tag or end tag          !!!emit ($self->{current_token}); # start tag or end tag
1891    
1892          redo A;          redo A;
1893          } elsif ($self->{next_char} == -1) {
1894            !!!parse-error (type => 'unclosed tag');
1895            if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1896              !!!cp (124.7);
1897              $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1898            } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1899              if ($self->{current_token}->{attributes}) {
1900                !!!cp (124.5);
1901                !!!parse-error (type => 'end tag attribute');
1902              } else {
1903                ## NOTE: This state should never be reached.
1904                !!!cp (124.6);
1905              }
1906            } else {
1907              die "$0: $self->{current_token}->{type}: Unknown token type";
1908            }
1909            $self->{state} = DATA_STATE;
1910            ## Reconsume.
1911            !!!emit ($self->{current_token}); # start tag or end tag
1912            redo A;
1913        } else {        } else {
1914          !!!cp ('124.4');          !!!cp ('124.4');
1915          !!!parse-error (type => 'nestc');          !!!parse-error (type => 'nestc');
# Line 2700  sub _get_next_token ($) { Line 2751  sub _get_next_token ($) {
2751        } elsif ($self->{next_char} == -1) {        } elsif ($self->{next_char} == -1) {
2752          !!!cp (217);          !!!cp (217);
2753          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
   
2754          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2755          ## reconsume          ## reconsume
2756    
# Line 3103  sub _tree_construction_initial ($) { Line 3153  sub _tree_construction_initial ($) {
3153        } elsif (defined $token->{public_identifier}) {        } elsif (defined $token->{public_identifier}) {
3154          my $pubid = $token->{public_identifier};          my $pubid = $token->{public_identifier};
3155          $pubid =~ tr/a-z/A-z/;          $pubid =~ tr/a-z/A-z/;
3156          if ({          my $prefix = [
3157            "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,            "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
3158            "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,            "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
3159            "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,            "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
3160            "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,            "-//IETF//DTD HTML 2.0 LEVEL 1//",
3161            "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,            "-//IETF//DTD HTML 2.0 LEVEL 2//",
3162            "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,            "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
3163            "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,            "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
3164            "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,            "-//IETF//DTD HTML 2.0 STRICT//",
3165            "-//IETF//DTD HTML 2.0//EN" => 1,            "-//IETF//DTD HTML 2.0//",
3166            "-//IETF//DTD HTML 2.1E//EN" => 1,            "-//IETF//DTD HTML 2.1E//",
3167            "-//IETF//DTD HTML 3.0//EN" => 1,            "-//IETF//DTD HTML 3.0//",
3168            "-//IETF//DTD HTML 3.0//EN//" => 1,            "-//IETF//DTD HTML 3.2 FINAL//",
3169            "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,            "-//IETF//DTD HTML 3.2//",
3170            "-//IETF//DTD HTML 3.2//EN" => 1,            "-//IETF//DTD HTML 3//",
3171            "-//IETF//DTD HTML 3//EN" => 1,            "-//IETF//DTD HTML LEVEL 0//",
3172            "-//IETF//DTD HTML LEVEL 0//EN" => 1,            "-//IETF//DTD HTML LEVEL 1//",
3173            "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,            "-//IETF//DTD HTML LEVEL 2//",
3174            "-//IETF//DTD HTML LEVEL 1//EN" => 1,            "-//IETF//DTD HTML LEVEL 3//",
3175            "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,            "-//IETF//DTD HTML STRICT LEVEL 0//",
3176            "-//IETF//DTD HTML LEVEL 2//EN" => 1,            "-//IETF//DTD HTML STRICT LEVEL 1//",
3177            "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,            "-//IETF//DTD HTML STRICT LEVEL 2//",
3178            "-//IETF//DTD HTML LEVEL 3//EN" => 1,            "-//IETF//DTD HTML STRICT LEVEL 3//",
3179            "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,            "-//IETF//DTD HTML STRICT//",
3180            "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,            "-//IETF//DTD HTML//",
3181            "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,            "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
3182            "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,            "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
3183            "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,            "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
3184            "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,            "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
3185            "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,            "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
3186            "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,            "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
3187            "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,            "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
3188            "-//IETF//DTD HTML STRICT//EN" => 1,            "-//NETSCAPE COMM. CORP.//DTD HTML//",
3189            "-//IETF//DTD HTML STRICT//EN//2.0" => 1,            "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
3190            "-//IETF//DTD HTML STRICT//EN//3.0" => 1,            "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
3191            "-//IETF//DTD HTML//EN" => 1,            "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
3192            "-//IETF//DTD HTML//EN//2.0" => 1,            "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
3193            "-//IETF//DTD HTML//EN//3.0" => 1,            "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
3194            "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,            "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
3195            "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,            "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
3196            "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,            "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
3197            "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,            "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
3198            "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,            "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
3199            "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,            "-//W3C//DTD HTML 3 1995-03-24//",
3200            "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,            "-//W3C//DTD HTML 3.2 DRAFT//",
3201            "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,            "-//W3C//DTD HTML 3.2 FINAL//",
3202            "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,            "-//W3C//DTD HTML 3.2//",
3203            "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,            "-//W3C//DTD HTML 3.2S DRAFT//",
3204            "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,            "-//W3C//DTD HTML 4.0 FRAMESET//",
3205            "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1,            "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
3206            "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1,            "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
3207            "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1,            "-//W3C//DTD HTML EXPERIMENTAL 970421//",
3208            "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,            "-//W3C//DTD W3 HTML//",
3209            "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,            "-//W3O//DTD W3 HTML 3.0//",
3210            "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,            "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
3211            "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,            "-//WEBTECHS//DTD MOZILLA HTML//",
3212            "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,          ]; # $prefix
3213            "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,          my $match;
3214            "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,          for (@$prefix) {
3215            "-//W3C//DTD HTML 3.2//EN" => 1,            if (substr ($prefix, 0, length $_) eq $_) {
3216            "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,              $match = 1;
3217            "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,              last;
3218            "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,            }
3219            "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,          }
3220            "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,          if ($match or
3221            "-//W3C//DTD W3 HTML//EN" => 1,              $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
3222            "-//W3O//DTD W3 HTML 3.0//EN" => 1,              $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
3223            "-//W3O//DTD W3 HTML 3.0//EN//" => 1,              $pubid eq "HTML") {
           "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,  
           "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,  
           "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,  
           "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,  
           "HTML" => 1,  
         }->{$pubid}) {  
3224            !!!cp ('t5');            !!!cp ('t5');
3225            $self->{document}->manakai_compat_mode ('quirks');            $self->{document}->manakai_compat_mode ('quirks');
3226          } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or          } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
3227                   $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {                   $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
3228            if (defined $token->{system_identifier}) {            if (defined $token->{system_identifier}) {
3229              !!!cp ('t6');              !!!cp ('t6');
3230              $self->{document}->manakai_compat_mode ('quirks');              $self->{document}->manakai_compat_mode ('quirks');
# Line 3188  sub _tree_construction_initial ($) { Line 3232  sub _tree_construction_initial ($) {
3232              !!!cp ('t7');              !!!cp ('t7');
3233              $self->{document}->manakai_compat_mode ('limited quirks');              $self->{document}->manakai_compat_mode ('limited quirks');
3234            }            }
3235          } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or          } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
3236                   $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {                   $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
3237            !!!cp ('t8');            !!!cp ('t8');
3238            $self->{document}->manakai_compat_mode ('limited quirks');            $self->{document}->manakai_compat_mode ('limited quirks');
3239          } else {          } else {
# Line 3202  sub _tree_construction_initial ($) { Line 3246  sub _tree_construction_initial ($) {
3246          my $sysid = $token->{system_identifier};          my $sysid = $token->{system_identifier};
3247          $sysid =~ tr/A-Z/a-z/;          $sysid =~ tr/A-Z/a-z/;
3248          if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {          if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
3249            ## TODO: Check the spec: PUBLIC "(limited quirks)" "(quirks)"            ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
3250              ## marked as quirks.
3251            $self->{document}->manakai_compat_mode ('quirks');            $self->{document}->manakai_compat_mode ('quirks');
3252            !!!cp ('t11');            !!!cp ('t11');
3253          } else {          } else {
# Line 3272  sub _tree_construction_root_element ($) Line 3317  sub _tree_construction_root_element ($)
3317    B: {    B: {
3318        if ($token->{type} == DOCTYPE_TOKEN) {        if ($token->{type} == DOCTYPE_TOKEN) {
3319          !!!cp ('t19');          !!!cp ('t19');
3320          !!!parse-error (type => 'in html:#DOCTYPE', token => $token);          !!!parse-error (type => 'in html:#DOCTYPE', token => $token); ## TODO: type
3321          ## Ignore the token          ## Ignore the token
3322          ## Stay in the insertion mode.          ## Stay in the insertion mode.
3323          !!!next-token;          !!!next-token;
# Line 3374  sub _reset_insertion_mode ($) { Line 3419  sub _reset_insertion_mode ($) {
3419        if ($self->{open_elements}->[0]->[0] eq $node->[0]) {        if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
3420          $last = 1;          $last = 1;
3421          if (defined $self->{inner_html_node}) {          if (defined $self->{inner_html_node}) {
3422            if ($self->{inner_html_node}->[1] & TABLE_CELL_EL) {            !!!cp ('t28');
3423              !!!cp ('t27');            $node = $self->{inner_html_node};
3424              #          } else {
3425            } else {            die "_reset_insertion_mode: t27";
             !!!cp ('t28');  
             $node = $self->{inner_html_node};  
           }  
3426          }          }
3427        }        }
3428              
3429      ## Step 4..14        ## Step 4..14
3430      my $new_mode;        my $new_mode;
3431      if ($node->[1] & FOREIGN_EL) {        if ($node->[1] & FOREIGN_EL) {
3432        ## NOTE: Strictly speaking, the line below only applies to MathML and          !!!cp ('t28.1');
3433        ## SVG elements.  Currently the HTML syntax supports only MathML and          ## NOTE: Strictly speaking, the line below only applies to MathML and
3434        ## SVG elements as foreigners.          ## SVG elements.  Currently the HTML syntax supports only MathML and
3435        $new_mode = $self->{insertion_mode} | IN_FOREIGN_CONTENT_IM;          ## SVG elements as foreigners.
3436        ## ISSUE: What is set as the secondary insertion mode?          $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
3437      } else {        } elsif ($node->[1] & TABLE_CELL_EL) {
3438        $new_mode = {          if ($last) {
3439              !!!cp ('t28.2');
3440              #
3441            } else {
3442              !!!cp ('t28.3');
3443              $new_mode = IN_CELL_IM;
3444            }
3445          } else {
3446            !!!cp ('t28.4');
3447            $new_mode = {
3448                        select => IN_SELECT_IM,                        select => IN_SELECT_IM,
3449                        ## NOTE: |option| and |optgroup| do not set                        ## NOTE: |option| and |optgroup| do not set
3450                        ## insertion mode to "in select" by themselves.                        ## insertion mode to "in select" by themselves.
                       td => IN_CELL_IM,  
                       th => IN_CELL_IM,  
3451                        tr => IN_ROW_IM,                        tr => IN_ROW_IM,
3452                        tbody => IN_TABLE_BODY_IM,                        tbody => IN_TABLE_BODY_IM,
3453                        thead => IN_TABLE_BODY_IM,                        thead => IN_TABLE_BODY_IM,
# Line 3410  sub _reset_insertion_mode ($) { Line 3459  sub _reset_insertion_mode ($) {
3459                        body => IN_BODY_IM,                        body => IN_BODY_IM,
3460                        frameset => IN_FRAMESET_IM,                        frameset => IN_FRAMESET_IM,
3461                       }->{$node->[0]->manakai_local_name};                       }->{$node->[0]->manakai_local_name};
3462      }        }
3463      $self->{insertion_mode} = $new_mode and return if defined $new_mode;        $self->{insertion_mode} = $new_mode and return if defined $new_mode;
3464                
3465        ## Step 15        ## Step 15
3466        if ($node->[1] & HTML_EL) {        if ($node->[1] & HTML_EL) {
# Line 3988  sub _tree_construction_main ($) { Line 4037  sub _tree_construction_main ($) {
4037            #            #
4038          } elsif ({          } elsif ({
4039                    b => 1, big => 1, blockquote => 1, body => 1, br => 1,                    b => 1, big => 1, blockquote => 1, body => 1, br => 1,
4040                    center => 1, code => 1, dd => 1, div => 1, dl => 1, em => 1,                    center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
4041                    embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1, ## No h4!                    em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1,
4042                    h5 => 1, h6 => 1, head => 1, hr => 1, i => 1, img => 1,                    h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
4043                    li => 1, menu => 1, meta => 1, nobr => 1, p => 1, pre => 1,                    img => 1, li => 1, listing => 1, menu => 1, meta => 1,
4044                    ruby => 1, s => 1, small => 1, span => 1, strong => 1,                    nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
4045                    sub => 1, sup => 1, table => 1, tt => 1, u => 1, ul => 1,                    small => 1, span => 1, strong => 1, strike => 1, sub => 1,
4046                    var => 1,                    sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
4047                   }->{$token->{tag_name}}) {                   }->{$token->{tag_name}}) {
4048            !!!cp ('t87.2');            !!!cp ('t87.2');
4049            !!!parse-error (type => 'not closed',            !!!parse-error (type => 'not closed',
# Line 4074  sub _tree_construction_main ($) { Line 4123  sub _tree_construction_main ($) {
4123          !!!cp ('t87.5');          !!!cp ('t87.5');
4124          #          #
4125        } elsif ($token->{type} == END_OF_FILE_TOKEN) {        } elsif ($token->{type} == END_OF_FILE_TOKEN) {
         ## NOTE: "using the rules for secondary insertion mode" then "continue"  
4126          !!!cp ('t87.6');          !!!cp ('t87.6');
4127          #          !!!parse-error (type => 'not closed',
4128          ## TODO: ...                          value => $self->{open_elements}->[-1]->[0]
4129                                ->manakai_local_name,
4130                            token => $token);
4131    
4132            pop @{$self->{open_elements}}
4133                while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4134    
4135            $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4136            ## Reprocess.
4137            next B;
4138        } else {        } else {
4139          die "$0: $token->{type}: Unknown token type";                  die "$0: $token->{type}: Unknown token type";        
4140        }        }
# Line 4256  sub _tree_construction_main ($) { Line 4313  sub _tree_construction_main ($) {
4313                                                 ->{has_reference});                                                 ->{has_reference});
4314                  } elsif ($token->{attributes}->{content}) {                  } elsif ($token->{attributes}->{content}) {
4315                    if ($token->{attributes}->{content}->{value}                    if ($token->{attributes}->{content}->{value}
4316                        =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]                        =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4317                            [\x09-\x0D\x20]*=                            [\x09-\x0D\x20]*=
4318                            [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|                            [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4319                            ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {                            ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
4320                      !!!cp ('t107');                      !!!cp ('t107');
4321                      ## NOTE: Whether the encoding is supported or not is handled                      ## NOTE: Whether the encoding is supported or not is handled
4322                      ## in the {change_encoding} callback.                      ## in the {change_encoding} callback.
# Line 4321  sub _tree_construction_main ($) { Line 4378  sub _tree_construction_main ($) {
4378                pop @{$self->{open_elements}} # <head>                pop @{$self->{open_elements}} # <head>
4379                    if $self->{insertion_mode} == AFTER_HEAD_IM;                    if $self->{insertion_mode} == AFTER_HEAD_IM;
4380                next B;                next B;
4381              } elsif ($token->{tag_name} eq 'style') {              } elsif ($token->{tag_name} eq 'style' or
4382                         $token->{tag_name} eq 'noframes') {
4383                ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and                ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4384                ## insertion mode IN_HEAD_IM)                ## insertion mode IN_HEAD_IM)
4385                ## NOTE: There is a "as if in head" code clone.                ## NOTE: There is a "as if in head" code clone.
# Line 5335  sub _tree_construction_main ($) { Line 5393  sub _tree_construction_main ($) {
5393                  next B;                  next B;
5394                }                }
5395                                
5396  ## TODO: The following are removed from the latest spec.  ## TODO: The following are removed from the latest spec.
5397                ## generate implied end tags                ## generate implied end tags
5398                while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {                while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5399                  !!!cp ('t224');                  !!!cp ('t224');
# Line 5833  sub _tree_construction_main ($) { Line 5891  sub _tree_construction_main ($) {
5891            !!!nack ('t277.1');            !!!nack ('t277.1');
5892            !!!next-token;            !!!next-token;
5893            next B;            next B;
5894          } elsif ($token->{tag_name} eq 'select' or          } elsif ({
5895                   $token->{tag_name} eq 'input' or                     select => 1, input => 1, textarea => 1,
5896                     }->{$token->{tag_name}} or
5897                   ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and                   ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5898                    {                    {
5899                     caption => 1, table => 1,                     caption => 1, table => 1,
# Line 6191  sub _tree_construction_main ($) { Line 6250  sub _tree_construction_main ($) {
6250            next B;            next B;
6251          } elsif ($token->{tag_name} eq 'noframes') {          } elsif ($token->{tag_name} eq 'noframes') {
6252            !!!cp ('t320');            !!!cp ('t320');
6253            ## NOTE: As if in body.            ## NOTE: As if in head.
6254            $parse_rcdata->(CDATA_CONTENT_MODEL);            $parse_rcdata->(CDATA_CONTENT_MODEL);
6255            next B;            next B;
6256          } else {          } else {
# Line 6319  sub _tree_construction_main ($) { Line 6378  sub _tree_construction_main ($) {
6378                                           ->{has_reference});                                           ->{has_reference});
6379            } elsif ($token->{attributes}->{content}) {            } elsif ($token->{attributes}->{content}) {
6380              if ($token->{attributes}->{content}->{value}              if ($token->{attributes}->{content}->{value}
6381                  =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]                  =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6382                      [\x09-\x0D\x20]*=                      [\x09-\x0D\x20]*=
6383                      [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|                      [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6384                      ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {                      ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
6385                !!!cp ('t336');                !!!cp ('t336');
6386                ## NOTE: Whether the encoding is supported or not is handled                ## NOTE: Whether the encoding is supported or not is handled
6387                ## in the {change_encoding} callback.                ## in the {change_encoding} callback.
# Line 6634  sub _tree_construction_main ($) { Line 6693  sub _tree_construction_main ($) {
6693                  xmp => 1,                  xmp => 1,
6694                  iframe => 1,                  iframe => 1,
6695                  noembed => 1,                  noembed => 1,
6696                  noframes => 1,                  noframes => 1, ## NOTE: This is an "as if in head" code clone.
6697                  noscript => 0, ## TODO: 1 if scripting is enabled                  noscript => 0, ## TODO: 1 if scripting is enabled
6698                 }->{$token->{tag_name}}) {                 }->{$token->{tag_name}}) {
6699          if ($token->{tag_name} eq 'xmp') {          if ($token->{tag_name} eq 'xmp') {
# Line 6656  sub _tree_construction_main ($) { Line 6715  sub _tree_construction_main ($) {
6715            !!!next-token;            !!!next-token;
6716            next B;            next B;
6717          } else {          } else {
6718              !!!ack ('t391.1');
6719    
6720            my $at = $token->{attributes};            my $at = $token->{attributes};
6721            my $form_attrs;            my $form_attrs;
6722            $form_attrs->{action} = $at->{action} if $at->{action};            $form_attrs->{action} = $at->{action} if $at->{action};
# Line 6699  sub _tree_construction_main ($) { Line 6760  sub _tree_construction_main ($) {
6760                           line => $token->{line}, column => $token->{column}},                           line => $token->{line}, column => $token->{column}},
6761                          {type => END_TAG_TOKEN, tag_name => 'form',                          {type => END_TAG_TOKEN, tag_name => 'form',
6762                           line => $token->{line}, column => $token->{column}};                           line => $token->{line}, column => $token->{column}};
           !!!nack ('t391.1'); ## NOTE: Not acknowledged.  
6763            !!!back-token (@tokens);            !!!back-token (@tokens);
6764            !!!next-token;            !!!next-token;
6765            next B;            next B;
# Line 6751  sub _tree_construction_main ($) { Line 6811  sub _tree_construction_main ($) {
6811          }          }
6812          !!!next-token;          !!!next-token;
6813          next B;          next B;
6814          } elsif ($token->{tag_name} eq 'rt' or
6815                   $token->{tag_name} eq 'rp') {
6816            ## has a |ruby| element in scope
6817            INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6818              my $node = $self->{open_elements}->[$_];
6819              if ($node->[1] & RUBY_EL) {
6820                !!!cp ('t398.1');
6821                ## generate implied end tags
6822                while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6823                  !!!cp ('t398.2');
6824                  pop @{$self->{open_elements}};
6825                }
6826                unless ($self->{open_elements}->[-1]->[1] & RUBY_EL) {
6827                  !!!cp ('t398.3');
6828                  !!!parse-error (type => 'not closed',
6829                                  value => $self->{open_elements}->[-1]->[0]
6830                                      ->manakai_local_name,
6831                                  token => $token);
6832                  pop @{$self->{open_elements}}
6833                      while not $self->{open_elements}->[-1]->[1] & RUBY_EL;
6834                }
6835                last INSCOPE;
6836              } elsif ($node->[1] & SCOPING_EL) {
6837                !!!cp ('t398.4');
6838                last INSCOPE;
6839              }
6840            } # INSCOPE
6841    
6842            !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6843    
6844            !!!nack ('t398.5');
6845            !!!next-token;
6846            redo B;
6847        } elsif ($token->{tag_name} eq 'math' or        } elsif ($token->{tag_name} eq 'math' or
6848                 $token->{tag_name} eq 'svg') {                 $token->{tag_name} eq 'svg') {
6849          $reconstruct_active_formatting_elements->($insert_to_current);          $reconstruct_active_formatting_elements->($insert_to_current);
# Line 6939  sub _tree_construction_main ($) { Line 7032  sub _tree_construction_main ($) {
7032          } else {          } else {
7033            ## Step 1. generate implied end tags            ## Step 1. generate implied end tags
7034            while ({            while ({
7035                      ## END_TAG_OPTIONAL_EL
7036                    dd => ($token->{tag_name} ne 'dd'),                    dd => ($token->{tag_name} ne 'dd'),
7037                    dt => ($token->{tag_name} ne 'dt'),                    dt => ($token->{tag_name} ne 'dt'),
7038                    li => ($token->{tag_name} ne 'li'),                    li => ($token->{tag_name} ne 'li'),
7039                    p => 1,                    p => 1,
7040                      rt => 1,
7041                      rp => 1,
7042                   }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {                   }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
7043              !!!cp ('t409');              !!!cp ('t409');
7044              pop @{$self->{open_elements}};              pop @{$self->{open_elements}};
# Line 7155  sub _tree_construction_main ($) { Line 7251  sub _tree_construction_main ($) {
7251              ## generate implied end tags              ## generate implied end tags
7252              while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {              while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7253                !!!cp ('t430');                !!!cp ('t430');
7254                ## ISSUE: Can this case be reached?                ## NOTE: |<ruby><rt></ruby>|.
7255                  ## ISSUE: <ruby><rt></rt> will also take this code path,
7256                  ## which seems wrong.
7257                pop @{$self->{open_elements}};                pop @{$self->{open_elements}};
7258                  $node_i++;
7259              }              }
7260                    
7261              ## Step 2              ## Step 2
# Line 7173  sub _tree_construction_main ($) { Line 7272  sub _tree_construction_main ($) {
7272              }              }
7273                            
7274              ## Step 3              ## Step 3
7275              splice @{$self->{open_elements}}, $node_i;              splice @{$self->{open_elements}}, $node_i if $node_i < 0;
7276    
7277              !!!next-token;              !!!next-token;
7278              last S2;              last S2;

Legend:
Removed from v.1.139  
changed lines
  Added in v.1.152

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24