/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.184 by wakaba, Mon Sep 15 09:02:27 2008 UTC | revision 1.193 by wakaba, Sat Oct 4 04:06:33 2008 UTC
# Line 141  my $el_category = { Line 141  my $el_category = {
141    address => ADDRESS_EL,    address => ADDRESS_EL,
142    applet => MISC_SCOPING_EL,    applet => MISC_SCOPING_EL,
143    area => MISC_SPECIAL_EL,    area => MISC_SPECIAL_EL,
144      article => MISC_SPECIAL_EL,
145      aside => MISC_SPECIAL_EL,
146    b => FORMATTING_EL,    b => FORMATTING_EL,
147    base => MISC_SPECIAL_EL,    base => MISC_SPECIAL_EL,
148    basefont => MISC_SPECIAL_EL,    basefont => MISC_SPECIAL_EL,
# Line 154  my $el_category = { Line 156  my $el_category = {
156    center => MISC_SPECIAL_EL,    center => MISC_SPECIAL_EL,
157    col => MISC_SPECIAL_EL,    col => MISC_SPECIAL_EL,
158    colgroup => MISC_SPECIAL_EL,    colgroup => MISC_SPECIAL_EL,
159      command => MISC_SPECIAL_EL,
160      datagrid => MISC_SPECIAL_EL,
161    dd => DD_EL,    dd => DD_EL,
162      details => MISC_SPECIAL_EL,
163      dialog => MISC_SPECIAL_EL,
164    dir => MISC_SPECIAL_EL,    dir => MISC_SPECIAL_EL,
165    div => DIV_EL,    div => DIV_EL,
166    dl => MISC_SPECIAL_EL,    dl => MISC_SPECIAL_EL,
167    dt => DT_EL,    dt => DT_EL,
168    em => FORMATTING_EL,    em => FORMATTING_EL,
169    embed => MISC_SPECIAL_EL,    embed => MISC_SPECIAL_EL,
170      eventsource => MISC_SPECIAL_EL,
171    fieldset => MISC_SPECIAL_EL,    fieldset => MISC_SPECIAL_EL,
172      figure => MISC_SPECIAL_EL,
173    font => FORMATTING_EL,    font => FORMATTING_EL,
174      footer => MISC_SPECIAL_EL,
175    form => FORM_EL,    form => FORM_EL,
176    frame => MISC_SPECIAL_EL,    frame => MISC_SPECIAL_EL,
177    frameset => FRAMESET_EL,    frameset => FRAMESET_EL,
# Line 173  my $el_category = { Line 182  my $el_category = {
182    h5 => HEADING_EL,    h5 => HEADING_EL,
183    h6 => HEADING_EL,    h6 => HEADING_EL,
184    head => MISC_SPECIAL_EL,    head => MISC_SPECIAL_EL,
185      header => MISC_SPECIAL_EL,
186    hr => MISC_SPECIAL_EL,    hr => MISC_SPECIAL_EL,
187    html => HTML_EL,    html => HTML_EL,
188    i => FORMATTING_EL,    i => FORMATTING_EL,
189    iframe => MISC_SPECIAL_EL,    iframe => MISC_SPECIAL_EL,
190    img => MISC_SPECIAL_EL,    img => MISC_SPECIAL_EL,
191      #image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec.
192    input => MISC_SPECIAL_EL,    input => MISC_SPECIAL_EL,
193    isindex => MISC_SPECIAL_EL,    isindex => MISC_SPECIAL_EL,
194    li => LI_EL,    li => LI_EL,
# Line 186  my $el_category = { Line 197  my $el_category = {
197    marquee => MISC_SCOPING_EL,    marquee => MISC_SCOPING_EL,
198    menu => MISC_SPECIAL_EL,    menu => MISC_SPECIAL_EL,
199    meta => MISC_SPECIAL_EL,    meta => MISC_SPECIAL_EL,
200      nav => MISC_SPECIAL_EL,
201    nobr => NOBR_EL | FORMATTING_EL,    nobr => NOBR_EL | FORMATTING_EL,
202    noembed => MISC_SPECIAL_EL,    noembed => MISC_SPECIAL_EL,
203    noframes => MISC_SPECIAL_EL,    noframes => MISC_SPECIAL_EL,
# Line 204  my $el_category = { Line 216  my $el_category = {
216    s => FORMATTING_EL,    s => FORMATTING_EL,
217    script => MISC_SPECIAL_EL,    script => MISC_SPECIAL_EL,
218    select => SELECT_EL,    select => SELECT_EL,
219      section => MISC_SPECIAL_EL,
220    small => FORMATTING_EL,    small => FORMATTING_EL,
221    spacer => MISC_SPECIAL_EL,    spacer => MISC_SPECIAL_EL,
222    strike => FORMATTING_EL,    strike => FORMATTING_EL,
# Line 323  my $foreign_attr_xname = { Line 336  my $foreign_attr_xname = {
336    
337  ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.  ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
338    
339  my $c1_entity_char = {  my $charref_map = {
340      0x0D => 0x000A,
341    0x80 => 0x20AC,    0x80 => 0x20AC,
342    0x81 => 0xFFFD,    0x81 => 0xFFFD,
343    0x82 => 0x201A,    0x82 => 0x201A,
# Line 356  my $c1_entity_char = { Line 370  my $c1_entity_char = {
370    0x9D => 0xFFFD,    0x9D => 0xFFFD,
371    0x9E => 0x017E,    0x9E => 0x017E,
372    0x9F => 0x0178,    0x9F => 0x0178,
373  }; # $c1_entity_char  }; # $charref_map
374    $charref_map->{$_} = 0xFFFD
375        for 0x0000..0x0008, 0x000B, 0x000E..0x001F, 0x007F,
376            0xD800..0xDFFF, 0xFDD0..0xFDDF, ## ISSUE: 0xFDEF
377            0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF,
378            0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
379            0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
380            0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE,
381            0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF;
382    
383    ## TODO: Invoke the reset algorithm when a resettable element is
384    ## created (cf. HTML5 revision 2259).
385    
386  sub parse_byte_string ($$$$;$) {  sub parse_byte_string ($$$$;$) {
387    my $self = shift;    my $self = shift;
# Line 401  sub parse_byte_stream ($$$$;$$) { Line 426  sub parse_byte_stream ($$$$;$$) {
426            ## TODO: Is this ok?  Transfer protocol's parameter should be            ## TODO: Is this ok?  Transfer protocol's parameter should be
427            ## interpreted in its semantics?            ## interpreted in its semantics?
428    
       ## ISSUE: Unsupported encoding is not ignored according to the spec.  
429        ($char_stream, $e_status) = $charset->get_decode_handle        ($char_stream, $e_status) = $charset->get_decode_handle
430            ($byte_stream, allow_error_reporting => 1,            ($byte_stream, allow_error_reporting => 1,
431             allow_fallback => 1);             allow_fallback => 1);
# Line 409  sub parse_byte_stream ($$$$;$$) { Line 433  sub parse_byte_stream ($$$$;$$) {
433          $self->{confident} = 1;          $self->{confident} = 1;
434          last SNIFFING;          last SNIFFING;
435        } else {        } else {
436          ## TODO: unsupported error          !!!parse-error (type => 'charset:not supported',
437                            layer => 'encode',
438                            line => 1, column => 1,
439                            value => $charset_name,
440                            level => $self->{level}->{uncertain});
441        }        }
442      }      }
443    
# Line 853  sub CDATA_SECTION_STATE () { 35 } Line 881  sub CDATA_SECTION_STATE () { 35 }
881  sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec  sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec
882  sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec  sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec
883  sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec  sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec
884  sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec  sub CDATA_RCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec
885  sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec  sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec
886  sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec  sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec
887  sub PUBLIC_STATE () { 42 } # "after DOCTYPE name state" in the spec  sub PUBLIC_STATE () { 42 } # "after DOCTYPE name state" in the spec
# Line 867  sub NCR_NUM_STATE () { 46 } Line 895  sub NCR_NUM_STATE () { 46 }
895  sub HEXREF_X_STATE () { 47 }  sub HEXREF_X_STATE () { 47 }
896  sub HEXREF_HEX_STATE () { 48 }  sub HEXREF_HEX_STATE () { 48 }
897  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
898    sub PCDATA_STATE () { 50 } # "data state" in the spec
899    
900  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 }
901  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
# Line 965  sub _initialize_tokenizer ($) { Line 994  sub _initialize_tokenizer ($) {
994  ## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)  ## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)
995  ## (This requirement was dropped from HTML5 spec, unfortunately.)  ## (This requirement was dropped from HTML5 spec, unfortunately.)
996    
997    my $is_space = {
998      0x0009 => 1, # CHARACTER TABULATION (HT)
999      0x000A => 1, # LINE FEED (LF)
1000      #0x000B => 0, # LINE TABULATION (VT)
1001      0x000C => 1, # FORM FEED (FF)
1002      #0x000D => 1, # CARRIAGE RETURN (CR)
1003      0x0020 => 1, # SPACE (SP)
1004    };
1005    
1006  sub _get_next_token ($) {  sub _get_next_token ($) {
1007    my $self = shift;    my $self = shift;
1008    
# Line 982  sub _get_next_token ($) { Line 1020  sub _get_next_token ($) {
1020    }    }
1021    
1022    A: {    A: {
1023      if ($self->{state} == DATA_STATE) {      if ($self->{state} == PCDATA_STATE) {
1024          ## NOTE: Same as |DATA_STATE|, but only for |PCDATA| content model.
1025    
1026        if ($self->{nc} == 0x0026) { # &        if ($self->{nc} == 0x0026) { # &
1027          delete $self->{s_kwd};          !!!cp (0.1);
1028            ## NOTE: In the spec, the tokenizer is switched to the
1029            ## "entity data state".  In this implementation, the tokenizer
1030            ## is switched to the |ENTITY_STATE|, which is an implementation
1031            ## of the "consume a character reference" algorithm.
1032            $self->{entity_add} = -1;
1033            $self->{prev_state} = DATA_STATE;
1034            $self->{state} = ENTITY_STATE;
1035            !!!next-input-character;
1036            redo A;
1037          } elsif ($self->{nc} == 0x003C) { # <
1038            !!!cp (0.2);
1039            $self->{state} = TAG_OPEN_STATE;
1040            !!!next-input-character;
1041            redo A;
1042          } elsif ($self->{nc} == -1) {
1043            !!!cp (0.3);
1044            !!!emit ({type => END_OF_FILE_TOKEN,
1045                      line => $self->{line}, column => $self->{column}});
1046            last A; ## TODO: ok?
1047          } else {
1048            !!!cp (0.4);
1049            #
1050          }
1051    
1052          # Anything else
1053          my $token = {type => CHARACTER_TOKEN,
1054                       data => chr $self->{nc},
1055                       line => $self->{line}, column => $self->{column},
1056                      };
1057          $self->{read_until}->($token->{data}, q[<&], length $token->{data});
1058    
1059          ## Stay in the state.
1060          !!!next-input-character;
1061          !!!emit ($token);
1062          redo A;
1063        } elsif ($self->{state} == DATA_STATE) {
1064          $self->{s_kwd} = '' unless defined $self->{s_kwd};
1065          if ($self->{nc} == 0x0026) { # &
1066            $self->{s_kwd} = '';
1067          if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA          if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
1068              not $self->{escape}) {              not $self->{escape}) {
1069            !!!cp (1);            !!!cp (1);
# Line 1003  sub _get_next_token ($) { Line 1082  sub _get_next_token ($) {
1082          }          }
1083        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
1084          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1085            if (defined $self->{s_kwd}) {            $self->{s_kwd} .= '-';
1086              !!!cp (2.1);            
             $self->{s_kwd} .= '-';  
           } else {  
             !!!cp (2.2);  
             $self->{s_kwd} = '-';  
           }  
   
1087            if ($self->{s_kwd} eq '<!--') {            if ($self->{s_kwd} eq '<!--') {
1088              !!!cp (3);              !!!cp (3);
1089              $self->{escape} = 1; # unless $self->{escape};              $self->{escape} = 1; # unless $self->{escape};
# Line 1028  sub _get_next_token ($) { Line 1101  sub _get_next_token ($) {
1101                    
1102          #          #
1103        } elsif ($self->{nc} == 0x0021) { # !        } elsif ($self->{nc} == 0x0021) { # !
1104          if (defined $self->{s_kwd}) {          if (length $self->{s_kwd}) {
1105            !!!cp (5.1);            !!!cp (5.1);
1106            $self->{s_kwd} .= '!';            $self->{s_kwd} .= '!';
1107            #            #
1108          } else {          } else {
1109            !!!cp (5.2);            !!!cp (5.2);
1110              #$self->{s_kwd} = '';
1111            #            #
1112          }          }
1113          #          #
1114        } elsif ($self->{nc} == 0x003C) { # <        } elsif ($self->{nc} == 0x003C) { # <
         delete $self->{s_kwd};  
1115          if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA          if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
1116              (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA              (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
1117               not $self->{escape})) {               not $self->{escape})) {
# Line 1048  sub _get_next_token ($) { Line 1121  sub _get_next_token ($) {
1121            redo A;            redo A;
1122          } else {          } else {
1123            !!!cp (7);            !!!cp (7);
1124              $self->{s_kwd} = '';
1125            #            #
1126          }          }
1127        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1128          if ($self->{escape} and          if ($self->{escape} and
1129              ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA              ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
1130            if (defined $self->{s_kwd} and $self->{s_kwd} eq '--') {            if ($self->{s_kwd} eq '--') {
1131              !!!cp (8);              !!!cp (8);
1132              delete $self->{escape};              delete $self->{escape};
1133            } else {            } else {
# Line 1063  sub _get_next_token ($) { Line 1137  sub _get_next_token ($) {
1137            !!!cp (10);            !!!cp (10);
1138          }          }
1139                    
1140          delete $self->{s_kwd};          $self->{s_kwd} = '';
1141          #          #
1142        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1143          !!!cp (11);          !!!cp (11);
1144          delete $self->{s_kwd};          $self->{s_kwd} = '';
1145          !!!emit ({type => END_OF_FILE_TOKEN,          !!!emit ({type => END_OF_FILE_TOKEN,
1146                    line => $self->{line}, column => $self->{column}});                    line => $self->{line}, column => $self->{column}});
1147          last A; ## TODO: ok?          last A; ## TODO: ok?
1148        } else {        } else {
1149          !!!cp (12);          !!!cp (12);
1150          delete $self->{s_kwd};          $self->{s_kwd} = '';
1151          #          #
1152        }        }
1153    
# Line 1084  sub _get_next_token ($) { Line 1158  sub _get_next_token ($) {
1158                    };                    };
1159        if ($self->{read_until}->($token->{data}, q[-!<>&],        if ($self->{read_until}->($token->{data}, q[-!<>&],
1160                                  length $token->{data})) {                                  length $token->{data})) {
1161          delete $self->{s_kwd};          $self->{s_kwd} = '';
1162        }        }
1163    
1164        ## Stay in the data state        ## Stay in the data state.
1165          if ($self->{content_model} == PCDATA_CONTENT_MODEL) {
1166            !!!cp (13);
1167            $self->{state} = PCDATA_STATE;
1168          } else {
1169            !!!cp (14);
1170            ## Stay in the state.
1171          }
1172        !!!next-input-character;        !!!next-input-character;
1173        !!!emit ($token);        !!!emit ($token);
1174        redo A;        redo A;
# Line 1192  sub _get_next_token ($) { Line 1273  sub _get_next_token ($) {
1273        }        }
1274      } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {      } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
1275        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
1276        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
1277    
1278        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
1279        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1280          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
1281            $self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE;            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
1282            $self->{s_kwd} = '';            $self->{s_kwd} = '';
1283            ## Reconsume.            ## Reconsume.
1284            redo A;            redo A;
# Line 1268  sub _get_next_token ($) { Line 1349  sub _get_next_token ($) {
1349          ## "bogus comment state" entry.          ## "bogus comment state" entry.
1350          redo A;          redo A;
1351        }        }
1352      } elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
1353        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;
1354        if (length $ch) {        if (length $ch) {
1355          my $CH = $ch;          my $CH = $ch;
# Line 1292  sub _get_next_token ($) { Line 1373  sub _get_next_token ($) {
1373            redo A;            redo A;
1374          }          }
1375        } else { # after "<{tag-name}"        } else { # after "<{tag-name}"
1376          unless ({          unless ($is_space->{$self->{nc}} or
1377                   0x0009 => 1, # HT                  {
                  0x000A => 1, # LF  
                  0x000B => 1, # VT  
                  0x000C => 1, # FF  
                  0x0020 => 1, # SP  
1378                   0x003E => 1, # >                   0x003E => 1, # >
1379                   0x002F => 1, # /                   0x002F => 1, # /
1380                   -1 => 1, # EOF                   -1 => 1, # EOF
# Line 1324  sub _get_next_token ($) { Line 1401  sub _get_next_token ($) {
1401          }          }
1402        }        }
1403      } elsif ($self->{state} == TAG_NAME_STATE) {      } elsif ($self->{state} == TAG_NAME_STATE) {
1404        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1405          !!!cp (34);          !!!cp (34);
1406          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1407          !!!next-input-character;          !!!next-input-character;
# Line 1400  sub _get_next_token ($) { Line 1473  sub _get_next_token ($) {
1473          redo A;          redo A;
1474        }        }
1475      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1476        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1477          !!!cp (45);          !!!cp (45);
1478          ## Stay in the state          ## Stay in the state
1479          !!!next-input-character;          !!!next-input-character;
# Line 1500  sub _get_next_token ($) { Line 1569  sub _get_next_token ($) {
1569          }          }
1570        }; # $before_leave        }; # $before_leave
1571    
1572        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1573          !!!cp (59);          !!!cp (59);
1574          $before_leave->();          $before_leave->();
1575          $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;          $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
# Line 1587  sub _get_next_token ($) { Line 1652  sub _get_next_token ($) {
1652          redo A;          redo A;
1653        }        }
1654      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1655        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1656          !!!cp (71);          !!!cp (71);
1657          ## Stay in the state          ## Stay in the state
1658          !!!next-input-character;          !!!next-input-character;
# Line 1678  sub _get_next_token ($) { Line 1739  sub _get_next_token ($) {
1739          redo A;                  redo A;        
1740        }        }
1741      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1742        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP        
1743          !!!cp (83);          !!!cp (83);
1744          ## Stay in the state          ## Stay in the state
1745          !!!next-input-character;          !!!next-input-character;
# Line 1863  sub _get_next_token ($) { Line 1920  sub _get_next_token ($) {
1920          redo A;          redo A;
1921        }        }
1922      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1923        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # HT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1924          !!!cp (107);          !!!cp (107);
1925          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1926          !!!next-input-character;          !!!next-input-character;
# Line 1949  sub _get_next_token ($) { Line 2002  sub _get_next_token ($) {
2002          redo A;          redo A;
2003        }        }
2004      } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
2005        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2006          !!!cp (118);          !!!cp (118);
2007          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2008          !!!next-input-character;          !!!next-input-character;
# Line 2394  sub _get_next_token ($) { Line 2443  sub _get_next_token ($) {
2443          redo A;          redo A;
2444        }        }
2445      } elsif ($self->{state} == DOCTYPE_STATE) {      } elsif ($self->{state} == DOCTYPE_STATE) {
2446        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2447          !!!cp (155);          !!!cp (155);
2448          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2449          !!!next-input-character;          !!!next-input-character;
# Line 2411  sub _get_next_token ($) { Line 2456  sub _get_next_token ($) {
2456          redo A;          redo A;
2457        }        }
2458      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2459        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2460          !!!cp (157);          !!!cp (157);
2461          ## Stay in the state          ## Stay in the state
2462          !!!next-input-character;          !!!next-input-character;
# Line 2449  sub _get_next_token ($) { Line 2490  sub _get_next_token ($) {
2490        }        }
2491      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2492  ## ISSUE: Redundant "First," in the spec.  ## ISSUE: Redundant "First," in the spec.
2493        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2494          !!!cp (161);          !!!cp (161);
2495          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2496          !!!next-input-character;          !!!next-input-character;
# Line 2485  sub _get_next_token ($) { Line 2522  sub _get_next_token ($) {
2522          redo A;          redo A;
2523        }        }
2524      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2525        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2526          !!!cp (165);          !!!cp (165);
2527          ## Stay in the state          ## Stay in the state
2528          !!!next-input-character;          !!!next-input-character;
# Line 2612  sub _get_next_token ($) { Line 2645  sub _get_next_token ($) {
2645          redo A;          redo A;
2646        }        }
2647      } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2648        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2649          !!!cp (181);          !!!cp (181);
2650          ## Stay in the state          ## Stay in the state
2651          !!!next-input-character;          !!!next-input-character;
# Line 2742  sub _get_next_token ($) { Line 2772  sub _get_next_token ($) {
2772          redo A;          redo A;
2773        }        }
2774      } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2775        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2776          !!!cp (195);          !!!cp (195);
2777          ## Stay in the state          ## Stay in the state
2778          !!!next-input-character;          !!!next-input-character;
# Line 2791  sub _get_next_token ($) { Line 2818  sub _get_next_token ($) {
2818          redo A;          redo A;
2819        }        }
2820      } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2821        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2822          !!!cp (201);          !!!cp (201);
2823          ## Stay in the state          ## Stay in the state
2824          !!!next-input-character;          !!!next-input-character;
# Line 2920  sub _get_next_token ($) { Line 2944  sub _get_next_token ($) {
2944          redo A;          redo A;
2945        }        }
2946      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2947        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2948          !!!cp (215);          !!!cp (215);
2949          ## Stay in the state          ## Stay in the state
2950          !!!next-input-character;          !!!next-input-character;
# Line 3055  sub _get_next_token ($) { Line 3076  sub _get_next_token ($) {
3076          redo A;          redo A;
3077        }        }
3078      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
3079        if ({        if ($is_space->{$self->{nc}} or
3080          0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,            {
3081          0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
3082          $self->{entity_add} => 1,              $self->{entity_add} => 1,
3083        }->{$self->{nc}}) {            }->{$self->{nc}}) {
3084          !!!cp (1001);          !!!cp (1001);
3085          ## Don't consume          ## Don't consume
3086          ## No error          ## No error
# Line 3178  sub _get_next_token ($) { Line 3199  sub _get_next_token ($) {
3199        my $code = $self->{s_kwd};        my $code = $self->{s_kwd};
3200        my $l = $self->{line_prev};        my $l = $self->{line_prev};
3201        my $c = $self->{column_prev};        my $c = $self->{column_prev};
3202        if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {        if ($charref_map->{$code}) {
3203          !!!cp (1015);          !!!cp (1015);
3204          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3205                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
3206                          line => $l, column => $c);                          line => $l, column => $c);
3207          $code = 0xFFFD;          $code = $charref_map->{$code};
3208        } elsif ($code > 0x10FFFF) {        } elsif ($code > 0x10FFFF) {
3209          !!!cp (1016);          !!!cp (1016);
3210          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3211                          text => (sprintf 'U-%08X', $code),                          text => (sprintf 'U-%08X', $code),
3212                          line => $l, column => $c);                          line => $l, column => $c);
3213          $code = 0xFFFD;          $code = 0xFFFD;
       } elsif ($code == 0x000D) {  
         !!!cp (1017);  
         !!!parse-error (type => 'CR character reference',  
                         line => $l, column => $c);  
         $code = 0x000A;  
       } elsif (0x80 <= $code and $code <= 0x9F) {  
         !!!cp (1018);  
         !!!parse-error (type => 'C1 character reference',  
                         text => (sprintf 'U+%04X', $code),  
                         line => $l, column => $c);  
         $code = $c1_entity_char->{$code};  
3214        }        }
3215    
3216        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
# Line 3297  sub _get_next_token ($) { Line 3307  sub _get_next_token ($) {
3307        my $code = $self->{s_kwd};        my $code = $self->{s_kwd};
3308        my $l = $self->{line_prev};        my $l = $self->{line_prev};
3309        my $c = $self->{column_prev};        my $c = $self->{column_prev};
3310        if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {        if ($charref_map->{$code}) {
3311          !!!cp (1008);          !!!cp (1008);
3312          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3313                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
3314                          line => $l, column => $c);                          line => $l, column => $c);
3315          $code = 0xFFFD;          $code = $charref_map->{$code};
3316        } elsif ($code > 0x10FFFF) {        } elsif ($code > 0x10FFFF) {
3317          !!!cp (1009);          !!!cp (1009);
3318          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3319                          text => (sprintf 'U-%08X', $code),                          text => (sprintf 'U-%08X', $code),
3320                          line => $l, column => $c);                          line => $l, column => $c);
3321          $code = 0xFFFD;          $code = 0xFFFD;
       } elsif ($code == 0x000D) {  
         !!!cp (1010);  
         !!!parse-error (type => 'CR character reference', line => $l, column => $c);  
         $code = 0x000A;  
       } elsif (0x80 <= $code and $code <= 0x9F) {  
         !!!cp (1011);  
         !!!parse-error (type => 'C1 character reference', text => (sprintf 'U+%04X', $code), line => $l, column => $c);  
         $code = $c1_entity_char->{$code};  
3322        }        }
3323    
3324        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
# Line 3657  sub _tree_construction_initial ($) { Line 3659  sub _tree_construction_initial ($) {
3659        !!!ack-later;        !!!ack-later;
3660        return;        return;
3661      } elsif ($token->{type} == CHARACTER_TOKEN) {      } elsif ($token->{type} == CHARACTER_TOKEN) {
3662        if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D        if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3663          ## Ignore the token          ## Ignore the token
3664    
3665          unless (length $token->{data}) {          unless (length $token->{data}) {
# Line 3714  sub _tree_construction_root_element ($) Line 3716  sub _tree_construction_root_element ($)
3716          !!!next-token;          !!!next-token;
3717          redo B;          redo B;
3718        } elsif ($token->{type} == CHARACTER_TOKEN) {        } elsif ($token->{type} == CHARACTER_TOKEN) {
3719          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3720            ## Ignore the token.            ## Ignore the token.
3721    
3722            unless (length $token->{data}) {            unless (length $token->{data}) {
# Line 4528  sub _tree_construction_main ($) { Line 4530  sub _tree_construction_main ($) {
4530    
4531      if ($self->{insertion_mode} & HEAD_IMS) {      if ($self->{insertion_mode} & HEAD_IMS) {
4532        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
4533          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4534            unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {            unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4535              !!!cp ('t88.2');              !!!cp ('t88.2');
4536              $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);              $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
# Line 4707  sub _tree_construction_main ($) { Line 4709  sub _tree_construction_main ($) {
4709                  } elsif ($token->{attributes}->{content}) {                  } elsif ($token->{attributes}->{content}) {
4710                    if ($token->{attributes}->{content}->{value}                    if ($token->{attributes}->{content}->{value}
4711                        =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]                        =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4712                            [\x09-\x0D\x20]*=                            [\x09\x0A\x0C\x0D\x20]*=
4713                            [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|                            [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4714                            ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {                            ([^"'\x09\x0A\x0C\x0D\x20]
4715                               [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
4716                      !!!cp ('t107');                      !!!cp ('t107');
4717                      ## NOTE: Whether the encoding is supported or not is handled                      ## NOTE: Whether the encoding is supported or not is handled
4718                      ## in the {change_encoding} callback.                      ## in the {change_encoding} callback.
# Line 5518  sub _tree_construction_main ($) { Line 5521  sub _tree_construction_main ($) {
5521      } elsif ($self->{insertion_mode} & TABLE_IMS) {      } elsif ($self->{insertion_mode} & TABLE_IMS) {
5522        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
5523          if (not $open_tables->[-1]->[1] and # tainted          if (not $open_tables->[-1]->[1] and # tainted
5524              $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {              $token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
5525            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5526                                
5527            unless (length $token->{data}) {            unless (length $token->{data}) {
# Line 6202  sub _tree_construction_main ($) { Line 6205  sub _tree_construction_main ($) {
6205        }        }
6206      } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {      } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6207            if ($token->{type} == CHARACTER_TOKEN) {            if ($token->{type} == CHARACTER_TOKEN) {
6208              if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {              if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
6209                $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);                $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6210                unless (length $token->{data}) {                unless (length $token->{data}) {
6211                  !!!cp ('t260');                  !!!cp ('t260');
# Line 6543  sub _tree_construction_main ($) { Line 6546  sub _tree_construction_main ($) {
6546        }        }
6547      } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {      } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
6548        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
6549          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
6550            my $data = $1;            my $data = $1;
6551            ## As if in body            ## As if in body
6552            $reconstruct_active_formatting_elements->($insert_to_current);            $reconstruct_active_formatting_elements->($insert_to_current);
# Line 6560  sub _tree_construction_main ($) { Line 6563  sub _tree_construction_main ($) {
6563          if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {          if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6564            !!!cp ('t301');            !!!cp ('t301');
6565            !!!parse-error (type => 'after html:#text', token => $token);            !!!parse-error (type => 'after html:#text', token => $token);
6566              #
           ## Reprocess in the "after body" insertion mode.  
6567          } else {          } else {
6568            !!!cp ('t302');            !!!cp ('t302');
6569              ## "after body" insertion mode
6570              !!!parse-error (type => 'after body:#text', token => $token);
6571              #
6572          }          }
           
         ## "after body" insertion mode  
         !!!parse-error (type => 'after body:#text', token => $token);  
6573    
6574          $self->{insertion_mode} = IN_BODY_IM;          $self->{insertion_mode} = IN_BODY_IM;
6575          ## reprocess          ## reprocess
# Line 6577  sub _tree_construction_main ($) { Line 6579  sub _tree_construction_main ($) {
6579            !!!cp ('t303');            !!!cp ('t303');
6580            !!!parse-error (type => 'after html',            !!!parse-error (type => 'after html',
6581                            text => $token->{tag_name}, token => $token);                            text => $token->{tag_name}, token => $token);
6582                        #
           ## Reprocess in the "after body" insertion mode.  
6583          } else {          } else {
6584            !!!cp ('t304');            !!!cp ('t304');
6585              ## "after body" insertion mode
6586              !!!parse-error (type => 'after body',
6587                              text => $token->{tag_name}, token => $token);
6588              #
6589          }          }
6590    
         ## "after body" insertion mode  
         !!!parse-error (type => 'after body',  
                         text => $token->{tag_name}, token => $token);  
   
6591          $self->{insertion_mode} = IN_BODY_IM;          $self->{insertion_mode} = IN_BODY_IM;
6592          !!!ack-later;          !!!ack-later;
6593          ## reprocess          ## reprocess
# Line 6597  sub _tree_construction_main ($) { Line 6598  sub _tree_construction_main ($) {
6598            !!!parse-error (type => 'after html:/',            !!!parse-error (type => 'after html:/',
6599                            text => $token->{tag_name}, token => $token);                            text => $token->{tag_name}, token => $token);
6600                        
6601            $self->{insertion_mode} = AFTER_BODY_IM;            $self->{insertion_mode} = IN_BODY_IM;
6602            ## Reprocess in the "after body" insertion mode.            ## Reprocess.
6603              next B;
6604          } else {          } else {
6605            !!!cp ('t306');            !!!cp ('t306');
6606          }          }
# Line 6636  sub _tree_construction_main ($) { Line 6638  sub _tree_construction_main ($) {
6638        }        }
6639      } elsif ($self->{insertion_mode} & FRAME_IMS) {      } elsif ($self->{insertion_mode} & FRAME_IMS) {
6640        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
6641          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
6642            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6643                        
6644            unless (length $token->{data}) {            unless (length $token->{data}) {
# Line 6646  sub _tree_construction_main ($) { Line 6648  sub _tree_construction_main ($) {
6648            }            }
6649          }          }
6650                    
6651          if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {          if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
6652            if ($self->{insertion_mode} == IN_FRAMESET_IM) {            if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6653              !!!cp ('t311');              !!!cp ('t311');
6654              !!!parse-error (type => 'in frameset:#text', token => $token);              !!!parse-error (type => 'in frameset:#text', token => $token);
# Line 6823  sub _tree_construction_main ($) { Line 6825  sub _tree_construction_main ($) {
6825            } elsif ($token->{attributes}->{content}) {            } elsif ($token->{attributes}->{content}) {
6826              if ($token->{attributes}->{content}->{value}              if ($token->{attributes}->{content}->{value}
6827                  =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]                  =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6828                      [\x09-\x0D\x20]*=                      [\x09\x0A\x0C\x0D\x20]*=
6829                      [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|                      [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6830                      ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {                      ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
6831                       /x) {
6832                !!!cp ('t336');                !!!cp ('t336');
6833                ## NOTE: Whether the encoding is supported or not is handled                ## NOTE: Whether the encoding is supported or not is handled
6834                ## in the {change_encoding} callback.                ## in the {change_encoding} callback.
# Line 6970  sub _tree_construction_main ($) { Line 6973  sub _tree_construction_main ($) {
6973              last INSCOPE;              last INSCOPE;
6974            }            }
6975          } # INSCOPE          } # INSCOPE
6976    
6977            ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)
6978              ## Interpreted as <li><foo/></li><li/> (non-conforming)
6979              ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
6980              ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
6981              ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
6982              ## object (Fx)
6983              ## Generate non-tree (non-conforming)
6984              ## basefont (IE7 (where basefont is non-void)), center (IE),
6985              ## form (IE), hn (IE)
6986            ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)
6987              ## Interpreted as <li><foo><li/></foo></li> (non-conforming)
6988              ## div (Fx, S)
6989                        
6990          ## Step 1          ## Step 1
6991          my $i = -1;          my $i = -1;
# Line 7350  sub _tree_construction_main ($) { Line 7366  sub _tree_construction_main ($) {
7366            !!!nack ('t380.1');            !!!nack ('t380.1');
7367          } elsif ({          } elsif ({
7368                    b => 1, big => 1, em => 1, font => 1, i => 1,                    b => 1, big => 1, em => 1, font => 1, i => 1,
7369                    s => 1, small => 1, strile => 1,                    s => 1, small => 1, strike => 1,
7370                    strong => 1, tt => 1, u => 1,                    strong => 1, tt => 1, u => 1,
7371                   }->{$token->{tag_name}}) {                   }->{$token->{tag_name}}) {
7372            !!!cp ('t375');            !!!cp ('t375');
# Line 7655  sub _tree_construction_main ($) { Line 7671  sub _tree_construction_main ($) {
7671        } elsif ({        } elsif ({
7672                  a => 1,                  a => 1,
7673                  b => 1, big => 1, em => 1, font => 1, i => 1,                  b => 1, big => 1, em => 1, font => 1, i => 1,
7674                  nobr => 1, s => 1, small => 1, strile => 1,                  nobr => 1, s => 1, small => 1, strike => 1,
7675                  strong => 1, tt => 1, u => 1,                  strong => 1, tt => 1, u => 1,
7676                 }->{$token->{tag_name}}) {                 }->{$token->{tag_name}}) {
7677          !!!cp ('t427');          !!!cp ('t427');
# Line 7746  sub _tree_construction_main ($) { Line 7762  sub _tree_construction_main ($) {
7762                ## Ignore the token                ## Ignore the token
7763                !!!next-token;                !!!next-token;
7764                last S2;                last S2;
             }  
7765    
7766                  ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
7767                  ## 9.27, "a" is a child of <dd> (conforming).  In
7768                  ## Firefox 3.0.2, "a" is a child of <body>.  In WinIE 7,
7769                  ## "a" is a child of both <body> and <dd>.
7770                }
7771                
7772              !!!cp ('t434');              !!!cp ('t434');
7773            }            }
7774                        

Legend:
Removed from v.1.184  
changed lines
  Added in v.1.193

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24