/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.163 by wakaba, Sat Sep 13 04:19:56 2008 UTC revision 1.164 by wakaba, Sat Sep 13 06:33:39 2008 UTC
# Line 804  sub BOGUS_DOCTYPE_STATE () { 32 } Line 804  sub BOGUS_DOCTYPE_STATE () { 32 }
804  sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }  sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
805  sub SELF_CLOSING_START_TAG_STATE () { 34 }  sub SELF_CLOSING_START_TAG_STATE () { 34 }
806  sub CDATA_BLOCK_STATE () { 35 }  sub CDATA_BLOCK_STATE () { 35 }
807  sub MD_HYPHEN_STATE () { 36 }  sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec
808  sub MD_DOCTYPE_STATE () { 37 }  sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec
809  sub MD_CDATA_STATE () { 38 }  sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec
810    sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec
811    
812  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 }
813  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
# Line 1122  sub _get_next_token ($) { Line 1123  sub _get_next_token ($) {
1123          die "$0: $self->{content_model} in tag open";          die "$0: $self->{content_model} in tag open";
1124        }        }
1125      } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {      } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
1126          ## NOTE: The "close tag open state" in the spec is implemented as
1127          ## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|.
1128    
1129        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
1130        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1131          if (defined $self->{last_emitted_start_tag_name}) {          if (defined $self->{last_emitted_start_tag_name}) {
1132              $self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE;
1133            ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>            $self->{state_keyword} = '';
1134            my @next_char;            ## Reconsume.
1135            TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {            redo A;
             push @next_char, $self->{next_char};  
             my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);  
             my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;  
             if ($self->{next_char} == $c or $self->{next_char} == $C) {  
               !!!cp (24);  
               !!!next-input-character;  
               next TAGNAME;  
             } else {  
               !!!cp (25);  
               $self->{next_char} = shift @next_char; # reconsume  
               !!!back-next-input-character (@next_char);  
               $self->{state} = DATA_STATE;  
   
               !!!emit ({type => CHARACTER_TOKEN, data => '</',  
                         line => $l, column => $c,  
                        });  
     
               redo A;  
             }  
           }  
           push @next_char, $self->{next_char};  
         
           unless ($self->{next_char} == 0x0009 or # HT  
                   $self->{next_char} == 0x000A or # LF  
                   $self->{next_char} == 0x000B or # VT  
                   $self->{next_char} == 0x000C or # FF  
                   $self->{next_char} == 0x0020 or # SP  
                   $self->{next_char} == 0x003E or # >  
                   $self->{next_char} == 0x002F or # /  
                   $self->{next_char} == -1) {  
             !!!cp (26);  
             $self->{next_char} = shift @next_char; # reconsume  
             !!!back-next-input-character (@next_char);  
             $self->{state} = DATA_STATE;  
             !!!emit ({type => CHARACTER_TOKEN, data => '</',  
                       line => $l, column => $c,  
                      });  
             redo A;  
           } else {  
             !!!cp (27);  
             $self->{next_char} = shift @next_char;  
             !!!back-next-input-character (@next_char);  
             # and consume...  
           }  
1136          } else {          } else {
1137            ## No start tag token has ever been emitted            ## No start tag token has ever been emitted
1138              ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
1139            !!!cp (28);            !!!cp (28);
           # next-input-character is already done  
1140            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
1141              ## Reconsume.
1142            !!!emit ({type => CHARACTER_TOKEN, data => '</',            !!!emit ({type => CHARACTER_TOKEN, data => '</',
1143                      line => $l, column => $c,                      line => $l, column => $c,
1144                     });                     });
1145            redo A;            redo A;
1146          }          }
1147        }        }
1148          
1149        if (0x0041 <= $self->{next_char} and        if (0x0041 <= $self->{next_char} and
1150            $self->{next_char} <= 0x005A) { # A..Z            $self->{next_char} <= 0x005A) { # A..Z
1151          !!!cp (29);          !!!cp (29);
# Line 1231  sub _get_next_token ($) { Line 1192  sub _get_next_token ($) {
1192                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
1193                                    column => $self->{column_prev} - 1,                                    column => $self->{column_prev} - 1,
1194                                   };                                   };
1195          ## $self->{next_char} is intentionally left as is          ## NOTE: $self->{next_char} is intentionally left as is.
1196            ## Although the "anything else" case of the spec does not explicitly
1197            ## state that the next input character is to be reconsumed,
1198            ## it will be included in the |data| of the comment token
1199            ## generated from the bogus end tag, as defined in the
1200            ## "bogus comment state" entry.
1201          redo A;          redo A;
1202        }        }
1203        } elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) {
1204          my $ch = substr $self->{last_emitted_start_tag_name}, length $self->{state_keyword}, 1;
1205          if (length $ch) {
1206            my $CH = $ch;
1207            $ch =~ tr/a-z/A-Z/;
1208            my $nch = chr $self->{next_char};
1209            if ($nch eq $ch or $nch eq $CH) {
1210              !!!cp (24);
1211              ## Stay in the state.
1212              $self->{state_keyword} .= $nch;
1213              !!!next-input-character;
1214              redo A;
1215            } else {
1216              !!!cp (25);
1217              $self->{state} = DATA_STATE;
1218              ## Reconsume.
1219              !!!emit ({type => CHARACTER_TOKEN,
1220                        data => '</' . $self->{state_keyword},
1221                        line => $self->{line_prev},
1222                        column => $self->{column_prev} - 1 - length $self->{state_keyword},
1223                       });
1224              redo A;
1225            }
1226          } else { # after "</{tag-name}"
1227            unless ({
1228                     0x0009 => 1, # HT
1229                     0x000A => 1, # LF
1230                     0x000B => 1, # VT
1231                     0x000C => 1, # FF
1232                     0x0020 => 1, # SP
1233                     0x003E => 1, # >
1234                     0x002F => 1, # /
1235                     -1 => 1, # EOF
1236                    }->{$self->{next_char}}) {
1237              !!!cp (26);
1238              ## Reconsume.
1239              $self->{state} = DATA_STATE;
1240              !!!emit ({type => CHARACTER_TOKEN,
1241                        data => '</' . $self->{state_keyword},
1242                        line => $self->{line_prev},
1243                        column => $self->{column_prev} - 1 - length $self->{state_keyword},
1244                       });
1245              redo A;
1246            } else {
1247              !!!cp (27);
1248              $self->{current_token}
1249                  = {type => END_TAG_TOKEN,
1250                     tag_name => $self->{last_emitted_start_tag_name},
1251                     line => $self->{line_prev},
1252                     column => $self->{column_prev} - 1 - length $self->{state_keyword}};
1253              $self->{state} = TAG_NAME_STATE;
1254              ## Reconsume.
1255              redo A;
1256            }
1257          }
1258      } elsif ($self->{state} == TAG_NAME_STATE) {      } elsif ($self->{state} == TAG_NAME_STATE) {
1259        if ($self->{next_char} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
1260            $self->{next_char} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF

Legend:
Removed from v.1.163  
changed lines
  Added in v.1.164

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24