/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch Patch

-revision 1.163 by wakaba,
Sat Sep 13 04:19:56 2008 UTC
+revision 1.164 by wakaba,
Sat Sep 13 06:33:39 2008 UTC
 Line 804 
 sub BOGUS_DOCTYPE_STATE () { 32 }
  sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
  sub SELF_CLOSING_START_TAG_STATE () { 34 }
  sub CDATA_BLOCK_STATE () { 35 }
- sub MD_HYPHEN_STATE () { 36 }
+ sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec
- sub MD_DOCTYPE_STATE () { 37 }
+ sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec
- sub MD_CDATA_STATE () { 38 }
+ sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec
+ sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec
  sub DOCTYPE_TOKEN () { 1 }
  sub COMMENT_TOKEN () { 2 }
-Line 1122 
 sub _get_next_token ($) {
+Line 1123 
 sub _get_next_token ($) {
          die "$0: $self->{content_model} in tag open";
        }
      } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
+       ## NOTE: The "close tag open state" in the spec is implemented as
+       ## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|.
        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
          if (defined $self->{last_emitted_start_tag_name}) {
+           $self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE;
-           ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
+           $self->{state_keyword} = '';
-           my @next_char;
+           ## Reconsume.
-           TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
+           redo A;
-             push @next_char, $self->{next_char};
-             my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
-             my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
-             if ($self->{next_char} == $c or $self->{next_char} == $C) {
-               !!!cp (24);
-               !!!next-input-character;
-               next TAGNAME;
-             } else {
-               !!!cp (25);
-               $self->{next_char} = shift @next_char; # reconsume
-               !!!back-next-input-character (@next_char);
-               $self->{state} = DATA_STATE;
-               !!!emit ({type => CHARACTER_TOKEN, data => '</',
-                         line => $l, column => $c,
-                        });
-               redo A;
-             }
-           }
-           push @next_char, $self->{next_char};
-           unless ($self->{next_char} == 0x0009 or # HT
-                   $self->{next_char} == 0x000A or # LF
-                   $self->{next_char} == 0x000B or # VT
-                   $self->{next_char} == 0x000C or # FF
-                   $self->{next_char} == 0x0020 or # SP
-                   $self->{next_char} == 0x003E or # >
-                   $self->{next_char} == 0x002F or # /
-                   $self->{next_char} == -1) {
-             !!!cp (26);
-             $self->{next_char} = shift @next_char; # reconsume
-             !!!back-next-input-character (@next_char);
-             $self->{state} = DATA_STATE;
-             !!!emit ({type => CHARACTER_TOKEN, data => '</',
-                       line => $l, column => $c,
-                      });
-             redo A;
-           } else {
-             !!!cp (27);
-             $self->{next_char} = shift @next_char;
-             !!!back-next-input-character (@next_char);
-             # and consume...
-           }
          } else {
            ## No start tag token has ever been emitted
+           ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
            !!!cp (28);
-           # next-input-character is already done
            $self->{state} = DATA_STATE;
+           ## Reconsume.
            !!!emit ({type => CHARACTER_TOKEN, data => '</',
                      line => $l, column => $c,
                     });
            redo A;
          }
        }
        if (0x0041 <= $self->{next_char} and
            $self->{next_char} <= 0x005A) { # A..Z
          !!!cp (29);
-Line 1231 
 sub _get_next_token ($) {
+Line 1192 
 sub _get_next_token ($) {
                                    line => $self->{line_prev}, # "<" of "</"
                                    column => $self->{column_prev} - 1,
                                   };
-         ## $self->{next_char} is intentionally left as is
+         ## NOTE: $self->{next_char} is intentionally left as is.
+         ## Although the "anything else" case of the spec does not explicitly
+         ## state that the next input character is to be reconsumed,
+         ## it will be included in the |data| of the comment token
+         ## generated from the bogus end tag, as defined in the
+         ## "bogus comment state" entry.
          redo A;
        }
+     } elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) {
+       my $ch = substr $self->{last_emitted_start_tag_name}, length $self->{state_keyword}, 1;
+       if (length $ch) {
+         my $CH = $ch;
+         $ch =~ tr/a-z/A-Z/;
+         my $nch = chr $self->{next_char};
+         if ($nch eq $ch or $nch eq $CH) {
+           !!!cp (24);
+           ## Stay in the state.
+           $self->{state_keyword} .= $nch;
+           !!!next-input-character;
+           redo A;
+         } else {
+           !!!cp (25);
+           $self->{state} = DATA_STATE;
+           ## Reconsume.
+           !!!emit ({type => CHARACTER_TOKEN,
+                     data => '</' . $self->{state_keyword},
+                     line => $self->{line_prev},
+                     column => $self->{column_prev} - 1 - length $self->{state_keyword},
+                    });
+           redo A;
+         }
+       } else { # after "</{tag-name}"
+         unless ({
+                  0x0009 => 1, # HT
+                  0x000A => 1, # LF
+                  0x000B => 1, # VT
+                  0x000C => 1, # FF
+                  0x0020 => 1, # SP
+                  0x003E => 1, # >
+                  0x002F => 1, # /
+                  -1 => 1, # EOF
+                 }->{$self->{next_char}}) {
+           !!!cp (26);
+           ## Reconsume.
+           $self->{state} = DATA_STATE;
+           !!!emit ({type => CHARACTER_TOKEN,
+                     data => '</' . $self->{state_keyword},
+                     line => $self->{line_prev},
+                     column => $self->{column_prev} - 1 - length $self->{state_keyword},
+                    });
+           redo A;
+         } else {
+           !!!cp (27);
+           $self->{current_token}
+               = {type => END_TAG_TOKEN,
+                  tag_name => $self->{last_emitted_start_tag_name},
+                  line => $self->{line_prev},
+                  column => $self->{column_prev} - 1 - length $self->{state_keyword}};
+           $self->{state} = TAG_NAME_STATE;
+           ## Reconsume.
+           redo A;
+         }
+       }
      } elsif ($self->{state} == TAG_NAME_STATE) {
        if ($self->{next_char} == 0x0009 or # HT
            $self->{next_char} == 0x000A or # LF

 Legend:



Removed from v.1.163
 


changed lines


 
Added in v.1.164
 Legend:



Removed from v.1.163
 


changed lines


 
Added in v.1.164
-Removed from v.1.163
+Added in v.1.164

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24