/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.75 by wakaba, Mon Mar 3 00:13:22 2008 UTC | revision 1.76 by wakaba, Mon Mar 3 09:17:10 2008 UTC
# Line 179  sub parse_string ($$$;$) { Line 179  sub parse_string ($$$;$) {
179    my $i = 0;    my $i = 0;
180    my $line = 1;    my $line = 1;
181    my $column = 0;    my $column = 0;
182    $self->{set_next_input_character} = sub {    $self->{set_next_char} = sub {
183      my $self = shift;      my $self = shift;
184    
185      pop @{$self->{prev_input_character}};      pop @{$self->{prev_char}};
186      unshift @{$self->{prev_input_character}}, $self->{next_input_character};      unshift @{$self->{prev_char}}, $self->{next_char};
187    
188      $self->{next_input_character} = -1 and return if $i >= length $$s;      $self->{next_char} = -1 and return if $i >= length $$s;
189      $self->{next_input_character} = ord substr $$s, $i++, 1;      $self->{next_char} = ord substr $$s, $i++, 1;
190      $column++;      $column++;
191            
192      if ($self->{next_input_character} == 0x000A) { # LF      if ($self->{next_char} == 0x000A) { # LF
193        $line++;        $line++;
194        $column = 0;        $column = 0;
195      } elsif ($self->{next_input_character} == 0x000D) { # CR      } elsif ($self->{next_char} == 0x000D) { # CR
196        $i++ if substr ($$s, $i, 1) eq "\x0A";        $i++ if substr ($$s, $i, 1) eq "\x0A";
197        $self->{next_input_character} = 0x000A; # LF # MUST        $self->{next_char} = 0x000A; # LF # MUST
198        $line++;        $line++;
199        $column = 0;        $column = 0;
200      } elsif ($self->{next_input_character} > 0x10FFFF) {      } elsif ($self->{next_char} > 0x10FFFF) {
201        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
202      } elsif ($self->{next_input_character} == 0x0000) { # NULL      } elsif ($self->{next_char} == 0x0000) { # NULL
203        !!!parse-error (type => 'NULL');        !!!parse-error (type => 'NULL');
204        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
205      }      }
206    };    };
207    $self->{prev_input_character} = [-1, -1, -1];    $self->{prev_char} = [-1, -1, -1];
208    $self->{next_input_character} = -1;    $self->{next_char} = -1;
209    
210    my $onerror = $_[2] || sub {    my $onerror = $_[2] || sub {
211      my (%opt) = @_;      my (%opt) = @_;
# Line 226  sub parse_string ($$$;$) { Line 226  sub parse_string ($$$;$) {
226  sub new ($) {  sub new ($) {
227    my $class = shift;    my $class = shift;
228    my $self = bless {}, $class;    my $self = bless {}, $class;
229    $self->{set_next_input_character} = sub {    $self->{set_next_char} = sub {
230      $self->{next_input_character} = -1;      $self->{next_char} = -1;
231    };    };
232    $self->{parse_error} = sub {    $self->{parse_error} = sub {
233      #      #
# Line 333  sub _initialize_tokenizer ($) { Line 333  sub _initialize_tokenizer ($) {
333    undef $self->{last_emitted_start_tag_name};    undef $self->{last_emitted_start_tag_name};
334    undef $self->{last_attribute_value_state};    undef $self->{last_attribute_value_state};
335    $self->{char} = [];    $self->{char} = [];
336    # $self->{next_input_character}    # $self->{next_char}
337    !!!next-input-character;    !!!next-input-character;
338    $self->{token} = [];    $self->{token} = [];
339    # $self->{escape}    # $self->{escape}
# Line 384  sub _get_next_token ($) { Line 384  sub _get_next_token ($) {
384    
385    A: {    A: {
386      if ($self->{state} == DATA_STATE) {      if ($self->{state} == DATA_STATE) {
387        if ($self->{next_input_character} == 0x0026) { # &        if ($self->{next_char} == 0x0026) { # &
388          if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA          if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
389              not $self->{escape}) {              not $self->{escape}) {
390            $self->{state} = ENTITY_DATA_STATE;            $self->{state} = ENTITY_DATA_STATE;
# Line 393  sub _get_next_token ($) { Line 393  sub _get_next_token ($) {
393          } else {          } else {
394            #            #
395          }          }
396        } elsif ($self->{next_input_character} == 0x002D) { # -        } elsif ($self->{next_char} == 0x002D) { # -
397          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
398            unless ($self->{escape}) {            unless ($self->{escape}) {
399              if ($self->{prev_input_character}->[0] == 0x002D and # -              if ($self->{prev_char}->[0] == 0x002D and # -
400                  $self->{prev_input_character}->[1] == 0x0021 and # !                  $self->{prev_char}->[1] == 0x0021 and # !
401                  $self->{prev_input_character}->[2] == 0x003C) { # <                  $self->{prev_char}->[2] == 0x003C) { # <
402                $self->{escape} = 1;                $self->{escape} = 1;
403              }              }
404            }            }
405          }          }
406                    
407          #          #
408        } elsif ($self->{next_input_character} == 0x003C) { # <        } elsif ($self->{next_char} == 0x003C) { # <
409          if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA          if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
410              (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA              (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
411               not $self->{escape})) {               not $self->{escape})) {
# Line 415  sub _get_next_token ($) { Line 415  sub _get_next_token ($) {
415          } else {          } else {
416            #            #
417          }          }
418        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
419          if ($self->{escape} and          if ($self->{escape} and
420              ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA              ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
421            if ($self->{prev_input_character}->[0] == 0x002D and # -            if ($self->{prev_char}->[0] == 0x002D and # -
422                $self->{prev_input_character}->[1] == 0x002D) { # -                $self->{prev_char}->[1] == 0x002D) { # -
423              delete $self->{escape};              delete $self->{escape};
424            }            }
425          }          }
426                    
427          #          #
428        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
429          !!!emit ({type => END_OF_FILE_TOKEN});          !!!emit ({type => END_OF_FILE_TOKEN});
430          last A; ## TODO: ok?          last A; ## TODO: ok?
431        }        }
432        # Anything else        # Anything else
433        my $token = {type => CHARACTER_TOKEN,        my $token = {type => CHARACTER_TOKEN,
434                     data => chr $self->{next_input_character}};                     data => chr $self->{next_char}};
435        ## Stay in the data state        ## Stay in the data state
436        !!!next-input-character;        !!!next-input-character;
437    
# Line 455  sub _get_next_token ($) { Line 455  sub _get_next_token ($) {
455        redo A;        redo A;
456      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
457        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
458          if ($self->{next_input_character} == 0x002F) { # /          if ($self->{next_char} == 0x002F) { # /
459            !!!next-input-character;            !!!next-input-character;
460            $self->{state} = CLOSE_TAG_OPEN_STATE;            $self->{state} = CLOSE_TAG_OPEN_STATE;
461            redo A;            redo A;
# Line 468  sub _get_next_token ($) { Line 468  sub _get_next_token ($) {
468            redo A;            redo A;
469          }          }
470        } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA        } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
471          if ($self->{next_input_character} == 0x0021) { # !          if ($self->{next_char} == 0x0021) { # !
472            $self->{state} = MARKUP_DECLARATION_OPEN_STATE;            $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
473            !!!next-input-character;            !!!next-input-character;
474            redo A;            redo A;
475          } elsif ($self->{next_input_character} == 0x002F) { # /          } elsif ($self->{next_char} == 0x002F) { # /
476            $self->{state} = CLOSE_TAG_OPEN_STATE;            $self->{state} = CLOSE_TAG_OPEN_STATE;
477            !!!next-input-character;            !!!next-input-character;
478            redo A;            redo A;
479          } elsif (0x0041 <= $self->{next_input_character} and          } elsif (0x0041 <= $self->{next_char} and
480                   $self->{next_input_character} <= 0x005A) { # A..Z                   $self->{next_char} <= 0x005A) { # A..Z
481            $self->{current_token}            $self->{current_token}
482              = {type => START_TAG_TOKEN,              = {type => START_TAG_TOKEN,
483                 tag_name => chr ($self->{next_input_character} + 0x0020)};                 tag_name => chr ($self->{next_char} + 0x0020)};
484            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
485            !!!next-input-character;            !!!next-input-character;
486            redo A;            redo A;
487          } elsif (0x0061 <= $self->{next_input_character} and          } elsif (0x0061 <= $self->{next_char} and
488                   $self->{next_input_character} <= 0x007A) { # a..z                   $self->{next_char} <= 0x007A) { # a..z
489            $self->{current_token} = {type => START_TAG_TOKEN,            $self->{current_token} = {type => START_TAG_TOKEN,
490                              tag_name => chr ($self->{next_input_character})};                              tag_name => chr ($self->{next_char})};
491            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
492            !!!next-input-character;            !!!next-input-character;
493            redo A;            redo A;
494          } elsif ($self->{next_input_character} == 0x003E) { # >          } elsif ($self->{next_char} == 0x003E) { # >
495            !!!parse-error (type => 'empty start tag');            !!!parse-error (type => 'empty start tag');
496            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
497            !!!next-input-character;            !!!next-input-character;
# Line 499  sub _get_next_token ($) { Line 499  sub _get_next_token ($) {
499            !!!emit ({type => CHARACTER_TOKEN, data => '<>'});            !!!emit ({type => CHARACTER_TOKEN, data => '<>'});
500    
501            redo A;            redo A;
502          } elsif ($self->{next_input_character} == 0x003F) { # ?          } elsif ($self->{next_char} == 0x003F) { # ?
503            !!!parse-error (type => 'pio');            !!!parse-error (type => 'pio');
504            $self->{state} = BOGUS_COMMENT_STATE;            $self->{state} = BOGUS_COMMENT_STATE;
505            ## $self->{next_input_character} is intentionally left as is            ## $self->{next_char} is intentionally left as is
506            redo A;            redo A;
507          } else {          } else {
508            !!!parse-error (type => 'bare stago');            !!!parse-error (type => 'bare stago');
# Line 522  sub _get_next_token ($) { Line 522  sub _get_next_token ($) {
522            ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>            ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
523            my @next_char;            my @next_char;
524            TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {            TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
525              push @next_char, $self->{next_input_character};              push @next_char, $self->{next_char};
526              my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);              my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
527              my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;              my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
528              if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {              if ($self->{next_char} == $c or $self->{next_char} == $C) {
529                !!!next-input-character;                !!!next-input-character;
530                next TAGNAME;                next TAGNAME;
531              } else {              } else {
532                $self->{next_input_character} = shift @next_char; # reconsume                $self->{next_char} = shift @next_char; # reconsume
533                !!!back-next-input-character (@next_char);                !!!back-next-input-character (@next_char);
534                $self->{state} = DATA_STATE;                $self->{state} = DATA_STATE;
535    
# Line 538  sub _get_next_token ($) { Line 538  sub _get_next_token ($) {
538                redo A;                redo A;
539              }              }
540            }            }
541            push @next_char, $self->{next_input_character};            push @next_char, $self->{next_char};
542                
543            unless ($self->{next_input_character} == 0x0009 or # HT            unless ($self->{next_char} == 0x0009 or # HT
544                    $self->{next_input_character} == 0x000A or # LF                    $self->{next_char} == 0x000A or # LF
545                    $self->{next_input_character} == 0x000B or # VT                    $self->{next_char} == 0x000B or # VT
546                    $self->{next_input_character} == 0x000C or # FF                    $self->{next_char} == 0x000C or # FF
547                    $self->{next_input_character} == 0x0020 or # SP                    $self->{next_char} == 0x0020 or # SP
548                    $self->{next_input_character} == 0x003E or # >                    $self->{next_char} == 0x003E or # >
549                    $self->{next_input_character} == 0x002F or # /                    $self->{next_char} == 0x002F or # /
550                    $self->{next_input_character} == -1) {                    $self->{next_char} == -1) {
551              $self->{next_input_character} = shift @next_char; # reconsume              $self->{next_char} = shift @next_char; # reconsume
552              !!!back-next-input-character (@next_char);              !!!back-next-input-character (@next_char);
553              $self->{state} = DATA_STATE;              $self->{state} = DATA_STATE;
554              !!!emit ({type => CHARACTER_TOKEN, data => '</'});              !!!emit ({type => CHARACTER_TOKEN, data => '</'});
555              redo A;              redo A;
556            } else {            } else {
557              $self->{next_input_character} = shift @next_char;              $self->{next_char} = shift @next_char;
558              !!!back-next-input-character (@next_char);              !!!back-next-input-character (@next_char);
559              # and consume...              # and consume...
560            }            }
# Line 567  sub _get_next_token ($) { Line 567  sub _get_next_token ($) {
567          }          }
568        }        }
569                
570        if (0x0041 <= $self->{next_input_character} and        if (0x0041 <= $self->{next_char} and
571            $self->{next_input_character} <= 0x005A) { # A..Z            $self->{next_char} <= 0x005A) { # A..Z
572          $self->{current_token} = {type => END_TAG_TOKEN,          $self->{current_token} = {type => END_TAG_TOKEN,
573                            tag_name => chr ($self->{next_input_character} + 0x0020)};                            tag_name => chr ($self->{next_char} + 0x0020)};
574          $self->{state} = TAG_NAME_STATE;          $self->{state} = TAG_NAME_STATE;
575          !!!next-input-character;          !!!next-input-character;
576          redo A;          redo A;
577        } elsif (0x0061 <= $self->{next_input_character} and        } elsif (0x0061 <= $self->{next_char} and
578                 $self->{next_input_character} <= 0x007A) { # a..z                 $self->{next_char} <= 0x007A) { # a..z
579          $self->{current_token} = {type => END_TAG_TOKEN,          $self->{current_token} = {type => END_TAG_TOKEN,
580                            tag_name => chr ($self->{next_input_character})};                            tag_name => chr ($self->{next_char})};
581          $self->{state} = TAG_NAME_STATE;          $self->{state} = TAG_NAME_STATE;
582          !!!next-input-character;          !!!next-input-character;
583          redo A;          redo A;
584        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
585          !!!parse-error (type => 'empty end tag');          !!!parse-error (type => 'empty end tag');
586          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
587          !!!next-input-character;          !!!next-input-character;
588          redo A;          redo A;
589        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
590          !!!parse-error (type => 'bare etago');          !!!parse-error (type => 'bare etago');
591          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
592          # reconsume          # reconsume
# Line 597  sub _get_next_token ($) { Line 597  sub _get_next_token ($) {
597        } else {        } else {
598          !!!parse-error (type => 'bogus end tag');          !!!parse-error (type => 'bogus end tag');
599          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
600          ## $self->{next_input_character} is intentionally left as is          ## $self->{next_char} is intentionally left as is
601          redo A;          redo A;
602        }        }
603      } elsif ($self->{state} == TAG_NAME_STATE) {      } elsif ($self->{state} == TAG_NAME_STATE) {
604        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
605            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
606            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
607            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
608            $self->{next_input_character} == 0x0020) { # SP            $self->{next_char} == 0x0020) { # SP
609          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
610          !!!next-input-character;          !!!next-input-character;
611          redo A;          redo A;
612        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
613          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
614            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
615                = not defined $self->{last_emitted_start_tag_name};                = not defined $self->{last_emitted_start_tag_name};
# Line 628  sub _get_next_token ($) { Line 628  sub _get_next_token ($) {
628          !!!emit ($self->{current_token}); # start tag or end tag          !!!emit ($self->{current_token}); # start tag or end tag
629    
630          redo A;          redo A;
631        } elsif (0x0041 <= $self->{next_input_character} and        } elsif (0x0041 <= $self->{next_char} and
632                 $self->{next_input_character} <= 0x005A) { # A..Z                 $self->{next_char} <= 0x005A) { # A..Z
633          $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);          $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
634            # start tag or end tag            # start tag or end tag
635          ## Stay in this state          ## Stay in this state
636          !!!next-input-character;          !!!next-input-character;
637          redo A;          redo A;
638        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
639          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
640          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
641            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
# Line 655  sub _get_next_token ($) { Line 655  sub _get_next_token ($) {
655          !!!emit ($self->{current_token}); # start tag or end tag          !!!emit ($self->{current_token}); # start tag or end tag
656    
657          redo A;          redo A;
658        } elsif ($self->{next_input_character} == 0x002F) { # /        } elsif ($self->{next_char} == 0x002F) { # /
659          !!!next-input-character;          !!!next-input-character;
660          if ($self->{next_input_character} == 0x003E and # >          if ($self->{next_char} == 0x003E and # >
661              $self->{current_token}->{type} == START_TAG_TOKEN and              $self->{current_token}->{type} == START_TAG_TOKEN and
662              $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {              $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
663            # permitted slash            # permitted slash
# Line 669  sub _get_next_token ($) { Line 669  sub _get_next_token ($) {
669          # next-input-character is already done          # next-input-character is already done
670          redo A;          redo A;
671        } else {        } else {
672          $self->{current_token}->{tag_name} .= chr $self->{next_input_character};          $self->{current_token}->{tag_name} .= chr $self->{next_char};
673            # start tag or end tag            # start tag or end tag
674          ## Stay in the state          ## Stay in the state
675          !!!next-input-character;          !!!next-input-character;
676          redo A;          redo A;
677        }        }
678      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
679        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
680            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
681            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
682            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
683            $self->{next_input_character} == 0x0020) { # SP            $self->{next_char} == 0x0020) { # SP
684          ## Stay in the state          ## Stay in the state
685          !!!next-input-character;          !!!next-input-character;
686          redo A;          redo A;
687        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
688          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
689            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
690                = not defined $self->{last_emitted_start_tag_name};                = not defined $self->{last_emitted_start_tag_name};
# Line 703  sub _get_next_token ($) { Line 703  sub _get_next_token ($) {
703          !!!emit ($self->{current_token}); # start tag or end tag          !!!emit ($self->{current_token}); # start tag or end tag
704    
705          redo A;          redo A;
706        } elsif (0x0041 <= $self->{next_input_character} and        } elsif (0x0041 <= $self->{next_char} and
707                 $self->{next_input_character} <= 0x005A) { # A..Z                 $self->{next_char} <= 0x005A) { # A..Z
708          $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),          $self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020),
709                                value => ''};                                value => ''};
710          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
711          !!!next-input-character;          !!!next-input-character;
712          redo A;          redo A;
713        } elsif ($self->{next_input_character} == 0x002F) { # /        } elsif ($self->{next_char} == 0x002F) { # /
714          !!!next-input-character;          !!!next-input-character;
715          if ($self->{next_input_character} == 0x003E and # >          if ($self->{next_char} == 0x003E and # >
716              $self->{current_token}->{type} == START_TAG_TOKEN and              $self->{current_token}->{type} == START_TAG_TOKEN and
717              $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {              $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
718            # permitted slash            # permitted slash
# Line 723  sub _get_next_token ($) { Line 723  sub _get_next_token ($) {
723          ## Stay in the state          ## Stay in the state
724          # next-input-character is already done          # next-input-character is already done
725          redo A;          redo A;
726        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
727          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
728          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
729            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
# Line 748  sub _get_next_token ($) { Line 748  sub _get_next_token ($) {
748               0x0022 => 1, # "               0x0022 => 1, # "
749               0x0027 => 1, # '               0x0027 => 1, # '
750               0x003D => 1, # =               0x003D => 1, # =
751              }->{$self->{next_input_character}}) {              }->{$self->{next_char}}) {
752            !!!parse-error (type => 'bad attribute name');            !!!parse-error (type => 'bad attribute name');
753          }          }
754          $self->{current_attribute} = {name => chr ($self->{next_input_character}),          $self->{current_attribute} = {name => chr ($self->{next_char}),
755                                value => ''};                                value => ''};
756          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
757          !!!next-input-character;          !!!next-input-character;
# Line 769  sub _get_next_token ($) { Line 769  sub _get_next_token ($) {
769          }          }
770        }; # $before_leave        }; # $before_leave
771    
772        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
773            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
774            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
775            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
776            $self->{next_input_character} == 0x0020) { # SP            $self->{next_char} == 0x0020) { # SP
777          $before_leave->();          $before_leave->();
778          $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;          $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
779          !!!next-input-character;          !!!next-input-character;
780          redo A;          redo A;
781        } elsif ($self->{next_input_character} == 0x003D) { # =        } elsif ($self->{next_char} == 0x003D) { # =
782          $before_leave->();          $before_leave->();
783          $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;          $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
784          !!!next-input-character;          !!!next-input-character;
785          redo A;          redo A;
786        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
787          $before_leave->();          $before_leave->();
788          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
789            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
# Line 803  sub _get_next_token ($) { Line 803  sub _get_next_token ($) {
803          !!!emit ($self->{current_token}); # start tag or end tag          !!!emit ($self->{current_token}); # start tag or end tag
804    
805          redo A;          redo A;
806        } elsif (0x0041 <= $self->{next_input_character} and        } elsif (0x0041 <= $self->{next_char} and
807                 $self->{next_input_character} <= 0x005A) { # A..Z                 $self->{next_char} <= 0x005A) { # A..Z
808          $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);          $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
809          ## Stay in the state          ## Stay in the state
810          !!!next-input-character;          !!!next-input-character;
811          redo A;          redo A;
812        } elsif ($self->{next_input_character} == 0x002F) { # /        } elsif ($self->{next_char} == 0x002F) { # /
813          $before_leave->();          $before_leave->();
814          !!!next-input-character;          !!!next-input-character;
815          if ($self->{next_input_character} == 0x003E and # >          if ($self->{next_char} == 0x003E and # >
816              $self->{current_token}->{type} == START_TAG_TOKEN and              $self->{current_token}->{type} == START_TAG_TOKEN and
817              $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {              $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
818            # permitted slash            # permitted slash
# Line 823  sub _get_next_token ($) { Line 823  sub _get_next_token ($) {
823          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
824          # next-input-character is already done          # next-input-character is already done
825          redo A;          redo A;
826        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
827          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
828          $before_leave->();          $before_leave->();
829          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
# Line 845  sub _get_next_token ($) { Line 845  sub _get_next_token ($) {
845    
846          redo A;          redo A;
847        } else {        } else {
848          if ($self->{next_input_character} == 0x0022 or # "          if ($self->{next_char} == 0x0022 or # "
849              $self->{next_input_character} == 0x0027) { # '              $self->{next_char} == 0x0027) { # '
850            !!!parse-error (type => 'bad attribute name');            !!!parse-error (type => 'bad attribute name');
851          }          }
852          $self->{current_attribute}->{name} .= chr ($self->{next_input_character});          $self->{current_attribute}->{name} .= chr ($self->{next_char});
853          ## Stay in the state          ## Stay in the state
854          !!!next-input-character;          !!!next-input-character;
855          redo A;          redo A;
856        }        }
857      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
858        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
859            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
860            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
861            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
862            $self->{next_input_character} == 0x0020) { # SP            $self->{next_char} == 0x0020) { # SP
863          ## Stay in the state          ## Stay in the state
864          !!!next-input-character;          !!!next-input-character;
865          redo A;          redo A;
866        } elsif ($self->{next_input_character} == 0x003D) { # =        } elsif ($self->{next_char} == 0x003D) { # =
867          $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;          $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
868          !!!next-input-character;          !!!next-input-character;
869          redo A;          redo A;
870        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
871          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
872            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
873                = not defined $self->{last_emitted_start_tag_name};                = not defined $self->{last_emitted_start_tag_name};
# Line 886  sub _get_next_token ($) { Line 886  sub _get_next_token ($) {
886          !!!emit ($self->{current_token}); # start tag or end tag          !!!emit ($self->{current_token}); # start tag or end tag
887    
888          redo A;          redo A;
889        } elsif (0x0041 <= $self->{next_input_character} and        } elsif (0x0041 <= $self->{next_char} and
890                 $self->{next_input_character} <= 0x005A) { # A..Z                 $self->{next_char} <= 0x005A) { # A..Z
891          $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),          $self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020),
892                                value => ''};                                value => ''};
893          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
894          !!!next-input-character;          !!!next-input-character;
895          redo A;          redo A;
896        } elsif ($self->{next_input_character} == 0x002F) { # /        } elsif ($self->{next_char} == 0x002F) { # /
897          !!!next-input-character;          !!!next-input-character;
898          if ($self->{next_input_character} == 0x003E and # >          if ($self->{next_char} == 0x003E and # >
899              $self->{current_token}->{type} == START_TAG_TOKEN and              $self->{current_token}->{type} == START_TAG_TOKEN and
900              $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {              $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
901            # permitted slash            # permitted slash
# Line 907  sub _get_next_token ($) { Line 907  sub _get_next_token ($) {
907          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
908          # next-input-character is already done          # next-input-character is already done
909          redo A;          redo A;
910        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
911          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
912          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
913            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
# Line 928  sub _get_next_token ($) { Line 928  sub _get_next_token ($) {
928    
929          redo A;          redo A;
930        } else {        } else {
931          $self->{current_attribute} = {name => chr ($self->{next_input_character}),          $self->{current_attribute} = {name => chr ($self->{next_char}),
932                                value => ''};                                value => ''};
933          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
934          !!!next-input-character;          !!!next-input-character;
935          redo A;                  redo A;        
936        }        }
937      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
938        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
939            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
940            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
941            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
942            $self->{next_input_character} == 0x0020) { # SP                  $self->{next_char} == 0x0020) { # SP      
943          ## Stay in the state          ## Stay in the state
944          !!!next-input-character;          !!!next-input-character;
945          redo A;          redo A;
946        } elsif ($self->{next_input_character} == 0x0022) { # "        } elsif ($self->{next_char} == 0x0022) { # "
947          $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;          $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
948          !!!next-input-character;          !!!next-input-character;
949          redo A;          redo A;
950        } elsif ($self->{next_input_character} == 0x0026) { # &        } elsif ($self->{next_char} == 0x0026) { # &
951          $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;          $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
952          ## reconsume          ## reconsume
953          redo A;          redo A;
954        } elsif ($self->{next_input_character} == 0x0027) { # '        } elsif ($self->{next_char} == 0x0027) { # '
955          $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;          $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
956          !!!next-input-character;          !!!next-input-character;
957          redo A;          redo A;
958        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
959          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
960            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
961                = not defined $self->{last_emitted_start_tag_name};                = not defined $self->{last_emitted_start_tag_name};
# Line 974  sub _get_next_token ($) { Line 974  sub _get_next_token ($) {
974          !!!emit ($self->{current_token}); # start tag or end tag          !!!emit ($self->{current_token}); # start tag or end tag
975    
976          redo A;          redo A;
977        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
978          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
979          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
980            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
# Line 995  sub _get_next_token ($) { Line 995  sub _get_next_token ($) {
995    
996          redo A;          redo A;
997        } else {        } else {
998          if ($self->{next_input_character} == 0x003D) { # =          if ($self->{next_char} == 0x003D) { # =
999            !!!parse-error (type => 'bad attribute value');            !!!parse-error (type => 'bad attribute value');
1000          }          }
1001          $self->{current_attribute}->{value} .= chr ($self->{next_input_character});          $self->{current_attribute}->{value} .= chr ($self->{next_char});
1002          $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;          $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1003          !!!next-input-character;          !!!next-input-character;
1004          redo A;          redo A;
1005        }        }
1006      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1007        if ($self->{next_input_character} == 0x0022) { # "        if ($self->{next_char} == 0x0022) { # "
1008          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1009          !!!next-input-character;          !!!next-input-character;
1010          redo A;          redo A;
1011        } elsif ($self->{next_input_character} == 0x0026) { # &        } elsif ($self->{next_char} == 0x0026) { # &
1012          $self->{last_attribute_value_state} = $self->{state};          $self->{last_attribute_value_state} = $self->{state};
1013          $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;          $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1014          !!!next-input-character;          !!!next-input-character;
1015          redo A;          redo A;
1016        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1017          !!!parse-error (type => 'unclosed attribute value');          !!!parse-error (type => 'unclosed attribute value');
1018          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1019            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
# Line 1034  sub _get_next_token ($) { Line 1034  sub _get_next_token ($) {
1034    
1035          redo A;          redo A;
1036        } else {        } else {
1037          $self->{current_attribute}->{value} .= chr ($self->{next_input_character});          $self->{current_attribute}->{value} .= chr ($self->{next_char});
1038          ## Stay in the state          ## Stay in the state
1039          !!!next-input-character;          !!!next-input-character;
1040          redo A;          redo A;
1041        }        }
1042      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1043        if ($self->{next_input_character} == 0x0027) { # '        if ($self->{next_char} == 0x0027) { # '
1044          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1045          !!!next-input-character;          !!!next-input-character;
1046          redo A;          redo A;
1047        } elsif ($self->{next_input_character} == 0x0026) { # &        } elsif ($self->{next_char} == 0x0026) { # &
1048          $self->{last_attribute_value_state} = $self->{state};          $self->{last_attribute_value_state} = $self->{state};
1049          $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;          $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1050          !!!next-input-character;          !!!next-input-character;
1051          redo A;          redo A;
1052        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1053          !!!parse-error (type => 'unclosed attribute value');          !!!parse-error (type => 'unclosed attribute value');
1054          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1055            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
# Line 1070  sub _get_next_token ($) { Line 1070  sub _get_next_token ($) {
1070    
1071          redo A;          redo A;
1072        } else {        } else {
1073          $self->{current_attribute}->{value} .= chr ($self->{next_input_character});          $self->{current_attribute}->{value} .= chr ($self->{next_char});
1074          ## Stay in the state          ## Stay in the state
1075          !!!next-input-character;          !!!next-input-character;
1076          redo A;          redo A;
1077        }        }
1078      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1079        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
1080            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
1081            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
1082            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
1083            $self->{next_input_character} == 0x0020) { # SP            $self->{next_char} == 0x0020) { # SP
1084          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1085          !!!next-input-character;          !!!next-input-character;
1086          redo A;          redo A;
1087        } elsif ($self->{next_input_character} == 0x0026) { # &        } elsif ($self->{next_char} == 0x0026) { # &
1088          $self->{last_attribute_value_state} = $self->{state};          $self->{last_attribute_value_state} = $self->{state};
1089          $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;          $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1090          !!!next-input-character;          !!!next-input-character;
1091          redo A;          redo A;
1092        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1093          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1094            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
1095                = not defined $self->{last_emitted_start_tag_name};                = not defined $self->{last_emitted_start_tag_name};
# Line 1108  sub _get_next_token ($) { Line 1108  sub _get_next_token ($) {
1108          !!!emit ($self->{current_token}); # start tag or end tag          !!!emit ($self->{current_token}); # start tag or end tag
1109    
1110          redo A;          redo A;
1111        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1112          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
1113          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1114            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
# Line 1133  sub _get_next_token ($) { Line 1133  sub _get_next_token ($) {
1133               0x0022 => 1, # "               0x0022 => 1, # "
1134               0x0027 => 1, # '               0x0027 => 1, # '
1135               0x003D => 1, # =               0x003D => 1, # =
1136              }->{$self->{next_input_character}}) {              }->{$self->{next_char}}) {
1137            !!!parse-error (type => 'bad attribute value');            !!!parse-error (type => 'bad attribute value');
1138          }          }
1139          $self->{current_attribute}->{value} .= chr ($self->{next_input_character});          $self->{current_attribute}->{value} .= chr ($self->{next_char});
1140          ## Stay in the state          ## Stay in the state
1141          !!!next-input-character;          !!!next-input-character;
1142          redo A;          redo A;
# Line 1162  sub _get_next_token ($) { Line 1162  sub _get_next_token ($) {
1162        # next-input-character is already done        # next-input-character is already done
1163        redo A;        redo A;
1164      } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1165        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
1166            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
1167            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
1168            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
1169            $self->{next_input_character} == 0x0020) { # SP            $self->{next_char} == 0x0020) { # SP
1170          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1171          !!!next-input-character;          !!!next-input-character;
1172          redo A;          redo A;
1173        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1174          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1175            $self->{current_token}->{first_start_tag}            $self->{current_token}->{first_start_tag}
1176                = not defined $self->{last_emitted_start_tag_name};                = not defined $self->{last_emitted_start_tag_name};
# Line 1189  sub _get_next_token ($) { Line 1189  sub _get_next_token ($) {
1189          !!!emit ($self->{current_token}); # start tag or end tag          !!!emit ($self->{current_token}); # start tag or end tag
1190    
1191          redo A;          redo A;
1192        } elsif ($self->{next_input_character} == 0x002F) { # /        } elsif ($self->{next_char} == 0x002F) { # /
1193          !!!next-input-character;          !!!next-input-character;
1194          if ($self->{next_input_character} == 0x003E and # >          if ($self->{next_char} == 0x003E and # >
1195              $self->{current_token}->{type} == START_TAG_TOKEN and              $self->{current_token}->{type} == START_TAG_TOKEN and
1196              $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {              $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1197            # permitted slash            # permitted slash
# Line 1214  sub _get_next_token ($) { Line 1214  sub _get_next_token ($) {
1214        my $token = {type => COMMENT_TOKEN, data => ''};        my $token = {type => COMMENT_TOKEN, data => ''};
1215    
1216        BC: {        BC: {
1217          if ($self->{next_input_character} == 0x003E) { # >          if ($self->{next_char} == 0x003E) { # >
1218            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
1219            !!!next-input-character;            !!!next-input-character;
1220    
1221            !!!emit ($token);            !!!emit ($token);
1222    
1223            redo A;            redo A;
1224          } elsif ($self->{next_input_character} == -1) {          } elsif ($self->{next_char} == -1) {
1225            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
1226            ## reconsume            ## reconsume
1227    
# Line 1229  sub _get_next_token ($) { Line 1229  sub _get_next_token ($) {
1229    
1230            redo A;            redo A;
1231          } else {          } else {
1232            $token->{data} .= chr ($self->{next_input_character});            $token->{data} .= chr ($self->{next_char});
1233            !!!next-input-character;            !!!next-input-character;
1234            redo BC;            redo BC;
1235          }          }
# Line 1238  sub _get_next_token ($) { Line 1238  sub _get_next_token ($) {
1238        ## (only happen if PCDATA state)        ## (only happen if PCDATA state)
1239    
1240        my @next_char;        my @next_char;
1241        push @next_char, $self->{next_input_character};        push @next_char, $self->{next_char};
1242                
1243        if ($self->{next_input_character} == 0x002D) { # -        if ($self->{next_char} == 0x002D) { # -
1244          !!!next-input-character;          !!!next-input-character;
1245          push @next_char, $self->{next_input_character};          push @next_char, $self->{next_char};
1246          if ($self->{next_input_character} == 0x002D) { # -          if ($self->{next_char} == 0x002D) { # -
1247            $self->{current_token} = {type => COMMENT_TOKEN, data => ''};            $self->{current_token} = {type => COMMENT_TOKEN, data => ''};
1248            $self->{state} = COMMENT_START_STATE;            $self->{state} = COMMENT_START_STATE;
1249            !!!next-input-character;            !!!next-input-character;
1250            redo A;            redo A;
1251          }          }
1252        } elsif ($self->{next_input_character} == 0x0044 or # D        } elsif ($self->{next_char} == 0x0044 or # D
1253                 $self->{next_input_character} == 0x0064) { # d                 $self->{next_char} == 0x0064) { # d
1254          !!!next-input-character;          !!!next-input-character;
1255          push @next_char, $self->{next_input_character};          push @next_char, $self->{next_char};
1256          if ($self->{next_input_character} == 0x004F or # O          if ($self->{next_char} == 0x004F or # O
1257              $self->{next_input_character} == 0x006F) { # o              $self->{next_char} == 0x006F) { # o
1258            !!!next-input-character;            !!!next-input-character;
1259            push @next_char, $self->{next_input_character};            push @next_char, $self->{next_char};
1260            if ($self->{next_input_character} == 0x0043 or # C            if ($self->{next_char} == 0x0043 or # C
1261                $self->{next_input_character} == 0x0063) { # c                $self->{next_char} == 0x0063) { # c
1262              !!!next-input-character;              !!!next-input-character;
1263              push @next_char, $self->{next_input_character};              push @next_char, $self->{next_char};
1264              if ($self->{next_input_character} == 0x0054 or # T              if ($self->{next_char} == 0x0054 or # T
1265                  $self->{next_input_character} == 0x0074) { # t                  $self->{next_char} == 0x0074) { # t
1266                !!!next-input-character;                !!!next-input-character;
1267                push @next_char, $self->{next_input_character};                push @next_char, $self->{next_char};
1268                if ($self->{next_input_character} == 0x0059 or # Y                if ($self->{next_char} == 0x0059 or # Y
1269                    $self->{next_input_character} == 0x0079) { # y                    $self->{next_char} == 0x0079) { # y
1270                  !!!next-input-character;                  !!!next-input-character;
1271                  push @next_char, $self->{next_input_character};                  push @next_char, $self->{next_char};
1272                  if ($self->{next_input_character} == 0x0050 or # P                  if ($self->{next_char} == 0x0050 or # P
1273                      $self->{next_input_character} == 0x0070) { # p                      $self->{next_char} == 0x0070) { # p
1274                    !!!next-input-character;                    !!!next-input-character;
1275                    push @next_char, $self->{next_input_character};                    push @next_char, $self->{next_char};
1276                    if ($self->{next_input_character} == 0x0045 or # E                    if ($self->{next_char} == 0x0045 or # E
1277                        $self->{next_input_character} == 0x0065) { # e                        $self->{next_char} == 0x0065) { # e
1278                      ## ISSUE: What a stupid code this is!                      ## ISSUE: What a stupid code this is!
1279                      $self->{state} = DOCTYPE_STATE;                      $self->{state} = DOCTYPE_STATE;
1280                      !!!next-input-character;                      !!!next-input-character;
# Line 1288  sub _get_next_token ($) { Line 1288  sub _get_next_token ($) {
1288        }        }
1289    
1290        !!!parse-error (type => 'bogus comment');        !!!parse-error (type => 'bogus comment');
1291        $self->{next_input_character} = shift @next_char;        $self->{next_char} = shift @next_char;
1292        !!!back-next-input-character (@next_char);        !!!back-next-input-character (@next_char);
1293        $self->{state} = BOGUS_COMMENT_STATE;        $self->{state} = BOGUS_COMMENT_STATE;
1294        redo A;        redo A;
# Line 1296  sub _get_next_token ($) { Line 1296  sub _get_next_token ($) {
1296        ## ISSUE: typos in spec: chacacters, is is a parse error        ## ISSUE: typos in spec: chacacters, is is a parse error
1297        ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?        ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1298      } elsif ($self->{state} == COMMENT_START_STATE) {      } elsif ($self->{state} == COMMENT_START_STATE) {
1299        if ($self->{next_input_character} == 0x002D) { # -        if ($self->{next_char} == 0x002D) { # -
1300          $self->{state} = COMMENT_START_DASH_STATE;          $self->{state} = COMMENT_START_DASH_STATE;
1301          !!!next-input-character;          !!!next-input-character;
1302          redo A;          redo A;
1303        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1304          !!!parse-error (type => 'bogus comment');          !!!parse-error (type => 'bogus comment');
1305          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1306          !!!next-input-character;          !!!next-input-character;
# Line 1308  sub _get_next_token ($) { Line 1308  sub _get_next_token ($) {
1308          !!!emit ($self->{current_token}); # comment          !!!emit ($self->{current_token}); # comment
1309    
1310          redo A;          redo A;
1311        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1312          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1313          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1314          ## reconsume          ## reconsume
# Line 1318  sub _get_next_token ($) { Line 1318  sub _get_next_token ($) {
1318          redo A;          redo A;
1319        } else {        } else {
1320          $self->{current_token}->{data} # comment          $self->{current_token}->{data} # comment
1321              .= chr ($self->{next_input_character});              .= chr ($self->{next_char});
1322          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
1323          !!!next-input-character;          !!!next-input-character;
1324          redo A;          redo A;
1325        }        }
1326      } elsif ($self->{state} == COMMENT_START_DASH_STATE) {      } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1327        if ($self->{next_input_character} == 0x002D) { # -        if ($self->{next_char} == 0x002D) { # -
1328          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
1329          !!!next-input-character;          !!!next-input-character;
1330          redo A;          redo A;
1331        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1332          !!!parse-error (type => 'bogus comment');          !!!parse-error (type => 'bogus comment');
1333          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1334          !!!next-input-character;          !!!next-input-character;
# Line 1336  sub _get_next_token ($) { Line 1336  sub _get_next_token ($) {
1336          !!!emit ($self->{current_token}); # comment          !!!emit ($self->{current_token}); # comment
1337    
1338          redo A;          redo A;
1339        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1340          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1341          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1342          ## reconsume          ## reconsume
# Line 1346  sub _get_next_token ($) { Line 1346  sub _get_next_token ($) {
1346          redo A;          redo A;
1347        } else {        } else {
1348          $self->{current_token}->{data} # comment          $self->{current_token}->{data} # comment
1349              .= '-' . chr ($self->{next_input_character});              .= '-' . chr ($self->{next_char});
1350          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
1351          !!!next-input-character;          !!!next-input-character;
1352          redo A;          redo A;
1353        }        }
1354      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
1355        if ($self->{next_input_character} == 0x002D) { # -        if ($self->{next_char} == 0x002D) { # -
1356          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
1357          !!!next-input-character;          !!!next-input-character;
1358          redo A;          redo A;
1359        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1360          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1361          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1362          ## reconsume          ## reconsume
# Line 1365  sub _get_next_token ($) { Line 1365  sub _get_next_token ($) {
1365    
1366          redo A;          redo A;
1367        } else {        } else {
1368          $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment          $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1369          ## Stay in the state          ## Stay in the state
1370          !!!next-input-character;          !!!next-input-character;
1371          redo A;          redo A;
1372        }        }
1373      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
1374        if ($self->{next_input_character} == 0x002D) { # -        if ($self->{next_char} == 0x002D) { # -
1375          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
1376          !!!next-input-character;          !!!next-input-character;
1377          redo A;          redo A;
1378        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1379          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1380          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1381          ## reconsume          ## reconsume
# Line 1384  sub _get_next_token ($) { Line 1384  sub _get_next_token ($) {
1384    
1385          redo A;          redo A;
1386        } else {        } else {
1387          $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment          $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
1388          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
1389          !!!next-input-character;          !!!next-input-character;
1390          redo A;          redo A;
1391        }        }
1392      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
1393        if ($self->{next_input_character} == 0x003E) { # >        if ($self->{next_char} == 0x003E) { # >
1394          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1395          !!!next-input-character;          !!!next-input-character;
1396    
1397          !!!emit ($self->{current_token}); # comment          !!!emit ($self->{current_token}); # comment
1398    
1399          redo A;          redo A;
1400        } elsif ($self->{next_input_character} == 0x002D) { # -        } elsif ($self->{next_char} == 0x002D) { # -
1401          !!!parse-error (type => 'dash in comment');          !!!parse-error (type => 'dash in comment');
1402          $self->{current_token}->{data} .= '-'; # comment          $self->{current_token}->{data} .= '-'; # comment
1403          ## Stay in the state          ## Stay in the state
1404          !!!next-input-character;          !!!next-input-character;
1405          redo A;          redo A;
1406        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1407          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1408          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1409          ## reconsume          ## reconsume
# Line 1413  sub _get_next_token ($) { Line 1413  sub _get_next_token ($) {
1413          redo A;          redo A;
1414        } else {        } else {
1415          !!!parse-error (type => 'dash in comment');          !!!parse-error (type => 'dash in comment');
1416          $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment          $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
1417          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
1418          !!!next-input-character;          !!!next-input-character;
1419          redo A;          redo A;
1420        }        }
1421      } elsif ($self->{state} == DOCTYPE_STATE) {      } elsif ($self->{state} == DOCTYPE_STATE) {
1422        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
1423            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
1424            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
1425            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
1426            $self->{next_input_character} == 0x0020) { # SP            $self->{next_char} == 0x0020) { # SP
1427          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1428          !!!next-input-character;          !!!next-input-character;
1429          redo A;          redo A;
# Line 1434  sub _get_next_token ($) { Line 1434  sub _get_next_token ($) {
1434          redo A;          redo A;
1435        }        }
1436      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
1437        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
1438            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
1439            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
1440            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
1441            $self->{next_input_character} == 0x0020) { # SP            $self->{next_char} == 0x0020) { # SP
1442          ## Stay in the state          ## Stay in the state
1443          !!!next-input-character;          !!!next-input-character;
1444          redo A;          redo A;
1445        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1446          !!!parse-error (type => 'no DOCTYPE name');          !!!parse-error (type => 'no DOCTYPE name');
1447          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1448          !!!next-input-character;          !!!next-input-character;
# Line 1450  sub _get_next_token ($) { Line 1450  sub _get_next_token ($) {
1450          !!!emit ({type => DOCTYPE_TOKEN, quirks => 1});          !!!emit ({type => DOCTYPE_TOKEN, quirks => 1});
1451    
1452          redo A;          redo A;
1453        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1454          !!!parse-error (type => 'no DOCTYPE name');          !!!parse-error (type => 'no DOCTYPE name');
1455          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1456          ## reconsume          ## reconsume
# Line 1461  sub _get_next_token ($) { Line 1461  sub _get_next_token ($) {
1461        } else {        } else {
1462          $self->{current_token}          $self->{current_token}
1463              = {type => DOCTYPE_TOKEN,              = {type => DOCTYPE_TOKEN,
1464                 name => chr ($self->{next_input_character}),                 name => chr ($self->{next_char}),
1465                 #quirks => 0,                 #quirks => 0,
1466                };                };
1467  ## ISSUE: "Set the token's name name to the" in the spec  ## ISSUE: "Set the token's name name to the" in the spec
# Line 1471  sub _get_next_token ($) { Line 1471  sub _get_next_token ($) {
1471        }        }
1472      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
1473  ## ISSUE: Redundant "First," in the spec.  ## ISSUE: Redundant "First," in the spec.
1474        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
1475            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
1476            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
1477            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
1478            $self->{next_input_character} == 0x0020) { # SP            $self->{next_char} == 0x0020) { # SP
1479          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
1480          !!!next-input-character;          !!!next-input-character;
1481          redo A;          redo A;
1482        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1483          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1484          !!!next-input-character;          !!!next-input-character;
1485    
1486          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1487    
1488          redo A;          redo A;
1489        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1490          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
1491          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1492          ## reconsume          ## reconsume
# Line 1497  sub _get_next_token ($) { Line 1497  sub _get_next_token ($) {
1497          redo A;          redo A;
1498        } else {        } else {
1499          $self->{current_token}->{name}          $self->{current_token}->{name}
1500            .= chr ($self->{next_input_character}); # DOCTYPE            .= chr ($self->{next_char}); # DOCTYPE
1501          ## Stay in the state          ## Stay in the state
1502          !!!next-input-character;          !!!next-input-character;
1503          redo A;          redo A;
1504        }        }
1505      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
1506        if ($self->{next_input_character} == 0x0009 or # HT        if ($self->{next_char} == 0x0009 or # HT
1507            $self->{next_input_character} == 0x000A or # LF            $self->{next_char} == 0x000A or # LF
1508            $self->{next_input_character} == 0x000B or # VT            $self->{next_char} == 0x000B or # VT
1509            $self->{next_input_character} == 0x000C or # FF            $self->{next_char} == 0x000C or # FF
1510            $self->{next_input_character} == 0x0020) { # SP            $self->{next_char} == 0x0020) { # SP
1511          ## Stay in the state          ## Stay in the state
1512          !!!next-input-character;          !!!next-input-character;
1513          redo A;          redo A;
1514        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1515          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1516          !!!next-input-character;          !!!next-input-character;
1517    
1518          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1519    
1520          redo A;          redo A;
1521        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1522          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
1523          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1524          ## reconsume          ## reconsume
# Line 1527  sub _get_next_token ($) { Line 1527  sub _get_next_token ($) {
1527          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1528    
1529          redo A;          redo A;
1530        } elsif ($self->{next_input_character} == 0x0050 or # P        } elsif ($self->{next_char} == 0x0050 or # P
1531                 $self->{next_input_character} == 0x0070) { # p                 $self->{next_char} == 0x0070) { # p
1532          !!!next-input-character;          !!!next-input-character;
1533          if ($self->{next_input_character} == 0x0055 or # U          if ($self->{next_char} == 0x0055 or # U
1534              $self->{next_input_character} == 0x0075) { # u              $self->{next_char} == 0x0075) { # u
1535            !!!next-input-character;            !!!next-input-character;
1536            if ($self->{next_input_character} == 0x0042 or # B            if ($self->{next_char} == 0x0042 or # B
1537                $self->{next_input_character} == 0x0062) { # b                $self->{next_char} == 0x0062) { # b
1538              !!!next-input-character;              !!!next-input-character;
1539              if ($self->{next_input_character} == 0x004C or # L              if ($self->{next_char} == 0x004C or # L
1540                  $self->{next_input_character} == 0x006C) { # l                  $self->{next_char} == 0x006C) { # l
1541                !!!next-input-character;                !!!next-input-character;
1542                if ($self->{next_input_character} == 0x0049 or # I                if ($self->{next_char} == 0x0049 or # I
1543                    $self->{next_input_character} == 0x0069) { # i                    $self->{next_char} == 0x0069) { # i
1544                  !!!next-input-character;                  !!!next-input-character;
1545                  if ($self->{next_input_character} == 0x0043 or # C                  if ($self->{next_char} == 0x0043 or # C
1546                      $self->{next_input_character} == 0x0063) { # c                      $self->{next_char} == 0x0063) { # c
1547                    $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;                    $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1548                    !!!next-input-character;                    !!!next-input-character;
1549                    redo A;                    redo A;
# Line 1554  sub _get_next_token ($) { Line 1554  sub _get_next_token ($) {
1554          }          }
1555    
1556          #          #
1557        } elsif ($self->{next_input_character} == 0x0053 or # S        } elsif ($self->{next_char} == 0x0053 or # S
1558                 $self->{next_input_character} == 0x0073) { # s                 $self->{next_char} == 0x0073) { # s
1559          !!!next-input-character;          !!!next-input-character;
1560          if ($self->{next_input_character} == 0x0059 or # Y          if ($self->{next_char} == 0x0059 or # Y
1561              $self->{next_input_character} == 0x0079) { # y              $self->{next_char} == 0x0079) { # y
1562            !!!next-input-character;            !!!next-input-character;
1563            if ($self->{next_input_character} == 0x0053 or # S            if ($self->{next_char} == 0x0053 or # S
1564                $self->{next_input_character} == 0x0073) { # s                $self->{next_char} == 0x0073) { # s
1565              !!!next-input-character;              !!!next-input-character;
1566              if ($self->{next_input_character} == 0x0054 or # T              if ($self->{next_char} == 0x0054 or # T
1567                  $self->{next_input_character} == 0x0074) { # t                  $self->{next_char} == 0x0074) { # t
1568                !!!next-input-character;                !!!next-input-character;
1569                if ($self->{next_input_character} == 0x0045 or # E                if ($self->{next_char} == 0x0045 or # E
1570                    $self->{next_input_character} == 0x0065) { # e                    $self->{next_char} == 0x0065) { # e
1571                  !!!next-input-character;                  !!!next-input-character;
1572                  if ($self->{next_input_character} == 0x004D or # M                  if ($self->{next_char} == 0x004D or # M
1573                      $self->{next_input_character} == 0x006D) { # m                      $self->{next_char} == 0x006D) { # m
1574                    $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;                    $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
1575                    !!!next-input-character;                    !!!next-input-character;
1576                    redo A;                    redo A;
# Line 1596  sub _get_next_token ($) { Line 1596  sub _get_next_token ($) {
1596        if ({        if ({
1597              0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,              0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1598              #0x000D => 1, # HT, LF, VT, FF, SP, CR              #0x000D => 1, # HT, LF, VT, FF, SP, CR
1599            }->{$self->{next_input_character}}) {            }->{$self->{next_char}}) {
1600          ## Stay in the state          ## Stay in the state
1601          !!!next-input-character;          !!!next-input-character;
1602          redo A;          redo A;
1603        } elsif ($self->{next_input_character} eq 0x0022) { # "        } elsif ($self->{next_char} eq 0x0022) { # "
1604          $self->{current_token}->{public_identifier} = ''; # DOCTYPE          $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1605          $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
1606          !!!next-input-character;          !!!next-input-character;
1607          redo A;          redo A;
1608        } elsif ($self->{next_input_character} eq 0x0027) { # '        } elsif ($self->{next_char} eq 0x0027) { # '
1609          $self->{current_token}->{public_identifier} = ''; # DOCTYPE          $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1610          $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
1611          !!!next-input-character;          !!!next-input-character;
1612          redo A;          redo A;
1613        } elsif ($self->{next_input_character} eq 0x003E) { # >        } elsif ($self->{next_char} eq 0x003E) { # >
1614          !!!parse-error (type => 'no PUBLIC literal');          !!!parse-error (type => 'no PUBLIC literal');
1615    
1616          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1620  sub _get_next_token ($) { Line 1620  sub _get_next_token ($) {
1620          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1621    
1622          redo A;          redo A;
1623        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1624          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
1625    
1626          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1639  sub _get_next_token ($) { Line 1639  sub _get_next_token ($) {
1639          redo A;          redo A;
1640        }        }
1641      } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
1642        if ($self->{next_input_character} == 0x0022) { # "        if ($self->{next_char} == 0x0022) { # "
1643          $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1644          !!!next-input-character;          !!!next-input-character;
1645          redo A;          redo A;
1646        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1647          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
1648    
1649          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1653  sub _get_next_token ($) { Line 1653  sub _get_next_token ($) {
1653          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1654    
1655          redo A;          redo A;
1656        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1657          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
1658    
1659          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1665  sub _get_next_token ($) { Line 1665  sub _get_next_token ($) {
1665          redo A;          redo A;
1666        } else {        } else {
1667          $self->{current_token}->{public_identifier} # DOCTYPE          $self->{current_token}->{public_identifier} # DOCTYPE
1668              .= chr $self->{next_input_character};              .= chr $self->{next_char};
1669          ## Stay in the state          ## Stay in the state
1670          !!!next-input-character;          !!!next-input-character;
1671          redo A;          redo A;
1672        }        }
1673      } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
1674        if ($self->{next_input_character} == 0x0027) { # '        if ($self->{next_char} == 0x0027) { # '
1675          $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1676          !!!next-input-character;          !!!next-input-character;
1677          redo A;          redo A;
1678        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1679          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
1680    
1681          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1685  sub _get_next_token ($) { Line 1685  sub _get_next_token ($) {
1685          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1686    
1687          redo A;          redo A;
1688        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1689          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
1690    
1691          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1697  sub _get_next_token ($) { Line 1697  sub _get_next_token ($) {
1697          redo A;          redo A;
1698        } else {        } else {
1699          $self->{current_token}->{public_identifier} # DOCTYPE          $self->{current_token}->{public_identifier} # DOCTYPE
1700              .= chr $self->{next_input_character};              .= chr $self->{next_char};
1701          ## Stay in the state          ## Stay in the state
1702          !!!next-input-character;          !!!next-input-character;
1703          redo A;          redo A;
# Line 1706  sub _get_next_token ($) { Line 1706  sub _get_next_token ($) {
1706        if ({        if ({
1707              0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,              0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1708              #0x000D => 1, # HT, LF, VT, FF, SP, CR              #0x000D => 1, # HT, LF, VT, FF, SP, CR
1709            }->{$self->{next_input_character}}) {            }->{$self->{next_char}}) {
1710          ## Stay in the state          ## Stay in the state
1711          !!!next-input-character;          !!!next-input-character;
1712          redo A;          redo A;
1713        } elsif ($self->{next_input_character} == 0x0022) { # "        } elsif ($self->{next_char} == 0x0022) { # "
1714          $self->{current_token}->{system_identifier} = ''; # DOCTYPE          $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1715          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
1716          !!!next-input-character;          !!!next-input-character;
1717          redo A;          redo A;
1718        } elsif ($self->{next_input_character} == 0x0027) { # '        } elsif ($self->{next_char} == 0x0027) { # '
1719          $self->{current_token}->{system_identifier} = ''; # DOCTYPE          $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1720          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
1721          !!!next-input-character;          !!!next-input-character;
1722          redo A;          redo A;
1723        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1724          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1725          !!!next-input-character;          !!!next-input-character;
1726    
1727          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1728    
1729          redo A;          redo A;
1730        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1731          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
1732    
1733          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1749  sub _get_next_token ($) { Line 1749  sub _get_next_token ($) {
1749        if ({        if ({
1750              0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,              0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1751              #0x000D => 1, # HT, LF, VT, FF, SP, CR              #0x000D => 1, # HT, LF, VT, FF, SP, CR
1752            }->{$self->{next_input_character}}) {            }->{$self->{next_char}}) {
1753          ## Stay in the state          ## Stay in the state
1754          !!!next-input-character;          !!!next-input-character;
1755          redo A;          redo A;
1756        } elsif ($self->{next_input_character} == 0x0022) { # "        } elsif ($self->{next_char} == 0x0022) { # "
1757          $self->{current_token}->{system_identifier} = ''; # DOCTYPE          $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1758          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
1759          !!!next-input-character;          !!!next-input-character;
1760          redo A;          redo A;
1761        } elsif ($self->{next_input_character} == 0x0027) { # '        } elsif ($self->{next_char} == 0x0027) { # '
1762          $self->{current_token}->{system_identifier} = ''; # DOCTYPE          $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1763          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
1764          !!!next-input-character;          !!!next-input-character;
1765          redo A;          redo A;
1766        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1767          !!!parse-error (type => 'no SYSTEM literal');          !!!parse-error (type => 'no SYSTEM literal');
1768          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1769          !!!next-input-character;          !!!next-input-character;
# Line 1772  sub _get_next_token ($) { Line 1772  sub _get_next_token ($) {
1772          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1773    
1774          redo A;          redo A;
1775        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1776          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
1777    
1778          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1791  sub _get_next_token ($) { Line 1791  sub _get_next_token ($) {
1791          redo A;          redo A;
1792        }        }
1793      } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
1794        if ($self->{next_input_character} == 0x0022) { # "        if ($self->{next_char} == 0x0022) { # "
1795          $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
1796          !!!next-input-character;          !!!next-input-character;
1797          redo A;          redo A;
1798        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1799          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
1800    
1801          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1805  sub _get_next_token ($) { Line 1805  sub _get_next_token ($) {
1805          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1806    
1807          redo A;          redo A;
1808        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1809          !!!parse-error (type => 'unclosed SYSTEM literal');          !!!parse-error (type => 'unclosed SYSTEM literal');
1810    
1811          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1817  sub _get_next_token ($) { Line 1817  sub _get_next_token ($) {
1817          redo A;          redo A;
1818        } else {        } else {
1819          $self->{current_token}->{system_identifier} # DOCTYPE          $self->{current_token}->{system_identifier} # DOCTYPE
1820              .= chr $self->{next_input_character};              .= chr $self->{next_char};
1821          ## Stay in the state          ## Stay in the state
1822          !!!next-input-character;          !!!next-input-character;
1823          redo A;          redo A;
1824        }        }
1825      } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
1826        if ($self->{next_input_character} == 0x0027) { # '        if ($self->{next_char} == 0x0027) { # '
1827          $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
1828          !!!next-input-character;          !!!next-input-character;
1829          redo A;          redo A;
1830        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1831          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
1832    
1833          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1837  sub _get_next_token ($) { Line 1837  sub _get_next_token ($) {
1837          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1838    
1839          redo A;          redo A;
1840        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1841          !!!parse-error (type => 'unclosed SYSTEM literal');          !!!parse-error (type => 'unclosed SYSTEM literal');
1842    
1843          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1849  sub _get_next_token ($) { Line 1849  sub _get_next_token ($) {
1849          redo A;          redo A;
1850        } else {        } else {
1851          $self->{current_token}->{system_identifier} # DOCTYPE          $self->{current_token}->{system_identifier} # DOCTYPE
1852              .= chr $self->{next_input_character};              .= chr $self->{next_char};
1853          ## Stay in the state          ## Stay in the state
1854          !!!next-input-character;          !!!next-input-character;
1855          redo A;          redo A;
# Line 1858  sub _get_next_token ($) { Line 1858  sub _get_next_token ($) {
1858        if ({        if ({
1859              0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,              0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1860              #0x000D => 1, # HT, LF, VT, FF, SP, CR              #0x000D => 1, # HT, LF, VT, FF, SP, CR
1861            }->{$self->{next_input_character}}) {            }->{$self->{next_char}}) {
1862          ## Stay in the state          ## Stay in the state
1863          !!!next-input-character;          !!!next-input-character;
1864          redo A;          redo A;
1865        } elsif ($self->{next_input_character} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1866          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1867          !!!next-input-character;          !!!next-input-character;
1868    
1869          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1870    
1871          redo A;          redo A;
1872        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1873          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
1874    
1875          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 1888  sub _get_next_token ($) { Line 1888  sub _get_next_token ($) {
1888          redo A;          redo A;
1889        }        }
1890      } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {      } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
1891        if ($self->{next_input_character} == 0x003E) { # >        if ($self->{next_char} == 0x003E) { # >
1892          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1893          !!!next-input-character;          !!!next-input-character;
1894    
1895          !!!emit ($self->{current_token}); # DOCTYPE          !!!emit ($self->{current_token}); # DOCTYPE
1896    
1897          redo A;          redo A;
1898        } elsif ($self->{next_input_character} == -1) {        } elsif ($self->{next_char} == -1) {
1899          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
1900          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1901          ## reconsume          ## reconsume
# Line 1923  sub _tokenize_attempt_to_consume_an_enti Line 1923  sub _tokenize_attempt_to_consume_an_enti
1923         0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,         0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
1924         0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR         0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
1925         $additional => 1,         $additional => 1,
1926        }->{$self->{next_input_character}}) {        }->{$self->{next_char}}) {
1927      ## Don't consume      ## Don't consume
1928      ## No error      ## No error
1929      return undef;      return undef;
1930    } elsif ($self->{next_input_character} == 0x0023) { # #    } elsif ($self->{next_char} == 0x0023) { # #
1931      !!!next-input-character;      !!!next-input-character;
1932      if ($self->{next_input_character} == 0x0078 or # x      if ($self->{next_char} == 0x0078 or # x
1933          $self->{next_input_character} == 0x0058) { # X          $self->{next_char} == 0x0058) { # X
1934        my $code;        my $code;
1935        X: {        X: {
1936          my $x_char = $self->{next_input_character};          my $x_char = $self->{next_char};
1937          !!!next-input-character;          !!!next-input-character;
1938          if (0x0030 <= $self->{next_input_character} and          if (0x0030 <= $self->{next_char} and
1939              $self->{next_input_character} <= 0x0039) { # 0..9              $self->{next_char} <= 0x0039) { # 0..9
1940            $code ||= 0;            $code ||= 0;
1941            $code *= 0x10;            $code *= 0x10;
1942            $code += $self->{next_input_character} - 0x0030;            $code += $self->{next_char} - 0x0030;
1943            redo X;            redo X;
1944          } elsif (0x0061 <= $self->{next_input_character} and          } elsif (0x0061 <= $self->{next_char} and
1945                   $self->{next_input_character} <= 0x0066) { # a..f                   $self->{next_char} <= 0x0066) { # a..f
1946            $code ||= 0;            $code ||= 0;
1947            $code *= 0x10;            $code *= 0x10;
1948            $code += $self->{next_input_character} - 0x0060 + 9;            $code += $self->{next_char} - 0x0060 + 9;
1949            redo X;            redo X;
1950          } elsif (0x0041 <= $self->{next_input_character} and          } elsif (0x0041 <= $self->{next_char} and
1951                   $self->{next_input_character} <= 0x0046) { # A..F                   $self->{next_char} <= 0x0046) { # A..F
1952            $code ||= 0;            $code ||= 0;
1953            $code *= 0x10;            $code *= 0x10;
1954            $code += $self->{next_input_character} - 0x0040 + 9;            $code += $self->{next_char} - 0x0040 + 9;
1955            redo X;            redo X;
1956          } elsif (not defined $code) { # no hexadecimal digit          } elsif (not defined $code) { # no hexadecimal digit
1957            !!!parse-error (type => 'bare hcro');            !!!parse-error (type => 'bare hcro');
1958            !!!back-next-input-character ($x_char, $self->{next_input_character});            !!!back-next-input-character ($x_char, $self->{next_char});
1959            $self->{next_input_character} = 0x0023; # #            $self->{next_char} = 0x0023; # #
1960            return undef;            return undef;
1961          } elsif ($self->{next_input_character} == 0x003B) { # ;          } elsif ($self->{next_char} == 0x003B) { # ;
1962            !!!next-input-character;            !!!next-input-character;
1963          } else {          } else {
1964            !!!parse-error (type => 'no refc');            !!!parse-error (type => 'no refc');
# Line 1981  sub _tokenize_attempt_to_consume_an_enti Line 1981  sub _tokenize_attempt_to_consume_an_enti
1981          return {type => CHARACTER_TOKEN, data => chr $code,          return {type => CHARACTER_TOKEN, data => chr $code,
1982                  has_reference => 1};                  has_reference => 1};
1983        } # X        } # X
1984      } elsif (0x0030 <= $self->{next_input_character} and      } elsif (0x0030 <= $self->{next_char} and
1985               $self->{next_input_character} <= 0x0039) { # 0..9               $self->{next_char} <= 0x0039) { # 0..9
1986        my $code = $self->{next_input_character} - 0x0030;        my $code = $self->{next_char} - 0x0030;
1987        !!!next-input-character;        !!!next-input-character;
1988                
1989        while (0x0030 <= $self->{next_input_character} and        while (0x0030 <= $self->{next_char} and
1990                  $self->{next_input_character} <= 0x0039) { # 0..9                  $self->{next_char} <= 0x0039) { # 0..9
1991          $code *= 10;          $code *= 10;
1992          $code += $self->{next_input_character} - 0x0030;          $code += $self->{next_char} - 0x0030;
1993                    
1994          !!!next-input-character;          !!!next-input-character;
1995        }        }
1996    
1997        if ($self->{next_input_character} == 0x003B) { # ;        if ($self->{next_char} == 0x003B) { # ;
1998          !!!next-input-character;          !!!next-input-character;
1999        } else {        } else {
2000          !!!parse-error (type => 'no refc');          !!!parse-error (type => 'no refc');
# Line 2017  sub _tokenize_attempt_to_consume_an_enti Line 2017  sub _tokenize_attempt_to_consume_an_enti
2017        return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1};        return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1};
2018      } else {      } else {
2019        !!!parse-error (type => 'bare nero');        !!!parse-error (type => 'bare nero');
2020        !!!back-next-input-character ($self->{next_input_character});        !!!back-next-input-character ($self->{next_char});
2021        $self->{next_input_character} = 0x0023; # #        $self->{next_char} = 0x0023; # #
2022        return undef;        return undef;
2023      }      }
2024    } elsif ((0x0041 <= $self->{next_input_character} and    } elsif ((0x0041 <= $self->{next_char} and
2025              $self->{next_input_character} <= 0x005A) or              $self->{next_char} <= 0x005A) or
2026             (0x0061 <= $self->{next_input_character} and             (0x0061 <= $self->{next_char} and
2027              $self->{next_input_character} <= 0x007A)) {              $self->{next_char} <= 0x007A)) {
2028      my $entity_name = chr $self->{next_input_character};      my $entity_name = chr $self->{next_char};
2029      !!!next-input-character;      !!!next-input-character;
2030    
2031      my $value = $entity_name;      my $value = $entity_name;
# Line 2035  sub _tokenize_attempt_to_consume_an_enti Line 2035  sub _tokenize_attempt_to_consume_an_enti
2035    
2036      while (length $entity_name < 10 and      while (length $entity_name < 10 and
2037             ## NOTE: Some number greater than the maximum length of entity name             ## NOTE: Some number greater than the maximum length of entity name
2038             ((0x0041 <= $self->{next_input_character} and # a             ((0x0041 <= $self->{next_char} and # a
2039               $self->{next_input_character} <= 0x005A) or # x               $self->{next_char} <= 0x005A) or # x
2040              (0x0061 <= $self->{next_input_character} and # a              (0x0061 <= $self->{next_char} and # a
2041               $self->{next_input_character} <= 0x007A) or # z               $self->{next_char} <= 0x007A) or # z
2042              (0x0030 <= $self->{next_input_character} and # 0              (0x0030 <= $self->{next_char} and # 0
2043               $self->{next_input_character} <= 0x0039) or # 9               $self->{next_char} <= 0x0039) or # 9
2044              $self->{next_input_character} == 0x003B)) { # ;              $self->{next_char} == 0x003B)) { # ;
2045        $entity_name .= chr $self->{next_input_character};        $entity_name .= chr $self->{next_char};
2046        if (defined $EntityChar->{$entity_name}) {        if (defined $EntityChar->{$entity_name}) {
2047          if ($self->{next_input_character} == 0x003B) { # ;          if ($self->{next_char} == 0x003B) { # ;
2048            $value = $EntityChar->{$entity_name};            $value = $EntityChar->{$entity_name};
2049            $match = 1;            $match = 1;
2050            !!!next-input-character;            !!!next-input-character;
# Line 2055  sub _tokenize_attempt_to_consume_an_enti Line 2055  sub _tokenize_attempt_to_consume_an_enti
2055            !!!next-input-character;            !!!next-input-character;
2056          }          }
2057        } else {        } else {
2058          $value .= chr $self->{next_input_character};          $value .= chr $self->{next_char};
2059          $match *= 2;          $match *= 2;
2060          !!!next-input-character;          !!!next-input-character;
2061        }        }
# Line 5516  sub set_inner_html ($$$) { Line 5516  sub set_inner_html ($$$) {
5516      my $i = 0;      my $i = 0;
5517      my $line = 1;      my $line = 1;
5518      my $column = 0;      my $column = 0;
5519      $p->{set_next_input_character} = sub {      $p->{set_next_char} = sub {
5520        my $self = shift;        my $self = shift;
5521    
5522        pop @{$self->{prev_input_character}};        pop @{$self->{prev_char}};
5523        unshift @{$self->{prev_input_character}}, $self->{next_input_character};        unshift @{$self->{prev_char}}, $self->{next_char};
5524    
5525        $self->{next_input_character} = -1 and return if $i >= length $$s;        $self->{next_char} = -1 and return if $i >= length $$s;
5526        $self->{next_input_character} = ord substr $$s, $i++, 1;        $self->{next_char} = ord substr $$s, $i++, 1;
5527        $column++;        $column++;
5528    
5529        if ($self->{next_input_character} == 0x000A) { # LF        if ($self->{next_char} == 0x000A) { # LF
5530          $line++;          $line++;
5531          $column = 0;          $column = 0;
5532        } elsif ($self->{next_input_character} == 0x000D) { # CR        } elsif ($self->{next_char} == 0x000D) { # CR
5533          $i++ if substr ($$s, $i, 1) eq "\x0A";          $i++ if substr ($$s, $i, 1) eq "\x0A";
5534          $self->{next_input_character} = 0x000A; # LF # MUST          $self->{next_char} = 0x000A; # LF # MUST
5535          $line++;          $line++;
5536          $column = 0;          $column = 0;
5537        } elsif ($self->{next_input_character} > 0x10FFFF) {        } elsif ($self->{next_char} > 0x10FFFF) {
5538          $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST          $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
5539        } elsif ($self->{next_input_character} == 0x0000) { # NULL        } elsif ($self->{next_char} == 0x0000) { # NULL
5540          !!!parse-error (type => 'NULL');          !!!parse-error (type => 'NULL');
5541          $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST          $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
5542        }        }
5543      };      };
5544      $p->{prev_input_character} = [-1, -1, -1];      $p->{prev_char} = [-1, -1, -1];
5545      $p->{next_input_character} = -1;      $p->{next_char} = -1;
5546            
5547      my $ponerror = $onerror || sub {      my $ponerror = $onerror || sub {
5548        my (%opt) = @_;        my (%opt) = @_;

Legend:
Removed from v.1.75  
changed lines
  Added in v.1.76

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24