/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.1 by wakaba, Tue Oct 14 02:27:58 2008 UTC | revision 1.3 by wakaba, Tue Oct 14 05:34:05 2008 UTC
# Line 2  package Whatpm::HTML::Tokenizer; Line 2  package Whatpm::HTML::Tokenizer;
2  use strict;  use strict;
3  our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};  our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4    
5    BEGIN {
6      require Exporter;
7      push our @ISA, 'Exporter';
8    
9      our @EXPORT_OK = qw(
10        DOCTYPE_TOKEN
11        COMMENT_TOKEN
12        START_TAG_TOKEN
13        END_TAG_TOKEN
14        END_OF_FILE_TOKEN
15        CHARACTER_TOKEN
16        PI_TOKEN
17        ABORT_TOKEN
18      );
19      
20      our %EXPORT_TAGS = (
21        token => [qw(
22          DOCTYPE_TOKEN
23          COMMENT_TOKEN
24          START_TAG_TOKEN
25          END_TAG_TOKEN
26          END_OF_FILE_TOKEN
27          CHARACTER_TOKEN
28          PI_TOKEN
29          ABORT_TOKEN
30        )],
31      );
32    }
33    
34    ## Token types
35    
36    sub DOCTYPE_TOKEN () { 1 }
37    sub COMMENT_TOKEN () { 2 }
38    sub START_TAG_TOKEN () { 3 }
39    sub END_TAG_TOKEN () { 4 }
40    sub END_OF_FILE_TOKEN () { 5 }
41    sub CHARACTER_TOKEN () { 6 }
42    sub PI_TOKEN () { 7 } # XML5
43    sub ABORT_TOKEN () { 8 } # Not a token actually
44    
45  package Whatpm::HTML;  package Whatpm::HTML;
46    
47    BEGIN { Whatpm::HTML::Tokenizer->import (':token') }
48    
49  ## Content model flags  ## Content model flags
50    
51  sub CM_ENTITY () { 0b001 } # & markup in data  sub CM_ENTITY () { 0b001 } # & markup in data
# Line 72  sub HEXREF_HEX_STATE () { 48 } Line 114  sub HEXREF_HEX_STATE () { 48 }
114  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
115  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
116    
 ## Token types  
   
 sub DOCTYPE_TOKEN () { 1 }  
 sub COMMENT_TOKEN () { 2 }  
 sub START_TAG_TOKEN () { 3 }  
 sub END_TAG_TOKEN () { 4 }  
 sub END_OF_FILE_TOKEN () { 5 }  
 sub CHARACTER_TOKEN () { 6 }  
   
117  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
118  ## list and descriptions)  ## list and descriptions)
119    
# Line 142  sub _initialize_tokenizer ($) { Line 175  sub _initialize_tokenizer ($) {
175    #$self->{level}    #$self->{level}
176    #$self->{set_nc}    #$self->{set_nc}
177    #$self->{parse_error}    #$self->{parse_error}
178      #$self->{is_xml} (if XML)
179    
180    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
181    #$self->{s_kwd}; # state keyword - initialized when used    #$self->{s_kwd}; # state keyword - initialized when used
# Line 1357  sub _get_next_token ($) { Line 1391  sub _get_next_token ($) {
1391          $self->{s_kwd} = chr $self->{nc};          $self->{s_kwd} = chr $self->{nc};
1392          !!!next-input-character;          !!!next-input-character;
1393          redo A;          redo A;
1394        } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and        } elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1395                 $self->{open_elements}->[-1]->[1] & FOREIGN_EL and                   $self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1396                    $self->{is_xml}) and
1397                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
1398          !!!cp (135.4);                          !!!cp (135.4);                
1399          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.3

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24