/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.62 by wakaba, Sun Nov 11 04:59:35 2007 UTC | revision 1.63 by wakaba, Sun Nov 11 06:54:36 2007 UTC
# Line 1  Line 1 
1  package Whatpm::HTML;  package Whatpm::HTML;
2  use strict;  use strict;
3  our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};  our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4    use Error qw(:try);
5    
6  ## ISSUE:  ## ISSUE:
7  ## var doc = implementation.createDocument (null, null, null);  ## var doc = implementation.createDocument (null, null, null);
# Line 84  my $formatting_category = { Line 85  my $formatting_category = {
85  };  };
86  # $phrasing_category: all other elements  # $phrasing_category: all other elements
87    
88    sub parse_byte_string ($$$$;$) {
89      my $self = ref $_[0] ? shift : shift->new;
90      my $charset = shift;
91      my $bytes_s = ref $_[0] ? $_[0] : \($_[0]);
92      my $s;
93      
94      if (defined $charset) {
95        require Encode;
96        $s = \ (Encode::decode ($charset, $$bytes_s));
97        $self->{input_encoding} = lc $charset; ## TODO: normalize name ## TODO: set $doc->input_encoding
98        $self->{confident} = 1;
99      } else {
100        $s = ref $_[0] ? $_[0] : \($_[0]);
101        $self->{confident} = 0;
102      }
103    
104      $self->{change_encoding} = sub {
105        my $self = shift;
106        my $charset = lc shift;
107        ## TODO: if $charset is supported
108        ## TODO: normalize charset name
109    
110        ## "Change the encoding" algorithm:
111    
112        ## Step 1    
113        if ($charset eq 'utf-16') { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?
114          $charset = 'utf-8';
115        }
116    
117        ## Step 2
118        if (defined $self->{input_encoding} and
119            $self->{input_encoding} eq $charset) {
120          $self->{confident} = 1;
121          return;
122        }
123    
124        !!!parse-error (type => 'charset label detected', level => 'w');
125    
126        ## Step 3
127        # if (can) {
128          ## change the encoding on the fly.
129          #$self->{confident} = 1;
130          #return;
131        # }
132    
133        ## Step 4
134        throw Whatpm::HTML::RestartParser (charset => $charset);
135      }; # $self->{change_encoding}
136    
137      my @args = @_; shift @args; # $s
138      my $return;
139      try {
140        $return = $self->parse_char_string ($s, @args);  
141      } catch Whatpm::HTML::RestartParser with {
142        my $charset = shift->{charset};
143        $s = \ (Encode::decode ($charset, $$bytes_s));    
144        $self->{input_encoding} = $charset; ## TODO: $doc->input_encoding;
145        $self->{confident} = 1;
146        $return = $self->parse_char_string ($s, @args);
147      };
148      return $return;
149    } # parse_byte_string
150    
151    *parse_char_string = \&parse_string;
152    
153  sub parse_string ($$$;$) {  sub parse_string ($$$;$) {
154    my $self = shift->new;    my $self = ref $_[0] ? shift : shift->new;
155    my $s = \$_[0];    my $s = ref $_[0] ? $_[0] : \($_[0]);
156    $self->{document} = $_[1];    $self->{document} = $_[1];
157      @{$self->{document}->child_nodes} = ();
158    
159    ## NOTE: |set_inner_html| copies most of this method's code    ## NOTE: |set_inner_html| copies most of this method's code
160    
161      $self->{confident} = 1 unless exists $self->{confident};
162    
163    my $i = 0;    my $i = 0;
164    my $line = 1;    my $line = 1;
165    my $column = 0;    my $column = 0;
# Line 147  sub new ($) { Line 216  sub new ($) {
216    $self->{parse_error} = sub {    $self->{parse_error} = sub {
217      #      #
218    };    };
219      $self->{change_encoding} = sub {
220        # if ($_[0] is a supported encoding) {
221        #   run "change the encoding" algorithm;
222        #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
223        # }
224      };
225    $self->{application_cache_selection} = sub {    $self->{application_cache_selection} = sub {
226      #      #
227    };    };
# Line 2785  sub _tree_construction_main ($) { Line 2860  sub _tree_construction_main ($) {
2860                pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.                pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
2861    
2862                unless ($self->{confident}) {                unless ($self->{confident}) {
                 my $charset;  
2863                  if ($token->{attributes}->{charset}) { ## TODO: And if supported                  if ($token->{attributes}->{charset}) { ## TODO: And if supported
2864                    $charset = $token->{attributes}->{charset}->{value};                    $self->{change_encoding}
2865                  }                        ->($self, $token->{attributes}->{charset}->{value});
2866                  if ($token->{attributes}->{'http-equiv'}) {                  } elsif ($token->{attributes}->{content}) {
2867                    ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.                    ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
2868                    if ($token->{attributes}->{'http-equiv'}->{value}                    if ($token->{attributes}->{content}->{value}
2869                        =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=                        =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=
2870                            [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|                            [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2871                            ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {                            ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
2872                      $charset = defined $1 ? $1 : defined $2 ? $2 : $3;                      $self->{change_encoding}
2873                    } ## TODO: And if supported                          ->($self, defined $1 ? $1 : defined $2 ? $2 : $3);
2874                      }
2875                  }                  }
                 ## TODO: Change the encoding  
2876                }                }
2877    
               ## TODO: Extracting |charset| from |meta|.  
2878                pop @{$self->{open_elements}}                pop @{$self->{open_elements}}
2879                    if $self->{insertion_mode} == AFTER_HEAD_IM;                    if $self->{insertion_mode} == AFTER_HEAD_IM;
2880                !!!next-token;                !!!next-token;
# Line 4375  sub _tree_construction_main ($) { Line 4448  sub _tree_construction_main ($) {
4448          pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.          pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4449    
4450          unless ($self->{confident}) {          unless ($self->{confident}) {
           my $charset;  
4451            if ($token->{attributes}->{charset}) { ## TODO: And if supported            if ($token->{attributes}->{charset}) { ## TODO: And if supported
4452              $charset = $token->{attributes}->{charset}->{value};              $self->{change_encoding}
4453            }                  ->($self, $token->{attributes}->{charset}->{value});
4454            if ($token->{attributes}->{'http-equiv'}) {            } elsif ($token->{attributes}->{content}) {
4455              ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.              ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
4456              if ($token->{attributes}->{'http-equiv'}->{value}              if ($token->{attributes}->{content}->{value}
4457                  =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=                  =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=
4458                      [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|                      [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4459                      ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {                      ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
4460                $charset = defined $1 ? $1 : defined $2 ? $2 : $3;                $self->{change_encoding}
4461              } ## TODO: And if supported                    ->($self, defined $1 ? $1 : defined $2 ? $2 : $3);
4462                }
4463            }            }
           ## TODO: Change the encoding  
4464          }          }
4465    
4466          !!!next-token;          !!!next-token;
# Line 5214  sub set_inner_html ($$$) { Line 5286  sub set_inner_html ($$$) {
5286    my $s = \$_[0];    my $s = \$_[0];
5287    my $onerror = $_[1];    my $onerror = $_[1];
5288    
5289      ## ISSUE: Should {confident} be true?
5290    
5291    my $nt = $node->node_type;    my $nt = $node->node_type;
5292    if ($nt == 9) {    if ($nt == 9) {
5293      # MUST      # MUST
# Line 5366  sub set_inner_html ($$$) { Line 5440  sub set_inner_html ($$$) {
5440    
5441  } # tree construction stage  } # tree construction stage
5442    
5443    package Whatpm::HTML::RestartParser;
5444    push our @ISA, 'Error';
5445    
5446  1;  1;
5447  # $Date$  # $Date$

Legend:
Removed from v.1.62  
changed lines
  Added in v.1.63

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24