/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.160 by wakaba, Wed Sep 10 10:27:08 2008 UTC | revision 1.161 by wakaba, Wed Sep 10 10:46:50 2008 UTC
# Line 381  sub parse_byte_stream ($$$$;$) { Line 381  sub parse_byte_stream ($$$$;$) {
381    
382      ## Step 1      ## Step 1
383      if (defined $charset_name) {      if (defined $charset_name) {
384        $charset = Message::Charset::Info->get_by_iana_name ($charset_name);        $charset = Message::Charset::Info->get_by_html_name ($charset_name);
385              ## TODO: Is this ok?  Transfer protocol's parameter should be
386              ## interpreted in its semantics?
387    
388        ## ISSUE: Unsupported encoding is not ignored according to the spec.        ## ISSUE: Unsupported encoding is not ignored according to the spec.
389        ($char_stream, $e_status) = $charset->get_decode_handle        ($char_stream, $e_status) = $charset->get_decode_handle
# Line 405  sub parse_byte_stream ($$$$;$) { Line 407  sub parse_byte_stream ($$$$;$) {
407    
408      ## Step 3      ## Step 3
409      if ($byte_buffer =~ /^\xFE\xFF/) {      if ($byte_buffer =~ /^\xFE\xFF/) {
410        $charset = Message::Charset::Info->get_by_iana_name ('utf-16be');        $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
411        ($char_stream, $e_status) = $charset->get_decode_handle        ($char_stream, $e_status) = $charset->get_decode_handle
412            ($byte_stream, allow_error_reporting => 1,            ($byte_stream, allow_error_reporting => 1,
413             allow_fallback => 1, byte_buffer => \$byte_buffer);             allow_fallback => 1, byte_buffer => \$byte_buffer);
414        $self->{confident} = 1;        $self->{confident} = 1;
415        last SNIFFING;        last SNIFFING;
416      } elsif ($byte_buffer =~ /^\xFF\xFE/) {      } elsif ($byte_buffer =~ /^\xFF\xFE/) {
417        $charset = Message::Charset::Info->get_by_iana_name ('utf-16le');        $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
418        ($char_stream, $e_status) = $charset->get_decode_handle        ($char_stream, $e_status) = $charset->get_decode_handle
419            ($byte_stream, allow_error_reporting => 1,            ($byte_stream, allow_error_reporting => 1,
420             allow_fallback => 1, byte_buffer => \$byte_buffer);             allow_fallback => 1, byte_buffer => \$byte_buffer);
421        $self->{confident} = 1;        $self->{confident} = 1;
422        last SNIFFING;        last SNIFFING;
423      } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {      } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
424        $charset = Message::Charset::Info->get_by_iana_name ('utf-8');        $charset = Message::Charset::Info->get_by_html_name ('utf-8');
425        ($char_stream, $e_status) = $charset->get_decode_handle        ($char_stream, $e_status) = $charset->get_decode_handle
426            ($byte_stream, allow_error_reporting => 1,            ($byte_stream, allow_error_reporting => 1,
427             allow_fallback => 1, byte_buffer => \$byte_buffer);             allow_fallback => 1, byte_buffer => \$byte_buffer);
# Line 438  sub parse_byte_stream ($$$$;$) { Line 440  sub parse_byte_stream ($$$$;$) {
440      $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string      $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
441          ($byte_buffer);          ($byte_buffer);
442      if (defined $charset_name) {      if (defined $charset_name) {
443        $charset = Message::Charset::Info->get_by_iana_name ($charset_name);        $charset = Message::Charset::Info->get_by_html_name ($charset_name);
444    
445        ## ISSUE: Unsupported encoding is not ignored according to the spec.        ## ISSUE: Unsupported encoding is not ignored according to the spec.
446        require Whatpm::Charset::DecodeHandle;        require Whatpm::Charset::DecodeHandle;
# Line 461  sub parse_byte_stream ($$$$;$) { Line 463  sub parse_byte_stream ($$$$;$) {
463    
464      ## Step 7: default      ## Step 7: default
465      ## TODO: Make this configurable.      ## TODO: Make this configurable.
466      $charset = Message::Charset::Info->get_by_iana_name ('windows-1252');      $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
467          ## NOTE: We choose |windows-1252| here, since |utf-8| should be          ## NOTE: We choose |windows-1252| here, since |utf-8| should be
468          ## detectable in the step 6.          ## detectable in the step 6.
469      require Whatpm::Charset::DecodeHandle;      require Whatpm::Charset::DecodeHandle;
# Line 505  sub parse_byte_stream ($$$$;$) { Line 507  sub parse_byte_stream ($$$$;$) {
507      $charset_name = shift;      $charset_name = shift;
508      my $token = shift;      my $token = shift;
509    
510      $charset = Message::Charset::Info->get_by_iana_name ($charset_name);      $charset = Message::Charset::Info->get_by_html_name ($charset_name);
511      ($char_stream, $e_status) = $charset->get_decode_handle      ($char_stream, $e_status) = $charset->get_decode_handle
512          ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,          ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
513           byte_buffer => \ $buffer->{buffer});           byte_buffer => \ $buffer->{buffer});
# Line 516  sub parse_byte_stream ($$$$;$) { Line 518  sub parse_byte_stream ($$$$;$) {
518        ## Step 1            ## Step 1    
519        if ($charset->{category} &        if ($charset->{category} &
520            Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {            Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
521          $charset = Message::Charset::Info->get_by_iana_name ('utf-8');          $charset = Message::Charset::Info->get_by_html_name ('utf-8');
522          ($char_stream, $e_status) = $charset->get_decode_handle          ($char_stream, $e_status) = $charset->get_decode_handle
523              ($byte_stream,              ($byte_stream,
524               byte_buffer => \ $buffer->{buffer});               byte_buffer => \ $buffer->{buffer});

Legend:
Removed from v.1.160  
changed lines
  Added in v.1.161

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24