381 |
|
|
382 |
## Step 1 |
## Step 1 |
383 |
if (defined $charset_name) { |
if (defined $charset_name) { |
384 |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
$charset = Message::Charset::Info->get_by_html_name ($charset_name); |
385 |
|
## TODO: Is this ok? Transfer protocol's parameter should be |
386 |
|
## interpreted in its semantics? |
387 |
|
|
388 |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
389 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
407 |
|
|
408 |
## Step 3 |
## Step 3 |
409 |
if ($byte_buffer =~ /^\xFE\xFF/) { |
if ($byte_buffer =~ /^\xFE\xFF/) { |
410 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-16be'); |
$charset = Message::Charset::Info->get_by_html_name ('utf-16be'); |
411 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
412 |
($byte_stream, allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
413 |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
414 |
$self->{confident} = 1; |
$self->{confident} = 1; |
415 |
last SNIFFING; |
last SNIFFING; |
416 |
} elsif ($byte_buffer =~ /^\xFF\xFE/) { |
} elsif ($byte_buffer =~ /^\xFF\xFE/) { |
417 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-16le'); |
$charset = Message::Charset::Info->get_by_html_name ('utf-16le'); |
418 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
419 |
($byte_stream, allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
420 |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
421 |
$self->{confident} = 1; |
$self->{confident} = 1; |
422 |
last SNIFFING; |
last SNIFFING; |
423 |
} elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) { |
} elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) { |
424 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-8'); |
$charset = Message::Charset::Info->get_by_html_name ('utf-8'); |
425 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
426 |
($byte_stream, allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
427 |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
440 |
$charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string |
$charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string |
441 |
($byte_buffer); |
($byte_buffer); |
442 |
if (defined $charset_name) { |
if (defined $charset_name) { |
443 |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
$charset = Message::Charset::Info->get_by_html_name ($charset_name); |
444 |
|
|
445 |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
446 |
require Whatpm::Charset::DecodeHandle; |
require Whatpm::Charset::DecodeHandle; |
463 |
|
|
464 |
## Step 7: default |
## Step 7: default |
465 |
## TODO: Make this configurable. |
## TODO: Make this configurable. |
466 |
$charset = Message::Charset::Info->get_by_iana_name ('windows-1252'); |
$charset = Message::Charset::Info->get_by_html_name ('windows-1252'); |
467 |
## NOTE: We choose |windows-1252| here, since |utf-8| should be |
## NOTE: We choose |windows-1252| here, since |utf-8| should be |
468 |
## detectable in the step 6. |
## detectable in the step 6. |
469 |
require Whatpm::Charset::DecodeHandle; |
require Whatpm::Charset::DecodeHandle; |
507 |
$charset_name = shift; |
$charset_name = shift; |
508 |
my $token = shift; |
my $token = shift; |
509 |
|
|
510 |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
$charset = Message::Charset::Info->get_by_html_name ($charset_name); |
511 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
512 |
($byte_stream, allow_error_reporting => 1, allow_fallback => 1, |
($byte_stream, allow_error_reporting => 1, allow_fallback => 1, |
513 |
byte_buffer => \ $buffer->{buffer}); |
byte_buffer => \ $buffer->{buffer}); |
518 |
## Step 1 |
## Step 1 |
519 |
if ($charset->{category} & |
if ($charset->{category} & |
520 |
Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
521 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-8'); |
$charset = Message::Charset::Info->get_by_html_name ('utf-8'); |
522 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
523 |
($byte_stream, |
($byte_stream, |
524 |
byte_buffer => \ $buffer->{buffer}); |
byte_buffer => \ $buffer->{buffer}); |