| 381 |
|
|
| 382 |
## Step 1 |
## Step 1 |
| 383 |
if (defined $charset_name) { |
if (defined $charset_name) { |
| 384 |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
$charset = Message::Charset::Info->get_by_html_name ($charset_name); |
| 385 |
|
## TODO: Is this ok? Transfer protocol's parameter should be |
| 386 |
|
## interpreted in its semantics? |
| 387 |
|
|
| 388 |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
| 389 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
| 407 |
|
|
| 408 |
## Step 3 |
## Step 3 |
| 409 |
if ($byte_buffer =~ /^\xFE\xFF/) { |
if ($byte_buffer =~ /^\xFE\xFF/) { |
| 410 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-16be'); |
$charset = Message::Charset::Info->get_by_html_name ('utf-16be'); |
| 411 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
| 412 |
($byte_stream, allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
| 413 |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
| 414 |
$self->{confident} = 1; |
$self->{confident} = 1; |
| 415 |
last SNIFFING; |
last SNIFFING; |
| 416 |
} elsif ($byte_buffer =~ /^\xFF\xFE/) { |
} elsif ($byte_buffer =~ /^\xFF\xFE/) { |
| 417 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-16le'); |
$charset = Message::Charset::Info->get_by_html_name ('utf-16le'); |
| 418 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
| 419 |
($byte_stream, allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
| 420 |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
| 421 |
$self->{confident} = 1; |
$self->{confident} = 1; |
| 422 |
last SNIFFING; |
last SNIFFING; |
| 423 |
} elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) { |
} elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) { |
| 424 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-8'); |
$charset = Message::Charset::Info->get_by_html_name ('utf-8'); |
| 425 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
| 426 |
($byte_stream, allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
| 427 |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
| 440 |
$charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string |
$charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string |
| 441 |
($byte_buffer); |
($byte_buffer); |
| 442 |
if (defined $charset_name) { |
if (defined $charset_name) { |
| 443 |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
$charset = Message::Charset::Info->get_by_html_name ($charset_name); |
| 444 |
|
|
| 445 |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
| 446 |
require Whatpm::Charset::DecodeHandle; |
require Whatpm::Charset::DecodeHandle; |
| 463 |
|
|
| 464 |
## Step 7: default |
## Step 7: default |
| 465 |
## TODO: Make this configurable. |
## TODO: Make this configurable. |
| 466 |
$charset = Message::Charset::Info->get_by_iana_name ('windows-1252'); |
$charset = Message::Charset::Info->get_by_html_name ('windows-1252'); |
| 467 |
## NOTE: We choose |windows-1252| here, since |utf-8| should be |
## NOTE: We choose |windows-1252| here, since |utf-8| should be |
| 468 |
## detectable in the step 6. |
## detectable in the step 6. |
| 469 |
require Whatpm::Charset::DecodeHandle; |
require Whatpm::Charset::DecodeHandle; |
| 507 |
$charset_name = shift; |
$charset_name = shift; |
| 508 |
my $token = shift; |
my $token = shift; |
| 509 |
|
|
| 510 |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
$charset = Message::Charset::Info->get_by_html_name ($charset_name); |
| 511 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
| 512 |
($byte_stream, allow_error_reporting => 1, allow_fallback => 1, |
($byte_stream, allow_error_reporting => 1, allow_fallback => 1, |
| 513 |
byte_buffer => \ $buffer->{buffer}); |
byte_buffer => \ $buffer->{buffer}); |
| 518 |
## Step 1 |
## Step 1 |
| 519 |
if ($charset->{category} & |
if ($charset->{category} & |
| 520 |
Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
| 521 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-8'); |
$charset = Message::Charset::Info->get_by_html_name ('utf-8'); |
| 522 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
| 523 |
($byte_stream, |
($byte_stream, |
| 524 |
byte_buffer => \ $buffer->{buffer}); |
byte_buffer => \ $buffer->{buffer}); |