372 |
my ($char_stream, $e_status); |
my ($char_stream, $e_status); |
373 |
|
|
374 |
SNIFFING: { |
SNIFFING: { |
375 |
|
## NOTE: By setting |allow_fallback| option true when the |
376 |
|
## |get_decode_handle| method is invoked, we ignore what the HTML5 |
377 |
|
## spec requires, i.e. unsupported encoding should be ignored. |
378 |
|
## TODO: We should not do this unless the parser is invoked |
379 |
|
## in the conformance checking mode, in which this behavior |
380 |
|
## would be useful. |
381 |
|
|
382 |
## Step 1 |
## Step 1 |
383 |
if (defined $charset_name) { |
if (defined $charset_name) { |
481 |
$self->{confident} = 0; |
$self->{confident} = 0; |
482 |
} # SNIFFING |
} # SNIFFING |
483 |
|
|
|
$self->{input_encoding} = $charset->get_iana_name; |
|
484 |
if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) { |
if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) { |
485 |
|
$self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name? |
486 |
!!!parse-error (type => 'chardecode:fallback', |
!!!parse-error (type => 'chardecode:fallback', |
487 |
text => $self->{input_encoding}, |
#text => $self->{input_encoding}, |
488 |
level => $self->{level}->{uncertain}, |
level => $self->{level}->{uncertain}, |
489 |
line => 1, column => 1, |
line => 1, column => 1, |
490 |
layer => 'encode'); |
layer => 'encode'); |
491 |
} elsif (not ($e_status & |
} elsif (not ($e_status & |
492 |
Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) { |
Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) { |
493 |
|
$self->{input_encoding} = $charset->get_iana_name; |
494 |
!!!parse-error (type => 'chardecode:no error', |
!!!parse-error (type => 'chardecode:no error', |
495 |
text => $self->{input_encoding}, |
text => $self->{input_encoding}, |
496 |
level => $self->{level}->{uncertain}, |
level => $self->{level}->{uncertain}, |
497 |
line => 1, column => 1, |
line => 1, column => 1, |
498 |
layer => 'encode'); |
layer => 'encode'); |
499 |
|
} else { |
500 |
|
$self->{input_encoding} = $charset->get_iana_name; |
501 |
} |
} |
502 |
|
|
503 |
$self->{change_encoding} = sub { |
$self->{change_encoding} = sub { |
569 |
} catch Whatpm::HTML::RestartParser with { |
} catch Whatpm::HTML::RestartParser with { |
570 |
## NOTE: Invoked after {change_encoding}. |
## NOTE: Invoked after {change_encoding}. |
571 |
|
|
|
$self->{input_encoding} = $charset->get_iana_name; |
|
572 |
if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) { |
if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) { |
573 |
|
$self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name? |
574 |
!!!parse-error (type => 'chardecode:fallback', |
!!!parse-error (type => 'chardecode:fallback', |
|
text => $self->{input_encoding}, |
|
575 |
level => $self->{level}->{uncertain}, |
level => $self->{level}->{uncertain}, |
576 |
|
#text => $self->{input_encoding}, |
577 |
line => 1, column => 1, |
line => 1, column => 1, |
578 |
layer => 'encode'); |
layer => 'encode'); |
579 |
} elsif (not ($e_status & |
} elsif (not ($e_status & |
580 |
Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) { |
Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) { |
581 |
|
$self->{input_encoding} = $charset->get_iana_name; |
582 |
!!!parse-error (type => 'chardecode:no error', |
!!!parse-error (type => 'chardecode:no error', |
583 |
text => $self->{input_encoding}, |
text => $self->{input_encoding}, |
584 |
level => $self->{level}->{uncertain}, |
level => $self->{level}->{uncertain}, |
585 |
line => 1, column => 1, |
line => 1, column => 1, |
586 |
layer => 'encode'); |
layer => 'encode'); |
587 |
|
} else { |
588 |
|
$self->{input_encoding} = $charset->get_iana_name; |
589 |
} |
} |
590 |
$self->{confident} = 1; |
$self->{confident} = 1; |
591 |
$char_stream->onerror ($char_onerror); |
$char_stream->onerror ($char_onerror); |