28 |
|
|
29 |
if ($opt{type} eq 'chardecode:no error') { |
if ($opt{type} eq 'chardecode:no error') { |
30 |
$self->result->layer_uncertain ('encode'); |
$self->result->layer_uncertain ('encode'); |
31 |
} elsif ($opt{type} eq 'chardecode:fallback') { |
} elsif ($opt{type} eq 'chardecode:fallback' or |
32 |
|
$opt{type} eq 'charset:not supported') { |
33 |
$self->result->layer_uncertain ('charset'); |
$self->result->layer_uncertain ('charset'); |
34 |
$self->result->layer_uncertain ('syntax'); |
$self->result->layer_uncertain ('syntax'); |
35 |
$self->result->layer_uncertain ('structure'); |
$self->result->layer_uncertain ('structure'); |
79 |
$doc->manakai_charset ($input->{official_charset}) |
$doc->manakai_charset ($input->{official_charset}) |
80 |
if defined $input->{official_charset}; |
if defined $input->{official_charset}; |
81 |
|
|
82 |
|
## TODO: We need to issue some warning if media type/charset is |
83 |
|
## explicitly overridden by the user. |
84 |
|
|
85 |
$doc->document_uri ($input->url); |
$doc->document_uri ($input->url); |
86 |
$doc->manakai_entity_base_uri ($input->{base_uri}); |
$doc->manakai_entity_base_uri ($input->{base_uri}); |
87 |
|
|
93 |
|
|
94 |
sub source_charset ($) { |
sub source_charset ($) { |
95 |
my $self = shift; |
my $self = shift; |
96 |
return $self->input->{charset} || ($self->{structure}->owner_document || $self->{structure})->input_encoding; |
return (($self->{structure}->owner_document || $self->{structure})->input_encoding || $self->input->{charset}); |
97 |
## TODO: Can we always use input_encoding? |
## TODO: We need some way to get the source charset reliably. The |
98 |
|
## |input_encoding| DOM attribute might be intentionally left blank |
99 |
|
## when the input is the direct input form, but even in that case the |
100 |
|
## charset information should be useful because the input string |
101 |
|
## might be a byte sequence. In addition, the |input_encoding| does |
102 |
|
## not reflect the fallback encoding in use. On the contrary, the |
103 |
|
## |{charset}| property of the |input| object is always the value |
104 |
|
## from the lower-level protocol and that might be ignored by the |
105 |
|
## HTML sniffer. |
106 |
} # source_charset |
} # source_charset |
107 |
|
|
108 |
1; |
1; |