25 |
my $result = $self->result; |
my $result = $self->result; |
26 |
|
|
27 |
my $onerror = sub { |
my $onerror = sub { |
28 |
$result->add_error (@_, layer => 'syntax'); |
my %opt = @_; |
29 |
|
$result->add_error (layer => 'syntax', %opt); |
30 |
|
|
31 |
|
if ($opt{type} eq 'chardecode:no error') { |
32 |
|
$self->result->layer_uncertain ('encode'); |
33 |
|
} elsif ($opt{type} eq 'chardecode:fallback') { |
34 |
|
$self->result->layer_uncertain ('charset'); |
35 |
|
$self->result->layer_uncertain ('syntax'); |
36 |
|
$self->result->layer_uncertain ('structure'); |
37 |
|
$self->result->layer_uncertain ('semantics'); |
38 |
|
} |
39 |
}; |
}; |
40 |
|
|
41 |
my $dom = Message::DOM::DOMImplementation->new; |
my $dom = Message::DOM::DOMImplementation->new; |
43 |
my $el; |
my $el; |
44 |
my $inner_html_element = $input->{inner_html_element}; |
my $inner_html_element = $input->{inner_html_element}; |
45 |
if (defined $inner_html_element and length $inner_html_element) { |
if (defined $inner_html_element and length $inner_html_element) { |
46 |
$input->{charset} ||= 'windows-1252'; ## TODO: for now. |
$input->{charset} ||= 'utf-8'; |
47 |
my $t = \($input->{s}); |
my $t = \($input->{s}); |
48 |
unless ($input->{is_char_string}) { |
unless ($input->{is_char_string}) { |
49 |
$t = \(Encode::decode ($input->{charset}, $$t)); |
$t = \(Encode::decode ($input->{charset}, $$t)); |
50 |
$self->result->layer_uncertain ('encode'); |
$self->result->layer_applicable ('encode'); |
51 |
} |
} |
52 |
|
|
53 |
$el = $doc->create_element_ns |
$el = $doc->create_element_ns |
55 |
Whatpm::HTML->set_inner_html ($el, $$t, $onerror); |
Whatpm::HTML->set_inner_html ($el, $$t, $onerror); |
56 |
|
|
57 |
$self->{structure} = $el; |
$self->{structure} = $el; |
58 |
|
$self->{_structure_root} = $doc; |
59 |
|
## NOTE: This is necessary, otherwise it would be garbage collected |
60 |
|
## before $el is useless, since $el->owner_document is only a weak |
61 |
|
## reference. |
62 |
} else { |
} else { |
63 |
if ($input->{is_char_string}) { |
if ($input->{is_char_string}) { |
64 |
Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror); |
Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror); |
65 |
} else { |
} else { |
66 |
|
$self->result->layer_applicable ('encode'); |
67 |
Whatpm::HTML->parse_byte_string |
Whatpm::HTML->parse_byte_string |
68 |
($input->{charset}, $input->{s} => $doc, $onerror); |
($input->{charset}, $input->{s} => $doc, $onerror); |
|
$self->result->layer_uncertain ('encode'); |
|
69 |
} |
} |
70 |
|
|
71 |
$self->{structure} = $doc; |
$self->{structure} = $doc; |