1 |
package WebHACC::Language::HTML; |
2 |
use strict; |
3 |
require WebHACC::Language::DOM; |
4 |
push our @ISA, 'WebHACC::Language::DOM'; |
5 |
|
6 |
require Message::DOM::DOMImplementation; |
7 |
|
8 |
## Constructor.  Creates a fresh, empty hash-based object and blesses it
## into the class given as the invocant.
sub new ($) {
  my $class = shift;
  my $self = {};
  return bless $self, $class;
} # new
11 |
|
12 |
## Parses the input with |Whatpm::HTML| and stores the parsed DOM in
## |$self->{structure}|.  The parser is handed an error callback that
## forwards its arguments to |$result->add_error| with
## |layer => 'syntax'| appended.
##
## If |$input->{inner_html_element}| is a non-empty string, the input is
## handed to |set_inner_html| on a newly created XHTML-namespace element
## of that name, and that element becomes the structure.  Otherwise the
## input is parsed into the document (as a character string or as a byte
## string, depending on |$input->{is_char_string}|) and the document
## becomes the structure.
##
## The whole operation is bracketed by a "parse-errors" section and error
## list on the output object.
sub generate_syntax_error_section ($) {
  my $self = shift;

  require Encode;
  require Whatpm::HTML;

  my $out = $self->output;
  $out->start_section (role => 'parse-errors');
  $out->start_error_list (role => 'parse-errors');

  my $input = $self->input;
  my $result = $self->result;

  ## Error callback given to the parser.
  my $report_error = sub {
    $result->add_error (@_, layer => 'syntax');
  };

  ## The implementation object is kept in a variable so that it stays
  ## alive until the end of this method.
  my $impl = Message::DOM::DOMImplementation->new;
  my $doc = $impl->create_document;

  my $context_name = $input->{inner_html_element};
  my $is_fragment = (defined $context_name && length $context_name);

  if ($is_fragment) {
    $input->{charset} ||= 'windows-1252'; ## TODO: for now.

    ## Refer to the source text through a reference; a byte string is
    ## decoded into a separate character string first.
    my $text_ref = \($input->{s});
    $text_ref = \(Encode::decode ($input->{charset}, $$text_ref))
        if not $input->{is_char_string};

    my $context_el = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, $context_name]);
    Whatpm::HTML->set_inner_html ($context_el, $$text_ref, $report_error);

    $self->{structure} = $context_el;
  } elsif ($input->{is_char_string}) {
    Whatpm::HTML->parse_char_string ($input->{s} => $doc, $report_error);

    $self->{structure} = $doc;
  } else {
    Whatpm::HTML->parse_byte_string
        ($input->{charset}, $input->{s} => $doc, $report_error);

    $self->{structure} = $doc;
  }

  if (defined $input->{official_charset}) {
    $doc->manakai_charset ($input->{official_charset});
  }

  $doc->document_uri ($input->{uri});
  $doc->manakai_entity_base_uri ($input->{base_uri});

  $out->end_error_list (role => 'parse-errors');
  $out->end_section;
} # generate_syntax_error_section
64 |
|
65 |
## Returns the charset recorded on the input if it is a true value.
## Otherwise returns the |input_encoding| of the owner document of
## |$self->{structure}|, or of the structure itself when it has no
## owner document.
sub source_charset ($) {
  my $self = shift;

  ## A charset recorded on the input takes precedence; the parsed
  ## structure is not consulted in that case.
  my $declared = $self->input->{charset};
  return $declared if $declared;

  my $structure = $self->{structure};
  my $doc = $structure->owner_document || $structure;
  return $doc->input_encoding;
  ## TODO: Can we always use input_encoding?
} # source_charset
70 |
|
71 |
1; |