/[suikacvs]/test/html-webhacc/WebHACC/Language/HTML.pm
Suika

Contents of /test/html-webhacc/WebHACC/Language/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11 - (show annotations) (download)
Mon Sep 15 07:20:40 2008 UTC (16 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.10: +1 -1 lines
++ ChangeLog	15 Sep 2008 07:20:09 -0000
2008-09-15  Wakaba  <wakaba@suika.fam.cx>

	* error-description-source.xml: "control char" errors are now
	handled by UnicodeChecker module.  New "non unicode" error added.

++ html/WebHACC/Language/ChangeLog	15 Sep 2008 07:20:35 -0000
2008-09-15  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Set the mode of the UnicodeChecker as HTML5.

1 package WebHACC::Language::HTML;
2 use strict;
3 require WebHACC::Language::DOM;
4 push our @ISA, 'WebHACC::Language::DOM';
5
## Constructor: returns a new, empty hash-based object blessed into the
## invocant's class.
sub new ($) {
  my $class = shift;
  my $self = {};
  return bless $self, $class;
} # new
9
## Parses the input as HTML, either as a whole document or, when the
## input has a non-empty |inner_html_element|, as the inner HTML of an
## element of that name.  Parse errors are added to the result object
## on the |syntax| layer and the parsed DOM is kept in
## |$self->{structure}|.  The output is wrapped in a |parse-errors|
## section and error list.
sub generate_syntax_error_section ($) {
  my $self = shift;

  require Message::DOM::DOMImplementation;
  require Encode;
  require Whatpm::HTML;

  my $out = $self->output;
  $out->start_section (role => 'parse-errors');
  $out->start_error_list (role => 'parse-errors');
  $self->result->layer_applicable ('syntax');

  my $input = $self->input;
  my $result = $self->result;

  ## Layers to be flagged with |layer_uncertain| when an error of the
  ## given type is reported.
  my %uncertain_layers = (
    'chardecode:no error' => ['encode'],
    'chardecode:fallback' => ['charset', 'syntax', 'structure', 'semantics'],
  );

  ## Error callback handed to the parser.
  my $onerror = sub {
    my %error = @_;
    $result->add_error (layer => 'syntax', %error);

    my $layers = $uncertain_layers{$error{type}} || [];
    for my $layer (@$layers) {
      $self->result->layer_uncertain ($layer);
    }
  }; # $onerror

  $self->result->layer_applicable ('charset');

  ## Wraps the handle given by the parser in a UnicodeChecker handle
  ## running in the |html5| mode.
  my $char_checker = sub ($) {
    require Whatpm::Charset::UnicodeChecker;
    return Whatpm::Charset::UnicodeChecker->new_handle ($_[0], 'html5');
  }; # $char_checker

  my $impl = Message::DOM::DOMImplementation->new;
  my $doc = $impl->create_document;

  my $context_name = $input->{inner_html_element};
  my $is_fragment = (defined $context_name and length $context_name);

  if (not $is_fragment) {
    ## Whole document.
    if ($input->{is_char_string}) {
      Whatpm::HTML->parse_char_string
          ($input->{s} => $doc, $onerror, $char_checker);
    } else {
      $self->result->layer_applicable ('encode');
      Whatpm::HTML->parse_byte_string
          ($input->{charset}, $input->{s} => $doc, $onerror, $char_checker);
    }

    $self->{structure} = $doc;
  } else {
    ## Fragment: the input becomes the inner HTML of a new element.
    $input->{charset} ||= 'utf-8';

    my $text_ref = \($input->{s});
    if (not $input->{is_char_string}) {
      my ($decoded) = Encode::decode ($input->{charset}, $$text_ref);
      $text_ref = \$decoded;
      $self->result->layer_applicable ('encode');
    }

    my $el = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, $context_name]);
    Whatpm::HTML->set_inner_html
        ($el, $$text_ref, $onerror, $char_checker);

    $self->{structure} = $el;

    ## NOTE: The document has to be referenced from somewhere for as
    ## long as $el is in use; $el->owner_document is only a weak
    ## reference, so the document would otherwise be garbage collected.
    $self->{_structure_root} = $doc;
  }

  my $official_charset = $input->{official_charset};
  if (defined $official_charset) {
    $doc->manakai_charset ($official_charset);
  }

  $doc->document_uri ($input->url);
  $doc->manakai_entity_base_uri ($input->{base_uri});

  if ($input->isa ('WebHACC::Input::Text')) {
    $doc->input_encoding (undef);
  }

  $out->end_error_list (role => 'parse-errors');
  $out->end_section;
} # generate_syntax_error_section
89
## Returns the charset recorded on the input object if it is true;
## otherwise the |input_encoding| of the owner document of the parsed
## structure (or of the structure itself, if it has no owner document).
sub source_charset ($) {
  my $self = shift;

  my $input_charset = $self->input->{charset};
  return $input_charset if $input_charset;

  my $node = $self->{structure};
  my $doc = $node->owner_document || $node;
  return $doc->input_encoding;
  ## TODO: Can we always use input_encoding?
} # source_charset
95
96 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24