/[suikacvs]/test/html-webhacc/WebHACC/Language/HTML.pm
Suika

Contents of /test/html-webhacc/WebHACC/Language/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (hide annotations) (download)
Thu Sep 11 09:13:57 2008 UTC (16 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.9: +10 -5 lines
++ ChangeLog	11 Sep 2008 09:13:08 -0000
2008-09-11  Wakaba  <wakaba@suika.fam.cx>

	* error-description-source.xml: Unicode checker
	errors are added.

++ html/WebHACC/Language/ChangeLog	11 Sep 2008 09:13:53 -0000
2008-09-11  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Support for the Unicode character checking.

++ html/WebHACC/ChangeLog	11 Sep 2008 09:13:31 -0000
2008-09-11  Wakaba  <wakaba@suika.fam.cx>

	* Result.pm (generate_result_section): Initial
	support for the charset layer.

1 wakaba 1.1 package WebHACC::Language::HTML;
2     use strict;
3     require WebHACC::Language::DOM;
4     push our @ISA, 'WebHACC::Language::DOM';
5    
## Constructor.  Returns a new, empty hash-based object blessed into
## the class on which the method was invoked.
sub new ($) {
  my $class = shift;
  my $self = {};
  return bless $self, $class;
} # new
9    
## Parses the input as HTML and reports what the parser complains about
## inside a "parse-errors" section of the output.
##
## When the input names an |inner_html_element|, the source is parsed as
## the inner HTML of a freshly created XHTML-namespace element with that
## local name; otherwise it is parsed as a whole document, either from a
## character string or from a byte string, depending on the input's
## |is_char_string| flag.  The parsed node is stored in
## |$self->{structure}|.
sub generate_syntax_error_section ($) {
  my $self = shift;

  require Message::DOM::DOMImplementation;
  require Encode;
  require Whatpm::HTML;

  my $out = $self->output;
  $out->start_section (role => 'parse-errors');
  $out->start_error_list (role => 'parse-errors');
  $self->result->layer_applicable ('syntax');

  my $input = $self->input;
  my $result = $self->result;

  ## Error callback handed to the parser.  Every report is recorded as
  ## a syntax-layer error; the two character decoding notifications
  ## additionally mark the affected layers as uncertain.
  my $report_error = sub {
    my %error = @_;
    $result->add_error (layer => 'syntax', %error);

    if ($error{type} eq 'chardecode:no error') {
      $self->result->layer_uncertain ('encode');
    } elsif ($error{type} eq 'chardecode:fallback') {
      for my $layer (qw(charset syntax structure semantics)) {
        $self->result->layer_uncertain ($layer);
      }
    }
  }; # $report_error

  ## Callback handed to the parser; it passes its argument to the
  ## Unicode checker's |new_handle| and returns the result.
  $self->result->layer_applicable ('charset');
  my $wrap_handle = sub ($) {
    require Whatpm::Charset::UnicodeChecker;
    return Whatpm::Charset::UnicodeChecker->new_handle ($_[0]);
  }; # $wrap_handle

  my $impl = Message::DOM::DOMImplementation->new;
  my $doc = $impl->create_document;
  my $context_name = $input->{inner_html_element};
  if (defined $context_name and length $context_name) {
    ## Fragment mode: parse as the inner HTML of a context element.
    $input->{charset} ||= 'utf-8';
    my $source_ref = \($input->{s});
    unless ($input->{is_char_string}) {
      $source_ref = \(Encode::decode ($input->{charset}, $$source_ref));
      $self->result->layer_applicable ('encode');
    }

    my $context_el = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, $context_name]);
    Whatpm::HTML->set_inner_html
        ($context_el, $$source_ref, $report_error, $wrap_handle);

    $self->{structure} = $context_el;
    ## NOTE: The document has to be kept alive explicitly.  The
    ## element's |owner_document| is only a weak reference, so without
    ## this the document would be garbage collected while the element
    ## is still in use.
    $self->{_structure_root} = $doc;
  } elsif ($input->{is_char_string}) {
    ## Document mode, input already decoded into characters.
    Whatpm::HTML->parse_char_string
        ($input->{s} => $doc, $report_error, $wrap_handle);

    $self->{structure} = $doc;
  } else {
    ## Document mode, input is a byte string.
    $self->result->layer_applicable ('encode');
    Whatpm::HTML->parse_byte_string
        ($input->{charset}, $input->{s} => $doc,
         $report_error, $wrap_handle);

    $self->{structure} = $doc;
  }

  if (defined $input->{official_charset}) {
    $doc->manakai_charset ($input->{official_charset});
  }

  $doc->document_uri ($input->url);
  $doc->manakai_entity_base_uri ($input->{base_uri});

  if ($input->isa ('WebHACC::Input::Text')) {
    $doc->input_encoding (undef);
  }

  $out->end_error_list (role => 'parse-errors');
  $out->end_section;
} # generate_syntax_error_section
89    
## Returns the charset recorded on the input if it is a true value.
## Otherwise returns the |input_encoding| of the document that owns the
## parsed structure, or of the structure itself when it reports no
## owner document.
sub source_charset ($) {
  my $self = shift;

  my $declared = $self->input->{charset};
  return $declared if $declared;

  my $node = $self->{structure};
  my $doc = $node->owner_document || $node;
  return $doc->input_encoding;
  ## TODO: Can we always use input_encoding?
} # source_charset
95    
96     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24