/[suikacvs]/test/html-webhacc/WebHACC/Language/HTML.pm
Suika

Contents of /test/html-webhacc/WebHACC/Language/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (hide annotations) (download)
Sun Sep 21 05:09:35 2008 UTC (16 years, 9 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.11: +15 -3 lines
++ ChangeLog	21 Sep 2008 05:08:51 -0000
2008-09-21  Wakaba  <wakaba@suika.fam.cx>

	* error-description-source.xml: An error for non-supported
	charset="" parameter is added.

++ html/WebHACC/Language/ChangeLog	21 Sep 2008 05:09:27 -0000
2008-09-21  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm (generate_syntax_error_section): Now |charset:not
	supported| is also an error that might invalidate the validation
	result.

1 wakaba 1.1 package WebHACC::Language::HTML;
2     use strict;
3     require WebHACC::Language::DOM;
4     push our @ISA, 'WebHACC::Language::DOM';
5    
## Constructor.  Returns a new object backed by an empty hash, blessed
## into the class on which |new| was invoked.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  return $self;
} # new
9    
## Parses the input as HTML and reports the parse errors inside a
## |parse-errors| section of the output.
##
## When the input has a non-empty |inner_html_element|, the input is
## parsed as the inner HTML of an element with that local name and the
## element is stored in |$self->{structure}|.  Otherwise the input is
## parsed as a whole document and the document node is stored there.
sub generate_syntax_error_section ($) {
  my $self = shift;

  require Message::DOM::DOMImplementation;
  require Encode;
  require Whatpm::HTML;

  my $out = $self->output;
  $out->start_section (role => 'parse-errors');
  $out->start_error_list (role => 'parse-errors');
  $self->result->layer_applicable ('syntax');

  my $input = $self->input;
  my $result = $self->result;

  ## Error handler handed to the parser.  Every error is recorded in
  ## the |syntax| layer; some error types additionally mark layers as
  ## uncertain.
  my $onerror = sub {
    my %error = @_;
    $result->add_error (layer => 'syntax', %error);

    if ($error{type} eq 'chardecode:no error') {
      $self->result->layer_uncertain ('encode');
    } elsif ($error{type} eq 'chardecode:fallback' or
             $error{type} eq 'charset:not supported') {
      ## These errors might invalidate the validation result of the
      ## layers listed below.
      $self->result->layer_uncertain ('charset');
      $self->result->layer_uncertain ('syntax');
      $self->result->layer_uncertain ('structure');
      $self->result->layer_uncertain ('semantics');
    }
  };

  $self->result->layer_applicable ('charset');

  ## Wraps the handle given as the first argument in a
  ## |Whatpm::Charset::UnicodeChecker| handle (|html5| mode).
  my $char_checker = sub ($) {
    require Whatpm::Charset::UnicodeChecker;
    return Whatpm::Charset::UnicodeChecker->new_handle ($_[0], 'html5');
  }; # $char_checker

  my $impl = Message::DOM::DOMImplementation->new;
  my $doc = $impl->create_document;

  my $context_name = $input->{inner_html_element};
  my $is_fragment = (defined ($context_name) && length ($context_name));

  if ($is_fragment) {
    ## Fragment mode: the input is the inner HTML of a context element.
    $input->{charset} ||= 'utf-8';

    my $text_ref = \($input->{s});
    unless ($input->{is_char_string}) {
      ## A byte string has to be decoded before it is given to
      ## |set_inner_html|.
      $text_ref = \(Encode::decode ($input->{charset}, $$text_ref));
      $self->result->layer_applicable ('encode');
    }

    my $context_el = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, $context_name]);
    Whatpm::HTML->set_inner_html
        ($context_el, $$text_ref, $onerror, $char_checker);

    $self->{structure} = $context_el;
    $self->{_structure_root} = $doc;
        ## NOTE: Keeping a reference to the document is necessary.
        ## Otherwise the document would be garbage collected while the
        ## element is still in use, since the element's
        ## |owner_document| is only a weak reference.
  } else {
    ## Document mode: the input is parsed as an entire HTML document.
    if ($input->{is_char_string}) {
      Whatpm::HTML->parse_char_string
          ($input->{s} => $doc, $onerror, $char_checker);
    } else {
      $self->result->layer_applicable ('encode');
      Whatpm::HTML->parse_byte_string
          ($input->{charset}, $input->{s} => $doc,
           $onerror, $char_checker);
    }

    $self->{structure} = $doc;
  }

  if (defined $input->{official_charset}) {
    $doc->manakai_charset ($input->{official_charset});
  }

  ## TODO: We need to issue some warning if the media type or the
  ## charset is explicitly overridden by the user.

  $doc->document_uri ($input->url);
  $doc->manakai_entity_base_uri ($input->{base_uri});

  if ($input->isa ('WebHACC::Input::Text')) {
    $doc->input_encoding (undef);
  }

  $out->end_error_list (role => 'parse-errors');
  $out->end_section;
} # generate_syntax_error_section
93    
## Returns the charset of the source: the |input_encoding| of the
## document that owns |$self->{structure}| (or of |$self->{structure}|
## itself when it has no owner document) if that value is true, and the
## |{charset}| property of the input object otherwise.
sub source_charset ($) {
  my $self = shift;

  my $node = $self->{structure};
  my $doc = $node->owner_document || $node;

  return ($doc->input_encoding || $self->input->{charset});
  ## TODO: We need some way to get the source charset reliably.  The
  ## |input_encoding| DOM attribute might be intentionally left blank
  ## when the input comes from the direct input form, but even in that
  ## case the charset information would be useful, because the input
  ## string might be a byte sequence.  In addition, |input_encoding|
  ## does not reflect the fallback encoding in use.  On the other
  ## hand, the |{charset}| property of the |input| object is always
  ## the value from the lower-level protocol, and that value might be
  ## ignored by the HTML sniffer.
} # source_charset
107    
108     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24