/[suikacvs]/markup/html/whatpm/t/HTML-tree.t
Suika

Contents of /markup/html/whatpm/t/HTML-tree.t

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.28 - (hide annotations) (download) (as text)
Sun Apr 13 10:36:41 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.27: +2 -2 lines
File MIME type: application/x-troff
++ whatpm/t/ChangeLog	13 Apr 2008 10:11:49 -0000
	* HTML-tokenizer.t: Raise a parse error if there are disallowed
	character (for compatibility with existing html5lib test data).

	* tokenizer-test-1.test: Some test results are updated with
	regard to parse errors on disallowed characters.

	* tokenizer-test-2.dat: Test data for disallowed characters
	are added (HTML5 revision 1263).

2008-04-13  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	13 Apr 2008 10:12:20 -0000
	* HTML.pm.src: Raise an parse error for any disallowed
	character (HTML5 revision 1263).

2008-04-13  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4 wakaba 1.23 my $DEBUG = $ENV{DEBUG};
5    
6 wakaba 1.1 my $dir_name;
7 wakaba 1.2 my $test_dir_name;
8 wakaba 1.1 BEGIN {
9 wakaba 1.2 $test_dir_name = 't/';
10 wakaba 1.1 $dir_name = 't/tree-construction/';
11     my $skip = "You don't have make command";
12     eval q{
13     system ("cd $test_dir_name; make tree-construction-files") == 0 or die
14     unless -f $dir_name.'tests1.dat';
15     $skip = '';
16     };
17     if ($skip) {
18     print "1..1\n";
19     print "ok 1 # $skip\n";
20     exit;
21     }
22     }
23    
24     use Test;
25 wakaba 1.28 BEGIN { plan tests => 1920 }
26 wakaba 1.1
27     use Data::Dumper;
28     $Data::Dumper::Useqq = 1;
29     sub Data::Dumper::qquote {
30     my $s = shift;
31     $s =~ s/([^\x20\x21-\x26\x28-\x5B\x5D-\x7E])/sprintf '\x{%02X}', ord $1/ge;
32     return q<qq'> . $s . q<'>;
33     } # Data::Dumper::qquote
34    
35 wakaba 1.23
36     if ($DEBUG) {
37 wakaba 1.25 my $not_found = {%{$Whatpm::HTML::Debug::cp or {}}};
38 wakaba 1.23 $Whatpm::HTML::Debug::cp_pass = sub {
39     my $id = shift;
40     delete $not_found->{$id};
41     };
42    
43     END {
44     for my $id (sort {$a <=> $b || $a cmp $b} keys %$not_found) {
45     print "# checkpoint $id is not reached\n";
46     }
47     }
48     }
49    
50 wakaba 1.2 for my $file_name (grep {$_} split /\s+/, qq[
51 wakaba 1.12 ${test_dir_name}tokenizer-test-2.dat
52 wakaba 1.2 ${dir_name}tests1.dat
53     ${dir_name}tests2.dat
54     ${dir_name}tests3.dat
55     ${dir_name}tests4.dat
56 wakaba 1.11 ${dir_name}tests5.dat
57     ${dir_name}tests6.dat
58 wakaba 1.27 ${dir_name}tests7.dat
59 wakaba 1.2 ${test_dir_name}tree-test-1.dat
60 wakaba 1.14 ${test_dir_name}tree-test-2.dat
61 wakaba 1.1 ]) {
62 wakaba 1.2 open my $file, '<', $file_name
63     or die "$0: $file_name: $!";
64 wakaba 1.13 print "# $file_name\n";
65 wakaba 1.1
66     my $test;
67     my $mode = 'data';
68 wakaba 1.12 my $escaped;
69 wakaba 1.1 while (<$file>) {
70     s/\x0D\x0A/\x0A/;
71     if (/^#data$/) {
72     undef $test;
73     $test->{data} = '';
74     $mode = 'data';
75 wakaba 1.12 undef $escaped;
76     } elsif (/^#data escaped$/) {
77     undef $test;
78     $test->{data} = '';
79     $mode = 'data';
80     $escaped = 1;
81 wakaba 1.1 } elsif (/^#errors$/) {
82     $test->{errors} = [];
83     $mode = 'errors';
84 wakaba 1.13 $test->{data} =~ s/\x0D?\x0A\z//;
85     $test->{data} =~ s/\\u([0-9A-Fa-f]{4})/chr hex $1/ge if $escaped;
86 wakaba 1.24 $test->{data} =~ s/\\U([0-9A-Fa-f]{8})/chr hex $1/ge if $escaped;
87 wakaba 1.12 undef $escaped;
88 wakaba 1.1 } elsif (/^#document$/) {
89     $test->{document} = '';
90     $mode = 'document';
91 wakaba 1.12 undef $escaped;
92     } elsif (/^#document escaped$/) {
93     $test->{document} = '';
94     $mode = 'document';
95     $escaped = 1;
96 wakaba 1.19 } elsif (/^#document-fragment$/) {
97     $test->{element} = '';
98     $mode = 'element';
99     undef $escaped;
100 wakaba 1.5 } elsif (/^#document-fragment (\S+)$/) {
101     $test->{document} = '';
102     $mode = 'document';
103     $test->{element} = $1;
104 wakaba 1.12 undef $escaped;
105     } elsif (/^#document-fragment (\S+) escaped$/) {
106     $test->{document} = '';
107     $mode = 'document';
108     $test->{element} = $1;
109     $escaped = 1;
110 wakaba 1.2 } elsif (defined $test->{document} and /^$/) {
111 wakaba 1.13 $test->{document} =~ s/\\u([0-9A-Fa-f]{4})/chr hex $1/ge if $escaped;
112 wakaba 1.24 $test->{document} =~ s/\\U([0-9A-Fa-f]{8})/chr hex $1/ge if $escaped;
113 wakaba 1.2 test ($test);
114 wakaba 1.1 undef $test;
115     } else {
116     if ($mode eq 'data' or $mode eq 'document') {
117 wakaba 1.13 $test->{$mode} .= $_;
118 wakaba 1.19 } elsif ($mode eq 'element') {
119     tr/\x0D\x0A//d;
120     $test->{$mode} .= $_;
121 wakaba 1.1 } elsif ($mode eq 'errors') {
122     tr/\x0D\x0A//d;
123     push @{$test->{errors}}, $_;
124     }
125     }
126     }
127     test ($test) if $test->{errors};
128     }
129    
130 wakaba 1.4 use Whatpm::HTML;
131     use Whatpm::NanoDOM;
132 wakaba 1.1
133     sub test ($) {
134     my $test = shift;
135    
136 wakaba 1.4 my $doc = Whatpm::NanoDOM::Document->new;
137 wakaba 1.1 my @errors;
138    
139     $SIG{INT} = sub {
140 wakaba 1.3 print scalar serialize ($doc);
141 wakaba 1.1 exit;
142     };
143 wakaba 1.3
144 wakaba 1.5 my $onerror = sub {
145     my %opt = @_;
146     push @errors, join ':', $opt{line}, $opt{column}, $opt{type};
147     };
148     my $result;
149     unless (defined $test->{element}) {
150     Whatpm::HTML->parse_string ($test->{data} => $doc, $onerror);
151     $result = serialize ($doc);
152     } else {
153     my $el = $doc->create_element_ns
154     ('http://www.w3.org/1999/xhtml', [undef, $test->{element}]);
155     Whatpm::HTML->set_inner_html ($el, $test->{data}, $onerror);
156     $result = serialize ($el);
157     }
158    
159 wakaba 1.1 ok scalar @errors, scalar @{$test->{errors}},
160 wakaba 1.21 'Parse error: ' . Data::Dumper::qquote ($test->{data}) . '; ' .
161 wakaba 1.1 join (', ', @errors) . ';' . join (', ', @{$test->{errors}});
162    
163 wakaba 1.21 ok $result, $test->{document},
164     'Document tree: ' . Data::Dumper::qquote ($test->{data});
165 wakaba 1.1 } # test
166    
167     sub serialize ($) {
168     my $node = shift;
169     my $r = '';
170    
171     my @node = map { [$_, ''] } @{$node->child_nodes};
172     while (@node) {
173     my $child = shift @node;
174     my $nt = $child->[0]->node_type;
175     if ($nt == $child->[0]->ELEMENT_NODE) {
176     $r .= '| ' . $child->[1] . '<' . $child->[0]->tag_name . ">\x0A"; ## ISSUE: case?
177    
178 wakaba 1.2 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value] }
179 wakaba 1.1 @{$child->[0]->attributes}) {
180     $r .= '| ' . $child->[1] . ' ' . $attr->[0] . '="'; ## ISSUE: case?
181     $r .= $attr->[1] . '"' . "\x0A";
182     }
183    
184     unshift @node,
185     map { [$_, $child->[1] . ' '] } @{$child->[0]->child_nodes};
186     } elsif ($nt == $child->[0]->TEXT_NODE) {
187     $r .= '| ' . $child->[1] . '"' . $child->[0]->data . '"' . "\x0A";
188     } elsif ($nt == $child->[0]->COMMENT_NODE) {
189     $r .= '| ' . $child->[1] . '<!-- ' . $child->[0]->data . " -->\x0A";
190     } elsif ($nt == $child->[0]->DOCUMENT_TYPE_NODE) {
191 wakaba 1.24 $r .= '| ' . $child->[1] . '<!DOCTYPE ' . $child->[0]->name;
192     my $pubid = $child->[0]->public_id;
193     $r .= ' PUBLIC "' . $pubid . '"' if length $pubid;
194     my $sysid = $child->[0]->system_id;
195     $r .= ' SYSTEM' if not length $pubid and length $sysid;
196     $r .= ' "' . $sysid . '"' if length $sysid;
197     $r .= ">\x0A";
198 wakaba 1.1 } else {
199     $r .= '| ' . $child->[1] . $child->[0]->node_type . "\x0A"; # error
200     }
201     }
202    
203     return $r;
204     } # serialize
205    
206     ## License: Public Domain.
207 wakaba 1.28 ## $Date: 2008/04/13 06:44:27 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24