/[suikacvs]/markup/html/whatpm/t/HTML-tree.t
Suika

Contents of /markup/html/whatpm/t/HTML-tree.t

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download) (as text)
Tue May 1 06:22:12 2007 UTC (18 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +15 -13 lines
File MIME type: application/x-troff
++ whatpm/What/ChangeLog	1 May 2007 06:20:06 -0000
2007-05-01  Wakaba  <wakaba@suika.fam.cx>

	* NanoDOM.pm (last_child, previous_sibling): New attributes.
	(clone_node): Attribute nodes were not completely copied.

	* HTML.pm.src: Many bugs are fixed.

++ whatpm/t/ChangeLog	1 May 2007 06:21:52 -0000
2007-05-01  Wakaba  <wakaba@suika.fam.cx>

	* HTML-tree.t: New test file is added.  Sort key
	was incorrect.

	* HTML-tokenizer.t: New test file is added.

	* tokenizer-test-1.test, tree-test-1.dat: New tests.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4     my $dir_name;
5 wakaba 1.2 my $test_dir_name;
6 wakaba 1.1 BEGIN {
7 wakaba 1.2 $test_dir_name = 't/';
8 wakaba 1.1 $dir_name = 't/tree-construction/';
9     my $skip = "You don't have make command";
10     eval q{
11     system ("cd $test_dir_name; make tree-construction-files") == 0 or die
12     unless -f $dir_name.'tests1.dat';
13     $skip = '';
14     };
15     if ($skip) {
16     print "1..1\n";
17     print "ok 1 # $skip\n";
18     exit;
19     }
20     }
21    
22     use Test;
23 wakaba 1.2 BEGIN { plan tests => 402 }
24 wakaba 1.1
25     use Data::Dumper;
26     $Data::Dumper::Useqq = 1;
27     sub Data::Dumper::qquote {
28     my $s = shift;
29     $s =~ s/([^\x20\x21-\x26\x28-\x5B\x5D-\x7E])/sprintf '\x{%02X}', ord $1/ge;
30     return q<qq'> . $s . q<'>;
31     } # Data::Dumper::qquote
32    
33 wakaba 1.2 for my $file_name (grep {$_} split /\s+/, qq[
34     ${dir_name}tests1.dat
35     ${dir_name}tests2.dat
36     ${dir_name}tests3.dat
37     ${dir_name}tests4.dat
38     ${test_dir_name}tree-test-1.dat
39 wakaba 1.1 ]) {
40 wakaba 1.2 open my $file, '<', $file_name
41     or die "$0: $file_name: $!";
42 wakaba 1.1
43     my $test;
44     my $mode = 'data';
45     while (<$file>) {
46     s/\x0D\x0A/\x0A/;
47     if (/^#data$/) {
48     undef $test;
49     $test->{data} = '';
50     $mode = 'data';
51     } elsif (/^#errors$/) {
52     $test->{errors} = [];
53     $mode = 'errors';
54     $test->{data} =~ s/\x0D?\x0A\z//;
55     } elsif (/^#document$/) {
56     $test->{document} = '';
57     $mode = 'document';
58 wakaba 1.2 } elsif (defined $test->{document} and /^$/) {
59     test ($test);
60 wakaba 1.1 undef $test;
61     } else {
62     if ($mode eq 'data' or $mode eq 'document') {
63     $test->{$mode} .= $_;
64     } elsif ($mode eq 'errors') {
65     tr/\x0D\x0A//d;
66     push @{$test->{errors}}, $_;
67     }
68     }
69     }
70     test ($test) if $test->{errors};
71     }
72    
73     use What::HTML;
74    
75     sub test ($) {
76     my $test = shift;
77    
78     my $s = $test->{data};
79    
80     my $p = What::HTML->new;
81     my $i = 0;
82     $p->{set_next_input_character} = sub {
83     my $self = shift;
84     $self->{next_input_character} = -1 and return if $i >= length $s;
85     $self->{next_input_character} = ord substr $s, $i++, 1;
86    
87     if ($self->{next_input_character} == 0x000D) { # CR
88     if ($i >= length $s) {
89     #
90     } else {
91     my $next_char = ord substr $s, $i++, 1;
92     if ($next_char == 0x000A) { # LF
93     #
94     } else {
95     push @{$self->{char}}, $next_char;
96     }
97     }
98     $self->{next_input_character} = 0x000A; # LF # MUST
99     } elsif ($self->{next_input_character} > 0x10FFFF) {
100     $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
101     } elsif ($self->{next_input_character} == 0x0000) { # NULL
102     $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
103     }
104     };
105    
106     my @errors;
107     $p->{parse_error} = sub {
108     my $msg = shift;
109     push @errors, $msg;
110     };
111    
112     $SIG{INT} = sub {
113     print scalar serialize ($p->{document});
114     exit;
115     };
116    
117     $p->_initialize_tokenizer;
118     $p->_initialize_tree_constructor;
119     $p->_construct_tree;
120     $p->_terminate_tree_constructor;
121    
122     ok scalar @errors, scalar @{$test->{errors}},
123     'Parse error: ' . $test->{data} . '; ' .
124     join (', ', @errors) . ';' . join (', ', @{$test->{errors}});
125    
126     my $doc = $p->{document};
127     my $doc_s = serialize ($doc);
128     ok $doc_s, $test->{document}, 'Document tree: ' . $test->{data};
129     } # test
130    
131     sub serialize ($) {
132     my $node = shift;
133     my $r = '';
134    
135     my @node = map { [$_, ''] } @{$node->child_nodes};
136     while (@node) {
137     my $child = shift @node;
138     my $nt = $child->[0]->node_type;
139     if ($nt == $child->[0]->ELEMENT_NODE) {
140     $r .= '| ' . $child->[1] . '<' . $child->[0]->tag_name . ">\x0A"; ## ISSUE: case?
141    
142 wakaba 1.2 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value] }
143 wakaba 1.1 @{$child->[0]->attributes}) {
144     $r .= '| ' . $child->[1] . ' ' . $attr->[0] . '="'; ## ISSUE: case?
145     $r .= $attr->[1] . '"' . "\x0A";
146     }
147    
148     unshift @node,
149     map { [$_, $child->[1] . ' '] } @{$child->[0]->child_nodes};
150     } elsif ($nt == $child->[0]->TEXT_NODE) {
151     $r .= '| ' . $child->[1] . '"' . $child->[0]->data . '"' . "\x0A";
152     } elsif ($nt == $child->[0]->COMMENT_NODE) {
153     $r .= '| ' . $child->[1] . '<!-- ' . $child->[0]->data . " -->\x0A";
154     } elsif ($nt == $child->[0]->DOCUMENT_TYPE_NODE) {
155     $r .= '| ' . $child->[1] . '<!DOCTYPE ' . $child->[0]->name . ">\x0A";
156     } else {
157     $r .= '| ' . $child->[1] . $child->[0]->node_type . "\x0A"; # error
158     }
159     }
160    
161     return $r;
162     } # serialize
163    
164     ## License: Public Domain.
165 wakaba 1.2 ## $Date: 2007/04/30 14:12:02 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24