
Contents of /markup/html/whatpm/t/HTML-tokenizer.t



Revision 1.3
Mon Apr 30 14:12:02 2007 UTC by wakaba
Branch: MAIN
Changes since 1.2: +4 -4 lines
File MIME type: application/x-troff
++ whatpm/What/ChangeLog	30 Apr 2007 14:11:13 -0000
	* HTML.pm.src: Some typos are fixed.

2007-04-30  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/t/ChangeLog	30 Apr 2007 14:11:55 -0000
	* .cvsignore: |tree-consturction| is added.

	* HTML-tree.t: New test.

	* Makefile: Rules for tree constructor tests are added.

2007-04-30  Wakaba  <wakaba@suika.fam.cx>

#!/usr/bin/perl
use strict;

my $dir_name;
BEGIN {
  my $test_dir_name = 't/';
  $dir_name = 't/tokenizer/';
  my $skip = "You don't have JSON module";
  eval q{
    use JSON 1.00;
    $skip = "You don't have make command";
    system ("cd $test_dir_name; make tokenizer-files") == 0 or die
        unless -f $dir_name.'test1.test';
    $skip = '';
  };
  if ($skip) {
    print "1..1\n";
    print "ok 1 # $skip\n";
    exit;
  }
  $JSON::UnMapping = 1;
  $JSON::UTF8 = 1;
}

use Test;
BEGIN { plan tests => 67 }

use Data::Dumper;
$Data::Dumper::Useqq = 1;
sub Data::Dumper::qquote {
  my $s = shift;
  $s =~ s/([^\x20\x21-\x26\x28-\x5B\x5D-\x7E])/sprintf '\x{%02X}', ord $1/ge;
  return q<qq'> . $s . q<'>;
} # Data::Dumper::qquote

use What::HTML;

for my $file_name (qw[
  test1.test
  test2.test
  contentModelFlags.test
]) {
  open my $file, '<', $dir_name.$file_name
      or die "$0: $dir_name$file_name: $!";
  local $/ = undef;
  my $js = <$file>;
  close $file;

  my $tests = jsonToObj ($js)->{tests};
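  ## The test files appear to use the html5lib tokenizer test format;
  ## judging from the fields read below, each record in {tests} looks
  ## roughly like this (a sketch, not copied from an actual file):
  ##   { "description": "...",
  ##     "input": "<p>x",
  ##     "output": [["StartTag", "p", {}], ["Character", "x"], "ParseError"],
  ##     "contentModelFlags": ["PCDATA"],   (optional)
  ##     "lastStartTag": "..." }            (optional)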
  TEST: for my $test (@$tests) {
    my $s = $test->{input};

    ## Merge adjacent "Character" entries in the expected output so that
    ## they compare equal to the concatenated character tokens collected
    ## from the tokenizer below.
    my $j = 1;
    while ($j < @{$test->{output}}) {
      if (ref $test->{output}->[$j - 1] and
          $test->{output}->[$j - 1]->[0] eq 'Character' and
          ref $test->{output}->[$j] and
          $test->{output}->[$j]->[0] eq 'Character') {
        $test->{output}->[$j - 1]->[1]
            .= $test->{output}->[$j]->[1];
        splice @{$test->{output}}, $j, 1;
        ## $j is not advanced after a merge, so that runs of three or
        ## more "Character" entries collapse into a single token.
      } else {
        $j++;
      }
    }

    my @cm = @{$test->{contentModelFlags} || ['PCDATA']};
    my $last_start_tag = $test->{lastStartTag};
    for my $cm (@cm) {
      my $p = What::HTML->new;
      my $i = 0;
      $p->{set_next_input_character} = sub {
        my $self = shift;
        $self->{next_input_character} = -1 and return if $i >= length $s;
        $self->{next_input_character} = ord substr $s, $i++, 1;

        if ($self->{next_input_character} == 0x000D) { # CR
          if ($i >= length $s) {
            #
          } else {
            my $next_char = ord substr $s, $i++, 1;
            if ($next_char == 0x000A) { # LF
              #
            } else {
              push @{$self->{char}}, $next_char;
            }
          }
          $self->{next_input_character} = 0x000A; # LF # MUST
        } elsif ($self->{next_input_character} > 0x10FFFF) {
          $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        } elsif ($self->{next_input_character} == 0x0000) { # NULL
          $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        }
      };

      my @token;
      $p->{parse_error} = sub {
        push @token, 'ParseError';
      };

      $p->_initialize_tokenizer;
      $p->{content_model_flag} = $cm;
      $p->{last_emitted_start_tag_name} = $last_start_tag;

      while (1) {
        my $token = $p->_get_next_token;
        last if $token->{type} eq 'end-of-file';

        my $test_token = [
          {
            DOCTYPE => 'DOCTYPE',
            'start tag' => 'StartTag',
            'end tag' => 'EndTag',
            comment => 'Comment',
            character => 'Character',
          }->{$token->{type}} || $token->{type},
        ];
        $test_token->[1] = $token->{name} if defined $token->{name};
        $test_token->[1] = $token->{tag_name} if defined $token->{tag_name};
        $test_token->[1] = $token->{data} if defined $token->{data};
        $test_token->[2] = $token->{error} ? 1 : 0 if $token->{type} eq 'DOCTYPE';
        $test_token->[2] = {map {$_->{name} => $_->{value}} values %{$token->{attributes}}}
            if $token->{type} eq 'start tag';

        if (@token and ref $token[-1] and $token[-1]->[0] eq 'Character' and
            $test_token->[0] eq 'Character') {
          $token[-1]->[1] .= $test_token->[1];
        } else {
          push @token, $test_token;
        }
      }

      my $expected_dump = Dumper ($test->{output});
      my $parser_dump = Dumper (\@token);
      ok $parser_dump, $expected_dump,
          $test->{description} . ': ' . $test->{input};
    }
  }
}

## $Date: 2007/04/30 11:45:24 $
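
For reference, below is a minimal, self-contained sketch of the tokenizer interface this test drives. The constructor, the set_next_input_character and parse_error hooks, _initialize_tokenizer, content_model_flag and _get_next_token are the names exercised above; the sample input, the printed output and the simplified input callback (which omits the CR/LF and out-of-range character normalization the test performs) are illustrative only.

  #!/usr/bin/perl
  use strict;
  use What::HTML;

  my $s = '<p class="a">text';   # sample input (illustrative)
  my $p = What::HTML->new;
  my $i = 0;

  ## Feed the tokenizer one character per call; -1 signals end of file.
  $p->{set_next_input_character} = sub {
    my $self = shift;
    $self->{next_input_character}
        = $i < length $s ? ord substr ($s, $i++, 1) : -1;
  };
  $p->{parse_error} = sub { warn "parse error\n" };

  $p->_initialize_tokenizer;
  $p->{content_model_flag} = 'PCDATA';

  ## Pull tokens until the end-of-file token; each $token is a hash
  ## reference whose {type} is e.g. 'start tag' or 'character'.
  while (1) {
    my $token = $p->_get_next_token;
    last if $token->{type} eq 'end-of-file';
    print $token->{type}, "\n";
  }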
