/[suikacvs]/test/html-whatpm/parser-manakai.cgi
Suika

Contents of /test/html-whatpm/parser-manakai.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download)
Thu Dec 11 03:22:57 2008 UTC (15 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.8: +107 -45 lines
++ ChangeLog	11 Dec 2008 03:20:10 -0000
2008-12-11  Wakaba  <wakaba@suika.fam.cx>

	* error-description-source.en.xml: Added descriptions for errors
	from Regexp::Parser and Regexp::Parser::JavaScript modules.

	* cc.cgi: Added support for JavaScript regular expressions.

++ html/WebHACC/Language/ChangeLog	11 Dec 2008 03:18:54 -0000
2008-12-11  Wakaba  <wakaba@suika.fam.cx>

	* RegExpJS.pm: New module.

++ html/WebHACC/ChangeLog	11 Dec 2008 03:22:42 -0000
2008-12-11  Wakaba  <wakaba@suika.fam.cx>

	* Output.pm (generate_input_section): Added support for JavaScript
	regular expressions.

2008-12-10  Wakaba  <wakaba@suika.fam.cx>

	* Result.pm: Added support for |valueref| parameter of an error.
	|pos_end| should point the (intercharacter) position where the
	highlighted substring ends, not the character before the position,
	otherwise empty substring cannot be represented well.

1 wakaba 1.3 #!/usr/bin/perl
2 wakaba 1.1 use strict;
3    
4     use lib qw[/home/httpd/html/www/markup/html/whatpm
5 wakaba 1.6 /home/wakaba/work/manakai2/lib];
6 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
7     use Time::HiRes qw/time/;
8    
9 wakaba 1.6 use Message::CGI::HTTP;
10     my $http = Message::CGI::HTTP->new;
11 wakaba 1.1
12     ## TODO: _charset_
13    
14 wakaba 1.6 my @mode = split m#/#, scalar $http->get_meta_variable ('PATH_INFO'), -1;
15 wakaba 1.2 shift @mode if @mode and $mode[0] == '';
16 wakaba 1.1 ## TODO: decode unreserved characters
17    
18 wakaba 1.6 my $s = $http->get_parameter ('s');
19 wakaba 1.1 if (length $s > 1000_000) {
20     print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long";
21     exit;
22     }
23     my $char_length = length $s;
24     my %time;
25     my $time1;
26     my $time2;
27    
28     require Message::DOM::DOMImplementation;
29 wakaba 1.4 my $dom = Message::DOM::DOMImplementation->new;
30 wakaba 1.2 # $| = 1;
31 wakaba 1.1 my $doc;
32 wakaba 1.2 my $el;
33 wakaba 1.1
34 wakaba 1.7
35 wakaba 1.2 if (@mode == 3 and $mode[0] eq 'html' and
36 wakaba 1.7 ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
37 wakaba 1.1 print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
38    
39     require Encode;
40     require Whatpm::HTML;
41    
42     $time1 = time;
43     $s = Encode::decode ('utf-8', $s);
44     $time2 = time;
45     $time{decode} = $time2 - $time1;
46    
47    
48     print STDOUT "#errors\n";
49    
50     my $onerror = sub {
51     my (%opt) = @_;
52 wakaba 1.8 print STDOUT "$opt{line},$opt{column},$opt{type};$opt{level};$opt{value}\n";
53 wakaba 1.1 };
54    
55 wakaba 1.2 $doc = $dom->create_document;
56 wakaba 1.4 $doc->manakai_is_html (1);
57 wakaba 1.1 $time1 = time;
58 wakaba 1.2 if (length $mode[1]) {
59     $el = $doc->create_element_ns
60     ('http://www.w3.org/1999/xhtml', [undef, $mode[1]]);
61     Whatpm::HTML->set_inner_html ($el, $s, $onerror);
62     } else {
63     Whatpm::HTML->parse_string ($s => $doc, $onerror);
64     }
65 wakaba 1.1 $time2 = time;
66     $time{parse} = $time2 - $time1;
67    
68     print "#document\n";
69    
70     my $out;
71 wakaba 1.2 if ($mode[2] eq 'html') {
72     $time1 = time;
73 wakaba 1.4 $out = \( ($el or $doc)->inner_html );
74 wakaba 1.2 $time2 = time;
75     $time{serialize_html} = $time2 - $time1;
76 wakaba 1.7 } elsif ($mode[2] eq 'xml') {
77     $doc->manakai_is_html (0);
78     $time1 = time;
79     $out = \( ($el or $doc)->inner_html );
80     $time2 = time;
81     $time{serialize_xml} = $time2 - $time1;
82     $doc->manakai_is_html (1);
83 wakaba 1.1 } else { # test
84 wakaba 1.9 require Whatpm::HTML::Dumper;
85 wakaba 1.2 $time1 = time;
86 wakaba 1.9 $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
87     $time2 = time;
88     $time{serialize_test} = $time2 - $time1;
89     }
90     print STDOUT Encode::encode ('utf-8', $$out);
91     print STDOUT "\n";
92     } elsif (@mode == 3 and $mode[0] eq 'xml1' and
93     ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
94     print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
95    
96     require Encode;
97     require Whatpm::XML::Parser;
98    
99     $time1 = time;
100     $s = Encode::decode ('utf-8', $s);
101     $time2 = time;
102     $time{decode} = $time2 - $time1;
103    
104     print STDOUT "#errors\n";
105    
106     my $onerror = sub {
107     my (%opt) = @_;
108     print STDOUT "$opt{line},$opt{column},$opt{type};$opt{level};$opt{value}\n";
109     };
110    
111     $doc = $dom->create_document;
112     $time1 = time;
113     ## TODO:
114     #if (length $mode[1]) {
115     # $el = $doc->create_element_ns
116     # ('http://www.w3.org/1999/xhtml', [undef, $mode[1]]);
117     # #Whatpm::HTML->set_inner_html ($el, $s, $onerror);
118     #} else {
119     Whatpm::XML::Parser->parse_char_string ($s => $doc, $onerror);
120     #}
121     $time2 = time;
122     $time{parse_xml1} = $time2 - $time1;
123    
124     print "#document\n";
125    
126     my $out;
127     if ($mode[2] eq 'html') {
128     $doc->manakai_is_html (1);
129     $time1 = time;
130     $out = \( ($el or $doc)->inner_html );
131     $time2 = time;
132     $time{serialize_html} = $time2 - $time1;
133     $doc->manakai_is_html (0);
134     } elsif ($mode[2] eq 'xml') {
135     $time1 = time;
136     $out = \( ($el or $doc)->inner_html );
137     $time2 = time;
138     $time{serialize_xml} = $time2 - $time1;
139     } else { # test
140     require Whatpm::HTML::Dumper;
141     $time1 = time;
142     $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
143 wakaba 1.2 $time2 = time;
144     $time{serialize_test} = $time2 - $time1;
145 wakaba 1.1 }
146     print STDOUT Encode::encode ('utf-8', $$out);
147     print STDOUT "\n";
148 wakaba 1.2 } elsif (@mode == 3 and $mode[0] eq 'xhtml' and
149 wakaba 1.7 ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
150 wakaba 1.1 print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
151    
152     require Message::DOM::XMLParserTemp;
153     print STDOUT "#errors\n";
154    
155     my $onerror = sub {
156     my $err = shift;
157     print STDOUT $err->location->line_number, ",";
158 wakaba 1.2 print STDOUT $err->location->column_number, ",";
159 wakaba 1.1 print STDOUT $err->text, "\n";
160     return 1;
161     };
162    
163     open my $fh, '<', \$s;
164     my $time1 = time;
165     $doc = Message::DOM::XMLParserTemp->parse_byte_stream
166     ($fh => $dom, $onerror, charset => 'utf-8');
167     my $time2 = time;
168     $time{parse_xml} = $time2 - $time1;
169    
170     print "#document\n";
171    
172     my $out;
173 wakaba 1.2 if ($mode[2] eq 'html') {
174 wakaba 1.7 $doc->manakai_is_html (0);
175     $time1 = time;
176     $out = \( $doc->inner_html ); ## TODO: $el case
177     $time2 = time;
178     $time{serialize_html} = $time2 - $time1;
179     $doc->manakai_is_html (1);
180     } elsif ($mode[2] eq 'xml') {
181 wakaba 1.4 $time1 = time;
182     $out = \( $doc->inner_html ); ## TODO: $el case
183     $time2 = time;
184     $time{serialize_xml} = $time2 - $time1;
185 wakaba 1.1 } else { # test
186 wakaba 1.9 require Whatpm::HTML::Dumper;
187     $time1 = time;
188     $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
189     $time2 = time;
190     $time{serialize_test} = $time2 - $time1;
191     }
192     print STDOUT Encode::encode ('utf-8', $$out);
193     print STDOUT "\n";
194     } elsif (@mode == 3 and $mode[0] eq 'swml' and $mode[1] eq '' and
195     ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
196     print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
197    
198     require Encode;
199     $time1 = time;
200     $s = Encode::decode ('utf-8', $s);
201     $time2 = time;
202     $time{decode} = $time2 - $time1;
203    
204     require Whatpm::SWML::Parser;
205     $doc = $dom->create_document;
206     my $p = Whatpm::SWML::Parser->new;
207     $p->parse_char_string ($s => $doc);
208    
209     print "#document\n";
210    
211     my $out;
212     if ($mode[2] eq 'html') {
213     $doc->manakai_is_html (0);
214     $time1 = time;
215     $out = \( $doc->inner_html );
216     $time2 = time;
217     $time{serialize_html} = $time2 - $time1;
218     $doc->manakai_is_html (1);
219     } elsif ($mode[2] eq 'xml') {
220     $time1 = time;
221     $out = \( $doc->inner_html );
222     $time2 = time;
223     $time{serialize_xml} = $time2 - $time1;
224     } else { # test
225     require Whatpm::HTML::Dumper;
226 wakaba 1.1 $time1 = time;
227 wakaba 1.9 $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
228 wakaba 1.1 $time2 = time;
229     $time{serialize_test} = $time2 - $time1;
230     }
231     print STDOUT Encode::encode ('utf-8', $$out);
232     print STDOUT "\n";
233 wakaba 1.5 } elsif (@mode == 3 and $mode[0] eq 'h2h' and $mode[1] eq '' and
234 wakaba 1.7 ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
235 wakaba 1.5 print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
236    
237     require Encode;
238     $time1 = time;
239     $s = Encode::decode ('utf-8', $s);
240     $time2 = time;
241     $time{decode} = $time2 - $time1;
242    
243     require Whatpm::H2H;
244     $doc = $dom->create_document;
245     Whatpm::H2H->parse_string ($s => $doc);
246    
247     print "#document\n";
248    
249     my $out;
250     if ($mode[2] eq 'html') {
251 wakaba 1.7 $doc->manakai_is_html (0);
252     $time1 = time;
253     $out = \( $doc->inner_html );
254     $time2 = time;
255     $time{serialize_html} = $time2 - $time1;
256     $doc->manakai_is_html (1);
257     } elsif ($mode[2] eq 'xml') {
258 wakaba 1.5 $time1 = time;
259     $out = \( $doc->inner_html );
260     $time2 = time;
261     $time{serialize_xml} = $time2 - $time1;
262     } else { # test
263 wakaba 1.9 require Whatpm::HTML::Dumper;
264 wakaba 1.5 $time1 = time;
265 wakaba 1.9 $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
266 wakaba 1.5 $time2 = time;
267     $time{serialize_test} = $time2 - $time1;
268     }
269     print STDOUT Encode::encode ('utf-8', $$out);
270     print STDOUT "\n";
271 wakaba 1.1 } else {
272     print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n404";
273     exit;
274     }
275    
276 wakaba 1.6 if ($http->get_parameter ('dom5')) {
277 wakaba 1.1 require Whatpm::ContentChecker;
278 wakaba 1.2 my $onerror = sub {
279     my %opt = @_;
280     print STDOUT get_node_path ($opt{node}) . ';' . $opt{type} . "\n";
281     };
282 wakaba 1.1 print STDOUT "#domerrors\n";
283     $time1 = time;
284 wakaba 1.2 if ($el) {
285     Whatpm::ContentChecker->check_element ($el, $onerror);
286     } else {
287     Whatpm::ContentChecker->check_document ($doc, $onerror);
288     }
289 wakaba 1.1 $time2 = time;
290     $time{check} = $time2 - $time1;
291     }
292    
293     print STDOUT "#log\n";
294 wakaba 1.9 for (qw/decode parse parse_xml parse_xml1
295     serialize_html serialize_xml serialize_test
296 wakaba 1.1 check/) {
297     next unless defined $time{$_};
298     print STDOUT {
299     decode => 'bytes->chars',
300     parse => 'html5(chars)->dom5',
301 wakaba 1.9 parse_xml => 'xml(chars)->dom5',
302     parse_xml1 => 'xml1(chars)->dom5',
303 wakaba 1.2 serialize_html => 'dom5->html5(char)',
304     serialize_xml => 'dom5->xml1(char)',
305     serialize_test => 'dom5->test(char)',
306 wakaba 1.1 check => 'dom5 check',
307     }->{$_};
308     print STDOUT "\t", $time{$_}, "s\n";
309     open my $file, '>>', ".manakai-$_.txt" or die ".manakai-$_.txt: $!";
310     print $file $char_length, "\t", $time{$_}, "\n";
311     }
312    
313     exit;
314    
315     sub get_node_path ($) {
316     my $node = shift;
317     my @r;
318     while (defined $node) {
319     my $rs;
320     if ($node->node_type == 1) {
321     $rs = $node->manakai_local_name;
322     $node = $node->parent_node;
323     } elsif ($node->node_type == 2) {
324     $rs = '@' . $node->manakai_local_name;
325     $node = $node->owner_element;
326     } elsif ($node->node_type == 3) {
327     $rs = '"' . $node->data . '"';
328     $node = $node->parent_node;
329     } elsif ($node->node_type == 9) {
330     $rs = '';
331     $node = $node->parent_node;
332     } else {
333     $rs = '#' . $node->node_type;
334     $node = $node->parent_node;
335     }
336     unshift @r, $rs;
337     }
338     return join '/', @r;
339     } # get_node_path
340    
341     =head1 AUTHOR
342    
343     Wakaba <w@suika.fam.cx>.
344    
345     =head1 LICENSE
346    
347 wakaba 1.9 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
348 wakaba 1.1
349     This library is free software; you can redistribute it
350     and/or modify it under the same terms as Perl itself.
351    
352     =cut
353    
354 wakaba 1.9 ## $Date: 2008/07/18 14:44:17 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24