/[pub]/test/html-whatpm/parser-manakai.cgi
Suika

Contents of /test/html-whatpm/parser-manakai.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (show annotations) (download)
Thu Dec 11 03:22:57 2008 UTC (15 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.8: +107 -45 lines
++ ChangeLog	11 Dec 2008 03:20:10 -0000
2008-12-11  Wakaba  <wakaba@suika.fam.cx>

	* error-description-source.en.xml: Added descriptions for errors
	from Regexp::Parser and Regexp::Parser::JavaScript modules.

	* cc.cgi: Added support for JavaScript regular expressions.

++ html/WebHACC/Language/ChangeLog	11 Dec 2008 03:18:54 -0000
2008-12-11  Wakaba  <wakaba@suika.fam.cx>

	* RegExpJS.pm: New module.

++ html/WebHACC/ChangeLog	11 Dec 2008 03:22:42 -0000
2008-12-11  Wakaba  <wakaba@suika.fam.cx>

	* Output.pm (generate_input_section): Added support for JavaScript
	regular expressions.

2008-12-10  Wakaba  <wakaba@suika.fam.cx>

	* Result.pm: Added support for |valueref| parameter of an error.
	|pos_end| should point the (intercharacter) position where the
	highlighted substring ends, not the character before the position,
	otherwise empty substring cannot be represented well.

1 #!/usr/bin/perl
2 use strict;
3
4 use lib qw[/home/httpd/html/www/markup/html/whatpm
5 /home/wakaba/work/manakai2/lib];
6 use CGI::Carp qw[fatalsToBrowser];
7 use Time::HiRes qw/time/;
8
9 use Message::CGI::HTTP;
10 my $http = Message::CGI::HTTP->new;
11
12 ## TODO: _charset_
13
14 my @mode = split m#/#, scalar $http->get_meta_variable ('PATH_INFO'), -1;
15 shift @mode if @mode and $mode[0] == '';
16 ## TODO: decode unreserved characters
17
18 my $s = $http->get_parameter ('s');
19 if (length $s > 1000_000) {
20 print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long";
21 exit;
22 }
23 my $char_length = length $s;
24 my %time;
25 my $time1;
26 my $time2;
27
28 require Message::DOM::DOMImplementation;
29 my $dom = Message::DOM::DOMImplementation->new;
30 # $| = 1;
31 my $doc;
32 my $el;
33
34
35 if (@mode == 3 and $mode[0] eq 'html' and
36 ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
37 print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
38
39 require Encode;
40 require Whatpm::HTML;
41
42 $time1 = time;
43 $s = Encode::decode ('utf-8', $s);
44 $time2 = time;
45 $time{decode} = $time2 - $time1;
46
47
48 print STDOUT "#errors\n";
49
50 my $onerror = sub {
51 my (%opt) = @_;
52 print STDOUT "$opt{line},$opt{column},$opt{type};$opt{level};$opt{value}\n";
53 };
54
55 $doc = $dom->create_document;
56 $doc->manakai_is_html (1);
57 $time1 = time;
58 if (length $mode[1]) {
59 $el = $doc->create_element_ns
60 ('http://www.w3.org/1999/xhtml', [undef, $mode[1]]);
61 Whatpm::HTML->set_inner_html ($el, $s, $onerror);
62 } else {
63 Whatpm::HTML->parse_string ($s => $doc, $onerror);
64 }
65 $time2 = time;
66 $time{parse} = $time2 - $time1;
67
68 print "#document\n";
69
70 my $out;
71 if ($mode[2] eq 'html') {
72 $time1 = time;
73 $out = \( ($el or $doc)->inner_html );
74 $time2 = time;
75 $time{serialize_html} = $time2 - $time1;
76 } elsif ($mode[2] eq 'xml') {
77 $doc->manakai_is_html (0);
78 $time1 = time;
79 $out = \( ($el or $doc)->inner_html );
80 $time2 = time;
81 $time{serialize_xml} = $time2 - $time1;
82 $doc->manakai_is_html (1);
83 } else { # test
84 require Whatpm::HTML::Dumper;
85 $time1 = time;
86 $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
87 $time2 = time;
88 $time{serialize_test} = $time2 - $time1;
89 }
90 print STDOUT Encode::encode ('utf-8', $$out);
91 print STDOUT "\n";
92 } elsif (@mode == 3 and $mode[0] eq 'xml1' and
93 ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
94 print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
95
96 require Encode;
97 require Whatpm::XML::Parser;
98
99 $time1 = time;
100 $s = Encode::decode ('utf-8', $s);
101 $time2 = time;
102 $time{decode} = $time2 - $time1;
103
104 print STDOUT "#errors\n";
105
106 my $onerror = sub {
107 my (%opt) = @_;
108 print STDOUT "$opt{line},$opt{column},$opt{type};$opt{level};$opt{value}\n";
109 };
110
111 $doc = $dom->create_document;
112 $time1 = time;
113 ## TODO:
114 #if (length $mode[1]) {
115 # $el = $doc->create_element_ns
116 # ('http://www.w3.org/1999/xhtml', [undef, $mode[1]]);
117 # #Whatpm::HTML->set_inner_html ($el, $s, $onerror);
118 #} else {
119 Whatpm::XML::Parser->parse_char_string ($s => $doc, $onerror);
120 #}
121 $time2 = time;
122 $time{parse_xml1} = $time2 - $time1;
123
124 print "#document\n";
125
126 my $out;
127 if ($mode[2] eq 'html') {
128 $doc->manakai_is_html (1);
129 $time1 = time;
130 $out = \( ($el or $doc)->inner_html );
131 $time2 = time;
132 $time{serialize_html} = $time2 - $time1;
133 $doc->manakai_is_html (0);
134 } elsif ($mode[2] eq 'xml') {
135 $time1 = time;
136 $out = \( ($el or $doc)->inner_html );
137 $time2 = time;
138 $time{serialize_xml} = $time2 - $time1;
139 } else { # test
140 require Whatpm::HTML::Dumper;
141 $time1 = time;
142 $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
143 $time2 = time;
144 $time{serialize_test} = $time2 - $time1;
145 }
146 print STDOUT Encode::encode ('utf-8', $$out);
147 print STDOUT "\n";
148 } elsif (@mode == 3 and $mode[0] eq 'xhtml' and
149 ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
150 print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
151
152 require Message::DOM::XMLParserTemp;
153 print STDOUT "#errors\n";
154
155 my $onerror = sub {
156 my $err = shift;
157 print STDOUT $err->location->line_number, ",";
158 print STDOUT $err->location->column_number, ",";
159 print STDOUT $err->text, "\n";
160 return 1;
161 };
162
163 open my $fh, '<', \$s;
164 my $time1 = time;
165 $doc = Message::DOM::XMLParserTemp->parse_byte_stream
166 ($fh => $dom, $onerror, charset => 'utf-8');
167 my $time2 = time;
168 $time{parse_xml} = $time2 - $time1;
169
170 print "#document\n";
171
172 my $out;
173 if ($mode[2] eq 'html') {
174 $doc->manakai_is_html (0);
175 $time1 = time;
176 $out = \( $doc->inner_html ); ## TODO: $el case
177 $time2 = time;
178 $time{serialize_html} = $time2 - $time1;
179 $doc->manakai_is_html (1);
180 } elsif ($mode[2] eq 'xml') {
181 $time1 = time;
182 $out = \( $doc->inner_html ); ## TODO: $el case
183 $time2 = time;
184 $time{serialize_xml} = $time2 - $time1;
185 } else { # test
186 require Whatpm::HTML::Dumper;
187 $time1 = time;
188 $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
189 $time2 = time;
190 $time{serialize_test} = $time2 - $time1;
191 }
192 print STDOUT Encode::encode ('utf-8', $$out);
193 print STDOUT "\n";
194 } elsif (@mode == 3 and $mode[0] eq 'swml' and $mode[1] eq '' and
195 ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
196 print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
197
198 require Encode;
199 $time1 = time;
200 $s = Encode::decode ('utf-8', $s);
201 $time2 = time;
202 $time{decode} = $time2 - $time1;
203
204 require Whatpm::SWML::Parser;
205 $doc = $dom->create_document;
206 my $p = Whatpm::SWML::Parser->new;
207 $p->parse_char_string ($s => $doc);
208
209 print "#document\n";
210
211 my $out;
212 if ($mode[2] eq 'html') {
213 $doc->manakai_is_html (0);
214 $time1 = time;
215 $out = \( $doc->inner_html );
216 $time2 = time;
217 $time{serialize_html} = $time2 - $time1;
218 $doc->manakai_is_html (1);
219 } elsif ($mode[2] eq 'xml') {
220 $time1 = time;
221 $out = \( $doc->inner_html );
222 $time2 = time;
223 $time{serialize_xml} = $time2 - $time1;
224 } else { # test
225 require Whatpm::HTML::Dumper;
226 $time1 = time;
227 $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
228 $time2 = time;
229 $time{serialize_test} = $time2 - $time1;
230 }
231 print STDOUT Encode::encode ('utf-8', $$out);
232 print STDOUT "\n";
233 } elsif (@mode == 3 and $mode[0] eq 'h2h' and $mode[1] eq '' and
234 ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
235 print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
236
237 require Encode;
238 $time1 = time;
239 $s = Encode::decode ('utf-8', $s);
240 $time2 = time;
241 $time{decode} = $time2 - $time1;
242
243 require Whatpm::H2H;
244 $doc = $dom->create_document;
245 Whatpm::H2H->parse_string ($s => $doc);
246
247 print "#document\n";
248
249 my $out;
250 if ($mode[2] eq 'html') {
251 $doc->manakai_is_html (0);
252 $time1 = time;
253 $out = \( $doc->inner_html );
254 $time2 = time;
255 $time{serialize_html} = $time2 - $time1;
256 $doc->manakai_is_html (1);
257 } elsif ($mode[2] eq 'xml') {
258 $time1 = time;
259 $out = \( $doc->inner_html );
260 $time2 = time;
261 $time{serialize_xml} = $time2 - $time1;
262 } else { # test
263 require Whatpm::HTML::Dumper;
264 $time1 = time;
265 $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
266 $time2 = time;
267 $time{serialize_test} = $time2 - $time1;
268 }
269 print STDOUT Encode::encode ('utf-8', $$out);
270 print STDOUT "\n";
271 } else {
272 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n404";
273 exit;
274 }
275
276 if ($http->get_parameter ('dom5')) {
277 require Whatpm::ContentChecker;
278 my $onerror = sub {
279 my %opt = @_;
280 print STDOUT get_node_path ($opt{node}) . ';' . $opt{type} . "\n";
281 };
282 print STDOUT "#domerrors\n";
283 $time1 = time;
284 if ($el) {
285 Whatpm::ContentChecker->check_element ($el, $onerror);
286 } else {
287 Whatpm::ContentChecker->check_document ($doc, $onerror);
288 }
289 $time2 = time;
290 $time{check} = $time2 - $time1;
291 }
292
293 print STDOUT "#log\n";
294 for (qw/decode parse parse_xml parse_xml1
295 serialize_html serialize_xml serialize_test
296 check/) {
297 next unless defined $time{$_};
298 print STDOUT {
299 decode => 'bytes->chars',
300 parse => 'html5(chars)->dom5',
301 parse_xml => 'xml(chars)->dom5',
302 parse_xml1 => 'xml1(chars)->dom5',
303 serialize_html => 'dom5->html5(char)',
304 serialize_xml => 'dom5->xml1(char)',
305 serialize_test => 'dom5->test(char)',
306 check => 'dom5 check',
307 }->{$_};
308 print STDOUT "\t", $time{$_}, "s\n";
309 open my $file, '>>', ".manakai-$_.txt" or die ".manakai-$_.txt: $!";
310 print $file $char_length, "\t", $time{$_}, "\n";
311 }
312
313 exit;
314
315 sub get_node_path ($) {
316 my $node = shift;
317 my @r;
318 while (defined $node) {
319 my $rs;
320 if ($node->node_type == 1) {
321 $rs = $node->manakai_local_name;
322 $node = $node->parent_node;
323 } elsif ($node->node_type == 2) {
324 $rs = '@' . $node->manakai_local_name;
325 $node = $node->owner_element;
326 } elsif ($node->node_type == 3) {
327 $rs = '"' . $node->data . '"';
328 $node = $node->parent_node;
329 } elsif ($node->node_type == 9) {
330 $rs = '';
331 $node = $node->parent_node;
332 } else {
333 $rs = '#' . $node->node_type;
334 $node = $node->parent_node;
335 }
336 unshift @r, $rs;
337 }
338 return join '/', @r;
339 } # get_node_path
340
341 =head1 AUTHOR
342
343 Wakaba <w@suika.fam.cx>.
344
345 =head1 LICENSE
346
347 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
348
349 This library is free software; you can redistribute it
350 and/or modify it under the same terms as Perl itself.
351
352 =cut
353
354 ## $Date: 2008/07/18 14:44:17 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24