/[suikacvs]/webroot/regexp/visualizer/regexp.cgi
Suika

Contents of /webroot/regexp/visualizer/regexp.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations) (download)
Sun Dec 7 10:24:21 2008 UTC (15 years, 10 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +30 -8 lines
++ swe/visualizer/ChangeLog	7 Dec 2008 10:24:13 -0000
	* input.html: New document.

	* regexp.cgi: Use ?s= as input.  Handle errors.  Specify style
	sheet.

2008-12-07  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use feature 'state';
5 use CGI::Carp qw(fatalsToBrowser);
6
7 use lib q[/home/wakaba/work/manakai2/lib];
8 use Message::CGI::Util qw/percent_decode htescape/;
9 use Message::CGI::HTTP;
10
11 use Regexp::Parser;
12 use Graph::Easy;
13
14 use Scalar::Util qw/refaddr/;
15
16 my $default_map = {};
17 for (qw/. \C \w \W \s \S \d \D \X \1 \2 \3 \4 \5 \6 \7 \8 \9
18 \A ^ \B \b \G \Z \z $/) {
19 $default_map->{$_} = qq[Perl /$_/];
20 }
21
22 my $assertion_map = {
23 ifmatch => '(?=)',
24 '<ifmatch' => '(?<=)',
25 unlessm => '(?!)',
26 '<unlessm' => '(?<!)',
27 };
28
29 my $cgi = Message::CGI::HTTP->new;
30
31 my $regexp = percent_decode $cgi->get_parameter ('s') // '';
32 $regexp = '(?:)' unless length $regexp;
33
34 my $parser = Regexp::Parser->new;
35
36 package Regexp::Parser;
37
38 # start of char class range (or maybe just char)
39 $parser->add_handler('cc' => sub {
40 my ($S) = @_;
41 return if ${&Rx} =~ m{ \G (?= ] | \z ) }xgc;
42 push @{ $S->{next} }, qw< cc >;
43 my ($lhs, $rhs, $before_range);
44 my $ret = \$lhs;
45
46 {
47 if (${&Rx} =~ m{ \G ( \\ ) }xgcs) {
48 my $c = $1;
49 $$ret = $S->$c(1);
50 }
51 elsif (${&Rx} =~ m{ \G \[ ([.=:]) (\^?) (.*?) \1 \] }xgcs) {
52 my ($how, $neg, $name) = ($1, $2, $3);
53 my $posix = "POSIX_$name";
54 if ($S->can($posix)) { $$ret = $S->$posix($neg, $how) }
55 else { $S->error(RPe_BADPOS, "$how$neg$name$how") }
56 }
57 elsif (${&Rx} =~ m{ \G (.) }xgcs) {
58 $$ret = $S->force_object(anyof_char => $1);
59 }
60
61 if ($ret == \$lhs) {
62 if (${&Rx} =~ m{ \G (?= - ) }xgc) {
63 if ($lhs->visual =~ /^(?:\[[:.=]|\\[dDsSwWpP])/) {
64 $S->warn(RPe_FRANGE, $lhs->visual, "");
65 $ret = $lhs;
66 last;
67 }
68 $before_range = &RxPOS++;
69 $ret = \$rhs;
70 redo;
71 }
72 $ret = $lhs;
73 }
74 elsif ($ret == \$rhs) {
75 if ($rhs->visual =~ /^(?:\[[:.=]|\\[dDsSwWpP])/) {
76 $S->warn(RPe_FRANGE, $lhs->visual, $rhs->visual);
77 &RxPOS = $before_range;
78 $ret = $lhs;
79 }
80 elsif ($lhs->data gt $rhs->data) {
81 $S->error(RPe_IRANGE, $lhs->visual, $rhs->visual);
82 }
83 else {
84 $ret = $S->object(anyof_range => $lhs, $rhs);
85 }
86 }
87 }
88
89 return if &SIZE_ONLY;
90 return $ret;
91 });
92
93 package main;
94
95 $parser->parse ($regexp);
96 my $eregexp = htescape $regexp;
97
98 if ($parser->errnum) {
99 binmode STDOUT, ':encoding(utf-8)';
100 print "Content-Type: text/html; charset=utf-8\n\n";
101 print q[<!DOCTYPE HTML><html lang=en>
102 <title>Regular expression visualizer: $eregexp</title>
103 <link rel="stylesheet" href="/www/style/html/xhtml"/>
104 </head>
105 <body>
106 <h1>Regular expression visualizer</h1>
107
108 <p>Input: <code>], $eregexp, q[</code></p>
109
110 <p>Error: ], htescape ($parser->errmsg);
111 exit;
112 }
113
114 binmode STDOUT, ':encoding(utf-8)';
115 print "Content-Type: application/xhtml+xml; charset=utf-8\n\n";
116
117 add_regexp ($parser->root);
118
119 print q[<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
120 <head><title>Regular expression visualizer: $eregexp</title>
121 <link rel="stylesheet" href="/www/style/html/xhtml"/>
122 </head>
123 <body>
124 <h1>Regular expression visualizer</h1>
125
126 <p>Input: <code>], $eregexp, q[</code></p>];
127
128 my @regexp;
129 while (@regexp) {
130 my $nodes = shift @regexp;
131
132 my $index = get_graph_index ($nodes);
133 print "<section><h2>Regexp #$index</h2>\n\n";
134
135 my $g = generate_graph ($nodes);
136 print $g->as_svg;
137
138 print "</section>\n";
139 }
140
141 print q[</body></html>];
142
143 sub escape_value ($) {
144 my $v = shift;
145 $v =~ s/(\W)/sprintf '\x{%04X}', ord $1/ge;
146 $v;
147 } # escape_value
148
149 sub escape_code ($) {
150 my $v = shift;
151 $v =~ s/([^\x20-\x5B\x5D-\x7E])/sprintf '\x{%04X}', ord $1/ge;
152 $v;
153 } # escape_code
154
155 sub add_regexp ($) {
156 my $nodes = shift;
157 push @regexp, $nodes;
158 } # add_regexp
159
160 sub get_graph_index ($) {
161 state $index;
162 state $next_index ||= 0;
163
164 my $nodes = shift;
165 $index->{$nodes} //= $next_index++;
166 return $index->{$nodes};
167 } # get_graph_index
168
169 sub generate_graph ($$) {
170 my ($root_nodes) = @_;
171
172 my $g = Graph::Easy->new;
173
174 $g->set_attributes ('node.start' => {fill => 'blue', color => 'white'});
175 $g->set_attributes ('node.success' => {fill => 'green', color => 'white'});
176
177 my $start_n = $g->add_node ('START');
178 $start_n->set_attribute (class => 'start');
179 my $success_n = $g->add_node ('SUCCESS');
180 $success_n->set_attribute (class => 'success');
181
182 my ($first_ns, $last_ns, $is_optional) = add_to_graph ($root_nodes => $g);
183 $g->add_edge ($start_n => $_) for @$first_ns;
184 $g->add_edge ($_ => $success_n) for @$last_ns;
185 $g->add_edge ($start_n => $success_n) if $is_optional;
186
187 return $g;
188 } # generate_graph
189
190 sub add_to_graph ($$) {
191 my ($node, $g) = @_;
192
193 my $family = ref $node eq 'ARRAY' ? '' : $node->family;
194 my $type = ref $node eq 'ARRAY' ? '' : $node->type;
195 if ($family eq 'quant') {
196 my ($min, $max) = ($node->min, $node->max);
197 return ([], [], 1) if $max eq '0';
198 my ($first_ns, $last_ns, $is_optional) = add_to_graph ($node->data => $g);
199
200 my $label;
201 if ($max eq '') {
202 if ($min == 0) {
203 $is_optional = 1;
204 $label = '';
205
206 } elsif ($min == 1) {
207 $label = '';
208
209 } else {
210 $label = 'at least ' . ($min - 1);
211
212 }
213 } elsif ($max == 1) {
214 if ($min == 0) {
215 $is_optional = 1;
216
217 } else {
218
219 }
220 } else {
221 $label = 'at most ' . ($max - 1);
222 if ($min == 0) {
223 $is_optional = 1;
224
225
226 } elsif ($min == 1) {
227
228 } else {
229 $label = 'at least ' . ($min - 1) . ', ' . $label;
230
231 }
232 }
233
234 if (@$first_ns != 1 or @$last_ns != 1) {
235 my $n = $g->add_node (refaddr $first_ns);
236 $n->set_attribute (label => '');
237 my $m = $n;
238 unless ($is_optional) {
239 $m = $g->add_node (refaddr $last_ns);
240 $m->set_attribute (label => '');
241 } else {
242 $is_optional = 0;
243 }
244 $g->add_edge ($n => $_) for @$first_ns;
245 $g->add_edge ($_ => $m) for @$last_ns;
246 $first_ns = [$n];
247 $last_ns = [$m];
248 }
249
250 if (defined $label) {
251 my $e = $g->add_edge ($last_ns->[0] => $first_ns->[0]);
252 $e->set_attribute (label => $label);
253 }
254
255 return ($first_ns, $last_ns, $is_optional);
256 } elsif ($type eq 'branch') {
257 my @first_n;
258 my @last_n;
259 my $is_optional = 0;
260 for (@{$node->data}) {
261 my ($f_ns, $l_ns, $is_opt) = add_to_graph ($_ => $g);
262 push @first_n, @$f_ns;
263 push @last_n, @$l_ns;
264 $is_optional |= $is_opt;
265 }
266 return (\@first_n, \@last_n, $is_optional);
267 } elsif ($type eq 'anyof') {
268 if ($node->neg) {
269 my $nodes = Regexp::Parser::branch->new ($node->{rx});
270 $nodes->{data} = $node->data;
271
272 add_regexp ($nodes);
273
274 my $n = $g->add_node (refaddr $nodes);
275 my $label = 'NOT #' . get_graph_index ($nodes);
276 $n->set_attribute (label => $label);
277
278 return ([$n], [$n], 0);
279 } else {
280 my @first_n;
281 my @last_n;
282 for (@{$node->data}) {
283 my ($f_ns, $l_ns) = add_to_graph ($_ => $g);
284 push @first_n, @$f_ns;
285 push @last_n, @$l_ns;
286 }
287 return (\@first_n, \@last_n, 0);
288 }
289 } elsif ($type eq '') {
290 my $prev_ns;
291 my $first_ns;
292 my $is_optional = 1;
293 for (@{$node}) {
294 my ($f_ns, $l_ns, $is_opt) = add_to_graph ($_ => $g);
295 if ($prev_ns) {
296 if (@$prev_ns > 1 and @$f_ns > 1) {
297 my $n = $g->add_node (refaddr $f_ns);
298 $n->set_attribute (label => '');
299 $g->add_edge ($_ => $n) for @$prev_ns;
300 $g->add_edge ($n => $_) for @$f_ns;
301 } else {
302 for my $prev_n (@$prev_ns) {
303 for my $f_n (@$f_ns) {
304 $g->add_edge ($prev_n => $f_n);
305 }
306 }
307 }
308 if ($is_optional) {
309 push @$first_ns, @$f_ns;
310 }
311 if ($is_opt) {
312 push @$prev_ns, @$l_ns;
313 } else {
314 $prev_ns = $l_ns if @$l_ns;
315 }
316 } else {
317 $first_ns = $f_ns;
318 $prev_ns = $l_ns if @$l_ns;
319 }
320 $is_optional &= $is_opt;
321 }
322 return ($first_ns || [], $prev_ns || [], $is_optional);
323 } elsif ($family eq 'group' or $family eq 'open' or $type eq 'suspend') {
324 ## TODO: (?:) vs () vs (?>), (?:)->on, (?:)->off
325 my ($f_ns, $l_ns, $is_opt) = add_to_graph ($node->data => $g);
326 return ($f_ns, $l_ns, $is_opt);
327 } elsif ($type eq 'ifthen') {
328 my $nodes = $node->data;
329
330 my $groupp = $nodes->[0];
331 my $label = $groupp ? '(?' . $groupp->visual . ')' : '';
332 my $n = $g->add_node (refaddr $groupp);
333 $n->set_attribute (label => $label);
334
335 my $l = $g->add_node (refaddr $nodes);
336 $l->set_attribute (label => '');
337
338 my $branch = $nodes->[1];
339 my $branches = $branch ? $branch->data : [];
340
341 my $true = $branches->[0];
342 if ($true) {
343 my ($f_ns, $l_ns, $is_opt) = add_to_graph ($true => $g);
344 $g->add_edge ($n => $_)->set_attribute (label => 'true') for @$f_ns;
345 $g->add_edge ($_ => $l) for @$l_ns;
346 $g->add_edge ($n => $l)->set_attribute (label => 'true') if $is_opt;
347 }
348
349 my $false = $branches->[1];
350 if ($false) {
351 my ($f_ns, $l_ns, $is_opt) = add_to_graph ($false => $g);
352 $g->add_edge ($n => $_)->set_attribute (label => 'false') for @$f_ns;
353 $g->add_edge ($_ => $l) for @$l_ns;
354 $g->add_edge ($n => $l)->set_attribute (label => 'false') if $is_opt;
355 }
356
357 return ([$n], [$l], 0);
358 } elsif ($type eq 'eval' or $type eq 'logical') {
359 my $n = $g->add_node (refaddr $node);
360 my $label = $type eq 'eval' ? '(?{})' : '(??{})';
361 $label .= ' ' . escape_code $node->data;
362 $n->set_attribute (label => $label);
363 return ([$n], [$n], 0);
364 } elsif ($family eq 'assertion') {
365 my $nodes = $node->data;
366 add_regexp ($nodes);
367
368 my $n = $g->add_node (refaddr $nodes);
369 $type = '<' . $type if $node->dir < 0;
370 my $label = $assertion_map->{$type} // $type;
371 $label .= ' #' . get_graph_index ($nodes);
372 $n->set_attribute (label => $label);
373
374 return ([$n], [$n], 0);
375 } elsif ($family eq 'anyof_class') {
376 my $data = $node->data;
377 my $label;
378 if ($data eq 'POSIX') {
379 my $how = ${$node->{how}};
380 if ($how eq ':') {
381 $label = 'POSIX ' . $node->{type};
382 $label = 'NOT ' . $label if $node->neg;
383 } else {
384 $label = $how . $node->neg . $node->{type} . $how;
385 }
386 } else {
387 my $data_family = $data->family;
388 if ($data_family eq 'prop') {
389 $label = 'property ' . $node->type;
390 $label = 'NOT ' . $label if $node->neg;
391 } elsif ($data_family eq 'space') {
392 $label = $data->neg ? 'Perl /\S/' : 'Perl /\s/';
393 } elsif ($data_family eq 'alnum') {
394 $label = $data->neg ? 'Perl /\W/' : 'Perl /\w/';
395 } elsif ($data_family eq 'digit') {
396 $label = $data->neg ? 'Perl /\D/' : 'Perl /\d/';
397 } else {
398 $label = $data->visual;
399 }
400 }
401
402 my $n = $g->add_node (refaddr $node);
403 $n->set_attribute (label => $label);
404
405 return ([$n] => [$n]);
406 } elsif ($family eq 'exact' or $type eq 'anyof_char') {
407 my $n = $g->add_node (refaddr $node);
408
409 my $label = escape_value $node->data;
410 $n->set_attribute (label => qq[ "$label" ]);
411
412 return ([$n] => [$n]);
413 } elsif ($family eq 'flags') {
414 ## TODO: scope
415 my $n = $g->add_node (refaddr $node);
416
417 my $label = $node->visual;
418 $n->set_attribute (label => $label);
419
420 return ([$n] => [$n], 0);
421 } elsif ($family eq 'minmod') {
422 my $nodes = $node->data;
423 add_regexp ($nodes);
424
425 my $n = $g->add_node (refaddr $nodes);
426 my $label = 'non-greedy #' . get_graph_index ($nodes);
427 $n->set_attribute (label => $label);
428
429 return ([$n], [$n], 0);
430 } elsif ($family eq 'anyof_range') {
431 my $n = $g->add_node (refaddr $node);
432
433 my $start = escape_value $node->data->[0]->data;
434 my $end = escape_value $node->data->[1]->data;
435 my $label = qq[ one of "$start" .. "$end" ];
436 $n->set_attribute (label => $label);
437
438 return ([$n] => [$n], 0);
439 } else {
440 # anyof_char
441 # anyof_range
442
443 my $n = $g->add_node (refaddr $node);
444
445 my $label = $node->visual;
446 $label = $default_map->{$label} // escape_value $label;
447 $label .= ' (' . $type . ')';
448 $n->set_attribute (label => $label);
449
450 return ([$n] => [$n], 0);
451 }
452 } # add_to_graph
453

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24