/[suikacvs]/webroot/regexp/visualizer/regexp.cgi
Suika

Contents of /webroot/regexp/visualizer/regexp.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Sun Dec 7 10:24:21 2008 UTC (15 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +30 -8 lines
++ swe/visualizer/ChangeLog	7 Dec 2008 10:24:13 -0000
	* input.html: New document.

	* regexp.cgi: Use ?s= as input.  Handle errors.  Specify style
	sheet.

2008-12-07  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3     use warnings;
4     use feature 'state';
5     use CGI::Carp qw(fatalsToBrowser);
6    
7     use lib q[/home/wakaba/work/manakai2/lib];
8 wakaba 1.2 use Message::CGI::Util qw/percent_decode htescape/;
9     use Message::CGI::HTTP;
10 wakaba 1.1
11     use Regexp::Parser;
12     use Graph::Easy;
13    
14     use Scalar::Util qw/refaddr/;
15    
16     my $default_map = {};
17     for (qw/. \C \w \W \s \S \d \D \X \1 \2 \3 \4 \5 \6 \7 \8 \9
18     \A ^ \B \b \G \Z \z $/) {
19     $default_map->{$_} = qq[Perl /$_/];
20     }
21    
22     my $assertion_map = {
23     ifmatch => '(?=)',
24     '<ifmatch' => '(?<=)',
25     unlessm => '(?!)',
26     '<unlessm' => '(?<!)',
27     };
28    
29 wakaba 1.2 my $cgi = Message::CGI::HTTP->new;
30    
31     my $regexp = percent_decode $cgi->get_parameter ('s') // '';
32 wakaba 1.1 $regexp = '(?:)' unless length $regexp;
33    
34     my $parser = Regexp::Parser->new;
35    
36     package Regexp::Parser;
37    
38     # start of char class range (or maybe just char)
39     $parser->add_handler('cc' => sub {
40     my ($S) = @_;
41     return if ${&Rx} =~ m{ \G (?= ] | \z ) }xgc;
42     push @{ $S->{next} }, qw< cc >;
43     my ($lhs, $rhs, $before_range);
44     my $ret = \$lhs;
45    
46     {
47     if (${&Rx} =~ m{ \G ( \\ ) }xgcs) {
48     my $c = $1;
49     $$ret = $S->$c(1);
50     }
51     elsif (${&Rx} =~ m{ \G \[ ([.=:]) (\^?) (.*?) \1 \] }xgcs) {
52     my ($how, $neg, $name) = ($1, $2, $3);
53     my $posix = "POSIX_$name";
54     if ($S->can($posix)) { $$ret = $S->$posix($neg, $how) }
55     else { $S->error(RPe_BADPOS, "$how$neg$name$how") }
56     }
57     elsif (${&Rx} =~ m{ \G (.) }xgcs) {
58     $$ret = $S->force_object(anyof_char => $1);
59     }
60    
61     if ($ret == \$lhs) {
62     if (${&Rx} =~ m{ \G (?= - ) }xgc) {
63     if ($lhs->visual =~ /^(?:\[[:.=]|\\[dDsSwWpP])/) {
64     $S->warn(RPe_FRANGE, $lhs->visual, "");
65     $ret = $lhs;
66     last;
67     }
68     $before_range = &RxPOS++;
69     $ret = \$rhs;
70     redo;
71     }
72     $ret = $lhs;
73     }
74     elsif ($ret == \$rhs) {
75     if ($rhs->visual =~ /^(?:\[[:.=]|\\[dDsSwWpP])/) {
76     $S->warn(RPe_FRANGE, $lhs->visual, $rhs->visual);
77     &RxPOS = $before_range;
78     $ret = $lhs;
79     }
80     elsif ($lhs->data gt $rhs->data) {
81     $S->error(RPe_IRANGE, $lhs->visual, $rhs->visual);
82     }
83     else {
84     $ret = $S->object(anyof_range => $lhs, $rhs);
85     }
86     }
87     }
88    
89     return if &SIZE_ONLY;
90     return $ret;
91     });
92    
93     package main;
94    
95     $parser->parse ($regexp);
96 wakaba 1.2 my $eregexp = htescape $regexp;
97    
98     if ($parser->errnum) {
99     binmode STDOUT, ':encoding(utf-8)';
100     print "Content-Type: text/html; charset=utf-8\n\n";
101     print q[<!DOCTYPE HTML><html lang=en>
102     <title>Regular expression visualizer: $eregexp</title>
103     <link rel="stylesheet" href="/www/style/html/xhtml"/>
104     </head>
105     <body>
106     <h1>Regular expression visualizer</h1>
107    
108     <p>Input: <code>], $eregexp, q[</code></p>
109    
110     <p>Error: ], htescape ($parser->errmsg);
111     exit;
112     }
113 wakaba 1.1
114     binmode STDOUT, ':encoding(utf-8)';
115     print "Content-Type: application/xhtml+xml; charset=utf-8\n\n";
116    
117     add_regexp ($parser->root);
118    
119 wakaba 1.2 print q[<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
120     <head><title>Regular expression visualizer: $eregexp</title>
121     <link rel="stylesheet" href="/www/style/html/xhtml"/>
122 wakaba 1.1 </head>
123 wakaba 1.2 <body>
124     <h1>Regular expression visualizer</h1>
125    
126     <p>Input: <code>], $eregexp, q[</code></p>];
127 wakaba 1.1
128     my @regexp;
129     while (@regexp) {
130     my $nodes = shift @regexp;
131    
132     my $index = get_graph_index ($nodes);
133 wakaba 1.2 print "<section><h2>Regexp #$index</h2>\n\n";
134 wakaba 1.1
135     my $g = generate_graph ($nodes);
136     print $g->as_svg;
137    
138     print "</section>\n";
139     }
140    
141     print q[</body></html>];
142    
143     sub escape_value ($) {
144     my $v = shift;
145     $v =~ s/(\W)/sprintf '\x{%04X}', ord $1/ge;
146     $v;
147     } # escape_value
148    
149     sub escape_code ($) {
150     my $v = shift;
151     $v =~ s/([^\x20-\x5B\x5D-\x7E])/sprintf '\x{%04X}', ord $1/ge;
152     $v;
153     } # escape_code
154    
155     sub add_regexp ($) {
156     my $nodes = shift;
157     push @regexp, $nodes;
158     } # add_regexp
159    
160     sub get_graph_index ($) {
161     state $index;
162     state $next_index ||= 0;
163    
164     my $nodes = shift;
165     $index->{$nodes} //= $next_index++;
166     return $index->{$nodes};
167     } # get_graph_index
168    
169     sub generate_graph ($$) {
170     my ($root_nodes) = @_;
171    
172     my $g = Graph::Easy->new;
173    
174     $g->set_attributes ('node.start' => {fill => 'blue', color => 'white'});
175     $g->set_attributes ('node.success' => {fill => 'green', color => 'white'});
176    
177     my $start_n = $g->add_node ('START');
178     $start_n->set_attribute (class => 'start');
179     my $success_n = $g->add_node ('SUCCESS');
180     $success_n->set_attribute (class => 'success');
181    
182     my ($first_ns, $last_ns, $is_optional) = add_to_graph ($root_nodes => $g);
183     $g->add_edge ($start_n => $_) for @$first_ns;
184     $g->add_edge ($_ => $success_n) for @$last_ns;
185     $g->add_edge ($start_n => $success_n) if $is_optional;
186    
187     return $g;
188     } # generate_graph
189    
190     sub add_to_graph ($$) {
191     my ($node, $g) = @_;
192    
193     my $family = ref $node eq 'ARRAY' ? '' : $node->family;
194     my $type = ref $node eq 'ARRAY' ? '' : $node->type;
195     if ($family eq 'quant') {
196     my ($min, $max) = ($node->min, $node->max);
197     return ([], [], 1) if $max eq '0';
198     my ($first_ns, $last_ns, $is_optional) = add_to_graph ($node->data => $g);
199    
200     my $label;
201     if ($max eq '') {
202     if ($min == 0) {
203     $is_optional = 1;
204     $label = '';
205    
206     } elsif ($min == 1) {
207     $label = '';
208    
209     } else {
210     $label = 'at least ' . ($min - 1);
211    
212     }
213     } elsif ($max == 1) {
214     if ($min == 0) {
215     $is_optional = 1;
216    
217     } else {
218    
219     }
220     } else {
221     $label = 'at most ' . ($max - 1);
222     if ($min == 0) {
223     $is_optional = 1;
224    
225    
226     } elsif ($min == 1) {
227    
228     } else {
229     $label = 'at least ' . ($min - 1) . ', ' . $label;
230    
231     }
232     }
233    
234     if (@$first_ns != 1 or @$last_ns != 1) {
235     my $n = $g->add_node (refaddr $first_ns);
236     $n->set_attribute (label => '');
237     my $m = $n;
238     unless ($is_optional) {
239     $m = $g->add_node (refaddr $last_ns);
240     $m->set_attribute (label => '');
241     } else {
242     $is_optional = 0;
243     }
244     $g->add_edge ($n => $_) for @$first_ns;
245     $g->add_edge ($_ => $m) for @$last_ns;
246     $first_ns = [$n];
247     $last_ns = [$m];
248     }
249    
250     if (defined $label) {
251     my $e = $g->add_edge ($last_ns->[0] => $first_ns->[0]);
252     $e->set_attribute (label => $label);
253     }
254    
255     return ($first_ns, $last_ns, $is_optional);
256     } elsif ($type eq 'branch') {
257     my @first_n;
258     my @last_n;
259     my $is_optional = 0;
260     for (@{$node->data}) {
261     my ($f_ns, $l_ns, $is_opt) = add_to_graph ($_ => $g);
262     push @first_n, @$f_ns;
263     push @last_n, @$l_ns;
264     $is_optional |= $is_opt;
265     }
266     return (\@first_n, \@last_n, $is_optional);
267     } elsif ($type eq 'anyof') {
268     if ($node->neg) {
269     my $nodes = Regexp::Parser::branch->new ($node->{rx});
270     $nodes->{data} = $node->data;
271    
272     add_regexp ($nodes);
273    
274     my $n = $g->add_node (refaddr $nodes);
275     my $label = 'NOT #' . get_graph_index ($nodes);
276     $n->set_attribute (label => $label);
277    
278     return ([$n], [$n], 0);
279     } else {
280     my @first_n;
281     my @last_n;
282     for (@{$node->data}) {
283     my ($f_ns, $l_ns) = add_to_graph ($_ => $g);
284     push @first_n, @$f_ns;
285     push @last_n, @$l_ns;
286     }
287     return (\@first_n, \@last_n, 0);
288     }
289     } elsif ($type eq '') {
290     my $prev_ns;
291     my $first_ns;
292     my $is_optional = 1;
293     for (@{$node}) {
294     my ($f_ns, $l_ns, $is_opt) = add_to_graph ($_ => $g);
295     if ($prev_ns) {
296     if (@$prev_ns > 1 and @$f_ns > 1) {
297     my $n = $g->add_node (refaddr $f_ns);
298     $n->set_attribute (label => '');
299     $g->add_edge ($_ => $n) for @$prev_ns;
300     $g->add_edge ($n => $_) for @$f_ns;
301     } else {
302     for my $prev_n (@$prev_ns) {
303     for my $f_n (@$f_ns) {
304     $g->add_edge ($prev_n => $f_n);
305     }
306     }
307     }
308     if ($is_optional) {
309     push @$first_ns, @$f_ns;
310     }
311     if ($is_opt) {
312     push @$prev_ns, @$l_ns;
313     } else {
314     $prev_ns = $l_ns if @$l_ns;
315     }
316     } else {
317     $first_ns = $f_ns;
318     $prev_ns = $l_ns if @$l_ns;
319     }
320     $is_optional &= $is_opt;
321     }
322     return ($first_ns || [], $prev_ns || [], $is_optional);
323     } elsif ($family eq 'group' or $family eq 'open' or $type eq 'suspend') {
324     ## TODO: (?:) vs () vs (?>), (?:)->on, (?:)->off
325     my ($f_ns, $l_ns, $is_opt) = add_to_graph ($node->data => $g);
326     return ($f_ns, $l_ns, $is_opt);
327     } elsif ($type eq 'ifthen') {
328     my $nodes = $node->data;
329    
330     my $groupp = $nodes->[0];
331     my $label = $groupp ? '(?' . $groupp->visual . ')' : '';
332     my $n = $g->add_node (refaddr $groupp);
333     $n->set_attribute (label => $label);
334    
335     my $l = $g->add_node (refaddr $nodes);
336     $l->set_attribute (label => '');
337    
338     my $branch = $nodes->[1];
339     my $branches = $branch ? $branch->data : [];
340    
341     my $true = $branches->[0];
342     if ($true) {
343     my ($f_ns, $l_ns, $is_opt) = add_to_graph ($true => $g);
344     $g->add_edge ($n => $_)->set_attribute (label => 'true') for @$f_ns;
345     $g->add_edge ($_ => $l) for @$l_ns;
346     $g->add_edge ($n => $l)->set_attribute (label => 'true') if $is_opt;
347     }
348    
349     my $false = $branches->[1];
350     if ($false) {
351     my ($f_ns, $l_ns, $is_opt) = add_to_graph ($false => $g);
352     $g->add_edge ($n => $_)->set_attribute (label => 'false') for @$f_ns;
353     $g->add_edge ($_ => $l) for @$l_ns;
354     $g->add_edge ($n => $l)->set_attribute (label => 'false') if $is_opt;
355     }
356    
357     return ([$n], [$l], 0);
358     } elsif ($type eq 'eval' or $type eq 'logical') {
359     my $n = $g->add_node (refaddr $node);
360     my $label = $type eq 'eval' ? '(?{})' : '(??{})';
361     $label .= ' ' . escape_code $node->data;
362     $n->set_attribute (label => $label);
363     return ([$n], [$n], 0);
364     } elsif ($family eq 'assertion') {
365     my $nodes = $node->data;
366     add_regexp ($nodes);
367    
368     my $n = $g->add_node (refaddr $nodes);
369     $type = '<' . $type if $node->dir < 0;
370     my $label = $assertion_map->{$type} // $type;
371     $label .= ' #' . get_graph_index ($nodes);
372     $n->set_attribute (label => $label);
373    
374     return ([$n], [$n], 0);
375     } elsif ($family eq 'anyof_class') {
376     my $data = $node->data;
377     my $label;
378     if ($data eq 'POSIX') {
379     my $how = ${$node->{how}};
380     if ($how eq ':') {
381     $label = 'POSIX ' . $node->{type};
382     $label = 'NOT ' . $label if $node->neg;
383     } else {
384     $label = $how . $node->neg . $node->{type} . $how;
385     }
386     } else {
387     my $data_family = $data->family;
388     if ($data_family eq 'prop') {
389     $label = 'property ' . $node->type;
390     $label = 'NOT ' . $label if $node->neg;
391     } elsif ($data_family eq 'space') {
392     $label = $data->neg ? 'Perl /\S/' : 'Perl /\s/';
393     } elsif ($data_family eq 'alnum') {
394     $label = $data->neg ? 'Perl /\W/' : 'Perl /\w/';
395     } elsif ($data_family eq 'digit') {
396     $label = $data->neg ? 'Perl /\D/' : 'Perl /\d/';
397     } else {
398     $label = $data->visual;
399     }
400     }
401    
402     my $n = $g->add_node (refaddr $node);
403     $n->set_attribute (label => $label);
404    
405     return ([$n] => [$n]);
406     } elsif ($family eq 'exact' or $type eq 'anyof_char') {
407     my $n = $g->add_node (refaddr $node);
408    
409     my $label = escape_value $node->data;
410     $n->set_attribute (label => qq[ "$label" ]);
411    
412     return ([$n] => [$n]);
413     } elsif ($family eq 'flags') {
414     ## TODO: scope
415     my $n = $g->add_node (refaddr $node);
416    
417     my $label = $node->visual;
418     $n->set_attribute (label => $label);
419    
420     return ([$n] => [$n], 0);
421     } elsif ($family eq 'minmod') {
422     my $nodes = $node->data;
423     add_regexp ($nodes);
424    
425     my $n = $g->add_node (refaddr $nodes);
426     my $label = 'non-greedy #' . get_graph_index ($nodes);
427     $n->set_attribute (label => $label);
428    
429     return ([$n], [$n], 0);
430     } elsif ($family eq 'anyof_range') {
431     my $n = $g->add_node (refaddr $node);
432    
433     my $start = escape_value $node->data->[0]->data;
434     my $end = escape_value $node->data->[1]->data;
435     my $label = qq[ one of "$start" .. "$end" ];
436     $n->set_attribute (label => $label);
437    
438     return ([$n] => [$n], 0);
439     } else {
440     # anyof_char
441     # anyof_range
442    
443     my $n = $g->add_node (refaddr $node);
444    
445     my $label = $node->visual;
446     $label = $default_map->{$label} // escape_value $label;
447     $label .= ' (' . $type . ')';
448     $n->set_attribute (label => $label);
449    
450     return ([$n] => [$n], 0);
451     }
452     } # add_to_graph
453    

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24