/[suikacvs]/webroot/regexp/visualizer/regexp.cgi
Suika

Contents of /webroot/regexp/visualizer/regexp.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Sun Dec 7 09:48:35 2008 UTC (15 years, 11 months ago) by wakaba
Branch: MAIN
++ swe/visualizer/ChangeLog	7 Dec 2008 09:48:11 -0000
2008-12-07  Wakaba  <wakaba@suika.fam.cx>

	* .htaccess: New file.

	* ChangeLog: New file.

	* regexp.cgi: New script.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3     use warnings;
4     use feature 'state';
5     use CGI::Carp qw(fatalsToBrowser);
6    
7     use lib q[/home/wakaba/work/manakai2/lib];
8     use Message::CGI::Util qw/percent_decode/;
9    
10     use Regexp::Parser;
11     use Graph::Easy;
12    
13     use Scalar::Util qw/refaddr/;
14    
15     my $default_map = {};
16     for (qw/. \C \w \W \s \S \d \D \X \1 \2 \3 \4 \5 \6 \7 \8 \9
17     \A ^ \B \b \G \Z \z $/) {
18     $default_map->{$_} = qq[Perl /$_/];
19     }
20    
21     my $assertion_map = {
22     ifmatch => '(?=)',
23     '<ifmatch' => '(?<=)',
24     unlessm => '(?!)',
25     '<unlessm' => '(?<!)',
26     };
27    
28     my $regexp = percent_decode $ENV{QUERY_STRING};
29     $regexp = '(?:)' unless length $regexp;
30    
31     my $parser = Regexp::Parser->new;
32    
33     package Regexp::Parser;
34    
35     # start of char class range (or maybe just char)
36     $parser->add_handler('cc' => sub {
37     my ($S) = @_;
38     return if ${&Rx} =~ m{ \G (?= ] | \z ) }xgc;
39     push @{ $S->{next} }, qw< cc >;
40     my ($lhs, $rhs, $before_range);
41     my $ret = \$lhs;
42    
43     {
44     if (${&Rx} =~ m{ \G ( \\ ) }xgcs) {
45     my $c = $1;
46     $$ret = $S->$c(1);
47     }
48     elsif (${&Rx} =~ m{ \G \[ ([.=:]) (\^?) (.*?) \1 \] }xgcs) {
49     my ($how, $neg, $name) = ($1, $2, $3);
50     my $posix = "POSIX_$name";
51     if ($S->can($posix)) { $$ret = $S->$posix($neg, $how) }
52     else { $S->error(RPe_BADPOS, "$how$neg$name$how") }
53     }
54     elsif (${&Rx} =~ m{ \G (.) }xgcs) {
55     $$ret = $S->force_object(anyof_char => $1);
56     }
57    
58     if ($ret == \$lhs) {
59     if (${&Rx} =~ m{ \G (?= - ) }xgc) {
60     if ($lhs->visual =~ /^(?:\[[:.=]|\\[dDsSwWpP])/) {
61     $S->warn(RPe_FRANGE, $lhs->visual, "");
62     $ret = $lhs;
63     last;
64     }
65     $before_range = &RxPOS++;
66     $ret = \$rhs;
67     redo;
68     }
69     $ret = $lhs;
70     }
71     elsif ($ret == \$rhs) {
72     if ($rhs->visual =~ /^(?:\[[:.=]|\\[dDsSwWpP])/) {
73     $S->warn(RPe_FRANGE, $lhs->visual, $rhs->visual);
74     &RxPOS = $before_range;
75     $ret = $lhs;
76     }
77     elsif ($lhs->data gt $rhs->data) {
78     $S->error(RPe_IRANGE, $lhs->visual, $rhs->visual);
79     }
80     else {
81     $ret = $S->object(anyof_range => $lhs, $rhs);
82     }
83     }
84     }
85    
86     return if &SIZE_ONLY;
87     return $ret;
88     });
89    
90     package main;
91    
92     $parser->parse ($regexp);
93    
94     binmode STDOUT, ':encoding(utf-8)';
95     print "Content-Type: application/xhtml+xml; charset=utf-8\n\n";
96    
97     print $parser->errnum, $parser->errmsg;
98    
99     add_regexp ($parser->root);
100    
101     print q[<html xmlns="http://www.w3.org/1999/xhtml">
102     <head><title></title>
103     </head>
104     <body>];
105    
106     my @regexp;
107     while (@regexp) {
108     my $nodes = shift @regexp;
109    
110     my $index = get_graph_index ($nodes);
111     print "<section><h1>Regexp #$index</h1>\n\n";
112    
113     my $g = generate_graph ($nodes);
114     print $g->as_svg;
115    
116     print "</section>\n";
117     }
118    
119     print q[</body></html>];
120    
121     sub escape_value ($) {
122     my $v = shift;
123     $v =~ s/(\W)/sprintf '\x{%04X}', ord $1/ge;
124     $v;
125     } # escape_value
126    
127     sub escape_code ($) {
128     my $v = shift;
129     $v =~ s/([^\x20-\x5B\x5D-\x7E])/sprintf '\x{%04X}', ord $1/ge;
130     $v;
131     } # escape_code
132    
133     sub add_regexp ($) {
134     my $nodes = shift;
135     push @regexp, $nodes;
136     } # add_regexp
137    
138     sub get_graph_index ($) {
139     state $index;
140     state $next_index ||= 0;
141    
142     my $nodes = shift;
143     $index->{$nodes} //= $next_index++;
144     return $index->{$nodes};
145     } # get_graph_index
146    
147     sub generate_graph ($$) {
148     my ($root_nodes) = @_;
149    
150     my $g = Graph::Easy->new;
151    
152     $g->set_attributes ('node.start' => {fill => 'blue', color => 'white'});
153     $g->set_attributes ('node.success' => {fill => 'green', color => 'white'});
154    
155     my $start_n = $g->add_node ('START');
156     $start_n->set_attribute (class => 'start');
157     my $success_n = $g->add_node ('SUCCESS');
158     $success_n->set_attribute (class => 'success');
159    
160     my ($first_ns, $last_ns, $is_optional) = add_to_graph ($root_nodes => $g);
161     $g->add_edge ($start_n => $_) for @$first_ns;
162     $g->add_edge ($_ => $success_n) for @$last_ns;
163     $g->add_edge ($start_n => $success_n) if $is_optional;
164    
165     return $g;
166     } # generate_graph
167    
168     sub add_to_graph ($$) {
169     my ($node, $g) = @_;
170    
171     my $family = ref $node eq 'ARRAY' ? '' : $node->family;
172     my $type = ref $node eq 'ARRAY' ? '' : $node->type;
173     if ($family eq 'quant') {
174     my ($min, $max) = ($node->min, $node->max);
175     return ([], [], 1) if $max eq '0';
176     my ($first_ns, $last_ns, $is_optional) = add_to_graph ($node->data => $g);
177    
178     my $label;
179     if ($max eq '') {
180     if ($min == 0) {
181     $is_optional = 1;
182     $label = '';
183    
184     } elsif ($min == 1) {
185     $label = '';
186    
187     } else {
188     $label = 'at least ' . ($min - 1);
189    
190     }
191     } elsif ($max == 1) {
192     if ($min == 0) {
193     $is_optional = 1;
194    
195     } else {
196    
197     }
198     } else {
199     $label = 'at most ' . ($max - 1);
200     if ($min == 0) {
201     $is_optional = 1;
202    
203    
204     } elsif ($min == 1) {
205    
206     } else {
207     $label = 'at least ' . ($min - 1) . ', ' . $label;
208    
209     }
210     }
211    
212     if (@$first_ns != 1 or @$last_ns != 1) {
213     my $n = $g->add_node (refaddr $first_ns);
214     $n->set_attribute (label => '');
215     my $m = $n;
216     unless ($is_optional) {
217     $m = $g->add_node (refaddr $last_ns);
218     $m->set_attribute (label => '');
219     } else {
220     $is_optional = 0;
221     }
222     $g->add_edge ($n => $_) for @$first_ns;
223     $g->add_edge ($_ => $m) for @$last_ns;
224     $first_ns = [$n];
225     $last_ns = [$m];
226     }
227    
228     if (defined $label) {
229     my $e = $g->add_edge ($last_ns->[0] => $first_ns->[0]);
230     $e->set_attribute (label => $label);
231     }
232    
233     return ($first_ns, $last_ns, $is_optional);
234     } elsif ($type eq 'branch') {
235     my @first_n;
236     my @last_n;
237     my $is_optional = 0;
238     for (@{$node->data}) {
239     my ($f_ns, $l_ns, $is_opt) = add_to_graph ($_ => $g);
240     push @first_n, @$f_ns;
241     push @last_n, @$l_ns;
242     $is_optional |= $is_opt;
243     }
244     return (\@first_n, \@last_n, $is_optional);
245     } elsif ($type eq 'anyof') {
246     if ($node->neg) {
247     my $nodes = Regexp::Parser::branch->new ($node->{rx});
248     $nodes->{data} = $node->data;
249    
250     add_regexp ($nodes);
251    
252     my $n = $g->add_node (refaddr $nodes);
253     my $label = 'NOT #' . get_graph_index ($nodes);
254     $n->set_attribute (label => $label);
255    
256     return ([$n], [$n], 0);
257     } else {
258     my @first_n;
259     my @last_n;
260     for (@{$node->data}) {
261     my ($f_ns, $l_ns) = add_to_graph ($_ => $g);
262     push @first_n, @$f_ns;
263     push @last_n, @$l_ns;
264     }
265     return (\@first_n, \@last_n, 0);
266     }
267     } elsif ($type eq '') {
268     my $prev_ns;
269     my $first_ns;
270     my $is_optional = 1;
271     for (@{$node}) {
272     my ($f_ns, $l_ns, $is_opt) = add_to_graph ($_ => $g);
273     if ($prev_ns) {
274     if (@$prev_ns > 1 and @$f_ns > 1) {
275     my $n = $g->add_node (refaddr $f_ns);
276     $n->set_attribute (label => '');
277     $g->add_edge ($_ => $n) for @$prev_ns;
278     $g->add_edge ($n => $_) for @$f_ns;
279     } else {
280     for my $prev_n (@$prev_ns) {
281     for my $f_n (@$f_ns) {
282     $g->add_edge ($prev_n => $f_n);
283     }
284     }
285     }
286     if ($is_optional) {
287     push @$first_ns, @$f_ns;
288     }
289     if ($is_opt) {
290     push @$prev_ns, @$l_ns;
291     } else {
292     $prev_ns = $l_ns if @$l_ns;
293     }
294     } else {
295     $first_ns = $f_ns;
296     $prev_ns = $l_ns if @$l_ns;
297     }
298     $is_optional &= $is_opt;
299     }
300     return ($first_ns || [], $prev_ns || [], $is_optional);
301     } elsif ($family eq 'group' or $family eq 'open' or $type eq 'suspend') {
302     ## TODO: (?:) vs () vs (?>), (?:)->on, (?:)->off
303     my ($f_ns, $l_ns, $is_opt) = add_to_graph ($node->data => $g);
304     return ($f_ns, $l_ns, $is_opt);
305     } elsif ($type eq 'ifthen') {
306     my $nodes = $node->data;
307    
308     my $groupp = $nodes->[0];
309     my $label = $groupp ? '(?' . $groupp->visual . ')' : '';
310     my $n = $g->add_node (refaddr $groupp);
311     $n->set_attribute (label => $label);
312    
313     my $l = $g->add_node (refaddr $nodes);
314     $l->set_attribute (label => '');
315    
316     my $branch = $nodes->[1];
317     my $branches = $branch ? $branch->data : [];
318    
319     my $true = $branches->[0];
320     if ($true) {
321     my ($f_ns, $l_ns, $is_opt) = add_to_graph ($true => $g);
322     $g->add_edge ($n => $_)->set_attribute (label => 'true') for @$f_ns;
323     $g->add_edge ($_ => $l) for @$l_ns;
324     $g->add_edge ($n => $l)->set_attribute (label => 'true') if $is_opt;
325     }
326    
327     my $false = $branches->[1];
328     if ($false) {
329     my ($f_ns, $l_ns, $is_opt) = add_to_graph ($false => $g);
330     $g->add_edge ($n => $_)->set_attribute (label => 'false') for @$f_ns;
331     $g->add_edge ($_ => $l) for @$l_ns;
332     $g->add_edge ($n => $l)->set_attribute (label => 'false') if $is_opt;
333     }
334    
335     return ([$n], [$l], 0);
336     } elsif ($type eq 'eval' or $type eq 'logical') {
337     my $n = $g->add_node (refaddr $node);
338     my $label = $type eq 'eval' ? '(?{})' : '(??{})';
339     $label .= ' ' . escape_code $node->data;
340     $n->set_attribute (label => $label);
341     return ([$n], [$n], 0);
342     } elsif ($family eq 'assertion') {
343     my $nodes = $node->data;
344     add_regexp ($nodes);
345    
346     my $n = $g->add_node (refaddr $nodes);
347     $type = '<' . $type if $node->dir < 0;
348     my $label = $assertion_map->{$type} // $type;
349     $label .= ' #' . get_graph_index ($nodes);
350     $n->set_attribute (label => $label);
351    
352     return ([$n], [$n], 0);
353     } elsif ($family eq 'anyof_class') {
354     my $data = $node->data;
355     my $label;
356     if ($data eq 'POSIX') {
357     my $how = ${$node->{how}};
358     if ($how eq ':') {
359     $label = 'POSIX ' . $node->{type};
360     $label = 'NOT ' . $label if $node->neg;
361     } else {
362     $label = $how . $node->neg . $node->{type} . $how;
363     }
364     } else {
365     my $data_family = $data->family;
366     if ($data_family eq 'prop') {
367     $label = 'property ' . $node->type;
368     $label = 'NOT ' . $label if $node->neg;
369     } elsif ($data_family eq 'space') {
370     $label = $data->neg ? 'Perl /\S/' : 'Perl /\s/';
371     } elsif ($data_family eq 'alnum') {
372     $label = $data->neg ? 'Perl /\W/' : 'Perl /\w/';
373     } elsif ($data_family eq 'digit') {
374     $label = $data->neg ? 'Perl /\D/' : 'Perl /\d/';
375     } else {
376     $label = $data->visual;
377     }
378     }
379    
380     my $n = $g->add_node (refaddr $node);
381     $n->set_attribute (label => $label);
382    
383     return ([$n] => [$n]);
384     } elsif ($family eq 'exact' or $type eq 'anyof_char') {
385     my $n = $g->add_node (refaddr $node);
386    
387     my $label = escape_value $node->data;
388     $n->set_attribute (label => qq[ "$label" ]);
389    
390     return ([$n] => [$n]);
391     } elsif ($family eq 'flags') {
392     ## TODO: scope
393     my $n = $g->add_node (refaddr $node);
394    
395     my $label = $node->visual;
396     $n->set_attribute (label => $label);
397    
398     return ([$n] => [$n], 0);
399     } elsif ($family eq 'minmod') {
400     my $nodes = $node->data;
401     add_regexp ($nodes);
402    
403     my $n = $g->add_node (refaddr $nodes);
404     my $label = 'non-greedy #' . get_graph_index ($nodes);
405     $n->set_attribute (label => $label);
406    
407     return ([$n], [$n], 0);
408     } elsif ($family eq 'anyof_range') {
409     my $n = $g->add_node (refaddr $node);
410    
411     my $start = escape_value $node->data->[0]->data;
412     my $end = escape_value $node->data->[1]->data;
413     my $label = qq[ one of "$start" .. "$end" ];
414     $n->set_attribute (label => $label);
415    
416     return ([$n] => [$n], 0);
417     } else {
418     # anyof_char
419     # anyof_range
420    
421     my $n = $g->add_node (refaddr $node);
422    
423     my $label = $node->visual;
424     $label = $default_map->{$label} // escape_value $label;
425     $label .= ' (' . $type . ')';
426     $n->set_attribute (label => $label);
427    
428     return ([$n] => [$n], 0);
429     }
430     } # add_to_graph
431    

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24