/[suikacvs]/webroot/regexp/visualizer/regexp.cgi
Suika

Contents of /webroot/regexp/visualizer/regexp.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations) (download)
Sun Dec 7 09:48:35 2008 UTC (15 years, 10 months ago) by wakaba
Branch: MAIN
++ swe/visualizer/ChangeLog	7 Dec 2008 09:48:11 -0000
2008-12-07  Wakaba  <wakaba@suika.fam.cx>

	* .htaccess: New file.

	* ChangeLog: New file.

	* regexp.cgi: New script.

1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use feature 'state';
5 use CGI::Carp qw(fatalsToBrowser);
6
7 use lib q[/home/wakaba/work/manakai2/lib];
8 use Message::CGI::Util qw/percent_decode/;
9
10 use Regexp::Parser;
11 use Graph::Easy;
12
13 use Scalar::Util qw/refaddr/;
14
15 my $default_map = {};
16 for (qw/. \C \w \W \s \S \d \D \X \1 \2 \3 \4 \5 \6 \7 \8 \9
17 \A ^ \B \b \G \Z \z $/) {
18 $default_map->{$_} = qq[Perl /$_/];
19 }
20
21 my $assertion_map = {
22 ifmatch => '(?=)',
23 '<ifmatch' => '(?<=)',
24 unlessm => '(?!)',
25 '<unlessm' => '(?<!)',
26 };
27
28 my $regexp = percent_decode $ENV{QUERY_STRING};
29 $regexp = '(?:)' unless length $regexp;
30
31 my $parser = Regexp::Parser->new;
32
33 package Regexp::Parser;
34
35 # start of char class range (or maybe just char)
36 $parser->add_handler('cc' => sub {
37 my ($S) = @_;
38 return if ${&Rx} =~ m{ \G (?= ] | \z ) }xgc;
39 push @{ $S->{next} }, qw< cc >;
40 my ($lhs, $rhs, $before_range);
41 my $ret = \$lhs;
42
43 {
44 if (${&Rx} =~ m{ \G ( \\ ) }xgcs) {
45 my $c = $1;
46 $$ret = $S->$c(1);
47 }
48 elsif (${&Rx} =~ m{ \G \[ ([.=:]) (\^?) (.*?) \1 \] }xgcs) {
49 my ($how, $neg, $name) = ($1, $2, $3);
50 my $posix = "POSIX_$name";
51 if ($S->can($posix)) { $$ret = $S->$posix($neg, $how) }
52 else { $S->error(RPe_BADPOS, "$how$neg$name$how") }
53 }
54 elsif (${&Rx} =~ m{ \G (.) }xgcs) {
55 $$ret = $S->force_object(anyof_char => $1);
56 }
57
58 if ($ret == \$lhs) {
59 if (${&Rx} =~ m{ \G (?= - ) }xgc) {
60 if ($lhs->visual =~ /^(?:\[[:.=]|\\[dDsSwWpP])/) {
61 $S->warn(RPe_FRANGE, $lhs->visual, "");
62 $ret = $lhs;
63 last;
64 }
65 $before_range = &RxPOS++;
66 $ret = \$rhs;
67 redo;
68 }
69 $ret = $lhs;
70 }
71 elsif ($ret == \$rhs) {
72 if ($rhs->visual =~ /^(?:\[[:.=]|\\[dDsSwWpP])/) {
73 $S->warn(RPe_FRANGE, $lhs->visual, $rhs->visual);
74 &RxPOS = $before_range;
75 $ret = $lhs;
76 }
77 elsif ($lhs->data gt $rhs->data) {
78 $S->error(RPe_IRANGE, $lhs->visual, $rhs->visual);
79 }
80 else {
81 $ret = $S->object(anyof_range => $lhs, $rhs);
82 }
83 }
84 }
85
86 return if &SIZE_ONLY;
87 return $ret;
88 });
89
90 package main;
91
92 $parser->parse ($regexp);
93
94 binmode STDOUT, ':encoding(utf-8)';
95 print "Content-Type: application/xhtml+xml; charset=utf-8\n\n";
96
97 print $parser->errnum, $parser->errmsg;
98
99 add_regexp ($parser->root);
100
101 print q[<html xmlns="http://www.w3.org/1999/xhtml">
102 <head><title></title>
103 </head>
104 <body>];
105
106 my @regexp;
107 while (@regexp) {
108 my $nodes = shift @regexp;
109
110 my $index = get_graph_index ($nodes);
111 print "<section><h1>Regexp #$index</h1>\n\n";
112
113 my $g = generate_graph ($nodes);
114 print $g->as_svg;
115
116 print "</section>\n";
117 }
118
119 print q[</body></html>];
120
121 sub escape_value ($) {
122 my $v = shift;
123 $v =~ s/(\W)/sprintf '\x{%04X}', ord $1/ge;
124 $v;
125 } # escape_value
126
127 sub escape_code ($) {
128 my $v = shift;
129 $v =~ s/([^\x20-\x5B\x5D-\x7E])/sprintf '\x{%04X}', ord $1/ge;
130 $v;
131 } # escape_code
132
133 sub add_regexp ($) {
134 my $nodes = shift;
135 push @regexp, $nodes;
136 } # add_regexp
137
138 sub get_graph_index ($) {
139 state $index;
140 state $next_index ||= 0;
141
142 my $nodes = shift;
143 $index->{$nodes} //= $next_index++;
144 return $index->{$nodes};
145 } # get_graph_index
146
147 sub generate_graph ($$) {
148 my ($root_nodes) = @_;
149
150 my $g = Graph::Easy->new;
151
152 $g->set_attributes ('node.start' => {fill => 'blue', color => 'white'});
153 $g->set_attributes ('node.success' => {fill => 'green', color => 'white'});
154
155 my $start_n = $g->add_node ('START');
156 $start_n->set_attribute (class => 'start');
157 my $success_n = $g->add_node ('SUCCESS');
158 $success_n->set_attribute (class => 'success');
159
160 my ($first_ns, $last_ns, $is_optional) = add_to_graph ($root_nodes => $g);
161 $g->add_edge ($start_n => $_) for @$first_ns;
162 $g->add_edge ($_ => $success_n) for @$last_ns;
163 $g->add_edge ($start_n => $success_n) if $is_optional;
164
165 return $g;
166 } # generate_graph
167
168 sub add_to_graph ($$) {
169 my ($node, $g) = @_;
170
171 my $family = ref $node eq 'ARRAY' ? '' : $node->family;
172 my $type = ref $node eq 'ARRAY' ? '' : $node->type;
173 if ($family eq 'quant') {
174 my ($min, $max) = ($node->min, $node->max);
175 return ([], [], 1) if $max eq '0';
176 my ($first_ns, $last_ns, $is_optional) = add_to_graph ($node->data => $g);
177
178 my $label;
179 if ($max eq '') {
180 if ($min == 0) {
181 $is_optional = 1;
182 $label = '';
183
184 } elsif ($min == 1) {
185 $label = '';
186
187 } else {
188 $label = 'at least ' . ($min - 1);
189
190 }
191 } elsif ($max == 1) {
192 if ($min == 0) {
193 $is_optional = 1;
194
195 } else {
196
197 }
198 } else {
199 $label = 'at most ' . ($max - 1);
200 if ($min == 0) {
201 $is_optional = 1;
202
203
204 } elsif ($min == 1) {
205
206 } else {
207 $label = 'at least ' . ($min - 1) . ', ' . $label;
208
209 }
210 }
211
212 if (@$first_ns != 1 or @$last_ns != 1) {
213 my $n = $g->add_node (refaddr $first_ns);
214 $n->set_attribute (label => '');
215 my $m = $n;
216 unless ($is_optional) {
217 $m = $g->add_node (refaddr $last_ns);
218 $m->set_attribute (label => '');
219 } else {
220 $is_optional = 0;
221 }
222 $g->add_edge ($n => $_) for @$first_ns;
223 $g->add_edge ($_ => $m) for @$last_ns;
224 $first_ns = [$n];
225 $last_ns = [$m];
226 }
227
228 if (defined $label) {
229 my $e = $g->add_edge ($last_ns->[0] => $first_ns->[0]);
230 $e->set_attribute (label => $label);
231 }
232
233 return ($first_ns, $last_ns, $is_optional);
234 } elsif ($type eq 'branch') {
235 my @first_n;
236 my @last_n;
237 my $is_optional = 0;
238 for (@{$node->data}) {
239 my ($f_ns, $l_ns, $is_opt) = add_to_graph ($_ => $g);
240 push @first_n, @$f_ns;
241 push @last_n, @$l_ns;
242 $is_optional |= $is_opt;
243 }
244 return (\@first_n, \@last_n, $is_optional);
245 } elsif ($type eq 'anyof') {
246 if ($node->neg) {
247 my $nodes = Regexp::Parser::branch->new ($node->{rx});
248 $nodes->{data} = $node->data;
249
250 add_regexp ($nodes);
251
252 my $n = $g->add_node (refaddr $nodes);
253 my $label = 'NOT #' . get_graph_index ($nodes);
254 $n->set_attribute (label => $label);
255
256 return ([$n], [$n], 0);
257 } else {
258 my @first_n;
259 my @last_n;
260 for (@{$node->data}) {
261 my ($f_ns, $l_ns) = add_to_graph ($_ => $g);
262 push @first_n, @$f_ns;
263 push @last_n, @$l_ns;
264 }
265 return (\@first_n, \@last_n, 0);
266 }
267 } elsif ($type eq '') {
268 my $prev_ns;
269 my $first_ns;
270 my $is_optional = 1;
271 for (@{$node}) {
272 my ($f_ns, $l_ns, $is_opt) = add_to_graph ($_ => $g);
273 if ($prev_ns) {
274 if (@$prev_ns > 1 and @$f_ns > 1) {
275 my $n = $g->add_node (refaddr $f_ns);
276 $n->set_attribute (label => '');
277 $g->add_edge ($_ => $n) for @$prev_ns;
278 $g->add_edge ($n => $_) for @$f_ns;
279 } else {
280 for my $prev_n (@$prev_ns) {
281 for my $f_n (@$f_ns) {
282 $g->add_edge ($prev_n => $f_n);
283 }
284 }
285 }
286 if ($is_optional) {
287 push @$first_ns, @$f_ns;
288 }
289 if ($is_opt) {
290 push @$prev_ns, @$l_ns;
291 } else {
292 $prev_ns = $l_ns if @$l_ns;
293 }
294 } else {
295 $first_ns = $f_ns;
296 $prev_ns = $l_ns if @$l_ns;
297 }
298 $is_optional &= $is_opt;
299 }
300 return ($first_ns || [], $prev_ns || [], $is_optional);
301 } elsif ($family eq 'group' or $family eq 'open' or $type eq 'suspend') {
302 ## TODO: (?:) vs () vs (?>), (?:)->on, (?:)->off
303 my ($f_ns, $l_ns, $is_opt) = add_to_graph ($node->data => $g);
304 return ($f_ns, $l_ns, $is_opt);
305 } elsif ($type eq 'ifthen') {
306 my $nodes = $node->data;
307
308 my $groupp = $nodes->[0];
309 my $label = $groupp ? '(?' . $groupp->visual . ')' : '';
310 my $n = $g->add_node (refaddr $groupp);
311 $n->set_attribute (label => $label);
312
313 my $l = $g->add_node (refaddr $nodes);
314 $l->set_attribute (label => '');
315
316 my $branch = $nodes->[1];
317 my $branches = $branch ? $branch->data : [];
318
319 my $true = $branches->[0];
320 if ($true) {
321 my ($f_ns, $l_ns, $is_opt) = add_to_graph ($true => $g);
322 $g->add_edge ($n => $_)->set_attribute (label => 'true') for @$f_ns;
323 $g->add_edge ($_ => $l) for @$l_ns;
324 $g->add_edge ($n => $l)->set_attribute (label => 'true') if $is_opt;
325 }
326
327 my $false = $branches->[1];
328 if ($false) {
329 my ($f_ns, $l_ns, $is_opt) = add_to_graph ($false => $g);
330 $g->add_edge ($n => $_)->set_attribute (label => 'false') for @$f_ns;
331 $g->add_edge ($_ => $l) for @$l_ns;
332 $g->add_edge ($n => $l)->set_attribute (label => 'false') if $is_opt;
333 }
334
335 return ([$n], [$l], 0);
336 } elsif ($type eq 'eval' or $type eq 'logical') {
337 my $n = $g->add_node (refaddr $node);
338 my $label = $type eq 'eval' ? '(?{})' : '(??{})';
339 $label .= ' ' . escape_code $node->data;
340 $n->set_attribute (label => $label);
341 return ([$n], [$n], 0);
342 } elsif ($family eq 'assertion') {
343 my $nodes = $node->data;
344 add_regexp ($nodes);
345
346 my $n = $g->add_node (refaddr $nodes);
347 $type = '<' . $type if $node->dir < 0;
348 my $label = $assertion_map->{$type} // $type;
349 $label .= ' #' . get_graph_index ($nodes);
350 $n->set_attribute (label => $label);
351
352 return ([$n], [$n], 0);
353 } elsif ($family eq 'anyof_class') {
354 my $data = $node->data;
355 my $label;
356 if ($data eq 'POSIX') {
357 my $how = ${$node->{how}};
358 if ($how eq ':') {
359 $label = 'POSIX ' . $node->{type};
360 $label = 'NOT ' . $label if $node->neg;
361 } else {
362 $label = $how . $node->neg . $node->{type} . $how;
363 }
364 } else {
365 my $data_family = $data->family;
366 if ($data_family eq 'prop') {
367 $label = 'property ' . $node->type;
368 $label = 'NOT ' . $label if $node->neg;
369 } elsif ($data_family eq 'space') {
370 $label = $data->neg ? 'Perl /\S/' : 'Perl /\s/';
371 } elsif ($data_family eq 'alnum') {
372 $label = $data->neg ? 'Perl /\W/' : 'Perl /\w/';
373 } elsif ($data_family eq 'digit') {
374 $label = $data->neg ? 'Perl /\D/' : 'Perl /\d/';
375 } else {
376 $label = $data->visual;
377 }
378 }
379
380 my $n = $g->add_node (refaddr $node);
381 $n->set_attribute (label => $label);
382
383 return ([$n] => [$n]);
384 } elsif ($family eq 'exact' or $type eq 'anyof_char') {
385 my $n = $g->add_node (refaddr $node);
386
387 my $label = escape_value $node->data;
388 $n->set_attribute (label => qq[ "$label" ]);
389
390 return ([$n] => [$n]);
391 } elsif ($family eq 'flags') {
392 ## TODO: scope
393 my $n = $g->add_node (refaddr $node);
394
395 my $label = $node->visual;
396 $n->set_attribute (label => $label);
397
398 return ([$n] => [$n], 0);
399 } elsif ($family eq 'minmod') {
400 my $nodes = $node->data;
401 add_regexp ($nodes);
402
403 my $n = $g->add_node (refaddr $nodes);
404 my $label = 'non-greedy #' . get_graph_index ($nodes);
405 $n->set_attribute (label => $label);
406
407 return ([$n], [$n], 0);
408 } elsif ($family eq 'anyof_range') {
409 my $n = $g->add_node (refaddr $node);
410
411 my $start = escape_value $node->data->[0]->data;
412 my $end = escape_value $node->data->[1]->data;
413 my $label = qq[ one of "$start" .. "$end" ];
414 $n->set_attribute (label => $label);
415
416 return ([$n] => [$n], 0);
417 } else {
418 # anyof_char
419 # anyof_range
420
421 my $n = $g->add_node (refaddr $node);
422
423 my $label = $node->visual;
424 $label = $default_map->{$label} // escape_value $label;
425 $label .= ' (' . $type . ')';
426 $n->set_attribute (label => $label);
427
428 return ([$n] => [$n], 0);
429 }
430 } # add_to_graph
431

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24