/[suikacvs]/webroot/regexp/lib/Regexp/Visualize/Simple.pm
Suika

Contents of /webroot/regexp/lib/Regexp/Visualize/Simple.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations) (download)
Mon Dec 8 12:21:26 2008 UTC (17 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +40 -17 lines
++ swe/visualizer/ChangeLog	8 Dec 2008 12:21:06 -0000
2008-12-08  Wakaba  <wakaba@suika.fam.cx>

	* input.html: Added |select| for language selection.

	* regexp.cgi: Don't percent-decode query parameter values decoded
	by Message::CGI::HTTP twice.  Added JavaScript regular expression
	support.  Regular expression input was not expanded in |title|
	elements.

++ swe/lib/Regexp/Parser/ChangeLog	8 Dec 2008 12:17:06 -0000
2008-12-08  Wakaba  <wakaba@suika.fam.cx>

	* JavaScript.pm: New module.

++ swe/lib/Regexp/Visualize/ChangeLog	8 Dec 2008 12:19:40 -0000
2008-12-08  Wakaba  <wakaba@suika.fam.cx>

	* Simple.pm (next_graph): Plot "FAIL" node if necessary.  Don't
	plot "SUCCESS" node if not necessary.
	(add_to_graph): Support for empty []/[^] character classes allowed
	in JavaScript regular expressions.

2008-12-07  Wakaba  <wakaba@suika.fam.cx>

	* Simple.pm (push_regexp_node): Invoke |get_graph_index| to return
	the index of the pushed regexp.  This invocation is necessary such
	that the order the regexps are pushed is reflected to the index.

1 package Regexp::Visualize::Simple;
2 use strict;
3 use warnings;
4
5 use Graph::Easy;
6
7 use Scalar::Util qw/refaddr/;
8
9 my $default_map = {};
10 for (qw/. \C \w \W \s \S \d \D \X \1 \2 \3 \4 \5 \6 \7 \8 \9
11 \A ^ \B \b \G \Z \z $/) {
12 $default_map->{$_} = qq[Perl /$_/];
13 }
14
15 my $assertion_map = {
16 ifmatch => '(?=)',
17 '<ifmatch' => '(?<=)',
18 unlessm => '(?!)',
19 '<unlessm' => '(?<!)',
20 };
21
22 sub new ($) {
23 my $self = bless {}, shift;
24 $self->{next_index} = 0;
25 $self->{regexp_nodes} = [];
26 return $self;
27 } # new
28
29 sub _escape_value ($) {
30 my $v = shift;
31 $v =~ s/(\W)/sprintf '\x{%04X}', ord $1/ge;
32 $v;
33 } # _escape_value
34
35 sub _escape_code ($) {
36 my $v = shift;
37 $v =~ s/([^\x20-\x5B\x5D-\x7E])/sprintf '\x{%04X}', ord $1/ge;
38 $v;
39 } # _escape_code
40
41 sub push_regexp_node ($$) {
42 my $self = shift;
43 my $nodes = shift; # Regexp::Parser's node or array ref of nodes
44 push @{$self->{regexp}}, $nodes;
45 return $self->get_graph_index ($nodes);
46 } # push_regexp_node
47
48 sub shift_regexp_node ($) {
49 my $self = shift;
50 return shift @{$self->{regexp}};
51 } # shift_regexp_node
52
53 sub has_regexp_node ($) {
54 my $self = shift;
55 return scalar @{$self->{regexp}};
56 } # has_regexp_node
57
58 sub get_graph_index ($$) {
59 my $self = shift;
60 my $nodes = shift;
61 $self->{index}->{$nodes} //= $self->{next_index}++;
62 return $self->{index}->{$nodes};
63 } # get_graph_index
64
65 sub next_graph ($) {
66 my $self = shift;
67 my $root_nodes = $self->shift_regexp_node;
68 return (undef, undef) unless $root_nodes;
69
70 my $g = Graph::Easy->new;
71
72 $g->set_attributes ('node.start' => {fill => 'blue', color => 'white'});
73 $g->set_attributes ('node.success' => {fill => 'green', color => 'white'});
74 $g->set_attributes ('node.fail' => {fill => 'red', color => 'white'});
75 $g->set_attributes ('edge.fail' => {color => 'gray'});
76
77 my $start_n = $g->add_node ('START');
78 $start_n->set_attribute (class => 'start');
79 my $success_n = $g->add_node ('SUCCESS');
80 $success_n->set_attribute (class => 'success');
81 my $fail_n = $g->add_node ('FAIL');
82 $fail_n->set_attribute (class => 'fail');
83
84 my ($first_ns, $last_ns, $is_optional)
85 = $self->_add_to_graph ($root_nodes => $g);
86 $g->add_edge ($start_n => $_) for @$first_ns;
87 $g->add_edge ($_ => $success_n) for @$last_ns;
88 $g->add_edge ($start_n => $success_n) if $is_optional;
89
90 $g->del_node ($success_n) unless $success_n->incoming;
91 $g->del_node ($fail_n) unless $fail_n->incoming;
92 $_->set_attribute ('class' => 'fail') for $fail_n->outgoing;
93
94 return ($g, $self->get_graph_index ($root_nodes));
95 } # next_graph
96
97 sub _add_to_graph ($$$) {
98 my ($self, $node, $g) = @_;
99
100 my $family = ref $node eq 'ARRAY' ? '' : $node->family;
101 my $type = ref $node eq 'ARRAY' ? '' : $node->type;
102 if ($family eq 'quant') {
103 my ($min, $max) = ($node->min, $node->max);
104 return ([], [], 1) if $max eq '0';
105 my ($first_ns, $last_ns, $is_optional)
106 = $self->_add_to_graph ($node->data => $g);
107
108 my $label;
109 if ($max eq '') {
110 if ($min == 0) {
111 $is_optional = 1;
112 $label = '';
113
114 } elsif ($min == 1) {
115 $label = '';
116
117 } else {
118 $label = 'at least ' . ($min - 1);
119
120 }
121 } elsif ($max == 1) {
122 if ($min == 0) {
123 $is_optional = 1;
124
125 } else {
126
127 }
128 } else {
129 $label = 'at most ' . ($max - 1);
130 if ($min == 0) {
131 $is_optional = 1;
132
133
134 } elsif ($min == 1) {
135
136 } else {
137 $label = 'at least ' . ($min - 1) . ', ' . $label;
138
139 }
140 }
141
142 if (@$first_ns != 1 or @$last_ns != 1) {
143 my $n = $g->add_node (refaddr $first_ns);
144 $n->set_attribute (label => '');
145 my $m = $n;
146 unless ($is_optional) {
147 $m = $g->add_node (refaddr $last_ns);
148 $m->set_attribute (label => '');
149 } else {
150 $is_optional = 0;
151 }
152 $g->add_edge ($n => $_) for @$first_ns;
153 $g->add_edge ($_ => $m) for @$last_ns;
154 $first_ns = [$n];
155 $last_ns = [$m];
156 }
157
158 if (defined $label) {
159 my $e = $g->add_edge ($last_ns->[0] => $first_ns->[0]);
160 $e->set_attribute (label => $label);
161 }
162
163 return ($first_ns, $last_ns, $is_optional);
164 } elsif ($type eq 'branch') {
165 my @first_n;
166 my @last_n;
167 my $is_optional = 0;
168 for (@{$node->data}) {
169 my ($f_ns, $l_ns, $is_opt) = $self->_add_to_graph ($_ => $g);
170 push @first_n, @$f_ns;
171 push @last_n, @$l_ns;
172 $is_optional |= $is_opt;
173 }
174 return (\@first_n, \@last_n, $is_optional);
175 } elsif ($type eq 'anyof') {
176 my $data = $node->data;
177 if ($node->neg) {
178 if (@$data) {
179 my $nodes = Regexp::Parser::branch->new ($node->{rx});
180 $nodes->{data} = $node->data;
181
182 $self->push_regexp_node ($nodes);
183
184 my $n = $g->add_node (refaddr $nodes);
185 my $label = 'NOT #' . $self->get_graph_index ($nodes);
186 $n->set_attribute (label => $label);
187
188 return ([$n], [$n], 0);
189 } else {
190 my $n = $g->add_node (refaddr $node);
191 my $label = 'Any character';
192 $n->set_attribute (label => $label);
193
194 return ([$n], [$n], 0);
195 }
196 } else {
197 if (@$data) {
198 my @first_n;
199 my @last_n;
200 for (@{$node->data}) {
201 my ($f_ns, $l_ns) = $self->_add_to_graph ($_ => $g);
202 push @first_n, @$f_ns;
203 push @last_n, @$l_ns;
204 }
205 return (\@first_n, \@last_n, 0);
206 } else {
207 my $fail_n = $g->node ('FAIL');
208 return ([$fail_n], [$fail_n], 0);
209 }
210 }
211 } elsif ($type eq '') {
212 my $prev_ns;
213 my $first_ns;
214 my $is_optional = 1;
215 for (@{$node}) {
216 my ($f_ns, $l_ns, $is_opt) = $self->_add_to_graph ($_ => $g);
217 if ($prev_ns) {
218 if (@$prev_ns > 1 and @$f_ns > 1) {
219 my $n = $g->add_node (refaddr $f_ns);
220 $n->set_attribute (label => '');
221 $g->add_edge ($_ => $n) for @$prev_ns;
222 $g->add_edge ($n => $_) for @$f_ns;
223 } else {
224 for my $prev_n (@$prev_ns) {
225 for my $f_n (@$f_ns) {
226 $g->add_edge ($prev_n => $f_n);
227 }
228 }
229 }
230 if ($is_optional) {
231 push @$first_ns, @$f_ns;
232 }
233 if ($is_opt) {
234 push @$prev_ns, @$l_ns;
235 } else {
236 $prev_ns = $l_ns if @$l_ns;
237 }
238 } else {
239 $first_ns = $f_ns;
240 $prev_ns = $l_ns if @$l_ns;
241 }
242 $is_optional &= $is_opt;
243 }
244 return ($first_ns || [], $prev_ns || [], $is_optional);
245 } elsif ($family eq 'group' or $family eq 'open' or $type eq 'suspend') {
246 ## TODO: (?:) vs () vs (?>), (?:)->on, (?:)->off
247 my ($f_ns, $l_ns, $is_opt) = $self->_add_to_graph ($node->data => $g);
248 return ($f_ns, $l_ns, $is_opt);
249 } elsif ($type eq 'ifthen') {
250 my $nodes = $node->data;
251
252 my $groupp = $nodes->[0];
253 my $label = $groupp ? '(?' . $groupp->visual . ')' : '';
254 my $n = $g->add_node (refaddr $groupp);
255 $n->set_attribute (label => $label);
256
257 my $l = $g->add_node (refaddr $nodes);
258 $l->set_attribute (label => '');
259
260 my $branch = $nodes->[1];
261 my $branches = $branch ? $branch->data : [];
262
263 my $true = $branches->[0];
264 if ($true) {
265 my ($f_ns, $l_ns, $is_opt) = $self->_add_to_graph ($true => $g);
266 $g->add_edge ($n => $_)->set_attribute (label => 'true') for @$f_ns;
267 $g->add_edge ($_ => $l) for @$l_ns;
268 $g->add_edge ($n => $l)->set_attribute (label => 'true') if $is_opt;
269 }
270
271 my $false = $branches->[1];
272 if ($false) {
273 my ($f_ns, $l_ns, $is_opt) = $self->_add_to_graph ($false => $g);
274 $g->add_edge ($n => $_)->set_attribute (label => 'false') for @$f_ns;
275 $g->add_edge ($_ => $l) for @$l_ns;
276 $g->add_edge ($n => $l)->set_attribute (label => 'false') if $is_opt;
277 }
278
279 return ([$n], [$l], 0);
280 } elsif ($type eq 'eval' or $type eq 'logical') {
281 my $n = $g->add_node (refaddr $node);
282 my $label = $type eq 'eval' ? '(?{})' : '(??{})';
283 $label .= ' ' . _escape_code $node->data;
284 $n->set_attribute (label => $label);
285 return ([$n], [$n], 0);
286 } elsif ($family eq 'assertion') {
287 my $nodes = $node->data;
288 $self->push_regexp_node ($nodes);
289
290 my $n = $g->add_node (refaddr $nodes);
291 $type = '<' . $type if $node->dir < 0;
292 my $label = $assertion_map->{$type} // $type;
293 $label .= ' #' . $self->get_graph_index ($nodes);
294 $n->set_attribute (label => $label);
295
296 return ([$n], [$n], 0);
297 } elsif ($family eq 'anyof_class') {
298 my $data = $node->data;
299 my $label;
300 if ($data eq 'POSIX') {
301 my $how = ${$node->{how}};
302 if ($how eq ':') {
303 $label = 'POSIX ' . $node->{type};
304 $label = 'NOT ' . $label if $node->neg;
305 } else {
306 $label = $how . $node->neg . $node->{type} . $how;
307 }
308 } else {
309 my $data_family = $data->family;
310 if ($data_family eq 'prop') {
311 $label = 'property ' . $node->type;
312 $label = 'NOT ' . $label if $node->neg;
313 } elsif ($data_family eq 'space') {
314 $label = $data->neg ? 'Perl /\S/' : 'Perl /\s/';
315 } elsif ($data_family eq 'alnum') {
316 $label = $data->neg ? 'Perl /\W/' : 'Perl /\w/';
317 } elsif ($data_family eq 'digit') {
318 $label = $data->neg ? 'Perl /\D/' : 'Perl /\d/';
319 } else {
320 $label = $data->visual;
321 }
322 }
323
324 my $n = $g->add_node (refaddr $node);
325 $n->set_attribute (label => $label);
326
327 return ([$n] => [$n]);
328 } elsif ($family eq 'exact' or $type eq 'anyof_char') {
329 my $n = $g->add_node (refaddr $node);
330
331 my $label = _escape_value $node->data;
332 $n->set_attribute (label => qq[ "$label" ]);
333
334 return ([$n] => [$n]);
335 } elsif ($family eq 'flags') {
336 ## TODO: scope
337 my $n = $g->add_node (refaddr $node);
338
339 my $label = $node->visual;
340 $n->set_attribute (label => $label);
341
342 return ([$n] => [$n], 0);
343 } elsif ($family eq 'minmod') {
344 my $nodes = $node->data;
345 $self->push_regexp_node ($nodes);
346
347 my $n = $g->add_node (refaddr $nodes);
348 my $label = 'non-greedy #' . $self->get_graph_index ($nodes);
349 $n->set_attribute (label => $label);
350
351 return ([$n], [$n], 0);
352 } elsif ($family eq 'anyof_range') {
353 my $n = $g->add_node (refaddr $node);
354
355 my $start = _escape_value $node->data->[0]->data;
356 my $end = _escape_value $node->data->[1]->data;
357 my $label = qq[ one of "$start" .. "$end" ];
358 $n->set_attribute (label => $label);
359
360 return ([$n] => [$n], 0);
361 } else {
362 # anyof_char
363 # anyof_range
364
365 my $n = $g->add_node (refaddr $node);
366
367 my $label = $node->visual;
368 $label = $default_map->{$label} // _escape_value $label;
369 $label .= ' (' . $type . ')';
370 $n->set_attribute (label => $label);
371
372 return ([$n] => [$n], 0);
373 }
374 } # _add_to_graph
375
376 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24