1 |
package Regexp::Parser::JavaScript; |
2 |
our $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
3 |
use strict; |
4 |
use warnings; |
5 |
|
6 |
use Regexp::Parser::Perl58; |
7 |
use Regexp::Parser qw(:original :RPe); |
8 |
push our @ISA, 'Regexp::Parser::Perl58'; |
9 |
|
10 |
use constant RPJSe_OCTESC => 80130, 'Octal escape is obsolete'; |
11 |
|
12 |
my $IdentifierStart = qr/[ |
13 |
## UnicodeLetter |
14 |
\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl} |
15 |
|
16 |
$ _ |
17 |
]/x; |
18 |
|
19 |
sub init ($) { |
20 |
my $self = shift; |
21 |
$self->SUPER::init; |
22 |
|
23 |
$self->{enum_to_level}->{[RPe_BADESC]->[0]} = 'm'; |
24 |
$self->{enum_to_level}->{[RPe_BGROUP]->[0]} = 'm'; |
25 |
$self->{enum_to_level}->{[RPJSe_OCTESC]->[0]} = 'm'; |
26 |
$self->{enum_to_level}->{[RPe_FRANGE]->[0]} = 'm'; |
27 |
|
28 |
$self->del_handler (qw/ |
29 |
\a \e \A \C \G \N \P \p \X \Z \z |
30 |
(?# (?$ (?@ (?< (?> (?{ (?? (?p (?( |
31 |
/); |
32 |
|
33 |
$self->add_handler('\v' => sub { |
34 |
my ($S, $cc) = @_; |
35 |
return $S->force_object(anyof_char => "\x0B", '\v') if $cc; |
36 |
return $S->object(exact => "\x0B" => '\v'); |
37 |
}); |
38 |
|
39 |
$self->add_handler('\n' => sub { |
40 |
my ($S, $cc) = @_; |
41 |
return $S->force_object(anyof_char => "\x0A", '\n') if $cc; |
42 |
return $S->object(exact => "\x0A" => '\n'); |
43 |
}); |
44 |
|
45 |
$self->add_handler('\r' => sub { |
46 |
my ($S, $cc) = @_; |
47 |
return $S->force_object(anyof_char => "\x0D", '\r') if $cc; |
48 |
return $S->object(exact => "\x0D" => '\r'); |
49 |
}); |
50 |
|
51 |
# backslash |
52 |
$self->add_handler('\\' => sub { |
53 |
my ($S, $cc) = @_; |
54 |
my $c = '\\'; |
55 |
|
56 |
if (${&Rx} =~ m{ \G (.) }xgcs) { |
57 |
$c .= (my $n = $1); |
58 |
|
59 |
return $S->$c($cc) if $S->can($c); |
60 |
|
61 |
my $family = $cc ? 'anyof_char' : 'exact'; |
62 |
|
63 |
if ($n =~ /\d/) { |
64 |
## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%E5%85%AB%E9%80%B2%E3%82%A8%E3%82%B9%E3%82%B1%E3%83%BC%E3%83%97>. |
65 |
|
66 |
if ($n =~ /^[0-3]/ and ${&Rx} =~ m{ \G ([0-7]{1,2}) }xgc) { |
67 |
$n .= $1; |
68 |
} |
69 |
elsif ($n =~ /^[4-7]/ and ${&Rx} =~ m{ \G ([0-7]) }xgc) { |
70 |
$n .= $1; |
71 |
} |
72 |
|
73 |
# outside of char class, \nnn might be backref |
74 |
if (!&SIZE_ONLY and !$cc and $n !~ /^0/) { |
75 |
unless ($n > $S->{maxpar}) { |
76 |
return $S->object(ref => $n, "\\$n"); |
77 |
} |
78 |
} |
79 |
if ($n =~ /^[89]/) { |
80 |
$S->warn(RPe_BGROUP); |
81 |
return $S->object($family => chr 0x30 + $n, |
82 |
sprintf("\\x%02x", 0x30 + $n)); |
83 |
} |
84 |
|
85 |
if ($n eq '0') { |
86 |
# |
87 |
} elsif ($n =~ /^0/ or $cc) { |
88 |
$S->warn(RPJSe_OCTESC); |
89 |
} elsif ($n > $S->{maxpar}) { |
90 |
$S->warn(RPe_BGROUP); |
91 |
} |
92 |
return $S->object($family => chr oct $n, sprintf("\\%03s", $n)); |
93 |
} |
94 |
|
95 |
$S->warn(RPe_BADESC, $c = $n, "") if $n =~ /$IdentifierStart/o; |
96 |
|
97 |
return $S->object($family => $n, $c); |
98 |
} |
99 |
|
100 |
$S->error(RPe_ESLASH); |
101 |
}); |
102 |
|
103 |
# control character |
104 |
$self->add_handler('\c' => sub { |
105 |
my ($S, $cc) = @_; |
106 |
## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%5Cc>. |
107 |
if (${&Rx} =~ m{ \G ([A-Za-z]) }xgc) { |
108 |
my $c = $1; |
109 |
$c =~ tr/a-z/A-Z/; |
110 |
return $S->force_object(anyof_char => chr(64 ^ ord $c), "\\c$c") if $cc; |
111 |
return $S->object(exact => chr(64 ^ ord $c), "\\c$c"); |
112 |
} else { |
113 |
my $c = 'c'; |
114 |
$S->warn(RPe_BADESC, $c, ""); |
115 |
return $S->object(($cc ? 'anyof_char' : 'exact') => $c, $c); |
116 |
} |
117 |
}); |
118 |
|
119 |
# hex character |
120 |
$self->add_handler('\x' => sub { |
121 |
my ($S, $cc) = @_; |
122 |
|
123 |
my $num; |
124 |
if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{2} ) }sxgc) { |
125 |
$num = hex $1; |
126 |
} else { |
127 |
$num = ord 'x'; |
128 |
$S->warn(RPe_BADESC, 'x', ""); |
129 |
} |
130 |
|
131 |
my $rep = sprintf("\\x%02X", $num); |
132 |
return $S->force_object(anyof_char => chr $num, $rep) if $cc; |
133 |
return $S->object(exact => chr $num, $rep); |
134 |
}); |
135 |
|
136 |
# \u |
137 |
$self->add_handler('\u' => sub { |
138 |
my ($S, $cc) = @_; |
139 |
|
140 |
my $num; |
141 |
if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{4} ) }sxgc) { |
142 |
$num = hex $1; |
143 |
} else { |
144 |
$num = ord 'u'; |
145 |
$S->warn(RPe_BADESC, 'u', ""); |
146 |
} |
147 |
|
148 |
my $rep = sprintf("\\u%04X", $num); |
149 |
return $S->force_object(anyof_char => chr $num, $rep) if $cc; |
150 |
return $S->object(exact => chr $num, $rep); |
151 |
}); |
152 |
|
153 |
# start of char class range (or maybe just char) |
154 |
$self->add_handler('cc' => sub { |
155 |
my ($S) = @_; |
156 |
return if ${&Rx} =~ m{ \G (?= ] | \z ) }xgc; |
157 |
push @{ $S->{next} }, qw< cc >; |
158 |
my ($lhs, $rhs, $before_range); |
159 |
my $ret = \$lhs; |
160 |
|
161 |
{ |
162 |
if (${&Rx} =~ m{ \G ( \\ ) }xgcs) { |
163 |
my $c = $1; |
164 |
$$ret = $S->$c(1); |
165 |
} |
166 |
elsif (${&Rx} =~ m{ \G (.) }xgcs) { |
167 |
$$ret = $S->force_object(anyof_char => $1); |
168 |
} |
169 |
|
170 |
if ($ret == \$lhs) { |
171 |
if (${&Rx} =~ m{ \G (?= - ) }xgc) { |
172 |
if ($lhs->visual =~ /^\\[dDsSwW]/) { |
173 |
$S->warn(RPe_FRANGE, $lhs->visual, ""); |
174 |
$ret = $lhs; |
175 |
last; |
176 |
} |
177 |
$before_range = &RxPOS++; |
178 |
$ret = \$rhs; |
179 |
redo; |
180 |
} |
181 |
$ret = $lhs; |
182 |
} |
183 |
elsif ($ret == \$rhs) { |
184 |
if ($rhs->visual =~ /^\\[dDsSwW]/) { |
185 |
$S->warn(RPe_FRANGE, $lhs->visual, $rhs->visual); |
186 |
&RxPOS = $before_range; |
187 |
$ret = $lhs; |
188 |
} |
189 |
elsif ($lhs->data gt $rhs->data) { ## ->visual in the original code. |
190 |
$S->error(RPe_IRANGE, $lhs->visual, $rhs->visual); |
191 |
} |
192 |
else { |
193 |
$ret = $S->object(anyof_range => $lhs, $rhs); |
194 |
} |
195 |
} |
196 |
} |
197 |
|
198 |
return if &SIZE_ONLY; |
199 |
return $ret; |
200 |
}); |
201 |
|
202 |
# char class ] at beginning |
203 |
$self->add_handler('cc]' => sub { |
204 |
my ($S) = @_; |
205 |
return unless ${&Rx} =~ m{ \G ] }xgc; |
206 |
pop @{ $S->{next} }; # cc |
207 |
pop @{ $S->{next} }; # cce] |
208 |
return $S->object(anyof_close => "]"); |
209 |
}); |
210 |
|
211 |
# some kind of assertion... |
212 |
$self->add_handler('(?' => sub { |
213 |
my ($S) = @_; |
214 |
my $c = '(?'; |
215 |
|
216 |
if (${&Rx} =~ m{ \G (.) }xgcs) { |
217 |
my $n = "$c$1"; |
218 |
return $S->$n if $S->can($n); |
219 |
&RxPOS--; |
220 |
} |
221 |
else { |
222 |
$S->error(RPe_SEQINC); |
223 |
} |
224 |
|
225 |
my $old = &RxPOS; |
226 |
|
227 |
if (${&Rx} =~ m{ \G : }xgc) { |
228 |
push @{ $S->{flags} }, &Rf; |
229 |
push @{ $S->{next} }, qw< c) atom >; |
230 |
return $S->object('group', 0, 0); |
231 |
} |
232 |
|
233 |
&RxPOS++; |
234 |
$S->error(RPe_NOTREC, 0, substr(${&Rx}, $old)); |
235 |
}); |
236 |
|
237 |
} # init |
238 |
|
239 |
1; |
240 |
|
241 |
__END__ |
242 |
|
243 |
=head1 LICENSE |
244 |
|
245 |
Copyright 2008-2009 Wakaba <w@suika.fam.cx>. |
246 |
|
247 |
This library is free software; you can redistribute it and/or modify |
248 |
it under the same terms as Perl itself. |
249 |
|
250 |
=cut |
251 |
|
252 |
# $Date: 2009/01/13 23:00:46 $ |