1 |
package Regexp::Parser::JavaScript; |
2 |
use strict; |
3 |
use warnings; |
4 |
|
5 |
use Regexp::Parser::Perl58; |
6 |
use Regexp::Parser qw(:original :RPe); |
7 |
push our @ISA, 'Regexp::Parser::Perl58'; |
8 |
|
9 |
use constant RPJSe_OCTESC => 80130, 'Octal escape is obsolete'; |
10 |
|
11 |
my $IdentifierStart = qr/[ |
12 |
## UnicodeLetter |
13 |
\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl} |
14 |
|
15 |
$ _ |
16 |
]/x; |
17 |
|
18 |
sub init ($) { |
19 |
my $self = shift; |
20 |
$self->SUPER::init; |
21 |
|
22 |
$self->{enum_to_level}->{[RPe_BADESC]->[0]} = 'm'; |
23 |
$self->{enum_to_level}->{[RPe_BGROUP]->[0]} = 'm'; |
24 |
$self->{enum_to_level}->{[RPJSe_OCTESC]->[0]} = 'm'; |
25 |
$self->{enum_to_level}->{[RPe_FRANGE]->[0]} = 'm'; |
26 |
|
27 |
$self->del_handler (qw/ |
28 |
\a \e \A \C \G \N \P \p \X \Z \z |
29 |
(?# (?$ (?@ (?< (?> (?{ (?? (?p (?( |
30 |
/); |
31 |
|
32 |
$self->add_handler('\v' => sub { |
33 |
my ($S, $cc) = @_; |
34 |
return $S->force_object(anyof_char => "\x0B", '\v') if $cc; |
35 |
return $S->object(exact => "\x0B" => '\v'); |
36 |
}); |
37 |
|
38 |
$self->add_handler('\n' => sub { |
39 |
my ($S, $cc) = @_; |
40 |
return $S->force_object(anyof_char => "\x0A", '\n') if $cc; |
41 |
return $S->object(exact => "\x0A" => '\n'); |
42 |
}); |
43 |
|
44 |
$self->add_handler('\r' => sub { |
45 |
my ($S, $cc) = @_; |
46 |
return $S->force_object(anyof_char => "\x0D", '\r') if $cc; |
47 |
return $S->object(exact => "\x0D" => '\r'); |
48 |
}); |
49 |
|
50 |
# backslash |
51 |
$self->add_handler('\\' => sub { |
52 |
my ($S, $cc) = @_; |
53 |
my $c = '\\'; |
54 |
|
55 |
if (${&Rx} =~ m{ \G (.) }xgcs) { |
56 |
$c .= (my $n = $1); |
57 |
|
58 |
return $S->$c($cc) if $S->can($c); |
59 |
|
60 |
my $family = $cc ? 'anyof_char' : 'exact'; |
61 |
|
62 |
if ($n =~ /\d/) { |
63 |
## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%E5%85%AB%E9%80%B2%E3%82%A8%E3%82%B9%E3%82%B1%E3%83%BC%E3%83%97>. |
64 |
|
65 |
if ($n =~ /^[0-3]/ and ${&Rx} =~ m{ \G ([0-7]{1,2}) }xgc) { |
66 |
$n .= $1; |
67 |
} |
68 |
elsif ($n =~ /^[4-7]/ and ${&Rx} =~ m{ \G ([0-7]) }xgc) { |
69 |
$n .= $1; |
70 |
} |
71 |
|
72 |
# outside of char class, \nnn might be backref |
73 |
if (!&SIZE_ONLY and !$cc and $n !~ /^0/) { |
74 |
unless ($n > $S->{maxpar}) { |
75 |
return $S->object(ref => $n, "\\$n"); |
76 |
} |
77 |
} |
78 |
if ($n =~ /^[89]/) { |
79 |
$S->warn(RPe_BGROUP); |
80 |
return $S->object($family => chr 0x30 + $n, |
81 |
sprintf("\\x%02x", 0x30 + $n)); |
82 |
} |
83 |
|
84 |
if ($n eq '0') { |
85 |
# |
86 |
} elsif ($n =~ /^0/ or $cc) { |
87 |
$S->warn(RPJSe_OCTESC); |
88 |
} elsif ($n > $S->{maxpar}) { |
89 |
$S->warn(RPe_BGROUP); |
90 |
} |
91 |
return $S->object($family => chr oct $n, sprintf("\\%03s", $n)); |
92 |
} |
93 |
|
94 |
$S->warn(RPe_BADESC, $c = $n, "") if $n =~ /$IdentifierStart/o; |
95 |
|
96 |
return $S->object($family => $n, $c); |
97 |
} |
98 |
|
99 |
$S->error(RPe_ESLASH); |
100 |
}); |
101 |
|
102 |
# control character |
103 |
$self->add_handler('\c' => sub { |
104 |
my ($S, $cc) = @_; |
105 |
## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%5Cc>. |
106 |
if (${&Rx} =~ m{ \G ([A-Za-z]) }xgc) { |
107 |
my $c = $1; |
108 |
$c =~ tr/a-z/A-Z/; |
109 |
return $S->force_object(anyof_char => chr(64 ^ ord $c), "\\c$c") if $cc; |
110 |
return $S->object(exact => chr(64 ^ ord $c), "\\c$c"); |
111 |
} else { |
112 |
my $c = 'c'; |
113 |
$S->warn(RPe_BADESC, $c, ""); |
114 |
return $S->object(($cc ? 'anyof_char' : 'exact') => $c, $c); |
115 |
} |
116 |
}); |
117 |
|
118 |
# hex character |
119 |
$self->add_handler('\x' => sub { |
120 |
my ($S, $cc) = @_; |
121 |
|
122 |
my $num; |
123 |
if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{2} ) }sxgc) { |
124 |
$num = hex $1; |
125 |
} else { |
126 |
$num = ord 'x'; |
127 |
$S->warn(RPe_BADESC, 'x', ""); |
128 |
} |
129 |
|
130 |
my $rep = sprintf("\\x%02X", $num); |
131 |
return $S->force_object(anyof_char => chr $num, $rep) if $cc; |
132 |
return $S->object(exact => chr $num, $rep); |
133 |
}); |
134 |
|
135 |
# \u |
136 |
$self->add_handler('\u' => sub { |
137 |
my ($S, $cc) = @_; |
138 |
|
139 |
my $num; |
140 |
if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{4} ) }sxgc) { |
141 |
$num = hex $1; |
142 |
} else { |
143 |
$num = ord 'u'; |
144 |
$S->warn(RPe_BADESC, 'u', ""); |
145 |
} |
146 |
|
147 |
my $rep = sprintf("\\u%04X", $num); |
148 |
return $S->force_object(anyof_char => chr $num, $rep) if $cc; |
149 |
return $S->object(exact => chr $num, $rep); |
150 |
}); |
151 |
|
152 |
# start of char class range (or maybe just char) |
153 |
$self->add_handler('cc' => sub { |
154 |
my ($S) = @_; |
155 |
return if ${&Rx} =~ m{ \G (?= ] | \z ) }xgc; |
156 |
push @{ $S->{next} }, qw< cc >; |
157 |
my ($lhs, $rhs, $before_range); |
158 |
my $ret = \$lhs; |
159 |
|
160 |
{ |
161 |
if (${&Rx} =~ m{ \G ( \\ ) }xgcs) { |
162 |
my $c = $1; |
163 |
$$ret = $S->$c(1); |
164 |
} |
165 |
elsif (${&Rx} =~ m{ \G (.) }xgcs) { |
166 |
$$ret = $S->force_object(anyof_char => $1); |
167 |
} |
168 |
|
169 |
if ($ret == \$lhs) { |
170 |
if (${&Rx} =~ m{ \G (?= - ) }xgc) { |
171 |
if ($lhs->visual =~ /^\\[dDsSwW]/) { |
172 |
$S->warn(RPe_FRANGE, $lhs->visual, ""); |
173 |
$ret = $lhs; |
174 |
last; |
175 |
} |
176 |
$before_range = &RxPOS++; |
177 |
$ret = \$rhs; |
178 |
redo; |
179 |
} |
180 |
$ret = $lhs; |
181 |
} |
182 |
elsif ($ret == \$rhs) { |
183 |
if ($rhs->visual =~ /^\\[dDsSwW]/) { |
184 |
$S->warn(RPe_FRANGE, $lhs->visual, $rhs->visual); |
185 |
&RxPOS = $before_range; |
186 |
$ret = $lhs; |
187 |
} |
188 |
elsif ($lhs->data gt $rhs->data) { ## ->visual in the original code. |
189 |
$S->error(RPe_IRANGE, $lhs->visual, $rhs->visual); |
190 |
} |
191 |
else { |
192 |
$ret = $S->object(anyof_range => $lhs, $rhs); |
193 |
} |
194 |
} |
195 |
} |
196 |
|
197 |
return if &SIZE_ONLY; |
198 |
return $ret; |
199 |
}); |
200 |
|
201 |
# char class ] at beginning |
202 |
$self->add_handler('cc]' => sub { |
203 |
my ($S) = @_; |
204 |
return unless ${&Rx} =~ m{ \G ] }xgc; |
205 |
pop @{ $S->{next} }; # cc |
206 |
pop @{ $S->{next} }; # cce] |
207 |
return $S->object(anyof_close => "]"); |
208 |
}); |
209 |
|
210 |
# some kind of assertion... |
211 |
$self->add_handler('(?' => sub { |
212 |
my ($S) = @_; |
213 |
my $c = '(?'; |
214 |
|
215 |
if (${&Rx} =~ m{ \G (.) }xgcs) { |
216 |
my $n = "$c$1"; |
217 |
return $S->$n if $S->can($n); |
218 |
&RxPOS--; |
219 |
} |
220 |
else { |
221 |
$S->error(RPe_SEQINC); |
222 |
} |
223 |
|
224 |
my $old = &RxPOS; |
225 |
|
226 |
if (${&Rx} =~ m{ \G : }xgc) { |
227 |
push @{ $S->{flags} }, &Rf; |
228 |
push @{ $S->{next} }, qw< c) atom >; |
229 |
return $S->object('group', 0, 0); |
230 |
} |
231 |
|
232 |
&RxPOS++; |
233 |
$S->error(RPe_NOTREC, 0, substr(${&Rx}, $old)); |
234 |
}); |
235 |
|
236 |
} # init |
237 |
|
238 |
1; |