1 |
wakaba |
1.1 |
package Regexp::Parser::JavaScript; |
2 |
|
|
use strict; |
3 |
|
|
use warnings; |
4 |
|
|
|
5 |
|
|
use Regexp::Parser::Perl58; |
6 |
|
|
use Regexp::Parser qw(:original :RPe); |
7 |
|
|
push our @ISA, 'Regexp::Parser::Perl58'; |
8 |
|
|
|
9 |
wakaba |
1.2 |
use constant RPJSe_OCTESC => 80130, 'Octal escape is obsolete'; |
10 |
|
|
|
11 |
|
|
my $IdentifierStart = qr/[ |
12 |
|
|
## UnicodeLetter |
13 |
|
|
\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl} |
14 |
|
|
|
15 |
|
|
$ _ |
16 |
|
|
]/x; |
17 |
|
|
|
18 |
wakaba |
1.1 |
sub init ($) { |
19 |
|
|
my $self = shift; |
20 |
|
|
$self->SUPER::init; |
21 |
|
|
|
22 |
wakaba |
1.2 |
$self->{enum_to_level}->{[RPe_BADESC]->[0]} = 'm'; |
23 |
|
|
$self->{enum_to_level}->{[RPe_BGROUP]->[0]} = 'm'; |
24 |
|
|
$self->{enum_to_level}->{[RPJSe_OCTESC]->[0]} = 'm'; |
25 |
|
|
$self->{enum_to_level}->{[RPe_FRANGE]->[0]} = 'm'; |
26 |
|
|
|
27 |
wakaba |
1.1 |
$self->del_handler (qw/ |
28 |
|
|
\a \e \A \C \G \N \P \p \X \Z \z |
29 |
|
|
(?# (?$ (?@ (?< (?> (?{ (?? (?p (?( |
30 |
|
|
/); |
31 |
|
|
|
32 |
|
|
$self->add_handler('\v' => sub { |
33 |
|
|
my ($S, $cc) = @_; |
34 |
|
|
return $S->force_object(anyof_char => "\x0B", '\v') if $cc; |
35 |
|
|
return $S->object(exact => "\x0B" => '\v'); |
36 |
|
|
}); |
37 |
|
|
|
38 |
|
|
$self->add_handler('\n' => sub { |
39 |
|
|
my ($S, $cc) = @_; |
40 |
|
|
return $S->force_object(anyof_char => "\x0A", '\n') if $cc; |
41 |
|
|
return $S->object(exact => "\x0A" => '\n'); |
42 |
|
|
}); |
43 |
|
|
|
44 |
|
|
$self->add_handler('\r' => sub { |
45 |
|
|
my ($S, $cc) = @_; |
46 |
|
|
return $S->force_object(anyof_char => "\x0D", '\r') if $cc; |
47 |
|
|
return $S->object(exact => "\x0D" => '\r'); |
48 |
|
|
}); |
49 |
|
|
|
50 |
|
|
# backslash |
51 |
|
|
$self->add_handler('\\' => sub { |
52 |
|
|
my ($S, $cc) = @_; |
53 |
|
|
my $c = '\\'; |
54 |
|
|
|
55 |
|
|
if (${&Rx} =~ m{ \G (.) }xgcs) { |
56 |
|
|
$c .= (my $n = $1); |
57 |
|
|
|
58 |
|
|
return $S->$c($cc) if $S->can($c); |
59 |
|
|
|
60 |
wakaba |
1.2 |
my $family = $cc ? 'anyof_char' : 'exact'; |
61 |
|
|
|
62 |
wakaba |
1.1 |
if ($n =~ /\d/) { |
63 |
|
|
## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%E5%85%AB%E9%80%B2%E3%82%A8%E3%82%B9%E3%82%B1%E3%83%BC%E3%83%97>. |
64 |
|
|
|
65 |
|
|
if ($n =~ /^[0-3]/ and ${&Rx} =~ m{ \G ([0-7]{1,2}) }xgc) { |
66 |
|
|
$n .= $1; |
67 |
|
|
} |
68 |
|
|
elsif ($n =~ /^[4-7]/ and ${&Rx} =~ m{ \G ([0-7]) }xgc) { |
69 |
|
|
$n .= $1; |
70 |
|
|
} |
71 |
|
|
|
72 |
|
|
# outside of char class, \nnn might be backref |
73 |
|
|
if (!&SIZE_ONLY and !$cc and $n !~ /^0/) { |
74 |
|
|
unless ($n > $S->{maxpar}) { |
75 |
|
|
return $S->object(ref => $n, "\\$n"); |
76 |
|
|
} |
77 |
|
|
} |
78 |
|
|
if ($n =~ /^[89]/) { |
79 |
wakaba |
1.2 |
$S->warn(RPe_BGROUP); |
80 |
|
|
return $S->object($family => chr 0x30 + $n, |
81 |
wakaba |
1.1 |
sprintf("\\x%02x", 0x30 + $n)); |
82 |
|
|
} |
83 |
|
|
|
84 |
wakaba |
1.2 |
if ($n eq '0') { |
85 |
|
|
# |
86 |
|
|
} elsif ($n =~ /^0/ or $cc) { |
87 |
|
|
$S->warn(RPJSe_OCTESC); |
88 |
|
|
} elsif ($n > $S->{maxpar}) { |
89 |
|
|
$S->warn(RPe_BGROUP); |
90 |
|
|
} |
91 |
|
|
return $S->object($family => chr oct $n, sprintf("\\%03s", $n)); |
92 |
wakaba |
1.1 |
} |
93 |
|
|
|
94 |
wakaba |
1.2 |
$S->warn(RPe_BADESC, $c = $n, "") if $n =~ /$IdentifierStart/o; |
95 |
wakaba |
1.1 |
|
96 |
wakaba |
1.2 |
return $S->object($family => $n, $c); |
97 |
wakaba |
1.1 |
} |
98 |
|
|
|
99 |
|
|
$S->error(RPe_ESLASH); |
100 |
|
|
}); |
101 |
|
|
|
102 |
|
|
# control character |
103 |
|
|
$self->add_handler('\c' => sub { |
104 |
|
|
my ($S, $cc) = @_; |
105 |
wakaba |
1.2 |
## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%5Cc>. |
106 |
|
|
if (${&Rx} =~ m{ \G ([A-Za-z]) }xgc) { |
107 |
|
|
my $c = $1; |
108 |
|
|
$c =~ tr/a-z/A-Z/; |
109 |
|
|
return $S->force_object(anyof_char => chr(64 ^ ord $c), "\\c$c") if $cc; |
110 |
|
|
return $S->object(exact => chr(64 ^ ord $c), "\\c$c"); |
111 |
|
|
} else { |
112 |
|
|
my $c = 'c'; |
113 |
|
|
$S->warn(RPe_BADESC, $c, ""); |
114 |
|
|
return $S->object(($cc ? 'anyof_char' : 'exact') => $c, $c); |
115 |
|
|
} |
116 |
wakaba |
1.1 |
}); |
117 |
|
|
|
118 |
|
|
# hex character |
119 |
|
|
$self->add_handler('\x' => sub { |
120 |
|
|
my ($S, $cc) = @_; |
121 |
|
|
|
122 |
|
|
my $num; |
123 |
|
|
if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{2} ) }sxgc) { |
124 |
|
|
$num = hex $1; |
125 |
|
|
} else { |
126 |
|
|
$num = ord 'x'; |
127 |
wakaba |
1.2 |
$S->warn(RPe_BADESC, 'x', ""); |
128 |
wakaba |
1.1 |
} |
129 |
|
|
|
130 |
|
|
my $rep = sprintf("\\x%02X", $num); |
131 |
|
|
return $S->force_object(anyof_char => chr $num, $rep) if $cc; |
132 |
|
|
return $S->object(exact => chr $num, $rep); |
133 |
|
|
}); |
134 |
|
|
|
135 |
|
|
# \u |
136 |
|
|
$self->add_handler('\u' => sub { |
137 |
|
|
my ($S, $cc) = @_; |
138 |
|
|
|
139 |
|
|
my $num; |
140 |
|
|
if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{4} ) }sxgc) { |
141 |
|
|
$num = hex $1; |
142 |
|
|
} else { |
143 |
|
|
$num = ord 'u'; |
144 |
wakaba |
1.2 |
$S->warn(RPe_BADESC, 'u', ""); |
145 |
wakaba |
1.1 |
} |
146 |
|
|
|
147 |
|
|
my $rep = sprintf("\\u%04X", $num); |
148 |
|
|
return $S->force_object(anyof_char => chr $num, $rep) if $cc; |
149 |
|
|
return $S->object(exact => chr $num, $rep); |
150 |
|
|
}); |
151 |
|
|
|
152 |
|
|
# start of char class range (or maybe just char) |
153 |
|
|
$self->add_handler('cc' => sub { |
154 |
|
|
my ($S) = @_; |
155 |
|
|
return if ${&Rx} =~ m{ \G (?= ] | \z ) }xgc; |
156 |
|
|
push @{ $S->{next} }, qw< cc >; |
157 |
|
|
my ($lhs, $rhs, $before_range); |
158 |
|
|
my $ret = \$lhs; |
159 |
|
|
|
160 |
|
|
{ |
161 |
|
|
if (${&Rx} =~ m{ \G ( \\ ) }xgcs) { |
162 |
|
|
my $c = $1; |
163 |
|
|
$$ret = $S->$c(1); |
164 |
|
|
} |
165 |
|
|
elsif (${&Rx} =~ m{ \G (.) }xgcs) { |
166 |
|
|
$$ret = $S->force_object(anyof_char => $1); |
167 |
|
|
} |
168 |
|
|
|
169 |
|
|
if ($ret == \$lhs) { |
170 |
|
|
if (${&Rx} =~ m{ \G (?= - ) }xgc) { |
171 |
|
|
if ($lhs->visual =~ /^\\[dDsSwW]/) { |
172 |
|
|
$S->warn(RPe_FRANGE, $lhs->visual, ""); |
173 |
|
|
$ret = $lhs; |
174 |
|
|
last; |
175 |
|
|
} |
176 |
|
|
$before_range = &RxPOS++; |
177 |
|
|
$ret = \$rhs; |
178 |
|
|
redo; |
179 |
|
|
} |
180 |
|
|
$ret = $lhs; |
181 |
|
|
} |
182 |
|
|
elsif ($ret == \$rhs) { |
183 |
|
|
if ($rhs->visual =~ /^\\[dDsSwW]/) { |
184 |
|
|
$S->warn(RPe_FRANGE, $lhs->visual, $rhs->visual); |
185 |
|
|
&RxPOS = $before_range; |
186 |
|
|
$ret = $lhs; |
187 |
|
|
} |
188 |
|
|
elsif ($lhs->data gt $rhs->data) { ## ->visual in the original code. |
189 |
|
|
$S->error(RPe_IRANGE, $lhs->visual, $rhs->visual); |
190 |
|
|
} |
191 |
|
|
else { |
192 |
|
|
$ret = $S->object(anyof_range => $lhs, $rhs); |
193 |
|
|
} |
194 |
|
|
} |
195 |
|
|
} |
196 |
|
|
|
197 |
|
|
return if &SIZE_ONLY; |
198 |
|
|
return $ret; |
199 |
|
|
}); |
200 |
|
|
|
201 |
|
|
# char class ] at beginning |
202 |
|
|
$self->add_handler('cc]' => sub { |
203 |
|
|
my ($S) = @_; |
204 |
|
|
return unless ${&Rx} =~ m{ \G ] }xgc; |
205 |
|
|
pop @{ $S->{next} }; # cc |
206 |
|
|
pop @{ $S->{next} }; # cce] |
207 |
|
|
return $S->object(anyof_close => "]"); |
208 |
|
|
}); |
209 |
|
|
|
210 |
|
|
# some kind of assertion... |
211 |
|
|
$self->add_handler('(?' => sub { |
212 |
|
|
my ($S) = @_; |
213 |
|
|
my $c = '(?'; |
214 |
|
|
|
215 |
|
|
if (${&Rx} =~ m{ \G (.) }xgcs) { |
216 |
|
|
my $n = "$c$1"; |
217 |
|
|
return $S->$n if $S->can($n); |
218 |
|
|
&RxPOS--; |
219 |
|
|
} |
220 |
|
|
else { |
221 |
|
|
$S->error(RPe_SEQINC); |
222 |
|
|
} |
223 |
|
|
|
224 |
|
|
my $old = &RxPOS; |
225 |
|
|
|
226 |
|
|
if (${&Rx} =~ m{ \G : }xgc) { |
227 |
|
|
push @{ $S->{flags} }, &Rf; |
228 |
|
|
push @{ $S->{next} }, qw< c) atom >; |
229 |
|
|
return $S->object('group', 0, 0); |
230 |
|
|
} |
231 |
|
|
|
232 |
|
|
&RxPOS++; |
233 |
|
|
$S->error(RPe_NOTREC, 0, substr(${&Rx}, $old)); |
234 |
|
|
}); |
235 |
|
|
|
236 |
|
|
} # init |
237 |
|
|
|
238 |
|
|
1; |