1 |
wakaba |
1.1 |
package Regexp::Parser::JavaScript; |
2 |
|
|
use strict; |
3 |
|
|
use warnings; |
4 |
|
|
|
5 |
|
|
use Regexp::Parser::Perl58; |
6 |
|
|
use Regexp::Parser qw(:original :RPe); |
7 |
|
|
push our @ISA, 'Regexp::Parser::Perl58'; |
8 |
|
|
|
9 |
|
|
sub init ($) { |
10 |
|
|
my $self = shift; |
11 |
|
|
$self->SUPER::init; |
12 |
|
|
|
13 |
|
|
$self->del_handler (qw/ |
14 |
|
|
\a \e \A \C \G \N \P \p \X \Z \z |
15 |
|
|
(?# (?$ (?@ (?< (?> (?{ (?? (?p (?( |
16 |
|
|
/); |
17 |
|
|
|
18 |
|
|
$self->add_handler('\v' => sub { |
19 |
|
|
my ($S, $cc) = @_; |
20 |
|
|
return $S->force_object(anyof_char => "\x0B", '\v') if $cc; |
21 |
|
|
return $S->object(exact => "\x0B" => '\v'); |
22 |
|
|
}); |
23 |
|
|
|
24 |
|
|
$self->add_handler('\n' => sub { |
25 |
|
|
my ($S, $cc) = @_; |
26 |
|
|
return $S->force_object(anyof_char => "\x0A", '\n') if $cc; |
27 |
|
|
return $S->object(exact => "\x0A" => '\n'); |
28 |
|
|
}); |
29 |
|
|
|
30 |
|
|
$self->add_handler('\r' => sub { |
31 |
|
|
my ($S, $cc) = @_; |
32 |
|
|
return $S->force_object(anyof_char => "\x0D", '\r') if $cc; |
33 |
|
|
return $S->object(exact => "\x0D" => '\r'); |
34 |
|
|
}); |
35 |
|
|
|
36 |
|
|
# backslash |
37 |
|
|
$self->add_handler('\\' => sub { |
38 |
|
|
my ($S, $cc) = @_; |
39 |
|
|
my $c = '\\'; |
40 |
|
|
|
41 |
|
|
if (${&Rx} =~ m{ \G (.) }xgcs) { |
42 |
|
|
$c .= (my $n = $1); |
43 |
|
|
|
44 |
|
|
return $S->$c($cc) if $S->can($c); |
45 |
|
|
|
46 |
|
|
if ($n =~ /\d/) { |
47 |
|
|
## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%E5%85%AB%E9%80%B2%E3%82%A8%E3%82%B9%E3%82%B1%E3%83%BC%E3%83%97>. |
48 |
|
|
|
49 |
|
|
if ($n =~ /^[0-3]/ and ${&Rx} =~ m{ \G ([0-7]{1,2}) }xgc) { |
50 |
|
|
$n .= $1; |
51 |
|
|
} |
52 |
|
|
elsif ($n =~ /^[4-7]/ and ${&Rx} =~ m{ \G ([0-7]) }xgc) { |
53 |
|
|
$n .= $1; |
54 |
|
|
} |
55 |
|
|
|
56 |
|
|
# outside of char class, \nnn might be backref |
57 |
|
|
if (!&SIZE_ONLY and !$cc and $n !~ /^0/) { |
58 |
|
|
unless ($n > $S->{maxpar}) { |
59 |
|
|
return $S->object(ref => $n, "\\$n"); |
60 |
|
|
} |
61 |
|
|
} |
62 |
|
|
if ($n =~ /^[89]/) { |
63 |
|
|
## TODO: warning |
64 |
|
|
return $S->object(exact => chr 0x30 + $n, |
65 |
|
|
sprintf("\\x%02x", 0x30 + $n)); |
66 |
|
|
} |
67 |
|
|
|
68 |
|
|
## TODO: warning |
69 |
|
|
return $S->object(exact => chr oct $n, sprintf("\\%03s", $n)); |
70 |
|
|
} |
71 |
|
|
|
72 |
|
|
$S->warn(RPe_BADESC, $c = $n, "") if $n =~ /[a-zA-Z]/; |
73 |
|
|
|
74 |
|
|
return $S->object(exact => $n, $c); |
75 |
|
|
} |
76 |
|
|
|
77 |
|
|
$S->error(RPe_ESLASH); |
78 |
|
|
}); |
79 |
|
|
|
80 |
|
|
# control character |
81 |
|
|
$self->add_handler('\c' => sub { |
82 |
|
|
my ($S, $cc) = @_; |
83 |
|
|
${&Rx} =~ m{ \G (.?) }xgc; |
84 |
|
|
## TODO: error unless [A-Za-z] |
85 |
|
|
my $c = $1; |
86 |
|
|
return $S->force_object(anyof_char => chr(64 ^ ord $c), "\\c$c") if $cc; |
87 |
|
|
return $S->object(exact => chr(64 ^ ord $c), "\\c$c"); |
88 |
|
|
}); |
89 |
|
|
|
90 |
|
|
# hex character |
91 |
|
|
$self->add_handler('\x' => sub { |
92 |
|
|
my ($S, $cc) = @_; |
93 |
|
|
|
94 |
|
|
my $num; |
95 |
|
|
if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{2} ) }sxgc) { |
96 |
|
|
$num = hex $1; |
97 |
|
|
} else { |
98 |
|
|
$num = ord 'x'; |
99 |
|
|
} |
100 |
|
|
|
101 |
|
|
my $rep = sprintf("\\x%02X", $num); |
102 |
|
|
return $S->force_object(anyof_char => chr $num, $rep) if $cc; |
103 |
|
|
return $S->object(exact => chr $num, $rep); |
104 |
|
|
}); |
105 |
|
|
|
106 |
|
|
# \u |
107 |
|
|
$self->add_handler('\u' => sub { |
108 |
|
|
my ($S, $cc) = @_; |
109 |
|
|
|
110 |
|
|
my $num; |
111 |
|
|
if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{4} ) }sxgc) { |
112 |
|
|
$num = hex $1; |
113 |
|
|
} else { |
114 |
|
|
$num = ord 'u'; |
115 |
|
|
} |
116 |
|
|
|
117 |
|
|
my $rep = sprintf("\\u%04X", $num); |
118 |
|
|
return $S->force_object(anyof_char => chr $num, $rep) if $cc; |
119 |
|
|
return $S->object(exact => chr $num, $rep); |
120 |
|
|
}); |
121 |
|
|
|
122 |
|
|
# start of char class range (or maybe just char) |
123 |
|
|
$self->add_handler('cc' => sub { |
124 |
|
|
my ($S) = @_; |
125 |
|
|
return if ${&Rx} =~ m{ \G (?= ] | \z ) }xgc; |
126 |
|
|
push @{ $S->{next} }, qw< cc >; |
127 |
|
|
my ($lhs, $rhs, $before_range); |
128 |
|
|
my $ret = \$lhs; |
129 |
|
|
|
130 |
|
|
{ |
131 |
|
|
if (${&Rx} =~ m{ \G ( \\ ) }xgcs) { |
132 |
|
|
my $c = $1; |
133 |
|
|
$$ret = $S->$c(1); |
134 |
|
|
} |
135 |
|
|
elsif (${&Rx} =~ m{ \G (.) }xgcs) { |
136 |
|
|
$$ret = $S->force_object(anyof_char => $1); |
137 |
|
|
} |
138 |
|
|
|
139 |
|
|
if ($ret == \$lhs) { |
140 |
|
|
if (${&Rx} =~ m{ \G (?= - ) }xgc) { |
141 |
|
|
if ($lhs->visual =~ /^\\[dDsSwW]/) { |
142 |
|
|
$S->warn(RPe_FRANGE, $lhs->visual, ""); |
143 |
|
|
$ret = $lhs; |
144 |
|
|
last; |
145 |
|
|
} |
146 |
|
|
$before_range = &RxPOS++; |
147 |
|
|
$ret = \$rhs; |
148 |
|
|
redo; |
149 |
|
|
} |
150 |
|
|
$ret = $lhs; |
151 |
|
|
} |
152 |
|
|
elsif ($ret == \$rhs) { |
153 |
|
|
if ($rhs->visual =~ /^\\[dDsSwW]/) { |
154 |
|
|
$S->warn(RPe_FRANGE, $lhs->visual, $rhs->visual); |
155 |
|
|
&RxPOS = $before_range; |
156 |
|
|
$ret = $lhs; |
157 |
|
|
} |
158 |
|
|
elsif ($lhs->data gt $rhs->data) { ## ->visual in the original code. |
159 |
|
|
$S->error(RPe_IRANGE, $lhs->visual, $rhs->visual); |
160 |
|
|
} |
161 |
|
|
else { |
162 |
|
|
$ret = $S->object(anyof_range => $lhs, $rhs); |
163 |
|
|
} |
164 |
|
|
} |
165 |
|
|
} |
166 |
|
|
|
167 |
|
|
return if &SIZE_ONLY; |
168 |
|
|
return $ret; |
169 |
|
|
}); |
170 |
|
|
|
171 |
|
|
# char class ] at beginning |
172 |
|
|
$self->add_handler('cc]' => sub { |
173 |
|
|
my ($S) = @_; |
174 |
|
|
return unless ${&Rx} =~ m{ \G ] }xgc; |
175 |
|
|
pop @{ $S->{next} }; # cc |
176 |
|
|
pop @{ $S->{next} }; # cce] |
177 |
|
|
return $S->object(anyof_close => "]"); |
178 |
|
|
}); |
179 |
|
|
|
180 |
|
|
# some kind of assertion... |
181 |
|
|
$self->add_handler('(?' => sub { |
182 |
|
|
my ($S) = @_; |
183 |
|
|
my $c = '(?'; |
184 |
|
|
|
185 |
|
|
if (${&Rx} =~ m{ \G (.) }xgcs) { |
186 |
|
|
my $n = "$c$1"; |
187 |
|
|
return $S->$n if $S->can($n); |
188 |
|
|
&RxPOS--; |
189 |
|
|
} |
190 |
|
|
else { |
191 |
|
|
$S->error(RPe_SEQINC); |
192 |
|
|
} |
193 |
|
|
|
194 |
|
|
my $old = &RxPOS; |
195 |
|
|
|
196 |
|
|
if (${&Rx} =~ m{ \G : }xgc) { |
197 |
|
|
push @{ $S->{flags} }, &Rf; |
198 |
|
|
push @{ $S->{next} }, qw< c) atom >; |
199 |
|
|
return $S->object('group', 0, 0); |
200 |
|
|
} |
201 |
|
|
|
202 |
|
|
&RxPOS++; |
203 |
|
|
$S->error(RPe_NOTREC, 0, substr(${&Rx}, $old)); |
204 |
|
|
}); |
205 |
|
|
|
206 |
|
|
} # init |
207 |
|
|
|
208 |
|
|
1; |