/[suikacvs]/webroot/regexp/lib/Regexp/Parser/JavaScript.pm
Suika

Contents of /webroot/regexp/lib/Regexp/Parser/JavaScript.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations) (download)
Tue Dec 9 03:22:47 2008 UTC (15 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +41 -11 lines
++ swe/lib/Regexp/Parser/ChangeLog	9 Dec 2008 03:22:39 -0000
	* JavaScript.pm: Report ECMA 262 conformance errors as warnings.

	* Perl58.pm: Made error levels customizable.

2008-12-09  Wakaba  <wakaba@suika.fam.cx>

1 package Regexp::Parser::JavaScript;
2 use strict;
3 use warnings;
4
5 use Regexp::Parser::Perl58;
6 use Regexp::Parser qw(:original :RPe);
7 push our @ISA, 'Regexp::Parser::Perl58';
8
9 use constant RPJSe_OCTESC => 80130, 'Octal escape is obsolete';
10
11 my $IdentifierStart = qr/[
12 ## UnicodeLetter
13 \p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}
14
15 $ _
16 ]/x;
17
18 sub init ($) {
19 my $self = shift;
20 $self->SUPER::init;
21
22 $self->{enum_to_level}->{[RPe_BADESC]->[0]} = 'm';
23 $self->{enum_to_level}->{[RPe_BGROUP]->[0]} = 'm';
24 $self->{enum_to_level}->{[RPJSe_OCTESC]->[0]} = 'm';
25 $self->{enum_to_level}->{[RPe_FRANGE]->[0]} = 'm';
26
27 $self->del_handler (qw/
28 \a \e \A \C \G \N \P \p \X \Z \z
29 (?# (?$ (?@ (?< (?> (?{ (?? (?p (?(
30 /);
31
32 $self->add_handler('\v' => sub {
33 my ($S, $cc) = @_;
34 return $S->force_object(anyof_char => "\x0B", '\v') if $cc;
35 return $S->object(exact => "\x0B" => '\v');
36 });
37
38 $self->add_handler('\n' => sub {
39 my ($S, $cc) = @_;
40 return $S->force_object(anyof_char => "\x0A", '\n') if $cc;
41 return $S->object(exact => "\x0A" => '\n');
42 });
43
44 $self->add_handler('\r' => sub {
45 my ($S, $cc) = @_;
46 return $S->force_object(anyof_char => "\x0D", '\r') if $cc;
47 return $S->object(exact => "\x0D" => '\r');
48 });
49
50 # backslash
51 $self->add_handler('\\' => sub {
52 my ($S, $cc) = @_;
53 my $c = '\\';
54
55 if (${&Rx} =~ m{ \G (.) }xgcs) {
56 $c .= (my $n = $1);
57
58 return $S->$c($cc) if $S->can($c);
59
60 my $family = $cc ? 'anyof_char' : 'exact';
61
62 if ($n =~ /\d/) {
63 ## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%E5%85%AB%E9%80%B2%E3%82%A8%E3%82%B9%E3%82%B1%E3%83%BC%E3%83%97>.
64
65 if ($n =~ /^[0-3]/ and ${&Rx} =~ m{ \G ([0-7]{1,2}) }xgc) {
66 $n .= $1;
67 }
68 elsif ($n =~ /^[4-7]/ and ${&Rx} =~ m{ \G ([0-7]) }xgc) {
69 $n .= $1;
70 }
71
72 # outside of char class, \nnn might be backref
73 if (!&SIZE_ONLY and !$cc and $n !~ /^0/) {
74 unless ($n > $S->{maxpar}) {
75 return $S->object(ref => $n, "\\$n");
76 }
77 }
78 if ($n =~ /^[89]/) {
79 $S->warn(RPe_BGROUP);
80 return $S->object($family => chr 0x30 + $n,
81 sprintf("\\x%02x", 0x30 + $n));
82 }
83
84 if ($n eq '0') {
85 #
86 } elsif ($n =~ /^0/ or $cc) {
87 $S->warn(RPJSe_OCTESC);
88 } elsif ($n > $S->{maxpar}) {
89 $S->warn(RPe_BGROUP);
90 }
91 return $S->object($family => chr oct $n, sprintf("\\%03s", $n));
92 }
93
94 $S->warn(RPe_BADESC, $c = $n, "") if $n =~ /$IdentifierStart/o;
95
96 return $S->object($family => $n, $c);
97 }
98
99 $S->error(RPe_ESLASH);
100 });
101
102 # control character
103 $self->add_handler('\c' => sub {
104 my ($S, $cc) = @_;
105 ## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%5Cc>.
106 if (${&Rx} =~ m{ \G ([A-Za-z]) }xgc) {
107 my $c = $1;
108 $c =~ tr/a-z/A-Z/;
109 return $S->force_object(anyof_char => chr(64 ^ ord $c), "\\c$c") if $cc;
110 return $S->object(exact => chr(64 ^ ord $c), "\\c$c");
111 } else {
112 my $c = 'c';
113 $S->warn(RPe_BADESC, $c, "");
114 return $S->object(($cc ? 'anyof_char' : 'exact') => $c, $c);
115 }
116 });
117
118 # hex character
119 $self->add_handler('\x' => sub {
120 my ($S, $cc) = @_;
121
122 my $num;
123 if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{2} ) }sxgc) {
124 $num = hex $1;
125 } else {
126 $num = ord 'x';
127 $S->warn(RPe_BADESC, 'x', "");
128 }
129
130 my $rep = sprintf("\\x%02X", $num);
131 return $S->force_object(anyof_char => chr $num, $rep) if $cc;
132 return $S->object(exact => chr $num, $rep);
133 });
134
135 # \u
136 $self->add_handler('\u' => sub {
137 my ($S, $cc) = @_;
138
139 my $num;
140 if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{4} ) }sxgc) {
141 $num = hex $1;
142 } else {
143 $num = ord 'u';
144 $S->warn(RPe_BADESC, 'u', "");
145 }
146
147 my $rep = sprintf("\\u%04X", $num);
148 return $S->force_object(anyof_char => chr $num, $rep) if $cc;
149 return $S->object(exact => chr $num, $rep);
150 });
151
152 # start of char class range (or maybe just char)
153 $self->add_handler('cc' => sub {
154 my ($S) = @_;
155 return if ${&Rx} =~ m{ \G (?= ] | \z ) }xgc;
156 push @{ $S->{next} }, qw< cc >;
157 my ($lhs, $rhs, $before_range);
158 my $ret = \$lhs;
159
160 {
161 if (${&Rx} =~ m{ \G ( \\ ) }xgcs) {
162 my $c = $1;
163 $$ret = $S->$c(1);
164 }
165 elsif (${&Rx} =~ m{ \G (.) }xgcs) {
166 $$ret = $S->force_object(anyof_char => $1);
167 }
168
169 if ($ret == \$lhs) {
170 if (${&Rx} =~ m{ \G (?= - ) }xgc) {
171 if ($lhs->visual =~ /^\\[dDsSwW]/) {
172 $S->warn(RPe_FRANGE, $lhs->visual, "");
173 $ret = $lhs;
174 last;
175 }
176 $before_range = &RxPOS++;
177 $ret = \$rhs;
178 redo;
179 }
180 $ret = $lhs;
181 }
182 elsif ($ret == \$rhs) {
183 if ($rhs->visual =~ /^\\[dDsSwW]/) {
184 $S->warn(RPe_FRANGE, $lhs->visual, $rhs->visual);
185 &RxPOS = $before_range;
186 $ret = $lhs;
187 }
188 elsif ($lhs->data gt $rhs->data) { ## ->visual in the original code.
189 $S->error(RPe_IRANGE, $lhs->visual, $rhs->visual);
190 }
191 else {
192 $ret = $S->object(anyof_range => $lhs, $rhs);
193 }
194 }
195 }
196
197 return if &SIZE_ONLY;
198 return $ret;
199 });
200
201 # char class ] at beginning
202 $self->add_handler('cc]' => sub {
203 my ($S) = @_;
204 return unless ${&Rx} =~ m{ \G ] }xgc;
205 pop @{ $S->{next} }; # cc
206 pop @{ $S->{next} }; # cce]
207 return $S->object(anyof_close => "]");
208 });
209
210 # some kind of assertion...
211 $self->add_handler('(?' => sub {
212 my ($S) = @_;
213 my $c = '(?';
214
215 if (${&Rx} =~ m{ \G (.) }xgcs) {
216 my $n = "$c$1";
217 return $S->$n if $S->can($n);
218 &RxPOS--;
219 }
220 else {
221 $S->error(RPe_SEQINC);
222 }
223
224 my $old = &RxPOS;
225
226 if (${&Rx} =~ m{ \G : }xgc) {
227 push @{ $S->{flags} }, &Rf;
228 push @{ $S->{next} }, qw< c) atom >;
229 return $S->object('group', 0, 0);
230 }
231
232 &RxPOS++;
233 $S->error(RPe_NOTREC, 0, substr(${&Rx}, $old));
234 });
235
236 } # init
237
238 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24