/[suikacvs]/webroot/regexp/lib/Regexp/Parser/JavaScript.pm
Suika

Contents of /webroot/regexp/lib/Regexp/Parser/JavaScript.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Tue Dec 9 03:22:47 2008 UTC (15 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +41 -11 lines
++ swe/lib/Regexp/Parser/ChangeLog	9 Dec 2008 03:22:39 -0000
	* JavaScript.pm: Report ECMA 262 conformance errors as warnings.

	* Perl58.pm: Made error levels customizable.

2008-12-09  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Regexp::Parser::JavaScript;
2     use strict;
3     use warnings;
4    
5     use Regexp::Parser::Perl58;
6     use Regexp::Parser qw(:original :RPe);
7     push our @ISA, 'Regexp::Parser::Perl58';
8    
9 wakaba 1.2 use constant RPJSe_OCTESC => 80130, 'Octal escape is obsolete';
10    
11     my $IdentifierStart = qr/[
12     ## UnicodeLetter
13     \p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}
14    
15     $ _
16     ]/x;
17    
18 wakaba 1.1 sub init ($) {
19     my $self = shift;
20     $self->SUPER::init;
21    
22 wakaba 1.2 $self->{enum_to_level}->{[RPe_BADESC]->[0]} = 'm';
23     $self->{enum_to_level}->{[RPe_BGROUP]->[0]} = 'm';
24     $self->{enum_to_level}->{[RPJSe_OCTESC]->[0]} = 'm';
25     $self->{enum_to_level}->{[RPe_FRANGE]->[0]} = 'm';
26    
27 wakaba 1.1 $self->del_handler (qw/
28     \a \e \A \C \G \N \P \p \X \Z \z
29     (?# (?$ (?@ (?< (?> (?{ (?? (?p (?(
30     /);
31    
32     $self->add_handler('\v' => sub {
33     my ($S, $cc) = @_;
34     return $S->force_object(anyof_char => "\x0B", '\v') if $cc;
35     return $S->object(exact => "\x0B" => '\v');
36     });
37    
38     $self->add_handler('\n' => sub {
39     my ($S, $cc) = @_;
40     return $S->force_object(anyof_char => "\x0A", '\n') if $cc;
41     return $S->object(exact => "\x0A" => '\n');
42     });
43    
44     $self->add_handler('\r' => sub {
45     my ($S, $cc) = @_;
46     return $S->force_object(anyof_char => "\x0D", '\r') if $cc;
47     return $S->object(exact => "\x0D" => '\r');
48     });
49    
50     # backslash
51     $self->add_handler('\\' => sub {
52     my ($S, $cc) = @_;
53     my $c = '\\';
54    
55     if (${&Rx} =~ m{ \G (.) }xgcs) {
56     $c .= (my $n = $1);
57    
58     return $S->$c($cc) if $S->can($c);
59    
60 wakaba 1.2 my $family = $cc ? 'anyof_char' : 'exact';
61    
62 wakaba 1.1 if ($n =~ /\d/) {
63     ## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%E5%85%AB%E9%80%B2%E3%82%A8%E3%82%B9%E3%82%B1%E3%83%BC%E3%83%97>.
64    
65     if ($n =~ /^[0-3]/ and ${&Rx} =~ m{ \G ([0-7]{1,2}) }xgc) {
66     $n .= $1;
67     }
68     elsif ($n =~ /^[4-7]/ and ${&Rx} =~ m{ \G ([0-7]) }xgc) {
69     $n .= $1;
70     }
71    
72     # outside of char class, \nnn might be backref
73     if (!&SIZE_ONLY and !$cc and $n !~ /^0/) {
74     unless ($n > $S->{maxpar}) {
75     return $S->object(ref => $n, "\\$n");
76     }
77     }
78     if ($n =~ /^[89]/) {
79 wakaba 1.2 $S->warn(RPe_BGROUP);
80     return $S->object($family => chr 0x30 + $n,
81 wakaba 1.1 sprintf("\\x%02x", 0x30 + $n));
82     }
83    
84 wakaba 1.2 if ($n eq '0') {
85     #
86     } elsif ($n =~ /^0/ or $cc) {
87     $S->warn(RPJSe_OCTESC);
88     } elsif ($n > $S->{maxpar}) {
89     $S->warn(RPe_BGROUP);
90     }
91     return $S->object($family => chr oct $n, sprintf("\\%03s", $n));
92 wakaba 1.1 }
93    
94 wakaba 1.2 $S->warn(RPe_BADESC, $c = $n, "") if $n =~ /$IdentifierStart/o;
95 wakaba 1.1
96 wakaba 1.2 return $S->object($family => $n, $c);
97 wakaba 1.1 }
98    
99     $S->error(RPe_ESLASH);
100     });
101    
102     # control character
103     $self->add_handler('\c' => sub {
104     my ($S, $cc) = @_;
105 wakaba 1.2 ## See <http://suika.fam.cx/%7Ewakaba/wiki/sw/n/%5Cc>.
106     if (${&Rx} =~ m{ \G ([A-Za-z]) }xgc) {
107     my $c = $1;
108     $c =~ tr/a-z/A-Z/;
109     return $S->force_object(anyof_char => chr(64 ^ ord $c), "\\c$c") if $cc;
110     return $S->object(exact => chr(64 ^ ord $c), "\\c$c");
111     } else {
112     my $c = 'c';
113     $S->warn(RPe_BADESC, $c, "");
114     return $S->object(($cc ? 'anyof_char' : 'exact') => $c, $c);
115     }
116 wakaba 1.1 });
117    
118     # hex character
119     $self->add_handler('\x' => sub {
120     my ($S, $cc) = @_;
121    
122     my $num;
123     if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{2} ) }sxgc) {
124     $num = hex $1;
125     } else {
126     $num = ord 'x';
127 wakaba 1.2 $S->warn(RPe_BADESC, 'x', "");
128 wakaba 1.1 }
129    
130     my $rep = sprintf("\\x%02X", $num);
131     return $S->force_object(anyof_char => chr $num, $rep) if $cc;
132     return $S->object(exact => chr $num, $rep);
133     });
134    
135     # \u
136     $self->add_handler('\u' => sub {
137     my ($S, $cc) = @_;
138    
139     my $num;
140     if (${&Rx} =~ m{ \G ( [0-9A-Fa-f]{4} ) }sxgc) {
141     $num = hex $1;
142     } else {
143     $num = ord 'u';
144 wakaba 1.2 $S->warn(RPe_BADESC, 'u', "");
145 wakaba 1.1 }
146    
147     my $rep = sprintf("\\u%04X", $num);
148     return $S->force_object(anyof_char => chr $num, $rep) if $cc;
149     return $S->object(exact => chr $num, $rep);
150     });
151    
152     # start of char class range (or maybe just char)
153     $self->add_handler('cc' => sub {
154     my ($S) = @_;
155     return if ${&Rx} =~ m{ \G (?= ] | \z ) }xgc;
156     push @{ $S->{next} }, qw< cc >;
157     my ($lhs, $rhs, $before_range);
158     my $ret = \$lhs;
159    
160     {
161     if (${&Rx} =~ m{ \G ( \\ ) }xgcs) {
162     my $c = $1;
163     $$ret = $S->$c(1);
164     }
165     elsif (${&Rx} =~ m{ \G (.) }xgcs) {
166     $$ret = $S->force_object(anyof_char => $1);
167     }
168    
169     if ($ret == \$lhs) {
170     if (${&Rx} =~ m{ \G (?= - ) }xgc) {
171     if ($lhs->visual =~ /^\\[dDsSwW]/) {
172     $S->warn(RPe_FRANGE, $lhs->visual, "");
173     $ret = $lhs;
174     last;
175     }
176     $before_range = &RxPOS++;
177     $ret = \$rhs;
178     redo;
179     }
180     $ret = $lhs;
181     }
182     elsif ($ret == \$rhs) {
183     if ($rhs->visual =~ /^\\[dDsSwW]/) {
184     $S->warn(RPe_FRANGE, $lhs->visual, $rhs->visual);
185     &RxPOS = $before_range;
186     $ret = $lhs;
187     }
188     elsif ($lhs->data gt $rhs->data) { ## ->visual in the original code.
189     $S->error(RPe_IRANGE, $lhs->visual, $rhs->visual);
190     }
191     else {
192     $ret = $S->object(anyof_range => $lhs, $rhs);
193     }
194     }
195     }
196    
197     return if &SIZE_ONLY;
198     return $ret;
199     });
200    
201     # char class ] at beginning
202     $self->add_handler('cc]' => sub {
203     my ($S) = @_;
204     return unless ${&Rx} =~ m{ \G ] }xgc;
205     pop @{ $S->{next} }; # cc
206     pop @{ $S->{next} }; # cce]
207     return $S->object(anyof_close => "]");
208     });
209    
210     # some kind of assertion...
211     $self->add_handler('(?' => sub {
212     my ($S) = @_;
213     my $c = '(?';
214    
215     if (${&Rx} =~ m{ \G (.) }xgcs) {
216     my $n = "$c$1";
217     return $S->$n if $S->can($n);
218     &RxPOS--;
219     }
220     else {
221     $S->error(RPe_SEQINC);
222     }
223    
224     my $old = &RxPOS;
225    
226     if (${&Rx} =~ m{ \G : }xgc) {
227     push @{ $S->{flags} }, &Rf;
228     push @{ $S->{next} }, qw< c) atom >;
229     return $S->object('group', 0, 0);
230     }
231    
232     &RxPOS++;
233     $S->error(RPe_NOTREC, 0, substr(${&Rx}, $old));
234     });
235    
236     } # init
237    
238     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24