1 |
use strict; |
2 |
|
3 |
# Tokenize an RFC-style alternation such as q<a | b | c> into its members.
#
# Any run of whitespace, "|" or "/" counts as a single separator, so grammar
# text can be pasted almost verbatim and may span several lines.  Because
# "|" and "/" are separators they can never come out as tokens; callers that
# need them append the literal to the resulting list themselves.
#
# The ($) prototype is load-bearing: it makes split1 parse as a named unary
# operator, so in `[split1 q<...>, '/']` the comma ends split1's argument
# and '/' becomes a separate list element instead of a second argument.
#
# Returns the non-empty tokens in list context (their count in scalar
# context).
sub split1 ($) {
  my $text = shift;

  # A separator at the very start makes split emit an empty leading field;
  # filter it out so a class never receives '' as a member.  `length` is
  # used rather than plain truth so that the token "0" is kept.
  return grep { length } split m#[\s|/]+#, $text;
}
6 |
# Return a copy of the argument with every double-quote character removed.
#
# Lets BNF text be written with its original quoting, e.g.
# q<"a" | "b">, and reduced to bare tokens before tokenizing.  Because all
# `"` characters are deleted, a literal double quote can never survive; a
# caller that needs one has to add it to its list separately.
#
# The ($) prototype makes rq a named unary operator, so in
# `rq q<...>, '|'` the comma ends rq's argument rather than passing '|'
# as a second argument.
sub rq ($) {
  my $s = shift;    # work on a copy; the caller's value is left untouched
  $s =~ tr/"//d;
  return $s;
}
11 |
|
12 |
# Character-class tables for successive URI / URL / IRI specifications.
#
# $LIST is an array reference with one hash per specification:
#   name  - label of the specification the table was taken from
#   class - hash mapping a rule (class) name to an array reference of the
#           tokens listed for that rule
#
# Tokens are single literal characters, names of other rules (not all of
# which are defined in the same table, e.g. "escape" / "escaped"), or
# code-point tokens such as %x00-1F and SP.  How consumers interpret these
# tokens is not visible in this file.
#
# Notation used below:
#   * split1 q<...> takes the members from "|"-separated grammar text;
#     rq first strips the double quotes used by the later RFCs.
#   * split1 treats "|" and "/" as separators, so those two characters are
#     appended as explicit literals ('|', '/') where a rule contains them.
#     '/' is likewise added outside qw/.../ lists, where it is the
#     delimiter.
#   * Several qw lists contain "#" and "," as data.  Perl warns about such
#     lists at compile time when warnings are enabled ("Possible attempt to
#     put comments in qw() list" / "... separate words with commas"), which
#     is harmless here.
our $LIST = [
  {
    name => q<URI (RFC 1630)>,
    class => {
      alpha => [split1 q<a | b | c | d | e | f | g | h | i | j | k |
        l | m | n | o | p | q | r | s | t | u | v |
        w | x | y | z | A | B | C | D | E | F | G |
        H | I | J | K | L | M | N | O | P | Q | R |
        S | T | U | V | W | X | Y | Z>],
      xalpha => [split1 q<alpha | digit | safe | extra | escape>],
      xpalpha => [split1 q<xalpha +>],
      digit => [split1 q<0 |1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9>],
      safe => [split1 q<$ | - | _ | @ | . | &>],
      extra => [split1 q<! | * | " | ' | ( | ) | ,>],
      reserved => [split1 q<= | ; | # | ? | : | space>, '/'],
      hex => [split1 q<digit | a | b | c | d | e | f | A | B | C |
        D | E | F>],
      national => [split1 q<{ | } | [ | ] | \ | ^ | ~>, '|'],
      punctuation => ['<', '>'],
      alphanum2 => [split1 q<alpha | digit | - | _ | . | +>],
      'Reserved characters' => ['%', '/', '.', '#', '?', '+', '*', '!'],
      'Unsafe characters' => ['%x00-1F', 'SP', '%x7F', '%x80-9F', '%xA0-FF'],
    },
  },
  {
    name => q<URL (RFC 1738)>,
    class => {
      alphadigit => ['alpha', 'digit'],
      lowalpha => [split1 rq q<"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
        "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
        "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
        "y" | "z">],
      hialpha => [split1 rq q<"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
        "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
        "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z">],
      alpha => ['hialpha', 'lowalpha'],
      digit => [split1 rq q<"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
        "8" | "9">],
      safe => [split1 rq q<"$" | "-" | "_" | "." | "+">],
      extra => [split1 rq q<"!" | "*" | "'" | "(" | ")" | ",">],
      national => [split1 rq q<"{" | "}" | "\" | "^" | "~" | "[" | "]" | "`">, '|'],
      # rq deletes every double quote, so the '"' member is added by hand.
      punctuation => [split1 rq q<"<" | ">" | "#" | "%">, '"'],
      reserved => [split1 rq q<";" | "?" | ":" | "@" | "&" | "=">, '/'],
      hex => [split1 rq q<digit | "A" | "B" | "C" | "D" | "E" | "F" |
        "a" | "b" | "c" | "d" | "e" | "f">],
      uchar => [qw/unreserved escape/],
      unreserved => [qw/alpha digit safe extra/],
      xchar => [qw/unreserved reserved escape/],
      scheme => [qw/lowalpha digit + - ./],
      'No corresponding graphic US-ASCII' => [qw/%x80-9F %xA0-FF %x00-1F %x7F/],
      'Unsafe' => [qw/< > " # % { } | \ ^ ~ [ ] `/, 'SP'],
      user => [qw/uchar ; ? & =/],
      password => [qw/uchar ; ? & =/],
    },
  },
  {
    name => q<Relative URL (RFC 1808)>,
    class => {
      param => [qw#pchar /#],
      scheme => [qw/alpha digit + - ./],
      net_loc => [qw/pchar ; ?/],
      query => [qw/uchar reserved/],
      fragment => [qw/uchar reserved/],
      pchar => [qw/uchar : @ & =/],
      uchar => [qw/unreserved escape/],
      unreserved => [qw/alpha digit safe extra/],
      hex => [split1 rq q<digit | "A" | "B" | "C" | "D" | "E" | "F" |
        "a" | "b" | "c" | "d" | "e" | "f">],
      lowalpha => [split1 rq q<"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
        "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
        "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
        "y" | "z">],
      hialpha => [split1 rq q<"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
        "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
        "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z">],
      alpha => ['hialpha', 'lowalpha'],
      digit => [split1 rq q<"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
        "8" | "9">],
      safe => [split1 rq q<"$" | "-" | "_" | "." | "+">],
      extra => [split1 rq q<"!" | "*" | "'" | "(" | ")" | ",">],
      national => [split1 rq q<"{" | "}" | "\" | "^" | "~" | "[" | "]" | "`">, '|'],
      reserved => [split1 rq q<";" | "?" | ":" | "@" | "&" | "=">, '/'],
      punctuation => [qw/< > # % "/],
    },
  },
  {
    name => q<URI (RFC 2396)>,
    class => {
      uric_no_slash => [split1 rq q<unreserved | escaped | ";" | "?" | ":" | "@" |
        "&" | "=" | "+" | "$" | ",">],
      rel_segment => [qw<unreserved escaped ; @ & = + $ ,>],
      scheme => [qw/alpha digit + - ./],
      reg_name => [qw/unreserved escaped $ , ; : @ & = +/],
      userinfo => [qw/unreserved escaped ; : & = + $ ,/],
      pchar => [qw/unreserved escaped : @ & = + $ ,/],
      query => [qw/uric/],
      fragment => [qw/uric/],
      uric => [qw/reserved unreserved escaped/],
      reserved => [split1 rq q<";" | "?" | ":" | "@" | "&" | "=" | "+" |
        "$" | ",">, '/'],
      unreserved => [qw/alphanum mark/],
      mark => [split1 rq q<"-" | "_" | "." | "!" | "~" | "*" | "'" |
        "(" | ")">],
      alphanum => [qw/alpha digit/],
      alpha => [qw/lowalpha upalpha/],
      lowalpha => [split1 rq q<"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
        "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
        "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z">],
      upalpha => [split1 rq q<"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
        "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
        "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z">],
      digit => [split1 rq q<"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
        "8" | "9">],
      hex => [qw/digit A B C D E F a b c d e f/],
      'Excluded US-ASCII Characters' => [qw/control space delims unwise/],
      control => [qw/%x00-1F %x7F/],
      space => [qw/SP/],
      delims => [qw/< > # % "/],
      unwise => [split1 rq q/"{" | "}" | "\" | "^" | "[" | "]" | "`"/, '|'],
    },
  },
  {
    # Same as the RFC 2396 table except that "[" and "]" are listed under
    # reserved instead of unwise.
    name => q<URI (RFC 2396 + RFC 2732)>,
    class => {
      uric_no_slash => [split1 rq q<unreserved | escaped | ";" | "?" | ":" | "@" |
        "&" | "=" | "+" | "$" | ",">],
      rel_segment => [qw<unreserved escaped ; @ & = + $ ,>],
      scheme => [qw/alpha digit + - ./],
      reg_name => [qw/unreserved escaped $ , ; : @ & = +/],
      userinfo => [qw/unreserved escaped ; : & = + $ ,/],
      pchar => [qw/unreserved escaped : @ & = + $ ,/],
      query => [qw/uric/],
      fragment => [qw/uric/],
      uric => [qw/reserved unreserved escaped/],
      reserved => [split1 rq q<";" | "?" | ":" | "@" | "&" | "=" | "+" |
        "$" | "," | "[" | "]">, '/'],
      unreserved => [qw/alphanum mark/],
      mark => [split1 rq q<"-" | "_" | "." | "!" | "~" | "*" | "'" |
        "(" | ")">],
      alphanum => [qw/alpha digit/],
      alpha => [qw/lowalpha upalpha/],
      lowalpha => [split1 rq q<"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
        "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
        "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z">],
      upalpha => [split1 rq q<"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
        "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
        "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z">],
      digit => [split1 rq q<"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
        "8" | "9">],
      hex => [qw/digit A B C D E F a b c d e f/],
      'Excluded US-ASCII Characters' => [qw/control space delims unwise/],
      control => [qw/%x00-1F %x7F/],
      space => [qw/SP/],
      delims => [qw/< > # % "/],
      unwise => [split1 rq q/"{" | "}" | "\" | "^" | "`"/, '|'],
    },
  },
  {
    name => q<URI (RFC 3986)>,
    class => {
      reserved => [qw/gen-delims sub-delims/],
      'gen-delims' => [qw/: ? # [ ] @/, '/'],
      'sub-delims' => [qw/! $ & ' ( ) * + , ; =/],
      unreserved => [qw/ALPHA DIGIT - . _ ~/],
      ALPHA => [qw/A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
        a b c d e f g h i j k l m n o p q r s t u v w x y z/],
      DIGIT => [qw/0 1 2 3 4 5 6 7 8 9/],
      scheme => [qw/ALPHA DIGIT + - ./],
      userinfo => [qw/unreserved sub-delims :/],
      'IP-literal' => [qw/unreserved sub-delims :/],
      'reg-name' => [qw/unreserved sub-delims/],
      pchar => [qw/unreserved sub-delims : @/],
      query => [qw/pchar ?/, '/'],
      fragment => [qw/pchar ?/, '/'],
    },
  },
  {
    # Keys keep the RFC 3986 rule names; the trailing comments give the
    # corresponding "i"-prefixed rule names.
    name => q<IRI (RFC 3987)>,
    class => {
      reserved => [qw/gen-delims sub-delims/],
      'gen-delims' => [qw/: ? # [ ] @/, '/'],
      'sub-delims' => [qw/! $ & ' ( ) * + , ; =/],
      unreserved => [qw/ALPHA DIGIT - . _ ~ ucschar/], # iunreserved
      ALPHA => [qw/A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
        a b c d e f g h i j k l m n o p q r s t u v w x y z/],
      DIGIT => [qw/0 1 2 3 4 5 6 7 8 9/],
      scheme => [qw/ALPHA DIGIT + - ./],
      userinfo => [qw/unreserved sub-delims :/], # iuserinfo
      'IP-literal' => [qw/unreserved sub-delims :/],
      'reg-name' => [qw/unreserved sub-delims/], # ireg-name
      pchar => [qw/unreserved sub-delims : @/], # ipchar
      query => [qw/pchar ?/, '/', qw/iprivate/], # iquery
      fragment => [qw/pchar ?/, '/'], # ifragment
      ucschar => [qw/%xA0-FF %x0100-D7FF %xF900-FDCF %xFDF0-FFEF
        %x10000-1FFFD %x20000-2FFFD %x30000-3FFFD
        %x40000-4FFFD %x50000-5FFFD %x60000-6FFFD
        %x70000-7FFFD %x80000-8FFFD %x90000-9FFFD
        %xA0000-AFFFD %xB0000-BFFFD %xC0000-CFFFD
        %xD0000-DFFFD %xE1000-EFFFD/],
      iprivate => [qw/%xE000-F8FF %xF0000-FFFFD %x100000-10FFFD/],
    },
  },
  {
    # Same rules as the RFC 3987 table, with a longer ucschar list.
    name => q<LRIRI (NOTE-leiri-20081103)>,
    class => {
      reserved => [qw/gen-delims sub-delims/],
      'gen-delims' => [qw/: ? # [ ] @/, '/'],
      'sub-delims' => [qw/! $ & ' ( ) * + , ; =/],
      unreserved => [qw/ALPHA DIGIT - . _ ~ ucschar/], # iunreserved
      ALPHA => [qw/A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
        a b c d e f g h i j k l m n o p q r s t u v w x y z/],
      DIGIT => [qw/0 1 2 3 4 5 6 7 8 9/],
      scheme => [qw/ALPHA DIGIT + - ./],
      userinfo => [qw/unreserved sub-delims :/], # iuserinfo
      'IP-literal' => [qw/unreserved sub-delims :/],
      'reg-name' => [qw/unreserved sub-delims/], # ireg-name
      pchar => [qw/unreserved sub-delims : @/], # ipchar
      query => [qw/pchar ?/, '/', qw/iprivate/], # iquery
      fragment => [qw/pchar ?/, '/'], # ifragment
      ucschar => [qw/%xA0-FF %x0100-D7FF %xF900-FDCF %xFDF0-FFEF
        %x10000-1FFFD %x20000-2FFFD %x30000-3FFFD
        %x40000-4FFFD %x50000-5FFFD %x60000-6FFFD
        %x70000-7FFFD %x80000-8FFFD %x90000-9FFFD
        %xA0000-AFFFD %xB0000-BFFFD %xC0000-CFFFD
        %xD0000-DFFFD %xE1000-EFFFD
        %x00-1F %x7F %x80-9F %xE000-F8FF %xFDD0-FDEF
        %xFFF0-FFFD %x1FFFE-1FFFF %x2FFFE-2FFFF
        %x3FFFE-3FFFF %x4FFFE-4FFFF %x5FFFE-5FFFF
        %x6FFFE-6FFFF %x7FFFE-7FFFF %x8FFFE-8FFFF
        %x9FFFE-9FFFF %xAFFFE-AFFFF %xBFFFE-BFFFF
        %xCFFFE-CFFFF %xDFFFE-DFFFF %xEFFFE-EFFFF
        %xF0000-FFFFD %xFFFFE-FFFFF
        %x100000-10FFFD %x10FFFE-10FFFF
        SP < > " { } | \ ^ `/],
      iprivate => [qw/%xE000-F8FF %xF0000-FFFFD %x100000-10FFFD/],
    },
  },
  {
    # Here the extra code-point ranges are listed directly under
    # unreserved (there is no separate ucschar / iprivate rule), and
    # fragment additionally lists "#".
    name => q<URL parsing (WA1 r3244)>,
    class => {
      reserved => [qw/gen-delims sub-delims/],
      'gen-delims' => [qw/: ? # [ ] @/, '/'],
      'sub-delims' => [qw/! $ & ' ( ) * + , ; =/],
      unreserved => [qw/ALPHA DIGIT - . _ ~
        %xA0-FF %x0100-D7FF %xF900-FDCF %xFDF0-FFEF
        %x10000-1FFFD %x20000-2FFFD %x30000-3FFFD
        %x40000-4FFFD %x50000-5FFFD %x60000-6FFFD
        %x70000-7FFFD %x80000-8FFFD %x90000-9FFFD
        %xA0000-AFFFD %xB0000-BFFFD %xC0000-CFFFD
        %xD0000-DFFFD %xE1000-EFFFD
        %x00-1F %x7F %x80-9F %xFDD0-FDEF
        %xFFF0-FFFD %x1FFFE-1FFFF %x2FFFE-2FFFF
        %x3FFFE-3FFFF %x4FFFE-4FFFF %x5FFFE-5FFFF
        %x6FFFE-6FFFF %x7FFFE-7FFFF %x8FFFE-8FFFF
        %x9FFFE-9FFFF %xAFFFE-AFFFF %xBFFFE-BFFFF
        %xCFFFE-CFFFF %xDFFFE-DFFFF %xEFFFE-EFFFF
        %xFFFFE-FFFFF %x10FFFE-10FFFF
        SP < > " { } | \ ^ ` [ ]
        %xE000-F8FF %xF0000-FFFFD %x100000-10FFFD/],
      ALPHA => [qw/A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
        a b c d e f g h i j k l m n o p q r s t u v w x y z/],
      DIGIT => [qw/0 1 2 3 4 5 6 7 8 9/],
      scheme => [qw/ALPHA DIGIT + - ./],
      userinfo => [qw/unreserved sub-delims :/],
      'IP-literal' => [qw/unreserved sub-delims :/],
      'reg-name' => [qw/unreserved sub-delims/],
      pchar => [qw/unreserved sub-delims : @/],
      query => [qw/pchar ?/, '/'],
      fragment => [qw/pchar ? #/, '/'],
    },
  },
];

# The file ends with a true value so that loading it with require succeeds.
1;