use strict;

# Split a specification string into its tokens.
#
# Takes one string and returns the list of non-empty tokens found between
# runs of whitespace, "|" and "/" characters.  Because "|" and "/" act as
# separators they can never come back as tokens; callers that need them as
# members append them to the result explicitly.
#
# The ($) prototype is load-bearing: it makes split1 parse as a named unary
# operator, so `split1 q<...>, '/'` hands only the first string to split1
# and leaves '/' as a separate element of the surrounding list.
sub split1 ($) {
  my $spec = shift;
  # A string that starts with a separator (e.g. a literal whose content
  # begins on the following line) would otherwise produce an empty leading
  # field; keep only real tokens.
  return grep { length } split m#[\s|/]+#, $spec;
}
# Return a copy of the given string with every double-quote character
# removed.  The argument itself is not modified.
#
# The ($) prototype lets call sites write `rq q<...>` as a unary operator.
sub rq ($) {
  # Copy first, then delete the quotes from the copy.
  (my $unquoted = $_[0]) =~ tr/"//d;
  return $unquoted;
}

# Table of character-class definitions, one entry per URI/IRI/URL
# specification.  Each entry is a hash with:
#   name  - a label for the specification;
#   class - a hash mapping a class (production) name to an array reference
#           listing its members.
# Members are written as literal characters, as "%x..." range notations, as
# symbolic names such as SP, or as the names of other classes (for example
# `alphadigit` lists `alpha` and `digit`).
#
# Lists built with split1 cannot carry "|" or "/" (both are separators
# there), and strings passed through rq lose every '"'.  Where one of those
# characters is itself a member it is appended as a separate list element
# after the split1 call.
our $LIST = [
  {
    name => q<URI (RFC 1630)>,
    class => {
      alpha => [split1 q<a | b | c | d | e | f | g | h | i | j | k |
                            l | m | n | o  | p | q | r | s | t | u | v |
                            w | x | y | z | A | B | C  | D | E | F | G |
                            H | I | J | K | L | M | N | O | P |  Q | R |
                            S | T | U | V | W | X | Y | Z>],
      xalpha => [split1 q<alpha | digit | safe | extra | escape>],
      xpalpha => [split1 q<xalpha  +>],
      digit => [split1 q<0 |1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9>],
      safe => [split1 q<$ | - | _ | @ | . | &>],
      extra => [split1 q<! | * | " |  ' | ( | ) | ,>],
      # "/" is added separately because split1 treats it as a separator.
      reserved => [split1 q<= | ; | # | ? | : | space>, '/'],
      hex => [split1 q<digit | a | b | c | d | e | f | A | B | C |
                            D | E | F>],
      # "|" is added separately because split1 treats it as a separator.
      national => [split1 q<{ | } | [ | ] | \ | ^ | ~>, '|'],
      punctuation => ['<', '>'],
      alphanum2 => [split1 q<alpha | digit | - | _ | . | +>],
      'Reserved characters' => ['%', '/', '.', '#', '?', '+', '*', '!'],
      'Unsafe characters' => ['%x00-1F', 'SP', '%x7F', '%x80-9F', '%xA0-FF'],
    },
  },
  {
    name => q<URL (RFC 1738)>,
    class => {
      alphadigit => ['alpha', 'digit'],
      lowalpha => [split1 rq q<"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
                 "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
                 "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
                 "y" | "z">],
      hialpha => [split1 rq q<"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
                 "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
                 "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z">],
      alpha => ['hialpha', 'lowalpha'],
      digit => [split1 rq q<"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
                 "8" | "9">],
      safe => [split1 rq q<"$" | "-" | "_" | "." | "+">],
      extra => [split1 rq q<"!" | "*" | "'" | "(" | ")" | ",">],
      # The "|" member cannot live inside the split1 string, so it is
      # appended; whitespace alone separates "}" from "\" here.
      national => [split1 rq q<"{" | "}" "\" | "^" | "~" | "[" | "]" | "`">, '|'],
      # The '"' member is appended because rq removes every double quote.
      punctuation => [split1 rq q<"<" | ">" | "#" | "%">, '"'],
      reserved => [split1 rq q<";" | "?" | ":" | "@" | "&" | "=">, '/'],
      hex => [split1 rq q<digit | "A" | "B" | "C" | "D" | "E" | "F" |
                 "a" | "b" | "c" | "d" | "e" | "f">],
      uchar => [qw/unreserved escape/],
      unreserved => [qw/alpha digit safe extra/],
      xchar => [qw/unreserved reserved escape/],
      scheme => [qw/lowalpha digit + - ./],
      'No corresponding graphic US-ASCII' => [qw/%x80-9F %xA0-FF %x00-1F %x7F/],
      'Unsafe' => [qw/< > " # % { } | \ ^ ~ [ ] `/, 'SP'],
      user => [qw/uchar ; ? & =/],
      password => [qw/uchar ; ? & =/],
    },
  },
  {
    name => q<Relative URL (RFC 1808)>,
    class => {
      param => [qw#pchar /#],
      scheme => [qw/alpha digit + - ./],
      net_loc => [qw/pchar ; ?/],
      query => [qw/uchar reserved/],
      fragment => [qw/uchar reserved/],
      pchar => [qw/uchar : @ & =/],
      uchar => [qw/unreserved escape/],
      unreserved => [qw/alpha digit safe extra/],
      hex => [split1 rq q<digit | "A" | "B" | "C" | "D" | "E" | "F" |
                         "a" | "b" | "c" | "d" | "e" | "f">],
      lowalpha => [split1 rq q<"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
                 "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
                 "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
                 "y" | "z">],
      hialpha => [split1 rq q<"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
                 "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
                 "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z">],
      alpha => ['hialpha', 'lowalpha'],
      digit => [split1 rq q<"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
                 "8" | "9">],
      safe => [split1 rq q<"$" | "-" | "_" | "." | "+">],
      extra => [split1 rq q<"!" | "*" | "'" | "(" | ")" | ",">],
      # "|" is appended separately; see the table header comment.
      national => [split1 rq q<"{" | "}" "\" | "^" | "~" | "[" | "]" | "`">, '|'],
      reserved => [split1 rq q<";" | "?" | ":" | "@" | "&" | "=">, '/'],
      punctuation => [qw/< > # % "/],
    },
  },
  {
    name => q<URI (RFC 2396)>,
    class => {
      uric_no_slash => [split1 rq q<unreserved | escaped | ";" | "?" | ":" | "@" |
                      "&" | "=" | "+" | "$" | ",">],
      rel_segment => [qw<unreserved escaped ; @ & = + $ ,>],
      scheme => [qw/alpha digit + - ./],
      reg_name => [qw/unreserved escaped $ , ; : @ & = +/],
      userinfo => [qw/unreserved escaped ; : & = + $ ,/],
      pchar => [qw/unreserved escaped : @ & = + $ ,/],
      query => [qw/uric/],
      fragment => [qw/uric/],
      uric => [qw/reserved unreserved escaped/],
      reserved => [split1 rq q<";" | "?" | ":" | "@" | "&" | "=" | "+" |
                      "$" | ",">, '/'],
      unreserved => [qw/alphanum mark/],
      mark => [split1 rq q<"-" | "_" | "." | "!" | "~" | "*" | "'" |
                      "(" | ")">],
      alphanum => [qw/alpha digit/],
      alpha => [qw/lowalpha upalpha/],
      lowalpha => [split1 rq q<"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
                 "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
                 "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z">],
      upalpha => [split1 rq q<"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
                 "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
                 "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z">],
      digit => [split1 rq q<"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
                 "8" | "9">],
      hex => [qw/digit A B C D E F a b c d e f/],
      'Excluded US-ASCII Characters' => [qw/control space delims unwise/],
      control => [qw/%x00-1F %x7F/],
      space => [qw/SP/],
      delims => [qw/< > # % "/],
      unwise => [split1 rq q/"{" | "}" | "\" | "^" | "[" | "]" | "`"/, '|'],
    },
  },
  # Same as the RFC 2396 entry except that "[" and "]" appear in `reserved`
  # and are absent from `unwise`.
  {
    name => q<URI (RFC 2396 + RFC 2732)>,
    class => {
      uric_no_slash => [split1 rq q<unreserved | escaped | ";" | "?" | ":" | "@" |
                      "&" | "=" | "+" | "$" | ",">],
      rel_segment => [qw<unreserved escaped ; @ & = + $ ,>],
      scheme => [qw/alpha digit + - ./],
      reg_name => [qw/unreserved escaped $ , ; : @ & = +/],
      userinfo => [qw/unreserved escaped ; : & = + $ ,/],
      pchar => [qw/unreserved escaped : @ & = + $ ,/],
      query => [qw/uric/],
      fragment => [qw/uric/],
      uric => [qw/reserved unreserved escaped/],
      reserved => [split1 rq q<";" | "?" | ":" | "@" | "&" | "=" | "+" |
                      "$" | "," [ ]>, '/'],
      unreserved => [qw/alphanum mark/],
      mark => [split1 rq q<"-" | "_" | "." | "!" | "~" | "*" | "'" |
                      "(" | ")">],
      alphanum => [qw/alpha digit/],
      alpha => [qw/lowalpha upalpha/],
      lowalpha => [split1 rq q<"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
                 "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
                 "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z">],
      upalpha => [split1 rq q<"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
                 "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
                 "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z">],
      digit => [split1 rq q<"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
                 "8" | "9">],
      hex => [qw/digit A B C D E F a b c d e f/],
      'Excluded US-ASCII Characters' => [qw/control space delims unwise/],
      control => [qw/%x00-1F %x7F/],
      space => [qw/SP/],
      delims => [qw/< > # % "/],
      unwise => [split1 rq q/"{" | "}" | "\" | "^" | "`"/, '|'],
    },
  },
  {
    name => q<URI (RFC 3986)>,
    class => {
      reserved => [qw/gen-delims sub-delims/],
      'gen-delims' => [qw/: ? # [ ] @/, '/'],
      'sub-delims' => [qw/! $ & ' ( ) * + , ; =/],
      unreserved => [qw/ALPHA DIGIT - . _ ~/],
      ALPHA => [qw/A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
                   a b c d e f g h i j k l m n o p q r s t u v w x y z/],
      DIGIT => [qw/0 1 2 3 4 5 6 7 8 9/],
      scheme => [qw/ALPHA DIGIT + - ./],
      userinfo => [qw/unreserved sub-delims :/],
      'IP-literal' => [qw/unreserved sub-delims :/],
      'reg-name' => [qw/unreserved sub-delims/],
      pchar => [qw/unreserved sub-delims : @/],
      query => [qw/pchar ?/, '/'],
      fragment => [qw/pchar ?/, '/'],
    },
  },
  # The class keys below reuse the RFC 3986 entry's names; the trailing
  # comments give the corresponding "i"-prefixed production names.
  {
    name => q<IRI (RFC 3987)>,
    class => {
      reserved => [qw/gen-delims sub-delims/],
      'gen-delims' => [qw/: ? # [ ] @/, '/'],
      'sub-delims' => [qw/! $ & ' ( ) * + , ; =/],
      unreserved => [qw/ALPHA DIGIT - . _ ~ ucschar/], # iunreserved
      ALPHA => [qw/A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
                   a b c d e f g h i j k l m n o p q r s t u v w x y z/],
      DIGIT => [qw/0 1 2 3 4 5 6 7 8 9/],
      scheme => [qw/ALPHA DIGIT + - ./],
      userinfo => [qw/unreserved sub-delims :/], # iuserinfo
      'IP-literal' => [qw/unreserved sub-delims :/],
      'reg-name' => [qw/unreserved sub-delims/], # ireg-name
      pchar => [qw/unreserved sub-delims : @/], # ipchar
      query => [qw/pchar ?/, '/', qw/iprivate/], # iquery
      fragment => [qw/pchar ?/, '/'], # ifragment
      ucschar => [qw/%xA0-FF %x0100-D7FF %xF900-FDCF %xFDF0-FFEF
                     %x10000-1FFFD %x20000-2FFFD %x30000-3FFFD
                     %x40000-4FFFD %x50000-5FFFD %x60000-6FFFD
                     %x70000-7FFFD %x80000-8FFFD %x90000-9FFFD
                     %xA0000-AFFFD %xB0000-BFFFD %xC0000-CFFFD
                     %xD0000-DFFFD %xE1000-EFFFD/],
      iprivate => [qw/%xE000-F8FF %xF0000-FFFFD %x100000-10FFFD/],
    },
  },
  # NOTE(review): the label reads "LRIRI" although the cited note is
  # "leiri"; it is left unchanged in case consumers match on the label --
  # confirm whether it should read "LEIRI".
  {
    name => q<LRIRI (NOTE-leiri-20081103)>,
    class => {
      reserved => [qw/gen-delims sub-delims/],
      'gen-delims' => [qw/: ? # [ ] @/, '/'],
      'sub-delims' => [qw/! $ & ' ( ) * + , ; =/],
      unreserved => [qw/ALPHA DIGIT - . _ ~ ucschar/], # iunreserved
      ALPHA => [qw/A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
                   a b c d e f g h i j k l m n o p q r s t u v w x y z/],
      DIGIT => [qw/0 1 2 3 4 5 6 7 8 9/],
      scheme => [qw/ALPHA DIGIT + - ./],
      userinfo => [qw/unreserved sub-delims :/], # iuserinfo
      'IP-literal' => [qw/unreserved sub-delims :/],
      'reg-name' => [qw/unreserved sub-delims/], # ireg-name
      pchar => [qw/unreserved sub-delims : @/], # ipchar
      query => [qw/pchar ?/, '/', qw/iprivate/], # iquery
      fragment => [qw/pchar ?/, '/'], # ifragment
      # The first six lines repeat the RFC 3987 entry's ucschar ranges; the
      # remaining lines are the additions for legacy extended IRIs.
      # %xE0000-E0FFF is listed among them because the LEIRI note defines
      # ucschar as covering all of %x10000-10FFFF, so the range that the
      # RFC 3987 ucschar skips has to be included here.
      ucschar => [qw/%xA0-FF %x0100-D7FF %xF900-FDCF %xFDF0-FFEF
                     %x10000-1FFFD %x20000-2FFFD %x30000-3FFFD
                     %x40000-4FFFD %x50000-5FFFD %x60000-6FFFD
                     %x70000-7FFFD %x80000-8FFFD %x90000-9FFFD
                     %xA0000-AFFFD %xB0000-BFFFD %xC0000-CFFFD
                     %xD0000-DFFFD %xE1000-EFFFD
                     %x00-1F %x7F %x80-9F %xE000-F8FF %xFDD0-FDEF
                     %xFFF0-FFFD %x1FFFE-1FFFF %x2FFFE-2FFFF
                     %x3FFFE-3FFFF %x4FFFE-4FFFF %x5FFFE-5FFFF
                     %x6FFFE-6FFFF %x7FFFE-7FFFF %x8FFFE-8FFFF
                     %x9FFFE-9FFFF %xAFFFE-AFFFF %xBFFFE-BFFFF
                     %xCFFFE-CFFFF %xDFFFE-DFFFF %xE0000-E0FFF
                     %xEFFFE-EFFFF
                     %xF0000-FFFFD %xFFFFE-FFFFF
                     %x100000-10FFFD %x10FFFE-10FFFF
                     SP < > " { } | \ ^ `/],
      iprivate => [qw/%xE000-F8FF %xF0000-FFFFD %x100000-10FFFD/],
    },
  },
  # In this entry the extended ranges are listed directly in `unreserved`
  # rather than through separate ucschar/iprivate classes.
  {
    name => q<URL parsing (WA1 r3244)>,
    class => {
      reserved => [qw/gen-delims sub-delims/],
      'gen-delims' => [qw/: ? # [ ] @/, '/'],
      'sub-delims' => [qw/! $ & ' ( ) * + , ; =/],
      unreserved => [qw/ALPHA DIGIT - . _ ~
                     %xA0-FF %x0100-D7FF %xF900-FDCF %xFDF0-FFEF
                     %x10000-1FFFD %x20000-2FFFD %x30000-3FFFD
                     %x40000-4FFFD %x50000-5FFFD %x60000-6FFFD
                     %x70000-7FFFD %x80000-8FFFD %x90000-9FFFD
                     %xA0000-AFFFD %xB0000-BFFFD %xC0000-CFFFD
                     %xD0000-DFFFD %xE1000-EFFFD
                     %x00-1F %x7F %x80-9F %xFDD0-FDEF
                     %xFFF0-FFFD %x1FFFE-1FFFF %x2FFFE-2FFFF
                     %x3FFFE-3FFFF %x4FFFE-4FFFF %x5FFFE-5FFFF
                     %x6FFFE-6FFFF %x7FFFE-7FFFF %x8FFFE-8FFFF
                     %x9FFFE-9FFFF %xAFFFE-AFFFF %xBFFFE-BFFFF
                     %xCFFFE-CFFFF %xDFFFE-DFFFF %xEFFFE-EFFFF
                     %xFFFFE-FFFFF %x10FFFE-10FFFF
                     SP < > " { } | \ ^ ` [ ]
                     %xE000-F8FF %xF0000-FFFFD %x100000-10FFFD/],
      ALPHA => [qw/A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
                   a b c d e f g h i j k l m n o p q r s t u v w x y z/],
      DIGIT => [qw/0 1 2 3 4 5 6 7 8 9/],
      scheme => [qw/ALPHA DIGIT + - ./],
      userinfo => [qw/unreserved sub-delims :/],
      'IP-literal' => [qw/unreserved sub-delims :/],
      'reg-name' => [qw/unreserved sub-delims/],
      pchar => [qw/unreserved sub-delims : @/],
      query => [qw/pchar ?/, '/'],
      fragment => [qw/pchar ? #/, '/'],
    },
  },
];

1;
