#!/usr/bin/perl
use strict;
require 'uricls.pl';
our $LIST;

my %argv;
for (@ARGV) {
  $argv{$1} = 1 if /^--([^\s=]+)$/;
}
if ($argv{help}) {
  require Pod::Usage;
  Pod::Usage::pod2usage (-exitval => 0, -verbose => 1);
}

sub add (%) {
  my (%opt) = @_;
  push @{$opt{table}->[0]}, $opt{name};
  push @{$opt{table}->[$_]}, [] for 1..$#{$opt{table}};
  for my $class (sort keys %{$opt{list}}) {
    ELEMENT: for my $element (@{$opt{list}->{$class}}) {
      for (@{$opt{table}}) {
        if ($_->[0] eq $element) {
          push @{$_->[-1]}, $class;
          next ELEMENT;
        }
      }
      my $row = [$element];
      for (my $i = $#{$opt{table}->[0]} - 1; $i > 0; $i--) {
        push @$row, [];
      }
      push @$row, [$class];
      push @{$opt{table}}, $row;
    }
  }
}

sub element ($) {
  my $el = shift;
  if (length $el > 1 || $el =~ /[A-Z%]/) {
    qq<<code>$el</code>>;
  } else {
    qq<<code class="char">$el</code>>;
  }
}

sub elements ($) {
  join ', ', map {element $_} @{$_[0]};
}

sub serialize ($$) {
  my ($tbl, $span) = @_;
  my $r = q<<table border><thead><tr>>;
  $r .= join '', map {qq<<th>$_</th>>} @{$tbl->[0]};
  $r .= q<</tr></thead><tbody>>;
  for my $i (1..$#$tbl) {
    $r .= q<<tr>>;
    $r .= q<<th>>.element ($tbl->[$i]->[0]).q<</th>>;
    for my $j (1..$#{$tbl->[$i]}) {
      if ($tbl->[$i]->[$j]) {
        $r .= q{<td};
        $r .= q< rowspan=">.$span->[$i]->[$j]->{row}.q<"> if $span->[$i]->[$j]->{row};
        $r .= q< colspan=">.$span->[$i]->[$j]->{col}.q<"> if $span->[$i]->[$j]->{col};
        $r .= q{>};
        $r .= elements ($tbl->[$i]->[$j]) || '-';
        $r .= q<</td>>;
      }
    }
    $r .= qq<</tr>\n>;
  }
  $r .= qq<</tbody></table>\n>;
  $r;
}

sub equal ($$) {
  my ($x, $y) = @_;
  return 0 unless @$x and @$y;
  return 0 unless @$x == @$y;
  for (0..$#$x) {
    return 0 unless $x->[$_] eq $y->[$_];
  }
  return 1;
}

sub merge_row ($$) {
  my ($tbl, $span) = @_;
  for my $i (1..($#$tbl-1)) {
    for my $j (1..$#{$tbl->[$i]}) {
      if ($tbl->[$i]->[$j]) {
        my $s = 1;
        for my $ii (($i+1)..$#$tbl) {
          if (equal $tbl->[$i]->[$j] => $tbl->[$ii]->[$j]) {
            $s++;
            $tbl->[$ii]->[$j] = 0;
          } else {
            last;
          }
        }
        $span->[$i]->[$j]->{row} = $s if $s > 1;
      }
    }
  }
}

sub merge_col ($$) {
  my ($tbl, $span) = @_;
  for my $i (1..$#$tbl) {
    for my $j (1..($#{$tbl->[$i]}-1)) {
      if ($tbl->[$i]->[$j]) {
        my $s = 1;
        for my $jj (($j+1)..$#{$tbl->[$i]}) {
          if ($tbl->[$i]->[$jj] and
              $span->[$i]->[$j]->{row} == $span->[$i]->[$jj]->{row} and
              equal $tbl->[$i]->[$j] => $tbl->[$i]->[$jj]) {
            $s++;
            $tbl->[$i]->[$jj] = 0;
          } else {
            last;
          }
        }
        $span->[$i]->[$j]->{col} = $s if $s > 1;
      }
    }
  }
}

sub remove_empty_row ($) {
  my @tbl = @{+shift};
  ROW: for (my $i = $#tbl; $i > 1; $i--) {
    for my $j (1..$#{$tbl[$i]}) {
      next ROW if not $tbl[$i]->[$j] or
                  @{$tbl[$i]->[$j]};
    }
    delete $tbl[$i];
  }
  [grep defined $_, @tbl];
}

my $tbl = [map {[$_]} '', qw<alpha digit - _ . * ! ' ( ) , + : ; = $ @ & ? / [ ] { } \ ^ | ` % # " < > >, 'SP', '%x00-1F', '%x7F'];
my @span;

for (@$LIST) {
  add table => $tbl, name => $_->{name}, list => $_->{class};
}
$tbl = remove_empty_row $tbl;
merge_row $tbl, \@span;
merge_col $tbl, \@span;

print STDOUT qq<<!DOCTYPE table SYSTEM>\n>;
print STDOUT serialize $tbl, \@span;

=head1 NAME

mkclstbl - URI Character Class History Table Generator

=head1 SYNOPSIS

  mkclstbl.pl > table.html
  mkclstbl.pl --help

=head1 DESCRIPTION

This script generates an HTML table that describes what character belongs
to what character class(es) in versions of URI specification.

=head1 OPTIONS

This script can be specified an arguments:

=over 4

=item --help

Show help message.

=back

=head1 SEE ALSO

URI Specifications and DTDs.

<http://suika.fam.cx/www/2004/uri/>.

=head1 LICENSE

Copyright 2004 Wakaba <w@suika.fam.cx>

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

# $Date: 2010/06/18 14:23:05 $
