package Whatpm::HTMLTable;
use strict;
## An implementation of "Forming a table" algorithm in HTML5
sub form_table ($$$;$) {
my (undef, $table_el, $onerror, $levels) = @_;
$onerror ||= sub { };
$levels ||= {must => 'm'};
## Step 1
my $x_width = 0;
## Step 2
my $y_height = 0;
my $y_max_node;
## Step 3
my $pending_tfoot = [];
## Step 4
my $table = {
#caption
column => [],
column_group => [],
row => [], ## NOTE: HTML5 algorithm doesn't associate rows with
s.
row_group => [],
cell => [],
height => 0,
width => 0,
element => $table_el,
};
my @column_has_anchored_cell;
my @row_has_anchored_cell;
my @column_generated_by;
my @row_generated_by;
## Step 5
my @table_child = @{$table_el->child_nodes};
return $table unless @table_child;
## Step 6
for (0..$#table_child) {
my $el = $table_child[$_];
next unless $el->node_type == 1; # ELEMENT_NODE
next unless $el->manakai_local_name eq 'caption';
my $nsuri = $el->namespace_uri;
next unless defined $nsuri;
next unless $nsuri eq q;
$table->{caption} = {element => $el};
splice @table_child, $_, 1, ();
last;
}
my $process_row_group;
my $end = sub {
## Step 19 (End)
for (@$pending_tfoot) {
$process_row_group->($_);
}
## Step 20
for (0 .. $x_width - 1) {
unless ($column_has_anchored_cell[$_]) {
if ($table->{column}->[$_]) {
$onerror->(type => 'column with no anchored cell',
node => $table->{column}->[$_]->{element},
level => $levels->{must});
} else {
$onerror->(type => 'colspan creates column with no anchored cell',
node => $column_generated_by[$_],
level => $levels->{must});
}
last; # only one error.
}
}
for (0 .. $y_height - 1) {
unless ($row_has_anchored_cell[$_]) {
if ($table->{row}->[$_]) {
$onerror->(type => 'row with no anchored cell',
node => $table->{row}->[$_]->{element},
level => $levels->{must});
} else {
$onerror->(type => 'rowspan creates row with no anchored cell',
node => $row_generated_by[$_],
level => $levels->{must});
}
last; # only one error.
}
}
## Step 21
#return $table;
}; # $end
## Step 7, 8
my $current_element;
my $current_ln;
NEXT_CHILD: {
$current_element = shift @table_child;
if (defined $current_element) {
redo NEXT_CHILD unless $current_element->node_type == 1;
my $nsuri = $current_element->namespace_uri;
redo NEXT_CHILD unless defined $nsuri and
$nsuri eq q;
$current_ln = $current_element->manakai_local_name;
redo NEXT_CHILD unless {
colgroup => 1,
thead => 1,
tbody => 1,
tfoot => 1,
tr => 1,
}->{$current_ln};
} else {
## Step 6 2nd paragraph
$end->();
$table->{width} = $x_width;
$table->{height} = $y_height;
return $table;
}
} # NEXT_CHILD
## Step 9
while ($current_ln eq 'colgroup') { # Step 9, Step 9.4
## Step 9.1: column groups
my @col = grep {
$_->node_type == 1 and
defined $_->namespace_uri and
$_->namespace_uri eq q and
$_->manakai_local_name eq 'col'
} @{$current_element->child_nodes};
if (@col) {
## Step 1
my $x_start = $x_width;
## Step 2, 6
while (@col) {
my $current_column = shift @col;
## Step 3: columns
my $span = 1;
my $col_span = $current_column->get_attribute_ns (undef, 'span');
## Parse non-negative integer
if (defined $col_span and
$col_span =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
$span = $1 || 1;
}
## Step 4, 5
$table->{column}->[$x_width++] = {element => $current_column}
for 1..$span;
}
## Step 7
my $cg = {element => $current_element,
x => $x_start, y => 0,
width => $x_width - $x_start};
$table->{column_group}->[$_] = $cg for $x_start .. $x_width - 1;
} else { # no children
## Step 1
my $span = 1;
my $col_span = $current_element->get_attribute_ns (undef, 'span');
## Parse non-negative integer
if (defined $col_span and
$col_span =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
$span = $1 || 1;
}
## Step 2
$x_width += $span;
## Step 3
my $cg = {element => $current_element,
x => $x_width - $span, y => 0,
width => $span};
$table->{column_group}->[$_] = $cg for $cg->{x} .. $x_width - 1;
}
## Step 9.2, 9.3
NEXT_CHILD: {
$current_element = shift @table_child;
if (defined $current_element) {
redo NEXT_CHILD unless $current_element->node_type == 1;
my $nsuri = $current_element->namespace_uri;
redo NEXT_CHILD unless defined $nsuri and
$nsuri eq q;
$current_ln = $current_element->manakai_local_name;
redo NEXT_CHILD unless {
colgroup => 1,
thead => 1,
tbody => 1,
tfoot => 1,
tr => 1,
}->{$current_ln};
} else {
## End of subsection
## Step 5 of overall steps 2nd paragraph
$end->();
$table->{width} = $x_width;
$table->{height} = $y_height;
return $table;
}
} # NEXT_CHILD
}
## Step 10
my $y_current = 0;
## Step 11
my @downward_growing_cells;
my $growing_downward_growing_cells = sub {
for (@downward_growing_cells) {
for my $x ($_->[1] .. ($_->[1] + $_->[2] - 1)) {
$table->{cell}->[$x]->[$y_current] = [$_->[0]];
$_->[0]->{height}++;
}
}
}; # $growing_downward_growing_cells
my $process_row = sub {
## Step 1
$y_height++ if $y_height == $y_current;
## Step 2
my $x_current = 0;
## Step 5
my $tr = shift;
$table->{row}->[$y_current] = {element => $tr};
my @tdth = grep {
$_->node_type == 1 and
defined $_->namespace_uri and
$_->namespace_uri eq q and
{td => 1, th => 1}->{$_->manakai_local_name}
} @{$tr->child_nodes};
my $current_cell = shift @tdth;
## Step 3
$growing_downward_growing_cells->();
## Step 4
return unless $current_cell;
CELL: while (1) {
## Step 6: cells
$x_current++
while ($x_current < $x_width and
$table->{cell}->[$x_current]->[$y_current]);
## Step 7
$x_width++ if $x_current == $x_width;
## Step 8
my $colspan = 1;
my $attr_value = $current_cell->get_attribute_ns (undef, 'colspan');
if (defined $attr_value
and $attr_value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
$colspan = $1 || 1;
}
## Step 9
my $rowspan = 1;
my $attr_value = $current_cell->get_attribute_ns (undef, 'rowspan');
if (defined $attr_value and
$attr_value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
$rowspan = $1;
}
## Step 10
my $cell_grows_downward;
if ($rowspan == 0) {
$cell_grows_downward = 1;
$rowspan = 1;
}
## Step 11
if ($x_width < $x_current + $colspan) {
@column_generated_by[$_] = $current_cell
for $x_width .. $x_current + $colspan - 1;
$x_width = $x_current + $colspan;
}
## Step 12
if ($y_height < $y_current + $rowspan) {
@row_generated_by[$_] = $current_cell
for $y_height .. $y_current + $rowspan - 1;
$y_height = $y_current + $rowspan;
$y_max_node = $current_cell;
}
## Step 13
my $cell = {
is_header => ($current_cell->manakai_local_name eq 'th'),
element => $current_cell,
x => $x_current, y => $y_current,
width => $colspan, height => $rowspan,
};
$column_has_anchored_cell[$x_current] = 1;
$row_has_anchored_cell[$y_current] = 1;
for my $x ($x_current .. ($x_current + $colspan - 1)) {
for my $y ($y_current .. ($y_current + $rowspan - 1)) {
unless ($table->{cell}->[$x]->[$y]) {
$table->{cell}->[$x]->[$y] = [$cell];
} else {
$onerror->(type => 'cell overlapping',
text => "$x,$y",
node => $current_cell,
level => $levels->{must});
push @{$table->{cell}->[$x]->[$y]}, $cell;
}
}
}
## Whether the cell is an empty data cell or not
if (not $cell->{is_header}) {
$cell->{is_empty} = 1;
for my $node (@{$current_cell->child_nodes}) {
my $nt = $node->node_type;
if ($nt == 3 or $nt == 4) { # TEXT_NODE / CDATA_SECTION_NODE
if ($node->data =~ /\P{WhiteSpace}/) {
delete $cell->{is_empty};
last;
}
} elsif ($nt == 1) { # ELEMENT_NODE
delete $cell->{is_empty};
last;
}
}
## NOTE: Entity references are not supported
}
## Step 14
if ($cell_grows_downward) {
push @downward_growing_cells, [$cell, $x_current, $colspan];
}
## Step 15
$x_current += $colspan;
## Step 16-18
$current_cell = shift @tdth;
if (defined $current_cell) {
## Step 17-18
#
} else {
## Step 16
$y_current++;
last CELL;
}
} # CELL
}; # $process_row
$process_row_group = sub ($) {
## Step 1
my $y_start = $y_height;
## Step 2
for (grep {
$_->node_type == 1 and
defined $_->namespace_uri and
$_->namespace_uri eq q and
$_->manakai_local_name eq 'tr'
} @{$_[0]->child_nodes}) {
$process_row->($_);
}
## Step 3
if ($y_height > $y_start) {
my $rg = {element => $current_element, ## ISSUE: "element being processed"? Otherwise, $current_element may be a thead element while the element being processed is a tfoot element, for example.
x => 0, y => $y_start,
height => $y_height - $y_start};
$table->{row_group}->[$_] = $rg for $y_start .. $y_height - 1;
}
## Step 4
## Ending a row group
## Step 1
while ($y_current < $y_height) {
## Step 1
$growing_downward_growing_cells->();
## Step 2
$y_current++;
}
## Step 2
@downward_growing_cells = ();
}; # $process_row_group
## Step 12: rows
unshift @table_child, $current_element;
ROWS: {
NEXT_CHILD: {
$current_element = shift @table_child;
if (defined $current_element) {
redo NEXT_CHILD unless $current_element->node_type == 1;
my $nsuri = $current_element->namespace_uri;
redo NEXT_CHILD unless defined $nsuri and
$nsuri eq q;
$current_ln = $current_element->manakai_local_name;
redo NEXT_CHILD unless {
thead => 1,
tbody => 1,
tfoot => 1,
tr => 1,
}->{$current_ln};
} else {
## Step 6 2nd paragraph
$end->();
$table->{width} = $x_width;
$table->{height} = $y_height;
return $table;
}
} # NEXT_CHILD
## Step 13
if ($current_ln eq 'tr') {
$process_row->($current_element);
# advance (done at the first of ROWS)
redo ROWS;
}
## Step 14
## Ending a row group
## Step 1
while ($y_current < $y_height) {
## Step 1
$growing_downward_growing_cells->();
## Step 2
$y_current++;
}
## Step 2
@downward_growing_cells = ();
## Step 15
if ($current_ln eq 'tfoot') {
push @$pending_tfoot, $current_element;
# advance (done at the top of ROWS)
redo ROWS;
}
## Step 16
# thead or tbody
$process_row_group->($current_element);
## Step 17
# Advance (done at the top of ROWS).
## Step 18
redo ROWS;
} # ROWS
$end->();
$table->{width} = $x_width;
$table->{height} = $y_height;
return $table;
} # form_table
sub assign_header ($$;$$) {
my (undef, $table, $onerror, $levels) = @_;
$onerror ||= sub { };
$levels ||= {must => 'm'};
my $assign_header = sub ($$$) {
my $_cell = shift;
my ($x, $y) = @_;
for my $__cell (@{$_cell or []}) {
if ($__cell and $__cell->{element} and
not $__cell->{is_header} and
not $__cell->{element}->has_attribute_ns (undef, 'headers')) {
$__cell->{header}->{$x}->{$y} = 1;
}
}
}; # $assign_header
my @headers_cell;
my $id_to_cell = {};
## ISSUE: ID duplication, non-TH reference
for my $x (0 .. $table->{width} - 1) {
for my $y (0 .. $table->{height} - 1) {
my $cell = $table->{cell}->[$x]->[$y];
$cell = $cell->[0] if $cell; # anchored cell is always ->{cell}[][][0].
next if $cell->{x} != $x;
next if $cell->{y} != $y;
if ($cell) {
if ($cell->{is_header}) {
my $id = $cell->{element}->get_attribute_ns (undef, 'id');
if (defined $id and not $id_to_cell->{$id}) {
$id_to_cell->{$id} = $cell;
}
my $scope = $cell->{element}->get_attribute_ns (undef, 'scope');
$scope = $scope ? lc $scope : ''; ## TODO: case
if ($scope eq 'row') {
for my $_x ($x + $cell->{width} .. $table->{width} - 1) {
for my $_y ($y .. $y + $cell->{height} - 1) {
$assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
}
}
} elsif ($scope eq 'col') {
for my $_x ($x .. $x + $cell->{width} - 1) {
for my $_y ($y .. $table->{height} - 1) {
$assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
}
}
} elsif ($scope eq 'rowgroup') {
## NOTE: A cell cannot exceed across a row group boundary.
if ($table->{row_group}->[$y] and
$table->{row_group}->[$y]->{height}) {
for my $_x ($x .. $table->{width} - 1) {
for my $_y ($y ..
$table->{row_group}->[$y]->{y} +
$table->{row_group}->[$y]->{height} - 1) {
$assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
}
}
}
## TODO: Should we raise a warning?
} elsif ($scope eq 'colgroup') {
if ($table->{column_group}->[$x] and
$table->{column_group}->{width} and
$table->{column_group}->[$x]->{x} == $x) { # anchored
for my $_x ($x ..
$table->{column_group}->[$x]->{x} +
$table->{column_group}->[$x]->{width} - 1) {
for my $_y ($y .. $table->{height} - 1) {
$assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
}
}
}
## TODO: Warning?
} else { # auto
## 1.
my $header_width = $cell->{width};
W: for ($x + $cell->{width} .. $table->{width} - 1) {
my $_cell = $table->{cell}->[$_]->[$y];
for (@{$_cell or []}) {
if ($_->{element} and not $_->{is_empty}) {
last W; # not empty
}
}
$header_width++;
} # W
## 2.
my $_x = $x + $header_width;
## 3.
my $_y = $y + $cell->{height}; # $cell->{height} == header_{height}
## 4.
HORIZONTAL: {
last HORIZONTAL if $_x == $table->{width}; # goto Vertical
## 5. # goto Vertical
last HORIZONTAL
if $table->{cell}->[$_x]->[$y] and
$table->{cell}->[$_x]->[$y]->[0] and # anchored
$table->{cell}->[$_x]->[$y]->[0]->{is_header};
## 6.
for my $_y ($y .. $y + $cell->{height} - 1) {
$assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
}
## 7.
$_x++;
## 8.
redo HORIZONTAL;
} # HORIZONTAL
## 9. Vertical
VERTICAL: {
last VERTICAL if $_y == $table->{height}; # goto END
## 10.
if ($table->{cell}->[$x]->[$_y]) {
my $h_cell = $table->{cell}->[$x]->[$_y]->[0]; # anchored cell
if ($h_cell and $h_cell->{is_header}) {
## 10.1.
my $width = $h_cell->{width};
W: for ($h_cell->{x} + $width .. $table->{width} - 1) {
my $_cell = $table->{cell}->[$_]->[$y];
for (@{$_cell or []}) {
if ($_->{element} and not $_->{is_empty}) {
last W; # not empty
}
}
$width++;
} # W
## 10.2. # goto end
last VERTICAL if $width == $header_width;
} # 10.
}
## 11.
for my $_x ($x .. $x + $header_width - 1) {
$assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
}
## 12.
$_y++;
## 13. # goto vertical (wrong)
redo VERTICAL;
} # VERTICAL
## 14. End
# (we have already done)
}
} else { # data cell
if ($cell->{element} and
$cell->{element}->has_attribute_ns (undef, 'headers')) {
push @headers_cell, $cell;
}
}
}
}
}
for my $headers_cell (@headers_cell) {
my @headers = split /[\x09\x0A\x0C\x0D\x20]+/,
$headers_cell->{element}->get_attribute_ns (undef, 'headers');
my %headers;
for my $header_id (@headers) {
next unless length $header_id;
if ($headers{$header_id}) {
$onerror->(type => 'duplicate token', value => $header_id,
node => $headers_cell->{element}->get_attribute_node_ns
(undef, 'headers'),
level => $levels->{must});
next;
}
$headers{$header_id} = 1;
if ($id_to_cell->{$header_id}) {
my $header_cell = $id_to_cell->{$header_id};
$headers_cell->{header}->{$header_cell->{x}}->{$header_cell->{y}} = 1;
} else {
$onerror->(type => 'no referenced header cell', value => $header_id,
node => $headers_cell->{element}->get_attribute_node_ns
(undef, 'headers'),
level => $levels->{must});
}
}
}
## NOTE: The "tree order" constraints in the spec algorithm are irrelevant
## in fact.
## NOTE: We does not support ID attributes other than HTML "id" attribute.
} # assign_header
1;
## $Date: 2008/09/20 11:25:56 $