| 1 | wakaba | 1.1 | package Whatpm::HTMLTable; | 
| 2 |  |  | use strict; | 
| 3 |  |  |  | 
| 4 |  |  | ## An implementation of "Forming a table" algorithm in HTML5 | 
| 5 | wakaba | 1.9 | sub form_table ($$$;$) { | 
| 6 | wakaba | 1.15 | my (undef, $table_el, $onerror, $levels) = @_; | 
| 7 | wakaba | 1.2 | $onerror ||= sub { }; | 
| 8 | wakaba | 1.15 | $levels ||= {must => 'm'}; | 
| 9 | wakaba | 1.1 |  | 
| 10 |  |  | ## Step 1 | 
| 11 | wakaba | 1.7 | my $x_width = 0; | 
| 12 | wakaba | 1.1 |  | 
| 13 |  |  | ## Step 2 | 
| 14 | wakaba | 1.7 | my $y_height = 0; | 
| 15 | wakaba | 1.1 | my $y_max_node; | 
| 16 | wakaba | 1.8 |  | 
| 17 |  |  | ## Step 3 | 
| 18 |  |  | my $pending_tfoot = []; | 
| 19 | wakaba | 1.1 |  | 
| 20 | wakaba | 1.8 | ## Step 4 | 
| 21 | wakaba | 1.1 | my $table = { | 
| 22 |  |  | #caption | 
| 23 |  |  | column => [], | 
| 24 |  |  | column_group => [], | 
| 25 | wakaba | 1.9 | row => [], ## NOTE: HTML5 algorithm doesn't associate rows with <tr>s. | 
| 26 | wakaba | 1.1 | row_group => [], | 
| 27 |  |  | cell => [], | 
| 28 | wakaba | 1.11 | height => 0, | 
| 29 |  |  | width => 0, | 
| 30 | wakaba | 1.12 | element => $table_el, | 
| 31 | wakaba | 1.1 | }; | 
| 32 |  |  |  | 
| 33 | wakaba | 1.9 | my @column_has_anchored_cell; | 
| 34 |  |  | my @row_has_anchored_cell; | 
| 35 | wakaba | 1.1 | my @column_generated_by; | 
| 36 | wakaba | 1.9 | my @row_generated_by; | 
| 37 |  |  |  | 
| 38 |  |  | ## Step 5 | 
| 39 |  |  | my @table_child = @{$table_el->child_nodes}; | 
| 40 |  |  | return $table unless @table_child; | 
| 41 |  |  |  | 
| 42 | wakaba | 1.10 | ## Step 6 | 
| 43 |  |  | for (0..$#table_child) { | 
| 44 |  |  | my $el = $table_child[$_]; | 
| 45 |  |  | next unless $el->node_type == 1; # ELEMENT_NODE | 
| 46 |  |  | next unless $el->manakai_local_name eq 'caption'; | 
| 47 |  |  | my $nsuri = $el->namespace_uri; | 
| 48 |  |  | next unless defined $nsuri; | 
| 49 |  |  | next unless $nsuri eq q<http://www.w3.org/1999/xhtml>; | 
| 50 |  |  | $table->{caption} = {element => $el}; | 
| 51 |  |  | splice @table_child, $_, 1, (); | 
| 52 |  |  | last; | 
| 53 |  |  | } | 
| 54 |  |  |  | 
| 55 | wakaba | 1.9 | my $process_row_group; | 
| 56 |  |  | my $end = sub { | 
| 57 | wakaba | 1.10 | ## Step 19 (End) | 
| 58 | wakaba | 1.9 | for (@$pending_tfoot) { | 
| 59 |  |  | $process_row_group->($_); | 
| 60 |  |  | } | 
| 61 |  |  |  | 
| 62 | wakaba | 1.10 | ## Step 20 | 
| 63 | wakaba | 1.7 | for (0 .. $x_width - 1) { | 
| 64 | wakaba | 1.9 | unless ($column_has_anchored_cell[$_]) { | 
| 65 | wakaba | 1.1 | if ($table->{column}->[$_]) { | 
| 66 |  |  | $onerror->(type => 'column with no anchored cell', | 
| 67 | wakaba | 1.9 | node => $table->{column}->[$_]->{element}, | 
| 68 | wakaba | 1.15 | level => $levels->{must}); | 
| 69 | wakaba | 1.1 | } else { | 
| 70 | wakaba | 1.4 | $onerror->(type => 'colspan creates column with no anchored cell', | 
| 71 | wakaba | 1.9 | node => $column_generated_by[$_], | 
| 72 | wakaba | 1.15 | level => $levels->{must}); | 
| 73 | wakaba | 1.1 | } | 
| 74 | wakaba | 1.9 | last; # only one error. | 
| 75 | wakaba | 1.1 | } | 
| 76 |  |  | } | 
| 77 | wakaba | 1.9 | for (0 .. $y_height - 1) { | 
| 78 |  |  | unless ($row_has_anchored_cell[$_]) { | 
| 79 |  |  | if ($table->{row}->[$_]) { | 
| 80 |  |  | $onerror->(type => 'row with no anchored cell', | 
| 81 |  |  | node => $table->{row}->[$_]->{element}, | 
| 82 | wakaba | 1.15 | level => $levels->{must}); | 
| 83 | wakaba | 1.9 | } else { | 
| 84 |  |  | $onerror->(type => 'rowspan creates row with no anchored cell', | 
| 85 |  |  | node => $row_generated_by[$_], | 
| 86 | wakaba | 1.15 | level => $levels->{must}); | 
| 87 | wakaba | 1.9 | } | 
| 88 |  |  | last; # only one error. | 
| 89 |  |  | } | 
| 90 |  |  | } | 
| 91 |  |  |  | 
| 92 | wakaba | 1.10 | ## Step 21 | 
| 93 | wakaba | 1.9 | #return $table; | 
| 94 |  |  | }; # $end | 
| 95 | wakaba | 1.1 |  | 
| 96 | wakaba | 1.10 | ## Step 7, 8 | 
| 97 | wakaba | 1.1 | my $current_element; | 
| 98 |  |  | my $current_ln; | 
| 99 |  |  | NEXT_CHILD: { | 
| 100 |  |  | $current_element = shift @table_child; | 
| 101 |  |  | if (defined $current_element) { | 
| 102 |  |  | redo NEXT_CHILD unless $current_element->node_type == 1; | 
| 103 |  |  | my $nsuri = $current_element->namespace_uri; | 
| 104 |  |  | redo NEXT_CHILD unless defined $nsuri and | 
| 105 |  |  | $nsuri eq q<http://www.w3.org/1999/xhtml>; | 
| 106 |  |  | $current_ln = $current_element->manakai_local_name; | 
| 107 |  |  |  | 
| 108 |  |  | redo NEXT_CHILD unless { | 
| 109 |  |  | colgroup => 1, | 
| 110 |  |  | thead => 1, | 
| 111 |  |  | tbody => 1, | 
| 112 |  |  | tfoot => 1, | 
| 113 |  |  | tr => 1, | 
| 114 |  |  | }->{$current_ln}; | 
| 115 |  |  | } else { | 
| 116 | wakaba | 1.8 | ## Step 6 2nd paragraph | 
| 117 | wakaba | 1.9 | $end->(); | 
| 118 | wakaba | 1.11 | $table->{width} = $x_width; | 
| 119 |  |  | $table->{height} = $y_height; | 
| 120 | wakaba | 1.9 | return $table; | 
| 121 | wakaba | 1.1 | } | 
| 122 |  |  | } # NEXT_CHILD | 
| 123 |  |  |  | 
| 124 | wakaba | 1.10 | ## Step 9 | 
| 125 |  |  | while ($current_ln eq 'colgroup') { # Step 9, Step 9.4 | 
| 126 |  |  | ## Step 9.1: column groups | 
| 127 | wakaba | 1.1 | my @col = grep { | 
| 128 |  |  | $_->node_type == 1 and | 
| 129 |  |  | defined $_->namespace_uri and | 
| 130 |  |  | $_->namespace_uri eq q<http://www.w3.org/1999/xhtml> and | 
| 131 |  |  | $_->manakai_local_name eq 'col' | 
| 132 |  |  | } @{$current_element->child_nodes}; | 
| 133 |  |  | if (@col) { | 
| 134 |  |  | ## Step 1 | 
| 135 | wakaba | 1.7 | my $x_start = $x_width; | 
| 136 | wakaba | 1.1 |  | 
| 137 |  |  | ## Step 2, 6 | 
| 138 |  |  | while (@col) { | 
| 139 |  |  | my $current_column = shift @col; | 
| 140 |  |  |  | 
| 141 |  |  | ## Step 3: columns | 
| 142 |  |  | my $span = 1; | 
| 143 |  |  | my $col_span = $current_column->get_attribute_ns (undef, 'span'); | 
| 144 |  |  | ## Parse non-negative integer | 
| 145 | wakaba | 1.18 | if (defined $col_span and | 
| 146 |  |  | $col_span =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) { | 
| 147 | wakaba | 1.1 | $span = $1 || 1; | 
| 148 |  |  | } | 
| 149 |  |  |  | 
| 150 |  |  | ## Step 4, 5 | 
| 151 | wakaba | 1.11 | $table->{column}->[$x_width++] = {element => $current_column} | 
| 152 | wakaba | 1.7 | for 1..$span; | 
| 153 | wakaba | 1.1 | } | 
| 154 |  |  |  | 
| 155 |  |  | ## Step 7 | 
| 156 |  |  | my $cg = {element => $current_element, | 
| 157 | wakaba | 1.7 | x => $x_start, y => 0, | 
| 158 |  |  | width => $x_width - $x_start}; | 
| 159 |  |  | $table->{column_group}->[$_] = $cg for $x_start .. $x_width - 1; | 
| 160 | wakaba | 1.1 | } else { # no <col> children | 
| 161 |  |  | ## Step 1 | 
| 162 |  |  | my $span = 1; | 
| 163 |  |  | my $col_span = $current_element->get_attribute_ns (undef, 'span'); | 
| 164 |  |  | ## Parse non-negative integer | 
| 165 | wakaba | 1.18 | if (defined $col_span and | 
| 166 |  |  | $col_span =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) { | 
| 167 | wakaba | 1.1 | $span = $1 || 1; | 
| 168 |  |  | } | 
| 169 |  |  |  | 
| 170 |  |  | ## Step 2 | 
| 171 | wakaba | 1.7 | $x_width += $span; | 
| 172 | wakaba | 1.1 |  | 
| 173 |  |  | ## Step 3 | 
| 174 |  |  | my $cg = {element => $current_element, | 
| 175 | wakaba | 1.7 | x => $x_width - $span, y => 0, | 
| 176 | wakaba | 1.1 | width => $span}; | 
| 177 | wakaba | 1.7 | $table->{column_group}->[$_] = $cg for $cg->{x} .. $x_width - 1; | 
| 178 | wakaba | 1.1 | } | 
| 179 |  |  |  | 
| 180 | wakaba | 1.10 | ## Step 9.2, 9.3 | 
| 181 | wakaba | 1.1 | NEXT_CHILD: { | 
| 182 |  |  | $current_element = shift @table_child; | 
| 183 |  |  | if (defined $current_element) { | 
| 184 |  |  | redo NEXT_CHILD unless $current_element->node_type == 1; | 
| 185 |  |  | my $nsuri = $current_element->namespace_uri; | 
| 186 |  |  | redo NEXT_CHILD unless defined $nsuri and | 
| 187 |  |  | $nsuri eq q<http://www.w3.org/1999/xhtml>; | 
| 188 |  |  | $current_ln = $current_element->manakai_local_name; | 
| 189 |  |  |  | 
| 190 |  |  | redo NEXT_CHILD unless { | 
| 191 |  |  | colgroup => 1, | 
| 192 |  |  | thead => 1, | 
| 193 |  |  | tbody => 1, | 
| 194 |  |  | tfoot => 1, | 
| 195 |  |  | tr => 1, | 
| 196 |  |  | }->{$current_ln}; | 
| 197 |  |  | } else { | 
| 198 |  |  | ## End of subsection | 
| 199 | wakaba | 1.9 |  | 
| 200 | wakaba | 1.1 | ## Step 5 of overall steps 2nd paragraph | 
| 201 | wakaba | 1.9 | $end->(); | 
| 202 | wakaba | 1.11 | $table->{width} = $x_width; | 
| 203 |  |  | $table->{height} = $y_height; | 
| 204 | wakaba | 1.9 | return $table; | 
| 205 | wakaba | 1.1 | } | 
| 206 |  |  | } # NEXT_CHILD | 
| 207 |  |  | } | 
| 208 |  |  |  | 
| 209 | wakaba | 1.10 | ## Step 10 | 
| 210 | wakaba | 1.1 | my $y_current = 0; | 
| 211 |  |  |  | 
| 212 | wakaba | 1.10 | ## Step 11 | 
| 213 | wakaba | 1.1 | my @downward_growing_cells; | 
| 214 |  |  |  | 
| 215 |  |  | my $growing_downward_growing_cells = sub { | 
| 216 |  |  | for (@downward_growing_cells) { | 
| 217 |  |  | for my $x ($_->[1] .. ($_->[1] + $_->[2] - 1)) { | 
| 218 |  |  | $table->{cell}->[$x]->[$y_current] = [$_->[0]]; | 
| 219 |  |  | $_->[0]->{height}++; | 
| 220 |  |  | } | 
| 221 |  |  | } | 
| 222 |  |  | }; # $growing_downward_growing_cells | 
| 223 |  |  |  | 
| 224 |  |  | my $process_row = sub { | 
| 225 |  |  | ## Step 1 | 
| 226 | wakaba | 1.7 | $y_height++ if $y_height == $y_current; | 
| 227 | wakaba | 1.1 |  | 
| 228 |  |  | ## Step 2 | 
| 229 | wakaba | 1.7 | my $x_current = 0; | 
| 230 |  |  |  | 
| 231 | wakaba | 1.17 | ## Step 5 | 
| 232 | wakaba | 1.1 | my $tr = shift; | 
| 233 | wakaba | 1.9 | $table->{row}->[$y_current] = {element => $tr}; | 
| 234 | wakaba | 1.1 | my @tdth = grep { | 
| 235 |  |  | $_->node_type == 1 and | 
| 236 |  |  | defined $_->namespace_uri and | 
| 237 |  |  | $_->namespace_uri eq q<http://www.w3.org/1999/xhtml> and | 
| 238 |  |  | {td => 1, th => 1}->{$_->manakai_local_name} | 
| 239 |  |  | } @{$tr->child_nodes}; | 
| 240 | wakaba | 1.7 | my $current_cell = shift @tdth; | 
| 241 |  |  |  | 
| 242 | wakaba | 1.17 | ## Step 3 | 
| 243 | wakaba | 1.7 | $growing_downward_growing_cells->(); | 
| 244 | wakaba | 1.1 |  | 
| 245 | wakaba | 1.17 | ## Step 4 | 
| 246 |  |  | return unless $current_cell; | 
| 247 | wakaba | 1.9 |  | 
| 248 | wakaba | 1.7 | CELL: while (1) { | 
| 249 | wakaba | 1.17 | ## Step 6: cells | 
| 250 | wakaba | 1.1 | $x_current++ | 
| 251 | wakaba | 1.7 | while ($x_current < $x_width and | 
| 252 | wakaba | 1.1 | $table->{cell}->[$x_current]->[$y_current]); | 
| 253 |  |  |  | 
| 254 | wakaba | 1.17 | ## Step 7 | 
| 255 | wakaba | 1.7 | $x_width++ if $x_current == $x_width; | 
| 256 |  |  |  | 
| 257 | wakaba | 1.17 | ## Step 8 | 
| 258 | wakaba | 1.1 | my $colspan = 1; | 
| 259 |  |  | my $attr_value = $current_cell->get_attribute_ns (undef, 'colspan'); | 
| 260 | wakaba | 1.18 | if (defined $attr_value | 
| 261 |  |  | and $attr_value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) { | 
| 262 | wakaba | 1.1 | $colspan = $1 || 1; | 
| 263 |  |  | } | 
| 264 |  |  |  | 
| 265 | wakaba | 1.17 | ## Step 9 | 
| 266 | wakaba | 1.1 | my $rowspan = 1; | 
| 267 |  |  | my $attr_value = $current_cell->get_attribute_ns (undef, 'rowspan'); | 
| 268 | wakaba | 1.18 | if (defined $attr_value and | 
| 269 |  |  | $attr_value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) { | 
| 270 | wakaba | 1.1 | $rowspan = $1; | 
| 271 |  |  | } | 
| 272 |  |  |  | 
| 273 | wakaba | 1.17 | ## Step 10 | 
| 274 | wakaba | 1.1 | my $cell_grows_downward; | 
| 275 |  |  | if ($rowspan == 0) { | 
| 276 |  |  | $cell_grows_downward = 1; | 
| 277 |  |  | $rowspan = 1; | 
| 278 |  |  | } | 
| 279 |  |  |  | 
| 280 | wakaba | 1.17 | ## Step 11 | 
| 281 | wakaba | 1.7 | if ($x_width < $x_current + $colspan) { | 
| 282 | wakaba | 1.1 | @column_generated_by[$_] = $current_cell | 
| 283 | wakaba | 1.7 | for $x_width .. $x_current + $colspan - 1; | 
| 284 |  |  | $x_width = $x_current + $colspan; | 
| 285 | wakaba | 1.1 | } | 
| 286 |  |  |  | 
| 287 | wakaba | 1.17 | ## Step 12 | 
| 288 | wakaba | 1.7 | if ($y_height < $y_current + $rowspan) { | 
| 289 | wakaba | 1.9 | @row_generated_by[$_] = $current_cell | 
| 290 |  |  | for $y_height .. $y_current + $rowspan - 1; | 
| 291 | wakaba | 1.7 | $y_height = $y_current + $rowspan; | 
| 292 | wakaba | 1.1 | $y_max_node = $current_cell; | 
| 293 |  |  | } | 
| 294 |  |  |  | 
| 295 | wakaba | 1.17 | ## Step 13 | 
| 296 | wakaba | 1.1 | my $cell = { | 
| 297 | wakaba | 1.2 | is_header => ($current_cell->manakai_local_name eq 'th'), | 
| 298 | wakaba | 1.1 | element => $current_cell, | 
| 299 |  |  | x => $x_current, y => $y_current, | 
| 300 |  |  | width => $colspan, height => $rowspan, | 
| 301 |  |  | }; | 
| 302 | wakaba | 1.9 | $column_has_anchored_cell[$x_current] = 1; | 
| 303 |  |  | $row_has_anchored_cell[$y_current] = 1; | 
| 304 | wakaba | 1.1 | for my $x ($x_current .. ($x_current + $colspan - 1)) { | 
| 305 |  |  | for my $y ($y_current .. ($y_current + $rowspan - 1)) { | 
| 306 |  |  | unless ($table->{cell}->[$x]->[$y]) { | 
| 307 |  |  | $table->{cell}->[$x]->[$y] = [$cell]; | 
| 308 |  |  | } else { | 
| 309 | wakaba | 1.15 | $onerror->(type => 'cell overlapping', | 
| 310 |  |  | text => "$x,$y", | 
| 311 |  |  | node => $current_cell, | 
| 312 |  |  | level => $levels->{must}); | 
| 313 | wakaba | 1.1 | push @{$table->{cell}->[$x]->[$y]}, $cell; | 
| 314 |  |  | } | 
| 315 |  |  | } | 
| 316 |  |  | } | 
| 317 | wakaba | 1.11 |  | 
| 318 |  |  | ## Whether the cell is an empty data cell or not | 
| 319 |  |  | if (not $cell->{is_header}) { | 
| 320 |  |  | $cell->{is_empty} = 1; | 
| 321 |  |  | for my $node (@{$current_cell->child_nodes}) { | 
| 322 |  |  | my $nt = $node->node_type; | 
| 323 |  |  | if ($nt == 3 or $nt == 4) { # TEXT_NODE / CDATA_SECTION_NODE | 
| 324 | wakaba | 1.16 | if ($node->data =~ /\P{WhiteSpace}/) { | 
| 325 | wakaba | 1.11 | delete $cell->{is_empty}; | 
| 326 |  |  | last; | 
| 327 |  |  | } | 
| 328 |  |  | } elsif ($nt == 1) { # ELEMENT_NODE | 
| 329 |  |  | delete $cell->{is_empty}; | 
| 330 |  |  | last; | 
| 331 |  |  | } | 
| 332 |  |  | } | 
| 333 |  |  | ## NOTE: Entity references are not supported | 
| 334 |  |  | } | 
| 335 | wakaba | 1.1 |  | 
| 336 | wakaba | 1.17 | ## Step 14 | 
| 337 | wakaba | 1.1 | if ($cell_grows_downward) { | 
| 338 |  |  | push @downward_growing_cells, [$cell, $x_current, $colspan]; | 
| 339 |  |  | } | 
| 340 |  |  |  | 
| 341 | wakaba | 1.17 | ## Step 15 | 
| 342 | wakaba | 1.1 | $x_current += $colspan; | 
| 343 | wakaba | 1.7 |  | 
| 344 | wakaba | 1.17 | ## Step 16-18 | 
| 345 | wakaba | 1.7 | $current_cell = shift @tdth; | 
| 346 |  |  | if (defined $current_cell) { | 
| 347 | wakaba | 1.17 | ## Step 17-18 | 
| 348 | wakaba | 1.7 | # | 
| 349 |  |  | } else { | 
| 350 | wakaba | 1.17 | ## Step 16 | 
| 351 | wakaba | 1.7 | $y_current++; | 
| 352 |  |  | last CELL; | 
| 353 |  |  | } | 
| 354 |  |  | } # CELL | 
| 355 | wakaba | 1.1 | }; # $process_row | 
| 356 |  |  |  | 
| 357 | wakaba | 1.9 | $process_row_group = sub ($) { | 
| 358 | wakaba | 1.8 | ## Step 1 | 
| 359 |  |  | my $y_start = $y_height; | 
| 360 |  |  |  | 
| 361 |  |  | ## Step 2 | 
| 362 |  |  | for (grep { | 
| 363 |  |  | $_->node_type == 1 and | 
| 364 |  |  | defined $_->namespace_uri and | 
| 365 |  |  | $_->namespace_uri eq q<http://www.w3.org/1999/xhtml> and | 
| 366 |  |  | $_->manakai_local_name eq 'tr' | 
| 367 |  |  | } @{$_[0]->child_nodes}) { | 
| 368 |  |  | $process_row->($_); | 
| 369 |  |  | } | 
| 370 |  |  |  | 
| 371 |  |  | ## Step 3 | 
| 372 |  |  | if ($y_height > $y_start) { | 
| 373 | wakaba | 1.14 | my $rg = {element => $current_element, ## ISSUE: "element being processed"?  Otherwise, $current_element may be a thead element while the element being processed is a tfoot element, for example. | 
| 374 | wakaba | 1.8 | x => 0, y => $y_start, | 
| 375 |  |  | height => $y_height - $y_start}; | 
| 376 |  |  | $table->{row_group}->[$_] = $rg for $y_start .. $y_height - 1; | 
| 377 |  |  | } | 
| 378 |  |  |  | 
| 379 |  |  | ## Step 4 | 
| 380 |  |  | ## Ending a row group | 
| 381 |  |  | ## Step 1 | 
| 382 |  |  | while ($y_current < $y_height) { | 
| 383 |  |  | ## Step 1 | 
| 384 |  |  | $growing_downward_growing_cells->(); | 
| 385 |  |  |  | 
| 386 |  |  | ## Step 2 | 
| 387 |  |  | $y_current++; | 
| 388 |  |  | } | 
| 389 | wakaba | 1.9 | ## Step 2 | 
| 390 | wakaba | 1.8 | @downward_growing_cells = (); | 
| 391 |  |  | }; # $process_row_group | 
| 392 |  |  |  | 
| 393 | wakaba | 1.10 | ## Step 12: rows | 
| 394 | wakaba | 1.1 | unshift @table_child, $current_element; | 
| 395 |  |  | ROWS: { | 
| 396 |  |  | NEXT_CHILD: { | 
| 397 |  |  | $current_element = shift @table_child; | 
| 398 |  |  | if (defined $current_element) { | 
| 399 |  |  | redo NEXT_CHILD unless $current_element->node_type == 1; | 
| 400 |  |  | my $nsuri = $current_element->namespace_uri; | 
| 401 |  |  | redo NEXT_CHILD unless defined $nsuri and | 
| 402 |  |  | $nsuri eq q<http://www.w3.org/1999/xhtml>; | 
| 403 |  |  | $current_ln = $current_element->manakai_local_name; | 
| 404 |  |  |  | 
| 405 |  |  | redo NEXT_CHILD unless { | 
| 406 |  |  | thead => 1, | 
| 407 |  |  | tbody => 1, | 
| 408 |  |  | tfoot => 1, | 
| 409 |  |  | tr => 1, | 
| 410 |  |  | }->{$current_ln}; | 
| 411 |  |  | } else { | 
| 412 | wakaba | 1.8 | ## Step 6 2nd paragraph | 
| 413 | wakaba | 1.9 | $end->(); | 
| 414 | wakaba | 1.11 | $table->{width} = $x_width; | 
| 415 |  |  | $table->{height} = $y_height; | 
| 416 | wakaba | 1.9 | return $table; | 
| 417 | wakaba | 1.1 | } | 
| 418 |  |  | } # NEXT_CHILD | 
| 419 |  |  |  | 
| 420 | wakaba | 1.10 | ## Step 13 | 
| 421 | wakaba | 1.1 | if ($current_ln eq 'tr') { | 
| 422 |  |  | $process_row->($current_element); | 
| 423 | wakaba | 1.8 | # advance (done at the first of ROWS) | 
| 424 | wakaba | 1.1 | redo ROWS; | 
| 425 |  |  | } | 
| 426 |  |  |  | 
| 427 | wakaba | 1.10 | ## Step 14 | 
| 428 | wakaba | 1.1 | ## Ending a row group | 
| 429 |  |  | ## Step 1 | 
| 430 | wakaba | 1.7 | while ($y_current < $y_height) { | 
| 431 | wakaba | 1.1 | ## Step 1 | 
| 432 | wakaba | 1.9 | $growing_downward_growing_cells->(); | 
| 433 |  |  |  | 
| 434 |  |  | ## Step 2 | 
| 435 | wakaba | 1.1 | $y_current++; | 
| 436 |  |  | } | 
| 437 | wakaba | 1.9 | ## Step 2 | 
| 438 | wakaba | 1.1 | @downward_growing_cells = (); | 
| 439 |  |  |  | 
| 440 | wakaba | 1.10 | ## Step 15 | 
| 441 | wakaba | 1.8 | if ($current_ln eq 'tfoot') { | 
| 442 |  |  | push @$pending_tfoot, $current_element; | 
| 443 |  |  | # advance (done at the top of ROWS) | 
| 444 |  |  | redo ROWS; | 
| 445 | wakaba | 1.1 | } | 
| 446 |  |  |  | 
| 447 | wakaba | 1.10 | ## Step 16 | 
| 448 | wakaba | 1.8 | # thead or tbody | 
| 449 |  |  | $process_row_group->($current_element); | 
| 450 | wakaba | 1.1 |  | 
| 451 | wakaba | 1.10 | ## Step 17 | 
| 452 | wakaba | 1.8 | # Advance (done at the top of ROWS). | 
| 453 | wakaba | 1.1 |  | 
| 454 | wakaba | 1.10 | ## Step 18 | 
| 455 | wakaba | 1.8 | redo ROWS; | 
| 456 | wakaba | 1.1 | } # ROWS | 
| 457 | wakaba | 1.8 |  | 
| 458 | wakaba | 1.9 | $end->(); | 
| 459 | wakaba | 1.11 | $table->{width} = $x_width; | 
| 460 |  |  | $table->{height} = $y_height; | 
| 461 | wakaba | 1.8 | return $table; | 
| 462 | wakaba | 1.1 | } # form_table | 
| 463 |  |  |  | 
| 464 | wakaba | 1.11 | sub assign_header ($$;$$) { | 
| 465 | wakaba | 1.15 | my (undef, $table, $onerror, $levels) = @_; | 
| 466 | wakaba | 1.11 | $onerror ||= sub { }; | 
| 467 | wakaba | 1.15 | $levels ||= {must => 'm'}; | 
| 468 | wakaba | 1.11 |  | 
| 469 |  |  | my $assign_header = sub ($$$) { | 
| 470 |  |  | my $_cell = shift; | 
| 471 |  |  | my ($x, $y) = @_; | 
| 472 |  |  |  | 
| 473 |  |  | for my $__cell (@{$_cell or []}) { | 
| 474 |  |  | if ($__cell and $__cell->{element} and | 
| 475 |  |  | not $__cell->{is_header} and | 
| 476 |  |  | not $__cell->{element}->has_attribute_ns (undef, 'headers')) { | 
| 477 |  |  | $__cell->{header}->{$x}->{$y} = 1; | 
| 478 |  |  | } | 
| 479 |  |  | } | 
| 480 |  |  | }; # $assign_header | 
| 481 |  |  |  | 
| 482 | wakaba | 1.12 | my @headers_cell; | 
| 483 |  |  | my $id_to_cell = {}; | 
| 484 |  |  | ## ISSUE: ID duplication, non-TH reference | 
| 485 |  |  |  | 
| 486 | wakaba | 1.11 | for my $x (0 .. $table->{width} - 1) { | 
| 487 |  |  | for my $y (0 .. $table->{height} - 1) { | 
| 488 |  |  | my $cell = $table->{cell}->[$x]->[$y]; | 
| 489 |  |  | $cell = $cell->[0] if $cell; # anchored cell is always ->{cell}[][][0]. | 
| 490 |  |  | next if $cell->{x} != $x; | 
| 491 |  |  | next if $cell->{y} != $y; | 
| 492 |  |  | if ($cell) { | 
| 493 |  |  | if ($cell->{is_header}) { | 
| 494 | wakaba | 1.12 | my $id = $cell->{element}->get_attribute_ns (undef, 'id'); | 
| 495 |  |  | if (defined $id and not $id_to_cell->{$id}) { | 
| 496 |  |  | $id_to_cell->{$id} = $cell; | 
| 497 |  |  | } | 
| 498 |  |  |  | 
| 499 | wakaba | 1.11 | my $scope = $cell->{element}->get_attribute_ns (undef, 'scope'); | 
| 500 |  |  | $scope = $scope ? lc $scope : ''; ## TODO: case | 
| 501 |  |  | if ($scope eq 'row') { | 
| 502 |  |  | for my $_x ($x + $cell->{width} .. $table->{width} - 1) { | 
| 503 |  |  | for my $_y ($y .. $y + $cell->{height} - 1) { | 
| 504 |  |  | $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y); | 
| 505 |  |  | } | 
| 506 |  |  | } | 
| 507 |  |  | } elsif ($scope eq 'col') { | 
| 508 |  |  | for my $_x ($x .. $x + $cell->{width} - 1) { | 
| 509 |  |  | for my $_y ($y .. $table->{height} - 1) { | 
| 510 |  |  | $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y); | 
| 511 |  |  | } | 
| 512 |  |  | } | 
| 513 |  |  | } elsif ($scope eq 'rowgroup') { | 
| 514 |  |  | ## NOTE: A cell cannot exceed across a row group boundary. | 
| 515 |  |  | if ($table->{row_group}->[$y] and | 
| 516 |  |  | $table->{row_group}->[$y]->{height}) { | 
| 517 |  |  | for my $_x ($x .. $table->{width} - 1) { | 
| 518 |  |  | for my $_y ($y .. | 
| 519 |  |  | $table->{row_group}->[$y]->{y} + | 
| 520 |  |  | $table->{row_group}->[$y]->{height} - 1) { | 
| 521 |  |  | $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y); | 
| 522 |  |  | } | 
| 523 |  |  | } | 
| 524 |  |  | } | 
| 525 |  |  | ## TODO: Should we raise a warning? | 
| 526 |  |  | } elsif ($scope eq 'colgroup') { | 
| 527 |  |  | if ($table->{column_group}->[$x] and | 
| 528 |  |  | $table->{column_group}->{width} and | 
| 529 |  |  | $table->{column_group}->[$x]->{x} == $x) { # anchored | 
| 530 |  |  | for my $_x ($x .. | 
| 531 |  |  | $table->{column_group}->[$x]->{x} + | 
| 532 |  |  | $table->{column_group}->[$x]->{width} - 1) { | 
| 533 |  |  | for my $_y ($y .. $table->{height} - 1) { | 
| 534 |  |  | $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y); | 
| 535 |  |  | } | 
| 536 |  |  | } | 
| 537 |  |  | } | 
| 538 |  |  | ## TODO: Warning? | 
| 539 |  |  | } else { # auto | 
| 540 |  |  | ## 1. | 
| 541 |  |  | my $header_width = $cell->{width}; | 
| 542 |  |  | W: for ($x + $cell->{width} .. $table->{width} - 1) { | 
| 543 |  |  | my $_cell = $table->{cell}->[$_]->[$y]; | 
| 544 |  |  | for (@{$_cell or []}) { | 
| 545 |  |  | if ($_->{element} and not $_->{is_empty}) { | 
| 546 |  |  | last W; # not empty | 
| 547 |  |  | } | 
| 548 |  |  | } | 
| 549 |  |  | $header_width++; | 
| 550 |  |  | } # W | 
| 551 |  |  |  | 
| 552 |  |  | ## 2. | 
| 553 |  |  | my $_x = $x + $header_width; | 
| 554 |  |  |  | 
| 555 |  |  | ## 3. | 
| 556 | wakaba | 1.17 | my $_y = $y + $cell->{height}; # $cell->{height} == header_{height} | 
| 557 |  |  |  | 
| 558 |  |  | ## 4. | 
| 559 | wakaba | 1.11 | HORIZONTAL: { | 
| 560 |  |  | last HORIZONTAL if $_x == $table->{width}; # goto Vertical | 
| 561 |  |  |  | 
| 562 | wakaba | 1.17 | ## 5. # goto Vertical | 
| 563 | wakaba | 1.11 | last HORIZONTAL | 
| 564 |  |  | if $table->{cell}->[$_x]->[$y] and | 
| 565 |  |  | $table->{cell}->[$_x]->[$y]->[0] and # anchored | 
| 566 |  |  | $table->{cell}->[$_x]->[$y]->[0]->{is_header}; | 
| 567 |  |  |  | 
| 568 | wakaba | 1.17 | ## 6. | 
| 569 | wakaba | 1.11 | for my $_y ($y .. $y + $cell->{height} - 1) { | 
| 570 |  |  | $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y); | 
| 571 |  |  | } | 
| 572 |  |  |  | 
| 573 | wakaba | 1.17 | ## 7. | 
| 574 | wakaba | 1.11 | $_x++; | 
| 575 |  |  |  | 
| 576 | wakaba | 1.17 | ## 8. | 
| 577 | wakaba | 1.11 | redo HORIZONTAL; | 
| 578 |  |  | } # HORIZONTAL | 
| 579 |  |  |  | 
| 580 | wakaba | 1.17 | ## 9. Vertical | 
| 581 | wakaba | 1.11 | VERTICAL: { | 
| 582 | wakaba | 1.17 | last VERTICAL if $_y == $table->{height}; # goto END | 
| 583 | wakaba | 1.11 |  | 
| 584 |  |  | ## 10. | 
| 585 |  |  | if ($table->{cell}->[$x]->[$_y]) { | 
| 586 |  |  | my $h_cell = $table->{cell}->[$x]->[$_y]->[0]; # anchored cell | 
| 587 |  |  | if ($h_cell and $h_cell->{is_header}) { | 
| 588 |  |  | ## 10.1. | 
| 589 |  |  | my $width = $h_cell->{width}; | 
| 590 |  |  | W: for ($h_cell->{x} + $width .. $table->{width} - 1) { | 
| 591 |  |  | my $_cell = $table->{cell}->[$_]->[$y]; | 
| 592 |  |  | for (@{$_cell or []}) { | 
| 593 |  |  | if ($_->{element} and not $_->{is_empty}) { | 
| 594 |  |  | last W; # not empty | 
| 595 |  |  | } | 
| 596 |  |  | } | 
| 597 |  |  | $width++; | 
| 598 |  |  | } # W | 
| 599 |  |  |  | 
| 600 |  |  | ## 10.2. # goto end | 
| 601 |  |  | last VERTICAL if $width == $header_width; | 
| 602 |  |  | } # 10. | 
| 603 |  |  | } | 
| 604 |  |  |  | 
| 605 |  |  | ## 11. | 
| 606 |  |  | for my $_x ($x .. $x + $header_width - 1) { | 
| 607 |  |  | $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y); | 
| 608 |  |  | } | 
| 609 |  |  |  | 
| 610 |  |  | ## 12. | 
| 611 |  |  | $_y++; | 
| 612 |  |  |  | 
| 613 |  |  | ## 13. # goto vertical (wrong) | 
| 614 |  |  | redo VERTICAL; | 
| 615 |  |  | } # VERTICAL | 
| 616 |  |  |  | 
| 617 |  |  | ## 14. End | 
| 618 |  |  | # (we have already done) | 
| 619 |  |  | } | 
| 620 |  |  | } else { # data cell | 
| 621 | wakaba | 1.12 | if ($cell->{element} and | 
| 622 |  |  | $cell->{element}->has_attribute_ns (undef, 'headers')) { | 
| 623 |  |  | push @headers_cell, $cell; | 
| 624 |  |  | } | 
| 625 | wakaba | 1.11 | } | 
| 626 |  |  | } | 
| 627 |  |  | } | 
| 628 |  |  | } | 
| 629 |  |  |  | 
| 630 | wakaba | 1.12 | for my $headers_cell (@headers_cell) { | 
| 631 | wakaba | 1.18 | my @headers = split /[\x09\x0A\x0C\x0D\x20]+/, | 
| 632 | wakaba | 1.12 | $headers_cell->{element}->get_attribute_ns (undef, 'headers'); | 
| 633 |  |  | my %headers; | 
| 634 |  |  | for my $header_id (@headers) { | 
| 635 |  |  | next unless length $header_id; | 
| 636 |  |  | if ($headers{$header_id}) { | 
| 637 |  |  | $onerror->(type => 'duplicate token', value => $header_id, | 
| 638 |  |  | node => $headers_cell->{element}->get_attribute_node_ns | 
| 639 |  |  | (undef, 'headers'), | 
| 640 | wakaba | 1.15 | level => $levels->{must}); | 
| 641 | wakaba | 1.12 | next; | 
| 642 |  |  | } | 
| 643 |  |  | $headers{$header_id} = 1; | 
| 644 |  |  |  | 
| 645 |  |  | if ($id_to_cell->{$header_id}) { | 
| 646 |  |  | my $header_cell = $id_to_cell->{$header_id}; | 
| 647 |  |  | $headers_cell->{header}->{$header_cell->{x}}->{$header_cell->{y}} = 1; | 
| 648 |  |  | } else { | 
| 649 | wakaba | 1.15 | $onerror->(type => 'no referenced header cell', value => $header_id, | 
| 650 | wakaba | 1.12 | node => $headers_cell->{element}->get_attribute_node_ns | 
| 651 |  |  | (undef, 'headers'), | 
| 652 | wakaba | 1.15 | level => $levels->{must}); | 
| 653 | wakaba | 1.12 | } | 
| 654 |  |  | } | 
| 655 |  |  | } | 
| 656 | wakaba | 1.11 |  | 
| 657 |  |  | ## NOTE: The "tree order" constraints in the spec algorithm are irrelevant | 
| 658 |  |  | ## in fact. | 
| 659 | wakaba | 1.12 |  | 
| 660 |  |  | ## NOTE: We does not support ID attributes other than HTML "id" attribute. | 
| 661 | wakaba | 1.11 | } # assign_header | 
| 662 | wakaba | 1.1 |  | 
| 663 |  |  | 1; | 
| 664 | wakaba | 1.18 | ## $Date: 2008/08/30 15:14:32 $ |