/[suikacvs]/markup/html/whatpm/Whatpm/HTMLTable.pm
Suika

Contents of /markup/html/whatpm/Whatpm/HTMLTable.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.18 - (show annotations) (download)
Sat Sep 20 11:25:56 2008 UTC (16 years, 1 month ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.17: +10 -6 lines
++ whatpm/t/ChangeLog	20 Sep 2008 11:22:21 -0000
	* content-model-1.dat, content-model-2.dat, content-model-4.dat:
	Test data for U+000B are added (cf. HTML5 revision 1738).

2008-09-20  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	20 Sep 2008 11:21:11 -0000
	* HTML.pm.src: Reminding places where U+000B is allowed as a space
	character is fixed (cf. HTML5 revision 1738).

	* ContentChecker.pm, HTMLTable.pm: U+000B is no longer part of
	space characters (HTML5 revision 1738).

2008-09-20  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	20 Sep 2008 11:18:32 -0000
	* HTML.pm: U+000B is no longer part of space characters (HTML5
	revision 1738).

2008-09-20  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::HTMLTable;
2 use strict;
3
4 ## An implementation of "Forming a table" algorithm in HTML5
5 sub form_table ($$$;$) {
6 my (undef, $table_el, $onerror, $levels) = @_;
7 $onerror ||= sub { };
8 $levels ||= {must => 'm'};
9
10 ## Step 1
11 my $x_width = 0;
12
13 ## Step 2
14 my $y_height = 0;
15 my $y_max_node;
16
17 ## Step 3
18 my $pending_tfoot = [];
19
20 ## Step 4
21 my $table = {
22 #caption
23 column => [],
24 column_group => [],
25 row => [], ## NOTE: HTML5 algorithm doesn't associate rows with <tr>s.
26 row_group => [],
27 cell => [],
28 height => 0,
29 width => 0,
30 element => $table_el,
31 };
32
33 my @column_has_anchored_cell;
34 my @row_has_anchored_cell;
35 my @column_generated_by;
36 my @row_generated_by;
37
38 ## Step 5
39 my @table_child = @{$table_el->child_nodes};
40 return $table unless @table_child;
41
42 ## Step 6
43 for (0..$#table_child) {
44 my $el = $table_child[$_];
45 next unless $el->node_type == 1; # ELEMENT_NODE
46 next unless $el->manakai_local_name eq 'caption';
47 my $nsuri = $el->namespace_uri;
48 next unless defined $nsuri;
49 next unless $nsuri eq q<http://www.w3.org/1999/xhtml>;
50 $table->{caption} = {element => $el};
51 splice @table_child, $_, 1, ();
52 last;
53 }
54
55 my $process_row_group;
56 my $end = sub {
57 ## Step 19 (End)
58 for (@$pending_tfoot) {
59 $process_row_group->($_);
60 }
61
62 ## Step 20
63 for (0 .. $x_width - 1) {
64 unless ($column_has_anchored_cell[$_]) {
65 if ($table->{column}->[$_]) {
66 $onerror->(type => 'column with no anchored cell',
67 node => $table->{column}->[$_]->{element},
68 level => $levels->{must});
69 } else {
70 $onerror->(type => 'colspan creates column with no anchored cell',
71 node => $column_generated_by[$_],
72 level => $levels->{must});
73 }
74 last; # only one error.
75 }
76 }
77 for (0 .. $y_height - 1) {
78 unless ($row_has_anchored_cell[$_]) {
79 if ($table->{row}->[$_]) {
80 $onerror->(type => 'row with no anchored cell',
81 node => $table->{row}->[$_]->{element},
82 level => $levels->{must});
83 } else {
84 $onerror->(type => 'rowspan creates row with no anchored cell',
85 node => $row_generated_by[$_],
86 level => $levels->{must});
87 }
88 last; # only one error.
89 }
90 }
91
92 ## Step 21
93 #return $table;
94 }; # $end
95
96 ## Step 7, 8
97 my $current_element;
98 my $current_ln;
99 NEXT_CHILD: {
100 $current_element = shift @table_child;
101 if (defined $current_element) {
102 redo NEXT_CHILD unless $current_element->node_type == 1;
103 my $nsuri = $current_element->namespace_uri;
104 redo NEXT_CHILD unless defined $nsuri and
105 $nsuri eq q<http://www.w3.org/1999/xhtml>;
106 $current_ln = $current_element->manakai_local_name;
107
108 redo NEXT_CHILD unless {
109 colgroup => 1,
110 thead => 1,
111 tbody => 1,
112 tfoot => 1,
113 tr => 1,
114 }->{$current_ln};
115 } else {
116 ## Step 6 2nd paragraph
117 $end->();
118 $table->{width} = $x_width;
119 $table->{height} = $y_height;
120 return $table;
121 }
122 } # NEXT_CHILD
123
124 ## Step 9
125 while ($current_ln eq 'colgroup') { # Step 9, Step 9.4
126 ## Step 9.1: column groups
127 my @col = grep {
128 $_->node_type == 1 and
129 defined $_->namespace_uri and
130 $_->namespace_uri eq q<http://www.w3.org/1999/xhtml> and
131 $_->manakai_local_name eq 'col'
132 } @{$current_element->child_nodes};
133 if (@col) {
134 ## Step 1
135 my $x_start = $x_width;
136
137 ## Step 2, 6
138 while (@col) {
139 my $current_column = shift @col;
140
141 ## Step 3: columns
142 my $span = 1;
143 my $col_span = $current_column->get_attribute_ns (undef, 'span');
144 ## Parse non-negative integer
145 if (defined $col_span and
146 $col_span =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
147 $span = $1 || 1;
148 }
149
150 ## Step 4, 5
151 $table->{column}->[$x_width++] = {element => $current_column}
152 for 1..$span;
153 }
154
155 ## Step 7
156 my $cg = {element => $current_element,
157 x => $x_start, y => 0,
158 width => $x_width - $x_start};
159 $table->{column_group}->[$_] = $cg for $x_start .. $x_width - 1;
160 } else { # no <col> children
161 ## Step 1
162 my $span = 1;
163 my $col_span = $current_element->get_attribute_ns (undef, 'span');
164 ## Parse non-negative integer
165 if (defined $col_span and
166 $col_span =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
167 $span = $1 || 1;
168 }
169
170 ## Step 2
171 $x_width += $span;
172
173 ## Step 3
174 my $cg = {element => $current_element,
175 x => $x_width - $span, y => 0,
176 width => $span};
177 $table->{column_group}->[$_] = $cg for $cg->{x} .. $x_width - 1;
178 }
179
180 ## Step 9.2, 9.3
181 NEXT_CHILD: {
182 $current_element = shift @table_child;
183 if (defined $current_element) {
184 redo NEXT_CHILD unless $current_element->node_type == 1;
185 my $nsuri = $current_element->namespace_uri;
186 redo NEXT_CHILD unless defined $nsuri and
187 $nsuri eq q<http://www.w3.org/1999/xhtml>;
188 $current_ln = $current_element->manakai_local_name;
189
190 redo NEXT_CHILD unless {
191 colgroup => 1,
192 thead => 1,
193 tbody => 1,
194 tfoot => 1,
195 tr => 1,
196 }->{$current_ln};
197 } else {
198 ## End of subsection
199
200 ## Step 5 of overall steps 2nd paragraph
201 $end->();
202 $table->{width} = $x_width;
203 $table->{height} = $y_height;
204 return $table;
205 }
206 } # NEXT_CHILD
207 }
208
209 ## Step 10
210 my $y_current = 0;
211
212 ## Step 11
213 my @downward_growing_cells;
214
215 my $growing_downward_growing_cells = sub {
216 for (@downward_growing_cells) {
217 for my $x ($_->[1] .. ($_->[1] + $_->[2] - 1)) {
218 $table->{cell}->[$x]->[$y_current] = [$_->[0]];
219 $_->[0]->{height}++;
220 }
221 }
222 }; # $growing_downward_growing_cells
223
224 my $process_row = sub {
225 ## Step 1
226 $y_height++ if $y_height == $y_current;
227
228 ## Step 2
229 my $x_current = 0;
230
231 ## Step 5
232 my $tr = shift;
233 $table->{row}->[$y_current] = {element => $tr};
234 my @tdth = grep {
235 $_->node_type == 1 and
236 defined $_->namespace_uri and
237 $_->namespace_uri eq q<http://www.w3.org/1999/xhtml> and
238 {td => 1, th => 1}->{$_->manakai_local_name}
239 } @{$tr->child_nodes};
240 my $current_cell = shift @tdth;
241
242 ## Step 3
243 $growing_downward_growing_cells->();
244
245 ## Step 4
246 return unless $current_cell;
247
248 CELL: while (1) {
249 ## Step 6: cells
250 $x_current++
251 while ($x_current < $x_width and
252 $table->{cell}->[$x_current]->[$y_current]);
253
254 ## Step 7
255 $x_width++ if $x_current == $x_width;
256
257 ## Step 8
258 my $colspan = 1;
259 my $attr_value = $current_cell->get_attribute_ns (undef, 'colspan');
260 if (defined $attr_value
261 and $attr_value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
262 $colspan = $1 || 1;
263 }
264
265 ## Step 9
266 my $rowspan = 1;
267 my $attr_value = $current_cell->get_attribute_ns (undef, 'rowspan');
268 if (defined $attr_value and
269 $attr_value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
270 $rowspan = $1;
271 }
272
273 ## Step 10
274 my $cell_grows_downward;
275 if ($rowspan == 0) {
276 $cell_grows_downward = 1;
277 $rowspan = 1;
278 }
279
280 ## Step 11
281 if ($x_width < $x_current + $colspan) {
282 @column_generated_by[$_] = $current_cell
283 for $x_width .. $x_current + $colspan - 1;
284 $x_width = $x_current + $colspan;
285 }
286
287 ## Step 12
288 if ($y_height < $y_current + $rowspan) {
289 @row_generated_by[$_] = $current_cell
290 for $y_height .. $y_current + $rowspan - 1;
291 $y_height = $y_current + $rowspan;
292 $y_max_node = $current_cell;
293 }
294
295 ## Step 13
296 my $cell = {
297 is_header => ($current_cell->manakai_local_name eq 'th'),
298 element => $current_cell,
299 x => $x_current, y => $y_current,
300 width => $colspan, height => $rowspan,
301 };
302 $column_has_anchored_cell[$x_current] = 1;
303 $row_has_anchored_cell[$y_current] = 1;
304 for my $x ($x_current .. ($x_current + $colspan - 1)) {
305 for my $y ($y_current .. ($y_current + $rowspan - 1)) {
306 unless ($table->{cell}->[$x]->[$y]) {
307 $table->{cell}->[$x]->[$y] = [$cell];
308 } else {
309 $onerror->(type => 'cell overlapping',
310 text => "$x,$y",
311 node => $current_cell,
312 level => $levels->{must});
313 push @{$table->{cell}->[$x]->[$y]}, $cell;
314 }
315 }
316 }
317
318 ## Whether the cell is an empty data cell or not
319 if (not $cell->{is_header}) {
320 $cell->{is_empty} = 1;
321 for my $node (@{$current_cell->child_nodes}) {
322 my $nt = $node->node_type;
323 if ($nt == 3 or $nt == 4) { # TEXT_NODE / CDATA_SECTION_NODE
324 if ($node->data =~ /\P{WhiteSpace}/) {
325 delete $cell->{is_empty};
326 last;
327 }
328 } elsif ($nt == 1) { # ELEMENT_NODE
329 delete $cell->{is_empty};
330 last;
331 }
332 }
333 ## NOTE: Entity references are not supported
334 }
335
336 ## Step 14
337 if ($cell_grows_downward) {
338 push @downward_growing_cells, [$cell, $x_current, $colspan];
339 }
340
341 ## Step 15
342 $x_current += $colspan;
343
344 ## Step 16-18
345 $current_cell = shift @tdth;
346 if (defined $current_cell) {
347 ## Step 17-18
348 #
349 } else {
350 ## Step 16
351 $y_current++;
352 last CELL;
353 }
354 } # CELL
355 }; # $process_row
356
357 $process_row_group = sub ($) {
358 ## Step 1
359 my $y_start = $y_height;
360
361 ## Step 2
362 for (grep {
363 $_->node_type == 1 and
364 defined $_->namespace_uri and
365 $_->namespace_uri eq q<http://www.w3.org/1999/xhtml> and
366 $_->manakai_local_name eq 'tr'
367 } @{$_[0]->child_nodes}) {
368 $process_row->($_);
369 }
370
371 ## Step 3
372 if ($y_height > $y_start) {
373 my $rg = {element => $current_element, ## ISSUE: "element being processed"? Otherwise, $current_element may be a thead element while the element being processed is a tfoot element, for example.
374 x => 0, y => $y_start,
375 height => $y_height - $y_start};
376 $table->{row_group}->[$_] = $rg for $y_start .. $y_height - 1;
377 }
378
379 ## Step 4
380 ## Ending a row group
381 ## Step 1
382 while ($y_current < $y_height) {
383 ## Step 1
384 $growing_downward_growing_cells->();
385
386 ## Step 2
387 $y_current++;
388 }
389 ## Step 2
390 @downward_growing_cells = ();
391 }; # $process_row_group
392
393 ## Step 12: rows
394 unshift @table_child, $current_element;
395 ROWS: {
396 NEXT_CHILD: {
397 $current_element = shift @table_child;
398 if (defined $current_element) {
399 redo NEXT_CHILD unless $current_element->node_type == 1;
400 my $nsuri = $current_element->namespace_uri;
401 redo NEXT_CHILD unless defined $nsuri and
402 $nsuri eq q<http://www.w3.org/1999/xhtml>;
403 $current_ln = $current_element->manakai_local_name;
404
405 redo NEXT_CHILD unless {
406 thead => 1,
407 tbody => 1,
408 tfoot => 1,
409 tr => 1,
410 }->{$current_ln};
411 } else {
412 ## Step 6 2nd paragraph
413 $end->();
414 $table->{width} = $x_width;
415 $table->{height} = $y_height;
416 return $table;
417 }
418 } # NEXT_CHILD
419
420 ## Step 13
421 if ($current_ln eq 'tr') {
422 $process_row->($current_element);
423 # advance (done at the first of ROWS)
424 redo ROWS;
425 }
426
427 ## Step 14
428 ## Ending a row group
429 ## Step 1
430 while ($y_current < $y_height) {
431 ## Step 1
432 $growing_downward_growing_cells->();
433
434 ## Step 2
435 $y_current++;
436 }
437 ## Step 2
438 @downward_growing_cells = ();
439
440 ## Step 15
441 if ($current_ln eq 'tfoot') {
442 push @$pending_tfoot, $current_element;
443 # advance (done at the top of ROWS)
444 redo ROWS;
445 }
446
447 ## Step 16
448 # thead or tbody
449 $process_row_group->($current_element);
450
451 ## Step 17
452 # Advance (done at the top of ROWS).
453
454 ## Step 18
455 redo ROWS;
456 } # ROWS
457
458 $end->();
459 $table->{width} = $x_width;
460 $table->{height} = $y_height;
461 return $table;
462 } # form_table
463
464 sub assign_header ($$;$$) {
465 my (undef, $table, $onerror, $levels) = @_;
466 $onerror ||= sub { };
467 $levels ||= {must => 'm'};
468
469 my $assign_header = sub ($$$) {
470 my $_cell = shift;
471 my ($x, $y) = @_;
472
473 for my $__cell (@{$_cell or []}) {
474 if ($__cell and $__cell->{element} and
475 not $__cell->{is_header} and
476 not $__cell->{element}->has_attribute_ns (undef, 'headers')) {
477 $__cell->{header}->{$x}->{$y} = 1;
478 }
479 }
480 }; # $assign_header
481
482 my @headers_cell;
483 my $id_to_cell = {};
484 ## ISSUE: ID duplication, non-TH reference
485
486 for my $x (0 .. $table->{width} - 1) {
487 for my $y (0 .. $table->{height} - 1) {
488 my $cell = $table->{cell}->[$x]->[$y];
489 $cell = $cell->[0] if $cell; # anchored cell is always ->{cell}[][][0].
490 next if $cell->{x} != $x;
491 next if $cell->{y} != $y;
492 if ($cell) {
493 if ($cell->{is_header}) {
494 my $id = $cell->{element}->get_attribute_ns (undef, 'id');
495 if (defined $id and not $id_to_cell->{$id}) {
496 $id_to_cell->{$id} = $cell;
497 }
498
499 my $scope = $cell->{element}->get_attribute_ns (undef, 'scope');
500 $scope = $scope ? lc $scope : ''; ## TODO: case
501 if ($scope eq 'row') {
502 for my $_x ($x + $cell->{width} .. $table->{width} - 1) {
503 for my $_y ($y .. $y + $cell->{height} - 1) {
504 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
505 }
506 }
507 } elsif ($scope eq 'col') {
508 for my $_x ($x .. $x + $cell->{width} - 1) {
509 for my $_y ($y .. $table->{height} - 1) {
510 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
511 }
512 }
513 } elsif ($scope eq 'rowgroup') {
514 ## NOTE: A cell cannot exceed across a row group boundary.
515 if ($table->{row_group}->[$y] and
516 $table->{row_group}->[$y]->{height}) {
517 for my $_x ($x .. $table->{width} - 1) {
518 for my $_y ($y ..
519 $table->{row_group}->[$y]->{y} +
520 $table->{row_group}->[$y]->{height} - 1) {
521 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
522 }
523 }
524 }
525 ## TODO: Should we raise a warning?
526 } elsif ($scope eq 'colgroup') {
527 if ($table->{column_group}->[$x] and
528 $table->{column_group}->{width} and
529 $table->{column_group}->[$x]->{x} == $x) { # anchored
530 for my $_x ($x ..
531 $table->{column_group}->[$x]->{x} +
532 $table->{column_group}->[$x]->{width} - 1) {
533 for my $_y ($y .. $table->{height} - 1) {
534 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
535 }
536 }
537 }
538 ## TODO: Warning?
539 } else { # auto
540 ## 1.
541 my $header_width = $cell->{width};
542 W: for ($x + $cell->{width} .. $table->{width} - 1) {
543 my $_cell = $table->{cell}->[$_]->[$y];
544 for (@{$_cell or []}) {
545 if ($_->{element} and not $_->{is_empty}) {
546 last W; # not empty
547 }
548 }
549 $header_width++;
550 } # W
551
552 ## 2.
553 my $_x = $x + $header_width;
554
555 ## 3.
556 my $_y = $y + $cell->{height}; # $cell->{height} == header_{height}
557
558 ## 4.
559 HORIZONTAL: {
560 last HORIZONTAL if $_x == $table->{width}; # goto Vertical
561
562 ## 5. # goto Vertical
563 last HORIZONTAL
564 if $table->{cell}->[$_x]->[$y] and
565 $table->{cell}->[$_x]->[$y]->[0] and # anchored
566 $table->{cell}->[$_x]->[$y]->[0]->{is_header};
567
568 ## 6.
569 for my $_y ($y .. $y + $cell->{height} - 1) {
570 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
571 }
572
573 ## 7.
574 $_x++;
575
576 ## 8.
577 redo HORIZONTAL;
578 } # HORIZONTAL
579
580 ## 9. Vertical
581 VERTICAL: {
582 last VERTICAL if $_y == $table->{height}; # goto END
583
584 ## 10.
585 if ($table->{cell}->[$x]->[$_y]) {
586 my $h_cell = $table->{cell}->[$x]->[$_y]->[0]; # anchored cell
587 if ($h_cell and $h_cell->{is_header}) {
588 ## 10.1.
589 my $width = $h_cell->{width};
590 W: for ($h_cell->{x} + $width .. $table->{width} - 1) {
591 my $_cell = $table->{cell}->[$_]->[$y];
592 for (@{$_cell or []}) {
593 if ($_->{element} and not $_->{is_empty}) {
594 last W; # not empty
595 }
596 }
597 $width++;
598 } # W
599
600 ## 10.2. # goto end
601 last VERTICAL if $width == $header_width;
602 } # 10.
603 }
604
605 ## 11.
606 for my $_x ($x .. $x + $header_width - 1) {
607 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
608 }
609
610 ## 12.
611 $_y++;
612
613 ## 13. # goto vertical (wrong)
614 redo VERTICAL;
615 } # VERTICAL
616
617 ## 14. End
618 # (we have already done)
619 }
620 } else { # data cell
621 if ($cell->{element} and
622 $cell->{element}->has_attribute_ns (undef, 'headers')) {
623 push @headers_cell, $cell;
624 }
625 }
626 }
627 }
628 }
629
630 for my $headers_cell (@headers_cell) {
631 my @headers = split /[\x09\x0A\x0C\x0D\x20]+/,
632 $headers_cell->{element}->get_attribute_ns (undef, 'headers');
633 my %headers;
634 for my $header_id (@headers) {
635 next unless length $header_id;
636 if ($headers{$header_id}) {
637 $onerror->(type => 'duplicate token', value => $header_id,
638 node => $headers_cell->{element}->get_attribute_node_ns
639 (undef, 'headers'),
640 level => $levels->{must});
641 next;
642 }
643 $headers{$header_id} = 1;
644
645 if ($id_to_cell->{$header_id}) {
646 my $header_cell = $id_to_cell->{$header_id};
647 $headers_cell->{header}->{$header_cell->{x}}->{$header_cell->{y}} = 1;
648 } else {
649 $onerror->(type => 'no referenced header cell', value => $header_id,
650 node => $headers_cell->{element}->get_attribute_node_ns
651 (undef, 'headers'),
652 level => $levels->{must});
653 }
654 }
655 }
656
657 ## NOTE: The "tree order" constraints in the spec algorithm are irrelevant
658 ## in fact.
659
660 ## NOTE: We does not support ID attributes other than HTML "id" attribute.
661 } # assign_header
662
663 1;
664 ## $Date: 2008/08/30 15:14:32 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24