/[suikacvs]/markup/html/whatpm/Whatpm/HTMLTable.pm
Suika

Contents of /markup/html/whatpm/Whatpm/HTMLTable.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.16 - (show annotations) (download)
Sat Aug 30 14:37:46 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.15: +2 -2 lines
++ whatpm/t/ChangeLog	30 Aug 2008 14:25:03 -0000
	* content-model-4.dat: Some test data for <time> are added (c.f.
	HTML5 revision 2094).

2008-08-30  Wakaba  <wakaba@suika.fam.cx>

	* ContentType.t: Support for image/svg+xml (HTML revision 2096).

2008-08-30  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	30 Aug 2008 14:23:04 -0000
	* HTMLTable.pm: Zs is not what we want; we want White_Space! (HTML5
	revision 2094).

2008-08-30  Wakaba  <wakaba@suika.fam.cx>

	* ContentType.pm: Support for image/svg+xml (HTML5 revision 2096).

2008-08-30  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	30 Aug 2008 14:35:43 -0000
	* HTML.pm: Use White_Space instead of Zs for date or time
	string in content (HTML5 revision 2094).  Make "YYYY-MM-DDHH:MM" (that
	misses a white space or "T" literal between day and hour)
	not raise two errors.

2008-08-30  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::HTMLTable;
2 use strict;
3
4 ## An implementation of "Forming a table" algorithm in HTML5
5 sub form_table ($$$;$) {
6 my (undef, $table_el, $onerror, $levels) = @_;
7 $onerror ||= sub { };
8 $levels ||= {must => 'm'};
9
10 ## Step 1
11 my $x_width = 0;
12
13 ## Step 2
14 my $y_height = 0;
15 my $y_max_node;
16
17 ## Step 3
18 my $pending_tfoot = [];
19
20 ## Step 4
21 my $table = {
22 #caption
23 column => [],
24 column_group => [],
25 row => [], ## NOTE: HTML5 algorithm doesn't associate rows with <tr>s.
26 row_group => [],
27 cell => [],
28 height => 0,
29 width => 0,
30 element => $table_el,
31 };
32
33 my @column_has_anchored_cell;
34 my @row_has_anchored_cell;
35 my @column_generated_by;
36 my @row_generated_by;
37
38 ## Step 5
39 my @table_child = @{$table_el->child_nodes};
40 return $table unless @table_child;
41
42 ## Step 6
43 for (0..$#table_child) {
44 my $el = $table_child[$_];
45 next unless $el->node_type == 1; # ELEMENT_NODE
46 next unless $el->manakai_local_name eq 'caption';
47 my $nsuri = $el->namespace_uri;
48 next unless defined $nsuri;
49 next unless $nsuri eq q<http://www.w3.org/1999/xhtml>;
50 $table->{caption} = {element => $el};
51 splice @table_child, $_, 1, ();
52 last;
53 }
54
55 my $process_row_group;
56 my $end = sub {
57 ## Step 19 (End)
58 for (@$pending_tfoot) {
59 $process_row_group->($_);
60 }
61
62 ## Step 20
63 for (0 .. $x_width - 1) {
64 unless ($column_has_anchored_cell[$_]) {
65 if ($table->{column}->[$_]) {
66 $onerror->(type => 'column with no anchored cell',
67 node => $table->{column}->[$_]->{element},
68 level => $levels->{must});
69 } else {
70 $onerror->(type => 'colspan creates column with no anchored cell',
71 node => $column_generated_by[$_],
72 level => $levels->{must});
73 }
74 last; # only one error.
75 }
76 }
77 for (0 .. $y_height - 1) {
78 unless ($row_has_anchored_cell[$_]) {
79 if ($table->{row}->[$_]) {
80 $onerror->(type => 'row with no anchored cell',
81 node => $table->{row}->[$_]->{element},
82 level => $levels->{must});
83 } else {
84 $onerror->(type => 'rowspan creates row with no anchored cell',
85 node => $row_generated_by[$_],
86 level => $levels->{must});
87 }
88 last; # only one error.
89 }
90 }
91
92 ## Step 21
93 #return $table;
94 }; # $end
95
96 ## Step 7, 8
97 my $current_element;
98 my $current_ln;
99 NEXT_CHILD: {
100 $current_element = shift @table_child;
101 if (defined $current_element) {
102 redo NEXT_CHILD unless $current_element->node_type == 1;
103 my $nsuri = $current_element->namespace_uri;
104 redo NEXT_CHILD unless defined $nsuri and
105 $nsuri eq q<http://www.w3.org/1999/xhtml>;
106 $current_ln = $current_element->manakai_local_name;
107
108 redo NEXT_CHILD unless {
109 colgroup => 1,
110 thead => 1,
111 tbody => 1,
112 tfoot => 1,
113 tr => 1,
114 }->{$current_ln};
115 } else {
116 ## Step 6 2nd paragraph
117 $end->();
118 $table->{width} = $x_width;
119 $table->{height} = $y_height;
120 return $table;
121 }
122 } # NEXT_CHILD
123
124 ## Step 9
125 while ($current_ln eq 'colgroup') { # Step 9, Step 9.4
126 ## Step 9.1: column groups
127 my @col = grep {
128 $_->node_type == 1 and
129 defined $_->namespace_uri and
130 $_->namespace_uri eq q<http://www.w3.org/1999/xhtml> and
131 $_->manakai_local_name eq 'col'
132 } @{$current_element->child_nodes};
133 if (@col) {
134 ## Step 1
135 my $x_start = $x_width;
136
137 ## Step 2, 6
138 while (@col) {
139 my $current_column = shift @col;
140
141 ## Step 3: columns
142 my $span = 1;
143 my $col_span = $current_column->get_attribute_ns (undef, 'span');
144 ## Parse non-negative integer
145 if (defined $col_span and $col_span =~ /^[\x09-\x0D\x20]*([0-9]+)/) {
146 $span = $1 || 1;
147 }
148
149 ## Step 4, 5
150 $table->{column}->[$x_width++] = {element => $current_column}
151 for 1..$span;
152 }
153
154 ## Step 7
155 my $cg = {element => $current_element,
156 x => $x_start, y => 0,
157 width => $x_width - $x_start};
158 $table->{column_group}->[$_] = $cg for $x_start .. $x_width - 1;
159 } else { # no <col> children
160 ## Step 1
161 my $span = 1;
162 my $col_span = $current_element->get_attribute_ns (undef, 'span');
163 ## Parse non-negative integer
164 if (defined $col_span and $col_span =~ /^[\x09-\x0D\x20]*([0-9]+)/) {
165 $span = $1 || 1;
166 }
167
168 ## Step 2
169 $x_width += $span;
170
171 ## Step 3
172 my $cg = {element => $current_element,
173 x => $x_width - $span, y => 0,
174 width => $span};
175 $table->{column_group}->[$_] = $cg for $cg->{x} .. $x_width - 1;
176 }
177
178 ## Step 9.2, 9.3
179 NEXT_CHILD: {
180 $current_element = shift @table_child;
181 if (defined $current_element) {
182 redo NEXT_CHILD unless $current_element->node_type == 1;
183 my $nsuri = $current_element->namespace_uri;
184 redo NEXT_CHILD unless defined $nsuri and
185 $nsuri eq q<http://www.w3.org/1999/xhtml>;
186 $current_ln = $current_element->manakai_local_name;
187
188 redo NEXT_CHILD unless {
189 colgroup => 1,
190 thead => 1,
191 tbody => 1,
192 tfoot => 1,
193 tr => 1,
194 }->{$current_ln};
195 } else {
196 ## End of subsection
197
198 ## Step 5 of overall steps 2nd paragraph
199 $end->();
200 $table->{width} = $x_width;
201 $table->{height} = $y_height;
202 return $table;
203 }
204 } # NEXT_CHILD
205 }
206
207 ## Step 10
208 my $y_current = 0;
209
210 ## Step 11
211 my @downward_growing_cells;
212
213 my $growing_downward_growing_cells = sub {
214 for (@downward_growing_cells) {
215 for my $x ($_->[1] .. ($_->[1] + $_->[2] - 1)) {
216 $table->{cell}->[$x]->[$y_current] = [$_->[0]];
217 $_->[0]->{height}++;
218 }
219 }
220 }; # $growing_downward_growing_cells
221
222 my $process_row = sub {
223 ## Step 1
224 $y_height++ if $y_height == $y_current;
225
226 ## Step 2
227 my $x_current = 0;
228
229 ## Step 3
230 my $tr = shift;
231 $table->{row}->[$y_current] = {element => $tr};
232 my @tdth = grep {
233 $_->node_type == 1 and
234 defined $_->namespace_uri and
235 $_->namespace_uri eq q<http://www.w3.org/1999/xhtml> and
236 {td => 1, th => 1}->{$_->manakai_local_name}
237 } @{$tr->child_nodes};
238 my $current_cell = shift @tdth;
239
240 ## Step 4
241 $growing_downward_growing_cells->();
242
243 return unless $current_cell;
244 ## ISSUE: Support for empty <tr></tr> (removed at revision 1376).
245
246 CELL: while (1) {
247 ## Step 5: cells
248 $x_current++
249 while ($x_current < $x_width and
250 $table->{cell}->[$x_current]->[$y_current]);
251
252 ## Step 6
253 $x_width++ if $x_current == $x_width;
254
255 ## Step 7
256 my $colspan = 1;
257 my $attr_value = $current_cell->get_attribute_ns (undef, 'colspan');
258 if (defined $attr_value and $attr_value =~ /^[\x09-\x0D\x20]*([0-9]+)/) {
259 $colspan = $1 || 1;
260 }
261
262 ## Step 8
263 my $rowspan = 1;
264 my $attr_value = $current_cell->get_attribute_ns (undef, 'rowspan');
265 if (defined $attr_value and $attr_value =~ /^[\x09-\x0D\x20]*([0-9]+)/) {
266 $rowspan = $1;
267 }
268
269 ## Step 9
270 my $cell_grows_downward;
271 if ($rowspan == 0) {
272 $cell_grows_downward = 1;
273 $rowspan = 1;
274 }
275
276 ## Step 10
277 if ($x_width < $x_current + $colspan) {
278 @column_generated_by[$_] = $current_cell
279 for $x_width .. $x_current + $colspan - 1;
280 $x_width = $x_current + $colspan;
281 }
282
283 ## Step 11
284 if ($y_height < $y_current + $rowspan) {
285 @row_generated_by[$_] = $current_cell
286 for $y_height .. $y_current + $rowspan - 1;
287 $y_height = $y_current + $rowspan;
288 $y_max_node = $current_cell;
289 }
290
291 ## Step 12
292 my $cell = {
293 is_header => ($current_cell->manakai_local_name eq 'th'),
294 element => $current_cell,
295 x => $x_current, y => $y_current,
296 width => $colspan, height => $rowspan,
297 };
298 $column_has_anchored_cell[$x_current] = 1;
299 $row_has_anchored_cell[$y_current] = 1;
300 for my $x ($x_current .. ($x_current + $colspan - 1)) {
301 for my $y ($y_current .. ($y_current + $rowspan - 1)) {
302 unless ($table->{cell}->[$x]->[$y]) {
303 $table->{cell}->[$x]->[$y] = [$cell];
304 } else {
305 $onerror->(type => 'cell overlapping',
306 text => "$x,$y",
307 node => $current_cell,
308 level => $levels->{must});
309 push @{$table->{cell}->[$x]->[$y]}, $cell;
310 }
311 }
312 }
313
314 ## Whether the cell is an empty data cell or not
315 if (not $cell->{is_header}) {
316 $cell->{is_empty} = 1;
317 for my $node (@{$current_cell->child_nodes}) {
318 my $nt = $node->node_type;
319 if ($nt == 3 or $nt == 4) { # TEXT_NODE / CDATA_SECTION_NODE
320 if ($node->data =~ /\P{WhiteSpace}/) {
321 delete $cell->{is_empty};
322 last;
323 }
324 } elsif ($nt == 1) { # ELEMENT_NODE
325 delete $cell->{is_empty};
326 last;
327 }
328 }
329 ## NOTE: Entity references are not supported
330 }
331
332 ## Step 13
333 if ($cell_grows_downward) {
334 push @downward_growing_cells, [$cell, $x_current, $colspan];
335 }
336
337 ## Step 14
338 $x_current += $colspan;
339
340 ## Step 15-17
341 $current_cell = shift @tdth;
342 if (defined $current_cell) {
343 ## Step 16-17
344 #
345 } else {
346 ## Step 15
347 $y_current++;
348 last CELL;
349 }
350 } # CELL
351 }; # $process_row
352
353 $process_row_group = sub ($) {
354 ## Step 1
355 my $y_start = $y_height;
356
357 ## Step 2
358 for (grep {
359 $_->node_type == 1 and
360 defined $_->namespace_uri and
361 $_->namespace_uri eq q<http://www.w3.org/1999/xhtml> and
362 $_->manakai_local_name eq 'tr'
363 } @{$_[0]->child_nodes}) {
364 $process_row->($_);
365 }
366
367 ## Step 3
368 if ($y_height > $y_start) {
369 my $rg = {element => $current_element, ## ISSUE: "element being processed"? Otherwise, $current_element may be a thead element while the element being processed is a tfoot element, for example.
370 x => 0, y => $y_start,
371 height => $y_height - $y_start};
372 $table->{row_group}->[$_] = $rg for $y_start .. $y_height - 1;
373 }
374
375 ## Step 4
376 ## Ending a row group
377 ## Step 1
378 while ($y_current < $y_height) {
379 ## Step 1
380 $growing_downward_growing_cells->();
381
382 ## Step 2
383 $y_current++;
384 }
385 ## Step 2
386 @downward_growing_cells = ();
387 }; # $process_row_group
388
389 ## Step 12: rows
390 unshift @table_child, $current_element;
391 ROWS: {
392 NEXT_CHILD: {
393 $current_element = shift @table_child;
394 if (defined $current_element) {
395 redo NEXT_CHILD unless $current_element->node_type == 1;
396 my $nsuri = $current_element->namespace_uri;
397 redo NEXT_CHILD unless defined $nsuri and
398 $nsuri eq q<http://www.w3.org/1999/xhtml>;
399 $current_ln = $current_element->manakai_local_name;
400
401 redo NEXT_CHILD unless {
402 thead => 1,
403 tbody => 1,
404 tfoot => 1,
405 tr => 1,
406 }->{$current_ln};
407 } else {
408 ## Step 6 2nd paragraph
409 $end->();
410 $table->{width} = $x_width;
411 $table->{height} = $y_height;
412 return $table;
413 }
414 } # NEXT_CHILD
415
416 ## Step 13
417 if ($current_ln eq 'tr') {
418 $process_row->($current_element);
419 # advance (done at the first of ROWS)
420 redo ROWS;
421 }
422
423 ## Step 14
424 ## Ending a row group
425 ## Step 1
426 while ($y_current < $y_height) {
427 ## Step 1
428 $growing_downward_growing_cells->();
429
430 ## Step 2
431 $y_current++;
432 }
433 ## Step 2
434 @downward_growing_cells = ();
435
436 ## Step 15
437 if ($current_ln eq 'tfoot') {
438 push @$pending_tfoot, $current_element;
439 # advance (done at the top of ROWS)
440 redo ROWS;
441 }
442
443 ## Step 16
444 # thead or tbody
445 $process_row_group->($current_element);
446
447 ## Step 17
448 # Advance (done at the top of ROWS).
449
450 ## Step 18
451 redo ROWS;
452 } # ROWS
453
454 $end->();
455 $table->{width} = $x_width;
456 $table->{height} = $y_height;
457 return $table;
458 } # form_table
459
460 sub assign_header ($$;$$) {
461 my (undef, $table, $onerror, $levels) = @_;
462 $onerror ||= sub { };
463 $levels ||= {must => 'm'};
464
465 my $assign_header = sub ($$$) {
466 my $_cell = shift;
467 my ($x, $y) = @_;
468
469 for my $__cell (@{$_cell or []}) {
470 if ($__cell and $__cell->{element} and
471 not $__cell->{is_header} and
472 not $__cell->{element}->has_attribute_ns (undef, 'headers')) {
473 $__cell->{header}->{$x}->{$y} = 1;
474 }
475 }
476 }; # $assign_header
477
478 my @headers_cell;
479 my $id_to_cell = {};
480 ## ISSUE: ID duplication, non-TH reference
481
482 for my $x (0 .. $table->{width} - 1) {
483 for my $y (0 .. $table->{height} - 1) {
484 my $cell = $table->{cell}->[$x]->[$y];
485 $cell = $cell->[0] if $cell; # anchored cell is always ->{cell}[][][0].
486 next if $cell->{x} != $x;
487 next if $cell->{y} != $y;
488 if ($cell) {
489 if ($cell->{is_header}) {
490 my $id = $cell->{element}->get_attribute_ns (undef, 'id');
491 if (defined $id and not $id_to_cell->{$id}) {
492 $id_to_cell->{$id} = $cell;
493 }
494
495 my $scope = $cell->{element}->get_attribute_ns (undef, 'scope');
496 $scope = $scope ? lc $scope : ''; ## TODO: case
497 if ($scope eq 'row') {
498 for my $_x ($x + $cell->{width} .. $table->{width} - 1) {
499 for my $_y ($y .. $y + $cell->{height} - 1) {
500 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
501 }
502 }
503 } elsif ($scope eq 'col') {
504 for my $_x ($x .. $x + $cell->{width} - 1) {
505 for my $_y ($y .. $table->{height} - 1) {
506 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
507 }
508 }
509 } elsif ($scope eq 'rowgroup') {
510 ## NOTE: A cell cannot exceed across a row group boundary.
511 if ($table->{row_group}->[$y] and
512 $table->{row_group}->[$y]->{height}) {
513 for my $_x ($x .. $table->{width} - 1) {
514 for my $_y ($y ..
515 $table->{row_group}->[$y]->{y} +
516 $table->{row_group}->[$y]->{height} - 1) {
517 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
518 }
519 }
520 }
521 ## TODO: Should we raise a warning?
522 } elsif ($scope eq 'colgroup') {
523 if ($table->{column_group}->[$x] and
524 $table->{column_group}->{width} and
525 $table->{column_group}->[$x]->{x} == $x) { # anchored
526 for my $_x ($x ..
527 $table->{column_group}->[$x]->{x} +
528 $table->{column_group}->[$x]->{width} - 1) {
529 for my $_y ($y .. $table->{height} - 1) {
530 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
531 }
532 }
533 }
534 ## TODO: Warning?
535 } else { # auto
536 ## 1.
537 my $header_width = $cell->{width};
538 W: for ($x + $cell->{width} .. $table->{width} - 1) {
539 my $_cell = $table->{cell}->[$_]->[$y];
540 for (@{$_cell or []}) {
541 if ($_->{element} and not $_->{is_empty}) {
542 last W; # not empty
543 }
544 }
545 $header_width++;
546 } # W
547
548 ## 2.
549 my $_x = $x + $header_width;
550
551 ## 3.
552 HORIZONTAL: {
553 last HORIZONTAL if $_x == $table->{width}; # goto Vertical
554
555 ## 4. # goto Vertical
556 last HORIZONTAL
557 if $table->{cell}->[$_x]->[$y] and
558 $table->{cell}->[$_x]->[$y]->[0] and # anchored
559 $table->{cell}->[$_x]->[$y]->[0]->{is_header};
560
561 ## 5.
562 for my $_y ($y .. $y + $cell->{height} - 1) {
563 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
564 }
565
566 ## 6.
567 $_x++;
568
569 ## 7.
570 redo HORIZONTAL;
571 } # HORIZONTAL
572
573 ## 8. Vertical
574 my $_y = $y + $cell->{height};
575
576 VERTICAL: {
577 ## 9. # goto END
578 last VERTICAL if $_y == $table->{height};
579
580 ## 10.
581 if ($table->{cell}->[$x]->[$_y]) {
582 my $h_cell = $table->{cell}->[$x]->[$_y]->[0]; # anchored cell
583 if ($h_cell and $h_cell->{is_header}) {
584 ## 10.1.
585 my $width = $h_cell->{width};
586 W: for ($h_cell->{x} + $width .. $table->{width} - 1) {
587 my $_cell = $table->{cell}->[$_]->[$y];
588 for (@{$_cell or []}) {
589 if ($_->{element} and not $_->{is_empty}) {
590 last W; # not empty
591 }
592 }
593 $width++;
594 } # W
595
596 ## 10.2. # goto end
597 last VERTICAL if $width == $header_width;
598 } # 10.
599 }
600
601 ## 11.
602 for my $_x ($x .. $x + $header_width - 1) {
603 $assign_header->($table->{cell}->[$_x]->[$_y] => $x, $y);
604 }
605
606 ## 12.
607 $_y++;
608
609 ## 13. # goto vertical (wrong)
610 redo VERTICAL;
611 } # VERTICAL
612
613 ## 14. End
614 # (we have already done)
615 }
616 } else { # data cell
617 if ($cell->{element} and
618 $cell->{element}->has_attribute_ns (undef, 'headers')) {
619 push @headers_cell, $cell;
620 }
621 }
622 }
623 }
624 }
625
626 for my $headers_cell (@headers_cell) {
627 my @headers = split /[\x09-\x0D\x20]+/,
628 $headers_cell->{element}->get_attribute_ns (undef, 'headers');
629 my %headers;
630 for my $header_id (@headers) {
631 next unless length $header_id;
632 if ($headers{$header_id}) {
633 $onerror->(type => 'duplicate token', value => $header_id,
634 node => $headers_cell->{element}->get_attribute_node_ns
635 (undef, 'headers'),
636 level => $levels->{must});
637 next;
638 }
639 $headers{$header_id} = 1;
640
641 if ($id_to_cell->{$header_id}) {
642 my $header_cell = $id_to_cell->{$header_id};
643 $headers_cell->{header}->{$header_cell->{x}}->{$header_cell->{y}} = 1;
644 } else {
645 $onerror->(type => 'no referenced header cell', value => $header_id,
646 node => $headers_cell->{element}->get_attribute_node_ns
647 (undef, 'headers'),
648 level => $levels->{must});
649 }
650 }
651 }
652
653 ## NOTE: The "tree order" constraints in the spec algorithm are irrelevant
654 ## in fact.
655
656 ## NOTE: We does not support ID attributes other than HTML "id" attribute.
657 } # assign_header
658
659 1;
660 ## $Date: 2008/08/15 14:13:42 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24