1 |
package Whatpm::SWML::Parser; |
package Whatpm::SWML::Parser; |
2 |
use strict; |
use strict; |
3 |
|
|
4 |
|
sub AA_NS () { q<http://pc5.2ch.net/test/read.cgi/hp/1096723178/aavocab#> } |
5 |
|
sub HTML_NS () { q<http://www.w3.org/1999/xhtml> } |
6 |
|
sub SW09_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:9:> } |
7 |
|
sub SW10_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:10:> } |
8 |
|
sub XML_NS () { q<http://www.w3.org/XML/1998/namespace> } |
9 |
|
|
10 |
|
sub IN_SECTION_IM () { 0 } |
11 |
|
sub IN_TABLE_ROW_IM () { 1 } |
12 |
|
sub IN_PARAGRAPH_IM () { 2 } |
13 |
|
|
14 |
|
sub BLOCK_START_TAG_TOKEN () { 1 } |
15 |
|
sub BLOCK_END_TAG_TOKEN () { 2 } |
16 |
|
sub CHARACTER_TOKEN () { 3 } |
17 |
|
sub COMMENT_PARAGRAPH_START_TOKEN () { 4 } |
18 |
|
sub EDITORIAL_NOTE_START_TOKEN () { 5 } |
19 |
|
sub ELEMENT_TOKEN () { 6 } |
20 |
|
sub EMPHASIS_TOKEN () { 7 } |
21 |
|
sub EMPTY_LINE_TOKEN () { 8 } |
22 |
|
sub END_OF_FILE_TOKEN () { 9 } |
23 |
|
sub FORM_TOKEN () { 10 } |
24 |
|
sub HEADING_START_TOKEN () { 11 } |
25 |
|
sub HEADING_END_TOKEN () { 12 } |
26 |
|
sub INLINE_START_TAG_TOKEN () { 13 } |
27 |
|
sub INLINE_MIDDLE_TAG_TOKEN () { 14 } |
28 |
|
sub INLINE_END_TAG_TOKEN () { 15 } |
29 |
|
sub LABELED_LIST_START_TOKEN () { 16 } |
30 |
|
sub LABELED_LIST_MIDDLE_TOKEN () { 17 } |
31 |
|
sub LIST_START_TOKEN () { 18 } |
32 |
|
sub PREFORMATTED_START_TOKEN () { 19 } |
33 |
|
sub PREFORMATTED_END_TOKEN () { 20 } |
34 |
|
sub QUOTATION_START_TOKEN () { 21 } |
35 |
|
sub STRONG_TOKEN () { 22 } |
36 |
|
sub TABLE_ROW_START_TOKEN () { 23 } |
37 |
|
sub TABLE_ROW_END_TOKEN () { 24 } |
38 |
|
sub TABLE_CELL_START_TOKEN () { 25 } |
39 |
|
sub TABLE_CELL_END_TOKEN () { 26 } |
40 |
|
sub TABLE_COLSPAN_CELL_TOKEN () { 27 } |
41 |
|
|
42 |
sub new ($) { |
sub new ($) { |
43 |
my $self = bless { |
my $self = bless { |
59 |
$_->set_user_data (manakai_source_line => 1); |
$_->set_user_data (manakai_source_line => 1); |
60 |
$_->set_user_data (manakai_source_column => 1); |
$_->set_user_data (manakai_source_column => 1); |
61 |
} |
} |
|
$self->{oe} = {node => $doc->document_element->last_child, |
|
|
section_depth => 0, |
|
|
quotation_depth => 0, |
|
|
list_depth => 0}; |
|
62 |
|
|
63 |
my $_onerror = $_[2] || sub { |
my $_onerror = $_[2] || sub { |
64 |
my %opt = @_; |
my %opt = @_; |
90 |
|
|
91 |
if ($$s =~ s/^\[([0-9]+)\]//) { |
if ($$s =~ s/^\[([0-9]+)\]//) { |
92 |
push @nt, {type => ELEMENT_TOKEN, |
push @nt, {type => ELEMENT_TOKEN, |
93 |
local_name => 'anchor-end', namespace => $SW09_NS, |
local_name => 'anchor-end', namespace => SW09_NS, |
94 |
anchor => $1, content => '[' . $1 . ']'}; |
anchor => $1, content => '[' . $1 . ']'}; |
95 |
$column += $+[0] - $-[0]; |
$column += $+[0] - $-[0]; |
96 |
} |
} |
97 |
|
|
98 |
while (length $$s) { |
while (length $$s) { |
99 |
if ($$s =~ s/^\[\[#([a-z-}+)//) { |
if ($$s =~ s/^\[\[#([a-z-]+)//) { |
100 |
$column = $+[0] - $-[0]; |
$column = $+[0] - $-[0]; |
101 |
my $t = {type => FORM_TOKEN, name => $1, |
my $t = {type => FORM_TOKEN, name => $1, |
102 |
line => $line, column => $column}; |
line => $line, column => $column}; |
105 |
$column += $+[0] - $-[0]; |
$column += $+[0] - $-[0]; |
106 |
} |
} |
107 |
my @param; |
my @param; |
108 |
while ($$s =~ s/^:/) { |
while ($$s =~ s/^://) { |
109 |
if ($$s =~ s/^'((?>[^'\\]|\\.)*)//) { |
if ($$s =~ s/^'((?>[^'\\]|\\.)*)//) { |
110 |
$column += 1 + $+[0] - $-[0]; |
$column += 1 + $+[0] - $-[0]; |
111 |
my $n = $1; |
my $n = $1; |
165 |
$column++ if $$s =~ s/^\]//; |
$column++ if $$s =~ s/^\]//; |
166 |
} else { |
} else { |
167 |
$t->{local_name} = 'anchor-external'; |
$t->{local_name} = 'anchor-external'; |
168 |
$t->{namespace} = $SW09_NS; |
$t->{namespace} = SW09_NS; |
169 |
} |
} |
170 |
push @nt, $t; |
push @nt, $t; |
171 |
} elsif ($$s =~ s/^\]>>([0-9]+)\]//) { |
} elsif ($$s =~ s/^\]>>([0-9]+)\]//) { |
184 |
$column += $+[0] - $-[0]; |
$column += $+[0] - $-[0]; |
185 |
} elsif ($$s =~ s/^>>([0-9]+)//) { |
} elsif ($$s =~ s/^>>([0-9]+)//) { |
186 |
push @nt, {type => ELEMENT_TOKEN, |
push @nt, {type => ELEMENT_TOKEN, |
187 |
local_name => 'anchor-internal', namespace => $SW09_NS, |
local_name => 'anchor-internal', namespace => SW09_NS, |
188 |
anchor => $1, |
anchor => $1, |
189 |
line => $line, column => $column}; |
line => $line, column => $column}; |
190 |
$column += $+[0] - $-[0]; |
$column += $+[0] - $-[0]; |
191 |
} elsif ($$s =~ s/^__&&//) { |
} elsif ($$s =~ s/^__&&//) { |
192 |
if ($$s =~ s/^(.+?)&&__//) { |
if ($$s =~ s/^(.+?)&&__//) { |
193 |
push @nt, {type => ELEMENT_TOKEN, |
push @nt, {type => ELEMENT_TOKEN, |
194 |
local_name => 'replace', namespace => $SW09_NS, |
local_name => 'replace', namespace => SW09_NS, |
195 |
by => $1, |
by => $1, |
196 |
line => $line, column => $column}; |
line => $line, column => $column}; |
197 |
$column += 4 + $+[0] - $-[0]; |
$column += 4 + $+[0] - $-[0]; |
288 |
$column = $real_column; |
$column = $real_column; |
289 |
push @nt, {type => LABELED_LIST_MIDDLE_TOKEN, |
push @nt, {type => LABELED_LIST_MIDDLE_TOKEN, |
290 |
line => $line, column => $column}; |
line => $line, column => $column}; |
291 |
$column += $+[0] - $-[0] if $data =~ s/^:[\x09\x20]*//; |
$column += $+[0] - $-[0] if $s =~ s/^:[\x09\x20]*//; |
292 |
$tokenize_text->(\$s); |
$tokenize_text->(\$s); |
293 |
} |
} |
294 |
$continuous_line = 1; |
$continuous_line = 1; |
336 |
push @nt, {type => BLOCK_END_TAG_TOKEN, tag_name => 'PRE', |
push @nt, {type => BLOCK_END_TAG_TOKEN, tag_name => 'PRE', |
337 |
line => $line, column => $column}; |
line => $line, column => $column}; |
338 |
undef $continuous_line; |
undef $continuous_line; |
339 |
break; |
last; |
340 |
} else { |
} else { |
341 |
push @nt, {type => CHARACTER_TOKEN, data => "\x0A", |
push @nt, {type => CHARACTER_TOKEN, data => "\x0A", |
342 |
line => $line, column => $column} |
line => $line, column => $column} |
412 |
undef $continuous_line; |
undef $continuous_line; |
413 |
return shift @nt; |
return shift @nt; |
414 |
} elsif ($s eq '__IMAGE__') { |
} elsif ($s eq '__IMAGE__') { |
415 |
my $image = $doc->create_element_ns ($NS_SW09, 'image'); |
my $image = $doc->create_element_ns (SW09_NS, 'image'); |
416 |
$_->set_user_data (manakai_source_line => $line); |
$image->set_user_data (manakai_source_line => $line); |
417 |
$_->set_user_data (manakai_source_column => 1); |
$image->set_user_data (manakai_source_column => 1); |
418 |
$image->text_content (join "\x0A", $s, @s); |
$image->text_content (join "\x0A", $s, @s); |
419 |
($line, $column) = ($line + @s, 1); |
($line, $column) = ($line + @s, 1); |
420 |
@s = (); |
@s = (); |
435 |
## NOTE: "Parse a magic line". |
## NOTE: "Parse a magic line". |
436 |
|
|
437 |
my $s = shift @s; |
my $s = shift @s; |
438 |
|
if ($s =~ s/^([^\x09\x20]+)//) { |
439 |
|
$column += $+[0] - $-[0]; |
440 |
|
my ($name, $version) = split m#/#, $1, 2; |
441 |
|
my $el = $doc->document_element; |
442 |
|
$el->set_attribute_ns (SW09_NS, 'sw:Name' => $name); |
443 |
|
$el->set_attribute_ns (SW09_NS, 'sw:Version' => $version) |
444 |
|
if defined $version; |
445 |
|
} |
446 |
|
|
447 |
## TODO:... |
my $head = $doc->first_child; |
448 |
|
while (length $s) { |
449 |
|
$column += $+[0] - $-[0] if $s =~ s/^[\x09\x20]+//; |
450 |
|
my $name = ''; |
451 |
|
if ($s =~ s/^([^=]*)=//) { |
452 |
|
$name = $1; |
453 |
|
$column += length $name + 1; |
454 |
|
} |
455 |
|
my $param = $doc->create_element_ns (SW09_NS, 'parameter'); |
456 |
|
$param->set_attribute (name => $name); |
457 |
|
$param->set_user_data (manakai_source_line => $line); |
458 |
|
$param->set_user_data (manakai_source_column => $column); |
459 |
|
$head->append_child ($param); |
460 |
|
|
461 |
|
$column++ if $s =~ s/^"//; |
462 |
|
if ($s =~ s/^([^"]+)//) { |
463 |
|
my $values = $1; |
464 |
|
$column += length $values; |
465 |
|
$values =~ tr/\\//d; |
466 |
|
for (split /,/, $values, -1) { |
467 |
|
my $value = $doc->create_element_ns (SW09_NS, 'value'); |
468 |
|
$value->text_content ($_); |
469 |
|
$value->set_user_data (manakai_source_line => $line); |
470 |
|
$value->set_user_data (manakai_source_column => $column); |
471 |
|
$param->append_child ($value); |
472 |
|
} |
473 |
|
} |
474 |
|
$column++ if $s =~ s/^"//; |
475 |
|
} |
476 |
|
|
477 |
$line = 2; |
$line = 2; |
478 |
$column = 0; |
$column = 1; |
479 |
} |
} |
480 |
|
|
481 |
## NOTE: Switched to the "body" mode. |
## NOTE: Switched to the "body" mode. |
482 |
|
|
483 |
|
my $oe = [{node => $doc->document_element->last_child, |
484 |
|
section_depth => 0, |
485 |
|
quotation_depth => 0, |
486 |
|
list_depth => 0}]; |
487 |
|
my $structural_elements = { |
488 |
|
body => 1, section => 1, insert => 1, delete => 1, blockquote => 1, |
489 |
|
h1 => 1, ul => 1, ol => 1, dl => 1, li => 1, dt => 1, dd => 1, |
490 |
|
table => 1, tbody => 1, tr => 1, td => 1, p => 1, 'comment-p' => 1, |
491 |
|
ed => 1, pre => 1, |
492 |
|
}; |
493 |
|
|
494 |
|
my $im = IN_SECTION_IM; |
495 |
$get_next_token->(); |
$get_next_token->(); |
496 |
|
|
497 |
|
A: { |
498 |
|
if ($im == IN_PARAGRAPH_IM) { |
499 |
|
if ($token->{type} == CHARACTER_TOKEN) { |
500 |
|
$oe->[-1]->{node}->manakai_append_text ($token->{data}); |
501 |
|
$get_next_token->(); |
502 |
|
redo A; |
503 |
|
} elsif ($token->{type} == INLINE_START_TAG_TOKEN) { |
504 |
|
if (not defined $token->{tag_name}) { |
505 |
|
my $el = $doc->create_element_ns (SW09_NS, 'anchor'); |
506 |
|
$oe->[-1]->{node}->append_child ($el); |
507 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
508 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
509 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
510 |
|
|
511 |
|
$get_next_token->(); |
512 |
|
redo A; |
513 |
|
} else { |
514 |
|
my $type = { |
515 |
|
AA => [AA_NS, 'aa'], |
516 |
|
ABBR => [HTML_NS, 'abbr'], |
517 |
|
CITE => [HTML_NS, 'cite'], |
518 |
|
CODE => [HTML_NS, 'code'], |
519 |
|
CSECTION => [SW10_NS, 'csection'], |
520 |
|
DEL => [HTML_NS, 'del'], |
521 |
|
DFN => [HTML_NS, 'dfn'], |
522 |
|
INS => [HTML_NS, 'ins'], |
523 |
|
KBD => [HTML_NS, 'kbd'], |
524 |
|
KEY => [SW10_NS, 'key'], |
525 |
|
Q => [HTML_NS, 'q'], |
526 |
|
QN => [SW10_NS, 'qn'], |
527 |
|
RUBY => [HTML_NS, 'ruby'], |
528 |
|
RUBYB => [HTML_NS, 'rubyb'], |
529 |
|
SAMP => [HTML_NS, 'samp'], |
530 |
|
SPAN => [HTML_NS, 'span'], |
531 |
|
SRC => [SW10_NS, 'src'], |
532 |
|
SUB => [HTML_NS, 'sub'], |
533 |
|
SUP => [HTML_NS, 'sup'], |
534 |
|
TIME => [HTML_NS, 'time'], |
535 |
|
VAR => [HTML_NS, 'var'], |
536 |
|
WEAK => [SW09_NS, 'weak'], |
537 |
|
}->{$token->{tag_name}} || [SW10_NS, $token->{tag_name}]; |
538 |
|
my $el = $doc->create_element_ns (SW10_NS, 'td'); |
539 |
|
$oe->[-1]->{node}->append_child ($el); |
540 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
541 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
542 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
543 |
|
|
544 |
|
$el->set_attribute (class => $token->{classes}) |
545 |
|
if defined $token->{classes}; |
546 |
|
$el->set_attribute_ns (XML_NS, 'xml:lang' => $token->{language}) |
547 |
|
if defined $token->{language}; |
548 |
|
|
549 |
|
$get_next_token->(); |
550 |
|
redo A; |
551 |
|
} |
552 |
|
} elsif ($token->{type} == INLINE_MIDDLE_TAG_TOKEN) { |
553 |
|
my ($ns, $ln, $pop) = @{{ |
554 |
|
rt => [SW10_NS, 'attrvalue', 1], |
555 |
|
title => [SW10_NS, 'attrvalue', 1], |
556 |
|
nsuri => [SW10_NS, 'attrvalue', 1], |
557 |
|
qn => [SW10_NS, 'nsuri'], |
558 |
|
ruby => [HTML_NS, 'rt'], |
559 |
|
rubyb => [HTML_NS, 'rt'], |
560 |
|
}->{$oe->[-1]->{node}->manakai_local_name} || [SW10_NS, 'title']}; |
561 |
|
pop @$oe if $pop; |
562 |
|
|
563 |
|
my $el = $doc->create_element_ns ($ns, $ln); |
564 |
|
$oe->[-1]->{node}->append_child ($el); |
565 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
566 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
567 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
568 |
|
|
569 |
|
$el->set_attribute_ns (XML_NS, 'xml:lang' => $token->{language}) |
570 |
|
if defined $token->{language}; |
571 |
|
|
572 |
|
$get_next_token->(); |
573 |
|
redo A; |
574 |
|
} elsif ($token->{type} == INLINE_END_TAG_TOKEN) { |
575 |
|
pop @$oe if { |
576 |
|
rt => 1, title => 1, nsuri => 1, attrvalue => 1, |
577 |
|
}->{$oe->[-1]->{node}->manakai_local_name}; |
578 |
|
|
579 |
|
if ({%$structural_elements, |
580 |
|
strong => 1, em => 1}->{$oe->[-1]->{node}->manakai_local_name}) { |
581 |
|
my $el = $doc->create_element_ns |
582 |
|
(SW09_NS, |
583 |
|
defined $token->{res_scheme} |
584 |
|
? 'anchor-external' : 'anchor-internal'); |
585 |
|
$oe->[-1]->{node}->append_child ($el); |
586 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
587 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
588 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
589 |
|
$el->text_content (']]'); |
590 |
|
} |
591 |
|
|
592 |
|
$oe->[-1]->{node}->set_attribute_ns (SW09_NS, 'sw:anchor', |
593 |
|
$token->{anchor}) |
594 |
|
if defined $token->{anchor}; |
595 |
|
$oe->[-1]->{node}->set_attribute_ns (SW09_NS, 'sw:resScheme', |
596 |
|
$token->{res_scheme}) |
597 |
|
if defined $token->{res_scheme}; |
598 |
|
$oe->[-1]->{node}->set_attribute_ns (SW09_NS, 'sw:resParameter', |
599 |
|
$token->{res_parameter}) |
600 |
|
if defined $token->{res_parameter}; |
601 |
|
|
602 |
|
pop @$oe; |
603 |
|
|
604 |
|
$get_next_token->(); |
605 |
|
redo A; |
606 |
|
} elsif ($token->{type} == STRONG_TOKEN) { |
607 |
|
my $el = $doc->create_element_ns (HTML_NS, 'strong'); |
608 |
|
$oe->[-1]->{node}->append_child ($el); |
609 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
610 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
611 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
612 |
|
|
613 |
|
$get_next_token->(); |
614 |
|
redo A; |
615 |
|
} elsif ($token->{type} == EMPHASIS_TOKEN) { |
616 |
|
my $el = $doc->create_element_ns (HTML_NS, 'em'); |
617 |
|
$oe->[-1]->{node}->append_child ($el); |
618 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
619 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
620 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
621 |
|
|
622 |
|
$get_next_token->(); |
623 |
|
redo A; |
624 |
|
} elsif ($token->{type} == FORM_TOKEN) { |
625 |
|
## There is an exact code clone. |
626 |
|
if ($token->{name} eq 'form') { |
627 |
|
my $el = $doc->create_element_ns (SW09_NS, 'form'); |
628 |
|
$oe->[-1]->{node}->append_child ($el); |
629 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
630 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
631 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
632 |
|
|
633 |
|
$el->set_attribute (id => $token->{id}) if defined $token->{id}; |
634 |
|
$el->set_attribute (input => shift @{$token->{parameters}}) |
635 |
|
if @{$token->{parameter}}; |
636 |
|
$el->set_attribute (template => shift @{$token->{parameters}}) |
637 |
|
if @{$token->{parameter}}; |
638 |
|
$el->set_attribute (option => shift @{$token->{parameters}}) |
639 |
|
if @{$token->{parameter}}; |
640 |
|
$el->set_attribute (parameter => join ':', @{$token->{parameters}}) |
641 |
|
if @{$token->{parameter}}; |
642 |
|
|
643 |
|
$get_next_token->(); |
644 |
|
redo A; |
645 |
|
} else { |
646 |
|
my $el = $doc->create_element_ns (SW09_NS, 'form'); |
647 |
|
$oe->[-1]->{node}->append_child ($el); |
648 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
649 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
650 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
651 |
|
|
652 |
|
$el->set_attribute (ref => $token->{name}); |
653 |
|
$el->set_attribute (id => $token->{id}) if defined $token->{id}; |
654 |
|
$el->set_attribute (parameter => join ':', @{$token->{parameters}}) |
655 |
|
if @{$token->{parameter}}; |
656 |
|
|
657 |
|
$get_next_token->(); |
658 |
|
redo A; |
659 |
|
} |
660 |
|
} elsif ($token->{type} == ELEMENT_TOKEN) { |
661 |
|
## NOTE: There is an exact code clone. |
662 |
|
my $el = $doc->create_element_ns |
663 |
|
($token->{namespace}, $token->{local_name}); |
664 |
|
$oe->[-1]->{node}->append_child ($el); |
665 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
666 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
667 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
668 |
|
|
669 |
|
$el->set_attribute_ns (SW09_NS, 'sw:anchor', $token->{anchor}) |
670 |
|
if defined $token->{anchor}; |
671 |
|
$el->set_attribute (by => $token->{by}) if defined $token->{by}; |
672 |
|
$el->set_attribute_ns (SW09_NS, 'sw:resScheme', $token->{res_scheme}) |
673 |
|
if defined $token->{res_scheme}; |
674 |
|
$el->set_attribute_ns (SW09_NS, 'sw:resParameter', |
675 |
|
$token->{res_parameter}) |
676 |
|
if defined $token->{res_parameter}; |
677 |
|
$el->text_content ($token->{content}) if defined $token->{content}; |
678 |
|
|
679 |
|
$get_next_token->(); |
680 |
|
redo A; |
681 |
|
} elsif ($token->{type} == LABELED_LIST_MIDDLE_TOKEN) { |
682 |
|
pop @$oe while not $structural_elements |
683 |
|
->{$oe->[-1]->{node}->manakai_local_name}; |
684 |
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'dt'; |
685 |
|
|
686 |
|
my $el = $doc->create_element_ns (HTML_NS, 'dt'); |
687 |
|
$oe->[-1]->{node}->append_child ($el); |
688 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
689 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
690 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
691 |
|
|
692 |
|
$get_next_token->(); |
693 |
|
redo A; |
694 |
|
} elsif ($token->{type} == HEADING_END_TOKEN) { |
695 |
|
pop @$oe while not $structural_elements |
696 |
|
->{$oe->[-1]->{node}->manakai_local_name}; |
697 |
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'h1'; |
698 |
|
|
699 |
|
$im = IN_SECTION_IM; |
700 |
|
$get_next_token->(); |
701 |
|
redo A; |
702 |
|
} elsif ($token->{type} == TABLE_CELL_END_TOKEN) { |
703 |
|
pop @$oe while not $structural_elements |
704 |
|
->{$oe->[-1]->{node}->manakai_local_name}; |
705 |
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'td'; |
706 |
|
|
707 |
|
$im = IN_TABLE_ROW_IM; |
708 |
|
$get_next_token->(); |
709 |
|
redo A; |
710 |
|
} elsif (($token->{type} == BLOCK_END_TAG_TOKEN and |
711 |
|
$token->{tag_name} eq 'PRE') or |
712 |
|
$token->{type} == PREFORMATTED_END_TOKEN) { |
713 |
|
pop @$oe while not $structural_elements |
714 |
|
->{$oe->[-1]->{node}->manakai_local_name}; |
715 |
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'pre'; |
716 |
|
|
717 |
|
$im = IN_SECTION_IM; |
718 |
|
$get_next_token->(); |
719 |
|
redo A; |
720 |
|
} else { |
721 |
|
pop @$oe while not $structural_elements |
722 |
|
->{$oe->[-1]->{node}->manakai_local_name}; |
723 |
|
|
724 |
|
$im = IN_SECTION_IM; |
725 |
|
$get_next_token->(); |
726 |
|
redo A; |
727 |
|
} |
728 |
|
} elsif ($im == IN_SECTION_IM) { |
729 |
|
if ($token->{type} == HEADING_START_TOKEN) { |
730 |
|
B: { |
731 |
|
pop @$oe and redo B |
732 |
|
if {body => 1, section => 1, insert => 1, delete => 1} |
733 |
|
->{$oe->[-1]->{node}->manakai_local_name} or |
734 |
|
$token->{depth} <= $oe->[-1]->{section_depth}; |
735 |
|
if ($token->{depth} > $oe->[-1]->{section_depth} + 1) { |
736 |
|
my $el = $doc->create_element_ns (HTML_NS, 'section'); |
737 |
|
$oe->[-1]->{node}->append_child ($el); |
738 |
|
push @$oe, {node => $el, |
739 |
|
section_depth => $oe->[-1]->{section_depth} + 1, |
740 |
|
quotation_depth => 0, list_depth => 0}; |
741 |
|
redo B; |
742 |
|
} |
743 |
|
} # B |
744 |
|
|
745 |
|
my $el = $doc->create_element_ns (HTML_NS, 'section'); |
746 |
|
$oe->[-1]->{node}->append_child ($el); |
747 |
|
push @$oe, {node => $el, |
748 |
|
section_depth => $oe->[-1]->{section_depth} + 1, |
749 |
|
quotation_depth => 0, list_depth => 0}; |
750 |
|
|
751 |
|
my $el2 = $doc->create_element_ns (HTML_NS, 'h1'); |
752 |
|
$oe->[-1]->{node}->append_child ($el2); |
753 |
|
push @$oe, {%{$oe->[-1]}, node => $el2}; |
754 |
|
|
755 |
|
$im = IN_PARAGRAPH_IM; |
756 |
|
$get_next_token->(); |
757 |
|
redo A; |
758 |
|
} elsif ($token->{type} == BLOCK_START_TAG_TOKEN and |
759 |
|
($token->{tag_name} eq 'INS' or |
760 |
|
$token->{tag_name} eq 'DEL')) { |
761 |
|
my $el = $doc->create_element_ns |
762 |
|
(SW09_NS, ($token->{tag_name} eq 'INS' ? 'insert' : 'delete')); |
763 |
|
$oe->[-1]->{node}->append_child ($el); |
764 |
|
push @$oe, {node => $el, section_depth => 0, |
765 |
|
quotation_depth => 0, list_depth => 0}; |
766 |
|
$el->set_attribute (class => $token->{classes}) |
767 |
|
if defined $token->{classes}; |
768 |
|
$get_next_token->(); |
769 |
|
redo A; |
770 |
|
} elsif ($token->{type} == QUOTATION_START_TOKEN) { |
771 |
|
B: { |
772 |
|
pop @$oe and redo B |
773 |
|
if {body => 1, section => 1, insert => 1, delete => 1, |
774 |
|
blockquote => 1} |
775 |
|
->{$oe->[-1]->{node}->manakai_local_name} or |
776 |
|
$token->{depth} <= $oe->[-1]->{quotation_depth}; |
777 |
|
if ($token->{depth} > $oe->[-1]->{quotation_depth} + 1) { |
778 |
|
my $el = $doc->create_element_ns (HTML_NS, 'blockquote'); |
779 |
|
$oe->[-1]->{node}->append_child ($el); |
780 |
|
push @$oe, {node => $el, section_depth => 0, |
781 |
|
quotation_depth => $oe->[-1]->{quotation_depth} + 1, |
782 |
|
list_depth => 0}; |
783 |
|
redo B; |
784 |
|
} |
785 |
|
} # B |
786 |
|
|
787 |
|
my $el = $doc->create_element_ns (HTML_NS, 'blockquote'); |
788 |
|
$oe->[-1]->{node}->append_child ($el); |
789 |
|
push @$oe, {node => $el, section_depth => 0, |
790 |
|
quotation_depth => $oe->[-1]->{quotation_depth} + 1, |
791 |
|
list_depth => 0}; |
792 |
|
|
793 |
|
$get_next_token->(); |
794 |
|
redo A; |
795 |
|
} elsif ($token->{type} == LIST_START_TOKEN) { |
796 |
|
my $depth = length $token->{depth}; |
797 |
|
B: { |
798 |
|
pop @$oe and redo B if $oe->[-1]->{list_depth} > $depth; |
799 |
|
if ($oe->[-1]->{list_depth} < $depth) { |
800 |
|
my $type = substr $token->{depth}, $oe->[-1]->{list_depth}; |
801 |
|
my $el = $doc->create_element_ns |
802 |
|
(HTML_NS, $type eq '-' ? 'ul' : 'ol'); |
803 |
|
$oe->[-1]->{node}->append_child ($el); |
804 |
|
push @$oe, {%{$oe->[-1]}, node => $el, |
805 |
|
list_depth => $oe->[-1]->{list_depth} + 1}; |
806 |
|
if ($oe->[-1]->{list_depth} < $depth) { |
807 |
|
my $el = $doc->create_element_ns (HTML_NS, 'li'); |
808 |
|
$oe->[-1]->{node}->append_child ($el); |
809 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
810 |
|
} |
811 |
|
redo B; |
812 |
|
} |
813 |
|
} # B |
814 |
|
|
815 |
|
pop @$oe if $oe->[-1]->{list_depth} == $depth and |
816 |
|
not {ul => 1, ol => 1}->{$oe->[-1]->{node}->manakai_local_name}; |
817 |
|
|
818 |
|
my $el = $doc->create_element_ns (HTML_NS, 'li'); |
819 |
|
$oe->[-1]->{node}->append_child ($el); |
820 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
821 |
|
|
822 |
|
$im = IN_PARAGRAPH_IM; |
823 |
|
$get_next_token->(); |
824 |
|
redo A; |
825 |
|
} elsif ($token->{type} == LABELED_LIST_START_TOKEN) { |
826 |
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'dd'; |
827 |
|
if ($oe->[-1]->{node}->manakai_local_name ne 'dl') { |
828 |
|
my $el = $doc->create_element_ns (HTML_NS, 'dl'); |
829 |
|
$oe->[-1]->{node}->append_child ($el); |
830 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
831 |
|
} |
832 |
|
|
833 |
|
my $el = $doc->create_element_ns (HTML_NS, 'dt'); |
834 |
|
$oe->[-1]->{node}->append_child ($el); |
835 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
836 |
|
|
837 |
|
$im = IN_PARAGRAPH_IM; |
838 |
|
$get_next_token->(); |
839 |
|
redo A; |
840 |
|
} elsif ($token->{type} == TABLE_ROW_START_TOKEN) { |
841 |
|
my $el = $doc->create_element_ns (HTML_NS, 'table'); |
842 |
|
$oe->[-1]->{node}->append_child ($el); |
843 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
844 |
|
|
845 |
|
$el = $doc->create_element_ns (HTML_NS, 'tbody'); |
846 |
|
$oe->[-1]->{node}->append_child ($el); |
847 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
848 |
|
|
849 |
|
$el = $doc->create_element_ns (HTML_NS, 'tr'); |
850 |
|
$oe->[-1]->{node}->append_child ($el); |
851 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
852 |
|
|
853 |
|
$im = IN_TABLE_ROW_IM; |
854 |
|
$get_next_token->(); |
855 |
|
redo A; |
856 |
|
} elsif (($token->{type} == BLOCK_START_TAG_TOKEN and |
857 |
|
$token->{tag_name} eq 'PRE') or |
858 |
|
$token->{type} == PREFORMATTED_START_TOKEN) { |
859 |
|
my $el = $doc->create_element_ns (HTML_NS, 'pre'); |
860 |
|
$oe->[-1]->{node}->append_child ($el); |
861 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
862 |
|
|
863 |
|
$el->set_attribute (class => $token->{classes}) |
864 |
|
if defined $token->{classes}; |
865 |
|
|
866 |
|
$im = IN_PARAGRAPH_IM; |
867 |
|
$get_next_token->(); |
868 |
|
redo A; |
869 |
|
} elsif ($token->{type} == COMMENT_PARAGRAPH_START_TOKEN) { |
870 |
|
my $el = $doc->create_element_ns (SW10_NS, 'comment-p'); |
871 |
|
$oe->[-1]->{node}->append_child ($el); |
872 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
873 |
|
|
874 |
|
$im = IN_PARAGRAPH_IM; |
875 |
|
$get_next_token->(); |
876 |
|
redo A; |
877 |
|
} elsif ($token->{type} == EDITORIAL_NOTE_START_TOKEN) { |
878 |
|
my $el = $doc->create_element_ns (SW10_NS, 'ed'); |
879 |
|
$oe->[-1]->{node}->append_child ($el); |
880 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
881 |
|
|
882 |
|
$im = IN_PARAGRAPH_IM; |
883 |
|
$get_next_token->(); |
884 |
|
redo A; |
885 |
|
} elsif ($token->{type} == EMPTY_LINE_TOKEN) { |
886 |
|
pop @$oe while {body => 1, section => 1, insert => 1, delete => 1} |
887 |
|
->{$oe->[-1]->{node}->manakai_local_name}; |
888 |
|
$get_next_token->(); |
889 |
|
redo A; |
890 |
|
} elsif ($token->{type} == BLOCK_END_TAG_TOKEN) { |
891 |
|
if ($token->{type} eq 'INS') { |
892 |
|
for (reverse 1..$#$oe) { |
893 |
|
if ($oe->[$_]->{node}->manakai_local_name eq 'insert') { |
894 |
|
splice @$oe, $_; |
895 |
|
last; |
896 |
|
} |
897 |
|
} |
898 |
|
} elsif ($token->{type} eq 'DEL') { |
899 |
|
for (reverse 1..$#$oe) { |
900 |
|
if ($oe->[$_]->{node}->manakai_local_name eq 'delete') { |
901 |
|
splice @$oe, $_; |
902 |
|
last; |
903 |
|
} |
904 |
|
} |
905 |
|
} else { |
906 |
|
## NOTE: Ignore the token. |
907 |
|
} |
908 |
|
} elsif ($token->{type} == FORM_TOKEN) { |
909 |
|
## There is an exact code clone. |
910 |
|
if ($token->{name} eq 'form') { |
911 |
|
my $el = $doc->create_element_ns (SW09_NS, 'form'); |
912 |
|
$oe->[-1]->{node}->append_child ($el); |
913 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
914 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
915 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
916 |
|
|
917 |
|
$el->set_attribute (id => $token->{id}) if defined $token->{id}; |
918 |
|
$el->set_attribute (input => shift @{$token->{parameters}}) |
919 |
|
if @{$token->{parameter}}; |
920 |
|
$el->set_attribute (template => shift @{$token->{parameters}}) |
921 |
|
if @{$token->{parameter}}; |
922 |
|
$el->set_attribute (option => shift @{$token->{parameters}}) |
923 |
|
if @{$token->{parameter}}; |
924 |
|
$el->set_attribute (parameter => join ':', @{$token->{parameters}}) |
925 |
|
if @{$token->{parameter}}; |
926 |
|
|
927 |
|
$get_next_token->(); |
928 |
|
redo A; |
929 |
|
} else { |
930 |
|
my $el = $doc->create_element_ns (SW09_NS, 'form'); |
931 |
|
$oe->[-1]->{node}->append_child ($el); |
932 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
933 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
934 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
935 |
|
|
936 |
|
$el->set_attribute (ref => $token->{name}); |
937 |
|
$el->set_attribute (id => $token->{id}) if defined $token->{id}; |
938 |
|
$el->set_attribute (parameter => join ':', @{$token->{parameters}}) |
939 |
|
if @{$token->{parameter}}; |
940 |
|
|
941 |
|
$get_next_token->(); |
942 |
|
redo A; |
943 |
|
} |
944 |
|
} elsif ($token->{type} == ELEMENT_TOKEN and |
945 |
|
$token->{local_name} eq 'replace') { |
946 |
|
## NOTE: There is an exact code clone. |
947 |
|
my $el = $doc->create_element_ns |
948 |
|
($token->{namespace}, $token->{local_name}); |
949 |
|
$oe->[-1]->{node}->append_child ($el); |
950 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
951 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
952 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
953 |
|
|
954 |
|
$el->set_attribute_ns (SW09_NS, 'sw:anchor', $token->{anchor}) |
955 |
|
if defined $token->{anchor}; |
956 |
|
$el->set_attribute (by => $token->{by}) if defined $token->{by}; |
957 |
|
$el->set_attribute_ns (SW09_NS, 'sw:resScheme', $token->{res_scheme}) |
958 |
|
if defined $token->{res_scheme}; |
959 |
|
$el->set_attribute_ns (SW09_NS, 'sw:resParameter', |
960 |
|
$token->{res_parameter}) |
961 |
|
if defined $token->{res_parameter}; |
962 |
|
$el->text_content ($token->{content}) if defined $token->{content}; |
963 |
|
|
964 |
|
$get_next_token->(); |
965 |
|
redo A; |
966 |
|
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
967 |
|
return; |
968 |
|
} elsif ({LABELED_LIST_MIDDLE_TOKEN => 1, |
969 |
|
HEADING_END_TOKEN => 1, |
970 |
|
PREFORMATTED_END_TOKEN => 1, |
971 |
|
TABLE_ROW_END_TOKEN => 1, |
972 |
|
TABLE_CELL_START_TOKEN => 1, |
973 |
|
TABLE_CELL_END_TOKEN => 1, |
974 |
|
TABLE_COLSPAN_CELL_TOKEN => 1}->{$token->{type}}) { |
975 |
|
## NOTE: Ignore the token. |
976 |
|
} else { |
977 |
|
my $el = $doc->create_element_ns (HTML_NS, 'p'); |
978 |
|
$oe->[-1]->{node}->append_child ($el); |
979 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
980 |
|
|
981 |
|
$im = IN_PARAGRAPH_IM; |
982 |
|
## Reprocess. |
983 |
|
redo A; |
984 |
|
} |
985 |
|
} elsif ($im == IN_TABLE_ROW_IM) { |
986 |
|
if ($token->{type} == TABLE_CELL_START_TOKEN) { |
987 |
|
my $el = $doc->create_element_ns (HTML_NS, 'td'); |
988 |
|
$oe->[-1]->{node}->append_child ($el); |
989 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
990 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
991 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
992 |
|
|
993 |
|
$im = IN_PARAGRAPH_IM; |
994 |
|
$get_next_token->(); |
995 |
|
redo A; |
996 |
|
} elsif ($token->{type} == TABLE_COLSPAN_CELL_TOKEN) { |
997 |
|
my $lc = $oe->[-1]->{node}->last_child; |
998 |
|
if ($lc and $lc->manakai_local_name eq 'td') { |
999 |
|
$lc->set_attribute |
1000 |
|
(colspan => ($lc->get_attribute ('colspan') || 0) + 1); |
1001 |
|
} else { |
1002 |
|
my $el = $doc->create_element_ns (SW10_NS, 'td'); |
1003 |
|
$oe->[-1]->{node}->append_child ($el); |
1004 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
1005 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
1006 |
|
} |
1007 |
|
|
1008 |
|
$get_next_token->(); |
1009 |
|
redo A; |
1010 |
|
} elsif ($token->{type} == TABLE_ROW_END_TOKEN) { |
1011 |
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'tr'; |
1012 |
|
$get_next_token->(); |
1013 |
|
redo A; |
1014 |
|
} elsif ($token->{type} == TABLE_ROW_START_TOKEN) { |
1015 |
|
my $el = $doc->create_element_ns (HTML_NS, 'tr'); |
1016 |
|
$oe->[-1]->{node}->append_child ($el); |
1017 |
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
1018 |
|
$el->set_user_data (manakai_source_line => $token->{line}); |
1019 |
|
$el->set_user_data (manakai_source_column => $token->{column}); |
1020 |
|
|
1021 |
|
$get_next_token->(); |
1022 |
|
redo A; |
1023 |
|
} else { |
1024 |
|
$im = IN_SECTION_IM; |
1025 |
|
## Reprocess. |
1026 |
|
redo A; |
1027 |
|
} |
1028 |
|
} else { |
1029 |
|
die "$0: Unknown insertion mode: $im"; |
1030 |
|
} |
1031 |
|
} # A |
1032 |
} # parse_char_string |
} # parse_char_string |
1033 |
|
|
1034 |
1; |
1; |