1 |
wakaba |
1.1 |
package Whatpm::SWML::Parser; |
2 |
|
|
use strict; |
3 |
|
|
|
4 |
wakaba |
1.2 |
sub AA_NS () { q<http://pc5.2ch.net/test/read.cgi/hp/1096723178/aavocab#> } |
5 |
|
|
sub HTML_NS () { q<http://www.w3.org/1999/xhtml> } |
6 |
|
|
sub SW09_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:9:> } |
7 |
|
|
sub SW10_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:10:> } |
8 |
|
|
sub XML_NS () { q<http://www.w3.org/XML/1998/namespace> } |
9 |
|
|
|
10 |
|
|
sub IN_SECTION_IM () { 0 } |
11 |
|
|
sub IN_TABLE_ROW_IM () { 1 } |
12 |
|
|
sub IN_PARAGRAPH_IM () { 2 } |
13 |
|
|
|
14 |
|
|
sub BLOCK_START_TAG_TOKEN () { 1 } |
15 |
|
|
sub BLOCK_END_TAG_TOKEN () { 2 } |
16 |
|
|
sub CHARACTER_TOKEN () { 3 } |
17 |
|
|
sub COMMENT_PARAGRAPH_START_TOKEN () { 4 } |
18 |
|
|
sub EDITORIAL_NOTE_START_TOKEN () { 5 } |
19 |
|
|
sub ELEMENT_TOKEN () { 6 } |
20 |
|
|
sub EMPHASIS_TOKEN () { 7 } |
21 |
|
|
sub EMPTY_LINE_TOKEN () { 8 } |
22 |
|
|
sub END_OF_FILE_TOKEN () { 9 } |
23 |
|
|
sub FORM_TOKEN () { 10 } |
24 |
|
|
sub HEADING_START_TOKEN () { 11 } |
25 |
|
|
sub HEADING_END_TOKEN () { 12 } |
26 |
|
|
sub INLINE_START_TAG_TOKEN () { 13 } |
27 |
|
|
sub INLINE_MIDDLE_TAG_TOKEN () { 14 } |
28 |
|
|
sub INLINE_END_TAG_TOKEN () { 15 } |
29 |
|
|
sub LABELED_LIST_START_TOKEN () { 16 } |
30 |
|
|
sub LABELED_LIST_MIDDLE_TOKEN () { 17 } |
31 |
|
|
sub LIST_START_TOKEN () { 18 } |
32 |
|
|
sub PREFORMATTED_START_TOKEN () { 19 } |
33 |
|
|
sub PREFORMATTED_END_TOKEN () { 20 } |
34 |
|
|
sub QUOTATION_START_TOKEN () { 21 } |
35 |
|
|
sub STRONG_TOKEN () { 22 } |
36 |
|
|
sub TABLE_ROW_START_TOKEN () { 23 } |
37 |
|
|
sub TABLE_ROW_END_TOKEN () { 24 } |
38 |
|
|
sub TABLE_CELL_START_TOKEN () { 25 } |
39 |
|
|
sub TABLE_CELL_END_TOKEN () { 26 } |
40 |
|
|
sub TABLE_COLSPAN_CELL_TOKEN () { 27 } |
41 |
wakaba |
1.1 |
|
42 |
|
|
sub new ($) { |
43 |
|
|
my $self = bless { |
44 |
|
|
}, $_[0]; |
45 |
|
|
return $self; |
46 |
|
|
} # new |
47 |
|
|
|
48 |
|
|
sub parse_char_string ($$$;$) { |
49 |
|
|
my $self = shift; |
50 |
|
|
my @s = split /\x0D\x0A?|\x0A/, ref $_[0] ? ${$_[0]} : $_[0], -1; |
51 |
|
|
|
52 |
|
|
my $doc = $_[1]; |
53 |
wakaba |
1.4 |
@{$doc->child_nodes} = (); |
54 |
|
|
my $html_el = $doc->create_element_ns (HTML_NS, [undef, 'html']); |
55 |
|
|
$doc->append_child ($html_el); |
56 |
|
|
$html_el->set_attribute_ns |
57 |
|
|
('http://www.w3.org/2000/xmlns/', [undef, 'xmlns'] => HTML_NS); |
58 |
|
|
my $head_el = $doc->create_element_ns (HTML_NS, [undef, 'head']); |
59 |
|
|
$html_el->append_child ($head_el); |
60 |
|
|
my $body_el = $doc->create_element_ns (HTML_NS, [undef, 'body']); |
61 |
|
|
$html_el->append_child ($body_el); |
62 |
|
|
for ($doc, $html_el, $head_el, $body_el) { |
63 |
wakaba |
1.1 |
$_->set_user_data (manakai_source_line => 1); |
64 |
|
|
$_->set_user_data (manakai_source_column => 1); |
65 |
|
|
} |
66 |
|
|
|
67 |
|
|
my $_onerror = $_[2] || sub { |
68 |
|
|
my %opt = @_; |
69 |
|
|
my $r = 'Line ' . $opt{line} . ' column ' . $opt{column} . ': '; |
70 |
|
|
|
71 |
|
|
if ($opt{token}) { |
72 |
|
|
$r .= 'Token ' . (defined $opt{token}->{value} |
73 |
|
|
? $opt{token}->{value} : $opt{token}->{type}) . ': '; |
74 |
|
|
} |
75 |
|
|
|
76 |
|
|
$r .= $opt{type} . ';' . $opt{level}; |
77 |
|
|
|
78 |
|
|
warn $r . "\n"; |
79 |
|
|
}; # $_onerror |
80 |
|
|
|
81 |
|
|
my $line = 0; |
82 |
|
|
my $column = 0; |
83 |
|
|
my $token; |
84 |
|
|
my @nt; |
85 |
|
|
|
86 |
|
|
my $onerror = sub { |
87 |
|
|
$_onerror->(line => $line, column => $column, token => $token, @_); |
88 |
|
|
}; # $onerror |
89 |
|
|
|
90 |
|
|
my $continuous_line; |
91 |
|
|
|
92 |
|
|
my $tokenize_text = sub { |
93 |
|
|
my $s = shift; # ref |
94 |
|
|
|
95 |
|
|
if ($$s =~ s/^\[([0-9]+)\]//) { |
96 |
|
|
push @nt, {type => ELEMENT_TOKEN, |
97 |
wakaba |
1.2 |
local_name => 'anchor-end', namespace => SW09_NS, |
98 |
wakaba |
1.1 |
anchor => $1, content => '[' . $1 . ']'}; |
99 |
|
|
$column += $+[0] - $-[0]; |
100 |
|
|
} |
101 |
|
|
|
102 |
|
|
while (length $$s) { |
103 |
wakaba |
1.2 |
if ($$s =~ s/^\[\[#([a-z-]+)//) { |
104 |
wakaba |
1.1 |
$column = $+[0] - $-[0]; |
105 |
|
|
my $t = {type => FORM_TOKEN, name => $1, |
106 |
|
|
line => $line, column => $column}; |
107 |
|
|
if ($$s =~ s/^\(([^()\\]*)\)//) { |
108 |
|
|
$t->{id} = $1; |
109 |
|
|
$column += $+[0] - $-[0]; |
110 |
|
|
} |
111 |
|
|
my @param; |
112 |
wakaba |
1.2 |
while ($$s =~ s/^://) { |
113 |
wakaba |
1.1 |
if ($$s =~ s/^'((?>[^'\\]|\\.)*)//) { |
114 |
|
|
$column += 1 + $+[0] - $-[0]; |
115 |
|
|
my $n = $1; |
116 |
|
|
$n =~ tr/\\//d; |
117 |
|
|
push @param, $n; |
118 |
|
|
$column++ if $$s =~ s/\A\\\z//; |
119 |
|
|
$column++ if $$s =~ s/^'//; |
120 |
|
|
} elsif ($$s =~ s/^([^':][^:]*)//) { |
121 |
|
|
$column += 1 + $+[0] - $-[0]; |
122 |
|
|
push @param, $1; |
123 |
|
|
} |
124 |
|
|
} |
125 |
|
|
$t->{parameters} = \@param; |
126 |
|
|
$column += 2 if $$s =~ s/^\]\]//; |
127 |
|
|
push @nt, $t; |
128 |
|
|
} elsif ($$s =~ s/^\[\[//) { |
129 |
|
|
push @nt, {type => INLINE_START_TAG_TOKEN}; |
130 |
|
|
$column += 2; |
131 |
|
|
} elsif ($$s =~ s/^\[([A-Z]+)(?>\(([^()\\]*)\))?(?>\@[0-9A-Za-z-]*)?\[//) { |
132 |
|
|
push @nt, {type => INLINE_START_TAG_TOKEN, |
133 |
|
|
tag_name => $1, classes => $2, language => $3, |
134 |
|
|
line => $line, column => $column}; |
135 |
|
|
$column += $+[0] - $-[0]; |
136 |
|
|
} elsif ($$s =~ s/^\]\]//) { |
137 |
|
|
push @nt, {type => INLINE_END_TAG_TOKEN, |
138 |
|
|
line => $line, column => $column}; |
139 |
|
|
$column += 2; |
140 |
|
|
} elsif ($$s =~ s/^(\]?)<([0-9A-Za-z%+._-]+)://) { |
141 |
|
|
my $t = {type => $1 ? INLINE_END_TAG_TOKEN : ELEMENT_TOKEN, |
142 |
|
|
res_scheme => $2, res_parameter => '', |
143 |
|
|
line => $line, column => $column}; |
144 |
|
|
$column += $+[0] - $-[0]; |
145 |
|
|
|
146 |
|
|
while (length $$s) { |
147 |
|
|
if ($$s =~ s/^([^>"]+)//) { |
148 |
|
|
$t->{res_parameter} .= $1; |
149 |
|
|
$column += $+[0] - $-[0]; |
150 |
|
|
} elsif ($$s =~ s/^("(?>[^"\\]|\\.)*)//) { |
151 |
|
|
$t->{res_parameter} .= $1; |
152 |
|
|
$column += $+[0] - $-[0]; |
153 |
|
|
$column++ if $$s =~ s/\A\\\z//; |
154 |
|
|
$column++ if $$s =~ s/^"//; |
155 |
|
|
} else { |
156 |
|
|
last; |
157 |
|
|
} |
158 |
|
|
} |
159 |
|
|
|
160 |
|
|
$column++ if $$s =~ s/^>//; |
161 |
|
|
|
162 |
|
|
$t->{content} = $t->{res_scheme} . ':' . $t->{res_parameter}; |
163 |
|
|
if ($t->{res_scheme} !~ /[A-Z]/) { |
164 |
|
|
$t->{res_parameter} = $t->{content}; |
165 |
|
|
$t->{res_scheme} = 'URI'; |
166 |
|
|
} |
167 |
|
|
|
168 |
|
|
if ($t->{type} == INLINE_END_TAG_TOKEN) { |
169 |
|
|
$column++ if $$s =~ s/^\]//; |
170 |
|
|
} else { |
171 |
|
|
$t->{local_name} = 'anchor-external'; |
172 |
wakaba |
1.2 |
$t->{namespace} = SW09_NS; |
173 |
wakaba |
1.1 |
} |
174 |
|
|
push @nt, $t; |
175 |
|
|
} elsif ($$s =~ s/^\]>>([0-9]+)\]//) { |
176 |
|
|
push @nt, {type => INLINE_END_TAG_TOKEN, |
177 |
|
|
anchor => $1, |
178 |
|
|
line => $line, column => $column}; |
179 |
|
|
$column += $+[0] - $-[0]; |
180 |
|
|
} elsif ($$s =~ s/^\][\x09\x20]*(?>\@([0-9a-zA-Z-]*))?\[//) { |
181 |
|
|
push @nt, {type => INLINE_MIDDLE_TAG_TOKEN, |
182 |
|
|
language => $1, |
183 |
|
|
line => $line, column => $column}; |
184 |
|
|
$column += $+[0] - $-[0]; |
185 |
|
|
} elsif ($$s =~ s/\^''('?)//) { |
186 |
|
|
push @nt, {type => $1 ? STRONG_TOKEN : EMPHASIS_TOKEN, |
187 |
|
|
line => $line, column => $column}; |
188 |
|
|
$column += $+[0] - $-[0]; |
189 |
|
|
} elsif ($$s =~ s/^>>([0-9]+)//) { |
190 |
|
|
push @nt, {type => ELEMENT_TOKEN, |
191 |
wakaba |
1.2 |
local_name => 'anchor-internal', namespace => SW09_NS, |
192 |
wakaba |
1.1 |
anchor => $1, |
193 |
|
|
line => $line, column => $column}; |
194 |
|
|
$column += $+[0] - $-[0]; |
195 |
|
|
} elsif ($$s =~ s/^__&&//) { |
196 |
|
|
if ($$s =~ s/^(.+?)&&__//) { |
197 |
|
|
push @nt, {type => ELEMENT_TOKEN, |
198 |
wakaba |
1.2 |
local_name => 'replace', namespace => SW09_NS, |
199 |
wakaba |
1.1 |
by => $1, |
200 |
|
|
line => $line, column => $column}; |
201 |
|
|
$column += 4 + $+[0] - $-[0]; |
202 |
|
|
} else { |
203 |
|
|
push @nt, {type => CHARACTER_TOKEN, |
204 |
|
|
data => '__&&', |
205 |
|
|
line => $line, column => $column}; |
206 |
|
|
$column += 4; |
207 |
|
|
} |
208 |
|
|
} elsif ($$s =~ s/^([^<>\['_]+)//) { |
209 |
|
|
push @nt, {type => CHARACTER_TOKEN, data => $1, |
210 |
|
|
line => $line, column => $column}; |
211 |
|
|
$column += $+[0] - $-[0]; |
212 |
|
|
} else { |
213 |
|
|
push @nt, {type => CHARACTER_TOKEN, data => substr ($$s, 0, 1), |
214 |
|
|
line => $line, column => $column}; |
215 |
|
|
substr ($$s, 0, 1) = ''; |
216 |
|
|
$column++; |
217 |
|
|
} |
218 |
|
|
} |
219 |
|
|
}; # $tokenize_text |
220 |
|
|
|
221 |
|
|
my $get_next_token = sub { |
222 |
|
|
if (@nt) { |
223 |
|
|
return shift @nt; |
224 |
|
|
} |
225 |
|
|
|
226 |
|
|
if (not @s) { |
227 |
|
|
return {type => END_OF_FILE_TOKEN, line => $line, column => $column}; |
228 |
|
|
} |
229 |
|
|
|
230 |
|
|
my $s = shift @s; |
231 |
|
|
($line, $column) = ($line + 1, 1); |
232 |
|
|
if ($s eq '') { |
233 |
|
|
undef $continuous_line; |
234 |
|
|
return {type => EMPTY_LINE_TOKEN, line => $line, column => $column}; |
235 |
|
|
} elsif ($s =~ /^[\x09\x20]/) { |
236 |
|
|
push @nt, {type => PREFORMATTED_START_TOKEN, |
237 |
|
|
line => $line, column => $column}; |
238 |
|
|
$tokenize_text->(\$s); |
239 |
|
|
while (@s) { |
240 |
|
|
my $s = shift @s; |
241 |
|
|
($line, $column) = ($line + 1, 1); |
242 |
|
|
if ($s eq '') { |
243 |
|
|
push @nt, {type => PREFORMATTED_END_TOKEN, |
244 |
|
|
line => $line, column => $column}; |
245 |
|
|
unshift @s, $s; |
246 |
|
|
$line--; |
247 |
|
|
last; |
248 |
|
|
} elsif ($s =~ /\A\](INS|DEL)\][\x09\x20]*\z/) { |
249 |
|
|
push @nt, {type => PREFORMATTED_END_TOKEN, |
250 |
|
|
line => $line, column => $column}; |
251 |
|
|
push @nt, {type => BLOCK_END_TAG_TOKEN, tag_name => $1, |
252 |
|
|
line => $line, column => $column}; |
253 |
|
|
last; |
254 |
|
|
} else { |
255 |
|
|
push @nt, {type => CHARACTER_TOKEN, data => "\x0A", |
256 |
|
|
line => $line, column => $column}; |
257 |
|
|
$tokenize_text->(\$s); |
258 |
|
|
} |
259 |
|
|
} |
260 |
|
|
return shift @nt; |
261 |
wakaba |
1.3 |
} elsif ($s =~ s/^(\*+)[\x09\x20]*//) { |
262 |
wakaba |
1.1 |
push @nt, {type => HEADING_START_TOKEN, depth => length $1, |
263 |
|
|
line => $line, column => $column}; |
264 |
|
|
$column += $+[0] - $-[0]; |
265 |
|
|
$tokenize_text->(\$s); |
266 |
|
|
push @nt, {type => HEADING_END_TOKEN, |
267 |
|
|
line => $line, column => $column}; |
268 |
|
|
undef $continuous_line; |
269 |
|
|
return shift @nt; |
270 |
wakaba |
1.3 |
} elsif ($s =~ s/^([-=]+)[\x09\x20]*//) { |
271 |
wakaba |
1.1 |
push @nt, {type => LIST_START_TOKEN, depth => $1, |
272 |
|
|
line => $line, column => $column}; |
273 |
|
|
$column += $+[0] - $-[0]; |
274 |
|
|
$tokenize_text->(\$s); |
275 |
|
|
$continuous_line = 1; |
276 |
|
|
return shift @nt; |
277 |
|
|
} elsif ($s =~ s/^:([^:]*)//) { |
278 |
|
|
my $name = $1; |
279 |
|
|
if ($s eq '') { |
280 |
|
|
push @nt, {type => CHARACTER_TOKEN, data => ':', |
281 |
|
|
line => $line, column => $column}; |
282 |
|
|
$column++; |
283 |
|
|
$tokenize_text->(\$name); |
284 |
|
|
} else { |
285 |
|
|
my $real_column = $column + 1 + length $name; |
286 |
|
|
push @nt, {type => LABELED_LIST_START_TOKEN, |
287 |
|
|
line => $line, column => $column}; |
288 |
|
|
$name =~ s/\A[\x09\x20]*//; |
289 |
|
|
$column += 1 + $+[0] - $-[0]; |
290 |
|
|
$name =~ s/[\x09\x20]+\z//; |
291 |
|
|
$tokenize_text->(\$s); |
292 |
|
|
$column = $real_column; |
293 |
|
|
push @nt, {type => LABELED_LIST_MIDDLE_TOKEN, |
294 |
|
|
line => $line, column => $column}; |
295 |
wakaba |
1.2 |
$column += $+[0] - $-[0] if $s =~ s/^:[\x09\x20]*//; |
296 |
wakaba |
1.1 |
$tokenize_text->(\$s); |
297 |
|
|
} |
298 |
|
|
$continuous_line = 1; |
299 |
|
|
return shift @nt; |
300 |
|
|
} elsif ($s =~ s/^(>+)//) { |
301 |
|
|
my $depth = length $1; |
302 |
|
|
if ($depth == 2 and $s =~ /^[0-9]/) { |
303 |
|
|
push @nt, {type => CHARACTER_TOKEN, data => "\x0A", |
304 |
|
|
line => $line, column => $column} |
305 |
|
|
if $continuous_line; |
306 |
|
|
$s = '>>' . $s; |
307 |
|
|
$tokenize_text->(\$s); |
308 |
|
|
} else { |
309 |
|
|
push @nt, {type => QUOTATION_START_TOKEN, depth => $depth, |
310 |
|
|
line => $line, column => $column}; |
311 |
|
|
$column += $depth; |
312 |
|
|
$column += $+[0] - $-[0] if $s =~ s/^[\x09\x20]+//; |
313 |
|
|
if ($s =~ s/^\@\@[\x09\x20]*//) { |
314 |
|
|
push @nt, {type => EDITORIAL_NOTE_START_TOKEN, |
315 |
|
|
line => $line, column => $column}; |
316 |
|
|
$column += $+[0] - $-[0]; |
317 |
|
|
} elsif ($s =~ s/^;;[\x09\x20]*//) { |
318 |
|
|
push @nt, {type => COMMENT_PARAGRAPH_START_TOKEN, |
319 |
|
|
line => $line, column => $column}; |
320 |
|
|
$column += $+[0] - $-[0]; |
321 |
|
|
} |
322 |
|
|
$tokenize_text->(\$s); |
323 |
|
|
} |
324 |
|
|
$continuous_line = 1; |
325 |
|
|
return shift @nt; |
326 |
|
|
} elsif ($s =~ /\A\[(INS|DEL)(?>\(([^()\\]*)\))?\[[\x09\x20]*\z/) { |
327 |
|
|
undef $continuous_line; |
328 |
|
|
return {type => BLOCK_START_TAG_TOKEN, tag_name => $1, |
329 |
|
|
classes => $2, |
330 |
|
|
line => $line, column => $column}; |
331 |
|
|
} elsif ($s =~ /\A\[PRE(?>\(([^()\\]*)\))?\[[\x09\x20]*\z/) { |
332 |
|
|
undef $continuous_line; |
333 |
|
|
push @nt, {type => BLOCK_START_TAG_TOKEN, tag_name => 'PRE', |
334 |
|
|
classes => $1, |
335 |
|
|
line => $line, column => $column}; |
336 |
|
|
while (@s) { |
337 |
|
|
my $s = shift @s; |
338 |
|
|
($line, $column) = ($line + 1, 1); |
339 |
|
|
if ($s =~ /\A\]PRE\][\x09\x20]*\z/) { |
340 |
|
|
push @nt, {type => BLOCK_END_TAG_TOKEN, tag_name => 'PRE', |
341 |
|
|
line => $line, column => $column}; |
342 |
|
|
undef $continuous_line; |
343 |
wakaba |
1.2 |
last; |
344 |
wakaba |
1.1 |
} else { |
345 |
|
|
push @nt, {type => CHARACTER_TOKEN, data => "\x0A", |
346 |
|
|
line => $line, column => $column} |
347 |
|
|
if $continuous_line; |
348 |
|
|
$tokenize_text->(\$s); |
349 |
|
|
$continuous_line = 1; |
350 |
|
|
} |
351 |
|
|
} |
352 |
|
|
return shift @nt; |
353 |
|
|
} elsif ($s =~ s/^\@\@[\x09\x20]*//) { |
354 |
|
|
push @nt, {type => EDITORIAL_NOTE_START_TOKEN, |
355 |
|
|
line => $line, column => $column}; |
356 |
|
|
$column += $+[0] - $-[0]; |
357 |
|
|
$tokenize_text->(\$s); |
358 |
|
|
$continuous_line = 1; |
359 |
|
|
return shift @nt; |
360 |
|
|
} elsif ($s =~ s/^;;[\x09\x20]*//) { |
361 |
|
|
push @nt, {type => COMMENT_PARAGRAPH_START_TOKEN, |
362 |
|
|
line => $line, column => $column}; |
363 |
|
|
$column += $+[0] - $-[0]; |
364 |
|
|
$tokenize_text->(\$s); |
365 |
|
|
undef $continuous_line; |
366 |
|
|
return shift @nt; |
367 |
|
|
} elsif ($s =~ /\A\](INS|DEL)\][\x09\x20]*\z/) { |
368 |
|
|
$continuous_line = 1; |
369 |
|
|
return {type => BLOCK_END_TAG_TOKEN, tag_name => $1, |
370 |
|
|
line => $line, column => $column}; |
371 |
|
|
} elsif ($s =~ /^,/) { |
372 |
|
|
push @nt, {type => TABLE_ROW_START_TOKEN, |
373 |
|
|
line => $line, column => $column}; |
374 |
|
|
while ($s =~ s/^,[\x09\x20]*//) { |
375 |
|
|
$column += $+[0] - $-[0]; |
376 |
|
|
my $cell; |
377 |
|
|
my $cell_quoted; |
378 |
|
|
my $column_quoted = $column; |
379 |
|
|
my $column_cell = $column; |
380 |
|
|
if ($s =~ s/^"//) { |
381 |
|
|
$s =~ s/^((?>[^"\\]|\\.)*)//; |
382 |
|
|
$cell_quoted = $1; |
383 |
|
|
$column += 1 + length $cell_quoted; |
384 |
|
|
$cell_quoted =~ tr/\\//d; |
385 |
|
|
$column++ if $s =~ s/\A\\\z//; |
386 |
|
|
$column++ if $s =~ s/^"//; |
387 |
|
|
} |
388 |
|
|
if ($s =~ s/^([^,]+)//) { |
389 |
|
|
$cell = $1; |
390 |
|
|
$column += length $cell; |
391 |
|
|
$cell =~ s/[\x09\x20]+\z//; |
392 |
|
|
} |
393 |
|
|
if (not defined $cell_quoted and defined $cell and |
394 |
|
|
$cell eq '==') { |
395 |
|
|
push @nt, {type => TABLE_COLSPAN_CELL_TOKEN, |
396 |
|
|
line => $line, column => $column_cell}; |
397 |
|
|
} else { |
398 |
|
|
push @nt, {type => TABLE_CELL_START_TOKEN, |
399 |
|
|
line => $line, |
400 |
|
|
column => defined $column_quoted ? $column_quoted: $column_cell}; |
401 |
|
|
my $real_column = $column; |
402 |
|
|
$column = $column_quoted + 1; |
403 |
|
|
$tokenize_text->(\$cell_quoted) if defined $cell_quoted; |
404 |
|
|
## NOTE: When a quoted-pair is used, column numbers |
405 |
|
|
## reported in this $tokenize_text call might be wrong. |
406 |
|
|
$column = $column_cell; |
407 |
|
|
$tokenize_text->(\$cell) if defined $cell; |
408 |
|
|
$column = $column_quoted; |
409 |
|
|
push @nt, {type => TABLE_CELL_END_TOKEN, |
410 |
|
|
line => $line, |
411 |
|
|
column => $column}; |
412 |
|
|
} |
413 |
|
|
} |
414 |
|
|
push @nt, {type => TABLE_ROW_END_TOKEN, |
415 |
|
|
line => $line, column => $column}; |
416 |
|
|
undef $continuous_line; |
417 |
|
|
return shift @nt; |
418 |
|
|
} elsif ($s eq '__IMAGE__') { |
419 |
wakaba |
1.4 |
my $image = $doc->create_element_ns (SW09_NS, [undef, 'image']); |
420 |
wakaba |
1.2 |
$image->set_user_data (manakai_source_line => $line); |
421 |
|
|
$image->set_user_data (manakai_source_column => 1); |
422 |
wakaba |
1.1 |
$image->text_content (join "\x0A", $s, @s); |
423 |
|
|
($line, $column) = ($line + @s, 1); |
424 |
|
|
@s = (); |
425 |
|
|
$doc->document_element->append_child ($image); |
426 |
|
|
return {type => END_OF_FILE_TOKEN, |
427 |
|
|
line => $line, column => $column}; |
428 |
|
|
} else { |
429 |
|
|
push @nt, {type => CHARACTER_TOKEN, data => "\x0A", |
430 |
|
|
line => $line, column => $column} if $continuous_line; |
431 |
|
|
$tokenize_text->(\$s); |
432 |
|
|
$continuous_line = 1; |
433 |
|
|
return shift @nt; |
434 |
|
|
} |
435 |
wakaba |
1.3 |
}; # $get_next_token |
436 |
wakaba |
1.1 |
|
437 |
|
|
## NOTE: The "initial" mode. |
438 |
|
|
if (@s and $s[0] =~ /^#\?/) { |
439 |
|
|
## NOTE: "Parse a magic line". |
440 |
|
|
|
441 |
|
|
my $s = shift @s; |
442 |
wakaba |
1.2 |
if ($s =~ s/^([^\x09\x20]+)//) { |
443 |
|
|
$column += $+[0] - $-[0]; |
444 |
|
|
my ($name, $version) = split m#/#, $1, 2; |
445 |
|
|
my $el = $doc->document_element; |
446 |
wakaba |
1.4 |
$el->set_attribute_ns (SW09_NS, ['sw', 'Name'] => $name); |
447 |
|
|
$el->set_attribute_ns (SW09_NS, ['sw', 'Version'] => $version) |
448 |
wakaba |
1.2 |
if defined $version; |
449 |
|
|
} |
450 |
wakaba |
1.1 |
|
451 |
wakaba |
1.2 |
while (length $s) { |
452 |
|
|
$column += $+[0] - $-[0] if $s =~ s/^[\x09\x20]+//; |
453 |
|
|
my $name = ''; |
454 |
|
|
if ($s =~ s/^([^=]*)=//) { |
455 |
|
|
$name = $1; |
456 |
|
|
$column += length $name + 1; |
457 |
|
|
} |
458 |
wakaba |
1.4 |
my $param = $doc->create_element_ns (SW09_NS, [undef, 'parameter']); |
459 |
|
|
$param->set_attribute_ns (undef, [undef, 'name'] => $name); |
460 |
wakaba |
1.2 |
$param->set_user_data (manakai_source_line => $line); |
461 |
|
|
$param->set_user_data (manakai_source_column => $column); |
462 |
wakaba |
1.4 |
$head_el->append_child ($param); |
463 |
wakaba |
1.2 |
|
464 |
|
|
$column++ if $s =~ s/^"//; |
465 |
|
|
if ($s =~ s/^([^"]+)//) { |
466 |
|
|
my $values = $1; |
467 |
|
|
$column += length $values; |
468 |
|
|
$values =~ tr/\\//d; |
469 |
|
|
for (split /,/, $values, -1) { |
470 |
wakaba |
1.4 |
my $value = $doc->create_element_ns (SW09_NS, [undef, 'value']); |
471 |
wakaba |
1.2 |
$value->text_content ($_); |
472 |
|
|
$value->set_user_data (manakai_source_line => $line); |
473 |
|
|
$value->set_user_data (manakai_source_column => $column); |
474 |
|
|
$param->append_child ($value); |
475 |
|
|
} |
476 |
|
|
} |
477 |
|
|
$column++ if $s =~ s/^"//; |
478 |
|
|
} |
479 |
wakaba |
1.1 |
|
480 |
|
|
$line = 2; |
481 |
wakaba |
1.2 |
$column = 1; |
482 |
wakaba |
1.1 |
} |
483 |
|
|
|
484 |
|
|
## NOTE: Switched to the "body" mode. |
485 |
wakaba |
1.2 |
|
486 |
wakaba |
1.4 |
my $oe = [{node => $body_el, |
487 |
wakaba |
1.2 |
section_depth => 0, |
488 |
|
|
quotation_depth => 0, |
489 |
|
|
list_depth => 0}]; |
490 |
|
|
my $structural_elements = { |
491 |
|
|
body => 1, section => 1, insert => 1, delete => 1, blockquote => 1, |
492 |
|
|
h1 => 1, ul => 1, ol => 1, dl => 1, li => 1, dt => 1, dd => 1, |
493 |
|
|
table => 1, tbody => 1, tr => 1, td => 1, p => 1, 'comment-p' => 1, |
494 |
|
|
ed => 1, pre => 1, |
495 |
|
|
}; |
496 |
|
|
|
497 |
|
|
my $im = IN_SECTION_IM; |
498 |
wakaba |
1.3 |
$token = $get_next_token->(); |
499 |
wakaba |
1.1 |
|
500 |
wakaba |
1.2 |
A: { |
501 |
|
|
if ($im == IN_PARAGRAPH_IM) { |
502 |
|
|
if ($token->{type} == CHARACTER_TOKEN) { |
503 |
|
|
$oe->[-1]->{node}->manakai_append_text ($token->{data}); |
504 |
wakaba |
1.3 |
$token = $get_next_token->(); |
505 |
wakaba |
1.2 |
redo A; |
506 |
|
|
} elsif ($token->{type} == INLINE_START_TAG_TOKEN) { |
507 |
|
|
if (not defined $token->{tag_name}) { |
508 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (SW09_NS, [undef, 'anchor']); |
509 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
510 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
511 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
512 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
513 |
|
|
|
514 |
wakaba |
1.3 |
$token = $get_next_token->(); |
515 |
wakaba |
1.2 |
redo A; |
516 |
|
|
} else { |
517 |
|
|
my $type = { |
518 |
|
|
AA => [AA_NS, 'aa'], |
519 |
|
|
ABBR => [HTML_NS, 'abbr'], |
520 |
|
|
CITE => [HTML_NS, 'cite'], |
521 |
|
|
CODE => [HTML_NS, 'code'], |
522 |
|
|
CSECTION => [SW10_NS, 'csection'], |
523 |
|
|
DEL => [HTML_NS, 'del'], |
524 |
|
|
DFN => [HTML_NS, 'dfn'], |
525 |
|
|
INS => [HTML_NS, 'ins'], |
526 |
|
|
KBD => [HTML_NS, 'kbd'], |
527 |
|
|
KEY => [SW10_NS, 'key'], |
528 |
|
|
Q => [HTML_NS, 'q'], |
529 |
|
|
QN => [SW10_NS, 'qn'], |
530 |
|
|
RUBY => [HTML_NS, 'ruby'], |
531 |
|
|
RUBYB => [HTML_NS, 'rubyb'], |
532 |
|
|
SAMP => [HTML_NS, 'samp'], |
533 |
|
|
SPAN => [HTML_NS, 'span'], |
534 |
|
|
SRC => [SW10_NS, 'src'], |
535 |
|
|
SUB => [HTML_NS, 'sub'], |
536 |
|
|
SUP => [HTML_NS, 'sup'], |
537 |
|
|
TIME => [HTML_NS, 'time'], |
538 |
|
|
VAR => [HTML_NS, 'var'], |
539 |
|
|
WEAK => [SW09_NS, 'weak'], |
540 |
|
|
}->{$token->{tag_name}} || [SW10_NS, $token->{tag_name}]; |
541 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (SW10_NS, [undef, 'td']); |
542 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
543 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
544 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
545 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
546 |
|
|
|
547 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'class'] => $token->{classes}) |
548 |
wakaba |
1.2 |
if defined $token->{classes}; |
549 |
wakaba |
1.4 |
$el->set_attribute_ns (XML_NS, ['xml', 'lang'] => $token->{language}) |
550 |
wakaba |
1.2 |
if defined $token->{language}; |
551 |
|
|
|
552 |
wakaba |
1.3 |
$token = $get_next_token->(); |
553 |
wakaba |
1.2 |
redo A; |
554 |
|
|
} |
555 |
|
|
} elsif ($token->{type} == INLINE_MIDDLE_TAG_TOKEN) { |
556 |
|
|
my ($ns, $ln, $pop) = @{{ |
557 |
wakaba |
1.3 |
rt => [HTML_NS, 'rt', 1], |
558 |
wakaba |
1.2 |
title => [SW10_NS, 'attrvalue', 1], |
559 |
|
|
nsuri => [SW10_NS, 'attrvalue', 1], |
560 |
|
|
qn => [SW10_NS, 'nsuri'], |
561 |
|
|
ruby => [HTML_NS, 'rt'], |
562 |
|
|
rubyb => [HTML_NS, 'rt'], |
563 |
|
|
}->{$oe->[-1]->{node}->manakai_local_name} || [SW10_NS, 'title']}; |
564 |
|
|
pop @$oe if $pop; |
565 |
|
|
|
566 |
wakaba |
1.4 |
my $el = $doc->create_element_ns ($ns, [undef, $ln]); |
567 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
568 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
569 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
570 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
571 |
|
|
|
572 |
wakaba |
1.4 |
$el->set_attribute_ns (XML_NS, ['xml', 'lang'] => $token->{language}) |
573 |
wakaba |
1.2 |
if defined $token->{language}; |
574 |
|
|
|
575 |
wakaba |
1.3 |
$token = $get_next_token->(); |
576 |
wakaba |
1.2 |
redo A; |
577 |
|
|
} elsif ($token->{type} == INLINE_END_TAG_TOKEN) { |
578 |
|
|
pop @$oe if { |
579 |
|
|
rt => 1, title => 1, nsuri => 1, attrvalue => 1, |
580 |
|
|
}->{$oe->[-1]->{node}->manakai_local_name}; |
581 |
|
|
|
582 |
|
|
if ({%$structural_elements, |
583 |
|
|
strong => 1, em => 1}->{$oe->[-1]->{node}->manakai_local_name}) { |
584 |
|
|
my $el = $doc->create_element_ns |
585 |
|
|
(SW09_NS, |
586 |
wakaba |
1.4 |
[undef, defined $token->{res_scheme} |
587 |
|
|
? 'anchor-external' : 'anchor-internal']); |
588 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
589 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
590 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
591 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
592 |
|
|
$el->text_content (']]'); |
593 |
|
|
} |
594 |
|
|
|
595 |
wakaba |
1.4 |
$oe->[-1]->{node}->set_attribute_ns (SW09_NS, ['sw', 'anchor'], |
596 |
wakaba |
1.2 |
$token->{anchor}) |
597 |
|
|
if defined $token->{anchor}; |
598 |
wakaba |
1.4 |
$oe->[-1]->{node}->set_attribute_ns (SW09_NS, ['sw', 'resScheme'], |
599 |
wakaba |
1.2 |
$token->{res_scheme}) |
600 |
|
|
if defined $token->{res_scheme}; |
601 |
wakaba |
1.4 |
$oe->[-1]->{node}->set_attribute_ns (SW09_NS, ['sw', 'resParameter'], |
602 |
wakaba |
1.2 |
$token->{res_parameter}) |
603 |
|
|
if defined $token->{res_parameter}; |
604 |
|
|
|
605 |
|
|
pop @$oe; |
606 |
|
|
|
607 |
wakaba |
1.3 |
$token = $get_next_token->(); |
608 |
wakaba |
1.2 |
redo A; |
609 |
|
|
} elsif ($token->{type} == STRONG_TOKEN) { |
610 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'strong']); |
611 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
612 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
613 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
614 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
615 |
|
|
|
616 |
wakaba |
1.3 |
$token = $get_next_token->(); |
617 |
wakaba |
1.2 |
redo A; |
618 |
|
|
} elsif ($token->{type} == EMPHASIS_TOKEN) { |
619 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'em']); |
620 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
621 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
622 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
623 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
624 |
|
|
|
625 |
wakaba |
1.3 |
$token = $get_next_token->(); |
626 |
wakaba |
1.2 |
redo A; |
627 |
|
|
} elsif ($token->{type} == FORM_TOKEN) { |
628 |
|
|
## There is an exact code clone. |
629 |
|
|
if ($token->{name} eq 'form') { |
630 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (SW09_NS, [undef, 'form']); |
631 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
632 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
633 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
634 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
635 |
|
|
|
636 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'id'] |
637 |
|
|
=> $token->{id}) if defined $token->{id}; |
638 |
|
|
$el->set_attribute_ns (undef, [undef, 'input'] |
639 |
|
|
=> shift @{$token->{parameters}}) |
640 |
wakaba |
1.2 |
if @{$token->{parameter}}; |
641 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'template'] |
642 |
|
|
=> shift @{$token->{parameters}}) |
643 |
wakaba |
1.2 |
if @{$token->{parameter}}; |
644 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'option'] |
645 |
|
|
=> shift @{$token->{parameters}}) |
646 |
wakaba |
1.2 |
if @{$token->{parameter}}; |
647 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'parameter'] |
648 |
|
|
=> join ':', @{$token->{parameters}}) |
649 |
wakaba |
1.2 |
if @{$token->{parameter}}; |
650 |
|
|
|
651 |
wakaba |
1.3 |
$token = $get_next_token->(); |
652 |
wakaba |
1.2 |
redo A; |
653 |
|
|
} else { |
654 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (SW09_NS, [undef, 'form']); |
655 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
656 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
657 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
658 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
659 |
|
|
|
660 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'ref'] |
661 |
|
|
=> $token->{name}); |
662 |
|
|
$el->set_attribute_ns (undef, [undef, 'id'] |
663 |
|
|
=> $token->{id}) if defined $token->{id}; |
664 |
|
|
$el->set_attribute_ns (undef, [undef, 'parameter'] |
665 |
|
|
=> join ':', @{$token->{parameters}}) |
666 |
wakaba |
1.2 |
if @{$token->{parameter}}; |
667 |
|
|
|
668 |
wakaba |
1.3 |
$token = $get_next_token->(); |
669 |
wakaba |
1.2 |
redo A; |
670 |
|
|
} |
671 |
|
|
} elsif ($token->{type} == ELEMENT_TOKEN) { |
672 |
|
|
## NOTE: There is an exact code clone. |
673 |
|
|
my $el = $doc->create_element_ns |
674 |
wakaba |
1.4 |
($token->{namespace}, [undef, $token->{local_name}]); |
675 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
676 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
677 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
678 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
679 |
|
|
|
680 |
wakaba |
1.4 |
$el->set_attribute_ns (SW09_NS, ['sw', 'anchor'], $token->{anchor}) |
681 |
wakaba |
1.2 |
if defined $token->{anchor}; |
682 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'by'] |
683 |
|
|
=> $token->{by}) if defined $token->{by}; |
684 |
|
|
$el->set_attribute_ns (SW09_NS, ['sw', 'resScheme'], |
685 |
|
|
$token->{res_scheme}) |
686 |
wakaba |
1.2 |
if defined $token->{res_scheme}; |
687 |
wakaba |
1.4 |
$el->set_attribute_ns (SW09_NS, ['sw', 'resParameter'], |
688 |
wakaba |
1.2 |
$token->{res_parameter}) |
689 |
|
|
if defined $token->{res_parameter}; |
690 |
|
|
$el->text_content ($token->{content}) if defined $token->{content}; |
691 |
|
|
|
692 |
wakaba |
1.3 |
$token = $get_next_token->(); |
693 |
wakaba |
1.2 |
redo A; |
694 |
|
|
} elsif ($token->{type} == LABELED_LIST_MIDDLE_TOKEN) { |
695 |
|
|
pop @$oe while not $structural_elements |
696 |
|
|
->{$oe->[-1]->{node}->manakai_local_name}; |
697 |
|
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'dt'; |
698 |
|
|
|
699 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'dt']); |
700 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
701 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
702 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
703 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
704 |
|
|
|
705 |
wakaba |
1.3 |
$token = $get_next_token->(); |
706 |
wakaba |
1.2 |
redo A; |
707 |
|
|
} elsif ($token->{type} == HEADING_END_TOKEN) { |
708 |
|
|
pop @$oe while not $structural_elements |
709 |
|
|
->{$oe->[-1]->{node}->manakai_local_name}; |
710 |
|
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'h1'; |
711 |
|
|
|
712 |
|
|
$im = IN_SECTION_IM; |
713 |
wakaba |
1.3 |
$token = $get_next_token->(); |
714 |
wakaba |
1.2 |
redo A; |
715 |
|
|
} elsif ($token->{type} == TABLE_CELL_END_TOKEN) { |
716 |
|
|
pop @$oe while not $structural_elements |
717 |
|
|
->{$oe->[-1]->{node}->manakai_local_name}; |
718 |
|
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'td'; |
719 |
|
|
|
720 |
|
|
$im = IN_TABLE_ROW_IM; |
721 |
wakaba |
1.3 |
$token = $get_next_token->(); |
722 |
wakaba |
1.2 |
redo A; |
723 |
|
|
} elsif (($token->{type} == BLOCK_END_TAG_TOKEN and |
724 |
|
|
$token->{tag_name} eq 'PRE') or |
725 |
|
|
$token->{type} == PREFORMATTED_END_TOKEN) { |
726 |
|
|
pop @$oe while not $structural_elements |
727 |
|
|
->{$oe->[-1]->{node}->manakai_local_name}; |
728 |
|
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'pre'; |
729 |
|
|
|
730 |
|
|
$im = IN_SECTION_IM; |
731 |
wakaba |
1.3 |
$token = $get_next_token->(); |
732 |
wakaba |
1.2 |
redo A; |
733 |
|
|
} else { |
734 |
|
|
pop @$oe while not $structural_elements |
735 |
|
|
->{$oe->[-1]->{node}->manakai_local_name}; |
736 |
|
|
|
737 |
|
|
$im = IN_SECTION_IM; |
738 |
wakaba |
1.3 |
## Reconsume. |
739 |
wakaba |
1.2 |
redo A; |
740 |
|
|
} |
741 |
|
|
} elsif ($im == IN_SECTION_IM) { |
742 |
|
|
if ($token->{type} == HEADING_START_TOKEN) { |
743 |
|
|
B: { |
744 |
|
|
pop @$oe and redo B |
745 |
wakaba |
1.3 |
if not {body => 1, section => 1, insert => 1, delete => 1} |
746 |
wakaba |
1.2 |
->{$oe->[-1]->{node}->manakai_local_name} or |
747 |
|
|
$token->{depth} <= $oe->[-1]->{section_depth}; |
748 |
|
|
if ($token->{depth} > $oe->[-1]->{section_depth} + 1) { |
749 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'section']); |
750 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
751 |
|
|
push @$oe, {node => $el, |
752 |
|
|
section_depth => $oe->[-1]->{section_depth} + 1, |
753 |
|
|
quotation_depth => 0, list_depth => 0}; |
754 |
|
|
redo B; |
755 |
|
|
} |
756 |
|
|
} # B |
757 |
|
|
|
758 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'section']); |
759 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
760 |
|
|
push @$oe, {node => $el, |
761 |
|
|
section_depth => $oe->[-1]->{section_depth} + 1, |
762 |
|
|
quotation_depth => 0, list_depth => 0}; |
763 |
|
|
|
764 |
wakaba |
1.4 |
my $el2 = $doc->create_element_ns (HTML_NS, [undef, 'h1']); |
765 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el2); |
766 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el2}; |
767 |
|
|
|
768 |
|
|
$im = IN_PARAGRAPH_IM; |
769 |
wakaba |
1.3 |
$token = $get_next_token->(); |
770 |
wakaba |
1.2 |
redo A; |
771 |
|
|
} elsif ($token->{type} == BLOCK_START_TAG_TOKEN and |
772 |
|
|
($token->{tag_name} eq 'INS' or |
773 |
|
|
$token->{tag_name} eq 'DEL')) { |
774 |
|
|
my $el = $doc->create_element_ns |
775 |
wakaba |
1.4 |
(SW09_NS, |
776 |
|
|
[undef, $token->{tag_name} eq 'INS' ? 'insert' : 'delete']); |
777 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
778 |
|
|
push @$oe, {node => $el, section_depth => 0, |
779 |
|
|
quotation_depth => 0, list_depth => 0}; |
780 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'class'] => $token->{classes}) |
781 |
wakaba |
1.2 |
if defined $token->{classes}; |
782 |
wakaba |
1.3 |
$token = $get_next_token->(); |
783 |
wakaba |
1.2 |
redo A; |
784 |
|
|
} elsif ($token->{type} == QUOTATION_START_TOKEN) { |
785 |
|
|
B: { |
786 |
|
|
pop @$oe and redo B |
787 |
wakaba |
1.3 |
if not {body => 1, section => 1, insert => 1, delete => 1, |
788 |
wakaba |
1.2 |
blockquote => 1} |
789 |
|
|
->{$oe->[-1]->{node}->manakai_local_name} or |
790 |
|
|
$token->{depth} <= $oe->[-1]->{quotation_depth}; |
791 |
|
|
if ($token->{depth} > $oe->[-1]->{quotation_depth} + 1) { |
792 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'blockquote']); |
793 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
794 |
|
|
push @$oe, {node => $el, section_depth => 0, |
795 |
|
|
quotation_depth => $oe->[-1]->{quotation_depth} + 1, |
796 |
|
|
list_depth => 0}; |
797 |
|
|
redo B; |
798 |
|
|
} |
799 |
|
|
} # B |
800 |
|
|
|
801 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'blockquote']); |
802 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
803 |
|
|
push @$oe, {node => $el, section_depth => 0, |
804 |
|
|
quotation_depth => $oe->[-1]->{quotation_depth} + 1, |
805 |
|
|
list_depth => 0}; |
806 |
|
|
|
807 |
wakaba |
1.3 |
$token = $get_next_token->(); |
808 |
wakaba |
1.2 |
redo A; |
809 |
|
|
} elsif ($token->{type} == LIST_START_TOKEN) { |
810 |
|
|
my $depth = length $token->{depth}; |
811 |
wakaba |
1.3 |
my $list_type = substr ($token->{depth}, -1, 1) eq '-' ? 'ul' : 'ol'; |
812 |
wakaba |
1.2 |
B: { |
813 |
|
|
pop @$oe and redo B if $oe->[-1]->{list_depth} > $depth; |
814 |
wakaba |
1.3 |
pop @$oe and redo B if $oe->[-1]->{list_depth} == $depth and |
815 |
|
|
$list_type ne $oe->[-1]->{node}->manakai_local_name; |
816 |
wakaba |
1.2 |
if ($oe->[-1]->{list_depth} < $depth) { |
817 |
wakaba |
1.3 |
my $type = substr $token->{depth}, $oe->[-1]->{list_depth}, 1; |
818 |
wakaba |
1.2 |
my $el = $doc->create_element_ns |
819 |
wakaba |
1.4 |
(HTML_NS, [undef, $type eq '-' ? 'ul' : 'ol']); |
820 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
821 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el, |
822 |
|
|
list_depth => $oe->[-1]->{list_depth} + 1}; |
823 |
|
|
if ($oe->[-1]->{list_depth} < $depth) { |
824 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'li']); |
825 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
826 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
827 |
|
|
} |
828 |
|
|
redo B; |
829 |
|
|
} |
830 |
|
|
} # B |
831 |
|
|
|
832 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'li']); |
833 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
834 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
835 |
|
|
|
836 |
|
|
$im = IN_PARAGRAPH_IM; |
837 |
wakaba |
1.3 |
$token = $get_next_token->(); |
838 |
wakaba |
1.2 |
redo A; |
839 |
|
|
} elsif ($token->{type} == LABELED_LIST_START_TOKEN) { |
840 |
|
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'dd'; |
841 |
|
|
if ($oe->[-1]->{node}->manakai_local_name ne 'dl') { |
842 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'dl']); |
843 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
844 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
845 |
|
|
} |
846 |
|
|
|
847 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'dt']); |
848 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
849 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
850 |
|
|
|
851 |
|
|
$im = IN_PARAGRAPH_IM; |
852 |
wakaba |
1.3 |
$token = $get_next_token->(); |
853 |
wakaba |
1.2 |
redo A; |
854 |
|
|
} elsif ($token->{type} == TABLE_ROW_START_TOKEN) { |
855 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'table']); |
856 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
857 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
858 |
|
|
|
859 |
wakaba |
1.4 |
$el = $doc->create_element_ns (HTML_NS, [undef, 'tbody']); |
860 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
861 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
862 |
|
|
|
863 |
wakaba |
1.4 |
$el = $doc->create_element_ns (HTML_NS, [undef, 'tr']); |
864 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
865 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
866 |
|
|
|
867 |
|
|
$im = IN_TABLE_ROW_IM; |
868 |
wakaba |
1.3 |
$token = $get_next_token->(); |
869 |
wakaba |
1.2 |
redo A; |
870 |
|
|
} elsif (($token->{type} == BLOCK_START_TAG_TOKEN and |
871 |
|
|
$token->{tag_name} eq 'PRE') or |
872 |
|
|
$token->{type} == PREFORMATTED_START_TOKEN) { |
873 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'pre']); |
874 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
875 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
876 |
|
|
|
877 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'class'] => $token->{classes}) |
878 |
wakaba |
1.2 |
if defined $token->{classes}; |
879 |
|
|
|
880 |
|
|
$im = IN_PARAGRAPH_IM; |
881 |
wakaba |
1.3 |
$token = $get_next_token->(); |
882 |
wakaba |
1.2 |
redo A; |
883 |
|
|
} elsif ($token->{type} == COMMENT_PARAGRAPH_START_TOKEN) { |
884 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (SW10_NS, [undef, 'comment-p']); |
885 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
886 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
887 |
|
|
|
888 |
|
|
$im = IN_PARAGRAPH_IM; |
889 |
wakaba |
1.3 |
$token = $get_next_token->(); |
890 |
wakaba |
1.2 |
redo A; |
891 |
|
|
} elsif ($token->{type} == EDITORIAL_NOTE_START_TOKEN) { |
892 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (SW10_NS, [undef, 'ed']); |
893 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
894 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
895 |
|
|
|
896 |
|
|
$im = IN_PARAGRAPH_IM; |
897 |
wakaba |
1.3 |
$token = $get_next_token->(); |
898 |
wakaba |
1.2 |
redo A; |
899 |
|
|
} elsif ($token->{type} == EMPTY_LINE_TOKEN) { |
900 |
wakaba |
1.3 |
pop @$oe while not {body => 1, section => 1, insert => 1, delete => 1} |
901 |
wakaba |
1.2 |
->{$oe->[-1]->{node}->manakai_local_name}; |
902 |
wakaba |
1.3 |
$token = $get_next_token->(); |
903 |
wakaba |
1.2 |
redo A; |
904 |
|
|
} elsif ($token->{type} == BLOCK_END_TAG_TOKEN) { |
905 |
wakaba |
1.3 |
if ($token->{tag_name} eq 'INS') { |
906 |
wakaba |
1.2 |
for (reverse 1..$#$oe) { |
907 |
|
|
if ($oe->[$_]->{node}->manakai_local_name eq 'insert') { |
908 |
|
|
splice @$oe, $_; |
909 |
|
|
last; |
910 |
|
|
} |
911 |
|
|
} |
912 |
wakaba |
1.3 |
} elsif ($token->{tag_name} eq 'DEL') { |
913 |
wakaba |
1.2 |
for (reverse 1..$#$oe) { |
914 |
|
|
if ($oe->[$_]->{node}->manakai_local_name eq 'delete') { |
915 |
|
|
splice @$oe, $_; |
916 |
|
|
last; |
917 |
|
|
} |
918 |
|
|
} |
919 |
|
|
} else { |
920 |
|
|
## NOTE: Ignore the token. |
921 |
|
|
} |
922 |
wakaba |
1.3 |
$token = $get_next_token->(); |
923 |
|
|
redo A; |
924 |
wakaba |
1.2 |
} elsif ($token->{type} == FORM_TOKEN) { |
925 |
|
|
## There is an exact code clone. |
926 |
|
|
if ($token->{name} eq 'form') { |
927 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (SW09_NS, [undef, 'form']); |
928 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
929 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
930 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
931 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
932 |
|
|
|
933 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'id'] |
934 |
|
|
=> $token->{id}) if defined $token->{id}; |
935 |
|
|
$el->set_attribute_ns (undef, [undef, 'input'] |
936 |
|
|
=> shift @{$token->{parameters}}) |
937 |
wakaba |
1.2 |
if @{$token->{parameter}}; |
938 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'template'] |
939 |
|
|
=> shift @{$token->{parameters}}) |
940 |
wakaba |
1.2 |
if @{$token->{parameter}}; |
941 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'option'] |
942 |
|
|
=> shift @{$token->{parameters}}) |
943 |
wakaba |
1.2 |
if @{$token->{parameter}}; |
944 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'parameter'] |
945 |
|
|
=> join ':', @{$token->{parameters}}) |
946 |
wakaba |
1.2 |
if @{$token->{parameter}}; |
947 |
|
|
|
948 |
wakaba |
1.3 |
$token = $get_next_token->(); |
949 |
wakaba |
1.2 |
redo A; |
950 |
|
|
} else { |
951 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (SW09_NS, [undef, 'form']); |
952 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
953 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
954 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
955 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
956 |
|
|
|
957 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'ref'] => $token->{name}); |
958 |
|
|
$el->set_attribute_ns (undef, [undef, 'id'] |
959 |
|
|
=> $token->{id}) if defined $token->{id}; |
960 |
|
|
$el->set_attribute_ns (undef, [undef, 'parameter'] |
961 |
|
|
=> join ':', @{$token->{parameters}}) |
962 |
wakaba |
1.2 |
if @{$token->{parameter}}; |
963 |
|
|
|
964 |
wakaba |
1.3 |
$token = $get_next_token->(); |
965 |
wakaba |
1.2 |
redo A; |
966 |
|
|
} |
967 |
|
|
} elsif ($token->{type} == ELEMENT_TOKEN and |
968 |
|
|
$token->{local_name} eq 'replace') { |
969 |
|
|
## NOTE: There is an exact code clone. |
970 |
|
|
my $el = $doc->create_element_ns |
971 |
wakaba |
1.4 |
($token->{namespace}, [undef, $token->{local_name}]); |
972 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
973 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
974 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
975 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
976 |
|
|
|
977 |
wakaba |
1.4 |
$el->set_attribute_ns (SW09_NS, ['sw', 'anchor'], $token->{anchor}) |
978 |
wakaba |
1.2 |
if defined $token->{anchor}; |
979 |
wakaba |
1.4 |
$el->set_attribute_ns (undef, [undef, 'by'] |
980 |
|
|
=> $token->{by}) if defined $token->{by}; |
981 |
|
|
$el->set_attribute_ns (SW09_NS, ['sw', 'resScheme'], |
982 |
|
|
$token->{res_scheme}) |
983 |
wakaba |
1.2 |
if defined $token->{res_scheme}; |
984 |
wakaba |
1.4 |
$el->set_attribute_ns (SW09_NS, ['sw', 'resParameter'], |
985 |
wakaba |
1.2 |
$token->{res_parameter}) |
986 |
|
|
if defined $token->{res_parameter}; |
987 |
|
|
$el->text_content ($token->{content}) if defined $token->{content}; |
988 |
|
|
|
989 |
wakaba |
1.3 |
$token = $get_next_token->(); |
990 |
wakaba |
1.2 |
redo A; |
991 |
|
|
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
992 |
|
|
return; |
993 |
|
|
} elsif ({LABELED_LIST_MIDDLE_TOKEN => 1, |
994 |
|
|
HEADING_END_TOKEN => 1, |
995 |
|
|
PREFORMATTED_END_TOKEN => 1, |
996 |
|
|
TABLE_ROW_END_TOKEN => 1, |
997 |
|
|
TABLE_CELL_START_TOKEN => 1, |
998 |
|
|
TABLE_CELL_END_TOKEN => 1, |
999 |
|
|
TABLE_COLSPAN_CELL_TOKEN => 1}->{$token->{type}}) { |
1000 |
|
|
## NOTE: Ignore the token. |
1001 |
|
|
} else { |
1002 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'p']); |
1003 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
1004 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
1005 |
|
|
|
1006 |
|
|
$im = IN_PARAGRAPH_IM; |
1007 |
|
|
## Reprocess. |
1008 |
|
|
redo A; |
1009 |
|
|
} |
1010 |
|
|
} elsif ($im == IN_TABLE_ROW_IM) { |
1011 |
|
|
if ($token->{type} == TABLE_CELL_START_TOKEN) { |
1012 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'td']); |
1013 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
1014 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
1015 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
1016 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
1017 |
|
|
|
1018 |
|
|
$im = IN_PARAGRAPH_IM; |
1019 |
wakaba |
1.3 |
$token = $get_next_token->(); |
1020 |
wakaba |
1.2 |
redo A; |
1021 |
|
|
} elsif ($token->{type} == TABLE_COLSPAN_CELL_TOKEN) { |
1022 |
|
|
my $lc = $oe->[-1]->{node}->last_child; |
1023 |
|
|
if ($lc and $lc->manakai_local_name eq 'td') { |
1024 |
wakaba |
1.4 |
$lc->set_attribute_ns |
1025 |
|
|
(undef, [undef, 'colspan'], |
1026 |
|
|
($lc->get_attribute_ns (undef, 'colspan') || 0) + 1); |
1027 |
wakaba |
1.2 |
} else { |
1028 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (SW10_NS, [undef, 'td']); |
1029 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
1030 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
1031 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
1032 |
|
|
} |
1033 |
|
|
|
1034 |
wakaba |
1.3 |
$token = $get_next_token->(); |
1035 |
wakaba |
1.2 |
redo A; |
1036 |
|
|
} elsif ($token->{type} == TABLE_ROW_END_TOKEN) { |
1037 |
|
|
pop @$oe if $oe->[-1]->{node}->manakai_local_name eq 'tr'; |
1038 |
wakaba |
1.3 |
$token = $get_next_token->(); |
1039 |
wakaba |
1.2 |
redo A; |
1040 |
|
|
} elsif ($token->{type} == TABLE_ROW_START_TOKEN) { |
1041 |
wakaba |
1.4 |
my $el = $doc->create_element_ns (HTML_NS, [undef, 'tr']); |
1042 |
wakaba |
1.2 |
$oe->[-1]->{node}->append_child ($el); |
1043 |
|
|
push @$oe, {%{$oe->[-1]}, node => $el}; |
1044 |
|
|
$el->set_user_data (manakai_source_line => $token->{line}); |
1045 |
|
|
$el->set_user_data (manakai_source_column => $token->{column}); |
1046 |
|
|
|
1047 |
wakaba |
1.3 |
$token = $get_next_token->(); |
1048 |
wakaba |
1.2 |
redo A; |
1049 |
|
|
} else { |
1050 |
|
|
$im = IN_SECTION_IM; |
1051 |
|
|
## Reprocess. |
1052 |
|
|
redo A; |
1053 |
|
|
} |
1054 |
|
|
} else { |
1055 |
|
|
die "$0: Unknown insertion mode: $im"; |
1056 |
|
|
} |
1057 |
|
|
} # A |
1058 |
wakaba |
1.1 |
} # parse_char_string |
1059 |
|
|
|
1060 |
|
|
1; |