1 |
package Whatpm::H2H; |
2 |
use strict; |
3 |
|
4 |
sub H2H_NS () { q<http://suika.fam.cx/~wakaba/archive/2005/manakai/Markup/H2H/> } |
5 |
sub HTML_NS () { q<http://www.w3.org/1999/xhtml> } |
6 |
sub HTML3_NS () { q<urn:x-suika-fam-cx:markup:ietf:html:3:draft:00:> } |
7 |
sub SW09_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:9:> } |
8 |
sub XHTML2_NS () { q<http://www.w3.org/2002/06/xhtml2/> } |
9 |
|
10 |
sub parse_string ($$$) { |
11 |
my $self = bless { |
12 |
token => [], |
13 |
location => {}, |
14 |
doc => $_[2], |
15 |
}, $_[0]; |
16 |
|
17 |
my $s = ''.$_[1]; |
18 |
$s =~ s/\x0D\x0A/\x0A/g; |
19 |
$s =~ tr/\x0D/\x0A/; |
20 |
$self->{line} = [split /\x0A/, $s]; |
21 |
|
22 |
local $Error::Depth = $Error::Depth + 1; |
23 |
$self->{doc}->strict_error_checking (0); |
24 |
my $doc_el = $self->{doc}->create_element_ns (HTML_NS, 'html'); |
25 |
$doc_el->set_attribute_ns (q<http://www.w3.org/2000/xmlns/>, 'xmlns', HTML_NS); |
26 |
$self->{doc}->append_child ($doc_el); |
27 |
|
28 |
$self->_construct_tree; |
29 |
|
30 |
return $self->{doc}; |
31 |
} # parse_string |
32 |
|
33 |
sub _shift_token ($) { |
34 |
my $self = $_[0]; |
35 |
|
36 |
if (@{$self->{token}}) { |
37 |
return shift @{$self->{token}}; |
38 |
} |
39 |
|
40 |
my $attrvalue = sub { |
41 |
my $v = shift; |
42 |
$v =~ s/"/"/g; |
43 |
$v =~ s/</</g; |
44 |
$v =~ s/>/>/g; |
45 |
$v =~ s/®/\x{00AE}/g; |
46 |
$v =~ s/♥/\x{2661}/g; |
47 |
$v =~ s/&/&/g; |
48 |
return $v; |
49 |
}; |
50 |
|
51 |
my $uriv = sub { |
52 |
my $v = $attrvalue->(shift); |
53 |
$v =~ s/^\{/(/; |
54 |
$v =~ s/\}$/)/; |
55 |
$v =~ s/^\#([0-9si]+)$/($1)/; |
56 |
$v =~ s/^\(([0-9]{4})([0-9]{2})([0-9]{2})([^)]*)\)$/($1, $2, $3$4)/; |
57 |
$v =~ s/[si]/, /g if $v =~ /^\(/ and $v =~ /\)$/; |
58 |
return $v; |
59 |
}; |
60 |
|
61 |
my $r = {type => '#EOF'}; |
62 |
L: while (defined (my $line = shift @{$self->{line}})) { |
63 |
if ($line =~ s/^([A-Z]+|T[0-9])(\*?\+?\*?)(?:\s+|$)//) { |
64 |
my $command = $1; |
65 |
my $flag = $2; |
66 |
$r = {type => 'start', value => $command}; |
67 |
|
68 |
my $uri; |
69 |
if ($flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) { |
70 |
$uri = $1; |
71 |
} |
72 |
|
73 |
my $attr = ''; |
74 |
if ($line =~ s/^\{(\s*(?:[A-Za-z][^{}]*)?)\}\s*//) { |
75 |
$attr = $1; |
76 |
} |
77 |
|
78 |
if (not defined $uri and |
79 |
$flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) { |
80 |
$uri = $1; |
81 |
} |
82 |
|
83 |
my @token; |
84 |
my $info = { |
85 |
# val# val#(*) |
86 |
ABBR => [2, 2], |
87 |
ACRONYM => [2, 2], |
88 |
CITE => [2, 1], |
89 |
LDIARY => [4, 4], |
90 |
LIMG => [4, 4], |
91 |
LINK => [2, 1], |
92 |
LMG => [2, 2], |
93 |
LNEW => [2, 2], |
94 |
PERSON => [2, 2], |
95 |
RIBU => [2, 2], |
96 |
RUBY => [2, 2], |
97 |
SEE => [2, 2], |
98 |
}->{$command}; |
99 |
my @value = split /\s+/, $line, |
100 |
($flag =~ /\*/ ? $info->[1] : $info->[0]) || 1; |
101 |
|
102 |
push @token, {type => 'uri', value => $uriv->($uri)} if defined $uri; |
103 |
|
104 |
my %attr; |
105 |
while ($attr =~ /([A-Za-z0-9_-]+)\s*(?:=>?|:)\s*([^";,]+|"[^"]+")/gc) { |
106 |
my $name = lc $1; |
107 |
my $value = $2; |
108 |
$value =~ tr/"//d; |
109 |
$attr{$name} = $value; |
110 |
} |
111 |
delete $attr{'content-type'}; |
112 |
|
113 |
if ({ |
114 |
ABBR => 1, ACRONYM => 1, |
115 |
RUBY => 1, RIBU => 1, |
116 |
}->{$command}) { |
117 |
if (@value == 1 and $attr{title}) { |
118 |
push @value, $attr{title}; |
119 |
delete $attr{title}; |
120 |
} |
121 |
if (@value == 2) { |
122 |
unshift @{$self->{line}}, |
123 |
'RB', |
124 |
'DATA {} >>'.$value[1], |
125 |
'/RB', |
126 |
'RT', |
127 |
'DATA {} >>'.$value[0], |
128 |
'/RT', |
129 |
'/'.$command; |
130 |
} else { |
131 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command; |
132 |
} |
133 |
} elsif ($command eq 'CITE') { |
134 |
if (@value == 2) { |
135 |
if (defined $uri or $value[0] !~ /^[a-z-]+:/) { |
136 |
unshift @{$self->{line}}, |
137 |
'SRC', |
138 |
'DATA {} >>'.$value[0].' '.$value[1], |
139 |
'/SRC', 'BODYTEXT'; |
140 |
} else { |
141 |
push @token, {type => 'uri', value => $uriv->($value[0])}; |
142 |
unshift @{$self->{line}}, |
143 |
'SRC', |
144 |
'DATA {} >>'.$value[1], |
145 |
'/SRC', 'BODYTEXT'; |
146 |
} |
147 |
} elsif (@value == 1) { |
148 |
if (defined $uri or $value[0] !~ /^[a-z-]+:/) { |
149 |
unshift @{$self->{line}}, |
150 |
'SRC', |
151 |
'DATA {} >>'.$value[0], |
152 |
'/SRC', 'BODYTEXT'; |
153 |
} else { |
154 |
push @token, {type => 'uri', value => $uriv->($value[0])}; |
155 |
unshift @{$self->{line}}, 'BODYTEXT'; |
156 |
} |
157 |
} else { |
158 |
unshift @{$self->{line}}, 'BODYTEXT'; |
159 |
} |
160 |
} elsif ($command eq 'DATA') { |
161 |
my @token; |
162 |
$line =~ s/^>>//; |
163 |
while (length $line) { |
164 |
if ($line =~ s/^<([a-z0-9]+)\s*//) { |
165 |
my $tagname = $1; |
166 |
push @token, {type => 'start', value => $tagname}; |
167 |
while ($line =~ s/^([a-z-]+)\s*=\s*"([^"]*)"\s*//) { |
168 |
push @token, {type => $1, value => $attrvalue->($2)}; |
169 |
} |
170 |
$line =~ s#^/?\s*>##; |
171 |
push @token, {type => 'end', value => $tagname} |
172 |
if $tagname eq 'img' or $tagname eq 'input' or |
173 |
$tagname eq 'br'; |
174 |
} elsif ($line =~ s#^</([a-z0-9]+)\s*>##) { |
175 |
push @token, {type => 'end', value => $1}; |
176 |
} elsif ($line =~ s/^<!--(.*?)-->//) { |
177 |
push @token, {type => 'html-comment', value => $1}; |
178 |
} elsif ($line =~ s/^&([a-z]+);//) { |
179 |
my $name = $1; |
180 |
if ($name eq 'amp') { |
181 |
push @token, {type => 'text', value => '&'}; |
182 |
} elsif ($name eq 'lt') { |
183 |
push @token, {type => 'text', value => '<'}; |
184 |
} elsif ($name eq 'gt') { |
185 |
push @token, {type => 'text', value => '>'}; |
186 |
} elsif ($name eq 'quot') { |
187 |
push @token, {type => 'text', value => '"'}; |
188 |
} elsif ($name eq 'reg') { |
189 |
push @token, {type => 'text', value => "\x{00AE}"}; |
190 |
} elsif ($name eq 'hearts') { |
191 |
push @token, {type => 'text', value => "\x{2661}"}; |
192 |
} else { |
193 |
push @token, {type => 'char', value => $name}; |
194 |
} |
195 |
} elsif ($line =~ s/^&#([0-9]+);//) { |
196 |
push @token, {type => 'text', value => ord $1}; |
197 |
} elsif ($line =~ s/^&#x([0-9A-Fa-f]+);//) { |
198 |
push @token, {type => 'text', value => ord hex $1}; |
199 |
} elsif ($line =~ s/^([^<&]+)//) { |
200 |
push @token, {type => 'text', value => $1}; |
201 |
} else { |
202 |
push @token, {type => 'text', value => substr ($line, 0, 1)}; |
203 |
substr ($line, 0, 1) = ''; |
204 |
} |
205 |
} |
206 |
push @token, {type => 'eol'}; |
207 |
|
208 |
$r = shift @token; |
209 |
push @{$self->{token}}, @token; |
210 |
last L; |
211 |
} elsif ({ |
212 |
DD => 1, DT => 1, |
213 |
DEL => 1, INS => 1, |
214 |
LI => 1, |
215 |
RB => 1, RT => 1, |
216 |
STRONG => 1, |
217 |
YAMI => 1, |
218 |
EM => 1, |
219 |
HOUR => 1, KION => 1, LUNCH => 1, |
220 |
TAION => 1, TENKI => 1, THEME => 1, |
221 |
T1 => 1, T2 => 1, T3 => 1, T4 => 1, |
222 |
T5 => 1, T6 => 1, T7 => 1, SP => 1, |
223 |
}->{$command}) { |
224 |
if (@value) { |
225 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command; |
226 |
} |
227 |
} elsif ($command eq 'DIV') { |
228 |
if (@value) { |
229 |
$r = {type => 'class', value => $value[0]}; |
230 |
} |
231 |
} elsif ($command eq 'LDIARY') { |
232 |
$value[0] =~ s/^([0-9]{4})([0-9]{2})([0-9]{2})/$1, $2, $3/; |
233 |
$value[0] =~ s/[is]/, /; |
234 |
$r = {type => 'start', value => 'LINK'}; |
235 |
push @token, {type => 'uri', value => "($value[0])"}; |
236 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK'; |
237 |
} elsif ($command eq 'LIMG') { |
238 |
$r = {type => 'start', value => 'IMG'}; |
239 |
push @token, {type => 'uri', value => $uriv->($value[0])}; |
240 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[3], '/IMG'; |
241 |
} elsif ($command eq 'LMG') { |
242 |
$r = {type => 'start', value => 'IMG'}; |
243 |
push @token, {type => 'uri', value => $uriv->($value[0])}; |
244 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/IMG'; |
245 |
} elsif ($command eq 'LINK') { |
246 |
if (@value == 2) { |
247 |
push @token, {type => 'uri', value => $uriv->($value[0])}; |
248 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK'; |
249 |
} elsif ($flag =~ /\+/) { |
250 |
push @token, {type => 'uri', value => $uriv->($value[0])}; |
251 |
} else { |
252 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/LINK'; |
253 |
} |
254 |
} elsif ($command eq 'NEW') { |
255 |
$r = {type => 'start', value => 'SECTION'}; |
256 |
unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H'; |
257 |
} elsif ($command eq 'LNEW') { |
258 |
$r = {type => 'start', value => 'SECTION'}; |
259 |
push @token, {type => 'uri', value => $uriv->($value[0])}; |
260 |
unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[1], '/H'; |
261 |
} elsif ($command eq 'SUB') { |
262 |
$r = {type => 'start', value => 'SUB'}; |
263 |
unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H'; |
264 |
} elsif ($command eq 'PERSON') { |
265 |
push @token, {type => 'key', value => $attrvalue->($value[0])}; |
266 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/PERSON'; |
267 |
} elsif ($command eq 'SEE') { |
268 |
if (@value == 2) { |
269 |
push @token, {type => 'key', value => $attrvalue->($value[0])}; |
270 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SEE'; |
271 |
} else { |
272 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SEE'; |
273 |
} |
274 |
} elsif ($command eq 'SPAN') { |
275 |
if (@value == 2) { |
276 |
push @token, {type => 'class', |
277 |
value => $attrvalue->($value[0])}; |
278 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SPAN'; |
279 |
} else { |
280 |
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SPAN'; |
281 |
} |
282 |
} elsif ($command eq 'OK') { |
283 |
$r = {type => '#EOF'}; |
284 |
next L; |
285 |
} elsif ($command eq 'XML') { |
286 |
unshift @{$self->{line}}, 'DATA {} >>XML '.$line; |
287 |
next L; |
288 |
} |
289 |
|
290 |
for (keys %attr) { |
291 |
push @token, {type => $_, value => $attrvalue->($attr{$_})}; |
292 |
} |
293 |
|
294 |
push @{$self->{token}}, @token; |
295 |
last L; |
296 |
} elsif ($line eq 'H2H/1.0') { |
297 |
$r = {type => 'magic', value => 'H2H/1.0'}; |
298 |
last L; |
299 |
} elsif ($line =~ m#^/([A-Z]+)\s*$#) { |
300 |
$r = {type => 'end', value => $1}; |
301 |
last L; |
302 |
} elsif ($line =~ s/^!#//) { |
303 |
$r = {type => 'hnf-comment', value => $line}; |
304 |
last L; |
305 |
} elsif ($line =~ s/^!//) { |
306 |
$r = {type => 'html-comment', value => $line}; |
307 |
last L; |
308 |
} else { |
309 |
unshift @{$self->{line}}, 'DATA {} >>'.$line; |
310 |
next L; |
311 |
} |
312 |
} # L |
313 |
|
314 |
return $r; |
315 |
} # _shift_token |
316 |
|
317 |
sub _construct_tree ($) { |
318 |
my $self = $_[0]; |
319 |
|
320 |
my $doc_el = $self->{doc}->document_element; |
321 |
my $head_el = $self->{doc}->create_element_ns (HTML_NS, 'head'); |
322 |
my $body_el = $self->{doc}->create_element_ns (HTML_NS, 'body'); |
323 |
$doc_el->append_child ($head_el); |
324 |
$doc_el->append_child ($body_el); |
325 |
$doc_el->set_user_data ('command-name' => '#html'); |
326 |
$head_el->set_user_data ('command-name' => '#head'); |
327 |
$body_el->set_user_data ('command-name' => '#body'); |
328 |
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Name' => 'H2H'); |
329 |
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Version' => '0.9'); |
330 |
|
331 |
my $parent = { |
332 |
subsection => $body_el, |
333 |
attr => $body_el, |
334 |
text => $body_el, |
335 |
}; |
336 |
my $state = 'data'; |
337 |
## data - normal |
338 |
## list - UL or OL |
339 |
## br - after br start tag token |
340 |
## eol - after eol token |
341 |
|
342 |
T: while (my $token = $self->_shift_token) { |
343 |
last T if $token->{type} eq '#EOF'; |
344 |
|
345 |
if ($token->{type} eq 'text') { |
346 |
if ($state eq 'list') { |
347 |
my $li_el = $self->{doc}->create_element_ns (HTML_NS, 'li'); |
348 |
$li_el->manakai_append_text ($token->{value}); |
349 |
$parent->{text}->append_child ($li_el); |
350 |
} else { |
351 |
$parent->{text}->manakai_append_text ("\x0A") if $state eq 'eol'; |
352 |
$parent->{text}->manakai_append_text ($token->{value}); |
353 |
$state = 'data'; |
354 |
} |
355 |
} elsif ($token->{type} eq 'eol') { |
356 |
if ($state eq 'eol') { |
357 |
$parent->{text}->manakai_append_text ("\x0A"); |
358 |
} else { |
359 |
$state = $state eq 'br' ? 'data' : 'eol'; |
360 |
} |
361 |
} elsif ($token->{type} eq 'start') { |
362 |
my $info = { |
363 |
# nsuri, qname, parent, state |
364 |
ABBR => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
365 |
ACRONYM => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
366 |
BODYTEXT => [HTML3_NS, 'bodytext', $parent->{text}, 'data'], |
367 |
CITE => [HTML_NS, 'blockquote', $parent->{text}, 'data', |
368 |
{PRE => 1}], |
369 |
DD => [HTML_NS, 'dd', $parent->{text}, 'data'], |
370 |
DEL => [HTML_NS, 'del', $parent->{text}, 'data'], |
371 |
DIV => [HTML_NS, 'div', $parent->{text}, 'data', {P => 1}], |
372 |
DL => [HTML_NS, 'dl', $parent->{text}, 'data'], |
373 |
DT => [HTML_NS, 'dt', $parent->{text}, 'data'], |
374 |
EM => [HTML_NS, 'em', $parent->{text}, 'data'], |
375 |
FN => [H2H_NS, 'fn', $parent->{text}, 'data'], |
376 |
H => [XHTML2_NS, 'h', $parent->{text}, 'data'], |
377 |
HOUR => [H2H_NS, 'hour', $head_el, 'data'], |
378 |
IMG => [HTML_NS, 'img', $parent->{text}, 'data'], |
379 |
INS => [HTML_NS, 'ins', $parent->{text}, 'data'], |
380 |
KION => [H2H_NS, 'kion', $head_el, 'data'], |
381 |
LI => [HTML_NS, 'li', $parent->{text}, 'data'], |
382 |
LINK => [HTML_NS, 'a', $parent->{text}, 'data'], |
383 |
LUNCH => [H2H_NS, 'lunch', $head_el, 'data'], |
384 |
OL => [HTML_NS, 'ol', $parent->{text}, 'list', {PRE => 1}], |
385 |
P => [HTML_NS, 'p', $parent->{text}, 'data', |
386 |
{P => 1, PRE => 1}], |
387 |
PERSON => [HTML3_NS, 'person', $parent->{text}, 'data'], |
388 |
PRE => [HTML_NS, 'pre', $parent->{text}, 'data', |
389 |
{P => 1}], |
390 |
RB => [HTML_NS, 'rb', $parent->{text}, 'data'], |
391 |
RIBU => [HTML_NS, 'ruby', $parent->{text}, 'data'], |
392 |
RT => [HTML_NS, 'rt', $parent->{text}, 'data'], |
393 |
RUBY => [HTML_NS, 'ruby', $parent->{text}, 'data'], |
394 |
SECTION => [HTML_NS, 'section', $body_el, 'data'], |
395 |
SEE => [HTML_NS, 'i', $parent->{text}, 'data'], |
396 |
SP => [H2H_NS, 'sp', $head_el, 'data'], |
397 |
SPAN => [HTML_NS, 'span', $parent->{text}, 'data'], |
398 |
SRC => [HTML3_NS, 'credit', $parent->{text}, 'data'], |
399 |
STRONG => [HTML_NS, 'strong', $parent->{text}, 'data'], |
400 |
SUBSECTION => [HTML_NS, 'section', $parent->{subsection}, 'data'], |
401 |
T1 => [H2H_NS, 't1', $head_el, 'data'], |
402 |
T2 => [H2H_NS, 't2', $head_el, 'data'], |
403 |
T3 => [H2H_NS, 't3', $head_el, 'data'], |
404 |
T4 => [H2H_NS, 't4', $head_el, 'data'], |
405 |
T5 => [H2H_NS, 't5', $head_el, 'data'], |
406 |
T6 => [H2H_NS, 't6', $head_el, 'data'], |
407 |
T7 => [H2H_NS, 't7', $head_el, 'data'], |
408 |
TAION => [H2H_NS, 'taion', $head_el, 'data'], |
409 |
TENKI => [H2H_NS, 'tenki', $head_el, 'data'], |
410 |
THEME => [H2H_NS, 'theme', $head_el, 'data'], |
411 |
UL => [HTML_NS, 'ul', $parent->{text}, 'list', {PRE => 1}], |
412 |
YAMI => [H2H_NS, 'yami', $parent->{text}, 'data'], |
413 |
a => [HTML_NS, 'a', $parent->{text}, 'data'], |
414 |
abbr => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
415 |
acronym => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
416 |
b => [HTML_NS, 'b', $parent->{text}, 'data'], |
417 |
blockquote => [HTML_NS, 'blockquote', $parent->{text}, 'data'], |
418 |
br => [HTML_NS, 'br', $parent->{text}, 'br'], |
419 |
caption => [HTML_NS, 'caption', $parent->{text}, 'data'], |
420 |
code => [HTML_NS, 'code', $parent->{text}, 'data'], |
421 |
dd => [HTML_NS, 'dd', $parent->{text}, 'data'], |
422 |
del => [HTML_NS, 'del', $parent->{text}, 'data'], |
423 |
dfn => [HTML_NS, 'dfn', $parent->{text}, 'data'], |
424 |
div => [HTML_NS, 'div', $parent->{text}, 'data'], |
425 |
dl => [HTML_NS, 'dl', $parent->{text}, 'data'], |
426 |
dt => [HTML_NS, 'dt', $parent->{text}, 'data'], |
427 |
em => [HTML_NS, 'em', $parent->{text}, 'data'], |
428 |
form => [HTML_NS, 'form', $parent->{text}, 'data'], |
429 |
h1 => [HTML_NS, 'h1', $parent->{text}, 'data'], |
430 |
h2 => [HTML_NS, 'h2', $parent->{text}, 'data'], |
431 |
h3 => [HTML_NS, 'h3', $parent->{text}, 'data'], |
432 |
h4 => [HTML_NS, 'h4', $parent->{text}, 'data'], |
433 |
i => [HTML_NS, 'i', $parent->{text}, 'data'], |
434 |
img => [HTML_NS, 'img', $parent->{text}, 'data'], |
435 |
input => [HTML_NS, 'input', $parent->{text}, 'data'], |
436 |
ins => [HTML_NS, 'ins', $parent->{text}, 'data'], |
437 |
kbd => [HTML_NS, 'kbd', $parent->{text}, 'data'], |
438 |
label => [HTML_NS, 'label', $parent->{text}, 'data'], |
439 |
li => [HTML_NS, 'li', $parent->{text}, 'data'], |
440 |
ol => [HTML_NS, 'ol', $parent->{text}, 'data'], |
441 |
p => [HTML_NS, 'p', $parent->{text}, 'data'], |
442 |
pre => [HTML_NS, 'pre', $parent->{text}, 'data'], |
443 |
q => [HTML_NS, 'q', $parent->{text}, 'data'], |
444 |
rb => [HTML_NS, 'rb', $parent->{text}, 'data'], |
445 |
rp => [HTML_NS, 'rp', $parent->{text}, 'data'], |
446 |
rt => [HTML_NS, 'rt', $parent->{text}, 'data'], |
447 |
ruby => [HTML_NS, 'ruby', $parent->{text}, 'data'], |
448 |
samp => [HTML_NS, 'samp', $parent->{text}, 'data'], |
449 |
span => [HTML_NS, 'span', $parent->{text}, 'data'], |
450 |
strong => [HTML_NS, 'strong', $parent->{text}, 'data'], |
451 |
sub => [HTML_NS, 'sub', $parent->{text}, 'data'], |
452 |
sup => [HTML_NS, 'sup', $parent->{text}, 'data'], |
453 |
table => [HTML_NS, 'table', $parent->{text}, 'data'], |
454 |
tbody => [HTML_NS, 'tbody', $parent->{text}, 'data'], |
455 |
td => [HTML_NS, 'td', $parent->{text}, 'data'], |
456 |
th => [HTML_NS, 'th', $parent->{text}, 'data'], |
457 |
thead => [HTML_NS, 'thead', $parent->{text}, 'data'], |
458 |
tr => [HTML_NS, 'tr', $parent->{text}, 'data'], |
459 |
ul => [HTML_NS, 'ul', $parent->{text}, 'data'], |
460 |
var => [HTML_NS, 'var', $parent->{text}, 'data'], |
461 |
}->{$token->{value}} |
462 |
|| [H2H_NS, $token->{value}, $parent->{text}, 'data']; |
463 |
while ($info->[4]->{$info->[2]->get_user_data ('command-name')}) { |
464 |
$info->[2] = $info->[2]->parent_node; |
465 |
} |
466 |
my $el = $self->{doc}->create_element_ns ($info->[0], $info->[1]); |
467 |
$el->set_user_data ('command-name', $token->{value}); |
468 |
$info->[2]->append_child ($el); |
469 |
$parent->{text} = $el; |
470 |
$parent->{attr} = $el; |
471 |
$parent->{subsection} = $el if $token->{value} eq 'SECTION'; |
472 |
$state = $info->[3]; |
473 |
} elsif ($token->{type} eq 'end') { |
474 |
E: while (my $et = $parent->{text}->get_user_data ('command-name')) { |
475 |
$parent->{text} = $parent->{text}->parent_node; |
476 |
last E if $et eq $token->{value}; |
477 |
last E if $et eq '#body'; |
478 |
} |
479 |
$parent->{attr} = $parent->{text}; |
480 |
$state = { |
481 |
UL => 'list', |
482 |
OL => 'list', |
483 |
}->{$parent->{text}->get_user_data ('command-name')} |
484 |
|| $state eq 'br' ? 'br' : 'data'; |
485 |
} elsif ($token->{type} eq 'char') { |
486 |
my $el = $self->{doc}->create_element_ns (H2H_NS, 'char'); |
487 |
$el->manakai_append_text ($token->{value}); |
488 |
$parent->{text}->append_child ($el); |
489 |
$state = 'data' if $state eq 'br'; |
490 |
} elsif ($token->{type} eq 'magic') { |
491 |
my ($name, $version) = split m#/#, $token->{value}, 2; |
492 |
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Name', $name); |
493 |
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Version', $version); |
494 |
} elsif ($token->{type} eq 'hnf-comment') { |
495 |
my $com = $self->{doc}->create_element_ns |
496 |
(H2H_NS, 'hnf-comment'); |
497 |
$com->text_content ($token->{value}); |
498 |
$parent->{text}->append_child ($com); |
499 |
} elsif ($token->{type} eq 'html-comment') { |
500 |
my $com = $self->{doc}->create_element_ns |
501 |
(H2H_NS, 'html-comment'); |
502 |
$com->text_content ($token->{value}); |
503 |
$parent->{text}->append_child ($com); |
504 |
} elsif ($token->{type} eq 'source') { |
505 |
my $src = $self->{doc}->create_element_ns (HTML3_NS, 'credit'); |
506 |
$src->manakai_append_text ($token->{value}); |
507 |
$parent->{text}->append_child ($src); |
508 |
} elsif ($token->{type} eq 'uri') { |
509 |
my $v = $token->{value}; |
510 |
if ($v =~ /^\(([^()]+)\)$/) { |
511 |
my @v = split /\s*,\s*/, $1; |
512 |
$parent->{attr}->set_attribute_ns |
513 |
(H2H_NS, 'href-year' => $v[0]+0); |
514 |
$parent->{attr}->set_attribute_ns |
515 |
(H2H_NS, 'href-month' => $v[1]+0); |
516 |
$parent->{attr}->set_attribute_ns |
517 |
(H2H_NS, 'href-day' => $v[2]+0); |
518 |
$parent->{attr}->set_attribute_ns |
519 |
(H2H_NS, 'href-section' => $v[3]+0) if $v[3]; |
520 |
$parent->{attr}->set_attribute_ns |
521 |
(H2H_NS, 'href-subsection' => $v[4]+0) |
522 |
if $v[4]; |
523 |
} else { |
524 |
my $xuri = $parent->{attr}->manakai_expanded_uri; |
525 |
if ($xuri eq HTML_NS . 'a') { |
526 |
$parent->{attr}->set_attribute_ns |
527 |
(undef, href => $token->{value}); |
528 |
} elsif ($xuri eq HTML_NS . 'blockquote') { |
529 |
$parent->{attr}->set_attribute_ns |
530 |
(undef, cite => $token->{value}); |
531 |
} else { |
532 |
$parent->{attr}->set_attribute_ns |
533 |
(XHTML2_NS, href => $token->{value}); |
534 |
} |
535 |
} |
536 |
} elsif ({ |
537 |
title => 1, style => 1, |
538 |
class => 1, href => 1, 'accept-charset' => 1, |
539 |
action => 1, method => 1, alt => 1, src => 1, |
540 |
type => 1, value => 1, name => 1, accesskey => 1, |
541 |
for => 1, cite => 1, onclick => 1, colspan => 1, |
542 |
scope => 1, summary => 1, |
543 |
}->{$token->{type}}) { |
544 |
$parent->{attr}->set_attribute_ns |
545 |
(undef, $token->{type}, $token->{value}); |
546 |
} elsif ($token->{type} eq 'cat') { |
547 |
for (split /\s*,\s*/, $token->{value}) { |
548 |
my $el = $self->{doc}->create_element_ns (H2H_NS, 'cat'); |
549 |
$el->manakai_append_text ($_); |
550 |
$parent->{attr}->append_child ($el); |
551 |
} |
552 |
} elsif ($token->{type} eq 'lang' or $token->{type} eq 'xml:lang') { |
553 |
$parent->{attr}->set_attribute_ns |
554 |
(q<http://www.w3.org/XML/1998/namespace>, |
555 |
'xml:lang' => $token->{value}); |
556 |
} elsif ($token->{type} eq 'id') { |
557 |
$parent->{attr}->set_attribute_ns |
558 |
(q<http://www.w3.org/XML/1998/namespace>, |
559 |
'xml:id' => $token->{value}); |
560 |
} elsif ($token->{type} eq 'wbradded') { |
561 |
# ignore |
562 |
} else { |
563 |
# key, level, place, position, time |
564 |
$parent->{attr}->set_attribute_ns |
565 |
(H2H_NS, $token->{type}, $token->{value}); |
566 |
} |
567 |
} |
568 |
} # _construct_tree |
569 |
|
570 |
1; |
571 |
## $Date: 2007/08/05 07:12:45 $ |