1 |
wakaba |
1.1 |
package Whatpm::H2H; |
2 |
|
|
use strict; |
3 |
|
|
|
4 |
|
|
sub H2H_NS () { q<http://suika.fam.cx/~wakaba/archive/2005/manakai/Markup/H2H/> } |
5 |
|
|
sub HTML_NS () { q<http://www.w3.org/1999/xhtml> } |
6 |
|
|
sub HTML3_NS () { q<urn:x-suika-fam-cx:markup:ietf:html:3:draft:00:> } |
7 |
|
|
sub SW09_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:9:> } |
8 |
|
|
sub XHTML2_NS () { q<http://www.w3.org/2002/06/xhtml2/> } |
9 |
|
|
|
10 |
|
|
sub parse_string ($$$) { |
11 |
|
|
my $self = bless { |
12 |
|
|
token => [], |
13 |
|
|
location => {}, |
14 |
|
|
doc => $_[2], |
15 |
|
|
}, $_[0]; |
16 |
|
|
|
17 |
|
|
my $s = ''.$_[1]; |
18 |
|
|
$s =~ s/\x0D\x0A/\x0A/g; |
19 |
|
|
$s =~ tr/\x0D/\x0A/; |
20 |
|
|
$self->{line} = [split /\x0A/, $s]; |
21 |
|
|
|
22 |
|
|
local $Error::Depth = $Error::Depth + 1; |
23 |
|
|
$self->{doc}->strict_error_checking (0); |
24 |
|
|
my $doc_el = $self->{doc}->create_element_ns (HTML_NS, 'html'); |
25 |
|
|
$doc_el->set_attribute_ns (q<http://www.w3.org/2000/xmlns/>, 'xmlns', HTML_NS); |
26 |
|
|
$self->{doc}->append_child ($doc_el); |
27 |
|
|
|
28 |
|
|
$self->_construct_tree; |
29 |
|
|
|
30 |
|
|
return $self->{doc}; |
31 |
|
|
} # parse_string |
32 |
|
|
|
33 |
|
|
sub _shift_token ($) { |
34 |
|
|
my $self = $_[0]; |
35 |
|
|
|
36 |
|
|
if (@{$self->{token}}) { |
37 |
|
|
return shift @{$self->{token}}; |
38 |
|
|
} |
39 |
|
|
|
40 |
|
|
my $attrvalue = sub { |
41 |
|
|
my $v = shift; |
42 |
|
|
$v =~ s/"/"/g; |
43 |
|
|
$v =~ s/</</g; |
44 |
|
|
$v =~ s/>/>/g; |
45 |
|
|
$v =~ s/®/\x{00AE}/g; |
46 |
|
|
$v =~ s/♥/\x{2661}/g; |
47 |
|
|
$v =~ s/&/&/g; |
48 |
|
|
return $v; |
49 |
|
|
}; |
50 |
|
|
|
51 |
|
|
my $uriv = sub { |
52 |
|
|
my $v = $attrvalue->(shift); |
53 |
|
|
$v =~ s/^\{/(/; |
54 |
|
|
$v =~ s/\}$/)/; |
55 |
|
|
$v =~ s/^\#([0-9si]+)$/($1)/; |
56 |
|
|
$v =~ s/^\(([0-9]{4})([0-9]{2})([0-9]{2})([^)]*)\)$/($1, $2, $3$4)/; |
57 |
|
|
$v =~ s/[si]/, /g if $v =~ /^\(/ and $v =~ /\)$/; |
58 |
|
|
return $v; |
59 |
|
|
}; |
60 |
|
|
|
61 |
|
|
my $r = {type => '#EOF'}; |
62 |
|
|
L: while (defined (my $line = shift @{$self->{line}})) { |
63 |
|
|
if ($line =~ s/^([A-Z]+|T[0-9])(\*?\+?\*?)(?:\s+|$)//) { |
64 |
|
|
my $command = $1; |
65 |
|
|
my $flag = $2; |
66 |
|
|
$r = {type => 'start', value => $command}; |
67 |
|
|
|
68 |
|
|
my $uri; |
69 |
|
|
if ($flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) { |
70 |
|
|
$uri = $1; |
71 |
|
|
} |
72 |
|
|
|
73 |
|
|
my $attr = ''; |
74 |
|
|
if ($line =~ s/^\{(\s*(?:[A-Za-z][^{}]*)?)\}\s*//) { |
75 |
|
|
$attr = $1; |
76 |
|
|
} |
77 |
|
|
|
78 |
|
|
if (not defined $uri and |
79 |
|
|
$flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) { |
80 |
|
|
$uri = $1; |
81 |
|
|
} |
82 |
|
|
|
83 |
|
|
my @token; |
84 |
|
|
my $info = { |
85 |
|
|
# val# val#(*) |
86 |
|
|
ABBR => [2, 2], |
87 |
|
|
ACRONYM => [2, 2], |
88 |
|
|
CITE => [2, 1], |
89 |
|
|
LDIARY => [4, 4], |
90 |
|
|
LIMG => [4, 4], |
91 |
|
|
LINK => [2, 1], |
92 |
|
|
LMG => [2, 2], |
93 |
|
|
LNEW => [2, 2], |
94 |
|
|
PERSON => [2, 2], |
95 |
|
|
RIBU => [2, 2], |
96 |
|
|
RUBY => [2, 2], |
97 |
|
|
SEE => [2, 2], |
98 |
|
|
}->{$command}; |
99 |
|
|
my @value = split /\s+/, $line, |
100 |
|
|
($flag =~ /\*/ ? $info->[1] : $info->[0]) || 1; |
101 |
|
|
|
102 |
|
|
push @token, {type => 'uri', value => $uriv->($uri)} if defined $uri; |
103 |
|
|
|
104 |
|
|
my %attr; |
105 |
|
|
while ($attr =~ /([A-Za-z0-9_-]+)\s*(?:=>?|:)\s*([^";,]+|"[^"]+")/gc) { |
106 |
|
|
my $name = lc $1; |
107 |
|
|
my $value = $2; |
108 |
|
|
$value =~ tr/"//d; |
109 |
|
|
$attr{$name} = $value; |
110 |
|
|
} |
111 |
|
|
delete $attr{'content-type'}; |
112 |
|
|
|
113 |
|
|
if ({ |
114 |
|
|
ABBR => 1, ACRONYM => 1, |
115 |
|
|
RUBY => 1, RIBU => 1, |
116 |
|
|
}->{$command}) { |
117 |
|
|
if (@value == 1 and $attr{title}) { |
118 |
|
|
push @value, $attr{title}; |
119 |
|
|
delete $attr{title}; |
120 |
|
|
} |
121 |
|
|
if (@value == 2) { |
122 |
|
|
unshift @{$self->{line}}, |
123 |
|
|
'RB', |
124 |
|
|
'DATA {} >>'.$value[1], |
125 |
|
|
'/RB', |
126 |
|
|
'RT', |
127 |
|
|
'DATA {} >>'.$value[0], |
128 |
|
|
'/RT', |
129 |
|
|
'/'.$command; |
130 |
|
|
} else { |
131 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command; |
132 |
|
|
} |
133 |
|
|
} elsif ($command eq 'CITE') { |
134 |
|
|
if (@value == 2) { |
135 |
|
|
if (defined $uri or $value[0] !~ /^[a-z-]+:/) { |
136 |
|
|
unshift @{$self->{line}}, |
137 |
|
|
'SRC', |
138 |
|
|
'DATA {} >>'.$value[0].' '.$value[1], |
139 |
|
|
'/SRC', 'BODYTEXT'; |
140 |
|
|
} else { |
141 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
142 |
|
|
unshift @{$self->{line}}, |
143 |
|
|
'SRC', |
144 |
|
|
'DATA {} >>'.$value[1], |
145 |
|
|
'/SRC', 'BODYTEXT'; |
146 |
|
|
} |
147 |
|
|
} elsif (@value == 1) { |
148 |
|
|
if (defined $uri or $value[0] !~ /^[a-z-]+:/) { |
149 |
|
|
unshift @{$self->{line}}, |
150 |
|
|
'SRC', |
151 |
|
|
'DATA {} >>'.$value[0], |
152 |
|
|
'/SRC', 'BODYTEXT'; |
153 |
|
|
} else { |
154 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
155 |
|
|
unshift @{$self->{line}}, 'BODYTEXT'; |
156 |
|
|
} |
157 |
|
|
} else { |
158 |
|
|
unshift @{$self->{line}}, 'BODYTEXT'; |
159 |
|
|
} |
160 |
|
|
} elsif ($command eq 'DATA') { |
161 |
|
|
my @token; |
162 |
|
|
$line =~ s/^>>//; |
163 |
|
|
while (length $line) { |
164 |
|
|
if ($line =~ s/^<([a-z0-9]+)\s*//) { |
165 |
|
|
my $tagname = $1; |
166 |
|
|
push @token, {type => 'start', value => $tagname}; |
167 |
|
|
while ($line =~ s/^([a-z-]+)\s*=\s*"([^"]*)"\s*//) { |
168 |
|
|
push @token, {type => $1, value => $attrvalue->($2)}; |
169 |
|
|
} |
170 |
|
|
$line =~ s#^/?\s*>##; |
171 |
|
|
push @token, {type => 'end', value => $tagname} |
172 |
|
|
if $tagname eq 'img' or $tagname eq 'input' or |
173 |
|
|
$tagname eq 'br'; |
174 |
|
|
} elsif ($line =~ s#^</([a-z0-9]+)\s*>##) { |
175 |
|
|
push @token, {type => 'end', value => $1}; |
176 |
|
|
} elsif ($line =~ s/^<!--(.*?)-->//) { |
177 |
|
|
push @token, {type => 'html-comment', value => $1}; |
178 |
|
|
} elsif ($line =~ s/^&([a-z]+);//) { |
179 |
|
|
my $name = $1; |
180 |
|
|
if ($name eq 'amp') { |
181 |
|
|
push @token, {type => 'text', value => '&'}; |
182 |
|
|
} elsif ($name eq 'lt') { |
183 |
|
|
push @token, {type => 'text', value => '<'}; |
184 |
|
|
} elsif ($name eq 'gt') { |
185 |
|
|
push @token, {type => 'text', value => '>'}; |
186 |
|
|
} elsif ($name eq 'quot') { |
187 |
|
|
push @token, {type => 'text', value => '"'}; |
188 |
|
|
} elsif ($name eq 'reg') { |
189 |
|
|
push @token, {type => 'text', value => "\x{00AE}"}; |
190 |
|
|
} elsif ($name eq 'hearts') { |
191 |
|
|
push @token, {type => 'text', value => "\x{2661}"}; |
192 |
|
|
} else { |
193 |
|
|
push @token, {type => 'char', value => $name}; |
194 |
|
|
} |
195 |
|
|
} elsif ($line =~ s/^&#([0-9]+);//) { |
196 |
|
|
push @token, {type => 'text', value => ord $1}; |
197 |
|
|
} elsif ($line =~ s/^&#x([0-9A-Fa-f]+);//) { |
198 |
|
|
push @token, {type => 'text', value => ord hex $1}; |
199 |
|
|
} elsif ($line =~ s/^([^<&]+)//) { |
200 |
|
|
push @token, {type => 'text', value => $1}; |
201 |
|
|
} else { |
202 |
|
|
push @token, {type => 'text', value => substr ($line, 0, 1)}; |
203 |
|
|
substr ($line, 0, 1) = ''; |
204 |
|
|
} |
205 |
|
|
} |
206 |
|
|
push @token, {type => 'eol'}; |
207 |
|
|
|
208 |
|
|
$r = shift @token; |
209 |
|
|
push @{$self->{token}}, @token; |
210 |
|
|
last L; |
211 |
|
|
} elsif ({ |
212 |
|
|
DD => 1, DT => 1, |
213 |
|
|
DEL => 1, INS => 1, |
214 |
|
|
LI => 1, |
215 |
|
|
RB => 1, RT => 1, |
216 |
|
|
STRONG => 1, |
217 |
|
|
YAMI => 1, |
218 |
|
|
EM => 1, |
219 |
|
|
HOUR => 1, KION => 1, LUNCH => 1, |
220 |
|
|
TAION => 1, TENKI => 1, THEME => 1, |
221 |
|
|
T1 => 1, T2 => 1, T3 => 1, T4 => 1, |
222 |
|
|
T5 => 1, T6 => 1, T7 => 1, SP => 1, |
223 |
|
|
}->{$command}) { |
224 |
|
|
if (@value) { |
225 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command; |
226 |
|
|
} |
227 |
|
|
} elsif ($command eq 'DIV') { |
228 |
|
|
if (@value) { |
229 |
|
|
$r = {type => 'class', value => $value[0]}; |
230 |
|
|
} |
231 |
|
|
} elsif ($command eq 'LDIARY') { |
232 |
|
|
$value[0] =~ s/^([0-9]{4})([0-9]{2})([0-9]{2})/$1, $2, $3/; |
233 |
|
|
$value[0] =~ s/[is]/, /; |
234 |
|
|
$r = {type => 'start', value => 'LINK'}; |
235 |
|
|
push @token, {type => 'uri', value => "($value[0])"}; |
236 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK'; |
237 |
|
|
} elsif ($command eq 'LIMG') { |
238 |
|
|
$r = {type => 'start', value => 'IMG'}; |
239 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
240 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[3], '/IMG'; |
241 |
|
|
} elsif ($command eq 'LMG') { |
242 |
|
|
$r = {type => 'start', value => 'IMG'}; |
243 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
244 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/IMG'; |
245 |
|
|
} elsif ($command eq 'LINK') { |
246 |
|
|
if (@value == 2) { |
247 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
248 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK'; |
249 |
|
|
} elsif ($flag =~ /\+/) { |
250 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
251 |
|
|
} else { |
252 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/LINK'; |
253 |
|
|
} |
254 |
|
|
} elsif ($command eq 'NEW') { |
255 |
|
|
$r = {type => 'start', value => 'SECTION'}; |
256 |
|
|
unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H'; |
257 |
|
|
} elsif ($command eq 'LNEW') { |
258 |
|
|
$r = {type => 'start', value => 'SECTION'}; |
259 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
260 |
|
|
unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[1], '/H'; |
261 |
|
|
} elsif ($command eq 'SUB') { |
262 |
|
|
$r = {type => 'start', value => 'SUB'}; |
263 |
|
|
unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H'; |
264 |
|
|
} elsif ($command eq 'PERSON') { |
265 |
|
|
push @token, {type => 'key', value => $attrvalue->($value[0])}; |
266 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/PERSON'; |
267 |
|
|
} elsif ($command eq 'SEE') { |
268 |
|
|
if (@value == 2) { |
269 |
|
|
push @token, {type => 'key', value => $attrvalue->($value[0])}; |
270 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SEE'; |
271 |
|
|
} else { |
272 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SEE'; |
273 |
|
|
} |
274 |
|
|
} elsif ($command eq 'SPAN') { |
275 |
|
|
if (@value == 2) { |
276 |
|
|
push @token, {type => 'class', |
277 |
|
|
value => $attrvalue->($value[0])}; |
278 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SPAN'; |
279 |
|
|
} else { |
280 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SPAN'; |
281 |
|
|
} |
282 |
|
|
} elsif ($command eq 'OK') { |
283 |
|
|
$r = {type => '#EOF'}; |
284 |
|
|
next L; |
285 |
|
|
} elsif ($command eq 'XML') { |
286 |
|
|
unshift @{$self->{line}}, 'DATA {} >>XML '.$line; |
287 |
|
|
next L; |
288 |
|
|
} |
289 |
|
|
|
290 |
|
|
for (keys %attr) { |
291 |
|
|
push @token, {type => $_, value => $attrvalue->($attr{$_})}; |
292 |
|
|
} |
293 |
|
|
|
294 |
|
|
push @{$self->{token}}, @token; |
295 |
|
|
last L; |
296 |
|
|
} elsif ($line eq 'H2H/1.0') { |
297 |
|
|
$r = {type => 'magic', value => 'H2H/1.0'}; |
298 |
|
|
last L; |
299 |
|
|
} elsif ($line =~ m#^/([A-Z]+)\s*$#) { |
300 |
|
|
$r = {type => 'end', value => $1}; |
301 |
|
|
last L; |
302 |
|
|
} elsif ($line =~ s/^!#//) { |
303 |
|
|
$r = {type => 'hnf-comment', value => $line}; |
304 |
|
|
last L; |
305 |
|
|
} elsif ($line =~ s/^!//) { |
306 |
|
|
$r = {type => 'html-comment', value => $line}; |
307 |
|
|
last L; |
308 |
|
|
} else { |
309 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$line; |
310 |
|
|
next L; |
311 |
|
|
} |
312 |
|
|
} # L |
313 |
|
|
|
314 |
|
|
return $r; |
315 |
|
|
} # _shift_token |
316 |
|
|
|
317 |
|
|
sub _construct_tree ($) { |
318 |
|
|
my $self = $_[0]; |
319 |
|
|
|
320 |
|
|
my $doc_el = $self->{doc}->document_element; |
321 |
|
|
my $head_el = $self->{doc}->create_element_ns (HTML_NS, 'head'); |
322 |
|
|
my $body_el = $self->{doc}->create_element_ns (HTML_NS, 'body'); |
323 |
|
|
$doc_el->append_child ($head_el); |
324 |
|
|
$doc_el->append_child ($body_el); |
325 |
|
|
$doc_el->set_user_data ('command-name' => '#html'); |
326 |
|
|
$head_el->set_user_data ('command-name' => '#head'); |
327 |
|
|
$body_el->set_user_data ('command-name' => '#body'); |
328 |
|
|
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Name' => 'H2H'); |
329 |
|
|
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Version' => '0.9'); |
330 |
|
|
|
331 |
|
|
my $parent = { |
332 |
|
|
subsection => $body_el, |
333 |
|
|
attr => $body_el, |
334 |
|
|
text => $body_el, |
335 |
|
|
}; |
336 |
|
|
my $state = 'data'; |
337 |
|
|
## data - normal |
338 |
|
|
## list - UL or OL |
339 |
|
|
## br - after br start tag token |
340 |
|
|
## eol - after eol token |
341 |
|
|
|
342 |
|
|
T: while (my $token = $self->_shift_token) { |
343 |
|
|
last T if $token->{type} eq '#EOF'; |
344 |
|
|
|
345 |
|
|
if ($token->{type} eq 'text') { |
346 |
|
|
if ($state eq 'list') { |
347 |
|
|
my $li_el = $self->{doc}->create_element_ns (HTML_NS, 'li'); |
348 |
|
|
$li_el->manakai_append_text ($token->{value}); |
349 |
|
|
$parent->{text}->append_child ($li_el); |
350 |
|
|
} else { |
351 |
|
|
$parent->{text}->manakai_append_text ("\x0A") if $state eq 'eol'; |
352 |
|
|
$parent->{text}->manakai_append_text ($token->{value}); |
353 |
|
|
$state = 'data'; |
354 |
|
|
} |
355 |
|
|
} elsif ($token->{type} eq 'eol') { |
356 |
|
|
if ($state eq 'eol') { |
357 |
|
|
$parent->{text}->manakai_append_text ("\x0A"); |
358 |
|
|
} else { |
359 |
|
|
$state = $state eq 'br' ? 'data' : 'eol'; |
360 |
|
|
} |
361 |
|
|
} elsif ($token->{type} eq 'start') { |
362 |
|
|
my $info = { |
363 |
|
|
# nsuri, qname, parent, state |
364 |
|
|
ABBR => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
365 |
|
|
ACRONYM => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
366 |
|
|
BODYTEXT => [HTML3_NS, 'bodytext', $parent->{text}, 'data'], |
367 |
|
|
CITE => [HTML_NS, 'blockquote', $parent->{text}, 'data', |
368 |
|
|
{PRE => 1}], |
369 |
|
|
DD => [HTML_NS, 'dd', $parent->{text}, 'data'], |
370 |
|
|
DEL => [HTML_NS, 'del', $parent->{text}, 'data'], |
371 |
|
|
DIV => [HTML_NS, 'div', $parent->{text}, 'data', {P => 1}], |
372 |
|
|
DL => [HTML_NS, 'dl', $parent->{text}, 'data'], |
373 |
|
|
DT => [HTML_NS, 'dt', $parent->{text}, 'data'], |
374 |
|
|
EM => [HTML_NS, 'em', $parent->{text}, 'data'], |
375 |
|
|
FN => [H2H_NS, 'fn', $parent->{text}, 'data'], |
376 |
|
|
H => [XHTML2_NS, 'h', $parent->{text}, 'data'], |
377 |
|
|
HOUR => [H2H_NS, 'hour', $head_el, 'data'], |
378 |
|
|
IMG => [HTML_NS, 'img', $parent->{text}, 'data'], |
379 |
|
|
INS => [HTML_NS, 'ins', $parent->{text}, 'data'], |
380 |
|
|
KION => [H2H_NS, 'kion', $head_el, 'data'], |
381 |
|
|
LI => [HTML_NS, 'li', $parent->{text}, 'data'], |
382 |
|
|
LINK => [HTML_NS, 'a', $parent->{text}, 'data'], |
383 |
|
|
LUNCH => [H2H_NS, 'lunch', $head_el, 'data'], |
384 |
|
|
OL => [HTML_NS, 'ol', $parent->{text}, 'list', {PRE => 1}], |
385 |
|
|
P => [HTML_NS, 'p', $parent->{text}, 'data', |
386 |
|
|
{P => 1, PRE => 1}], |
387 |
|
|
PERSON => [HTML3_NS, 'person', $parent->{text}, 'data'], |
388 |
|
|
PRE => [HTML_NS, 'pre', $parent->{text}, 'data', |
389 |
|
|
{P => 1}], |
390 |
|
|
RB => [HTML_NS, 'rb', $parent->{text}, 'data'], |
391 |
|
|
RIBU => [HTML_NS, 'ruby', $parent->{text}, 'data'], |
392 |
|
|
RT => [HTML_NS, 'rt', $parent->{text}, 'data'], |
393 |
|
|
RUBY => [HTML_NS, 'ruby', $parent->{text}, 'data'], |
394 |
|
|
SECTION => [HTML_NS, 'section', $body_el, 'data'], |
395 |
|
|
SEE => [HTML_NS, 'i', $parent->{text}, 'data'], |
396 |
|
|
SP => [H2H_NS, 'sp', $head_el, 'data'], |
397 |
|
|
SPAN => [HTML_NS, 'span', $parent->{text}, 'data'], |
398 |
|
|
SRC => [HTML3_NS, 'credit', $parent->{text}, 'data'], |
399 |
|
|
STRONG => [HTML_NS, 'strong', $parent->{text}, 'data'], |
400 |
|
|
SUBSECTION => [HTML_NS, 'section', $parent->{subsection}, 'data'], |
401 |
|
|
T1 => [H2H_NS, 't1', $head_el, 'data'], |
402 |
|
|
T2 => [H2H_NS, 't2', $head_el, 'data'], |
403 |
|
|
T3 => [H2H_NS, 't3', $head_el, 'data'], |
404 |
|
|
T4 => [H2H_NS, 't4', $head_el, 'data'], |
405 |
|
|
T5 => [H2H_NS, 't5', $head_el, 'data'], |
406 |
|
|
T6 => [H2H_NS, 't6', $head_el, 'data'], |
407 |
|
|
T7 => [H2H_NS, 't7', $head_el, 'data'], |
408 |
|
|
TAION => [H2H_NS, 'taion', $head_el, 'data'], |
409 |
|
|
TENKI => [H2H_NS, 'tenki', $head_el, 'data'], |
410 |
|
|
THEME => [H2H_NS, 'theme', $head_el, 'data'], |
411 |
|
|
UL => [HTML_NS, 'ul', $parent->{text}, 'list', {PRE => 1}], |
412 |
|
|
YAMI => [H2H_NS, 'yami', $parent->{text}, 'data'], |
413 |
|
|
a => [HTML_NS, 'a', $parent->{text}, 'data'], |
414 |
|
|
abbr => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
415 |
|
|
acronym => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
416 |
|
|
blockquote => [HTML_NS, 'blockquote', $parent->{text}, 'data'], |
417 |
|
|
br => [HTML_NS, 'br', $parent->{text}, 'br'], |
418 |
|
|
caption => [HTML_NS, 'caption', $parent->{text}, 'data'], |
419 |
|
|
code => [HTML_NS, 'code', $parent->{text}, 'data'], |
420 |
|
|
dd => [HTML_NS, 'dd', $parent->{text}, 'data'], |
421 |
|
|
del => [HTML_NS, 'del', $parent->{text}, 'data'], |
422 |
|
|
dfn => [HTML_NS, 'dfn', $parent->{text}, 'data'], |
423 |
|
|
div => [HTML_NS, 'div', $parent->{text}, 'data'], |
424 |
|
|
dl => [HTML_NS, 'dl', $parent->{text}, 'data'], |
425 |
|
|
dt => [HTML_NS, 'dt', $parent->{text}, 'data'], |
426 |
|
|
em => [HTML_NS, 'em', $parent->{text}, 'data'], |
427 |
|
|
form => [HTML_NS, 'form', $parent->{text}, 'data'], |
428 |
|
|
h1 => [HTML_NS, 'h1', $parent->{text}, 'data'], |
429 |
|
|
h2 => [HTML_NS, 'h2', $parent->{text}, 'data'], |
430 |
|
|
h3 => [HTML_NS, 'h3', $parent->{text}, 'data'], |
431 |
|
|
h4 => [HTML_NS, 'h4', $parent->{text}, 'data'], |
432 |
|
|
img => [HTML_NS, 'img', $parent->{text}, 'data'], |
433 |
|
|
input => [HTML_NS, 'input', $parent->{text}, 'data'], |
434 |
|
|
ins => [HTML_NS, 'ins', $parent->{text}, 'data'], |
435 |
|
|
kbd => [HTML_NS, 'kbd', $parent->{text}, 'data'], |
436 |
|
|
label => [HTML_NS, 'label', $parent->{text}, 'data'], |
437 |
|
|
li => [HTML_NS, 'li', $parent->{text}, 'data'], |
438 |
|
|
ol => [HTML_NS, 'ol', $parent->{text}, 'data'], |
439 |
|
|
p => [HTML_NS, 'p', $parent->{text}, 'data'], |
440 |
|
|
pre => [HTML_NS, 'pre', $parent->{text}, 'data'], |
441 |
|
|
q => [HTML_NS, 'q', $parent->{text}, 'data'], |
442 |
|
|
rb => [HTML_NS, 'rb', $parent->{text}, 'data'], |
443 |
|
|
rp => [HTML_NS, 'rp', $parent->{text}, 'data'], |
444 |
|
|
rt => [HTML_NS, 'rt', $parent->{text}, 'data'], |
445 |
|
|
ruby => [HTML_NS, 'ruby', $parent->{text}, 'data'], |
446 |
|
|
span => [HTML_NS, 'span', $parent->{text}, 'data'], |
447 |
|
|
strong => [HTML_NS, 'strong', $parent->{text}, 'data'], |
448 |
|
|
sup => [HTML_NS, 'sup', $parent->{text}, 'data'], |
449 |
|
|
table => [HTML_NS, 'table', $parent->{text}, 'data'], |
450 |
|
|
tbody => [HTML_NS, 'tbody', $parent->{text}, 'data'], |
451 |
|
|
td => [HTML_NS, 'td', $parent->{text}, 'data'], |
452 |
|
|
th => [HTML_NS, 'th', $parent->{text}, 'data'], |
453 |
|
|
thead => [HTML_NS, 'thead', $parent->{text}, 'data'], |
454 |
|
|
tr => [HTML_NS, 'tr', $parent->{text}, 'data'], |
455 |
|
|
ul => [HTML_NS, 'ul', $parent->{text}, 'data'], |
456 |
|
|
var => [HTML_NS, 'var', $parent->{text}, 'data'], |
457 |
|
|
}->{$token->{value}} |
458 |
|
|
|| [H2H_NS, $token->{value}, $parent->{text}, 'data']; |
459 |
|
|
while ($info->[4]->{$info->[2]->get_user_data ('command-name')}) { |
460 |
|
|
$info->[2] = $info->[2]->parent_node; |
461 |
|
|
} |
462 |
|
|
my $el = $self->{doc}->create_element_ns ($info->[0], $info->[1]); |
463 |
|
|
$el->set_user_data ('command-name', $token->{value}); |
464 |
|
|
$info->[2]->append_child ($el); |
465 |
|
|
$parent->{text} = $el; |
466 |
|
|
$parent->{attr} = $el; |
467 |
|
|
$parent->{subsection} = $el if $token->{value} eq 'SECTION'; |
468 |
|
|
$state = $info->[3]; |
469 |
|
|
} elsif ($token->{type} eq 'end') { |
470 |
|
|
E: while (my $et = $parent->{text}->get_user_data ('command-name')) { |
471 |
|
|
$parent->{text} = $parent->{text}->parent_node; |
472 |
|
|
last E if $et eq $token->{value}; |
473 |
|
|
last E if $et eq '#body'; |
474 |
|
|
} |
475 |
|
|
$parent->{attr} = $parent->{text}; |
476 |
|
|
$state = { |
477 |
|
|
UL => 'list', |
478 |
|
|
OL => 'list', |
479 |
|
|
}->{$parent->{text}->get_user_data ('command-name')} |
480 |
|
|
|| $state eq 'br' ? 'br' : 'data'; |
481 |
|
|
} elsif ($token->{type} eq 'char') { |
482 |
|
|
my $el = $self->{doc}->create_element_ns (H2H_NS, 'char'); |
483 |
|
|
$el->manakai_append_text ($token->{value}); |
484 |
|
|
$parent->{text}->append_child ($el); |
485 |
|
|
$state = 'data' if $state eq 'br'; |
486 |
|
|
} elsif ($token->{type} eq 'magic') { |
487 |
|
|
my ($name, $version) = split m#/#, $token->{value}, 2; |
488 |
|
|
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Name', $name); |
489 |
|
|
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Version', $version); |
490 |
|
|
} elsif ($token->{type} eq 'hnf-comment') { |
491 |
|
|
my $com = $self->{doc}->create_element_ns |
492 |
|
|
(H2H_NS, 'hnf-comment'); |
493 |
|
|
$com->text_content ($token->{value}); |
494 |
|
|
$parent->{text}->append_child ($com); |
495 |
|
|
} elsif ($token->{type} eq 'html-comment') { |
496 |
|
|
my $com = $self->{doc}->create_element_ns |
497 |
|
|
(H2H_NS, 'html-comment'); |
498 |
|
|
$com->text_content ($token->{value}); |
499 |
|
|
$parent->{text}->append_child ($com); |
500 |
|
|
} elsif ($token->{type} eq 'source') { |
501 |
|
|
my $src = $self->{doc}->create_element_ns (HTML3_NS, 'credit'); |
502 |
|
|
$src->manakai_append_text ($token->{value}); |
503 |
|
|
$parent->{text}->append_child ($src); |
504 |
|
|
} elsif ($token->{type} eq 'uri') { |
505 |
|
|
my $v = $token->{value}; |
506 |
|
|
if ($v =~ /^\(([^()]+)\)$/) { |
507 |
|
|
my @v = split /\s*,\s*/, $1; |
508 |
|
|
$parent->{attr}->set_attribute_ns |
509 |
|
|
(H2H_NS, 'href-year' => $v[0]+0); |
510 |
|
|
$parent->{attr}->set_attribute_ns |
511 |
|
|
(H2H_NS, 'href-month' => $v[1]+0); |
512 |
|
|
$parent->{attr}->set_attribute_ns |
513 |
|
|
(H2H_NS, 'href-day' => $v[2]+0); |
514 |
|
|
$parent->{attr}->set_attribute_ns |
515 |
|
|
(H2H_NS, 'href-section' => $v[3]+0) if $v[3]; |
516 |
|
|
$parent->{attr}->set_attribute_ns |
517 |
|
|
(H2H_NS, 'href-subsection' => $v[4]+0) |
518 |
|
|
if $v[4]; |
519 |
|
|
} else { |
520 |
|
|
my $xuri = $parent->{attr}->manakai_expanded_uri; |
521 |
|
|
if ($xuri eq HTML_NS . 'a') { |
522 |
|
|
$parent->{attr}->set_attribute_ns |
523 |
|
|
(undef, href => $token->{value}); |
524 |
|
|
} elsif ($xuri eq HTML_NS . 'blockquote') { |
525 |
|
|
$parent->{attr}->set_attribute_ns |
526 |
|
|
(undef, cite => $token->{value}); |
527 |
|
|
} else { |
528 |
|
|
$parent->{attr}->set_attribute_ns |
529 |
|
|
(XHTML2_NS, href => $token->{value}); |
530 |
|
|
} |
531 |
|
|
} |
532 |
|
|
} elsif ({ |
533 |
|
|
title => 1, style => 1, |
534 |
|
|
class => 1, href => 1, 'accept-charset' => 1, |
535 |
|
|
action => 1, method => 1, alt => 1, src => 1, |
536 |
|
|
type => 1, value => 1, name => 1, accesskey => 1, |
537 |
|
|
for => 1, cite => 1, onclick => 1, colspan => 1, |
538 |
|
|
scope => 1, summary => 1, |
539 |
|
|
}->{$token->{type}}) { |
540 |
|
|
$parent->{attr}->set_attribute_ns |
541 |
|
|
(undef, $token->{type}, $token->{value}); |
542 |
|
|
} elsif ($token->{type} eq 'cat') { |
543 |
|
|
for (split /\s*,\s*/, $token->{value}) { |
544 |
|
|
my $el = $self->{doc}->create_element_ns (H2H_NS, 'cat'); |
545 |
|
|
$el->manakai_append_text ($_); |
546 |
|
|
$parent->{attr}->append_child ($el); |
547 |
|
|
} |
548 |
|
|
} elsif ($token->{type} eq 'lang' or $token->{type} eq 'xml:lang') { |
549 |
|
|
$parent->{attr}->set_attribute_ns |
550 |
|
|
(q<http://www.w3.org/XML/1998/namespace>, |
551 |
|
|
'xml:lang' => $token->{value}); |
552 |
|
|
} elsif ($token->{type} eq 'id') { |
553 |
|
|
$parent->{attr}->set_attribute_ns |
554 |
|
|
(q<http://www.w3.org/XML/1998/namespace>, |
555 |
|
|
'xml:id' => $token->{value}); |
556 |
|
|
} elsif ($token->{type} eq 'wbradded') { |
557 |
|
|
# ignore |
558 |
|
|
} else { |
559 |
|
|
# key, level, place, position, time |
560 |
|
|
$parent->{attr}->set_attribute_ns |
561 |
|
|
(H2H_NS, $token->{type}, $token->{value}); |
562 |
|
|
} |
563 |
|
|
} |
564 |
|
|
} # _construct_tree |
565 |
|
|
|
566 |
|
|
1; |
567 |
|
|
## $Date: 2007/06/30 13:12:33 $ |