| 1 |
wakaba |
1.1 |
package Whatpm::H2H; |
| 2 |
|
|
use strict; |
| 3 |
|
|
|
| 4 |
|
|
sub H2H_NS () { q<http://suika.fam.cx/~wakaba/archive/2005/manakai/Markup/H2H/> } |
| 5 |
|
|
sub HTML_NS () { q<http://www.w3.org/1999/xhtml> } |
| 6 |
|
|
sub HTML3_NS () { q<urn:x-suika-fam-cx:markup:ietf:html:3:draft:00:> } |
| 7 |
|
|
sub SW09_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:9:> } |
| 8 |
|
|
sub XHTML2_NS () { q<http://www.w3.org/2002/06/xhtml2/> } |
| 9 |
|
|
|
| 10 |
|
|
sub parse_string ($$$) { |
| 11 |
|
|
my $self = bless { |
| 12 |
|
|
token => [], |
| 13 |
|
|
location => {}, |
| 14 |
|
|
doc => $_[2], |
| 15 |
|
|
}, $_[0]; |
| 16 |
|
|
|
| 17 |
|
|
my $s = ''.$_[1]; |
| 18 |
|
|
$s =~ s/\x0D\x0A/\x0A/g; |
| 19 |
|
|
$s =~ tr/\x0D/\x0A/; |
| 20 |
|
|
$self->{line} = [split /\x0A/, $s]; |
| 21 |
|
|
|
| 22 |
|
|
local $Error::Depth = $Error::Depth + 1; |
| 23 |
|
|
$self->{doc}->strict_error_checking (0); |
| 24 |
|
|
my $doc_el = $self->{doc}->create_element_ns (HTML_NS, 'html'); |
| 25 |
|
|
$doc_el->set_attribute_ns (q<http://www.w3.org/2000/xmlns/>, 'xmlns', HTML_NS); |
| 26 |
|
|
$self->{doc}->append_child ($doc_el); |
| 27 |
|
|
|
| 28 |
|
|
$self->_construct_tree; |
| 29 |
|
|
|
| 30 |
|
|
return $self->{doc}; |
| 31 |
|
|
} # parse_string |
| 32 |
|
|
|
| 33 |
|
|
sub _shift_token ($) { |
| 34 |
|
|
my $self = $_[0]; |
| 35 |
|
|
|
| 36 |
|
|
if (@{$self->{token}}) { |
| 37 |
|
|
return shift @{$self->{token}}; |
| 38 |
|
|
} |
| 39 |
|
|
|
| 40 |
|
|
my $attrvalue = sub { |
| 41 |
|
|
my $v = shift; |
| 42 |
|
|
$v =~ s/"/"/g; |
| 43 |
|
|
$v =~ s/</</g; |
| 44 |
|
|
$v =~ s/>/>/g; |
| 45 |
|
|
$v =~ s/®/\x{00AE}/g; |
| 46 |
|
|
$v =~ s/♥/\x{2661}/g; |
| 47 |
|
|
$v =~ s/&/&/g; |
| 48 |
|
|
return $v; |
| 49 |
|
|
}; |
| 50 |
|
|
|
| 51 |
|
|
my $uriv = sub { |
| 52 |
|
|
my $v = $attrvalue->(shift); |
| 53 |
|
|
$v =~ s/^\{/(/; |
| 54 |
|
|
$v =~ s/\}$/)/; |
| 55 |
|
|
$v =~ s/^\#([0-9si]+)$/($1)/; |
| 56 |
|
|
$v =~ s/^\(([0-9]{4})([0-9]{2})([0-9]{2})([^)]*)\)$/($1, $2, $3$4)/; |
| 57 |
|
|
$v =~ s/[si]/, /g if $v =~ /^\(/ and $v =~ /\)$/; |
| 58 |
|
|
return $v; |
| 59 |
|
|
}; |
| 60 |
|
|
|
| 61 |
|
|
my $r = {type => '#EOF'}; |
| 62 |
|
|
L: while (defined (my $line = shift @{$self->{line}})) { |
| 63 |
|
|
if ($line =~ s/^([A-Z]+|T[0-9])(\*?\+?\*?)(?:\s+|$)//) { |
| 64 |
|
|
my $command = $1; |
| 65 |
|
|
my $flag = $2; |
| 66 |
|
|
$r = {type => 'start', value => $command}; |
| 67 |
|
|
|
| 68 |
|
|
my $uri; |
| 69 |
|
|
if ($flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) { |
| 70 |
|
|
$uri = $1; |
| 71 |
|
|
} |
| 72 |
|
|
|
| 73 |
|
|
my $attr = ''; |
| 74 |
|
|
if ($line =~ s/^\{(\s*(?:[A-Za-z][^{}]*)?)\}\s*//) { |
| 75 |
|
|
$attr = $1; |
| 76 |
|
|
} |
| 77 |
|
|
|
| 78 |
|
|
if (not defined $uri and |
| 79 |
|
|
$flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) { |
| 80 |
|
|
$uri = $1; |
| 81 |
|
|
} |
| 82 |
|
|
|
| 83 |
|
|
my @token; |
| 84 |
|
|
my $info = { |
| 85 |
|
|
# val# val#(*) |
| 86 |
|
|
ABBR => [2, 2], |
| 87 |
|
|
ACRONYM => [2, 2], |
| 88 |
|
|
CITE => [2, 1], |
| 89 |
|
|
LDIARY => [4, 4], |
| 90 |
|
|
LIMG => [4, 4], |
| 91 |
|
|
LINK => [2, 1], |
| 92 |
|
|
LMG => [2, 2], |
| 93 |
|
|
LNEW => [2, 2], |
| 94 |
|
|
PERSON => [2, 2], |
| 95 |
|
|
RIBU => [2, 2], |
| 96 |
|
|
RUBY => [2, 2], |
| 97 |
|
|
SEE => [2, 2], |
| 98 |
|
|
}->{$command}; |
| 99 |
|
|
my @value = split /\s+/, $line, |
| 100 |
|
|
($flag =~ /\*/ ? $info->[1] : $info->[0]) || 1; |
| 101 |
|
|
|
| 102 |
|
|
push @token, {type => 'uri', value => $uriv->($uri)} if defined $uri; |
| 103 |
|
|
|
| 104 |
|
|
my %attr; |
| 105 |
|
|
while ($attr =~ /([A-Za-z0-9_-]+)\s*(?:=>?|:)\s*([^";,]+|"[^"]+")/gc) { |
| 106 |
|
|
my $name = lc $1; |
| 107 |
|
|
my $value = $2; |
| 108 |
|
|
$value =~ tr/"//d; |
| 109 |
|
|
$attr{$name} = $value; |
| 110 |
|
|
} |
| 111 |
|
|
delete $attr{'content-type'}; |
| 112 |
|
|
|
| 113 |
|
|
if ({ |
| 114 |
|
|
ABBR => 1, ACRONYM => 1, |
| 115 |
|
|
RUBY => 1, RIBU => 1, |
| 116 |
|
|
}->{$command}) { |
| 117 |
|
|
if (@value == 1 and $attr{title}) { |
| 118 |
|
|
push @value, $attr{title}; |
| 119 |
|
|
delete $attr{title}; |
| 120 |
|
|
} |
| 121 |
|
|
if (@value == 2) { |
| 122 |
|
|
unshift @{$self->{line}}, |
| 123 |
|
|
'RB', |
| 124 |
|
|
'DATA {} >>'.$value[1], |
| 125 |
|
|
'/RB', |
| 126 |
|
|
'RT', |
| 127 |
|
|
'DATA {} >>'.$value[0], |
| 128 |
|
|
'/RT', |
| 129 |
|
|
'/'.$command; |
| 130 |
|
|
} else { |
| 131 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command; |
| 132 |
|
|
} |
| 133 |
|
|
} elsif ($command eq 'CITE') { |
| 134 |
|
|
if (@value == 2) { |
| 135 |
|
|
if (defined $uri or $value[0] !~ /^[a-z-]+:/) { |
| 136 |
|
|
unshift @{$self->{line}}, |
| 137 |
|
|
'SRC', |
| 138 |
|
|
'DATA {} >>'.$value[0].' '.$value[1], |
| 139 |
|
|
'/SRC', 'BODYTEXT'; |
| 140 |
|
|
} else { |
| 141 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
| 142 |
|
|
unshift @{$self->{line}}, |
| 143 |
|
|
'SRC', |
| 144 |
|
|
'DATA {} >>'.$value[1], |
| 145 |
|
|
'/SRC', 'BODYTEXT'; |
| 146 |
|
|
} |
| 147 |
|
|
} elsif (@value == 1) { |
| 148 |
|
|
if (defined $uri or $value[0] !~ /^[a-z-]+:/) { |
| 149 |
|
|
unshift @{$self->{line}}, |
| 150 |
|
|
'SRC', |
| 151 |
|
|
'DATA {} >>'.$value[0], |
| 152 |
|
|
'/SRC', 'BODYTEXT'; |
| 153 |
|
|
} else { |
| 154 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
| 155 |
|
|
unshift @{$self->{line}}, 'BODYTEXT'; |
| 156 |
|
|
} |
| 157 |
|
|
} else { |
| 158 |
|
|
unshift @{$self->{line}}, 'BODYTEXT'; |
| 159 |
|
|
} |
| 160 |
|
|
} elsif ($command eq 'DATA') { |
| 161 |
|
|
my @token; |
| 162 |
|
|
$line =~ s/^>>//; |
| 163 |
|
|
while (length $line) { |
| 164 |
|
|
if ($line =~ s/^<([a-z0-9]+)\s*//) { |
| 165 |
|
|
my $tagname = $1; |
| 166 |
|
|
push @token, {type => 'start', value => $tagname}; |
| 167 |
|
|
while ($line =~ s/^([a-z-]+)\s*=\s*"([^"]*)"\s*//) { |
| 168 |
|
|
push @token, {type => $1, value => $attrvalue->($2)}; |
| 169 |
|
|
} |
| 170 |
|
|
$line =~ s#^/?\s*>##; |
| 171 |
|
|
push @token, {type => 'end', value => $tagname} |
| 172 |
|
|
if $tagname eq 'img' or $tagname eq 'input' or |
| 173 |
|
|
$tagname eq 'br'; |
| 174 |
|
|
} elsif ($line =~ s#^</([a-z0-9]+)\s*>##) { |
| 175 |
|
|
push @token, {type => 'end', value => $1}; |
| 176 |
|
|
} elsif ($line =~ s/^<!--(.*?)-->//) { |
| 177 |
|
|
push @token, {type => 'html-comment', value => $1}; |
| 178 |
|
|
} elsif ($line =~ s/^&([a-z]+);//) { |
| 179 |
|
|
my $name = $1; |
| 180 |
|
|
if ($name eq 'amp') { |
| 181 |
|
|
push @token, {type => 'text', value => '&'}; |
| 182 |
|
|
} elsif ($name eq 'lt') { |
| 183 |
|
|
push @token, {type => 'text', value => '<'}; |
| 184 |
|
|
} elsif ($name eq 'gt') { |
| 185 |
|
|
push @token, {type => 'text', value => '>'}; |
| 186 |
|
|
} elsif ($name eq 'quot') { |
| 187 |
|
|
push @token, {type => 'text', value => '"'}; |
| 188 |
|
|
} elsif ($name eq 'reg') { |
| 189 |
|
|
push @token, {type => 'text', value => "\x{00AE}"}; |
| 190 |
|
|
} elsif ($name eq 'hearts') { |
| 191 |
|
|
push @token, {type => 'text', value => "\x{2661}"}; |
| 192 |
|
|
} else { |
| 193 |
|
|
push @token, {type => 'char', value => $name}; |
| 194 |
|
|
} |
| 195 |
|
|
} elsif ($line =~ s/^&#([0-9]+);//) { |
| 196 |
|
|
push @token, {type => 'text', value => ord $1}; |
| 197 |
|
|
} elsif ($line =~ s/^&#x([0-9A-Fa-f]+);//) { |
| 198 |
|
|
push @token, {type => 'text', value => ord hex $1}; |
| 199 |
|
|
} elsif ($line =~ s/^([^<&]+)//) { |
| 200 |
|
|
push @token, {type => 'text', value => $1}; |
| 201 |
|
|
} else { |
| 202 |
|
|
push @token, {type => 'text', value => substr ($line, 0, 1)}; |
| 203 |
|
|
substr ($line, 0, 1) = ''; |
| 204 |
|
|
} |
| 205 |
|
|
} |
| 206 |
|
|
push @token, {type => 'eol'}; |
| 207 |
|
|
|
| 208 |
|
|
$r = shift @token; |
| 209 |
|
|
push @{$self->{token}}, @token; |
| 210 |
|
|
last L; |
| 211 |
|
|
} elsif ({ |
| 212 |
|
|
DD => 1, DT => 1, |
| 213 |
|
|
DEL => 1, INS => 1, |
| 214 |
|
|
LI => 1, |
| 215 |
|
|
RB => 1, RT => 1, |
| 216 |
|
|
STRONG => 1, |
| 217 |
|
|
YAMI => 1, |
| 218 |
|
|
EM => 1, |
| 219 |
|
|
HOUR => 1, KION => 1, LUNCH => 1, |
| 220 |
|
|
TAION => 1, TENKI => 1, THEME => 1, |
| 221 |
|
|
T1 => 1, T2 => 1, T3 => 1, T4 => 1, |
| 222 |
|
|
T5 => 1, T6 => 1, T7 => 1, SP => 1, |
| 223 |
|
|
}->{$command}) { |
| 224 |
|
|
if (@value) { |
| 225 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command; |
| 226 |
|
|
} |
| 227 |
|
|
} elsif ($command eq 'DIV') { |
| 228 |
|
|
if (@value) { |
| 229 |
|
|
$r = {type => 'class', value => $value[0]}; |
| 230 |
|
|
} |
| 231 |
|
|
} elsif ($command eq 'LDIARY') { |
| 232 |
|
|
$value[0] =~ s/^([0-9]{4})([0-9]{2})([0-9]{2})/$1, $2, $3/; |
| 233 |
|
|
$value[0] =~ s/[is]/, /; |
| 234 |
|
|
$r = {type => 'start', value => 'LINK'}; |
| 235 |
|
|
push @token, {type => 'uri', value => "($value[0])"}; |
| 236 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK'; |
| 237 |
|
|
} elsif ($command eq 'LIMG') { |
| 238 |
|
|
$r = {type => 'start', value => 'IMG'}; |
| 239 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
| 240 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[3], '/IMG'; |
| 241 |
|
|
} elsif ($command eq 'LMG') { |
| 242 |
|
|
$r = {type => 'start', value => 'IMG'}; |
| 243 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
| 244 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/IMG'; |
| 245 |
|
|
} elsif ($command eq 'LINK') { |
| 246 |
|
|
if (@value == 2) { |
| 247 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
| 248 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK'; |
| 249 |
|
|
} elsif ($flag =~ /\+/) { |
| 250 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
| 251 |
|
|
} else { |
| 252 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/LINK'; |
| 253 |
|
|
} |
| 254 |
|
|
} elsif ($command eq 'NEW') { |
| 255 |
|
|
$r = {type => 'start', value => 'SECTION'}; |
| 256 |
|
|
unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H'; |
| 257 |
|
|
} elsif ($command eq 'LNEW') { |
| 258 |
|
|
$r = {type => 'start', value => 'SECTION'}; |
| 259 |
|
|
push @token, {type => 'uri', value => $uriv->($value[0])}; |
| 260 |
|
|
unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[1], '/H'; |
| 261 |
|
|
} elsif ($command eq 'SUB') { |
| 262 |
|
|
$r = {type => 'start', value => 'SUB'}; |
| 263 |
|
|
unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H'; |
| 264 |
|
|
} elsif ($command eq 'PERSON') { |
| 265 |
|
|
push @token, {type => 'key', value => $attrvalue->($value[0])}; |
| 266 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/PERSON'; |
| 267 |
|
|
} elsif ($command eq 'SEE') { |
| 268 |
|
|
if (@value == 2) { |
| 269 |
|
|
push @token, {type => 'key', value => $attrvalue->($value[0])}; |
| 270 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SEE'; |
| 271 |
|
|
} else { |
| 272 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SEE'; |
| 273 |
|
|
} |
| 274 |
|
|
} elsif ($command eq 'SPAN') { |
| 275 |
|
|
if (@value == 2) { |
| 276 |
|
|
push @token, {type => 'class', |
| 277 |
|
|
value => $attrvalue->($value[0])}; |
| 278 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SPAN'; |
| 279 |
|
|
} else { |
| 280 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SPAN'; |
| 281 |
|
|
} |
| 282 |
|
|
} elsif ($command eq 'OK') { |
| 283 |
|
|
$r = {type => '#EOF'}; |
| 284 |
|
|
next L; |
| 285 |
|
|
} elsif ($command eq 'XML') { |
| 286 |
|
|
unshift @{$self->{line}}, 'DATA {} >>XML '.$line; |
| 287 |
|
|
next L; |
| 288 |
|
|
} |
| 289 |
|
|
|
| 290 |
|
|
for (keys %attr) { |
| 291 |
|
|
push @token, {type => $_, value => $attrvalue->($attr{$_})}; |
| 292 |
|
|
} |
| 293 |
|
|
|
| 294 |
|
|
push @{$self->{token}}, @token; |
| 295 |
|
|
last L; |
| 296 |
|
|
} elsif ($line eq 'H2H/1.0') { |
| 297 |
|
|
$r = {type => 'magic', value => 'H2H/1.0'}; |
| 298 |
|
|
last L; |
| 299 |
|
|
} elsif ($line =~ m#^/([A-Z]+)\s*$#) { |
| 300 |
|
|
$r = {type => 'end', value => $1}; |
| 301 |
|
|
last L; |
| 302 |
|
|
} elsif ($line =~ s/^!#//) { |
| 303 |
|
|
$r = {type => 'hnf-comment', value => $line}; |
| 304 |
|
|
last L; |
| 305 |
|
|
} elsif ($line =~ s/^!//) { |
| 306 |
|
|
$r = {type => 'html-comment', value => $line}; |
| 307 |
|
|
last L; |
| 308 |
|
|
} else { |
| 309 |
|
|
unshift @{$self->{line}}, 'DATA {} >>'.$line; |
| 310 |
|
|
next L; |
| 311 |
|
|
} |
| 312 |
|
|
} # L |
| 313 |
|
|
|
| 314 |
|
|
return $r; |
| 315 |
|
|
} # _shift_token |
| 316 |
|
|
|
| 317 |
|
|
sub _construct_tree ($) { |
| 318 |
|
|
my $self = $_[0]; |
| 319 |
|
|
|
| 320 |
|
|
my $doc_el = $self->{doc}->document_element; |
| 321 |
|
|
my $head_el = $self->{doc}->create_element_ns (HTML_NS, 'head'); |
| 322 |
|
|
my $body_el = $self->{doc}->create_element_ns (HTML_NS, 'body'); |
| 323 |
|
|
$doc_el->append_child ($head_el); |
| 324 |
|
|
$doc_el->append_child ($body_el); |
| 325 |
|
|
$doc_el->set_user_data ('command-name' => '#html'); |
| 326 |
|
|
$head_el->set_user_data ('command-name' => '#head'); |
| 327 |
|
|
$body_el->set_user_data ('command-name' => '#body'); |
| 328 |
|
|
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Name' => 'H2H'); |
| 329 |
|
|
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Version' => '0.9'); |
| 330 |
|
|
|
| 331 |
|
|
my $parent = { |
| 332 |
|
|
subsection => $body_el, |
| 333 |
|
|
attr => $body_el, |
| 334 |
|
|
text => $body_el, |
| 335 |
|
|
}; |
| 336 |
|
|
my $state = 'data'; |
| 337 |
|
|
## data - normal |
| 338 |
|
|
## list - UL or OL |
| 339 |
|
|
## br - after br start tag token |
| 340 |
|
|
## eol - after eol token |
| 341 |
|
|
|
| 342 |
|
|
T: while (my $token = $self->_shift_token) { |
| 343 |
|
|
last T if $token->{type} eq '#EOF'; |
| 344 |
|
|
|
| 345 |
|
|
if ($token->{type} eq 'text') { |
| 346 |
|
|
if ($state eq 'list') { |
| 347 |
|
|
my $li_el = $self->{doc}->create_element_ns (HTML_NS, 'li'); |
| 348 |
|
|
$li_el->manakai_append_text ($token->{value}); |
| 349 |
|
|
$parent->{text}->append_child ($li_el); |
| 350 |
|
|
} else { |
| 351 |
|
|
$parent->{text}->manakai_append_text ("\x0A") if $state eq 'eol'; |
| 352 |
|
|
$parent->{text}->manakai_append_text ($token->{value}); |
| 353 |
|
|
$state = 'data'; |
| 354 |
|
|
} |
| 355 |
|
|
} elsif ($token->{type} eq 'eol') { |
| 356 |
|
|
if ($state eq 'eol') { |
| 357 |
|
|
$parent->{text}->manakai_append_text ("\x0A"); |
| 358 |
|
|
} else { |
| 359 |
|
|
$state = $state eq 'br' ? 'data' : 'eol'; |
| 360 |
|
|
} |
| 361 |
|
|
} elsif ($token->{type} eq 'start') { |
| 362 |
|
|
my $info = { |
| 363 |
|
|
# nsuri, qname, parent, state |
| 364 |
|
|
ABBR => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
| 365 |
|
|
ACRONYM => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
| 366 |
|
|
BODYTEXT => [HTML3_NS, 'bodytext', $parent->{text}, 'data'], |
| 367 |
|
|
CITE => [HTML_NS, 'blockquote', $parent->{text}, 'data', |
| 368 |
|
|
{PRE => 1}], |
| 369 |
|
|
DD => [HTML_NS, 'dd', $parent->{text}, 'data'], |
| 370 |
|
|
DEL => [HTML_NS, 'del', $parent->{text}, 'data'], |
| 371 |
|
|
DIV => [HTML_NS, 'div', $parent->{text}, 'data', {P => 1}], |
| 372 |
|
|
DL => [HTML_NS, 'dl', $parent->{text}, 'data'], |
| 373 |
|
|
DT => [HTML_NS, 'dt', $parent->{text}, 'data'], |
| 374 |
|
|
EM => [HTML_NS, 'em', $parent->{text}, 'data'], |
| 375 |
|
|
FN => [H2H_NS, 'fn', $parent->{text}, 'data'], |
| 376 |
|
|
H => [XHTML2_NS, 'h', $parent->{text}, 'data'], |
| 377 |
|
|
HOUR => [H2H_NS, 'hour', $head_el, 'data'], |
| 378 |
|
|
IMG => [HTML_NS, 'img', $parent->{text}, 'data'], |
| 379 |
|
|
INS => [HTML_NS, 'ins', $parent->{text}, 'data'], |
| 380 |
|
|
KION => [H2H_NS, 'kion', $head_el, 'data'], |
| 381 |
|
|
LI => [HTML_NS, 'li', $parent->{text}, 'data'], |
| 382 |
|
|
LINK => [HTML_NS, 'a', $parent->{text}, 'data'], |
| 383 |
|
|
LUNCH => [H2H_NS, 'lunch', $head_el, 'data'], |
| 384 |
|
|
OL => [HTML_NS, 'ol', $parent->{text}, 'list', {PRE => 1}], |
| 385 |
|
|
P => [HTML_NS, 'p', $parent->{text}, 'data', |
| 386 |
|
|
{P => 1, PRE => 1}], |
| 387 |
|
|
PERSON => [HTML3_NS, 'person', $parent->{text}, 'data'], |
| 388 |
|
|
PRE => [HTML_NS, 'pre', $parent->{text}, 'data', |
| 389 |
|
|
{P => 1}], |
| 390 |
|
|
RB => [HTML_NS, 'rb', $parent->{text}, 'data'], |
| 391 |
|
|
RIBU => [HTML_NS, 'ruby', $parent->{text}, 'data'], |
| 392 |
|
|
RT => [HTML_NS, 'rt', $parent->{text}, 'data'], |
| 393 |
|
|
RUBY => [HTML_NS, 'ruby', $parent->{text}, 'data'], |
| 394 |
|
|
SECTION => [HTML_NS, 'section', $body_el, 'data'], |
| 395 |
|
|
SEE => [HTML_NS, 'i', $parent->{text}, 'data'], |
| 396 |
|
|
SP => [H2H_NS, 'sp', $head_el, 'data'], |
| 397 |
|
|
SPAN => [HTML_NS, 'span', $parent->{text}, 'data'], |
| 398 |
|
|
SRC => [HTML3_NS, 'credit', $parent->{text}, 'data'], |
| 399 |
|
|
STRONG => [HTML_NS, 'strong', $parent->{text}, 'data'], |
| 400 |
|
|
SUBSECTION => [HTML_NS, 'section', $parent->{subsection}, 'data'], |
| 401 |
|
|
T1 => [H2H_NS, 't1', $head_el, 'data'], |
| 402 |
|
|
T2 => [H2H_NS, 't2', $head_el, 'data'], |
| 403 |
|
|
T3 => [H2H_NS, 't3', $head_el, 'data'], |
| 404 |
|
|
T4 => [H2H_NS, 't4', $head_el, 'data'], |
| 405 |
|
|
T5 => [H2H_NS, 't5', $head_el, 'data'], |
| 406 |
|
|
T6 => [H2H_NS, 't6', $head_el, 'data'], |
| 407 |
|
|
T7 => [H2H_NS, 't7', $head_el, 'data'], |
| 408 |
|
|
TAION => [H2H_NS, 'taion', $head_el, 'data'], |
| 409 |
|
|
TENKI => [H2H_NS, 'tenki', $head_el, 'data'], |
| 410 |
|
|
THEME => [H2H_NS, 'theme', $head_el, 'data'], |
| 411 |
|
|
UL => [HTML_NS, 'ul', $parent->{text}, 'list', {PRE => 1}], |
| 412 |
|
|
YAMI => [H2H_NS, 'yami', $parent->{text}, 'data'], |
| 413 |
|
|
a => [HTML_NS, 'a', $parent->{text}, 'data'], |
| 414 |
|
|
abbr => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
| 415 |
|
|
acronym => [HTML_NS, 'abbr', $parent->{text}, 'data'], |
| 416 |
|
|
blockquote => [HTML_NS, 'blockquote', $parent->{text}, 'data'], |
| 417 |
|
|
br => [HTML_NS, 'br', $parent->{text}, 'br'], |
| 418 |
|
|
caption => [HTML_NS, 'caption', $parent->{text}, 'data'], |
| 419 |
|
|
code => [HTML_NS, 'code', $parent->{text}, 'data'], |
| 420 |
|
|
dd => [HTML_NS, 'dd', $parent->{text}, 'data'], |
| 421 |
|
|
del => [HTML_NS, 'del', $parent->{text}, 'data'], |
| 422 |
|
|
dfn => [HTML_NS, 'dfn', $parent->{text}, 'data'], |
| 423 |
|
|
div => [HTML_NS, 'div', $parent->{text}, 'data'], |
| 424 |
|
|
dl => [HTML_NS, 'dl', $parent->{text}, 'data'], |
| 425 |
|
|
dt => [HTML_NS, 'dt', $parent->{text}, 'data'], |
| 426 |
|
|
em => [HTML_NS, 'em', $parent->{text}, 'data'], |
| 427 |
|
|
form => [HTML_NS, 'form', $parent->{text}, 'data'], |
| 428 |
|
|
h1 => [HTML_NS, 'h1', $parent->{text}, 'data'], |
| 429 |
|
|
h2 => [HTML_NS, 'h2', $parent->{text}, 'data'], |
| 430 |
|
|
h3 => [HTML_NS, 'h3', $parent->{text}, 'data'], |
| 431 |
|
|
h4 => [HTML_NS, 'h4', $parent->{text}, 'data'], |
| 432 |
|
|
img => [HTML_NS, 'img', $parent->{text}, 'data'], |
| 433 |
|
|
input => [HTML_NS, 'input', $parent->{text}, 'data'], |
| 434 |
|
|
ins => [HTML_NS, 'ins', $parent->{text}, 'data'], |
| 435 |
|
|
kbd => [HTML_NS, 'kbd', $parent->{text}, 'data'], |
| 436 |
|
|
label => [HTML_NS, 'label', $parent->{text}, 'data'], |
| 437 |
|
|
li => [HTML_NS, 'li', $parent->{text}, 'data'], |
| 438 |
|
|
ol => [HTML_NS, 'ol', $parent->{text}, 'data'], |
| 439 |
|
|
p => [HTML_NS, 'p', $parent->{text}, 'data'], |
| 440 |
|
|
pre => [HTML_NS, 'pre', $parent->{text}, 'data'], |
| 441 |
|
|
q => [HTML_NS, 'q', $parent->{text}, 'data'], |
| 442 |
|
|
rb => [HTML_NS, 'rb', $parent->{text}, 'data'], |
| 443 |
|
|
rp => [HTML_NS, 'rp', $parent->{text}, 'data'], |
| 444 |
|
|
rt => [HTML_NS, 'rt', $parent->{text}, 'data'], |
| 445 |
|
|
ruby => [HTML_NS, 'ruby', $parent->{text}, 'data'], |
| 446 |
|
|
span => [HTML_NS, 'span', $parent->{text}, 'data'], |
| 447 |
|
|
strong => [HTML_NS, 'strong', $parent->{text}, 'data'], |
| 448 |
|
|
sup => [HTML_NS, 'sup', $parent->{text}, 'data'], |
| 449 |
|
|
table => [HTML_NS, 'table', $parent->{text}, 'data'], |
| 450 |
|
|
tbody => [HTML_NS, 'tbody', $parent->{text}, 'data'], |
| 451 |
|
|
td => [HTML_NS, 'td', $parent->{text}, 'data'], |
| 452 |
|
|
th => [HTML_NS, 'th', $parent->{text}, 'data'], |
| 453 |
|
|
thead => [HTML_NS, 'thead', $parent->{text}, 'data'], |
| 454 |
|
|
tr => [HTML_NS, 'tr', $parent->{text}, 'data'], |
| 455 |
|
|
ul => [HTML_NS, 'ul', $parent->{text}, 'data'], |
| 456 |
|
|
var => [HTML_NS, 'var', $parent->{text}, 'data'], |
| 457 |
|
|
}->{$token->{value}} |
| 458 |
|
|
|| [H2H_NS, $token->{value}, $parent->{text}, 'data']; |
| 459 |
|
|
while ($info->[4]->{$info->[2]->get_user_data ('command-name')}) { |
| 460 |
|
|
$info->[2] = $info->[2]->parent_node; |
| 461 |
|
|
} |
| 462 |
|
|
my $el = $self->{doc}->create_element_ns ($info->[0], $info->[1]); |
| 463 |
|
|
$el->set_user_data ('command-name', $token->{value}); |
| 464 |
|
|
$info->[2]->append_child ($el); |
| 465 |
|
|
$parent->{text} = $el; |
| 466 |
|
|
$parent->{attr} = $el; |
| 467 |
|
|
$parent->{subsection} = $el if $token->{value} eq 'SECTION'; |
| 468 |
|
|
$state = $info->[3]; |
| 469 |
|
|
} elsif ($token->{type} eq 'end') { |
| 470 |
|
|
E: while (my $et = $parent->{text}->get_user_data ('command-name')) { |
| 471 |
|
|
$parent->{text} = $parent->{text}->parent_node; |
| 472 |
|
|
last E if $et eq $token->{value}; |
| 473 |
|
|
last E if $et eq '#body'; |
| 474 |
|
|
} |
| 475 |
|
|
$parent->{attr} = $parent->{text}; |
| 476 |
|
|
$state = { |
| 477 |
|
|
UL => 'list', |
| 478 |
|
|
OL => 'list', |
| 479 |
|
|
}->{$parent->{text}->get_user_data ('command-name')} |
| 480 |
|
|
|| $state eq 'br' ? 'br' : 'data'; |
| 481 |
|
|
} elsif ($token->{type} eq 'char') { |
| 482 |
|
|
my $el = $self->{doc}->create_element_ns (H2H_NS, 'char'); |
| 483 |
|
|
$el->manakai_append_text ($token->{value}); |
| 484 |
|
|
$parent->{text}->append_child ($el); |
| 485 |
|
|
$state = 'data' if $state eq 'br'; |
| 486 |
|
|
} elsif ($token->{type} eq 'magic') { |
| 487 |
|
|
my ($name, $version) = split m#/#, $token->{value}, 2; |
| 488 |
|
|
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Name', $name); |
| 489 |
|
|
$doc_el->set_attribute_ns (SW09_NS, 'sw9:Version', $version); |
| 490 |
|
|
} elsif ($token->{type} eq 'hnf-comment') { |
| 491 |
|
|
my $com = $self->{doc}->create_element_ns |
| 492 |
|
|
(H2H_NS, 'hnf-comment'); |
| 493 |
|
|
$com->text_content ($token->{value}); |
| 494 |
|
|
$parent->{text}->append_child ($com); |
| 495 |
|
|
} elsif ($token->{type} eq 'html-comment') { |
| 496 |
|
|
my $com = $self->{doc}->create_element_ns |
| 497 |
|
|
(H2H_NS, 'html-comment'); |
| 498 |
|
|
$com->text_content ($token->{value}); |
| 499 |
|
|
$parent->{text}->append_child ($com); |
| 500 |
|
|
} elsif ($token->{type} eq 'source') { |
| 501 |
|
|
my $src = $self->{doc}->create_element_ns (HTML3_NS, 'credit'); |
| 502 |
|
|
$src->manakai_append_text ($token->{value}); |
| 503 |
|
|
$parent->{text}->append_child ($src); |
| 504 |
|
|
} elsif ($token->{type} eq 'uri') { |
| 505 |
|
|
my $v = $token->{value}; |
| 506 |
|
|
if ($v =~ /^\(([^()]+)\)$/) { |
| 507 |
|
|
my @v = split /\s*,\s*/, $1; |
| 508 |
|
|
$parent->{attr}->set_attribute_ns |
| 509 |
|
|
(H2H_NS, 'href-year' => $v[0]+0); |
| 510 |
|
|
$parent->{attr}->set_attribute_ns |
| 511 |
|
|
(H2H_NS, 'href-month' => $v[1]+0); |
| 512 |
|
|
$parent->{attr}->set_attribute_ns |
| 513 |
|
|
(H2H_NS, 'href-day' => $v[2]+0); |
| 514 |
|
|
$parent->{attr}->set_attribute_ns |
| 515 |
|
|
(H2H_NS, 'href-section' => $v[3]+0) if $v[3]; |
| 516 |
|
|
$parent->{attr}->set_attribute_ns |
| 517 |
|
|
(H2H_NS, 'href-subsection' => $v[4]+0) |
| 518 |
|
|
if $v[4]; |
| 519 |
|
|
} else { |
| 520 |
|
|
my $xuri = $parent->{attr}->manakai_expanded_uri; |
| 521 |
|
|
if ($xuri eq HTML_NS . 'a') { |
| 522 |
|
|
$parent->{attr}->set_attribute_ns |
| 523 |
|
|
(undef, href => $token->{value}); |
| 524 |
|
|
} elsif ($xuri eq HTML_NS . 'blockquote') { |
| 525 |
|
|
$parent->{attr}->set_attribute_ns |
| 526 |
|
|
(undef, cite => $token->{value}); |
| 527 |
|
|
} else { |
| 528 |
|
|
$parent->{attr}->set_attribute_ns |
| 529 |
|
|
(XHTML2_NS, href => $token->{value}); |
| 530 |
|
|
} |
| 531 |
|
|
} |
| 532 |
|
|
} elsif ({ |
| 533 |
|
|
title => 1, style => 1, |
| 534 |
|
|
class => 1, href => 1, 'accept-charset' => 1, |
| 535 |
|
|
action => 1, method => 1, alt => 1, src => 1, |
| 536 |
|
|
type => 1, value => 1, name => 1, accesskey => 1, |
| 537 |
|
|
for => 1, cite => 1, onclick => 1, colspan => 1, |
| 538 |
|
|
scope => 1, summary => 1, |
| 539 |
|
|
}->{$token->{type}}) { |
| 540 |
|
|
$parent->{attr}->set_attribute_ns |
| 541 |
|
|
(undef, $token->{type}, $token->{value}); |
| 542 |
|
|
} elsif ($token->{type} eq 'cat') { |
| 543 |
|
|
for (split /\s*,\s*/, $token->{value}) { |
| 544 |
|
|
my $el = $self->{doc}->create_element_ns (H2H_NS, 'cat'); |
| 545 |
|
|
$el->manakai_append_text ($_); |
| 546 |
|
|
$parent->{attr}->append_child ($el); |
| 547 |
|
|
} |
| 548 |
|
|
} elsif ($token->{type} eq 'lang' or $token->{type} eq 'xml:lang') { |
| 549 |
|
|
$parent->{attr}->set_attribute_ns |
| 550 |
|
|
(q<http://www.w3.org/XML/1998/namespace>, |
| 551 |
|
|
'xml:lang' => $token->{value}); |
| 552 |
|
|
} elsif ($token->{type} eq 'id') { |
| 553 |
|
|
$parent->{attr}->set_attribute_ns |
| 554 |
|
|
(q<http://www.w3.org/XML/1998/namespace>, |
| 555 |
|
|
'xml:id' => $token->{value}); |
| 556 |
|
|
} elsif ($token->{type} eq 'wbradded') { |
| 557 |
|
|
# ignore |
| 558 |
|
|
} else { |
| 559 |
|
|
# key, level, place, position, time |
| 560 |
|
|
$parent->{attr}->set_attribute_ns |
| 561 |
|
|
(H2H_NS, $token->{type}, $token->{value}); |
| 562 |
|
|
} |
| 563 |
|
|
} |
| 564 |
|
|
} # _construct_tree |
| 565 |
|
|
|
| 566 |
|
|
1; |
| 567 |
|
|
## $Date: 2007/06/30 13:12:33 $ |