/[suikacvs]/markup/html/whatpm/Whatpm/H2H.pm
Suika

Contents of /markup/html/whatpm/Whatpm/H2H.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (show annotations) (download)
Sun Aug 17 05:09:12 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +4 -3 lines
++ whatpm/Whatpm/ChangeLog	17 Aug 2008 05:06:46 -0000
2008-08-17  Wakaba  <wakaba@suika.fam.cx>

	* H2H.pm (_shift_token): Support for unquoted HTML attribute
	values.

++ whatpm/Whatpm/ContentChecker/ChangeLog	17 Aug 2008 05:08:51 -0000
2008-08-17  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm (%XHTML2CommonAttrStatus): HTML5 status was missing.

1 package Whatpm::H2H;
2 use strict;
3
4 sub H2H_NS () { q<http://suika.fam.cx/~wakaba/archive/2005/manakai/Markup/H2H/> }
5 sub HTML_NS () { q<http://www.w3.org/1999/xhtml> }
6 sub HTML3_NS () { q<urn:x-suika-fam-cx:markup:ietf:html:3:draft:00:> }
7 sub SW09_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:9:> }
8 sub XHTML2_NS () { q<http://www.w3.org/2002/06/xhtml2/> }
9
10 sub parse_string ($$$) {
11 my $self = bless {
12 token => [],
13 location => {},
14 doc => $_[2],
15 }, $_[0];
16
17 my $s = ''.$_[1];
18 $s =~ s/\x0D\x0A/\x0A/g;
19 $s =~ tr/\x0D/\x0A/;
20 $self->{line} = [split /\x0A/, $s];
21
22 local $Error::Depth = $Error::Depth + 1;
23 $self->{doc}->strict_error_checking (0);
24 my $doc_el = $self->{doc}->create_element_ns (HTML_NS, 'html');
25 $doc_el->set_attribute_ns (q<http://www.w3.org/2000/xmlns/>, 'xmlns', HTML_NS);
26 $self->{doc}->append_child ($doc_el);
27
28 $self->_construct_tree;
29
30 return $self->{doc};
31 } # parse_string
32
33 sub _shift_token ($) {
34 my $self = $_[0];
35
36 if (@{$self->{token}}) {
37 return shift @{$self->{token}};
38 }
39
40 my $attrvalue = sub {
41 my $v = shift;
42 $v =~ s/&quot;/"/g;
43 $v =~ s/&lt;/</g;
44 $v =~ s/&gt;/>/g;
45 $v =~ s/&reg;/\x{00AE}/g;
46 $v =~ s/&hearts;/\x{2661}/g;
47 $v =~ s/&amp;/&/g;
48 return $v;
49 };
50
51 my $uriv = sub {
52 my $v = $attrvalue->(shift);
53 $v =~ s/^\{/(/;
54 $v =~ s/\}$/)/;
55 $v =~ s/^\#([0-9si]+)$/($1)/;
56 $v =~ s/^\(([0-9]{4})([0-9]{2})([0-9]{2})([^)]*)\)$/($1, $2, $3$4)/;
57 $v =~ s/[si]/, /g if $v =~ /^\(/ and $v =~ /\)$/;
58 return $v;
59 };
60
61 my $r = {type => '#EOF'};
62 L: while (defined (my $line = shift @{$self->{line}})) {
63 if ($line =~ s/^([A-Z]+|T[0-9])(\*?\+?\*?)(?:\s+|$)//) {
64 my $command = $1;
65 my $flag = $2;
66 $r = {type => 'start', value => $command};
67
68 my $uri;
69 if ($flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) {
70 $uri = $1;
71 }
72
73 my $attr = '';
74 if ($line =~ s/^\{(\s*(?:[A-Za-z][^{}]*)?)\}\s*//) {
75 $attr = $1;
76 }
77
78 if (not defined $uri and
79 $flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) {
80 $uri = $1;
81 }
82
83 my @token;
84 my $info = {
85 # val# val#(*)
86 ABBR => [2, 2],
87 ACRONYM => [2, 2],
88 CITE => [2, 1],
89 LDIARY => [4, 4],
90 LIMG => [4, 4],
91 LINK => [2, 1],
92 LMG => [2, 2],
93 LNEW => [2, 2],
94 PERSON => [2, 2],
95 RIBU => [2, 2],
96 RUBY => [2, 2],
97 SEE => [2, 2],
98 }->{$command};
99 my @value = split /\s+/, $line,
100 ($flag =~ /\*/ ? $info->[1] : $info->[0]) || 1;
101
102 push @token, {type => 'uri', value => $uriv->($uri)} if defined $uri;
103
104 my %attr;
105 while ($attr =~ /([A-Za-z0-9_-]+)\s*(?:=>?|:)\s*([^";,]+|"[^"]+")/gc) {
106 my $name = lc $1;
107 my $value = $2;
108 $value =~ tr/"//d;
109 $attr{$name} = $value;
110 }
111 delete $attr{'content-type'};
112
113 if ({
114 ABBR => 1, ACRONYM => 1,
115 RUBY => 1, RIBU => 1,
116 }->{$command}) {
117 if (@value == 1 and $attr{title}) {
118 push @value, $attr{title};
119 delete $attr{title};
120 }
121 if (@value == 2) {
122 unshift @{$self->{line}},
123 'RB',
124 'DATA {} >>'.$value[1],
125 '/RB',
126 'RT',
127 'DATA {} >>'.$value[0],
128 '/RT',
129 '/'.$command;
130 } else {
131 unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command;
132 }
133 } elsif ($command eq 'CITE') {
134 if (@value == 2) {
135 if (defined $uri or $value[0] !~ /^[a-z-]+:/) {
136 unshift @{$self->{line}},
137 'SRC',
138 'DATA {} >>'.$value[0].' '.$value[1],
139 '/SRC', 'BODYTEXT';
140 } else {
141 push @token, {type => 'uri', value => $uriv->($value[0])};
142 unshift @{$self->{line}},
143 'SRC',
144 'DATA {} >>'.$value[1],
145 '/SRC', 'BODYTEXT';
146 }
147 } elsif (@value == 1) {
148 if (defined $uri or $value[0] !~ /^[a-z-]+:/) {
149 unshift @{$self->{line}},
150 'SRC',
151 'DATA {} >>'.$value[0],
152 '/SRC', 'BODYTEXT';
153 } else {
154 push @token, {type => 'uri', value => $uriv->($value[0])};
155 unshift @{$self->{line}}, 'BODYTEXT';
156 }
157 } else {
158 unshift @{$self->{line}}, 'BODYTEXT';
159 }
160 } elsif ($command eq 'DATA') {
161 my @token;
162 $line =~ s/^>>//;
163 while (length $line) {
164 if ($line =~ s/^<([a-z0-9]+)\s*//) {
165 my $tagname = $1;
166 push @token, {type => 'start', value => $tagname};
167 while ($line =~ s/^([a-z-]+)\s*=\s*(?>"([^"]*)"|([^\s">]*))\s*//) {
168 push @token, {type => $1,
169 value => $attrvalue->(defined $2 ? $2 : $3)};
170 }
171 $line =~ s#^/?\s*>##;
172 push @token, {type => 'end', value => $tagname}
173 if $tagname eq 'img' or $tagname eq 'input' or
174 $tagname eq 'br';
175 } elsif ($line =~ s#^</([a-z0-9]+)\s*>##) {
176 push @token, {type => 'end', value => $1};
177 } elsif ($line =~ s/^<!--(.*?)-->//) {
178 push @token, {type => 'html-comment', value => $1};
179 } elsif ($line =~ s/^&([a-z]+);//) {
180 my $name = $1;
181 if ($name eq 'amp') {
182 push @token, {type => 'text', value => '&'};
183 } elsif ($name eq 'lt') {
184 push @token, {type => 'text', value => '<'};
185 } elsif ($name eq 'gt') {
186 push @token, {type => 'text', value => '>'};
187 } elsif ($name eq 'quot') {
188 push @token, {type => 'text', value => '"'};
189 } elsif ($name eq 'reg') {
190 push @token, {type => 'text', value => "\x{00AE}"};
191 } elsif ($name eq 'hearts') {
192 push @token, {type => 'text', value => "\x{2661}"};
193 } else {
194 push @token, {type => 'char', value => $name};
195 }
196 } elsif ($line =~ s/^&#([0-9]+);//) {
197 push @token, {type => 'text', value => ord $1};
198 } elsif ($line =~ s/^&#x([0-9A-Fa-f]+);//) {
199 push @token, {type => 'text', value => ord hex $1};
200 } elsif ($line =~ s/^([^<&]+)//) {
201 push @token, {type => 'text', value => $1};
202 } else {
203 push @token, {type => 'text', value => substr ($line, 0, 1)};
204 substr ($line, 0, 1) = '';
205 }
206 }
207 push @token, {type => 'eol'};
208
209 $r = shift @token;
210 push @{$self->{token}}, @token;
211 last L;
212 } elsif ({
213 DD => 1, DT => 1,
214 DEL => 1, INS => 1,
215 LI => 1,
216 RB => 1, RT => 1,
217 STRONG => 1,
218 YAMI => 1,
219 EM => 1,
220 HOUR => 1, KION => 1, LUNCH => 1,
221 TAION => 1, TENKI => 1, THEME => 1,
222 T1 => 1, T2 => 1, T3 => 1, T4 => 1,
223 T5 => 1, T6 => 1, T7 => 1, SP => 1,
224 }->{$command}) {
225 if (@value) {
226 unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command;
227 }
228 } elsif ($command eq 'DIV') {
229 if (@value) {
230 $r = {type => 'class', value => $value[0]};
231 }
232 } elsif ($command eq 'LDIARY') {
233 $value[0] =~ s/^([0-9]{4})([0-9]{2})([0-9]{2})/$1, $2, $3/;
234 $value[0] =~ s/[is]/, /;
235 $r = {type => 'start', value => 'LINK'};
236 push @token, {type => 'uri', value => "($value[0])"};
237 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK';
238 } elsif ($command eq 'LIMG') {
239 $r = {type => 'start', value => 'IMG'};
240 push @token, {type => 'uri', value => $uriv->($value[0])};
241 unshift @{$self->{line}}, 'DATA {} >>'.$value[3], '/IMG';
242 } elsif ($command eq 'LMG') {
243 $r = {type => 'start', value => 'IMG'};
244 push @token, {type => 'uri', value => $uriv->($value[0])};
245 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/IMG';
246 } elsif ($command eq 'LINK') {
247 if (@value == 2) {
248 push @token, {type => 'uri', value => $uriv->($value[0])};
249 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK';
250 } elsif ($flag =~ /\+/) {
251 push @token, {type => 'uri', value => $uriv->($value[0])};
252 } else {
253 unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/LINK';
254 }
255 } elsif ($command eq 'NEW') {
256 $r = {type => 'start', value => 'SECTION'};
257 unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H';
258 } elsif ($command eq 'LNEW') {
259 $r = {type => 'start', value => 'SECTION'};
260 push @token, {type => 'uri', value => $uriv->($value[0])};
261 unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[1], '/H';
262 } elsif ($command eq 'SUB') {
263 $r = {type => 'start', value => 'SUB'};
264 unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H';
265 } elsif ($command eq 'PERSON') {
266 push @token, {type => 'key', value => $attrvalue->($value[0])};
267 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/PERSON';
268 } elsif ($command eq 'SEE') {
269 if (@value == 2) {
270 push @token, {type => 'key', value => $attrvalue->($value[0])};
271 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SEE';
272 } else {
273 unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SEE';
274 }
275 } elsif ($command eq 'SPAN') {
276 if (@value == 2) {
277 push @token, {type => 'class',
278 value => $attrvalue->($value[0])};
279 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SPAN';
280 } else {
281 unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SPAN';
282 }
283 } elsif ($command eq 'OK') {
284 $r = {type => '#EOF'};
285 next L;
286 } elsif ($command eq 'XML') {
287 unshift @{$self->{line}}, 'DATA {} >>XML '.$line;
288 next L;
289 }
290
291 for (keys %attr) {
292 push @token, {type => $_, value => $attrvalue->($attr{$_})};
293 }
294
295 push @{$self->{token}}, @token;
296 last L;
297 } elsif ($line eq 'H2H/1.0') {
298 $r = {type => 'magic', value => 'H2H/1.0'};
299 last L;
300 } elsif ($line =~ m#^/([A-Z]+)\s*$#) {
301 $r = {type => 'end', value => $1};
302 last L;
303 } elsif ($line =~ s/^!#//) {
304 $r = {type => 'hnf-comment', value => $line};
305 last L;
306 } elsif ($line =~ s/^!//) {
307 $r = {type => 'html-comment', value => $line};
308 last L;
309 } else {
310 unshift @{$self->{line}}, 'DATA {} >>'.$line;
311 next L;
312 }
313 } # L
314
315 return $r;
316 } # _shift_token
317
318 sub _construct_tree ($) {
319 my $self = $_[0];
320
321 my $doc_el = $self->{doc}->document_element;
322 my $head_el = $self->{doc}->create_element_ns (HTML_NS, 'head');
323 my $body_el = $self->{doc}->create_element_ns (HTML_NS, 'body');
324 $doc_el->append_child ($head_el);
325 $doc_el->append_child ($body_el);
326 $doc_el->set_user_data ('command-name' => '#html');
327 $head_el->set_user_data ('command-name' => '#head');
328 $body_el->set_user_data ('command-name' => '#body');
329 $doc_el->set_attribute_ns (SW09_NS, 'sw9:Name' => 'H2H');
330 $doc_el->set_attribute_ns (SW09_NS, 'sw9:Version' => '0.9');
331
332 my $parent = {
333 subsection => $body_el,
334 attr => $body_el,
335 text => $body_el,
336 };
337 my $state = 'data';
338 ## data - normal
339 ## list - UL or OL
340 ## br - after br start tag token
341 ## eol - after eol token
342
343 T: while (my $token = $self->_shift_token) {
344 last T if $token->{type} eq '#EOF';
345
346 if ($token->{type} eq 'text') {
347 if ($state eq 'list') {
348 my $li_el = $self->{doc}->create_element_ns (HTML_NS, 'li');
349 $li_el->manakai_append_text ($token->{value});
350 $parent->{text}->append_child ($li_el);
351 } else {
352 $parent->{text}->manakai_append_text ("\x0A") if $state eq 'eol';
353 $parent->{text}->manakai_append_text ($token->{value});
354 $state = 'data';
355 }
356 } elsif ($token->{type} eq 'eol') {
357 if ($state eq 'eol') {
358 $parent->{text}->manakai_append_text ("\x0A");
359 } else {
360 $state = $state eq 'br' ? 'data' : 'eol';
361 }
362 } elsif ($token->{type} eq 'start') {
363 my $info = {
364 # nsuri, qname, parent, state
365 ABBR => [HTML_NS, 'abbr', $parent->{text}, 'data'],
366 ACRONYM => [HTML_NS, 'abbr', $parent->{text}, 'data'],
367 BODYTEXT => [HTML3_NS, 'bodytext', $parent->{text}, 'data'],
368 CITE => [HTML_NS, 'blockquote', $parent->{text}, 'data',
369 {PRE => 1}],
370 DD => [HTML_NS, 'dd', $parent->{text}, 'data'],
371 DEL => [HTML_NS, 'del', $parent->{text}, 'data'],
372 DIV => [HTML_NS, 'div', $parent->{text}, 'data', {P => 1}],
373 DL => [HTML_NS, 'dl', $parent->{text}, 'data'],
374 DT => [HTML_NS, 'dt', $parent->{text}, 'data'],
375 EM => [HTML_NS, 'em', $parent->{text}, 'data'],
376 FN => [H2H_NS, 'fn', $parent->{text}, 'data'],
377 H => [XHTML2_NS, 'h', $parent->{text}, 'data'],
378 HOUR => [H2H_NS, 'hour', $head_el, 'data'],
379 IMG => [HTML_NS, 'img', $parent->{text}, 'data'],
380 INS => [HTML_NS, 'ins', $parent->{text}, 'data'],
381 KION => [H2H_NS, 'kion', $head_el, 'data'],
382 LI => [HTML_NS, 'li', $parent->{text}, 'data'],
383 LINK => [HTML_NS, 'a', $parent->{text}, 'data'],
384 LUNCH => [H2H_NS, 'lunch', $head_el, 'data'],
385 OL => [HTML_NS, 'ol', $parent->{text}, 'list', {PRE => 1}],
386 P => [HTML_NS, 'p', $parent->{text}, 'data',
387 {P => 1, PRE => 1}],
388 PERSON => [HTML3_NS, 'person', $parent->{text}, 'data'],
389 PRE => [HTML_NS, 'pre', $parent->{text}, 'data',
390 {P => 1}],
391 RB => [HTML_NS, 'rb', $parent->{text}, 'data'],
392 RIBU => [HTML_NS, 'ruby', $parent->{text}, 'data'],
393 RT => [HTML_NS, 'rt', $parent->{text}, 'data'],
394 RUBY => [HTML_NS, 'ruby', $parent->{text}, 'data'],
395 SECTION => [HTML_NS, 'section', $body_el, 'data'],
396 SEE => [HTML_NS, 'i', $parent->{text}, 'data'],
397 SP => [H2H_NS, 'sp', $head_el, 'data'],
398 SPAN => [HTML_NS, 'span', $parent->{text}, 'data'],
399 SRC => [HTML3_NS, 'credit', $parent->{text}, 'data'],
400 STRONG => [HTML_NS, 'strong', $parent->{text}, 'data'],
401 SUBSECTION => [HTML_NS, 'section', $parent->{subsection}, 'data'],
402 T1 => [H2H_NS, 't1', $head_el, 'data'],
403 T2 => [H2H_NS, 't2', $head_el, 'data'],
404 T3 => [H2H_NS, 't3', $head_el, 'data'],
405 T4 => [H2H_NS, 't4', $head_el, 'data'],
406 T5 => [H2H_NS, 't5', $head_el, 'data'],
407 T6 => [H2H_NS, 't6', $head_el, 'data'],
408 T7 => [H2H_NS, 't7', $head_el, 'data'],
409 TAION => [H2H_NS, 'taion', $head_el, 'data'],
410 TENKI => [H2H_NS, 'tenki', $head_el, 'data'],
411 THEME => [H2H_NS, 'theme', $head_el, 'data'],
412 UL => [HTML_NS, 'ul', $parent->{text}, 'list', {PRE => 1}],
413 YAMI => [H2H_NS, 'yami', $parent->{text}, 'data'],
414 a => [HTML_NS, 'a', $parent->{text}, 'data'],
415 abbr => [HTML_NS, 'abbr', $parent->{text}, 'data'],
416 acronym => [HTML_NS, 'abbr', $parent->{text}, 'data'],
417 b => [HTML_NS, 'b', $parent->{text}, 'data'],
418 blockquote => [HTML_NS, 'blockquote', $parent->{text}, 'data'],
419 br => [HTML_NS, 'br', $parent->{text}, 'br'],
420 caption => [HTML_NS, 'caption', $parent->{text}, 'data'],
421 code => [HTML_NS, 'code', $parent->{text}, 'data'],
422 dd => [HTML_NS, 'dd', $parent->{text}, 'data'],
423 del => [HTML_NS, 'del', $parent->{text}, 'data'],
424 dfn => [HTML_NS, 'dfn', $parent->{text}, 'data'],
425 div => [HTML_NS, 'div', $parent->{text}, 'data'],
426 dl => [HTML_NS, 'dl', $parent->{text}, 'data'],
427 dt => [HTML_NS, 'dt', $parent->{text}, 'data'],
428 em => [HTML_NS, 'em', $parent->{text}, 'data'],
429 form => [HTML_NS, 'form', $parent->{text}, 'data'],
430 h1 => [HTML_NS, 'h1', $parent->{text}, 'data'],
431 h2 => [HTML_NS, 'h2', $parent->{text}, 'data'],
432 h3 => [HTML_NS, 'h3', $parent->{text}, 'data'],
433 h4 => [HTML_NS, 'h4', $parent->{text}, 'data'],
434 i => [HTML_NS, 'i', $parent->{text}, 'data'],
435 img => [HTML_NS, 'img', $parent->{text}, 'data'],
436 input => [HTML_NS, 'input', $parent->{text}, 'data'],
437 ins => [HTML_NS, 'ins', $parent->{text}, 'data'],
438 kbd => [HTML_NS, 'kbd', $parent->{text}, 'data'],
439 label => [HTML_NS, 'label', $parent->{text}, 'data'],
440 li => [HTML_NS, 'li', $parent->{text}, 'data'],
441 ol => [HTML_NS, 'ol', $parent->{text}, 'data'],
442 p => [HTML_NS, 'p', $parent->{text}, 'data'],
443 pre => [HTML_NS, 'pre', $parent->{text}, 'data'],
444 q => [HTML_NS, 'q', $parent->{text}, 'data'],
445 rb => [HTML_NS, 'rb', $parent->{text}, 'data'],
446 rp => [HTML_NS, 'rp', $parent->{text}, 'data'],
447 rt => [HTML_NS, 'rt', $parent->{text}, 'data'],
448 ruby => [HTML_NS, 'ruby', $parent->{text}, 'data'],
449 samp => [HTML_NS, 'samp', $parent->{text}, 'data'],
450 span => [HTML_NS, 'span', $parent->{text}, 'data'],
451 strong => [HTML_NS, 'strong', $parent->{text}, 'data'],
452 sub => [HTML_NS, 'sub', $parent->{text}, 'data'],
453 sup => [HTML_NS, 'sup', $parent->{text}, 'data'],
454 table => [HTML_NS, 'table', $parent->{text}, 'data'],
455 tbody => [HTML_NS, 'tbody', $parent->{text}, 'data'],
456 td => [HTML_NS, 'td', $parent->{text}, 'data'],
457 th => [HTML_NS, 'th', $parent->{text}, 'data'],
458 thead => [HTML_NS, 'thead', $parent->{text}, 'data'],
459 tr => [HTML_NS, 'tr', $parent->{text}, 'data'],
460 ul => [HTML_NS, 'ul', $parent->{text}, 'data'],
461 var => [HTML_NS, 'var', $parent->{text}, 'data'],
462 }->{$token->{value}}
463 || [H2H_NS, $token->{value}, $parent->{text}, 'data'];
464 while ($info->[4]->{$info->[2]->get_user_data ('command-name')}) {
465 $info->[2] = $info->[2]->parent_node;
466 }
467 my $el = $self->{doc}->create_element_ns ($info->[0], $info->[1]);
468 $el->set_user_data ('command-name', $token->{value});
469 $info->[2]->append_child ($el);
470 $parent->{text} = $el;
471 $parent->{attr} = $el;
472 $parent->{subsection} = $el if $token->{value} eq 'SECTION';
473 $state = $info->[3];
474 } elsif ($token->{type} eq 'end') {
475 E: while (my $et = $parent->{text}->get_user_data ('command-name')) {
476 $parent->{text} = $parent->{text}->parent_node;
477 last E if $et eq $token->{value};
478 last E if $et eq '#body';
479 }
480 $parent->{attr} = $parent->{text};
481 $state = {
482 UL => 'list',
483 OL => 'list',
484 }->{$parent->{text}->get_user_data ('command-name')}
485 || $state eq 'br' ? 'br' : 'data';
486 } elsif ($token->{type} eq 'char') {
487 my $el = $self->{doc}->create_element_ns (H2H_NS, 'char');
488 $el->manakai_append_text ($token->{value});
489 $parent->{text}->append_child ($el);
490 $state = 'data' if $state eq 'br';
491 } elsif ($token->{type} eq 'magic') {
492 my ($name, $version) = split m#/#, $token->{value}, 2;
493 $doc_el->set_attribute_ns (SW09_NS, 'sw9:Name', $name);
494 $doc_el->set_attribute_ns (SW09_NS, 'sw9:Version', $version);
495 } elsif ($token->{type} eq 'hnf-comment') {
496 my $com = $self->{doc}->create_element_ns
497 (H2H_NS, 'hnf-comment');
498 $com->text_content ($token->{value});
499 $parent->{text}->append_child ($com);
500 } elsif ($token->{type} eq 'html-comment') {
501 my $com = $self->{doc}->create_element_ns
502 (H2H_NS, 'html-comment');
503 $com->text_content ($token->{value});
504 $parent->{text}->append_child ($com);
505 } elsif ($token->{type} eq 'source') {
506 my $src = $self->{doc}->create_element_ns (HTML3_NS, 'credit');
507 $src->manakai_append_text ($token->{value});
508 $parent->{text}->append_child ($src);
509 } elsif ($token->{type} eq 'uri') {
510 my $v = $token->{value};
511 if ($v =~ /^\(([^()]+)\)$/) {
512 my @v = split /\s*,\s*/, $1;
513 $parent->{attr}->set_attribute_ns
514 (H2H_NS, 'href-year' => $v[0]+0);
515 $parent->{attr}->set_attribute_ns
516 (H2H_NS, 'href-month' => $v[1]+0);
517 $parent->{attr}->set_attribute_ns
518 (H2H_NS, 'href-day' => $v[2]+0);
519 $parent->{attr}->set_attribute_ns
520 (H2H_NS, 'href-section' => $v[3]+0) if $v[3];
521 $parent->{attr}->set_attribute_ns
522 (H2H_NS, 'href-subsection' => $v[4]+0)
523 if $v[4];
524 } else {
525 my $xuri = $parent->{attr}->manakai_expanded_uri;
526 if ($xuri eq HTML_NS . 'a') {
527 $parent->{attr}->set_attribute_ns
528 (undef, href => $token->{value});
529 } elsif ($xuri eq HTML_NS . 'blockquote') {
530 $parent->{attr}->set_attribute_ns
531 (undef, cite => $token->{value});
532 } else {
533 $parent->{attr}->set_attribute_ns
534 (XHTML2_NS, href => $token->{value});
535 }
536 }
537 } elsif ({
538 title => 1, style => 1,
539 class => 1, href => 1, 'accept-charset' => 1,
540 action => 1, method => 1, alt => 1, src => 1,
541 type => 1, value => 1, name => 1, accesskey => 1,
542 for => 1, cite => 1, onclick => 1, colspan => 1,
543 scope => 1, summary => 1,
544 }->{$token->{type}}) {
545 $parent->{attr}->set_attribute_ns
546 (undef, $token->{type}, $token->{value});
547 } elsif ($token->{type} eq 'cat') {
548 for (split /\s*,\s*/, $token->{value}) {
549 my $el = $self->{doc}->create_element_ns (H2H_NS, 'cat');
550 $el->manakai_append_text ($_);
551 $parent->{attr}->append_child ($el);
552 }
553 } elsif ($token->{type} eq 'lang' or $token->{type} eq 'xml:lang') {
554 $parent->{attr}->set_attribute_ns
555 (q<http://www.w3.org/XML/1998/namespace>,
556 'xml:lang' => $token->{value});
557 } elsif ($token->{type} eq 'id') {
558 $parent->{attr}->set_attribute_ns
559 (q<http://www.w3.org/XML/1998/namespace>,
560 'xml:id' => $token->{value});
561 } elsif ($token->{type} eq 'wbradded') {
562 # ignore
563 } else {
564 # key, level, place, position, time
565 $parent->{attr}->set_attribute_ns
566 (H2H_NS, $token->{type}, $token->{value});
567 }
568 }
569 } # _construct_tree
570
571 1;
572 ## $Date: 2007/08/05 09:24:56 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24