/[suikacvs]/markup/html/whatpm/Whatpm/H2H.pm
Suika

Contents of /markup/html/whatpm/Whatpm/H2H.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations) (download)
Sun Aug 5 09:24:56 2007 UTC (17 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +4 -1 lines
++ whatpm/Whatpm/ChangeLog	5 Aug 2007 09:23:31 -0000
	* H2H.pm: |b|, |i|, and |sub| are added to the
	list of allowed HTML elements.

2007-08-05  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	5 Aug 2007 09:24:54 -0000
	* Atom.pm: |link|-related checks are added; |type|
	media type checks are added.  |hreflang| now warns
	as unimplemented.

2007-08-05  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::H2H;
2 use strict;
3
4 sub H2H_NS () { q<http://suika.fam.cx/~wakaba/archive/2005/manakai/Markup/H2H/> }
5 sub HTML_NS () { q<http://www.w3.org/1999/xhtml> }
6 sub HTML3_NS () { q<urn:x-suika-fam-cx:markup:ietf:html:3:draft:00:> }
7 sub SW09_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:9:> }
8 sub XHTML2_NS () { q<http://www.w3.org/2002/06/xhtml2/> }
9
10 sub parse_string ($$$) {
11 my $self = bless {
12 token => [],
13 location => {},
14 doc => $_[2],
15 }, $_[0];
16
17 my $s = ''.$_[1];
18 $s =~ s/\x0D\x0A/\x0A/g;
19 $s =~ tr/\x0D/\x0A/;
20 $self->{line} = [split /\x0A/, $s];
21
22 local $Error::Depth = $Error::Depth + 1;
23 $self->{doc}->strict_error_checking (0);
24 my $doc_el = $self->{doc}->create_element_ns (HTML_NS, 'html');
25 $doc_el->set_attribute_ns (q<http://www.w3.org/2000/xmlns/>, 'xmlns', HTML_NS);
26 $self->{doc}->append_child ($doc_el);
27
28 $self->_construct_tree;
29
30 return $self->{doc};
31 } # parse_string
32
33 sub _shift_token ($) {
34 my $self = $_[0];
35
36 if (@{$self->{token}}) {
37 return shift @{$self->{token}};
38 }
39
40 my $attrvalue = sub {
41 my $v = shift;
42 $v =~ s/&quot;/"/g;
43 $v =~ s/&lt;/</g;
44 $v =~ s/&gt;/>/g;
45 $v =~ s/&reg;/\x{00AE}/g;
46 $v =~ s/&hearts;/\x{2661}/g;
47 $v =~ s/&amp;/&/g;
48 return $v;
49 };
50
51 my $uriv = sub {
52 my $v = $attrvalue->(shift);
53 $v =~ s/^\{/(/;
54 $v =~ s/\}$/)/;
55 $v =~ s/^\#([0-9si]+)$/($1)/;
56 $v =~ s/^\(([0-9]{4})([0-9]{2})([0-9]{2})([^)]*)\)$/($1, $2, $3$4)/;
57 $v =~ s/[si]/, /g if $v =~ /^\(/ and $v =~ /\)$/;
58 return $v;
59 };
60
61 my $r = {type => '#EOF'};
62 L: while (defined (my $line = shift @{$self->{line}})) {
63 if ($line =~ s/^([A-Z]+|T[0-9])(\*?\+?\*?)(?:\s+|$)//) {
64 my $command = $1;
65 my $flag = $2;
66 $r = {type => 'start', value => $command};
67
68 my $uri;
69 if ($flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) {
70 $uri = $1;
71 }
72
73 my $attr = '';
74 if ($line =~ s/^\{(\s*(?:[A-Za-z][^{}]*)?)\}\s*//) {
75 $attr = $1;
76 }
77
78 if (not defined $uri and
79 $flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) {
80 $uri = $1;
81 }
82
83 my @token;
84 my $info = {
85 # val# val#(*)
86 ABBR => [2, 2],
87 ACRONYM => [2, 2],
88 CITE => [2, 1],
89 LDIARY => [4, 4],
90 LIMG => [4, 4],
91 LINK => [2, 1],
92 LMG => [2, 2],
93 LNEW => [2, 2],
94 PERSON => [2, 2],
95 RIBU => [2, 2],
96 RUBY => [2, 2],
97 SEE => [2, 2],
98 }->{$command};
99 my @value = split /\s+/, $line,
100 ($flag =~ /\*/ ? $info->[1] : $info->[0]) || 1;
101
102 push @token, {type => 'uri', value => $uriv->($uri)} if defined $uri;
103
104 my %attr;
105 while ($attr =~ /([A-Za-z0-9_-]+)\s*(?:=>?|:)\s*([^";,]+|"[^"]+")/gc) {
106 my $name = lc $1;
107 my $value = $2;
108 $value =~ tr/"//d;
109 $attr{$name} = $value;
110 }
111 delete $attr{'content-type'};
112
113 if ({
114 ABBR => 1, ACRONYM => 1,
115 RUBY => 1, RIBU => 1,
116 }->{$command}) {
117 if (@value == 1 and $attr{title}) {
118 push @value, $attr{title};
119 delete $attr{title};
120 }
121 if (@value == 2) {
122 unshift @{$self->{line}},
123 'RB',
124 'DATA {} >>'.$value[1],
125 '/RB',
126 'RT',
127 'DATA {} >>'.$value[0],
128 '/RT',
129 '/'.$command;
130 } else {
131 unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command;
132 }
133 } elsif ($command eq 'CITE') {
134 if (@value == 2) {
135 if (defined $uri or $value[0] !~ /^[a-z-]+:/) {
136 unshift @{$self->{line}},
137 'SRC',
138 'DATA {} >>'.$value[0].' '.$value[1],
139 '/SRC', 'BODYTEXT';
140 } else {
141 push @token, {type => 'uri', value => $uriv->($value[0])};
142 unshift @{$self->{line}},
143 'SRC',
144 'DATA {} >>'.$value[1],
145 '/SRC', 'BODYTEXT';
146 }
147 } elsif (@value == 1) {
148 if (defined $uri or $value[0] !~ /^[a-z-]+:/) {
149 unshift @{$self->{line}},
150 'SRC',
151 'DATA {} >>'.$value[0],
152 '/SRC', 'BODYTEXT';
153 } else {
154 push @token, {type => 'uri', value => $uriv->($value[0])};
155 unshift @{$self->{line}}, 'BODYTEXT';
156 }
157 } else {
158 unshift @{$self->{line}}, 'BODYTEXT';
159 }
160 } elsif ($command eq 'DATA') {
161 my @token;
162 $line =~ s/^>>//;
163 while (length $line) {
164 if ($line =~ s/^<([a-z0-9]+)\s*//) {
165 my $tagname = $1;
166 push @token, {type => 'start', value => $tagname};
167 while ($line =~ s/^([a-z-]+)\s*=\s*"([^"]*)"\s*//) {
168 push @token, {type => $1, value => $attrvalue->($2)};
169 }
170 $line =~ s#^/?\s*>##;
171 push @token, {type => 'end', value => $tagname}
172 if $tagname eq 'img' or $tagname eq 'input' or
173 $tagname eq 'br';
174 } elsif ($line =~ s#^</([a-z0-9]+)\s*>##) {
175 push @token, {type => 'end', value => $1};
176 } elsif ($line =~ s/^<!--(.*?)-->//) {
177 push @token, {type => 'html-comment', value => $1};
178 } elsif ($line =~ s/^&([a-z]+);//) {
179 my $name = $1;
180 if ($name eq 'amp') {
181 push @token, {type => 'text', value => '&'};
182 } elsif ($name eq 'lt') {
183 push @token, {type => 'text', value => '<'};
184 } elsif ($name eq 'gt') {
185 push @token, {type => 'text', value => '>'};
186 } elsif ($name eq 'quot') {
187 push @token, {type => 'text', value => '"'};
188 } elsif ($name eq 'reg') {
189 push @token, {type => 'text', value => "\x{00AE}"};
190 } elsif ($name eq 'hearts') {
191 push @token, {type => 'text', value => "\x{2661}"};
192 } else {
193 push @token, {type => 'char', value => $name};
194 }
195 } elsif ($line =~ s/^&#([0-9]+);//) {
196 push @token, {type => 'text', value => ord $1};
197 } elsif ($line =~ s/^&#x([0-9A-Fa-f]+);//) {
198 push @token, {type => 'text', value => ord hex $1};
199 } elsif ($line =~ s/^([^<&]+)//) {
200 push @token, {type => 'text', value => $1};
201 } else {
202 push @token, {type => 'text', value => substr ($line, 0, 1)};
203 substr ($line, 0, 1) = '';
204 }
205 }
206 push @token, {type => 'eol'};
207
208 $r = shift @token;
209 push @{$self->{token}}, @token;
210 last L;
211 } elsif ({
212 DD => 1, DT => 1,
213 DEL => 1, INS => 1,
214 LI => 1,
215 RB => 1, RT => 1,
216 STRONG => 1,
217 YAMI => 1,
218 EM => 1,
219 HOUR => 1, KION => 1, LUNCH => 1,
220 TAION => 1, TENKI => 1, THEME => 1,
221 T1 => 1, T2 => 1, T3 => 1, T4 => 1,
222 T5 => 1, T6 => 1, T7 => 1, SP => 1,
223 }->{$command}) {
224 if (@value) {
225 unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command;
226 }
227 } elsif ($command eq 'DIV') {
228 if (@value) {
229 $r = {type => 'class', value => $value[0]};
230 }
231 } elsif ($command eq 'LDIARY') {
232 $value[0] =~ s/^([0-9]{4})([0-9]{2})([0-9]{2})/$1, $2, $3/;
233 $value[0] =~ s/[is]/, /;
234 $r = {type => 'start', value => 'LINK'};
235 push @token, {type => 'uri', value => "($value[0])"};
236 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK';
237 } elsif ($command eq 'LIMG') {
238 $r = {type => 'start', value => 'IMG'};
239 push @token, {type => 'uri', value => $uriv->($value[0])};
240 unshift @{$self->{line}}, 'DATA {} >>'.$value[3], '/IMG';
241 } elsif ($command eq 'LMG') {
242 $r = {type => 'start', value => 'IMG'};
243 push @token, {type => 'uri', value => $uriv->($value[0])};
244 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/IMG';
245 } elsif ($command eq 'LINK') {
246 if (@value == 2) {
247 push @token, {type => 'uri', value => $uriv->($value[0])};
248 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK';
249 } elsif ($flag =~ /\+/) {
250 push @token, {type => 'uri', value => $uriv->($value[0])};
251 } else {
252 unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/LINK';
253 }
254 } elsif ($command eq 'NEW') {
255 $r = {type => 'start', value => 'SECTION'};
256 unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H';
257 } elsif ($command eq 'LNEW') {
258 $r = {type => 'start', value => 'SECTION'};
259 push @token, {type => 'uri', value => $uriv->($value[0])};
260 unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[1], '/H';
261 } elsif ($command eq 'SUB') {
262 $r = {type => 'start', value => 'SUB'};
263 unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H';
264 } elsif ($command eq 'PERSON') {
265 push @token, {type => 'key', value => $attrvalue->($value[0])};
266 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/PERSON';
267 } elsif ($command eq 'SEE') {
268 if (@value == 2) {
269 push @token, {type => 'key', value => $attrvalue->($value[0])};
270 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SEE';
271 } else {
272 unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SEE';
273 }
274 } elsif ($command eq 'SPAN') {
275 if (@value == 2) {
276 push @token, {type => 'class',
277 value => $attrvalue->($value[0])};
278 unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SPAN';
279 } else {
280 unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SPAN';
281 }
282 } elsif ($command eq 'OK') {
283 $r = {type => '#EOF'};
284 next L;
285 } elsif ($command eq 'XML') {
286 unshift @{$self->{line}}, 'DATA {} >>XML '.$line;
287 next L;
288 }
289
290 for (keys %attr) {
291 push @token, {type => $_, value => $attrvalue->($attr{$_})};
292 }
293
294 push @{$self->{token}}, @token;
295 last L;
296 } elsif ($line eq 'H2H/1.0') {
297 $r = {type => 'magic', value => 'H2H/1.0'};
298 last L;
299 } elsif ($line =~ m#^/([A-Z]+)\s*$#) {
300 $r = {type => 'end', value => $1};
301 last L;
302 } elsif ($line =~ s/^!#//) {
303 $r = {type => 'hnf-comment', value => $line};
304 last L;
305 } elsif ($line =~ s/^!//) {
306 $r = {type => 'html-comment', value => $line};
307 last L;
308 } else {
309 unshift @{$self->{line}}, 'DATA {} >>'.$line;
310 next L;
311 }
312 } # L
313
314 return $r;
315 } # _shift_token
316
317 sub _construct_tree ($) {
318 my $self = $_[0];
319
320 my $doc_el = $self->{doc}->document_element;
321 my $head_el = $self->{doc}->create_element_ns (HTML_NS, 'head');
322 my $body_el = $self->{doc}->create_element_ns (HTML_NS, 'body');
323 $doc_el->append_child ($head_el);
324 $doc_el->append_child ($body_el);
325 $doc_el->set_user_data ('command-name' => '#html');
326 $head_el->set_user_data ('command-name' => '#head');
327 $body_el->set_user_data ('command-name' => '#body');
328 $doc_el->set_attribute_ns (SW09_NS, 'sw9:Name' => 'H2H');
329 $doc_el->set_attribute_ns (SW09_NS, 'sw9:Version' => '0.9');
330
331 my $parent = {
332 subsection => $body_el,
333 attr => $body_el,
334 text => $body_el,
335 };
336 my $state = 'data';
337 ## data - normal
338 ## list - UL or OL
339 ## br - after br start tag token
340 ## eol - after eol token
341
342 T: while (my $token = $self->_shift_token) {
343 last T if $token->{type} eq '#EOF';
344
345 if ($token->{type} eq 'text') {
346 if ($state eq 'list') {
347 my $li_el = $self->{doc}->create_element_ns (HTML_NS, 'li');
348 $li_el->manakai_append_text ($token->{value});
349 $parent->{text}->append_child ($li_el);
350 } else {
351 $parent->{text}->manakai_append_text ("\x0A") if $state eq 'eol';
352 $parent->{text}->manakai_append_text ($token->{value});
353 $state = 'data';
354 }
355 } elsif ($token->{type} eq 'eol') {
356 if ($state eq 'eol') {
357 $parent->{text}->manakai_append_text ("\x0A");
358 } else {
359 $state = $state eq 'br' ? 'data' : 'eol';
360 }
361 } elsif ($token->{type} eq 'start') {
362 my $info = {
363 # nsuri, qname, parent, state
364 ABBR => [HTML_NS, 'abbr', $parent->{text}, 'data'],
365 ACRONYM => [HTML_NS, 'abbr', $parent->{text}, 'data'],
366 BODYTEXT => [HTML3_NS, 'bodytext', $parent->{text}, 'data'],
367 CITE => [HTML_NS, 'blockquote', $parent->{text}, 'data',
368 {PRE => 1}],
369 DD => [HTML_NS, 'dd', $parent->{text}, 'data'],
370 DEL => [HTML_NS, 'del', $parent->{text}, 'data'],
371 DIV => [HTML_NS, 'div', $parent->{text}, 'data', {P => 1}],
372 DL => [HTML_NS, 'dl', $parent->{text}, 'data'],
373 DT => [HTML_NS, 'dt', $parent->{text}, 'data'],
374 EM => [HTML_NS, 'em', $parent->{text}, 'data'],
375 FN => [H2H_NS, 'fn', $parent->{text}, 'data'],
376 H => [XHTML2_NS, 'h', $parent->{text}, 'data'],
377 HOUR => [H2H_NS, 'hour', $head_el, 'data'],
378 IMG => [HTML_NS, 'img', $parent->{text}, 'data'],
379 INS => [HTML_NS, 'ins', $parent->{text}, 'data'],
380 KION => [H2H_NS, 'kion', $head_el, 'data'],
381 LI => [HTML_NS, 'li', $parent->{text}, 'data'],
382 LINK => [HTML_NS, 'a', $parent->{text}, 'data'],
383 LUNCH => [H2H_NS, 'lunch', $head_el, 'data'],
384 OL => [HTML_NS, 'ol', $parent->{text}, 'list', {PRE => 1}],
385 P => [HTML_NS, 'p', $parent->{text}, 'data',
386 {P => 1, PRE => 1}],
387 PERSON => [HTML3_NS, 'person', $parent->{text}, 'data'],
388 PRE => [HTML_NS, 'pre', $parent->{text}, 'data',
389 {P => 1}],
390 RB => [HTML_NS, 'rb', $parent->{text}, 'data'],
391 RIBU => [HTML_NS, 'ruby', $parent->{text}, 'data'],
392 RT => [HTML_NS, 'rt', $parent->{text}, 'data'],
393 RUBY => [HTML_NS, 'ruby', $parent->{text}, 'data'],
394 SECTION => [HTML_NS, 'section', $body_el, 'data'],
395 SEE => [HTML_NS, 'i', $parent->{text}, 'data'],
396 SP => [H2H_NS, 'sp', $head_el, 'data'],
397 SPAN => [HTML_NS, 'span', $parent->{text}, 'data'],
398 SRC => [HTML3_NS, 'credit', $parent->{text}, 'data'],
399 STRONG => [HTML_NS, 'strong', $parent->{text}, 'data'],
400 SUBSECTION => [HTML_NS, 'section', $parent->{subsection}, 'data'],
401 T1 => [H2H_NS, 't1', $head_el, 'data'],
402 T2 => [H2H_NS, 't2', $head_el, 'data'],
403 T3 => [H2H_NS, 't3', $head_el, 'data'],
404 T4 => [H2H_NS, 't4', $head_el, 'data'],
405 T5 => [H2H_NS, 't5', $head_el, 'data'],
406 T6 => [H2H_NS, 't6', $head_el, 'data'],
407 T7 => [H2H_NS, 't7', $head_el, 'data'],
408 TAION => [H2H_NS, 'taion', $head_el, 'data'],
409 TENKI => [H2H_NS, 'tenki', $head_el, 'data'],
410 THEME => [H2H_NS, 'theme', $head_el, 'data'],
411 UL => [HTML_NS, 'ul', $parent->{text}, 'list', {PRE => 1}],
412 YAMI => [H2H_NS, 'yami', $parent->{text}, 'data'],
413 a => [HTML_NS, 'a', $parent->{text}, 'data'],
414 abbr => [HTML_NS, 'abbr', $parent->{text}, 'data'],
415 acronym => [HTML_NS, 'abbr', $parent->{text}, 'data'],
416 b => [HTML_NS, 'b', $parent->{text}, 'data'],
417 blockquote => [HTML_NS, 'blockquote', $parent->{text}, 'data'],
418 br => [HTML_NS, 'br', $parent->{text}, 'br'],
419 caption => [HTML_NS, 'caption', $parent->{text}, 'data'],
420 code => [HTML_NS, 'code', $parent->{text}, 'data'],
421 dd => [HTML_NS, 'dd', $parent->{text}, 'data'],
422 del => [HTML_NS, 'del', $parent->{text}, 'data'],
423 dfn => [HTML_NS, 'dfn', $parent->{text}, 'data'],
424 div => [HTML_NS, 'div', $parent->{text}, 'data'],
425 dl => [HTML_NS, 'dl', $parent->{text}, 'data'],
426 dt => [HTML_NS, 'dt', $parent->{text}, 'data'],
427 em => [HTML_NS, 'em', $parent->{text}, 'data'],
428 form => [HTML_NS, 'form', $parent->{text}, 'data'],
429 h1 => [HTML_NS, 'h1', $parent->{text}, 'data'],
430 h2 => [HTML_NS, 'h2', $parent->{text}, 'data'],
431 h3 => [HTML_NS, 'h3', $parent->{text}, 'data'],
432 h4 => [HTML_NS, 'h4', $parent->{text}, 'data'],
433 i => [HTML_NS, 'i', $parent->{text}, 'data'],
434 img => [HTML_NS, 'img', $parent->{text}, 'data'],
435 input => [HTML_NS, 'input', $parent->{text}, 'data'],
436 ins => [HTML_NS, 'ins', $parent->{text}, 'data'],
437 kbd => [HTML_NS, 'kbd', $parent->{text}, 'data'],
438 label => [HTML_NS, 'label', $parent->{text}, 'data'],
439 li => [HTML_NS, 'li', $parent->{text}, 'data'],
440 ol => [HTML_NS, 'ol', $parent->{text}, 'data'],
441 p => [HTML_NS, 'p', $parent->{text}, 'data'],
442 pre => [HTML_NS, 'pre', $parent->{text}, 'data'],
443 q => [HTML_NS, 'q', $parent->{text}, 'data'],
444 rb => [HTML_NS, 'rb', $parent->{text}, 'data'],
445 rp => [HTML_NS, 'rp', $parent->{text}, 'data'],
446 rt => [HTML_NS, 'rt', $parent->{text}, 'data'],
447 ruby => [HTML_NS, 'ruby', $parent->{text}, 'data'],
448 samp => [HTML_NS, 'samp', $parent->{text}, 'data'],
449 span => [HTML_NS, 'span', $parent->{text}, 'data'],
450 strong => [HTML_NS, 'strong', $parent->{text}, 'data'],
451 sub => [HTML_NS, 'sub', $parent->{text}, 'data'],
452 sup => [HTML_NS, 'sup', $parent->{text}, 'data'],
453 table => [HTML_NS, 'table', $parent->{text}, 'data'],
454 tbody => [HTML_NS, 'tbody', $parent->{text}, 'data'],
455 td => [HTML_NS, 'td', $parent->{text}, 'data'],
456 th => [HTML_NS, 'th', $parent->{text}, 'data'],
457 thead => [HTML_NS, 'thead', $parent->{text}, 'data'],
458 tr => [HTML_NS, 'tr', $parent->{text}, 'data'],
459 ul => [HTML_NS, 'ul', $parent->{text}, 'data'],
460 var => [HTML_NS, 'var', $parent->{text}, 'data'],
461 }->{$token->{value}}
462 || [H2H_NS, $token->{value}, $parent->{text}, 'data'];
463 while ($info->[4]->{$info->[2]->get_user_data ('command-name')}) {
464 $info->[2] = $info->[2]->parent_node;
465 }
466 my $el = $self->{doc}->create_element_ns ($info->[0], $info->[1]);
467 $el->set_user_data ('command-name', $token->{value});
468 $info->[2]->append_child ($el);
469 $parent->{text} = $el;
470 $parent->{attr} = $el;
471 $parent->{subsection} = $el if $token->{value} eq 'SECTION';
472 $state = $info->[3];
473 } elsif ($token->{type} eq 'end') {
474 E: while (my $et = $parent->{text}->get_user_data ('command-name')) {
475 $parent->{text} = $parent->{text}->parent_node;
476 last E if $et eq $token->{value};
477 last E if $et eq '#body';
478 }
479 $parent->{attr} = $parent->{text};
480 $state = {
481 UL => 'list',
482 OL => 'list',
483 }->{$parent->{text}->get_user_data ('command-name')}
484 || $state eq 'br' ? 'br' : 'data';
485 } elsif ($token->{type} eq 'char') {
486 my $el = $self->{doc}->create_element_ns (H2H_NS, 'char');
487 $el->manakai_append_text ($token->{value});
488 $parent->{text}->append_child ($el);
489 $state = 'data' if $state eq 'br';
490 } elsif ($token->{type} eq 'magic') {
491 my ($name, $version) = split m#/#, $token->{value}, 2;
492 $doc_el->set_attribute_ns (SW09_NS, 'sw9:Name', $name);
493 $doc_el->set_attribute_ns (SW09_NS, 'sw9:Version', $version);
494 } elsif ($token->{type} eq 'hnf-comment') {
495 my $com = $self->{doc}->create_element_ns
496 (H2H_NS, 'hnf-comment');
497 $com->text_content ($token->{value});
498 $parent->{text}->append_child ($com);
499 } elsif ($token->{type} eq 'html-comment') {
500 my $com = $self->{doc}->create_element_ns
501 (H2H_NS, 'html-comment');
502 $com->text_content ($token->{value});
503 $parent->{text}->append_child ($com);
504 } elsif ($token->{type} eq 'source') {
505 my $src = $self->{doc}->create_element_ns (HTML3_NS, 'credit');
506 $src->manakai_append_text ($token->{value});
507 $parent->{text}->append_child ($src);
508 } elsif ($token->{type} eq 'uri') {
509 my $v = $token->{value};
510 if ($v =~ /^\(([^()]+)\)$/) {
511 my @v = split /\s*,\s*/, $1;
512 $parent->{attr}->set_attribute_ns
513 (H2H_NS, 'href-year' => $v[0]+0);
514 $parent->{attr}->set_attribute_ns
515 (H2H_NS, 'href-month' => $v[1]+0);
516 $parent->{attr}->set_attribute_ns
517 (H2H_NS, 'href-day' => $v[2]+0);
518 $parent->{attr}->set_attribute_ns
519 (H2H_NS, 'href-section' => $v[3]+0) if $v[3];
520 $parent->{attr}->set_attribute_ns
521 (H2H_NS, 'href-subsection' => $v[4]+0)
522 if $v[4];
523 } else {
524 my $xuri = $parent->{attr}->manakai_expanded_uri;
525 if ($xuri eq HTML_NS . 'a') {
526 $parent->{attr}->set_attribute_ns
527 (undef, href => $token->{value});
528 } elsif ($xuri eq HTML_NS . 'blockquote') {
529 $parent->{attr}->set_attribute_ns
530 (undef, cite => $token->{value});
531 } else {
532 $parent->{attr}->set_attribute_ns
533 (XHTML2_NS, href => $token->{value});
534 }
535 }
536 } elsif ({
537 title => 1, style => 1,
538 class => 1, href => 1, 'accept-charset' => 1,
539 action => 1, method => 1, alt => 1, src => 1,
540 type => 1, value => 1, name => 1, accesskey => 1,
541 for => 1, cite => 1, onclick => 1, colspan => 1,
542 scope => 1, summary => 1,
543 }->{$token->{type}}) {
544 $parent->{attr}->set_attribute_ns
545 (undef, $token->{type}, $token->{value});
546 } elsif ($token->{type} eq 'cat') {
547 for (split /\s*,\s*/, $token->{value}) {
548 my $el = $self->{doc}->create_element_ns (H2H_NS, 'cat');
549 $el->manakai_append_text ($_);
550 $parent->{attr}->append_child ($el);
551 }
552 } elsif ($token->{type} eq 'lang' or $token->{type} eq 'xml:lang') {
553 $parent->{attr}->set_attribute_ns
554 (q<http://www.w3.org/XML/1998/namespace>,
555 'xml:lang' => $token->{value});
556 } elsif ($token->{type} eq 'id') {
557 $parent->{attr}->set_attribute_ns
558 (q<http://www.w3.org/XML/1998/namespace>,
559 'xml:id' => $token->{value});
560 } elsif ($token->{type} eq 'wbradded') {
561 # ignore
562 } else {
563 # key, level, place, position, time
564 $parent->{attr}->set_attribute_ns
565 (H2H_NS, $token->{type}, $token->{value});
566 }
567 }
568 } # _construct_tree
569
570 1;
571 ## $Date: 2007/08/05 07:12:45 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24