/[suikacvs]/markup/html/whatpm/Whatpm/H2H.pm
Suika

Contents of /markup/html/whatpm/Whatpm/H2H.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations) (download)
Sun Aug 17 05:09:12 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +4 -3 lines
++ whatpm/Whatpm/ChangeLog	17 Aug 2008 05:06:46 -0000
2008-08-17  Wakaba  <wakaba@suika.fam.cx>

	* H2H.pm (_shift_token): Support for unquoted HTML attribute
	values.

++ whatpm/Whatpm/ContentChecker/ChangeLog	17 Aug 2008 05:08:51 -0000
2008-08-17  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm (%XHTML2CommonAttrStatus): HTML5 status was missing.

1 wakaba 1.1 package Whatpm::H2H;
2     use strict;
3    
4     sub H2H_NS () { q<http://suika.fam.cx/~wakaba/archive/2005/manakai/Markup/H2H/> }
5     sub HTML_NS () { q<http://www.w3.org/1999/xhtml> }
6     sub HTML3_NS () { q<urn:x-suika-fam-cx:markup:ietf:html:3:draft:00:> }
7     sub SW09_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:9:> }
8     sub XHTML2_NS () { q<http://www.w3.org/2002/06/xhtml2/> }
9    
10     sub parse_string ($$$) {
11     my $self = bless {
12     token => [],
13     location => {},
14     doc => $_[2],
15     }, $_[0];
16    
17     my $s = ''.$_[1];
18     $s =~ s/\x0D\x0A/\x0A/g;
19     $s =~ tr/\x0D/\x0A/;
20     $self->{line} = [split /\x0A/, $s];
21    
22     local $Error::Depth = $Error::Depth + 1;
23     $self->{doc}->strict_error_checking (0);
24     my $doc_el = $self->{doc}->create_element_ns (HTML_NS, 'html');
25     $doc_el->set_attribute_ns (q<http://www.w3.org/2000/xmlns/>, 'xmlns', HTML_NS);
26     $self->{doc}->append_child ($doc_el);
27    
28     $self->_construct_tree;
29    
30     return $self->{doc};
31     } # parse_string
32    
33     sub _shift_token ($) {
34     my $self = $_[0];
35    
36     if (@{$self->{token}}) {
37     return shift @{$self->{token}};
38     }
39    
40     my $attrvalue = sub {
41     my $v = shift;
42     $v =~ s/&quot;/"/g;
43     $v =~ s/&lt;/</g;
44     $v =~ s/&gt;/>/g;
45     $v =~ s/&reg;/\x{00AE}/g;
46     $v =~ s/&hearts;/\x{2661}/g;
47     $v =~ s/&amp;/&/g;
48     return $v;
49     };
50    
51     my $uriv = sub {
52     my $v = $attrvalue->(shift);
53     $v =~ s/^\{/(/;
54     $v =~ s/\}$/)/;
55     $v =~ s/^\#([0-9si]+)$/($1)/;
56     $v =~ s/^\(([0-9]{4})([0-9]{2})([0-9]{2})([^)]*)\)$/($1, $2, $3$4)/;
57     $v =~ s/[si]/, /g if $v =~ /^\(/ and $v =~ /\)$/;
58     return $v;
59     };
60    
61     my $r = {type => '#EOF'};
62     L: while (defined (my $line = shift @{$self->{line}})) {
63     if ($line =~ s/^([A-Z]+|T[0-9])(\*?\+?\*?)(?:\s+|$)//) {
64     my $command = $1;
65     my $flag = $2;
66     $r = {type => 'start', value => $command};
67    
68     my $uri;
69     if ($flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) {
70     $uri = $1;
71     }
72    
73     my $attr = '';
74     if ($line =~ s/^\{(\s*(?:[A-Za-z][^{}]*)?)\}\s*//) {
75     $attr = $1;
76     }
77    
78     if (not defined $uri and
79     $flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) {
80     $uri = $1;
81     }
82    
83     my @token;
84     my $info = {
85     # val# val#(*)
86     ABBR => [2, 2],
87     ACRONYM => [2, 2],
88     CITE => [2, 1],
89     LDIARY => [4, 4],
90     LIMG => [4, 4],
91     LINK => [2, 1],
92     LMG => [2, 2],
93     LNEW => [2, 2],
94     PERSON => [2, 2],
95     RIBU => [2, 2],
96     RUBY => [2, 2],
97     SEE => [2, 2],
98     }->{$command};
99     my @value = split /\s+/, $line,
100     ($flag =~ /\*/ ? $info->[1] : $info->[0]) || 1;
101    
102     push @token, {type => 'uri', value => $uriv->($uri)} if defined $uri;
103    
104     my %attr;
105     while ($attr =~ /([A-Za-z0-9_-]+)\s*(?:=>?|:)\s*([^";,]+|"[^"]+")/gc) {
106     my $name = lc $1;
107     my $value = $2;
108     $value =~ tr/"//d;
109     $attr{$name} = $value;
110     }
111     delete $attr{'content-type'};
112    
113     if ({
114     ABBR => 1, ACRONYM => 1,
115     RUBY => 1, RIBU => 1,
116     }->{$command}) {
117     if (@value == 1 and $attr{title}) {
118     push @value, $attr{title};
119     delete $attr{title};
120     }
121     if (@value == 2) {
122     unshift @{$self->{line}},
123     'RB',
124     'DATA {} >>'.$value[1],
125     '/RB',
126     'RT',
127     'DATA {} >>'.$value[0],
128     '/RT',
129     '/'.$command;
130     } else {
131     unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command;
132     }
133     } elsif ($command eq 'CITE') {
134     if (@value == 2) {
135     if (defined $uri or $value[0] !~ /^[a-z-]+:/) {
136     unshift @{$self->{line}},
137     'SRC',
138     'DATA {} >>'.$value[0].' '.$value[1],
139     '/SRC', 'BODYTEXT';
140     } else {
141     push @token, {type => 'uri', value => $uriv->($value[0])};
142     unshift @{$self->{line}},
143     'SRC',
144     'DATA {} >>'.$value[1],
145     '/SRC', 'BODYTEXT';
146     }
147     } elsif (@value == 1) {
148     if (defined $uri or $value[0] !~ /^[a-z-]+:/) {
149     unshift @{$self->{line}},
150     'SRC',
151     'DATA {} >>'.$value[0],
152     '/SRC', 'BODYTEXT';
153     } else {
154     push @token, {type => 'uri', value => $uriv->($value[0])};
155     unshift @{$self->{line}}, 'BODYTEXT';
156     }
157     } else {
158     unshift @{$self->{line}}, 'BODYTEXT';
159     }
160     } elsif ($command eq 'DATA') {
161     my @token;
162     $line =~ s/^>>//;
163     while (length $line) {
164     if ($line =~ s/^<([a-z0-9]+)\s*//) {
165     my $tagname = $1;
166     push @token, {type => 'start', value => $tagname};
167 wakaba 1.4 while ($line =~ s/^([a-z-]+)\s*=\s*(?>"([^"]*)"|([^\s">]*))\s*//) {
168     push @token, {type => $1,
169     value => $attrvalue->(defined $2 ? $2 : $3)};
170 wakaba 1.1 }
171     $line =~ s#^/?\s*>##;
172     push @token, {type => 'end', value => $tagname}
173     if $tagname eq 'img' or $tagname eq 'input' or
174     $tagname eq 'br';
175     } elsif ($line =~ s#^</([a-z0-9]+)\s*>##) {
176     push @token, {type => 'end', value => $1};
177     } elsif ($line =~ s/^<!--(.*?)-->//) {
178     push @token, {type => 'html-comment', value => $1};
179     } elsif ($line =~ s/^&([a-z]+);//) {
180     my $name = $1;
181     if ($name eq 'amp') {
182     push @token, {type => 'text', value => '&'};
183     } elsif ($name eq 'lt') {
184     push @token, {type => 'text', value => '<'};
185     } elsif ($name eq 'gt') {
186     push @token, {type => 'text', value => '>'};
187     } elsif ($name eq 'quot') {
188     push @token, {type => 'text', value => '"'};
189     } elsif ($name eq 'reg') {
190     push @token, {type => 'text', value => "\x{00AE}"};
191     } elsif ($name eq 'hearts') {
192     push @token, {type => 'text', value => "\x{2661}"};
193     } else {
194     push @token, {type => 'char', value => $name};
195     }
196     } elsif ($line =~ s/^&#([0-9]+);//) {
197     push @token, {type => 'text', value => ord $1};
198     } elsif ($line =~ s/^&#x([0-9A-Fa-f]+);//) {
199     push @token, {type => 'text', value => ord hex $1};
200     } elsif ($line =~ s/^([^<&]+)//) {
201     push @token, {type => 'text', value => $1};
202     } else {
203     push @token, {type => 'text', value => substr ($line, 0, 1)};
204     substr ($line, 0, 1) = '';
205     }
206     }
207     push @token, {type => 'eol'};
208    
209     $r = shift @token;
210     push @{$self->{token}}, @token;
211     last L;
212     } elsif ({
213     DD => 1, DT => 1,
214     DEL => 1, INS => 1,
215     LI => 1,
216     RB => 1, RT => 1,
217     STRONG => 1,
218     YAMI => 1,
219     EM => 1,
220     HOUR => 1, KION => 1, LUNCH => 1,
221     TAION => 1, TENKI => 1, THEME => 1,
222     T1 => 1, T2 => 1, T3 => 1, T4 => 1,
223     T5 => 1, T6 => 1, T7 => 1, SP => 1,
224     }->{$command}) {
225     if (@value) {
226     unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command;
227     }
228     } elsif ($command eq 'DIV') {
229     if (@value) {
230     $r = {type => 'class', value => $value[0]};
231     }
232     } elsif ($command eq 'LDIARY') {
233     $value[0] =~ s/^([0-9]{4})([0-9]{2})([0-9]{2})/$1, $2, $3/;
234     $value[0] =~ s/[is]/, /;
235     $r = {type => 'start', value => 'LINK'};
236     push @token, {type => 'uri', value => "($value[0])"};
237     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK';
238     } elsif ($command eq 'LIMG') {
239     $r = {type => 'start', value => 'IMG'};
240     push @token, {type => 'uri', value => $uriv->($value[0])};
241     unshift @{$self->{line}}, 'DATA {} >>'.$value[3], '/IMG';
242     } elsif ($command eq 'LMG') {
243     $r = {type => 'start', value => 'IMG'};
244     push @token, {type => 'uri', value => $uriv->($value[0])};
245     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/IMG';
246     } elsif ($command eq 'LINK') {
247     if (@value == 2) {
248     push @token, {type => 'uri', value => $uriv->($value[0])};
249     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK';
250     } elsif ($flag =~ /\+/) {
251     push @token, {type => 'uri', value => $uriv->($value[0])};
252     } else {
253     unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/LINK';
254     }
255     } elsif ($command eq 'NEW') {
256     $r = {type => 'start', value => 'SECTION'};
257     unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H';
258     } elsif ($command eq 'LNEW') {
259     $r = {type => 'start', value => 'SECTION'};
260     push @token, {type => 'uri', value => $uriv->($value[0])};
261     unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[1], '/H';
262     } elsif ($command eq 'SUB') {
263     $r = {type => 'start', value => 'SUB'};
264     unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H';
265     } elsif ($command eq 'PERSON') {
266     push @token, {type => 'key', value => $attrvalue->($value[0])};
267     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/PERSON';
268     } elsif ($command eq 'SEE') {
269     if (@value == 2) {
270     push @token, {type => 'key', value => $attrvalue->($value[0])};
271     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SEE';
272     } else {
273     unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SEE';
274     }
275     } elsif ($command eq 'SPAN') {
276     if (@value == 2) {
277     push @token, {type => 'class',
278     value => $attrvalue->($value[0])};
279     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SPAN';
280     } else {
281     unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SPAN';
282     }
283     } elsif ($command eq 'OK') {
284     $r = {type => '#EOF'};
285     next L;
286     } elsif ($command eq 'XML') {
287     unshift @{$self->{line}}, 'DATA {} >>XML '.$line;
288     next L;
289     }
290    
291     for (keys %attr) {
292     push @token, {type => $_, value => $attrvalue->($attr{$_})};
293     }
294    
295     push @{$self->{token}}, @token;
296     last L;
297     } elsif ($line eq 'H2H/1.0') {
298     $r = {type => 'magic', value => 'H2H/1.0'};
299     last L;
300     } elsif ($line =~ m#^/([A-Z]+)\s*$#) {
301     $r = {type => 'end', value => $1};
302     last L;
303     } elsif ($line =~ s/^!#//) {
304     $r = {type => 'hnf-comment', value => $line};
305     last L;
306     } elsif ($line =~ s/^!//) {
307     $r = {type => 'html-comment', value => $line};
308     last L;
309     } else {
310     unshift @{$self->{line}}, 'DATA {} >>'.$line;
311     next L;
312     }
313     } # L
314    
315     return $r;
316     } # _shift_token
317    
318     sub _construct_tree ($) {
319     my $self = $_[0];
320    
321     my $doc_el = $self->{doc}->document_element;
322     my $head_el = $self->{doc}->create_element_ns (HTML_NS, 'head');
323     my $body_el = $self->{doc}->create_element_ns (HTML_NS, 'body');
324     $doc_el->append_child ($head_el);
325     $doc_el->append_child ($body_el);
326     $doc_el->set_user_data ('command-name' => '#html');
327     $head_el->set_user_data ('command-name' => '#head');
328     $body_el->set_user_data ('command-name' => '#body');
329     $doc_el->set_attribute_ns (SW09_NS, 'sw9:Name' => 'H2H');
330     $doc_el->set_attribute_ns (SW09_NS, 'sw9:Version' => '0.9');
331    
332     my $parent = {
333     subsection => $body_el,
334     attr => $body_el,
335     text => $body_el,
336     };
337     my $state = 'data';
338     ## data - normal
339     ## list - UL or OL
340     ## br - after br start tag token
341     ## eol - after eol token
342    
343     T: while (my $token = $self->_shift_token) {
344     last T if $token->{type} eq '#EOF';
345    
346     if ($token->{type} eq 'text') {
347     if ($state eq 'list') {
348     my $li_el = $self->{doc}->create_element_ns (HTML_NS, 'li');
349     $li_el->manakai_append_text ($token->{value});
350     $parent->{text}->append_child ($li_el);
351     } else {
352     $parent->{text}->manakai_append_text ("\x0A") if $state eq 'eol';
353     $parent->{text}->manakai_append_text ($token->{value});
354     $state = 'data';
355     }
356     } elsif ($token->{type} eq 'eol') {
357     if ($state eq 'eol') {
358     $parent->{text}->manakai_append_text ("\x0A");
359     } else {
360     $state = $state eq 'br' ? 'data' : 'eol';
361     }
362     } elsif ($token->{type} eq 'start') {
363     my $info = {
364     # nsuri, qname, parent, state
365     ABBR => [HTML_NS, 'abbr', $parent->{text}, 'data'],
366     ACRONYM => [HTML_NS, 'abbr', $parent->{text}, 'data'],
367     BODYTEXT => [HTML3_NS, 'bodytext', $parent->{text}, 'data'],
368     CITE => [HTML_NS, 'blockquote', $parent->{text}, 'data',
369     {PRE => 1}],
370     DD => [HTML_NS, 'dd', $parent->{text}, 'data'],
371     DEL => [HTML_NS, 'del', $parent->{text}, 'data'],
372     DIV => [HTML_NS, 'div', $parent->{text}, 'data', {P => 1}],
373     DL => [HTML_NS, 'dl', $parent->{text}, 'data'],
374     DT => [HTML_NS, 'dt', $parent->{text}, 'data'],
375     EM => [HTML_NS, 'em', $parent->{text}, 'data'],
376     FN => [H2H_NS, 'fn', $parent->{text}, 'data'],
377     H => [XHTML2_NS, 'h', $parent->{text}, 'data'],
378     HOUR => [H2H_NS, 'hour', $head_el, 'data'],
379     IMG => [HTML_NS, 'img', $parent->{text}, 'data'],
380     INS => [HTML_NS, 'ins', $parent->{text}, 'data'],
381     KION => [H2H_NS, 'kion', $head_el, 'data'],
382     LI => [HTML_NS, 'li', $parent->{text}, 'data'],
383     LINK => [HTML_NS, 'a', $parent->{text}, 'data'],
384     LUNCH => [H2H_NS, 'lunch', $head_el, 'data'],
385     OL => [HTML_NS, 'ol', $parent->{text}, 'list', {PRE => 1}],
386     P => [HTML_NS, 'p', $parent->{text}, 'data',
387     {P => 1, PRE => 1}],
388     PERSON => [HTML3_NS, 'person', $parent->{text}, 'data'],
389     PRE => [HTML_NS, 'pre', $parent->{text}, 'data',
390     {P => 1}],
391     RB => [HTML_NS, 'rb', $parent->{text}, 'data'],
392     RIBU => [HTML_NS, 'ruby', $parent->{text}, 'data'],
393     RT => [HTML_NS, 'rt', $parent->{text}, 'data'],
394     RUBY => [HTML_NS, 'ruby', $parent->{text}, 'data'],
395     SECTION => [HTML_NS, 'section', $body_el, 'data'],
396     SEE => [HTML_NS, 'i', $parent->{text}, 'data'],
397     SP => [H2H_NS, 'sp', $head_el, 'data'],
398     SPAN => [HTML_NS, 'span', $parent->{text}, 'data'],
399     SRC => [HTML3_NS, 'credit', $parent->{text}, 'data'],
400     STRONG => [HTML_NS, 'strong', $parent->{text}, 'data'],
401     SUBSECTION => [HTML_NS, 'section', $parent->{subsection}, 'data'],
402     T1 => [H2H_NS, 't1', $head_el, 'data'],
403     T2 => [H2H_NS, 't2', $head_el, 'data'],
404     T3 => [H2H_NS, 't3', $head_el, 'data'],
405     T4 => [H2H_NS, 't4', $head_el, 'data'],
406     T5 => [H2H_NS, 't5', $head_el, 'data'],
407     T6 => [H2H_NS, 't6', $head_el, 'data'],
408     T7 => [H2H_NS, 't7', $head_el, 'data'],
409     TAION => [H2H_NS, 'taion', $head_el, 'data'],
410     TENKI => [H2H_NS, 'tenki', $head_el, 'data'],
411     THEME => [H2H_NS, 'theme', $head_el, 'data'],
412     UL => [HTML_NS, 'ul', $parent->{text}, 'list', {PRE => 1}],
413     YAMI => [H2H_NS, 'yami', $parent->{text}, 'data'],
414     a => [HTML_NS, 'a', $parent->{text}, 'data'],
415     abbr => [HTML_NS, 'abbr', $parent->{text}, 'data'],
416     acronym => [HTML_NS, 'abbr', $parent->{text}, 'data'],
417 wakaba 1.3 b => [HTML_NS, 'b', $parent->{text}, 'data'],
418 wakaba 1.1 blockquote => [HTML_NS, 'blockquote', $parent->{text}, 'data'],
419     br => [HTML_NS, 'br', $parent->{text}, 'br'],
420     caption => [HTML_NS, 'caption', $parent->{text}, 'data'],
421     code => [HTML_NS, 'code', $parent->{text}, 'data'],
422     dd => [HTML_NS, 'dd', $parent->{text}, 'data'],
423     del => [HTML_NS, 'del', $parent->{text}, 'data'],
424     dfn => [HTML_NS, 'dfn', $parent->{text}, 'data'],
425     div => [HTML_NS, 'div', $parent->{text}, 'data'],
426     dl => [HTML_NS, 'dl', $parent->{text}, 'data'],
427     dt => [HTML_NS, 'dt', $parent->{text}, 'data'],
428     em => [HTML_NS, 'em', $parent->{text}, 'data'],
429     form => [HTML_NS, 'form', $parent->{text}, 'data'],
430     h1 => [HTML_NS, 'h1', $parent->{text}, 'data'],
431     h2 => [HTML_NS, 'h2', $parent->{text}, 'data'],
432     h3 => [HTML_NS, 'h3', $parent->{text}, 'data'],
433     h4 => [HTML_NS, 'h4', $parent->{text}, 'data'],
434 wakaba 1.3 i => [HTML_NS, 'i', $parent->{text}, 'data'],
435 wakaba 1.1 img => [HTML_NS, 'img', $parent->{text}, 'data'],
436     input => [HTML_NS, 'input', $parent->{text}, 'data'],
437     ins => [HTML_NS, 'ins', $parent->{text}, 'data'],
438     kbd => [HTML_NS, 'kbd', $parent->{text}, 'data'],
439     label => [HTML_NS, 'label', $parent->{text}, 'data'],
440     li => [HTML_NS, 'li', $parent->{text}, 'data'],
441     ol => [HTML_NS, 'ol', $parent->{text}, 'data'],
442     p => [HTML_NS, 'p', $parent->{text}, 'data'],
443     pre => [HTML_NS, 'pre', $parent->{text}, 'data'],
444     q => [HTML_NS, 'q', $parent->{text}, 'data'],
445     rb => [HTML_NS, 'rb', $parent->{text}, 'data'],
446     rp => [HTML_NS, 'rp', $parent->{text}, 'data'],
447     rt => [HTML_NS, 'rt', $parent->{text}, 'data'],
448     ruby => [HTML_NS, 'ruby', $parent->{text}, 'data'],
449 wakaba 1.2 samp => [HTML_NS, 'samp', $parent->{text}, 'data'],
450 wakaba 1.1 span => [HTML_NS, 'span', $parent->{text}, 'data'],
451     strong => [HTML_NS, 'strong', $parent->{text}, 'data'],
452 wakaba 1.3 sub => [HTML_NS, 'sub', $parent->{text}, 'data'],
453 wakaba 1.1 sup => [HTML_NS, 'sup', $parent->{text}, 'data'],
454     table => [HTML_NS, 'table', $parent->{text}, 'data'],
455     tbody => [HTML_NS, 'tbody', $parent->{text}, 'data'],
456     td => [HTML_NS, 'td', $parent->{text}, 'data'],
457     th => [HTML_NS, 'th', $parent->{text}, 'data'],
458     thead => [HTML_NS, 'thead', $parent->{text}, 'data'],
459     tr => [HTML_NS, 'tr', $parent->{text}, 'data'],
460     ul => [HTML_NS, 'ul', $parent->{text}, 'data'],
461     var => [HTML_NS, 'var', $parent->{text}, 'data'],
462     }->{$token->{value}}
463     || [H2H_NS, $token->{value}, $parent->{text}, 'data'];
464     while ($info->[4]->{$info->[2]->get_user_data ('command-name')}) {
465     $info->[2] = $info->[2]->parent_node;
466     }
467     my $el = $self->{doc}->create_element_ns ($info->[0], $info->[1]);
468     $el->set_user_data ('command-name', $token->{value});
469     $info->[2]->append_child ($el);
470     $parent->{text} = $el;
471     $parent->{attr} = $el;
472     $parent->{subsection} = $el if $token->{value} eq 'SECTION';
473     $state = $info->[3];
474     } elsif ($token->{type} eq 'end') {
475     E: while (my $et = $parent->{text}->get_user_data ('command-name')) {
476     $parent->{text} = $parent->{text}->parent_node;
477     last E if $et eq $token->{value};
478     last E if $et eq '#body';
479     }
480     $parent->{attr} = $parent->{text};
481     $state = {
482     UL => 'list',
483     OL => 'list',
484     }->{$parent->{text}->get_user_data ('command-name')}
485     || $state eq 'br' ? 'br' : 'data';
486     } elsif ($token->{type} eq 'char') {
487     my $el = $self->{doc}->create_element_ns (H2H_NS, 'char');
488     $el->manakai_append_text ($token->{value});
489     $parent->{text}->append_child ($el);
490     $state = 'data' if $state eq 'br';
491     } elsif ($token->{type} eq 'magic') {
492     my ($name, $version) = split m#/#, $token->{value}, 2;
493     $doc_el->set_attribute_ns (SW09_NS, 'sw9:Name', $name);
494     $doc_el->set_attribute_ns (SW09_NS, 'sw9:Version', $version);
495     } elsif ($token->{type} eq 'hnf-comment') {
496     my $com = $self->{doc}->create_element_ns
497     (H2H_NS, 'hnf-comment');
498     $com->text_content ($token->{value});
499     $parent->{text}->append_child ($com);
500     } elsif ($token->{type} eq 'html-comment') {
501     my $com = $self->{doc}->create_element_ns
502     (H2H_NS, 'html-comment');
503     $com->text_content ($token->{value});
504     $parent->{text}->append_child ($com);
505     } elsif ($token->{type} eq 'source') {
506     my $src = $self->{doc}->create_element_ns (HTML3_NS, 'credit');
507     $src->manakai_append_text ($token->{value});
508     $parent->{text}->append_child ($src);
509     } elsif ($token->{type} eq 'uri') {
510     my $v = $token->{value};
511     if ($v =~ /^\(([^()]+)\)$/) {
512     my @v = split /\s*,\s*/, $1;
513     $parent->{attr}->set_attribute_ns
514     (H2H_NS, 'href-year' => $v[0]+0);
515     $parent->{attr}->set_attribute_ns
516     (H2H_NS, 'href-month' => $v[1]+0);
517     $parent->{attr}->set_attribute_ns
518     (H2H_NS, 'href-day' => $v[2]+0);
519     $parent->{attr}->set_attribute_ns
520     (H2H_NS, 'href-section' => $v[3]+0) if $v[3];
521     $parent->{attr}->set_attribute_ns
522     (H2H_NS, 'href-subsection' => $v[4]+0)
523     if $v[4];
524     } else {
525     my $xuri = $parent->{attr}->manakai_expanded_uri;
526     if ($xuri eq HTML_NS . 'a') {
527     $parent->{attr}->set_attribute_ns
528     (undef, href => $token->{value});
529     } elsif ($xuri eq HTML_NS . 'blockquote') {
530     $parent->{attr}->set_attribute_ns
531     (undef, cite => $token->{value});
532     } else {
533     $parent->{attr}->set_attribute_ns
534     (XHTML2_NS, href => $token->{value});
535     }
536     }
537     } elsif ({
538     title => 1, style => 1,
539     class => 1, href => 1, 'accept-charset' => 1,
540     action => 1, method => 1, alt => 1, src => 1,
541     type => 1, value => 1, name => 1, accesskey => 1,
542     for => 1, cite => 1, onclick => 1, colspan => 1,
543     scope => 1, summary => 1,
544     }->{$token->{type}}) {
545     $parent->{attr}->set_attribute_ns
546     (undef, $token->{type}, $token->{value});
547     } elsif ($token->{type} eq 'cat') {
548     for (split /\s*,\s*/, $token->{value}) {
549     my $el = $self->{doc}->create_element_ns (H2H_NS, 'cat');
550     $el->manakai_append_text ($_);
551     $parent->{attr}->append_child ($el);
552     }
553     } elsif ($token->{type} eq 'lang' or $token->{type} eq 'xml:lang') {
554     $parent->{attr}->set_attribute_ns
555     (q<http://www.w3.org/XML/1998/namespace>,
556     'xml:lang' => $token->{value});
557     } elsif ($token->{type} eq 'id') {
558     $parent->{attr}->set_attribute_ns
559     (q<http://www.w3.org/XML/1998/namespace>,
560     'xml:id' => $token->{value});
561     } elsif ($token->{type} eq 'wbradded') {
562     # ignore
563     } else {
564     # key, level, place, position, time
565     $parent->{attr}->set_attribute_ns
566     (H2H_NS, $token->{type}, $token->{value});
567     }
568     }
569     } # _construct_tree
570    
571     1;
572 wakaba 1.4 ## $Date: 2007/08/05 09:24:56 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24