/[suikacvs]/markup/html/whatpm/Whatpm/H2H.pm
Suika

Contents of /markup/html/whatpm/Whatpm/H2H.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download)
Sun Aug 5 09:24:56 2007 UTC (17 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +4 -1 lines
++ whatpm/Whatpm/ChangeLog	5 Aug 2007 09:23:31 -0000
	* H2H.pm: |b|, |i|, and |sub| are added to the
	list of allowed HTML elements.

2007-08-05  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	5 Aug 2007 09:24:54 -0000
	* Atom.pm: |link|-related checks are added; |type|
	media type checks are added.  |hreflang| now warns
	as unimplemented.

2007-08-05  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::H2H;
2     use strict;
3    
4     sub H2H_NS () { q<http://suika.fam.cx/~wakaba/archive/2005/manakai/Markup/H2H/> }
5     sub HTML_NS () { q<http://www.w3.org/1999/xhtml> }
6     sub HTML3_NS () { q<urn:x-suika-fam-cx:markup:ietf:html:3:draft:00:> }
7     sub SW09_NS () { q<urn:x-suika-fam-cx:markup:suikawiki:0:9:> }
8     sub XHTML2_NS () { q<http://www.w3.org/2002/06/xhtml2/> }
9    
10     sub parse_string ($$$) {
11     my $self = bless {
12     token => [],
13     location => {},
14     doc => $_[2],
15     }, $_[0];
16    
17     my $s = ''.$_[1];
18     $s =~ s/\x0D\x0A/\x0A/g;
19     $s =~ tr/\x0D/\x0A/;
20     $self->{line} = [split /\x0A/, $s];
21    
22     local $Error::Depth = $Error::Depth + 1;
23     $self->{doc}->strict_error_checking (0);
24     my $doc_el = $self->{doc}->create_element_ns (HTML_NS, 'html');
25     $doc_el->set_attribute_ns (q<http://www.w3.org/2000/xmlns/>, 'xmlns', HTML_NS);
26     $self->{doc}->append_child ($doc_el);
27    
28     $self->_construct_tree;
29    
30     return $self->{doc};
31     } # parse_string
32    
33     sub _shift_token ($) {
34     my $self = $_[0];
35    
36     if (@{$self->{token}}) {
37     return shift @{$self->{token}};
38     }
39    
40     my $attrvalue = sub {
41     my $v = shift;
42     $v =~ s/&quot;/"/g;
43     $v =~ s/&lt;/</g;
44     $v =~ s/&gt;/>/g;
45     $v =~ s/&reg;/\x{00AE}/g;
46     $v =~ s/&hearts;/\x{2661}/g;
47     $v =~ s/&amp;/&/g;
48     return $v;
49     };
50    
51     my $uriv = sub {
52     my $v = $attrvalue->(shift);
53     $v =~ s/^\{/(/;
54     $v =~ s/\}$/)/;
55     $v =~ s/^\#([0-9si]+)$/($1)/;
56     $v =~ s/^\(([0-9]{4})([0-9]{2})([0-9]{2})([^)]*)\)$/($1, $2, $3$4)/;
57     $v =~ s/[si]/, /g if $v =~ /^\(/ and $v =~ /\)$/;
58     return $v;
59     };
60    
61     my $r = {type => '#EOF'};
62     L: while (defined (my $line = shift @{$self->{line}})) {
63     if ($line =~ s/^([A-Z]+|T[0-9])(\*?\+?\*?)(?:\s+|$)//) {
64     my $command = $1;
65     my $flag = $2;
66     $r = {type => 'start', value => $command};
67    
68     my $uri;
69     if ($flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) {
70     $uri = $1;
71     }
72    
73     my $attr = '';
74     if ($line =~ s/^\{(\s*(?:[A-Za-z][^{}]*)?)\}\s*//) {
75     $attr = $1;
76     }
77    
78     if (not defined $uri and
79     $flag =~ /\*/ and $line =~ s/^([^{\s]\S*)\s*//) {
80     $uri = $1;
81     }
82    
83     my @token;
84     my $info = {
85     # val# val#(*)
86     ABBR => [2, 2],
87     ACRONYM => [2, 2],
88     CITE => [2, 1],
89     LDIARY => [4, 4],
90     LIMG => [4, 4],
91     LINK => [2, 1],
92     LMG => [2, 2],
93     LNEW => [2, 2],
94     PERSON => [2, 2],
95     RIBU => [2, 2],
96     RUBY => [2, 2],
97     SEE => [2, 2],
98     }->{$command};
99     my @value = split /\s+/, $line,
100     ($flag =~ /\*/ ? $info->[1] : $info->[0]) || 1;
101    
102     push @token, {type => 'uri', value => $uriv->($uri)} if defined $uri;
103    
104     my %attr;
105     while ($attr =~ /([A-Za-z0-9_-]+)\s*(?:=>?|:)\s*([^";,]+|"[^"]+")/gc) {
106     my $name = lc $1;
107     my $value = $2;
108     $value =~ tr/"//d;
109     $attr{$name} = $value;
110     }
111     delete $attr{'content-type'};
112    
113     if ({
114     ABBR => 1, ACRONYM => 1,
115     RUBY => 1, RIBU => 1,
116     }->{$command}) {
117     if (@value == 1 and $attr{title}) {
118     push @value, $attr{title};
119     delete $attr{title};
120     }
121     if (@value == 2) {
122     unshift @{$self->{line}},
123     'RB',
124     'DATA {} >>'.$value[1],
125     '/RB',
126     'RT',
127     'DATA {} >>'.$value[0],
128     '/RT',
129     '/'.$command;
130     } else {
131     unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command;
132     }
133     } elsif ($command eq 'CITE') {
134     if (@value == 2) {
135     if (defined $uri or $value[0] !~ /^[a-z-]+:/) {
136     unshift @{$self->{line}},
137     'SRC',
138     'DATA {} >>'.$value[0].' '.$value[1],
139     '/SRC', 'BODYTEXT';
140     } else {
141     push @token, {type => 'uri', value => $uriv->($value[0])};
142     unshift @{$self->{line}},
143     'SRC',
144     'DATA {} >>'.$value[1],
145     '/SRC', 'BODYTEXT';
146     }
147     } elsif (@value == 1) {
148     if (defined $uri or $value[0] !~ /^[a-z-]+:/) {
149     unshift @{$self->{line}},
150     'SRC',
151     'DATA {} >>'.$value[0],
152     '/SRC', 'BODYTEXT';
153     } else {
154     push @token, {type => 'uri', value => $uriv->($value[0])};
155     unshift @{$self->{line}}, 'BODYTEXT';
156     }
157     } else {
158     unshift @{$self->{line}}, 'BODYTEXT';
159     }
160     } elsif ($command eq 'DATA') {
161     my @token;
162     $line =~ s/^>>//;
163     while (length $line) {
164     if ($line =~ s/^<([a-z0-9]+)\s*//) {
165     my $tagname = $1;
166     push @token, {type => 'start', value => $tagname};
167     while ($line =~ s/^([a-z-]+)\s*=\s*"([^"]*)"\s*//) {
168     push @token, {type => $1, value => $attrvalue->($2)};
169     }
170     $line =~ s#^/?\s*>##;
171     push @token, {type => 'end', value => $tagname}
172     if $tagname eq 'img' or $tagname eq 'input' or
173     $tagname eq 'br';
174     } elsif ($line =~ s#^</([a-z0-9]+)\s*>##) {
175     push @token, {type => 'end', value => $1};
176     } elsif ($line =~ s/^<!--(.*?)-->//) {
177     push @token, {type => 'html-comment', value => $1};
178     } elsif ($line =~ s/^&([a-z]+);//) {
179     my $name = $1;
180     if ($name eq 'amp') {
181     push @token, {type => 'text', value => '&'};
182     } elsif ($name eq 'lt') {
183     push @token, {type => 'text', value => '<'};
184     } elsif ($name eq 'gt') {
185     push @token, {type => 'text', value => '>'};
186     } elsif ($name eq 'quot') {
187     push @token, {type => 'text', value => '"'};
188     } elsif ($name eq 'reg') {
189     push @token, {type => 'text', value => "\x{00AE}"};
190     } elsif ($name eq 'hearts') {
191     push @token, {type => 'text', value => "\x{2661}"};
192     } else {
193     push @token, {type => 'char', value => $name};
194     }
195     } elsif ($line =~ s/^&#([0-9]+);//) {
196     push @token, {type => 'text', value => ord $1};
197     } elsif ($line =~ s/^&#x([0-9A-Fa-f]+);//) {
198     push @token, {type => 'text', value => ord hex $1};
199     } elsif ($line =~ s/^([^<&]+)//) {
200     push @token, {type => 'text', value => $1};
201     } else {
202     push @token, {type => 'text', value => substr ($line, 0, 1)};
203     substr ($line, 0, 1) = '';
204     }
205     }
206     push @token, {type => 'eol'};
207    
208     $r = shift @token;
209     push @{$self->{token}}, @token;
210     last L;
211     } elsif ({
212     DD => 1, DT => 1,
213     DEL => 1, INS => 1,
214     LI => 1,
215     RB => 1, RT => 1,
216     STRONG => 1,
217     YAMI => 1,
218     EM => 1,
219     HOUR => 1, KION => 1, LUNCH => 1,
220     TAION => 1, TENKI => 1, THEME => 1,
221     T1 => 1, T2 => 1, T3 => 1, T4 => 1,
222     T5 => 1, T6 => 1, T7 => 1, SP => 1,
223     }->{$command}) {
224     if (@value) {
225     unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/'.$command;
226     }
227     } elsif ($command eq 'DIV') {
228     if (@value) {
229     $r = {type => 'class', value => $value[0]};
230     }
231     } elsif ($command eq 'LDIARY') {
232     $value[0] =~ s/^([0-9]{4})([0-9]{2})([0-9]{2})/$1, $2, $3/;
233     $value[0] =~ s/[is]/, /;
234     $r = {type => 'start', value => 'LINK'};
235     push @token, {type => 'uri', value => "($value[0])"};
236     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK';
237     } elsif ($command eq 'LIMG') {
238     $r = {type => 'start', value => 'IMG'};
239     push @token, {type => 'uri', value => $uriv->($value[0])};
240     unshift @{$self->{line}}, 'DATA {} >>'.$value[3], '/IMG';
241     } elsif ($command eq 'LMG') {
242     $r = {type => 'start', value => 'IMG'};
243     push @token, {type => 'uri', value => $uriv->($value[0])};
244     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/IMG';
245     } elsif ($command eq 'LINK') {
246     if (@value == 2) {
247     push @token, {type => 'uri', value => $uriv->($value[0])};
248     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/LINK';
249     } elsif ($flag =~ /\+/) {
250     push @token, {type => 'uri', value => $uriv->($value[0])};
251     } else {
252     unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/LINK';
253     }
254     } elsif ($command eq 'NEW') {
255     $r = {type => 'start', value => 'SECTION'};
256     unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H';
257     } elsif ($command eq 'LNEW') {
258     $r = {type => 'start', value => 'SECTION'};
259     push @token, {type => 'uri', value => $uriv->($value[0])};
260     unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[1], '/H';
261     } elsif ($command eq 'SUB') {
262     $r = {type => 'start', value => 'SUB'};
263     unshift @{$self->{line}}, 'H', 'DATA {} >>'.$value[0], '/H';
264     } elsif ($command eq 'PERSON') {
265     push @token, {type => 'key', value => $attrvalue->($value[0])};
266     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/PERSON';
267     } elsif ($command eq 'SEE') {
268     if (@value == 2) {
269     push @token, {type => 'key', value => $attrvalue->($value[0])};
270     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SEE';
271     } else {
272     unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SEE';
273     }
274     } elsif ($command eq 'SPAN') {
275     if (@value == 2) {
276     push @token, {type => 'class',
277     value => $attrvalue->($value[0])};
278     unshift @{$self->{line}}, 'DATA {} >>'.$value[1], '/SPAN';
279     } else {
280     unshift @{$self->{line}}, 'DATA {} >>'.$value[0], '/SPAN';
281     }
282     } elsif ($command eq 'OK') {
283     $r = {type => '#EOF'};
284     next L;
285     } elsif ($command eq 'XML') {
286     unshift @{$self->{line}}, 'DATA {} >>XML '.$line;
287     next L;
288     }
289    
290     for (keys %attr) {
291     push @token, {type => $_, value => $attrvalue->($attr{$_})};
292     }
293    
294     push @{$self->{token}}, @token;
295     last L;
296     } elsif ($line eq 'H2H/1.0') {
297     $r = {type => 'magic', value => 'H2H/1.0'};
298     last L;
299     } elsif ($line =~ m#^/([A-Z]+)\s*$#) {
300     $r = {type => 'end', value => $1};
301     last L;
302     } elsif ($line =~ s/^!#//) {
303     $r = {type => 'hnf-comment', value => $line};
304     last L;
305     } elsif ($line =~ s/^!//) {
306     $r = {type => 'html-comment', value => $line};
307     last L;
308     } else {
309     unshift @{$self->{line}}, 'DATA {} >>'.$line;
310     next L;
311     }
312     } # L
313    
314     return $r;
315     } # _shift_token
316    
317     sub _construct_tree ($) {
318     my $self = $_[0];
319    
320     my $doc_el = $self->{doc}->document_element;
321     my $head_el = $self->{doc}->create_element_ns (HTML_NS, 'head');
322     my $body_el = $self->{doc}->create_element_ns (HTML_NS, 'body');
323     $doc_el->append_child ($head_el);
324     $doc_el->append_child ($body_el);
325     $doc_el->set_user_data ('command-name' => '#html');
326     $head_el->set_user_data ('command-name' => '#head');
327     $body_el->set_user_data ('command-name' => '#body');
328     $doc_el->set_attribute_ns (SW09_NS, 'sw9:Name' => 'H2H');
329     $doc_el->set_attribute_ns (SW09_NS, 'sw9:Version' => '0.9');
330    
331     my $parent = {
332     subsection => $body_el,
333     attr => $body_el,
334     text => $body_el,
335     };
336     my $state = 'data';
337     ## data - normal
338     ## list - UL or OL
339     ## br - after br start tag token
340     ## eol - after eol token
341    
342     T: while (my $token = $self->_shift_token) {
343     last T if $token->{type} eq '#EOF';
344    
345     if ($token->{type} eq 'text') {
346     if ($state eq 'list') {
347     my $li_el = $self->{doc}->create_element_ns (HTML_NS, 'li');
348     $li_el->manakai_append_text ($token->{value});
349     $parent->{text}->append_child ($li_el);
350     } else {
351     $parent->{text}->manakai_append_text ("\x0A") if $state eq 'eol';
352     $parent->{text}->manakai_append_text ($token->{value});
353     $state = 'data';
354     }
355     } elsif ($token->{type} eq 'eol') {
356     if ($state eq 'eol') {
357     $parent->{text}->manakai_append_text ("\x0A");
358     } else {
359     $state = $state eq 'br' ? 'data' : 'eol';
360     }
361     } elsif ($token->{type} eq 'start') {
362     my $info = {
363     # nsuri, qname, parent, state
364     ABBR => [HTML_NS, 'abbr', $parent->{text}, 'data'],
365     ACRONYM => [HTML_NS, 'abbr', $parent->{text}, 'data'],
366     BODYTEXT => [HTML3_NS, 'bodytext', $parent->{text}, 'data'],
367     CITE => [HTML_NS, 'blockquote', $parent->{text}, 'data',
368     {PRE => 1}],
369     DD => [HTML_NS, 'dd', $parent->{text}, 'data'],
370     DEL => [HTML_NS, 'del', $parent->{text}, 'data'],
371     DIV => [HTML_NS, 'div', $parent->{text}, 'data', {P => 1}],
372     DL => [HTML_NS, 'dl', $parent->{text}, 'data'],
373     DT => [HTML_NS, 'dt', $parent->{text}, 'data'],
374     EM => [HTML_NS, 'em', $parent->{text}, 'data'],
375     FN => [H2H_NS, 'fn', $parent->{text}, 'data'],
376     H => [XHTML2_NS, 'h', $parent->{text}, 'data'],
377     HOUR => [H2H_NS, 'hour', $head_el, 'data'],
378     IMG => [HTML_NS, 'img', $parent->{text}, 'data'],
379     INS => [HTML_NS, 'ins', $parent->{text}, 'data'],
380     KION => [H2H_NS, 'kion', $head_el, 'data'],
381     LI => [HTML_NS, 'li', $parent->{text}, 'data'],
382     LINK => [HTML_NS, 'a', $parent->{text}, 'data'],
383     LUNCH => [H2H_NS, 'lunch', $head_el, 'data'],
384     OL => [HTML_NS, 'ol', $parent->{text}, 'list', {PRE => 1}],
385     P => [HTML_NS, 'p', $parent->{text}, 'data',
386     {P => 1, PRE => 1}],
387     PERSON => [HTML3_NS, 'person', $parent->{text}, 'data'],
388     PRE => [HTML_NS, 'pre', $parent->{text}, 'data',
389     {P => 1}],
390     RB => [HTML_NS, 'rb', $parent->{text}, 'data'],
391     RIBU => [HTML_NS, 'ruby', $parent->{text}, 'data'],
392     RT => [HTML_NS, 'rt', $parent->{text}, 'data'],
393     RUBY => [HTML_NS, 'ruby', $parent->{text}, 'data'],
394     SECTION => [HTML_NS, 'section', $body_el, 'data'],
395     SEE => [HTML_NS, 'i', $parent->{text}, 'data'],
396     SP => [H2H_NS, 'sp', $head_el, 'data'],
397     SPAN => [HTML_NS, 'span', $parent->{text}, 'data'],
398     SRC => [HTML3_NS, 'credit', $parent->{text}, 'data'],
399     STRONG => [HTML_NS, 'strong', $parent->{text}, 'data'],
400     SUBSECTION => [HTML_NS, 'section', $parent->{subsection}, 'data'],
401     T1 => [H2H_NS, 't1', $head_el, 'data'],
402     T2 => [H2H_NS, 't2', $head_el, 'data'],
403     T3 => [H2H_NS, 't3', $head_el, 'data'],
404     T4 => [H2H_NS, 't4', $head_el, 'data'],
405     T5 => [H2H_NS, 't5', $head_el, 'data'],
406     T6 => [H2H_NS, 't6', $head_el, 'data'],
407     T7 => [H2H_NS, 't7', $head_el, 'data'],
408     TAION => [H2H_NS, 'taion', $head_el, 'data'],
409     TENKI => [H2H_NS, 'tenki', $head_el, 'data'],
410     THEME => [H2H_NS, 'theme', $head_el, 'data'],
411     UL => [HTML_NS, 'ul', $parent->{text}, 'list', {PRE => 1}],
412     YAMI => [H2H_NS, 'yami', $parent->{text}, 'data'],
413     a => [HTML_NS, 'a', $parent->{text}, 'data'],
414     abbr => [HTML_NS, 'abbr', $parent->{text}, 'data'],
415     acronym => [HTML_NS, 'abbr', $parent->{text}, 'data'],
416 wakaba 1.3 b => [HTML_NS, 'b', $parent->{text}, 'data'],
417 wakaba 1.1 blockquote => [HTML_NS, 'blockquote', $parent->{text}, 'data'],
418     br => [HTML_NS, 'br', $parent->{text}, 'br'],
419     caption => [HTML_NS, 'caption', $parent->{text}, 'data'],
420     code => [HTML_NS, 'code', $parent->{text}, 'data'],
421     dd => [HTML_NS, 'dd', $parent->{text}, 'data'],
422     del => [HTML_NS, 'del', $parent->{text}, 'data'],
423     dfn => [HTML_NS, 'dfn', $parent->{text}, 'data'],
424     div => [HTML_NS, 'div', $parent->{text}, 'data'],
425     dl => [HTML_NS, 'dl', $parent->{text}, 'data'],
426     dt => [HTML_NS, 'dt', $parent->{text}, 'data'],
427     em => [HTML_NS, 'em', $parent->{text}, 'data'],
428     form => [HTML_NS, 'form', $parent->{text}, 'data'],
429     h1 => [HTML_NS, 'h1', $parent->{text}, 'data'],
430     h2 => [HTML_NS, 'h2', $parent->{text}, 'data'],
431     h3 => [HTML_NS, 'h3', $parent->{text}, 'data'],
432     h4 => [HTML_NS, 'h4', $parent->{text}, 'data'],
433 wakaba 1.3 i => [HTML_NS, 'i', $parent->{text}, 'data'],
434 wakaba 1.1 img => [HTML_NS, 'img', $parent->{text}, 'data'],
435     input => [HTML_NS, 'input', $parent->{text}, 'data'],
436     ins => [HTML_NS, 'ins', $parent->{text}, 'data'],
437     kbd => [HTML_NS, 'kbd', $parent->{text}, 'data'],
438     label => [HTML_NS, 'label', $parent->{text}, 'data'],
439     li => [HTML_NS, 'li', $parent->{text}, 'data'],
440     ol => [HTML_NS, 'ol', $parent->{text}, 'data'],
441     p => [HTML_NS, 'p', $parent->{text}, 'data'],
442     pre => [HTML_NS, 'pre', $parent->{text}, 'data'],
443     q => [HTML_NS, 'q', $parent->{text}, 'data'],
444     rb => [HTML_NS, 'rb', $parent->{text}, 'data'],
445     rp => [HTML_NS, 'rp', $parent->{text}, 'data'],
446     rt => [HTML_NS, 'rt', $parent->{text}, 'data'],
447     ruby => [HTML_NS, 'ruby', $parent->{text}, 'data'],
448 wakaba 1.2 samp => [HTML_NS, 'samp', $parent->{text}, 'data'],
449 wakaba 1.1 span => [HTML_NS, 'span', $parent->{text}, 'data'],
450     strong => [HTML_NS, 'strong', $parent->{text}, 'data'],
451 wakaba 1.3 sub => [HTML_NS, 'sub', $parent->{text}, 'data'],
452 wakaba 1.1 sup => [HTML_NS, 'sup', $parent->{text}, 'data'],
453     table => [HTML_NS, 'table', $parent->{text}, 'data'],
454     tbody => [HTML_NS, 'tbody', $parent->{text}, 'data'],
455     td => [HTML_NS, 'td', $parent->{text}, 'data'],
456     th => [HTML_NS, 'th', $parent->{text}, 'data'],
457     thead => [HTML_NS, 'thead', $parent->{text}, 'data'],
458     tr => [HTML_NS, 'tr', $parent->{text}, 'data'],
459     ul => [HTML_NS, 'ul', $parent->{text}, 'data'],
460     var => [HTML_NS, 'var', $parent->{text}, 'data'],
461     }->{$token->{value}}
462     || [H2H_NS, $token->{value}, $parent->{text}, 'data'];
463     while ($info->[4]->{$info->[2]->get_user_data ('command-name')}) {
464     $info->[2] = $info->[2]->parent_node;
465     }
466     my $el = $self->{doc}->create_element_ns ($info->[0], $info->[1]);
467     $el->set_user_data ('command-name', $token->{value});
468     $info->[2]->append_child ($el);
469     $parent->{text} = $el;
470     $parent->{attr} = $el;
471     $parent->{subsection} = $el if $token->{value} eq 'SECTION';
472     $state = $info->[3];
473     } elsif ($token->{type} eq 'end') {
474     E: while (my $et = $parent->{text}->get_user_data ('command-name')) {
475     $parent->{text} = $parent->{text}->parent_node;
476     last E if $et eq $token->{value};
477     last E if $et eq '#body';
478     }
479     $parent->{attr} = $parent->{text};
480     $state = {
481     UL => 'list',
482     OL => 'list',
483     }->{$parent->{text}->get_user_data ('command-name')}
484     || $state eq 'br' ? 'br' : 'data';
485     } elsif ($token->{type} eq 'char') {
486     my $el = $self->{doc}->create_element_ns (H2H_NS, 'char');
487     $el->manakai_append_text ($token->{value});
488     $parent->{text}->append_child ($el);
489     $state = 'data' if $state eq 'br';
490     } elsif ($token->{type} eq 'magic') {
491     my ($name, $version) = split m#/#, $token->{value}, 2;
492     $doc_el->set_attribute_ns (SW09_NS, 'sw9:Name', $name);
493     $doc_el->set_attribute_ns (SW09_NS, 'sw9:Version', $version);
494     } elsif ($token->{type} eq 'hnf-comment') {
495     my $com = $self->{doc}->create_element_ns
496     (H2H_NS, 'hnf-comment');
497     $com->text_content ($token->{value});
498     $parent->{text}->append_child ($com);
499     } elsif ($token->{type} eq 'html-comment') {
500     my $com = $self->{doc}->create_element_ns
501     (H2H_NS, 'html-comment');
502     $com->text_content ($token->{value});
503     $parent->{text}->append_child ($com);
504     } elsif ($token->{type} eq 'source') {
505     my $src = $self->{doc}->create_element_ns (HTML3_NS, 'credit');
506     $src->manakai_append_text ($token->{value});
507     $parent->{text}->append_child ($src);
508     } elsif ($token->{type} eq 'uri') {
509     my $v = $token->{value};
510     if ($v =~ /^\(([^()]+)\)$/) {
511     my @v = split /\s*,\s*/, $1;
512     $parent->{attr}->set_attribute_ns
513     (H2H_NS, 'href-year' => $v[0]+0);
514     $parent->{attr}->set_attribute_ns
515     (H2H_NS, 'href-month' => $v[1]+0);
516     $parent->{attr}->set_attribute_ns
517     (H2H_NS, 'href-day' => $v[2]+0);
518     $parent->{attr}->set_attribute_ns
519     (H2H_NS, 'href-section' => $v[3]+0) if $v[3];
520     $parent->{attr}->set_attribute_ns
521     (H2H_NS, 'href-subsection' => $v[4]+0)
522     if $v[4];
523     } else {
524     my $xuri = $parent->{attr}->manakai_expanded_uri;
525     if ($xuri eq HTML_NS . 'a') {
526     $parent->{attr}->set_attribute_ns
527     (undef, href => $token->{value});
528     } elsif ($xuri eq HTML_NS . 'blockquote') {
529     $parent->{attr}->set_attribute_ns
530     (undef, cite => $token->{value});
531     } else {
532     $parent->{attr}->set_attribute_ns
533     (XHTML2_NS, href => $token->{value});
534     }
535     }
536     } elsif ({
537     title => 1, style => 1,
538     class => 1, href => 1, 'accept-charset' => 1,
539     action => 1, method => 1, alt => 1, src => 1,
540     type => 1, value => 1, name => 1, accesskey => 1,
541     for => 1, cite => 1, onclick => 1, colspan => 1,
542     scope => 1, summary => 1,
543     }->{$token->{type}}) {
544     $parent->{attr}->set_attribute_ns
545     (undef, $token->{type}, $token->{value});
546     } elsif ($token->{type} eq 'cat') {
547     for (split /\s*,\s*/, $token->{value}) {
548     my $el = $self->{doc}->create_element_ns (H2H_NS, 'cat');
549     $el->manakai_append_text ($_);
550     $parent->{attr}->append_child ($el);
551     }
552     } elsif ($token->{type} eq 'lang' or $token->{type} eq 'xml:lang') {
553     $parent->{attr}->set_attribute_ns
554     (q<http://www.w3.org/XML/1998/namespace>,
555     'xml:lang' => $token->{value});
556     } elsif ($token->{type} eq 'id') {
557     $parent->{attr}->set_attribute_ns
558     (q<http://www.w3.org/XML/1998/namespace>,
559     'xml:id' => $token->{value});
560     } elsif ($token->{type} eq 'wbradded') {
561     # ignore
562     } else {
563     # key, level, place, position, time
564     $parent->{attr}->set_attribute_ns
565     (H2H_NS, $token->{type}, $token->{value});
566     }
567     }
568     } # _construct_tree
569    
570     1;
571 wakaba 1.3 ## $Date: 2007/08/05 07:12:45 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24