package Whatpm::HTML::Serializer;
use strict;
our $VERSION=do{my @r=(q$Revision: 1.5 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};

## Elements whose text descendants are serialized literally, i.e.
## without character escaping.  The same table is used both for the
## ancestors of the context node and for the elements met while
## walking the subtree, so that both checks always agree.
my %RawTextElement = map { $_ => 1 } qw(
  style script xmp iframe noembed noframes noscript plaintext
);

## Elements serialized as a start tag only (no content, no end tag).
my %VoidElement = map { $_ => 1 } qw(
  area base basefont bgsound br col embed frame hr
  img input link meta param spacer wbr keygen
);

## Elements whose start tag is followed by an extra U+000A.
my %LeadingNewlineElement = map { $_ => 1 } qw(pre textarea listing);

## _escape_html ($text, $in_attr)
##
## Returns a copy of |$text| in which |&|, |<|, |>| and U+00A0 are
## replaced by character references.  |"| is replaced as well when
## |$in_attr| is true (attribute value mode).  |&| is handled first so
## that the references produced by the later steps are not escaped
## again.
sub _escape_html {
  my ($text, $in_attr) = @_;
  $text =~ s/&/&amp;/g;
  $text =~ s/</&lt;/g;
  $text =~ s/>/&gt;/g;
  $text =~ s/"/&quot;/g if $in_attr;
  $text =~ s/\xA0/&nbsp;/g;
  return $text;
} # _escape_html

## get_inner_html ($class, $node, $on_error)
##
## Serializes the children of |$node| as an HTML fragment.
##
##   $class    - Invocant; ignored.
##   $node     - The context node.  The methods |child_nodes| and
##               |parent_node| are invoked on it; on elements
##               |node_type|, |namespace_uri|, |local_name|,
##               |tag_name| and |attributes| are used as well.
##   $on_error - Optional code reference.  It is invoked with the
##               child node whenever a node of an unsupported node
##               type is found; such a node adds nothing to the
##               result.
##
## Returns a reference to the serialized string.
##
## The prototype is kept for compatibility with existing callers; it
## has no effect when the subroutine is invoked as a method.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Step 1
  my $s = '';

  ## True while text has to be written literally, because the current
  ## position is inside one of the |%RawTextElement| elements.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      my $ns = $parent->namespace_uri;
      if (defined $ns and
          $ns eq 'http://www.w3.org/1999/xhtml' and
          $RawTextElement{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is the work queue.  Besides node objects it holds plain
  ## strings: end tags waiting to be written, and the marker
  ## |cdata-out| which turns literal mode off again.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
      $s .= '<' . $tag_name;
      ## NOTE: Non-HTML case:
      ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>

      ## The sort order MUST be stable; the order itself is
      ## implementation dependent.
      for my $attr (@{$child->attributes}) {
        my $attr_name = $attr->name; ## TODO: manakai_name
        $s .= ' ' . $attr_name . '="'
            . _escape_html ($attr->value, 1) # in attribute mode
            . '"';
      }
      $s .= '>';

      next C if $VoidElement{$tag_name};

      $s .= "\x0A" if $LeadingNewlineElement{$tag_name};

      if (not $in_cdata and $RawTextElement{$tag_name}) {
        ## Placed first so that it ends up *behind* the end tag
        ## queued by the |unshift| below.
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text, CDATASection
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        $s .= _escape_html ($child->data, 0);
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children replace the entity reference at the front of the
      ## queue, so that they are serialized at the position of the
      ## reference and before any pending end tag or |cdata-out|.
      unshift @node, @{$child->child_nodes};
    } elsif ($nt == 7) { # PIs
      $s .= '<?' . $child->target . ' ' . $child->data . '>';
    } else {
      $on_error->($child) if defined $on_error;
    }
  } # C

  ## Step 3
  return \$s;
} # get_inner_html

=head1 LICENSE

Copyright 2007-2008 Wakaba <w@suika.fam.cx>

This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.

=cut

1;
## $Date: 2008/05/24 12:04:21 $