1 |
wakaba |
1.1 |
package Whatpm::HTML::Serializer; |
2 |
|
|
use strict; |
3 |
|
|
our $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
4 |
|
|
|
5 |
|
|
sub get_inner_html ($$$) { |
6 |
|
|
my (undef, $node, $on_error) = @_; |
7 |
|
|
|
8 |
|
|
## Step 1 |
9 |
|
|
my $s = ''; |
10 |
|
|
|
11 |
|
|
my $in_cdata; |
12 |
|
|
my $parent = $node; |
13 |
|
|
while (defined $parent) { |
14 |
|
|
if ($parent->node_type == 1 and |
15 |
|
|
$parent->namespace_uri eq 'http://www.w3.org/1999/xhtml' and |
16 |
|
|
{ |
17 |
|
|
style => 1, script => 1, xmp => 1, iframe => 1, |
18 |
|
|
noembed => 1, noframes => 1, noscript => 1, |
19 |
|
|
}->{$parent->local_name}) { ## TODO: case thingy |
20 |
|
|
$in_cdata = 1; |
21 |
|
|
} |
22 |
|
|
$parent = $parent->parent_node; |
23 |
|
|
} |
24 |
|
|
|
25 |
|
|
## Step 2 |
26 |
|
|
my @node = @{$node->child_nodes}; |
27 |
|
|
C: while (@node) { |
28 |
|
|
my $child = shift @node; |
29 |
|
|
unless (ref $child) { |
30 |
|
|
if ($child eq 'cdata-out') { |
31 |
|
|
$in_cdata = 0; |
32 |
|
|
} else { |
33 |
|
|
$s .= $child; # end tag |
34 |
|
|
} |
35 |
|
|
next C; |
36 |
|
|
} |
37 |
|
|
|
38 |
|
|
my $nt = $child->node_type; |
39 |
|
|
if ($nt == 1) { # Element |
40 |
|
|
my $tag_name = $child->tag_name; ## TODO: manakai_tag_name |
41 |
|
|
$s .= '<' . $tag_name; |
42 |
|
|
## NOTE: Non-HTML case: |
43 |
|
|
## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191> |
44 |
|
|
|
45 |
|
|
my @attrs = @{$child->attributes}; # sort order MUST be stable |
46 |
|
|
for my $attr (@attrs) { # order is implementation dependent |
47 |
|
|
my $attr_name = $attr->name; ## TODO: manakai_name |
48 |
|
|
$s .= ' ' . $attr_name . '="'; |
49 |
|
|
my $attr_value = $attr->value; |
50 |
|
|
## escape |
51 |
|
|
$attr_value =~ s/&/&/g; |
52 |
|
|
$attr_value =~ s/</</g; |
53 |
|
|
$attr_value =~ s/>/>/g; |
54 |
|
|
$attr_value =~ s/"/"/g; |
55 |
|
|
$s .= $attr_value . '"'; |
56 |
|
|
} |
57 |
|
|
$s .= '>'; |
58 |
|
|
|
59 |
|
|
next C if { |
60 |
|
|
area => 1, base => 1, basefont => 1, bgsound => 1, |
61 |
|
|
br => 1, col => 1, embed => 1, frame => 1, hr => 1, |
62 |
|
|
img => 1, input => 1, link => 1, meta => 1, param => 1, |
63 |
|
|
spacer => 1, wbr => 1, |
64 |
|
|
}->{$tag_name}; |
65 |
|
|
|
66 |
|
|
$s .= "\x0A" if $tag_name eq 'pre' or $tag_name eq 'textarea'; |
67 |
|
|
|
68 |
|
|
if (not $in_cdata and { |
69 |
|
|
style => 1, script => 1, xmp => 1, iframe => 1, |
70 |
|
|
noembed => 1, noframes => 1, noscript => 1, |
71 |
|
|
plaintext => 1, |
72 |
|
|
}->{$tag_name}) { |
73 |
|
|
unshift @node, 'cdata-out'; |
74 |
|
|
$in_cdata = 1; |
75 |
|
|
} |
76 |
|
|
|
77 |
|
|
unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>'; |
78 |
|
|
} elsif ($nt == 3 or $nt == 4) { |
79 |
|
|
if ($in_cdata) { |
80 |
|
|
$s .= $child->data; |
81 |
|
|
} else { |
82 |
|
|
my $value = $child->data; |
83 |
|
|
$value =~ s/&/&/g; |
84 |
|
|
$value =~ s/</</g; |
85 |
|
|
$value =~ s/>/>/g; |
86 |
|
|
$value =~ s/"/"/g; |
87 |
|
|
$s .= $value; |
88 |
|
|
} |
89 |
|
|
} elsif ($nt == 8) { |
90 |
|
|
$s .= '<!--' . $child->data . '-->'; |
91 |
|
|
} elsif ($nt == 10) { |
92 |
|
|
$s .= '<!DOCTYPE ' . $child->name . '>'; |
93 |
|
|
} elsif ($nt == 5) { # entrefs |
94 |
|
|
push @node, @{$child->child_nodes}; |
95 |
|
|
} else { |
96 |
|
|
$on_error->($child) if defined $on_error; |
97 |
|
|
} |
98 |
|
|
## ISSUE: This code does not support PIs. |
99 |
|
|
} # C |
100 |
|
|
|
101 |
|
|
## Step 3 |
102 |
|
|
return \$s; |
103 |
|
|
} # get_inner_html |
104 |
|
|
|
105 |
|
|
=head1 LICENSE |
106 |
|
|
|
107 |
|
|
Copyright 2007 Wakaba <w@suika.fam.cx> |
108 |
|
|
|
109 |
|
|
This library is free software; you can redistribute it |
110 |
|
|
and/or modify it under the same terms as Perl itself. |
111 |
|
|
|
112 |
|
|
=cut |
113 |
|
|
|
114 |
|
|
1; |
115 |
|
|
## $Date:$ |