## Module preamble: package declaration, strictures and version.
package Whatpm::HTML::Serializer;
use strict;
## $VERSION is derived from the digits of the CVS |Revision| keyword below:
## the first number is printed as-is and each following number is
## zero-padded to two digits (so revision 1.5 yields "1.05").
our $VERSION=do{my @r=(q$Revision: 1.5 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};

## Serialize the children of |$node| into an HTML fragment string.
##
## Arguments:
##   (first)    - ignored; lets the sub be called as a class method.
##   $node      - node whose children are serialized.  It must provide
##                |child_nodes|, |node_type| and |parent_node|; elements
##                additionally need |namespace_uri|, |local_name|,
##                |tag_name| and |attributes|.
##   $on_error  - optional code reference, invoked with each child whose
##                node type is not handled below.
##
## Returns a REFERENCE to the serialized string.
##
## Handled node types: 1 (element), 3 and 4 (character data), 5 (entity
## reference), 7 (processing instruction), 8 (comment), 10 (doctype).
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is emitted literally (no escaping) when
  ## they are |$node| itself or one of its ancestors.
  ## NOTE(review): |plaintext| is in the descendant list below but not in
  ## this one; kept as found - confirm whether that is intended.
  my %raw_text_ancestor = map { $_ => 1 } qw(
    style script xmp iframe noembed noframes noscript
  );

  ## Elements that switch to literal text output when met as descendants.
  my %raw_text_element = (%raw_text_ancestor, plaintext => 1);

  ## Elements serialized as a start tag only (no children, no end tag).
  my %void_element = map { $_ => 1 } qw(
    area base basefont bgsound br col embed frame hr
    img input link meta param spacer wbr keygen
  );

  ## Elements that get a newline emitted right after their start tag.
  my %leading_newline = map { $_ => 1 } qw(pre textarea listing);

  ## Escape character data.  |"| is only escaped in attribute values.
  my $escape = sub {
    my ($value, $in_attribute) = @_;
    $value =~ s/&/&amp;/g;            # must run first
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g if $in_attribute;
    $value =~ s/\xA0/&nbsp;/g;
    return $value;
  };

  ## Step 1
  my $s = '';

  ## True while text must be written out without escaping.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      my $ns = $parent->namespace_uri;
      if (defined $ns and $ns eq 'http://www.w3.org/1999/xhtml' and
          $raw_text_ancestor{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue holding node objects and plain strings.  A
  ## plain string is either the marker 'cdata-out' (leave literal text
  ## mode) or an end tag to append verbatim.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
      $s .= '<' . $tag_name;
      ## NOTE: Non-HTML case:
      ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = $attr->name; ## TODO: manakai_name
        $s .= ' ' . $attr_name . '="' . $escape->($attr->value, 1) . '"';
      }
      $s .= '>';

      next C if $void_element{$tag_name};

      $s .= "\x0A" if $leading_newline{$tag_name};

      if (not $in_cdata and $raw_text_element{$tag_name}) {
        ## Queued first so that it ends up AFTER this element's end tag.
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      ## Children are processed next, followed by the end tag.
      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) {
      $s .= $in_cdata ? $child->data : $escape->($child->data, 0);
    } elsif ($nt == 8) {
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) {
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## Expand in place: the children go to the FRONT of the queue so
      ## they are serialized at the position of the reference itself.
      unshift @node, @{$child->child_nodes};
    } elsif ($nt == 7) { # PIs
      $s .= '<?' . $child->target . ' ' . $child->data . '>';
    } else {
      $on_error->($child) if defined $on_error;
    }
  } # C

  ## Step 3
  return \$s;
} # get_inner_html

=head1 LICENSE

Copyright 2007-2008 Wakaba <w@suika.fam.cx>

This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.

=cut

## A module file must end with a true value so that loading it succeeds.
1;
## $Date: 2008/05/24 12:04:21 $