#!/usr/bin/perl
use strict;
use lib qw[/home/httpd/html/www/markup/html/whatpm
/home/wakaba/work/manakai/lib
/home/wakaba/public_html/-temp/wiki/lib];
use CGI::Carp qw[fatalsToBrowser];
use Scalar::Util qw[refaddr];
use SuikaWiki::Input::HTTP; ## TODO: Use some better CGI module
sub htescape ($) {
my $s = $_[0];
$s =~ s/&/&/g;
$s =~ s/</g;
$s =~ s/>/>/g;
$s =~ s/"/"/g;
$s =~ s!([\x00-\x09\x0B-\x1F\x7F-\x80])!sprintf 'U+%04X', ord $1!ge;
return $s;
} # htescape
my $http = SuikaWiki::Input::HTTP->new;
## TODO: _charset_
my $input_format = $http->parameter ('i') || 'text/html';
my $inner_html_element = $http->parameter ('e');
my $input_uri = 'thismessage:/';
my $s = $http->parameter ('s');
if (length $s > 1000_000) {
print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long";
exit;
}
my @nav;
print STDOUT qq[Content-Type: text/html; charset=utf-8
Web Document Conformance Checker (BETA)
Web Document Conformance Checker (beta)
- Document URI
<@{[htescape $input_uri]}>
- Internet Media Type
@{[htescape $input_format]}
]; # no
yet
push @nav, ['#document-info' => 'Information'];
require Message::DOM::DOMImplementation;
my $dom = Message::DOM::DOMImplementation->____new;
my $doc;
my $el;
if ($input_format eq 'text/html') {
require Encode;
require Whatpm::HTML;
$s = Encode::decode ('utf-8', $s);
print STDOUT qq[
Character Encoding
(none)
Document Source
];
push @nav, ['#source-string' => 'Source'];
print_source_string (\$s);
print STDOUT qq[
Parse Errors
];
push @nav, ['#parse-errors' => 'Parse Error'];
my $onerror = sub {
my (%opt) = @_;
if ($opt{column} > 0) {
print STDOUT qq[- Line $opt{line} column $opt{column}: ];
} else {
$opt{line}--;
print STDOUT qq[
- Line $opt{line}: ];
}
print STDOUT qq[@{[htescape $opt{type}]}
\n];
};
$doc = $dom->create_document;
if (defined $inner_html_element and length $inner_html_element) {
$el = $doc->create_element_ns
('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
Whatpm::HTML->set_inner_html ($el, $s, $onerror);
} else {
Whatpm::HTML->parse_string ($s => $doc, $onerror);
}
print STDOUT qq[
];
} elsif ($input_format eq 'application/xhtml+xml') {
require Message::DOM::XMLParserTemp;
require Encode;
my $t = Encode::decode ('utf-8', $s);
print STDOUT qq[
Character Encoding
(none)
Document Source
];
push @nav, ['#source-string' => 'Source'];
print_source_string (\$t);
print STDOUT qq[
Parse Errors
];
push @nav, ['#parse-errors' => 'Parse Error'];
my $onerror = sub {
my $err = shift;
my $line = $err->location->line_number;
print STDOUT qq[- Line $line column ];
print STDOUT $err->location->column_number, ": ";
print STDOUT htescape $err->text, "
\n";
return 1;
};
open my $fh, '<', \$s;
$doc = Message::DOM::XMLParserTemp->parse_byte_stream
($fh => $dom, $onerror, charset => 'utf-8');
print STDOUT qq[
];
} else {
print STDOUT qq[
Media type @{[htescape $input_format]}
is not supported!
];
push @nav, ['#result-summary' => 'Result'];
}
if (defined $doc or defined $el) {
print STDOUT qq[
Document Tree
];
push @nav, ['#document-tree' => 'Tree'];
print_document_tree ($el || $doc);
print STDOUT qq[
Document Errors
];
push @nav, ['#document-errors' => 'Document Error'];
require Whatpm::ContentChecker;
my $onerror = sub {
my %opt = @_;
print STDOUT qq[- ],
htescape get_node_path ($opt{node}),
": ", htescape $opt{type}, "
\n";
};
if ($el) {
Whatpm::ContentChecker->check_element ($el, $onerror);
} else {
Whatpm::ContentChecker->check_document ($doc, $onerror);
}
print STDOUT qq[
];
}
## TODO: Show result
print STDOUT qq[
];
for (@nav) {
print STDOUT qq[- $_->[1]
];
}
print STDOUT qq[
];
exit;
sub print_source_string ($) {
my $s = $_[0];
my $i = 1;
print STDOUT qq[\n];
while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
print STDOUT qq[- ], htescape $1, "
\n";
$i++;
}
if ($$s =~ /\G([^\x0A]+)/gc) {
print STDOUT qq[- ], htescape $1, "
\n";
}
print STDOUT "
";
} # print_input_string
sub print_document_tree ($) {
my $node = shift;
my $r = '';
my @node = ($node);
while (@node) {
my $child = shift @node;
unless (ref $child) {
$r .= $child;
next;
}
my $node_id = 'node-'.refaddr $child;
my $nt = $child->node_type;
if ($nt == $child->ELEMENT_NODE) {
$r .= qq'' . htescape ($child->tag_name) .
'
'; ## ISSUE: case
if ($child->has_attributes) {
$r .= '';
for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, 'node-'.refaddr $_] }
@{$child->attributes}) {
$r .= qq'' . htescape ($attr->[0]) . '
= '; ## ISSUE: case?
$r .= '' . htescape ($attr->[1]) . '
'; ## TODO: children
}
$r .= '
';
}
if ($node->has_child_nodes) {
$r .= '';
unshift @node, @{$child->child_nodes}, '
';
}
} elsif ($nt == $child->TEXT_NODE) {
$r .= qq'' . htescape ($child->data) . '
';
} elsif ($nt == $child->CDATA_SECTION_NODE) {
$r .= qq'<[CDATA[
' . htescape ($child->data) . '
]]>
';
} elsif ($nt == $child->COMMENT_NODE) {
$r .= qq'<!--
' . htescape ($child->data) . '
-->
';
} elsif ($nt == $child->DOCUMENT_NODE) {
$r .= qq'- Document
';
if ($child->has_child_nodes) {
$r .= '';
unshift @node, @{$child->child_nodes}, '
';
}
} elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
$r .= qq'<!DOCTYPE>
';
$r .= '- Name =
@{[htescape ($child->name)]}
';
$r .= '- Public identifier =
@{[htescape ($child->public_id)]}
';
$r .= '- System identifier =
@{[htescape ($child->system_id)]}
';
$r .= '
';
} elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
$r .= qq'<?@{[htescape ($child->target)]}?>
';
$r .= '- @{[htescape ($child->data)]}
';
} else {
$r .= qq'- @{[$child->node_type]} @{[htescape ($child->node_name)]}
'; # error
}
}
$r .= '
';
print STDOUT $r;
} # print_document_tree
sub get_node_path ($) {
my $node = shift;
my @r;
while (defined $node) {
my $rs;
if ($node->node_type == 1) {
$rs = $node->manakai_local_name;
$node = $node->parent_node;
} elsif ($node->node_type == 2) {
$rs = '@' . $node->manakai_local_name;
$node = $node->owner_element;
} elsif ($node->node_type == 3) {
$rs = '"' . $node->data . '"';
$node = $node->parent_node;
} elsif ($node->node_type == 9) {
$rs = '';
$node = $node->parent_node;
} else {
$rs = '#' . $node->node_type;
$node = $node->parent_node;
}
unshift @r, $rs;
}
return join '/', @r;
} # get_node_path
=head1 AUTHOR
Wakaba .
=head1 LICENSE
Copyright 2007 Wakaba
This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
=cut
## $Date: 2007/06/27 13:30:15 $