#!/usr/bin/perl
use strict;
use lib qw[/home/httpd/html/www/markup/html/whatpm
/home/wakaba/work/manakai/lib
/home/wakaba/public_html/-temp/wiki/lib];
use CGI::Carp qw[fatalsToBrowser];
use Scalar::Util qw[refaddr];
use SuikaWiki::Input::HTTP; ## TODO: Use some better CGI module
sub htescape ($) {
my $s = $_[0];
$s =~ s/&/&/g;
$s =~ s/</g;
$s =~ s/>/>/g;
$s =~ s/"/"/g;
$s =~ s!([\x00-\x09\x0B-\x1F\x7F-\x80])!sprintf 'U+%04X ', ord $1!ge;
return $s;
} # htescape
my $http = SuikaWiki::Input::HTTP->new;
## TODO: _charset_
my $input_format = $http->parameter ('i') || 'text/html';
my $inner_html_element = $http->parameter ('e');
my $input_uri = 'thismessage:/';
my $s = $http->parameter ('s');
if (length $s > 1000_000) {
print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long";
exit;
}
load_text_catalog ('en'); ## TODO: conneg
my @nav;
print STDOUT qq[Content-Type: text/html; charset=utf-8
Web Document Conformance Checker (BETA)
Web Document Conformance Checker (beta )
Document URI
<@{[htescape $input_uri]} >
Internet Media Type
@{[htescape $input_format]}
]; # no yet
push @nav, ['#document-info' => 'Information'];
require Message::DOM::DOMImplementation;
my $dom = Message::DOM::DOMImplementation->____new;
my $doc;
my $el;
if ($input_format eq 'text/html') {
require Encode;
require Whatpm::HTML;
$s = Encode::decode ('utf-8', $s);
print STDOUT qq[
Character Encoding
(none)
Document Source
];
push @nav, ['#source-string' => 'Source'];
print_source_string (\$s);
print STDOUT qq[
Parse Errors
];
push @nav, ['#parse-errors' => 'Parse Error'];
my $onerror = sub {
my (%opt) = @_;
my ($cls, $msg) = get_text ($opt{type}, $opt{level});
if ($opt{column} > 0) {
print STDOUT qq[Line $opt{line} column $opt{column} \n];
} else {
$opt{line} = $opt{line} - 1 || 1;
print STDOUT qq[Line $opt{line} \n];
}
print STDOUT qq[$msg \n];
};
$doc = $dom->create_document;
if (defined $inner_html_element and length $inner_html_element) {
$el = $doc->create_element_ns
('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
Whatpm::HTML->set_inner_html ($el, $s, $onerror);
} else {
Whatpm::HTML->parse_string ($s => $doc, $onerror);
}
print STDOUT qq[
];
} elsif ($input_format eq 'application/xhtml+xml') {
require Message::DOM::XMLParserTemp;
require Encode;
my $t = Encode::decode ('utf-8', $s);
print STDOUT qq[
Character Encoding
(none)
Document Source
];
push @nav, ['#source-string' => 'Source'];
print_source_string (\$t);
print STDOUT qq[
Parse Errors
];
push @nav, ['#parse-errors' => 'Parse Error'];
my $onerror = sub {
my $err = shift;
my $line = $err->location->line_number;
print STDOUT qq[Line $line column ];
print STDOUT $err->location->column_number, "";
print STDOUT htescape $err->text, " \n";
return 1;
};
open my $fh, '<', \$s;
$doc = Message::DOM::XMLParserTemp->parse_byte_stream
($fh => $dom, $onerror, charset => 'utf-8');
print STDOUT qq[
];
} else {
print STDOUT qq[
Media type @{[htescape $input_format]}
is not supported!
];
push @nav, ['#result-summary' => 'Result'];
}
if (defined $doc or defined $el) {
print STDOUT qq[
Document Tree
];
push @nav, ['#document-tree' => 'Tree'];
print_document_tree ($el || $doc);
print STDOUT qq[
Document Errors
];
push @nav, ['#document-errors' => 'Document Error'];
require Whatpm::ContentChecker;
my $onerror = sub {
my %opt = @_;
my ($cls, $msg) = get_text ($opt{type}, $opt{level});
print STDOUT qq[] . get_node_link ($opt{node}) .
qq[ \n], $msg, " \n";
};
my $elements;
if ($el) {
$elements = Whatpm::ContentChecker->check_element ($el, $onerror);
} else {
$elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
}
print STDOUT qq[
];
if (@{$elements->{table}}) {
require JSON;
print STDOUT qq[
Tables
Structure of tables are visualized here if scripting is enabled.
];
my $i = 0;
for my $table_el (@{$elements->{table}}) {
$i++;
print STDOUT qq[
] .
get_node_link ($table_el) . q[ ];
my $table = Whatpm::HTMLTable->form_table ($table_el);
for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
next unless $_;
delete $_->{element};
}
for (@{$table->{row_group}}) {
next unless $_;
next unless $_->{element};
$_->{type} = $_->{element}->manakai_local_name;
delete $_->{element};
}
for (@{$table->{cell}}) {
next unless $_;
for (@{$_}) {
next unless $_;
for (@$_) {
$_->{id} = refaddr $_->{element} if defined $_->{element};
delete $_->{element};
}
}
}
print STDOUT '];
}
print STDOUT qq[
];
}
if (keys %{$elements->{term}}) {
print STDOUT qq[
Terms
];
for my $term (sort {$a cmp $b} keys %{$elements->{term}}) {
print STDOUT qq[@{[htescape $term]} ];
for (@{$elements->{term}->{$term}}) {
print STDOUT qq[].get_node_link ($_).qq[ ];
}
}
print STDOUT qq[ ];
}
}
## TODO: Show result
print STDOUT qq[
];
for (@nav) {
print STDOUT qq[$_->[1] ];
}
print STDOUT qq[
];
exit;
sub print_source_string ($) {
my $s = $_[0];
my $i = 1;
print STDOUT qq[\n];
if (length $$s) {
while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
print STDOUT qq[], htescape $1, " \n";
$i++;
}
if ($$s =~ /\G([^\x0A]+)/gc) {
print STDOUT qq[], htescape $1, " \n";
}
} else {
print STDOUT q[ ];
}
print STDOUT " ";
} # print_input_string
sub print_document_tree ($) {
my $node = shift;
my $r = '';
my @node = ($node);
while (@node) {
my $child = shift @node;
unless (ref $child) {
$r .= $child;
next;
}
my $node_id = 'node-'.refaddr $child;
my $nt = $child->node_type;
if ($nt == $child->ELEMENT_NODE) {
my $child_nsuri = $child->namespace_uri;
$r .= qq[] . htescape ($child->tag_name) .
'
'; ## ISSUE: case
if ($child->has_attributes) {
$r .= '';
for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
@{$child->attributes}) {
$r .= qq[] . htescape ($attr->[0]) . '
= '; ## ISSUE: case?
$r .= '' . htescape ($attr->[1]) . ' '; ## TODO: children
}
$r .= ' ';
}
if ($child->has_child_nodes) {
$r .= '';
unshift @node, @{$child->child_nodes}, ' ';
} else {
$r .= '';
}
} elsif ($nt == $child->TEXT_NODE) {
$r .= qq'' . htescape ($child->data) . ' ';
} elsif ($nt == $child->CDATA_SECTION_NODE) {
$r .= qq'<[CDATA[
' . htescape ($child->data) . ' ]]>
';
} elsif ($nt == $child->COMMENT_NODE) {
$r .= qq'';
} elsif ($nt == $child->DOCUMENT_NODE) {
$r .= qq'Document';
$r .= qq[];
$r .= qq[@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]} ];
$r .= qq[@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]} ];
$r .= qq[ ];
if ($child->has_child_nodes) {
$r .= '';
unshift @node, @{$child->child_nodes}, ' ';
}
} elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
$r .= qq'<!DOCTYPE>
';
$r .= qq[Name = @{[htescape ($child->name)]} ];
$r .= qq[Public identifier = @{[htescape ($child->public_id)]} ];
$r .= qq[System identifier = @{[htescape ($child->system_id)]} ];
$r .= ' ';
} elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
$r .= qq'<?@{[htescape ($child->target)]}
@{[htescape ($child->data)]} ?>
';
} else {
$r .= qq'@{[$child->node_type]} @{[htescape ($child->node_name)]} '; # error
}
}
$r .= ' ';
print STDOUT $r;
} # print_document_tree
sub get_node_path ($) {
my $node = shift;
my @r;
while (defined $node) {
my $rs;
if ($node->node_type == 1) {
$rs = $node->manakai_local_name;
$node = $node->parent_node;
} elsif ($node->node_type == 2) {
$rs = '@' . $node->manakai_local_name;
$node = $node->owner_element;
} elsif ($node->node_type == 3) {
$rs = '"' . $node->data . '"';
$node = $node->parent_node;
} elsif ($node->node_type == 9) {
$rs = '';
$node = $node->parent_node;
} else {
$rs = '#' . $node->node_type;
$node = $node->parent_node;
}
unshift @r, $rs;
}
return join '/', @r;
} # get_node_path
sub get_node_link ($) {
return qq[] .
htescape (get_node_path ($_[0])) . qq[ ];
} # get_node_link
{
my $Msg = {};
sub load_text_catalog ($) {
my $lang = shift; # MUST be a canonical lang name
open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
while (<$file>) {
if (s/^([^;]+);([^;]*);//) {
my ($type, $cls, $msg) = ($1, $2, $_);
$msg =~ tr/\x0D\x0A//d;
$Msg->{$type} = [$cls, $msg];
}
}
} # load_text_catalog
sub get_text ($) {
my ($type, $level) = @_;
$type = $level . ':' . $type if defined $level;
my @arg;
{
if (defined $Msg->{$type}) {
my $msg = $Msg->{$type}->[1];
$msg =~ s/\$([0-9]+)/defined $arg[$1] ? htescape ($arg[$1]) : '(undef)'/ge;
return ($Msg->{$type}->[0], $msg);
} elsif ($type =~ s/:([^:]*)$//) {
unshift @arg, $1;
redo;
}
}
return ('', htescape ($_[0]));
} # get_text
}
=head1 AUTHOR
Wakaba .
=head1 LICENSE
Copyright 2007 Wakaba
This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
=cut
## $Date: 2007/07/01 06:21:46 $