--- test/html-webhacc/cc.cgi 2007/11/05 09:33:52 1.23 +++ test/html-webhacc/cc.cgi 2008/07/21 05:24:32 1.55 @@ -6,20 +6,16 @@ /home/wakaba/work/manakai2/lib]; use CGI::Carp qw[fatalsToBrowser]; use Scalar::Util qw[refaddr]; -use Time::HiRes qw/time/; -sub htescape ($) { - my $s = $_[0]; - $s =~ s/&/&/g; - $s =~ s/</g; - $s =~ s/>/>/g; - $s =~ s/"/"/g; - $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{ - sprintf 'U+%04X', ord $1; - }ge; - return $s; -} # htescape + require WebHACC::Input; + require WebHACC::Result; + require WebHACC::Output; +my $out; + + require Message::DOM::DOMImplementation; + my $dom = Message::DOM::DOMImplementation->new; +{ use Message::CGI::HTTP; my $http = Message::CGI::HTTP->new; @@ -27,17 +23,14 @@ print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400"; exit; } - - binmode STDOUT, ':utf8'; - $| = 1; - - require Message::DOM::DOMImplementation; - my $dom = Message::DOM::DOMImplementation->new; - + load_text_catalog ('en'); ## TODO: conneg - my @nav; - print STDOUT qq[Content-Type: text/html; charset=utf-8 + $out = WebHACC::Output->new; + $out->handle (*STDOUT); + $out->set_utf8; + $out->set_flush; + $out->html (qq[Content-Type: text/html; charset=utf-8 @@ -48,516 +41,126 @@
<@{[htescape $input->{request_uri}]}>
<@{[htescape $input->{uri}]}>
<@{[htescape $input->{base_uri}]}>
@{[htescape $input->{media_type}]}
- @{[$input->{media_type_overridden} ? '(overridden)' : '']}'.htescape ($input->{charset}).'
' : '(none)']}
- @{[$input->{charset_overridden} ? '(overridden)' : '']}Note: Due to the limitation of the -network library in use, the content of this section might -not be the real header.
- -Status code | ]; - print STDOUT qq[@{[htescape ($input->{header_status_code})]} |
---|---|
Status text | ]; - print STDOUT qq[@{[htescape ($input->{header_status_text})]} |
@{[htescape ($_->[0])]} | ];
- print STDOUT qq[@{[htescape ($_->[1])]} |
] . htescape ($child->tag_name) .
- '
'; ## ISSUE: case
-
- if ($child->has_attributes) {
- $r .= '] . htescape ($attr->[0]) . '
= '; ## ISSUE: case?
- $r .= '' . htescape ($attr->[1]) . '
' . htescape ($child->data) . '
<[CDATA[
' . htescape ($child->data) . '
]]>
<!--
' . htescape ($child->data) . '
-->
@{[htescape ($child->xml_version)]}
@{[htescape ($child->xml_encoding)]}
<!DOCTYPE>
@{[htescape ($child->name)]}
@{[htescape ($child->public_id)]}
@{[htescape ($child->system_id)]}
<?@{[htescape ($child->target)]}
@{[htescape ($child->data)]}
?>
<$euri>
Oppotunistic Caching Namespace | -Fallback Entry |
---|---|
<$euri> |
- <$euri2> | ];
- }
+ if (defined $doc or defined $el) {
- print STDOUT qq[
<$euri>
@{[htescape $id]}
@{[htescape $class]}
The conformance - checker cannot decide whether the document is conforming or - not, since the document contains one or more unsupported - features. The document might or might not be conforming.
]; - } elsif ($result->{conforming_min}) { - print STDOUT qq[No conformance-error is - found in this document.
]; - } elsif ($result->{conforming_max}) { - print STDOUT qq[This document - is likely non-conforming, but in rare case - it might be conforming.
]; - } else { - print STDOUT qq[This document is - non-conforming.
]; - } - - print STDOUT qq[- | MUST‐level -Errors | -SHOULD‐level -Errors | -Warnings | -Score | |
---|---|---|---|---|---|
$label | $result->{$_->[1]}->{must}$uncertain | $result->{$_->[1]}->{should}$uncertain | $result->{$_->[1]}->{warning}$uncertain | ]; - if ($uncertain) { - print qq[−∞..$result->{$_->[1]}->{score_max} | ]; - } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) { - print qq[$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max} | $result->{$_->[1]}->{score_min} | ]; - } - } - - $score_max += $score_base; - - print STDOUT qq[ -
Semantics | 0? | 0? | 0? | −∞..$score_base | |
Total | -$must_error? | -$should_error? | -$warning? | -−∞..$score_max |
Important: This conformance checking service -is under development. The result above might be wrong.
-Media type @{[htescape $input->{media_type}]}
is not supported!
Input Error: @{[htescape ($input->{error_status_text})]}
-<' . $euri .
+ '>
';
+ } elsif (defined $resource->{bnodeid}) {
+ return htescape ('_:' . $resource->{bnodeid});
+ } elsif ($resource->{nodes}) {
+ return '(rdf:XMLLiteral)';
+ } elsif (defined $resource->{value}) {
+ my $elang = htescape (defined $resource->{language}
+ ? $resource->{language} : '');
+ my $r = qq[] . htescape ($resource->{value}) . '
';
+ if (defined $resource->{datatype}) {
+ my $euri = htescape ($resource->{datatype});
+ $r .= '^^<' . $euri .
+ '>
';
+ } elsif (length $resource->{language}) {
+ $r .= '@' . htescape ($resource->{language});
}
- }
-
- if (defined $err->{node}) {
- $r .= ' ' if length $r;
- $r = get_node_link ($err->{node});
- }
-
- if (defined $err->{index}) {
- $r .= ' ' if length $r;
- $r .= 'Index ' . (0+$err->{index});
- }
-
- if (defined $err->{value}) {
- $r .= ' ' if length $r;
- $r .= '' . htescape ($err->{value}) . '
';
- }
-
- return $r;
-} # get_error_label
-
-sub get_error_level_label ($) {
- my $err = shift;
-
- my $r = '';
-
- if (not defined $err->{level} or $err->{level} eq 'm') {
- $r = qq[MUST‐level
- error: ];
- } elsif ($err->{level} eq 's') {
- $r = qq[SHOULD‐level
- error: ];
- } elsif ($err->{level} eq 'w') {
- $r = qq[Warning:
- ];
- } elsif ($err->{level} eq 'unsupported') {
- $r = qq[Not
- supported: ];
+ return $r;
} else {
- my $elevel = htescape ($err->{level});
- $r = qq[$elevel:
- ];
- }
-
- return $r;
-} # get_error_level_label
-
-sub get_node_path ($) {
- my $node = shift;
- my @r;
- while (defined $node) {
- my $rs;
- if ($node->node_type == 1) {
- $rs = $node->manakai_local_name;
- $node = $node->parent_node;
- } elsif ($node->node_type == 2) {
- $rs = '@' . $node->manakai_local_name;
- $node = $node->owner_element;
- } elsif ($node->node_type == 3) {
- $rs = '"' . $node->data . '"';
- $node = $node->parent_node;
- } elsif ($node->node_type == 9) {
- @r = ('') unless @r;
- $rs = '';
- $node = $node->parent_node;
- } else {
- $rs = '#' . $node->node_type;
- $node = $node->parent_node;
- }
- unshift @r, $rs;
+ return '??';
}
- return join '/', @r;
-} # get_node_path
-
-sub get_node_link ($) {
- return qq[] .
- htescape (get_node_path ($_[0])) . qq[];
-} # get_node_link
+} # get_rdf_resource_html
{
my $Msg = {};
sub load_text_catalog ($) {
+# my $self = shift;
my $lang = shift; # MUST be a canonical lang name
- open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
+ open my $file, '<:utf8', "cc-msg.$lang.txt"
+ or die "$0: cc-msg.$lang.txt: $!";
while (<$file>) {
if (s/^([^;]+);([^;]*);//) {
my ($type, $cls, $msg) = ($1, $2, $_);
@@ -883,40 +307,43 @@
}
} # load_text_catalog
-sub get_text ($) {
+sub get_text ($;$$) {
+# my $self = shift;
my ($type, $level, $node) = @_;
$type = $level . ':' . $type if defined $level;
+ $level = 'm' unless defined $level;
my @arg;
{
if (defined $Msg->{$type}) {
my $msg = $Msg->{$type}->[1];
$msg =~ s{\$([0-9]+)}{
- defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
- }ge;
+ defined $arg[$1] ? ($arg[$1]) : '(undef)';
+ }ge; ##BUG: ^ must be escaped
$msg =~ s{{\@([A-Za-z0-9:_.-]+)}}{
UNIVERSAL::can ($node, 'get_attribute_ns')
- ? htescape ($node->get_attribute_ns (undef, $1)) : ''
- }ge;
- $msg =~ s{{\@}}{
- UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
+ ? ($node->get_attribute_ns (undef, $1)) : ''
+ }ge; ## BUG: ^ must be escaped
+ $msg =~ s{{\@}}{ ## BUG: v must be escaped
+ UNIVERSAL::can ($node, 'value') ? ($node->value) : ''
}ge;
$msg =~ s{{local-name}}{
UNIVERSAL::can ($node, 'manakai_local_name')
- ? htescape ($node->manakai_local_name) : ''
- }ge;
+ ? ($node->manakai_local_name) : ''
+ }ge; ## BUG: ^ must be escaped
$msg =~ s{{element-local-name}}{
(UNIVERSAL::can ($node, 'owner_element') and
$node->owner_element)
- ? htescape ($node->owner_element->manakai_local_name)
- : ''
+ ? ($node->owner_element->manakai_local_name)
+ : '' ## BUG: ^ must be escaped
}ge;
- return ($type, $Msg->{$type}->[0], $msg);
+ return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
} elsif ($type =~ s/:([^:]*)$//) {
unshift @arg, $1;
redo;
}
}
- return ($type, '', htescape ($_[0]));
+ return ($type, 'level-'.$level, ($_[0]));
+ ## BUG: ^ must be escaped
} # get_text
}
@@ -924,15 +351,18 @@
sub get_input_document ($$) {
my ($http, $dom) = @_;
- my $request_uri = $http->get_parameter ('uri');
- my $r = {};
+ require Encode;
+ my $request_uri = Encode::decode ('utf-8', $http->get_parameter ('uri'));
+ my $r = WebHACC::Input->new;
if (defined $request_uri and length $request_uri) {
my $uri = $dom->create_uri_reference ($request_uri);
unless ({
http => 1,
}->{lc $uri->uri_scheme}) {
- return {uri => $request_uri, request_uri => $request_uri,
- error_status_text => 'URI scheme not allowed'};
+ $r = WebHACC::Input::Error->new;
+ $r->{uri} = $request_uri;
+ $r->{request_uri} = $request_uri;
+ $r->{error_status_text} = 'URL scheme not allowed';
}
require Message::Util::HostPermit;
@@ -959,8 +389,11 @@
Allow host=*
EOH
unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
- return {uri => $request_uri, request_uri => $request_uri,
- error_status_text => 'Connection to the host is forbidden'};
+ my $r = WebHACC::Input::Error->new;
+ $r->{uri} = $request_uri;
+ $r->{request_uri} = $request_uri;
+ $r->{error_status_text} = 'Connection to the host is forbidden';
+ return $r;
}
require LWP::UserAgent;
@@ -972,6 +405,7 @@
$ua->protocols_allowed ([qw/http/]);
$ua->max_size (1000_000);
my $req = HTTP::Request->new (GET => $request_uri);
+ $req->header ('Accept-Encoding' => 'identity, *; q=0');
my $res = $ua->request ($req);
## TODO: 401 sets |is_success| true.
if ($res->is_success or $http->get_parameter ('error-page')) {
@@ -981,12 +415,10 @@
## TODO: More strict parsing...
my $ct = $res->header ('Content-Type');
- if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
- $r->{media_type} = lc $1;
- }
if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
$r->{charset} = lc $1;
$r->{charset} =~ tr/\\//d;
+ $r->{official_charset} = $r->{charset};
}
my $input_charset = $http->get_parameter ('charset');
@@ -994,9 +426,22 @@
$r->{charset_overridden}
= (not defined $r->{charset} or $r->{charset} ne $input_charset);
$r->{charset} = $input_charset;
- }
+ }
+
+ ## TODO: Support for HTTP Content-Encoding
$r->{s} = ''.$res->content;
+
+ require Whatpm::ContentType;
+ ($r->{official_type}, $r->{media_type})
+ = Whatpm::ContentType->get_sniffed_type
+ (get_file_head => sub {
+ return substr $r->{s}, 0, shift;
+ },
+ http_content_type_byte => $ct,
+ has_http_content_encoding =>
+ defined $res->header ('Content-Encoding'),
+ supported_image_types => {});
} else {
$r->{uri} = $res->request->uri;
$r->{request_uri} = $request_uri;
@@ -1017,7 +462,18 @@
$r->{charset} = ''.$http->get_parameter ('_charset_');
$r->{charset} =~ s/\s+//g;
$r->{charset} = 'utf-8' if $r->{charset} eq '';
+ $r->{official_charset} = $r->{charset};
$r->{header_field} = [];
+
+ require Whatpm::ContentType;
+ ($r->{official_type}, $r->{media_type})
+ = Whatpm::ContentType->get_sniffed_type
+ (get_file_head => sub {
+ return substr $r->{s}, 0, shift;
+ },
+ http_content_type_byte => undef,
+ has_http_content_encoding => 0,
+ supported_image_types => {});
}
my $input_format = $http->get_parameter ('i');
@@ -1034,6 +490,7 @@
if ($r->{media_type} eq 'text/xml') {
unless (defined $r->{charset}) {
$r->{charset} = 'us-ascii';
+ $r->{official_charset} = $r->{charset};
} elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
$r->{charset_overridden} = 0;
}
@@ -1045,6 +502,8 @@
return $r;
}
+ $r->{inner_html_element} = $http->get_parameter ('e');
+
return $r;
} # get_input_document
@@ -1077,11 +536,11 @@
=head1 LICENSE
-Copyright 2007 Wakaba