--- test/html-webhacc/cc.cgi 2007/11/05 09:33:52 1.23 +++ test/html-webhacc/cc.cgi 2008/07/20 14:58:24 1.53 @@ -6,20 +6,16 @@ /home/wakaba/work/manakai2/lib]; use CGI::Carp qw[fatalsToBrowser]; use Scalar::Util qw[refaddr]; -use Time::HiRes qw/time/; -sub htescape ($) { - my $s = $_[0]; - $s =~ s/&/&/g; - $s =~ s/</g; - $s =~ s/>/>/g; - $s =~ s/"/"/g; - $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{ - sprintf 'U+%04X', ord $1; - }ge; - return $s; -} # htescape + require WebHACC::Input; + require WebHACC::Result; + require WebHACC::Output; +my $out; + + require Message::DOM::DOMImplementation; + my $dom = Message::DOM::DOMImplementation->new; +{ use Message::CGI::HTTP; my $http = Message::CGI::HTTP->new; @@ -27,17 +23,14 @@ print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400"; exit; } - - binmode STDOUT, ':utf8'; - $| = 1; - - require Message::DOM::DOMImplementation; - my $dom = Message::DOM::DOMImplementation->new; - + load_text_catalog ('en'); ## TODO: conneg - my @nav; - print STDOUT qq[Content-Type: text/html; charset=utf-8 + $out = WebHACC::Output->new; + $out->handle (*STDOUT); + $out->set_utf8; + $out->set_flush; + $out->html (qq[Content-Type: text/html; charset=utf-8 @@ -48,109 +41,86 @@
<@{[htescape $input->{request_uri}]}>
<@{[htescape $input->{uri}]}>
<@{[htescape $input->{base_uri}]}>
@{[htescape $input->{media_type}]}
- @{[$input->{media_type_overridden} ? '(overridden)' : '']}'.htescape ($input->{charset}).'
' : '(none)']}
- @{[$input->{charset_overridden} ? '(overridden)' : '']}]);
+ $out->text ($input->{media_type});
+ $out->html (qq[
]);
+ if ($input->{media_type_overridden}) {
+ $out->html ('(overridden)');
+ } elsif (defined $input->{official_type}) {
+ if ($input->{media_type} eq $input->{official_type}) {
+ #
+ } else {
+ $out->html ('(sniffed; official type is: ');
+ $out->text ($input->{official_type});
+ $out->html ('
)');
+ }
+ } else {
+ $out->html ('(sniffed)');
+ }
+ $out->html (q[');
+ $out->text ($input->{charset});
+ $out->html ('
');
+ } else {
+ $out->text ('(none)');
+ }
+ $out->html (' overridden') if $input->{charset_overridden};
+ $out->html (qq[
Note: Due to the limitation of the -network library in use, the content of this section might -not be the real header.
- -Status code | ]; - print STDOUT qq[@{[htescape ($input->{header_status_code})]} |
---|---|
Status text | ]; - print STDOUT qq[@{[htescape ($input->{header_status_text})]} |
@{[htescape ($_->[0])]} | ];
- print STDOUT qq[@{[htescape ($_->[1])]} |
] . htescape ($child->tag_name) .
- '
'; ## ISSUE: case
-
- if ($child->has_attributes) {
- $r .= '] . htescape ($attr->[0]) . '
= '; ## ISSUE: case?
- $r .= '' . htescape ($attr->[1]) . '
' . htescape ($child->data) . '
<[CDATA[
' . htescape ($child->data) . '
]]>
<!--
' . htescape ($child->data) . '
-->
@{[htescape ($child->xml_version)]}
@{[htescape ($child->xml_encoding)]}
<!DOCTYPE>
@{[htescape ($child->name)]}
@{[htescape ($child->public_id)]}
@{[htescape ($child->system_id)]}
<?@{[htescape ($child->target)]}
@{[htescape ($child->data)]}
?>
@{[htescape $subinput->{media_type}]}
+ <$ebaseuri>
Note: Due to the limitation of the +network library in use, the content of this section might +not be the real header.
- print STDOUT qq[ -Oppotunistic Caching Namespace | -Fallback Entry |
---|---|
<$euri> |
- <$euri2> | ];
- }
-
- print STDOUT qq[
<$euri>
@{[htescape $id]}
@{[htescape $class]}
<' . $euri .
+ '>
';
+ } elsif (defined $resource->{bnodeid}) {
+ return htescape ('_:' . $resource->{bnodeid});
+ } elsif ($resource->{nodes}) {
+ return '(rdf:XMLLiteral)';
+ } elsif (defined $resource->{value}) {
+ my $elang = htescape (defined $resource->{language}
+ ? $resource->{language} : '');
+ my $r = qq[] . htescape ($resource->{value}) . '
';
+ if (defined $resource->{datatype}) {
+ my $euri = htescape ($resource->{datatype});
+ $r .= '^^<' . $euri .
+ '>
';
+ } elsif (length $resource->{language}) {
+ $r .= '@' . htescape ($resource->{language});
}
+ return $r;
+ } else {
+ return '??';
}
- print STDOUT qq[The conformance @@ -733,147 +486,48 @@ print STDOUT qq[
Important: This conformance checking service -is under development. The result above might be wrong.
-]; - push @nav, ['#result-summary' => 'Result']; +is under development. The result above might be wrong.]; + $out->end_section; } # print_result_section -sub print_result_unknown_type_section ($) { - my $input = shift; - - print STDOUT qq[ -Media type @{[htescape $input->{media_type}]}
is not supported!
Input Error: @{[htescape ($input->{error_status_text})]}
-' . htescape ($err->{value}) . '
';
- }
-
- return $r;
-} # get_error_label
-
-sub get_error_level_label ($) {
- my $err = shift;
-
- my $r = '';
-
- if (not defined $err->{level} or $err->{level} eq 'm') {
- $r = qq[MUST‐level
- error: ];
- } elsif ($err->{level} eq 's') {
- $r = qq[SHOULD‐level
- error: ];
- } elsif ($err->{level} eq 'w') {
- $r = qq[Warning:
- ];
- } elsif ($err->{level} eq 'unsupported') {
- $r = qq[Not
- supported: ];
- } else {
- my $elevel = htescape ($err->{level});
- $r = qq[$elevel:
- ];
- }
-
- return $r;
-} # get_error_level_label
-
-sub get_node_path ($) {
- my $node = shift;
- my @r;
- while (defined $node) {
- my $rs;
- if ($node->node_type == 1) {
- $rs = $node->manakai_local_name;
- $node = $node->parent_node;
- } elsif ($node->node_type == 2) {
- $rs = '@' . $node->manakai_local_name;
- $node = $node->owner_element;
- } elsif ($node->node_type == 3) {
- $rs = '"' . $node->data . '"';
- $node = $node->parent_node;
- } elsif ($node->node_type == 9) {
- @r = ('') unless @r;
- $rs = '';
- $node = $node->parent_node;
- } else {
- $rs = '#' . $node->node_type;
- $node = $node->parent_node;
- }
- unshift @r, $rs;
- }
- return join '/', @r;
-} # get_node_path
-
-sub get_node_link ($) {
- return qq[] .
- htescape (get_node_path ($_[0])) . qq[];
-} # get_node_link
+ $out->start_section (id => 'result-summary', title => 'Result');
+ print STDOUT qq[
+Input Error: @{[htescape ($input->{error_status_text})]}
]; + $out->end_section; +} # print_result_input_error_section { my $Msg = {}; sub load_text_catalog ($) { +# my $self = shift; my $lang = shift; # MUST be a canonical lang name - open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!"; + open my $file, '<:utf8', "cc-msg.$lang.txt" + or die "$0: cc-msg.$lang.txt: $!"; while (<$file>) { if (s/^([^;]+);([^;]*);//) { my ($type, $cls, $msg) = ($1, $2, $_); @@ -883,40 +537,43 @@ } } # load_text_catalog -sub get_text ($) { +sub get_text ($;$$) { +# my $self = shift; my ($type, $level, $node) = @_; $type = $level . ':' . $type if defined $level; + $level = 'm' unless defined $level; my @arg; { if (defined $Msg->{$type}) { my $msg = $Msg->{$type}->[1]; $msg =~ s{\$([0-9]+)}{ - defined $arg[$1] ? htescape ($arg[$1]) : '(undef)'; - }ge; + defined $arg[$1] ? ($arg[$1]) : '(undef)'; + }ge; ##BUG: ^ must be escaped $msg =~ s{{\@([A-Za-z0-9:_.-]+)}}{ UNIVERSAL::can ($node, 'get_attribute_ns') - ? htescape ($node->get_attribute_ns (undef, $1)) : '' - }ge; - $msg =~ s{{\@}}{ - UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : '' + ? ($node->get_attribute_ns (undef, $1)) : '' + }ge; ## BUG: ^ must be escaped + $msg =~ s{{\@}}{ ## BUG: v must be escaped + UNIVERSAL::can ($node, 'value') ? ($node->value) : '' }ge; $msg =~ s{{local-name}}{ UNIVERSAL::can ($node, 'manakai_local_name') - ? htescape ($node->manakai_local_name) : '' - }ge; + ? ($node->manakai_local_name) : '' + }ge; ## BUG: ^ must be escaped $msg =~ s{{element-local-name}}{ (UNIVERSAL::can ($node, 'owner_element') and $node->owner_element) - ? htescape ($node->owner_element->manakai_local_name) - : '' + ? ($node->owner_element->manakai_local_name) + : '' ## BUG: ^ must be escaped }ge; - return ($type, $Msg->{$type}->[0], $msg); + return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg); } elsif ($type =~ s/:([^:]*)$//) { unshift @arg, $1; redo; } } - return ($type, '', htescape ($_[0])); + return ($type, 'level-'.$level, ($_[0])); + ## BUG: ^ must be escaped } # get_text } @@ -925,7 +582,7 @@ my ($http, $dom) = @_; my $request_uri = $http->get_parameter ('uri'); - my $r = {}; + my $r = WebHACC::Input->new; if (defined $request_uri and length $request_uri) { my $uri = $dom->create_uri_reference ($request_uri); unless ({ @@ -972,6 +629,7 @@ $ua->protocols_allowed ([qw/http/]); $ua->max_size (1000_000); my $req = HTTP::Request->new (GET => $request_uri); + $req->header ('Accept-Encoding' => 'identity, *; q=0'); my $res = $ua->request ($req); ## TODO: 401 sets |is_success| true. if ($res->is_success or $http->get_parameter ('error-page')) { @@ -981,12 +639,10 @@ ## TODO: More strict parsing... my $ct = $res->header ('Content-Type'); - if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) { - $r->{media_type} = lc $1; - } if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) { $r->{charset} = lc $1; $r->{charset} =~ tr/\\//d; + $r->{official_charset} = $r->{charset}; } my $input_charset = $http->get_parameter ('charset'); @@ -994,9 +650,22 @@ $r->{charset_overridden} = (not defined $r->{charset} or $r->{charset} ne $input_charset); $r->{charset} = $input_charset; - } + } + + ## TODO: Support for HTTP Content-Encoding $r->{s} = ''.$res->content; + + require Whatpm::ContentType; + ($r->{official_type}, $r->{media_type}) + = Whatpm::ContentType->get_sniffed_type + (get_file_head => sub { + return substr $r->{s}, 0, shift; + }, + http_content_type_byte => $ct, + has_http_content_encoding => + defined $res->header ('Content-Encoding'), + supported_image_types => {}); } else { $r->{uri} = $res->request->uri; $r->{request_uri} = $request_uri; @@ -1017,7 +686,18 @@ $r->{charset} = ''.$http->get_parameter ('_charset_'); $r->{charset} =~ s/\s+//g; $r->{charset} = 'utf-8' if $r->{charset} eq ''; + $r->{official_charset} = $r->{charset}; $r->{header_field} = []; + + require Whatpm::ContentType; + ($r->{official_type}, $r->{media_type}) + = Whatpm::ContentType->get_sniffed_type + (get_file_head => sub { + return substr $r->{s}, 0, shift; + }, + http_content_type_byte => undef, + has_http_content_encoding => 0, + supported_image_types => {}); } my $input_format = $http->get_parameter ('i'); @@ -1034,6 +714,7 @@ if ($r->{media_type} eq 'text/xml') { unless (defined $r->{charset}) { $r->{charset} = 'us-ascii'; + $r->{official_charset} = $r->{charset}; } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') { $r->{charset_overridden} = 0; } @@ -1045,6 +726,8 @@ return $r; } + $r->{inner_html_element} = $http->get_parameter ('e'); + return $r; } # get_input_document @@ -1077,11 +760,11 @@ =head1 LICENSE -Copyright 2007 Wakaba