--- test/html-webhacc/cc.cgi 2007/06/27 11:08:03 1.1 +++ test/html-webhacc/cc.cgi 2008/08/14 15:50:42 1.62 @@ -2,226 +2,137 @@ use strict; use lib qw[/home/httpd/html/www/markup/html/whatpm - /home/wakaba/work/manakai/lib - /home/wakaba/public_html/-temp/wiki/lib]; + /home/wakaba/work/manakai2/lib]; use CGI::Carp qw[fatalsToBrowser]; -use Time::HiRes qw/time/; -use SuikaWiki::Input::HTTP; ## TODO: Use some better CGI module + require WebHACC::Input; -my $http = SuikaWiki::Input::HTTP->new; +{ + require Message::CGI::HTTP; + my $http = Message::CGI::HTTP->new; + + require WebHACC::Output; + my $out = WebHACC::Output->new; + $out->handle (*STDOUT); + $out->set_utf8; -## TODO: _charset_ - -my @mode = split m#/#, scalar $http->meta_variable ('PATH_INFO'), -1; -shift @mode if @mode and $mode[0] == ''; -## TODO: decode unreserved characters - - my $s = $http->parameter ('s'); - if (length $s > 1000_000) { - print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long"; + if ($http->get_meta_variable ('PATH_INFO') ne '/') { + $out->http_error (404); exit; } - my $char_length = length $s; - my %time; - my $time1; - my $time2; - - require Message::DOM::DOMImplementation; - my $dom = Message::DOM::DOMImplementation->____new; -# $| = 1; - my $doc; - my $el; - -if (@mode == 3 and $mode[0] eq 'html' and - ($mode[2] eq 'html' or $mode[2] eq 'test')) { - print STDOUT "Content-Type: text/plain; charset=utf-8\n\n"; - - require Encode; - require Whatpm::HTML; - - $time1 = time; - $s = Encode::decode ('utf-8', $s); - $time2 = time; - $time{decode} = $time2 - $time1; + + ## TODO: We need real conneg support... + my $primary_language = 'en'; + if ($ENV{HTTP_ACCEPT_LANGUAGE} =~ /ja/) { + $primary_language = 'ja'; + } + $out->load_text_catalog ($primary_language); + $out->set_flush; + $out->http_header; + $out->html_header; + $out->unset_flush; + + $out->generate_input_section ($http); + + my $u = $http->get_parameter ('uri'); + my $s = $http->get_parameter ('s'); + if ((not defined $u or not length $u) and + (not defined $s or not length $s)) { + exit; + } - print STDOUT "#errors\n"; + require WebHACC::Result; + my $result = WebHACC::Result->new; + $result->output ($out); - my $onerror = sub { - my (%opt) = @_; - print STDOUT "$opt{line},$opt{column},$opt{type}\n"; - }; - - $doc = $dom->create_document; - $time1 = time; - if (length $mode[1]) { - $el = $doc->create_element_ns - ('http://www.w3.org/1999/xhtml', [undef, $mode[1]]); - Whatpm::HTML->set_inner_html ($el, $s, $onerror); - } else { - Whatpm::HTML->parse_string ($s => $doc, $onerror); - } - $time2 = time; - $time{parse} = $time2 - $time1; + require WebHACC::Input; + my $input = WebHACC::Input->get_document ($http => $result => $out); - print "#document\n"; + check_and_print ($input => $result => $out); + + $out->nav_list; - my $out; - if ($mode[2] eq 'html') { - $time1 = time; - $out = Whatpm::HTML->get_inner_html ($el || $doc); - $time2 = time; - $time{serialize_html} = $time2 - $time1; - } else { # test - $time1 = time; - $out = test_serialize ($el || $doc); - $time2 = time; - $time{serialize_test} = $time2 - $time1; - } - print STDOUT Encode::encode ('utf-8', $$out); - print STDOUT "\n"; -} elsif (@mode == 3 and $mode[0] eq 'xhtml' and - ($mode[2] eq 'html' or $mode[2] eq 'test')) { - print STDOUT "Content-Type: text/plain; charset=utf-8\n\n"; - - require Message::DOM::XMLParserTemp; - print STDOUT "#errors\n"; - - my $onerror = sub { - my $err = shift; - print STDOUT $err->location->line_number, ","; - print STDOUT $err->location->column_number, ","; - print STDOUT $err->text, "\n"; - return 1; - }; - - open my $fh, '<', \$s; - my $time1 = time; - $doc = Message::DOM::XMLParserTemp->parse_byte_stream - ($fh => $dom, $onerror, charset => 'utf-8'); - my $time2 = time; - $time{parse_xml} = $time2 - $time1; - - print "#document\n"; - - my $out; - if ($mode[2] eq 'html') { - ## TODO: Use XHTML serializer - #$out = Whatpm::HTML->get_inner_html ($doc); - } else { # test - $time1 = time; - $out = test_serialize ($doc); - $time2 = time; - $time{serialize_test} = $time2 - $time1; - } - print STDOUT Encode::encode ('utf-8', $$out); - print STDOUT "\n"; -} else { - print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n404"; exit; } - if ($http->parameter ('dom5')) { - require Whatpm::ContentChecker; - my $onerror = sub { - my %opt = @_; - print STDOUT get_node_path ($opt{node}) . ';' . $opt{type} . "\n"; - }; - print STDOUT "#domerrors\n"; - $time1 = time; - if ($el) { - Whatpm::ContentChecker->check_element ($el, $onerror); - } else { - Whatpm::ContentChecker->check_document ($doc, $onerror); - } - $time2 = time; - $time{check} = $time2 - $time1; - } - - print STDOUT "#log\n"; - for (qw/decode parse parse_xml serialize_html serialize_xml serialize_test - check/) { - next unless defined $time{$_}; - print STDOUT { - decode => 'bytes->chars', - parse => 'html5(chars)->dom5', - parse_xml => 'xml1(chars)->dom5', - serialize_html => 'dom5->html5(char)', - serialize_xml => 'dom5->xml1(char)', - serialize_test => 'dom5->test(char)', - check => 'dom5 check', - }->{$_}; - print STDOUT "\t", $time{$_}, "s\n"; - open my $file, '>>', ".manakai-$_.txt" or die ".manakai-$_.txt: $!"; - print $file $char_length, "\t", $time{$_}, "\n"; +sub check_and_print ($$$) { + my ($input, $result, $out) = @_; + my $original_input = $out->input; + $out->input ($input); + + $input->generate_info_section ($result); + + $input->generate_transfer_sections ($result); + + unless (defined $input->{s}) { + ## NOTE: This is an error of the implementation. + $result->layer_uncertain ('transfer'); + $result->generate_result_section; + return; + } + + my $checker_class = { + 'text/cache-manifest' => 'WebHACC::Language::CacheManifest', + 'text/css' => 'WebHACC::Language::CSS', + 'text/html' => 'WebHACC::Language::HTML', + 'text/x-webidl' => 'WebHACC::Language::WebIDL', + + 'text/xml' => 'WebHACC::Language::XML', + 'application/atom+xml' => 'WebHACC::Language::XML', + 'application/rss+xml' => 'WebHACC::Language::XML', + 'image/svg+xml' => 'WebHACC::Language::XML', + 'application/xhtml+xml' => 'WebHACC::Language::XML', + 'application/xml' => 'WebHACC::Language::XML', + ## TODO: Should we make all XML MIME Types fall + ## into this category? + + ## NOTE: This type has different model from normal XML types. + 'application/rdf+xml' => 'WebHACC::Language::XML', + }->{$input->{media_type}} || 'WebHACC::Language::Default'; + + eval qq{ require $checker_class } or die "$0: Loading $checker_class: $@"; + my $checker = $checker_class->new; + $checker->input ($input); + $checker->output ($out); + $checker->result ($result); + + ## TODO: A cache manifest MUST be text/cache-manifest + ## TODO: WebIDL media type "text/x-webidl" + + $checker->generate_syntax_error_section; + $checker->generate_source_string_section; + + my @subdoc; + $checker->onsubdoc (sub { + push @subdoc, shift; + }); + + $checker->generate_structure_dump_section; + $checker->generate_structure_error_section; + $checker->generate_additional_sections; + + my $id_prefix = 0; + for my $_subinput (@subdoc) { + my $subinput = WebHACC::Input::Subdocument->new (++$id_prefix); + $subinput->{$_} = $_subinput->{$_} for keys %$_subinput; + $subinput->{base_uri} = $subinput->{container_node}->base_uri + unless defined $subinput->{base_uri}; + $subinput->{parent_input} = $input; + + my $subresult = WebHACC::Result->new; + $subresult->output ($out); + $subresult->parent_result ($result); + + $subinput->start_section ($subresult); + check_and_print ($subinput => $subresult => $out); + $subinput->end_section ($subresult); } -exit; + $result->generate_result_section; -sub test_serialize ($) { - my $node = shift; - my $r = ''; - - my @node = map { [$_, ''] } @{$node->child_nodes}; - while (@node) { - my $child = shift @node; - my $nt = $child->[0]->node_type; - if ($nt == $child->[0]->ELEMENT_NODE) { - $r .= '| ' . $child->[1] . '<' . $child->[0]->tag_name . ">\x0A"; ## ISSUE: case? - - for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value] } - @{$child->[0]->attributes}) { - $r .= '| ' . $child->[1] . ' ' . $attr->[0] . '="'; ## ISSUE: case? - $r .= $attr->[1] . '"' . "\x0A"; - } - - unshift @node, - map { [$_, $child->[1] . ' '] } @{$child->[0]->child_nodes}; - } elsif ($nt == $child->[0]->TEXT_NODE) { - $r .= '| ' . $child->[1] . '"' . $child->[0]->data . '"' . "\x0A"; - } elsif ($nt == $child->[0]->CDATA_SECTION_NODE) { - $r .= '| ' . $child->[1] . '[0]->data . "]]>\x0A"; - } elsif ($nt == $child->[0]->COMMENT_NODE) { - $r .= '| ' . $child->[1] . '\x0A"; - } elsif ($nt == $child->[0]->DOCUMENT_TYPE_NODE) { - $r .= '| ' . $child->[1] . '[0]->name . ">\x0A"; - } elsif ($nt == $child->[0]->PROCESSING_INSTRUCTION_NODE) { - $r .= '| ' . $child->[1] . '[0]->target . ' ' . - $child->[0]->data . "?>\x0A"; - } else { - $r .= '| ' . $child->[1] . $child->[0]->node_type . "\x0A"; # error - } - } - - return \$r; -} # test_serialize - -sub get_node_path ($) { - my $node = shift; - my @r; - while (defined $node) { - my $rs; - if ($node->node_type == 1) { - $rs = $node->manakai_local_name; - $node = $node->parent_node; - } elsif ($node->node_type == 2) { - $rs = '@' . $node->manakai_local_name; - $node = $node->owner_element; - } elsif ($node->node_type == 3) { - $rs = '"' . $node->data . '"'; - $node = $node->parent_node; - } elsif ($node->node_type == 9) { - $rs = ''; - $node = $node->parent_node; - } else { - $rs = '#' . $node->node_type; - $node = $node->parent_node; - } - unshift @r, $rs; - } - return join '/', @r; -} # get_node_path + $out->input ($original_input); +} # check_and_print =head1 AUTHOR @@ -229,11 +140,11 @@ =head1 LICENSE -Copyright 2007 Wakaba +Copyright 2007-2008 Wakaba This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut -## $Date: 2007/06/27 11:08:03 $ +## $Date: 2008/08/14 15:50:42 $