--- test/html-whatpm/parser-manakai.cgi 2007/07/15 07:53:00 1.5 +++ test/html-whatpm/parser-manakai.cgi 2008/12/11 03:22:57 1.9 @@ -2,22 +2,20 @@ use strict; use lib qw[/home/httpd/html/www/markup/html/whatpm - /home/wakaba/work/manakai/lib - /home/wakaba/public_html/-temp/wiki/lib]; + /home/wakaba/work/manakai2/lib]; use CGI::Carp qw[fatalsToBrowser]; use Time::HiRes qw/time/; -use SuikaWiki::Input::HTTP; ## TODO: Use some better CGI module - -my $http = SuikaWiki::Input::HTTP->new; +use Message::CGI::HTTP; +my $http = Message::CGI::HTTP->new; ## TODO: _charset_ -my @mode = split m#/#, scalar $http->meta_variable ('PATH_INFO'), -1; +my @mode = split m#/#, scalar $http->get_meta_variable ('PATH_INFO'), -1; shift @mode if @mode and $mode[0] == ''; ## TODO: decode unreserved characters - my $s = $http->parameter ('s'); + my $s = $http->get_parameter ('s'); if (length $s > 1000_000) { print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long"; exit; @@ -33,8 +31,9 @@ my $doc; my $el; + if (@mode == 3 and $mode[0] eq 'html' and - ($mode[2] eq 'html' or $mode[2] eq 'test')) { + ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) { print STDOUT "Content-Type: text/plain; charset=utf-8\n\n"; require Encode; @@ -50,7 +49,7 @@ my $onerror = sub { my (%opt) = @_; - print STDOUT "$opt{line},$opt{column},$opt{type}\n"; + print STDOUT "$opt{line},$opt{column},$opt{type};$opt{level};$opt{value}\n"; }; $doc = $dom->create_document; @@ -74,16 +73,80 @@ $out = \( ($el or $doc)->inner_html ); $time2 = time; $time{serialize_html} = $time2 - $time1; + } elsif ($mode[2] eq 'xml') { + $doc->manakai_is_html (0); + $time1 = time; + $out = \( ($el or $doc)->inner_html ); + $time2 = time; + $time{serialize_xml} = $time2 - $time1; + $doc->manakai_is_html (1); + } else { # test + require Whatpm::HTML::Dumper; + $time1 = time; + $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc); + $time2 = time; + $time{serialize_test} = $time2 - $time1; + } + print STDOUT Encode::encode ('utf-8', $$out); + print STDOUT "\n"; +} elsif (@mode == 3 and $mode[0] eq 'xml1' and + ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) { + print STDOUT "Content-Type: text/plain; charset=utf-8\n\n"; + + require Encode; + require Whatpm::XML::Parser; + + $time1 = time; + $s = Encode::decode ('utf-8', $s); + $time2 = time; + $time{decode} = $time2 - $time1; + + print STDOUT "#errors\n"; + + my $onerror = sub { + my (%opt) = @_; + print STDOUT "$opt{line},$opt{column},$opt{type};$opt{level};$opt{value}\n"; + }; + + $doc = $dom->create_document; + $time1 = time; +## TODO: + #if (length $mode[1]) { + # $el = $doc->create_element_ns + # ('http://www.w3.org/1999/xhtml', [undef, $mode[1]]); + # #Whatpm::HTML->set_inner_html ($el, $s, $onerror); + #} else { + Whatpm::XML::Parser->parse_char_string ($s => $doc, $onerror); + #} + $time2 = time; + $time{parse_xml1} = $time2 - $time1; + + print "#document\n"; + + my $out; + if ($mode[2] eq 'html') { + $doc->manakai_is_html (1); + $time1 = time; + $out = \( ($el or $doc)->inner_html ); + $time2 = time; + $time{serialize_html} = $time2 - $time1; + $doc->manakai_is_html (0); + } elsif ($mode[2] eq 'xml') { + $time1 = time; + $out = \( ($el or $doc)->inner_html ); + $time2 = time; + $time{serialize_xml} = $time2 - $time1; } else { # test + require Whatpm::HTML::Dumper; $time1 = time; - $out = test_serialize ($el || $doc); + $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc); $time2 = time; $time{serialize_test} = $time2 - $time1; } print STDOUT Encode::encode ('utf-8', $$out); print STDOUT "\n"; } elsif (@mode == 3 and $mode[0] eq 'xhtml' and - ($mode[2] eq 'html' or $mode[2] eq 'test')) { + ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) { print STDOUT "Content-Type: text/plain; charset=utf-8\n\n"; require Message::DOM::XMLParserTemp; @@ -108,20 +171,67 @@ my $out; if ($mode[2] eq 'html') { + $doc->manakai_is_html (0); $time1 = time; $out = \( $doc->inner_html ); ## TODO: $el case $time2 = time; + $time{serialize_html} = $time2 - $time1; + $doc->manakai_is_html (1); + } elsif ($mode[2] eq 'xml') { + $time1 = time; + $out = \( $doc->inner_html ); ## TODO: $el case + $time2 = time; + $time{serialize_xml} = $time2 - $time1; + } else { # test + require Whatpm::HTML::Dumper; + $time1 = time; + $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc); + $time2 = time; + $time{serialize_test} = $time2 - $time1; + } + print STDOUT Encode::encode ('utf-8', $$out); + print STDOUT "\n"; +} elsif (@mode == 3 and $mode[0] eq 'swml' and $mode[1] eq '' and + ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) { + print STDOUT "Content-Type: text/plain; charset=utf-8\n\n"; + + require Encode; + $time1 = time; + $s = Encode::decode ('utf-8', $s); + $time2 = time; + $time{decode} = $time2 - $time1; + + require Whatpm::SWML::Parser; + $doc = $dom->create_document; + my $p = Whatpm::SWML::Parser->new; + $p->parse_char_string ($s => $doc); + + print "#document\n"; + + my $out; + if ($mode[2] eq 'html') { + $doc->manakai_is_html (0); + $time1 = time; + $out = \( $doc->inner_html ); + $time2 = time; + $time{serialize_html} = $time2 - $time1; + $doc->manakai_is_html (1); + } elsif ($mode[2] eq 'xml') { + $time1 = time; + $out = \( $doc->inner_html ); + $time2 = time; $time{serialize_xml} = $time2 - $time1; } else { # test + require Whatpm::HTML::Dumper; $time1 = time; - $out = test_serialize ($doc); + $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc); $time2 = time; $time{serialize_test} = $time2 - $time1; } print STDOUT Encode::encode ('utf-8', $$out); print STDOUT "\n"; } elsif (@mode == 3 and $mode[0] eq 'h2h' and $mode[1] eq '' and - ($mode[2] eq 'html' or $mode[2] eq 'test')) { + ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) { print STDOUT "Content-Type: text/plain; charset=utf-8\n\n"; require Encode; @@ -138,13 +248,21 @@ my $out; if ($mode[2] eq 'html') { + $doc->manakai_is_html (0); + $time1 = time; + $out = \( $doc->inner_html ); + $time2 = time; + $time{serialize_html} = $time2 - $time1; + $doc->manakai_is_html (1); + } elsif ($mode[2] eq 'xml') { $time1 = time; $out = \( $doc->inner_html ); $time2 = time; $time{serialize_xml} = $time2 - $time1; } else { # test + require Whatpm::HTML::Dumper; $time1 = time; - $out = test_serialize ($doc); + $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc); $time2 = time; $time{serialize_test} = $time2 - $time1; } @@ -155,7 +273,7 @@ exit; } - if ($http->parameter ('dom5')) { + if ($http->get_parameter ('dom5')) { require Whatpm::ContentChecker; my $onerror = sub { my %opt = @_; @@ -173,13 +291,15 @@ } print STDOUT "#log\n"; - for (qw/decode parse parse_xml serialize_html serialize_xml serialize_test + for (qw/decode parse parse_xml parse_xml1 + serialize_html serialize_xml serialize_test check/) { next unless defined $time{$_}; print STDOUT { decode => 'bytes->chars', parse => 'html5(chars)->dom5', - parse_xml => 'xml1(chars)->dom5', + parse_xml => 'xml(chars)->dom5', + parse_xml1 => 'xml1(chars)->dom5', serialize_html => 'dom5->html5(char)', serialize_xml => 'dom5->xml1(char)', serialize_test => 'dom5->test(char)', @@ -192,44 +312,6 @@ exit; -sub test_serialize ($) { - my $node = shift; - my $r = ''; - - my @node = map { [$_, ''] } @{$node->child_nodes}; - while (@node) { - my $child = shift @node; - my $nt = $child->[0]->node_type; - if ($nt == $child->[0]->ELEMENT_NODE) { - $r .= '| ' . $child->[1] . '<' . $child->[0]->tag_name . ">\x0A"; ## ISSUE: case? - - for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value] } - @{$child->[0]->attributes}) { - $r .= '| ' . $child->[1] . ' ' . $attr->[0] . '="'; ## ISSUE: case? - $r .= $attr->[1] . '"' . "\x0A"; - } - - unshift @node, - map { [$_, $child->[1] . ' '] } @{$child->[0]->child_nodes}; - } elsif ($nt == $child->[0]->TEXT_NODE) { - $r .= '| ' . $child->[1] . '"' . $child->[0]->data . '"' . "\x0A"; - } elsif ($nt == $child->[0]->CDATA_SECTION_NODE) { - $r .= '| ' . $child->[1] . '[0]->data . "]]>\x0A"; - } elsif ($nt == $child->[0]->COMMENT_NODE) { - $r .= '| ' . $child->[1] . '\x0A"; - } elsif ($nt == $child->[0]->DOCUMENT_TYPE_NODE) { - $r .= '| ' . $child->[1] . '[0]->name . ">\x0A"; - } elsif ($nt == $child->[0]->PROCESSING_INSTRUCTION_NODE) { - $r .= '| ' . $child->[1] . '[0]->target . ' ' . - $child->[0]->data . "?>\x0A"; - } else { - $r .= '| ' . $child->[1] . $child->[0]->node_type . "\x0A"; # error - } - } - - return \$r; -} # test_serialize - sub get_node_path ($) { my $node = shift; my @r; @@ -262,11 +344,11 @@ =head1 LICENSE -Copyright 2007 Wakaba +Copyright 2007-2008 Wakaba This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut -## $Date: 2007/07/15 07:53:00 $ +## $Date: 2008/12/11 03:22:57 $