/[suikacvs]/test/html-whatpm/parser-manakai.cgi
Suika

Diff of /test/html-whatpm/parser-manakai.cgi

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2 by wakaba, Mon Jun 25 00:15:12 2007 UTC revision 1.9 by wakaba, Thu Dec 11 03:22:57 2008 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -d:DProf  #!/usr/bin/perl
2  use strict;  use strict;
3    
4  use lib qw[/home/httpd/html/www/markup/html/whatpm  use lib qw[/home/httpd/html/www/markup/html/whatpm
5             /home/wakaba/work/manakai/lib             /home/wakaba/work/manakai2/lib];
            /home/wakaba/public_html/-temp/wiki/lib];  
6  use CGI::Carp qw[fatalsToBrowser];  use CGI::Carp qw[fatalsToBrowser];
7  use Time::HiRes qw/time/;  use Time::HiRes qw/time/;
8    
9  use SuikaWiki::Input::HTTP; ## TODO: Use some better CGI module  use Message::CGI::HTTP;
10    my $http = Message::CGI::HTTP->new;
 my $http = SuikaWiki::Input::HTTP->new;  
11    
12  ## TODO: _charset_  ## TODO: _charset_
13    
14  my @mode = split m#/#, scalar $http->meta_variable ('PATH_INFO'), -1;  my @mode = split m#/#, scalar $http->get_meta_variable ('PATH_INFO'), -1;
15  shift @mode if @mode and $mode[0] == '';  shift @mode if @mode and $mode[0] == '';
16  ## TODO: decode unreserved characters  ## TODO: decode unreserved characters
17    
18    my $s = $http->parameter ('s');    my $s = $http->get_parameter ('s');
19    if (length $s > 1000_000) {    if (length $s > 1000_000) {
20      print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long";      print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long";
21      exit;      exit;
# Line 28  shift @mode if @mode and $mode[0] == ''; Line 26  shift @mode if @mode and $mode[0] == '';
26    my $time2;    my $time2;
27    
28    require Message::DOM::DOMImplementation;    require Message::DOM::DOMImplementation;
29    my $dom = Message::DOM::DOMImplementation->____new;    my $dom = Message::DOM::DOMImplementation->new;
30  #  $| = 1;  #  $| = 1;
31    my $doc;    my $doc;
32    my $el;    my $el;
33    
34    
35  if (@mode == 3 and $mode[0] eq 'html' and  if (@mode == 3 and $mode[0] eq 'html' and
36      ($mode[2] eq 'html' or $mode[2] eq 'test')) {      ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
37    print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";    print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
38    
39    require Encode;    require Encode;
# Line 50  if (@mode == 3 and $mode[0] eq 'html' an Line 49  if (@mode == 3 and $mode[0] eq 'html' an
49    
50    my $onerror = sub {    my $onerror = sub {
51      my (%opt) = @_;      my (%opt) = @_;
52      print STDOUT "$opt{line},$opt{column},$opt{type}\n";      print STDOUT "$opt{line},$opt{column},$opt{type};$opt{level};$opt{value}\n";
53    };    };
54    
55    $doc = $dom->create_document;    $doc = $dom->create_document;
56      $doc->manakai_is_html (1);
57    $time1 = time;    $time1 = time;
58    if (length $mode[1]) {    if (length $mode[1]) {
59      $el = $doc->create_element_ns      $el = $doc->create_element_ns
# Line 70  if (@mode == 3 and $mode[0] eq 'html' an Line 70  if (@mode == 3 and $mode[0] eq 'html' an
70    my $out;    my $out;
71    if ($mode[2] eq 'html') {    if ($mode[2] eq 'html') {
72      $time1 = time;      $time1 = time;
73      $out = Whatpm::HTML->get_inner_html ($el || $doc);      $out = \( ($el or $doc)->inner_html );
74        $time2 = time;
75        $time{serialize_html} = $time2 - $time1;
76      } elsif ($mode[2] eq 'xml') {
77        $doc->manakai_is_html (0);
78        $time1 = time;
79        $out = \( ($el or $doc)->inner_html );
80        $time2 = time;
81        $time{serialize_xml} = $time2 - $time1;
82        $doc->manakai_is_html (1);
83      } else { # test
84        require Whatpm::HTML::Dumper;
85        $time1 = time;
86        $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
87        $time2 = time;
88        $time{serialize_test} = $time2 - $time1;
89      }
90      print STDOUT Encode::encode ('utf-8', $$out);
91      print STDOUT "\n";
92    } elsif (@mode == 3 and $mode[0] eq 'xml1' and
93        ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
94      print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
95    
96      require Encode;
97      require Whatpm::XML::Parser;
98    
99      $time1 = time;
100      $s = Encode::decode ('utf-8', $s);
101      $time2 = time;
102      $time{decode} = $time2 - $time1;
103      
104      print STDOUT "#errors\n";
105    
106      my $onerror = sub {
107        my (%opt) = @_;
108        print STDOUT "$opt{line},$opt{column},$opt{type};$opt{level};$opt{value}\n";
109      };
110    
111      $doc = $dom->create_document;
112      $time1 = time;
113    ## TODO:
114      #if (length $mode[1]) {
115      #  $el = $doc->create_element_ns
116      #      ('http://www.w3.org/1999/xhtml', [undef, $mode[1]]);
117      #  #Whatpm::HTML->set_inner_html ($el, $s, $onerror);
118      #} else {
119        Whatpm::XML::Parser->parse_char_string ($s => $doc, $onerror);
120      #}
121      $time2 = time;
122      $time{parse_xml1} = $time2 - $time1;
123    
124      print "#document\n";
125    
126      my $out;
127      if ($mode[2] eq 'html') {
128        $doc->manakai_is_html (1);
129        $time1 = time;
130        $out = \( ($el or $doc)->inner_html );
131      $time2 = time;      $time2 = time;
132      $time{serialize_html} = $time2 - $time1;      $time{serialize_html} = $time2 - $time1;
133        $doc->manakai_is_html (0);
134      } elsif ($mode[2] eq 'xml') {
135        $time1 = time;
136        $out = \( ($el or $doc)->inner_html );
137        $time2 = time;
138        $time{serialize_xml} = $time2 - $time1;
139    } else { # test    } else { # test
140        require Whatpm::HTML::Dumper;
141      $time1 = time;      $time1 = time;
142      $out = test_serialize ($el || $doc);      $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
143      $time2 = time;      $time2 = time;
144      $time{serialize_test} = $time2 - $time1;      $time{serialize_test} = $time2 - $time1;
145    }    }
146    print STDOUT Encode::encode ('utf-8', $$out);    print STDOUT Encode::encode ('utf-8', $$out);
147    print STDOUT "\n";    print STDOUT "\n";
148  } elsif (@mode == 3 and $mode[0] eq 'xhtml' and  } elsif (@mode == 3 and $mode[0] eq 'xhtml' and
149           ($mode[2] eq 'html' or $mode[2] eq 'test')) {           ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
150    print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";    print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
151    
152    require Message::DOM::XMLParserTemp;    require Message::DOM::XMLParserTemp;
# Line 107  if (@mode == 3 and $mode[0] eq 'html' an Line 171  if (@mode == 3 and $mode[0] eq 'html' an
171    
172    my $out;    my $out;
173    if ($mode[2] eq 'html') {    if ($mode[2] eq 'html') {
174      ## TODO: Use XHTML serializer      $doc->manakai_is_html (0);
175      #$out = Whatpm::HTML->get_inner_html ($doc);      $time1 = time;
176        $out = \( $doc->inner_html ); ## TODO: $el case
177        $time2 = time;
178        $time{serialize_html} = $time2 - $time1;
179        $doc->manakai_is_html (1);
180      } elsif ($mode[2] eq 'xml') {
181        $time1 = time;
182        $out = \( $doc->inner_html ); ## TODO: $el case
183        $time2 = time;
184        $time{serialize_xml} = $time2 - $time1;
185    } else { # test    } else { # test
186        require Whatpm::HTML::Dumper;
187      $time1 = time;      $time1 = time;
188      $out = test_serialize ($doc);      $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
189        $time2 = time;
190        $time{serialize_test} = $time2 - $time1;
191      }
192      print STDOUT Encode::encode ('utf-8', $$out);
193      print STDOUT "\n";
194    } elsif (@mode == 3 and $mode[0] eq 'swml' and $mode[1] eq '' and
195             ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
196      print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
197    
198      require Encode;
199      $time1 = time;
200      $s = Encode::decode ('utf-8', $s);
201      $time2 = time;
202      $time{decode} = $time2 - $time1;
203    
204      require Whatpm::SWML::Parser;
205      $doc = $dom->create_document;
206      my $p = Whatpm::SWML::Parser->new;
207      $p->parse_char_string ($s => $doc);
208    
209      print "#document\n";
210    
211      my $out;
212      if ($mode[2] eq 'html') {
213        $doc->manakai_is_html (0);
214        $time1 = time;
215        $out = \( $doc->inner_html );
216        $time2 = time;
217        $time{serialize_html} = $time2 - $time1;
218        $doc->manakai_is_html (1);
219      } elsif ($mode[2] eq 'xml') {
220        $time1 = time;
221        $out = \( $doc->inner_html );
222        $time2 = time;
223        $time{serialize_xml} = $time2 - $time1;
224      } else { # test
225        require Whatpm::HTML::Dumper;
226        $time1 = time;
227        $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
228        $time2 = time;
229        $time{serialize_test} = $time2 - $time1;
230      }
231      print STDOUT Encode::encode ('utf-8', $$out);
232      print STDOUT "\n";
233    } elsif (@mode == 3 and $mode[0] eq 'h2h' and $mode[1] eq '' and
234             ($mode[2] eq 'html' or $mode[2] eq 'test' or $mode[2] eq 'xml')) {
235      print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
236    
237      require Encode;
238      $time1 = time;
239      $s = Encode::decode ('utf-8', $s);
240      $time2 = time;
241      $time{decode} = $time2 - $time1;
242    
243      require Whatpm::H2H;
244      $doc = $dom->create_document;
245      Whatpm::H2H->parse_string ($s => $doc);
246    
247      print "#document\n";
248    
249      my $out;
250      if ($mode[2] eq 'html') {
251        $doc->manakai_is_html (0);
252        $time1 = time;
253        $out = \( $doc->inner_html );
254        $time2 = time;
255        $time{serialize_html} = $time2 - $time1;
256        $doc->manakai_is_html (1);
257      } elsif ($mode[2] eq 'xml') {
258        $time1 = time;
259        $out = \( $doc->inner_html );
260        $time2 = time;
261        $time{serialize_xml} = $time2 - $time1;
262      } else { # test
263        require Whatpm::HTML::Dumper;
264        $time1 = time;
265        $out = \Whatpm::HTML::Dumper::dumptree ($el || $doc);
266      $time2 = time;      $time2 = time;
267      $time{serialize_test} = $time2 - $time1;      $time{serialize_test} = $time2 - $time1;
268    }    }
# Line 122  if (@mode == 3 and $mode[0] eq 'html' an Line 273  if (@mode == 3 and $mode[0] eq 'html' an
273    exit;    exit;
274  }  }
275    
276    if ($http->parameter ('dom5')) {    if ($http->get_parameter ('dom5')) {
277      require Whatpm::ContentChecker;      require Whatpm::ContentChecker;
278      my $onerror = sub {      my $onerror = sub {
279        my %opt = @_;        my %opt = @_;
# Line 140  if (@mode == 3 and $mode[0] eq 'html' an Line 291  if (@mode == 3 and $mode[0] eq 'html' an
291    }    }
292    
293    print STDOUT "#log\n";    print STDOUT "#log\n";
294    for (qw/decode parse parse_xml serialize_html serialize_xml serialize_test    for (qw/decode parse parse_xml parse_xml1
295              serialize_html serialize_xml serialize_test
296            check/) {            check/) {
297      next unless defined $time{$_};      next unless defined $time{$_};
298      print STDOUT {      print STDOUT {
299        decode => 'bytes->chars',        decode => 'bytes->chars',
300        parse => 'html5(chars)->dom5',        parse => 'html5(chars)->dom5',
301        parse_xml => 'xml1(chars)->dom5',        parse_xml => 'xml(chars)->dom5',
302          parse_xml1 => 'xml1(chars)->dom5',
303        serialize_html => 'dom5->html5(char)',        serialize_html => 'dom5->html5(char)',
304        serialize_xml => 'dom5->xml1(char)',        serialize_xml => 'dom5->xml1(char)',
305        serialize_test => 'dom5->test(char)',        serialize_test => 'dom5->test(char)',
# Line 159  if (@mode == 3 and $mode[0] eq 'html' an Line 312  if (@mode == 3 and $mode[0] eq 'html' an
312    
313  exit;  exit;
314    
 sub test_serialize ($) {  
   my $node = shift;  
   my $r = '';  
   
   my @node = map { [$_, ''] } @{$node->child_nodes};  
   while (@node) {  
     my $child = shift @node;  
     my $nt = $child->[0]->node_type;  
     if ($nt == $child->[0]->ELEMENT_NODE) {  
       $r .= '| ' . $child->[1] . '<' . $child->[0]->tag_name . ">\x0A"; ## ISSUE: case?  
   
       for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value] }  
                     @{$child->[0]->attributes}) {  
         $r .= '| ' . $child->[1] . '  ' . $attr->[0] . '="'; ## ISSUE: case?  
         $r .= $attr->[1] . '"' . "\x0A";  
       }  
         
       unshift @node,  
         map { [$_, $child->[1] . '  '] } @{$child->[0]->child_nodes};  
     } elsif ($nt == $child->[0]->TEXT_NODE) {  
       $r .= '| ' . $child->[1] . '"' . $child->[0]->data . '"' . "\x0A";  
     } elsif ($nt == $child->[0]->CDATA_SECTION_NODE) {  
       $r .= '| ' . $child->[1] . '<![CDATA[' . $child->[0]->data . "]]>\x0A";  
     } elsif ($nt == $child->[0]->COMMENT_NODE) {  
       $r .= '| ' . $child->[1] . '<!-- ' . $child->[0]->data . " -->\x0A";  
     } elsif ($nt == $child->[0]->DOCUMENT_TYPE_NODE) {  
       $r .= '| ' . $child->[1] . '<!DOCTYPE ' . $child->[0]->name . ">\x0A";  
     } elsif ($nt == $child->[0]->PROCESSING_INSTRUCTION_NODE) {  
       $r .= '| ' . $child->[1] . '<?' . $child->[0]->target . ' ' .  
           $child->[0]->data . "?>\x0A";  
     } else {  
       $r .= '| ' . $child->[1] . $child->[0]->node_type . "\x0A"; # error  
     }  
   }  
     
   return \$r;  
 } # test_serialize  
   
315  sub get_node_path ($) {  sub get_node_path ($) {
316    my $node = shift;    my $node = shift;
317    my @r;    my @r;
# Line 229  Wakaba <w@suika.fam.cx>. Line 344  Wakaba <w@suika.fam.cx>.
344    
345  =head1 LICENSE  =head1 LICENSE
346    
347  Copyright 2007 Wakaba <w@suika.fam.cx>  Copyright 2007-2008 Wakaba <w@suika.fam.cx>
348    
349  This library is free software; you can redistribute it  This library is free software; you can redistribute it
350  and/or modify it under the same terms as Perl itself.  and/or modify it under the same terms as Perl itself.

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.9

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24