/[pub]/test/html-whatpm/parser.cgi
Suika

Contents of /test/html-whatpm/parser.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download)
Tue May 1 11:05:04 2007 UTC (17 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +5 -5 lines
++ ChangeLog	1 May 2007 11:04:56 -0000
	* parser.cgi: Module names are updated.

	* parser-interface.en.html: Link to Whatpm introduction
	is added.

2007-05-01  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4 wakaba 1.2 use lib qw[/home/httpd/html/www/markup/html/whatpm
5     /home/wakaba/public_html/-temp/wiki/lib];
6 wakaba 1.1
7     use SuikaWiki::Input::HTTP; ## TODO: Use some better CGI module
8    
9     my $http = SuikaWiki::Input::HTTP->new;
10    
11     ## TODO: _charset_
12    
13     my $mode = $http->meta_variable ('PATH_INFO');
14     ## TODO: decode unreserved characters
15    
16 wakaba 1.2 if ($mode eq '/html' or $mode eq '/test') {
17 wakaba 1.1 require Encode;
18 wakaba 1.3 require Whatpm::HTML;
19     require Whatpm::NanoDOM;
20 wakaba 1.1
21     my $s = $http->parameter ('s');
22     if (length $s > 1000_000) {
23     print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long";
24     exit;
25     }
26    
27     $s = Encode::decode ('utf-8', $s);
28    
29     print STDOUT "Content-Type: text/plain; charset=utf-8\n\n";
30    
31 wakaba 1.2 print STDOUT "#errors\n";
32    
33     my $onerror = sub {
34 wakaba 1.1 print STDOUT "0,0,", $_[0], "\n";
35 wakaba 1.2 };
36 wakaba 1.1
37 wakaba 1.3 my $doc = Whatpm::HTML->parse_string
38     ($s => Whatpm::NanoDOM::Document->new, $onerror);
39 wakaba 1.1
40 wakaba 1.2 print "#document\n";
41    
42     my $out;
43     if ($mode eq '/html') {
44 wakaba 1.3 $out = Whatpm::HTML->get_inner_html ($doc);
45 wakaba 1.2 } else { # test
46     $out = test_serialize ($doc);
47 wakaba 1.1 }
48 wakaba 1.2 print STDOUT Encode::encode ('utf-8', $$out);
49 wakaba 1.1 } else {
50     print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n404";
51     }
52 wakaba 1.2
53     exit;
54    
55     sub test_serialize ($) {
56     my $node = shift;
57     my $r = '';
58    
59     my @node = map { [$_, ''] } @{$node->child_nodes};
60     while (@node) {
61     my $child = shift @node;
62     my $nt = $child->[0]->node_type;
63     if ($nt == $child->[0]->ELEMENT_NODE) {
64     $r .= '| ' . $child->[1] . '<' . $child->[0]->tag_name . ">\x0A"; ## ISSUE: case?
65    
66     for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value] }
67     @{$child->[0]->attributes}) {
68     $r .= '| ' . $child->[1] . ' ' . $attr->[0] . '="'; ## ISSUE: case?
69     $r .= $attr->[1] . '"' . "\x0A";
70     }
71    
72     unshift @node,
73     map { [$_, $child->[1] . ' '] } @{$child->[0]->child_nodes};
74     } elsif ($nt == $child->[0]->TEXT_NODE) {
75     $r .= '| ' . $child->[1] . '"' . $child->[0]->data . '"' . "\x0A";
76     } elsif ($nt == $child->[0]->COMMENT_NODE) {
77     $r .= '| ' . $child->[1] . '<!-- ' . $child->[0]->data . " -->\x0A";
78     } elsif ($nt == $child->[0]->DOCUMENT_TYPE_NODE) {
79     $r .= '| ' . $child->[1] . '<!DOCTYPE ' . $child->[0]->name . ">\x0A";
80     } else {
81     $r .= '| ' . $child->[1] . $child->[0]->node_type . "\x0A"; # error
82     }
83     }
84    
85     return \$r;
86     } # test_serialize

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24