test/html-webhacc/cc.cgi

#!/usr/bin/perl
use strict;

use lib qw[/home/httpd/html/www/markup/html/whatpm
           /home/wakaba/work/manakai/lib
           /home/wakaba/public_html/-temp/wiki/lib];
use CGI::Carp qw[fatalsToBrowser];
use Scalar::Util qw[refaddr];

use SuikaWiki::Input::HTTP; ## TODO: Use some better CGI module

sub htescape ($) {
  my $s = $_[0];
  $s =~ s/&/&amp;/g;
  $s =~ s/</&lt;/g;
  $s =~ s/>/&gt;/g;
  $s =~ s/"/&quot;/g;
  $s =~ s!([\x00-\x09\x0B-\x1F\x7F-\x80])!sprintf '<var>U+%04X</var>', ord $1!ge;
  return $s;
} # htescape

my $http = SuikaWiki::Input::HTTP->new;

## TODO: _charset_

  my $input_format = $http->parameter ('i') || 'text/html';
  my $inner_html_element = $http->parameter ('e');
  my $input_uri = 'thismessage:/';

  my $s = $http->parameter ('s');
  if (length $s > 1000_000) {
    print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long";
    exit;
  }

  print STDOUT qq[Content-Type: text/html; charset=utf-8

<!DOCTYPE html>
<html lang="en">
<head>
<title>Web Document Conformance Checker (BETA)</title>
<link rel="stylesheet" href="/www/style/html/xhtml">
<style>
  q {
    white-space: pre;
    white-space: -moz-pre-wrap;
    white-space: pre-wrap;
  }
</style>
</head>
<body>
<h1>Web Document Conformance Checker (<em>beta</em>)</h1>

<dl>
<dt>Document URI</dt>
    <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input_uri]}">@{[htescape $input_uri]}</a>&gt;</code></dd>
<dt>Internet Media Type</dt>
    <dd><code class="MIME" lang="en">@{[htescape $input_format]}</code></dd>
]; # no </dl> yet

  require Message::DOM::DOMImplementation;
  my $dom = Message::DOM::DOMImplementation->____new;
  my $doc;
  my $el;

  if ($input_format eq 'text/html') {
    require Encode;
    require Whatpm::HTML;
    
    $s = Encode::decode ('utf-8', $s);

    print STDOUT qq[
<dt>Character Encoding</dt>
    <dd>(none)</dd>
</dl>

<div id="source-string" class="section">
];
    print_source_string (\$s);
    print STDOUT qq[
</div>

<div id="parse-errors" class="section">
<h2>Parse Errors</h2>

<ul>
];

  my $onerror = sub {
    my (%opt) = @_;
    if ($opt{column} > 0) {
      print STDOUT qq[<li><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}: ];
    } else {
      $opt{line}--;
      print STDOUT qq[<li><a href="#line-$opt{line}">Line $opt{line}</a>: ];
    }
    print STDOUT qq[@{[htescape $opt{type}]}</li>\n];
  };

  $doc = $dom->create_document;
  if (defined $inner_html_element and length $inner_html_element) {
    $el = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
    Whatpm::HTML->set_inner_html ($el, $s, $onerror);
  } else {
    Whatpm::HTML->parse_string ($s => $doc, $onerror);
  }

  print STDOUT qq[
</ul>
</div>
];
  } elsif ($input_format eq 'application/xhtml+xml') {
    require Message::DOM::XMLParserTemp;
    require Encode;
    
    my $t = Encode::decode ('utf-8', $s);

    print STDOUT qq[
<dt>Character Encoding</dt>
    <dd>(none)</dd>
</dl>

<div id="source-string" class="section">
];
    print_source_string (\$t);
    print STDOUT qq[
</div>

<div id="parse-errors" class="section">
<h2>Parse Errors</h2>

<ul>
];

  my $onerror = sub {
    my $err = shift;
    my $line = $err->location->line_number;
    print STDOUT qq[<li><a href="#line-$line">Line $line</a> column ];
    print STDOUT $err->location->column_number, ": ";
    print STDOUT htescape $err->text, "</li>\n";
    return 1;
  };

  open my $fh, '<', \$s;
  $doc = Message::DOM::XMLParserTemp->parse_byte_stream
      ($fh => $dom, $onerror, charset => 'utf-8');

    print STDOUT qq[
</ul>
</div>
];
  } else {
    print STDOUT qq[
</dl>

<p><em>Media type <code class="MIME" lang="en">@{[htescape $input_format]}</code> is not supported!</em></p>
];
  }


  if (defined $doc or defined $el) {
    print STDOUT qq[
<div id="document-tree" class="section">
<h2>Document Tree</h2>
];

    print_document_tree ($el || $doc);

    print STDOUT qq[
</div>

<div id="document-errors" class="section">
<h2>Document Errors</h2>

<ul>
];

    require Whatpm::ContentChecker;
    my $onerror = sub {
      my %opt = @_;
      print STDOUT qq[<li><a href="#node-@{[refaddr $opt{node}]}">],
          htescape get_node_path ($opt{node}),
          "</a>: ", htescape $opt{type}, "</li>\n";
    };

    if ($el) {
      Whatpm::ContentChecker->check_element ($el, $onerror);
    } else {
      Whatpm::ContentChecker->check_document ($doc, $onerror);
    }

    print STDOUT qq[
</ul>
</div>
];
  }

  ## TODO: Show result
  print STDOUT qq[
</body>
</html>
];

exit;

sub print_source_string ($) {
  my $s = $_[0];
  my $i = 1;
  print STDOUT qq[<ol lang="">\n];
  while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
    print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
    $i++;
  }
  if ($$s =~ /\G([^\x0A]+)/gc) {
    print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
  }
  print STDOUT "</ol>";
} # print_input_string

sub print_document_tree ($) {
  my $node = shift;
  my $r = '<ol class="xoxo">';

  my @node = ($node);
  while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      $r .= $child;
      next;
    }

    my $node_id = 'node-'.refaddr $child;
    my $nt = $child->node_type;
    if ($nt == $child->ELEMENT_NODE) {
      $r .= qq'<li id="$node_id"><code>' . htescape ($child->tag_name) .
          '</code>'; ## ISSUE: case

      if ($child->has_attributes) {
        $r .= '<ul class="attributes">';
        for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, 'node-'.refaddr $_] }
                      @{$child->attributes}) {
          $r .= qq'<li id="$attr->[2]"><code>' . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
          $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
        }
        $r .= '</ul>';
      }

      if ($node->has_child_nodes) {
        $r .= '<ol class="children">';
        unshift @node, @{$child->child_nodes}, '</ol>';
      }
    } elsif ($nt == $child->TEXT_NODE) {
      $r .= qq'<li id="$node_id"><q>' . htescape ($child->data) . '</q></li>';
    } elsif ($nt == $child->CDATA_SECTION_NODE) {
      $r .= qq'<li id="$node_id"><code>&lt;[CDATA[</code><q>' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
    } elsif ($nt == $child->COMMENT_NODE) {
      $r .= qq'<li id="$node_id"><code>&lt;!--</code><q>' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
    } elsif ($nt == $child->DOCUMENT_NODE) {
      $r .= qq'<li id="$node_id">Document</li>';
      if ($child->has_child_nodes) {
        $r .= '<ol>';
        unshift @node, @{$child->child_nodes}, '</ol>';
      }
    } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
      $r .= qq'<li id="$node_id"><code>&lt;!DOCTYPE&gt;</code><ul>';
      $r .= '<li>Name = <q>@{[htescape ($child->name)]}</q></li>';
      $r .= '<li>Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>';
      $r .= '<li>System identifier = <q>@{[htescape ($child->system_id)]}</q></li>';
      $r .= '</ul></li>';
    } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
      $r .= qq'<li id="$node_id"><code>&lt;?@{[htescape ($child->target)]}?&gt;</code>';
      $r .= '<ul><li>@{[htescape ($child->data)]}</li></ul></li>';
    } else {
      $r .= qq'<li id="$node_id">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
    }
  }

  $r .= '</ol>';
  print STDOUT $r;
} # print_document_tree

sub get_node_path ($) {
  my $node = shift;
  my @r;
  while (defined $node) {
    my $rs;
    if ($node->node_type == 1) {
      $rs = $node->manakai_local_name;
      $node = $node->parent_node;
    } elsif ($node->node_type == 2) {
      $rs = '@' . $node->manakai_local_name;
      $node = $node->owner_element;
    } elsif ($node->node_type == 3) {
      $rs = '"' . $node->data . '"';
      $node = $node->parent_node;
    } elsif ($node->node_type == 9) {
      $rs = '';
      $node = $node->parent_node;
    } else {
      $rs = '#' . $node->node_type;
      $node = $node->parent_node;
    }
    unshift @r, $rs;
  }
  return join '/', @r;
} # get_node_path

=head1 AUTHOR

Wakaba <w@suika.fam.cx>.

=head1 LICENSE

Copyright 2007 Wakaba <w@suika.fam.cx>

This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.

=cut

## $Date: 2007/06/27 11:08:03 $
1	#!/usr/bin/perl
2	use strict;
3
4	use lib qw[/home/httpd/html/www/markup/html/whatpm
5	/home/wakaba/work/manakai/lib
6	/home/wakaba/public_html/-temp/wiki/lib];
7	use CGI::Carp qw[fatalsToBrowser];
8	use Scalar::Util qw[refaddr];
9
10	use SuikaWiki::Input::HTTP; ## TODO: Use some better CGI module
11
12	sub htescape ($) {
13	my $s = $_[0];
14	$s =~ s/&/&/g;
15	$s =~ s/</</g;
16	$s =~ s/>/>/g;
17	$s =~ s/"/"/g;
18	$s =~ s!([\x00-\x09\x0B-\x1F\x7F-\x80])!sprintf '<var>U+%04X</var>', ord $1!ge;
19	return $s;
20	} # htescape
21
22	my $http = SuikaWiki::Input::HTTP->new;
23
24	## TODO: _charset_
25
26	my $input_format = $http->parameter ('i') \|\| 'text/html';
27	my $inner_html_element = $http->parameter ('e');
28	my $input_uri = 'thismessage:/';
29
30	my $s = $http->parameter ('s');
31	if (length $s > 1000_000) {
32	print STDOUT "Status: 400 Document Too Long\nContent-Type: text/plain; charset=us-ascii\n\nToo long";
33	exit;
34	}
35
36	print STDOUT qq[Content-Type: text/html; charset=utf-8
37
38	<!DOCTYPE html>
39	<html lang="en">
40	<head>
41	<title>Web Document Conformance Checker (BETA)</title>
42	<link rel="stylesheet" href="/www/style/html/xhtml">
43	<style>
44	q {
45	white-space: pre;
46	white-space: -moz-pre-wrap;
47	white-space: pre-wrap;
48	}
49	</style>
50	</head>
51	<body>
52	<h1>Web Document Conformance Checker (<em>beta</em>)</h1>
53
54	<dl>
55	<dt>Document URI</dt>
56	<dd><code class="URI" lang=""><<a href="@{[htescape $input_uri]}">@{[htescape $input_uri]}</a>></code></dd>
57	<dt>Internet Media Type</dt>
58	<dd><code class="MIME" lang="en">@{[htescape $input_format]}</code></dd>
59	]; # no </dl> yet
60
61	require Message::DOM::DOMImplementation;
62	my $dom = Message::DOM::DOMImplementation->____new;
63	my $doc;
64	my $el;
65
66	if ($input_format eq 'text/html') {
67	require Encode;
68	require Whatpm::HTML;
69
70	$s = Encode::decode ('utf-8', $s);
71
72	print STDOUT qq[
73	<dt>Character Encoding</dt>
74	<dd>(none)</dd>
75	</dl>
76
77	<div id="source-string" class="section">
78	];
79	print_source_string (\$s);
80	print STDOUT qq[
81	</div>
82
83	<div id="parse-errors" class="section">
84	<h2>Parse Errors</h2>
85
86	<ul>
87	];
88
89	my $onerror = sub {
90	my (%opt) = @_;
91	if ($opt{column} > 0) {
92	print STDOUT qq[<li><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}: ];
93	} else {
94	$opt{line}--;
95	print STDOUT qq[<li><a href="#line-$opt{line}">Line $opt{line}</a>: ];
96	}
97	print STDOUT qq[@{[htescape $opt{type}]}</li>\n];
98	};
99
100	$doc = $dom->create_document;
101	if (defined $inner_html_element and length $inner_html_element) {
102	$el = $doc->create_element_ns
103	('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
104	Whatpm::HTML->set_inner_html ($el, $s, $onerror);
105	} else {
106	Whatpm::HTML->parse_string ($s => $doc, $onerror);
107	}
108
109	print STDOUT qq[
110	</ul>
111	</div>
112	];
113	} elsif ($input_format eq 'application/xhtml+xml') {
114	require Message::DOM::XMLParserTemp;
115	require Encode;
116
117	my $t = Encode::decode ('utf-8', $s);
118
119	print STDOUT qq[
120	<dt>Character Encoding</dt>
121	<dd>(none)</dd>
122	</dl>
123
124	<div id="source-string" class="section">
125	];
126	print_source_string (\$t);
127	print STDOUT qq[
128	</div>
129
130	<div id="parse-errors" class="section">
131	<h2>Parse Errors</h2>
132
133	<ul>
134	];
135
136	my $onerror = sub {
137	my $err = shift;
138	my $line = $err->location->line_number;
139	print STDOUT qq[<li><a href="#line-$line">Line $line</a> column ];
140	print STDOUT $err->location->column_number, ": ";
141	print STDOUT htescape $err->text, "</li>\n";
142	return 1;
143	};
144
145	open my $fh, '<', \$s;
146	$doc = Message::DOM::XMLParserTemp->parse_byte_stream
147	($fh => $dom, $onerror, charset => 'utf-8');
148
149	print STDOUT qq[
150	</ul>
151	</div>
152	];
153	} else {
154	print STDOUT qq[
155	</dl>
156
157	<p><em>Media type <code class="MIME" lang="en">@{[htescape $input_format]}</code> is not supported!</em></p>
158	];
159	}
160
161
162	if (defined $doc or defined $el) {
163	print STDOUT qq[
164	<div id="document-tree" class="section">
165	<h2>Document Tree</h2>
166	];
167
168	print_document_tree ($el \|\| $doc);
169
170	print STDOUT qq[
171	</div>
172
173	<div id="document-errors" class="section">
174	<h2>Document Errors</h2>
175
176	<ul>
177	];
178
179	require Whatpm::ContentChecker;
180	my $onerror = sub {
181	my %opt = @_;
182	print STDOUT qq[<li><a href="#node-@{[refaddr $opt{node}]}">],
183	htescape get_node_path ($opt{node}),
184	"</a>: ", htescape $opt{type}, "</li>\n";
185	};
186
187	if ($el) {
188	Whatpm::ContentChecker->check_element ($el, $onerror);
189	} else {
190	Whatpm::ContentChecker->check_document ($doc, $onerror);
191	}
192
193	print STDOUT qq[
194	</ul>
195	</div>
196	];
197	}
198
199	## TODO: Show result
200	print STDOUT qq[
201	</body>
202	</html>
203	];
204
205	exit;
206
207	sub print_source_string ($) {
208	my $s = $_[0];
209	my $i = 1;
210	print STDOUT qq[<ol lang="">\n];
211	while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
212	print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
213	$i++;
214	}
215	if ($$s =~ /\G([^\x0A]+)/gc) {
216	print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
217	}
218	print STDOUT "</ol>";
219	} # print_input_string
220
221	sub print_document_tree ($) {
222	my $node = shift;
223	my $r = '<ol class="xoxo">';
224
225	my @node = ($node);
226	while (@node) {
227	my $child = shift @node;
228	unless (ref $child) {
229	$r .= $child;
230	next;
231	}
232
233	my $node_id = 'node-'.refaddr $child;
234	my $nt = $child->node_type;
235	if ($nt == $child->ELEMENT_NODE) {
236	$r .= qq'<li id="$node_id"><code>' . htescape ($child->tag_name) .
237	'</code>'; ## ISSUE: case
238
239	if ($child->has_attributes) {
240	$r .= '<ul class="attributes">';
241	for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, 'node-'.refaddr $_] }
242	@{$child->attributes}) {
243	$r .= qq'<li id="$attr->[2]"><code>' . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
244	$r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
245	}
246	$r .= '</ul>';
247	}
248
249	if ($node->has_child_nodes) {
250	$r .= '<ol class="children">';
251	unshift @node, @{$child->child_nodes}, '</ol>';
252	}
253	} elsif ($nt == $child->TEXT_NODE) {
254	$r .= qq'<li id="$node_id"><q>' . htescape ($child->data) . '</q></li>';
255	} elsif ($nt == $child->CDATA_SECTION_NODE) {
256	$r .= qq'<li id="$node_id"><code><[CDATA[</code><q>' . htescape ($child->data) . '</q><code>]]></code></li>';
257	} elsif ($nt == $child->COMMENT_NODE) {
258	$r .= qq'<li id="$node_id"><code><!--</code><q>' . htescape ($child->data) . '</q><code>--></code></li>';
259	} elsif ($nt == $child->DOCUMENT_NODE) {
260	$r .= qq'<li id="$node_id">Document</li>';
261	if ($child->has_child_nodes) {
262	$r .= '<ol>';
263	unshift @node, @{$child->child_nodes}, '</ol>';
264	}
265	} elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
266	$r .= qq'<li id="$node_id"><code><!DOCTYPE></code><ul>';
267	$r .= '<li>Name = <q>@{[htescape ($child->name)]}</q></li>';
268	$r .= '<li>Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>';
269	$r .= '<li>System identifier = <q>@{[htescape ($child->system_id)]}</q></li>';
270	$r .= '</ul></li>';
271	} elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
272	$r .= qq'<li id="$node_id"><code><?@{[htescape ($child->target)]}?></code>';
273	$r .= '<ul><li>@{[htescape ($child->data)]}</li></ul></li>';
274	} else {
275	$r .= qq'<li id="$node_id">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
276	}
277	}
278
279	$r .= '</ol>';
280	print STDOUT $r;
281	} # print_document_tree
282
283	sub get_node_path ($) {
284	my $node = shift;
285	my @r;
286	while (defined $node) {
287	my $rs;
288	if ($node->node_type == 1) {
289	$rs = $node->manakai_local_name;
290	$node = $node->parent_node;
291	} elsif ($node->node_type == 2) {
292	$rs = '@' . $node->manakai_local_name;
293	$node = $node->owner_element;
294	} elsif ($node->node_type == 3) {
295	$rs = '"' . $node->data . '"';
296	$node = $node->parent_node;
297	} elsif ($node->node_type == 9) {
298	$rs = '';
299	$node = $node->parent_node;
300	} else {
301	$rs = '#' . $node->node_type;
302	$node = $node->parent_node;
303	}
304	unshift @r, $rs;
305	}
306	return join '/', @r;
307	} # get_node_path
308
309	=head1 AUTHOR
310
311	Wakaba <w@suika.fam.cx>.
312
313	=head1 LICENSE
314
315	Copyright 2007 Wakaba <w@suika.fam.cx>
316
317	This library is free software; you can redistribute it
318	and/or modify it under the same terms as Perl itself.
319
320	=cut
321
322	## $Date: 2007/06/27 11:08:03 $