/[pub]/test/html-webhacc/cc.cgi
Suika

Diff of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.20 by wakaba, Mon Sep 10 12:09:34 2007 UTC revision 1.53 by wakaba, Sun Jul 20 14:58:24 2008 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl  #!/usr/bin/perl
2  use strict;  use strict;
3    use utf8;
4    
5  use lib qw[/home/httpd/html/www/markup/html/whatpm  use lib qw[/home/httpd/html/www/markup/html/whatpm
6             /home/wakaba/work/manakai2/lib];             /home/wakaba/work/manakai2/lib];
7  use CGI::Carp qw[fatalsToBrowser];  use CGI::Carp qw[fatalsToBrowser];
8  use Scalar::Util qw[refaddr];  use Scalar::Util qw[refaddr];
 use Time::HiRes qw/time/;  
9    
10  sub htescape ($) {    require WebHACC::Input;
11    my $s = $_[0];    require WebHACC::Result;
12    $s =~ s/&/&/g;    require WebHACC::Output;
   $s =~ s/</&lt;/g;  
   $s =~ s/>/&gt;/g;  
   $s =~ s/"/&quot;/g;  
   $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{  
     sprintf '<var>U+%04X</var>', ord $1;  
   }ge;  
   return $s;  
 } # htescape  
13    
14    my $out;
15    
16      require Message::DOM::DOMImplementation;
17      my $dom = Message::DOM::DOMImplementation->new;
18    {
19    use Message::CGI::HTTP;    use Message::CGI::HTTP;
20    my $http = Message::CGI::HTTP->new;    my $http = Message::CGI::HTTP->new;
21    
# Line 26  sub htescape ($) { Line 23  sub htescape ($) {
23      print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";      print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
24      exit;      exit;
25    }    }
26      
   binmode STDOUT, ':utf8';  
   $| = 1;  
   
   require Message::DOM::DOMImplementation;  
   my $dom = Message::DOM::DOMImplementation->new;  
   
27    load_text_catalog ('en'); ## TODO: conneg    load_text_catalog ('en'); ## TODO: conneg
28    
29    my @nav;    $out = WebHACC::Output->new;
30    print STDOUT qq[Content-Type: text/html; charset=utf-8    $out->handle (*STDOUT);
31      $out->set_utf8;
32      $out->set_flush;
33      $out->html (qq[Content-Type: text/html; charset=utf-8
34    
35  <!DOCTYPE html>  <!DOCTYPE html>
36  <html lang="en">  <html lang="en">
# Line 47  sub htescape ($) { Line 41  sub htescape ($) {
41  <body>  <body>
42  <h1><a href="../cc-interface">Web Document Conformance Checker</a>  <h1><a href="../cc-interface">Web Document Conformance Checker</a>
43  (<em>beta</em>)</h1>  (<em>beta</em>)</h1>
44  ];  ]);
45    
   $| = 0;  
46    my $input = get_input_document ($http, $dom);    my $input = get_input_document ($http, $dom);
47    my $inner_html_element = $http->get_parameter ('e');    $out->input ($input);
48    my $char_length = 0;    $out->unset_flush;
   my %time;  
49    
50    print qq[    my $char_length = 0;
 <div id="document-info" class="section">  
 <dl>  
 <dt>Request URI</dt>  
     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>  
 <dt>Document URI</dt>  
     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>  
 ]; # no </dl> yet  
   push @nav, ['#document-info' => 'Information'];  
   
 if (defined $input->{s}) {  
   $char_length = length $input->{s};  
51    
52    print STDOUT qq[    $out->start_section (id => 'document-info', title => 'Information');
53  <dt>Base URI</dt>    $out->html (qq[<dl>
54      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>  <dt>Request URL</dt>
55  <dt>Internet Media Type</dt>      <dd>]);
56      <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>    $out->url ($input->{request_uri});
57      @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>    $out->html (q[<dt>Document URL<!-- HTML5 document's address? -->
58  <dt>Character Encoding</dt>      <dd>]);
59      <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}    $out->url ($input->{uri}, id => 'anchor-document-url');
60      @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>    $out->html (q[
61        <script>
62          document.title = '<'
63              + document.getElementById ('anchor-document-url').href + '> \\u2014 '
64              + document.title;
65        </script>]);
66      ## NOTE: no </dl> yet
67    
68      if (defined $input->{s}) {
69        $char_length = length $input->{s};
70    
71        $out->html (qq[<dt>Base URI<dd>]);
72        $out->url ($input->{base_uri});
73        $out->html (qq[<dt>Internet Media Type</dt>
74        <dd><code class="MIME" lang="en">]);
75        $out->text ($input->{media_type});
76        $out->html (qq[</code> ]);
77        if ($input->{media_type_overridden}) {
78          $out->html ('<em>(overridden)</em>');
79        } elsif (defined $input->{official_type}) {
80          if ($input->{media_type} eq $input->{official_type}) {
81            #
82          } else {
83            $out->html ('<em>(sniffed; official type is: <code class=MIME lang=en>');
84            $out->text ($input->{official_type});
85            $out->html ('</code>)');
86          }
87        } else {
88          $out->html ('<em>(sniffed)</em>');
89        }
90        $out->html (q[<dt>Character Encoding<dd>]);
91        if (defined $input->{charset}) {
92          $out->html ('<code class="charset" lang="en">');
93          $out->text ($input->{charset});
94          $out->html ('</code>');
95        } else {
96          $out->text ('(none)');
97        }
98        $out->html (' <em>overridden</em>') if $input->{charset_overridden};
99        $out->html (qq[
100  <dt>Length</dt>  <dt>Length</dt>
101      <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>      <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
102  </dl>  </dl>
 </div>  
 ];  
   
   my $result = {conforming_min => 1, conforming_max => 1};  
   print_http_header_section ($input, $result);  
   
   my $doc;  
   my $el;  
103    
104    if ($input->{media_type} eq 'text/html') {  <script src="../cc-script.js"></script>
105      ($doc, $el) = print_syntax_error_html_section ($input, $result);  ]);
106      print_source_string_section (\($input->{s}), $input->{charset});      $out->end_section;
107    } elsif ({  
108              'text/xml' => 1,      my $result = WebHACC::Result->new;
109              'application/atom+xml' => 1,      $result->{conforming_min} = 1;
110              'application/rss+xml' => 1,      $result->{conforming_max} = 1;
111              'application/svg+xml' => 1,      check_and_print ($input => $result => $out);
112              'application/xhtml+xml' => 1,      print_result_section ($result);
             'application/xml' => 1,  
            }->{$input->{media_type}}) {  
     ($doc, $el) = print_syntax_error_xml_section ($input, $result);  
     print_source_string_section (\($input->{s}), $doc->input_encoding);  
113    } else {    } else {
114      ## TODO: Change HTTP status code??      $out->html ('</dl>');
115      print_result_unknown_type_section ($input);      $out->end_section;
116        print_result_input_error_section ($input);
117    }    }
118    
119    if (defined $doc or defined $el) {    $out->nav_list;
     print_structure_dump_section ($doc, $el);  
     my $elements = print_structure_error_section ($doc, $el, $result);  
     print_table_section ($elements->{table}) if @{$elements->{table}};  
     print_id_section ($elements->{id}) if keys %{$elements->{id}};  
     print_term_section ($elements->{term}) if keys %{$elements->{term}};  
     print_class_section ($elements->{class}) if keys %{$elements->{class}};  
   }  
120    
121    print_result_section ($result);    exit;
 } else {  
   print STDOUT qq[</dl></div>];  
   print_result_input_error_section ($input);  
122  }  }
123    
   print STDOUT qq[  
 <ul class="navigation" id="nav-items">  
 ];  
   for (@nav) {  
     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];  
   }  
   print STDOUT qq[  
 </ul>  
 </body>  
 </html>  
 ];  
   
   for (qw/decode parse parse_xml check/) {  
     next unless defined $time{$_};  
     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";  
     print $file $char_length, "\t", $time{$_}, "\n";  
   }  
   
 exit;  
   
124  sub add_error ($$$) {  sub add_error ($$$) {
125    my ($layer, $err, $result) = @_;    my ($layer, $err, $result) = @_;
126    if (defined $err->{level}) {    if (defined $err->{level}) {
# Line 151  sub add_error ($$$) { Line 130  sub add_error ($$$) {
130        $result->{conforming_min} = 0;        $result->{conforming_min} = 0;
131      } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {      } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
132        $result->{$layer}->{warning}++;        $result->{$layer}->{warning}++;
133      } elsif ($err->{level} eq 'unsupported') {      } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
134        $result->{$layer}->{unsupported}++;        $result->{$layer}->{unsupported}++;
135        $result->{unsupported} = 1;        $result->{unsupported} = 1;
136        } elsif ($err->{level} eq 'i') {
137          #
138      } else {      } else {
139        $result->{$layer}->{must}++;        $result->{$layer}->{must}++;
140        $result->{$layer}->{score_max} -= 2;        $result->{$layer}->{score_max} -= 2;
# Line 170  sub add_error ($$$) { Line 151  sub add_error ($$$) {
151    }    }
152  } # add_error  } # add_error
153    
154    sub check_and_print ($$$) {
155      my ($input, $result, $out) = @_;
156      my $original_input = $out->input;
157      $out->input ($input);
158    
159      print_http_header_section ($input, $result);
160    
161      my @subdoc;
162    
163      my $checker_class = {
164        'text/cache-manifest' => 'WebHACC::Language::CacheManifest',
165        'text/css' => 'WebHACC::Language::CSS',
166        'text/html' => 'WebHACC::Language::HTML',
167        'text/x-webidl' => 'WebHACC::Language::WebIDL',
168    
169        'text/xml' => 'WebHACC::Language::XML',
170        'application/atom+xml' => 'WebHACC::Language::XML',
171        'application/rss+xml' => 'WebHACC::Language::XML',
172        'image/svg+xml' => 'WebHACC::Language::XML',
173        'application/xhtml+xml' => 'WebHACC::Language::XML',
174        'application/xml' => 'WebHACC::Language::XML',
175        ## TODO: Should we make all XML MIME Types fall
176        ## into this category?
177    
178        ## NOTE: This type has different model from normal XML types.
179        'application/rdf+xml' => 'WebHACC::Language::XML',
180      }->{$input->{media_type}} || 'WebHACC::Language::Default';
181    
182      eval qq{ require $checker_class } or die "$0: Loading $checker_class: $@";
183      my $checker = $checker_class->new;
184      $checker->input ($input);
185      $checker->output ($out);
186      $checker->result ($result);
187    
188      ## TODO: A cache manifest MUST be text/cache-manifest
189      ## TODO: WebIDL media type "text/x-webidl"
190    
191      $checker->generate_syntax_error_section;
192      $checker->generate_source_string_section;
193    
194      $checker->onsubdoc (sub {
195        push @subdoc, shift;
196      });
197    
198      $checker->generate_structure_dump_section;
199      $checker->generate_structure_error_section;
200      $checker->generate_additional_sections;
201    
202    =pod
203    
204      if (defined $doc or defined $el) {
205    
206        print_table_section ($input, $elements->{table}) if @{$elements->{table}};
207        print_listing_section ({
208          id => 'identifiers', label => 'IDs', heading => 'Identifiers',
209        }, $input, $elements->{id}) if keys %{$elements->{id}};
210        print_listing_section ({
211          id => 'terms', label => 'Terms', heading => 'Terms',
212        }, $input, $elements->{term}) if keys %{$elements->{term}};
213        print_listing_section ({
214          id => 'classes', label => 'Classes', heading => 'Classes',
215        }, $input, $elements->{class}) if keys %{$elements->{class}};
216      
217        print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
218      }
219    
220    =cut
221    
222      my $id_prefix = 0;
223      for my $_subinput (@subdoc) {
224        my $subinput = WebHACC::Input->new;
225        $subinput->{$_} = $_subinput->{$_} for keys %$_subinput;
226        $subinput->id_prefix ('subdoc-' . ++$id_prefix);
227        $subinput->nested (1);
228        $subinput->{base_uri} = $subinput->{container_node}->base_uri
229            unless defined $subinput->{base_uri};
230        my $ebaseuri = htescape ($subinput->{base_uri});
231        $out->start_section (id => $subinput->id_prefix,
232                             title => qq[Subdocument #$id_prefix]);
233        print STDOUT qq[
234          <dl>
235          <dt>Internet Media Type</dt>
236            <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
237          <dt>Container Node</dt>
238            <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
239          <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
240            <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
241          </dl>];              
242    
243        $subinput->{id_prefix} .= '-';
244        check_and_print ($subinput => $result => $out);
245    
246        $out->end_section;
247      }
248    
249      $out->input ($original_input);
250    } # check_and_print
251    
252  sub print_http_header_section ($$) {  sub print_http_header_section ($$) {
253    my ($input, $result) = @_;    my ($input, $result) = @_;
254    return unless defined $input->{header_status_code} or    return unless defined $input->{header_status_code} or
255        defined $input->{header_status_text} or        defined $input->{header_status_text} or
256        @{$input->{header_field}};        @{$input->{header_field} or []};
257        
258    push @nav, ['#source-header' => 'HTTP Header'];    $out->start_section (id => 'source-header', title => 'HTTP Header');
259    print STDOUT qq[<div id="source-header" class="section">    print STDOUT qq[<p><strong>Note</strong>: Due to the limitation of the
 <h2>HTTP Header</h2>  
   
 <p><strong>Note</strong>: Due to the limitation of the  
260  network library in use, the content of this section might  network library in use, the content of this section might
261  not be the real header.</p>  not be the real header.</p>
262    
# Line 189  not be the real header.</p> Line 265  not be the real header.</p>
265    
266    if (defined $input->{header_status_code}) {    if (defined $input->{header_status_code}) {
267      print STDOUT qq[<tr><th scope="row">Status code</th>];      print STDOUT qq[<tr><th scope="row">Status code</th>];
268      print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];      print STDOUT qq[<td>];
269        $out->code ($input->{header_status_code});
270    }    }
271    if (defined $input->{header_status_text}) {    if (defined $input->{header_status_text}) {
272      print STDOUT qq[<tr><th scope="row">Status text</th>];      print STDOUT qq[<tr><th scope="row">Status text</th>];
273      print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];      print STDOUT qq[<td>];
274        $out->code ($input->{header_status_text});
275    }    }
276        
277    for (@{$input->{header_field}}) {    for (@{$input->{header_field}}) {
278      print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];      print STDOUT qq[<tr><th scope="row">];
279      print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];      $out->code ($_->[0]);
280    }      print STDOUT qq[<td>];
281        $out->code ($_->[1]);
   print STDOUT qq[</tbody></table></div>];  
 } # print_http_header_section  
   
 sub print_syntax_error_html_section ($$) {  
   my ($input, $result) = @_;  
     
   require Encode;  
   require Whatpm::HTML;  
   
   $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.  
     
   my $time1 = time;  
   my $t = Encode::decode ($input->{charset}, $input->{s});  
   $time{decode} = time - $time1;  
   
   print STDOUT qq[  
 <div id="parse-errors" class="section">  
 <h2>Parse Errors</h2>  
   
 <dl>];  
   push @nav, ['#parse-errors' => 'Parse Error'];  
   
   my $onerror = sub {  
     my (%opt) = @_;  
     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});  
     if ($opt{column} > 0) {  
       print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];  
     } else {  
       $opt{line} = $opt{line} - 1 || 1;  
       print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];  
     }  
     $type =~ tr/ /-/;  
     $type =~ s/\|/%7C/g;  
     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];  
     print STDOUT qq[<dd class="$cls">$msg</dd>\n];  
   
     add_error ('syntax', \%opt => $result);  
   };  
   
   my $doc = $dom->create_document;  
   my $el;  
   $time1 = time;  
   if (defined $inner_html_element and length $inner_html_element) {  
     $el = $doc->create_element_ns  
         ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);  
     Whatpm::HTML->set_inner_html ($el, $t, $onerror);  
   } else {  
     Whatpm::HTML->parse_string ($t => $doc, $onerror);  
282    }    }
   $time{parse} = time - $time1;  
   
   print STDOUT qq[</dl></div>];  
283    
284    return ($doc, $el);    print STDOUT qq[</tbody></table>];
 } # print_syntax_error_html_section  
285    
286  sub print_syntax_error_xml_section ($$) {    $out->end_section;
287    my ($input, $result) = @_;  } # print_http_header_section
     
   require Message::DOM::XMLParserTemp;  
     
   print STDOUT qq[  
 <div id="parse-errors" class="section">  
 <h2>Parse Errors</h2>  
   
 <dl>];  
   push @nav, ['#parse-errors' => 'Parse Error'];  
   
   my $onerror = sub {  
     my $err = shift;  
     my $line = $err->location->line_number;  
     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];  
     print STDOUT $err->location->column_number, "</dt><dd>";  
     print STDOUT htescape $err->text, "</dd>\n";  
   
     add_error ('syntax', {type => $err->text,  
                 level => [  
                           $err->SEVERITY_FATAL_ERROR => 'm',  
                           $err->SEVERITY_ERROR => 'm',  
                           $err->SEVERITY_WARNING => 's',  
                          ]->[$err->severity]} => $result);  
   
     return 1;  
   };  
   
   my $time1 = time;  
   open my $fh, '<', \($input->{s});  
   my $doc = Message::DOM::XMLParserTemp->parse_byte_stream  
       ($fh => $dom, $onerror, charset => $input->{charset});  
   $time{parse_xml} = time - $time1;  
   
   print STDOUT qq[</dl></div>];  
   
   return ($doc, undef);  
 } # print_syntax_error_xml_section  
   
 sub print_source_string_section ($$) {  
   require Encode;  
   my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name  
   return unless $enc;  
   
   my $s = \($enc->decode (${$_[0]}));  
   my $i = 1;                              
   push @nav, ['#source-string' => 'Source'];  
   print STDOUT qq[<div id="source-string" class="section">  
 <h2>Document Source</h2>  
 <ol lang="">\n];  
   if (length $$s) {  
     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {  
       print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";  
       $i++;  
     }  
     if ($$s =~ /\G([^\x0A]+)/gc) {  
       print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";  
     }  
   } else {  
     print STDOUT q[<li id="line-1"></li>];  
   }  
   print STDOUT "</ol></div>";  
 } # print_input_string_section  
   
 sub print_document_tree ($) {  
   my $node = shift;  
   my $r = '<ol class="xoxo">';  
   
   my @node = ($node);  
   while (@node) {  
     my $child = shift @node;  
     unless (ref $child) {  
       $r .= $child;  
       next;  
     }  
   
     my $node_id = 'node-'.refaddr $child;  
     my $nt = $child->node_type;  
     if ($nt == $child->ELEMENT_NODE) {  
       my $child_nsuri = $child->namespace_uri;  
       $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .  
           '</code>'; ## ISSUE: case  
   
       if ($child->has_attributes) {  
         $r .= '<ul class="attributes">';  
         for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }  
                       @{$child->attributes}) {  
           $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?  
           $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children  
         }  
         $r .= '</ul>';  
       }  
   
       if ($child->has_child_nodes) {  
         $r .= '<ol class="children">';  
         unshift @node, @{$child->child_nodes}, '</ol></li>';  
       } else {  
         $r .= '</li>';  
       }  
     } elsif ($nt == $child->TEXT_NODE) {  
       $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';  
     } elsif ($nt == $child->CDATA_SECTION_NODE) {  
       $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';  
     } elsif ($nt == $child->COMMENT_NODE) {  
       $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';  
     } elsif ($nt == $child->DOCUMENT_NODE) {  
       $r .= qq'<li id="$node_id" class="tree-document">Document';  
       $r .= qq[<ul class="attributes">];  
       $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];  
       $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];  
       unless ($child->manakai_is_html) {  
         $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];  
         if (defined $child->xml_encoding) {  
           $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];  
         } else {  
           $r .= qq[<li>XML encoding = (null)</li>];  
         }  
         $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];  
       }  
       $r .= qq[</ul>];  
       if ($child->has_child_nodes) {  
         $r .= '<ol class="children">';  
         unshift @node, @{$child->child_nodes}, '</ol></li>';  
       }  
     } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {  
       $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';  
       $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];  
       $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];  
       $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];  
       $r .= '</ul></li>';  
     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {  
       $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';  
     } else {  
       $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error  
     }  
   }  
   
   $r .= '</ol>';  
   print STDOUT $r;  
 } # print_document_tree  
   
 sub print_structure_dump_section ($$) {  
   my ($doc, $el) = @_;  
   
   print STDOUT qq[  
 <div id="document-tree" class="section">  
 <h2>Document Tree</h2>  
 ];  
   push @nav, ['#document-tree' => 'Tree'];  
   
   print_document_tree ($el || $doc);  
   
   print STDOUT qq[</div>];  
 } # print_structure_dump_section  
   
 sub print_structure_error_section ($$$) {  
   my ($doc, $el, $result) = @_;  
   
   print STDOUT qq[<div id="document-errors" class="section">  
 <h2>Document Errors</h2>  
   
 <dl>];  
   push @nav, ['#document-errors' => 'Document Error'];  
   
   require Whatpm::ContentChecker;  
   my $onerror = sub {  
     my %opt = @_;  
     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});  
     $type =~ tr/ /-/;  
     $type =~ s/\|/%7C/g;  
     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];  
     print STDOUT qq[<dt class="$cls">] . get_node_link ($opt{node}) .  
         qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";  
     add_error ('structure', \%opt => $result);  
   };  
   
   my $elements;  
   my $time1 = time;  
   if ($el) {  
     $elements = Whatpm::ContentChecker->check_element ($el, $onerror);  
   } else {  
     $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);  
   }  
   $time{check} = time - $time1;  
   
   print STDOUT qq[</dl></div>];  
   
   return $elements;  
 } # print_structure_error_section  
288    
289  sub print_table_section ($) {  sub print_table_section ($$) {
290    my $tables = shift;    my ($input, $tables) = @_;
291        
292    push @nav, ['#tables' => 'Tables'];  #  push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
293    #      unless $input->{nested};
294    print STDOUT qq[    print STDOUT qq[
295  <div id="tables" class="section">  <div id="$input->{id_prefix}tables" class="section">
296  <h2>Tables</h2>  <h2>Tables</h2>
297    
298  <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->  <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
# Line 466  sub print_table_section ($) { Line 305  sub print_table_section ($) {
305    require JSON;    require JSON;
306        
307    my $i = 0;    my $i = 0;
308    for my $table_el (@$tables) {    for my $table (@$tables) {
309      $i++;      $i++;
310      print STDOUT qq[<div class="section" id="table-$i"><h3>] .      print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
311          get_node_link ($table_el) . q[</h3>];          get_node_link ($input, $table->{element}) . q[</h3>];
312    
313      ## TODO: Make |ContentChecker| return |form_table| result      delete $table->{element};
314      ## so that this script don't have to run the algorithm twice.  
315      my $table = Whatpm::HTMLTable->form_table ($table_el);      for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption},
316                 @{$table->{row}}) {
     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {  
317        next unless $_;        next unless $_;
318        delete $_->{element};        delete $_->{element};
319      }      }
# Line 501  sub print_table_section ($) { Line 339  sub print_table_section ($) {
339                    
340      print STDOUT '</div><script type="text/javascript">tableToCanvas (';      print STDOUT '</div><script type="text/javascript">tableToCanvas (';
341      print STDOUT JSON::objToJson ($table);      print STDOUT JSON::objToJson ($table);
342      print STDOUT qq[, document.getElementById ('table-$i'));</script>];      print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
343        print STDOUT qq[, '$input->{id_prefix}');</script>];
344    }    }
345        
346    print STDOUT qq[</div>];    print STDOUT qq[</div>];
347  } # print_table_section  } # print_table_section
348    
349  sub print_id_section ($) {  sub print_listing_section ($$$) {
350    my $ids = shift;    my ($opt, $input, $ids) = @_;
351        
352    push @nav, ['#identifiers' => 'IDs'];  #  push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
353    #      unless $input->{nested};
354    print STDOUT qq[    print STDOUT qq[
355  <div id="identifiers" class="section">  <div id="$input->{id_prefix}$opt->{id}" class="section">
356  <h2>Identifiers</h2>  <h2>$opt->{heading}</h2>
357    
358  <dl>  <dl>
359  ];  ];
360    for my $id (sort {$a cmp $b} keys %$ids) {    for my $id (sort {$a cmp $b} keys %$ids) {
361      print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];      print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
362      for (@{$ids->{$id}}) {      for (@{$ids->{$id}}) {
363        print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];        print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
364      }      }
365    }    }
366    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
367  } # print_id_section  } # print_listing_section
368    
369    
370  sub print_term_section ($) {  sub print_rdf_section ($$$) {
371    my $terms = shift;    my ($input, $rdfs) = @_;
372        
373    push @nav, ['#terms' => 'Terms'];  #  push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
374    #      unless $input->{nested};
375    print STDOUT qq[    print STDOUT qq[
376  <div id="terms" class="section">  <div id="$input->{id_prefix}rdf" class="section">
377  <h2>Terms</h2>  <h2>RDF Triples</h2>
378    
379  <dl>  <dl>];
380  ];    my $i = 0;
381    for my $term (sort {$a cmp $b} keys %$terms) {    for my $rdf (@$rdfs) {
382      print STDOUT qq[<dt>@{[htescape $term]}</dt>];      print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
383      for (@{$terms->{$term}}) {      print STDOUT get_node_link ($input, $rdf->[0]);
384        print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];      print STDOUT qq[<dd><dl>];
385        for my $triple (@{$rdf->[1]}) {
386          print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
387          print STDOUT get_rdf_resource_html ($triple->[1]);
388          print STDOUT ' ';
389          print STDOUT get_rdf_resource_html ($triple->[2]);
390          print STDOUT ' ';
391          print STDOUT get_rdf_resource_html ($triple->[3]);
392      }      }
393        print STDOUT qq[</dl>];
394    }    }
395    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
396  } # print_term_section  } # print_rdf_section
397    
398  sub print_class_section ($) {  sub get_rdf_resource_html ($) {
399    my $classes = shift;    my $resource = shift;
400        if (defined $resource->{uri}) {
401    push @nav, ['#classes' => 'Classes'];      my $euri = htescape ($resource->{uri});
402    print STDOUT qq[      return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
403  <div id="classes" class="section">          '</a>></code>';
404  <h2>Classes</h2>    } elsif (defined $resource->{bnodeid}) {
405        return htescape ('_:' . $resource->{bnodeid});
406  <dl>    } elsif ($resource->{nodes}) {
407  ];      return '(rdf:XMLLiteral)';
408    for my $class (sort {$a cmp $b} keys %$classes) {    } elsif (defined $resource->{value}) {
409      print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];      my $elang = htescape (defined $resource->{language}
410      for (@{$classes->{$class}}) {                                ? $resource->{language} : '');
411        print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];      my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
412        if (defined $resource->{datatype}) {
413          my $euri = htescape ($resource->{datatype});
414          $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
415              '</a>></code>';
416        } elsif (length $resource->{language}) {
417          $r .= '@' . htescape ($resource->{language});
418      }      }
419        return $r;
420      } else {
421        return '??';
422    }    }
423    print STDOUT qq[</dl></div>];  } # get_rdf_resource_html
 } # print_class_section  
424    
425  sub print_result_section ($) {  sub print_result_section ($) {
426    my $result = shift;    my $result = shift;
427    
428    print STDOUT qq[    $out->start_section (id => 'result-summary',
429  <div id="result-summary" class="section">                         title => 'Result');
 <h2>Result</h2>];  
430    
431    if ($result->{unsupported}) {      if ($result->{unsupported} and $result->{conforming_max}) {  
432      print STDOUT qq[<p class=uncertain id=result-para>The conformance      print STDOUT qq[<p class=uncertain id=result-para>The conformance
433          checker cannot decide whether the document is conforming or          checker cannot decide whether the document is conforming or
434          not, since the document contains one or more unsupported          not, since the document contains one or more unsupported
435          features.</p>];          features.  The document might or might not be conforming.</p>];
436    } elsif ($result->{conforming_min}) {    } elsif ($result->{conforming_min}) {
437      print STDOUT qq[<p class=PASS id=result-para>No conformance-error is      print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
438          found in this document.</p>];          found in this document.</p>];
# Line 591  sub print_result_section ($) { Line 448  sub print_result_section ($) {
448    print STDOUT qq[<table>    print STDOUT qq[<table>
449  <colgroup><col><colgroup><col><col><col><colgroup><col>  <colgroup><col><colgroup><col><col><col><colgroup><col>
450  <thead>  <thead>
451  <tr><th scope=col></th><th scope=col><em class=rfc2119>MUST</em>-level  <tr><th scope=col></th>
452  Errors</th><th scope=col><em class=rfc2119>SHOULD</em>-level  <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
453  Errors</th><th scope=col>Warnings</th><th scope=col>Score</th></tr>  Errors</a></th>
454  </thead><tbody>];  <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
455    Errors</a></th>
456    <th scope=col><a href="../error-description#level-w">Warnings</a></th>
457    <th scope=col>Score</th></tr></thead><tbody>];
458    
459    my $must_error = 0;    my $must_error = 0;
460    my $should_error = 0;    my $should_error = 0;
# Line 602  Errors</th><th scope=col>Warnings</th><t Line 462  Errors</th><th scope=col>Warnings</th><t
462    my $score_min = 0;    my $score_min = 0;
463    my $score_max = 0;    my $score_max = 0;
464    my $score_base = 20;    my $score_base = 20;
465      my $score_unit = $score_base / 100;
466    for (    for (
467      [Transfer => 'transfer', ''],      [Transfer => 'transfer', ''],
468      [Character => 'char', ''],      [Character => 'char', ''],
# Line 611  Errors</th><th scope=col>Warnings</th><t Line 472  Errors</th><th scope=col>Warnings</th><t
472      $must_error += ($result->{$_->[1]}->{must} += 0);      $must_error += ($result->{$_->[1]}->{must} += 0);
473      $should_error += ($result->{$_->[1]}->{should} += 0);      $should_error += ($result->{$_->[1]}->{should} += 0);
474      $warning += ($result->{$_->[1]}->{warning} += 0);      $warning += ($result->{$_->[1]}->{warning} += 0);
475      $score_min += ($result->{$_->[1]}->{score_min} += $score_base);      $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
476      $score_max += ($result->{$_->[1]}->{score_max} += $score_base);      $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
477    
478      my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';      my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
479      my $label = $_->[0];      my $label = $_->[0];
# Line 625  Errors</th><th scope=col>Warnings</th><t Line 486  Errors</th><th scope=col>Warnings</th><t
486    
487      print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];      print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
488      if ($uncertain) {      if ($uncertain) {
489        print qq[<td class="@{[$score_max < $score_base ? $score_min < $score_max ? 'FAIL' : 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}];
490      } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {      } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
491        print qq[<td class="@{[$score_max < $score_base ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max} + $score_base</td></tr>];        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}];
492      } else {      } else {
493        print qq[<td class="@{[$score_max < $score_base ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}];
494      }      }
495        print qq[ / 20];
496    }    }
497    
498    $score_max += $score_base;    $score_max += $score_base;
499    
500    print STDOUT qq[    print STDOUT qq[
501  <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>  <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base / 20
502  </tbody>  </tbody>
503  <tfoot><tr class=uncertain><th scope=row>Total</th><td>$must_error?</td><td>$should_error?</td><td>$warning?</td><td><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>  <tfoot><tr class=uncertain><th scope=row>Total</th>
504    <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
505    <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
506    <td>$warning?</td>
507    <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong> / 100
508  </table>  </table>
509    
510  <p><strong>Important</strong>: This conformance checking service  <p><strong>Important</strong>: This conformance checking service
511  is <em>under development</em>.  The result above might be <em>wrong</em>.</p>  is <em>under development</em>.  The result above might be <em>wrong</em>.</p>];
512  </div>];    $out->end_section;
   push @nav, ['#result-summary' => 'Result'];  
513  } # print_result_section  } # print_result_section
514    
 sub print_result_unknown_type_section ($) {  
   my $input = shift;  
   
   print STDOUT qq[  
 <div id="result-summary" class="section">  
 <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>  
 </div>  
 ];  
   push @nav, ['#result-summary' => 'Result'];  
 } # print_result_unknown_type_section  
   
515  sub print_result_input_error_section ($) {  sub print_result_input_error_section ($) {
516    my $input = shift;    my $input = shift;
517    print STDOUT qq[<div class="section" id="result-summary">    $out->start_section (id => 'result-summary', title => 'Result');
518  <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>    print STDOUT qq[
519  </div>];  <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>];
520    push @nav, ['#result-summary' => 'Result'];    $out->end_section;
521  } # print_Result_input_error_section  } # print_result_input_error_section
   
 sub get_node_path ($) {  
   my $node = shift;  
   my @r;  
   while (defined $node) {  
     my $rs;  
     if ($node->node_type == 1) {  
       $rs = $node->manakai_local_name;  
       $node = $node->parent_node;  
     } elsif ($node->node_type == 2) {  
       $rs = '@' . $node->manakai_local_name;  
       $node = $node->owner_element;  
     } elsif ($node->node_type == 3) {  
       $rs = '"' . $node->data . '"';  
       $node = $node->parent_node;  
     } elsif ($node->node_type == 9) {  
       @r = ('') unless @r;  
       $rs = '';  
       $node = $node->parent_node;  
     } else {  
       $rs = '#' . $node->node_type;  
       $node = $node->parent_node;  
     }  
     unshift @r, $rs;  
   }  
   return join '/', @r;  
 } # get_node_path  
   
 sub get_node_link ($) {  
   return qq[<a href="#node-@{[refaddr $_[0]]}">] .  
       htescape (get_node_path ($_[0])) . qq[</a>];  
 } # get_node_link  
522    
523  {  {
524    my $Msg = {};    my $Msg = {};
525    
526  sub load_text_catalog ($) {  sub load_text_catalog ($) {
527    #  my $self = shift;
528    my $lang = shift; # MUST be a canonical lang name    my $lang = shift; # MUST be a canonical lang name
529    open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";    open my $file, '<:utf8', "cc-msg.$lang.txt"
530          or die "$0: cc-msg.$lang.txt: $!";
531    while (<$file>) {    while (<$file>) {
532      if (s/^([^;]+);([^;]*);//) {      if (s/^([^;]+);([^;]*);//) {
533        my ($type, $cls, $msg) = ($1, $2, $_);        my ($type, $cls, $msg) = ($1, $2, $_);
# Line 713  sub load_text_catalog ($) { Line 537  sub load_text_catalog ($) {
537    }    }
538  } # load_text_catalog  } # load_text_catalog
539    
540  sub get_text ($) {  sub get_text ($;$$) {
541    #  my $self = shift;
542    my ($type, $level, $node) = @_;    my ($type, $level, $node) = @_;
543    $type = $level . ':' . $type if defined $level;    $type = $level . ':' . $type if defined $level;
544      $level = 'm' unless defined $level;
545    my @arg;    my @arg;
546    {    {
547      if (defined $Msg->{$type}) {      if (defined $Msg->{$type}) {
548        my $msg = $Msg->{$type}->[1];        my $msg = $Msg->{$type}->[1];
549        $msg =~ s{<var>\$([0-9]+)</var>}{        $msg =~ s{<var>\$([0-9]+)</var>}{
550          defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';          defined $arg[$1] ? ($arg[$1]) : '(undef)';
551        }ge;        }ge;                 ##BUG: ^ must be escaped
552        $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{        $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
553          UNIVERSAL::can ($node, 'get_attribute_ns')          UNIVERSAL::can ($node, 'get_attribute_ns')
554              ? htescape ($node->get_attribute_ns (undef, $1)) : ''              ?  ($node->get_attribute_ns (undef, $1)) : ''
555        }ge;        }ge; ## BUG: ^ must be escaped
556        $msg =~ s{<var>{\@}</var>}{        $msg =~ s{<var>{\@}</var>}{        ## BUG: v must be escaped
557          UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''          UNIVERSAL::can ($node, 'value') ? ($node->value) : ''
558        }ge;        }ge;
559        $msg =~ s{<var>{local-name}</var>}{        $msg =~ s{<var>{local-name}</var>}{
560          UNIVERSAL::can ($node, 'manakai_local_name')          UNIVERSAL::can ($node, 'manakai_local_name')
561            ? htescape ($node->manakai_local_name) : ''            ? ($node->manakai_local_name) : ''
562        }ge;        }ge;  ## BUG: ^ must be escaped
563        $msg =~ s{<var>{element-local-name}</var>}{        $msg =~ s{<var>{element-local-name}</var>}{
564          (UNIVERSAL::can ($node, 'owner_element') and          (UNIVERSAL::can ($node, 'owner_element') and
565           $node->owner_element)           $node->owner_element)
566            ? htescape ($node->owner_element->manakai_local_name)            ?  ($node->owner_element->manakai_local_name)
567            : ''            : '' ## BUG: ^ must be escaped
568        }ge;        }ge;
569        return ($type, $Msg->{$type}->[0], $msg);        return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
570      } elsif ($type =~ s/:([^:]*)$//) {      } elsif ($type =~ s/:([^:]*)$//) {
571        unshift @arg, $1;        unshift @arg, $1;
572        redo;        redo;
573      }      }
574    }    }
575    return ($type, '', htescape ($_[0]));    return ($type, 'level-'.$level, ($_[0]));
576                                     ## BUG: ^ must be escaped
577  } # get_text  } # get_text
578    
579  }  }
# Line 755  sub get_input_document ($$) { Line 582  sub get_input_document ($$) {
582    my ($http, $dom) = @_;    my ($http, $dom) = @_;
583    
584    my $request_uri = $http->get_parameter ('uri');    my $request_uri = $http->get_parameter ('uri');
585    my $r = {};    my $r = WebHACC::Input->new;
586    if (defined $request_uri and length $request_uri) {    if (defined $request_uri and length $request_uri) {
587      my $uri = $dom->create_uri_reference ($request_uri);      my $uri = $dom->create_uri_reference ($request_uri);
588      unless ({      unless ({
# Line 802  EOH Line 629  EOH
629      $ua->protocols_allowed ([qw/http/]);      $ua->protocols_allowed ([qw/http/]);
630      $ua->max_size (1000_000);      $ua->max_size (1000_000);
631      my $req = HTTP::Request->new (GET => $request_uri);      my $req = HTTP::Request->new (GET => $request_uri);
632        $req->header ('Accept-Encoding' => 'identity, *; q=0');
633      my $res = $ua->request ($req);      my $res = $ua->request ($req);
634      ## TODO: 401 sets |is_success| true.      ## TODO: 401 sets |is_success| true.
635      if ($res->is_success or $http->get_parameter ('error-page')) {      if ($res->is_success or $http->get_parameter ('error-page')) {
# Line 811  EOH Line 639  EOH
639    
640        ## TODO: More strict parsing...        ## TODO: More strict parsing...
641        my $ct = $res->header ('Content-Type');        my $ct = $res->header ('Content-Type');
642        if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {        if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
         $r->{media_type} = lc $1;  
       }  
       if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?(\S+)"?/i) {  
643          $r->{charset} = lc $1;          $r->{charset} = lc $1;
644          $r->{charset} =~ tr/\\//d;          $r->{charset} =~ tr/\\//d;
645            $r->{official_charset} = $r->{charset};
646        }        }
647    
648        my $input_charset = $http->get_parameter ('charset');        my $input_charset = $http->get_parameter ('charset');
# Line 824  EOH Line 650  EOH
650          $r->{charset_overridden}          $r->{charset_overridden}
651              = (not defined $r->{charset} or $r->{charset} ne $input_charset);              = (not defined $r->{charset} or $r->{charset} ne $input_charset);
652          $r->{charset} = $input_charset;          $r->{charset} = $input_charset;
653        }        }
654    
655          ## TODO: Support for HTTP Content-Encoding
656    
657        $r->{s} = ''.$res->content;        $r->{s} = ''.$res->content;
658    
659          require Whatpm::ContentType;
660          ($r->{official_type}, $r->{media_type})
661              = Whatpm::ContentType->get_sniffed_type
662                  (get_file_head => sub {
663                     return substr $r->{s}, 0, shift;
664                   },
665                   http_content_type_byte => $ct,
666                   has_http_content_encoding =>
667                       defined $res->header ('Content-Encoding'),
668                   supported_image_types => {});
669      } else {      } else {
670        $r->{uri} = $res->request->uri;        $r->{uri} = $res->request->uri;
671        $r->{request_uri} = $request_uri;        $r->{request_uri} = $request_uri;
# Line 847  EOH Line 686  EOH
686      $r->{charset} = ''.$http->get_parameter ('_charset_');      $r->{charset} = ''.$http->get_parameter ('_charset_');
687      $r->{charset} =~ s/\s+//g;      $r->{charset} =~ s/\s+//g;
688      $r->{charset} = 'utf-8' if $r->{charset} eq '';      $r->{charset} = 'utf-8' if $r->{charset} eq '';
689        $r->{official_charset} = $r->{charset};
690      $r->{header_field} = [];      $r->{header_field} = [];
691    
692        require Whatpm::ContentType;
693        ($r->{official_type}, $r->{media_type})
694            = Whatpm::ContentType->get_sniffed_type
695                (get_file_head => sub {
696                   return substr $r->{s}, 0, shift;
697                 },
698                 http_content_type_byte => undef,
699                 has_http_content_encoding => 0,
700                 supported_image_types => {});
701    }    }
702    
703    my $input_format = $http->get_parameter ('i');    my $input_format = $http->get_parameter ('i');
# Line 864  EOH Line 714  EOH
714    if ($r->{media_type} eq 'text/xml') {    if ($r->{media_type} eq 'text/xml') {
715      unless (defined $r->{charset}) {      unless (defined $r->{charset}) {
716        $r->{charset} = 'us-ascii';        $r->{charset} = 'us-ascii';
717          $r->{official_charset} = $r->{charset};
718      } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {      } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
719        $r->{charset_overridden} = 0;        $r->{charset_overridden} = 0;
720      }      }
# Line 875  EOH Line 726  EOH
726      return $r;      return $r;
727    }    }
728    
729      $r->{inner_html_element} = $http->get_parameter ('e');
730    
731    return $r;    return $r;
732  } # get_input_document  } # get_input_document
733    
# Line 907  Wakaba <w@suika.fam.cx>. Line 760  Wakaba <w@suika.fam.cx>.
760    
761  =head1 LICENSE  =head1 LICENSE
762    
763  Copyright 2007 Wakaba <w@suika.fam.cx>  Copyright 2007-2008 Wakaba <w@suika.fam.cx>
764    
765  This library is free software; you can redistribute it  This library is free software; you can redistribute it
766  and/or modify it under the same terms as Perl itself.  and/or modify it under the same terms as Perl itself.

Legend:
Removed from v.1.20  
changed lines
  Added in v.1.53

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24