/[pub]/test/html-webhacc/cc.cgi
Suika

Diff of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.22 by wakaba, Sun Nov 4 09:15:02 2007 UTC revision 1.31 by wakaba, Sun Feb 10 02:05:30 2008 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl  #!/usr/bin/perl
2  use strict;  use strict;
3    use utf8;
4    
5  use lib qw[/home/httpd/html/www/markup/html/whatpm  use lib qw[/home/httpd/html/www/markup/html/whatpm
6             /home/wakaba/work/manakai2/lib];             /home/wakaba/work/manakai2/lib];
# Line 51  sub htescape ($) { Line 52  sub htescape ($) {
52    
53    $| = 0;    $| = 0;
54    my $input = get_input_document ($http, $dom);    my $input = get_input_document ($http, $dom);
   my $inner_html_element = $http->get_parameter ('e');  
55    my $char_length = 0;    my $char_length = 0;
56    my %time;    my %time;
57    
# Line 61  sub htescape ($) { Line 61  sub htescape ($) {
61  <dt>Request URI</dt>  <dt>Request URI</dt>
62      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63  <dt>Document URI</dt>  <dt>Document URI</dt>
64      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65        <script>
66          document.title = '<'
67              + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68              + document.title;
69        </script></dd>
70  ]; # no </dl> yet  ]; # no </dl> yet
71    push @nav, ['#document-info' => 'Information'];    push @nav, ['#document-info' => 'Information'];
72    
# Line 73  if (defined $input->{s}) { Line 78  if (defined $input->{s}) {
78      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79  <dt>Internet Media Type</dt>  <dt>Internet Media Type</dt>
80      <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>      <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81      @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>      @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82  <dt>Character Encoding</dt>  <dt>Character Encoding</dt>
83      <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}      <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84      @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>      @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
# Line 84  if (defined $input->{s}) { Line 89  if (defined $input->{s}) {
89  ];  ];
90    
91    my $result = {conforming_min => 1, conforming_max => 1};    my $result = {conforming_min => 1, conforming_max => 1};
92    print_http_header_section ($input, $result);    check_and_print ($input => $result);
   
   my $doc;  
   my $el;  
   my $manifest;  
   
   if ($input->{media_type} eq 'text/html') {  
     ($doc, $el) = print_syntax_error_html_section ($input, $result);  
     print_source_string_section (\($input->{s}), $input->{charset});  
   } elsif ({  
             'text/xml' => 1,  
             'application/atom+xml' => 1,  
             'application/rss+xml' => 1,  
             'application/svg+xml' => 1,  
             'application/xhtml+xml' => 1,  
             'application/xml' => 1,  
            }->{$input->{media_type}}) {  
     ($doc, $el) = print_syntax_error_xml_section ($input, $result);  
     print_source_string_section (\($input->{s}), $doc->input_encoding);  
   } elsif ($input->{media_type} eq 'text/cache-manifest') {  
 ## TODO: MUST be text/cache-manifest  
     $manifest = print_syntax_error_manifest_section ($input, $result);  
     print_source_string_section (\($input->{s}), 'utf-8');  
   } else {  
     ## TODO: Change HTTP status code??  
     print_result_unknown_type_section ($input);  
   }  
   
   if (defined $doc or defined $el) {  
     print_structure_dump_dom_section ($doc, $el);  
     my $elements = print_structure_error_dom_section ($doc, $el, $result);  
     print_table_section ($elements->{table}) if @{$elements->{table}};  
     print_id_section ($elements->{id}) if keys %{$elements->{id}};  
     print_term_section ($elements->{term}) if keys %{$elements->{term}};  
     print_class_section ($elements->{class}) if keys %{$elements->{class}};  
   } elsif (defined $manifest) {  
     print_structure_dump_manifest_section ($manifest);  
     print_structure_error_manifest_section ($manifest, $result);  
   }  
   
93    print_result_section ($result);    print_result_section ($result);
94  } else {  } else {
95    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
# Line 142  if (defined $input->{s}) { Line 108  if (defined $input->{s}) {
108  </html>  </html>
109  ];  ];
110    
111    for (qw/decode parse parse_xml parse_manifest check check_manifest/) {    for (qw/decode parse parse_html parse_xml parse_manifest
112              check check_manifest/) {
113      next unless defined $time{$_};      next unless defined $time{$_};
114      open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";      open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
115      print $file $char_length, "\t", $time{$_}, "\n";      print $file $char_length, "\t", $time{$_}, "\n";
# Line 159  sub add_error ($$$) { Line 126  sub add_error ($$$) {
126        $result->{conforming_min} = 0;        $result->{conforming_min} = 0;
127      } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {      } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
128        $result->{$layer}->{warning}++;        $result->{$layer}->{warning}++;
129      } elsif ($err->{level} eq 'unsupported') {      } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
130        $result->{$layer}->{unsupported}++;        $result->{$layer}->{unsupported}++;
131        $result->{unsupported} = 1;        $result->{unsupported} = 1;
132      } else {      } else {
# Line 178  sub add_error ($$$) { Line 145  sub add_error ($$$) {
145    }    }
146  } # add_error  } # add_error
147    
148    sub check_and_print ($$) {
149      my ($input, $result) = @_;
150    
151      print_http_header_section ($input, $result);
152    
153      my $doc;
154      my $el;
155      my $manifest;
156    
157      if ($input->{media_type} eq 'text/html') {
158        ($doc, $el) = print_syntax_error_html_section ($input, $result);
159        print_source_string_section
160            (\($input->{s}), $input->{charset} || $doc->input_encoding);
161      } elsif ({
162                'text/xml' => 1,
163                'application/atom+xml' => 1,
164                'application/rss+xml' => 1,
165                'application/svg+xml' => 1,
166                'application/xhtml+xml' => 1,
167                'application/xml' => 1,
168               }->{$input->{media_type}}) {
169        ($doc, $el) = print_syntax_error_xml_section ($input, $result);
170        print_source_string_section (\($input->{s}), $doc->input_encoding);
171      } elsif ($input->{media_type} eq 'text/cache-manifest') {
172    ## TODO: MUST be text/cache-manifest
173        $manifest = print_syntax_error_manifest_section ($input, $result);
174        print_source_string_section (\($input->{s}), 'utf-8');
175      } else {
176        ## TODO: Change HTTP status code??
177        print_result_unknown_type_section ($input, $result);
178      }
179    
180      if (defined $doc or defined $el) {
181        print_structure_dump_dom_section ($doc, $el);
182        my $elements = print_structure_error_dom_section ($doc, $el, $result);
183        print_table_section ($elements->{table}) if @{$elements->{table}};
184        print_id_section ($elements->{id}) if keys %{$elements->{id}};
185        print_term_section ($elements->{term}) if keys %{$elements->{term}};
186        print_class_section ($elements->{class}) if keys %{$elements->{class}};
187      } elsif (defined $manifest) {
188        print_structure_dump_manifest_section ($manifest);
189        print_structure_error_manifest_section ($manifest, $result);
190      }
191    } # check_and_print
192    
193  sub print_http_header_section ($$) {  sub print_http_header_section ($$) {
194    my ($input, $result) = @_;    my ($input, $result) = @_;
195    return unless defined $input->{header_status_code} or    return unless defined $input->{header_status_code} or
# Line 217  sub print_syntax_error_html_section ($$) Line 229  sub print_syntax_error_html_section ($$)
229        
230    require Encode;    require Encode;
231    require Whatpm::HTML;    require Whatpm::HTML;
   
   $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.  
232        
   my $time1 = time;  
   my $t = Encode::decode ($input->{charset}, $input->{s});  
   $time{decode} = time - $time1;  
   
233    print STDOUT qq[    print STDOUT qq[
234  <div id="parse-errors" class="section">  <div id="parse-errors" class="section">
235  <h2>Parse Errors</h2>  <h2>Parse Errors</h2>
# Line 243  sub print_syntax_error_html_section ($$) Line 249  sub print_syntax_error_html_section ($$)
249      $type =~ tr/ /-/;      $type =~ tr/ /-/;
250      $type =~ s/\|/%7C/g;      $type =~ s/\|/%7C/g;
251      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
252      print STDOUT qq[<dd class="$cls">$msg</dd>\n];      print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
253        print STDOUT qq[$msg</dd>\n];
254    
255      add_error ('syntax', \%opt => $result);      add_error ('syntax', \%opt => $result);
256    };    };
257    
258    my $doc = $dom->create_document;    my $doc = $dom->create_document;
259    my $el;    my $el;
260    $time1 = time;    my $inner_html_element = $http->get_parameter ('e');
261    if (defined $inner_html_element and length $inner_html_element) {    if (defined $inner_html_element and length $inner_html_element) {
262        $input->{charset} ||= 'windows-1252'; ## TODO: for now.
263        my $time1 = time;
264        my $t = Encode::decode ($input->{charset}, $input->{s});
265        $time{decode} = time - $time1;
266        
267      $el = $doc->create_element_ns      $el = $doc->create_element_ns
268          ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);          ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
269        $time1 = time;
270      Whatpm::HTML->set_inner_html ($el, $t, $onerror);      Whatpm::HTML->set_inner_html ($el, $t, $onerror);
271        $time{parse} = time - $time1;
272    } else {    } else {
273      Whatpm::HTML->parse_string ($t => $doc, $onerror);      my $time1 = time;
274        Whatpm::HTML->parse_byte_string
275            ($input->{charset}, $input->{s} => $doc, $onerror);
276        $time{parse_html} = time - $time1;
277    }    }
278    $time{parse} = time - $time1;    $doc->manakai_charset ($input->{official_charset})
279          if defined $input->{official_charset};
280      
281    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
282    
283    return ($doc, $el);    return ($doc, $el);
# Line 299  sub print_syntax_error_xml_section ($$) Line 317  sub print_syntax_error_xml_section ($$)
317    my $doc = Message::DOM::XMLParserTemp->parse_byte_stream    my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
318        ($fh => $dom, $onerror, charset => $input->{charset});        ($fh => $dom, $onerror, charset => $input->{charset});
319    $time{parse_xml} = time - $time1;    $time{parse_xml} = time - $time1;
320      $doc->manakai_charset ($input->{official_charset})
321          if defined $input->{official_charset};
322    
323    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
324    
# Line 324  sub print_syntax_error_manifest_section Line 344  sub print_syntax_error_manifest_section
344      $type =~ tr/ /-/;      $type =~ tr/ /-/;
345      $type =~ s/\|/%7C/g;      $type =~ s/\|/%7C/g;
346      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
347      print STDOUT qq[<dd class="$cls">$msg</dd>\n];      print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
348        print STDOUT qq[$msg</dd>\n];
349    
350      add_error ('syntax', \%opt => $result);      add_error ('syntax', \%opt => $result);
351    };    };
# Line 387  sub print_document_tree ($) { Line 408  sub print_document_tree ($) {
408          $r .= '<ul class="attributes">';          $r .= '<ul class="attributes">';
409          for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }          for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
410                        @{$child->attributes}) {                        @{$child->attributes}) {
411            $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?            $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
412            $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children            $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
413          }          }
414          $r .= '</ul>';          $r .= '</ul>';
# Line 408  sub print_document_tree ($) { Line 429  sub print_document_tree ($) {
429      } elsif ($nt == $child->DOCUMENT_NODE) {      } elsif ($nt == $child->DOCUMENT_NODE) {
430        $r .= qq'<li id="$node_id" class="tree-document">Document';        $r .= qq'<li id="$node_id" class="tree-document">Document';
431        $r .= qq[<ul class="attributes">];        $r .= qq[<ul class="attributes">];
432          my $cp = $child->manakai_charset;
433          if (defined $cp) {
434            $r .= qq[<li><code>charset</code> parameter = <code>];
435            $r .= htescape ($cp) . qq[</code></li>];
436          }
437          $r .= qq[<li><code>inputEncoding</code> = ];
438          my $ie = $child->input_encoding;
439          if (defined $ie) {
440            $r .= qq[<code>@{[htescape ($ie)]}</code>];
441            if ($child->manakai_has_bom) {
442              $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
443            }
444          } else {
445            $r .= qq[(<code>null</code>)];
446          }
447        $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];        $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
448        $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];        $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
449        unless ($child->manakai_is_html) {        unless ($child->manakai_is_html) {
# Line 506  sub print_structure_error_dom_section ($ Line 542  sub print_structure_error_dom_section ($
542      $type =~ s/\|/%7C/g;      $type =~ s/\|/%7C/g;
543      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
544      print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .      print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
545          qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";          qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
546        print STDOUT $msg, "</dd>\n";
547      add_error ('structure', \%opt => $result);      add_error ('structure', \%opt => $result);
548    };    };
549    
# Line 691  sub print_result_section ($) { Line 728  sub print_result_section ($) {
728    print STDOUT qq[<table>    print STDOUT qq[<table>
729  <colgroup><col><colgroup><col><col><col><colgroup><col>  <colgroup><col><colgroup><col><col><col><colgroup><col>
730  <thead>  <thead>
731  <tr><th scope=col></th><th scope=col><em class=rfc2119>MUST</em>-level  <tr><th scope=col></th>
732  Errors</th><th scope=col><em class=rfc2119>SHOULD</em>-level  <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
733  Errors</th><th scope=col>Warnings</th><th scope=col>Score</th></tr>  Errors</a></th>
734  </thead><tbody>];  <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
735    Errors</a></th>
736    <th scope=col><a href="../error-description#level-w">Warnings</a></th>
737    <th scope=col>Score</th></tr></thead><tbody>];
738    
739    my $must_error = 0;    my $must_error = 0;
740    my $should_error = 0;    my $should_error = 0;
# Line 752  is <em>under development</em>.  The resu Line 792  is <em>under development</em>.  The resu
792    push @nav, ['#result-summary' => 'Result'];    push @nav, ['#result-summary' => 'Result'];
793  } # print_result_section  } # print_result_section
794    
795  sub print_result_unknown_type_section ($) {  sub print_result_unknown_type_section ($$) {
796    my $input = shift;    my ($input, $result) = @_;
797    
798      my $euri = htescape ($input->{uri});
799    print STDOUT qq[    print STDOUT qq[
800  <div id="result-summary" class="section">  <div id="parse-errors" class="section">
801  <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>  <h2>Errors</h2>
802    
803    <dl>
804    <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
805        <dd class=unsupported><strong><a href="../error-description#level-u">Not
806            supported</a></strong>:
807        Media type
808        <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
809        is not supported.</dd>
810    </dl>
811  </div>  </div>
812  ];  ];
813    push @nav, ['#result-summary' => 'Result'];    push @nav, ['#parse-errors' => 'Errors'];
814      add_error (char => {level => 'u'} => $result);
815      add_error (syntax => {level => 'u'} => $result);
816      add_error (structure => {level => 'u'} => $result);
817  } # print_result_unknown_type_section  } # print_result_unknown_type_section
818    
819  sub print_result_input_error_section ($) {  sub print_result_input_error_section ($) {
# Line 787  sub get_error_label ($) { Line 840  sub get_error_label ($) {
840    
841    if (defined $err->{node}) {    if (defined $err->{node}) {
842      $r .= ' ' if length $r;      $r .= ' ' if length $r;
843      $r = get_node_path ($err->{node});      $r = get_node_link ($err->{node});
844    }    }
845    
846    if (defined $err->{index}) {    if (defined $err->{index}) {
# Line 803  sub get_error_label ($) { Line 856  sub get_error_label ($) {
856    return $r;    return $r;
857  } # get_error_label  } # get_error_label
858    
859    sub get_error_level_label ($) {
860      my $err = shift;
861    
862      my $r = '';
863    
864      if (not defined $err->{level} or $err->{level} eq 'm') {
865        $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
866            error</a></strong>: ];
867      } elsif ($err->{level} eq 's') {
868        $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
869            error</a></strong>: ];
870      } elsif ($err->{level} eq 'w') {
871        $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
872            ];
873      } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
874        $r = qq[<strong><a href="../error-description#level-u">Not
875            supported</a></strong>: ];
876      } else {
877        my $elevel = htescape ($err->{level});
878        $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
879            ];
880      }
881    
882      return $r;
883    } # get_error_level_label
884    
885  sub get_node_path ($) {  sub get_node_path ($) {
886    my $node = shift;    my $node = shift;
887    my @r;    my @r;
# Line 840  sub get_node_link ($) { Line 919  sub get_node_link ($) {
919    
920  sub load_text_catalog ($) {  sub load_text_catalog ($) {
921    my $lang = shift; # MUST be a canonical lang name    my $lang = shift; # MUST be a canonical lang name
922    open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";    open my $file, '<:utf8', "cc-msg.$lang.txt"
923          or die "$0: cc-msg.$lang.txt: $!";
924    while (<$file>) {    while (<$file>) {
925      if (s/^([^;]+);([^;]*);//) {      if (s/^([^;]+);([^;]*);//) {
926        my ($type, $cls, $msg) = ($1, $2, $_);        my ($type, $cls, $msg) = ($1, $2, $_);
# Line 853  sub load_text_catalog ($) { Line 933  sub load_text_catalog ($) {
933  sub get_text ($) {  sub get_text ($) {
934    my ($type, $level, $node) = @_;    my ($type, $level, $node) = @_;
935    $type = $level . ':' . $type if defined $level;    $type = $level . ':' . $type if defined $level;
936      $level = 'm' unless defined $level;
937    my @arg;    my @arg;
938    {    {
939      if (defined $Msg->{$type}) {      if (defined $Msg->{$type}) {
# Line 877  sub get_text ($) { Line 958  sub get_text ($) {
958            ? htescape ($node->owner_element->manakai_local_name)            ? htescape ($node->owner_element->manakai_local_name)
959            : ''            : ''
960        }ge;        }ge;
961        return ($type, $Msg->{$type}->[0], $msg);        return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
962      } elsif ($type =~ s/:([^:]*)$//) {      } elsif ($type =~ s/:([^:]*)$//) {
963        unshift @arg, $1;        unshift @arg, $1;
964        redo;        redo;
965      }      }
966    }    }
967    return ($type, '', htescape ($_[0]));    return ($type, 'level-'.$level, htescape ($_[0]));
968  } # get_text  } # get_text
969    
970  }  }
# Line 939  EOH Line 1020  EOH
1020      $ua->protocols_allowed ([qw/http/]);      $ua->protocols_allowed ([qw/http/]);
1021      $ua->max_size (1000_000);      $ua->max_size (1000_000);
1022      my $req = HTTP::Request->new (GET => $request_uri);      my $req = HTTP::Request->new (GET => $request_uri);
1023        $req->header ('Accept-Encoding' => 'identity, *; q=0');
1024      my $res = $ua->request ($req);      my $res = $ua->request ($req);
1025      ## TODO: 401 sets |is_success| true.      ## TODO: 401 sets |is_success| true.
1026      if ($res->is_success or $http->get_parameter ('error-page')) {      if ($res->is_success or $http->get_parameter ('error-page')) {
# Line 948  EOH Line 1030  EOH
1030    
1031        ## TODO: More strict parsing...        ## TODO: More strict parsing...
1032        my $ct = $res->header ('Content-Type');        my $ct = $res->header ('Content-Type');
       if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {  
         $r->{media_type} = lc $1;  
       }  
1033        if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {        if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1034          $r->{charset} = lc $1;          $r->{charset} = lc $1;
1035          $r->{charset} =~ tr/\\//d;          $r->{charset} =~ tr/\\//d;
1036            $r->{official_charset} = $r->{charset};
1037        }        }
1038    
1039        my $input_charset = $http->get_parameter ('charset');        my $input_charset = $http->get_parameter ('charset');
# Line 961  EOH Line 1041  EOH
1041          $r->{charset_overridden}          $r->{charset_overridden}
1042              = (not defined $r->{charset} or $r->{charset} ne $input_charset);              = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1043          $r->{charset} = $input_charset;          $r->{charset} = $input_charset;
1044        }        }
1045    
1046          ## TODO: Support for HTTP Content-Encoding
1047    
1048        $r->{s} = ''.$res->content;        $r->{s} = ''.$res->content;
1049    
1050          require Whatpm::ContentType;
1051          ($r->{official_type}, $r->{media_type})
1052              = Whatpm::ContentType->get_sniffed_type
1053                  (get_file_head => sub {
1054                     return substr $r->{s}, 0, shift;
1055                   },
1056                   http_content_type_byte => $ct,
1057                   has_http_content_encoding =>
1058                       defined $res->header ('Content-Encoding'),
1059                   supported_image_types => {});
1060      } else {      } else {
1061        $r->{uri} = $res->request->uri;        $r->{uri} = $res->request->uri;
1062        $r->{request_uri} = $request_uri;        $r->{request_uri} = $request_uri;
# Line 984  EOH Line 1077  EOH
1077      $r->{charset} = ''.$http->get_parameter ('_charset_');      $r->{charset} = ''.$http->get_parameter ('_charset_');
1078      $r->{charset} =~ s/\s+//g;      $r->{charset} =~ s/\s+//g;
1079      $r->{charset} = 'utf-8' if $r->{charset} eq '';      $r->{charset} = 'utf-8' if $r->{charset} eq '';
1080        $r->{official_charset} = $r->{charset};
1081      $r->{header_field} = [];      $r->{header_field} = [];
1082    
1083        require Whatpm::ContentType;
1084        ($r->{official_type}, $r->{media_type})
1085            = Whatpm::ContentType->get_sniffed_type
1086                (get_file_head => sub {
1087                   return substr $r->{s}, 0, shift;
1088                 },
1089                 http_content_type_byte => undef,
1090                 has_http_content_encoding => 0,
1091                 supported_image_types => {});
1092    }    }
1093    
1094    my $input_format = $http->get_parameter ('i');    my $input_format = $http->get_parameter ('i');
# Line 1001  EOH Line 1105  EOH
1105    if ($r->{media_type} eq 'text/xml') {    if ($r->{media_type} eq 'text/xml') {
1106      unless (defined $r->{charset}) {      unless (defined $r->{charset}) {
1107        $r->{charset} = 'us-ascii';        $r->{charset} = 'us-ascii';
1108          $r->{official_charset} = $r->{charset};
1109      } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {      } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1110        $r->{charset_overridden} = 0;        $r->{charset_overridden} = 0;
1111      }      }

Legend:
Removed from v.1.22  
changed lines
  Added in v.1.31

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24