/[pub]/test/html-webhacc/cc.cgi
Suika

Diff of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.20 by wakaba, Mon Sep 10 12:09:34 2007 UTC revision 1.48 by wakaba, Sat Apr 12 15:57:56 2008 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl  #!/usr/bin/perl
2  use strict;  use strict;
3    use utf8;
4    
5  use lib qw[/home/httpd/html/www/markup/html/whatpm  use lib qw[/home/httpd/html/www/markup/html/whatpm
6             /home/wakaba/work/manakai2/lib];             /home/wakaba/work/manakai2/lib];
# Line 19  sub htescape ($) { Line 20  sub htescape ($) {
20    return $s;    return $s;
21  } # htescape  } # htescape
22    
23      my @nav;
24      my %time;
25      require Message::DOM::DOMImplementation;
26      my $dom = Message::DOM::DOMImplementation->new;
27    {
28    use Message::CGI::HTTP;    use Message::CGI::HTTP;
29    my $http = Message::CGI::HTTP->new;    my $http = Message::CGI::HTTP->new;
30    
# Line 30  sub htescape ($) { Line 36  sub htescape ($) {
36    binmode STDOUT, ':utf8';    binmode STDOUT, ':utf8';
37    $| = 1;    $| = 1;
38    
   require Message::DOM::DOMImplementation;  
   my $dom = Message::DOM::DOMImplementation->new;  
   
39    load_text_catalog ('en'); ## TODO: conneg    load_text_catalog ('en'); ## TODO: conneg
40    
   my @nav;  
41    print STDOUT qq[Content-Type: text/html; charset=utf-8    print STDOUT qq[Content-Type: text/html; charset=utf-8
42    
43  <!DOCTYPE html>  <!DOCTYPE html>
# Line 51  sub htescape ($) { Line 53  sub htescape ($) {
53    
54    $| = 0;    $| = 0;
55    my $input = get_input_document ($http, $dom);    my $input = get_input_document ($http, $dom);
   my $inner_html_element = $http->get_parameter ('e');  
56    my $char_length = 0;    my $char_length = 0;
   my %time;  
57    
58    print qq[    print qq[
59  <div id="document-info" class="section">  <div id="document-info" class="section">
# Line 61  sub htescape ($) { Line 61  sub htescape ($) {
61  <dt>Request URI</dt>  <dt>Request URI</dt>
62      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63  <dt>Document URI</dt>  <dt>Document URI</dt>
64      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65        <script>
66          document.title = '<'
67              + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68              + document.title;
69        </script></dd>
70  ]; # no </dl> yet  ]; # no </dl> yet
71    push @nav, ['#document-info' => 'Information'];    push @nav, ['#document-info' => 'Information'];
72    
# Line 73  if (defined $input->{s}) { Line 78  if (defined $input->{s}) {
78      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79  <dt>Internet Media Type</dt>  <dt>Internet Media Type</dt>
80      <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>      <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81      @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>      @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82  <dt>Character Encoding</dt>  <dt>Character Encoding</dt>
83      <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}      <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84      @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>      @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
# Line 81  if (defined $input->{s}) { Line 86  if (defined $input->{s}) {
86      <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>      <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87  </dl>  </dl>
88  </div>  </div>
89    
90    <script src="../cc-script.js"></script>
91  ];  ];
92    
93      $input->{id_prefix} = '';
94      #$input->{nested} = 0;
95    my $result = {conforming_min => 1, conforming_max => 1};    my $result = {conforming_min => 1, conforming_max => 1};
96    print_http_header_section ($input, $result);    check_and_print ($input => $result);
   
   my $doc;  
   my $el;  
   
   if ($input->{media_type} eq 'text/html') {  
     ($doc, $el) = print_syntax_error_html_section ($input, $result);  
     print_source_string_section (\($input->{s}), $input->{charset});  
   } elsif ({  
             'text/xml' => 1,  
             'application/atom+xml' => 1,  
             'application/rss+xml' => 1,  
             'application/svg+xml' => 1,  
             'application/xhtml+xml' => 1,  
             'application/xml' => 1,  
            }->{$input->{media_type}}) {  
     ($doc, $el) = print_syntax_error_xml_section ($input, $result);  
     print_source_string_section (\($input->{s}), $doc->input_encoding);  
   } else {  
     ## TODO: Change HTTP status code??  
     print_result_unknown_type_section ($input);  
   }  
   
   if (defined $doc or defined $el) {  
     print_structure_dump_section ($doc, $el);  
     my $elements = print_structure_error_section ($doc, $el, $result);  
     print_table_section ($elements->{table}) if @{$elements->{table}};  
     print_id_section ($elements->{id}) if keys %{$elements->{id}};  
     print_term_section ($elements->{term}) if keys %{$elements->{term}};  
     print_class_section ($elements->{class}) if keys %{$elements->{class}};  
   }  
   
97    print_result_section ($result);    print_result_section ($result);
98  } else {  } else {
99    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
# Line 134  if (defined $input->{s}) { Line 112  if (defined $input->{s}) {
112  </html>  </html>
113  ];  ];
114    
115    for (qw/decode parse parse_xml check/) {    for (qw/decode parse parse_html parse_xml parse_manifest
116              check check_manifest/) {
117      next unless defined $time{$_};      next unless defined $time{$_};
118      open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";      open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119      print $file $char_length, "\t", $time{$_}, "\n";      print $file $char_length, "\t", $time{$_}, "\n";
120    }    }
121    
122  exit;  exit;
123    }
124    
125  sub add_error ($$$) {  sub add_error ($$$) {
126    my ($layer, $err, $result) = @_;    my ($layer, $err, $result) = @_;
# Line 151  sub add_error ($$$) { Line 131  sub add_error ($$$) {
131        $result->{conforming_min} = 0;        $result->{conforming_min} = 0;
132      } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {      } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133        $result->{$layer}->{warning}++;        $result->{$layer}->{warning}++;
134      } elsif ($err->{level} eq 'unsupported') {      } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135        $result->{$layer}->{unsupported}++;        $result->{$layer}->{unsupported}++;
136        $result->{unsupported} = 1;        $result->{unsupported} = 1;
137        } elsif ($err->{level} eq 'i') {
138          #
139      } else {      } else {
140        $result->{$layer}->{must}++;        $result->{$layer}->{must}++;
141        $result->{$layer}->{score_max} -= 2;        $result->{$layer}->{score_max} -= 2;
# Line 170  sub add_error ($$$) { Line 152  sub add_error ($$$) {
152    }    }
153  } # add_error  } # add_error
154    
155    sub check_and_print ($$) {
156      my ($input, $result) = @_;
157    
158      print_http_header_section ($input, $result);
159    
160      my $doc;
161      my $el;
162      my $cssom;
163      my $manifest;
164      my @subdoc;
165    
166      if ($input->{media_type} eq 'text/html') {
167        ($doc, $el) = print_syntax_error_html_section ($input, $result);
168        print_source_string_section
169            ($input,
170             \($input->{s}),
171             $input->{charset} || $doc->input_encoding);
172      } elsif ({
173                'text/xml' => 1,
174                'application/atom+xml' => 1,
175                'application/rss+xml' => 1,
176                'image/svg+xml' => 1,
177                'application/xhtml+xml' => 1,
178                'application/xml' => 1,
179                ## TODO: Should we make all XML MIME Types fall
180                ## into this category?
181    
182                'application/rdf+xml' => 1, ## NOTE: This type has different model.
183               }->{$input->{media_type}}) {
184        ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185        print_source_string_section ($input,
186                                     \($input->{s}),
187                                     $doc->input_encoding);
188      } elsif ($input->{media_type} eq 'text/css') {
189        $cssom = print_syntax_error_css_section ($input, $result);
190        print_source_string_section
191            ($input, \($input->{s}),
192             $cssom->manakai_input_encoding);
193      } elsif ($input->{media_type} eq 'text/cache-manifest') {
194    ## TODO: MUST be text/cache-manifest
195        $manifest = print_syntax_error_manifest_section ($input, $result);
196        print_source_string_section ($input, \($input->{s}),
197                                     'utf-8');
198      } else {
199        ## TODO: Change HTTP status code??
200        print_result_unknown_type_section ($input, $result);
201      }
202    
203      if (defined $doc or defined $el) {
204        $doc->document_uri ($input->{uri});
205        $doc->manakai_entity_base_uri ($input->{base_uri});
206        print_structure_dump_dom_section ($input, $doc, $el);
207        my $elements = print_structure_error_dom_section
208            ($input, $doc, $el, $result, sub {
209              push @subdoc, shift;
210            });
211        print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212        print_listing_section ({
213          id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214        }, $input, $elements->{id}) if keys %{$elements->{id}};
215        print_listing_section ({
216          id => 'terms', label => 'Terms', heading => 'Terms',
217        }, $input, $elements->{term}) if keys %{$elements->{term}};
218        print_listing_section ({
219          id => 'classes', label => 'Classes', heading => 'Classes',
220        }, $input, $elements->{class}) if keys %{$elements->{class}};
221        print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}};
222        print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
223      } elsif (defined $cssom) {
224        print_structure_dump_cssom_section ($input, $cssom);
225        ## TODO: CSSOM validation
226        add_error ('structure', {level => 'u'} => $result);
227      } elsif (defined $manifest) {
228        print_structure_dump_manifest_section ($input, $manifest);
229        print_structure_error_manifest_section ($input, $manifest, $result);
230      }
231    
232      my $id_prefix = 0;
233      for my $subinput (@subdoc) {
234        $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
235        $subinput->{nested} = 1;
236        $subinput->{base_uri} = $subinput->{container_node}->base_uri
237            unless defined $subinput->{base_uri};
238        my $ebaseuri = htescape ($subinput->{base_uri});
239        push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
240        print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
241          <h2>Subdocument #$id_prefix</h2>
242    
243          <dl>
244          <dt>Internet Media Type</dt>
245            <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
246          <dt>Container Node</dt>
247            <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
248          <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
249            <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
250          </dl>];              
251    
252        $subinput->{id_prefix} .= '-';
253        check_and_print ($subinput => $result);
254    
255        print STDOUT qq[</div>];
256      }
257    } # check_and_print
258    
259  sub print_http_header_section ($$) {  sub print_http_header_section ($$) {
260    my ($input, $result) = @_;    my ($input, $result) = @_;
261    return unless defined $input->{header_status_code} or    return unless defined $input->{header_status_code} or
262        defined $input->{header_status_text} or        defined $input->{header_status_text} or
263        @{$input->{header_field}};        @{$input->{header_field} or []};
264        
265    push @nav, ['#source-header' => 'HTTP Header'];    push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
266    print STDOUT qq[<div id="source-header" class="section">    print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
267  <h2>HTTP Header</h2>  <h2>HTTP Header</h2>
268    
269  <p><strong>Note</strong>: Due to the limitation of the  <p><strong>Note</strong>: Due to the limitation of the
# Line 209  sub print_syntax_error_html_section ($$) Line 295  sub print_syntax_error_html_section ($$)
295        
296    require Encode;    require Encode;
297    require Whatpm::HTML;    require Whatpm::HTML;
   
   $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.  
298        
   my $time1 = time;  
   my $t = Encode::decode ($input->{charset}, $input->{s});  
   $time{decode} = time - $time1;  
   
299    print STDOUT qq[    print STDOUT qq[
300  <div id="parse-errors" class="section">  <div id="$input->{id_prefix}parse-errors" class="section">
301  <h2>Parse Errors</h2>  <h2>Parse Errors</h2>
302    
303  <dl>];  <dl id="$input->{id_prefix}parse-errors-list">];
304    push @nav, ['#parse-errors' => 'Parse Error'];    push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
305    
306    my $onerror = sub {    my $onerror = sub {
307      my (%opt) = @_;      my (%opt) = @_;
308      my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});      my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
309      if ($opt{column} > 0) {      print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
310        print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];          qq[</dt>];
     } else {  
       $opt{line} = $opt{line} - 1 || 1;  
       print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];  
     }  
311      $type =~ tr/ /-/;      $type =~ tr/ /-/;
312      $type =~ s/\|/%7C/g;      $type =~ s/\|/%7C/g;
313      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
314      print STDOUT qq[<dd class="$cls">$msg</dd>\n];      print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
315        print STDOUT qq[$msg</dd>\n];
316    
317      add_error ('syntax', \%opt => $result);      add_error ('syntax', \%opt => $result);
318    };    };
319    
320    my $doc = $dom->create_document;    my $doc = $dom->create_document;
321    my $el;    my $el;
322    $time1 = time;    my $inner_html_element = $input->{inner_html_element};
323    if (defined $inner_html_element and length $inner_html_element) {    if (defined $inner_html_element and length $inner_html_element) {
324        $input->{charset} ||= 'windows-1252'; ## TODO: for now.
325        my $time1 = time;
326        my $t = \($input->{s});
327        unless ($input->{is_char_string}) {
328          $t = \(Encode::decode ($input->{charset}, $$t));
329        }
330        $time{decode} = time - $time1;
331        
332      $el = $doc->create_element_ns      $el = $doc->create_element_ns
333          ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);          ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
334      Whatpm::HTML->set_inner_html ($el, $t, $onerror);      $time1 = time;
335        Whatpm::HTML->set_inner_html ($el, $$t, $onerror);
336        $time{parse} = time - $time1;
337    } else {    } else {
338      Whatpm::HTML->parse_string ($t => $doc, $onerror);      my $time1 = time;
339        if ($input->{is_char_string}) {
340          Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror);
341        } else {
342          Whatpm::HTML->parse_byte_string
343              ($input->{charset}, $input->{s} => $doc, $onerror);
344        }
345        $time{parse_html} = time - $time1;
346    }    }
347    $time{parse} = time - $time1;    $doc->manakai_charset ($input->{official_charset})
348          if defined $input->{official_charset};
349      
350    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
351    
352    return ($doc, $el);    return ($doc, $el);
# Line 263  sub print_syntax_error_xml_section ($$) Line 358  sub print_syntax_error_xml_section ($$)
358    require Message::DOM::XMLParserTemp;    require Message::DOM::XMLParserTemp;
359        
360    print STDOUT qq[    print STDOUT qq[
361  <div id="parse-errors" class="section">  <div id="$input->{id_prefix}parse-errors" class="section">
362  <h2>Parse Errors</h2>  <h2>Parse Errors</h2>
363    
364  <dl>];  <dl id="$input->{id_prefix}parse-errors-list">];
365    push @nav, ['#parse-errors' => 'Parse Error'];    push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
366    
367    my $onerror = sub {    my $onerror = sub {
368      my $err = shift;      my $err = shift;
369      my $line = $err->location->line_number;      my $line = $err->location->line_number;
370      print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];      print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
371      print STDOUT $err->location->column_number, "</dt><dd>";      print STDOUT $err->location->column_number, "</dt><dd>";
372      print STDOUT htescape $err->text, "</dd>\n";      print STDOUT htescape $err->text, "</dd>\n";
373    
# Line 286  sub print_syntax_error_xml_section ($$) Line 381  sub print_syntax_error_xml_section ($$)
381      return 1;      return 1;
382    };    };
383    
384      my $t = \($input->{s});
385      if ($input->{is_char_string}) {
386        require Encode;
387        $t = \(Encode::encode ('utf8', $$t));
388        $input->{charset} = 'utf-8';
389      }
390    
391    my $time1 = time;    my $time1 = time;
392    open my $fh, '<', \($input->{s});    open my $fh, '<', $t;
393    my $doc = Message::DOM::XMLParserTemp->parse_byte_stream    my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
394        ($fh => $dom, $onerror, charset => $input->{charset});        ($fh => $dom, $onerror, charset => $input->{charset});
395    $time{parse_xml} = time - $time1;    $time{parse_xml} = time - $time1;
396      $doc->manakai_charset ($input->{official_charset})
397          if defined $input->{official_charset};
398    
399    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
400    
401    return ($doc, undef);    return ($doc, undef);
402  } # print_syntax_error_xml_section  } # print_syntax_error_xml_section
403    
404  sub print_source_string_section ($$) {  sub get_css_parser () {
405    require Encode;    our $CSSParser;
406    my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name    return $CSSParser if $CSSParser;
407    return unless $enc;  
408      require Whatpm::CSS::Parser;
409      my $p = Whatpm::CSS::Parser->new;
410    
411      $p->{prop}->{$_} = 1 for qw/
412        alignment-baseline
413        background background-attachment background-color background-image
414        background-position background-position-x background-position-y
415        background-repeat border border-bottom border-bottom-color
416        border-bottom-style border-bottom-width border-collapse border-color
417        border-left border-left-color
418        border-left-style border-left-width border-right border-right-color
419        border-right-style border-right-width
420        border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
421        border-style border-top border-top-color border-top-style border-top-width
422        border-width bottom
423        caption-side clear clip color content counter-increment counter-reset
424        cursor direction display dominant-baseline empty-cells float font
425        font-family font-size font-size-adjust font-stretch
426        font-style font-variant font-weight height left
427        letter-spacing line-height
428        list-style list-style-image list-style-position list-style-type
429        margin margin-bottom margin-left margin-right margin-top marker-offset
430        marks max-height max-width min-height min-width opacity -moz-opacity
431        orphans outline outline-color outline-style outline-width overflow
432        overflow-x overflow-y
433        padding padding-bottom padding-left padding-right padding-top
434        page page-break-after page-break-before page-break-inside
435        position quotes right size table-layout
436        text-align text-anchor text-decoration text-indent text-transform
437        top unicode-bidi vertical-align visibility white-space width widows
438        word-spacing writing-mode z-index
439      /;
440      $p->{prop_value}->{display}->{$_} = 1 for qw/
441        block clip inline inline-block inline-table list-item none
442        table table-caption table-cell table-column table-column-group
443        table-header-group table-footer-group table-row table-row-group
444        compact marker
445      /;
446      $p->{prop_value}->{position}->{$_} = 1 for qw/
447        absolute fixed relative static
448      /;
449      $p->{prop_value}->{float}->{$_} = 1 for qw/
450        left right none
451      /;
452      $p->{prop_value}->{clear}->{$_} = 1 for qw/
453        left right none both
454      /;
455      $p->{prop_value}->{direction}->{ltr} = 1;
456      $p->{prop_value}->{direction}->{rtl} = 1;
457      $p->{prop_value}->{marks}->{crop} = 1;
458      $p->{prop_value}->{marks}->{cross} = 1;
459      $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
460        normal bidi-override embed
461      /;
462      for my $prop_name (qw/overflow overflow-x overflow-y/) {
463        $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
464          visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
465        /;
466      }
467      $p->{prop_value}->{visibility}->{$_} = 1 for qw/
468        visible hidden collapse
469      /;
470      $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
471        disc circle square decimal decimal-leading-zero
472        lower-roman upper-roman lower-greek lower-latin
473        upper-latin armenian georgian lower-alpha upper-alpha none
474        hebrew cjk-ideographic hiragana katakana hiragana-iroha
475        katakana-iroha
476      /;
477      $p->{prop_value}->{'list-style-position'}->{outside} = 1;
478      $p->{prop_value}->{'list-style-position'}->{inside} = 1;
479      $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
480        auto always avoid left right
481      /;
482      $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
483        auto always avoid left right
484      /;
485      $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
486      $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
487      $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
488        repeat repeat-x repeat-y no-repeat
489      /;
490      $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
491      $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
492      $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
493        xx-small x-small small medium large x-large xx-large
494        -manakai-xxx-large -webkit-xxx-large
495        larger smaller
496      /;
497      $p->{prop_value}->{'font-style'}->{normal} = 1;
498      $p->{prop_value}->{'font-style'}->{italic} = 1;
499      $p->{prop_value}->{'font-style'}->{oblique} = 1;
500      $p->{prop_value}->{'font-variant'}->{normal} = 1;
501      $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
502      $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
503          qw/normal wider narrower ultra-condensed extra-condensed
504            condensed semi-condensed semi-expanded expanded
505            extra-expanded ultra-expanded/;
506      $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
507        left right center justify begin end
508      /;
509      $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
510        capitalize uppercase lowercase none
511      /;
512      $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
513        normal pre nowrap pre-line pre-wrap -moz-pre-wrap
514      /;
515      $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
516        lr rl tb lr-tb rl-tb tb-rl
517      /;
518      $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
519        start middle end
520      /;
521      $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
522        auto use-script no-change reset-size ideographic alphabetic
523        hanging mathematical central middle text-after-edge text-before-edge
524      /;
525      $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
526        auto baseline before-edge text-before-edge middle central
527        after-edge text-after-edge ideographic alphabetic hanging
528        mathematical
529      /;
530      $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
531        none blink underline overline line-through
532      /;
533      $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
534        top bottom left right
535      /;
536      $p->{prop_value}->{'table-layout'}->{auto} = 1;
537      $p->{prop_value}->{'table-layout'}->{fixed} = 1;
538      $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
539      $p->{prop_value}->{'border-collapse'}->{separate} = 1;
540      $p->{prop_value}->{'empty-cells'}->{show} = 1;
541      $p->{prop_value}->{'empty-cells'}->{hide} = 1;
542      $p->{prop_value}->{cursor}->{$_} = 1 for qw/
543        auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
544        se-resize sw-resize s-resize w-resize text wait help progress
545      /;
546      for my $prop (qw/border-top-style border-left-style
547                       border-bottom-style border-right-style outline-style/) {
548        $p->{prop_value}->{$prop}->{$_} = 1 for qw/
549          none hidden dotted dashed solid double groove ridge inset outset
550        /;
551      }
552      for my $prop (qw/color background-color
553                       border-bottom-color border-left-color border-right-color
554                       border-top-color border-color/) {
555        $p->{prop_value}->{$prop}->{transparent} = 1;
556        $p->{prop_value}->{$prop}->{flavor} = 1;
557        $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
558      }
559      $p->{prop_value}->{'outline-color'}->{invert} = 1;
560      $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
561      $p->{pseudo_class}->{$_} = 1 for qw/
562        active checked disabled empty enabled first-child first-of-type
563        focus hover indeterminate last-child last-of-type link only-child
564        only-of-type root target visited
565        lang nth-child nth-last-child nth-of-type nth-last-of-type not
566        -manakai-contains -manakai-current
567      /;
568      $p->{pseudo_element}->{$_} = 1 for qw/
569        after before first-letter first-line
570      /;
571    
572      return $CSSParser = $p;
573    } # get_css_parser
574    
575    sub print_syntax_error_css_section ($$) {
576      my ($input, $result) = @_;
577    
578      print STDOUT qq[
579    <div id="$input->{id_prefix}parse-errors" class="section">
580    <h2>Parse Errors</h2>
581    
582    <dl id="$input->{id_prefix}parse-errors-list">];
583      push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
584    
585      my $p = get_css_parser ();
586      $p->init;
587      $p->{onerror} = sub {
588        my (%opt) = @_;
589        my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
590        if ($opt{token}) {
591          print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
592        } else {
593          print STDOUT qq[<dt class="$cls">Unknown location];
594        }
595        if (defined $opt{value}) {
596          print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
597        } elsif (defined $opt{token}) {
598          print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
599        }
600        $type =~ tr/ /-/;
601        $type =~ s/\|/%7C/g;
602        $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
603        print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
604        print STDOUT qq[$msg</dd>\n];
605    
606        add_error ('syntax', \%opt => $result);
607      };
608      $p->{href} = $input->{uri};
609      $p->{base_uri} = $input->{base_uri};
610    
611    #  if ($parse_mode eq 'q') {
612    #    $p->{unitless_px} = 1;
613    #    $p->{hashless_color} = 1;
614    #  }
615    
616    ## TODO: Make $input->{s} a ref.
617    
618      my $s = \$input->{s};
619      my $charset;
620      unless ($input->{is_char_string}) {
621        require Encode;
622        if (defined $input->{charset}) {## TODO: IANA->Perl
623          $charset = $input->{charset};
624          $s = \(Encode::decode ($input->{charset}, $$s));
625        } else {
626          ## TODO: charset detection
627          $s = \(Encode::decode ($charset = 'utf-8', $$s));
628        }
629      }
630      
631      my $cssom = $p->parse_char_string ($$s);
632      $cssom->manakai_input_encoding ($charset) if defined $charset;
633    
634      print STDOUT qq[</dl></div>];
635    
636      return $cssom;
637    } # print_syntax_error_css_section
638    
639    sub print_syntax_error_manifest_section ($$) {
640      my ($input, $result) = @_;
641    
642      require Whatpm::CacheManifest;
643    
644      print STDOUT qq[
645    <div id="$input->{id_prefix}parse-errors" class="section">
646    <h2>Parse Errors</h2>
647    
648    <dl id="$input->{id_prefix}parse-errors-list">];
649      push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
650    
651      my $onerror = sub {
652        my (%opt) = @_;
653        my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
654        print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
655            qq[</dt>];
656        $type =~ tr/ /-/;
657        $type =~ s/\|/%7C/g;
658        $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
659        print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
660        print STDOUT qq[$msg</dd>\n];
661    
662        add_error ('syntax', \%opt => $result);
663      };
664    
665      my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string';
666      my $time1 = time;
667      my $manifest = Whatpm::CacheManifest->$m
668          ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
669      $time{parse_manifest} = time - $time1;
670    
671      print STDOUT qq[</dl></div>];
672    
673      return $manifest;
674    } # print_syntax_error_manifest_section
675    
676    sub print_source_string_section ($$$) {
677      my $input = shift;
678      my $s;
679      unless ($input->{is_char_string}) {
680        require Encode;
681        my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
682        return unless $enc;
683    
684        $s = \($enc->decode (${$_[0]}));
685      } else {
686        $s = $_[0];
687      }
688    
   my $s = \($enc->decode (${$_[0]}));  
689    my $i = 1;                                my $i = 1;                            
690    push @nav, ['#source-string' => 'Source'];    push @nav, ['#source-string' => 'Source'] unless $input->{nested};
691    print STDOUT qq[<div id="source-string" class="section">    print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
692  <h2>Document Source</h2>  <h2>Document Source</h2>
693  <ol lang="">\n];  <ol lang="">\n];
694    if (length $$s) {    if (length $$s) {
695      while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {      while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
696        print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";        print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
697              "</li>\n";
698        $i++;        $i++;
699      }      }
700      if ($$s =~ /\G([^\x0A]+)/gc) {      if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
701        print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";        print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
702              "</li>\n";
703      }      }
704    } else {    } else {
705      print STDOUT q[<li id="line-1"></li>];      print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
706    }    }
707    print STDOUT "</ol></div>";    print STDOUT "</ol></div>
708    <script>
709      addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
710    </script>";
711  } # print_input_string_section  } # print_input_string_section
712    
713  sub print_document_tree ($) {  sub print_document_tree ($$) {
714    my $node = shift;    my ($input, $node) = @_;
715    
716    my $r = '<ol class="xoxo">';    my $r = '<ol class="xoxo">';
717    
718    my @node = ($node);    my @node = ($node);
# Line 334  sub print_document_tree ($) { Line 723  sub print_document_tree ($) {
723        next;        next;
724      }      }
725    
726      my $node_id = 'node-'.refaddr $child;      my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
727      my $nt = $child->node_type;      my $nt = $child->node_type;
728      if ($nt == $child->ELEMENT_NODE) {      if ($nt == $child->ELEMENT_NODE) {
729        my $child_nsuri = $child->namespace_uri;        my $child_nsuri = $child->namespace_uri;
# Line 345  sub print_document_tree ($) { Line 734  sub print_document_tree ($) {
734          $r .= '<ul class="attributes">';          $r .= '<ul class="attributes">';
735          for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }          for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
736                        @{$child->attributes}) {                        @{$child->attributes}) {
737            $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?            $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
738            $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children            $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
739          }          }
740          $r .= '</ul>';          $r .= '</ul>';
# Line 366  sub print_document_tree ($) { Line 755  sub print_document_tree ($) {
755      } elsif ($nt == $child->DOCUMENT_NODE) {      } elsif ($nt == $child->DOCUMENT_NODE) {
756        $r .= qq'<li id="$node_id" class="tree-document">Document';        $r .= qq'<li id="$node_id" class="tree-document">Document';
757        $r .= qq[<ul class="attributes">];        $r .= qq[<ul class="attributes">];
758          my $cp = $child->manakai_charset;
759          if (defined $cp) {
760            $r .= qq[<li><code>charset</code> parameter = <code>];
761            $r .= htescape ($cp) . qq[</code></li>];
762          }
763          $r .= qq[<li><code>inputEncoding</code> = ];
764          my $ie = $child->input_encoding;
765          if (defined $ie) {
766            $r .= qq[<code>@{[htescape ($ie)]}</code>];
767            if ($child->manakai_has_bom) {
768              $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
769            }
770          } else {
771            $r .= qq[(<code>null</code>)];
772          }
773        $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];        $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
774        $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];        $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
775        unless ($child->manakai_is_html) {        unless ($child->manakai_is_html) {
# Line 399  sub print_document_tree ($) { Line 803  sub print_document_tree ($) {
803    print STDOUT $r;    print STDOUT $r;
804  } # print_document_tree  } # print_document_tree
805    
806  sub print_structure_dump_section ($$) {  sub print_structure_dump_dom_section ($$$) {
807    my ($doc, $el) = @_;    my ($input, $doc, $el) = @_;
808    
809    print STDOUT qq[    print STDOUT qq[
810  <div id="document-tree" class="section">  <div id="$input->{id_prefix}document-tree" class="section">
811  <h2>Document Tree</h2>  <h2>Document Tree</h2>
812  ];  ];
813    push @nav, ['#document-tree' => 'Tree'];    push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
814          unless $input->{nested};
815    
816    print_document_tree ($el || $doc);    print_document_tree ($input, $el || $doc);
817    
818    print STDOUT qq[</div>];    print STDOUT qq[</div>];
819  } # print_structure_dump_section  } # print_structure_dump_dom_section
820    
821  sub print_structure_error_section ($$$) {  sub print_structure_dump_cssom_section ($$) {
822    my ($doc, $el, $result) = @_;    my ($input, $cssom) = @_;
823    
824      print STDOUT qq[
825    <div id="$input->{id_prefix}document-tree" class="section">
826    <h2>Document Tree</h2>
827    ];
828      push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
829          unless $input->{nested};
830    
831      ## TODO:
832      print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
833    
834      print STDOUT qq[</div>];
835    } # print_structure_dump_cssom_section
836    
837    sub print_structure_dump_manifest_section ($$) {
838      my ($input, $manifest) = @_;
839    
840      print STDOUT qq[
841    <div id="$input->{id_prefix}dump-manifest" class="section">
842    <h2>Cache Manifest</h2>
843    ];
844      push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
845          unless $input->{nested};
846    
847    print STDOUT qq[<div id="document-errors" class="section">    print STDOUT qq[<dl><dt>Explicit entries</dt>];
848      my $i = 0;
849      for my $uri (@{$manifest->[0]}) {
850        my $euri = htescape ($uri);
851        print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
852      }
853    
854      print STDOUT qq[<dt>Fallback entries</dt><dd>
855          <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
856          <th scope=row>Fallback Entry</tr><tbody>];
857      for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
858        my $euri = htescape ($uri);
859        my $euri2 = htescape ($manifest->[1]->{$uri});
860        print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
861            <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
862      }
863    
864      print STDOUT qq[</table><dt>Online whitelist</dt>];
865      for my $uri (@{$manifest->[2]}) {
866        my $euri = htescape ($uri);
867        print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
868      }
869    
870      print STDOUT qq[</dl></div>];
871    } # print_structure_dump_manifest_section
872    
873    sub print_structure_error_dom_section ($$$$$) {
874      my ($input, $doc, $el, $result, $onsubdoc) = @_;
875    
876      print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
877  <h2>Document Errors</h2>  <h2>Document Errors</h2>
878    
879  <dl>];  <dl id=document-errors-list>];
880    push @nav, ['#document-errors' => 'Document Error'];    push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
881          unless $input->{nested};
882    
883    require Whatpm::ContentChecker;    require Whatpm::ContentChecker;
884    my $onerror = sub {    my $onerror = sub {
# Line 429  sub print_structure_error_section ($$$) Line 887  sub print_structure_error_section ($$$)
887      $type =~ tr/ /-/;      $type =~ tr/ /-/;
888      $type =~ s/\|/%7C/g;      $type =~ s/\|/%7C/g;
889      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
890      print STDOUT qq[<dt class="$cls">] . get_node_link ($opt{node}) .      print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
891          qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";          qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
892        print STDOUT $msg, "</dd>\n";
893      add_error ('structure', \%opt => $result);      add_error ('structure', \%opt => $result);
894    };    };
895    
896    my $elements;    my $elements;
897    my $time1 = time;    my $time1 = time;
898    if ($el) {    if ($el) {
899      $elements = Whatpm::ContentChecker->check_element ($el, $onerror);      $elements = Whatpm::ContentChecker->check_element
900            ($el, $onerror, $onsubdoc);
901    } else {    } else {
902      $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);      $elements = Whatpm::ContentChecker->check_document
903            ($doc, $onerror, $onsubdoc);
904    }    }
905    $time{check} = time - $time1;    $time{check} = time - $time1;
906    
907    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl>
908    <script>
909      addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
910    </script></div>];
911    
912    return $elements;    return $elements;
913  } # print_structure_error_section  } # print_structure_error_dom_section
914    
915    sub print_structure_error_manifest_section ($$$) {
916      my ($input, $manifest, $result) = @_;
917    
918      print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
919    <h2>Document Errors</h2>
920    
921  sub print_table_section ($) {  <dl>];
922    my $tables = shift;    push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
923          unless $input->{nested};
924    
925      require Whatpm::CacheManifest;
926      Whatpm::CacheManifest->check_manifest ($manifest, sub {
927        my %opt = @_;
928        my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
929        $type =~ tr/ /-/;
930        $type =~ s/\|/%7C/g;
931        $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
932        print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
933            qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
934        add_error ('structure', \%opt => $result);
935      });
936    
937      print STDOUT qq[</div>];
938    } # print_structure_error_manifest_section
939    
940    sub print_table_section ($$) {
941      my ($input, $tables) = @_;
942        
943    push @nav, ['#tables' => 'Tables'];    push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
944          unless $input->{nested};
945    print STDOUT qq[    print STDOUT qq[
946  <div id="tables" class="section">  <div id="$input->{id_prefix}tables" class="section">
947  <h2>Tables</h2>  <h2>Tables</h2>
948    
949  <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->  <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
# Line 468  sub print_table_section ($) { Line 958  sub print_table_section ($) {
958    my $i = 0;    my $i = 0;
959    for my $table_el (@$tables) {    for my $table_el (@$tables) {
960      $i++;      $i++;
961      print STDOUT qq[<div class="section" id="table-$i"><h3>] .      print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
962          get_node_link ($table_el) . q[</h3>];          get_node_link ($input, $table_el) . q[</h3>];
963    
964      ## TODO: Make |ContentChecker| return |form_table| result      ## TODO: Make |ContentChecker| return |form_table| result
965      ## so that this script don't have to run the algorithm twice.      ## so that this script don't have to run the algorithm twice.
# Line 501  sub print_table_section ($) { Line 991  sub print_table_section ($) {
991                    
992      print STDOUT '</div><script type="text/javascript">tableToCanvas (';      print STDOUT '</div><script type="text/javascript">tableToCanvas (';
993      print STDOUT JSON::objToJson ($table);      print STDOUT JSON::objToJson ($table);
994      print STDOUT qq[, document.getElementById ('table-$i'));</script>];      print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
995        print STDOUT qq[, '$input->{id_prefix}');</script>];
996    }    }
997        
998    print STDOUT qq[</div>];    print STDOUT qq[</div>];
999  } # print_table_section  } # print_table_section
1000    
1001  sub print_id_section ($) {  sub print_listing_section ($$$) {
1002    my $ids = shift;    my ($opt, $input, $ids) = @_;
1003        
1004    push @nav, ['#identifiers' => 'IDs'];    push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
1005          unless $input->{nested};
1006    print STDOUT qq[    print STDOUT qq[
1007  <div id="identifiers" class="section">  <div id="$input->{id_prefix}$opt->{id}" class="section">
1008  <h2>Identifiers</h2>  <h2>$opt->{heading}</h2>
1009    
1010  <dl>  <dl>
1011  ];  ];
1012    for my $id (sort {$a cmp $b} keys %$ids) {    for my $id (sort {$a cmp $b} keys %$ids) {
1013      print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];      print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
1014      for (@{$ids->{$id}}) {      for (@{$ids->{$id}}) {
1015        print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];        print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1016      }      }
1017    }    }
1018    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
1019  } # print_id_section  } # print_listing_section
1020    
1021  sub print_term_section ($) {  sub print_uri_section ($$$) {
1022    my $terms = shift;    my ($input, $uris) = @_;
1023    
1024      ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents),
1025      ## except for those in RDF triples.
1026      ## TODO: URIs in CSS
1027        
1028    push @nav, ['#terms' => 'Terms'];    push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs']
1029          unless $input->{nested};
1030    print STDOUT qq[    print STDOUT qq[
1031  <div id="terms" class="section">  <div id="$input->{id_prefix}uris" class="section">
1032  <h2>Terms</h2>  <h2>URIs</h2>
1033    
1034  <dl>  <dl>];
1035  ];    for my $uri (sort {$a cmp $b} keys %$uris) {
1036    for my $term (sort {$a cmp $b} keys %$terms) {      my $euri = htescape ($uri);
1037      print STDOUT qq[<dt>@{[htescape $term]}</dt>];      print STDOUT qq[<dt><code class=uri>&lt;<a href="$euri">$euri</a>></code>];
1038      for (@{$terms->{$term}}) {      my $eccuri = htescape (get_cc_uri ($uri));
1039        print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];      print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>];
1040        print STDOUT qq[<dd>Found at: <ul>];
1041        for my $entry (@{$uris->{$uri}}) {
1042          print STDOUT qq[<li>], get_node_link ($input, $entry->{node});
1043          if (keys %{$entry->{type} or {}}) {
1044            print STDOUT ' (';
1045            print STDOUT join ', ', map {
1046              {
1047                hyperlink => 'Hyperlink',
1048                resource => 'Link to an external resource',
1049                namespace => 'Namespace URI',
1050                cite => 'Citation or link to a long description',
1051                embedded => 'Link to an embedded content',
1052                base => 'Base URI',
1053                action => 'Submission URI',
1054              }->{$_}
1055                or
1056              htescape ($_)
1057            } keys %{$entry->{type}};
1058            print STDOUT ')';
1059          }
1060      }      }
1061        print STDOUT qq[</ul>];
1062    }    }
1063    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
1064  } # print_term_section  } # print_uri_section
1065    
1066  sub print_class_section ($) {  sub print_rdf_section ($$$) {
1067    my $classes = shift;    my ($input, $rdfs) = @_;
1068        
1069    push @nav, ['#classes' => 'Classes'];    push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1070          unless $input->{nested};
1071    print STDOUT qq[    print STDOUT qq[
1072  <div id="classes" class="section">  <div id="$input->{id_prefix}rdf" class="section">
1073  <h2>Classes</h2>  <h2>RDF Triples</h2>
1074    
1075  <dl>  <dl>];
1076  ];    my $i = 0;
1077    for my $class (sort {$a cmp $b} keys %$classes) {    for my $rdf (@$rdfs) {
1078      print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];      print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1079      for (@{$classes->{$class}}) {      print STDOUT get_node_link ($input, $rdf->[0]);
1080        print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];      print STDOUT qq[<dd><dl>];
1081        for my $triple (@{$rdf->[1]}) {
1082          print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1083          print STDOUT get_rdf_resource_html ($triple->[1]);
1084          print STDOUT ' ';
1085          print STDOUT get_rdf_resource_html ($triple->[2]);
1086          print STDOUT ' ';
1087          print STDOUT get_rdf_resource_html ($triple->[3]);
1088      }      }
1089        print STDOUT qq[</dl>];
1090    }    }
1091    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
1092  } # print_class_section  } # print_rdf_section
1093    
1094    sub get_rdf_resource_html ($) {
1095      my $resource = shift;
1096      if (defined $resource->{uri}) {
1097        my $euri = htescape ($resource->{uri});
1098        return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1099            '</a>></code>';
1100      } elsif (defined $resource->{bnodeid}) {
1101        return htescape ('_:' . $resource->{bnodeid});
1102      } elsif ($resource->{nodes}) {
1103        return '(rdf:XMLLiteral)';
1104      } elsif (defined $resource->{value}) {
1105        my $elang = htescape (defined $resource->{language}
1106                                  ? $resource->{language} : '');
1107        my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1108        if (defined $resource->{datatype}) {
1109          my $euri = htescape ($resource->{datatype});
1110          $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1111              '</a>></code>';
1112        } elsif (length $resource->{language}) {
1113          $r .= '@' . htescape ($resource->{language});
1114        }
1115        return $r;
1116      } else {
1117        return '??';
1118      }
1119    } # get_rdf_resource_html
1120    
1121  sub print_result_section ($) {  sub print_result_section ($) {
1122    my $result = shift;    my $result = shift;
# Line 571  sub print_result_section ($) { Line 1125  sub print_result_section ($) {
1125  <div id="result-summary" class="section">  <div id="result-summary" class="section">
1126  <h2>Result</h2>];  <h2>Result</h2>];
1127    
1128    if ($result->{unsupported}) {      if ($result->{unsupported} and $result->{conforming_max}) {  
1129      print STDOUT qq[<p class=uncertain id=result-para>The conformance      print STDOUT qq[<p class=uncertain id=result-para>The conformance
1130          checker cannot decide whether the document is conforming or          checker cannot decide whether the document is conforming or
1131          not, since the document contains one or more unsupported          not, since the document contains one or more unsupported
1132          features.</p>];          features.  The document might or might not be conforming.</p>];
1133    } elsif ($result->{conforming_min}) {    } elsif ($result->{conforming_min}) {
1134      print STDOUT qq[<p class=PASS id=result-para>No conformance-error is      print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1135          found in this document.</p>];          found in this document.</p>];
# Line 591  sub print_result_section ($) { Line 1145  sub print_result_section ($) {
1145    print STDOUT qq[<table>    print STDOUT qq[<table>
1146  <colgroup><col><colgroup><col><col><col><colgroup><col>  <colgroup><col><colgroup><col><col><col><colgroup><col>
1147  <thead>  <thead>
1148  <tr><th scope=col></th><th scope=col><em class=rfc2119>MUST</em>-level  <tr><th scope=col></th>
1149  Errors</th><th scope=col><em class=rfc2119>SHOULD</em>-level  <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1150  Errors</th><th scope=col>Warnings</th><th scope=col>Score</th></tr>  Errors</a></th>
1151  </thead><tbody>];  <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1152    Errors</a></th>
1153    <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1154    <th scope=col>Score</th></tr></thead><tbody>];
1155    
1156    my $must_error = 0;    my $must_error = 0;
1157    my $should_error = 0;    my $should_error = 0;
# Line 602  Errors</th><th scope=col>Warnings</th><t Line 1159  Errors</th><th scope=col>Warnings</th><t
1159    my $score_min = 0;    my $score_min = 0;
1160    my $score_max = 0;    my $score_max = 0;
1161    my $score_base = 20;    my $score_base = 20;
1162      my $score_unit = $score_base / 100;
1163    for (    for (
1164      [Transfer => 'transfer', ''],      [Transfer => 'transfer', ''],
1165      [Character => 'char', ''],      [Character => 'char', ''],
# Line 611  Errors</th><th scope=col>Warnings</th><t Line 1169  Errors</th><th scope=col>Warnings</th><t
1169      $must_error += ($result->{$_->[1]}->{must} += 0);      $must_error += ($result->{$_->[1]}->{must} += 0);
1170      $should_error += ($result->{$_->[1]}->{should} += 0);      $should_error += ($result->{$_->[1]}->{should} += 0);
1171      $warning += ($result->{$_->[1]}->{warning} += 0);      $warning += ($result->{$_->[1]}->{warning} += 0);
1172      $score_min += ($result->{$_->[1]}->{score_min} += $score_base);      $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1173      $score_max += ($result->{$_->[1]}->{score_max} += $score_base);      $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1174    
1175      my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';      my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1176      my $label = $_->[0];      my $label = $_->[0];
# Line 625  Errors</th><th scope=col>Warnings</th><t Line 1183  Errors</th><th scope=col>Warnings</th><t
1183    
1184      print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];      print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1185      if ($uncertain) {      if ($uncertain) {
1186        print qq[<td class="@{[$score_max < $score_base ? $score_min < $score_max ? 'FAIL' : 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1187      } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {      } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1188        print qq[<td class="@{[$score_max < $score_base ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max} + $score_base</td></tr>];        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1189      } else {      } else {
1190        print qq[<td class="@{[$score_max < $score_base ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1191      }      }
1192    }    }
1193    
# Line 638  Errors</th><th scope=col>Warnings</th><t Line 1196  Errors</th><th scope=col>Warnings</th><t
1196    print STDOUT qq[    print STDOUT qq[
1197  <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>  <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1198  </tbody>  </tbody>
1199  <tfoot><tr class=uncertain><th scope=row>Total</th><td>$must_error?</td><td>$should_error?</td><td>$warning?</td><td><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>  <tfoot><tr class=uncertain><th scope=row>Total</th>
1200    <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1201    <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1202    <td>$warning?</td>
1203    <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1204  </table>  </table>
1205    
1206  <p><strong>Important</strong>: This conformance checking service  <p><strong>Important</strong>: This conformance checking service
# Line 647  is <em>under development</em>.  The resu Line 1209  is <em>under development</em>.  The resu
1209    push @nav, ['#result-summary' => 'Result'];    push @nav, ['#result-summary' => 'Result'];
1210  } # print_result_section  } # print_result_section
1211    
1212  sub print_result_unknown_type_section ($) {  sub print_result_unknown_type_section ($$) {
1213    my $input = shift;    my ($input, $result) = @_;
1214    
1215      my $euri = htescape ($input->{uri});
1216    print STDOUT qq[    print STDOUT qq[
1217  <div id="result-summary" class="section">  <div id="$input->{id_prefix}parse-errors" class="section">
1218  <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>  <h2>Errors</h2>
1219    
1220    <dl>
1221    <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1222        <dd class=unsupported><strong><a href="../error-description#level-u">Not
1223            supported</a></strong>:
1224        Media type
1225        <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1226        is not supported.</dd>
1227    </dl>
1228  </div>  </div>
1229  ];  ];
1230    push @nav, ['#result-summary' => 'Result'];    push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1231          unless $input->{nested};
1232      add_error (char => {level => 'u'} => $result);
1233      add_error (syntax => {level => 'u'} => $result);
1234      add_error (structure => {level => 'u'} => $result);
1235  } # print_result_unknown_type_section  } # print_result_unknown_type_section
1236    
1237  sub print_result_input_error_section ($) {  sub print_result_input_error_section ($) {
# Line 664  sub print_result_input_error_section ($) Line 1240  sub print_result_input_error_section ($)
1240  <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>  <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1241  </div>];  </div>];
1242    push @nav, ['#result-summary' => 'Result'];    push @nav, ['#result-summary' => 'Result'];
1243  } # print_Result_input_error_section  } # print_result_input_error_section
1244    
1245    sub get_error_label ($$) {
1246      my ($input, $err) = @_;
1247    
1248      my $r = '';
1249    
1250      my $line;
1251      my $column;
1252        
1253      if (defined $err->{node}) {
1254        $line = $err->{node}->get_user_data ('manakai_source_line');
1255        if (defined $line) {
1256          $column = $err->{node}->get_user_data ('manakai_source_column');
1257        } else {
1258          if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1259            my $owner = $err->{node}->owner_element;
1260            $line = $owner->get_user_data ('manakai_source_line');
1261            $column = $owner->get_user_data ('manakai_source_column');
1262          } else {
1263            my $parent = $err->{node}->parent_node;
1264            if ($parent) {
1265              $line = $parent->get_user_data ('manakai_source_line');
1266              $column = $parent->get_user_data ('manakai_source_column');
1267            }
1268          }
1269        }
1270      }
1271      unless (defined $line) {
1272        if (defined $err->{token} and defined $err->{token}->{line}) {
1273          $line = $err->{token}->{line};
1274          $column = $err->{token}->{column};
1275        } elsif (defined $err->{line}) {
1276          $line = $err->{line};
1277          $column = $err->{column};
1278        }
1279      }
1280    
1281      if (defined $line) {
1282        if (defined $column and $column > 0) {
1283          $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1284        } else {
1285          $line = $line - 1 || 1;
1286          $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1287        }
1288      }
1289    
1290      if (defined $err->{node}) {
1291        $r .= ' ' if length $r;
1292        $r .= get_node_link ($input, $err->{node});
1293      }
1294    
1295      if (defined $err->{index}) {
1296        if (length $r) {
1297          $r .= ', Index ' . (0+$err->{index});
1298        } else {
1299          $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1300              . (0+$err->{index}) . '</a>';
1301        }
1302      }
1303    
1304      if (defined $err->{value}) {
1305        $r .= ' ' if length $r;
1306        $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1307      }
1308    
1309      return $r;
1310    } # get_error_label
1311    
1312    sub get_error_level_label ($) {
1313      my $err = shift;
1314    
1315      my $r = '';
1316    
1317      if (not defined $err->{level} or $err->{level} eq 'm') {
1318        $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1319            error</a></strong>: ];
1320      } elsif ($err->{level} eq 's') {
1321        $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1322            error</a></strong>: ];
1323      } elsif ($err->{level} eq 'w') {
1324        $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1325            ];
1326      } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1327        $r = qq[<strong><a href="../error-description#level-u">Not
1328            supported</a></strong>: ];
1329      } elsif ($err->{level} eq 'i') {
1330        $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1331      } else {
1332        my $elevel = htescape ($err->{level});
1333        $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1334            ];
1335      }
1336    
1337      return $r;
1338    } # get_error_level_label
1339    
1340  sub get_node_path ($) {  sub get_node_path ($) {
1341    my $node = shift;    my $node = shift;
# Line 672  sub get_node_path ($) { Line 1343  sub get_node_path ($) {
1343    while (defined $node) {    while (defined $node) {
1344      my $rs;      my $rs;
1345      if ($node->node_type == 1) {      if ($node->node_type == 1) {
1346        $rs = $node->manakai_local_name;        $rs = $node->node_name;
1347        $node = $node->parent_node;        $node = $node->parent_node;
1348      } elsif ($node->node_type == 2) {      } elsif ($node->node_type == 2) {
1349        $rs = '@' . $node->manakai_local_name;        $rs = '@' . $node->node_name;
1350        $node = $node->owner_element;        $node = $node->owner_element;
1351      } elsif ($node->node_type == 3) {      } elsif ($node->node_type == 3) {
1352        $rs = '"' . $node->data . '"';        $rs = '"' . $node->data . '"';
# Line 693  sub get_node_path ($) { Line 1364  sub get_node_path ($) {
1364    return join '/', @r;    return join '/', @r;
1365  } # get_node_path  } # get_node_path
1366    
1367  sub get_node_link ($) {  sub get_node_link ($$) {
1368    return qq[<a href="#node-@{[refaddr $_[0]]}">] .    return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1369        htescape (get_node_path ($_[0])) . qq[</a>];        htescape (get_node_path ($_[1])) . qq[</a>];
1370  } # get_node_link  } # get_node_link
1371    
1372  {  {
# Line 703  sub get_node_link ($) { Line 1374  sub get_node_link ($) {
1374    
1375  sub load_text_catalog ($) {  sub load_text_catalog ($) {
1376    my $lang = shift; # MUST be a canonical lang name    my $lang = shift; # MUST be a canonical lang name
1377    open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";    open my $file, '<:utf8', "cc-msg.$lang.txt"
1378          or die "$0: cc-msg.$lang.txt: $!";
1379    while (<$file>) {    while (<$file>) {
1380      if (s/^([^;]+);([^;]*);//) {      if (s/^([^;]+);([^;]*);//) {
1381        my ($type, $cls, $msg) = ($1, $2, $_);        my ($type, $cls, $msg) = ($1, $2, $_);
# Line 716  sub load_text_catalog ($) { Line 1388  sub load_text_catalog ($) {
1388  sub get_text ($) {  sub get_text ($) {
1389    my ($type, $level, $node) = @_;    my ($type, $level, $node) = @_;
1390    $type = $level . ':' . $type if defined $level;    $type = $level . ':' . $type if defined $level;
1391      $level = 'm' unless defined $level;
1392    my @arg;    my @arg;
1393    {    {
1394      if (defined $Msg->{$type}) {      if (defined $Msg->{$type}) {
# Line 740  sub get_text ($) { Line 1413  sub get_text ($) {
1413            ? htescape ($node->owner_element->manakai_local_name)            ? htescape ($node->owner_element->manakai_local_name)
1414            : ''            : ''
1415        }ge;        }ge;
1416        return ($type, $Msg->{$type}->[0], $msg);        return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1417      } elsif ($type =~ s/:([^:]*)$//) {      } elsif ($type =~ s/:([^:]*)$//) {
1418        unshift @arg, $1;        unshift @arg, $1;
1419        redo;        redo;
1420      }      }
1421    }    }
1422    return ($type, '', htescape ($_[0]));    return ($type, 'level-'.$level, htescape ($_[0]));
1423  } # get_text  } # get_text
1424    
1425  }  }
1426    
1427    sub encode_uri_component ($) {
1428      require Encode;
1429      my $s = Encode::encode ('utf8', shift);
1430      $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge;
1431      return $s;
1432    } # encode_uri_component
1433    
1434    sub get_cc_uri ($) {
1435      return './?uri=' . encode_uri_component ($_[0]);
1436    } # get_cc_uri
1437    
1438  sub get_input_document ($$) {  sub get_input_document ($$) {
1439    my ($http, $dom) = @_;    my ($http, $dom) = @_;
1440    
# Line 802  EOH Line 1486  EOH
1486      $ua->protocols_allowed ([qw/http/]);      $ua->protocols_allowed ([qw/http/]);
1487      $ua->max_size (1000_000);      $ua->max_size (1000_000);
1488      my $req = HTTP::Request->new (GET => $request_uri);      my $req = HTTP::Request->new (GET => $request_uri);
1489        $req->header ('Accept-Encoding' => 'identity, *; q=0');
1490      my $res = $ua->request ($req);      my $res = $ua->request ($req);
1491      ## TODO: 401 sets |is_success| true.      ## TODO: 401 sets |is_success| true.
1492      if ($res->is_success or $http->get_parameter ('error-page')) {      if ($res->is_success or $http->get_parameter ('error-page')) {
# Line 811  EOH Line 1496  EOH
1496    
1497        ## TODO: More strict parsing...        ## TODO: More strict parsing...
1498        my $ct = $res->header ('Content-Type');        my $ct = $res->header ('Content-Type');
1499        if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {        if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
         $r->{media_type} = lc $1;  
       }  
       if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?(\S+)"?/i) {  
1500          $r->{charset} = lc $1;          $r->{charset} = lc $1;
1501          $r->{charset} =~ tr/\\//d;          $r->{charset} =~ tr/\\//d;
1502            $r->{official_charset} = $r->{charset};
1503        }        }
1504    
1505        my $input_charset = $http->get_parameter ('charset');        my $input_charset = $http->get_parameter ('charset');
# Line 824  EOH Line 1507  EOH
1507          $r->{charset_overridden}          $r->{charset_overridden}
1508              = (not defined $r->{charset} or $r->{charset} ne $input_charset);              = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1509          $r->{charset} = $input_charset;          $r->{charset} = $input_charset;
1510        }        }
1511    
1512          ## TODO: Support for HTTP Content-Encoding
1513    
1514        $r->{s} = ''.$res->content;        $r->{s} = ''.$res->content;
1515    
1516          require Whatpm::ContentType;
1517          ($r->{official_type}, $r->{media_type})
1518              = Whatpm::ContentType->get_sniffed_type
1519                  (get_file_head => sub {
1520                     return substr $r->{s}, 0, shift;
1521                   },
1522                   http_content_type_byte => $ct,
1523                   has_http_content_encoding =>
1524                       defined $res->header ('Content-Encoding'),
1525                   supported_image_types => {});
1526      } else {      } else {
1527        $r->{uri} = $res->request->uri;        $r->{uri} = $res->request->uri;
1528        $r->{request_uri} = $request_uri;        $r->{request_uri} = $request_uri;
# Line 847  EOH Line 1543  EOH
1543      $r->{charset} = ''.$http->get_parameter ('_charset_');      $r->{charset} = ''.$http->get_parameter ('_charset_');
1544      $r->{charset} =~ s/\s+//g;      $r->{charset} =~ s/\s+//g;
1545      $r->{charset} = 'utf-8' if $r->{charset} eq '';      $r->{charset} = 'utf-8' if $r->{charset} eq '';
1546        $r->{official_charset} = $r->{charset};
1547      $r->{header_field} = [];      $r->{header_field} = [];
1548    
1549        require Whatpm::ContentType;
1550        ($r->{official_type}, $r->{media_type})
1551            = Whatpm::ContentType->get_sniffed_type
1552                (get_file_head => sub {
1553                   return substr $r->{s}, 0, shift;
1554                 },
1555                 http_content_type_byte => undef,
1556                 has_http_content_encoding => 0,
1557                 supported_image_types => {});
1558    }    }
1559    
1560    my $input_format = $http->get_parameter ('i');    my $input_format = $http->get_parameter ('i');
# Line 864  EOH Line 1571  EOH
1571    if ($r->{media_type} eq 'text/xml') {    if ($r->{media_type} eq 'text/xml') {
1572      unless (defined $r->{charset}) {      unless (defined $r->{charset}) {
1573        $r->{charset} = 'us-ascii';        $r->{charset} = 'us-ascii';
1574          $r->{official_charset} = $r->{charset};
1575      } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {      } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1576        $r->{charset_overridden} = 0;        $r->{charset_overridden} = 0;
1577      }      }
# Line 875  EOH Line 1583  EOH
1583      return $r;      return $r;
1584    }    }
1585    
1586      $r->{inner_html_element} = $http->get_parameter ('e');
1587    
1588    return $r;    return $r;
1589  } # get_input_document  } # get_input_document
1590    
# Line 907  Wakaba <w@suika.fam.cx>. Line 1617  Wakaba <w@suika.fam.cx>.
1617    
1618  =head1 LICENSE  =head1 LICENSE
1619    
1620  Copyright 2007 Wakaba <w@suika.fam.cx>  Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1621    
1622  This library is free software; you can redistribute it  This library is free software; you can redistribute it
1623  and/or modify it under the same terms as Perl itself.  and/or modify it under the same terms as Perl itself.

Legend:
Removed from v.1.20  
changed lines
  Added in v.1.48

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24