/[pub]/test/html-webhacc/cc.cgi
Suika

Diff of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.23 by wakaba, Mon Nov 5 09:33:52 2007 UTC revision 1.51 by wakaba, Sun May 18 03:47:56 2008 UTC
# Line 20  sub htescape ($) { Line 20  sub htescape ($) {
20    return $s;    return $s;
21  } # htescape  } # htescape
22    
23      my @nav;
24      my %time;
25      require Message::DOM::DOMImplementation;
26      my $dom = Message::DOM::DOMImplementation->new;
27    {
28    use Message::CGI::HTTP;    use Message::CGI::HTTP;
29    my $http = Message::CGI::HTTP->new;    my $http = Message::CGI::HTTP->new;
30    
# Line 31  sub htescape ($) { Line 36  sub htescape ($) {
36    binmode STDOUT, ':utf8';    binmode STDOUT, ':utf8';
37    $| = 1;    $| = 1;
38    
   require Message::DOM::DOMImplementation;  
   my $dom = Message::DOM::DOMImplementation->new;  
   
39    load_text_catalog ('en'); ## TODO: conneg    load_text_catalog ('en'); ## TODO: conneg
40    
   my @nav;  
41    print STDOUT qq[Content-Type: text/html; charset=utf-8    print STDOUT qq[Content-Type: text/html; charset=utf-8
42    
43  <!DOCTYPE html>  <!DOCTYPE html>
# Line 52  sub htescape ($) { Line 53  sub htescape ($) {
53    
54    $| = 0;    $| = 0;
55    my $input = get_input_document ($http, $dom);    my $input = get_input_document ($http, $dom);
   my $inner_html_element = $http->get_parameter ('e');  
56    my $char_length = 0;    my $char_length = 0;
   my %time;  
57    
58    print qq[    print qq[
59  <div id="document-info" class="section">  <div id="document-info" class="section">
# Line 62  sub htescape ($) { Line 61  sub htescape ($) {
61  <dt>Request URI</dt>  <dt>Request URI</dt>
62      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63  <dt>Document URI</dt>  <dt>Document URI</dt>
64      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65        <script>
66          document.title = '<'
67              + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68              + document.title;
69        </script></dd>
70  ]; # no </dl> yet  ]; # no </dl> yet
71    push @nav, ['#document-info' => 'Information'];    push @nav, ['#document-info' => 'Information'];
72    
# Line 74  if (defined $input->{s}) { Line 78  if (defined $input->{s}) {
78      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>      <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79  <dt>Internet Media Type</dt>  <dt>Internet Media Type</dt>
80      <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>      <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81      @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>      @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82  <dt>Character Encoding</dt>  <dt>Character Encoding</dt>
83      <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}      <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84      @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>      @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
# Line 82  if (defined $input->{s}) { Line 86  if (defined $input->{s}) {
86      <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>      <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87  </dl>  </dl>
88  </div>  </div>
89    
90    <script src="../cc-script.js"></script>
91  ];  ];
92    
93      $input->{id_prefix} = '';
94      #$input->{nested} = 0;
95    my $result = {conforming_min => 1, conforming_max => 1};    my $result = {conforming_min => 1, conforming_max => 1};
96    print_http_header_section ($input, $result);    check_and_print ($input => $result);
   
   my $doc;  
   my $el;  
   my $manifest;  
   
   if ($input->{media_type} eq 'text/html') {  
     ($doc, $el) = print_syntax_error_html_section ($input, $result);  
     print_source_string_section (\($input->{s}), $input->{charset});  
   } elsif ({  
             'text/xml' => 1,  
             'application/atom+xml' => 1,  
             'application/rss+xml' => 1,  
             'application/svg+xml' => 1,  
             'application/xhtml+xml' => 1,  
             'application/xml' => 1,  
            }->{$input->{media_type}}) {  
     ($doc, $el) = print_syntax_error_xml_section ($input, $result);  
     print_source_string_section (\($input->{s}), $doc->input_encoding);  
   } elsif ($input->{media_type} eq 'text/cache-manifest') {  
 ## TODO: MUST be text/cache-manifest  
     $manifest = print_syntax_error_manifest_section ($input, $result);  
     print_source_string_section (\($input->{s}), 'utf-8');  
   } else {  
     ## TODO: Change HTTP status code??  
     print_result_unknown_type_section ($input);  
   }  
   
   if (defined $doc or defined $el) {  
     print_structure_dump_dom_section ($doc, $el);  
     my $elements = print_structure_error_dom_section ($doc, $el, $result);  
     print_table_section ($elements->{table}) if @{$elements->{table}};  
     print_id_section ($elements->{id}) if keys %{$elements->{id}};  
     print_term_section ($elements->{term}) if keys %{$elements->{term}};  
     print_class_section ($elements->{class}) if keys %{$elements->{class}};  
   } elsif (defined $manifest) {  
     print_structure_dump_manifest_section ($manifest);  
     print_structure_error_manifest_section ($manifest, $result);  
   }  
   
97    print_result_section ($result);    print_result_section ($result);
98  } else {  } else {
99    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
# Line 143  if (defined $input->{s}) { Line 112  if (defined $input->{s}) {
112  </html>  </html>
113  ];  ];
114    
115    for (qw/decode parse parse_xml parse_manifest check check_manifest/) {    for (qw/decode parse parse_html parse_xml parse_manifest
116              check check_manifest/) {
117      next unless defined $time{$_};      next unless defined $time{$_};
118      open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";      open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119      print $file $char_length, "\t", $time{$_}, "\n";      print $file $char_length, "\t", $time{$_}, "\n";
120    }    }
121    
122  exit;  exit;
123    }
124    
125  sub add_error ($$$) {  sub add_error ($$$) {
126    my ($layer, $err, $result) = @_;    my ($layer, $err, $result) = @_;
# Line 160  sub add_error ($$$) { Line 131  sub add_error ($$$) {
131        $result->{conforming_min} = 0;        $result->{conforming_min} = 0;
132      } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {      } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133        $result->{$layer}->{warning}++;        $result->{$layer}->{warning}++;
134      } elsif ($err->{level} eq 'unsupported') {      } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135        $result->{$layer}->{unsupported}++;        $result->{$layer}->{unsupported}++;
136        $result->{unsupported} = 1;        $result->{unsupported} = 1;
137        } elsif ($err->{level} eq 'i') {
138          #
139      } else {      } else {
140        $result->{$layer}->{must}++;        $result->{$layer}->{must}++;
141        $result->{$layer}->{score_max} -= 2;        $result->{$layer}->{score_max} -= 2;
# Line 179  sub add_error ($$$) { Line 152  sub add_error ($$$) {
152    }    }
153  } # add_error  } # add_error
154    
155    sub check_and_print ($$) {
156      my ($input, $result) = @_;
157    
158      print_http_header_section ($input, $result);
159    
160      my $doc;
161      my $el;
162      my $cssom;
163      my $manifest;
164      my @subdoc;
165    
166      if ($input->{media_type} eq 'text/html') {
167        ($doc, $el) = print_syntax_error_html_section ($input, $result);
168        print_source_string_section
169            ($input,
170             \($input->{s}),
171             $input->{charset} || $doc->input_encoding);
172      } elsif ({
173                'text/xml' => 1,
174                'application/atom+xml' => 1,
175                'application/rss+xml' => 1,
176                'image/svg+xml' => 1,
177                'application/xhtml+xml' => 1,
178                'application/xml' => 1,
179                ## TODO: Should we make all XML MIME Types fall
180                ## into this category?
181    
182                'application/rdf+xml' => 1, ## NOTE: This type has different model.
183               }->{$input->{media_type}}) {
184        ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185        print_source_string_section ($input,
186                                     \($input->{s}),
187                                     $doc->input_encoding);
188      } elsif ($input->{media_type} eq 'text/css') {
189        $cssom = print_syntax_error_css_section ($input, $result);
190        print_source_string_section
191            ($input, \($input->{s}),
192             $cssom->manakai_input_encoding);
193      } elsif ($input->{media_type} eq 'text/cache-manifest') {
194    ## TODO: MUST be text/cache-manifest
195        $manifest = print_syntax_error_manifest_section ($input, $result);
196        print_source_string_section ($input, \($input->{s}),
197                                     'utf-8');
198      } else {
199        ## TODO: Change HTTP status code??
200        print_result_unknown_type_section ($input, $result);
201      }
202    
203      if (defined $doc or defined $el) {
204        $doc->document_uri ($input->{uri});
205        $doc->manakai_entity_base_uri ($input->{base_uri});
206        print_structure_dump_dom_section ($input, $doc, $el);
207        my $elements = print_structure_error_dom_section
208            ($input, $doc, $el, $result, sub {
209              push @subdoc, shift;
210            });
211        print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212        print_listing_section ({
213          id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214        }, $input, $elements->{id}) if keys %{$elements->{id}};
215        print_listing_section ({
216          id => 'terms', label => 'Terms', heading => 'Terms',
217        }, $input, $elements->{term}) if keys %{$elements->{term}};
218        print_listing_section ({
219          id => 'classes', label => 'Classes', heading => 'Classes',
220        }, $input, $elements->{class}) if keys %{$elements->{class}};
221        print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}};
222        print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
223      } elsif (defined $cssom) {
224        print_structure_dump_cssom_section ($input, $cssom);
225        ## TODO: CSSOM validation
226        add_error ('structure', {level => 'u'} => $result);
227      } elsif (defined $manifest) {
228        print_structure_dump_manifest_section ($input, $manifest);
229        print_structure_error_manifest_section ($input, $manifest, $result);
230      }
231    
232      my $id_prefix = 0;
233      for my $subinput (@subdoc) {
234        $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
235        $subinput->{nested} = 1;
236        $subinput->{base_uri} = $subinput->{container_node}->base_uri
237            unless defined $subinput->{base_uri};
238        my $ebaseuri = htescape ($subinput->{base_uri});
239        push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
240        print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
241          <h2>Subdocument #$id_prefix</h2>
242    
243          <dl>
244          <dt>Internet Media Type</dt>
245            <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
246          <dt>Container Node</dt>
247            <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
248          <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
249            <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
250          </dl>];              
251    
252        $subinput->{id_prefix} .= '-';
253        check_and_print ($subinput => $result);
254    
255        print STDOUT qq[</div>];
256      }
257    } # check_and_print
258    
259  sub print_http_header_section ($$) {  sub print_http_header_section ($$) {
260    my ($input, $result) = @_;    my ($input, $result) = @_;
261    return unless defined $input->{header_status_code} or    return unless defined $input->{header_status_code} or
262        defined $input->{header_status_text} or        defined $input->{header_status_text} or
263        @{$input->{header_field}};        @{$input->{header_field} or []};
264        
265    push @nav, ['#source-header' => 'HTTP Header'];    push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
266    print STDOUT qq[<div id="source-header" class="section">    print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
267  <h2>HTTP Header</h2>  <h2>HTTP Header</h2>
268    
269  <p><strong>Note</strong>: Due to the limitation of the  <p><strong>Note</strong>: Due to the limitation of the
# Line 218  sub print_syntax_error_html_section ($$) Line 295  sub print_syntax_error_html_section ($$)
295        
296    require Encode;    require Encode;
297    require Whatpm::HTML;    require Whatpm::HTML;
   
   $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.  
298        
   my $time1 = time;  
   my $t = Encode::decode ($input->{charset}, $input->{s});  
   $time{decode} = time - $time1;  
   
299    print STDOUT qq[    print STDOUT qq[
300  <div id="parse-errors" class="section">  <div id="$input->{id_prefix}parse-errors" class="section">
301  <h2>Parse Errors</h2>  <h2>Parse Errors</h2>
302    
303  <dl>];  <dl id="$input->{id_prefix}parse-errors-list">];
304    push @nav, ['#parse-errors' => 'Parse Error'];    push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
305    
306    my $onerror = sub {    my $onerror = sub {
307      my (%opt) = @_;      my (%opt) = @_;
308      my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});      my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
309      if ($opt{column} > 0) {      print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
310        print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];          qq[</dt>];
     } else {  
       $opt{line} = $opt{line} - 1 || 1;  
       print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];  
     }  
311      $type =~ tr/ /-/;      $type =~ tr/ /-/;
312      $type =~ s/\|/%7C/g;      $type =~ s/\|/%7C/g;
313      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
# Line 252  sub print_syntax_error_html_section ($$) Line 319  sub print_syntax_error_html_section ($$)
319    
320    my $doc = $dom->create_document;    my $doc = $dom->create_document;
321    my $el;    my $el;
322    $time1 = time;    my $inner_html_element = $input->{inner_html_element};
323    if (defined $inner_html_element and length $inner_html_element) {    if (defined $inner_html_element and length $inner_html_element) {
324        $input->{charset} ||= 'windows-1252'; ## TODO: for now.
325        my $time1 = time;
326        my $t = \($input->{s});
327        unless ($input->{is_char_string}) {
328          $t = \(Encode::decode ($input->{charset}, $$t));
329        }
330        $time{decode} = time - $time1;
331        
332      $el = $doc->create_element_ns      $el = $doc->create_element_ns
333          ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);          ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
334      Whatpm::HTML->set_inner_html ($el, $t, $onerror);      $time1 = time;
335        Whatpm::HTML->set_inner_html ($el, $$t, $onerror);
336        $time{parse} = time - $time1;
337    } else {    } else {
338      Whatpm::HTML->parse_string ($t => $doc, $onerror);      my $time1 = time;
339        if ($input->{is_char_string}) {
340          Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror);
341        } else {
342          Whatpm::HTML->parse_byte_string
343              ($input->{charset}, $input->{s} => $doc, $onerror);
344        }
345        $time{parse_html} = time - $time1;
346    }    }
347    $time{parse} = time - $time1;    $doc->manakai_charset ($input->{official_charset})
348          if defined $input->{official_charset};
349      
350    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
351    
352    return ($doc, $el);    return ($doc, $el);
# Line 273  sub print_syntax_error_xml_section ($$) Line 358  sub print_syntax_error_xml_section ($$)
358    require Message::DOM::XMLParserTemp;    require Message::DOM::XMLParserTemp;
359        
360    print STDOUT qq[    print STDOUT qq[
361  <div id="parse-errors" class="section">  <div id="$input->{id_prefix}parse-errors" class="section">
362  <h2>Parse Errors</h2>  <h2>Parse Errors</h2>
363    
364  <dl>];  <dl id="$input->{id_prefix}parse-errors-list">];
365    push @nav, ['#parse-errors' => 'Parse Error'];    push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
366    
367    my $onerror = sub {    my $onerror = sub {
368      my $err = shift;      my $err = shift;
369      my $line = $err->location->line_number;      my $line = $err->location->line_number;
370      print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];      print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
371      print STDOUT $err->location->column_number, "</dt><dd>";      print STDOUT $err->location->column_number, "</dt><dd>";
372      print STDOUT htescape $err->text, "</dd>\n";      print STDOUT htescape $err->text, "</dd>\n";
373    
# Line 296  sub print_syntax_error_xml_section ($$) Line 381  sub print_syntax_error_xml_section ($$)
381      return 1;      return 1;
382    };    };
383    
384      my $t = \($input->{s});
385      if ($input->{is_char_string}) {
386        require Encode;
387        $t = \(Encode::encode ('utf8', $$t));
388        $input->{charset} = 'utf-8';
389      }
390    
391    my $time1 = time;    my $time1 = time;
392    open my $fh, '<', \($input->{s});    open my $fh, '<', $t;
393    my $doc = Message::DOM::XMLParserTemp->parse_byte_stream    my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
394        ($fh => $dom, $onerror, charset => $input->{charset});        ($fh => $dom, $onerror, charset => $input->{charset});
395    $time{parse_xml} = time - $time1;    $time{parse_xml} = time - $time1;
396      $doc->manakai_charset ($input->{official_charset})
397          if defined $input->{official_charset};
398    
399    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
400    
401    return ($doc, undef);    return ($doc, undef);
402  } # print_syntax_error_xml_section  } # print_syntax_error_xml_section
403    
404    sub get_css_parser () {
405      our $CSSParser;
406      return $CSSParser if $CSSParser;
407    
408      require Whatpm::CSS::Parser;
409      my $p = Whatpm::CSS::Parser->new;
410    
411      $p->{prop}->{$_} = 1 for qw/
412        alignment-baseline
413        background background-attachment background-color background-image
414        background-position background-position-x background-position-y
415        background-repeat border border-bottom border-bottom-color
416        border-bottom-style border-bottom-width border-collapse border-color
417        border-left border-left-color
418        border-left-style border-left-width border-right border-right-color
419        border-right-style border-right-width
420        border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
421        border-style border-top border-top-color border-top-style border-top-width
422        border-width bottom
423        caption-side clear clip color content counter-increment counter-reset
424        cursor direction display dominant-baseline empty-cells float font
425        font-family font-size font-size-adjust font-stretch
426        font-style font-variant font-weight height left
427        letter-spacing line-height
428        list-style list-style-image list-style-position list-style-type
429        margin margin-bottom margin-left margin-right margin-top marker-offset
430        marks max-height max-width min-height min-width opacity -moz-opacity
431        orphans outline outline-color outline-style outline-width overflow
432        overflow-x overflow-y
433        padding padding-bottom padding-left padding-right padding-top
434        page page-break-after page-break-before page-break-inside
435        position quotes right size table-layout
436        text-align text-anchor text-decoration text-indent text-transform
437        top unicode-bidi vertical-align visibility white-space width widows
438        word-spacing writing-mode z-index
439      /;
440      $p->{prop_value}->{display}->{$_} = 1 for qw/
441        block clip inline inline-block inline-table list-item none
442        table table-caption table-cell table-column table-column-group
443        table-header-group table-footer-group table-row table-row-group
444        compact marker
445      /;
446      $p->{prop_value}->{position}->{$_} = 1 for qw/
447        absolute fixed relative static
448      /;
449      $p->{prop_value}->{float}->{$_} = 1 for qw/
450        left right none
451      /;
452      $p->{prop_value}->{clear}->{$_} = 1 for qw/
453        left right none both
454      /;
455      $p->{prop_value}->{direction}->{ltr} = 1;
456      $p->{prop_value}->{direction}->{rtl} = 1;
457      $p->{prop_value}->{marks}->{crop} = 1;
458      $p->{prop_value}->{marks}->{cross} = 1;
459      $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
460        normal bidi-override embed
461      /;
462      for my $prop_name (qw/overflow overflow-x overflow-y/) {
463        $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
464          visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
465        /;
466      }
467      $p->{prop_value}->{visibility}->{$_} = 1 for qw/
468        visible hidden collapse
469      /;
470      $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
471        disc circle square decimal decimal-leading-zero
472        lower-roman upper-roman lower-greek lower-latin
473        upper-latin armenian georgian lower-alpha upper-alpha none
474        hebrew cjk-ideographic hiragana katakana hiragana-iroha
475        katakana-iroha
476      /;
477      $p->{prop_value}->{'list-style-position'}->{outside} = 1;
478      $p->{prop_value}->{'list-style-position'}->{inside} = 1;
479      $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
480        auto always avoid left right
481      /;
482      $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
483        auto always avoid left right
484      /;
485      $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
486      $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
487      $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
488        repeat repeat-x repeat-y no-repeat
489      /;
490      $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
491      $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
492      $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
493        xx-small x-small small medium large x-large xx-large
494        -manakai-xxx-large -webkit-xxx-large
495        larger smaller
496      /;
497      $p->{prop_value}->{'font-style'}->{normal} = 1;
498      $p->{prop_value}->{'font-style'}->{italic} = 1;
499      $p->{prop_value}->{'font-style'}->{oblique} = 1;
500      $p->{prop_value}->{'font-variant'}->{normal} = 1;
501      $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
502      $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
503          qw/normal wider narrower ultra-condensed extra-condensed
504            condensed semi-condensed semi-expanded expanded
505            extra-expanded ultra-expanded/;
506      $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
507        left right center justify begin end
508      /;
509      $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
510        capitalize uppercase lowercase none
511      /;
512      $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
513        normal pre nowrap pre-line pre-wrap -moz-pre-wrap
514      /;
515      $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
516        lr rl tb lr-tb rl-tb tb-rl
517      /;
518      $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
519        start middle end
520      /;
521      $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
522        auto use-script no-change reset-size ideographic alphabetic
523        hanging mathematical central middle text-after-edge text-before-edge
524      /;
525      $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
526        auto baseline before-edge text-before-edge middle central
527        after-edge text-after-edge ideographic alphabetic hanging
528        mathematical
529      /;
530      $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
531        none blink underline overline line-through
532      /;
533      $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
534        top bottom left right
535      /;
536      $p->{prop_value}->{'table-layout'}->{auto} = 1;
537      $p->{prop_value}->{'table-layout'}->{fixed} = 1;
538      $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
539      $p->{prop_value}->{'border-collapse'}->{separate} = 1;
540      $p->{prop_value}->{'empty-cells'}->{show} = 1;
541      $p->{prop_value}->{'empty-cells'}->{hide} = 1;
542      $p->{prop_value}->{cursor}->{$_} = 1 for qw/
543        auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
544        se-resize sw-resize s-resize w-resize text wait help progress
545      /;
546      for my $prop (qw/border-top-style border-left-style
547                       border-bottom-style border-right-style outline-style/) {
548        $p->{prop_value}->{$prop}->{$_} = 1 for qw/
549          none hidden dotted dashed solid double groove ridge inset outset
550        /;
551      }
552      for my $prop (qw/color background-color
553                       border-bottom-color border-left-color border-right-color
554                       border-top-color border-color/) {
555        $p->{prop_value}->{$prop}->{transparent} = 1;
556        $p->{prop_value}->{$prop}->{flavor} = 1;
557        $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
558      }
559      $p->{prop_value}->{'outline-color'}->{invert} = 1;
560      $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
561      $p->{pseudo_class}->{$_} = 1 for qw/
562        active checked disabled empty enabled first-child first-of-type
563        focus hover indeterminate last-child last-of-type link only-child
564        only-of-type root target visited
565        lang nth-child nth-last-child nth-of-type nth-last-of-type not
566        -manakai-contains -manakai-current
567      /;
568      $p->{pseudo_element}->{$_} = 1 for qw/
569        after before first-letter first-line
570      /;
571    
572      return $CSSParser = $p;
573    } # get_css_parser
574    
575    sub print_syntax_error_css_section ($$) {
576      my ($input, $result) = @_;
577    
578      print STDOUT qq[
579    <div id="$input->{id_prefix}parse-errors" class="section">
580    <h2>Parse Errors</h2>
581    
582    <dl id="$input->{id_prefix}parse-errors-list">];
583      push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
584    
585      my $p = get_css_parser ();
586      $p->init;
587      $p->{onerror} = sub {
588        my (%opt) = @_;
589        my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
590        if ($opt{token}) {
591          print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
592        } else {
593          print STDOUT qq[<dt class="$cls">Unknown location];
594        }
595        if (defined $opt{value}) {
596          print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
597        } elsif (defined $opt{token}) {
598          print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
599        }
600        $type =~ tr/ /-/;
601        $type =~ s/\|/%7C/g;
602        $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
603        print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
604        print STDOUT qq[$msg</dd>\n];
605    
606        add_error ('syntax', \%opt => $result);
607      };
608      $p->{href} = $input->{uri};
609      $p->{base_uri} = $input->{base_uri};
610    
611    #  if ($parse_mode eq 'q') {
612    #    $p->{unitless_px} = 1;
613    #    $p->{hashless_color} = 1;
614    #  }
615    
616    ## TODO: Make $input->{s} a ref.
617    
618      my $s = \$input->{s};
619      my $charset;
620      unless ($input->{is_char_string}) {
621        require Encode;
622        if (defined $input->{charset}) {## TODO: IANA->Perl
623          $charset = $input->{charset};
624          $s = \(Encode::decode ($input->{charset}, $$s));
625        } else {
626          ## TODO: charset detection
627          $s = \(Encode::decode ($charset = 'utf-8', $$s));
628        }
629      }
630      
631      my $cssom = $p->parse_char_string ($$s);
632      $cssom->manakai_input_encoding ($charset) if defined $charset;
633    
634      print STDOUT qq[</dl></div>];
635    
636      return $cssom;
637    } # print_syntax_error_css_section
638    
639  sub print_syntax_error_manifest_section ($$) {  sub print_syntax_error_manifest_section ($$) {
640    my ($input, $result) = @_;    my ($input, $result) = @_;
641    
642    require Whatpm::CacheManifest;    require Whatpm::CacheManifest;
643    
644    print STDOUT qq[    print STDOUT qq[
645  <div id="parse-errors" class="section">  <div id="$input->{id_prefix}parse-errors" class="section">
646  <h2>Parse Errors</h2>  <h2>Parse Errors</h2>
647    
648  <dl>];  <dl id="$input->{id_prefix}parse-errors-list">];
649    push @nav, ['#parse-errors' => 'Parse Error'];    push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
650    
651    my $onerror = sub {    my $onerror = sub {
652      my (%opt) = @_;      my (%opt) = @_;
653      my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});      my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
654      print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];      print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
655            qq[</dt>];
656      $type =~ tr/ /-/;      $type =~ tr/ /-/;
657      $type =~ s/\|/%7C/g;      $type =~ s/\|/%7C/g;
658      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
# Line 332  sub print_syntax_error_manifest_section Line 662  sub print_syntax_error_manifest_section
662      add_error ('syntax', \%opt => $result);      add_error ('syntax', \%opt => $result);
663    };    };
664    
665      my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string';
666    my $time1 = time;    my $time1 = time;
667    my $manifest = Whatpm::CacheManifest->parse_byte_string    my $manifest = Whatpm::CacheManifest->$m
668        ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);        ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
669    $time{parse_manifest} = time - $time1;    $time{parse_manifest} = time - $time1;
670    
# Line 342  sub print_syntax_error_manifest_section Line 673  sub print_syntax_error_manifest_section
673    return $manifest;    return $manifest;
674  } # print_syntax_error_manifest_section  } # print_syntax_error_manifest_section
675    
676  sub print_source_string_section ($$) {  sub print_source_string_section ($$$) {
677    require Encode;    my $input = shift;
678    my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name    my $s;
679    return unless $enc;    unless ($input->{is_char_string}) {
680        open my $byte_stream, '<', $_[0];
681        require Message::Charset::Info;
682        my $charset = Message::Charset::Info->get_by_iana_name ($_[1]);
683        my ($char_stream, $e_status) = $charset->get_decode_handle
684            ($byte_stream, allow_error_reporting => 1, allow_fallback => 1);
685        return unless $char_stream;
686    
687        $char_stream->onerror (sub {
688          my (undef, $type, %opt) = @_;
689          if ($opt{octets}) {
690            ${$opt{octets}} = "\x{FFFD}";
691          }
692        });
693    
694        my $t = '';
695        while (1) {
696          my $c = $char_stream->getc;
697          last unless defined $c;
698          $t .= $c;
699        }
700        $s = \$t;
701        ## TODO: Output for each line, don't concat all of lines.
702      } else {
703        $s = $_[0];
704      }
705    
   my $s = \($enc->decode (${$_[0]}));  
706    my $i = 1;                                my $i = 1;                            
707    push @nav, ['#source-string' => 'Source'];    push @nav, ['#source-string' => 'Source'] unless $input->{nested};
708    print STDOUT qq[<div id="source-string" class="section">    print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
709  <h2>Document Source</h2>  <h2>Document Source</h2>
710  <ol lang="">\n];  <ol lang="">\n];
711    if (length $$s) {    if (length $$s) {
712      while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {      while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
713        print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";        print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
714              "</li>\n";
715        $i++;        $i++;
716      }      }
717      if ($$s =~ /\G([^\x0A]+)/gc) {      if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
718        print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";        print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
719              "</li>\n";
720      }      }
721    } else {    } else {
722      print STDOUT q[<li id="line-1"></li>];      print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
723    }    }
724    print STDOUT "</ol></div>";    print STDOUT "</ol></div>
725    <script>
726      addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
727    </script>";
728  } # print_input_string_section  } # print_input_string_section
729    
730  sub print_document_tree ($) {  sub print_document_tree ($$) {
731    my $node = shift;    my ($input, $node) = @_;
732    
733    my $r = '<ol class="xoxo">';    my $r = '<ol class="xoxo">';
734    
735    my @node = ($node);    my @node = ($node);
# Line 379  sub print_document_tree ($) { Line 740  sub print_document_tree ($) {
740        next;        next;
741      }      }
742    
743      my $node_id = 'node-'.refaddr $child;      my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
744      my $nt = $child->node_type;      my $nt = $child->node_type;
745      if ($nt == $child->ELEMENT_NODE) {      if ($nt == $child->ELEMENT_NODE) {
746        my $child_nsuri = $child->namespace_uri;        my $child_nsuri = $child->namespace_uri;
# Line 390  sub print_document_tree ($) { Line 751  sub print_document_tree ($) {
751          $r .= '<ul class="attributes">';          $r .= '<ul class="attributes">';
752          for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }          for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
753                        @{$child->attributes}) {                        @{$child->attributes}) {
754            $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?            $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
755            $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children            $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
756          }          }
757          $r .= '</ul>';          $r .= '</ul>';
# Line 411  sub print_document_tree ($) { Line 772  sub print_document_tree ($) {
772      } elsif ($nt == $child->DOCUMENT_NODE) {      } elsif ($nt == $child->DOCUMENT_NODE) {
773        $r .= qq'<li id="$node_id" class="tree-document">Document';        $r .= qq'<li id="$node_id" class="tree-document">Document';
774        $r .= qq[<ul class="attributes">];        $r .= qq[<ul class="attributes">];
775          my $cp = $child->manakai_charset;
776          if (defined $cp) {
777            $r .= qq[<li><code>charset</code> parameter = <code>];
778            $r .= htescape ($cp) . qq[</code></li>];
779          }
780          $r .= qq[<li><code>inputEncoding</code> = ];
781          my $ie = $child->input_encoding;
782          if (defined $ie) {
783            $r .= qq[<code>@{[htescape ($ie)]}</code>];
784            if ($child->manakai_has_bom) {
785              $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
786            }
787          } else {
788            $r .= qq[(<code>null</code>)];
789          }
790        $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];        $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
791        $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];        $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
792        unless ($child->manakai_is_html) {        unless ($child->manakai_is_html) {
# Line 444  sub print_document_tree ($) { Line 820  sub print_document_tree ($) {
820    print STDOUT $r;    print STDOUT $r;
821  } # print_document_tree  } # print_document_tree
822    
823  sub print_structure_dump_dom_section ($$) {  sub print_structure_dump_dom_section ($$$) {
824    my ($doc, $el) = @_;    my ($input, $doc, $el) = @_;
825    
826    print STDOUT qq[    print STDOUT qq[
827  <div id="document-tree" class="section">  <div id="$input->{id_prefix}document-tree" class="section">
828  <h2>Document Tree</h2>  <h2>Document Tree</h2>
829  ];  ];
830    push @nav, ['#document-tree' => 'Tree'];    push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
831          unless $input->{nested};
832    
833    print_document_tree ($el || $doc);    print_document_tree ($input, $el || $doc);
834    
835    print STDOUT qq[</div>];    print STDOUT qq[</div>];
836  } # print_structure_dump_dom_section  } # print_structure_dump_dom_section
837    
838  sub print_structure_dump_manifest_section ($) {  sub print_structure_dump_cssom_section ($$) {
839    my $manifest = shift;    my ($input, $cssom) = @_;
840    
841      print STDOUT qq[
842    <div id="$input->{id_prefix}document-tree" class="section">
843    <h2>Document Tree</h2>
844    ];
845      push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
846          unless $input->{nested};
847    
848      ## TODO:
849      print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
850    
851      print STDOUT qq[</div>];
852    } # print_structure_dump_cssom_section
853    
854    sub print_structure_dump_manifest_section ($$) {
855      my ($input, $manifest) = @_;
856    
857    print STDOUT qq[    print STDOUT qq[
858  <div id="dump-manifest" class="section">  <div id="$input->{id_prefix}dump-manifest" class="section">
859  <h2>Cache Manifest</h2>  <h2>Cache Manifest</h2>
860  ];  ];
861    push @nav, ['#dump-manifest' => 'Caceh Manifest'];    push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
862          unless $input->{nested};
863    
864    print STDOUT qq[<dl><dt>Explicit entries</dt>];    print STDOUT qq[<dl><dt>Explicit entries</dt>];
865      my $i = 0;
866    for my $uri (@{$manifest->[0]}) {    for my $uri (@{$manifest->[0]}) {
867      my $euri = htescape ($uri);      my $euri = htescape ($uri);
868      print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];      print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
869    }    }
870    
871    print STDOUT qq[<dt>Fallback entries</dt><dd>    print STDOUT qq[<dt>Fallback entries</dt><dd>
# Line 479  sub print_structure_dump_manifest_sectio Line 874  sub print_structure_dump_manifest_sectio
874    for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {    for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
875      my $euri = htescape ($uri);      my $euri = htescape ($uri);
876      my $euri2 = htescape ($manifest->[1]->{$uri});      my $euri2 = htescape ($manifest->[1]->{$uri});
877      print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>      print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
878          <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];          <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
879    }    }
880    
881    print STDOUT qq[</table><dt>Online whitelist</dt>];    print STDOUT qq[</table><dt>Online whitelist</dt>];
882    for my $uri (@{$manifest->[2]}) {    for my $uri (@{$manifest->[2]}) {
883      my $euri = htescape ($uri);      my $euri = htescape ($uri);
884      print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];      print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
885    }    }
886    
887    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
888  } # print_structure_dump_manifest_section  } # print_structure_dump_manifest_section
889    
890  sub print_structure_error_dom_section ($$$) {  sub print_structure_error_dom_section ($$$$$) {
891    my ($doc, $el, $result) = @_;    my ($input, $doc, $el, $result, $onsubdoc) = @_;
892    
893    print STDOUT qq[<div id="document-errors" class="section">    print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
894  <h2>Document Errors</h2>  <h2>Document Errors</h2>
895    
896  <dl>];  <dl id=document-errors-list>];
897    push @nav, ['#document-errors' => 'Document Error'];    push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
898          unless $input->{nested};
899    
900    require Whatpm::ContentChecker;    require Whatpm::ContentChecker;
901    my $onerror = sub {    my $onerror = sub {
# Line 508  sub print_structure_error_dom_section ($ Line 904  sub print_structure_error_dom_section ($
904      $type =~ tr/ /-/;      $type =~ tr/ /-/;
905      $type =~ s/\|/%7C/g;      $type =~ s/\|/%7C/g;
906      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
907      print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .      print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
908          qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);          qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
909      print STDOUT $msg, "</dd>\n";      print STDOUT $msg, "</dd>\n";
910      add_error ('structure', \%opt => $result);      add_error ('structure', \%opt => $result);
# Line 517  sub print_structure_error_dom_section ($ Line 913  sub print_structure_error_dom_section ($
913    my $elements;    my $elements;
914    my $time1 = time;    my $time1 = time;
915    if ($el) {    if ($el) {
916      $elements = Whatpm::ContentChecker->check_element ($el, $onerror);      $elements = Whatpm::ContentChecker->check_element
917            ($el, $onerror, $onsubdoc);
918    } else {    } else {
919      $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);      $elements = Whatpm::ContentChecker->check_document
920            ($doc, $onerror, $onsubdoc);
921    }    }
922    $time{check} = time - $time1;    $time{check} = time - $time1;
923    
924    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl>
925    <script>
926      addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
927    </script></div>];
928    
929    return $elements;    return $elements;
930  } # print_structure_error_dom_section  } # print_structure_error_dom_section
931    
932  sub print_structure_error_manifest_section ($$$) {  sub print_structure_error_manifest_section ($$$) {
933    my ($manifest, $result) = @_;    my ($input, $manifest, $result) = @_;
934    
935    print STDOUT qq[<div id="document-errors" class="section">    print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
936  <h2>Document Errors</h2>  <h2>Document Errors</h2>
937    
938  <dl>];  <dl>];
939    push @nav, ['#document-errors' => 'Document Error'];    push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
940          unless $input->{nested};
941    
942    require Whatpm::CacheManifest;    require Whatpm::CacheManifest;
943    Whatpm::CacheManifest->check_manifest ($manifest, sub {    Whatpm::CacheManifest->check_manifest ($manifest, sub {
# Line 544  sub print_structure_error_manifest_secti Line 946  sub print_structure_error_manifest_secti
946      $type =~ tr/ /-/;      $type =~ tr/ /-/;
947      $type =~ s/\|/%7C/g;      $type =~ s/\|/%7C/g;
948      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];      $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
949      print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .      print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
950          qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";          qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
951      add_error ('structure', \%opt => $result);      add_error ('structure', \%opt => $result);
952    });    });
# Line 552  sub print_structure_error_manifest_secti Line 954  sub print_structure_error_manifest_secti
954    print STDOUT qq[</div>];    print STDOUT qq[</div>];
955  } # print_structure_error_manifest_section  } # print_structure_error_manifest_section
956    
957  sub print_table_section ($) {  sub print_table_section ($$) {
958    my $tables = shift;    my ($input, $tables) = @_;
959        
960    push @nav, ['#tables' => 'Tables'];    push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
961          unless $input->{nested};
962    print STDOUT qq[    print STDOUT qq[
963  <div id="tables" class="section">  <div id="$input->{id_prefix}tables" class="section">
964  <h2>Tables</h2>  <h2>Tables</h2>
965    
966  <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->  <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
# Line 570  sub print_table_section ($) { Line 973  sub print_table_section ($) {
973    require JSON;    require JSON;
974        
975    my $i = 0;    my $i = 0;
976    for my $table_el (@$tables) {    for my $table (@$tables) {
977      $i++;      $i++;
978      print STDOUT qq[<div class="section" id="table-$i"><h3>] .      print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
979          get_node_link ($table_el) . q[</h3>];          get_node_link ($input, $table->{element}) . q[</h3>];
980    
981      ## TODO: Make |ContentChecker| return |form_table| result      delete $table->{element};
982      ## so that this script don't have to run the algorithm twice.  
983      my $table = Whatpm::HTMLTable->form_table ($table_el);      for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption},
984                 @{$table->{row}}) {
     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {  
985        next unless $_;        next unless $_;
986        delete $_->{element};        delete $_->{element};
987      }      }
# Line 605  sub print_table_section ($) { Line 1007  sub print_table_section ($) {
1007                    
1008      print STDOUT '</div><script type="text/javascript">tableToCanvas (';      print STDOUT '</div><script type="text/javascript">tableToCanvas (';
1009      print STDOUT JSON::objToJson ($table);      print STDOUT JSON::objToJson ($table);
1010      print STDOUT qq[, document.getElementById ('table-$i'));</script>];      print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
1011        print STDOUT qq[, '$input->{id_prefix}');</script>];
1012    }    }
1013        
1014    print STDOUT qq[</div>];    print STDOUT qq[</div>];
1015  } # print_table_section  } # print_table_section
1016    
1017  sub print_id_section ($) {  sub print_listing_section ($$$) {
1018    my $ids = shift;    my ($opt, $input, $ids) = @_;
1019        
1020    push @nav, ['#identifiers' => 'IDs'];    push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
1021          unless $input->{nested};
1022    print STDOUT qq[    print STDOUT qq[
1023  <div id="identifiers" class="section">  <div id="$input->{id_prefix}$opt->{id}" class="section">
1024  <h2>Identifiers</h2>  <h2>$opt->{heading}</h2>
1025    
1026  <dl>  <dl>
1027  ];  ];
1028    for my $id (sort {$a cmp $b} keys %$ids) {    for my $id (sort {$a cmp $b} keys %$ids) {
1029      print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];      print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
1030      for (@{$ids->{$id}}) {      for (@{$ids->{$id}}) {
1031        print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];        print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1032      }      }
1033    }    }
1034    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
1035  } # print_id_section  } # print_listing_section
1036    
1037    sub print_uri_section ($$$) {
1038      my ($input, $uris) = @_;
1039    
1040  sub print_term_section ($) {    ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents),
1041    my $terms = shift;    ## except for those in RDF triples.
1042      ## TODO: URIs in CSS
1043        
1044    push @nav, ['#terms' => 'Terms'];    push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs']
1045          unless $input->{nested};
1046    print STDOUT qq[    print STDOUT qq[
1047  <div id="terms" class="section">  <div id="$input->{id_prefix}uris" class="section">
1048  <h2>Terms</h2>  <h2>URIs</h2>
1049    
1050  <dl>  <dl>];
1051  ];    for my $uri (sort {$a cmp $b} keys %$uris) {
1052    for my $term (sort {$a cmp $b} keys %$terms) {      my $euri = htescape ($uri);
1053      print STDOUT qq[<dt>@{[htescape $term]}</dt>];      print STDOUT qq[<dt><code class=uri>&lt;<a href="$euri">$euri</a>></code>];
1054      for (@{$terms->{$term}}) {      my $eccuri = htescape (get_cc_uri ($uri));
1055        print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];      print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>];
1056        print STDOUT qq[<dd>Found at: <ul>];
1057        for my $entry (@{$uris->{$uri}}) {
1058          print STDOUT qq[<li>], get_node_link ($input, $entry->{node});
1059          if (keys %{$entry->{type} or {}}) {
1060            print STDOUT ' (';
1061            print STDOUT join ', ', map {
1062              {
1063                hyperlink => 'Hyperlink',
1064                resource => 'Link to an external resource',
1065                namespace => 'Namespace URI',
1066                cite => 'Citation or link to a long description',
1067                embedded => 'Link to an embedded content',
1068                base => 'Base URI',
1069                action => 'Submission URI',
1070              }->{$_}
1071                or
1072              htescape ($_)
1073            } keys %{$entry->{type}};
1074            print STDOUT ')';
1075          }
1076      }      }
1077        print STDOUT qq[</ul>];
1078    }    }
1079    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
1080  } # print_term_section  } # print_uri_section
1081    
1082  sub print_class_section ($) {  sub print_rdf_section ($$$) {
1083    my $classes = shift;    my ($input, $rdfs) = @_;
1084        
1085    push @nav, ['#classes' => 'Classes'];    push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1086          unless $input->{nested};
1087    print STDOUT qq[    print STDOUT qq[
1088  <div id="classes" class="section">  <div id="$input->{id_prefix}rdf" class="section">
1089  <h2>Classes</h2>  <h2>RDF Triples</h2>
1090    
1091  <dl>  <dl>];
1092  ];    my $i = 0;
1093    for my $class (sort {$a cmp $b} keys %$classes) {    for my $rdf (@$rdfs) {
1094      print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];      print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1095      for (@{$classes->{$class}}) {      print STDOUT get_node_link ($input, $rdf->[0]);
1096        print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];      print STDOUT qq[<dd><dl>];
1097        for my $triple (@{$rdf->[1]}) {
1098          print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1099          print STDOUT get_rdf_resource_html ($triple->[1]);
1100          print STDOUT ' ';
1101          print STDOUT get_rdf_resource_html ($triple->[2]);
1102          print STDOUT ' ';
1103          print STDOUT get_rdf_resource_html ($triple->[3]);
1104      }      }
1105        print STDOUT qq[</dl>];
1106    }    }
1107    print STDOUT qq[</dl></div>];    print STDOUT qq[</dl></div>];
1108  } # print_class_section  } # print_rdf_section
1109    
1110    sub get_rdf_resource_html ($) {
1111      my $resource = shift;
1112      if (defined $resource->{uri}) {
1113        my $euri = htescape ($resource->{uri});
1114        return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1115            '</a>></code>';
1116      } elsif (defined $resource->{bnodeid}) {
1117        return htescape ('_:' . $resource->{bnodeid});
1118      } elsif ($resource->{nodes}) {
1119        return '(rdf:XMLLiteral)';
1120      } elsif (defined $resource->{value}) {
1121        my $elang = htescape (defined $resource->{language}
1122                                  ? $resource->{language} : '');
1123        my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1124        if (defined $resource->{datatype}) {
1125          my $euri = htescape ($resource->{datatype});
1126          $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1127              '</a>></code>';
1128        } elsif (length $resource->{language}) {
1129          $r .= '@' . htescape ($resource->{language});
1130        }
1131        return $r;
1132      } else {
1133        return '??';
1134      }
1135    } # get_rdf_resource_html
1136    
1137  sub print_result_section ($) {  sub print_result_section ($) {
1138    my $result = shift;    my $result = shift;
# Line 733  Errors</a></th> Line 1199  Errors</a></th>
1199    
1200      print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];      print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1201      if ($uncertain) {      if ($uncertain) {
1202        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}];
1203      } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {      } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1204        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}];
1205      } else {      } else {
1206        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];        print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}];
1207      }      }
1208        print qq[ / 20];
1209    }    }
1210    
1211    $score_max += $score_base;    $score_max += $score_base;
1212    
1213    print STDOUT qq[    print STDOUT qq[
1214  <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>  <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base / 20
1215  </tbody>  </tbody>
1216  <tfoot><tr class=uncertain><th scope=row>Total</th>  <tfoot><tr class=uncertain><th scope=row>Total</th>
1217  <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>  <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1218  <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>  <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1219  <td>$warning?</td>  <td>$warning?</td>
1220  <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>  <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong> / 100
1221  </table>  </table>
1222    
1223  <p><strong>Important</strong>: This conformance checking service  <p><strong>Important</strong>: This conformance checking service
# Line 759  is <em>under development</em>.  The resu Line 1226  is <em>under development</em>.  The resu
1226    push @nav, ['#result-summary' => 'Result'];    push @nav, ['#result-summary' => 'Result'];
1227  } # print_result_section  } # print_result_section
1228    
1229  sub print_result_unknown_type_section ($) {  sub print_result_unknown_type_section ($$) {
1230    my $input = shift;    my ($input, $result) = @_;
1231    
1232      my $euri = htescape ($input->{uri});
1233    print STDOUT qq[    print STDOUT qq[
1234  <div id="result-summary" class="section">  <div id="$input->{id_prefix}parse-errors" class="section">
1235  <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>  <h2>Errors</h2>
1236    
1237    <dl>
1238    <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1239        <dd class=unsupported><strong><a href="../error-description#level-u">Not
1240            supported</a></strong>:
1241        Media type
1242        <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1243        is not supported.</dd>
1244    </dl>
1245  </div>  </div>
1246  ];  ];
1247    push @nav, ['#result-summary' => 'Result'];    push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1248          unless $input->{nested};
1249      add_error (char => {level => 'u'} => $result);
1250      add_error (syntax => {level => 'u'} => $result);
1251      add_error (structure => {level => 'u'} => $result);
1252  } # print_result_unknown_type_section  } # print_result_unknown_type_section
1253    
1254  sub print_result_input_error_section ($) {  sub print_result_input_error_section ($) {
# Line 776  sub print_result_input_error_section ($) Line 1257  sub print_result_input_error_section ($)
1257  <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>  <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1258  </div>];  </div>];
1259    push @nav, ['#result-summary' => 'Result'];    push @nav, ['#result-summary' => 'Result'];
1260  } # print_Result_input_error_section  } # print_result_input_error_section
1261    
1262  sub get_error_label ($) {  sub get_error_label ($$) {
1263    my $err = shift;    my ($input, $err) = @_;
1264    
1265    my $r = '';    my $r = '';
1266    
1267    if (defined $err->{line}) {    my $line;
1268      if ($err->{column} > 0) {    my $column;
1269        $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];      
1270      if (defined $err->{node}) {
1271        $line = $err->{node}->get_user_data ('manakai_source_line');
1272        if (defined $line) {
1273          $column = $err->{node}->get_user_data ('manakai_source_column');
1274        } else {
1275          if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1276            my $owner = $err->{node}->owner_element;
1277            $line = $owner->get_user_data ('manakai_source_line');
1278            $column = $owner->get_user_data ('manakai_source_column');
1279          } else {
1280            my $parent = $err->{node}->parent_node;
1281            if ($parent) {
1282              $line = $parent->get_user_data ('manakai_source_line');
1283              $column = $parent->get_user_data ('manakai_source_column');
1284            }
1285          }
1286        }
1287      }
1288      unless (defined $line) {
1289        if (defined $err->{token} and defined $err->{token}->{line}) {
1290          $line = $err->{token}->{line};
1291          $column = $err->{token}->{column};
1292        } elsif (defined $err->{line}) {
1293          $line = $err->{line};
1294          $column = $err->{column};
1295        }
1296      }
1297    
1298      if (defined $line) {
1299        if (defined $column and $column > 0) {
1300          $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1301      } else {      } else {
1302        $err->{line} = $err->{line} - 1 || 1;        $line = $line - 1 || 1;
1303        $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];        $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1304      }      }
1305    }    }
1306    
1307    if (defined $err->{node}) {    if (defined $err->{node}) {
1308      $r .= ' ' if length $r;      $r .= ' ' if length $r;
1309      $r = get_node_link ($err->{node});      $r .= get_node_link ($input, $err->{node});
1310    }    }
1311    
1312    if (defined $err->{index}) {    if (defined $err->{index}) {
1313      $r .= ' ' if length $r;      if (length $r) {
1314      $r .= 'Index ' . (0+$err->{index});        $r .= ', Index ' . (0+$err->{index});
1315        } else {
1316          $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1317              . (0+$err->{index}) . '</a>';
1318        }
1319    }    }
1320    
1321    if (defined $err->{value}) {    if (defined $err->{value}) {
# Line 824  sub get_error_level_label ($) { Line 1340  sub get_error_level_label ($) {
1340    } elsif ($err->{level} eq 'w') {    } elsif ($err->{level} eq 'w') {
1341      $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:      $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1342          ];          ];
1343    } elsif ($err->{level} eq 'unsupported') {    } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1344      $r = qq[<strong><a href="../error-description#level-u">Not      $r = qq[<strong><a href="../error-description#level-u">Not
1345          supported</a></strong>: ];          supported</a></strong>: ];
1346      } elsif ($err->{level} eq 'i') {
1347        $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1348    } else {    } else {
1349      my $elevel = htescape ($err->{level});      my $elevel = htescape ($err->{level});
1350      $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:      $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
# Line 842  sub get_node_path ($) { Line 1360  sub get_node_path ($) {
1360    while (defined $node) {    while (defined $node) {
1361      my $rs;      my $rs;
1362      if ($node->node_type == 1) {      if ($node->node_type == 1) {
1363        $rs = $node->manakai_local_name;        $rs = $node->node_name;
1364        $node = $node->parent_node;        $node = $node->parent_node;
1365      } elsif ($node->node_type == 2) {      } elsif ($node->node_type == 2) {
1366        $rs = '@' . $node->manakai_local_name;        $rs = '@' . $node->node_name;
1367        $node = $node->owner_element;        $node = $node->owner_element;
1368      } elsif ($node->node_type == 3) {      } elsif ($node->node_type == 3) {
1369        $rs = '"' . $node->data . '"';        $rs = '"' . $node->data . '"';
# Line 863  sub get_node_path ($) { Line 1381  sub get_node_path ($) {
1381    return join '/', @r;    return join '/', @r;
1382  } # get_node_path  } # get_node_path
1383    
1384  sub get_node_link ($) {  sub get_node_link ($$) {
1385    return qq[<a href="#node-@{[refaddr $_[0]]}">] .    return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1386        htescape (get_node_path ($_[0])) . qq[</a>];        htescape (get_node_path ($_[1])) . qq[</a>];
1387  } # get_node_link  } # get_node_link
1388    
1389  {  {
# Line 873  sub get_node_link ($) { Line 1391  sub get_node_link ($) {
1391    
1392  sub load_text_catalog ($) {  sub load_text_catalog ($) {
1393    my $lang = shift; # MUST be a canonical lang name    my $lang = shift; # MUST be a canonical lang name
1394    open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";    open my $file, '<:utf8', "cc-msg.$lang.txt"
1395          or die "$0: cc-msg.$lang.txt: $!";
1396    while (<$file>) {    while (<$file>) {
1397      if (s/^([^;]+);([^;]*);//) {      if (s/^([^;]+);([^;]*);//) {
1398        my ($type, $cls, $msg) = ($1, $2, $_);        my ($type, $cls, $msg) = ($1, $2, $_);
# Line 886  sub load_text_catalog ($) { Line 1405  sub load_text_catalog ($) {
1405  sub get_text ($) {  sub get_text ($) {
1406    my ($type, $level, $node) = @_;    my ($type, $level, $node) = @_;
1407    $type = $level . ':' . $type if defined $level;    $type = $level . ':' . $type if defined $level;
1408      $level = 'm' unless defined $level;
1409    my @arg;    my @arg;
1410    {    {
1411      if (defined $Msg->{$type}) {      if (defined $Msg->{$type}) {
# Line 910  sub get_text ($) { Line 1430  sub get_text ($) {
1430            ? htescape ($node->owner_element->manakai_local_name)            ? htescape ($node->owner_element->manakai_local_name)
1431            : ''            : ''
1432        }ge;        }ge;
1433        return ($type, $Msg->{$type}->[0], $msg);        return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1434      } elsif ($type =~ s/:([^:]*)$//) {      } elsif ($type =~ s/:([^:]*)$//) {
1435        unshift @arg, $1;        unshift @arg, $1;
1436        redo;        redo;
1437      }      }
1438    }    }
1439    return ($type, '', htescape ($_[0]));    return ($type, 'level-'.$level, htescape ($_[0]));
1440  } # get_text  } # get_text
1441    
1442  }  }
1443    
1444    sub encode_uri_component ($) {
1445      require Encode;
1446      my $s = Encode::encode ('utf8', shift);
1447      $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge;
1448      return $s;
1449    } # encode_uri_component
1450    
1451    sub get_cc_uri ($) {
1452      return './?uri=' . encode_uri_component ($_[0]);
1453    } # get_cc_uri
1454    
1455  sub get_input_document ($$) {  sub get_input_document ($$) {
1456    my ($http, $dom) = @_;    my ($http, $dom) = @_;
1457    
# Line 972  EOH Line 1503  EOH
1503      $ua->protocols_allowed ([qw/http/]);      $ua->protocols_allowed ([qw/http/]);
1504      $ua->max_size (1000_000);      $ua->max_size (1000_000);
1505      my $req = HTTP::Request->new (GET => $request_uri);      my $req = HTTP::Request->new (GET => $request_uri);
1506        $req->header ('Accept-Encoding' => 'identity, *; q=0');
1507      my $res = $ua->request ($req);      my $res = $ua->request ($req);
1508      ## TODO: 401 sets |is_success| true.      ## TODO: 401 sets |is_success| true.
1509      if ($res->is_success or $http->get_parameter ('error-page')) {      if ($res->is_success or $http->get_parameter ('error-page')) {
# Line 981  EOH Line 1513  EOH
1513    
1514        ## TODO: More strict parsing...        ## TODO: More strict parsing...
1515        my $ct = $res->header ('Content-Type');        my $ct = $res->header ('Content-Type');
       if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {  
         $r->{media_type} = lc $1;  
       }  
1516        if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {        if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1517          $r->{charset} = lc $1;          $r->{charset} = lc $1;
1518          $r->{charset} =~ tr/\\//d;          $r->{charset} =~ tr/\\//d;
1519            $r->{official_charset} = $r->{charset};
1520        }        }
1521    
1522        my $input_charset = $http->get_parameter ('charset');        my $input_charset = $http->get_parameter ('charset');
# Line 994  EOH Line 1524  EOH
1524          $r->{charset_overridden}          $r->{charset_overridden}
1525              = (not defined $r->{charset} or $r->{charset} ne $input_charset);              = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1526          $r->{charset} = $input_charset;          $r->{charset} = $input_charset;
1527        }        }
1528    
1529          ## TODO: Support for HTTP Content-Encoding
1530    
1531        $r->{s} = ''.$res->content;        $r->{s} = ''.$res->content;
1532    
1533          require Whatpm::ContentType;
1534          ($r->{official_type}, $r->{media_type})
1535              = Whatpm::ContentType->get_sniffed_type
1536                  (get_file_head => sub {
1537                     return substr $r->{s}, 0, shift;
1538                   },
1539                   http_content_type_byte => $ct,
1540                   has_http_content_encoding =>
1541                       defined $res->header ('Content-Encoding'),
1542                   supported_image_types => {});
1543      } else {      } else {
1544        $r->{uri} = $res->request->uri;        $r->{uri} = $res->request->uri;
1545        $r->{request_uri} = $request_uri;        $r->{request_uri} = $request_uri;
# Line 1017  EOH Line 1560  EOH
1560      $r->{charset} = ''.$http->get_parameter ('_charset_');      $r->{charset} = ''.$http->get_parameter ('_charset_');
1561      $r->{charset} =~ s/\s+//g;      $r->{charset} =~ s/\s+//g;
1562      $r->{charset} = 'utf-8' if $r->{charset} eq '';      $r->{charset} = 'utf-8' if $r->{charset} eq '';
1563        $r->{official_charset} = $r->{charset};
1564      $r->{header_field} = [];      $r->{header_field} = [];
1565    
1566        require Whatpm::ContentType;
1567        ($r->{official_type}, $r->{media_type})
1568            = Whatpm::ContentType->get_sniffed_type
1569                (get_file_head => sub {
1570                   return substr $r->{s}, 0, shift;
1571                 },
1572                 http_content_type_byte => undef,
1573                 has_http_content_encoding => 0,
1574                 supported_image_types => {});
1575    }    }
1576    
1577    my $input_format = $http->get_parameter ('i');    my $input_format = $http->get_parameter ('i');
# Line 1034  EOH Line 1588  EOH
1588    if ($r->{media_type} eq 'text/xml') {    if ($r->{media_type} eq 'text/xml') {
1589      unless (defined $r->{charset}) {      unless (defined $r->{charset}) {
1590        $r->{charset} = 'us-ascii';        $r->{charset} = 'us-ascii';
1591          $r->{official_charset} = $r->{charset};
1592      } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {      } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1593        $r->{charset_overridden} = 0;        $r->{charset_overridden} = 0;
1594      }      }
# Line 1045  EOH Line 1600  EOH
1600      return $r;      return $r;
1601    }    }
1602    
1603      $r->{inner_html_element} = $http->get_parameter ('e');
1604    
1605    return $r;    return $r;
1606  } # get_input_document  } # get_input_document
1607    
# Line 1077  Wakaba <w@suika.fam.cx>. Line 1634  Wakaba <w@suika.fam.cx>.
1634    
1635  =head1 LICENSE  =head1 LICENSE
1636    
1637  Copyright 2007 Wakaba <w@suika.fam.cx>  Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1638    
1639  This library is free software; you can redistribute it  This library is free software; you can redistribute it
1640  and/or modify it under the same terms as Perl itself.  and/or modify it under the same terms as Perl itself.

Legend:
Removed from v.1.23  
changed lines
  Added in v.1.51

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24