--- test/html-webhacc/cc.cgi 2007/07/17 14:28:20 1.14 +++ test/html-webhacc/cc.cgi 2008/02/10 02:42:01 1.33 @@ -1,13 +1,12 @@ #!/usr/bin/perl use strict; +use utf8; use lib qw[/home/httpd/html/www/markup/html/whatpm - /home/wakaba/work/manakai/lib - /home/wakaba/public_html/-temp/wiki/lib]; + /home/wakaba/work/manakai2/lib]; use CGI::Carp qw[fatalsToBrowser]; use Scalar::Util qw[refaddr]; - -use SuikaWiki::Input::HTTP; ## TODO: Use some better CGI module +use Time::HiRes qw/time/; sub htescape ($) { my $s = $_[0]; @@ -21,11 +20,10 @@ return $s; } # htescape -my $http = SuikaWiki::Input::HTTP->new; - -## TODO: _charset_ + use Message::CGI::HTTP; + my $http = Message::CGI::HTTP->new; - if ($http->meta_variable ('PATH_INFO') ne '/') { + if ($http->get_meta_variable ('PATH_INFO') ne '/') { print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400"; exit; } @@ -54,7 +52,8 @@ $| = 0; my $input = get_input_document ($http, $dom); - my $inner_html_element = $http->parameter ('e'); + my $char_length = 0; + my %time; print qq[
@@ -62,44 +61,190 @@
Request URI
<@{[htescape $input->{request_uri}]}>
Document URI
-
<@{[htescape $input->{uri}]}>
+
<@{[htescape $input->{uri}]}> +
]; # no yet push @nav, ['#document-info' => 'Information']; if (defined $input->{s}) { + $char_length = length $input->{s}; print STDOUT qq[
Base URI
<@{[htescape $input->{base_uri}]}>
Internet Media Type
@{[htescape $input->{media_type}]} - @{[$input->{media_type_overridden} ? '(overridden)' : '']}
+ @{[$input->{media_type_overridden} ? '(overridden)' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '(sniffed; official type is: '.htescape ($input->{official_type}).')' : '(sniffed)']}
Character Encoding
@{[defined $input->{charset} ? ''.htescape ($input->{charset}).'' : '(none)']} @{[$input->{charset_overridden} ? '(overridden)' : '']}
+
Length
+
$char_length byte@{[$char_length == 1 ? '' : 's']}
]; - print_http_header_section ($input); + my $result = {conforming_min => 1, conforming_max => 1}; + check_and_print ($input => $result); + print_result_section ($result); +} else { + print STDOUT qq[]; + print_result_input_error_section ($input); +} + + print STDOUT qq[ + + + +]; + + for (qw/decode parse parse_html parse_xml parse_manifest + check check_manifest/) { + next unless defined $time{$_}; + open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!"; + print $file $char_length, "\t", $time{$_}, "\n"; + } + +exit; + +sub add_error ($$$) { + my ($layer, $err, $result) = @_; + if (defined $err->{level}) { + if ($err->{level} eq 's') { + $result->{$layer}->{should}++; + $result->{$layer}->{score_min} -= 2; + $result->{conforming_min} = 0; + } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') { + $result->{$layer}->{warning}++; + } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') { + $result->{$layer}->{unsupported}++; + $result->{unsupported} = 1; + } else { + $result->{$layer}->{must}++; + $result->{$layer}->{score_max} -= 2; + $result->{$layer}->{score_min} -= 2; + $result->{conforming_min} = 0; + $result->{conforming_max} = 0; + } + } else { + $result->{$layer}->{must}++; + $result->{$layer}->{score_max} -= 2; + $result->{$layer}->{score_min} -= 2; + $result->{conforming_min} = 0; + $result->{conforming_max} = 0; + } +} # add_error + +sub check_and_print ($$) { + my ($input, $result) = @_; + $input->{id_prefix} = ''; + #$input->{nested} = 1/0; + + print_http_header_section ($input, $result); my $doc; my $el; + my $manifest; if ($input->{media_type} eq 'text/html') { - require Encode; - require Whatpm::HTML; + ($doc, $el) = print_syntax_error_html_section ($input, $result); + print_source_string_section + (\($input->{s}), $input->{charset} || $doc->input_encoding); + } elsif ({ + 'text/xml' => 1, + 'application/atom+xml' => 1, + 'application/rss+xml' => 1, + 'application/svg+xml' => 1, + 'application/xhtml+xml' => 1, + 'application/xml' => 1, + }->{$input->{media_type}}) { + ($doc, $el) = print_syntax_error_xml_section ($input, $result); + print_source_string_section (\($input->{s}), $doc->input_encoding); + } elsif ($input->{media_type} eq 'text/cache-manifest') { +## TODO: MUST be text/cache-manifest + $manifest = print_syntax_error_manifest_section ($input, $result); + print_source_string_section (\($input->{s}), 'utf-8'); + } else { + ## TODO: Change HTTP status code?? + print_result_unknown_type_section ($input, $result); + } - $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now. - - my $t = Encode::decode ($input->{charset}, $input->{s}); + if (defined $doc or defined $el) { + print_structure_dump_dom_section ($input, $doc, $el); + my $elements = print_structure_error_dom_section + ($input, $doc, $el, $result); + print_table_section ($input, $elements->{table}) if @{$elements->{table}}; + print_listing_section ({ + id => 'identifiers', label => 'IDs', heading => 'Identifiers', + }, $input, $elements->{id}) if keys %{$elements->{id}}; + print_listing_section ({ + id => 'terms', label => 'Terms', heading => 'Terms', + }, $input, $elements->{term}) if keys %{$elements->{term}}; + print_listing_section ({ + id => 'classes', label => 'Classes', heading => 'Classes', + }, $input, $elements->{class}) if keys %{$elements->{class}}; + } elsif (defined $manifest) { + print_structure_dump_manifest_section ($input, $manifest); + print_structure_error_manifest_section ($input, $manifest, $result); + } +} # check_and_print - print STDOUT qq[ -
+sub print_http_header_section ($$) { + my ($input, $result) = @_; + return unless defined $input->{header_status_code} or + defined $input->{header_status_text} or + @{$input->{header_field}}; + + push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested}; + print STDOUT qq[
+

HTTP Header

+ +

Note: Due to the limitation of the +network library in use, the content of this section might +not be the real header.

+ + +]; + + if (defined $input->{header_status_code}) { + print STDOUT qq[]; + print STDOUT qq[]; + } + if (defined $input->{header_status_text}) { + print STDOUT qq[]; + print STDOUT qq[]; + } + + for (@{$input->{header_field}}) { + print STDOUT qq[]; + print STDOUT qq[]; + } + + print STDOUT qq[
Status code@{[htescape ($input->{header_status_code})]}
Status text@{[htescape ($input->{header_status_text})]}
@{[htescape ($_->[0])]}@{[htescape ($_->[1])]}
]; +} # print_http_header_section + +sub print_syntax_error_html_section ($$) { + my ($input, $result) = @_; + + require Encode; + require Whatpm::HTML; + + print STDOUT qq[ +

Parse Errors

]; - push @nav, ['#parse-errors' => 'Parse Error']; + push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested}; my $onerror = sub { my (%opt) = @_; @@ -113,36 +258,51 @@ $type =~ tr/ /-/; $type =~ s/\|/%7C/g; $msg .= qq[ [Description]]; - print STDOUT qq[
$msg
\n]; + print STDOUT qq[
], get_error_level_label (\%opt); + print STDOUT qq[$msg
\n]; + + add_error ('syntax', \%opt => $result); }; - $doc = $dom->create_document; + my $doc = $dom->create_document; + my $el; + my $inner_html_element = $http->get_parameter ('e'); if (defined $inner_html_element and length $inner_html_element) { + $input->{charset} ||= 'windows-1252'; ## TODO: for now. + my $time1 = time; + my $t = Encode::decode ($input->{charset}, $input->{s}); + $time{decode} = time - $time1; + $el = $doc->create_element_ns ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]); + $time1 = time; Whatpm::HTML->set_inner_html ($el, $t, $onerror); + $time{parse} = time - $time1; } else { - Whatpm::HTML->parse_string ($t => $doc, $onerror); + my $time1 = time; + Whatpm::HTML->parse_byte_string + ($input->{charset}, $input->{s} => $doc, $onerror); + $time{parse_html} = time - $time1; } + $doc->manakai_charset ($input->{official_charset}) + if defined $input->{official_charset}; + + print STDOUT qq[
]; - print STDOUT qq[ -
-]; - - print_source_string_section (\($input->{s}), $input->{charset}); - } elsif ({ - 'text/xml' => 1, - 'application/xhtml+xml' => 1, - 'application/xml' => 1, - }->{$input->{media_type}}) { - require Message::DOM::XMLParserTemp; + return ($doc, $el); +} # print_syntax_error_html_section - print STDOUT qq[ -
+sub print_syntax_error_xml_section ($$) { + my ($input, $result) = @_; + + require Message::DOM::XMLParserTemp; + + print STDOUT qq[ +

Parse Errors

]; - push @nav, ['#parse-errors' => 'Parse Error']; + push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix}; my $onerror = sub { my $err = shift; @@ -150,239 +310,65 @@ print STDOUT qq[
Line $line column ]; print STDOUT $err->location->column_number, "
"; print STDOUT htescape $err->text, "
\n"; + + add_error ('syntax', {type => $err->text, + level => [ + $err->SEVERITY_FATAL_ERROR => 'm', + $err->SEVERITY_ERROR => 'm', + $err->SEVERITY_WARNING => 's', + ]->[$err->severity]} => $result); + return 1; }; + my $time1 = time; open my $fh, '<', \($input->{s}); - $doc = Message::DOM::XMLParserTemp->parse_byte_stream + my $doc = Message::DOM::XMLParserTemp->parse_byte_stream ($fh => $dom, $onerror, charset => $input->{charset}); + $time{parse_xml} = time - $time1; + $doc->manakai_charset ($input->{official_charset}) + if defined $input->{official_charset}; - print STDOUT qq[
-
- -]; - print_source_string_section (\($input->{s}), $doc->input_encoding); - } else { - ## TODO: Change HTTP status code?? - print STDOUT qq[ -
-

Media type @{[htescape $input->{media_type}]} is not supported!

-
-]; - push @nav, ['#result-summary' => 'Result']; - } - - - if (defined $doc or defined $el) { - print STDOUT qq[ -
-

Document Tree

-]; - push @nav, ['#document-tree' => 'Tree']; - - print_document_tree ($el || $doc); - - print STDOUT qq[ -
- -
-

Document Errors

- -
]; - push @nav, ['#document-errors' => 'Document Error']; - - require Whatpm::ContentChecker; - my $onerror = sub { - my %opt = @_; - my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}); - $type =~ tr/ /-/; - $type =~ s/\|/%7C/g; - $msg .= qq[ [Description]]; - print STDOUT qq[
] . get_node_link ($opt{node}) . - qq[
\n
], $msg, "
\n"; - }; - - my $elements; - if ($el) { - $elements = Whatpm::ContentChecker->check_element ($el, $onerror); - } else { - $elements = Whatpm::ContentChecker->check_document ($doc, $onerror); - } - - print STDOUT qq[
-
-]; - - if (@{$elements->{table}}) { - require JSON; - - push @nav, ['#tables' => 'Tables']; - print STDOUT qq[ -
-

Tables

- - - - -]; - - my $i = 0; - for my $table_el (@{$elements->{table}}) { - $i++; - print STDOUT qq[

] . - get_node_link ($table_el) . q[

]; - - ## TODO: Make |ContentChecker| return |form_table| result - ## so that this script don't have to run the algorithm twice. - my $table = Whatpm::HTMLTable->form_table ($table_el); - - for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) { - next unless $_; - delete $_->{element}; - } - - for (@{$table->{row_group}}) { - next unless $_; - next unless $_->{element}; - $_->{type} = $_->{element}->manakai_local_name; - delete $_->{element}; - } - - for (@{$table->{cell}}) { - next unless $_; - for (@{$_}) { - next unless $_; - for (@$_) { - $_->{id} = refaddr $_->{element} if defined $_->{element}; - delete $_->{element}; - $_->{is_header} = $_->{is_header} ? 1 : 0; - } - } - } - - print STDOUT '
]; - } - - print STDOUT qq[
]; - } - - if (keys %{$elements->{id}}) { - push @nav, ['#identifiers' => 'IDs']; - print STDOUT qq[ -
-

Identifiers

+ print STDOUT qq[
]; -
-]; - for my $id (sort {$a cmp $b} keys %{$elements->{id}}) { - print STDOUT qq[
@{[htescape $id]}
]; - for (@{$elements->{id}->{$id}}) { - print STDOUT qq[
].get_node_link ($_).qq[
]; - } - } - print STDOUT qq[
]; - } + return ($doc, undef); +} # print_syntax_error_xml_section - if (keys %{$elements->{term}}) { - push @nav, ['#terms' => 'Terms']; - print STDOUT qq[ -
-

Terms

+sub print_syntax_error_manifest_section ($$) { + my ($input, $result) = @_; -
-]; - for my $term (sort {$a cmp $b} keys %{$elements->{term}}) { - print STDOUT qq[
@{[htescape $term]}
]; - for (@{$elements->{term}->{$term}}) { - print STDOUT qq[
].get_node_link ($_).qq[
]; - } - } - print STDOUT qq[
]; - } - - if (keys %{$elements->{class}}) { - push @nav, ['#classes' => 'Classes']; - print STDOUT qq[ -
-

Classes

- -
-]; - for my $class (sort {$a cmp $b} keys %{$elements->{class}}) { - print STDOUT qq[
@{[htescape $class]}
]; - for (@{$elements->{class}->{$class}}) { - print STDOUT qq[
].get_node_link ($_).qq[
]; - } - } - print STDOUT qq[
]; - } - } - - ## TODO: Show result -} else { - print STDOUT qq[ - - - -
-

Input Error: @{[htescape ($input->{error_status_text})]}

-
-]; - push @nav, ['#result-summary' => 'Result']; - -} + require Whatpm::CacheManifest; print STDOUT qq[ - - - -]; +
+

Parse Errors

-exit; +
]; + push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested}; -sub print_http_header_section ($) { - my $input = shift; - return unless defined $input->{header_status_code} or - defined $input->{header_status_text} or - @{$input->{header_field}}; - - push @nav, ['#source-header' => 'HTTP Header']; - print STDOUT qq[
-

HTTP Header

+ my $onerror = sub { + my (%opt) = @_; + my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}); + print STDOUT qq[
], get_error_label ($input, \%opt), + qq[
]; + $type =~ tr/ /-/; + $type =~ s/\|/%7C/g; + $msg .= qq[ [Description]]; + print STDOUT qq[
], get_error_level_label (\%opt); + print STDOUT qq[$msg
\n]; -

Note: Due to the limitation of the -network library in use, the content of this section might -not be the real header.

+ add_error ('syntax', \%opt => $result); + }; - -]; + my $time1 = time; + my $manifest = Whatpm::CacheManifest->parse_byte_string + ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror); + $time{parse_manifest} = time - $time1; - if (defined $input->{header_status_code}) { - print STDOUT qq[]; - print STDOUT qq[]; - } - if (defined $input->{header_status_text}) { - print STDOUT qq[]; - print STDOUT qq[]; - } - - for (@{$input->{header_field}}) { - print STDOUT qq[]; - print STDOUT qq[]; - } + print STDOUT qq[]; - print STDOUT qq[
Status code@{[htescape ($input->{header_status_code})]}
Status text@{[htescape ($input->{header_status_text})]}
@{[htescape ($_->[0])]}@{[htescape ($_->[1])]}
]; -} # print_http_header_section + return $manifest; +} # print_syntax_error_manifest_section sub print_source_string_section ($$) { require Encode; @@ -391,20 +377,22 @@ my $s = \($enc->decode (${$_[0]})); my $i = 1; - push @nav, ['#source-string' => 'Source']; - print STDOUT qq[
+ push @nav, ['#source-string' => 'Source'] unless $input->{nested}; + print STDOUT qq[

Document Source

    \n]; if (length $$s) { while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) { - print STDOUT qq[
  1. ], htescape $1, "
  2. \n"; + print STDOUT qq[
  3. ], htescape $1, + "
  4. \n"; $i++; } if ($$s =~ /\G([^\x0A]+)/gc) { - print STDOUT qq[
  5. ], htescape $1, "
  6. \n"; + print STDOUT qq[
  7. ], htescape $1, + "
  8. \n"; } } else { - print STDOUT q[
  9. ]; + print STDOUT q[
  10. ]; } print STDOUT "
"; } # print_input_string_section @@ -421,7 +409,7 @@ next; } - my $node_id = 'node-'.refaddr $child; + my $node_id = $input->{id_prefix} . 'node-'.refaddr $child; my $nt = $child->node_type; if ($nt == $child->ELEMENT_NODE) { my $child_nsuri = $child->namespace_uri; @@ -432,7 +420,7 @@ $r .= '
    '; for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] } @{$child->attributes}) { - $r .= qq[
  • ] . htescape ($attr->[0]) . ' = '; ## ISSUE: case? + $r .= qq[
  • ] . htescape ($attr->[0]) . ' = '; ## ISSUE: case? $r .= '' . htescape ($attr->[1]) . '
  • '; ## TODO: children } $r .= '
'; @@ -453,6 +441,21 @@ } elsif ($nt == $child->DOCUMENT_NODE) { $r .= qq'
  • Document'; $r .= qq[
      ]; + my $cp = $child->manakai_charset; + if (defined $cp) { + $r .= qq[
    • charset parameter = ]; + $r .= htescape ($cp) . qq[
    • ]; + } + $r .= qq[
    • inputEncoding = ]; + my $ie = $child->input_encoding; + if (defined $ie) { + $r .= qq[@{[htescape ($ie)]}]; + if ($child->manakai_has_bom) { + $r .= qq[ (with BOM)]; + } + } else { + $r .= qq[(null)]; + } $r .= qq[
    • @{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}
    • ]; $r .= qq[
    • @{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}
    • ]; unless ($child->manakai_is_html) { @@ -486,6 +489,374 @@ print STDOUT $r; } # print_document_tree +sub print_structure_dump_dom_section ($$$) { + my ($input, $doc, $el) = @_; + + print STDOUT qq[ +
      +

      Document Tree

      +]; + push @nav, ['#document-tree' => 'Tree'] unless $input->{nested}; + + print_document_tree ($el || $doc); + + print STDOUT qq[
      ]; +} # print_structure_dump_dom_section + +sub print_structure_dump_manifest_section ($$) { + my ($input, $manifest) = @_; + + print STDOUT qq[ +
      +

      Cache Manifest

      +]; + push @nav, ['#dump-manifest' => 'Caceh Manifest'] unless $input->{nested}; + + print STDOUT qq[
      Explicit entries
      ]; + for my $uri (@{$manifest->[0]}) { + my $euri = htescape ($uri); + print STDOUT qq[
      <$euri>
      ]; + } + + print STDOUT qq[
      Fallback entries
      + + ]; + for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) { + my $euri = htescape ($uri); + my $euri2 = htescape ($manifest->[1]->{$uri}); + print STDOUT qq[ + ]; + } + + print STDOUT qq[
      Oppotunistic Caching NamespaceFallback Entry
      <$euri><$euri2>
      Online whitelist
      ]; + for my $uri (@{$manifest->[2]}) { + my $euri = htescape ($uri); + print STDOUT qq[
      <$euri>
      ]; + } + + print STDOUT qq[
      ]; +} # print_structure_dump_manifest_section + +sub print_structure_error_dom_section ($$$$) { + my ($input, $doc, $el, $result) = @_; + + print STDOUT qq[
      +

      Document Errors

      + +
      ]; + push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested}; + + require Whatpm::ContentChecker; + my $onerror = sub { + my %opt = @_; + my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node}); + $type =~ tr/ /-/; + $type =~ s/\|/%7C/g; + $msg .= qq[ [Description]]; + print STDOUT qq[
      ] . get_error_label ($input, \%opt) . + qq[
      \n
      ], get_error_level_label (\%opt); + print STDOUT $msg, "
      \n"; + add_error ('structure', \%opt => $result); + }; + + my $elements; + my $time1 = time; + if ($el) { + $elements = Whatpm::ContentChecker->check_element ($el, $onerror); + } else { + $elements = Whatpm::ContentChecker->check_document ($doc, $onerror); + } + $time{check} = time - $time1; + + print STDOUT qq[
      ]; + + return $elements; +} # print_structure_error_dom_section + +sub print_structure_error_manifest_section ($$$) { + my ($input, $manifest, $result) = @_; + + print STDOUT qq[
      +

      Document Errors

      + +
      ]; + push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested}; + + require Whatpm::CacheManifest; + Whatpm::CacheManifest->check_manifest ($manifest, sub { + my %opt = @_; + my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node}); + $type =~ tr/ /-/; + $type =~ s/\|/%7C/g; + $msg .= qq[ [Description]]; + print STDOUT qq[
      ] . get_error_label ($input, \%opt) . + qq[
      \n
      ], $msg, "
      \n"; + add_error ('structure', \%opt => $result); + }); + + print STDOUT qq[
      ]; +} # print_structure_error_manifest_section + +sub print_table_section ($$) { + my ($input, $tables) = @_; + + push @nav, ['#tables' => 'Tables'] unless $input->{nested}; + print STDOUT qq[ +
      +

      Tables

      + + + + +]; + + require JSON; + + my $i = 0; + for my $table_el (@$tables) { + $i++; + print STDOUT qq[

      ] . + get_node_link ($input, $table_el) . q[

      ]; + + ## TODO: Make |ContentChecker| return |form_table| result + ## so that this script don't have to run the algorithm twice. + my $table = Whatpm::HTMLTable->form_table ($table_el); + + for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) { + next unless $_; + delete $_->{element}; + } + + for (@{$table->{row_group}}) { + next unless $_; + next unless $_->{element}; + $_->{type} = $_->{element}->manakai_local_name; + delete $_->{element}; + } + + for (@{$table->{cell}}) { + next unless $_; + for (@{$_}) { + next unless $_; + for (@$_) { + $_->{id} = refaddr $_->{element} if defined $_->{element}; + delete $_->{element}; + $_->{is_header} = $_->{is_header} ? 1 : 0; + } + } + } + + print STDOUT '
      ]; + } + + print STDOUT qq[
      ]; +} # print_table_section + +sub print_listing_section ($$$) { + my ($opt, $input, $ids) = @_; + + push @nav, ['#' . $opt->{id} => $opt->{label}] unless $input->{nested}; + print STDOUT qq[ +
      +

      $opt->{heading}

      + +
      +]; + for my $id (sort {$a cmp $b} keys %$ids) { + print STDOUT qq[
      @{[htescape $id]}
      ]; + for (@{$ids->{$id}}) { + print STDOUT qq[
      ].get_node_link ($input, $_).qq[
      ]; + } + } + print STDOUT qq[
      ]; +} # print_listing_section + +sub print_result_section ($) { + my $result = shift; + + print STDOUT qq[ +
      +

      Result

      ]; + + if ($result->{unsupported} and $result->{conforming_max}) { + print STDOUT qq[

      The conformance + checker cannot decide whether the document is conforming or + not, since the document contains one or more unsupported + features. The document might or might not be conforming.

      ]; + } elsif ($result->{conforming_min}) { + print STDOUT qq[

      No conformance-error is + found in this document.

      ]; + } elsif ($result->{conforming_max}) { + print STDOUT qq[

      This document + is likely non-conforming, but in rare case + it might be conforming.

      ]; + } else { + print STDOUT qq[

      This document is + non-conforming.

      ]; + } + + print STDOUT qq[ ++ + + + + +]; + + my $must_error = 0; + my $should_error = 0; + my $warning = 0; + my $score_min = 0; + my $score_max = 0; + my $score_base = 20; + my $score_unit = $score_base / 100; + for ( + [Transfer => 'transfer', ''], + [Character => 'char', ''], + [Syntax => 'syntax', '#parse-errors'], + [Structure => 'structure', '#document-errors'], + ) { + $must_error += ($result->{$_->[1]}->{must} += 0); + $should_error += ($result->{$_->[1]}->{should} += 0); + $warning += ($result->{$_->[1]}->{warning} += 0); + $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base); + $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base); + + my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : ''; + my $label = $_->[0]; + if ($result->{$_->[1]}->{must} or + $result->{$_->[1]}->{should} or + $result->{$_->[1]}->{warning} or + $result->{$_->[1]}->{unsupported}) { + $label = qq[$label]; + } + + print STDOUT qq[]; + if ($uncertain) { + print qq[]; + } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) { + print qq[]; + } else { + print qq[]; + } + } + + $score_max += $score_base; + + print STDOUT qq[ + + + + + + + +
      MUST‐level +ErrorsSHOULD‐level +ErrorsWarningsScore
      $label$result->{$_->[1]}->{must}$uncertain$result->{$_->[1]}->{should}$uncertain$result->{$_->[1]}->{warning}$uncertain−∞..$result->{$_->[1]}->{score_max}$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}
      $result->{$_->[1]}->{score_min}
      Semantics0?0?0?−∞..$score_base
      Total$must_error?$should_error?$warning?−∞..$score_max
      + +

      Important: This conformance checking service +is under development. The result above might be wrong.

      +
      ]; + push @nav, ['#result-summary' => 'Result']; +} # print_result_section + +sub print_result_unknown_type_section ($$) { + my ($input, $result) = @_; + + my $euri = htescape ($input->{uri}); + print STDOUT qq[ +
      +

      Errors

      + +
      +
      <$euri>
      +
      Not + supported: + Media type + @{[htescape $input->{media_type}]} + is not supported.
      +
      +
      +]; + push @nav, ['#parse-errors' => 'Errors']; + add_error (char => {level => 'u'} => $result); + add_error (syntax => {level => 'u'} => $result); + add_error (structure => {level => 'u'} => $result); +} # print_result_unknown_type_section + +sub print_result_input_error_section ($) { + my $input = shift; + print STDOUT qq[
      +

      Input Error: @{[htescape ($input->{error_status_text})]}

      +
      ]; + push @nav, ['#result-summary' => 'Result']; +} # print_result_input_error_section + +sub get_error_label ($$) { + my ($input, $err) = @_; + + my $r = ''; + + if (defined $err->{line}) { + if ($err->{column} > 0) { + $r = qq[Line $err->{line} column $err->{column}]; + } else { + $err->{line} = $err->{line} - 1 || 1; + $r = qq[Line $err->{line}]; + } + } + + if (defined $err->{node}) { + $r .= ' ' if length $r; + $r = get_node_link ($input, $err->{node}); + } + + if (defined $err->{index}) { + $r .= ' ' if length $r; + $r .= 'Index ' . (0+$err->{index}); + } + + if (defined $err->{value}) { + $r .= ' ' if length $r; + $r .= '' . htescape ($err->{value}) . ''; + } + + return $r; +} # get_error_label + +sub get_error_level_label ($) { + my $err = shift; + + my $r = ''; + + if (not defined $err->{level} or $err->{level} eq 'm') { + $r = qq[MUST‐level + error: ]; + } elsif ($err->{level} eq 's') { + $r = qq[SHOULD‐level + error: ]; + } elsif ($err->{level} eq 'w') { + $r = qq[Warning: + ]; + } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') { + $r = qq[Not + supported: ]; + } else { + my $elevel = htescape ($err->{level}); + $r = qq[$elevel: + ]; + } + + return $r; +} # get_error_level_label + sub get_node_path ($) { my $node = shift; my @r; @@ -513,9 +884,9 @@ return join '/', @r; } # get_node_path -sub get_node_link ($) { - return qq[] . - htescape (get_node_path ($_[0])) . qq[]; +sub get_node_link ($$) { + return qq[] . + htescape (get_node_path ($_[1])) . qq[]; } # get_node_link { @@ -523,7 +894,8 @@ sub load_text_catalog ($) { my $lang = shift; # MUST be a canonical lang name - open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!"; + open my $file, '<:utf8', "cc-msg.$lang.txt" + or die "$0: cc-msg.$lang.txt: $!"; while (<$file>) { if (s/^([^;]+);([^;]*);//) { my ($type, $cls, $msg) = ($1, $2, $_); @@ -534,8 +906,9 @@ } # load_text_catalog sub get_text ($) { - my ($type, $level) = @_; + my ($type, $level, $node) = @_; $type = $level . ':' . $type if defined $level; + $level = 'm' unless defined $level; my @arg; { if (defined $Msg->{$type}) { @@ -543,13 +916,30 @@ $msg =~ s{\$([0-9]+)}{ defined $arg[$1] ? htescape ($arg[$1]) : '(undef)'; }ge; - return ($type, $Msg->{$type}->[0], $msg); + $msg =~ s{{\@([A-Za-z0-9:_.-]+)}}{ + UNIVERSAL::can ($node, 'get_attribute_ns') + ? htescape ($node->get_attribute_ns (undef, $1)) : '' + }ge; + $msg =~ s{{\@}}{ + UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : '' + }ge; + $msg =~ s{{local-name}}{ + UNIVERSAL::can ($node, 'manakai_local_name') + ? htescape ($node->manakai_local_name) : '' + }ge; + $msg =~ s{{element-local-name}}{ + (UNIVERSAL::can ($node, 'owner_element') and + $node->owner_element) + ? htescape ($node->owner_element->manakai_local_name) + : '' + }ge; + return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg); } elsif ($type =~ s/:([^:]*)$//) { unshift @arg, $1; redo; } } - return ($type, '', htescape ($_[0])); + return ($type, 'level-'.$level, htescape ($_[0])); } # get_text } @@ -557,7 +947,7 @@ sub get_input_document ($$) { my ($http, $dom) = @_; - my $request_uri = $http->parameter ('uri'); + my $request_uri = $http->get_parameter ('uri'); my $r = {}; if (defined $request_uri and length $request_uri) { my $uri = $dom->create_uri_reference ($request_uri); @@ -605,30 +995,43 @@ $ua->protocols_allowed ([qw/http/]); $ua->max_size (1000_000); my $req = HTTP::Request->new (GET => $request_uri); + $req->header ('Accept-Encoding' => 'identity, *; q=0'); my $res = $ua->request ($req); - if ($res->is_success or $http->parameter ('error-page')) { + ## TODO: 401 sets |is_success| true. + if ($res->is_success or $http->get_parameter ('error-page')) { $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and . ## TODO: Use our own code! $r->{uri} = $res->request->uri; $r->{request_uri} = $request_uri; ## TODO: More strict parsing... my $ct = $res->header ('Content-Type'); - if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) { - $r->{media_type} = lc $1; - } - if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?(\S+)"?/i) { + if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) { $r->{charset} = lc $1; $r->{charset} =~ tr/\\//d; + $r->{official_charset} = $r->{charset}; } - my $input_charset = $http->parameter ('charset'); + my $input_charset = $http->get_parameter ('charset'); if (defined $input_charset and length $input_charset) { $r->{charset_overridden} = (not defined $r->{charset} or $r->{charset} ne $input_charset); $r->{charset} = $input_charset; - } + } + + ## TODO: Support for HTTP Content-Encoding $r->{s} = ''.$res->content; + + require Whatpm::ContentType; + ($r->{official_type}, $r->{media_type}) + = Whatpm::ContentType->get_sniffed_type + (get_file_head => sub { + return substr $r->{s}, 0, shift; + }, + http_content_type_byte => $ct, + has_http_content_encoding => + defined $res->header ('Content-Encoding'), + supported_image_types => {}); } else { $r->{uri} = $res->request->uri; $r->{request_uri} = $request_uri; @@ -642,17 +1045,28 @@ $r->{header_status_code} = $res->code; $r->{header_status_text} = $res->message; } else { - $r->{s} = ''.$http->parameter ('s'); + $r->{s} = ''.$http->get_parameter ('s'); $r->{uri} = q; $r->{request_uri} = q; $r->{base_uri} = q; - $r->{charset} = ''.$http->parameter ('_charset_'); + $r->{charset} = ''.$http->get_parameter ('_charset_'); $r->{charset} =~ s/\s+//g; $r->{charset} = 'utf-8' if $r->{charset} eq ''; + $r->{official_charset} = $r->{charset}; $r->{header_field} = []; + + require Whatpm::ContentType; + ($r->{official_type}, $r->{media_type}) + = Whatpm::ContentType->get_sniffed_type + (get_file_head => sub { + return substr $r->{s}, 0, shift; + }, + http_content_type_byte => undef, + has_http_content_encoding => 0, + supported_image_types => {}); } - my $input_format = $http->parameter ('i'); + my $input_format = $http->get_parameter ('i'); if (defined $input_format and length $input_format) { $r->{media_type_overridden} = (not defined $r->{media_type} or $input_format ne $r->{media_type}); @@ -666,6 +1080,7 @@ if ($r->{media_type} eq 'text/xml') { unless (defined $r->{charset}) { $r->{charset} = 'us-ascii'; + $r->{official_charset} = $r->{charset}; } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') { $r->{charset_overridden} = 0; } @@ -716,4 +1131,4 @@ =cut -## $Date: 2007/07/17 14:28:20 $ +## $Date: 2008/02/10 02:42:01 $