| 173 | 'text/xml' => 1, | 'text/xml' => 1, | 
| 174 | 'application/atom+xml' => 1, | 'application/atom+xml' => 1, | 
| 175 | 'application/rss+xml' => 1, | 'application/rss+xml' => 1, | 
| 176 | 'application/svg+xml' => 1, | 'image/svg+xml' => 1, | 
| 177 | 'application/xhtml+xml' => 1, | 'application/xhtml+xml' => 1, | 
| 178 | 'application/xml' => 1, | 'application/xml' => 1, | 
| 179 |  | ## TODO: Should we make all XML MIME Types fall | 
| 180 |  | ## into this category? | 
| 181 |  |  | 
| 182 |  | 'application/rdf+xml' => 1, ## NOTE: This type has different model. | 
| 183 | }->{$input->{media_type}}) { | }->{$input->{media_type}}) { | 
| 184 | ($doc, $el) = print_syntax_error_xml_section ($input, $result); | ($doc, $el) = print_syntax_error_xml_section ($input, $result); | 
| 185 | print_source_string_section ($input, | print_source_string_section ($input, | 
| 218 | print_listing_section ({ | print_listing_section ({ | 
| 219 | id => 'classes', label => 'Classes', heading => 'Classes', | id => 'classes', label => 'Classes', heading => 'Classes', | 
| 220 | }, $input, $elements->{class}) if keys %{$elements->{class}}; | }, $input, $elements->{class}) if keys %{$elements->{class}}; | 
| 221 |  | print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}}; | 
| 222 |  | print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}}; | 
| 223 | } elsif (defined $cssom) { | } elsif (defined $cssom) { | 
| 224 | print_structure_dump_cssom_section ($input, $cssom); | print_structure_dump_cssom_section ($input, $cssom); | 
| 225 | ## TODO: CSSOM validation | ## TODO: CSSOM validation | 
| 323 | if (defined $inner_html_element and length $inner_html_element) { | if (defined $inner_html_element and length $inner_html_element) { | 
| 324 | $input->{charset} ||= 'windows-1252'; ## TODO: for now. | $input->{charset} ||= 'windows-1252'; ## TODO: for now. | 
| 325 | my $time1 = time; | my $time1 = time; | 
| 326 | my $t = Encode::decode ($input->{charset}, $input->{s}); | my $t = \($input->{s}); | 
| 327 |  | unless ($input->{is_char_string}) { | 
| 328 |  | $t = \(Encode::decode ($input->{charset}, $$t)); | 
| 329 |  | } | 
| 330 | $time{decode} = time - $time1; | $time{decode} = time - $time1; | 
| 331 |  |  | 
| 332 | $el = $doc->create_element_ns | $el = $doc->create_element_ns | 
| 333 | ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]); | ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]); | 
| 334 | $time1 = time; | $time1 = time; | 
| 335 | Whatpm::HTML->set_inner_html ($el, $t, $onerror); | Whatpm::HTML->set_inner_html ($el, $$t, $onerror); | 
| 336 | $time{parse} = time - $time1; | $time{parse} = time - $time1; | 
| 337 | } else { | } else { | 
| 338 | my $time1 = time; | my $time1 = time; | 
| 339 | Whatpm::HTML->parse_byte_string | if ($input->{is_char_string}) { | 
| 340 | ($input->{charset}, $input->{s} => $doc, $onerror); | Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror); | 
| 341 |  | } else { | 
| 342 |  | Whatpm::HTML->parse_byte_string | 
| 343 |  | ($input->{charset}, $input->{s} => $doc, $onerror); | 
| 344 |  | } | 
| 345 | $time{parse_html} = time - $time1; | $time{parse_html} = time - $time1; | 
| 346 | } | } | 
| 347 | $doc->manakai_charset ($input->{official_charset}) | $doc->manakai_charset ($input->{official_charset}) | 
| 381 | return 1; | return 1; | 
| 382 | }; | }; | 
| 383 |  |  | 
| 384 |  | my $t = \($input->{s}); | 
| 385 |  | if ($input->{is_char_string}) { | 
| 386 |  | require Encode; | 
| 387 |  | $t = \(Encode::encode ('utf8', $$t)); | 
| 388 |  | $input->{charset} = 'utf-8'; | 
| 389 |  | } | 
| 390 |  |  | 
| 391 | my $time1 = time; | my $time1 = time; | 
| 392 | open my $fh, '<', \($input->{s}); | open my $fh, '<', $t; | 
| 393 | my $doc = Message::DOM::XMLParserTemp->parse_byte_stream | my $doc = Message::DOM::XMLParserTemp->parse_byte_stream | 
| 394 | ($fh => $dom, $onerror, charset => $input->{charset}); | ($fh => $dom, $onerror, charset => $input->{charset}); | 
| 395 | $time{parse_xml} = time - $time1; | $time{parse_xml} = time - $time1; | 
| 662 | add_error ('syntax', \%opt => $result); | add_error ('syntax', \%opt => $result); | 
| 663 | }; | }; | 
| 664 |  |  | 
| 665 |  | my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string'; | 
| 666 | my $time1 = time; | my $time1 = time; | 
| 667 | my $manifest = Whatpm::CacheManifest->parse_byte_string | my $manifest = Whatpm::CacheManifest->$m | 
| 668 | ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror); | ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror); | 
| 669 | $time{parse_manifest} = time - $time1; | $time{parse_manifest} = time - $time1; | 
| 670 |  |  | 
| 965 | ## so that this script don't have to run the algorithm twice. | ## so that this script don't have to run the algorithm twice. | 
| 966 | my $table = Whatpm::HTMLTable->form_table ($table_el); | my $table = Whatpm::HTMLTable->form_table ($table_el); | 
| 967 |  |  | 
| 968 | for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) { | for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}, | 
| 969 |  | @{$table->{row}}) { | 
| 970 | next unless $_; | next unless $_; | 
| 971 | delete $_->{element}; | delete $_->{element}; | 
| 972 | } | } | 
| 1019 | print STDOUT qq[</dl></div>]; | print STDOUT qq[</dl></div>]; | 
| 1020 | } # print_listing_section | } # print_listing_section | 
| 1021 |  |  | 
| 1022 |  | sub print_uri_section ($$$) { | 
| 1023 |  | my ($input, $uris) = @_; | 
| 1024 |  |  | 
| 1025 |  | ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents), | 
| 1026 |  | ## except for those in RDF triples. | 
| 1027 |  | ## TODO: URIs in CSS | 
| 1028 |  |  | 
| 1029 |  | push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs'] | 
| 1030 |  | unless $input->{nested}; | 
| 1031 |  | print STDOUT qq[ | 
| 1032 |  | <div id="$input->{id_prefix}uris" class="section"> | 
| 1033 |  | <h2>URIs</h2> | 
| 1034 |  |  | 
| 1035 |  | <dl>]; | 
| 1036 |  | for my $uri (sort {$a cmp $b} keys %$uris) { | 
| 1037 |  | my $euri = htescape ($uri); | 
| 1038 |  | print STDOUT qq[<dt><code class=uri><<a href="$euri">$euri</a>></code>]; | 
| 1039 |  | my $eccuri = htescape (get_cc_uri ($uri)); | 
| 1040 |  | print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>]; | 
| 1041 |  | print STDOUT qq[<dd>Found at: <ul>]; | 
| 1042 |  | for my $entry (@{$uris->{$uri}}) { | 
| 1043 |  | print STDOUT qq[<li>], get_node_link ($input, $entry->{node}); | 
| 1044 |  | if (keys %{$entry->{type} or {}}) { | 
| 1045 |  | print STDOUT ' ('; | 
| 1046 |  | print STDOUT join ', ', map { | 
| 1047 |  | { | 
| 1048 |  | hyperlink => 'Hyperlink', | 
| 1049 |  | resource => 'Link to an external resource', | 
| 1050 |  | namespace => 'Namespace URI', | 
| 1051 |  | cite => 'Citation or link to a long description', | 
| 1052 |  | embedded => 'Link to an embedded content', | 
| 1053 |  | base => 'Base URI', | 
| 1054 |  | action => 'Submission URI', | 
| 1055 |  | }->{$_} | 
| 1056 |  | or | 
| 1057 |  | htescape ($_) | 
| 1058 |  | } keys %{$entry->{type}}; | 
| 1059 |  | print STDOUT ')'; | 
| 1060 |  | } | 
| 1061 |  | } | 
| 1062 |  | print STDOUT qq[</ul>]; | 
| 1063 |  | } | 
| 1064 |  | print STDOUT qq[</dl></div>]; | 
| 1065 |  | } # print_uri_section | 
| 1066 |  |  | 
| 1067 |  | sub print_rdf_section ($$$) { | 
| 1068 |  | my ($input, $rdfs) = @_; | 
| 1069 |  |  | 
| 1070 |  | push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF'] | 
| 1071 |  | unless $input->{nested}; | 
| 1072 |  | print STDOUT qq[ | 
| 1073 |  | <div id="$input->{id_prefix}rdf" class="section"> | 
| 1074 |  | <h2>RDF Triples</h2> | 
| 1075 |  |  | 
| 1076 |  | <dl>]; | 
| 1077 |  | my $i = 0; | 
| 1078 |  | for my $rdf (@$rdfs) { | 
| 1079 |  | print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">]; | 
| 1080 |  | print STDOUT get_node_link ($input, $rdf->[0]); | 
| 1081 |  | print STDOUT qq[<dd><dl>]; | 
| 1082 |  | for my $triple (@{$rdf->[1]}) { | 
| 1083 |  | print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>'; | 
| 1084 |  | print STDOUT get_rdf_resource_html ($triple->[1]); | 
| 1085 |  | print STDOUT ' '; | 
| 1086 |  | print STDOUT get_rdf_resource_html ($triple->[2]); | 
| 1087 |  | print STDOUT ' '; | 
| 1088 |  | print STDOUT get_rdf_resource_html ($triple->[3]); | 
| 1089 |  | } | 
| 1090 |  | print STDOUT qq[</dl>]; | 
| 1091 |  | } | 
| 1092 |  | print STDOUT qq[</dl></div>]; | 
| 1093 |  | } # print_rdf_section | 
| 1094 |  |  | 
| 1095 |  | sub get_rdf_resource_html ($) { | 
| 1096 |  | my $resource = shift; | 
| 1097 |  | if (defined $resource->{uri}) { | 
| 1098 |  | my $euri = htescape ($resource->{uri}); | 
| 1099 |  | return '<code class=uri><<a href="' . $euri . '">' . $euri . | 
| 1100 |  | '</a>></code>'; | 
| 1101 |  | } elsif (defined $resource->{bnodeid}) { | 
| 1102 |  | return htescape ('_:' . $resource->{bnodeid}); | 
| 1103 |  | } elsif ($resource->{nodes}) { | 
| 1104 |  | return '(rdf:XMLLiteral)'; | 
| 1105 |  | } elsif (defined $resource->{value}) { | 
| 1106 |  | my $elang = htescape (defined $resource->{language} | 
| 1107 |  | ? $resource->{language} : ''); | 
| 1108 |  | my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>'; | 
| 1109 |  | if (defined $resource->{datatype}) { | 
| 1110 |  | my $euri = htescape ($resource->{datatype}); | 
| 1111 |  | $r .= '^^<code class=uri><<a href="' . $euri . '">' . $euri . | 
| 1112 |  | '</a>></code>'; | 
| 1113 |  | } elsif (length $resource->{language}) { | 
| 1114 |  | $r .= '@' . htescape ($resource->{language}); | 
| 1115 |  | } | 
| 1116 |  | return $r; | 
| 1117 |  | } else { | 
| 1118 |  | return '??'; | 
| 1119 |  | } | 
| 1120 |  | } # get_rdf_resource_html | 
| 1121 |  |  | 
| 1122 | sub print_result_section ($) { | sub print_result_section ($) { | 
| 1123 | my $result = shift; | my $result = shift; | 
| 1124 |  |  | 
| 1344 | while (defined $node) { | while (defined $node) { | 
| 1345 | my $rs; | my $rs; | 
| 1346 | if ($node->node_type == 1) { | if ($node->node_type == 1) { | 
| 1347 | $rs = $node->manakai_local_name; | $rs = $node->node_name; | 
| 1348 | $node = $node->parent_node; | $node = $node->parent_node; | 
| 1349 | } elsif ($node->node_type == 2) { | } elsif ($node->node_type == 2) { | 
| 1350 | $rs = '@' . $node->manakai_local_name; | $rs = '@' . $node->node_name; | 
| 1351 | $node = $node->owner_element; | $node = $node->owner_element; | 
| 1352 | } elsif ($node->node_type == 3) { | } elsif ($node->node_type == 3) { | 
| 1353 | $rs = '"' . $node->data . '"'; | $rs = '"' . $node->data . '"'; | 
| 1425 |  |  | 
| 1426 | } | } | 
| 1427 |  |  | 
| 1428 |  | sub encode_uri_component ($) { | 
| 1429 |  | require Encode; | 
| 1430 |  | my $s = Encode::encode ('utf8', shift); | 
| 1431 |  | $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge; | 
| 1432 |  | return $s; | 
| 1433 |  | } # encode_uri_component | 
| 1434 |  |  | 
| 1435 |  | sub get_cc_uri ($) { | 
| 1436 |  | return './?uri=' . encode_uri_component ($_[0]); | 
| 1437 |  | } # get_cc_uri | 
| 1438 |  |  | 
| 1439 | sub get_input_document ($$) { | sub get_input_document ($$) { | 
| 1440 | my ($http, $dom) = @_; | my ($http, $dom) = @_; | 
| 1441 |  |  |