207 |
$self->{must_level} = 'm'; |
$self->{must_level} = 'm'; |
208 |
$self->{fact_level} = 'f'; |
$self->{fact_level} = 'f'; |
209 |
$self->{should_level} = 's'; |
$self->{should_level} = 's'; |
210 |
$self->{good_level} = 'g'; |
$self->{good_level} = 'w'; |
211 |
|
|
212 |
my $docel = $doc->document_element; |
my $docel = $doc->document_element; |
213 |
unless (defined $docel) { |
unless (defined $docel) { |
249 |
## TODO: Check for other items other than document element |
## TODO: Check for other items other than document element |
250 |
## (second (errorous) element, text nodes, PI nodes, doctype nodes) |
## (second (errorous) element, text nodes, PI nodes, doctype nodes) |
251 |
|
|
252 |
return $self->check_element ($docel, $onerror); |
my $return = $self->check_element ($docel, $onerror); |
253 |
|
|
254 |
|
my $charset_name = $doc->input_encoding; |
255 |
|
if (defined $charset_name) { |
256 |
|
require Message::Charset::Info; |
257 |
|
my $charset = $Message::Charset::Info::IANACharset->{$charset_name}; |
258 |
|
|
259 |
|
if ($doc->manakai_is_html and |
260 |
|
not $doc->manakai_has_bom and |
261 |
|
not defined $doc->manakai_charset) { |
262 |
|
unless ($charset->{is_html_ascii_superset}) { |
263 |
|
$onerror->(node => $doc, level => $self->{must_level}, |
264 |
|
type => 'non ascii superset:'.$charset_name); |
265 |
|
} |
266 |
|
|
267 |
|
if (not $self->{has_charset} and |
268 |
|
not $charset->{iana_names}->{'us-ascii'}) { |
269 |
|
$onerror->(node => $doc, level => $self->{must_level}, |
270 |
|
type => 'no character encoding declaration:'.$charset_name); |
271 |
|
} |
272 |
|
} |
273 |
|
|
274 |
|
if ($charset->{iana_names}->{'utf-8'}) { |
275 |
|
# |
276 |
|
} elsif ($charset->{iana_names}->{'jis_x0212-1990'} or |
277 |
|
$charset->{iana_names}->{'x-jis0208'} or |
278 |
|
$charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE? |
279 |
|
$charset->{is_ebcdic_based}) { |
280 |
|
$onerror->(node => $doc, |
281 |
|
type => 'character encoding:'.$charset_name, |
282 |
|
level => $self->{should_level}); |
283 |
|
} elsif ($charset->{iana_names}->{'cesu-8'} or |
284 |
|
$charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7? |
285 |
|
$charset->{iana_names}->{'bocu-1'} or |
286 |
|
$charset->{iana_names}->{'scsu'}) { |
287 |
|
$onerror->(node => $doc, |
288 |
|
type => 'character encoding:'.$charset_name, |
289 |
|
level => $self->{must_level}); |
290 |
|
} else { |
291 |
|
$onerror->(node => $doc, |
292 |
|
type => 'character encoding:'.$charset_name, |
293 |
|
level => $self->{good_level}); |
294 |
|
} |
295 |
|
} |
296 |
|
|
297 |
|
return $return; |
298 |
} # check_document |
} # check_document |
299 |
|
|
300 |
sub check_element ($$$) { |
sub check_element ($$$) { |
305 |
$self->{must_level} = 'm'; |
$self->{must_level} = 'm'; |
306 |
$self->{fact_level} = 'f'; |
$self->{fact_level} = 'f'; |
307 |
$self->{should_level} = 's'; |
$self->{should_level} = 's'; |
308 |
$self->{good_level} = 'g'; |
$self->{good_level} = 'w'; |
309 |
|
|
310 |
$self->{pluses} = {}; |
$self->{pluses} = {}; |
311 |
$self->{minuses} = {}; |
$self->{minuses} = {}; |
318 |
$self->{has_link_type} = {}; |
$self->{has_link_type} = {}; |
319 |
#$self->{has_uri_attr}; |
#$self->{has_uri_attr}; |
320 |
#$self->{has_hyperlink_element}; |
#$self->{has_hyperlink_element}; |
321 |
|
#$self->{has_charset}; |
322 |
$self->{return} = { |
$self->{return} = { |
323 |
class => {}, |
class => {}, |
324 |
id => $self->{id}, table => [], term => $self->{term}, |
id => $self->{id}, table => [], term => $self->{term}, |