package Whatpm::CSS::Parser; use strict; use Whatpm::CSS::Tokenizer qw(:token); require Whatpm::CSS::SelectorsParser; require Whatpm::CSS::MediaQueryParser; sub new ($) { my $self = bless { level => { must => 'm', uncertain => 'u', }, lookup_namespace_uri => sub { undef }, }, shift; # $self->{base_uri} # $self->{unitless_px} = 1/0 # $self->{hashless_rgb} = 1/0 ## Default error handler $self->{onerror} = sub { my %opt = @_; require Carp; Carp::carp (sprintf 'Document <%s>: Line %d column %d (token %s): %s%s', defined $opt{uri} ? ${$opt{uri}} : 'thisdocument:/', $opt{token}->{line}, $opt{token}->{column}, Whatpm::CSS::Tokenizer->serialize_token ($opt{token}), $opt{type}, defined $opt{value} ? " (value $opt{value})" : ''); }; ## Media-dependent RGB color range clipper $self->{clip_color} = sub { shift; #my $self = shift; my $value = shift; if (defined $value and $value->[0] eq 'RGBA') { my ($r, $g, $b) = @$value[1, 2, 3]; $r = 0 if $r < 0; $r = 255 if $r > 255; $g = 0 if $g < 0; $g = 255 if $g > 255; $b = 0 if $b < 0; $b = 255 if $b > 255; return ['RGBA', $r, $g, $b, $value->[4]]; } return $value; }; ## System dependent font expander $self->{get_system_font} = sub { #my ($self, $normalized_system_font_name, $font_properties) = @_; ## Modify $font_properties hash (except for 'font-family' property). return $_[2]; }; return $self; } # new sub BEFORE_STATEMENT_STATE () { 0 } sub BEFORE_DECLARATION_STATE () { 1 } sub IGNORED_STATEMENT_STATE () { 2 } sub IGNORED_DECLARATION_STATE () { 3 } our $Prop; ## By CSS property name our $Attr; ## By CSSOM attribute name our $Key; ## By internal key sub init ($) { my $self = shift; delete $self->{style_sheet}; delete $self->{unitless_px}; delete $self->{hashless_rgb}; delete $self->{href}; delete $self->{base_uri}; } # init sub parse_char_string ($$) { my $self = $_[0]; my $s = $_[1]; pos ($s) = 0; my $line = 1; my $column = 0; my $tt = Whatpm::CSS::Tokenizer->new; my $onerror = $tt->{onerror} = $self->{onerror}; $tt->{get_char} = sub ($) { if (pos $s < length $s) { my $c = ord substr $s, pos ($s)++, 1; if ($c == 0x000A) { $line++; $column = 0; } elsif ($c == 0x000D) { unless (substr ($s, pos ($s), 1) eq "\x0A") { $line++; $column = 0; } else { $column++; } } else { $column++; } $_[0]->{line} = $line; $_[0]->{column} = $column; return $c; } else { $_[0]->{column} = $column + 1; ## Set the same number always. return -1; } }; # $tt->{get_char} $tt->init; my $sp = Whatpm::CSS::SelectorsParser->new; $sp->{onerror} = $self->{onerror}; $sp->{level} = $self->{level}; $sp->{pseudo_element} = $self->{pseudo_element}; $sp->{pseudo_class} = $self->{pseudo_class}; my $mp = Whatpm::CSS::MediaQueryParser->new; $mp->{onerror} = $self->{onerror}; $mp->{level} = $self->{level}; my $nsmap = {prefix_to_uri => {}, uri_to_prefixes => {}}; # $nsmap->{prefix_to_uri}->{p/""} = uri/undef # $nsmap->{uri_to_prefixes}->{uri} = ["p|"/"",...]/undef # $nsmap->{has_namespace} = 1/0 $self->{lookup_namespace_uri} = $sp->{lookup_namespace_uri} = sub { ## TODO: case return $nsmap->{prefix_to_uri}->{lc $_[0]}; # $_[0] is '' (default) or prefix }; # $sp->{lookup_namespace_uri} require Message::DOM::CSSStyleSheet; require Message::DOM::CSSRule; require Message::DOM::CSSStyleDeclaration; $self->{base_uri} = $self->{href} unless defined $self->{base_uri}; $sp->{href} = $self->{href}; my $state = BEFORE_STATEMENT_STATE; my $t = $tt->get_next_token; my $open_rules = [[]]; my $current_rules = $open_rules->[-1]; my $parent_rules = []; my $current_decls; my $closing_tokens = []; my $charset_allowed = 1; my $namespace_allowed = 1; my $import_allowed = 1; my $media_allowed = 1; my $ss = $self->{style_sheet} ||= Message::DOM::CSSStyleSheet->____new (css_rules => $open_rules->[0], ## TODO: href ## TODO: owner_node ## TODO: media type => 'text/css'); ## TODO: OK? $$ss->{manakai_base_uri} = $self->{base_uri}; $$ss->{_parser} = $self; $$ss->{_nsmap} = $nsmap; S: { if ($state == BEFORE_STATEMENT_STATE) { $t = $tt->get_next_token while $t->{type} == S_TOKEN or $t->{type} == CDO_TOKEN or $t->{type} == CDC_TOKEN; if ($t->{type} == ATKEYWORD_TOKEN) { my $at_rule_name = lc $t->{value}; ## TODO: case if ($at_rule_name eq 'namespace') { $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; my $prefix; if ($t->{type} == IDENT_TOKEN) { $prefix = lc $t->{value}; ## TODO: case (Unicode lowercase) $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; } if ($t->{type} == STRING_TOKEN or $t->{type} == URI_TOKEN) { my $uri = $t->{value}; $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; ## ISSUE: On handling of empty namespace URI, Firefox 2 and ## Opera 9 work differently (See SuikaWiki:namespace). ## TODO: We need to check what we do once it is specced. if ($t->{type} == SEMICOLON_TOKEN) { if ($namespace_allowed) { my $p = $prefix; $nsmap->{has_namespace} = 1; if (defined $prefix) { $nsmap->{prefix_to_uri}->{$prefix} = $uri; $p .= '|'; } else { $nsmap->{prefix_to_uri}->{''} = $uri; $p = ''; } for my $u (keys %{$nsmap->{uri_to_prefixes}}) { next if $u eq $uri; my $list = $nsmap->{uri_to_prefixes}->{$u}; next unless $list; for (reverse 0..$#$list) { splice @$list, $_, 1, () if $list->[$_] eq $p; } } push @{$nsmap->{uri_to_prefixes}->{$uri} ||= []}, $p; push @$current_rules, Message::DOM::CSSNamespaceRule->____new ($prefix, $uri); undef $charset_allowed; undef $import_allowed; } else { $onerror->(type => 'at-rule not allowed', text => 'namespace', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); } $t = $tt->get_next_token; ## Stay in the state. redo S; } else { # } } else { # } $onerror->(type => 'at-rule syntax error', text => 'namespace', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); # } elsif ($at_rule_name eq 'import') { if ($import_allowed) { $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; my $mq = []; if ($t->{type} == STRING_TOKEN or $t->{type} == URI_TOKEN) { my $uri = $t->{value}; $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; if ($t->{type} == IDENT_TOKEN or $t->{type} == DIMENSION_TOKEN or $t->{type} == NUMBER_TOKEN or $t->{type} == LPAREN_TOKEN) { ($t, $mq) = $mp->_parse_mq_with_tokenizer ($t, $tt); $t = $tt->get_next_token while $t->{type} == S_TOKEN; } if ($mq and $t->{type} == SEMICOLON_TOKEN) { ## TODO: error or warning ## TODO: White space definition $uri =~ s/^[\x09\x0A\x0D\x20]+//; $uri =~ s/[\x09\x0A\x0D\x20]+\z//; my $imported = Message::DOM::CSSStyleSheet->____new (css_rules => [], ## TODO: href parent_style_sheet => $ss, ## TODO: media type => 'text/css', ## TODO: OK? _parser => $self, _nsmap => {}); my $rule = Message::DOM::CSSImportRule->____new ($uri, \($self->{base_uri}), $mq, $ss); $$imported->{owner_rule} = $rule; Scalar::Util::weaken ($$imported->{owner_rule}); Scalar::Util::weaken ($$imported->{parent_style_sheet}); push @$current_rules, $rule; undef $charset_allowed; $t = $tt->get_next_token; ## Stay in the state. redo S; } } $onerror->(type => 'at-rule syntax error', text => 'import', level => $self->{level}->{must}, uri => \$self->{href}, token => $t) if defined $mq; ## NOTE: Otherwise, already raised in MQ parser # } else { $onerror->(type => 'at-rule not allowed', text => 'import', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); # } } elsif ($at_rule_name eq 'media') { if ($media_allowed) { $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; my $q; ($t, $q) = $mp->_parse_mq_with_tokenizer ($t, $tt); if ($q) { if ($t->{type} == LBRACE_TOKEN) { undef $charset_allowed; undef $namespace_allowed; undef $import_allowed; undef $media_allowed; my $rule = Message::DOM::CSSMediaRule->____new ($q, my $v = []); push @$current_rules, $rule; push @$parent_rules, $rule; push @$open_rules, $current_rules = $v; $t = $tt->get_next_token; ## Stay in the state. redo S; } else { $onerror->(type => 'at-rule syntax error', text => 'media', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); } # } # } else { ## Nested @media rule $onerror->(type => 'at-rule not allowed', text => 'media', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); # } } elsif ($at_rule_name eq 'charset') { if ($charset_allowed) { $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; if ($t->{type} == STRING_TOKEN) { my $encoding = $t->{value}; $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; if ($t->{type} == SEMICOLON_TOKEN) { push @$current_rules, Message::DOM::CSSCharsetRule->____new ($encoding); undef $charset_allowed; ## TODO: Detect the conformance errors for @charset... $t = $tt->get_next_token; ## Stay in the state. redo S; } else { # } } else { # } $onerror->(type => 'at-rule syntax error', text => 'charset', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); # } else { $onerror->(type => 'at-rule not allowed', text => 'charset', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); # } } else { $onerror->(type => 'unknown at-rule', level => $self->{level}->{uncertain}, uri => \$self->{href}, token => $t, value => $t->{value}); } ## Reprocess. #$t = $tt->get_next_token; $state = IGNORED_STATEMENT_STATE; redo S; } elsif (@$open_rules > 1 and $t->{type} == RBRACE_TOKEN) { pop @$open_rules; $media_allowed = 1; $current_rules = $open_rules->[-1]; ## Stay in the state. $t = $tt->get_next_token; redo S; } elsif ($t->{type} == EOF_TOKEN) { if (@$open_rules > 1) { $onerror->(type => 'block not closed', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); } last S; } else { undef $charset_allowed; undef $namespace_allowed; undef $import_allowed; ($t, my $selectors) = $sp->_parse_selectors_with_tokenizer ($tt, LBRACE_TOKEN, $t); $t = $tt->get_next_token while $t->{type} != LBRACE_TOKEN and $t->{type} != EOF_TOKEN; if ($t->{type} == LBRACE_TOKEN) { $current_decls = Message::DOM::CSSStyleDeclaration->____new; my $rs = Message::DOM::CSSStyleRule->____new ($selectors, $current_decls); push @{$current_rules}, $rs if defined $selectors; $state = BEFORE_DECLARATION_STATE; $t = $tt->get_next_token; redo S; } else { $onerror->(type => 'no declaration block', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); ## Stay in the state. $t = $tt->get_next_token; redo S; } } } elsif ($state == BEFORE_DECLARATION_STATE) { ## NOTE: DELIM? in declaration will be removed: ## . my $prop_def; my $prop_value; my $prop_flag = ''; $t = $tt->get_next_token while $t->{type} == S_TOKEN; if ($t->{type} == IDENT_TOKEN) { # property my $prop_name = lc $t->{value}; ## TODO: case folding my $prop_name_t = $t; $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; if ($t->{type} == COLON_TOKEN) { $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; $prop_def = $Prop->{$prop_name}; if ($prop_def and $self->{prop}->{$prop_name}) { ($t, $prop_value) = $prop_def->{parse}->($self, $prop_name, $tt, $t, $onerror); if ($prop_value) { ## NOTE: {parse} don't have to consume trailing spaces. $t = $tt->get_next_token while $t->{type} == S_TOKEN; if ($t->{type} == EXCLAMATION_TOKEN) { $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; if ($t->{type} == IDENT_TOKEN and lc $t->{value} eq 'important') { ## TODO: case folding $prop_flag = 'important'; $t = $tt->get_next_token; $t = $tt->get_next_token while $t->{type} == S_TOKEN; # } else { $onerror->(type => 'priority syntax error', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); ## Reprocess. $state = IGNORED_DECLARATION_STATE; redo S; } } # } else { ## Syntax error. ## Reprocess. $state = IGNORED_DECLARATION_STATE; redo S; } } else { $onerror->(type => 'unknown property', level => $self->{level}->{uncertain}, token => $prop_name_t, value => $prop_name, uri => \$self->{href}); # $state = IGNORED_DECLARATION_STATE; redo S; } } else { $onerror->(type => 'no property colon', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); # $state = IGNORED_DECLARATION_STATE; redo S; } } if ($t->{type} == RBRACE_TOKEN) { $t = $tt->get_next_token; $state = BEFORE_STATEMENT_STATE; #redo S; } elsif ($t->{type} == SEMICOLON_TOKEN) { $t = $tt->get_next_token; ## Stay in the state. #redo S; } elsif ($t->{type} == EOF_TOKEN) { $onerror->(type => 'block not closed', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); ## Reprocess. $state = BEFORE_STATEMENT_STATE; #redo S; } else { if ($prop_value) { $onerror->(type => 'no property semicolon', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); } else { $onerror->(type => 'no property name', level => $self->{level}->{must}, uri => \$self->{href}, token => $t); } # $state = IGNORED_DECLARATION_STATE; redo S; } my $important = ($prop_flag eq 'important'); for my $set_prop_name (keys %{$prop_value or {}}) { my $set_prop_def = $Prop->{$set_prop_name}; $$current_decls->{$set_prop_def->{key}} = [$prop_value->{$set_prop_name}, $prop_flag] if $important or not $$current_decls->{$set_prop_def->{key}} or $$current_decls->{$set_prop_def->{key}}->[1] ne 'important'; } redo S; } elsif ($state == IGNORED_STATEMENT_STATE or $state == IGNORED_DECLARATION_STATE) { if (@$closing_tokens) { ## Something is yet in opening state. if ($t->{type} == EOF_TOKEN) { @$closing_tokens = (); ## Reprocess. $state = $state == IGNORED_STATEMENT_STATE ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE; redo S; } elsif ($t->{type} == $closing_tokens->[-1]) { pop @$closing_tokens; if (@$closing_tokens == 0 and $t->{type} == RBRACE_TOKEN and $state == IGNORED_STATEMENT_STATE) { $t = $tt->get_next_token; $state = BEFORE_STATEMENT_STATE; redo S; } else { $t = $tt->get_next_token; ## Stay in the state. redo S; } } elsif ({ RBRACE_TOKEN, 1, #RBRACKET_TOKEN, 1, #RPAREN_TOKEN, 1, SEMICOLON_TOKEN, 1, }->{$t->{type}}) { $t = $tt->get_next_token; ## Stay in the state. # } else { # } } else { if ($t->{type} == SEMICOLON_TOKEN) { $t = $tt->get_next_token; $state = $state == IGNORED_STATEMENT_STATE ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE; redo S; } elsif ($t->{type} == RBRACE_TOKEN) { if ($state == IGNORED_DECLARATION_STATE) { $t = $tt->get_next_token; $state = BEFORE_STATEMENT_STATE; redo S; } else { ## NOTE: Maybe this state cannot be reached. $t = $tt->get_next_token; ## Stay in the state. redo S; } } elsif ($t->{type} == EOF_TOKEN) { ## Reprocess. $state = $state == IGNORED_STATEMENT_STATE ? BEFORE_STATEMENT_STATE : BEFORE_DECLARATION_STATE; redo S; #} elsif ($t->{type} == RBRACKET_TOKEN or $t->{type} == RPAREN_TOKEN) { # $t = $tt->get_next_token; # ## Stay in the state. # # } else { # } } while (not { EOF_TOKEN, 1, RBRACE_TOKEN, 1, ## NOTE: ']' and ')' are disabled for browser compatibility. #RBRACKET_TOKEN, 1, #RPAREN_TOKEN, 1, SEMICOLON_TOKEN, 1, }->{$t->{type}}) { if ($t->{type} == LBRACE_TOKEN) { push @$closing_tokens, RBRACE_TOKEN; #} elsif ($t->{type} == LBRACKET_TOKEN) { # push @$closing_tokens, RBRACKET_TOKEN; #} elsif ($t->{type} == LPAREN_TOKEN or $t->{type} == FUNCTION_TOKEN) { # push @$closing_tokens, RPAREN_TOKEN; } $t = $tt->get_next_token; } # ## Stay in the state. redo S; } else { die "$0: parse_char_string: Unknown state: $state"; } } # S for (@{$$ss->{css_rules}}) { $$_->{parent_style_sheet} = $ss; Scalar::Util::weaken ($$_->{parent_style_sheet}); } for my $parent_rule (@$parent_rules) { for (@{$$parent_rule->{css_rules}}) { $$_->{parent_rule} = $parent_rule; Scalar::Util::weaken ($$_->{parent_rule}); } } return $ss; } # parse_char_string ## TODO: Test