/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.211 by wakaba, Mon Oct 27 05:44:47 2008 UTC | revision 1.218 by wakaba, Sat Jul 25 03:38:42 2009 UTC
# Line 525  sub parse_byte_stream ($$$$;$$) { Line 525  sub parse_byte_stream ($$$$;$$) {
525            
526      if ($char_stream) { # if supported      if ($char_stream) { # if supported
527        ## "Change the encoding" algorithm:        ## "Change the encoding" algorithm:
   
       ## Step 1      
       if ($charset->{category} &  
           Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {  
         $charset = Message::Charset::Info->get_by_html_name ('utf-8');  
         ($char_stream, $e_status) = $charset->get_decode_handle  
             ($byte_stream,  
              byte_buffer => \ $buffer->{buffer});  
       }  
       $charset_name = $charset->get_iana_name;  
528                
529        ## Step 2        ## Step 1
530        if (defined $self->{input_encoding} and        if (defined $self->{input_encoding} and
531            $self->{input_encoding} eq $charset_name) {            $self->{input_encoding} eq $charset_name) {
532          !!!parse-error (type => 'charset label:matching',          !!!parse-error (type => 'charset label:matching',
# Line 546  sub parse_byte_stream ($$$$;$$) { Line 536  sub parse_byte_stream ($$$$;$$) {
536          return;          return;
537        }        }
538    
539          ## Step 2 (HTML5 revision 3205)
540          if (defined $self->{input_encoding} and
541              Message::Charset::Info->get_by_html_name ($self->{input_encoding})
542              ->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
543            $self->{confident} = 1;
544            return;
545          }
546    
547          ## Step 3
548          if ($charset->{category} &
549              Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
550            $charset = Message::Charset::Info->get_by_html_name ('utf-8');
551            ($char_stream, $e_status) = $charset->get_decode_handle
552                ($byte_stream,
553                 byte_buffer => \ $buffer->{buffer});
554          }
555          $charset_name = $charset->get_iana_name;
556    
557        !!!parse-error (type => 'charset label detected',        !!!parse-error (type => 'charset label detected',
558                        text => $self->{input_encoding},                        text => $self->{input_encoding},
559                        value => $charset_name,                        value => $charset_name,
560                        level => $self->{level}->{warn},                        level => $self->{level}->{warn},
561                        token => $token);                        token => $token);
562                
563        ## Step 3        ## Step 4
564        # if (can) {        # if (can) {
565          ## change the encoding on the fly.          ## change the encoding on the fly.
566          #$self->{confident} = 1;          #$self->{confident} = 1;
567          #return;          #return;
568        # }        # }
569                
570        ## Step 4        ## Step 5
571        throw Whatpm::HTML::RestartParser ();        throw Whatpm::HTML::RestartParser ();
572      }      }
573    }; # $self->{change_encoding}    }; # $self->{change_encoding}
# Line 3829  sub _tree_construction_main ($) { Line 3837  sub _tree_construction_main ($) {
3837            !!!next-token;            !!!next-token;
3838            next B;            next B;
3839          } elsif ({          } elsif ({
3840                     select => 1, input => 1, textarea => 1,                     select => 1, input => 1, textarea => 1, keygen => 1,
3841                   }->{$token->{tag_name}} or                   }->{$token->{tag_name}} or
3842                   (($self->{insertion_mode} & IM_MASK)                   (($self->{insertion_mode} & IM_MASK)
3843                        == IN_SELECT_IN_TABLE_IM and                        == IN_SELECT_IN_TABLE_IM and
# Line 4409  sub _tree_construction_main ($) { Line 4417  sub _tree_construction_main ($) {
4417            next B;            next B;
4418          }          }
4419    
4420          ## has a p element in scope          if ($token->{tag_name} ne 'table' or # The Hixie Quirk
4421          INSCOPE: for (reverse @{$self->{open_elements}}) {              $self->{document}->manakai_compat_mode ne 'quirks') {
4422            if ($_->[1] == P_EL) {            ## has a p element in scope
4423              !!!cp ('t344');            INSCOPE: for (reverse @{$self->{open_elements}}) {
4424              !!!back-token; # <form>              if ($_->[1] == P_EL) {
4425              $token = {type => END_TAG_TOKEN, tag_name => 'p',                !!!cp ('t344');
4426                        line => $token->{line}, column => $token->{column}};                !!!back-token; # <form>
4427              next B;                $token = {type => END_TAG_TOKEN, tag_name => 'p',
4428            } elsif ($_->[1] & SCOPING_EL) {                          line => $token->{line}, column => $token->{column}};
4429              !!!cp ('t345');                next B;
4430              last INSCOPE;              } elsif ($_->[1] & SCOPING_EL) {
4431            }                !!!cp ('t345');
4432          } # INSCOPE                last INSCOPE;
4433                }
4434              } # INSCOPE
4435            }
4436                        
4437          !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);          !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4438          if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {          if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
# Line 4792  sub _tree_construction_main ($) { Line 4803  sub _tree_construction_main ($) {
4803                           line => $token->{line}, column => $token->{column}},                           line => $token->{line}, column => $token->{column}},
4804                          {type => START_TAG_TOKEN, tag_name => 'hr',                          {type => START_TAG_TOKEN, tag_name => 'hr',
4805                           line => $token->{line}, column => $token->{column}},                           line => $token->{line}, column => $token->{column}},
                         {type => START_TAG_TOKEN, tag_name => 'p',  
                          line => $token->{line}, column => $token->{column}},  
4806                          {type => START_TAG_TOKEN, tag_name => 'label',                          {type => START_TAG_TOKEN, tag_name => 'label',
4807                           line => $token->{line}, column => $token->{column}},                           line => $token->{line}, column => $token->{column}},
4808                         );                         );
# Line 4816  sub _tree_construction_main ($) { Line 4825  sub _tree_construction_main ($) {
4825                          #{type => CHARACTER_TOKEN, data => ''}, # SHOULD                          #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4826                          {type => END_TAG_TOKEN, tag_name => 'label',                          {type => END_TAG_TOKEN, tag_name => 'label',
4827                           line => $token->{line}, column => $token->{column}},                           line => $token->{line}, column => $token->{column}},
                         {type => END_TAG_TOKEN, tag_name => 'p',  
                          line => $token->{line}, column => $token->{column}},  
4828                          {type => START_TAG_TOKEN, tag_name => 'hr',                          {type => START_TAG_TOKEN, tag_name => 'hr',
4829                           line => $token->{line}, column => $token->{column}},                           line => $token->{line}, column => $token->{column}},
4830                          {type => END_TAG_TOKEN, tag_name => 'form',                          {type => END_TAG_TOKEN, tag_name => 'form',
# Line 4901  sub _tree_construction_main ($) { Line 4908  sub _tree_construction_main ($) {
4908              last INSCOPE;              last INSCOPE;
4909            }            }
4910          } # INSCOPE          } # INSCOPE
4911              
4912            ## TODO: <non-ruby><rt> is not allowed.
4913    
4914          !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);          !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4915    
# Line 5436  sub _tree_construction_main ($) { Line 5445  sub _tree_construction_main ($) {
5445    ## TODO: script stuffs    ## TODO: script stuffs
5446  } # _tree_construct_main  } # _tree_construct_main
5447    
5448    ## XXX: How this method is organized is somewhat out of date, although
5449    ## it still does what the current spec documents.
5450  sub set_inner_html ($$$$;$) {  sub set_inner_html ($$$$;$) {
5451    my $class = shift;    my $class = shift;
5452    my $node = shift;    my $node = shift; # /context/
5453    #my $s = \$_[0];    #my $s = \$_[0];
5454    my $onerror = $_[1];    my $onerror = $_[1];
5455    my $get_wrapper = $_[2] || sub ($) { return $_[0] };    my $get_wrapper = $_[2] || sub ($) { return $_[0] };
# Line 5446  sub set_inner_html ($$$$;$) { Line 5457  sub set_inner_html ($$$$;$) {
5457    ## ISSUE: Should {confident} be true?    ## ISSUE: Should {confident} be true?
5458    
5459    my $nt = $node->node_type;    my $nt = $node->node_type;
5460    if ($nt == 9) {    if ($nt == 9) { # Document (invoke the algorithm with no /context/ element)
5461      # MUST      # MUST
5462            
5463      ## Step 1 # MUST      ## Step 1 # MUST
# Line 5461  sub set_inner_html ($$$$;$) { Line 5472  sub set_inner_html ($$$$;$) {
5472    
5473      ## Step 3, 4, 5 # MUST      ## Step 3, 4, 5 # MUST
5474      $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);      $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
5475    } elsif ($nt == 1) {    } elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element)
5476      ## TODO: If non-html element      ## TODO: If non-html element
5477    
5478      ## NOTE: Most of this code is copied from |parse_string|      ## NOTE: Most of this code is copied from |parse_string|
5479    
5480  ## TODO: Support for $get_wrapper  ## TODO: Support for $get_wrapper
5481    
5482      ## Step 1 # MUST      ## F1. Create an HTML document.
5483      my $this_doc = $node->owner_document;      my $this_doc = $node->owner_document;
5484      my $doc = $this_doc->implementation->create_document;      my $doc = $this_doc->implementation->create_document;
5485      $doc->manakai_is_html (1);      $doc->manakai_is_html (1);
5486    
5487        ## F2. Propagate quirkness flag
5488        my $node_doc = $node->owner_document;
5489        $doc->manakai_compat_mode ($node_doc->manakai_compat_mode);
5490    
5491        ## F3. Create an HTML parser
5492      my $p = $class->new;      my $p = $class->new;
5493      $p->{document} = $doc;      $p->{document} = $doc;
5494    
# Line 5599  sub set_inner_html ($$$$;$) { Line 5616  sub set_inner_html ($$$$;$) {
5616      $p->_initialize_tokenizer;      $p->_initialize_tokenizer;
5617      $p->_initialize_tree_constructor;      $p->_initialize_tree_constructor;
5618    
5619      ## Step 2      ## F4. If /context/ is not undef...
5620    
5621        ## F4.1. content model flag
5622      my $node_ln = $node->manakai_local_name;      my $node_ln = $node->manakai_local_name;
5623      $p->{content_model} = {      $p->{content_model} = {
5624        title => RCDATA_CONTENT_MODEL,        title => RCDATA_CONTENT_MODEL,
# Line 5619  sub set_inner_html ($$$$;$) { Line 5638  sub set_inner_html ($$$$;$) {
5638      $p->{inner_html_node} = [$node, $el_category->{$node_ln}];      $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
5639        ## TODO: Foreign element OK?        ## TODO: Foreign element OK?
5640    
5641      ## Step 3      ## F4.2. Root |html| element
5642      my $root = $doc->create_element_ns      my $root = $doc->create_element_ns
5643        ('http://www.w3.org/1999/xhtml', [undef, 'html']);        ('http://www.w3.org/1999/xhtml', [undef, 'html']);
5644    
5645      ## Step 4 # MUST      ## F4.3.
5646      $doc->append_child ($root);      $doc->append_child ($root);
5647    
5648      ## Step 5 # MUST      ## F4.4.
5649      push @{$p->{open_elements}}, [$root, $el_category->{html}];      push @{$p->{open_elements}}, [$root, $el_category->{html}];
5650    
5651      undef $p->{head_element};      undef $p->{head_element};
5652      undef $p->{head_element_inserted};      undef $p->{head_element_inserted};
5653    
5654      ## Step 6 # MUST      ## F4.5.
5655      $p->_reset_insertion_mode;      $p->_reset_insertion_mode;
5656    
5657      ## Step 7 # MUST      ## F4.6.
5658      my $anode = $node;      my $anode = $node;
5659      AN: while (defined $anode) {      AN: while (defined $anode) {
5660        if ($anode->node_type == 1) {        if ($anode->node_type == 1) {
# Line 5650  sub set_inner_html ($$$$;$) { Line 5669  sub set_inner_html ($$$$;$) {
5669        }        }
5670        $anode = $anode->parent_node;        $anode = $anode->parent_node;
5671      } # AN      } # AN
5672        
5673      ## Step 9 # MUST      ## F.6. Start the parser.
5674      {      {
5675        my $self = $p;        my $self = $p;
5676        !!!next-token;        !!!next-token;
5677      }      }
5678      $p->_tree_construction_main;      $p->_tree_construction_main;
5679    
5680      ## Step 10 # MUST      ## F.7.
5681      my @cn = @{$node->child_nodes};      my @cn = @{$node->child_nodes};
5682      for (@cn) {      for (@cn) {
5683        $node->remove_child ($_);        $node->remove_child ($_);

Legend:
Removed from v.1.211  
changed lines
  Added in v.1.218

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24