/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.83 by wakaba, Wed Mar 5 13:07:02 2008 UTC | revision 1.84 by wakaba, Thu Mar 6 15:23:17 2008 UTC
# Line 304  sub ROW_IMS ()        { 0b10000000 } Line 304  sub ROW_IMS ()        { 0b10000000 }
304  sub BODY_AFTER_IMS () { 0b100000000 }  sub BODY_AFTER_IMS () { 0b100000000 }
305  sub FRAME_IMS ()      { 0b1000000000 }  sub FRAME_IMS ()      { 0b1000000000 }
306    
307    ## NOTE: "initial" and "before html" insertion modes have no constants.
308    
309    ## NOTE: "after after body" insertion mode.
310  sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }  sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }
311    
312    ## NOTE: "after after frameset" insertion mode.
313  sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }  sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }
314    
315  sub IN_HEAD_IM () { HEAD_IMS | 0b00 }  sub IN_HEAD_IM () { HEAD_IMS | 0b00 }
316  sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 }  sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 }
317  sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 }  sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 }
# Line 2420  sub _construct_tree ($) { Line 2426  sub _construct_tree ($) {
2426        
2427    !!!next-token;    !!!next-token;
2428    
   $self->{insertion_mode} = BEFORE_HEAD_IM;  
2429    undef $self->{form_element};    undef $self->{form_element};
2430    undef $self->{head_element};    undef $self->{head_element};
2431    $self->{open_elements} = [];    $self->{open_elements} = [];
2432    undef $self->{inner_html_node};    undef $self->{inner_html_node};
2433    
2434      ## NOTE: The "initial" insertion mode.
2435    $self->_tree_construction_initial; # MUST    $self->_tree_construction_initial; # MUST
2436    
2437      ## NOTE: The "before html" insertion mode.
2438    $self->_tree_construction_root_element;    $self->_tree_construction_root_element;
2439      $self->{insertion_mode} = BEFORE_HEAD_IM;
2440    
2441      ## NOTE: The "before head" insertion mode and so on.
2442    $self->_tree_construction_main;    $self->_tree_construction_main;
2443  } # _construct_tree  } # _construct_tree
2444    
2445  sub _tree_construction_initial ($) {  sub _tree_construction_initial ($) {
2446    my $self = shift;    my $self = shift;
2447    
2448      ## NOTE: "initial" insertion mode
2449    
2450    INITIAL: {    INITIAL: {
2451      if ($token->{type} == DOCTYPE_TOKEN) {      if ($token->{type} == DOCTYPE_TOKEN) {
2452        ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"        ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
# Line 2579  sub _tree_construction_initial ($) { Line 2593  sub _tree_construction_initial ($) {
2593          !!!cp ('t13');          !!!cp ('t13');
2594        }        }
2595                
2596        ## Go to the root element phase.        ## Go to the "before html" insertion mode.
2597        !!!next-token;        !!!next-token;
2598        return;        return;
2599      } elsif ({      } elsif ({
# Line 2590  sub _tree_construction_initial ($) { Line 2604  sub _tree_construction_initial ($) {
2604        !!!cp ('t14');        !!!cp ('t14');
2605        !!!parse-error (type => 'no DOCTYPE');        !!!parse-error (type => 'no DOCTYPE');
2606        $self->{document}->manakai_compat_mode ('quirks');        $self->{document}->manakai_compat_mode ('quirks');
2607        ## Go to the root element phase        ## Go to the "before html" insertion mode.
2608        ## reprocess        ## reprocess
2609        return;        return;
2610      } elsif ($token->{type} == CHARACTER_TOKEN) {      } elsif ($token->{type} == CHARACTER_TOKEN) {
# Line 2599  sub _tree_construction_initial ($) { Line 2613  sub _tree_construction_initial ($) {
2613    
2614          unless (length $token->{data}) {          unless (length $token->{data}) {
2615            !!!cp ('t15');            !!!cp ('t15');
2616            ## Stay in the phase            ## Stay in the insertion mode.
2617            !!!next-token;            !!!next-token;
2618            redo INITIAL;            redo INITIAL;
2619          } else {          } else {
# Line 2611  sub _tree_construction_initial ($) { Line 2625  sub _tree_construction_initial ($) {
2625    
2626        !!!parse-error (type => 'no DOCTYPE');        !!!parse-error (type => 'no DOCTYPE');
2627        $self->{document}->manakai_compat_mode ('quirks');        $self->{document}->manakai_compat_mode ('quirks');
2628        ## Go to the root element phase        ## Go to the "before html" insertion mode.
2629        ## reprocess        ## reprocess
2630        return;        return;
2631      } elsif ($token->{type} == COMMENT_TOKEN) {      } elsif ($token->{type} == COMMENT_TOKEN) {
# Line 2619  sub _tree_construction_initial ($) { Line 2633  sub _tree_construction_initial ($) {
2633        my $comment = $self->{document}->create_comment ($token->{data});        my $comment = $self->{document}->create_comment ($token->{data});
2634        $self->{document}->append_child ($comment);        $self->{document}->append_child ($comment);
2635                
2636        ## Stay in the phase.        ## Stay in the insertion mode.
2637        !!!next-token;        !!!next-token;
2638        redo INITIAL;        redo INITIAL;
2639      } else {      } else {
# Line 2632  sub _tree_construction_initial ($) { Line 2646  sub _tree_construction_initial ($) {
2646    
2647  sub _tree_construction_root_element ($) {  sub _tree_construction_root_element ($) {
2648    my $self = shift;    my $self = shift;
2649    
2650      ## NOTE: "before html" insertion mode.
2651        
2652    B: {    B: {
2653        if ($token->{type} == DOCTYPE_TOKEN) {        if ($token->{type} == DOCTYPE_TOKEN) {
2654          !!!cp ('t19');          !!!cp ('t19');
2655          !!!parse-error (type => 'in html:#DOCTYPE');          !!!parse-error (type => 'in html:#DOCTYPE');
2656          ## Ignore the token          ## Ignore the token
2657          ## Stay in the phase          ## Stay in the insertion mode.
2658          !!!next-token;          !!!next-token;
2659          redo B;          redo B;
2660        } elsif ($token->{type} == COMMENT_TOKEN) {        } elsif ($token->{type} == COMMENT_TOKEN) {
2661          !!!cp ('t20');          !!!cp ('t20');
2662          my $comment = $self->{document}->create_comment ($token->{data});          my $comment = $self->{document}->create_comment ($token->{data});
2663          $self->{document}->append_child ($comment);          $self->{document}->append_child ($comment);
2664          ## Stay in the phase          ## Stay in the insertion mode.
2665          !!!next-token;          !!!next-token;
2666          redo B;          redo B;
2667        } elsif ($token->{type} == CHARACTER_TOKEN) {        } elsif ($token->{type} == CHARACTER_TOKEN) {
# Line 2654  sub _tree_construction_root_element ($) Line 2670  sub _tree_construction_root_element ($)
2670    
2671            unless (length $token->{data}) {            unless (length $token->{data}) {
2672              !!!cp ('t21');              !!!cp ('t21');
2673              ## Stay in the phase              ## Stay in the insertion mode.
2674              !!!next-token;              !!!next-token;
2675              redo B;              redo B;
2676            } else {            } else {
# Line 2668  sub _tree_construction_root_element ($) Line 2684  sub _tree_construction_root_element ($)
2684    
2685          #          #
2686        } elsif ($token->{type} == START_TAG_TOKEN) {        } elsif ($token->{type} == START_TAG_TOKEN) {
2687          if ($token->{tag_name} eq 'html' and          if ($token->{tag_name} eq 'html') {
2688              $token->{attributes}->{manifest}) {            my $root_element;
2689            !!!cp ('t24');            !!!create-element ($root_element, $token->{tag_name}, $token->{attributes});
2690            $self->{application_cache_selection}            $self->{document}->append_child ($root_element);
2691                 ->($token->{attributes}->{manifest}->{value});            push @{$self->{open_elements}}, [$root_element, 'html'];
2692            ## ISSUE: No relative reference resolution?  
2693              if ($token->{attributes}->{manifest}) {
2694                !!!cp ('t24');
2695                $self->{application_cache_selection}
2696                    ->($token->{attributes}->{manifest}->{value});
2697                ## ISSUE: No relative reference resolution?
2698              } else {
2699                !!!cp ('t25');
2700                $self->{application_cache_selection}->(undef);
2701              }
2702    
2703              !!!next-token;
2704              return; ## Go to the "before head" insertion mode.
2705          } else {          } else {
2706            !!!cp ('t25');            !!!cp ('t25.1');
2707            $self->{application_cache_selection}->(undef);            #
2708          }          }
   
         ## ISSUE: There is an issue in the spec  
         #  
2709        } elsif ({        } elsif ({
2710                  END_TAG_TOKEN, 1,                  END_TAG_TOKEN, 1,
2711                  END_OF_FILE_TOKEN, 1,                  END_OF_FILE_TOKEN, 1,
2712                 }->{$token->{type}}) {                 }->{$token->{type}}) {
2713          !!!cp ('t26');          !!!cp ('t26');
         $self->{application_cache_selection}->(undef);  
   
         ## ISSUE: There is an issue in the spec  
2714          #          #
2715        } else {        } else {
2716          die "$0: $token->{type}: Unknown token type";          die "$0: $token->{type}: Unknown token type";
2717        }        }
2718    
2719        my $root_element; !!!create-element ($root_element, 'html');      my $root_element; !!!create-element ($root_element, 'html');
2720        $self->{document}->append_child ($root_element);      $self->{document}->append_child ($root_element);
2721        push @{$self->{open_elements}}, [$root_element, 'html'];      push @{$self->{open_elements}}, [$root_element, 'html'];
2722        ## reprocess  
2723        #redo B;      $self->{application_cache_selection}->(undef);
2724        return; ## Go to the main phase.  
2725        ## NOTE: Reprocess the token.
2726        return; ## Go to the "before head" insertion mode.
2727    
2728        ## ISSUE: There is an issue in the spec
2729    } # B    } # B
2730    
2731    die "$0: _tree_construction_root_element: This should never be reached";    die "$0: _tree_construction_root_element: This should never be reached";
# Line 3274  sub _tree_construction_main ($) { Line 3300  sub _tree_construction_main ($) {
3300               $token->{tag_name} eq 'html') {               $token->{tag_name} eq 'html') {
3301        if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {        if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3302          !!!cp ('t79');          !!!cp ('t79');
         ## Turn into the main phase  
3303          !!!parse-error (type => 'after html:html');          !!!parse-error (type => 'after html:html');
3304          $self->{insertion_mode} = AFTER_BODY_IM;          $self->{insertion_mode} = AFTER_BODY_IM;
3305        } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {        } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3306          !!!cp ('t80');          !!!cp ('t80');
         ## Turn into the main phase  
3307          !!!parse-error (type => 'after html:html');          !!!parse-error (type => 'after html:html');
3308          $self->{insertion_mode} = AFTER_FRAMESET_IM;          $self->{insertion_mode} = AFTER_FRAMESET_IM;
3309        } else {        } else {
3310          !!!cp ('t81');          !!!cp ('t81');
3311        }        }
3312    
3313  ## ISSUE: "aa<html>" is not a parse error.        !!!cp ('t82');
3314  ## ISSUE: "<html>" in fragment is not a parse error.        !!!parse-error (type => 'not first start tag');
       unless ($token->{first_start_tag}) {  
         !!!cp ('t82');  
         !!!parse-error (type => 'not first start tag');  
       } else {  
         !!!cp ('t83');  
       }  
3315        my $top_el = $self->{open_elements}->[0]->[0];        my $top_el = $self->{open_elements}->[0]->[0];
3316        for my $attr_name (keys %{$token->{attributes}}) {        for my $attr_name (keys %{$token->{attributes}}) {
3317          unless ($top_el->has_attribute_ns (undef, $attr_name)) {          unless ($top_el->has_attribute_ns (undef, $attr_name)) {
# Line 5125  sub _tree_construction_main ($) { Line 5143  sub _tree_construction_main ($) {
5143            !!!cp ('t301');            !!!cp ('t301');
5144            !!!parse-error (type => 'after html:#character');            !!!parse-error (type => 'after html:#character');
5145    
5146            ## Reprocess in the "main" phase, "after body" insertion mode...            ## Reprocess in the "after body" insertion mode.
5147          } else {          } else {
5148            !!!cp ('t302');            !!!cp ('t302');
5149          }          }
# Line 5141  sub _tree_construction_main ($) { Line 5159  sub _tree_construction_main ($) {
5159            !!!cp ('t303');            !!!cp ('t303');
5160            !!!parse-error (type => 'after html:'.$token->{tag_name});            !!!parse-error (type => 'after html:'.$token->{tag_name});
5161                        
5162            ## Reprocess in the "main" phase, "after body" insertion mode...            ## Reprocess in the "after body" insertion mode.
5163          } else {          } else {
5164            !!!cp ('t304');            !!!cp ('t304');
5165          }          }
# Line 5158  sub _tree_construction_main ($) { Line 5176  sub _tree_construction_main ($) {
5176            !!!parse-error (type => 'after html:/'.$token->{tag_name});            !!!parse-error (type => 'after html:/'.$token->{tag_name});
5177                        
5178            $self->{insertion_mode} = AFTER_BODY_IM;            $self->{insertion_mode} = AFTER_BODY_IM;
5179            ## Reprocess in the "main" phase, "after body" insertion mode...            ## Reprocess in the "after body" insertion mode.
5180          } else {          } else {
5181            !!!cp ('t306');            !!!cp ('t306');
5182          }          }
# Line 5212  sub _tree_construction_main ($) { Line 5230  sub _tree_construction_main ($) {
5230              !!!parse-error (type => 'after html:#character');              !!!parse-error (type => 'after html:#character');
5231    
5232              $self->{insertion_mode} = AFTER_FRAMESET_IM;              $self->{insertion_mode} = AFTER_FRAMESET_IM;
5233              ## Reprocess in the "main" phase, "after frameset"...              ## Reprocess in the "after frameset" insertion mode.
5234              !!!parse-error (type => 'after frameset:#character');              !!!parse-error (type => 'after frameset:#character');
5235            }            }
5236                        
# Line 5234  sub _tree_construction_main ($) { Line 5252  sub _tree_construction_main ($) {
5252            !!!parse-error (type => 'after html:'.$token->{tag_name});            !!!parse-error (type => 'after html:'.$token->{tag_name});
5253    
5254            $self->{insertion_mode} = AFTER_FRAMESET_IM;            $self->{insertion_mode} = AFTER_FRAMESET_IM;
5255            ## Process in the "main" phase, "after frameset" insertion mode...            ## Process in the "after frameset" insertion mode.
5256          } else {          } else {
5257            !!!cp ('t317');            !!!cp ('t317');
5258          }          }
# Line 5275  sub _tree_construction_main ($) { Line 5293  sub _tree_construction_main ($) {
5293            !!!parse-error (type => 'after html:/'.$token->{tag_name});            !!!parse-error (type => 'after html:/'.$token->{tag_name});
5294    
5295            $self->{insertion_mode} = AFTER_FRAMESET_IM;            $self->{insertion_mode} = AFTER_FRAMESET_IM;
5296            ## Process in the "main" phase, "after frameset" insertion mode...            ## Process in the "after frameset" insertion mode.
5297          } else {          } else {
5298            !!!cp ('t324');            !!!cp ('t324');
5299          }          }
# Line 6322  sub _tree_construction_main ($) { Line 6340  sub _tree_construction_main ($) {
6340      redo B;      redo B;
6341    } # B    } # B
6342    
   ## NOTE: The "trailing end" phase in HTML5 is split into  
   ## two insertion modes: "after html body" and "after html frameset".  
   ## NOTE: States in the main stage is preserved while  
   ## the parser stays in the trailing end phase. # MUST  
   
6343    ## Stop parsing # MUST    ## Stop parsing # MUST
6344        
6345    ## TODO: script stuffs    ## TODO: script stuffs
# Line 6368  sub set_inner_html ($$$) { Line 6381  sub set_inner_html ($$$) {
6381      my $p = $class->new;      my $p = $class->new;
6382      $p->{document} = $doc;      $p->{document} = $doc;
6383    
6384      ## Step 9 # MUST      ## Step 8 # MUST
6385      my $i = 0;      my $i = 0;
6386      my $line = 1;      my $line = 1;
6387      my $column = 0;      my $column = 0;
# Line 6435  sub set_inner_html ($$$) { Line 6448  sub set_inner_html ($$$) {
6448    
6449      $p->{inner_html_node} = [$node, $node_ln];      $p->{inner_html_node} = [$node, $node_ln];
6450    
6451      ## Step 4      ## Step 3
6452      my $root = $doc->create_element_ns      my $root = $doc->create_element_ns
6453        ('http://www.w3.org/1999/xhtml', [undef, 'html']);        ('http://www.w3.org/1999/xhtml', [undef, 'html']);
6454    
6455      ## Step 5 # MUST      ## Step 4 # MUST
6456      $doc->append_child ($root);      $doc->append_child ($root);
6457    
6458      ## Step 6 # MUST      ## Step 5 # MUST
6459      push @{$p->{open_elements}}, [$root, 'html'];      push @{$p->{open_elements}}, [$root, 'html'];
6460    
6461      undef $p->{head_element};      undef $p->{head_element};
6462    
6463      ## Step 7 # MUST      ## Step 6 # MUST
6464      $p->_reset_insertion_mode;      $p->_reset_insertion_mode;
6465    
6466      ## Step 8 # MUST      ## Step 7 # MUST
6467      my $anode = $node;      my $anode = $node;
6468      AN: while (defined $anode) {      AN: while (defined $anode) {
6469        if ($anode->node_type == 1) {        if ($anode->node_type == 1) {
# Line 6466  sub set_inner_html ($$$) { Line 6479  sub set_inner_html ($$$) {
6479        $anode = $anode->parent_node;        $anode = $anode->parent_node;
6480      } # AN      } # AN
6481            
6482      ## Step 3 # MUST      ## Step 9 # MUST
     ## Step 10 # MUST  
6483      {      {
6484        my $self = $p;        my $self = $p;
6485        !!!next-token;        !!!next-token;
6486      }      }
6487      $p->_tree_construction_main;      $p->_tree_construction_main;
6488    
6489      ## Step 11 # MUST      ## Step 10 # MUST
6490      my @cn = @{$node->child_nodes};      my @cn = @{$node->child_nodes};
6491      for (@cn) {      for (@cn) {
6492        $node->remove_child ($_);        $node->remove_child ($_);
6493      }      }
6494      ## ISSUE: mutation events? read-only?      ## ISSUE: mutation events? read-only?
6495    
6496      ## Step 12 # MUST      ## Step 11 # MUST
6497      @cn = @{$root->child_nodes};      @cn = @{$root->child_nodes};
6498      for (@cn) {      for (@cn) {
6499        $this_doc->adopt_node ($_);        $this_doc->adopt_node ($_);

Legend:
Removed from v.1.83  
changed lines
  Added in v.1.84

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24