/[suikacvs]/markup/html/whatpm/What/HTML.pm
Suika

Diff of /markup/html/whatpm/What/HTML.pm

Parent Directory | Revision Log | View Patch

revision 1.8 by wakaba, Tue May 1 06:22:12 2007 UTC revision 1.9 by wakaba, Tue May 1 07:46:42 2007 UTC
# Line 2  package What::HTML; Line 2  package What::HTML;
2  use strict;  use strict;
3  our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};  our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4    
5  ## This is a very, very early version of an HTML parser.  ## This is an early version of an HTML parser.
6    
7  my $permitted_slash_tag_name = {  my $permitted_slash_tag_name = {
8    base => 1,    base => 1,
# Line 302  my $formatting_category = { Line 302  my $formatting_category = {
302  };  };
303  # $phrasing_category: all other elements  # $phrasing_category: all other elements
304    
## Parse a string as an HTML document.
##
##   $doc = What::HTML->parse_string ($string, $document [, $onerror]);
##
## Arguments (after the invocant, on which |new| is called to create a
## fresh parser object):
##   $string   - The text to parse.  It is accessed through a reference
##               to the caller's argument, so it is not copied.
##   $document - The document object to populate.  It is stored in
##               |$self->{document}| before the tree constructor is
##               initialized.
##   $onerror  - Optional code reference stored as the parse error
##               handler.  If it is omitted (or false), a default handler
##               that |warn|s with the current character offset is used.
##
## Returns |$self->{document}|.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $s = \$_[0];
  $self->{document} = $_[1];

  ## Offset in |$$s| of the next character that has not been read yet.
  ## Shared by the two closures below.
  my $i = 0;

  ## Sets |$self->{next_input_character}| to the code point of the next
  ## input character, or to -1 at the end of the input.
  $self->{set_next_input_character} = sub {
    my $self = shift;

    if ($i >= length $$s) {
      $self->{next_input_character} = -1; # end of input
      return;
    }

    $self->{next_input_character} = ord substr $$s, $i++, 1;

    if ($self->{next_input_character} == 0x000D) { # CR
      ## CR LF counts as a single newline, so skip the LF.  The next
      ## character is only peeked at here, not consumed: if it is not
      ## LF, the next call reads it and applies the same normalization
      ## to it (e.g. a second CR or a NULL).
      $i++ if $i < length $$s and substr ($$s, $i, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
    } elsif ($self->{next_input_character} > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_input_character} == 0x0000) { # NULL
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  $self->{parse_error} = $_[2] || sub {
    warn "Parse error at character $i\n"; ## TODO: Report (line, column) pair
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
347    
348  sub new ($) {  sub new ($) {
349    my $class = shift;    my $class = shift;
350    my $self = bless {}, $class;    my $self = bless {}, $class;
# Line 2081  sub _tokenize_attempt_to_consume_an_enti Line 2124  sub _tokenize_attempt_to_consume_an_enti
2124    
2125  sub _initialize_tree_constructor ($) {  sub _initialize_tree_constructor ($) {
2126    my $self = shift;    my $self = shift;
2127    require What::NanoDOM;    ## NOTE: $self->{document} MUST be specified before this method is called
   $self->{document} = What::NanoDOM::Document->new;  
2128    $self->{document}->strict_error_checking (0);    $self->{document}->strict_error_checking (0);
2129    ## TODO: Turn mutation events off # MUST    ## TODO: Turn mutation events off # MUST
2130    ## TODO: Turn loose Document option (manakai extension) on    ## TODO: Turn loose Document option (manakai extension) on
2131      ## TODO: Mark the Document as an HTML document # MUST
2132  } # _initialize_tree_constructor  } # _initialize_tree_constructor
2133    
2134  sub _terminate_tree_constructor ($) {  sub _terminate_tree_constructor ($) {
# Line 2565  sub _construct_tree ($) { Line 2608  sub _construct_tree ($) {
2608        } elsif ({        } elsif ({
2609                  base => 1, link => 1, meta => 1,                  base => 1, link => 1, meta => 1,
2610                 }->{$token->{tag_name}}) {                 }->{$token->{tag_name}}) {
2611          $self->{parse_error}->();          $self->{parse_error}-> ($token->{tag_name}.' in body');
2612          ## NOTE: This is an "as if in head" code clone          ## NOTE: This is an "as if in head" code clone
2613          my $el;          my $el;
2614                    
# Line 2583  sub _construct_tree ($) { Line 2626  sub _construct_tree ($) {
2626            $insert->($el);            $insert->($el);
2627          }          }
2628                    
         ## ISSUE: Issue on magical <base> in the spec  
           
2629          $token = $self->_get_next_token;          $token = $self->_get_next_token;
2630          return;          return;
2631        } elsif ($token->{tag_name} eq 'title') {        } elsif ($token->{tag_name} eq 'title') {
2632            $self->{parse_error}-> ('title in body');
2633          ## NOTE: There is an "as if in head" code clone          ## NOTE: There is an "as if in head" code clone
2634          my $title_el;          my $title_el;
2635                    
# Line 3833  sub _construct_tree ($) { Line 3875  sub _construct_tree ($) {
3875                
3876                (defined $head_element ? $head_element : $open_elements->[-1]->[0])                (defined $head_element ? $head_element : $open_elements->[-1]->[0])
3877                  ->append_child ($el);                  ->append_child ($el);
                 
               ## ISSUE: Issue on magical <base> in the spec  
3878    
3879                $token = $self->_get_next_token;                $token = $self->_get_next_token;
3880                redo B;                redo B;
# Line 5502  sub _construct_tree ($) { Line 5542  sub _construct_tree ($) {
5542              #              #
5543            }            }
5544    
5545            $self->{parse_error}->();            $self->{parse_error}-> ('data after body');
5546            $insertion_mode = 'in body';            $insertion_mode = 'in body';
5547            ## reprocess            ## reprocess
5548            redo B;            redo B;
# Line 5698  sub _construct_tree ($) { Line 5738  sub _construct_tree ($) {
5738    ## TODO: script stuffs    ## TODO: script stuffs
5739  } # _construct_tree  } # _construct_tree
5740    
5741  sub inner_html ($$$) {  sub get_inner_html ($$$) {
5742    my ($class, $node, $on_error) = @_;    my ($class, $node, $on_error) = @_;
5743    
5744    ## Step 1    ## Step 1
# Line 5786  sub inner_html ($$$) { Line 5826  sub inner_html ($$$) {
5826      } elsif ($nt == 5) { # entrefs      } elsif ($nt == 5) { # entrefs
5827        push @node, @{$child->child_nodes};        push @node, @{$child->child_nodes};
5828      } else {      } else {
5829        $on_error->($child);        $on_error->($child) if defined $on_error;
5830      }      }
5831        ## ISSUE: This code does not support PIs.
5832    } # C    } # C
5833        
5834    ## Step 3    ## Step 3
5835    return \$s;    return \$s;
5836  } # inner_html  } # get_inner_html
5837    
5838  1;  1;
5839  # $Date$  # $Date$

Legend:
Removed from v.1.8  
changed lines
  Added in v.1.9

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24