Whatpm/SWML/Parser.pm

package Whatpm::SWML::Parser;
use strict;


sub new ($) {
  my $self = bless {
  }, $_[0];
  return $self;
} # new

sub parse_char_string ($$$;$) {
  my $self = shift;
  my @s = split /\x0D\x0A?|\x0A/, ref $_[0] ? ${$_[0]} : $_[0], -1;

  my $doc = $_[1];
  $doc->inner_html
      ('<html xmlns="http://www.w3.org/1999/xhtml"><head/><body/></html>');
  for ($doc,
       $doc->document_element,
       $doc->document_element->first_child,
       $doc->document_element->last_child) {
    $_->set_user_data (manakai_source_line => 1);
    $_->set_user_data (manakai_source_column => 1);
  }
  $self->{oe} = {node => $doc->document_element->last_child,
                 section_depth => 0,
                 quotation_depth => 0,
                 list_depth => 0};
  
  my $_onerror = $_[2] || sub {
    my %opt = @_;
    my $r = 'Line ' . $opt{line} . ' column ' . $opt{column} . ': ';

    if ($opt{token}) {
      $r .= 'Token ' . (defined $opt{token}->{value}
                        ? $opt{token}->{value} : $opt{token}->{type}) . ': ';
    }

    $r .= $opt{type} . ';' . $opt{level};
    
    warn $r . "\n";
  }; # $_onerror

  my $line = 0;
  my $column = 0;
  my $token;
  my @nt;

  my $onerror = sub {
    $_onerror->(line => $line, column => $column, token => $token, @_);
  }; # $onerror

  my $continuous_line;

  my $tokenize_text = sub {
    my $s = shift; # ref

    if ($$s =~ s/^\[([0-9]+)\]//) {
      push @nt, {type => ELEMENT_TOKEN,
                 local_name => 'anchor-end', namespace => $SW09_NS,
                 anchor => $1, content => '[' . $1 . ']'};
      $column += $+[0] - $-[0];
    }
    
    while (length $$s) {
      if ($$s =~ s/^\[\[#([a-z-}+)//) {
        $column = $+[0] - $-[0];
        my $t = {type => FORM_TOKEN, name => $1,
                 line => $line, column => $column};
        if ($$s =~ s/^\(([^()\\]*)\)//) {
          $t->{id} = $1;
          $column += $+[0] - $-[0];
        }
        my @param;
        while ($$s =~ s/^:/) {
          if ($$s =~ s/^'((?>[^'\\]|\\.)*)//) {
            $column += 1 + $+[0] - $-[0];
            my $n = $1;
            $n =~ tr/\\//d;
            push @param, $n;
            $column++ if $$s =~ s/\A\\\z//;
            $column++ if $$s =~ s/^'//;
          } elsif ($$s =~ s/^([^':][^:]*)//) {
            $column += 1 + $+[0] - $-[0];
            push @param, $1;
          }
        }
        $t->{parameters} = \@param;
        $column += 2 if $$s =~ s/^\]\]//;
        push @nt, $t;
      } elsif ($$s =~ s/^\[\[//) {
        push @nt, {type => INLINE_START_TAG_TOKEN};
        $column += 2;
      } elsif ($$s =~ s/^\[([A-Z]+)(?>\(([^()\\]*)\))?(?>\@[0-9A-Za-z-]*)?\[//) {
        push @nt, {type => INLINE_START_TAG_TOKEN,
                   tag_name => $1, classes => $2, language => $3,
                   line => $line, column => $column};
        $column += $+[0] - $-[0];
      } elsif ($$s =~ s/^\]\]//) {
        push @nt, {type => INLINE_END_TAG_TOKEN,
                   line => $line, column => $column};
        $column += 2;
      } elsif ($$s =~ s/^(\]?)<([0-9A-Za-z%+._-]+)://) {
        my $t = {type => $1 ? INLINE_END_TAG_TOKEN : ELEMENT_TOKEN,
                 res_scheme => $2, res_parameter => '',
                 line => $line, column => $column};
        $column += $+[0] - $-[0];

        while (length $$s) {
          if ($$s =~ s/^([^>"]+)//) {
            $t->{res_parameter} .= $1;
            $column += $+[0] - $-[0];
          } elsif ($$s =~ s/^("(?>[^"\\]|\\.)*)//) {
            $t->{res_parameter} .= $1;
            $column += $+[0] - $-[0];
            $column++ if $$s =~ s/\A\\\z//;
            $column++ if $$s =~ s/^"//;
          } else {
            last;
          }
        }

        $column++ if $$s =~ s/^>//;

        $t->{content} = $t->{res_scheme} . ':' . $t->{res_parameter};
        if ($t->{res_scheme} !~ /[A-Z]/) {
          $t->{res_parameter} = $t->{content};
          $t->{res_scheme} = 'URI';
        }
        
        if ($t->{type} == INLINE_END_TAG_TOKEN) {
          $column++ if $$s =~ s/^\]//;
        } else {
          $t->{local_name} = 'anchor-external';
          $t->{namespace} = $SW09_NS;
        }
        push @nt, $t;
      } elsif ($$s =~ s/^\]>>([0-9]+)\]//) {
        push @nt, {type => INLINE_END_TAG_TOKEN,
                   anchor => $1,
                 line => $line, column => $column};
        $column += $+[0] - $-[0];
      } elsif ($$s =~ s/^\][\x09\x20]*(?>\@([0-9a-zA-Z-]*))?\[//) {
        push @nt, {type => INLINE_MIDDLE_TAG_TOKEN,
                   language => $1,
                   line => $line, column => $column};
        $column += $+[0] - $-[0];
      } elsif ($$s =~ s/\^''('?)//) {
        push @nt, {type => $1 ? STRONG_TOKEN : EMPHASIS_TOKEN,
                   line => $line, column => $column};
        $column += $+[0] - $-[0];
      } elsif ($$s =~ s/^>>([0-9]+)//) {
        push @nt, {type => ELEMENT_TOKEN,
                   local_name => 'anchor-internal', namespace => $SW09_NS,
                   anchor => $1,
                   line => $line, column => $column};
        $column += $+[0] - $-[0];
      } elsif ($$s =~ s/^__&&//) {
        if ($$s =~ s/^(.+?)&&__//) {
          push @nt, {type => ELEMENT_TOKEN,
                     local_name => 'replace', namespace => $SW09_NS,
                     by => $1,
                     line => $line, column => $column};
          $column += 4 + $+[0] - $-[0];
        } else {
          push @nt, {type => CHARACTER_TOKEN,
                     data => '__&&',
                     line => $line, column => $column};
          $column += 4;
        }
      } elsif ($$s =~ s/^([^<>\['_]+)//) {
        push @nt, {type => CHARACTER_TOKEN, data => $1,
                   line => $line, column => $column};
        $column += $+[0] - $-[0];
      } else {
        push @nt, {type => CHARACTER_TOKEN, data => substr ($$s, 0, 1),
                   line => $line, column => $column};
        substr ($$s, 0, 1) = '';
        $column++;
      }
    }
  }; # $tokenize_text

  my $get_next_token = sub {
    if (@nt) {
      return shift @nt;
    }
    
    if (not @s) {
      return {type => END_OF_FILE_TOKEN, line => $line, column => $column};
    }
    
    my $s = shift @s;
    ($line, $column) = ($line + 1, 1);
    if ($s eq '') {
      undef $continuous_line;
      return {type => EMPTY_LINE_TOKEN, line => $line, column => $column};
    } elsif ($s =~ /^[\x09\x20]/) {
      push @nt, {type => PREFORMATTED_START_TOKEN,
                 line => $line, column => $column};
      $tokenize_text->(\$s);
      while (@s) {
        my $s = shift @s;
        ($line, $column) = ($line + 1, 1);
        if ($s eq '') {
          push @nt, {type => PREFORMATTED_END_TOKEN,
                     line => $line, column => $column};
          unshift @s, $s;
          $line--;
          last;
        } elsif ($s =~ /\A\](INS|DEL)\][\x09\x20]*\z/) {
          push @nt, {type => PREFORMATTED_END_TOKEN,
                     line => $line, column => $column};
          push @nt, {type => BLOCK_END_TAG_TOKEN, tag_name => $1,
                     line => $line, column => $column};
          last;
        } else {
          push @nt, {type => CHARACTER_TOKEN, data => "\x0A",
                     line => $line, column => $column};
          $tokenize_text->(\$s);
        }
      }
      return shift @nt;
    } elsif ($s =~ s/^(\*+)\s*//) {
      push @nt, {type => HEADING_START_TOKEN, depth => length $1,
                 line => $line, column => $column};
      $column += $+[0] - $-[0];
      $tokenize_text->(\$s);
      push @nt, {type => HEADING_END_TOKEN,
                 line => $line, column => $column};
      undef $continuous_line;
      return shift @nt;
    } elsif ($s =~ s/^([-=]+)\s*//) {
      push @nt, {type => LIST_START_TOKEN, depth => $1,
                 line => $line, column => $column};
      $column += $+[0] - $-[0];
      $tokenize_text->(\$s);
      $continuous_line = 1;
      return shift @nt;
    } elsif ($s =~ s/^:([^:]*)//) {
      my $name = $1;
      if ($s eq '') {
        push @nt, {type => CHARACTER_TOKEN, data => ':',
                   line => $line, column => $column};
        $column++;
        $tokenize_text->(\$name);
      } else {
        my $real_column = $column + 1 + length $name;
        push @nt, {type => LABELED_LIST_START_TOKEN,
                   line => $line, column => $column};
        $name =~ s/\A[\x09\x20]*//;
        $column += 1 + $+[0] - $-[0];
        $name =~ s/[\x09\x20]+\z//;
        $tokenize_text->(\$s);
        $column = $real_column;
        push @nt, {type => LABELED_LIST_MIDDLE_TOKEN,
                   line => $line, column => $column};
        $column += $+[0] - $-[0] if $data =~ s/^:[\x09\x20]*//;
        $tokenize_text->(\$s);
      }
      $continuous_line = 1;
      return shift @nt;
    } elsif ($s =~ s/^(>+)//) {
      my $depth = length $1;
      if ($depth == 2 and $s =~ /^[0-9]/) {
        push @nt, {type => CHARACTER_TOKEN, data => "\x0A",
                   line => $line, column => $column}
            if $continuous_line;
        $s = '>>' . $s;
        $tokenize_text->(\$s);
      } else {
        push @nt, {type => QUOTATION_START_TOKEN, depth => $depth,
                   line => $line, column => $column};
        $column += $depth;
        $column += $+[0] - $-[0] if $s =~ s/^[\x09\x20]+//;
        if ($s =~ s/^\@\@[\x09\x20]*//) {
          push @nt, {type => EDITORIAL_NOTE_START_TOKEN,
                     line => $line, column => $column};
          $column += $+[0] - $-[0];
        } elsif ($s =~ s/^;;[\x09\x20]*//) {
          push @nt, {type => COMMENT_PARAGRAPH_START_TOKEN,
                     line => $line, column => $column};
          $column += $+[0] - $-[0];
        }
        $tokenize_text->(\$s);
      }
      $continuous_line = 1;
      return shift @nt;
    } elsif ($s =~ /\A\[(INS|DEL)(?>\(([^()\\]*)\))?\[[\x09\x20]*\z/) {
      undef $continuous_line;
      return {type => BLOCK_START_TAG_TOKEN, tag_name => $1,
              classes => $2,
              line => $line, column => $column};
    } elsif ($s =~ /\A\[PRE(?>\(([^()\\]*)\))?\[[\x09\x20]*\z/) {
      undef $continuous_line;
      push @nt, {type => BLOCK_START_TAG_TOKEN, tag_name => 'PRE',
                 classes => $1,
                 line => $line, column => $column};
      while (@s) {
        my $s = shift @s;
        ($line, $column) = ($line + 1, 1);
        if ($s =~ /\A\]PRE\][\x09\x20]*\z/) {
          push @nt, {type => BLOCK_END_TAG_TOKEN, tag_name => 'PRE',
                     line => $line, column => $column};
          undef $continuous_line;
          break;
        } else {
          push @nt, {type => CHARACTER_TOKEN, data => "\x0A",
                     line => $line, column => $column}
              if $continuous_line;
          $tokenize_text->(\$s);
          $continuous_line = 1;
        }
      }
      return shift @nt;
    } elsif ($s =~ s/^\@\@[\x09\x20]*//) {
      push @nt, {type => EDITORIAL_NOTE_START_TOKEN,
                 line => $line, column => $column};
      $column += $+[0] - $-[0];
      $tokenize_text->(\$s);
      $continuous_line = 1;
      return shift @nt;
    } elsif ($s =~ s/^;;[\x09\x20]*//) {
      push @nt, {type => COMMENT_PARAGRAPH_START_TOKEN,
                 line => $line, column => $column};
      $column += $+[0] - $-[0];
      $tokenize_text->(\$s);
      undef $continuous_line;
      return shift @nt;
    } elsif ($s =~ /\A\](INS|DEL)\][\x09\x20]*\z/) {
      $continuous_line = 1;
      return {type => BLOCK_END_TAG_TOKEN, tag_name => $1,
              line => $line, column => $column};
    } elsif ($s =~ /^,/) {
      push @nt, {type => TABLE_ROW_START_TOKEN,
                 line => $line, column => $column};
      while ($s =~ s/^,[\x09\x20]*//) {
        $column += $+[0] - $-[0];
        my $cell;
        my $cell_quoted;
        my $column_quoted = $column;
        my $column_cell = $column;
        if ($s =~ s/^"//) {
          $s =~ s/^((?>[^"\\]|\\.)*)//;
          $cell_quoted = $1;
          $column += 1 + length $cell_quoted;
          $cell_quoted =~ tr/\\//d;
          $column++ if $s =~ s/\A\\\z//;
          $column++ if $s =~ s/^"//;
        }
        if ($s =~ s/^([^,]+)//) {
          $cell = $1;
          $column += length $cell;
          $cell =~ s/[\x09\x20]+\z//;
        }
        if (not defined $cell_quoted and defined $cell and
            $cell eq '==') {
          push @nt, {type => TABLE_COLSPAN_CELL_TOKEN,
                     line => $line, column => $column_cell};
        } else {
          push @nt, {type => TABLE_CELL_START_TOKEN,
                     line => $line,
                     column => defined $column_quoted ? $column_quoted: $column_cell};
          my $real_column = $column;
          $column = $column_quoted + 1;
          $tokenize_text->(\$cell_quoted) if defined $cell_quoted;
              ## NOTE: When a quoted-pair is used, column numbers
              ## reported in this $tokenize_text call might be wrong.
          $column = $column_cell;
          $tokenize_text->(\$cell) if defined $cell;
          $column = $column_quoted;
          push @nt, {type => TABLE_CELL_END_TOKEN,
                     line => $line,
                     column => $column};
        }
      }
      push @nt, {type => TABLE_ROW_END_TOKEN,
                 line => $line, column => $column};
      undef $continuous_line;
      return shift @nt;
    } elsif ($s eq '__IMAGE__') {
      my $image = $doc->create_element_ns ($NS_SW09, 'image');
      $_->set_user_data (manakai_source_line => $line);
      $_->set_user_data (manakai_source_column => 1);
      $image->text_content (join "\x0A", $s, @s);
      ($line, $column) = ($line + @s, 1);
      @s = ();
      $doc->document_element->append_child ($image);
      return {type => END_OF_FILE_TOKEN,
              line => $line, column => $column};
    } else {
      push @nt, {type => CHARACTER_TOKEN, data => "\x0A",
                 line => $line, column => $column} if $continuous_line;
      $tokenize_text->(\$s);
      $continuous_line = 1;
      return shift @nt;
    }
  }; # $get_next_token_body

  ## NOTE: The "initial" mode.
  if (@s and $s[0] =~ /^#\?/) {
    ## NOTE: "Parse a magic line".

    my $s = shift @s;

    ## TODO:...

    $line = 2;
    $column = 0;
  }

  ## NOTE: Switched to the "body" mode.
  $get_next_token->();

} # parse_char_string

1;
1	wakaba	1.1	package Whatpm::SWML::Parser;
2			use strict;
3
4
5			sub new ($) {
6			my $self = bless {
7			}, $_[0];
8			return $self;
9			} # new
10
11			sub parse_char_string ($$$;$) {
12			my $self = shift;
13			my @s = split /\x0D\x0A?\|\x0A/, ref $_[0] ? ${$_[0]} : $_[0], -1;
14
15			my $doc = $_[1];
16			$doc->inner_html
17			('<html xmlns="http://www.w3.org/1999/xhtml"><head/><body/></html>');
18			for ($doc,
19			$doc->document_element,
20			$doc->document_element->first_child,
21			$doc->document_element->last_child) {
22			$_->set_user_data (manakai_source_line => 1);
23			$_->set_user_data (manakai_source_column => 1);
24			}
25			$self->{oe} = {node => $doc->document_element->last_child,
26			section_depth => 0,
27			quotation_depth => 0,
28			list_depth => 0};
29
30			my $_onerror = $_[2] \|\| sub {
31			my %opt = @_;
32			my $r = 'Line ' . $opt{line} . ' column ' . $opt{column} . ': ';
33
34			if ($opt{token}) {
35			$r .= 'Token ' . (defined $opt{token}->{value}
36			? $opt{token}->{value} : $opt{token}->{type}) . ': ';
37			}
38
39			$r .= $opt{type} . ';' . $opt{level};
40
41			warn $r . "\n";
42			}; # $_onerror
43
44			my $line = 0;
45			my $column = 0;
46			my $token;
47			my @nt;
48
49			my $onerror = sub {
50			$_onerror->(line => $line, column => $column, token => $token, @_);
51			}; # $onerror
52
53			my $continuous_line;
54
55			my $tokenize_text = sub {
56			my $s = shift; # ref
57
58			if ($$s =~ s/^\[([0-9]+)\]//) {
59			push @nt, {type => ELEMENT_TOKEN,
60			local_name => 'anchor-end', namespace => $SW09_NS,
61			anchor => $1, content => '[' . $1 . ']'};
62			$column += $+[0] - $-[0];
63			}
64
65			while (length $$s) {
66			if ($$s =~ s/^\[\[#([a-z-}+)//) {
67			$column = $+[0] - $-[0];
68			my $t = {type => FORM_TOKEN, name => $1,
69			line => $line, column => $column};
70			if ($$s =~ s/^\(([^()\\]*)\)//) {
71			$t->{id} = $1;
72			$column += $+[0] - $-[0];
73			}
74			my @param;
75			while ($$s =~ s/^:/) {
76			if ($$s =~ s/^'((?>[^'\\]\|\\.)*)//) {
77			$column += 1 + $+[0] - $-[0];
78			my $n = $1;
79			$n =~ tr/\\//d;
80			push @param, $n;
81			$column++ if $$s =~ s/\A\\\z//;
82			$column++ if $$s =~ s/^'//;
83			} elsif ($$s =~ s/^([^':][^:]*)//) {
84			$column += 1 + $+[0] - $-[0];
85			push @param, $1;
86			}
87			}
88			$t->{parameters} = \@param;
89			$column += 2 if $$s =~ s/^\]\]//;
90			push @nt, $t;
91			} elsif ($$s =~ s/^\[\[//) {
92			push @nt, {type => INLINE_START_TAG_TOKEN};
93			$column += 2;
94			} elsif ($$s =~ s/^\[([A-Z]+)(?>\(([^()\\])\))?(?>\@[0-9A-Za-z-])?\[//) {
95			push @nt, {type => INLINE_START_TAG_TOKEN,
96			tag_name => $1, classes => $2, language => $3,
97			line => $line, column => $column};
98			$column += $+[0] - $-[0];
99			} elsif ($$s =~ s/^\]\]//) {
100			push @nt, {type => INLINE_END_TAG_TOKEN,
101			line => $line, column => $column};
102			$column += 2;
103			} elsif ($$s =~ s/^(\]?)<([0-9A-Za-z%+._-]+)://) {
104			my $t = {type => $1 ? INLINE_END_TAG_TOKEN : ELEMENT_TOKEN,
105			res_scheme => $2, res_parameter => '',
106			line => $line, column => $column};
107			$column += $+[0] - $-[0];
108
109			while (length $$s) {
110			if ($$s =~ s/^([^>"]+)//) {
111			$t->{res_parameter} .= $1;
112			$column += $+[0] - $-[0];
113			} elsif ($$s =~ s/^("(?>[^"\\]\|\\.)*)//) {
114			$t->{res_parameter} .= $1;
115			$column += $+[0] - $-[0];
116			$column++ if $$s =~ s/\A\\\z//;
117			$column++ if $$s =~ s/^"//;
118			} else {
119			last;
120			}
121			}
122
123			$column++ if $$s =~ s/^>//;
124
125			$t->{content} = $t->{res_scheme} . ':' . $t->{res_parameter};
126			if ($t->{res_scheme} !~ /[A-Z]/) {
127			$t->{res_parameter} = $t->{content};
128			$t->{res_scheme} = 'URI';
129			}
130
131			if ($t->{type} == INLINE_END_TAG_TOKEN) {
132			$column++ if $$s =~ s/^\]//;
133			} else {
134			$t->{local_name} = 'anchor-external';
135			$t->{namespace} = $SW09_NS;
136			}
137			push @nt, $t;
138			} elsif ($$s =~ s/^\]>>([0-9]+)\]//) {
139			push @nt, {type => INLINE_END_TAG_TOKEN,
140			anchor => $1,
141			line => $line, column => $column};
142			$column += $+[0] - $-[0];
143			} elsif ($$s =~ s/^\][\x09\x20](?>\@([0-9a-zA-Z-]))?\[//) {
144			push @nt, {type => INLINE_MIDDLE_TAG_TOKEN,
145			language => $1,
146			line => $line, column => $column};
147			$column += $+[0] - $-[0];
148			} elsif ($$s =~ s/\^''('?)//) {
149			push @nt, {type => $1 ? STRONG_TOKEN : EMPHASIS_TOKEN,
150			line => $line, column => $column};
151			$column += $+[0] - $-[0];
152			} elsif ($$s =~ s/^>>([0-9]+)//) {
153			push @nt, {type => ELEMENT_TOKEN,
154			local_name => 'anchor-internal', namespace => $SW09_NS,
155			anchor => $1,
156			line => $line, column => $column};
157			$column += $+[0] - $-[0];
158			} elsif ($$s =~ s/^__&&//) {
159			if ($$s =~ s/^(.+?)&&__//) {
160			push @nt, {type => ELEMENT_TOKEN,
161			local_name => 'replace', namespace => $SW09_NS,
162			by => $1,
163			line => $line, column => $column};
164			$column += 4 + $+[0] - $-[0];
165			} else {
166			push @nt, {type => CHARACTER_TOKEN,
167			data => '__&&',
168			line => $line, column => $column};
169			$column += 4;
170			}
171			} elsif ($$s =~ s/^([^<>\['_]+)//) {
172			push @nt, {type => CHARACTER_TOKEN, data => $1,
173			line => $line, column => $column};
174			$column += $+[0] - $-[0];
175			} else {
176			push @nt, {type => CHARACTER_TOKEN, data => substr ($$s, 0, 1),
177			line => $line, column => $column};
178			substr ($$s, 0, 1) = '';
179			$column++;
180			}
181			}
182			}; # $tokenize_text
183
184			my $get_next_token = sub {
185			if (@nt) {
186			return shift @nt;
187			}
188
189			if (not @s) {
190			return {type => END_OF_FILE_TOKEN, line => $line, column => $column};
191			}
192
193			my $s = shift @s;
194			($line, $column) = ($line + 1, 1);
195			if ($s eq '') {
196			undef $continuous_line;
197			return {type => EMPTY_LINE_TOKEN, line => $line, column => $column};
198			} elsif ($s =~ /^[\x09\x20]/) {
199			push @nt, {type => PREFORMATTED_START_TOKEN,
200			line => $line, column => $column};
201			$tokenize_text->(\$s);
202			while (@s) {
203			my $s = shift @s;
204			($line, $column) = ($line + 1, 1);
205			if ($s eq '') {
206			push @nt, {type => PREFORMATTED_END_TOKEN,
207			line => $line, column => $column};
208			unshift @s, $s;
209			$line--;
210			last;
211			} elsif ($s =~ /\A\](INS\|DEL)\][\x09\x20]*\z/) {
212			push @nt, {type => PREFORMATTED_END_TOKEN,
213			line => $line, column => $column};
214			push @nt, {type => BLOCK_END_TAG_TOKEN, tag_name => $1,
215			line => $line, column => $column};
216			last;
217			} else {
218			push @nt, {type => CHARACTER_TOKEN, data => "\x0A",
219			line => $line, column => $column};
220			$tokenize_text->(\$s);
221			}
222			}
223			return shift @nt;
224			} elsif ($s =~ s/^(\+)\s//) {
225			push @nt, {type => HEADING_START_TOKEN, depth => length $1,
226			line => $line, column => $column};
227			$column += $+[0] - $-[0];
228			$tokenize_text->(\$s);
229			push @nt, {type => HEADING_END_TOKEN,
230			line => $line, column => $column};
231			undef $continuous_line;
232			return shift @nt;
233			} elsif ($s =~ s/^([-=]+)\s*//) {
234			push @nt, {type => LIST_START_TOKEN, depth => $1,
235			line => $line, column => $column};
236			$column += $+[0] - $-[0];
237			$tokenize_text->(\$s);
238			$continuous_line = 1;
239			return shift @nt;
240			} elsif ($s =~ s/^:([^:]*)//) {
241			my $name = $1;
242			if ($s eq '') {
243			push @nt, {type => CHARACTER_TOKEN, data => ':',
244			line => $line, column => $column};
245			$column++;
246			$tokenize_text->(\$name);
247			} else {
248			my $real_column = $column + 1 + length $name;
249			push @nt, {type => LABELED_LIST_START_TOKEN,
250			line => $line, column => $column};
251			$name =~ s/\A[\x09\x20]*//;
252			$column += 1 + $+[0] - $-[0];
253			$name =~ s/[\x09\x20]+\z//;
254			$tokenize_text->(\$s);
255			$column = $real_column;
256			push @nt, {type => LABELED_LIST_MIDDLE_TOKEN,
257			line => $line, column => $column};
258			$column += $+[0] - $-[0] if $data =~ s/^:[\x09\x20]*//;
259			$tokenize_text->(\$s);
260			}
261			$continuous_line = 1;
262			return shift @nt;
263			} elsif ($s =~ s/^(>+)//) {
264			my $depth = length $1;
265			if ($depth == 2 and $s =~ /^[0-9]/) {
266			push @nt, {type => CHARACTER_TOKEN, data => "\x0A",
267			line => $line, column => $column}
268			if $continuous_line;
269			$s = '>>' . $s;
270			$tokenize_text->(\$s);
271			} else {
272			push @nt, {type => QUOTATION_START_TOKEN, depth => $depth,
273			line => $line, column => $column};
274			$column += $depth;
275			$column += $+[0] - $-[0] if $s =~ s/^[\x09\x20]+//;
276			if ($s =~ s/^\@\@[\x09\x20]*//) {
277			push @nt, {type => EDITORIAL_NOTE_START_TOKEN,
278			line => $line, column => $column};
279			$column += $+[0] - $-[0];
280			} elsif ($s =~ s/^;;[\x09\x20]*//) {
281			push @nt, {type => COMMENT_PARAGRAPH_START_TOKEN,
282			line => $line, column => $column};
283			$column += $+[0] - $-[0];
284			}
285			$tokenize_text->(\$s);
286			}
287			$continuous_line = 1;
288			return shift @nt;
289			} elsif ($s =~ /\A\[(INS\|DEL)(?>\(([^()\\])\))?\[[\x09\x20]\z/) {
290			undef $continuous_line;
291			return {type => BLOCK_START_TAG_TOKEN, tag_name => $1,
292			classes => $2,
293			line => $line, column => $column};
294			} elsif ($s =~ /\A\[PRE(?>\(([^()\\])\))?\[[\x09\x20]\z/) {
295			undef $continuous_line;
296			push @nt, {type => BLOCK_START_TAG_TOKEN, tag_name => 'PRE',
297			classes => $1,
298			line => $line, column => $column};
299			while (@s) {
300			my $s = shift @s;
301			($line, $column) = ($line + 1, 1);
302			if ($s =~ /\A\]PRE\][\x09\x20]*\z/) {
303			push @nt, {type => BLOCK_END_TAG_TOKEN, tag_name => 'PRE',
304			line => $line, column => $column};
305			undef $continuous_line;
306			break;
307			} else {
308			push @nt, {type => CHARACTER_TOKEN, data => "\x0A",
309			line => $line, column => $column}
310			if $continuous_line;
311			$tokenize_text->(\$s);
312			$continuous_line = 1;
313			}
314			}
315			return shift @nt;
316			} elsif ($s =~ s/^\@\@[\x09\x20]*//) {
317			push @nt, {type => EDITORIAL_NOTE_START_TOKEN,
318			line => $line, column => $column};
319			$column += $+[0] - $-[0];
320			$tokenize_text->(\$s);
321			$continuous_line = 1;
322			return shift @nt;
323			} elsif ($s =~ s/^;;[\x09\x20]*//) {
324			push @nt, {type => COMMENT_PARAGRAPH_START_TOKEN,
325			line => $line, column => $column};
326			$column += $+[0] - $-[0];
327			$tokenize_text->(\$s);
328			undef $continuous_line;
329			return shift @nt;
330			} elsif ($s =~ /\A\](INS\|DEL)\][\x09\x20]*\z/) {
331			$continuous_line = 1;
332			return {type => BLOCK_END_TAG_TOKEN, tag_name => $1,
333			line => $line, column => $column};
334			} elsif ($s =~ /^,/) {
335			push @nt, {type => TABLE_ROW_START_TOKEN,
336			line => $line, column => $column};
337			while ($s =~ s/^,[\x09\x20]*//) {
338			$column += $+[0] - $-[0];
339			my $cell;
340			my $cell_quoted;
341			my $column_quoted = $column;
342			my $column_cell = $column;
343			if ($s =~ s/^"//) {
344			$s =~ s/^((?>[^"\\]\|\\.)*)//;
345			$cell_quoted = $1;
346			$column += 1 + length $cell_quoted;
347			$cell_quoted =~ tr/\\//d;
348			$column++ if $s =~ s/\A\\\z//;
349			$column++ if $s =~ s/^"//;
350			}
351			if ($s =~ s/^([^,]+)//) {
352			$cell = $1;
353			$column += length $cell;
354			$cell =~ s/[\x09\x20]+\z//;
355			}
356			if (not defined $cell_quoted and defined $cell and
357			$cell eq '==') {
358			push @nt, {type => TABLE_COLSPAN_CELL_TOKEN,
359			line => $line, column => $column_cell};
360			} else {
361			push @nt, {type => TABLE_CELL_START_TOKEN,
362			line => $line,
363			column => defined $column_quoted ? $column_quoted: $column_cell};
364			my $real_column = $column;
365			$column = $column_quoted + 1;
366			$tokenize_text->(\$cell_quoted) if defined $cell_quoted;
367			## NOTE: When a quoted-pair is used, column numbers
368			## reported in this $tokenize_text call might be wrong.
369			$column = $column_cell;
370			$tokenize_text->(\$cell) if defined $cell;
371			$column = $column_quoted;
372			push @nt, {type => TABLE_CELL_END_TOKEN,
373			line => $line,
374			column => $column};
375			}
376			}
377			push @nt, {type => TABLE_ROW_END_TOKEN,
378			line => $line, column => $column};
379			undef $continuous_line;
380			return shift @nt;
381			} elsif ($s eq '__IMAGE__') {
382			my $image = $doc->create_element_ns ($NS_SW09, 'image');
383			$_->set_user_data (manakai_source_line => $line);
384			$_->set_user_data (manakai_source_column => 1);
385			$image->text_content (join "\x0A", $s, @s);
386			($line, $column) = ($line + @s, 1);
387			@s = ();
388			$doc->document_element->append_child ($image);
389			return {type => END_OF_FILE_TOKEN,
390			line => $line, column => $column};
391			} else {
392			push @nt, {type => CHARACTER_TOKEN, data => "\x0A",
393			line => $line, column => $column} if $continuous_line;
394			$tokenize_text->(\$s);
395			$continuous_line = 1;
396			return shift @nt;
397			}
398			}; # $get_next_token_body
399
400			## NOTE: The "initial" mode.
401			if (@s and $s[0] =~ /^#\?/) {
402			## NOTE: "Parse a magic line".
403
404			my $s = shift @s;
405
406			## TODO:...
407
408			$line = 2;
409			$column = 0;
410			}
411
412			## NOTE: Switched to the "body" mode.
413			$get_next_token->();
414
415			} # parse_char_string
416
417			1;