/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log | View Patch

revision 1.12 by wakaba, Sun Apr 27 11:21:09 2008 UTC | revision 1.14 by wakaba, Tue Apr 29 02:50:00 2008 UTC
# Line 2  Line 2 
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Live Scripting HTML Parser</title>  <title>Live Scripting HTML Parser</title>
5    <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6    <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7        title="GNU GPL2 or later">
8  <style>  <style>
9    h1, h2 {    h1 {
10        margin: 0;
11        font-size: 150%;
12      }
13      h2 {
14      margin: 0;      margin: 0;
15      font-size: 100%;      font-size: 100%;
16    }    }
17    p, pre {    p {
18      margin: 0;      margin: 0 1em;
19    }    }
20    textarea {    textarea {
21      width: 100%;      width: 100%;
# Line 85  Line 92 
92    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
93      var p = this;      var p = this;
94      var i = this.input;      var i = this.input;
95      if (this.parseMode == 'script') {      if (this.parseMode == 'cdata') {
96          var tagName = this.endTagName;
97        var token;        var token;
98        if (p.insertionPoint <= 0) {        if (p.insertionPoint <= 0) {
99          return {type: 'abort'};          return {type: 'abort'};
# Line 103  Line 111 
111          return '';          return '';
112        });        });
113        if (token) return token;        if (token) return token;
114        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {        var pattern = new RegExp ('^</' + tagName + '>', 'i');
115          i.s = i.s.replace (pattern, function (s) {
116          if (p.insertionPoint < s.length) {          if (p.insertionPoint < s.length) {
117            token = {type: 'abort'};            token = {type: 'abort'};
118            return s;            return s;
119          }          }
120          token = {type: 'end-tag', value: 'script'};          token = {type: 'end-tag', value: tagName};
121          p.insertionPoint -= s.length;          p.insertionPoint -= s.length;
122          return '';          return '';
123        });        });
124        if (token) return token;        if (token) return token;
125        var m;        var m;
126        if ((p.insertionPoint < '</script'.length) &&        if ((p.insertionPoint < ('</' + tagName).length) &&
127            (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {            (m = i.s.match (/^<\/([A-Za-z]+)/))) {
128          var v = m[1].substring (0, p.insertionPoint).toLowerCase ();          var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
129          if (v == 'script'.substring (0, p.insertionPoint - '</'.length)) {          if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
130            return {type: 'abort'};            return {type: 'abort'};
131          }          }
132        }        }
# Line 222  Line 231 
231            el.manakaiParserInserted = true;            el.manakaiParserInserted = true;
232    
233            // 3. Switch the tokeniser's content model flag to the CDATA state.            // 3. Switch the tokeniser's content model flag to the CDATA state.
234            this.parseMode = 'script';            this.parseMode = 'cdata';
235              this.endTagName = 'script';
236    
237            // 4.1. Collect all the character tokens.            // 4.1. Collect all the character tokens.
238            while (true) {            while (true) {
# Line 236  Line 246 
246              // 4.2. Until it returns a token that is not a character token, or              // 4.2. Until it returns a token that is not a character token, or
247              // until it stops tokenising.              // until it stops tokenising.
248              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
249                         (token.type == 'end-tag' && token.value == 'script') ||                         token.type == 'end-tag' ||
250                         token.type == 'abort') {                         token.type == 'abort') {
251                // 6. Switched back to the PCDATA state.                // 6. Switched back to the PCDATA state.
252                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
253    
254                // 7.1. If the next token is not an end tag token with ...                // 7.1. If the next token is not an end tag token with ...
255                if (token.type != 'end-tag') {                if (!(token.type == 'end-tag' && token.value == 'script')) {
256                  // 7.2. This is a parse error.                  // 7.2. This is a parse error.
257                  log ('Parse error: no </' + 'script>');                  log ('Parse error: no </' + 'script>');
258    
# Line 306  Line 316 
316                //                //
317              }              }
318            }            }
319            } else if (token.value == 'style' ||
320                       token.value == 'noscript' ||
321                       token.value == 'xmp') {
322              // 1. Create an element for the token in the HTML namespace.
323              var el = new JSElement (this.doc, token.value);
324    
325              // 2. Append the new element to the current node.
326              this.openElements[this.openElements.length - 1].appendChild (el);
327    
328              // 3. Switch the tokeniser's content model flag to the CDATA state.
329              this.parseMode = 'cdata';
330              this.endTagName = token.value;
331    
332              // 4.1. Collect all the character tokens.
333              while (true) {
334                var token = this.getNextToken ();
335                log ('token: ' + token.type + ' "' + token.value + '"');
336    
337                if (token.type == 'char') {
339                  // 5. Append a single Text node to the new element node.
339                  el.manakaiAppendText (token.value);
340    
341                // 4.2. Until it returns a token that is not a character token, or
342                // until it stops tokenising.
343                } else if (token.type == 'eof' ||
344                           token.type == 'end-tag' ||
345                           token.type == 'abort') {
346                  // 6. Switched back to the PCDATA state.
347                  this.parseMode = 'pcdata';
348    
349                  // 7.1. If the next token is not an end tag token with ...
350                  if (!(token.type == 'end-tag' &&
351                        token.value == this.endTagName)) {
352                    // 7.2. This is a parse error.
353                    log ('Parse error: no </' + this.endTagName + '>');
354    
355                    // 7.3. Mark the element as "already executed".
356                    el.manakaiAlreadyExecuted = true;
357                  } else {
358                    // 7.4. Ignore it.
359                    //
360                  }
361                  break;
362                }
363              }
364          } else {          } else {
365            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
366            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
# Line 390  Line 445 
445    
446      log ('DOMContentLoaded event fired');      log ('DOMContentLoaded event fired');
447    
448      // "delays tha load event" things has completed:      // "delays the load event" things has completed:
449      // readyState = 'complete'      // readyState = 'complete'
450      log ('load event fired');      log ('load event fired');
451    
# Line 791  algorithm.  Especially, this parser: Line 846  algorithm.  Especially, this parser:
846  algorithm, and so on.  algorithm, and so on.
847  <li>Does not raise parse errors for invalid attribute specifications in start  <li>Does not raise parse errors for invalid attribute specifications in start
848  or end tags.  or end tags.
849  <li>Does not support CDATA/PCDATA element other than <code>script</code>.  <li>Does not support PCDATA elements (<code>title</code> and
850    <code>textarea</code>).
851    <li>Does not strip the first newline in <code>pre</code> elements.
852  <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule  <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
853  in <code>script</code> element.  in <code>script</code> element.
854  <li>Does not support foreign (SVG or MathML) elements.  <li>Does not support foreign (SVG or MathML) elements.
# Line 831  when it is inserted into the document, n Line 888  when it is inserted into the document, n
888  executed.  Currently it does not matter, since we don't allow dynamic  executed.  Currently it does not matter, since we don't allow dynamic
889  modification to the |src| content/DOM attribute value yet. -->  modification to the |src| content/DOM attribute value yet. -->
890    
 <!-- TODO: license -->  
   
891  </body>  </body>
 </html>  
892    </html>
893    <!-- $Date$ -->
894    <!--
895    
896    Copyright 2008 Wakaba <w@suika.fam.cx>
897    
898    This program is free software; you can redistribute it and/or
899    modify it under the terms of the GNU General Public License
900    as published by the Free Software Foundation; either version 2
901    of the License, or (at your option) any later version.
902    
903    This program is distributed in the hope that it will be useful,
904    but WITHOUT ANY WARRANTY; without even the implied warranty of
905    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
906    GNU General Public License for more details.
907    
908    You should have received a copy of the GNU General Public License
909    along with this program; if not, write to the Free Software
910    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
911    
912    -->

Legend:
Removed from v.1.12  
changed lines
  Added in v.1.14

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24