/[suikacvs]/markup/html/scripting-parser/parser.html

Diff of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log | View Patch Patch

-revision 1.11 by wakaba,
Sun Apr 27 10:44:36 2008 UTC
+revision 1.14 by wakaba,
Tue Apr 29 02:50:00 2008 UTC
 Line 2
  <html lang=en>
  <head>
  <title>Live Scripting HTML Parser</title>
+ <link rel=author href="http://suika.fam.cx/~wakaba/who?">
+ <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
+     title="GNU GPL2 or later">
  <style>
-   h1, h2 {
+   h1 {
+     margin: 0;
+     font-size: 150%;
+   }
+   h2 {
      margin: 0;
      font-size: 100%;
    }
-   p, pre {
+   p {
-     margin: 0;
+     margin: 0 1em;
    }
    textarea {
      width: 100%;
-Line 79
+Line 86
      this.input = i;
      this.scriptsExecutedAfterParsing = [];
      this.scriptsExecutedSoon = [];
+     this.scriptsExecutedAsynchronously = [];
    } // Parser
    Parser.prototype.getNextToken = function () {
      var p = this;
      var i = this.input;
-     if (this.parseMode == 'script') {
+     if (this.parseMode == 'cdata') {
+       var tagName = this.endTagName;
        var token;
        if (p.insertionPoint <= 0) {
          return {type: 'abort'};
-Line 102
+Line 111
          return '';
        });
        if (token) return token;
-       i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
+       var pattern = new RegExp ('^</' + tagName + '>', 'i');
+       i.s = i.s.replace (pattern, function (s) {
          if (p.insertionPoint < s.length) {
            token = {type: 'abort'};
            return s;
          }
-         token = {type: 'end-tag', value: 'script'};
+         token = {type: 'end-tag', value: tagName};
          p.insertionPoint -= s.length;
          return '';
        });
        if (token) return token;
        var m;
-       if ((p.insertionPoint < '</script'.length) &&
+       if ((p.insertionPoint < ('</' + tagName).length) &&
-           (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
+           (m = i.s.match (/^<\/([A-Za-z]+)/))) {
          var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
-         if (v == 'script'.substring (0, p.insertionPoint - '</'.length)) {
+         if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
            return {type: 'abort'};
          }
        }
-Line 221
+Line 231
            el.manakaiParserInserted = true;
            // 3. Switch the tokeniser's content model flag to the CDATA state.
-           this.parseMode = 'script';
+           this.parseMode = 'cdata';
+           this.endTagName = 'script';
            // 4.1. Collect all the character tokens.
            while (true) {
-Line 235
+Line 246
              // 4.2. Until it returns a token that is not a character token, or
              // until it stops tokenising.
              } else if (token.type == 'eof' ||
-                        (token.type == 'end-tag' && token.value == 'script') ||
+                        token.type == 'end-tag' ||
                         token.type == 'abort') {
                // 6. Switched back to the PCDATA state.
                this.parseMode = 'pcdata';
                // 7.1. If the next token is not an end tag token with ...
-               if (token.type != 'end-tag') {
+               if (!(token.type == 'end-tag' && token.value == 'script')) {
                  // 7.2. This is a parse error.
                  log ('Parse error: no </' + 'script>');
-Line 305
+Line 316
                //
              }
            }
+         } else if (token.value == 'style' ||
+                    token.value == 'noscript' ||
+                    token.value == 'xmp') {
+           // 1. Create an element for the token in the HTML namespace.
+           var el = new JSElement (this.doc, token.value);
+           // 2. Append the new element to the current node.
+           this.openElements[this.openElements.length - 1].appendChild (el);
+           // 3. Switch the tokeniser's content model flag to the CDATA state.
+           this.parseMode = 'cdata';
+           this.endTagName = token.value;
+           // 4.1. Collect all the character tokens.
+           while (true) {
+             var token = this.getNextToken ();
+             log ('token: ' + token.type + ' "' + token.value + '"');
+             if (token.type == 'char') {
+               // 5. Append a single Text node to the script element node.
+               el.manakaiAppendText (token.value);
+             // 4.2. Until it returns a token that is not a character token, or
+             // until it stops tokenising.
+             } else if (token.type == 'eof' ||
+                        token.type == 'end-tag' ||
+                        token.type == 'abort') {
+               // 6. Switched back to the PCDATA state.
+               this.parseMode = 'pcdata';
+               // 7.1. If the next token is not an end tag token with ...
+               if (!(token.type == 'end-tag' &&
+                     token.value == this.endTagName)) {
+                 // 7.2. This is a parse error.
+                 log ('Parse error: no </' + this.endTagName + '>');
+                 // 7.3. Mark the script element as "already executed".
+                 el.manakaiAlreadyExecuted = true;
+               } else {
+                 // 7.4. Ignore it.
+                 //
+               }
+               break;
+             }
+           }
          } else {
            var el = new JSElement (this.doc, token.value);
            this.openElements[this.openElements.length - 1].appendChild (el);
-Line 335
+Line 391
      // "When a script completes loading" rules start applying.
-     // List of scripts that will execute as soon as possible
+     while (this.scriptsExecutedSoon.length > 0 ||
-     for (var i = 0; i < this.scriptsExecutedSoon.length; i++) {
+            this.scriptsExecutedAsynchronously.length > 0) {
-       var e = this.scriptsExecutedSoon[i];
+       // Handle "list of scripts that will execute as soon as possible".
+       while (this.scriptsExecutedSoon.length > 0) {
-       // If it has completed loading
+         var e = this.scriptsExecutedSoon.shift ();
-       log ('Execute an external script not inserted by parser...');
-       executeScript (this.doc, e);
+         // If it has completed loading
+         log ('Execute an external script not inserted by parser...');
+         executeScript (this.doc, e);
+         // NOTE: It MAY be executed before the end of the parsing, according
+         // to the spec.
+         this.hasAsyncScript = true;
+       }
+       // Handle "list of scripts that will execute asynchronously".
+       while (this.scriptsExecutedAsynchronously.length > 0) {
+         var e = this.scriptsExecutedAsynchronously.shift ();
+         // Step 1.
+         // We assume that all scripts have been loaded at this time.
+         // Step 2.
+         log ('Execute an asynchronous script...');
+         executeScript (this.doc, e);
+         // Step 3.
+         //
+         // Step 4.
+         //
-       // NOTE: It MAY be executed before the end of the parsing, according
+         this.hasAsyncScript = true;
-       // to the spec.
+       }
-       this.hasAsyncScript = true;
      }
-     // TODO: Handles
-     // "list of scripts that will execute asynchronously"
      // Handle "list of scripts that will execute when the document has finished
      // parsing".
      var list = this.scriptsExecutedAfterParsing;
-Line 369
+Line 445
      log ('DOMContentLoaded event fired');
-     // "delays tha load event" things has completed:
+     // "delays the load event" things have completed:
      // readyState = 'complete'
      log ('load event fired');
-Line 444
+Line 520
          p.scriptsExecutedAfterParsing.push (e);
          log ('Running a script: aborted (defer)');
        } else if (e.async && e.src != null) {
-         // TODO
+         p.scriptsExecutedAsynchronously.push (e);
-       } else if (e.async && e.src == null
+         log ('Running a script: aborted (async src)');
-                  /* && list of scripts that will execute asynchronously is not empty */) {
+       } else if (e.async && e.src == null &&
-         // TODO
+                  p.scriptsExecutedAsynchronously.length > 0) {
+         p.scriptsExecutedAsynchronously.push (e);
+         log ('Running a script: aborted (async)');
+         // ISSUE: What is the difference with the case above?
        } else if (e.src != null && e.manakaiParserInserted) {
          if (p.scriptExecutedWhenParserResumes) {
            log ('Error: There is a script that will execute as soon as the parser resumes.');
-Line 767 
 algorithm.  Especially, this parser:
+Line 846 
 algorithm.  Especially, this parser:
  algorithm, and so on.
  <li>Does not raise parse errors for invalid attribute specifications in start
  or end tags.
- <li>Does not support CDATA/PCDATA element other than <code>script</code>.
+ <li>Does not support PCDATA elements (<code>title</code> and
+ <code>textarea</code>).
+ <li>Does not strip the first newline in <code>pre</code> elements.
  <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
  in <code>script</code> element.
  <li>Does not support foreign (SVG or MathML) elements.
-Line 792 
 the <abbr title="Uniform Resource Identi
+Line 873 
 the <abbr title="Uniform Resource Identi
  the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
  <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
  string literals.
+ <li>Does not handle the <i>stop parsing</i> phase correctly if the document is
+ replaced by a <code>document.open ()</code> call.  In other words, delayed
+ (deferred or asynchronous) script executions and event firings might be
+ treated in a wrong way if a <code>document.open ()</code> invocation
+ is implicitly done by <code>document.write ()</code> in a delayed script.
  </ul>
  <p>For some reason, this parser does not work in browsers that do
  not support JavaScript 1.5.
- <!-- TODO: license -->
+ <!-- TODO: |src| attribute value should refer to the value at the time
+ when it is inserted into the document, not the value when the script is
+ executed.  Currently it does not matter, since we don't allow dynamic
+ modification to the |src| content/DOM attribute value yet. -->
  </body>
- </html>
+ </html>
+ <!-- $Date$ -->
+ <!--
+ Copyright 2008 Wakaba <w@suika.fam.cx>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ -->

 Legend:



Removed from v.1.11
 


changed lines


 
Added in v.1.14
 Legend:



Removed from v.1.11
 


changed lines


 
Added in v.1.14
-Removed from v.1.11
+Added in v.1.14

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24