/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log | View Patch

revision 1.14 by wakaba, Tue Apr 29 02:50:00 2008 UTC revision 1.21 by wakaba, Sun Jun 20 03:39:12 2010 UTC
# Line 65  Line 65 
65    
66    var logIndentLevel = 0;    var logIndentLevel = 0;
67    function log (s) {    function log (s) {
68        var indent = '';
69      for (var i = 0; i < logIndentLevel; i++) {      for (var i = 0; i < logIndentLevel; i++) {
70        s = '  ' + s;        indent += '  ';
71      }      }
72        s = indent + s.replace (/\n/g, "\n" + indent);
73      document.logElement.appendChild (document.createTextNode (s + "\n"));      document.logElement.appendChild (document.createTextNode (s + "\n"));
74    } // log    } // log
75    
# Line 81  Line 83 
83        doc = new JSDocument (this);        doc = new JSDocument (this);
84        doc.manakaiIsHTML = true;        doc.manakaiIsHTML = true;
85      }      }
86        this.nextToken = [];
87      this.doc = doc;      this.doc = doc;
88      this.openElements = [doc];      this.openElements = [doc];
89      this.input = i;      this.input = i;
# Line 90  Line 93 
93    } // Parser    } // Parser
94    
95    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
96        if (this.nextToken.length) {
97          return this.nextToken.shift ();
98        }
99    
100      var p = this;      var p = this;
101      var i = this.input;      var i = this.input;
102      if (this.parseMode == 'cdata') {      if (this.parseMode == 'cdata') {
# Line 144  Line 151 
151      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
152        if (p.insertionPoint < s.length ||        if (p.insertionPoint < s.length ||
153            (p.insertionPoint <= s.length &&            (p.insertionPoint <= s.length &&
154             s.substring (s.length - 1, 1) != '>')) {             s.substring (s.length - 1, s.length) != '>')) {
155          token = {type: 'abort'};          token = {type: 'abort'};
156          return s;          return s;
157        }        }
# Line 156  Line 163 
163      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
164        if (p.insertionPoint < s.length ||        if (p.insertionPoint < s.length ||
165            (p.insertionPoint <= s.length &&            (p.insertionPoint <= s.length &&
166             s.substring (s.length - 1, 1) != '>')) {             s.substring (s.length - 1, s.length) != '>')) {
167          token = {type: 'abort'};          token = {type: 'abort'};
168          return s;          return s;
169        }        }
# Line 219  Line 226 
226        var token = this.getNextToken ();        var token = this.getNextToken ();
227        log ('token: ' + token.type + ' "' + token.value + '"');        log ('token: ' + token.type + ' "' + token.value + '"');
228    
229          if (this.cdataEndTagRequired) {
230            // Generic CDATA parsing algorithm
231    
232            if (token.type != 'abort') {
233              // 7.
234              if (token.type == 'end-tag' && token.value == this.endTagName) {
235                // 7.1. Ignores it.
236                //
237              } else {
238                // 7.2. Parse error.
239                log ('Parse error: no </' + this.endTagName + '>');
240                this.nextToken.unshift (token);
241              }
242              this.cdataEndTagRequired = false;
243              continue;
244            }
245          }
246    
247        if (token.type == 'start-tag') {        if (token.type == 'start-tag') {
248          if (token.value == 'script') {          if (token.value == 'script') {
249            // 1. Create an element for the token in the HTML namespace.            // 1. Create an element for the token in the HTML namespace.
# Line 255  Line 280 
280                if (!(token.type == 'end-tag' && token.value == 'script')) {                if (!(token.type == 'end-tag' && token.value == 'script')) {
281                  // 7.2. This is a parse error.                  // 7.2. This is a parse error.
282                  log ('Parse error: no </' + 'script>');                  log ('Parse error: no </' + 'script>');
283                    this.nextToken.unshift (token);
284    
285                  // 7.3. Mark the script element as "already executed".                  // 7.3. Mark the script element as "already executed".
286                  el.manakaiAlreadyExecuted = true;                  el.manakaiAlreadyExecuted = true;
# Line 269  Line 295 
295            // 8.1. If the parser were originally created for the ...            // 8.1. If the parser were originally created for the ...
296            if (this.fragmentParsingMode) {            if (this.fragmentParsingMode) {
297              // 8.2. Mark the script element as "already executed" and ...              // 8.2. Mark the script element as "already executed" and ...
298              el.alreadyExecuted = true;              el.manakaiAlreadyExecuted = true;
299              continue;              continue;
300            }            }
301    
# Line 286  Line 312 
312            oldInsertionPoint += this.insertionPoint;            oldInsertionPoint += this.insertionPoint;
313            this.setInsertionPoint (oldInsertionPoint);            this.setInsertionPoint (oldInsertionPoint);
314    
315            // 12. If there is a script that will execute as soon as ...            // 12. If there is a pending external script
316            while (this.scriptExecutedWhenParserResumes) {            while (this.pendingExternalScript) {
317              // 12.1. If the tree construction stage is being called reentrantly              // 12.1. If the tree construction stage is being called reentrantly
318              if (this.reentrant) {              if (this.reentrant) {
319                log ('parse: abort (reentrance)');                log ('parse: abort (reentrance)');
# Line 297  Line 323 
323              // 12.2. Otherwise              // 12.2. Otherwise
324              } else {              } else {
325                // 1.                // 1.
326                var script = this.scriptExecutedWhenParserResumes;                var script = this.pendingExternalScript;
327                this.scriptExecutedWhenParserResumes = null;                this.pendingExternalScript = null;
328    
329                // 2. Pause until the script has completed loading.                // 2. Pause until the script has completed loading.
330                //                //
# Line 346  Line 372 
372                // 6. Switched back to the PCDATA state.                // 6. Switched back to the PCDATA state.
373                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
374    
375                  if (token.type == 'abort') {
376                    this.cdataEndTagRequired = true;
377                    break;
378                  }
379    
380                // 7.1. If the next token is not an end tag token with ...                // 7.1. If the next token is not an end tag token with ...
381                if (!(token.type == 'end-tag' &&                if (!(token.type == 'end-tag' &&
382                      token.value == this.endTagName)) {                      token.value == this.endTagName)) {
383                  // 7.2. This is a parse error.                  // 7.2. This is a parse error.
384                  log ('Parse error: no </' + this.endTagName + '>');                  log ('Parse error: no </' + this.endTagName + '>');
385                    this.nextToken.unshift (token);
386    
387                  // 7.3. Mark the script element as "already executed".                  // 7.3. Mark the script element as "already executed".
388                  el.manakaiAlreadyExecuted = true;                  el.manakaiAlreadyExecuted = true;
# Line 489  Line 521 
521        var doc = this.ownerDocument || this;        var doc = this.ownerDocument || this;
522        var p = doc._parser;        var p = doc._parser;
523    
524        // 1. Script type        // 1. The script's type
525          //
526    
527          // 2. The script's character encoding
528        //        //
529    
530        // 2.1. If scripting is disabled        // 3.1. If without script
531        //        //
532        // 2.2. If the script element was created by an XML ... innerHTML ...        // 2.2. If the script element was created by an XML ... innerHTML ...
533        //        //
534        // 2.3. If the user agent does not support the scripting language ...        // 2.3. If the user agent does not support the scripting language ...
535        //        //
536        // 2.4. If the script element has its "already executed" flag set        if (false) {
       if (e.manakaiAlreadyExecuted) {  
537          // 2.5. Abort these steps at this point.          // 2.5. Abort these steps at this point.
538          log ('Running a script: aborted');          log ('Running a script: aborted (noscript)');
539          logIndentLevel--;          logIndentLevel--;
540          return e;          return e;
541        }        }
542    
543        // 3. Set the element's "already executed" flag.        // 4. Set the element's "already executed" flag.
544        e.manakaiAlreadyExecuted = true;        e.manakaiAlreadyExecuted = true;
545    
546        // 4. If the element has a src attribute, then a load for ...        // 5. If the element has a src attribute, then a load for ...
547        // TODO: load an external resource        // TODO: load an external resource
548    
549        // 5. The first of the following options:        // 5. The first of the following options:
550    
       // 5.1.  
551        if (/* TODO: If the document is still being parsed && */        if (/* TODO: If the document is still being parsed && */
552            e.defer && !e.async) {            e.defer && !e.async) {
553            // 6.1.
554          p.scriptsExecutedAfterParsing.push (e);          p.scriptsExecutedAfterParsing.push (e);
555          log ('Running a script: aborted (defer)');          log ('Running a script: aborted (defer)');
556        } else if (e.async && e.src != null) {        } else if (e.async && e.src != null) {
557            // 6.2.
558          p.scriptsExecutedAsynchronously.push (e);          p.scriptsExecutedAsynchronously.push (e);
559          log ('Running a script: aborted (async src)');          log ('Running a script: aborted (async src)');
560        } else if (e.async && e.src == null &&        } else if (e.async && e.src == null &&
561                   p.scriptsExecutedAsynchronously.length > 0) {                   p.scriptsExecutedAsynchronously.length > 0) {
562            // 6.3.
563          p.scriptsExecutedAsynchronously.push (e);          p.scriptsExecutedAsynchronously.push (e);
564          log ('Running a script: aborted (async)');          log ('Running a script: aborted (async)');
         // ISSUE: What is the difference with the case above?  
565        } else if (e.src != null && e.manakaiParserInserted) {        } else if (e.src != null && e.manakaiParserInserted) {
566          if (p.scriptExecutedWhenParserResumes) {          // 6.4.
567            log ('Error: There is a script that will execute as soon as the parser resumes.');          if (p.pendingExternalScript) {
568              log ('Error: There is a pending external script.');
569          }          }
570          p.scriptExecutedWhenParserResumes = e;          p.pendingExternalScript = e;
571          log ('Running a script: aborted (src parser-inserted)');          log ('Running a script: aborted (src parser-inserted)');
572        } else if (e.src != null) {        } else if (e.src != null) {
573            // 6.5.
574          p.scriptsExecutedSoon.push (e);          p.scriptsExecutedSoon.push (e);
575          log ('Running a script: aborted (src)');          log ('Running a script: aborted (src)');
576        } else {        } else {
577            // 6.6.
578          executeScript (doc, e); // even if other scripts are already executing.          executeScript (doc, e); // even if other scripts are already executing.
579        }        }
580    
# Line 566  Line 604 
604      }      }
605    
606      // If the load was successful      // If the load was successful
     log ('load event fired at the script element');  
607    
608      if (true) {      if (true) {
609      // Scripting is enabled, Document.designMode is disabled,      // Scripting is enabled, Document.designMode is disabled,
# Line 575  Line 612 
612        parseAndRunScript (doc, s);        parseAndRunScript (doc, s);
613      }      }
614    
615        log ('load event fired at the script element');
616    
617      log ('executing a script block: end');      log ('executing a script block: end');
618    } // executeScript    } // executeScript
619    
# Line 612  Line 651 
651          doc.write.apply (doc, args);          doc.write.apply (doc, args);
652          return '';          return '';
653        });        });
654        s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,        var noDocumentElement = false;
655          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
656        function (s, t, u) {        function (s, t, u) {
657          matched = true;          matched = true;
658          var args = [unescapeJSLiteral (t ? t : u)];          var args = [unescapeJSLiteral (t ? t : u)];
659          doc._insertExternalScript.apply (doc, args);          noDocumentElement = !doc._insertExternalScript.apply (doc, args);
660            return '';
661          });
662          if (noDocumentElement) {
663            log ('Script error: documentElement is null');
664            break;
665          }
666          s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
667          function (s, t) {
668            matched = true;
669            log (dumpTree (doc, ''));
670          return '';          return '';
671        });        });
672        if (s == '') break;        if (s == '') break;
# Line 701  Line 751 
751    }; // document.open    }; // document.open
752    
753    JSDocument.prototype.write = function () {    JSDocument.prototype.write = function () {
754        log ('document.write: start');
755      logIndentLevel++;      logIndentLevel++;
756    
757      var p = this._parser;      var p = this._parser;
# Line 720  Line 771 
771          + p.input.s.substring (p.insertionPoint, p.input.s.length);          + p.input.s.substring (p.insertionPoint, p.input.s.length);
772      p.insertionPoint += s.length;      p.insertionPoint += s.length;
773    
774      // 3. If there is a script that will execute as soon as the parser resumes      // 3. If there is a pending external script
775      if (p.scriptExecutedAfterParserResumes) {      if (p.pendingExternalScript) {
776        log ('document.write: processed later (there is an unprocessed <script src>)');        log ('document.write: processed later (there is an unprocessed <script src>)');
777        logIndentLevel--;        logIndentLevel--;
778          log ('document.write: return');
779        return;        return;
780      }      }
781    
# Line 737  Line 789 
789      // to do something here?      // to do something here?
790    
791      // 5. Return      // 5. Return
792        logIndentLevel--;
793      log ('document.write: return');      log ('document.write: return');
794    
     logIndentLevel--;  
795      return;      return;
796    }; // document.write    }; // document.write
797    
798    JSDocument.prototype._insertExternalScript = function (uri) {    JSDocument.prototype._insertExternalScript = function (uri) {
799      var s = new JSElement (this, 'script');      var s = new JSElement (this, 'script');
800      s.src = uri;      s.src = uri;
801      this.documentElement.appendChild (s);      if (this.documentElement) {
802          this.documentElement.appendChild (s);
803          return true;
804        } else {
805          return false;
806        }
807    }; // _insertExternalScript    }; // _insertExternalScript
808    
809    JSDocument.prototype.__defineGetter__ ('documentElement', function () {    JSDocument.prototype.__defineGetter__ ('documentElement', function () {
# Line 833  document.write ('aaaaaaa&lt;/p>&lt;scrip Line 890  document.write ('aaaaaaa&lt;/p>&lt;scrip
890    
891  <h2 id=notes>Notes</h2>  <h2 id=notes>Notes</h2>
892    
893  <p>This is a <em>simplified</em> implementation of  <p>This is a <em>simplified</em> implementation of <a
894  <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5  href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
895  Parsing Algorithm</a>.  It only implements script-related part of the  Parsing Algorithm</a> (revision 2138).  It only implements
896  algorithm.  Especially, this parser:  scripting-related parts of the algorithm.  Especially, this parser:
897    
898  <ul>  <ul>
899  <li>Does not support <code>DOCTYPE</code> and comment tokens.  <li>Does not support <code>DOCTYPE</code> and comment tokens.
900  <li>Does not support entities except for <code>&amp;quot;</code>,  <li>Does not support entities except for <code>&amp;quot;</code>,
# Line 846  algorithm.  Especially, this parser: Line 904  algorithm.  Especially, this parser:
904  algorithm, and so on.  algorithm, and so on.
905  <li>Does not raise parse errors for invalid attribute specifications in start  <li>Does not raise parse errors for invalid attribute specifications in start
906  or end tags.  or end tags.
907  <li>Does not support PCDATA elements (<code>title</code> and  <li>Does not support RCDATA elements (<code>title</code> and
908  <code>textarea</code>).  <code>textarea</code>).
909  <li>Does not strip the first newline in <code>pre</code> elements.  <li>Does not strip the first newline in <code>pre</code>,
910    <code>listing</code>, and <code>textarea</code> elements.
911  <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule  <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
912  in <code>script</code> element.  in CDATA/RCDATA elements.
913  <li>Does not support foreign (SVG or MathML) elements.  <li>Does not support foreign (SVG or MathML) elements.
914  <li>Only supports <code>script</code> <code>type</code>  <li>Only supports <code>script</code> <code>type</code>
915  <code>text/javascript</code>.  <code>type</code> and <code>language</code>  <code>text/javascript</code>.  <code>type</code> and <code>language</code>
# Line 863  introduced, followed, or separated by wh Line 922  introduced, followed, or separated by wh
922    <li><code>var s = document.createElement ("script");    <li><code>var s = document.createElement ("script");
923              s.src = "<var>string</var>";              s.src = "<var>string</var>";
924              document.documentElement.appendChild (s);</code>              document.documentElement.appendChild (s);</code>
925      <li><code>w (document.documentElement.innerHTML);</code> (This statement
926      can be used to dump the document, even when the document has no
927      document element.  The output format is the tree dump format used
928      in html5lib test data, not <abbr>HTML</abbr>.)
929    </ul>    </ul>
930  Note that strings may be delimited by <code>'</code>s instead of  Note that strings may be delimited by <code>'</code>s instead of
931  <code>"</code>s.  <code>"</code>s.
# Line 888  when it is inserted into the document, n Line 951  when it is inserted into the document, n
951  executed.  Currently it does not matter, since we don't allow dynamic  executed.  Currently it does not matter, since we don't allow dynamic
952  modification to the |src| content/DOM attribute value yet. -->  modification to the |src| content/DOM attribute value yet. -->
953    
954    <p>See also
955    <a href="http://suika.fam.cx/gate/2005/sw/Live%20Scripting%20HTML%20Parser">SuikaWiki:
956    Live Scripting HTML Parser</a>.
957    
958  </body>  </body>
959  </html>  </html>
960  <!-- $Date$ -->  <!-- $Date$ -->

Legend:
Removed from v.1.14  
changed lines
  Added in v.1.21

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24