/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6 by wakaba, Fri Apr 25 13:42:51 2008 UTC revision 1.12 by wakaba, Sun Apr 27 11:21:09 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5  <style>  <style>
6      h1, h2 {
7        margin: 0;
8        font-size: 100%;
9      }
10      p, pre {
11        margin: 0;
12      }
13    textarea {    textarea {
14       display: block;      width: 100%;
15       width: 80%;      -width: 99%;
16       margin-left: auto;      height: 10em;
      margin-right: auto;  
      min-height: 20em;  
17    }    }
18    output {    output {
19      display: block;      display: block;
# Line 18  Line 23 
23    }    }
24  </style>  </style>
25  <script>  <script>
26      var delayedUpdater = 0;
27    
28    function update () {    function update () {
29      document.logElement.textContent = '';      if (delayedUpdater) {
30      var p = new Parser (new InputStream (document.sourceElement.value));        clearTimeout (delayedUpdater);
31      var doc = p.doc;        delayedUpdater = 0;
32      p.parse ();      }
33      log (dumpTree (doc, ''));      delayedUpdater = setTimeout (update2, 100);
34    } // update    } // update
35    
36      function update2 () {
37        var v = document.sourceElement.value;
38        if (v != document.previousSourceText) {
39          document.previousSourceText = v;
40          document.links['permalink'].href
41              = location.pathname + '?s=' + encodeURIComponent (v);
42          document.links['ldvlink'].href
43              = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
44              + encodeURIComponent (v);
45    
46          document.logElement.textContent = '';
47          var p = new Parser (new InputStream (v));
48          var doc = p.doc;
49          p.parse ();
50          
51          log (dumpTree (doc, ''));
52          
53          if (p.hasAsyncScript) {
54            log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
55          }
56        }
57      } // update2
58    
59    var logIndentLevel = 0;    var logIndentLevel = 0;
60    function log (s) {    function log (s) {
61      for (var i = 0; i < logIndentLevel; i++) {      for (var i = 0; i < logIndentLevel; i++) {
# Line 46  Line 76 
76      }      }
77      this.doc = doc;      this.doc = doc;
78      this.openElements = [doc];      this.openElements = [doc];
79      this.in = i;      this.input = i;
80      this.scriptsExecutedAfterParsing = [];      this.scriptsExecutedAfterParsing = [];
81        this.scriptsExecutedSoon = [];
82        this.scriptsExecutedAsynchronously = [];
83    } // Parser    } // Parser
84    
85    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
86      var p = this;      var p = this;
87      var i = this.in;      var i = this.input;
88      if (this.parseMode == 'script') {      if (this.parseMode == 'script') {
89        var token;        var token;
90        if (p.insertionPoint <= 0) {        if (p.insertionPoint <= 0) {
# Line 125  Line 157 
157          tagName = v.toLowerCase ();          tagName = v.toLowerCase ();
158          return '';          return '';
159        });        });
160        e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/,        while (true) {
161        function (x, attrName, attrValue1, attrValue2, attrValue3) {          var m = false;
162          v = attrValue1 || attrValue2 || attrValue3;          e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
163          v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")          function (x, attrName, attrValue1, attrValue2, attrValue3) {
164              .replace (/&amp;/g, '&');            v = attrValue1 || attrValue2 || attrValue3;
165          attrs[attrName.toLowerCase ()] = v;            v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
166          return '';                .replace (/&amp;/g, '&');
167        });            attrs[attrName.toLowerCase ()] = v;
168              m = true;
169              return '';
170            });
171            if (!m) break;
172          }
173        if (e.length) {        if (e.length) {
174          log ('Broken start tag: "' + e + '"');          log ('Broken start tag: "' + e + '"');
175        }        }
# Line 235  Line 272 
272            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
273    
274            // 11. Let the insertion point have the value of the old ...            // 11. Let the insertion point have the value of the old ...
275    
276            oldInsertionPoint += this.insertionPoint;            oldInsertionPoint += this.insertionPoint;
277            this.setInsertionPoint (oldInsertionPoint);            this.setInsertionPoint (oldInsertionPoint);
278    
# Line 298  Line 336 
336    
337      // "When a script completes loading" rules start applying.      // "When a script completes loading" rules start applying.
338    
339      // TODO: Handles "list of scripts that will execute as soon as possible"      while (this.scriptsExecutedSoon.length > 0 ||
340      // and "list of scripts that will execute asynchronously"             this.scriptsExecutedAsynchronously.length > 0) {
341          // Handle "list of scripts that will execute as soon as possible".
342          while (this.scriptsExecutedSoon.length > 0) {
343            var e = this.scriptsExecutedSoon.shift ();
344      
345            // If it has completed loading
346            log ('Execute an external script not inserted by parser...');
347            executeScript (this.doc, e);
348    
349            // NOTE: It MAY be executed before the end of the parsing, according
350            // to the spec.
351            this.hasAsyncScript = true;
352          }
353    
354          // Handle "list of scripts that will execute asynchronously".
355          while (this.scriptsExecutedAsynchronously.length > 0) {
356            var e = this.scriptsExecutedAsynchronously.shift ();
357    
358            // Step 1.
359            // We assume that all scripts have been loaded at this time.
360      
361            // Step 2.
362            log ('Execute an asynchronous script...');
363            executeScript (this.doc, e);
364    
365            // Step 3.
366            //
367    
368            // Step 4.
369            //
370    
371            this.hasAsyncScript = true;
372          }
373        }
374    
375      // Handle "list of scripts that will execute when the document has finished      // Handle "list of scripts that will execute when the document has finished
376      // parsing".      // parsing".
# Line 330  Line 401 
401      if (ip == undefined || ip == null || isNaN (ip)) {      if (ip == undefined || ip == null || isNaN (ip)) {
402        log ('insertion point: set to undefined');        log ('insertion point: set to undefined');
403        this.insertionPoint = undefined;        this.insertionPoint = undefined;
404      } else if (ip == this.in.s.length) {      } else if (ip == this.input.s.length) {
405        log ('insertion point: end of file');        log ('insertion point: end of file');
406        this.insertionPoint = ip;        this.insertionPoint = ip;
407      } else {      } else {
408        log ('insertion point: set to ' + ip +        log ('insertion point: set to ' + ip +
409             ' (before "' + this.in.s.substring (0, 10) + '")');             ' (before "' + this.input.s.substring (0, 10) + '")');
410        this.insertionPoint = ip;        this.insertionPoint = ip;
411      }      }
412    }; // setInsertionPoint    }; // setInsertionPoint
# Line 394  Line 465 
465          p.scriptsExecutedAfterParsing.push (e);          p.scriptsExecutedAfterParsing.push (e);
466          log ('Running a script: aborted (defer)');          log ('Running a script: aborted (defer)');
467        } else if (e.async && e.src != null) {        } else if (e.async && e.src != null) {
468          // TODO          p.scriptsExecutedAsynchronously.push (e);
469        } else if (e.async && e.src == null          log ('Running a script: aborted (async src)');
470                   /* && list of scripts that will execute asynchronously is not empty */) {        } else if (e.async && e.src == null &&
471          // TODO                   p.scriptsExecutedAsynchronously.length > 0) {
472            p.scriptsExecutedAsynchronously.push (e);
473            log ('Running a script: aborted (async)');
474            // ISSUE: What is the difference with the case above?
475        } else if (e.src != null && e.manakaiParserInserted) {        } else if (e.src != null && e.manakaiParserInserted) {
476          if (p.scriptExecutedWhenParserResumes) {          if (p.scriptExecutedWhenParserResumes) {
477            log ('Error: There is a script that will execute as soon as the parser resumes.');            log ('Error: There is a script that will execute as soon as the parser resumes.');
478          }          }
479          p.scriptExecutedWhenParserResumes = e;          p.scriptExecutedWhenParserResumes = e;
480          log ('Running a script: aborted (src)');          log ('Running a script: aborted (src parser-inserted)');
481        } else if (e.src != null) {        } else if (e.src != null) {
482          // TODO          p.scriptsExecutedSoon.push (e);
483            log ('Running a script: aborted (src)');
484        } else {        } else {
485          executeScript (doc, e); // even if other scripts are already executing.          executeScript (doc, e); // even if other scripts are already executing.
486        }        }
# Line 453  Line 528 
528        var m;        var m;
529        if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {        if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
530          if (m[1]) {          if (m[1]) {
531            return m[1];            return unescapeJSLiteral (m[1]);
532          } else if (m[2]) {          } else if (m[2]) {
533            return m[2];            return unescapeJSLiteral (m[2]);
534          } else {          } else {
535            return null;            return null;
536          }          }
# Line 476  Line 551 
551          matched = true;          matched = true;
552          var args = [];          var args = [];
553          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
554            args.push (v.substring (1, v.length - 1));            args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
555            return '';            return '';
556          });          });
557          doc.write.apply (doc, args);          doc.write.apply (doc, args);
558          return '';          return '';
559        });        });
560          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
561          function (s, t, u) {
562            matched = true;
563            var args = [unescapeJSLiteral (t ? t : u)];
564            doc._insertExternalScript.apply (doc, args);
565            return '';
566          });
567        if (s == '') break;        if (s == '') break;
568        if (!matched) {        if (!matched) {
569          log ('Script parse error: "' + s + '"');          log ('Script parse error: "' + s + '"');
# Line 490  Line 572 
572      }      }
573    } // parseAndRunScript    } // parseAndRunScript
574    
575      function unescapeJSLiteral (s) {
576        return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
577          return String.fromCharCode (parseInt ('0x' + v));
578        });
579      } // unescapeJSLiteral
580    
581    function JSText (data) {    function JSText (data) {
582      this.data = data;      this.data = data;
583    } // JSText    } // JSText
# Line 517  Line 605 
605      // Step 3.      // Step 3.
606      if (this._parser &&      if (this._parser &&
607          !this._parser.scriptCreated &&          !this._parser.scriptCreated &&
608          this._parser.in.insertionPoint != undefined) {          this._parser.input.insertionPoint != undefined) {
609        log ('document.open () in parsing mode is ignored');        log ('document.open () in parsing mode is ignored');
610        return this;        return this;
611      }      }
# Line 551  Line 639 
639      }      }
640    
641      // Step 11.      // Step 11.
642      this._parser.setInsertionPoint (this._parser.in.s.length);      this._parser.setInsertionPoint (this._parser.input.s.length);
643    
644      // Step 12.      // Step 12.
645      return this;      return this;
# Line 571  Line 659 
659      // 2. ... inserted into the input stream just before the insertion point.      // 2. ... inserted into the input stream just before the insertion point.
660      var s = Array.join (arguments, '');      var s = Array.join (arguments, '');
661      log ('document.write: insert "' + s + '"' +      log ('document.write: insert "' + s + '"' +
662           ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');           ' before "' +
663      p.in.s = p.in.s.substring (0, p.insertionPoint) + s           p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
664          + p.in.s.substring (p.insertionPoint, p.in.s.length);      p.input.s = p.input.s.substring (0, p.insertionPoint) + s
665            + p.input.s.substring (p.insertionPoint, p.input.s.length);
666      p.insertionPoint += s.length;      p.insertionPoint += s.length;
667    
668      // 3. If there is a script that will execute as soon as the parser resumes      // 3. If there is a script that will execute as soon as the parser resumes
# Line 599  Line 688 
688      return;      return;
689    }; // document.write    }; // document.write
690    
691      JSDocument.prototype._insertExternalScript = function (uri) {
692        var s = new JSElement (this, 'script');
693        s.src = uri;
694        this.documentElement.appendChild (s);
695      }; // _insertExternalScript
696    
697      JSDocument.prototype.__defineGetter__ ('documentElement', function () {
698        var cn = this.childNodes;
699        for (var i = 0; i < cn.length; i++) {
700          if (cn[i] instanceof JSElement) {
701            return cn[i]
702          }
703        }
704        return null;
705      });
706    
707    JSElement.prototype.__defineGetter__ ('text', function () {    JSElement.prototype.__defineGetter__ ('text', function () {
708      var r = '';      var r = '';
709      for (var i = 0; i < this.childNodes.length; i++) {      for (var i = 0; i < this.childNodes.length; i++) {
# Line 617  Line 722 
722          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
723          if (node.async) r += '| ' + indent + '  async=""\n';          if (node.async) r += '| ' + indent + '  async=""\n';
724          if (node.defer) r += '| ' + indent + '  defer=""\n';          if (node.defer) r += '| ' + indent + '  defer=""\n';
725          if (node.src) r += '| ' + indent + '  src="' + node.src + '"\n';          if (node.src != null) {
726              r += '| ' + indent + '  src="' + node.src + '"\n';
727            }
728          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
729        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
730          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 631  Line 738 
738  </head>  </head>
739  <body onload="  <body onload="
740    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
741    
742      var q = location.search;
743      if (q != null) {
744        q = q.substring (1).split (/;/);
745        for (var i = 0; i < q.length; i++) {
746          var v = q[i].split (/=/, 2);
747          v[0] = decodeURIComponent (v[0]);
748          v[1] = decodeURIComponent (v[1] || '');
749          if (v[0] == 's') {
750            document.sourceElement.value = v[1];
751          }
752        }
753      }
754    
755    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
756    update ();    update ();
757  ">  ">
758    <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
759    Parser</h1>
760    
761  <textarea onchange=" update () ">&lt;html>  <h2>Markup to test
762    (<a href=data:, id=permalink rel=bookmark>permalink</a>,
763    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
764        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
765        Viewer</a>)</h2>
766    <p>
767    <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
768  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
769  &lt;p>  &lt;p>
770  &lt;script>  &lt;script>
# Line 644  document.write ('aaaaaaa&lt;/p>&lt;scrip Line 773  document.write ('aaaaaaa&lt;/p>&lt;scrip
773  &lt;p>  &lt;p>
774  </textarea>  </textarea>
775    
776  <output></output>  <h2 id=log>Log</h2>
777    <p><output></output>
778    
779    <h2 id=notes>Notes</h2>
780    
781    <p>This is a <em>simplified</em> implementation of
782    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
783    Parsing Algorithm</a>.  It implements only the script-related part of the
784    algorithm.  In particular, this parser:
785    <ul>
786    <li>Does not support <code>DOCTYPE</code> and comment tokens.
787    <li>Does not support entities except for <code>&amp;quot;</code>,
788    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
789    <code>src</code> attribute value.
790    <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
791    algorithm, and so on.
792    <li>Does not raise parse errors for invalid attribute specifications in start
793    or end tags.
794    <li>Does not support CDATA/PCDATA elements other than <code>script</code>.
795    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
796    in <code>script</code> element.
797    <li>Does not support foreign (SVG or MathML) elements.
798    <li>Only supports <code>script</code> <code>type</code>
799    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
800    attributes are ignored.
801    <li>Only supports limited statements.  It must consist of zero or more
802    of statements looking similar to the following statements, possibly
803    introduced, followed, or separated by white space characters:
804      <ul>
805      <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
806      <li><code>var s = document.createElement ("script");
807                s.src = "<var>string</var>";
808                document.documentElement.appendChild (s);</code>
809      </ul>
810    Note that strings may be delimited by <code>'</code>s instead of
811    <code>"</code>s.
812    <li>Only supports <code>javascript:</code>
813    <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
814    <code>src</code> attribute of the <code>script</code> element.  In addition,
815    the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
816    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
817    <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
818    string literals.
819    <li>Does not handle <i>stop parsing</i> phase correctly if the document is
820    replaced by a <code>document.open ()</code> call.  In other words, delayed
821    (deferred or asynchronous) script executions and event firings might be
822    treated in a wrong way if a <code>document.open ()</code> invocation
823    is implicitly done by <code>document.write ()</code> in a delayed script.
824    </ul>
825    
826    <p>For some reason, this parser does not work in browsers that do
827    not support JavaScript 1.5.
828    
829    <!-- TODO: |src| attribute value should refer to the value at the time
830    when it is inserted into the document, not the value when the script is
831    executed.  Currently it does not matter, since we don't allow dynamic
832    modification to the |src| content/DOM attribute value yet. -->
833    
834    <!-- TODO: license -->
835    
836  </body>  </body>
837  </html>  </html>

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.12

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24