/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.7 by wakaba, Fri Apr 25 23:03:35 2008 UTC revision 1.13 by wakaba, Sun Apr 27 11:27:04 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5    <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6    <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7        title="GNU GPL2 or later">
8  <style>  <style>
9    h1, h2 {    h1 {
10        margin: 0;
11        font-size: 150%;
12      }
13      h2 {
14      margin: 0;      margin: 0;
15      font-size: 100%;      font-size: 100%;
16    }    }
17    p, pre {    p {
18      margin: 0;      margin: 0 1em;
19    }    }
20    textarea {    textarea {
21      width: 100%;      width: 100%;
# Line 34  Line 41 
41    } // update    } // update
42    
43    function update2 () {    function update2 () {
     document.logElement.textContent = '';  
44      var v = document.sourceElement.value;      var v = document.sourceElement.value;
45      var p = new Parser (new InputStream (v));      if (v != document.previousSourceText) {
46      var doc = p.doc;        document.previousSourceText = v;
47      p.parse ();        document.links['permalink'].href
48      log (dumpTree (doc, ''));            = location.pathname + '?s=' + encodeURIComponent (v);
49          document.links['ldvlink'].href
50      document.links['permalink'].href            = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
51          = location.href + '?s=' + encodeURIComponent (v);            + encodeURIComponent (v);
52    
53          document.logElement.textContent = '';
54          var p = new Parser (new InputStream (v));
55          var doc = p.doc;
56          p.parse ();
57          
58          log (dumpTree (doc, ''));
59          
60          if (p.hasAsyncScript) {
61            log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
62          }
63        }
64    } // update2    } // update2
65    
66    var logIndentLevel = 0;    var logIndentLevel = 0;
# Line 65  Line 83 
83      }      }
84      this.doc = doc;      this.doc = doc;
85      this.openElements = [doc];      this.openElements = [doc];
86      this.in = i;      this.input = i;
87      this.scriptsExecutedAfterParsing = [];      this.scriptsExecutedAfterParsing = [];
88        this.scriptsExecutedSoon = [];
89        this.scriptsExecutedAsynchronously = [];
90    } // Parser    } // Parser
91    
92    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
93      var p = this;      var p = this;
94      var i = this.in;      var i = this.input;
95      if (this.parseMode == 'script') {      if (this.parseMode == 'script') {
96        var token;        var token;
97        if (p.insertionPoint <= 0) {        if (p.insertionPoint <= 0) {
# Line 144  Line 164 
164          tagName = v.toLowerCase ();          tagName = v.toLowerCase ();
165          return '';          return '';
166        });        });
167        e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/,        while (true) {
168        function (x, attrName, attrValue1, attrValue2, attrValue3) {          var m = false;
169          v = attrValue1 || attrValue2 || attrValue3;          e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
170          v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")          function (x, attrName, attrValue1, attrValue2, attrValue3) {
171              .replace (/&amp;/g, '&');            v = attrValue1 || attrValue2 || attrValue3;
172          attrs[attrName.toLowerCase ()] = v;            v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
173          return '';                .replace (/&amp;/g, '&');
174        });            attrs[attrName.toLowerCase ()] = v;
175              m = true;
176              return '';
177            });
178            if (!m) break;
179          }
180        if (e.length) {        if (e.length) {
181          log ('Broken start tag: "' + e + '"');          log ('Broken start tag: "' + e + '"');
182        }        }
# Line 318  Line 343 
343    
344      // "When a script completes loading" rules start applying.      // "When a script completes loading" rules start applying.
345    
346      // TODO: Handles "list of scripts that will execute as soon as possible"      while (this.scriptsExecutedSoon.length > 0 ||
347      // and "list of scripts that will execute asynchronously"             this.scriptsExecutedAsynchronously.length > 0) {
348          // Handle "list of scripts that will execute as soon as possible".
349          while (this.scriptsExecutedSoon.length > 0) {
350            var e = this.scriptsExecutedSoon.shift ();
351      
352            // If it has completed loading
353            log ('Execute an external script not inserted by parser...');
354            executeScript (this.doc, e);
355    
356            // NOTE: It MAY be executed before the end of the parsing, according
357            // to the spec.
358            this.hasAsyncScript = true;
359          }
360    
361          // Handle "list of scripts that will execute asynchronously".
362          while (this.scriptsExecutedAsynchronously.length > 0) {
363            var e = this.scriptsExecutedAsynchronously.shift ();
364    
365            // Step 1.
366            // We assume that all scripts have been loaded at this time.
367      
368            // Step 2.
369            log ('Execute an asynchronous script...');
370            executeScript (this.doc, e);
371    
372            // Step 3.
373            //
374    
375            // Step 4.
376            //
377    
378            this.hasAsyncScript = true;
379          }
380        }
381    
382      // Handle "list of scripts that will execute when the document has finished      // Handle "list of scripts that will execute when the document has finished
383      // parsing".      // parsing".
# Line 350  Line 408 
408      if (ip == undefined || ip == null || isNaN (ip)) {      if (ip == undefined || ip == null || isNaN (ip)) {
409        log ('insertion point: set to undefined');        log ('insertion point: set to undefined');
410        this.insertionPoint = undefined;        this.insertionPoint = undefined;
411      } else if (ip == this.in.s.length) {      } else if (ip == this.input.s.length) {
412        log ('insertion point: end of file');        log ('insertion point: end of file');
413        this.insertionPoint = ip;        this.insertionPoint = ip;
414      } else {      } else {
415        log ('insertion point: set to ' + ip +        log ('insertion point: set to ' + ip +
416             ' (before "' + this.in.s.substring (0, 10) + '")');             ' (before "' + this.input.s.substring (0, 10) + '")');
417        this.insertionPoint = ip;        this.insertionPoint = ip;
418      }      }
419    }; // setInsertionPoint    }; // setInsertionPoint
# Line 414  Line 472 
472          p.scriptsExecutedAfterParsing.push (e);          p.scriptsExecutedAfterParsing.push (e);
473          log ('Running a script: aborted (defer)');          log ('Running a script: aborted (defer)');
474        } else if (e.async && e.src != null) {        } else if (e.async && e.src != null) {
475          // TODO          p.scriptsExecutedAsynchronously.push (e);
476        } else if (e.async && e.src == null          log ('Running a script: aborted (async src)');
477                   /* && list of scripts that will execute asynchronously is not empty */) {        } else if (e.async && e.src == null &&
478          // TODO                   p.scriptsExecutedAsynchronously.length > 0) {
479            p.scriptsExecutedAsynchronously.push (e);
480            log ('Running a script: aborted (async)');
481            // ISSUE: What is the difference with the case above?
482        } else if (e.src != null && e.manakaiParserInserted) {        } else if (e.src != null && e.manakaiParserInserted) {
483          if (p.scriptExecutedWhenParserResumes) {          if (p.scriptExecutedWhenParserResumes) {
484            log ('Error: There is a script that will execute as soon as the parser resumes.');            log ('Error: There is a script that will execute as soon as the parser resumes.');
485          }          }
486          p.scriptExecutedWhenParserResumes = e;          p.scriptExecutedWhenParserResumes = e;
487          log ('Running a script: aborted (src)');          log ('Running a script: aborted (src parser-inserted)');
488        } else if (e.src != null) {        } else if (e.src != null) {
489          // TODO          p.scriptsExecutedSoon.push (e);
490            log ('Running a script: aborted (src)');
491        } else {        } else {
492          executeScript (doc, e); // even if other scripts are already executing.          executeScript (doc, e); // even if other scripts are already executing.
493        }        }
# Line 473  Line 535 
535        var m;        var m;
536        if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {        if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
537          if (m[1]) {          if (m[1]) {
538            return m[1];            return unescapeJSLiteral (m[1]);
539          } else if (m[2]) {          } else if (m[2]) {
540            return m[2];            return unescapeJSLiteral (m[2]);
541          } else {          } else {
542            return null;            return null;
543          }          }
# Line 496  Line 558 
558          matched = true;          matched = true;
559          var args = [];          var args = [];
560          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
561            args.push (v.substring (1, v.length - 1));            args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
562            return '';            return '';
563          });          });
564          doc.write.apply (doc, args);          doc.write.apply (doc, args);
565          return '';          return '';
566        });        });
567          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
568          function (s, t, u) {
569            matched = true;
570            var args = [unescapeJSLiteral (t ? t : u)];
571            doc._insertExternalScript.apply (doc, args);
572            return '';
573          });
574        if (s == '') break;        if (s == '') break;
575        if (!matched) {        if (!matched) {
576          log ('Script parse error: "' + s + '"');          log ('Script parse error: "' + s + '"');
# Line 510  Line 579 
579      }      }
580    } // parseAndRunScript    } // parseAndRunScript
581    
582      function unescapeJSLiteral (s) {
583        return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
584          return String.fromCharCode (parseInt ('0x' + v));
585        });
586      } // unescapeJSLiteral
587    
588    function JSText (data) {    function JSText (data) {
589      this.data = data;      this.data = data;
590    } // JSText    } // JSText
# Line 537  Line 612 
612      // Step 3.      // Step 3.
613      if (this._parser &&      if (this._parser &&
614          !this._parser.scriptCreated &&          !this._parser.scriptCreated &&
615          this._parser.in.insertionPoint != undefined) {          this._parser.input.insertionPoint != undefined) {
616        log ('document.open () in parsing mode is ignored');        log ('document.open () in parsing mode is ignored');
617        return this;        return this;
618      }      }
# Line 571  Line 646 
646      }      }
647    
648      // Step 11.      // Step 11.
649      this._parser.setInsertionPoint (this._parser.in.s.length);      this._parser.setInsertionPoint (this._parser.input.s.length);
650    
651      // Step 12.      // Step 12.
652      return this;      return this;
# Line 591  Line 666 
666      // 2. ... inserted into the input stream just before the insertion point.      // 2. ... inserted into the input stream just before the insertion point.
667      var s = Array.join (arguments, '');      var s = Array.join (arguments, '');
668      log ('document.write: insert "' + s + '"' +      log ('document.write: insert "' + s + '"' +
669           ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');           ' before "' +
670      p.in.s = p.in.s.substring (0, p.insertionPoint) + s           p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
671          + p.in.s.substring (p.insertionPoint, p.in.s.length);      p.input.s = p.input.s.substring (0, p.insertionPoint) + s
672            + p.input.s.substring (p.insertionPoint, p.input.s.length);
673      p.insertionPoint += s.length;      p.insertionPoint += s.length;
674    
675      // 3. If there is a script that will execute as soon as the parser resumes      // 3. If there is a script that will execute as soon as the parser resumes
# Line 619  Line 695 
695      return;      return;
696    }; // document.write    }; // document.write
697    
698      JSDocument.prototype._insertExternalScript = function (uri) {
699        var s = new JSElement (this, 'script');
700        s.src = uri;
701        this.documentElement.appendChild (s);
702      }; // _insertExternalScript
703    
704      JSDocument.prototype.__defineGetter__ ('documentElement', function () {
705        var cn = this.childNodes;
706        for (var i = 0; i < cn.length; i++) {
707          if (cn[i] instanceof JSElement) {
708            return cn[i]
709          }
710        }
711        return null;
712      });
713    
714    JSElement.prototype.__defineGetter__ ('text', function () {    JSElement.prototype.__defineGetter__ ('text', function () {
715      var r = '';      var r = '';
716      for (var i = 0; i < this.childNodes.length; i++) {      for (var i = 0; i < this.childNodes.length; i++) {
# Line 637  Line 729 
729          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
730          if (node.async) r += '| ' + indent + '  async=""\n';          if (node.async) r += '| ' + indent + '  async=""\n';
731          if (node.defer) r += '| ' + indent + '  defer=""\n';          if (node.defer) r += '| ' + indent + '  defer=""\n';
732          if (node.src) r += '| ' + indent + '  src="' + node.src + '"\n';          if (node.src != null) {
733              r += '| ' + indent + '  src="' + node.src + '"\n';
734            }
735          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
736        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
737          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 651  Line 745 
745  </head>  </head>
746  <body onload="  <body onload="
747    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
748    
749      var q = location.search;
750      if (q != null) {
751        q = q.substring (1).split (/;/);
752        for (var i = 0; i < q.length; i++) {
753          var v = q[i].split (/=/, 2);
754          v[0] = decodeURIComponent (v[0]);
755          v[1] = decodeURIComponent (v[1] || '');
756          if (v[0] == 's') {
757            document.sourceElement.value = v[1];
758          }
759        }
760      }
761    
762    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
763    update ();    update ();
764  ">  ">
765  <h1>Live Scripting Parser</h1>  <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
766    Parser</h1>
767    
768  <h2>Markup to test  <h2>Markup to test
769  (<a href=data:, id=permalink rel=bookmark>permalink</a>)</h2>  (<a href=data:, id=permalink rel=bookmark>permalink</a>,
770    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
771        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
772        Viewer</a>)</h2>
773  <p>  <p>
774  <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>  <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
775  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
# Line 668  document.write ('aaaaaaa&lt;/p>&lt;scrip Line 780  document.write ('aaaaaaa&lt;/p>&lt;scrip
780  &lt;p>  &lt;p>
781  </textarea>  </textarea>
782    
783  <h2>Log</h2>  <h2 id=log>Log</h2>
784  <p><output></output>  <p><output></output>
785    
786  <!-- TODO: short description -->  <h2 id=notes>Notes</h2>
787    
788  <!-- TODO: permalink query -> textarea -->  <p>This is a <em>simplified</em> implementation of
789    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
790  <!-- TODO: multiple attributes are not supported yet -->  Parsing Algorithm</a>.  It only implements the script-related part of the
791    algorithm.  In particular, this parser:
792    <ul>
793    <li>Does not support <code>DOCTYPE</code> and comment tokens.
794    <li>Does not support entities except for <code>&amp;quot;</code>,
795    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
796    <code>src</code> attribute value.
797    <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
798    algorithm, and so on.
799    <li>Does not raise parse errors for invalid attribute specifications in start
800    or end tags.
801    <li>Does not support CDATA/PCDATA elements other than <code>script</code>.
802    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
803    in <code>script</code> element.
804    <li>Does not support foreign (SVG or MathML) elements.
805    <li>Only supports <code>script</code> <code>type</code>
806    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
807    attributes are ignored.
808    <li>Only supports a limited set of statements.  A script must consist of
809    zero or more statements similar to the following ones, possibly
810    preceded, followed, or separated by white space characters:
811      <ul>
812      <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
813      <li><code>var s = document.createElement ("script");
814                s.src = "<var>string</var>";
815                document.documentElement.appendChild (s);</code>
816      </ul>
817    Note that strings may be delimited by <code>'</code>s instead of
818    <code>"</code>s.
819    <li>Only supports <code>javascript:</code>
820    <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
821    <code>src</code> attribute of the <code>script</code> element.  In addition,
822    the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
823    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
824    <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
825    string literals.
826    <li>Does not handle <i>stop parsing</i> phase correctly if the document is
827    replaced by a <code>document.open ()</code> call.  In other words, delayed
828    (deferred or asynchronous) script executions and event firings might be
829    treated in a wrong way if a <code>document.open ()</code> invocation
830    is implicitly done by <code>document.write ()</code> in a delayed script.
831    </ul>
832    
833    <p>For some reason, this parser does not work in browsers that do
834    not support JavaScript 1.5.
835    
836    <!-- TODO: |src| attribute value should refer to the value at the time
837    when it is inserted into the document, not the value when the script is
838    executed.  Currently it does not matter, since we don't allow dynamic
839    modification to the |src| content/DOM attribute value yet. -->
840    
841  </body>  </body>
 </html>  
842    </html>
843    <!-- $Date$ -->
844    <!--
845    
846    Copyright 2008 Wakaba <w@suika.fam.cx>
847    
848    This program is free software; you can redistribute it and/or
849    modify it under the terms of the GNU General Public License
850    as published by the Free Software Foundation; either version 2
851    of the License, or (at your option) any later version.
852    
853    This program is distributed in the hope that it will be useful,
854    but WITHOUT ANY WARRANTY; without even the implied warranty of
855    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
856    GNU General Public License for more details.
857    
858    You should have received a copy of the GNU General Public License
859    along with this program; if not, write to the Free Software
860    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
861    
862    -->

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.13

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24