/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.7 by wakaba, Fri Apr 25 23:03:35 2008 UTC revision 1.11 by wakaba, Sun Apr 27 10:44:36 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5  <style>  <style>
6    h1, h2 {    h1, h2 {
7      margin: 0;      margin: 0;
# Line 34  Line 34 
34    } // update    } // update
35    
36    function update2 () {    function update2 () {
     document.logElement.textContent = '';  
37      var v = document.sourceElement.value;      var v = document.sourceElement.value;
38      var p = new Parser (new InputStream (v));      if (v != document.previousSourceText) {
39      var doc = p.doc;        document.previousSourceText = v;
40      p.parse ();        document.links['permalink'].href
41      log (dumpTree (doc, ''));            = location.pathname + '?s=' + encodeURIComponent (v);
42          document.links['ldvlink'].href
43      document.links['permalink'].href            = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
44          = location.href + '?s=' + encodeURIComponent (v);            + encodeURIComponent (v);
45    
46          document.logElement.textContent = '';
47          var p = new Parser (new InputStream (v));
48          var doc = p.doc;
49          p.parse ();
50          
51          log (dumpTree (doc, ''));
52          
53          if (p.hasAsyncScript) {
54            log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
55          }
56        }
57    } // update2    } // update2
58    
59    var logIndentLevel = 0;    var logIndentLevel = 0;
# Line 65  Line 76 
76      }      }
77      this.doc = doc;      this.doc = doc;
78      this.openElements = [doc];      this.openElements = [doc];
79      this.in = i;      this.input = i;
80      this.scriptsExecutedAfterParsing = [];      this.scriptsExecutedAfterParsing = [];
81        this.scriptsExecutedSoon = [];
82    } // Parser    } // Parser
83    
84    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
85      var p = this;      var p = this;
86      var i = this.in;      var i = this.input;
87      if (this.parseMode == 'script') {      if (this.parseMode == 'script') {
88        var token;        var token;
89        if (p.insertionPoint <= 0) {        if (p.insertionPoint <= 0) {
# Line 144  Line 156 
156          tagName = v.toLowerCase ();          tagName = v.toLowerCase ();
157          return '';          return '';
158        });        });
159        e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/,        while (true) {
160        function (x, attrName, attrValue1, attrValue2, attrValue3) {          var m = false;
161          v = attrValue1 || attrValue2 || attrValue3;          e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
162          v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")          function (x, attrName, attrValue1, attrValue2, attrValue3) {
163              .replace (/&amp;/g, '&');            v = attrValue1 || attrValue2 || attrValue3;
164          attrs[attrName.toLowerCase ()] = v;            v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
165          return '';                .replace (/&amp;/g, '&');
166        });            attrs[attrName.toLowerCase ()] = v;
167              m = true;
168              return '';
169            });
170            if (!m) break;
171          }
172        if (e.length) {        if (e.length) {
173          log ('Broken start tag: "' + e + '"');          log ('Broken start tag: "' + e + '"');
174        }        }
# Line 318  Line 335 
335    
336      // "When a script completes loading" rules start applying.      // "When a script completes loading" rules start applying.
337    
338      // TODO: Handles "list of scripts that will execute as soon as possible"      // List of scripts that will execute as soon as possible
339      // and "list of scripts that will execute asynchronously"      for (var i = 0; i < this.scriptsExecutedSoon.length; i++) {
340          var e = this.scriptsExecutedSoon[i];
341    
342          // If it has completed loading
343          log ('Execute an external script not inserted by parser...');
344          executeScript (this.doc, e);
345    
346          // NOTE: It MAY be executed before the end of the parsing, according
347          // to the spec.
348          this.hasAsyncScript = true;
349        }
350    
351        // TODO: Handles
352        // "list of scripts that will execute asynchronously"
353    
354      // Handle "list of scripts that will execute when the document has finished      // Handle "list of scripts that will execute when the document has finished
355      // parsing".      // parsing".
# Line 350  Line 380 
380      if (ip == undefined || ip == null || isNaN (ip)) {      if (ip == undefined || ip == null || isNaN (ip)) {
381        log ('insertion point: set to undefined');        log ('insertion point: set to undefined');
382        this.insertionPoint = undefined;        this.insertionPoint = undefined;
383      } else if (ip == this.in.s.length) {      } else if (ip == this.input.s.length) {
384        log ('insertion point: end of file');        log ('insertion point: end of file');
385        this.insertionPoint = ip;        this.insertionPoint = ip;
386      } else {      } else {
387        log ('insertion point: set to ' + ip +        log ('insertion point: set to ' + ip +
388             ' (before "' + this.in.s.substring (0, 10) + '")');             ' (before "' + this.input.s.substring (0, 10) + '")');
389        this.insertionPoint = ip;        this.insertionPoint = ip;
390      }      }
391    }; // setInsertionPoint    }; // setInsertionPoint
# Line 423  Line 453 
453            log ('Error: There is a script that will execute as soon as the parser resumes.');            log ('Error: There is a script that will execute as soon as the parser resumes.');
454          }          }
455          p.scriptExecutedWhenParserResumes = e;          p.scriptExecutedWhenParserResumes = e;
456          log ('Running a script: aborted (src)');          log ('Running a script: aborted (src parser-inserted)');
457        } else if (e.src != null) {        } else if (e.src != null) {
458          // TODO          p.scriptsExecutedSoon.push (e);
459            log ('Running a script: aborted (src)');
460        } else {        } else {
461          executeScript (doc, e); // even if other scripts are already executing.          executeScript (doc, e); // even if other scripts are already executing.
462        }        }
# Line 473  Line 504 
504        var m;        var m;
505        if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {        if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
506          if (m[1]) {          if (m[1]) {
507            return m[1];            return unescapeJSLiteral (m[1]);
508          } else if (m[2]) {          } else if (m[2]) {
509            return m[2];            return unescapeJSLiteral (m[2]);
510          } else {          } else {
511            return null;            return null;
512          }          }
# Line 496  Line 527 
527          matched = true;          matched = true;
528          var args = [];          var args = [];
529          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
530            args.push (v.substring (1, v.length - 1));            args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
531            return '';            return '';
532          });          });
533          doc.write.apply (doc, args);          doc.write.apply (doc, args);
534          return '';          return '';
535        });        });
536          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
537          function (s, t, u) {
538            matched = true;
539            var args = [unescapeJSLiteral (t ? t : u)];
540            doc._insertExternalScript.apply (doc, args);
541            return '';
542          });
543        if (s == '') break;        if (s == '') break;
544        if (!matched) {        if (!matched) {
545          log ('Script parse error: "' + s + '"');          log ('Script parse error: "' + s + '"');
# Line 510  Line 548 
548      }      }
549    } // parseAndRunScript    } // parseAndRunScript
550    
551      function unescapeJSLiteral (s) {
552        return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
553          return String.fromCharCode (parseInt ('0x' + v));
554        });
555      } // unescapeJSLiteral
556    
557    function JSText (data) {    function JSText (data) {
558      this.data = data;      this.data = data;
559    } // JSText    } // JSText
# Line 537  Line 581 
581      // Step 3.      // Step 3.
582      if (this._parser &&      if (this._parser &&
583          !this._parser.scriptCreated &&          !this._parser.scriptCreated &&
584          this._parser.in.insertionPoint != undefined) {          this._parser.input.insertionPoint != undefined) {
585        log ('document.open () in parsing mode is ignored');        log ('document.open () in parsing mode is ignored');
586        return this;        return this;
587      }      }
# Line 571  Line 615 
615      }      }
616    
617      // Step 11.      // Step 11.
618      this._parser.setInsertionPoint (this._parser.in.s.length);      this._parser.setInsertionPoint (this._parser.input.s.length);
619    
620      // Step 12.      // Step 12.
621      return this;      return this;
# Line 591  Line 635 
635      // 2. ... inserted into the input stream just before the insertion point.      // 2. ... inserted into the input stream just before the insertion point.
636      var s = Array.join (arguments, '');      var s = Array.join (arguments, '');
637      log ('document.write: insert "' + s + '"' +      log ('document.write: insert "' + s + '"' +
638           ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');           ' before "' +
639      p.in.s = p.in.s.substring (0, p.insertionPoint) + s           p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
640          + p.in.s.substring (p.insertionPoint, p.in.s.length);      p.input.s = p.input.s.substring (0, p.insertionPoint) + s
641            + p.input.s.substring (p.insertionPoint, p.input.s.length);
642      p.insertionPoint += s.length;      p.insertionPoint += s.length;
643    
644      // 3. If there is a script that will execute as soon as the parser resumes      // 3. If there is a script that will execute as soon as the parser resumes
# Line 619  Line 664 
664      return;      return;
665    }; // document.write    }; // document.write
666    
667      JSDocument.prototype._insertExternalScript = function (uri) {
668        var s = new JSElement (this, 'script');
669        s.src = uri;
670        this.documentElement.appendChild (s);
671      }; // _insertExternalScript
672    
673      JSDocument.prototype.__defineGetter__ ('documentElement', function () {
674        var cn = this.childNodes;
675        for (var i = 0; i < cn.length; i++) {
676          if (cn[i] instanceof JSElement) {
677            return cn[i]
678          }
679        }
680        return null;
681      });
682    
683    JSElement.prototype.__defineGetter__ ('text', function () {    JSElement.prototype.__defineGetter__ ('text', function () {
684      var r = '';      var r = '';
685      for (var i = 0; i < this.childNodes.length; i++) {      for (var i = 0; i < this.childNodes.length; i++) {
# Line 637  Line 698 
698          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
699          if (node.async) r += '| ' + indent + '  async=""\n';          if (node.async) r += '| ' + indent + '  async=""\n';
700          if (node.defer) r += '| ' + indent + '  defer=""\n';          if (node.defer) r += '| ' + indent + '  defer=""\n';
701          if (node.src) r += '| ' + indent + '  src="' + node.src + '"\n';          if (node.src != null) {
702              r += '| ' + indent + '  src="' + node.src + '"\n';
703            }
704          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
705        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
706          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 651  Line 714 
714  </head>  </head>
715  <body onload="  <body onload="
716    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
717    
718      var q = location.search;
719      if (q != null) {
720        q = q.substring (1).split (/;/);
721        for (var i = 0; i < q.length; i++) {
722          var v = q[i].split (/=/, 2);
723          v[0] = decodeURIComponent (v[0]);
724          v[1] = decodeURIComponent (v[1] || '');
725          if (v[0] == 's') {
726            document.sourceElement.value = v[1];
727          }
728        }
729      }
730    
731    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
732    update ();    update ();
733  ">  ">
734  <h1>Live Scripting Parser</h1>  <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
735    Parser</h1>
736    
737  <h2>Markup to test  <h2>Markup to test
738  (<a href=data:, id=permalink rel=bookmark>permalink</a>)</h2>  (<a href=data:, id=permalink rel=bookmark>permalink</a>,
739    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
740        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
741        Viewer</a>)</h2>
742  <p>  <p>
743  <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>  <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
744  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
# Line 668  document.write ('aaaaaaa&lt;/p>&lt;scrip Line 749  document.write ('aaaaaaa&lt;/p>&lt;scrip
749  &lt;p>  &lt;p>
750  </textarea>  </textarea>
751    
752  <h2>Log</h2>  <h2 id=log>Log</h2>
753  <p><output></output>  <p><output></output>
754    
755  <!-- TODO: short description -->  <h2 id=notes>Notes</h2>
756    
757    <p>This is a <em>simplified</em> implementation of the
758    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
759    Parsing Algorithm</a>.  It only implements the script-related part of the
760    algorithm.  In particular, this parser:
761    <ul>
762    <li>Does not support <code>DOCTYPE</code> and comment tokens.
763    <li>Does not support entities except for <code>&amp;quot;</code>,
764    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
765    <code>src</code> attribute value.
766    <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
767    algorithm, and so on.
768    <li>Does not raise parse errors for invalid attribute specifications in start
769    or end tags.
770    <li>Does not support CDATA/PCDATA elements other than <code>script</code>.
771    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
772    in <code>script</code> element.
773    <li>Does not support foreign (SVG or MathML) elements.
774    <li>Only supports <code>script</code> <code>type</code>
775    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
776    attributes are ignored.
777    <li>Only supports limited statements.  A script must consist of zero or more
778    statements similar to the following ones, possibly
779    preceded, followed, or separated by white space characters:
780      <ul>
781      <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
782      <li><code>var s = document.createElement ("script");
783                s.src = "<var>string</var>";
784                document.documentElement.appendChild (s);</code>
785      </ul>
786    Note that strings may be delimited by <code>'</code>s instead of
787    <code>"</code>s.
788    <li>Only supports the <code>javascript:</code>
789    <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
790    <code>src</code> attribute of the <code>script</code> element.  In addition,
791    the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
792    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
793    <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
794    string literals.
795    </ul>
796    
797  <!-- TODO: permalink query -> textarea -->  <p>For some reason, this parser does not work in browsers that do
798    not support JavaScript 1.5.
799    
800  <!-- TODO: multiple attributes are not supported yet -->  <!-- TODO: license -->
801    
802  </body>  </body>
803  </html>  </html>

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.11

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24