/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6 by wakaba, Fri Apr 25 13:42:51 2008 UTC revision 1.9 by wakaba, Sun Apr 27 09:16:11 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5  <style>  <style>
6      h1, h2 {
7        margin: 0;
8        font-size: 100%;
9      }
10      p, pre {
11        margin: 0;
12      }
13    textarea {    textarea {
14       display: block;      width: 100%;
15       width: 80%;      -width: 99%;
16       margin-left: auto;      height: 10em;
      margin-right: auto;  
      min-height: 20em;  
17    }    }
18    output {    output {
19      display: block;      display: block;
# Line 18  Line 23 
23    }    }
24  </style>  </style>
25  <script>  <script>
26      var delayedUpdater = 0;
27    
28    function update () {    function update () {
29      document.logElement.textContent = '';      if (delayedUpdater) {
30      var p = new Parser (new InputStream (document.sourceElement.value));        clearTimeout (delayedUpdater);
31      var doc = p.doc;        delayedUpdater = 0;
32      p.parse ();      }
33      log (dumpTree (doc, ''));      delayedUpdater = setTimeout (update2, 100);
34    } // update    } // update
35    
36      function update2 () {
37        var v = document.sourceElement.value;
38        if (v != document.previousSourceText) {
39          document.previousSourceText = v;
40          document.links['permalink'].href
41              = location.pathname + '?s=' + encodeURIComponent (v);
42          document.links['ldvlink'].href
43              = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
44              + encodeURIComponent (v);
45    
46          document.logElement.textContent = '';
47          var p = new Parser (new InputStream (v));
48          var doc = p.doc;
49          p.parse ();
50          log (dumpTree (doc, ''));
51        }
52      } // update2
53    
54    var logIndentLevel = 0;    var logIndentLevel = 0;
55    function log (s) {    function log (s) {
56      for (var i = 0; i < logIndentLevel; i++) {      for (var i = 0; i < logIndentLevel; i++) {
# Line 46  Line 71 
71      }      }
72      this.doc = doc;      this.doc = doc;
73      this.openElements = [doc];      this.openElements = [doc];
74      this.in = i;      this.input = i;
75      this.scriptsExecutedAfterParsing = [];      this.scriptsExecutedAfterParsing = [];
76    } // Parser    } // Parser
77    
78    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
79      var p = this;      var p = this;
80      var i = this.in;      var i = this.input;
81      if (this.parseMode == 'script') {      if (this.parseMode == 'script') {
82        var token;        var token;
83        if (p.insertionPoint <= 0) {        if (p.insertionPoint <= 0) {
# Line 125  Line 150 
150          tagName = v.toLowerCase ();          tagName = v.toLowerCase ();
151          return '';          return '';
152        });        });
153        e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/,        while (true) {
154        function (x, attrName, attrValue1, attrValue2, attrValue3) {          var m = false;
155          v = attrValue1 || attrValue2 || attrValue3;          e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
156          v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")          function (x, attrName, attrValue1, attrValue2, attrValue3) {
157              .replace (/&amp;/g, '&');            v = attrValue1 || attrValue2 || attrValue3;
158          attrs[attrName.toLowerCase ()] = v;            v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
159          return '';                .replace (/&amp;/g, '&');
160        });            attrs[attrName.toLowerCase ()] = v;
161              m = true;
162              return '';
163            });
164            if (!m) break;
165          }
166        if (e.length) {        if (e.length) {
167          log ('Broken start tag: "' + e + '"');          log ('Broken start tag: "' + e + '"');
168        }        }
# Line 235  Line 265 
265            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
266    
267            // 11. Let the insertion point have the value of the old ...            // 11. Let the insertion point have the value of the old ...
268    
269            oldInsertionPoint += this.insertionPoint;            oldInsertionPoint += this.insertionPoint;
270            this.setInsertionPoint (oldInsertionPoint);            this.setInsertionPoint (oldInsertionPoint);
271    
# Line 330  Line 361 
361      if (ip == undefined || ip == null || isNaN (ip)) {      if (ip == undefined || ip == null || isNaN (ip)) {
362        log ('insertion point: set to undefined');        log ('insertion point: set to undefined');
363        this.insertionPoint = undefined;        this.insertionPoint = undefined;
364      } else if (ip == this.in.s.length) {      } else if (ip == this.input.s.length) {
365        log ('insertion point: end of file');        log ('insertion point: end of file');
366        this.insertionPoint = ip;        this.insertionPoint = ip;
367      } else {      } else {
368        log ('insertion point: set to ' + ip +        log ('insertion point: set to ' + ip +
369             ' (before "' + this.in.s.substring (0, 10) + '")');             ' (before "' + this.input.s.substring (0, 10) + '")');
370        this.insertionPoint = ip;        this.insertionPoint = ip;
371      }      }
372    }; // setInsertionPoint    }; // setInsertionPoint
# Line 517  Line 548 
548      // Step 3.      // Step 3.
549      if (this._parser &&      if (this._parser &&
550          !this._parser.scriptCreated &&          !this._parser.scriptCreated &&
551          this._parser.in.insertionPoint != undefined) {          this._parser.input.insertionPoint != undefined) {
552        log ('document.open () in parsing mode is ignored');        log ('document.open () in parsing mode is ignored');
553        return this;        return this;
554      }      }
# Line 551  Line 582 
582      }      }
583    
584      // Step 11.      // Step 11.
585      this._parser.setInsertionPoint (this._parser.in.s.length);      this._parser.setInsertionPoint (this._parser.input.s.length);
586    
587      // Step 12.      // Step 12.
588      return this;      return this;
# Line 571  Line 602 
602      // 2. ... inserted into the input stream just before the insertion point.      // 2. ... inserted into the input stream just before the insertion point.
603      var s = Array.join (arguments, '');      var s = Array.join (arguments, '');
604      log ('document.write: insert "' + s + '"' +      log ('document.write: insert "' + s + '"' +
605           ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');           ' before "' +
606      p.in.s = p.in.s.substring (0, p.insertionPoint) + s           p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
607          + p.in.s.substring (p.insertionPoint, p.in.s.length);      p.input.s = p.input.s.substring (0, p.insertionPoint) + s
608            + p.input.s.substring (p.insertionPoint, p.input.s.length);
609      p.insertionPoint += s.length;      p.insertionPoint += s.length;
610    
611      // 3. If there is a script that will execute as soon as the parser resumes      // 3. If there is a script that will execute as soon as the parser resumes
# Line 617  Line 649 
649          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
650          if (node.async) r += '| ' + indent + '  async=""\n';          if (node.async) r += '| ' + indent + '  async=""\n';
651          if (node.defer) r += '| ' + indent + '  defer=""\n';          if (node.defer) r += '| ' + indent + '  defer=""\n';
652          if (node.src) r += '| ' + indent + '  src="' + node.src + '"\n';          if (node.src != null) {
653              r += '| ' + indent + '  src="' + node.src + '"\n';
654            }
655          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
656        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
657          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 631  Line 665 
665  </head>  </head>
666  <body onload="  <body onload="
667    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
668    
669      var q = location.search;
670      if (q != null) {
671        q = q.substring (1).split (/;/);
672        for (var i = 0; i < q.length; i++) {
673          var v = q[i].split (/=/, 2);
674          v[0] = decodeURIComponent (v[0]);
675          v[1] = decodeURIComponent (v[1] || '');
676          if (v[0] == 's') {
677            document.sourceElement.value = v[1];
678          }
679        }
680      }
681    
682    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
683    update ();    update ();
684  ">  ">
685    <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
686    Parser</h1>
687    
688  <textarea onchange=" update () ">&lt;html>  <h2>Markup to test
689    (<a href=data:, id=permalink rel=bookmark>permalink</a>,
690    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
691        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
692        Viewer</a>)</h2>
693    <p>
694    <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
695  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
696  &lt;p>  &lt;p>
697  &lt;script>  &lt;script>
# Line 644  document.write ('aaaaaaa&lt;/p>&lt;scrip Line 700  document.write ('aaaaaaa&lt;/p>&lt;scrip
700  &lt;p>  &lt;p>
701  </textarea>  </textarea>
702    
703  <output></output>  <h2>Log</h2>
704    <p><output></output>
705    
706    <h2>Note</h2>
707    
708    <p>This is a <em>simplified</em> implementation of
709    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
710    Parsing Algorithm</a>.  It only implements script-related part of the
711    algorithm.  Especially, this parser:
712    <ul>
713    <li>Does not support <code>DOCTYPE</code> and comment tokens.
714    <li>Does not support entities except for <code>&amp;quot;</code>,
715    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
716    <code>src</code> attribute value.
717    <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
718    algorithm, and so on.
719    <li>Does not raise parse errors for invalid attribute specifications in start
720    or end tags.
721    <li>Does not support CDATA/PCDATA element other than <code>script</code>.
722    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
723    in <code>script</code> element.
724    <li>Does not support foreign (SVG or MathML) elements.
725    <li>Only supports <code>script</code> <code>type</code>
726    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
727    attributes are ignored.
728    <li>Only supports <code>document.write</code>.
729    The script code must be match to the regular expression
730    <code>^\s*(?:document\.write\s*\(<var>v</var>\s*(?:,\s*<var>v</var>\s*)*\)\s*;\s*)*$</code>
731    where <var>v</var> is <code>"[^"]*"|'[^']*'</code>.
732    <li>Only supports <code>javascript:</code>
733    <abbr title="Uniform Resourace Identifiers">URI</abbr> scheme in the
734    <code>src</code> attribute of the <code>script</code> element.  In addition,
735    the <abbr title="Uniform Resource Identifiers">URI</abbr> must be conform to
736    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
737    </ul>
738    
739    <p>For some reason, this parser does not work in browsers that do
740    not support JavaScript 1.5.
741    
742  </body>  </body>
743  </html>  </html>

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.9

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24