/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log | View Patch

revision 1.2 by wakaba, Sun Apr 20 07:48:00 2008 UTC revision 1.7 by wakaba, Fri Apr 25 23:03:35 2008 UTC
# Line 3  Line 3 
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5  <style>  <style>
6      h1, h2 {
7        margin: 0;
8        font-size: 100%;
9      }
10      p, pre {
11        margin: 0;
12      }
13    textarea {    textarea {
14       display: block;      width: 100%;
15       width: 80%;      -width: 99%;
16       margin-left: auto;      height: 10em;
      margin-right: auto;  
      min-height: 20em;  
17    }    }
18    output {    output {
19      display: block;      display: block;
20      font-family: monospace;      font-family: monospace;
21      white-space: pre;      white-space: -moz-pre-wrap;
22        white-space: pre-wrap;
23    }    }
24  </style>  </style>
25  <script>  <script>
26      var delayedUpdater = 0;
27    
28    function update () {    function update () {
29        if (delayedUpdater) {
30          clearTimeout (delayedUpdater);
31          delayedUpdater = 0;
32        }
33        delayedUpdater = setTimeout (update2, 100);
34      } // update
35    
36      function update2 () {
37      document.logElement.textContent = '';      document.logElement.textContent = '';
38      var p = new Parser (new InputStream (document.sourceElement.value));      var v = document.sourceElement.value;
39        var p = new Parser (new InputStream (v));
40        var doc = p.doc;
41      p.parse ();      p.parse ();
42      log (dumpTree (p.doc, ''));      log (dumpTree (doc, ''));
43    } // update  
44        document.links['permalink'].href
45            = location.href + '?s=' + encodeURIComponent (v);
46      } // update2
47    
48      var logIndentLevel = 0;
49    function log (s) {    function log (s) {
50        for (var i = 0; i < logIndentLevel; i++) {
51          s = '  ' + s;
52        }
53      document.logElement.appendChild (document.createTextNode (s + "\n"));      document.logElement.appendChild (document.createTextNode (s + "\n"));
54    } // log    } // log
55    
# Line 32  Line 57 
57      this.s = s;      this.s = s;
58    } // InputStream    } // InputStream
59    
60    function Parser (i) {    function Parser (i, doc) {
61      this.parseMode = 'pcdata';      this.parseMode = 'pcdata';
62      this.doc = new JSDocument (this);      if (!doc) {
63      this.openElements = [this.doc];        doc = new JSDocument (this);
64          doc.manakaiIsHTML = true;
65        }
66        this.doc = doc;
67        this.openElements = [doc];
68      this.in = i;      this.in = i;
69        this.scriptsExecutedAfterParsing = [];
70    } // Parser    } // Parser
71    
72    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
73        var p = this;
74      var i = this.in;      var i = this.in;
75      if (this.parseMode == 'script') {      if (this.parseMode == 'script') {
76        var token;        var token;
77        i.s = i.s.replace (/^([\s\S]+?)<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/,        if (p.insertionPoint <= 0) {
78            return {type: 'abort'};
79          }
80          i.s = i.s.replace (/^([^<]+)/,
81        function (s, t) {        function (s, t) {
82            if (0 < p.insertionPoint && p.insertionPoint < t.length) {
83              token = {type: 'char', value: t.substring (0, p.insertionPoint)};
84              var ip = p.insertionPoint;
85              p.insertionPoint = 0;
86              return t.substring (ip, t.length);
87            }
88          token = {type: 'char', value: t};          token = {type: 'char', value: t};
89          return '<' + '/script>';          p.insertionPoint -= t.length;
90            return '';
91        });        });
92        if (token) return token;        if (token) return token;
93        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function () {        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
94            if (p.insertionPoint < s.length) {
95              token = {type: 'abort'};
96              return s;
97            }
98          token = {type: 'end-tag', value: 'script'};          token = {type: 'end-tag', value: 'script'};
99            p.insertionPoint -= s.length;
100            return '';
101          });
102          if (token) return token;
103          var m;
104          if ((p.insertionPoint < '</script'.length) &&
105              (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
106            var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
107            if (v == 'script'.substring (0, p.insertionPoint - '</'.length)) {
108              return {type: 'abort'};
109            }
110          }
111          i.s = i.s.replace (/^</,
112          function (s) {
113            token = {type: 'char', value: s};
114            p.insertionPoint -= s.length;
115          return '';          return '';
116        });        });
117        if (token) return token;        if (token) return token;
# Line 58  Line 119 
119      }      }
120    
121      var token;      var token;
122      i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
123          if (p.insertionPoint < s.length ||
124              (p.insertionPoint <= s.length &&
125               s.substring (s.length - 1, 1) != '>')) {
126            token = {type: 'abort'};
127            return s;
128          }
129        token = {type: 'end-tag', value: e.toLowerCase ()};        token = {type: 'end-tag', value: e.toLowerCase ()};
130          p.insertionPoint -= s.length;
131        return '';        return '';
132      });      });
133      if (token) return token;      if (token) return token;
134      i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
135        token = {type: 'start-tag', value: e.toLowerCase ()};        if (p.insertionPoint < s.length ||
136              (p.insertionPoint <= s.length &&
137               s.substring (s.length - 1, 1) != '>')) {
138            token = {type: 'abort'};
139            return s;
140          }
141          var tagName;
142          var attrs = {};
143          e = e.replace (/^[\S]+/, function (v) {
144            tagName = v.toLowerCase ();
145            return '';
146          });
147          e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/,
148          function (x, attrName, attrValue1, attrValue2, attrValue3) {
149            v = attrValue1 || attrValue2 || attrValue3;
150            v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
151                .replace (/&amp;/g, '&');
152            attrs[attrName.toLowerCase ()] = v;
153            return '';
154          });
155          if (e.length) {
156            log ('Broken start tag: "' + e + '"');
157          }
158          token = {type: 'start-tag', value: tagName, attrs: attrs};
159          p.insertionPoint -= s.length;
160        return '';        return '';
161      });      });
162      if (token) return token;      if (token) return token;
163        if (p.insertionPoint <= 0) {
164          return {type: 'abort'};
165        }
166      i.s = i.s.replace (/^[^<]+/, function (s) {      i.s = i.s.replace (/^[^<]+/, function (s) {
167          if (p.insertionPoint < s.length) {
168            token = {type: 'char', value: s.substring (0, p.insertionPoint)};
169            var ip = p.insertionPoint;
170            p.insertionPoint = 0;
171            return s.substring (ip, s.length);
172          }
173        token = {type: 'char', value: s};        token = {type: 'char', value: s};
174          p.insertionPoint -= s.length;
175        return '';        return '';
176      });      });
177      if (token) return token;      if (token) return token;
178      i.s = i.s.replace (/^[\s\S]/, function (s) {      i.s = i.s.replace (/^[\s\S]/, function (s) {
179        token = {type: 'char', value: s};        token = {type: 'char', value: s};
180          p.insertionPoint -= s.length;
181        return '';        return '';
182      });      });
183      if (token) return token;      if (token) return token;
# Line 82  Line 185 
185    } // getNextToken    } // getNextToken
186    
187    Parser.prototype.parse = function () {    Parser.prototype.parse = function () {
188      log ('start parsing');      logIndentLevel++;
189        log ('parse: start');
190    
191      while (true) {      while (true) {
192        var token = this.getNextToken ();        var token = this.getNextToken ();
# Line 92  Line 196 
196          if (token.value == 'script') {          if (token.value == 'script') {
197            // 1. Create an element for the token in the HTML namespace.            // 1. Create an element for the token in the HTML namespace.
198            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
199              if (token.attrs.async != null) el.async = true;
200              if (token.attrs.defer != null) el.defer = true;
201              if (token.attrs.src != null) el.src = token.attrs.src;
202    
203            // 2. Mark the element as being "parser-inserted".            // 2. Mark the element as being "parser-inserted".
204            el.manakaiParserInserted = true;            el.manakaiParserInserted = true;
# Line 109  Line 216 
216                el.manakaiAppendText (token.value);                el.manakaiAppendText (token.value);
217    
218              // 4.2. Until it returns a token that is not a character token, or              // 4.2. Until it returns a token that is not a character token, or
219              // TODO: 4.3. Until it stops tokenising.              // until it stops tokenising.
220              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
221                         (token.type == 'end-tag' && token.value == 'script')) {                         (token.type == 'end-tag' && token.value == 'script') ||
222                           token.type == 'abort') {
223                // 6. Switched back to the PCDATA state.                // 6. Switched back to the PCDATA state.
224                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
225    
# Line 138  Line 246 
246            }            }
247    
248            // 9.1. Let the old insertion point have the same value as the ...            // 9.1. Let the old insertion point have the same value as the ...
249              var oldInsertionPoint = this.insertionPoint;
250            // 9.2. Let the insertion point be just before the next input ...            // 9.2. Let the insertion point be just before the next input ...
251              this.setInsertionPoint (0);
252    
253            // 10. Append the new element to the current node.            // 10. Append the new element to the current node.
254            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
255    
256            // 11. Let the insertion point have the value of the old ...            // 11. Let the insertion point have the value of the old ...
257    
258              oldInsertionPoint += this.insertionPoint;
259              this.setInsertionPoint (oldInsertionPoint);
260    
261            // 12. If there is a script that will execute as soon as ...            // 12. If there is a script that will execute as soon as ...
262                        while (this.scriptExecutedWhenParserResumes) {
263                // 12.1. If the tree construction stage is being called reentrantly
264                if (this.reentrant) {
265                  log ('parse: abort (reentrance)');
266                  logIndentLevel--;
267                  return;
268    
269                // 12.2. Otherwise
270                } else {
271                  // 1.
272                  var script = this.scriptExecutedWhenParserResumes;
273                  this.scriptExecutedWhenParserResumes = null;
274    
275                  // 2. Pause until the script has completed loading.
276                  //
277    
278                  // 3. Let the insertion point to just before the next input char.
279                  this.setInsertionPoint (0);
280    
281                  // 4. Execute the script.
282                  executeScript (this.doc, script);
283    
284                  // 5. Let the insertion point be undefined again.
285                  this.setInsertionPoint (undefined);
286    
287                  // 6. If there is once again a script that will execute ...
288                  //
289                }
290              }
291          } else {          } else {
292            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
293            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
# Line 161  Line 300 
300          } else {          } else {
301            log ('parse error: unmatched end tag: ' + token.value);            log ('parse error: unmatched end tag: ' + token.value);
302          }          }
303          } else if (token.type == 'char') {
304            this.openElements[this.openElements.length - 1].manakaiAppendText
305                (token.value);
306        } else if (token.type == 'eof') {        } else if (token.type == 'eof') {
307          break;          break;
308          } else if (token.type == 'abort') {
309            log ('parse: abort');
310            logIndentLevel--;
311            return;
312        }        }
313      }      }
314    
315      log ('stop parsing');      log ('stop parsing');
316    
317        // readyState = 'interactive'
318    
319        // "When a script completes loading" rules start applying.
320    
321        // TODO: Handle "list of scripts that will execute as soon as possible"
322        // and "list of scripts that will execute asynchronously"
323    
324        // Handle "list of scripts that will execute when the document has finished
325        // parsing".
326        var list = this.scriptsExecutedAfterParsing;
327        while (list.length > 0) {
328          // TODO: break unless completed loading
329    
330          // Step 1.
331          //
332    
333          // Step 2. and Step 3.
334          log ('Executing a |defer|red script...');
335          executeScript (this.doc, list.shift ());
336    
337          // Step 4.
338        }
339    
340        log ('DOMContentLoaded event fired');
341    
342        // "delays the load event" things have completed:
343        // readyState = 'complete'
344        log ('load event fired');
345    
346        logIndentLevel--;
347    } // parse    } // parse
348    
349      Parser.prototype.setInsertionPoint = function (ip) {
350        if (ip == undefined || ip == null || isNaN (ip)) {
351          log ('insertion point: set to undefined');
352          this.insertionPoint = undefined;
353        } else if (ip == this.in.s.length) {
354          log ('insertion point: end of file');
355          this.insertionPoint = ip;
356        } else {
357          log ('insertion point: set to ' + ip +
358               ' (before "' + this.in.s.substring (0, 10) + '")');
359          this.insertionPoint = ip;
360        }
361      }; // setInsertionPoint
362    
363    function JSDocument (p) {    function JSDocument (p) {
364      this.childNodes = [];      this.childNodes = [];
365      this._parser = p;      this._parser = p;
# Line 186  Line 377 
377      e.parentNode = this;      e.parentNode = this;
378    
379      if (e.localName == 'script') {      if (e.localName == 'script') {
380        log ('start running a script');        logIndentLevel++;
381          log ('Running a script: start');
382    
383        var doc = this.ownerDocument;        var doc = this.ownerDocument || this;
384        var p = doc._parser;        var p = doc._parser;
385    
386        // 1. Script type        // 1. Script type
# Line 203  Line 395 
395        // 2.4. If the script element has its "already executed" flag set        // 2.4. If the script element has its "already executed" flag set
396        if (e.manakaiAlreadyExecuted) {        if (e.manakaiAlreadyExecuted) {
397          // 2.5. Abort these steps at this point.          // 2.5. Abort these steps at this point.
398          log ('running a script: aborted');          log ('Running a script: aborted');
399            logIndentLevel--;
400          return e;          return e;
401        }        }
402    
# Line 218  Line 411 
411        // 5.1.        // 5.1.
412        if (/* TODO: If the document is still being parsed && */        if (/* TODO: If the document is still being parsed && */
413            e.defer && !e.async) {            e.defer && !e.async) {
414          // TODO          p.scriptsExecutedAfterParsing.push (e);
415            log ('Running a script: aborted (defer)');
416        } else if (e.async && e.src != null) {        } else if (e.async && e.src != null) {
417          // TODO          // TODO
418        } else if (e.async && e.src == null        } else if (e.async && e.src == null
419                   /* && list of scripts that will execute asynchronously is not empty */) {                   /* && list of scripts that will execute asynchronously is not empty */) {
420          // TODO          // TODO
421        } else if (e.src != null && e.manakaiParserInserted) {        } else if (e.src != null && e.manakaiParserInserted) {
422          // TODO          if (p.scriptExecutedWhenParserResumes) {
423              log ('Error: There is a script that will execute as soon as the parser resumes.');
424            }
425            p.scriptExecutedWhenParserResumes = e;
426            log ('Running a script: aborted (src)');
427        } else if (e.src != null) {        } else if (e.src != null) {
428          // TODO          // TODO
429        } else {        } else {
430          executeScript (doc, e); // even if other scripts are already executing.          executeScript (doc, e); // even if other scripts are already executing.
431        }        }
432    
433        log ('end running a script');        log ('Running a script: end');
434          logIndentLevel--;
435      }      }
436    
437      return e;      return e;
# Line 241  Line 440 
440    function executeScript (doc, e) {    function executeScript (doc, e) {
441      log ('executing a script block: start');      log ('executing a script block: start');
442    
443      // If the load resulted in an error, then ... firing an error event ...      var s;
444        if (e.src != null) {
445          s = getExternalScript (e.src);
446    
447          // If the load resulted in an error, then ... firing an error event ...
448          if (s == null) {
449            log ('error event fired at the script element');
450            return;
451          }
452    
453          log ('External script loaded: "' + s + '"');
454        } else {
455          s = e.text;
456        }
457    
458      // If the load was successful      // If the load was successful
459      log ('load event fired at the script element');      log ('load event fired at the script element');
# Line 250  Line 462 
462      // Scripting is enabled, Document.designMode is disabled,      // Scripting is enabled, Document.designMode is disabled,
463      // Document is the active document in its browsing context      // Document is the active document in its browsing context
464    
       var s;  
       if (e.src != null) {  
         // TODO: from external file  
       } else {  
         s = e.text;  
       }  
   
465        parseAndRunScript (doc, s);        parseAndRunScript (doc, s);
466      }      }
467    
468      log ('executing a script block: end');      log ('executing a script block: end');
469    } // executeScript    } // executeScript
470    
471      function getExternalScript (uri) {
472        if (uri.match (/^javascript:/i)) {
473          var m;
474          if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
475            if (m[1]) {
476              return m[1];
477            } else if (m[2]) {
478              return m[2];
479            } else {
480              return null;
481            }
482          } else {
483            log ('Complex javascript: URI is not supported: <' + uri + '>');
484            return null;
485          }
486        } else {
487          log ('URI scheme not supported: <' + uri + '>');
488          return null;
489        }
490      } // getExternalScript
491    
492    function parseAndRunScript (doc, s) {    function parseAndRunScript (doc, s) {
493      while (true) {      while (true) {
494        var matched = false;        var matched = false;
# Line 299  Line 525 
525      }      }
526    }; // manakaiAppendText    }; // manakaiAppendText
527    
528      JSDocument.prototype.open = function () {
529        // Two or fewer arguments
530    
531        // Step 1.
532        var type = arguments[0] || 'text/html';
533        
534        // Step 2.
535        var replace = arguments[1] == 'replace';
536    
537        // Step 3.
538        if (this._parser &&
539            !this._parser.scriptCreated &&
540            this._parser.in.insertionPoint != undefined) {
541          log ('document.open () in parsing mode is ignored');
542          return this;
543        }
544    
545        // Step 4.
546        log ('onbeforeunload event fired');
547        log ('onunload event fired');
548    
549        // Step 5.
550        if (this._parser) {
551          // Discard the parser.
552        }
553    
554        // Step 6.
555        log ('document cleared by document.open ()');
556        this.childNodes = [];
557    
558        // Step 7.
559        this._parser = new Parser (new InputStream (''), this);
560        this._parser.scriptCreated = true;
561    
562        // Step 8.
563        this.manakaiIsHTML = true;
564    
565        // Step 9.
566        // If not text/html, ...
567    
568        // Step 10.
569        if (!replace) {
570          // History      
571        }
572    
573        // Step 11.
574        this._parser.setInsertionPoint (this._parser.in.s.length);
575    
576        // Step 12.
577        return this;
578      }; // document.open
579    
580    JSDocument.prototype.write = function () {    JSDocument.prototype.write = function () {
581        logIndentLevel++;
582    
583        var p = this._parser;
584    
585      // 1. If the insertion point is undefined, the open() method must be ...      // 1. If the insertion point is undefined, the open() method must be ...
586      //      if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
587          this.open ();
588          p = this._parser;
589        }
590    
591      // 2. ... inserted into the input stream just before the insertion point.      // 2. ... inserted into the input stream just before the insertion point.
592      log ('document.write: insert "' + Array.join (arguments, '') + '"');      var s = Array.join (arguments, '');
593        log ('document.write: insert "' + s + '"' +
594             ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
595        p.in.s = p.in.s.substring (0, p.insertionPoint) + s
596            + p.in.s.substring (p.insertionPoint, p.in.s.length);
597        p.insertionPoint += s.length;
598    
599      // 3. If there is a script that will execute as soon as the parser resumes      // 3. If there is a script that will execute as soon as the parser resumes
600      // TODO      if (p.scriptExecutedAfterParserResumes) {
601          log ('document.write: processed later (there is an unprocessed <script src>)');
602          logIndentLevel--;
603          return;
604        }
605    
606      // 4. Process the characters that were inserted, ...      // 4. Process the characters that were inserted, ...
607        var originalReentrant = p.reentrant;
608        p.reentrant = true;
609        p.parse ();
610        p.reentrant = originalReentrant;
611        // TODO: "Abort the processing of any nested invokations of the tokeniser,
612        // yielding control back to the caller." (<script> parsing).  Do we need
613        // to do something here?
614    
615      // 5. Return      // 5. Return
616      log ('document.write: return');      log ('document.write: return');
617    
618        logIndentLevel--;
619      return;      return;
620    }; // document.write    }; // document.write
621    
# Line 332  Line 635 
635        var node = n.childNodes[i];        var node = n.childNodes[i];
636        if (node instanceof JSElement) {        if (node instanceof JSElement) {
637          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
638            if (node.async) r += '| ' + indent + '  async=""\n';
639            if (node.defer) r += '| ' + indent + '  defer=""\n';
640            if (node.src) r += '| ' + indent + '  src="' + node.src + '"\n';
641          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
642        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
643          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 348  Line 654 
654    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
655    update ();    update ();
656  ">  ">
657    <h1>Live Scripting Parser</h1>
658    
659  <textarea onchange=" update () ">&lt;html>  <h2>Markup to test
660    (<a href=data:, id=permalink rel=bookmark>permalink</a>)</h2>
661    <p>
662    <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
663  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
664  &lt;p>  &lt;p>
665  &lt;script>  &lt;script>
666  document.write ('aaaaaaa&lt;/p>\n&lt;script>\ndocument.write("cccccc")\n&lt;/', 'script>\nbbbbbb');  document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
667  &lt;/script>  &lt;/script>
668  &lt;p>  &lt;p>
669  </textarea>  </textarea>
670    
671  <output></output>  <h2>Log</h2>
672    <p><output></output>
673    
674    <!-- TODO: short description -->
675    
676    <!-- TODO: permalink query -> textarea -->
677    
678    <!-- TODO: multiple attributes are not supported yet -->
679    
680  </body>  </body>
681  </html>  </html>

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.7

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24