/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log | View Patch

revision 1.2 by wakaba, Sun Apr 20 07:48:00 2008 UTC revision 1.12 by wakaba, Sun Apr 27 11:21:09 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5  <style>  <style>
6      h1, h2 {
7        margin: 0;
8        font-size: 100%;
9      }
10      p, pre {
11        margin: 0;
12      }
13    textarea {    textarea {
14       display: block;      width: 100%;
15       width: 80%;      -width: 99%;
16       margin-left: auto;      height: 10em;
      margin-right: auto;  
      min-height: 20em;  
17    }    }
18    output {    output {
19      display: block;      display: block;
20      font-family: monospace;      font-family: monospace;
21      white-space: pre;      white-space: -moz-pre-wrap;
22        white-space: pre-wrap;
23    }    }
24  </style>  </style>
25  <script>  <script>
26      var delayedUpdater = 0;
27    
28    function update () {    function update () {
29      document.logElement.textContent = '';      if (delayedUpdater) {
30      var p = new Parser (new InputStream (document.sourceElement.value));        clearTimeout (delayedUpdater);
31      p.parse ();        delayedUpdater = 0;
32      log (dumpTree (p.doc, ''));      }
33        delayedUpdater = setTimeout (update2, 100);
34    } // update    } // update
35    
36      function update2 () {
37        var v = document.sourceElement.value;
38        if (v != document.previousSourceText) {
39          document.previousSourceText = v;
40          document.links['permalink'].href
41              = location.pathname + '?s=' + encodeURIComponent (v);
42          document.links['ldvlink'].href
43              = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
44              + encodeURIComponent (v);
45    
46          document.logElement.textContent = '';
47          var p = new Parser (new InputStream (v));
48          var doc = p.doc;
49          p.parse ();
50          
51          log (dumpTree (doc, ''));
52          
53          if (p.hasAsyncScript) {
54            log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
55          }
56        }
57      } // update2
58    
59      var logIndentLevel = 0;
60    function log (s) {    function log (s) {
61        for (var i = 0; i < logIndentLevel; i++) {
62          s = '  ' + s;
63        }
64      document.logElement.appendChild (document.createTextNode (s + "\n"));      document.logElement.appendChild (document.createTextNode (s + "\n"));
65    } // log    } // log
66    
# Line 32  Line 68 
68      this.s = s;      this.s = s;
69    } // InputStream    } // InputStream
70    
71    function Parser (i) {    function Parser (i, doc) {
72      this.parseMode = 'pcdata';      this.parseMode = 'pcdata';
73      this.doc = new JSDocument (this);      if (!doc) {
74      this.openElements = [this.doc];        doc = new JSDocument (this);
75      this.in = i;        doc.manakaiIsHTML = true;
76        }
77        this.doc = doc;
78        this.openElements = [doc];
79        this.input = i;
80        this.scriptsExecutedAfterParsing = [];
81        this.scriptsExecutedSoon = [];
82        this.scriptsExecutedAsynchronously = [];
83    } // Parser    } // Parser
84    
85    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
86      var i = this.in;      var p = this;
87        var i = this.input;
88      if (this.parseMode == 'script') {      if (this.parseMode == 'script') {
89        var token;        var token;
90        i.s = i.s.replace (/^([\s\S]+?)<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/,        if (p.insertionPoint <= 0) {
91            return {type: 'abort'};
92          }
93          i.s = i.s.replace (/^([^<]+)/,
94        function (s, t) {        function (s, t) {
95            if (0 < p.insertionPoint && p.insertionPoint < t.length) {
96              token = {type: 'char', value: t.substring (0, p.insertionPoint)};
97              var ip = p.insertionPoint;
98              p.insertionPoint = 0;
99              return t.substring (ip, t.length);
100            }
101          token = {type: 'char', value: t};          token = {type: 'char', value: t};
102          return '<' + '/script>';          p.insertionPoint -= t.length;
103            return '';
104        });        });
105        if (token) return token;        if (token) return token;
106        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function () {        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
107            if (p.insertionPoint < s.length) {
108              token = {type: 'abort'};
109              return s;
110            }
111          token = {type: 'end-tag', value: 'script'};          token = {type: 'end-tag', value: 'script'};
112            p.insertionPoint -= s.length;
113            return '';
114          });
115          if (token) return token;
116          var m;
117          if ((p.insertionPoint < '</script'.length) &&
118              (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
119            var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
120            if (v == 'script'.substring (0, p.insertionPoint - '</'.length)) {
121              return {type: 'abort'};
122            }
123          }
124          i.s = i.s.replace (/^</,
125          function (s) {
126            token = {type: 'char', value: s};
127            p.insertionPoint -= s.length;
128          return '';          return '';
129        });        });
130        if (token) return token;        if (token) return token;
# Line 58  Line 132 
132      }      }
133    
134      var token;      var token;
135      i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
136          if (p.insertionPoint < s.length ||
137              (p.insertionPoint <= s.length &&
138               s.substring (s.length - 1, 1) != '>')) {
139            token = {type: 'abort'};
140            return s;
141          }
142        token = {type: 'end-tag', value: e.toLowerCase ()};        token = {type: 'end-tag', value: e.toLowerCase ()};
143          p.insertionPoint -= s.length;
144        return '';        return '';
145      });      });
146      if (token) return token;      if (token) return token;
147      i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
148        token = {type: 'start-tag', value: e.toLowerCase ()};        if (p.insertionPoint < s.length ||
149              (p.insertionPoint <= s.length &&
150               s.substring (s.length - 1, 1) != '>')) {
151            token = {type: 'abort'};
152            return s;
153          }
154          var tagName;
155          var attrs = {};
156          e = e.replace (/^[\S]+/, function (v) {
157            tagName = v.toLowerCase ();
158            return '';
159          });
160          while (true) {
161            var m = false;
162            e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
163            function (x, attrName, attrValue1, attrValue2, attrValue3) {
164              v = attrValue1 || attrValue2 || attrValue3;
165              v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
166                  .replace (/&amp;/g, '&');
167              attrs[attrName.toLowerCase ()] = v;
168              m = true;
169              return '';
170            });
171            if (!m) break;
172          }
173          if (e.length) {
174            log ('Broken start tag: "' + e + '"');
175          }
176          token = {type: 'start-tag', value: tagName, attrs: attrs};
177          p.insertionPoint -= s.length;
178        return '';        return '';
179      });      });
180      if (token) return token;      if (token) return token;
181        if (p.insertionPoint <= 0) {
182          return {type: 'abort'};
183        }
184      i.s = i.s.replace (/^[^<]+/, function (s) {      i.s = i.s.replace (/^[^<]+/, function (s) {
185          if (p.insertionPoint < s.length) {
186            token = {type: 'char', value: s.substring (0, p.insertionPoint)};
187            var ip = p.insertionPoint;
188            p.insertionPoint = 0;
189            return s.substring (ip, s.length);
190          }
191        token = {type: 'char', value: s};        token = {type: 'char', value: s};
192          p.insertionPoint -= s.length;
193        return '';        return '';
194      });      });
195      if (token) return token;      if (token) return token;
196      i.s = i.s.replace (/^[\s\S]/, function (s) {      i.s = i.s.replace (/^[\s\S]/, function (s) {
197        token = {type: 'char', value: s};        token = {type: 'char', value: s};
198          p.insertionPoint -= s.length;
199        return '';        return '';
200      });      });
201      if (token) return token;      if (token) return token;
# Line 82  Line 203 
203    } // getNextToken    } // getNextToken
204    
205    Parser.prototype.parse = function () {    Parser.prototype.parse = function () {
206      log ('start parsing');      logIndentLevel++;
207        log ('parse: start');
208    
209      while (true) {      while (true) {
210        var token = this.getNextToken ();        var token = this.getNextToken ();
# Line 92  Line 214 
214          if (token.value == 'script') {          if (token.value == 'script') {
215            // 1. Create an element for the token in the HTML namespace.            // 1. Create an element for the token in the HTML namespace.
216            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
217              if (token.attrs.async != null) el.async = true;
218              if (token.attrs.defer != null) el.defer = true;
219              if (token.attrs.src != null) el.src = token.attrs.src;
220    
221            // 2. Mark the element as being "parser-inserted".            // 2. Mark the element as being "parser-inserted".
222            el.manakaiParserInserted = true;            el.manakaiParserInserted = true;
# Line 109  Line 234 
234                el.manakaiAppendText (token.value);                el.manakaiAppendText (token.value);
235    
236              // 4.2. Until it returns a token that is not a character token, or              // 4.2. Until it returns a token that is not a character token, or
237              // TODO: 4.3. Until it stops tokenising.              // until it stops tokenising.
238              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
239                         (token.type == 'end-tag' && token.value == 'script')) {                         (token.type == 'end-tag' && token.value == 'script') ||
240                           token.type == 'abort') {
241                // 6. Switched back to the PCDATA state.                // 6. Switched back to the PCDATA state.
242                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
243    
# Line 138  Line 264 
264            }            }
265    
266            // 9.1. Let the old insertion point have the same value as the ...            // 9.1. Let the old insertion point have the same value as the ...
267              var oldInsertionPoint = this.insertionPoint;
268            // 9.2. Let the insertion point be just before the next input ...            // 9.2. Let the insertion point be just before the next input ...
269              this.setInsertionPoint (0);
270    
271            // 10. Append the new element to the current node.            // 10. Append the new element to the current node.
272            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
273    
274            // 11. Let the insertion point have the value of the old ...            // 11. Let the insertion point have the value of the old ...
275    
276              oldInsertionPoint += this.insertionPoint;
277              this.setInsertionPoint (oldInsertionPoint);
278    
279            // 12. If there is a script that will execute as soon as ...            // 12. If there is a script that will execute as soon as ...
280                        while (this.scriptExecutedWhenParserResumes) {
281                // 12.1. If the tree construction stage is being called reentrantly
282                if (this.reentrant) {
283                  log ('parse: abort (reentrance)');
284                  logIndentLevel--;
285                  return;
286    
287                // 12.2. Otherwise
288                } else {
289                  // 1.
290                  var script = this.scriptExecutedWhenParserResumes;
291                  this.scriptExecutedWhenParserResumes = null;
292    
293                  // 2. Pause until the script has completed loading.
294                  //
295    
296                  // 3. Let the insertion point to just before the next input char.
297                  this.setInsertionPoint (0);
298    
299                  // 4. Execute the script.
300                  executeScript (this.doc, script);
301    
302                  // 5. Let the insertion point be undefined again.
303                  this.setInsertionPoint (undefined);
304    
305                  // 6. If there is once again a script that will execute ...
306                  //
307                }
308              }
309          } else {          } else {
310            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
311            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
# Line 161  Line 318 
318          } else {          } else {
319            log ('parse error: unmatched end tag: ' + token.value);            log ('parse error: unmatched end tag: ' + token.value);
320          }          }
321          } else if (token.type == 'char') {
322            this.openElements[this.openElements.length - 1].manakaiAppendText
323                (token.value);
324        } else if (token.type == 'eof') {        } else if (token.type == 'eof') {
325          break;          break;
326          } else if (token.type == 'abort') {
327            log ('parse: abort');
328            logIndentLevel--;
329            return;
330        }        }
331      }      }
332    
333      log ('stop parsing');      log ('stop parsing');
334    
335        // readyState = 'interactive'
336    
337        // "When a script completes loading" rules start applying.
338    
339        while (this.scriptsExecutedSoon.length > 0 ||
340               this.scriptsExecutedAsynchronously.length > 0) {
341          // Handle "list of scripts that will execute as soon as possible".
342          while (this.scriptsExecutedSoon.length > 0) {
343            var e = this.scriptsExecutedSoon.shift ();
344      
345            // If it has completed loading
346            log ('Execute an external script not inserted by parser...');
347            executeScript (this.doc, e);
348    
349            // NOTE: It MAY be executed before the end of the parsing, according
350            // to the spec.
351            this.hasAsyncScript = true;
352          }
353    
354          // Handle "list of scripts that will execute asynchronously".
355          while (this.scriptsExecutedAsynchronously.length > 0) {
356            var e = this.scriptsExecutedAsynchronously.shift ();
357    
358            // Step 1.
359            // We assume that all scripts have been loaded at this time.
360      
361            // Step 2.
362            log ('Execute an asynchronous script...');
363            executeScript (this.doc, e);
364    
365            // Step 3.
366            //
367    
368            // Step 4.
369            //
370    
371            this.hasAsyncScript = true;
372          }
373        }
374    
375        // Handle "list of scripts that will execute when the document has finished
376        // parsing".
377        var list = this.scriptsExecutedAfterParsing;
378        while (list.length > 0) {
379          // TODO: break unless completed loading
380    
381          // Step 1.
382          //
383    
384          // Step 2. and Step 3.
385          log ('Executing a |defer|red script...');
386          executeScript (this.doc, list.shift ());
387    
388          // Step 4.
389        }
390    
391        log ('DOMContentLoaded event fired');
392    
393        // "delays the load event" things have completed:
394        // readyState = 'complete'
395        log ('load event fired');
396    
397        logIndentLevel--;
398    } // parse    } // parse
399    
400      Parser.prototype.setInsertionPoint = function (ip) {
401        if (ip == undefined || ip == null || isNaN (ip)) {
402          log ('insertion point: set to undefined');
403          this.insertionPoint = undefined;
404        } else if (ip == this.input.s.length) {
405          log ('insertion point: end of file');
406          this.insertionPoint = ip;
407        } else {
408          log ('insertion point: set to ' + ip +
409               ' (before "' + this.input.s.substring (0, 10) + '")');
410          this.insertionPoint = ip;
411        }
412      }; // setInsertionPoint
413    
414    function JSDocument (p) {    function JSDocument (p) {
415      this.childNodes = [];      this.childNodes = [];
416      this._parser = p;      this._parser = p;
# Line 186  Line 428 
428      e.parentNode = this;      e.parentNode = this;
429    
430      if (e.localName == 'script') {      if (e.localName == 'script') {
431        log ('start running a script');        logIndentLevel++;
432          log ('Running a script: start');
433    
434        var doc = this.ownerDocument;        var doc = this.ownerDocument || this;
435        var p = doc._parser;        var p = doc._parser;
436    
437        // 1. Script type        // 1. Script type
# Line 203  Line 446 
446        // 2.4. If the script element has its "already executed" flag set        // 2.4. If the script element has its "already executed" flag set
447        if (e.manakaiAlreadyExecuted) {        if (e.manakaiAlreadyExecuted) {
448          // 2.5. Abort these steps at this point.          // 2.5. Abort these steps at this point.
449          log ('running a script: aborted');          log ('Running a script: aborted');
450            logIndentLevel--;
451          return e;          return e;
452        }        }
453    
# Line 218  Line 462 
462        // 5.1.        // 5.1.
463        if (/* TODO: If the document is still being parsed && */        if (/* TODO: If the document is still being parsed && */
464            e.defer && !e.async) {            e.defer && !e.async) {
465          // TODO          p.scriptsExecutedAfterParsing.push (e);
466            log ('Running a script: aborted (defer)');
467        } else if (e.async && e.src != null) {        } else if (e.async && e.src != null) {
468          // TODO          p.scriptsExecutedAsynchronously.push (e);
469        } else if (e.async && e.src == null          log ('Running a script: aborted (async src)');
470                   /* && list of scripts that will execute asynchronously is not empty */) {        } else if (e.async && e.src == null &&
471          // TODO                   p.scriptsExecutedAsynchronously.length > 0) {
472            p.scriptsExecutedAsynchronously.push (e);
473            log ('Running a script: aborted (async)');
474            // ISSUE: What is the difference with the case above?
475        } else if (e.src != null && e.manakaiParserInserted) {        } else if (e.src != null && e.manakaiParserInserted) {
476          // TODO          if (p.scriptExecutedWhenParserResumes) {
477              log ('Error: There is a script that will execute as soon as the parser resumes.');
478            }
479            p.scriptExecutedWhenParserResumes = e;
480            log ('Running a script: aborted (src parser-inserted)');
481        } else if (e.src != null) {        } else if (e.src != null) {
482          // TODO          p.scriptsExecutedSoon.push (e);
483            log ('Running a script: aborted (src)');
484        } else {        } else {
485          executeScript (doc, e); // even if other scripts are already executing.          executeScript (doc, e); // even if other scripts are already executing.
486        }        }
487    
488        log ('end running a script');        log ('Running a script: end');
489          logIndentLevel--;
490      }      }
491    
492      return e;      return e;
# Line 241  Line 495 
495    function executeScript (doc, e) {    function executeScript (doc, e) {
496      log ('executing a script block: start');      log ('executing a script block: start');
497    
498      // If the load resulted in an error, then ... firing an error event ...      var s;
499        if (e.src != null) {
500          s = getExternalScript (e.src);
501    
502          // If the load resulted in an error, then ... firing an error event ...
503          if (s == null) {
504            log ('error event fired at the script element');
505            return;
506          }
507    
508          log ('External script loaded: "' + s + '"');
509        } else {
510          s = e.text;
511        }
512    
513      // If the load was successful      // If the load was successful
514      log ('load event fired at the script element');      log ('load event fired at the script element');
# Line 250  Line 517 
517      // Scripting is enabled, Document.designMode is disabled,      // Scripting is enabled, Document.designMode is disabled,
518      // Document is the active document in its browsing context      // Document is the active document in its browsing context
519    
       var s;  
       if (e.src != null) {  
         // TODO: from external file  
       } else {  
         s = e.text;  
       }  
   
520        parseAndRunScript (doc, s);        parseAndRunScript (doc, s);
521      }      }
522    
523      log ('executing a script block: end');      log ('executing a script block: end');
524    } // executeScript    } // executeScript
525    
526      function getExternalScript (uri) {
527        if (uri.match (/^javascript:/i)) {
528          var m;
529          if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
530            if (m[1]) {
531              return unescapeJSLiteral (m[1]);
532            } else if (m[2]) {
533              return unescapeJSLiteral (m[2]);
534            } else {
535              return null;
536            }
537          } else {
538            log ('Complex javascript: URI is not supported: <' + uri + '>');
539            return null;
540          }
541        } else {
542          log ('URI scheme not supported: <' + uri + '>');
543          return null;
544        }
545      } // getExternalScript
546    
547    function parseAndRunScript (doc, s) {    function parseAndRunScript (doc, s) {
548      while (true) {      while (true) {
549        var matched = false;        var matched = false;
# Line 270  Line 551 
551          matched = true;          matched = true;
552          var args = [];          var args = [];
553          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
554            args.push (v.substring (1, v.length - 1));            args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
555            return '';            return '';
556          });          });
557          doc.write.apply (doc, args);          doc.write.apply (doc, args);
558          return '';          return '';
559        });        });
560          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
561          function (s, t, u) {
562            matched = true;
563            var args = [unescapeJSLiteral (t ? t : u)];
564            doc._insertExternalScript.apply (doc, args);
565            return '';
566          });
567        if (s == '') break;        if (s == '') break;
568        if (!matched) {        if (!matched) {
569          log ('Script parse error: "' + s + '"');          log ('Script parse error: "' + s + '"');
# Line 284  Line 572 
572      }      }
573    } // parseAndRunScript    } // parseAndRunScript
574    
575      function unescapeJSLiteral (s) {
576        return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
577          return String.fromCharCode (parseInt ('0x' + v));
578        });
579      } // unescapeJSLiteral
580    
581    function JSText (data) {    function JSText (data) {
582      this.data = data;      this.data = data;
583    } // JSText    } // JSText
# Line 299  Line 593 
593      }      }
594    }; // manakaiAppendText    }; // manakaiAppendText
595    
596      JSDocument.prototype.open = function () {
597        // Two or fewer arguments
598    
599        // Step 1.
600        var type = arguments[0] || 'text/html';
601        
602        // Step 2.
603        var replace = arguments[1] == 'replace';
604    
605        // Step 3.
606        if (this._parser &&
607            !this._parser.scriptCreated &&
608            this._parser.input.insertionPoint != undefined) {
609          log ('document.open () in parsing mode is ignored');
610          return this;
611        }
612    
613        // Step 4.
614        log ('onbeforeunload event fired');
615        log ('onunload event fired');
616    
617        // Step 5.
618        if (this._parser) {
619          // Discard the parser.
620        }
621    
622        // Step 6.
623        log ('document cleared by document.open ()');
624        this.childNodes = [];
625    
626        // Step 7.
627        this._parser = new Parser (new InputStream (''), this);
628        this._parser.scriptCreated = true;
629    
630        // Step 8.
631        this.manakaiIsHTML = true;
632    
633        // Step 9.
634        // If not text/html, ...
635    
636        // Step 10.
637        if (!replace) {
638          // History      
639        }
640    
641        // Step 11.
642        this._parser.setInsertionPoint (this._parser.input.s.length);
643    
644        // Step 12.
645        return this;
646      }; // document.open
647    
648    JSDocument.prototype.write = function () {    JSDocument.prototype.write = function () {
649        logIndentLevel++;
650    
651        var p = this._parser;
652    
653      // 1. If the insertion point is undefined, the open() method must be ...      // 1. If the insertion point is undefined, the open() method must be ...
654      //      if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
655          this.open ();
656          p = this._parser;
657        }
658    
659      // 2. ... inserted into the input stream just before the insertion point.      // 2. ... inserted into the input stream just before the insertion point.
660      log ('document.write: insert "' + Array.join (arguments, '') + '"');      var s = Array.join (arguments, '');
661        log ('document.write: insert "' + s + '"' +
662             ' before "' +
663             p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
664        p.input.s = p.input.s.substring (0, p.insertionPoint) + s
665            + p.input.s.substring (p.insertionPoint, p.input.s.length);
666        p.insertionPoint += s.length;
667    
668      // 3. If there is a script that will execute as soon as the parser resumes      // 3. If there is a script that will execute as soon as the parser resumes
669      // TODO      if (p.scriptExecutedAfterParserResumes) {
670          log ('document.write: processed later (there is an unprocessed <script src>)');
671          logIndentLevel--;
672          return;
673        }
674    
675      // 4. Process the characters that were inserted, ...      // 4. Process the characters that were inserted, ...
676        var originalReentrant = p.reentrant;
677        p.reentrant = true;
678        p.parse ();
679        p.reentrant = originalReentrant;
680        // TODO: "Abort the processing of any nested invokations of the tokeniser,
681        // yielding control back to the caller." (<script> parsing).  Do we need
682        // to do something here?
683    
684      // 5. Return      // 5. Return
685      log ('document.write: return');      log ('document.write: return');
686    
687        logIndentLevel--;
688      return;      return;
689    }; // document.write    }; // document.write
690    
691      JSDocument.prototype._insertExternalScript = function (uri) {
692        var s = new JSElement (this, 'script');
693        s.src = uri;
694        this.documentElement.appendChild (s);
695      }; // _insertExternalScript
696    
697      JSDocument.prototype.__defineGetter__ ('documentElement', function () {
698        var cn = this.childNodes;
699        for (var i = 0; i < cn.length; i++) {
700          if (cn[i] instanceof JSElement) {
701            return cn[i]
702          }
703        }
704        return null;
705      });
706    
707    JSElement.prototype.__defineGetter__ ('text', function () {    JSElement.prototype.__defineGetter__ ('text', function () {
708      var r = '';      var r = '';
709      for (var i = 0; i < this.childNodes.length; i++) {      for (var i = 0; i < this.childNodes.length; i++) {
# Line 332  Line 720 
720        var node = n.childNodes[i];        var node = n.childNodes[i];
721        if (node instanceof JSElement) {        if (node instanceof JSElement) {
722          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
723            if (node.async) r += '| ' + indent + '  async=""\n';
724            if (node.defer) r += '| ' + indent + '  defer=""\n';
725            if (node.src != null) {
726              r += '| ' + indent + '  src="' + node.src + '"\n';
727            }
728          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
729        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
730          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 345  Line 738 
738  </head>  </head>
739  <body onload="  <body onload="
740    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
741    
742      var q = location.search;
743      if (q != null) {
744        q = q.substring (1).split (/;/);
745        for (var i = 0; i < q.length; i++) {
746          var v = q[i].split (/=/, 2);
747          v[0] = decodeURIComponent (v[0]);
748          v[1] = decodeURIComponent (v[1] || '');
749          if (v[0] == 's') {
750            document.sourceElement.value = v[1];
751          }
752        }
753      }
754    
755    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
756    update ();    update ();
757  ">  ">
758    <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
759    Parser</h1>
760    
761  <textarea onchange=" update () ">&lt;html>  <h2>Markup to test
762    (<a href=data:, id=permalink rel=bookmark>permalink</a>,
763    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
764        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
765        Viewer</a>)</h2>
766    <p>
767    <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
768  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
769  &lt;p>  &lt;p>
770  &lt;script>  &lt;script>
771  document.write ('aaaaaaa&lt;/p>\n&lt;script>\ndocument.write("cccccc")\n&lt;/', 'script>\nbbbbbb');  document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
772  &lt;/script>  &lt;/script>
773  &lt;p>  &lt;p>
774  </textarea>  </textarea>
775    
776  <output></output>  <h2 id=log>Log</h2>
777    <p><output></output>
778    
779    <h2 id=notes>Notes</h2>
780    
781    <p>This is a <em>simplified</em> implementation of
782    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
783    Parsing Algorithm</a>.  It only implements the script-related parts of the
784    algorithm.  In particular, this parser:
785    <ul>
786    <li>Does not support <code>DOCTYPE</code> and comment tokens.
787    <li>Does not support entities except for <code>&amp;quot;</code>,
788    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
789    <code>src</code> attribute value.
790    <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
791    algorithm, and so on.
792    <li>Does not raise parse errors for invalid attribute specifications in start
793    or end tags.
794    <li>Does not support CDATA/PCDATA elements other than <code>script</code>.
795    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
796    in <code>script</code> element.
797    <li>Does not support foreign (SVG or MathML) elements.
798    <li>Only supports <code>script</code> <code>type</code>
799    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
800    attributes are ignored.
801    <li>Only supports a limited set of statements.  A script must consist of
802    zero or more statements similar to the following ones, possibly
803    preceded, followed, or separated by white space characters:
804      <ul>
805      <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
806      <li><code>var s = document.createElement ("script");
807                s.src = "<var>string</var>";
808                document.documentElement.appendChild (s);</code>
809      </ul>
810    Note that strings may be delimited by <code>'</code>s instead of
811    <code>"</code>s.
812    <li>Only supports <code>javascript:</code>
813    <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
814    <code>src</code> attribute of the <code>script</code> element.  In addition,
815    the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
816    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
817    <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
818    string literals.
819    <li>Does not handle the <i>stop parsing</i> phase correctly if the document is
820    replaced by a <code>document.open ()</code> call.  In other words, delayed
821    (deferred or asynchronous) script executions and event firings might be
822    treated in a wrong way if a <code>document.open ()</code> invocation
823    is implicitly done by <code>document.write ()</code> in a delayed script.
824    </ul>
825    
826    <p>For some reason, this parser does not work in browsers that do
827    not support JavaScript 1.5.
828    
829    <!-- TODO: |src| attribute value should refer to the value at the time
830    when it is inserted into the document, not the value when the script is
831    executed.  Currently it does not matter, since we don't allow dynamic
832    modification to the |src| content/DOM attribute value yet. -->
833    
834    <!-- TODO: license -->
835    
836  </body>  </body>
837  </html>  </html>

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.12

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24