/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log | View Patch

revision 1.4 by wakaba, Sun Apr 20 12:19:13 2008 UTC revision 1.14 by wakaba, Tue Apr 29 02:50:00 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5    <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6    <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7        title="GNU GPL2 or later">
8  <style>  <style>
9      h1 {
10        margin: 0;
11        font-size: 150%;
12      }
13      h2 {
14        margin: 0;
15        font-size: 100%;
16      }
17      p {
18        margin: 0 1em;
19      }
20    textarea {    textarea {
21       display: block;      width: 100%;
22       width: 80%;      -width: 99%;
23       margin-left: auto;      height: 10em;
      margin-right: auto;  
      min-height: 20em;  
24    }    }
25    output {    output {
26      display: block;      display: block;
# Line 18  Line 30 
30    }    }
31  </style>  </style>
32  <script>  <script>
33      var delayedUpdater = 0;
34    
35    function update () {    function update () {
36      document.logElement.textContent = '';      if (delayedUpdater) {
37      var p = new Parser (new InputStream (document.sourceElement.value));        clearTimeout (delayedUpdater);
38      var doc = p.doc;        delayedUpdater = 0;
39      p.parse ();      }
40      log (dumpTree (doc, ''));      delayedUpdater = setTimeout (update2, 100);
41    } // update    } // update
42    
43      function update2 () {
44        var v = document.sourceElement.value;
45        if (v != document.previousSourceText) {
46          document.previousSourceText = v;
47          document.links['permalink'].href
48              = location.pathname + '?s=' + encodeURIComponent (v);
49          document.links['ldvlink'].href
50              = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
51              + encodeURIComponent (v);
52    
53          document.logElement.textContent = '';
54          var p = new Parser (new InputStream (v));
55          var doc = p.doc;
56          p.parse ();
57          
58          log (dumpTree (doc, ''));
59          
60          if (p.hasAsyncScript) {
61            log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
62          }
63        }
64      } // update2
65    
66      var logIndentLevel = 0;
67    function log (s) {    function log (s) {
68        for (var i = 0; i < logIndentLevel; i++) {
69          s = '  ' + s;
70        }
71      document.logElement.appendChild (document.createTextNode (s + "\n"));      document.logElement.appendChild (document.createTextNode (s + "\n"));
72    } // log    } // log
73    
# Line 42  Line 83 
83      }      }
84      this.doc = doc;      this.doc = doc;
85      this.openElements = [doc];      this.openElements = [doc];
86      this.in = i;      this.input = i;
87      this.scriptsExecutedAfterParsing = [];      this.scriptsExecutedAfterParsing = [];
88        this.scriptsExecutedSoon = [];
89        this.scriptsExecutedAsynchronously = [];
90    } // Parser    } // Parser
91    
92    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
93      var p = this;      var p = this;
94      var i = this.in;      var i = this.input;
95      if (this.parseMode == 'script') {      if (this.parseMode == 'cdata') {
96          var tagName = this.endTagName;
97        var token;        var token;
98        if (p.insertionPoint <= 0) {        if (p.insertionPoint <= 0) {
99          return {type: 'abort'};          return {type: 'abort'};
# Line 67  Line 111 
111          return '';          return '';
112        });        });
113        if (token) return token;        if (token) return token;
114        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {        var pattern = new RegExp ('^</' + tagName + '>', 'i');
115          i.s = i.s.replace (pattern, function (s) {
116          if (p.insertionPoint < s.length) {          if (p.insertionPoint < s.length) {
117            token = {type: 'abort'};            token = {type: 'abort'};
118            return s;            return s;
119          }          }
120          token = {type: 'end-tag', value: 'script'};          token = {type: 'end-tag', value: tagName};
121          p.insertionPoint -= s.length;          p.insertionPoint -= s.length;
122          return '';          return '';
123        });        });
124        if (token) return token;        if (token) return token;
125          var m;
126          if ((p.insertionPoint < ('</' + tagName).length) &&
127              (m = i.s.match (/^<\/([A-Za-z]+)/))) {
128            var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
129            if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
130              return {type: 'abort'};
131            }
132          }
133        i.s = i.s.replace (/^</,        i.s = i.s.replace (/^</,
134        function (s) {        function (s) {
135          token = {type: 'char', value: s};          token = {type: 'char', value: s};
# Line 88  Line 141 
141      }      }
142    
143      var token;      var token;
144      i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
145        if (p.insertionPoint < s.length) {        if (p.insertionPoint < s.length ||
146              (p.insertionPoint <= s.length &&
147               s.substring (s.length - 1, 1) != '>')) {
148          token = {type: 'abort'};          token = {type: 'abort'};
149          return s;          return s;
150        }        }
# Line 98  Line 153 
153        return '';        return '';
154      });      });
155      if (token) return token;      if (token) return token;
156      i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
157        if (p.insertionPoint < s.length) {        if (p.insertionPoint < s.length ||
158              (p.insertionPoint <= s.length &&
159               s.substring (s.length - 1, 1) != '>')) {
160          token = {type: 'abort'};          token = {type: 'abort'};
161          return s;          return s;
162        }        }
# Line 109  Line 166 
166          tagName = v.toLowerCase ();          tagName = v.toLowerCase ();
167          return '';          return '';
168        });        });
169        e = e.replace (/^\s*(\S+)\s*(?:=\s*"([^"]*)"|'([^']*)'|([^"']+))?/,        while (true) {
170        function (x, attrName, attrValue1, attrValue2, attrValue3) {          var m = false;
171          attrs[attrName] = attrValue1 || attrValue2 || attrValue3;          e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
172          return '';          function (x, attrName, attrValue1, attrValue2, attrValue3) {
173        });            v = attrValue1 || attrValue2 || attrValue3;
174              v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
175                  .replace (/&amp;/g, '&');
176              attrs[attrName.toLowerCase ()] = v;
177              m = true;
178              return '';
179            });
180            if (!m) break;
181          }
182          if (e.length) {
183            log ('Broken start tag: "' + e + '"');
184          }
185        token = {type: 'start-tag', value: tagName, attrs: attrs};        token = {type: 'start-tag', value: tagName, attrs: attrs};
186        p.insertionPoint -= s.length;        p.insertionPoint -= s.length;
187        return '';        return '';
# Line 144  Line 212 
212    } // getNextToken    } // getNextToken
213    
214    Parser.prototype.parse = function () {    Parser.prototype.parse = function () {
215      log ('start parsing');      logIndentLevel++;
216        log ('parse: start');
217    
218      while (true) {      while (true) {
219        var token = this.getNextToken ();        var token = this.getNextToken ();
# Line 162  Line 231 
231            el.manakaiParserInserted = true;            el.manakaiParserInserted = true;
232    
233            // 3. Switch the tokeniser's content model flag to the CDATA state.            // 3. Switch the tokeniser's content model flag to the CDATA state.
234            this.parseMode = 'script';            this.parseMode = 'cdata';
235              this.endTagName = 'script';
236    
237            // 4.1. Collect all the character tokens.            // 4.1. Collect all the character tokens.
238            while (true) {            while (true) {
# Line 176  Line 246 
246              // 4.2. Until it returns a token that is not a character token, or              // 4.2. Until it returns a token that is not a character token, or
247              // until it stops tokenising.              // until it stops tokenising.
248              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
249                         (token.type == 'end-tag' && token.value == 'script') ||                         token.type == 'end-tag' ||
250                         token.type == 'abort') {                         token.type == 'abort') {
251                // 6. Switched back to the PCDATA state.                // 6. Switched back to the PCDATA state.
252                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
253    
254                // 7.1. If the next token is not an end tag token with ...                // 7.1. If the next token is not an end tag token with ...
255                if (token.type != 'end-tag') {                if (!(token.type == 'end-tag' && token.value == 'script')) {
256                  // 7.2. This is a parse error.                  // 7.2. This is a parse error.
257                  log ('Parse error: no </' + 'script>');                  log ('Parse error: no </' + 'script>');
258    
# Line 212  Line 282 
282            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
283    
284            // 11. Let the insertion point have the value of the old ...            // 11. Let the insertion point have the value of the old ...
285    
286              oldInsertionPoint += this.insertionPoint;
287            this.setInsertionPoint (oldInsertionPoint);            this.setInsertionPoint (oldInsertionPoint);
288    
289            // 12. If there is a script that will execute as soon as ...            // 12. If there is a script that will execute as soon as ...
290                        while (this.scriptExecutedWhenParserResumes) {
291                // 12.1. If the tree construction stage is being called reentrantly
292                if (this.reentrant) {
293                  log ('parse: abort (reentrance)');
294                  logIndentLevel--;
295                  return;
296    
297                // 12.2. Otherwise
298                } else {
299                  // 1.
300                  var script = this.scriptExecutedWhenParserResumes;
301                  this.scriptExecutedWhenParserResumes = null;
302    
303                  // 2. Pause until the script has completed loading.
304                  //
305    
306                  // 3. Let the insertion point to just before the next input char.
307                  this.setInsertionPoint (0);
308    
309                  // 4. Execute the script.
310                  executeScript (this.doc, script);
311    
312                  // 5. Let the insertion point be undefined again.
313                  this.setInsertionPoint (undefined);
314    
315                  // 6. If there is once again a script that will execute ...
316                  //
317                }
318              }
319            } else if (token.value == 'style' ||
320                       token.value == 'noscript' ||
321                       token.value == 'xmp') {
322              // 1. Create an element for the token in the HTML namespace.
323              var el = new JSElement (this.doc, token.value);
324    
325              // 2. Append the new element to the current node.
326              this.openElements[this.openElements.length - 1].appendChild (el);
327    
328              // 3. Switch the tokeniser's content model flag to the CDATA state.
329              this.parseMode = 'cdata';
330              this.endTagName = token.value;
331    
332              // 4.1. Collect all the character tokens.
333              while (true) {
334                var token = this.getNextToken ();
335                log ('token: ' + token.type + ' "' + token.value + '"');
336    
337                if (token.type == 'char') {
338                  // 5. Append a single Text node to the script element node.
339                  el.manakaiAppendText (token.value);
340    
341                // 4.2. Until it returns a token that is not a character token, or
342                // until it stops tokenising.
343                } else if (token.type == 'eof' ||
344                           token.type == 'end-tag' ||
345                           token.type == 'abort') {
346                  // 6. Switched back to the PCDATA state.
347                  this.parseMode = 'pcdata';
348    
349                  // 7.1. If the next token is not an end tag token with ...
350                  if (!(token.type == 'end-tag' &&
351                        token.value == this.endTagName)) {
352                    // 7.2. This is a parse error.
353                    log ('Parse error: no </' + this.endTagName + '>');
354    
355                    // 7.3. Mark the script element as "already executed".
356                    el.manakaiAlreadyExecuted = true;
357                  } else {
358                    // 7.4. Ignore it.
359                    //
360                  }
361                  break;
362                }
363              }
364          } else {          } else {
365            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
366            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
# Line 236  Line 380 
380          break;          break;
381        } else if (token.type == 'abort') {        } else if (token.type == 'abort') {
382          log ('parse: abort');          log ('parse: abort');
383            logIndentLevel--;
384          return;          return;
385        }        }
386      }      }
# Line 246  Line 391 
391    
392      // "When a script completes loading" rules start applying.      // "When a script completes loading" rules start applying.
393    
394      // TODO: Handles "list of scripts that will execute as soon as possible"      while (this.scriptsExecutedSoon.length > 0 ||
395      // and "list of scripts that will execute asynchronously"             this.scriptsExecutedAsynchronously.length > 0) {
396          // Handle "list of scripts that will execute as soon as possible".
397          while (this.scriptsExecutedSoon.length > 0) {
398            var e = this.scriptsExecutedSoon.shift ();
399      
400            // If it has completed loading
401            log ('Execute an external script not inserted by parser...');
402            executeScript (this.doc, e);
403    
404            // NOTE: It MAY be executed before the end of the parsing, according
405            // to the spec.
406            this.hasAsyncScript = true;
407          }
408    
409          // Handle "list of scripts that will execute asynchronously".
410          while (this.scriptsExecutedAsynchronously.length > 0) {
411            var e = this.scriptsExecutedAsynchronously.shift ();
412    
413            // Step 1.
414            // We assume that all scripts have been loaded at this time.
415      
416            // Step 2.
417            log ('Execute an asynchronous script...');
418            executeScript (this.doc, e);
419    
420            // Step 3.
421            //
422    
423            // Step 4.
424            //
425    
426            this.hasAsyncScript = true;
427          }
428        }
429    
430      // Handle "list of scripts that will execute when the document has finished      // Handle "list of scripts that will execute when the document has finished
431      // parsing".      // parsing".
# Line 267  Line 445 
445    
446      log ('DOMContentLoaded event fired');      log ('DOMContentLoaded event fired');
447    
448      // "delays tha load event" things has completed:      // "delays the load event" things has completed:
449      // readyState = 'complete'      // readyState = 'complete'
450      log ('load event fired');      log ('load event fired');
451    
452        logIndentLevel--;
453    } // parse    } // parse
454    
455    Parser.prototype.setInsertionPoint = function (ip) {    Parser.prototype.setInsertionPoint = function (ip) {
456      if (ip == undefined || ip == null || isNaN (ip)) {      if (ip == undefined || ip == null || isNaN (ip)) {
457        log ('insertion point: set to undefined');        log ('insertion point: set to undefined');
458        this.insertionPoint = undefined;        this.insertionPoint = undefined;
459      } else if (ip == this.in.s.length) {      } else if (ip == this.input.s.length) {
460        log ('insertion point: end of file');        log ('insertion point: end of file');
461        this.insertionPoint = ip;        this.insertionPoint = ip;
462      } else {      } else {
463        log ('insertion point: set to ' + ip +        log ('insertion point: set to ' + ip +
464             ' (before "' + this.in.s.substring (0, 10) + '")');             ' (before "' + this.input.s.substring (0, 10) + '")');
465        this.insertionPoint = ip;        this.insertionPoint = ip;
466      }      }
467    }; // setInsertionPoint    }; // setInsertionPoint
# Line 303  Line 483 
483      e.parentNode = this;      e.parentNode = this;
484    
485      if (e.localName == 'script') {      if (e.localName == 'script') {
486          logIndentLevel++;
487        log ('Running a script: start');        log ('Running a script: start');
488    
489        var doc = this.ownerDocument || this;        var doc = this.ownerDocument || this;
# Line 321  Line 502 
502        if (e.manakaiAlreadyExecuted) {        if (e.manakaiAlreadyExecuted) {
503          // 2.5. Abort these steps at this point.          // 2.5. Abort these steps at this point.
504          log ('Running a script: aborted');          log ('Running a script: aborted');
505            logIndentLevel--;
506          return e;          return e;
507        }        }
508    
# Line 338  Line 520 
520          p.scriptsExecutedAfterParsing.push (e);          p.scriptsExecutedAfterParsing.push (e);
521          log ('Running a script: aborted (defer)');          log ('Running a script: aborted (defer)');
522        } else if (e.async && e.src != null) {        } else if (e.async && e.src != null) {
523          // TODO          p.scriptsExecutedAsynchronously.push (e);
524        } else if (e.async && e.src == null          log ('Running a script: aborted (async src)');
525                   /* && list of scripts that will execute asynchronously is not empty */) {        } else if (e.async && e.src == null &&
526          // TODO                   p.scriptsExecutedAsynchronously.length > 0) {
527            p.scriptsExecutedAsynchronously.push (e);
528            log ('Running a script: aborted (async)');
529            // ISSUE: What is the difference with the case above?
530        } else if (e.src != null && e.manakaiParserInserted) {        } else if (e.src != null && e.manakaiParserInserted) {
531          // TODO          if (p.scriptExecutedWhenParserResumes) {
532              log ('Error: There is a script that will execute as soon as the parser resumes.');
533            }
534            p.scriptExecutedWhenParserResumes = e;
535            log ('Running a script: aborted (src parser-inserted)');
536        } else if (e.src != null) {        } else if (e.src != null) {
537          // TODO          p.scriptsExecutedSoon.push (e);
538            log ('Running a script: aborted (src)');
539        } else {        } else {
540          executeScript (doc, e); // even if other scripts are already executing.          executeScript (doc, e); // even if other scripts are already executing.
541        }        }
542    
543        log ('Running a script: end');        log ('Running a script: end');
544          logIndentLevel--;
545      }      }
546    
547      return e;      return e;
# Line 359  Line 550 
550    function executeScript (doc, e) {    function executeScript (doc, e) {
551      log ('executing a script block: start');      log ('executing a script block: start');
552    
553      // If the load resulted in an error, then ... firing an error event ...      var s;
554        if (e.src != null) {
555          s = getExternalScript (e.src);
556    
557          // If the load resulted in an error, then ... firing an error event ...
558          if (s == null) {
559            log ('error event fired at the script element');
560            return;
561          }
562    
563          log ('External script loaded: "' + s + '"');
564        } else {
565          s = e.text;
566        }
567    
568      // If the load was successful      // If the load was successful
569      log ('load event fired at the script element');      log ('load event fired at the script element');
# Line 368  Line 572 
572      // Scripting is enabled, Document.designMode is disabled,      // Scripting is enabled, Document.designMode is disabled,
573      // Document is the active document in its browsing context      // Document is the active document in its browsing context
574    
       var s;  
       if (e.src != null) {  
         // TODO: from external file  
       } else {  
         s = e.text;  
       }  
   
575        parseAndRunScript (doc, s);        parseAndRunScript (doc, s);
576      }      }
577    
578      log ('executing a script block: end');      log ('executing a script block: end');
579    } // executeScript    } // executeScript
580    
581      function getExternalScript (uri) {
582        if (uri.match (/^javascript:/i)) {
583          var m;
584          if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
585            if (m[1]) {
586              return unescapeJSLiteral (m[1]);
587            } else if (m[2]) {
588              return unescapeJSLiteral (m[2]);
589            } else {
590              return null;
591            }
592          } else {
593            log ('Complex javascript: URI is not supported: <' + uri + '>');
594            return null;
595          }
596        } else {
597          log ('URI scheme not supported: <' + uri + '>');
598          return null;
599        }
600      } // getExternalScript
601    
602    function parseAndRunScript (doc, s) {    function parseAndRunScript (doc, s) {
603      while (true) {      while (true) {
604        var matched = false;        var matched = false;
# Line 388  Line 606 
606          matched = true;          matched = true;
607          var args = [];          var args = [];
608          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
609            args.push (v.substring (1, v.length - 1));            args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
610            return '';            return '';
611          });          });
612          doc.write.apply (doc, args);          doc.write.apply (doc, args);
613          return '';          return '';
614        });        });
615          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
616          function (s, t, u) {
617            matched = true;
618            var args = [unescapeJSLiteral (t ? t : u)];
619            doc._insertExternalScript.apply (doc, args);
620            return '';
621          });
622        if (s == '') break;        if (s == '') break;
623        if (!matched) {        if (!matched) {
624          log ('Script parse error: "' + s + '"');          log ('Script parse error: "' + s + '"');
# Line 402  Line 627 
627      }      }
628    } // parseAndRunScript    } // parseAndRunScript
629    
630      function unescapeJSLiteral (s) {
631        return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
632          return String.fromCharCode (parseInt ('0x' + v));
633        });
634      } // unescapeJSLiteral
635    
636    function JSText (data) {    function JSText (data) {
637      this.data = data;      this.data = data;
638    } // JSText    } // JSText
# Line 429  Line 660 
660      // Step 3.      // Step 3.
661      if (this._parser &&      if (this._parser &&
662          !this._parser.scriptCreated &&          !this._parser.scriptCreated &&
663          this._parser.in.insertionPoint != undefined) {          this._parser.input.insertionPoint != undefined) {
664        log ('document.open () in parsing mode is ignored');        log ('document.open () in parsing mode is ignored');
665        return this;        return this;
666      }      }
# Line 463  Line 694 
694      }      }
695    
696      // Step 11.      // Step 11.
697      this._parser.setInsertionPoint (this._parser.in.s.length);      this._parser.setInsertionPoint (this._parser.input.s.length);
698    
699      // Step 12.      // Step 12.
700      return this;      return this;
701    }; // document.open    }; // document.open
702    
703    JSDocument.prototype.write = function () {    JSDocument.prototype.write = function () {
704        logIndentLevel++;
705    
706      var p = this._parser;      var p = this._parser;
707    
708      // 1. If the insertion point is undefined, the open() method must be ...      // 1. If the insertion point is undefined, the open() method must be ...
# Line 481  Line 714 
714      // 2. ... inserted into the input stream just before the insertion point.      // 2. ... inserted into the input stream just before the insertion point.
715      var s = Array.join (arguments, '');      var s = Array.join (arguments, '');
716      log ('document.write: insert "' + s + '"' +      log ('document.write: insert "' + s + '"' +
717           ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');           ' before "' +
718      p.in.s = p.in.s.substring (0, p.insertionPoint) + s           p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
719          + p.in.s.substring (p.insertionPoint, p.in.s.length);      p.input.s = p.input.s.substring (0, p.insertionPoint) + s
720            + p.input.s.substring (p.insertionPoint, p.input.s.length);
721      p.insertionPoint += s.length;      p.insertionPoint += s.length;
722    
723      // 3. If there is a script that will execute as soon as the parser resumes      // 3. If there is a script that will execute as soon as the parser resumes
724      // TODO      if (p.scriptExecutedAfterParserResumes) {
725          log ('document.write: processed later (there is an unprocessed <script src>)');
726          logIndentLevel--;
727          return;
728        }
729    
730      // 4. Process the characters that were inserted, ...      // 4. Process the characters that were inserted, ...
731        var originalReentrant = p.reentrant;
732        p.reentrant = true;
733      p.parse ();      p.parse ();
734        p.reentrant = originalReentrant;
735        // TODO: "Abort the processing of any nested invokations of the tokeniser,
736        // yielding control back to the caller." (<script> parsing).  Do we need
737        // to do something here?
738    
739      // 5. Return      // 5. Return
740      log ('document.write: return');      log ('document.write: return');
741    
742        logIndentLevel--;
743      return;      return;
744    }; // document.write    }; // document.write
745    
746      JSDocument.prototype._insertExternalScript = function (uri) {
747        var s = new JSElement (this, 'script');
748        s.src = uri;
749        this.documentElement.appendChild (s);
750      }; // _insertExternalScript
751    
752      JSDocument.prototype.__defineGetter__ ('documentElement', function () {
753        var cn = this.childNodes;
754        for (var i = 0; i < cn.length; i++) {
755          if (cn[i] instanceof JSElement) {
756            return cn[i]
757          }
758        }
759        return null;
760      });
761    
762    JSElement.prototype.__defineGetter__ ('text', function () {    JSElement.prototype.__defineGetter__ ('text', function () {
763      var r = '';      var r = '';
764      for (var i = 0; i < this.childNodes.length; i++) {      for (var i = 0; i < this.childNodes.length; i++) {
# Line 515  Line 777 
777          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
778          if (node.async) r += '| ' + indent + '  async=""\n';          if (node.async) r += '| ' + indent + '  async=""\n';
779          if (node.defer) r += '| ' + indent + '  defer=""\n';          if (node.defer) r += '| ' + indent + '  defer=""\n';
780          if (node.src) r += '| ' + indent + '  src="' + node.src + '"\n';          if (node.src != null) {
781              r += '| ' + indent + '  src="' + node.src + '"\n';
782            }
783          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
784        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
785          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 529  Line 793 
793  </head>  </head>
794  <body onload="  <body onload="
795    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
796    
797      var q = location.search;
798      if (q != null) {
799        q = q.substring (1).split (/;/);
800        for (var i = 0; i < q.length; i++) {
801          var v = q[i].split (/=/, 2);
802          v[0] = decodeURIComponent (v[0]);
803          v[1] = decodeURIComponent (v[1] || '');
804          if (v[0] == 's') {
805            document.sourceElement.value = v[1];
806          }
807        }
808      }
809    
810    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
811    update ();    update ();
812  ">  ">
813    <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
814    Parser</h1>
815    
816  <textarea onchange=" update () ">&lt;html>  <h2>Markup to test
817    (<a href=data:, id=permalink rel=bookmark>permalink</a>,
818    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
819        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
820        Viewer</a>)</h2>
821    <p>
822    <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
823  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
824  &lt;p>  &lt;p>
825  &lt;script>  &lt;script>
# Line 542  document.write ('aaaaaaa&lt;/p>&lt;scrip Line 828  document.write ('aaaaaaa&lt;/p>&lt;scrip
828  &lt;p>  &lt;p>
829  </textarea>  </textarea>
830    
831  <output></output>  <h2 id=log>Log</h2>
832    <p><output></output>
833    
834    <h2 id=notes>Notes</h2>
835    
836    <p>This is a <em>simplified</em> implementation of
837    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
838    Parsing Algorithm</a>.  It only implements the script-related part of the
839    algorithm.  In particular, this parser:
840    <ul>
841    <li>Does not support <code>DOCTYPE</code> and comment tokens.
842    <li>Does not support entities except for <code>&amp;quot;</code>,
843    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
844    <code>src</code> attribute value.
845    <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
846    algorithm, and so on.
847    <li>Does not raise parse errors for invalid attribute specifications in start
848    or end tags.
849    <li>Does not support PCDATA elements (<code>title</code> and
850    <code>textarea</code>).
851    <li>Does not strip the first newline in <code>pre</code> elements.
852    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
853    in <code>script</code> element.
854    <li>Does not support foreign (SVG or MathML) elements.
855    <li>Only supports <code>script</code> <code>type</code>
856    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
857    attributes are ignored.
858    <li>Only supports a limited set of statements.  A script must consist of
859    zero or more statements similar to the following, possibly
860    preceded, followed, or separated by white space characters:
861      <ul>
862      <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
863      <li><code>var s = document.createElement ("script");
864                s.src = "<var>string</var>";
865                document.documentElement.appendChild (s);</code>
866      </ul>
867    Note that strings may be delimited by <code>'</code>s instead of
868    <code>"</code>s.
869    <li>Only supports <code>javascript:</code>
870    <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
871    <code>src</code> attribute of the <code>script</code> element.  In addition,
872    the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
873    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
874    <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
875    string literals.
876    <li>Does not handle the <i>stop parsing</i> phase correctly if the document is
877    replaced by <code>document.open ()</code> call.  In other words, delayed
878    (deferred or asynchronous) script executions and event firings might be
879    treated in a wrong way if a <code>document.open ()</code> invocation
880    is implicitly done by <code>document.write ()</code> in a delayed script.
881    </ul>
882    
883    <p>For some reason, this parser does not work in browsers that do
884    not support JavaScript 1.5.
885    
886    <!-- TODO: |src| attribute value should refer to the value at the time
887    when it is inserted into the document, not the value when the script is
888    executed.  Currently it does not matter, since we don't allow dynamic
889    modification to the |src| content/DOM attribute value yet. -->
890    
891  </body>  </body>
 </html>  
892    </html>
893    <!-- $Date$ -->
894    <!--
895    
896    Copyright 2008 Wakaba <w@suika.fam.cx>
897    
898    This program is free software; you can redistribute it and/or
899    modify it under the terms of the GNU General Public License
900    as published by the Free Software Foundation; either version 2
901    of the License, or (at your option) any later version.
902    
903    This program is distributed in the hope that it will be useful,
904    but WITHOUT ANY WARRANTY; without even the implied warranty of
905    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
906    GNU General Public License for more details.
907    
908    You should have received a copy of the GNU General Public License
909    along with this program; if not, write to the Free Software
910    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
911    
912    -->

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.14

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24