/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6 by wakaba, Fri Apr 25 13:42:51 2008 UTC revision 1.16 by wakaba, Tue Apr 29 04:07:18 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5    <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6    <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7        title="GNU GPL2 or later">
8  <style>  <style>
9      h1 {
10        margin: 0;
11        font-size: 150%;
12      }
13      h2 {
14        margin: 0;
15        font-size: 100%;
16      }
17      p {
18        margin: 0 1em;
19      }
20    textarea {    textarea {
21       display: block;      width: 100%;
22       width: 80%;      -width: 99%;
23       margin-left: auto;      height: 10em;
      margin-right: auto;  
      min-height: 20em;  
24    }    }
25    output {    output {
26      display: block;      display: block;
# Line 18  Line 30 
30    }    }
31  </style>  </style>
32  <script>  <script>
33      var delayedUpdater = 0;
34    
35    function update () {    function update () {
36      document.logElement.textContent = '';      if (delayedUpdater) {
37      var p = new Parser (new InputStream (document.sourceElement.value));        clearTimeout (delayedUpdater);
38      var doc = p.doc;        delayedUpdater = 0;
39      p.parse ();      }
40      log (dumpTree (doc, ''));      delayedUpdater = setTimeout (update2, 100);
41    } // update    } // update
42    
43      function update2 () {
44        var v = document.sourceElement.value;
45        if (v != document.previousSourceText) {
46          document.previousSourceText = v;
47          document.links['permalink'].href
48              = location.pathname + '?s=' + encodeURIComponent (v);
49          document.links['ldvlink'].href
50              = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
51              + encodeURIComponent (v);
52    
53          document.logElement.textContent = '';
54          var p = new Parser (new InputStream (v));
55          var doc = p.doc;
56          p.parse ();
57          
58          log (dumpTree (doc, ''));
59          
60          if (p.hasAsyncScript) {
61            log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
62          }
63        }
64      } // update2
65    
66    var logIndentLevel = 0;    var logIndentLevel = 0;
67    function log (s) {    function log (s) {
68        var indent = '';
69      for (var i = 0; i < logIndentLevel; i++) {      for (var i = 0; i < logIndentLevel; i++) {
70        s = '  ' + s;        indent += '  ';
71      }      }
72        s = indent + s.replace (/\n/g, "\n" + indent);
73      document.logElement.appendChild (document.createTextNode (s + "\n"));      document.logElement.appendChild (document.createTextNode (s + "\n"));
74    } // log    } // log
75    
# Line 44  Line 83 
83        doc = new JSDocument (this);        doc = new JSDocument (this);
84        doc.manakaiIsHTML = true;        doc.manakaiIsHTML = true;
85      }      }
86        this.nextToken = [];
87      this.doc = doc;      this.doc = doc;
88      this.openElements = [doc];      this.openElements = [doc];
89      this.in = i;      this.input = i;
90      this.scriptsExecutedAfterParsing = [];      this.scriptsExecutedAfterParsing = [];
91        this.scriptsExecutedSoon = [];
92        this.scriptsExecutedAsynchronously = [];
93    } // Parser    } // Parser
94    
95    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
96        if (this.nextToken.length) {
97          return this.nextToken.shift ();
98        }
99    
100      var p = this;      var p = this;
101      var i = this.in;      var i = this.input;
102      if (this.parseMode == 'script') {      if (this.parseMode == 'cdata') {
103          var tagName = this.endTagName;
104        var token;        var token;
105        if (p.insertionPoint <= 0) {        if (p.insertionPoint <= 0) {
106          return {type: 'abort'};          return {type: 'abort'};
# Line 71  Line 118 
118          return '';          return '';
119        });        });
120        if (token) return token;        if (token) return token;
121        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {        var pattern = new RegExp ('^</' + tagName + '>', 'i');
122          i.s = i.s.replace (pattern, function (s) {
123          if (p.insertionPoint < s.length) {          if (p.insertionPoint < s.length) {
124            token = {type: 'abort'};            token = {type: 'abort'};
125            return s;            return s;
126          }          }
127          token = {type: 'end-tag', value: 'script'};          token = {type: 'end-tag', value: tagName};
128          p.insertionPoint -= s.length;          p.insertionPoint -= s.length;
129          return '';          return '';
130        });        });
131        if (token) return token;        if (token) return token;
132        var m;        var m;
133        if ((p.insertionPoint < '</script'.length) &&        if ((p.insertionPoint < ('</' + tagName).length) &&
134            (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {            (m = i.s.match (/^<\/([A-Za-z]+)/))) {
135          var v = m[1].substring (0, p.insertionPoint).toLowerCase ();          var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
136          if (v == 'script'.substring (0, p.insertionPoint - '</'.length)) {          if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
137            return {type: 'abort'};            return {type: 'abort'};
138          }          }
139        }        }
# Line 103  Line 151 
151      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
152        if (p.insertionPoint < s.length ||        if (p.insertionPoint < s.length ||
153            (p.insertionPoint <= s.length &&            (p.insertionPoint <= s.length &&
154             s.substring (s.length - 1, 1) != '>')) {             s.substring (s.length - 1, s.length) != '>')) {
155          token = {type: 'abort'};          token = {type: 'abort'};
156          return s;          return s;
157        }        }
# Line 115  Line 163 
163      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
164        if (p.insertionPoint < s.length ||        if (p.insertionPoint < s.length ||
165            (p.insertionPoint <= s.length &&            (p.insertionPoint <= s.length &&
166             s.substring (s.length - 1, 1) != '>')) {             s.substring (s.length - 1, s.length) != '>')) {
167          token = {type: 'abort'};          token = {type: 'abort'};
168          return s;          return s;
169        }        }
# Line 125  Line 173 
173          tagName = v.toLowerCase ();          tagName = v.toLowerCase ();
174          return '';          return '';
175        });        });
176        e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/,        while (true) {
177        function (x, attrName, attrValue1, attrValue2, attrValue3) {          var m = false;
178          v = attrValue1 || attrValue2 || attrValue3;          e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
179          v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")          function (x, attrName, attrValue1, attrValue2, attrValue3) {
180              .replace (/&amp;/g, '&');            v = attrValue1 || attrValue2 || attrValue3;
181          attrs[attrName.toLowerCase ()] = v;            v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
182          return '';                .replace (/&amp;/g, '&');
183        });            attrs[attrName.toLowerCase ()] = v;
184              m = true;
185              return '';
186            });
187            if (!m) break;
188          }
189        if (e.length) {        if (e.length) {
190          log ('Broken start tag: "' + e + '"');          log ('Broken start tag: "' + e + '"');
191        }        }
# Line 173  Line 226 
226        var token = this.getNextToken ();        var token = this.getNextToken ();
227        log ('token: ' + token.type + ' "' + token.value + '"');        log ('token: ' + token.type + ' "' + token.value + '"');
228    
229          if (this.cdataEndTagRequired) {
230            // Generic CDATA parsing algorithm
231    
232            if (token.type != 'abort') {
233              // 7.
234              if (token.type == 'end-tag' && token.value == this.endTagName) {
235                // 7.1. Ignores it.
236                //
237              } else {
238                // 7.2. Parse error.
239                log ('Parse error: no </' + this.endTagName + '>');
240                this.nextToken.unshift (token);
241              }
242              this.cdataEndTagRequired = false;
243              continue;
244            }
245          }
246    
247        if (token.type == 'start-tag') {        if (token.type == 'start-tag') {
248          if (token.value == 'script') {          if (token.value == 'script') {
249            // 1. Create an element for the token in the HTML namespace.            // 1. Create an element for the token in the HTML namespace.
# Line 185  Line 256 
256            el.manakaiParserInserted = true;            el.manakaiParserInserted = true;
257    
258            // 3. Switch the tokeniser's content model flag to the CDATA state.            // 3. Switch the tokeniser's content model flag to the CDATA state.
259            this.parseMode = 'script';            this.parseMode = 'cdata';
260              this.endTagName = 'script';
261    
262            // 4.1. Collect all the character tokens.            // 4.1. Collect all the character tokens.
263            while (true) {            while (true) {
# Line 199  Line 271 
271              // 4.2. Until it returns a token that is not a character token, or              // 4.2. Until it returns a token that is not a character token, or
272              // until it stops tokenising.              // until it stops tokenising.
273              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
274                         (token.type == 'end-tag' && token.value == 'script') ||                         token.type == 'end-tag' ||
275                         token.type == 'abort') {                         token.type == 'abort') {
276                // 6. Switched back to the PCDATA state.                // 6. Switched back to the PCDATA state.
277                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
278    
279                // 7.1. If the next token is not an end tag token with ...                // 7.1. If the next token is not an end tag token with ...
280                if (token.type != 'end-tag') {                if (!(token.type == 'end-tag' && token.value == 'script')) {
281                  // 7.2. This is a parse error.                  // 7.2. This is a parse error.
282                  log ('Parse error: no </' + 'script>');                  log ('Parse error: no </' + 'script>');
283                    this.nextToken.unshift (token);
284    
285                  // 7.3. Mark the script element as "already executed".                  // 7.3. Mark the script element as "already executed".
286                  el.manakaiAlreadyExecuted = true;                  el.manakaiAlreadyExecuted = true;
# Line 235  Line 308 
308            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
309    
310            // 11. Let the insertion point have the value of the old ...            // 11. Let the insertion point have the value of the old ...
311    
312            oldInsertionPoint += this.insertionPoint;            oldInsertionPoint += this.insertionPoint;
313            this.setInsertionPoint (oldInsertionPoint);            this.setInsertionPoint (oldInsertionPoint);
314    
# Line 268  Line 342 
342                //                //
343              }              }
344            }            }
345            } else if (token.value == 'style' ||
346                       token.value == 'noscript' ||
347                       token.value == 'xmp') {
348              // 1. Create an element for the token in the HTML namespace.
349              var el = new JSElement (this.doc, token.value);
350    
351              // 2. Append the new element to the current node.
352              this.openElements[this.openElements.length - 1].appendChild (el);
353    
354              // 3. Switch the tokeniser's content model flag to the CDATA state.
355              this.parseMode = 'cdata';
356              this.endTagName = token.value;
357    
358              // 4.1. Collect all the character tokens.
359              while (true) {
360                var token = this.getNextToken ();
361                log ('token: ' + token.type + ' "' + token.value + '"');
362    
363                if (token.type == 'char') {
364                  // 5. Append a single Text node to the script element node.
365                  el.manakaiAppendText (token.value);
366    
367                // 4.2. Until it returns a token that is not a character token, or
368                // until it stops tokenising.
369                } else if (token.type == 'eof' ||
370                           token.type == 'end-tag' ||
371                           token.type == 'abort') {
372                  // 6. Switched back to the PCDATA state.
373                  this.parseMode = 'pcdata';
374    
375                  if (token.type == 'abort') {
376                    this.cdataEndTagRequired = true;
377                    break;
378                  }
379    
380                  // 7.1. If the next token is not an end tag token with ...
381                  if (!(token.type == 'end-tag' &&
382                        token.value == this.endTagName)) {
383                    // 7.2. This is a parse error.
384                    log ('Parse error: no </' + this.endTagName + '>');
385                    this.nextToken.unshift (token);
386    
387                    // 7.3. Mark the script element as "already executed".
388                    el.manakaiAlreadyExecuted = true;
389                  } else {
390                    // 7.4. Ignore it.
391                    //
392                  }
393                  break;
394                }
395              }
396          } else {          } else {
397            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
398            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
# Line 298  Line 423 
423    
424      // "When a script completes loading" rules start applying.      // "When a script completes loading" rules start applying.
425    
426      // TODO: Handles "list of scripts that will execute as soon as possible"      while (this.scriptsExecutedSoon.length > 0 ||
427      // and "list of scripts that will execute asynchronously"             this.scriptsExecutedAsynchronously.length > 0) {
428          // Handle "list of scripts that will execute as soon as possible".
429          while (this.scriptsExecutedSoon.length > 0) {
430            var e = this.scriptsExecutedSoon.shift ();
431      
432            // If it has completed loading
433            log ('Execute an external script not inserted by parser...');
434            executeScript (this.doc, e);
435    
436            // NOTE: It MAY be executed before the end of the parsing, according
437            // to the spec.
438            this.hasAsyncScript = true;
439          }
440    
441          // Handle "list of scripts that will execute asynchronously".
442          while (this.scriptsExecutedAsynchronously.length > 0) {
443            var e = this.scriptsExecutedAsynchronously.shift ();
444    
445            // Step 1.
446            // We assume that all scripts have been loaded at this time.
447      
448            // Step 2.
449            log ('Execute an asynchronous script...');
450            executeScript (this.doc, e);
451    
452            // Step 3.
453            //
454    
455            // Step 4.
456            //
457    
458            this.hasAsyncScript = true;
459          }
460        }
461    
462      // Handle "list of scripts that will execute when the document has finished      // Handle "list of scripts that will execute when the document has finished
463      // parsing".      // parsing".
# Line 319  Line 477 
477    
478      log ('DOMContentLoaded event fired');      log ('DOMContentLoaded event fired');
479    
480      // "delays tha load event" things has completed:      // "delays the load event" things has completed:
481      // readyState = 'complete'      // readyState = 'complete'
482      log ('load event fired');      log ('load event fired');
483    
# Line 330  Line 488 
488      if (ip == undefined || ip == null || isNaN (ip)) {      if (ip == undefined || ip == null || isNaN (ip)) {
489        log ('insertion point: set to undefined');        log ('insertion point: set to undefined');
490        this.insertionPoint = undefined;        this.insertionPoint = undefined;
491      } else if (ip == this.in.s.length) {      } else if (ip == this.input.s.length) {
492        log ('insertion point: end of file');        log ('insertion point: end of file');
493        this.insertionPoint = ip;        this.insertionPoint = ip;
494      } else {      } else {
495        log ('insertion point: set to ' + ip +        log ('insertion point: set to ' + ip +
496             ' (before "' + this.in.s.substring (0, 10) + '")');             ' (before "' + this.input.s.substring (0, 10) + '")');
497        this.insertionPoint = ip;        this.insertionPoint = ip;
498      }      }
499    }; // setInsertionPoint    }; // setInsertionPoint
# Line 375  Line 533 
533        // 2.4. If the script element has its "already executed" flag set        // 2.4. If the script element has its "already executed" flag set
534        if (e.manakaiAlreadyExecuted) {        if (e.manakaiAlreadyExecuted) {
535          // 2.5. Abort these steps at this point.          // 2.5. Abort these steps at this point.
536          log ('Running a script: aborted');          log ('Running a script: aborted (already executed)');
537          logIndentLevel--;          logIndentLevel--;
538          return e;          return e;
539        }        }
# Line 394  Line 552 
552          p.scriptsExecutedAfterParsing.push (e);          p.scriptsExecutedAfterParsing.push (e);
553          log ('Running a script: aborted (defer)');          log ('Running a script: aborted (defer)');
554        } else if (e.async && e.src != null) {        } else if (e.async && e.src != null) {
555          // TODO          p.scriptsExecutedAsynchronously.push (e);
556        } else if (e.async && e.src == null          log ('Running a script: aborted (async src)');
557                   /* && list of scripts that will execute asynchronously is not empty */) {        } else if (e.async && e.src == null &&
558          // TODO                   p.scriptsExecutedAsynchronously.length > 0) {
559            p.scriptsExecutedAsynchronously.push (e);
560            log ('Running a script: aborted (async)');
561            // ISSUE: What is the difference with the case above?
562        } else if (e.src != null && e.manakaiParserInserted) {        } else if (e.src != null && e.manakaiParserInserted) {
563          if (p.scriptExecutedWhenParserResumes) {          if (p.scriptExecutedWhenParserResumes) {
564            log ('Error: There is a script that will execute as soon as the parser resumes.');            log ('Error: There is a script that will execute as soon as the parser resumes.');
565          }          }
566          p.scriptExecutedWhenParserResumes = e;          p.scriptExecutedWhenParserResumes = e;
567          log ('Running a script: aborted (src)');          log ('Running a script: aborted (src parser-inserted)');
568        } else if (e.src != null) {        } else if (e.src != null) {
569          // TODO          p.scriptsExecutedSoon.push (e);
570            log ('Running a script: aborted (src)');
571        } else {        } else {
572          executeScript (doc, e); // even if other scripts are already executing.          executeScript (doc, e); // even if other scripts are already executing.
573        }        }
# Line 453  Line 615 
615        var m;        var m;
616        if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {        if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
617          if (m[1]) {          if (m[1]) {
618            return m[1];            return unescapeJSLiteral (m[1]);
619          } else if (m[2]) {          } else if (m[2]) {
620            return m[2];            return unescapeJSLiteral (m[2]);
621          } else {          } else {
622            return null;            return null;
623          }          }
# Line 476  Line 638 
638          matched = true;          matched = true;
639          var args = [];          var args = [];
640          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
641            args.push (v.substring (1, v.length - 1));            args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
642            return '';            return '';
643          });          });
644          doc.write.apply (doc, args);          doc.write.apply (doc, args);
645          return '';          return '';
646        });        });
647          var noDocumentElement = false;
648          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
649          function (s, t, u) {
650            matched = true;
651            var args = [unescapeJSLiteral (t ? t : u)];
652            noDocumentElement = !doc._insertExternalScript.apply (doc, args);
653            return '';
654          });
655          if (noDocumentElement) {
656            log ('Script error: documentElement is null');
657            break;
658          }
659          s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
660          function (s, t) {
661            matched = true;
662            log (dumpTree (doc, ''));
663            return '';
664          });
665        if (s == '') break;        if (s == '') break;
666        if (!matched) {        if (!matched) {
667          log ('Script parse error: "' + s + '"');          log ('Script parse error: "' + s + '"');
# Line 490  Line 670 
670      }      }
671    } // parseAndRunScript    } // parseAndRunScript
672    
673      function unescapeJSLiteral (s) {
674        return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
675          return String.fromCharCode (parseInt ('0x' + v));
676        });
677      } // unescapeJSLiteral
678    
679    function JSText (data) {    function JSText (data) {
680      this.data = data;      this.data = data;
681    } // JSText    } // JSText
# Line 517  Line 703 
703      // Step 3.      // Step 3.
704      if (this._parser &&      if (this._parser &&
705          !this._parser.scriptCreated &&          !this._parser.scriptCreated &&
706          this._parser.in.insertionPoint != undefined) {          this._parser.input.insertionPoint != undefined) {
707        log ('document.open () in parsing mode is ignored');        log ('document.open () in parsing mode is ignored');
708        return this;        return this;
709      }      }
# Line 551  Line 737 
737      }      }
738    
739      // Step 11.      // Step 11.
740      this._parser.setInsertionPoint (this._parser.in.s.length);      this._parser.setInsertionPoint (this._parser.input.s.length);
741    
742      // Step 12.      // Step 12.
743      return this;      return this;
744    }; // document.open    }; // document.open
745    
746    JSDocument.prototype.write = function () {    JSDocument.prototype.write = function () {
747        log ('document.write: start');
748      logIndentLevel++;      logIndentLevel++;
749    
750      var p = this._parser;      var p = this._parser;
# Line 571  Line 758 
758      // 2. ... inserted into the input stream just before the insertion point.      // 2. ... inserted into the input stream just before the insertion point.
759      var s = Array.join (arguments, '');      var s = Array.join (arguments, '');
760      log ('document.write: insert "' + s + '"' +      log ('document.write: insert "' + s + '"' +
761           ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');           ' before "' +
762      p.in.s = p.in.s.substring (0, p.insertionPoint) + s           p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
763          + p.in.s.substring (p.insertionPoint, p.in.s.length);      p.input.s = p.input.s.substring (0, p.insertionPoint) + s
764            + p.input.s.substring (p.insertionPoint, p.input.s.length);
765      p.insertionPoint += s.length;      p.insertionPoint += s.length;
766    
767      // 3. If there is a script that will execute as soon as the parser resumes      // 3. If there is a script that will execute as soon as the parser resumes
768      if (p.scriptExecutedAfterParserResumes) {      if (p.scriptExecutedAfterParserResumes) {
769        log ('document.write: processed later (there is an unprocessed <script src>)');        log ('document.write: processed later (there is an unprocessed <script src>)');
770        logIndentLevel--;        logIndentLevel--;
771          log ('document.write: return');
772        return;        return;
773      }      }
774    
# Line 593  Line 782 
782      // to do something here?      // to do something here?
783    
784      // 5. Return      // 5. Return
785        logIndentLevel--;
786      log ('document.write: return');      log ('document.write: return');
787    
     logIndentLevel--;  
788      return;      return;
789    }; // document.write    }; // document.write
790    
791      JSDocument.prototype._insertExternalScript = function (uri) {
792        var s = new JSElement (this, 'script');
793        s.src = uri;
794        if (this.documentElement) {
795          this.documentElement.appendChild (s);
796          return true;
797        } else {
798          return false;
799        }
800      }; // _insertExternalScript
801    
802      JSDocument.prototype.__defineGetter__ ('documentElement', function () {
803        var cn = this.childNodes;
804        for (var i = 0; i < cn.length; i++) {
805          if (cn[i] instanceof JSElement) {
806            return cn[i]
807          }
808        }
809        return null;
810      });
811    
812    JSElement.prototype.__defineGetter__ ('text', function () {    JSElement.prototype.__defineGetter__ ('text', function () {
813      var r = '';      var r = '';
814      for (var i = 0; i < this.childNodes.length; i++) {      for (var i = 0; i < this.childNodes.length; i++) {
# Line 617  Line 827 
827          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
828          if (node.async) r += '| ' + indent + '  async=""\n';          if (node.async) r += '| ' + indent + '  async=""\n';
829          if (node.defer) r += '| ' + indent + '  defer=""\n';          if (node.defer) r += '| ' + indent + '  defer=""\n';
830          if (node.src) r += '| ' + indent + '  src="' + node.src + '"\n';          if (node.src != null) {
831              r += '| ' + indent + '  src="' + node.src + '"\n';
832            }
833          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
834        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
835          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 631  Line 843 
843  </head>  </head>
844  <body onload="  <body onload="
845    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
846    
847      var q = location.search;
848      if (q != null) {
849        q = q.substring (1).split (/;/);
850        for (var i = 0; i < q.length; i++) {
851          var v = q[i].split (/=/, 2);
852          v[0] = decodeURIComponent (v[0]);
853          v[1] = decodeURIComponent (v[1] || '');
854          if (v[0] == 's') {
855            document.sourceElement.value = v[1];
856          }
857        }
858      }
859    
860    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
861    update ();    update ();
862  ">  ">
863    <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
864    Parser</h1>
865    
866  <textarea onchange=" update () ">&lt;html>  <h2>Markup to test
867    (<a href=data:, id=permalink rel=bookmark>permalink</a>,
868    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
869        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
870        Viewer</a>)</h2>
871    <p>
872    <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
873  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
874  &lt;p>  &lt;p>
875  &lt;script>  &lt;script>
# Line 644  document.write ('aaaaaaa&lt;/p>&lt;scrip Line 878  document.write ('aaaaaaa&lt;/p>&lt;scrip
878  &lt;p>  &lt;p>
879  </textarea>  </textarea>
880    
881  <output></output>  <h2 id=log>Log</h2>
882    <p><output></output>
883    
884    <h2 id=notes>Notes</h2>
885    
886    <p>This is a <em>simplified</em> implementation of
887    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
888    Parsing Algorithm</a>.  It only implements the script-related part of the
889    algorithm.  In particular, this parser:
890    <ul>
891    <li>Does not support <code>DOCTYPE</code> and comment tokens.
892    <li>Does not support entities except for <code>&amp;quot;</code>,
893    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
894    <code>src</code> attribute value.
895    <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
896    algorithm, and so on.
897    <li>Does not raise parse errors for invalid attribute specifications in start
898    or end tags.
899    <li>Does not support PCDATA elements (<code>title</code> and
900    <code>textarea</code>).
901    <li>Does not strip the first newline in <code>pre</code> elements.
902    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
903    in <code>script</code> element.
904    <li>Does not support foreign (SVG or MathML) elements.
905    <li>Only supports <code>script</code> <code>type</code>
906    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
907    attributes are ignored.
908    <li>Only supports limited statements.  It must consist of zero or more
909    of statements looking similar to the following statements, possibly
910    introduced, followed, or separated by white space characters:
911      <ul>
912      <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
913      <li><code>var s = document.createElement ("script");
914                s.src = "<var>string</var>";
915                document.documentElement.appendChild (s);</code>
916      <li><code>w (document.documentElement.innerHTML);</code> (This statement
917      can be used to dump the document, even when the document has no
918      document element.  The output format is the tree dump format used
919      in html5lib test data, not <abbr>HTML</abbr>.)
920      </ul>
921    Note that strings may be delimited by <code>'</code>s instead of
922    <code>"</code>s.
923    <li>Only supports <code>javascript:</code>
924    <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
925    <code>src</code> attribute of the <code>script</code> element.  In addition,
926    the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
927    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
928    <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
929    string literals.
930    <li>Does not handle the <i>stop parsing</i> phase correctly if the document is
931    replaced by a <code>document.open ()</code> call.  In other words, delayed
932    (deferred or asynchronous) script executions and event firings might be
933    treated in a wrong way if a <code>document.open ()</code> invocation
934    is implicitly done by <code>document.write ()</code> in a delayed script.
935    </ul>
936    
937    <p>For some reason, this parser does not work in browsers that do
938    not support JavaScript 1.5.
939    
940    <!-- TODO: |src| attribute value should refer to the value at the time
941    when it is inserted into the document, not the value when the script is
942    executed.  Currently it does not matter, since we don't allow dynamic
943    modification to the |src| content/DOM attribute value yet. -->
944    
945  </body>  </body>
 </html>  
946    </html>
947    <!-- $Date$ -->
948    <!--
949    
950    Copyright 2008 Wakaba <w@suika.fam.cx>
951    
952    This program is free software; you can redistribute it and/or
953    modify it under the terms of the GNU General Public License
954    as published by the Free Software Foundation; either version 2
955    of the License, or (at your option) any later version.
956    
957    This program is distributed in the hope that it will be useful,
958    but WITHOUT ANY WARRANTY; without even the implied warranty of
959    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
960    GNU General Public License for more details.
961    
962    You should have received a copy of the GNU General Public License
963    along with this program; if not, write to the Free Software
964    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
965    
966    -->

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.16

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24