/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1 by wakaba, Sun Apr 20 06:07:24 2008 UTC revision 1.20 by wakaba, Sat Sep 20 13:32:45 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5    <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6    <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7        title="GNU GPL2 or later">
8  <style>  <style>
9      h1 {
10        margin: 0;
11        font-size: 150%;
12      }
13      h2 {
14        margin: 0;
15        font-size: 100%;
16      }
17      p {
18        margin: 0 1em;
19      }
20    textarea {    textarea {
21       display: block;      width: 100%;
22       width: 80%;      -width: 99%;
23       margin-left: auto;      height: 10em;
      margin-right: auto;  
      min-height: 20em;  
24    }    }
25    output {    output {
26      display: block;      display: block;
27      font-family: monospace;      font-family: monospace;
28      white-space: pre;      white-space: -moz-pre-wrap;
29        white-space: pre-wrap;
30    }    }
31  </style>  </style>
32  <script>  <script>
33      var delayedUpdater = 0;
34    
35    function update () {    function update () {
          // Listing rows show revision 1.1 on the left and 1.20 on the right.
          // 1.1 cleared the log, parsed the textarea content and dumped the
          // tree synchronously.  1.20 only debounces: it cancels any pending
          // timer and schedules update2 to run 100 ms later.
36      document.logElement.textContent = '';      if (delayedUpdater) {
37      var p = new Parser ();        clearTimeout (delayedUpdater);
38      p.parse (new InputStream (document.sourceElement.value));        delayedUpdater = 0;
39      log (dumpTree (p.doc, ''));      }
40        delayedUpdater = setTimeout (update2, 100);
41    } // update    } // update
42    
43      function update2 () {
          // Re-parses the textarea content, but only when it differs from the
          // text seen on the previous run (cached in
          // document.previousSourceText).
44        var v = document.sourceElement.value;
45        if (v != document.previousSourceText) {
46          document.previousSourceText = v;
            // Point both links at the current source text: the permalink
            // carries it in the ?s= parameter, the Live DOM Viewer link
            // carries it as the whole query string.
47          document.links['permalink'].href
48              = location.pathname + '?s=' + encodeURIComponent (v);
49          document.links['ldvlink'].href
50              = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
51              + encodeURIComponent (v);
52    
            // Start from an empty log, parse, then dump the resulting tree.
53          document.logElement.textContent = '';
54          var p = new Parser (new InputStream (v));
55          var doc = p.doc;
56          p.parse ();
57          
58          log (dumpTree (doc, ''));
59          
            // hasAsyncScript is set by the parser when it ran a script from
            // its "as soon as possible" or "asynchronously" lists.
60          if (p.hasAsyncScript) {
61            log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
62          }
63        }
64      } // update2
65    
66      var logIndentLevel = 0;
67    function log (s) {    function log (s) {
          // Appends s plus a trailing newline to the log element as a text
          // node.  In the 1.20 column every line of s is first prefixed with
          // two spaces per logIndentLevel.
68        var indent = '';
69        for (var i = 0; i < logIndentLevel; i++) {
70          indent += '  ';
71        }
          // Indent the first line and every line following an embedded "\n".
72        s = indent + s.replace (/\n/g, "\n" + indent);
73      document.logElement.appendChild (document.createTextNode (s + "\n"));      document.logElement.appendChild (document.createTextNode (s + "\n"));
74    } // log    } // log
75    
# Line 32  Line 77 
77      this.s = s;      this.s = s;
78    } // InputStream    } // InputStream
79    
80    function Parser () {    function Parser (i, doc) {
          // Parser constructor.  In the 1.20 column:
          //   i   - input stream object, stored as this.input.
          //   doc - optional document to parse into; when omitted, a new
          //         JSDocument bound to this parser is created and flagged
          //         manakaiIsHTML.
          // The document itself is the initial entry of the open-element
          // stack.
81      this.parseMode = 'pcdata';      this.parseMode = 'pcdata';
82      this.doc = new JSDocument ();      if (!doc) {
83      this.openElements = [this.doc];        doc = new JSDocument (this);
84          doc.manakaiIsHTML = true;
85        }
          // Queue of pushed-back tokens; getNextToken drains it before
          // reading from the input.
86        this.nextToken = [];
87        this.doc = doc;
88        this.openElements = [doc];
89        this.input = i;
          // Script elements queued for later execution, one list per
          // scheduling category.
90        this.scriptsExecutedAfterParsing = [];
91        this.scriptsExecutedSoon = [];
92        this.scriptsExecutedAsynchronously = [];
93    } // Parser    } // Parser
94    
95    Parser.prototype.getNextToken = function (i) {    Parser.prototype.getNextToken = function () {
96      if (this.parseMode == 'script') {      if (this.nextToken.length) {
97          return this.nextToken.shift ();
98        }
99    
100        var p = this;
101        var i = this.input;
102        if (this.parseMode == 'cdata') {
103          var tagName = this.endTagName;
104        var token;        var token;
105        i.s = i.s.replace (/^([\s\S]+?)<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/,        if (p.insertionPoint <= 0) {
106            return {type: 'abort'};
107          }
108          i.s = i.s.replace (/^([^<]+)/,
109        function (s, t) {        function (s, t) {
110            if (0 < p.insertionPoint && p.insertionPoint < t.length) {
111              token = {type: 'char', value: t.substring (0, p.insertionPoint)};
112              var ip = p.insertionPoint;
113              p.insertionPoint = 0;
114              return t.substring (ip, t.length);
115            }
116          token = {type: 'char', value: t};          token = {type: 'char', value: t};
117          return '<' + '/script>';          p.insertionPoint -= t.length;
118            return '';
119          });
120          if (token) return token;
121          var pattern = new RegExp ('^</' + tagName + '>', 'i');
122          i.s = i.s.replace (pattern, function (s) {
123            if (p.insertionPoint < s.length) {
124              token = {type: 'abort'};
125              return s;
126            }
127            token = {type: 'end-tag', value: tagName};
128            p.insertionPoint -= s.length;
129            return '';
130        });        });
131        if (token) return token;        if (token) return token;
132        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function () {        var m;
133          token = {type: 'end-tag', value: 'script'};        if ((p.insertionPoint < ('</' + tagName).length) &&
134              (m = i.s.match (/^<\/([A-Za-z]+)/))) {
135            var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
136            if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
137              return {type: 'abort'};
138            }
139          }
140          i.s = i.s.replace (/^</,
141          function (s) {
142            token = {type: 'char', value: s};
143            p.insertionPoint -= s.length;
144          return '';          return '';
145        });        });
146        if (token) return token;        if (token) return token;
# Line 56  Line 148 
148      }      }
149    
150      var token;      var token;
151      i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
152          if (p.insertionPoint < s.length ||
153              (p.insertionPoint <= s.length &&
154               s.substring (s.length - 1, s.length) != '>')) {
155            token = {type: 'abort'};
156            return s;
157          }
158        token = {type: 'end-tag', value: e.toLowerCase ()};        token = {type: 'end-tag', value: e.toLowerCase ()};
159          p.insertionPoint -= s.length;
160        return '';        return '';
161      });      });
162      if (token) return token;      if (token) return token;
163      i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
164        token = {type: 'start-tag', value: e.toLowerCase ()};        if (p.insertionPoint < s.length ||
165              (p.insertionPoint <= s.length &&
166               s.substring (s.length - 1, s.length) != '>')) {
167            token = {type: 'abort'};
168            return s;
169          }
170          var tagName;
171          var attrs = {};
172          e = e.replace (/^[\S]+/, function (v) {
173            tagName = v.toLowerCase ();
174            return '';
175          });
176          while (true) {
177            var m = false;
178            e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
179            function (x, attrName, attrValue1, attrValue2, attrValue3) {
180              v = attrValue1 || attrValue2 || attrValue3;
181              v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
182                  .replace (/&amp;/g, '&');
183              attrs[attrName.toLowerCase ()] = v;
184              m = true;
185              return '';
186            });
187            if (!m) break;
188          }
189          if (e.length) {
190            log ('Broken start tag: "' + e + '"');
191          }
192          token = {type: 'start-tag', value: tagName, attrs: attrs};
193          p.insertionPoint -= s.length;
194        return '';        return '';
195      });      });
196      if (token) return token;      if (token) return token;
197        if (p.insertionPoint <= 0) {
198          return {type: 'abort'};
199        }
200      i.s = i.s.replace (/^[^<]+/, function (s) {      i.s = i.s.replace (/^[^<]+/, function (s) {
201          if (p.insertionPoint < s.length) {
202            token = {type: 'char', value: s.substring (0, p.insertionPoint)};
203            var ip = p.insertionPoint;
204            p.insertionPoint = 0;
205            return s.substring (ip, s.length);
206          }
207        token = {type: 'char', value: s};        token = {type: 'char', value: s};
208          p.insertionPoint -= s.length;
209        return '';        return '';
210      });      });
211      if (token) return token;      if (token) return token;
212      i.s = i.s.replace (/^[\s\S]/, function (s) {      i.s = i.s.replace (/^[\s\S]/, function (s) {
213        token = {type: 'char', value: s};        token = {type: 'char', value: s};
214          p.insertionPoint -= s.length;
215        return '';        return '';
216      });      });
217      if (token) return token;      if (token) return token;
218      return {type: 'eof'};      return {type: 'eof'};
219    } // getNextToken    } // getNextToken
220    
221    Parser.prototype.parse = function (i) {    Parser.prototype.parse = function () {
222      log ('start parsing');      logIndentLevel++;
223        log ('parse: start');
224    
225      while (true) {      while (true) {
226        var token = this.getNextToken (i);        var token = this.getNextToken ();
227        log ('token: ' + token.type + ' "' + token.value + '"');        log ('token: ' + token.type + ' "' + token.value + '"');
228    
229          if (this.cdataEndTagRequired) {
230            // Generic CDATA parsing algorithm
231    
232            if (token.type != 'abort') {
233              // 7.
234              if (token.type == 'end-tag' && token.value == this.endTagName) {
235                // 7.1. Ignores it.
236                //
237              } else {
238                // 7.2. Parse error.
239                log ('Parse error: no </' + this.endTagName + '>');
240                this.nextToken.unshift (token);
241              }
242              this.cdataEndTagRequired = false;
243              continue;
244            }
245          }
246    
247        if (token.type == 'start-tag') {        if (token.type == 'start-tag') {
         var el = new JSElement (token.value);  
248          if (token.value == 'script') {          if (token.value == 'script') {
249            this.parseMode = 'script';            // 1. Create an element for the token in the HTML namespace.
250              var el = new JSElement (this.doc, token.value);
251              if (token.attrs.async != null) el.async = true;
252              if (token.attrs.defer != null) el.defer = true;
253              if (token.attrs.src != null) el.src = token.attrs.src;
254    
255              // 2. Mark the element as being "parser-inserted".
256              el.manakaiParserInserted = true;
257    
258              // 3. Switch the tokeniser's content model flag to the CDATA state.
259              this.parseMode = 'cdata';
260              this.endTagName = 'script';
261    
262              // 4.1. Collect all the character tokens.
263            while (true) {            while (true) {
264              var token = this.getNextToken (i);              var token = this.getNextToken ();
265              log ('token: ' + token.type + ' "' + token.value + '"');              log ('token: ' + token.type + ' "' + token.value + '"');
266    
267              if (token.type == 'char') {              if (token.type == 'char') {
268                  // 5. Append a single Text node to the script element node.
269                el.manakaiAppendText (token.value);                el.manakaiAppendText (token.value);
270    
271                // 4.2. Until it returns a token that is not a character token, or
272                // until it stops tokenising.
273              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
274                         (token.type == 'end-tag' && token.value == 'script')) {                         token.type == 'end-tag' ||
275                           token.type == 'abort') {
276                  // 6. Switched back to the PCDATA state.
277                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
278    
279                  // 7.1. If the next token is not an end tag token with ...
280                  if (!(token.type == 'end-tag' && token.value == 'script')) {
281                    // 7.2. This is a parse error.
282                    log ('Parse error: no </' + 'script>');
283                    this.nextToken.unshift (token);
284    
285                    // 7.3. Mark the script element as "already executed".
286                    el.manakaiAlreadyExecuted = true;
287                  } else {
288                    // 7.4. Ignore it.
289                    //
290                  }
291                break;                break;
292              }              }
293            }            }
294    
295              // 8.1. If the parser were originally created for the ...
296              if (this.fragmentParsingMode) {
297                // 8.2. Mark the script element as "already executed" and ...
298                el.manakaiAlreadyExecuted = true;
299                continue;
300              }
301    
302              // 9.1. Let the old insertion point have the same value as the ...
303              var oldInsertionPoint = this.insertionPoint;
304              // 9.2. Let the insertion point be just before the next input ...
305              this.setInsertionPoint (0);
306    
307              // 10. Append the new element to the current node.
308              this.openElements[this.openElements.length - 1].appendChild (el);
309    
310              // 11. Let the insertion point have the value of the old ...
311    
312              oldInsertionPoint += this.insertionPoint;
313              this.setInsertionPoint (oldInsertionPoint);
314    
315              // 12. If there is a pending external script
316              while (this.pendingExternalScript) {
317                // 12.1. If the tree construction stage is being called reentrantly
318                if (this.reentrant) {
319                  log ('parse: abort (reentrance)');
320                  logIndentLevel--;
321                  return;
322    
323                // 12.2. Otherwise
324                } else {
325                  // 1.
326                  var script = this.pendingExternalScript;
327                  this.pendingExternalScript = null;
328    
329                  // 2. Pause until the script has completed loading.
330                  //
331    
332                  // 3. Let the insertion point to just before the next input char.
333                  this.setInsertionPoint (0);
334    
335                  // 4. Execute the script.
336                  executeScript (this.doc, script);
337    
338                  // 5. Let the insertion point be undefined again.
339                  this.setInsertionPoint (undefined);
340    
341                  // 6. If there is once again a script that will execute ...
342                  //
343                }
344              }
345            } else if (token.value == 'style' ||
346                       token.value == 'noscript' ||
347                       token.value == 'xmp') {
348              // 1. Create an element for the token in the HTML namespace.
349              var el = new JSElement (this.doc, token.value);
350    
351              // 2. Append the new element to the current node.
352            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
353    
354              // 3. Switch the tokeniser's content model flag to the CDATA state.
355              this.parseMode = 'cdata';
356              this.endTagName = token.value;
357    
358              // 4.1. Collect all the character tokens.
359              while (true) {
360                var token = this.getNextToken ();
361                log ('token: ' + token.type + ' "' + token.value + '"');
362    
363                if (token.type == 'char') {
364                  // 5. Append a single Text node to the script element node.
365                  el.manakaiAppendText (token.value);
366    
367                // 4.2. Until it returns a token that is not a character token, or
368                // until it stops tokenising.
369                } else if (token.type == 'eof' ||
370                           token.type == 'end-tag' ||
371                           token.type == 'abort') {
372                  // 6. Switched back to the PCDATA state.
373                  this.parseMode = 'pcdata';
374    
375                  if (token.type == 'abort') {
376                    this.cdataEndTagRequired = true;
377                    break;
378                  }
379    
380                  // 7.1. If the next token is not an end tag token with ...
381                  if (!(token.type == 'end-tag' &&
382                        token.value == this.endTagName)) {
383                    // 7.2. This is a parse error.
384                    log ('Parse error: no </' + this.endTagName + '>');
385                    this.nextToken.unshift (token);
386    
387                    // 7.3. Mark the script element as "already executed".
388                    el.manakaiAlreadyExecuted = true;
389                  } else {
390                    // 7.4. Ignore it.
391                    //
392                  }
393                  break;
394                }
395              }
396          } else {          } else {
397              var el = new JSElement (this.doc, token.value);
398            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
399            this.openElements.push (el);            this.openElements.push (el);
400          }          }
# Line 116  Line 405 
405          } else {          } else {
406            log ('parse error: unmatched end tag: ' + token.value);            log ('parse error: unmatched end tag: ' + token.value);
407          }          }
408          } else if (token.type == 'char') {
409            this.openElements[this.openElements.length - 1].manakaiAppendText
410                (token.value);
411        } else if (token.type == 'eof') {        } else if (token.type == 'eof') {
412          break;          break;
413          } else if (token.type == 'abort') {
414            log ('parse: abort');
415            logIndentLevel--;
416            return;
417        }        }
418      }      }
419    
420      log ('stop parsing');      log ('stop parsing');
421    
422        // readyState = 'interactive'
423    
424        // "When a script completes loading" rules start applying.
425    
426        while (this.scriptsExecutedSoon.length > 0 ||
427               this.scriptsExecutedAsynchronously.length > 0) {
428          // Handle "list of scripts that will execute as soon as possible".
429          while (this.scriptsExecutedSoon.length > 0) {
430            var e = this.scriptsExecutedSoon.shift ();
431      
432            // If it has completed loading
433            log ('Execute an external script not inserted by parser...');
434            executeScript (this.doc, e);
435    
436            // NOTE: It MAY be executed before the end of the parsing, according
437            // to the spec.
438            this.hasAsyncScript = true;
439          }
440    
441          // Handle "list of scripts that will execute asynchronously".
442          while (this.scriptsExecutedAsynchronously.length > 0) {
443            var e = this.scriptsExecutedAsynchronously.shift ();
444    
445            // Step 1.
446            // We assume that all scripts have been loaded at this time.
447      
448            // Step 2.
449            log ('Execute an asynchronous script...');
450            executeScript (this.doc, e);
451    
452            // Step 3.
453            //
454    
455            // Step 4.
456            //
457    
458            this.hasAsyncScript = true;
459          }
460        }
461    
462        // Handle "list of scripts that will execute when the document has finished
463        // parsing".
464        var list = this.scriptsExecutedAfterParsing;
465        while (list.length > 0) {
466          // TODO: break unless completed loading
467    
468          // Step 1.
469          //
470    
471          // Step 2. and Step 3.
472          log ('Executing a |defer|red script...');
473          executeScript (this.doc, list.shift ());
474    
475          // Step 4.
476        }
477    
478        log ('DOMContentLoaded event fired');
479    
480        // "delays the load event" things have completed:
481        // readyState = 'complete'
482        log ('load event fired');
483    
484        logIndentLevel--;
485    } // parse    } // parse
486    
487    function JSDocument () {    Parser.prototype.setInsertionPoint = function (ip) {
           // Stores ip as this.insertionPoint and logs the change.
           // undefined, null and NaN are all normalised to undefined.
488        if (ip == undefined || ip == null || isNaN (ip)) {
489          log ('insertion point: set to undefined');
490          this.insertionPoint = undefined;
491        } else if (ip == this.input.s.length) {
492          log ('insertion point: end of file');
493          this.insertionPoint = ip;
494        } else {
             // NOTE(review): the logged context is always the first 10
             // characters of the remaining input, regardless of ip.
495          log ('insertion point: set to ' + ip +
496               ' (before "' + this.input.s.substring (0, 10) + '")');
497          this.insertionPoint = ip;
498        }
499      }; // setInsertionPoint
500    
501      function JSDocument (p) {
           // Document node of the toy DOM: an initially empty child list plus
           // a back-reference to the parser p that is populating it.
502      this.childNodes = [];      this.childNodes = [];
503        this._parser = p;
504    } // JSDocument    } // JSDocument
505    
506    function JSElement (localName) {    function JSElement (doc, localName) {
           // Element node of the toy DOM.  The 1.20 column additionally
           // records the owning document passed as the first argument.
507      this.localName = localName;      this.localName = localName;
508        this.ownerDocument = doc;
509      this.childNodes = [];      this.childNodes = [];
510    } // JSElement    } // JSElement
511    
# Line 137  Line 513 
513    function (e) {    function (e) {
514      this.childNodes.push (e);      this.childNodes.push (e);
515      e.parentNode = this;      e.parentNode = this;
516    
517        if (e.localName == 'script') {
518          logIndentLevel++;
519          log ('Running a script: start');
520    
521          var doc = this.ownerDocument || this;
522          var p = doc._parser;
523    
524          // 1.The script's type
525          //
526    
527          // 2. The script's character encoding
528          //
529    
530          // 3.1. If without script
531          //
532          // 2.2. If the script element was created by an XML ... innerHTML ...
533          //
534          // 2.3. If the user agent does not support the scripting language ...
535          //
536          if (false) {
537            // 2.5. Abort these steps at this point.
538            log ('Running a script: aborted (noscript)');
539            logIndentLevel--;
540            return e;
541          }
542    
543          // 4. Set the element's "already executed" flag.
544          e.manakaiAlreadyExecuted = true;
545    
546          // 5. If the element has a src attribute, then a load for ...
547          // TODO: load an external resource
548    
549          // 5. The first of the following options:
550    
551          if (/* TODO: If the document is still being parsed && */
552              e.defer && !e.async) {
553            // 6.1.
554            p.scriptsExecutedAfterParsing.push (e);
555            log ('Running a script: aborted (defer)');
556          } else if (e.async && e.src != null) {
557            // 6.2.
558            p.scriptsExecutedAsynchronously.push (e);
559            log ('Running a script: aborted (async src)');
560          } else if (e.async && e.src == null &&
561                     p.scriptsExecutedAsynchronously.length > 0) {
562            // 6.3.
563            p.scriptsExecutedAsynchronously.push (e);
564            log ('Running a script: aborted (async)');
565          } else if (e.src != null && e.manakaiParserInserted) {
566            // 6.4.
567            if (p.pendingExternalScript) {
568              log ('Error: There is a pending external script.');
569            }
570            p.pendingExternalScript = e;
571            log ('Running a script: aborted (src parser-inserted)');
572          } else if (e.src != null) {
573            // 6.5.
574            p.scriptsExecutedSoon.push (e);
575            log ('Running a script: aborted (src)');
576          } else {
577            // 6.6.
578            executeScript (doc, e); // even if other scripts are already executing.
579          }
580    
581          log ('Running a script: end');
582          logIndentLevel--;
583        }
584    
585      return e;      return e;
586    }; // appendChild    }; // appendChild
587    
588      function executeScript (doc, e) {
           // Runs the script element e against document doc.
           // The source text comes from getExternalScript (e.src) when a src
           // is set, otherwise from the element's own text (e.text).
589        log ('executing a script block: start');
590    
591        var s;
592        if (e.src != null) {
593          s = getExternalScript (e.src);
594    
595          // If the load resulted in an error, then ... firing an error event ...
             // NOTE(review): this early return skips the
             // 'executing a script block: end' log line below.
596          if (s == null) {
597            log ('error event fired at the script element');
598            return;
599          }
600    
601          log ('External script loaded: "' + s + '"');
602        } else {
603          s = e.text;
604        }
605    
606        // If the load was successful
607    
           // The conditions listed below are not modelled; the branch is
           // hard-wired to true.
608        if (true) {
609        // Scripting is enabled, Document.designMode is disabled,
610        // Document is the active document in its browsing context
611    
612          parseAndRunScript (doc, s);
613        }
614    
615        log ('load event fired at the script element');
616    
617        log ('executing a script block: end');
618      } // executeScript
619    
620      function getExternalScript (uri) {
621        if (uri.match (/^javascript:/i)) {
622          var m;
623          if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
624            if (m[1]) {
625              return unescapeJSLiteral (m[1]);
626            } else if (m[2]) {
627              return unescapeJSLiteral (m[2]);
628            } else {
629              return null;
630            }
631          } else {
632            log ('Complex javascript: URI is not supported: <' + uri + '>');
633            return null;
634          }
635        } else {
636          log ('URI scheme not supported: <' + uri + '>');
637          return null;
638        }
639      } // getExternalScript
640    
641      function parseAndRunScript (doc, s) {
642        while (true) {
643          var matched = false;
644          s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (s, t) {
645            matched = true;
646            var args = [];
647            t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
648              args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
649              return '';
650            });
651            doc.write.apply (doc, args);
652            return '';
653          });
654          var noDocumentElement = false;
655          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
656          function (s, t, u) {
657            matched = true;
658            var args = [unescapeJSLiteral (t ? t : u)];
659            noDocumentElement = !doc._insertExternalScript.apply (doc, args);
660            return '';
661          });
662          if (noDocumentElement) {
663            log ('Script error: documentElement is null');
664            break;
665          }
666          s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
667          function (s, t) {
668            matched = true;
669            log (dumpTree (doc, ''));
670            return '';
671          });
672          if (s == '') break;
673          if (!matched) {
674            log ('Script parse error: "' + s + '"');
675            break;
676          }
677        }
678      } // parseAndRunScript
679    
680      function unescapeJSLiteral (s) {
           // Decodes every \uXXXX escape (exactly four hex digits) in s into
           // the corresponding character.  No other escape sequence is
           // handled here.
681        return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
             // The '0x' prefix makes parseInt read v as hexadecimal.
682          return String.fromCharCode (parseInt ('0x' + v));
683        });
684      } // unescapeJSLiteral
685    
686    function JSText (data) {    function JSText (data) {
           // Text node of the toy DOM; holds its character data in this.data.
687      this.data = data;      this.data = data;
688    } // JSText    } // JSText
# Line 155  Line 698 
698      }      }
699    }; // manakaiAppendText    }; // manakaiAppendText
700    
701      JSDocument.prototype.open = function () {
702        // Two or fewer arguments
703    
704        // Step 1.
705        var type = arguments[0] || 'text/html';
706        
707        // Step 2.
708        var replace = arguments[1] == 'replace';
709    
710        // Step 3.
711        if (this._parser &&
712            !this._parser.scriptCreated &&
713            this._parser.input.insertionPoint != undefined) {
714          log ('document.open () in parsing mode is ignored');
715          return this;
716        }
717    
718        // Step 4.
719        log ('onbeforeunload event fired');
720        log ('onunload event fired');
721    
722        // Step 5.
723        if (this._parser) {
724          // Discard the parser.
725        }
726    
727        // Step 6.
728        log ('document cleared by document.open ()');
729        this.childNodes = [];
730    
731        // Step 7.
732        this._parser = new Parser (new InputStream (''), this);
733        this._parser.scriptCreated = true;
734    
735        // Step 8.
736        this.manakaiIsHTML = true;
737    
738        // Step 9.
739        // If not text/html, ...
740    
741        // Step 10.
742        if (!replace) {
743          // History      
744        }
745    
746        // Step 11.
747        this._parser.setInsertionPoint (this._parser.input.s.length);
748    
749        // Step 12.
750        return this;
751      }; // document.open
752    
/**
 * Simplified |document.write ()|.
 *
 * All arguments are stringified and concatenated, spliced into the
 * parser's input stream just before the insertion point, and then (unless
 * an external script is still pending) parsed immediately in reentrant
 * mode.  Progress is reported through |log| with one extra indent level
 * while the call is running.
 *
 * Arguments: zero or more values; joined with the empty string.
 * Returns: nothing.
 */
JSDocument.prototype.write = function () {
  log ('document.write: start');
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  // A document without a parser is handled the same way, since
  // |open ()| creates a fresh script-created parser.
  // NOTE(review): |open ()| tests |_parser.input.insertionPoint| while this
  // method uses |_parser.insertionPoint|; presumably Parser forwards one to
  // the other -- confirm against the Parser definition.
  if (!p || isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array; borrow |join| from Array.prototype
  // (the |Array.join| generic is a non-standard Mozilla extension).
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  // Keep the insertion point just after the text that was written.
  p.insertionPoint += s.length;

  // 3. If there is a pending external script
  if (p.pendingExternalScript) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    log ('document.write: return');
    return;
  }

  // 4. Process the characters that were inserted, ...
  // The parser is flagged as reentrant for the duration of the nested
  // |parse ()| call and the previous flag value is restored afterwards.
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing).  Do we need
  // to do something here?

  // 5. Return
  logIndentLevel--;
  log ('document.write: return');

  return;
}; // document.write
797    
/**
 * Creates a |script| element whose |src| is |uri| and appends it to the
 * document element.
 *
 * Returns |true| if the element was appended, or |false| if the document
 * has no document element (the new element is then discarded).
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var scriptElement = new JSElement (this, 'script');
  scriptElement.src = uri;

  var root = this.documentElement;
  if (!root) {
    // Nothing to append to.
    return false;
  }

  root.appendChild (scriptElement);
  return true;
}; // _insertExternalScript
808    
// |documentElement|: the first child of the document that is a JSElement,
// or |null| if the document has no element child.
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var index = 0;
  while (index < children.length) {
    var child = children[index];
    if (child instanceof JSElement) {
      return child;
    }
    index++;
  }
  return null;
});
818    
// |text|: concatenation of the |data| of every JSText child of the
// element, in order.  Non-text children (and their descendants) are
// ignored.
JSElement.prototype.__defineGetter__ ('text', function () {
  var collected = '';
  var index = 0;
  while (index < this.childNodes.length) {
    var child = this.childNodes[index];
    index++;
    if (!(child instanceof JSText)) {
      continue;
    }
    collected += child.data;
  }
  return collected;
});
828    
829    function dumpTree (n, indent) {    function dumpTree (n, indent) {
830      var r = '';      var r = '';
831      for (var i = 0; i < n.childNodes.length; i++) {      for (var i = 0; i < n.childNodes.length; i++) {
832        var node = n.childNodes[i];        var node = n.childNodes[i];
833        if (node instanceof JSElement) {        if (node instanceof JSElement) {
834          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
835            if (node.async) r += '| ' + indent + '  async=""\n';
836            if (node.defer) r += '| ' + indent + '  defer=""\n';
837            if (node.src != null) {
838              r += '| ' + indent + '  src="' + node.src + '"\n';
839            }
840          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
841        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
842          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 174  Line 850 
850  </head>  </head>
851  <body onload="  <body onload="
852    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
853    
// Pre-fill the textarea from the query string.  Parameters are separated
// by |;| and written as |name=value|; only the |s| parameter is used and
// its decoded value becomes the source text.
var q = location.search;
if (q != null) {
  q = q.substring (1).split (/;/);
  for (var i = 0; i < q.length; i++) {
    var v = q[i].split (/=/, 2);
    try {
      v[0] = decodeURIComponent (v[0]);
      v[1] = decodeURIComponent (v[1] || '');
    } catch (e) {
      // A malformed percent-escape makes decodeURIComponent throw.  Skip
      // that parameter so the rest of this onload handler still runs.
      continue;
    }
    if (v[0] == 's') {
      document.sourceElement.value = v[1];
    }
  }
}
866    
867    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
868    update ();    update ();
869  ">  ">
870    <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
871    Parser</h1>
872    
873  <textarea onchange=" update () ">&lt;html>  <h2>Markup to test
874    (<a href=data:, id=permalink rel=bookmark>permalink</a>,
875    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
876        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
877        Viewer</a>)</h2>
878    <p>
879    <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
880  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
881  &lt;p>  &lt;p>
882  &lt;script>  &lt;script>
883  document.write ('aaaaaaa&lt;/p>\n&lt;script>\ndocument.write("cccccc")\n&lt;/', 'script>\nbbbbbb');  document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
884  &lt;/script>  &lt;/script>
885  &lt;p>  &lt;p>
886  </textarea>  </textarea>
887    
888  <output></output>  <h2 id=log>Log</h2>
889    <p><output></output>
890    
891    <h2 id=notes>Notes</h2>
892    
893    <p>This is a <em>simplified</em> implementation of
894    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
895    Parsing Algorithm</a>.  It only implements the script-related part of the
896    algorithm.  In particular, this parser:
897    <ul>
898    <li>Does not support <code>DOCTYPE</code> and comment tokens.
899    <li>Does not support entities except for <code>&amp;quot;</code>,
900    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
901    <code>src</code> attribute value.
902    <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
903    algorithm, and so on.
904    <li>Does not raise parse errors for invalid attribute specifications in start
905    or end tags.
906    <li>Does not support RCDATA elements (<code>title</code> and
907    <code>textarea</code>).
908    <li>Does not strip the first newline in <code>pre</code>,
909    <code>listing</code>, and <code>textarea</code> elements.
910    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
911    in CDATA/RCDATA elements.
912    <li>Does not support foreign (SVG or MathML) elements.
913    <li>Only supports <code>script</code> <code>type</code>
914    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
915    attributes are ignored.
916    <li>Only supports a limited set of statements.  A script must consist of
917    zero or more statements similar to the following ones, possibly
918    preceded, followed, or separated by white space characters:
919      <ul>
920      <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
921      <li><code>var s = document.createElement ("script");
922                s.src = "<var>string</var>";
923                document.documentElement.appendChild (s);</code>
924      <li><code>w (document.documentElement.innerHTML);</code> (This statement
925      can be used to dump the document, even when the document has no
926      document element.  The output format is the tree dump format used
927      in html5lib test data, not <abbr>HTML</abbr>.)
928      </ul>
929    Note that strings may be delimited by <code>'</code>s instead of
930    <code>"</code>s.
931    <li>Only supports <code>javascript:</code>
932    <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
933    <code>src</code> attribute of the <code>script</code> element.  In addition,
934    the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
935    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
936    <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
937    string literals.
938    <li>Does not handle the <i>stop parsing</i> phase correctly if the document is
939    replaced by a <code>document.open ()</code> call.  In other words, delayed
940    (deferred or asynchronous) script executions and event firings might be
941    treated in a wrong way if a <code>document.open ()</code> invocation
942    is implicitly done by <code>document.write ()</code> in a delayed script.
943    </ul>
944    
945    <p>For some reason, this parser does not work in browsers that do
946    not support JavaScript 1.5.
947    
948    <!-- TODO: |src| attribute value should refer to the value at the time
949    when it is inserted into the document, not the value when the script is
950    executed.  Currently it does not matter, since we don't allow dynamic
951    modification to the |src| content/DOM attribute value yet. -->
952    
953    <p>See also
954    <a href="http://suika.fam.cx/gate/2005/sw/Live%20Scripting%20HTML%20Parser">SuikaWiki:
955    Live Scripting HTML Parser</a>.
956    
957  </body>  </body>
 </html>  
958    </html>
959    <!-- $Date$ -->
960    <!--
961    
962    Copyright 2008 Wakaba <w@suika.fam.cx>
963    
964    This program is free software; you can redistribute it and/or
965    modify it under the terms of the GNU General Public License
966    as published by the Free Software Foundation; either version 2
967    of the License, or (at your option) any later version.
968    
969    This program is distributed in the hope that it will be useful,
970    but WITHOUT ANY WARRANTY; without even the implied warranty of
971    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
972    GNU General Public License for more details.
973    
974    You should have received a copy of the GNU General Public License
975    along with this program; if not, write to the Free Software
976    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
977    
978    -->

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.20

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24