/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2 by wakaba, Sun Apr 20 07:48:00 2008 UTC revision 1.17 by wakaba, Fri May 16 10:29:25 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5    <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6    <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7        title="GNU GPL2 or later">
8  <style>  <style>
9      h1 {
10        margin: 0;
11        font-size: 150%;
12      }
13      h2 {
14        margin: 0;
15        font-size: 100%;
16      }
17      p {
18        margin: 0 1em;
19      }
20    textarea {    textarea {
21       display: block;      width: 100%;
22       width: 80%;      -width: 99%;
23       margin-left: auto;      height: 10em;
      margin-right: auto;  
      min-height: 20em;  
24    }    }
25    output {    output {
26      display: block;      display: block;
27      font-family: monospace;      font-family: monospace;
28      white-space: pre;      white-space: -moz-pre-wrap;
29        white-space: pre-wrap;
30    }    }
31  </style>  </style>
32  <script>  <script>
33      var delayedUpdater = 0;
34    
35    function update () {    function update () {
36      document.logElement.textContent = '';      if (delayedUpdater) {
37      var p = new Parser (new InputStream (document.sourceElement.value));        clearTimeout (delayedUpdater);
38      p.parse ();        delayedUpdater = 0;
39      log (dumpTree (p.doc, ''));      }
40        delayedUpdater = setTimeout (update2, 100);
41    } // update    } // update
42    
43      function update2 () {
44        var v = document.sourceElement.value;
45        if (v != document.previousSourceText) {
46          document.previousSourceText = v;
47          document.links['permalink'].href
48              = location.pathname + '?s=' + encodeURIComponent (v);
49          document.links['ldvlink'].href
50              = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
51              + encodeURIComponent (v);
52    
53          document.logElement.textContent = '';
54          var p = new Parser (new InputStream (v));
55          var doc = p.doc;
56          p.parse ();
57          
58          log (dumpTree (doc, ''));
59          
60          if (p.hasAsyncScript) {
61            log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
62          }
63        }
64      } // update2
65    
66      var logIndentLevel = 0;
67    function log (s) {    function log (s) {
68        var indent = '';
69        for (var i = 0; i < logIndentLevel; i++) {
70          indent += '  ';
71        }
72        s = indent + s.replace (/\n/g, "\n" + indent);
73      document.logElement.appendChild (document.createTextNode (s + "\n"));      document.logElement.appendChild (document.createTextNode (s + "\n"));
74    } // log    } // log
75    
# Line 32  Line 77 
77      this.s = s;      this.s = s;
78    } // InputStream    } // InputStream
79    
80    function Parser (i) {    function Parser (i, doc) {
81      this.parseMode = 'pcdata';      this.parseMode = 'pcdata';
82      this.doc = new JSDocument (this);      if (!doc) {
83      this.openElements = [this.doc];        doc = new JSDocument (this);
84      this.in = i;        doc.manakaiIsHTML = true;
85        }
86        this.nextToken = [];
87        this.doc = doc;
88        this.openElements = [doc];
89        this.input = i;
90        this.scriptsExecutedAfterParsing = [];
91        this.scriptsExecutedSoon = [];
92        this.scriptsExecutedAsynchronously = [];
93    } // Parser    } // Parser
94    
95    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
96      var i = this.in;      if (this.nextToken.length) {
97      if (this.parseMode == 'script') {        return this.nextToken.shift ();
98        }
99    
100        var p = this;
101        var i = this.input;
102        if (this.parseMode == 'cdata') {
103          var tagName = this.endTagName;
104        var token;        var token;
105        i.s = i.s.replace (/^([\s\S]+?)<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/,        if (p.insertionPoint <= 0) {
106            return {type: 'abort'};
107          }
108          i.s = i.s.replace (/^([^<]+)/,
109        function (s, t) {        function (s, t) {
110            if (0 < p.insertionPoint && p.insertionPoint < t.length) {
111              token = {type: 'char', value: t.substring (0, p.insertionPoint)};
112              var ip = p.insertionPoint;
113              p.insertionPoint = 0;
114              return t.substring (ip, t.length);
115            }
116          token = {type: 'char', value: t};          token = {type: 'char', value: t};
117          return '<' + '/script>';          p.insertionPoint -= t.length;
118            return '';
119        });        });
120        if (token) return token;        if (token) return token;
121        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function () {        var pattern = new RegExp ('^</' + tagName + '>', 'i');
122          token = {type: 'end-tag', value: 'script'};        i.s = i.s.replace (pattern, function (s) {
123            if (p.insertionPoint < s.length) {
124              token = {type: 'abort'};
125              return s;
126            }
127            token = {type: 'end-tag', value: tagName};
128            p.insertionPoint -= s.length;
129            return '';
130          });
131          if (token) return token;
132          var m;
133          if ((p.insertionPoint < ('</' + tagName).length) &&
134              (m = i.s.match (/^<\/([A-Za-z]+)/))) {
135            var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
136            if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
137              return {type: 'abort'};
138            }
139          }
140          i.s = i.s.replace (/^</,
141          function (s) {
142            token = {type: 'char', value: s};
143            p.insertionPoint -= s.length;
144          return '';          return '';
145        });        });
146        if (token) return token;        if (token) return token;
# Line 58  Line 148 
148      }      }
149    
150      var token;      var token;
151      i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
152          if (p.insertionPoint < s.length ||
153              (p.insertionPoint <= s.length &&
154               s.substring (s.length - 1, s.length) != '>')) {
155            token = {type: 'abort'};
156            return s;
157          }
158        token = {type: 'end-tag', value: e.toLowerCase ()};        token = {type: 'end-tag', value: e.toLowerCase ()};
159          p.insertionPoint -= s.length;
160        return '';        return '';
161      });      });
162      if (token) return token;      if (token) return token;
163      i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
164        token = {type: 'start-tag', value: e.toLowerCase ()};        if (p.insertionPoint < s.length ||
165              (p.insertionPoint <= s.length &&
166               s.substring (s.length - 1, s.length) != '>')) {
167            token = {type: 'abort'};
168            return s;
169          }
170          var tagName;
171          var attrs = {};
172          e = e.replace (/^[\S]+/, function (v) {
173            tagName = v.toLowerCase ();
174            return '';
175          });
176          while (true) {
177            var m = false;
178            e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
179            function (x, attrName, attrValue1, attrValue2, attrValue3) {
180              v = attrValue1 || attrValue2 || attrValue3;
181              v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
182                  .replace (/&amp;/g, '&');
183              attrs[attrName.toLowerCase ()] = v;
184              m = true;
185              return '';
186            });
187            if (!m) break;
188          }
189          if (e.length) {
190            log ('Broken start tag: "' + e + '"');
191          }
192          token = {type: 'start-tag', value: tagName, attrs: attrs};
193          p.insertionPoint -= s.length;
194        return '';        return '';
195      });      });
196      if (token) return token;      if (token) return token;
197        if (p.insertionPoint <= 0) {
198          return {type: 'abort'};
199        }
200      i.s = i.s.replace (/^[^<]+/, function (s) {      i.s = i.s.replace (/^[^<]+/, function (s) {
201          if (p.insertionPoint < s.length) {
202            token = {type: 'char', value: s.substring (0, p.insertionPoint)};
203            var ip = p.insertionPoint;
204            p.insertionPoint = 0;
205            return s.substring (ip, s.length);
206          }
207        token = {type: 'char', value: s};        token = {type: 'char', value: s};
208          p.insertionPoint -= s.length;
209        return '';        return '';
210      });      });
211      if (token) return token;      if (token) return token;
212      i.s = i.s.replace (/^[\s\S]/, function (s) {      i.s = i.s.replace (/^[\s\S]/, function (s) {
213        token = {type: 'char', value: s};        token = {type: 'char', value: s};
214          p.insertionPoint -= s.length;
215        return '';        return '';
216      });      });
217      if (token) return token;      if (token) return token;
# Line 82  Line 219 
219    } // getNextToken    } // getNextToken
220    
221    Parser.prototype.parse = function () {    Parser.prototype.parse = function () {
222      log ('start parsing');      logIndentLevel++;
223        log ('parse: start');
224    
225      while (true) {      while (true) {
226        var token = this.getNextToken ();        var token = this.getNextToken ();
227        log ('token: ' + token.type + ' "' + token.value + '"');        log ('token: ' + token.type + ' "' + token.value + '"');
228    
229          if (this.cdataEndTagRequired) {
230            // Generic CDATA parsing algorithm
231    
232            if (token.type != 'abort') {
233              // 7.
234              if (token.type == 'end-tag' && token.value == this.endTagName) {
235                // 7.1. Ignores it.
236                //
237              } else {
238                // 7.2. Parse error.
239                log ('Parse error: no </' + this.endTagName + '>');
240                this.nextToken.unshift (token);
241              }
242              this.cdataEndTagRequired = false;
243              continue;
244            }
245          }
246    
247        if (token.type == 'start-tag') {        if (token.type == 'start-tag') {
248          if (token.value == 'script') {          if (token.value == 'script') {
249            // 1. Create an element for the token in the HTML namespace.            // 1. Create an element for the token in the HTML namespace.
250            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
251              if (token.attrs.async != null) el.async = true;
252              if (token.attrs.defer != null) el.defer = true;
253              if (token.attrs.src != null) el.src = token.attrs.src;
254    
255            // 2. Mark the element as being "parser-inserted".            // 2. Mark the element as being "parser-inserted".
256            el.manakaiParserInserted = true;            el.manakaiParserInserted = true;
257    
258            // 3. Switch the tokeniser's content model flag to the CDATA state.            // 3. Switch the tokeniser's content model flag to the CDATA state.
259            this.parseMode = 'script';            this.parseMode = 'cdata';
260              this.endTagName = 'script';
261    
262            // 4.1. Collect all the character tokens.            // 4.1. Collect all the character tokens.
263            while (true) {            while (true) {
# Line 109  Line 269 
269                el.manakaiAppendText (token.value);                el.manakaiAppendText (token.value);
270    
271              // 4.2. Until it returns a token that is not a character token, or              // 4.2. Until it returns a token that is not a character token, or
272              // TODO: 4.3. Until it stops tokenising.              // until it stops tokenising.
273              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
274                         (token.type == 'end-tag' && token.value == 'script')) {                         token.type == 'end-tag' ||
275                           token.type == 'abort') {
276                // 6. Switched back to the PCDATA state.                // 6. Switched back to the PCDATA state.
277                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
278    
279                // 7.1. If the next token is not an end tag token with ...                // 7.1. If the next token is not an end tag token with ...
280                if (token.type != 'end-tag') {                if (!(token.type == 'end-tag' && token.value == 'script')) {
281                  // 7.2. This is a parse error.                  // 7.2. This is a parse error.
282                  log ('Parse error: no </' + 'script>');                  log ('Parse error: no </' + 'script>');
283                    this.nextToken.unshift (token);
284    
285                  // 7.3. Mark the script element as "already executed".                  // 7.3. Mark the script element as "already executed".
286                  el.manakaiAlreadyExecuted = true;                  el.manakaiAlreadyExecuted = true;
# Line 138  Line 300 
300            }            }
301    
302            // 9.1. Let the old insertion point have the same value as the ...            // 9.1. Let the old insertion point have the same value as the ...
303              var oldInsertionPoint = this.insertionPoint;
304            // 9.2. Let the insertion point be just before the next input ...            // 9.2. Let the insertion point be just before the next input ...
305              this.setInsertionPoint (0);
306    
307            // 10. Append the new element to the current node.            // 10. Append the new element to the current node.
308            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
309    
310            // 11. Let the insertion point have the value of the old ...            // 11. Let the insertion point have the value of the old ...
311    
312              oldInsertionPoint += this.insertionPoint;
313              this.setInsertionPoint (oldInsertionPoint);
314    
315            // 12. If there is a script that will execute as soon as ...            // 12. If there is a script that will execute as soon as ...
316                        while (this.scriptExecutedWhenParserResumes) {
317                // 12.1. If the tree construction stage is being called reentrantly
318                if (this.reentrant) {
319                  log ('parse: abort (reentrance)');
320                  logIndentLevel--;
321                  return;
322    
323                // 12.2. Otherwise
324                } else {
325                  // 1.
326                  var script = this.scriptExecutedWhenParserResumes;
327                  this.scriptExecutedWhenParserResumes = null;
328    
329                  // 2. Pause until the script has completed loading.
330                  //
331    
332                  // 3. Let the insertion point to just before the next input char.
333                  this.setInsertionPoint (0);
334    
335                  // 4. Execute the script.
336                  executeScript (this.doc, script);
337    
338                  // 5. Let the insertion point be undefined again.
339                  this.setInsertionPoint (undefined);
340    
341                  // 6. If there is once again a script that will execute ...
342                  //
343                }
344              }
345            } else if (token.value == 'style' ||
346                       token.value == 'noscript' ||
347                       token.value == 'xmp') {
348              // 1. Create an element for the token in the HTML namespace.
349              var el = new JSElement (this.doc, token.value);
350    
351              // 2. Append the new element to the current node.
352              this.openElements[this.openElements.length - 1].appendChild (el);
353    
354              // 3. Switch the tokeniser's content model flag to the CDATA state.
355              this.parseMode = 'cdata';
356              this.endTagName = token.value;
357    
358              // 4.1. Collect all the character tokens.
359              while (true) {
360                var token = this.getNextToken ();
361                log ('token: ' + token.type + ' "' + token.value + '"');
362    
363                if (token.type == 'char') {
364                  // 5. Append a single Text node to the script element node.
365                  el.manakaiAppendText (token.value);
366    
367                // 4.2. Until it returns a token that is not a character token, or
368                // until it stops tokenising.
369                } else if (token.type == 'eof' ||
370                           token.type == 'end-tag' ||
371                           token.type == 'abort') {
372                  // 6. Switched back to the PCDATA state.
373                  this.parseMode = 'pcdata';
374    
375                  if (token.type == 'abort') {
376                    this.cdataEndTagRequired = true;
377                    break;
378                  }
379    
380                  // 7.1. If the next token is not an end tag token with ...
381                  if (!(token.type == 'end-tag' &&
382                        token.value == this.endTagName)) {
383                    // 7.2. This is a parse error.
384                    log ('Parse error: no </' + this.endTagName + '>');
385                    this.nextToken.unshift (token);
386    
387                    // 7.3. Mark the script element as "already executed".
388                    el.manakaiAlreadyExecuted = true;
389                  } else {
390                    // 7.4. Ignore it.
391                    //
392                  }
393                  break;
394                }
395              }
396          } else {          } else {
397            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
398            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
# Line 161  Line 405 
405          } else {          } else {
406            log ('parse error: unmatched end tag: ' + token.value);            log ('parse error: unmatched end tag: ' + token.value);
407          }          }
408          } else if (token.type == 'char') {
409            this.openElements[this.openElements.length - 1].manakaiAppendText
410                (token.value);
411        } else if (token.type == 'eof') {        } else if (token.type == 'eof') {
412          break;          break;
413          } else if (token.type == 'abort') {
414            log ('parse: abort');
415            logIndentLevel--;
416            return;
417        }        }
418      }      }
419    
420      log ('stop parsing');      log ('stop parsing');
421    
422        // readyState = 'interactive'
423    
424        // "When a script completes loading" rules start applying.
425    
426        while (this.scriptsExecutedSoon.length > 0 ||
427               this.scriptsExecutedAsynchronously.length > 0) {
428          // Handle "list of scripts that will execute as soon as possible".
429          while (this.scriptsExecutedSoon.length > 0) {
430            var e = this.scriptsExecutedSoon.shift ();
431      
432            // If it has completed loading
433            log ('Execute an external script not inserted by parser...');
434            executeScript (this.doc, e);
435    
436            // NOTE: It MAY be executed before the end of the parsing, according
437            // to the spec.
438            this.hasAsyncScript = true;
439          }
440    
441          // Handle "list of scripts that will execute asynchronously".
442          while (this.scriptsExecutedAsynchronously.length > 0) {
443            var e = this.scriptsExecutedAsynchronously.shift ();
444    
445            // Step 1.
446            // We assume that all scripts have been loaded at this time.
447      
448            // Step 2.
449            log ('Execute an asynchronous script...');
450            executeScript (this.doc, e);
451    
452            // Step 3.
453            //
454    
455            // Step 4.
456            //
457    
458            this.hasAsyncScript = true;
459          }
460        }
461    
462        // Handle "list of scripts that will execute when the document has finished
463        // parsing".
464        var list = this.scriptsExecutedAfterParsing;
465        while (list.length > 0) {
466          // TODO: break unless completed loading
467    
468          // Step 1.
469          //
470    
471          // Step 2. and Step 3.
472          log ('Executing a |defer|red script...');
473          executeScript (this.doc, list.shift ());
474    
475          // Step 4.
476        }
477    
478        log ('DOMContentLoaded event fired');
479    
480        // "delays the load event" things has completed:
481        // readyState = 'complete'
482        log ('load event fired');
483    
484        logIndentLevel--;
485    } // parse    } // parse
486    
487      Parser.prototype.setInsertionPoint = function (ip) {
488        if (ip == undefined || ip == null || isNaN (ip)) {
489          log ('insertion point: set to undefined');
490          this.insertionPoint = undefined;
491        } else if (ip == this.input.s.length) {
492          log ('insertion point: end of file');
493          this.insertionPoint = ip;
494        } else {
495          log ('insertion point: set to ' + ip +
496               ' (before "' + this.input.s.substring (0, 10) + '")');
497          this.insertionPoint = ip;
498        }
499      }; // setInsertionPoint
500    
501    function JSDocument (p) {    function JSDocument (p) {
502      this.childNodes = [];      this.childNodes = [];
503      this._parser = p;      this._parser = p;
# Line 186  Line 515 
515      e.parentNode = this;      e.parentNode = this;
516    
517      if (e.localName == 'script') {      if (e.localName == 'script') {
518        log ('start running a script');        logIndentLevel++;
519          log ('Running a script: start');
520    
521        var doc = this.ownerDocument;        var doc = this.ownerDocument || this;
522        var p = doc._parser;        var p = doc._parser;
523    
524        // 1. Script type        // 1. Script type
# Line 203  Line 533 
533        // 2.4. If the script element has its "already executed" flag set        // 2.4. If the script element has its "already executed" flag set
534        if (e.manakaiAlreadyExecuted) {        if (e.manakaiAlreadyExecuted) {
535          // 2.5. Abort these steps at this point.          // 2.5. Abort these steps at this point.
536          log ('running a script: aborted');          log ('Running a script: aborted (already executed)');
537            logIndentLevel--;
538          return e;          return e;
539        }        }
540    
# Line 218  Line 549 
549        // 5.1.        // 5.1.
550        if (/* TODO: If the document is still being parsed && */        if (/* TODO: If the document is still being parsed && */
551            e.defer && !e.async) {            e.defer && !e.async) {
552          // TODO          p.scriptsExecutedAfterParsing.push (e);
553            log ('Running a script: aborted (defer)');
554        } else if (e.async && e.src != null) {        } else if (e.async && e.src != null) {
555          // TODO          p.scriptsExecutedAsynchronously.push (e);
556        } else if (e.async && e.src == null          log ('Running a script: aborted (async src)');
557                   /* && list of scripts that will execute asynchronously is not empty */) {        } else if (e.async && e.src == null &&
558          // TODO                   p.scriptsExecutedAsynchronously.length > 0) {
559            p.scriptsExecutedAsynchronously.push (e);
560            log ('Running a script: aborted (async)');
561            // ISSUE: What is the difference with the case above?
562        } else if (e.src != null && e.manakaiParserInserted) {        } else if (e.src != null && e.manakaiParserInserted) {
563          // TODO          if (p.scriptExecutedWhenParserResumes) {
564              log ('Error: There is a script that will execute as soon as the parser resumes.');
565            }
566            p.scriptExecutedWhenParserResumes = e;
567            log ('Running a script: aborted (src parser-inserted)');
568        } else if (e.src != null) {        } else if (e.src != null) {
569          // TODO          p.scriptsExecutedSoon.push (e);
570            log ('Running a script: aborted (src)');
571        } else {        } else {
572          executeScript (doc, e); // even if other scripts are already executing.          executeScript (doc, e); // even if other scripts are already executing.
573        }        }
574    
575        log ('end running a script');        log ('Running a script: end');
576          logIndentLevel--;
577      }      }
578    
579      return e;      return e;
# Line 241  Line 582 
582    function executeScript (doc, e) {    function executeScript (doc, e) {
583      log ('executing a script block: start');      log ('executing a script block: start');
584    
585      // If the load resulted in an error, then ... firing an error event ...      var s;
586        if (e.src != null) {
587          s = getExternalScript (e.src);
588    
589          // If the load resulted in an error, then ... firing an error event ...
590          if (s == null) {
591            log ('error event fired at the script element');
592            return;
593          }
594    
595          log ('External script loaded: "' + s + '"');
596        } else {
597          s = e.text;
598        }
599    
600      // If the load was successful      // If the load was successful
     log ('load event fired at the script element');  
601    
602      if (true) {      if (true) {
603      // Scripting is enabled, Document.designMode is disabled,      // Scripting is enabled, Document.designMode is disabled,
604      // Document is the active document in its browsing context      // Document is the active document in its browsing context
605    
       var s;  
       if (e.src != null) {  
         // TODO: from external file  
       } else {  
         s = e.text;  
       }  
   
606        parseAndRunScript (doc, s);        parseAndRunScript (doc, s);
607      }      }
608    
609        log ('load event fired at the script element');
610    
611      log ('executing a script block: end');      log ('executing a script block: end');
612    } // executeScript    } // executeScript
613    
614      function getExternalScript (uri) {
615        if (uri.match (/^javascript:/i)) {
616          var m;
617          if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
618            if (m[1]) {
619              return unescapeJSLiteral (m[1]);
620            } else if (m[2]) {
621              return unescapeJSLiteral (m[2]);
622            } else {
623              return null;
624            }
625          } else {
626            log ('Complex javascript: URI is not supported: <' + uri + '>');
627            return null;
628          }
629        } else {
630          log ('URI scheme not supported: <' + uri + '>');
631          return null;
632        }
633      } // getExternalScript
634    
635    function parseAndRunScript (doc, s) {    function parseAndRunScript (doc, s) {
636      while (true) {      while (true) {
637        var matched = false;        var matched = false;
# Line 270  Line 639 
639          matched = true;          matched = true;
640          var args = [];          var args = [];
641          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
642            args.push (v.substring (1, v.length - 1));            args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
643            return '';            return '';
644          });          });
645          doc.write.apply (doc, args);          doc.write.apply (doc, args);
646          return '';          return '';
647        });        });
648          var noDocumentElement = false;
649          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
650          function (s, t, u) {
651            matched = true;
652            var args = [unescapeJSLiteral (t ? t : u)];
653            noDocumentElement = !doc._insertExternalScript.apply (doc, args);
654            return '';
655          });
656          if (noDocumentElement) {
657            log ('Script error: documentElement is null');
658            break;
659          }
660          s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
661          function (s, t) {
662            matched = true;
663            log (dumpTree (doc, ''));
664            return '';
665          });
666        if (s == '') break;        if (s == '') break;
667        if (!matched) {        if (!matched) {
668          log ('Script parse error: "' + s + '"');          log ('Script parse error: "' + s + '"');
# Line 284  Line 671 
671      }      }
672    } // parseAndRunScript    } // parseAndRunScript
673    
674      function unescapeJSLiteral (s) {
675        return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
676          return String.fromCharCode (parseInt ('0x' + v));
677        });
678      } // unescapeJSLiteral
679    
680    function JSText (data) {    function JSText (data) {
681      this.data = data;      this.data = data;
682    } // JSText    } // JSText
# Line 299  Line 692 
692      }      }
693    }; // manakaiAppendText    }; // manakaiAppendText
694    
695      JSDocument.prototype.open = function () {
696        // Two or fewer arguments
697    
698        // Step 1.
699        var type = arguments[0] || 'text/html';
700        
701        // Step 2.
702        var replace = arguments[1] == 'replace';
703    
704        // Step 3.
705        if (this._parser &&
706            !this._parser.scriptCreated &&
707            this._parser.input.insertionPoint != undefined) {
708          log ('document.open () in parsing mode is ignored');
709          return this;
710        }
711    
712        // Step 4.
713        log ('onbeforeunload event fired');
714        log ('onunload event fired');
715    
716        // Step 5.
717        if (this._parser) {
718          // Discard the parser.
719        }
720    
721        // Step 6.
722        log ('document cleared by document.open ()');
723        this.childNodes = [];
724    
725        // Step 7.
726        this._parser = new Parser (new InputStream (''), this);
727        this._parser.scriptCreated = true;
728    
729        // Step 8.
730        this.manakaiIsHTML = true;
731    
732        // Step 9.
733        // If not text/html, ...
734    
735        // Step 10.
736        if (!replace) {
737          // History      
738        }
739    
740        // Step 11.
741        this._parser.setInsertionPoint (this._parser.input.s.length);
742    
743        // Step 12.
744        return this;
745      }; // document.open
746    
747    JSDocument.prototype.write = function () {    JSDocument.prototype.write = function () {
748        log ('document.write: start');
749        logIndentLevel++;
750    
751        var p = this._parser;
752    
753      // 1. If the insertion point is undefined, the open() method must be ...      // 1. If the insertion point is undefined, the open() method must be ...
754      //      if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
755          this.open ();
756          p = this._parser;
757        }
758    
759      // 2. ... inserted into the input stream just before the insertion point.      // 2. ... inserted into the input stream just before the insertion point.
760      log ('document.write: insert "' + Array.join (arguments, '') + '"');      var s = Array.join (arguments, '');
761        log ('document.write: insert "' + s + '"' +
762             ' before "' +
763             p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
764        p.input.s = p.input.s.substring (0, p.insertionPoint) + s
765            + p.input.s.substring (p.insertionPoint, p.input.s.length);
766        p.insertionPoint += s.length;
767    
768      // 3. If there is a script that will execute as soon as the parser resumes      // 3. If there is a script that will execute as soon as the parser resumes
769      // TODO      if (p.scriptExecutedAfterParserResumes) {
770          log ('document.write: processed later (there is an unprocessed <script src>)');
771          logIndentLevel--;
772          log ('document.write: return');
773          return;
774        }
775    
776      // 4. Process the characters that were inserted, ...      // 4. Process the characters that were inserted, ...
777        var originalReentrant = p.reentrant;
778        p.reentrant = true;
779        p.parse ();
780        p.reentrant = originalReentrant;
781        // TODO: "Abort the processing of any nested invokations of the tokeniser,
782        // yielding control back to the caller." (<script> parsing).  Do we need
783        // to do something here?
784    
785      // 5. Return      // 5. Return
786        logIndentLevel--;
787      log ('document.write: return');      log ('document.write: return');
788    
789      return;      return;
790    }; // document.write    }; // document.write
791    
792      JSDocument.prototype._insertExternalScript = function (uri) {
793        var s = new JSElement (this, 'script');
794        s.src = uri;
795        if (this.documentElement) {
796          this.documentElement.appendChild (s);
797          return true;
798        } else {
799          return false;
800        }
801      }; // _insertExternalScript
802    
803      JSDocument.prototype.__defineGetter__ ('documentElement', function () {
804        var cn = this.childNodes;
805        for (var i = 0; i < cn.length; i++) {
806          if (cn[i] instanceof JSElement) {
807            return cn[i]
808          }
809        }
810        return null;
811      });
812    
813    JSElement.prototype.__defineGetter__ ('text', function () {    JSElement.prototype.__defineGetter__ ('text', function () {
814      var r = '';      var r = '';
815      for (var i = 0; i < this.childNodes.length; i++) {      for (var i = 0; i < this.childNodes.length; i++) {
# Line 332  Line 826 
826        var node = n.childNodes[i];        var node = n.childNodes[i];
827        if (node instanceof JSElement) {        if (node instanceof JSElement) {
828          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
829            if (node.async) r += '| ' + indent + '  async=""\n';
830            if (node.defer) r += '| ' + indent + '  defer=""\n';
831            if (node.src != null) {
832              r += '| ' + indent + '  src="' + node.src + '"\n';
833            }
834          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
835        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
836          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 345  Line 844 
844  </head>  </head>
845  <body onload="  <body onload="
846    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
847    
848      var q = location.search;
849      if (q != null) {
850        q = q.substring (1).split (/;/);
851        for (var i = 0; i < q.length; i++) {
852          var v = q[i].split (/=/, 2);
853          v[0] = decodeURIComponent (v[0]);
854          v[1] = decodeURIComponent (v[1] || '');
855          if (v[0] == 's') {
856            document.sourceElement.value = v[1];
857          }
858        }
859      }
860    
861    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
862    update ();    update ();
863  ">  ">
864    <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
865    Parser</h1>
866    
867  <textarea onchange=" update () ">&lt;html>  <h2>Markup to test
868    (<a href=data:, id=permalink rel=bookmark>permalink</a>,
869    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
870        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
871        Viewer</a>)</h2>
872    <p>
873    <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
874  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
875  &lt;p>  &lt;p>
876  &lt;script>  &lt;script>
877  document.write ('aaaaaaa&lt;/p>\n&lt;script>\ndocument.write("cccccc")\n&lt;/', 'script>\nbbbbbb');  document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
878  &lt;/script>  &lt;/script>
879  &lt;p>  &lt;p>
880  </textarea>  </textarea>
881    
882  <output></output>  <h2 id=log>Log</h2>
883    <p><output></output>
884    
885    <h2 id=notes>Notes</h2>
886    
887    <p>This is a <em>simplified</em> implementation of
888    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
889    Parsing Algorithm</a>.  It only implements the script-related part of the
890    algorithm.  In particular, this parser:
891    <ul>
892    <li>Does not support <code>DOCTYPE</code> and comment tokens.
893    <li>Does not support entities except for <code>&amp;quot;</code>,
894    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in the <code>script</code>
895    <code>src</code> attribute value.
896    <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
897    algorithm, and so on.
898    <li>Does not raise parse errors for invalid attribute specifications in start
899    or end tags.
900    <li>Does not support RCDATA elements (<code>title</code> and
901    <code>textarea</code>).
902    <li>Does not strip the first newline in <code>pre</code>,
903    <code>listing</code>, and <code>textarea</code> elements.
904    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
905    in CDATA/RCDATA elements.
906    <li>Does not support foreign (SVG or MathML) elements.
907    <li>Only supports <code>script</code> <code>type</code>
908    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
909    attributes are ignored.
910    <li>Only supports a limited set of statements.  A script must consist of
911    zero or more statements similar to the following ones, possibly
912    preceded, followed, or separated by white space characters:
913      <ul>
914      <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
915      <li><code>var s = document.createElement ("script");
916                s.src = "<var>string</var>";
917                document.documentElement.appendChild (s);</code>
918      <li><code>w (document.documentElement.innerHTML);</code> (This statement
919      can be used to dump the document, even when the document has no
920      document element.  The output format is the tree dump format used
921      in html5lib test data, not <abbr>HTML</abbr>.)
922      </ul>
923    Note that strings may be delimited by <code>'</code>s instead of
924    <code>"</code>s.
925    <li>Only supports the <code>javascript:</code>
926    <abbr title="Uniform Resource Identifier">URI</abbr> scheme in the
927    <code>src</code> attribute of the <code>script</code> element.  In addition,
928    the <abbr title="Uniform Resource Identifier">URI</abbr> must conform to
929    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
930    <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
931    string literals.
932    <li>Does not handle the <i>stop parsing</i> phase correctly if the document is
933    replaced by a <code>document.open ()</code> call.  In other words, delayed
934    (deferred or asynchronous) script executions and event firings might be
935    handled incorrectly if a <code>document.open ()</code> invocation
936    is implicitly performed by <code>document.write ()</code> in a delayed script.
937    </ul>
938    
939    <p>For some reason, this parser does not work in browsers that do
940    not support JavaScript 1.5.
941    
942    <!-- TODO: The |src| attribute value should refer to the value at the time
943    when it is inserted into the document, not the value when the script is
944    executed.  Currently it does not matter, since we don't allow dynamic
945    modification to the |src| content/DOM attribute value yet. -->
946    
947  </body>  </body>
 </html>  
948    </html>
949    <!-- $Date$ -->
950    <!--
951    
952    Copyright 2008 Wakaba <w@suika.fam.cx>
953    
954    This program is free software; you can redistribute it and/or
955    modify it under the terms of the GNU General Public License
956    as published by the Free Software Foundation; either version 2
957    of the License, or (at your option) any later version.
958    
959    This program is distributed in the hope that it will be useful,
960    but WITHOUT ANY WARRANTY; without even the implied warranty of
961    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
962    GNU General Public License for more details.
963    
964    You should have received a copy of the GNU General Public License
965    along with this program; if not, write to the Free Software
966    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
967    
968    -->

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.17

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24