/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2 by wakaba, Sun Apr 20 07:48:00 2008 UTC revision 1.14 by wakaba, Tue Apr 29 02:50:00 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5    <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6    <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7        title="GNU GPL2 or later">
8  <style>  <style>
9      h1 {
10        margin: 0;
11        font-size: 150%;
12      }
13      h2 {
14        margin: 0;
15        font-size: 100%;
16      }
17      p {
18        margin: 0 1em;
19      }
20    textarea {    textarea {
21       display: block;      width: 100%;
22       width: 80%;      -width: 99%;
23       margin-left: auto;      height: 10em;
      margin-right: auto;  
      min-height: 20em;  
24    }    }
25    output {    output {
26      display: block;      display: block;
27      font-family: monospace;      font-family: monospace;
28      white-space: pre;      white-space: -moz-pre-wrap;
29        white-space: pre-wrap;
30    }    }
31  </style>  </style>
32  <script>  <script>
33      var delayedUpdater = 0;
34    
// Debounced entry point for input changes: cancels any run of update2
// that is still pending and schedules a fresh one 100 ms from now.
function update () {
  var pendingTimer = delayedUpdater;
  if (pendingTimer) {
    clearTimeout (pendingTimer);
    delayedUpdater = 0;
  }

  // Remember the timer id so that the next call can cancel it.
  delayedUpdater = setTimeout (update2, 100);
} // update
42    
// Re-parses the source text if it has changed since the previous run:
// refreshes the permalink and Live DOM Viewer links, clears the log, runs
// the parser and dumps the resulting tree into the log.
function update2 () {
  var source = document.sourceElement.value;

  // Nothing to do when the text is the same as on the previous run.
  if (source == document.previousSourceText) return;
  document.previousSourceText = source;

  // Both links carry the current source text, percent-encoded.
  var encodedSource = encodeURIComponent (source);
  document.links['permalink'].href
      = location.pathname + '?s=' + encodedSource;
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
      + encodedSource;

  // Start from an empty log, then parse.
  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  var parsedDocument = parser.doc; // captured before parsing starts
  parser.parse ();

  log (dumpTree (parsedDocument, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65    
66      var logIndentLevel = 0;
// Appends one line to the log element, prefixed with two spaces for each
// level of the current log indentation (logIndentLevel).
function log (s) {
  var line = s;
  for (var depth = logIndentLevel; depth > 0; depth--) {
    line = '  ' + line;
  }

  var textNode = document.createTextNode (line + "\n");
  document.logElement.appendChild (textNode);
} // log
73    
# Line 32  Line 75 
75      this.s = s;      this.s = s;
76    } // InputStream    } // InputStream
77    
// Parser constructor.
//   i   - the input stream to tokenise (stored as this.input).
//   doc - optional document to parse into; when omitted, a new JSDocument
//         bound to this parser is created and flagged as an HTML document.
function Parser (i, doc) {
  this.parseMode = 'pcdata';

  var targetDocument = doc;
  if (!targetDocument) {
    targetDocument = new JSDocument (this);
    targetDocument.manakaiIsHTML = true;
  }
  this.doc = targetDocument;

  // The document itself is the bottom entry of the stack of open elements.
  this.openElements = [targetDocument];
  this.input = i;

  // Script queues; all of them start empty.
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
} // Parser
91    
92    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
93      var i = this.in;      var p = this;
94      if (this.parseMode == 'script') {      var i = this.input;
95        if (this.parseMode == 'cdata') {
96          var tagName = this.endTagName;
97        var token;        var token;
98        i.s = i.s.replace (/^([\s\S]+?)<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/,        if (p.insertionPoint <= 0) {
99            return {type: 'abort'};
100          }
101          i.s = i.s.replace (/^([^<]+)/,
102        function (s, t) {        function (s, t) {
103            if (0 < p.insertionPoint && p.insertionPoint < t.length) {
104              token = {type: 'char', value: t.substring (0, p.insertionPoint)};
105              var ip = p.insertionPoint;
106              p.insertionPoint = 0;
107              return t.substring (ip, t.length);
108            }
109          token = {type: 'char', value: t};          token = {type: 'char', value: t};
110          return '<' + '/script>';          p.insertionPoint -= t.length;
111            return '';
112        });        });
113        if (token) return token;        if (token) return token;
114        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function () {        var pattern = new RegExp ('^</' + tagName + '>', 'i');
115          token = {type: 'end-tag', value: 'script'};        i.s = i.s.replace (pattern, function (s) {
116            if (p.insertionPoint < s.length) {
117              token = {type: 'abort'};
118              return s;
119            }
120            token = {type: 'end-tag', value: tagName};
121            p.insertionPoint -= s.length;
122            return '';
123          });
124          if (token) return token;
125          var m;
126          if ((p.insertionPoint < ('</' + tagName).length) &&
127              (m = i.s.match (/^<\/([A-Za-z]+)/))) {
128            var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
129            if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
130              return {type: 'abort'};
131            }
132          }
133          i.s = i.s.replace (/^</,
134          function (s) {
135            token = {type: 'char', value: s};
136            p.insertionPoint -= s.length;
137          return '';          return '';
138        });        });
139        if (token) return token;        if (token) return token;
# Line 58  Line 141 
141      }      }
142    
143      var token;      var token;
144      i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
145          if (p.insertionPoint < s.length ||
146              (p.insertionPoint <= s.length &&
147               s.substring (s.length - 1, 1) != '>')) {
148            token = {type: 'abort'};
149            return s;
150          }
151        token = {type: 'end-tag', value: e.toLowerCase ()};        token = {type: 'end-tag', value: e.toLowerCase ()};
152          p.insertionPoint -= s.length;
153        return '';        return '';
154      });      });
155      if (token) return token;      if (token) return token;
156      i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
157        token = {type: 'start-tag', value: e.toLowerCase ()};        if (p.insertionPoint < s.length ||
158              (p.insertionPoint <= s.length &&
159               s.substring (s.length - 1, 1) != '>')) {
160            token = {type: 'abort'};
161            return s;
162          }
163          var tagName;
164          var attrs = {};
165          e = e.replace (/^[\S]+/, function (v) {
166            tagName = v.toLowerCase ();
167            return '';
168          });
169          while (true) {
170            var m = false;
171            e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
172            function (x, attrName, attrValue1, attrValue2, attrValue3) {
173              v = attrValue1 || attrValue2 || attrValue3;
174              v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
175                  .replace (/&amp;/g, '&');
176              attrs[attrName.toLowerCase ()] = v;
177              m = true;
178              return '';
179            });
180            if (!m) break;
181          }
182          if (e.length) {
183            log ('Broken start tag: "' + e + '"');
184          }
185          token = {type: 'start-tag', value: tagName, attrs: attrs};
186          p.insertionPoint -= s.length;
187        return '';        return '';
188      });      });
189      if (token) return token;      if (token) return token;
190        if (p.insertionPoint <= 0) {
191          return {type: 'abort'};
192        }
193      i.s = i.s.replace (/^[^<]+/, function (s) {      i.s = i.s.replace (/^[^<]+/, function (s) {
194          if (p.insertionPoint < s.length) {
195            token = {type: 'char', value: s.substring (0, p.insertionPoint)};
196            var ip = p.insertionPoint;
197            p.insertionPoint = 0;
198            return s.substring (ip, s.length);
199          }
200        token = {type: 'char', value: s};        token = {type: 'char', value: s};
201          p.insertionPoint -= s.length;
202        return '';        return '';
203      });      });
204      if (token) return token;      if (token) return token;
205      i.s = i.s.replace (/^[\s\S]/, function (s) {      i.s = i.s.replace (/^[\s\S]/, function (s) {
206        token = {type: 'char', value: s};        token = {type: 'char', value: s};
207          p.insertionPoint -= s.length;
208        return '';        return '';
209      });      });
210      if (token) return token;      if (token) return token;
# Line 82  Line 212 
212    } // getNextToken    } // getNextToken
213    
214    Parser.prototype.parse = function () {    Parser.prototype.parse = function () {
215      log ('start parsing');      logIndentLevel++;
216        log ('parse: start');
217    
218      while (true) {      while (true) {
219        var token = this.getNextToken ();        var token = this.getNextToken ();
# Line 92  Line 223 
223          if (token.value == 'script') {          if (token.value == 'script') {
224            // 1. Create an element for the token in the HTML namespace.            // 1. Create an element for the token in the HTML namespace.
225            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
226              if (token.attrs.async != null) el.async = true;
227              if (token.attrs.defer != null) el.defer = true;
228              if (token.attrs.src != null) el.src = token.attrs.src;
229    
230            // 2. Mark the element as being "parser-inserted".            // 2. Mark the element as being "parser-inserted".
231            el.manakaiParserInserted = true;            el.manakaiParserInserted = true;
232    
233            // 3. Switch the tokeniser's content model flag to the CDATA state.            // 3. Switch the tokeniser's content model flag to the CDATA state.
234            this.parseMode = 'script';            this.parseMode = 'cdata';
235              this.endTagName = 'script';
236    
237            // 4.1. Collect all the character tokens.            // 4.1. Collect all the character tokens.
238            while (true) {            while (true) {
# Line 109  Line 244 
244                el.manakaiAppendText (token.value);                el.manakaiAppendText (token.value);
245    
246              // 4.2. Until it returns a token that is not a character token, or              // 4.2. Until it returns a token that is not a character token, or
247              // TODO: 4.3. Until it stops tokenising.              // until it stops tokenising.
248              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
249                         (token.type == 'end-tag' && token.value == 'script')) {                         token.type == 'end-tag' ||
250                           token.type == 'abort') {
251                // 6. Switched back to the PCDATA state.                // 6. Switched back to the PCDATA state.
252                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
253    
254                // 7.1. If the next token is not an end tag token with ...                // 7.1. If the next token is not an end tag token with ...
255                if (token.type != 'end-tag') {                if (!(token.type == 'end-tag' && token.value == 'script')) {
256                  // 7.2. This is a parse error.                  // 7.2. This is a parse error.
257                  log ('Parse error: no </' + 'script>');                  log ('Parse error: no </' + 'script>');
258    
# Line 138  Line 274 
274            }            }
275    
276            // 9.1. Let the old insertion point have the same value as the ...            // 9.1. Let the old insertion point have the same value as the ...
277              var oldInsertionPoint = this.insertionPoint;
278            // 9.2. Let the insertion point be just before the next input ...            // 9.2. Let the insertion point be just before the next input ...
279              this.setInsertionPoint (0);
280    
281            // 10. Append the new element to the current node.            // 10. Append the new element to the current node.
282            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
283    
284            // 11. Let the insertion point have the value of the old ...            // 11. Let the insertion point have the value of the old ...
285    
286              oldInsertionPoint += this.insertionPoint;
287              this.setInsertionPoint (oldInsertionPoint);
288    
289            // 12. If there is a script that will execute as soon as ...            // 12. If there is a script that will execute as soon as ...
290                        while (this.scriptExecutedWhenParserResumes) {
291                // 12.1. If the tree construction stage is being called reentrantly
292                if (this.reentrant) {
293                  log ('parse: abort (reentrance)');
294                  logIndentLevel--;
295                  return;
296    
297                // 12.2. Otherwise
298                } else {
299                  // 1.
300                  var script = this.scriptExecutedWhenParserResumes;
301                  this.scriptExecutedWhenParserResumes = null;
302    
303                  // 2. Pause until the script has completed loading.
304                  //
305    
306                  // 3. Let the insertion point to just before the next input char.
307                  this.setInsertionPoint (0);
308    
309                  // 4. Execute the script.
310                  executeScript (this.doc, script);
311    
312                  // 5. Let the insertion point be undefined again.
313                  this.setInsertionPoint (undefined);
314    
315                  // 6. If there is once again a script that will execute ...
316                  //
317                }
318              }
319            } else if (token.value == 'style' ||
320                       token.value == 'noscript' ||
321                       token.value == 'xmp') {
322              // 1. Create an element for the token in the HTML namespace.
323              var el = new JSElement (this.doc, token.value);
324    
325              // 2. Append the new element to the current node.
326              this.openElements[this.openElements.length - 1].appendChild (el);
327    
328              // 3. Switch the tokeniser's content model flag to the CDATA state.
329              this.parseMode = 'cdata';
330              this.endTagName = token.value;
331    
332              // 4.1. Collect all the character tokens.
333              while (true) {
334                var token = this.getNextToken ();
335                log ('token: ' + token.type + ' "' + token.value + '"');
336    
337                if (token.type == 'char') {
338                  // 5. Append a single Text node to the script element node.
339                  el.manakaiAppendText (token.value);
340    
341                // 4.2. Until it returns a token that is not a character token, or
342                // until it stops tokenising.
343                } else if (token.type == 'eof' ||
344                           token.type == 'end-tag' ||
345                           token.type == 'abort') {
346                  // 6. Switched back to the PCDATA state.
347                  this.parseMode = 'pcdata';
348    
349                  // 7.1. If the next token is not an end tag token with ...
350                  if (!(token.type == 'end-tag' &&
351                        token.value == this.endTagName)) {
352                    // 7.2. This is a parse error.
353                    log ('Parse error: no </' + this.endTagName + '>');
354    
355                    // 7.3. Mark the script element as "already executed".
356                    el.manakaiAlreadyExecuted = true;
357                  } else {
358                    // 7.4. Ignore it.
359                    //
360                  }
361                  break;
362                }
363              }
364          } else {          } else {
365            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
366            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
# Line 161  Line 373 
373          } else {          } else {
374            log ('parse error: unmatched end tag: ' + token.value);            log ('parse error: unmatched end tag: ' + token.value);
375          }          }
376          } else if (token.type == 'char') {
377            this.openElements[this.openElements.length - 1].manakaiAppendText
378                (token.value);
379        } else if (token.type == 'eof') {        } else if (token.type == 'eof') {
380          break;          break;
381          } else if (token.type == 'abort') {
382            log ('parse: abort');
383            logIndentLevel--;
384            return;
385        }        }
386      }      }
387    
388      log ('stop parsing');      log ('stop parsing');
389    
390        // readyState = 'interactive'
391    
392        // "When a script completes loading" rules start applying.
393    
394        while (this.scriptsExecutedSoon.length > 0 ||
395               this.scriptsExecutedAsynchronously.length > 0) {
396          // Handle "list of scripts that will execute as soon as possible".
397          while (this.scriptsExecutedSoon.length > 0) {
398            var e = this.scriptsExecutedSoon.shift ();
399      
400            // If it has completed loading
401            log ('Execute an external script not inserted by parser...');
402            executeScript (this.doc, e);
403    
404            // NOTE: It MAY be executed before the end of the parsing, according
405            // to the spec.
406            this.hasAsyncScript = true;
407          }
408    
409          // Handle "list of scripts that will execute asynchronously".
410          while (this.scriptsExecutedAsynchronously.length > 0) {
411            var e = this.scriptsExecutedAsynchronously.shift ();
412    
413            // Step 1.
414            // We assume that all scripts have been loaded at this time.
415      
416            // Step 2.
417            log ('Execute an asynchronous script...');
418            executeScript (this.doc, e);
419    
420            // Step 3.
421            //
422    
423            // Step 4.
424            //
425    
426            this.hasAsyncScript = true;
427          }
428        }
429    
430        // Handle "list of scripts that will execute when the document has finished
431        // parsing".
432        var list = this.scriptsExecutedAfterParsing;
433        while (list.length > 0) {
434          // TODO: break unless completed loading
435    
436          // Step 1.
437          //
438    
439          // Step 2. and Step 3.
440          log ('Executing a |defer|red script...');
441          executeScript (this.doc, list.shift ());
442    
443          // Step 4.
444        }
445    
446        log ('DOMContentLoaded event fired');
447    
448        // "delays the load event" things has completed:
449        // readyState = 'complete'
450        log ('load event fired');
451    
452        logIndentLevel--;
453    } // parse    } // parse
454    
// Sets the parser's insertion point and logs the change.
//   ip - offset into the remaining input (this.input.s).  undefined, null
//        or NaN all mean "insertion point is undefined".
// The log preview shows up to ten characters that follow the new insertion
// point, in the same way as the preview logged by document.write.
Parser.prototype.setInsertionPoint = function (ip) {
  // |ip == null| is also true for undefined.
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var input = this.input.s;
  if (ip == input.length) {
    log ('insertion point: end of file');
  } else {
    // Preview the text at |ip|, not the first characters of the input;
    // the two are the same only when |ip| is zero.
    log ('insertion point: set to ' + ip +
         ' (before "' + input.substring (ip, ip + 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
468    
469    function JSDocument (p) {    function JSDocument (p) {
470      this.childNodes = [];      this.childNodes = [];
471      this._parser = p;      this._parser = p;
# Line 186  Line 483 
483      e.parentNode = this;      e.parentNode = this;
484    
485      if (e.localName == 'script') {      if (e.localName == 'script') {
486        log ('start running a script');        logIndentLevel++;
487          log ('Running a script: start');
488    
489        var doc = this.ownerDocument;        var doc = this.ownerDocument || this;
490        var p = doc._parser;        var p = doc._parser;
491    
492        // 1. Script type        // 1. Script type
# Line 203  Line 501 
501        // 2.4. If the script element has its "already executed" flag set        // 2.4. If the script element has its "already executed" flag set
502        if (e.manakaiAlreadyExecuted) {        if (e.manakaiAlreadyExecuted) {
503          // 2.5. Abort these steps at this point.          // 2.5. Abort these steps at this point.
504          log ('running a script: aborted');          log ('Running a script: aborted');
505            logIndentLevel--;
506          return e;          return e;
507        }        }
508    
# Line 218  Line 517 
517        // 5.1.        // 5.1.
518        if (/* TODO: If the document is still being parsed && */        if (/* TODO: If the document is still being parsed && */
519            e.defer && !e.async) {            e.defer && !e.async) {
520          // TODO          p.scriptsExecutedAfterParsing.push (e);
521            log ('Running a script: aborted (defer)');
522        } else if (e.async && e.src != null) {        } else if (e.async && e.src != null) {
523          // TODO          p.scriptsExecutedAsynchronously.push (e);
524        } else if (e.async && e.src == null          log ('Running a script: aborted (async src)');
525                   /* && list of scripts that will execute asynchronously is not empty */) {        } else if (e.async && e.src == null &&
526          // TODO                   p.scriptsExecutedAsynchronously.length > 0) {
527            p.scriptsExecutedAsynchronously.push (e);
528            log ('Running a script: aborted (async)');
529            // ISSUE: What is the difference with the case above?
530        } else if (e.src != null && e.manakaiParserInserted) {        } else if (e.src != null && e.manakaiParserInserted) {
531          // TODO          if (p.scriptExecutedWhenParserResumes) {
532              log ('Error: There is a script that will execute as soon as the parser resumes.');
533            }
534            p.scriptExecutedWhenParserResumes = e;
535            log ('Running a script: aborted (src parser-inserted)');
536        } else if (e.src != null) {        } else if (e.src != null) {
537          // TODO          p.scriptsExecutedSoon.push (e);
538            log ('Running a script: aborted (src)');
539        } else {        } else {
540          executeScript (doc, e); // even if other scripts are already executing.          executeScript (doc, e); // even if other scripts are already executing.
541        }        }
542    
543        log ('end running a script');        log ('Running a script: end');
544          logIndentLevel--;
545      }      }
546    
547      return e;      return e;
# Line 241  Line 550 
550    function executeScript (doc, e) {    function executeScript (doc, e) {
551      log ('executing a script block: start');      log ('executing a script block: start');
552    
553      // If the load resulted in an error, then ... firing an error event ...      var s;
554        if (e.src != null) {
555          s = getExternalScript (e.src);
556    
557          // If the load resulted in an error, then ... firing an error event ...
558          if (s == null) {
559            log ('error event fired at the script element');
560            return;
561          }
562    
563          log ('External script loaded: "' + s + '"');
564        } else {
565          s = e.text;
566        }
567    
568      // If the load was successful      // If the load was successful
569      log ('load event fired at the script element');      log ('load event fired at the script element');
# Line 250  Line 572 
572      // Scripting is enabled, Document.designMode is disabled,      // Scripting is enabled, Document.designMode is disabled,
573      // Document is the active document in its browsing context      // Document is the active document in its browsing context
574    
       var s;  
       if (e.src != null) {  
         // TODO: from external file  
       } else {  
         s = e.text;  
       }  
   
575        parseAndRunScript (doc, s);        parseAndRunScript (doc, s);
576      }      }
577    
578      log ('executing a script block: end');      log ('executing a script block: end');
579    } // executeScript    } // executeScript
580    
// Emulates fetching an external script.
//   uri - value of the script's |src|.
// Only |javascript:| URIs whose body is a single quoted string literal are
// supported; the (unescaped) content of the literal is returned as the
// script text.  Returns null, after logging the reason, for anything else;
// the caller treats null as a load error.
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quote styles accept an empty literal ([^"]* rather than [^"]+).
  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups took part in the match.  An empty literal
  // is a successfully loaded empty script, not a load error, so fall back
  // to '' instead of null.
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
601    
602    function parseAndRunScript (doc, s) {    function parseAndRunScript (doc, s) {
603      while (true) {      while (true) {
604        var matched = false;        var matched = false;
# Line 270  Line 606 
606          matched = true;          matched = true;
607          var args = [];          var args = [];
608          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {          t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
609            args.push (v.substring (1, v.length - 1));            args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
610            return '';            return '';
611          });          });
612          doc.write.apply (doc, args);          doc.write.apply (doc, args);
613          return '';          return '';
614        });        });
615          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
616          function (s, t, u) {
617            matched = true;
618            var args = [unescapeJSLiteral (t ? t : u)];
619            doc._insertExternalScript.apply (doc, args);
620            return '';
621          });
622        if (s == '') break;        if (s == '') break;
623        if (!matched) {        if (!matched) {
624          log ('Script parse error: "' + s + '"');          log ('Script parse error: "' + s + '"');
# Line 284  Line 627 
627      }      }
628    } // parseAndRunScript    } // parseAndRunScript
629    
// Replaces every \uXXXX escape (exactly four hexadecimal digits) in |s|
// with the character of that code unit.  Other escapes are left as is.
function unescapeJSLiteral (s) {
  var unicodeEscape = /\\u([0-9A-Fa-f]{4})/g;

  function decode (wholeMatch, hexDigits) {
    return String.fromCharCode (parseInt (hexDigits, 16));
  }

  return s.replace (unicodeEscape, decode);
} // unescapeJSLiteral
635    
636    function JSText (data) {    function JSText (data) {
637      this.data = data;      this.data = data;
638    } // JSText    } // JSText
# Line 299  Line 648 
648      }      }
649    }; // manakaiAppendText    }; // manakaiAppendText
650    
// Emulates document.open (): drops the current tree and attaches a new,
// script-created parser with an empty input stream to this document.
//   arguments[0] - MIME type (defaults to 'text/html'; not used yet).
//   arguments[1] - 'replace' to replace the history entry (not used yet).
// Returns the document itself.  The call is ignored while a parser that was
// not created by a script still has a defined insertion point.
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html'; // only needed by Step 9 (TODO)

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser itself (see
  // setInsertionPoint), not of its input stream.  NaN counts as undefined,
  // as in document.write.
  var parser = this._parser;
  if (parser && !parser.scriptCreated) {
    var ip = parser.insertionPoint;
    if (!(ip == undefined || isNaN (ip))) {
      log ('document.open () in parsing mode is ignored');
      return this;
    }
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  // The new input stream is empty, so this is the end of the input.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
702    
// Emulates document.write (): concatenates all arguments, inserts the
// result into the parser's input stream at the insertion point and, unless
// an external script is waiting for the parser to resume, parses the
// inserted text reentrantly.  Returns nothing.
JSDocument.prototype.write = function () {
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser; // open () may have replaced the parser
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array, so borrow Array.prototype.join.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // (the property set by appendChild for a parser-inserted <script src>).
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing).  Do we need
  // to do something here?

  // 5. Return
  log ('document.write: return');

  logIndentLevel--;
  return;
}; // document.write
745    
746      JSDocument.prototype._insertExternalScript = function (uri) {
747        var s = new JSElement (this, 'script');
748        s.src = uri;
749        this.documentElement.appendChild (s);
750      }; // _insertExternalScript
751    
752      JSDocument.prototype.__defineGetter__ ('documentElement', function () {
753        var cn = this.childNodes;
754        for (var i = 0; i < cn.length; i++) {
755          if (cn[i] instanceof JSElement) {
756            return cn[i]
757          }
758        }
759        return null;
760      });
761    
762    JSElement.prototype.__defineGetter__ ('text', function () {    JSElement.prototype.__defineGetter__ ('text', function () {
763      var r = '';      var r = '';
764      for (var i = 0; i < this.childNodes.length; i++) {      for (var i = 0; i < this.childNodes.length; i++) {
# Line 332  Line 775 
775        var node = n.childNodes[i];        var node = n.childNodes[i];
776        if (node instanceof JSElement) {        if (node instanceof JSElement) {
777          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
778            if (node.async) r += '| ' + indent + '  async=""\n';
779            if (node.defer) r += '| ' + indent + '  defer=""\n';
780            if (node.src != null) {
781              r += '| ' + indent + '  src="' + node.src + '"\n';
782            }
783          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
784        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
785          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 345  Line 793 
793  </head>  </head>
794  <body onload="  <body onload="
795    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
796    
797      var q = location.search;
798      if (q != null) {
799        q = q.substring (1).split (/;/);
800        for (var i = 0; i < q.length; i++) {
801          var v = q[i].split (/=/, 2);
802          v[0] = decodeURIComponent (v[0]);
803          v[1] = decodeURIComponent (v[1] || '');
804          if (v[0] == 's') {
805            document.sourceElement.value = v[1];
806          }
807        }
808      }
809    
810    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
811    update ();    update ();
812  ">  ">
813    <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
814    Parser</h1>
815    
816  <textarea onchange=" update () ">&lt;html>  <h2>Markup to test
817    (<a href=data:, id=permalink rel=bookmark>permalink</a>,
818    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
819        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
820        Viewer</a>)</h2>
821    <p>
822    <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
823  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
824  &lt;p>  &lt;p>
825  &lt;script>  &lt;script>
826  document.write ('aaaaaaa&lt;/p>\n&lt;script>\ndocument.write("cccccc")\n&lt;/', 'script>\nbbbbbb');  document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
827  &lt;/script>  &lt;/script>
828  &lt;p>  &lt;p>
829  </textarea>  </textarea>
830    
831  <output></output>  <h2 id=log>Log</h2>
832    <p><output></output>
833    
834    <h2 id=notes>Notes</h2>
835    
836    <p>This is a <em>simplified</em> implementation of the
837    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
838    Parsing Algorithm</a>.  It implements only the script-related part of the
839    algorithm.  In particular, this parser:
840    <ul>
841    <li>Does not support <code>DOCTYPE</code> and comment tokens.
842    <li>Does not support entities, except for <code>&amp;quot;</code>,
843    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in the <code>src</code>
844    attribute value of the <code>script</code> element.
845    <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
846    algorithm, and so on.
847    <li>Does not raise parse errors for invalid attribute specifications in start
848    or end tags.
849    <li>Does not support RCDATA elements (<code>title</code> and
850    <code>textarea</code>).
851    <li>Does not strip the first newline in <code>pre</code> elements.
852    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
853    in <code>script</code> element.
854    <li>Does not support foreign (SVG or MathML) elements.
855    <li>Only supports <code>script</code> <code>type</code>
856    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
857    attributes are ignored.
858    <li>Only supports a limited set of statements.  A script must consist of
859    zero or more statements similar to the following, possibly preceded,
860    followed, or separated by white space characters:
861      <ul>
862      <li><code>document.write ("<var>string</var>"[, "<var>string</var>", ...]);</code>.
863      <li><code>var s = document.createElement ("script");
864                s.src = "<var>string</var>";
865                document.documentElement.appendChild (s);</code>
866      </ul>
867    Note that strings may be delimited by <code>'</code>s instead of
868    <code>"</code>s.
869    <li>Only supports the <code>javascript:</code>
870    <abbr title="Uniform Resource Identifier">URI</abbr> scheme in the
871    <code>src</code> attribute of the <code>script</code> element.  In addition,
872    the <abbr title="Uniform Resource Identifier">URI</abbr> must conform to
873    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
874    <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
875    string literals.
876    <li>Does not handle the <i>stop parsing</i> phase correctly if the document
877    is replaced by a <code>document.open ()</code> call.  In other words, delayed
878    (deferred or asynchronous) script executions and event firings might be
879    handled incorrectly if <code>document.open ()</code> is invoked
880    implicitly by <code>document.write ()</code> in a delayed script.
881    </ul>
882    
883    <p>For some reason, this parser does not work in browsers that do
884    not support JavaScript 1.5.
885    
886    <!-- TODO: The |src| attribute value used should be the value at the time
887    the script is inserted into the document, not the value when the script is
888    executed.  Currently it does not matter, since we don't allow dynamic
889    modification of the |src| content/DOM attribute value yet. -->
890    
891  </body>  </body>
 </html>  
892    </html>
893    <!-- $Date$ -->
894    <!--
895    
896    Copyright 2008 Wakaba <w@suika.fam.cx>
897    
898    This program is free software; you can redistribute it and/or
899    modify it under the terms of the GNU General Public License
900    as published by the Free Software Foundation; either version 2
901    of the License, or (at your option) any later version.
902    
903    This program is distributed in the hope that it will be useful,
904    but WITHOUT ANY WARRANTY; without even the implied warranty of
905    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
906    GNU General Public License for more details.
907    
908    You should have received a copy of the GNU General Public License
909    along with this program; if not, write to the Free Software
910    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
911    
912    -->

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.14

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24