/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log | View Patch

revision 1.1 by wakaba, Sun Apr 20 06:07:24 2008 UTC | revision 1.15 by wakaba, Tue Apr 29 03:29:41 2008 UTC
# Line 1  Line 1 
1  <!DOCTYPE HTML>  <!DOCTYPE HTML>
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>  <title>Live Scripting HTML Parser</title>
5    <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6    <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7        title="GNU GPL2 or later">
8  <style>  <style>
9      h1 {
10        margin: 0;
11        font-size: 150%;
12      }
13      h2 {
14        margin: 0;
15        font-size: 100%;
16      }
17      p {
18        margin: 0 1em;
19      }
20    textarea {    textarea {
21       display: block;      width: 100%;
22       width: 80%;      -width: 99%;
23       margin-left: auto;      height: 10em;
      margin-right: auto;  
      min-height: 20em;  
24    }    }
25    output {    output {
26      display: block;      display: block;
27      font-family: monospace;      font-family: monospace;
28      white-space: pre;      white-space: -moz-pre-wrap;
29        white-space: pre-wrap;
30    }    }
31  </style>  </style>
32  <script>  <script>
33      var delayedUpdater = 0;
34    
35    function update () {    function update () {
36      document.logElement.textContent = '';      if (delayedUpdater) {
37      var p = new Parser ();        clearTimeout (delayedUpdater);
38      p.parse (new InputStream (document.sourceElement.value));        delayedUpdater = 0;
39      log (dumpTree (p.doc, ''));      }
40        delayedUpdater = setTimeout (update2, 100);
41    } // update    } // update
42    
43      function update2 () {
44        var v = document.sourceElement.value;
45        if (v != document.previousSourceText) {
46          document.previousSourceText = v;
47          document.links['permalink'].href
48              = location.pathname + '?s=' + encodeURIComponent (v);
49          document.links['ldvlink'].href
50              = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
51              + encodeURIComponent (v);
52    
53          document.logElement.textContent = '';
54          var p = new Parser (new InputStream (v));
55          var doc = p.doc;
56          p.parse ();
57          
58          log (dumpTree (doc, ''));
59          
60          if (p.hasAsyncScript) {
61            log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
62          }
63        }
64      } // update2
65    
66      var logIndentLevel = 0;
67    function log (s) {    function log (s) {
68        var indent = '';
69        for (var i = 0; i < logIndentLevel; i++) {
70          indent += '  ';
71        }
72        s = indent + s.replace (/\n/g, "\n" + indent);
73      document.logElement.appendChild (document.createTextNode (s + "\n"));      document.logElement.appendChild (document.createTextNode (s + "\n"));
74    } // log    } // log
75    
# Line 32  Line 77 
77      this.s = s;      this.s = s;
78    } // InputStream    } // InputStream
79    
80    function Parser () {    function Parser (i, doc) {
81      this.parseMode = 'pcdata';      this.parseMode = 'pcdata';
82      this.doc = new JSDocument ();      if (!doc) {
83      this.openElements = [this.doc];        doc = new JSDocument (this);
84          doc.manakaiIsHTML = true;
85        }
86        this.doc = doc;
87        this.openElements = [doc];
88        this.input = i;
89        this.scriptsExecutedAfterParsing = [];
90        this.scriptsExecutedSoon = [];
91        this.scriptsExecutedAsynchronously = [];
92    } // Parser    } // Parser
93    
94    Parser.prototype.getNextToken = function (i) {    Parser.prototype.getNextToken = function () {
95      if (this.parseMode == 'script') {      var p = this;
96        var i = this.input;
97        if (this.parseMode == 'cdata') {
98          var tagName = this.endTagName;
99        var token;        var token;
100        i.s = i.s.replace (/^([\s\S]+?)<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/,        if (p.insertionPoint <= 0) {
101            return {type: 'abort'};
102          }
103          i.s = i.s.replace (/^([^<]+)/,
104        function (s, t) {        function (s, t) {
105            if (0 < p.insertionPoint && p.insertionPoint < t.length) {
106              token = {type: 'char', value: t.substring (0, p.insertionPoint)};
107              var ip = p.insertionPoint;
108              p.insertionPoint = 0;
109              return t.substring (ip, t.length);
110            }
111          token = {type: 'char', value: t};          token = {type: 'char', value: t};
112          return '<' + '/script>';          p.insertionPoint -= t.length;
113            return '';
114        });        });
115        if (token) return token;        if (token) return token;
116        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function () {        var pattern = new RegExp ('^</' + tagName + '>', 'i');
117          token = {type: 'end-tag', value: 'script'};        i.s = i.s.replace (pattern, function (s) {
118            if (p.insertionPoint < s.length) {
119              token = {type: 'abort'};
120              return s;
121            }
122            token = {type: 'end-tag', value: tagName};
123            p.insertionPoint -= s.length;
124            return '';
125          });
126          if (token) return token;
127          var m;
128          if ((p.insertionPoint < ('</' + tagName).length) &&
129              (m = i.s.match (/^<\/([A-Za-z]+)/))) {
130            var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
131            if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
132              return {type: 'abort'};
133            }
134          }
135          i.s = i.s.replace (/^</,
136          function (s) {
137            token = {type: 'char', value: s};
138            p.insertionPoint -= s.length;
139          return '';          return '';
140        });        });
141        if (token) return token;        if (token) return token;
# Line 56  Line 143 
143      }      }
144    
145      var token;      var token;
146      i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
147          if (p.insertionPoint < s.length ||
148              (p.insertionPoint <= s.length &&
149               s.substring (s.length - 1, 1) != '>')) {
150            token = {type: 'abort'};
151            return s;
152          }
153        token = {type: 'end-tag', value: e.toLowerCase ()};        token = {type: 'end-tag', value: e.toLowerCase ()};
154          p.insertionPoint -= s.length;
155        return '';        return '';
156      });      });
157      if (token) return token;      if (token) return token;
158      i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {      i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
159        token = {type: 'start-tag', value: e.toLowerCase ()};        if (p.insertionPoint < s.length ||
160              (p.insertionPoint <= s.length &&
161               s.substring (s.length - 1, 1) != '>')) {
162            token = {type: 'abort'};
163            return s;
164          }
165          var tagName;
166          var attrs = {};
167          e = e.replace (/^[\S]+/, function (v) {
168            tagName = v.toLowerCase ();
169            return '';
170          });
171          while (true) {
172            var m = false;
173            e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
174            function (x, attrName, attrValue1, attrValue2, attrValue3) {
175              v = attrValue1 || attrValue2 || attrValue3;
176              v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
177                  .replace (/&amp;/g, '&');
178              attrs[attrName.toLowerCase ()] = v;
179              m = true;
180              return '';
181            });
182            if (!m) break;
183          }
184          if (e.length) {
185            log ('Broken start tag: "' + e + '"');
186          }
187          token = {type: 'start-tag', value: tagName, attrs: attrs};
188          p.insertionPoint -= s.length;
189        return '';        return '';
190      });      });
191      if (token) return token;      if (token) return token;
192        if (p.insertionPoint <= 0) {
193          return {type: 'abort'};
194        }
195      i.s = i.s.replace (/^[^<]+/, function (s) {      i.s = i.s.replace (/^[^<]+/, function (s) {
196          if (p.insertionPoint < s.length) {
197            token = {type: 'char', value: s.substring (0, p.insertionPoint)};
198            var ip = p.insertionPoint;
199            p.insertionPoint = 0;
200            return s.substring (ip, s.length);
201          }
202        token = {type: 'char', value: s};        token = {type: 'char', value: s};
203          p.insertionPoint -= s.length;
204        return '';        return '';
205      });      });
206      if (token) return token;      if (token) return token;
207      i.s = i.s.replace (/^[\s\S]/, function (s) {      i.s = i.s.replace (/^[\s\S]/, function (s) {
208        token = {type: 'char', value: s};        token = {type: 'char', value: s};
209          p.insertionPoint -= s.length;
210        return '';        return '';
211      });      });
212      if (token) return token;      if (token) return token;
213      return {type: 'eof'};      return {type: 'eof'};
214    } // getNextToken    } // getNextToken
215    
216    Parser.prototype.parse = function (i) {    Parser.prototype.parse = function () {
217      log ('start parsing');      logIndentLevel++;
218        log ('parse: start');
219    
220      while (true) {      while (true) {
221        var token = this.getNextToken (i);        var token = this.getNextToken ();
222        log ('token: ' + token.type + ' "' + token.value + '"');        log ('token: ' + token.type + ' "' + token.value + '"');
223    
224        if (token.type == 'start-tag') {        if (token.type == 'start-tag') {
         var el = new JSElement (token.value);  
225          if (token.value == 'script') {          if (token.value == 'script') {
226            this.parseMode = 'script';            // 1. Create an element for the token in the HTML namespace.
227              var el = new JSElement (this.doc, token.value);
228              if (token.attrs.async != null) el.async = true;
229              if (token.attrs.defer != null) el.defer = true;
230              if (token.attrs.src != null) el.src = token.attrs.src;
231    
232              // 2. Mark the element as being "parser-inserted".
233              el.manakaiParserInserted = true;
234    
235              // 3. Switch the tokeniser's content model flag to the CDATA state.
236              this.parseMode = 'cdata';
237              this.endTagName = 'script';
238    
239              // 4.1. Collect all the character tokens.
240            while (true) {            while (true) {
241              var token = this.getNextToken (i);              var token = this.getNextToken ();
242              log ('token: ' + token.type + ' "' + token.value + '"');              log ('token: ' + token.type + ' "' + token.value + '"');
243    
244              if (token.type == 'char') {              if (token.type == 'char') {
245                  // 5. Append a single Text node to the script element node.
246                el.manakaiAppendText (token.value);                el.manakaiAppendText (token.value);
247    
248                // 4.2. Until it returns a token that is not a character token, or
249                // until it stops tokenising.
250              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
251                         (token.type == 'end-tag' && token.value == 'script')) {                         token.type == 'end-tag' ||
252                           token.type == 'abort') {
253                  // 6. Switched back to the PCDATA state.
254                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
255    
256                  // 7.1. If the next token is not an end tag token with ...
257                  if (!(token.type == 'end-tag' && token.value == 'script')) {
258                    // 7.2. This is a parse error.
259                    log ('Parse error: no </' + 'script>');
260    
261                    // 7.3. Mark the script element as "already executed".
262                    el.manakaiAlreadyExecuted = true;
263                  } else {
264                    // 7.4. Ignore it.
265                    //
266                  }
267                break;                break;
268              }              }
269            }            }
270    
271              // 8.1. If the parser were originally created for the ...
272              if (this.fragmentParsingMode) {
273                // 8.2. Mark the script element as "already executed" and ...
274                el.alreadyExecuted = true;
275                continue;
276              }
277    
278              // 9.1. Let the old insertion point have the same value as the ...
279              var oldInsertionPoint = this.insertionPoint;
280              // 9.2. Let the insertion point be just before the next input ...
281              this.setInsertionPoint (0);
282    
283              // 10. Append the new element to the current node.
284              this.openElements[this.openElements.length - 1].appendChild (el);
285    
286              // 11. Let the insertion point have the value of the old ...
287    
288              oldInsertionPoint += this.insertionPoint;
289              this.setInsertionPoint (oldInsertionPoint);
290    
291              // 12. If there is a script that will execute as soon as ...
292              while (this.scriptExecutedWhenParserResumes) {
293                // 12.1. If the tree construction stage is being called reentrantly
294                if (this.reentrant) {
295                  log ('parse: abort (reentrance)');
296                  logIndentLevel--;
297                  return;
298    
299                // 12.2. Otherwise
300                } else {
301                  // 1.
302                  var script = this.scriptExecutedWhenParserResumes;
303                  this.scriptExecutedWhenParserResumes = null;
304    
305                  // 2. Pause until the script has completed loading.
306                  //
307    
308                  // 3. Let the insertion point to just before the next input char.
309                  this.setInsertionPoint (0);
310    
311                  // 4. Execute the script.
312                  executeScript (this.doc, script);
313    
314                  // 5. Let the insertion point be undefined again.
315                  this.setInsertionPoint (undefined);
316    
317                  // 6. If there is once again a script that will execute ...
318                  //
319                }
320              }
321            } else if (token.value == 'style' ||
322                       token.value == 'noscript' ||
323                       token.value == 'xmp') {
324              // 1. Create an element for the token in the HTML namespace.
325              var el = new JSElement (this.doc, token.value);
326    
327              // 2. Append the new element to the current node.
328            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
329    
330              // 3. Switch the tokeniser's content model flag to the CDATA state.
331              this.parseMode = 'cdata';
332              this.endTagName = token.value;
333    
334              // 4.1. Collect all the character tokens.
335              while (true) {
336                var token = this.getNextToken ();
337                log ('token: ' + token.type + ' "' + token.value + '"');
338    
339                if (token.type == 'char') {
340                  // 5. Append a single Text node to the script element node.
341                  el.manakaiAppendText (token.value);
342    
343                // 4.2. Until it returns a token that is not a character token, or
344                // until it stops tokenising.
345                } else if (token.type == 'eof' ||
346                           token.type == 'end-tag' ||
347                           token.type == 'abort') {
348                  // 6. Switched back to the PCDATA state.
349                  this.parseMode = 'pcdata';
350    
351                  // 7.1. If the next token is not an end tag token with ...
352                  if (!(token.type == 'end-tag' &&
353                        token.value == this.endTagName)) {
354                    // 7.2. This is a parse error.
355                    log ('Parse error: no </' + this.endTagName + '>');
356    
357                    // 7.3. Mark the script element as "already executed".
358                    el.manakaiAlreadyExecuted = true;
359                  } else {
360                    // 7.4. Ignore it.
361                    //
362                  }
363                  break;
364                }
365              }
366          } else {          } else {
367              var el = new JSElement (this.doc, token.value);
368            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
369            this.openElements.push (el);            this.openElements.push (el);
370          }          }
# Line 116  Line 375 
375          } else {          } else {
376            log ('parse error: unmatched end tag: ' + token.value);            log ('parse error: unmatched end tag: ' + token.value);
377          }          }
378          } else if (token.type == 'char') {
379            this.openElements[this.openElements.length - 1].manakaiAppendText
380                (token.value);
381        } else if (token.type == 'eof') {        } else if (token.type == 'eof') {
382          break;          break;
383          } else if (token.type == 'abort') {
384            log ('parse: abort');
385            logIndentLevel--;
386            return;
387        }        }
388      }      }
389    
390      log ('stop parsing');      log ('stop parsing');
391    
392        // readyState = 'interactive'
393    
394        // "When a script completes loading" rules start applying.
395    
396        while (this.scriptsExecutedSoon.length > 0 ||
397               this.scriptsExecutedAsynchronously.length > 0) {
398          // Handle "list of scripts that will execute as soon as possible".
399          while (this.scriptsExecutedSoon.length > 0) {
400            var e = this.scriptsExecutedSoon.shift ();
401      
402            // If it has completed loading
403            log ('Execute an external script not inserted by parser...');
404            executeScript (this.doc, e);
405    
406            // NOTE: It MAY be executed before the end of the parsing, according
407            // to the spec.
408            this.hasAsyncScript = true;
409          }
410    
411          // Handle "list of scripts that will execute asynchronously".
412          while (this.scriptsExecutedAsynchronously.length > 0) {
413            var e = this.scriptsExecutedAsynchronously.shift ();
414    
415            // Step 1.
416            // We assume that all scripts have been loaded at this time.
417      
418            // Step 2.
419            log ('Execute an asynchronous script...');
420            executeScript (this.doc, e);
421    
422            // Step 3.
423            //
424    
425            // Step 4.
426            //
427    
428            this.hasAsyncScript = true;
429          }
430        }
431    
432        // Handle "list of scripts that will execute when the document has finished
433        // parsing".
434        var list = this.scriptsExecutedAfterParsing;
435        while (list.length > 0) {
436          // TODO: break unless completed loading
437    
438          // Step 1.
439          //
440    
441          // Step 2. and Step 3.
442          log ('Executing a |defer|red script...');
443          executeScript (this.doc, list.shift ());
444    
445          // Step 4.
446        }
447    
448        log ('DOMContentLoaded event fired');
449    
450        // "delays the load event" things has completed:
451        // readyState = 'complete'
452        log ('load event fired');
453    
454        logIndentLevel--;
455    } // parse    } // parse
456    
457    function JSDocument () {    Parser.prototype.setInsertionPoint = function (ip) {
458        if (ip == undefined || ip == null || isNaN (ip)) {
459          log ('insertion point: set to undefined');
460          this.insertionPoint = undefined;
461        } else if (ip == this.input.s.length) {
462          log ('insertion point: end of file');
463          this.insertionPoint = ip;
464        } else {
465          log ('insertion point: set to ' + ip +
466               ' (before "' + this.input.s.substring (0, 10) + '")');
467          this.insertionPoint = ip;
468        }
469      }; // setInsertionPoint
470    
471      function JSDocument (p) {
472      this.childNodes = [];      this.childNodes = [];
473        this._parser = p;
474    } // JSDocument    } // JSDocument
475    
476    function JSElement (localName) {    function JSElement (doc, localName) {
477      this.localName = localName;      this.localName = localName;
478        this.ownerDocument = doc;
479      this.childNodes = [];      this.childNodes = [];
480    } // JSElement    } // JSElement
481    
# Line 137  Line 483 
483    function (e) {    function (e) {
484      this.childNodes.push (e);      this.childNodes.push (e);
485      e.parentNode = this;      e.parentNode = this;
486    
487        if (e.localName == 'script') {
488          logIndentLevel++;
489          log ('Running a script: start');
490    
491          var doc = this.ownerDocument || this;
492          var p = doc._parser;
493    
494          // 1. Script type
495          //
496    
497          // 2.1. If scripting is disabled
498          //
499          // 2.2. If the script element was created by an XML ... innerHTML ...
500          //
501          // 2.3. If the user agent does not support the scripting language ...
502          //
503          // 2.4. If the script element has its "already executed" flag set
504          if (e.manakaiAlreadyExecuted) {
505            // 2.5. Abort these steps at this point.
506            log ('Running a script: aborted (already executed)');
507            logIndentLevel--;
508            return e;
509          }
510    
511          // 3. Set the element's "already executed" flag.
512          e.manakaiAlreadyExecuted = true;
513    
514          // 4. If the element has a src attribute, then a load for ...
515          // TODO: load an external resource
516    
517          // 5. The first of the following options:
518    
519          // 5.1.
520          if (/* TODO: If the document is still being parsed && */
521              e.defer && !e.async) {
522            p.scriptsExecutedAfterParsing.push (e);
523            log ('Running a script: aborted (defer)');
524          } else if (e.async && e.src != null) {
525            p.scriptsExecutedAsynchronously.push (e);
526            log ('Running a script: aborted (async src)');
527          } else if (e.async && e.src == null &&
528                     p.scriptsExecutedAsynchronously.length > 0) {
529            p.scriptsExecutedAsynchronously.push (e);
530            log ('Running a script: aborted (async)');
531            // ISSUE: What is the difference with the case above?
532          } else if (e.src != null && e.manakaiParserInserted) {
533            if (p.scriptExecutedWhenParserResumes) {
534              log ('Error: There is a script that will execute as soon as the parser resumes.');
535            }
536            p.scriptExecutedWhenParserResumes = e;
537            log ('Running a script: aborted (src parser-inserted)');
538          } else if (e.src != null) {
539            p.scriptsExecutedSoon.push (e);
540            log ('Running a script: aborted (src)');
541          } else {
542            executeScript (doc, e); // even if other scripts are already executing.
543          }
544    
545          log ('Running a script: end');
546          logIndentLevel--;
547        }
548    
549      return e;      return e;
550    }; // appendChild    }; // appendChild
551    
552      function executeScript (doc, e) {
553        log ('executing a script block: start');
554    
555        var s;
556        if (e.src != null) {
557          s = getExternalScript (e.src);
558    
559          // If the load resulted in an error, then ... firing an error event ...
560          if (s == null) {
561            log ('error event fired at the script element');
562            return;
563          }
564    
565          log ('External script loaded: "' + s + '"');
566        } else {
567          s = e.text;
568        }
569    
570        // If the load was successful
571        log ('load event fired at the script element');
572    
573        if (true) {
574        // Scripting is enabled, Document.designMode is disabled,
575        // Document is the active document in its browsing context
576    
577          parseAndRunScript (doc, s);
578        }
579    
580        log ('executing a script block: end');
581      } // executeScript
582    
583      function getExternalScript (uri) {
584        if (uri.match (/^javascript:/i)) {
585          var m;
586          if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
587            if (m[1]) {
588              return unescapeJSLiteral (m[1]);
589            } else if (m[2]) {
590              return unescapeJSLiteral (m[2]);
591            } else {
592              return null;
593            }
594          } else {
595            log ('Complex javascript: URI is not supported: <' + uri + '>');
596            return null;
597          }
598        } else {
599          log ('URI scheme not supported: <' + uri + '>');
600          return null;
601        }
602      } // getExternalScript
603    
604      function parseAndRunScript (doc, s) {
605        while (true) {
606          var matched = false;
607          s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (s, t) {
608            matched = true;
609            var args = [];
610            t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
611              args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
612              return '';
613            });
614            doc.write.apply (doc, args);
615            return '';
616          });
617          var noDocumentElement = false;
618          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
619          function (s, t, u) {
620            matched = true;
621            var args = [unescapeJSLiteral (t ? t : u)];
622            noDocumentElement = !doc._insertExternalScript.apply (doc, args);
623            return '';
624          });
625          if (noDocumentElement) {
626            log ('Script error: documentElement is null');
627            break;
628          }
629          s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
630          function (s, t) {
631            matched = true;
632            log (dumpTree (doc, ''));
633            return '';
634          });
635          if (s == '') break;
636          if (!matched) {
637            log ('Script parse error: "' + s + '"');
638            break;
639          }
640        }
641      } // parseAndRunScript
642    
643      function unescapeJSLiteral (s) {
644        return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
645          return String.fromCharCode (parseInt ('0x' + v));
646        });
647      } // unescapeJSLiteral
648    
649    function JSText (data) {    function JSText (data) {
650      this.data = data;      this.data = data;
651    } // JSText    } // JSText
# Line 155  Line 661 
661      }      }
662    }; // manakaiAppendText    }; // manakaiAppendText
663    
664      JSDocument.prototype.open = function () {
665        // Two or fewer arguments
666    
667        // Step 1.
668        var type = arguments[0] || 'text/html';
669        
670        // Step 2.
671        var replace = arguments[1] == 'replace';
672    
673        // Step 3.
674        if (this._parser &&
675            !this._parser.scriptCreated &&
676            this._parser.input.insertionPoint != undefined) {
677          log ('document.open () in parsing mode is ignored');
678          return this;
679        }
680    
681        // Step 4.
682        log ('onbeforeunload event fired');
683        log ('onunload event fired');
684    
685        // Step 5.
686        if (this._parser) {
687          // Discard the parser.
688        }
689    
690        // Step 6.
691        log ('document cleared by document.open ()');
692        this.childNodes = [];
693    
694        // Step 7.
695        this._parser = new Parser (new InputStream (''), this);
696        this._parser.scriptCreated = true;
697    
698        // Step 8.
699        this.manakaiIsHTML = true;
700    
701        // Step 9.
702        // If not text/html, ...
703    
704        // Step 10.
705        if (!replace) {
706          // History      
707        }
708    
709        // Step 11.
710        this._parser.setInsertionPoint (this._parser.input.s.length);
711    
712        // Step 12.
713        return this;
714      }; // document.open
715    
716      JSDocument.prototype.write = function () {
717        log ('document.write: start');
718        logIndentLevel++;
719    
720        var p = this._parser;
721    
722        // 1. If the insertion point is undefined, the open() method must be ...
723        if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
724          this.open ();
725          p = this._parser;
726        }
727    
728        // 2. ... inserted into the input stream just before the insertion point.
729        var s = Array.join (arguments, '');
730        log ('document.write: insert "' + s + '"' +
731             ' before "' +
732             p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
733        p.input.s = p.input.s.substring (0, p.insertionPoint) + s
734            + p.input.s.substring (p.insertionPoint, p.input.s.length);
735        p.insertionPoint += s.length;
736    
737        // 3. If there is a script that will execute as soon as the parser resumes
738        if (p.scriptExecutedAfterParserResumes) {
739          log ('document.write: processed later (there is an unprocessed <script src>)');
740          logIndentLevel--;
741          log ('document.write: return');
742          return;
743        }
744    
745        // 4. Process the characters that were inserted, ...
746        var originalReentrant = p.reentrant;
747        p.reentrant = true;
748        p.parse ();
749        p.reentrant = originalReentrant;
750        // TODO: "Abort the processing of any nested invokations of the tokeniser,
751        // yielding control back to the caller." (<script> parsing).  Do we need
752        // to do something here?
753    
754        // 5. Return
755        logIndentLevel--;
756        log ('document.write: return');
757    
758        return;
759      }; // document.write
760    
761      JSDocument.prototype._insertExternalScript = function (uri) {
762        var s = new JSElement (this, 'script');
763        s.src = uri;
764        if (this.documentElement) {
765          this.documentElement.appendChild (s);
766          return true;
767        } else {
768          return false;
769        }
770      }; // _insertExternalScript
771    
772      JSDocument.prototype.__defineGetter__ ('documentElement', function () {
773        var cn = this.childNodes;
774        for (var i = 0; i < cn.length; i++) {
775          if (cn[i] instanceof JSElement) {
776            return cn[i]
777          }
778        }
779        return null;
780      });
781    
782      JSElement.prototype.__defineGetter__ ('text', function () {
783        var r = '';
784        for (var i = 0; i < this.childNodes.length; i++) {
785          if (this.childNodes[i] instanceof JSText) {
786            r += this.childNodes[i].data;
787          }
788        }
789        return r;
790      });
791    
792    function dumpTree (n, indent) {    function dumpTree (n, indent) {
793      var r = '';      var r = '';
794      for (var i = 0; i < n.childNodes.length; i++) {      for (var i = 0; i < n.childNodes.length; i++) {
795        var node = n.childNodes[i];        var node = n.childNodes[i];
796        if (node instanceof JSElement) {        if (node instanceof JSElement) {
797          r += '| ' + indent + node.localName + '\n';          r += '| ' + indent + node.localName + '\n';
798            if (node.async) r += '| ' + indent + '  async=""\n';
799            if (node.defer) r += '| ' + indent + '  defer=""\n';
800            if (node.src != null) {
801              r += '| ' + indent + '  src="' + node.src + '"\n';
802            }
803          r += dumpTree (node, indent + '  ');          r += dumpTree (node, indent + '  ');
804        } else if (node instanceof JSText) {        } else if (node instanceof JSText) {
805          r += '| ' + indent + '"' + node.data + '"\n';          r += '| ' + indent + '"' + node.data + '"\n';
# Line 174  Line 813 
813  </head>  </head>
814  <body onload="  <body onload="
815    document.sourceElement = document.getElementsByTagName ('textarea')[0];    document.sourceElement = document.getElementsByTagName ('textarea')[0];
816    
817      var q = location.search;
818      if (q != null) {
819        q = q.substring (1).split (/;/);
820        for (var i = 0; i < q.length; i++) {
821          var v = q[i].split (/=/, 2);
822          v[0] = decodeURIComponent (v[0]);
823          v[1] = decodeURIComponent (v[1] || '');
824          if (v[0] == 's') {
825            document.sourceElement.value = v[1];
826          }
827        }
828      }
829    
830    document.logElement = document.getElementsByTagName ('output')[0];    document.logElement = document.getElementsByTagName ('output')[0];
831    update ();    update ();
832  ">  ">
833    <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
834    Parser</h1>
835    
836  <textarea onchange=" update () ">&lt;html>  <h2>Markup to test
837    (<a href=data:, id=permalink rel=bookmark>permalink</a>,
838    <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
839        id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
840        Viewer</a>)</h2>
841    <p>
842    <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
843  &lt;head>&lt;/head>&lt;body>  &lt;head>&lt;/head>&lt;body>
844  &lt;p>  &lt;p>
845  &lt;script>  &lt;script>
846  document.write ('aaaaaaa&lt;/p>\n&lt;script>\ndocument.write("cccccc")\n&lt;/', 'script>\nbbbbbb');  document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
847  &lt;/script>  &lt;/script>
848  &lt;p>  &lt;p>
849  </textarea>  </textarea>
850    
851  <output></output>  <h2 id=log>Log</h2>
852    <p><output></output>
853    
854    <h2 id=notes>Notes</h2>
855    
856    <p>This is a <em>simplified</em> implementation of
857    <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
858    Parsing Algorithm</a>.  It only implements the script-related part of the
859    algorithm.  In particular, this parser:
860    <ul>
861    <li>Does not support <code>DOCTYPE</code> and comment tokens.
862    <li>Does not support entities except for <code>&amp;quot;</code>,
863    <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
864    <code>src</code> attribute value.
865    <li>Does not support omissions of start or end tags, the <abbr title="adoption agency algorithm">AAA</abbr>
866    algorithm, and so on.
867    <li>Does not raise parse errors for invalid attribute specifications in start
868    or end tags.
869    <li>Does not support PCDATA elements (<code>title</code> and
870    <code>textarea</code>).
871    <li>Does not strip the first newline in <code>pre</code> elements.
872    <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
873    in <code>script</code> element.
874    <li>Does not support foreign (SVG or MathML) elements.
875    <li>Only supports <code>script</code> <code>type</code>
876    <code>text/javascript</code>.  <code>type</code> and <code>language</code>
877    attributes are ignored.
878    <li>Only supports a limited set of statements.  A script must consist of zero
879    or more statements looking similar to the following statements, possibly
880    introduced, followed, or separated by white space characters:
881      <ul>
882      <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
883      <li><code>var s = document.createElement ("script");
884                s.src = "<var>string</var>";
885                document.documentElement.appendChild (s);</code>
886      <li><code>w (document.documentElement.innerHTML);</code> (This statement
887      can be used to dump the document, even when the document has no
888      document element.  The output format is the tree dump format used
889      in html5lib test data, not <abbr>HTML</abbr>.)
890      </ul>
891    Note that strings may be delimited by <code>'</code>s instead of
892    <code>"</code>s.
893    <li>Only supports <code>javascript:</code>
894    <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
895    <code>src</code> attribute of the <code>script</code> element.  In addition,
896    the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
897    the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
898    <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
899    string literals.
900    <li>Does not handle <i>stop parsing</i> phase correctly if the document is
901    replaced by a <code>document.open ()</code> call.  In other words, delayed
902    (deferred or asynchronous) script executions and event firings might be
903    treated in a wrong way if a <code>document.open ()</code> invocation
904    is implicitly done by <code>document.write ()</code> in a delayed script.
905    </ul>
906    
907    <p>For some reason, this parser does not work in browsers that do
908    not support JavaScript 1.5.
909    
910    <!-- TODO: |src| attribute value should refer to the value at the time
911    when it is inserted into the document, not the value when the script is
912    executed.  Currently it does not matter, since we don't allow dynamic
913    modification to the |src| content/DOM attribute value yet. -->
914    
915  </body>  </body>
 </html>  
916    </html>
917    <!-- $Date$ -->
918    <!--
919    
920    Copyright 2008 Wakaba <w@suika.fam.cx>
921    
922    This program is free software; you can redistribute it and/or
923    modify it under the terms of the GNU General Public License
924    as published by the Free Software Foundation; either version 2
925    of the License, or (at your option) any later version.
926    
927    This program is distributed in the hope that it will be useful,
928    but WITHOUT ANY WARRANTY; without even the implied warranty of
929    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
930    GNU General Public License for more details.
931    
932    You should have received a copy of the GNU General Public License
933    along with this program; if not, write to the Free Software
934    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
935    
936    -->

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.15

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24