/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log | View Patch

revision 1.13 by wakaba, Sun Apr 27 11:27:04 2008 UTC | revision 1.15 by wakaba, Tue Apr 29 03:29:41 2008 UTC
# Line 65  Line 65 
65    
66    var logIndentLevel = 0;    var logIndentLevel = 0;
67    function log (s) {    function log (s) {
68        var indent = '';
69      for (var i = 0; i < logIndentLevel; i++) {      for (var i = 0; i < logIndentLevel; i++) {
70        s = '  ' + s;        indent += '  ';
71      }      }
72        s = indent + s.replace (/\n/g, "\n" + indent);
73      document.logElement.appendChild (document.createTextNode (s + "\n"));      document.logElement.appendChild (document.createTextNode (s + "\n"));
74    } // log    } // log
75    
# Line 92  Line 94 
94    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
95      var p = this;      var p = this;
96      var i = this.input;      var i = this.input;
97      if (this.parseMode == 'script') {      if (this.parseMode == 'cdata') {
98          var tagName = this.endTagName;
99        var token;        var token;
100        if (p.insertionPoint <= 0) {        if (p.insertionPoint <= 0) {
101          return {type: 'abort'};          return {type: 'abort'};
# Line 110  Line 113 
113          return '';          return '';
114        });        });
115        if (token) return token;        if (token) return token;
116        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {        var pattern = new RegExp ('^</' + tagName + '>', 'i');
117          i.s = i.s.replace (pattern, function (s) {
118          if (p.insertionPoint < s.length) {          if (p.insertionPoint < s.length) {
119            token = {type: 'abort'};            token = {type: 'abort'};
120            return s;            return s;
121          }          }
122          token = {type: 'end-tag', value: 'script'};          token = {type: 'end-tag', value: tagName};
123          p.insertionPoint -= s.length;          p.insertionPoint -= s.length;
124          return '';          return '';
125        });        });
126        if (token) return token;        if (token) return token;
127        var m;        var m;
128        if ((p.insertionPoint < '</script'.length) &&        if ((p.insertionPoint < ('</' + tagName).length) &&
129            (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {            (m = i.s.match (/^<\/([A-Za-z]+)/))) {
130          var v = m[1].substring (0, p.insertionPoint).toLowerCase ();          var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
131          if (v == 'script'.substring (0, p.insertionPoint - '</'.length)) {          if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
132            return {type: 'abort'};            return {type: 'abort'};
133          }          }
134        }        }
# Line 229  Line 233 
233            el.manakaiParserInserted = true;            el.manakaiParserInserted = true;
234    
235            // 3. Switch the tokeniser's content model flag to the CDATA state.            // 3. Switch the tokeniser's content model flag to the CDATA state.
236            this.parseMode = 'script';            this.parseMode = 'cdata';
237              this.endTagName = 'script';
238    
239            // 4.1. Collect all the character tokens.            // 4.1. Collect all the character tokens.
240            while (true) {            while (true) {
# Line 243  Line 248 
248              // 4.2. Until it returns a token that is not a character token, or              // 4.2. Until it returns a token that is not a character token, or
249              // until it stops tokenising.              // until it stops tokenising.
250              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
251                         (token.type == 'end-tag' && token.value == 'script') ||                         token.type == 'end-tag' ||
252                         token.type == 'abort') {                         token.type == 'abort') {
253                // 6. Switched back to the PCDATA state.                // 6. Switched back to the PCDATA state.
254                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
255    
256                // 7.1. If the next token is not an end tag token with ...                // 7.1. If the next token is not an end tag token with ...
257                if (token.type != 'end-tag') {                if (!(token.type == 'end-tag' && token.value == 'script')) {
258                  // 7.2. This is a parse error.                  // 7.2. This is a parse error.
259                  log ('Parse error: no </' + 'script>');                  log ('Parse error: no </' + 'script>');
260    
# Line 313  Line 318 
318                //                //
319              }              }
320            }            }
321            } else if (token.value == 'style' ||
322                       token.value == 'noscript' ||
323                       token.value == 'xmp') {
324              // 1. Create an element for the token in the HTML namespace.
325              var el = new JSElement (this.doc, token.value);
326    
327              // 2. Append the new element to the current node.
328              this.openElements[this.openElements.length - 1].appendChild (el);
329    
330              // 3. Switch the tokeniser's content model flag to the CDATA state.
331              this.parseMode = 'cdata';
332              this.endTagName = token.value;
333    
334              // 4.1. Collect all the character tokens.
335              while (true) {
336                var token = this.getNextToken ();
337                log ('token: ' + token.type + ' "' + token.value + '"');
338    
339                if (token.type == 'char') {
340                  // 5. Append a single Text node to the script element node.
341                  el.manakaiAppendText (token.value);
342    
343                // 4.2. Until it returns a token that is not a character token, or
344                // until it stops tokenising.
345                } else if (token.type == 'eof' ||
346                           token.type == 'end-tag' ||
347                           token.type == 'abort') {
348                  // 6. Switched back to the PCDATA state.
349                  this.parseMode = 'pcdata';
350    
351                  // 7.1. If the next token is not an end tag token with ...
352                  if (!(token.type == 'end-tag' &&
353                        token.value == this.endTagName)) {
354                    // 7.2. This is a parse error.
355                    log ('Parse error: no </' + this.endTagName + '>');
356    
357                    // 7.3. Mark the script element as "already executed".
358                    el.manakaiAlreadyExecuted = true;
359                  } else {
360                    // 7.4. Ignore it.
361                    //
362                  }
363                  break;
364                }
365              }
366          } else {          } else {
367            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
368            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
# Line 397  Line 447 
447    
448      log ('DOMContentLoaded event fired');      log ('DOMContentLoaded event fired');
449    
450      // "delays tha load event" things has completed:      // "delays the load event" things has completed:
451      // readyState = 'complete'      // readyState = 'complete'
452      log ('load event fired');      log ('load event fired');
453    
# Line 453  Line 503 
503        // 2.4. If the script element has its "already executed" flag set        // 2.4. If the script element has its "already executed" flag set
504        if (e.manakaiAlreadyExecuted) {        if (e.manakaiAlreadyExecuted) {
505          // 2.5. Abort these steps at this point.          // 2.5. Abort these steps at this point.
506          log ('Running a script: aborted');          log ('Running a script: aborted (already executed)');
507          logIndentLevel--;          logIndentLevel--;
508          return e;          return e;
509        }        }
# Line 564  Line 614 
614          doc.write.apply (doc, args);          doc.write.apply (doc, args);
615          return '';          return '';
616        });        });
617        s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,        var noDocumentElement = false;
618          s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
619        function (s, t, u) {        function (s, t, u) {
620          matched = true;          matched = true;
621          var args = [unescapeJSLiteral (t ? t : u)];          var args = [unescapeJSLiteral (t ? t : u)];
622          doc._insertExternalScript.apply (doc, args);          noDocumentElement = !doc._insertExternalScript.apply (doc, args);
623            return '';
624          });
625          if (noDocumentElement) {
626            log ('Script error: documentElement is null');
627            break;
628          }
629          s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
630          function (s, t) {
631            matched = true;
632            log (dumpTree (doc, ''));
633          return '';          return '';
634        });        });
635        if (s == '') break;        if (s == '') break;
# Line 653  Line 714 
714    }; // document.open    }; // document.open
715    
716    JSDocument.prototype.write = function () {    JSDocument.prototype.write = function () {
717        log ('document.write: start');
718      logIndentLevel++;      logIndentLevel++;
719    
720      var p = this._parser;      var p = this._parser;
# Line 676  Line 738 
738      if (p.scriptExecutedAfterParserResumes) {      if (p.scriptExecutedAfterParserResumes) {
739        log ('document.write: processed later (there is an unprocessed <script src>)');        log ('document.write: processed later (there is an unprocessed <script src>)');
740        logIndentLevel--;        logIndentLevel--;
741          log ('document.write: return');
742        return;        return;
743      }      }
744    
# Line 689  Line 752 
752      // to do something here?      // to do something here?
753    
754      // 5. Return      // 5. Return
755        logIndentLevel--;
756      log ('document.write: return');      log ('document.write: return');
757    
     logIndentLevel--;  
758      return;      return;
759    }; // document.write    }; // document.write
760    
761    JSDocument.prototype._insertExternalScript = function (uri) {    JSDocument.prototype._insertExternalScript = function (uri) {
762      var s = new JSElement (this, 'script');      var s = new JSElement (this, 'script');
763      s.src = uri;      s.src = uri;
764      this.documentElement.appendChild (s);      if (this.documentElement) {
765          this.documentElement.appendChild (s);
766          return true;
767        } else {
768          return false;
769        }
770    }; // _insertExternalScript    }; // _insertExternalScript
771    
772    JSDocument.prototype.__defineGetter__ ('documentElement', function () {    JSDocument.prototype.__defineGetter__ ('documentElement', function () {
# Line 798  algorithm.  Especially, this parser: Line 866  algorithm.  Especially, this parser:
866  algorithm, and so on.  algorithm, and so on.
867  <li>Does not raise parse errors for invalid attribute specifications in start  <li>Does not raise parse errors for invalid attribute specifications in start
868  or end tags.  or end tags.
869  <li>Does not support CDATA/PCDATA element other than <code>script</code>.  <li>Does not support PCDATA elements (<code>title</code> and
870    <code>textarea</code>).
871    <li>Does not strip the first newline in <code>pre</code> elements.
872  <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule  <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
873  in <code>script</code> element.  in <code>script</code> element.
874  <li>Does not support foreign (SVG or MathML) elements.  <li>Does not support foreign (SVG or MathML) elements.
# Line 813  introduced, followed, or separated by wh Line 883  introduced, followed, or separated by wh
883    <li><code>var s = document.createElement ("script");    <li><code>var s = document.createElement ("script");
884              s.src = "<var>string</var>";              s.src = "<var>string</var>";
885              document.documentElement.appendChild (s);</code>              document.documentElement.appendChild (s);</code>
886      <li><code>w (document.documentElement.innerHTML);</code> (This statement
887      can be used to dump the document, even when the document has no
888      document element.  The output format is the tree dump format used
889      in html5lib test data, not <abbr>HTML</abbr>.)
890    </ul>    </ul>
891  Note that strings may be delimited by <code>'</code>s instead of  Note that strings may be delimited by <code>'</code>s instead of
892  <code>"</code>s.  <code>"</code>s.

Legend:
Removed from v.1.13  
changed lines
  Added in v.1.15

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24