/[suikacvs]/markup/html/scripting-parser/parser.html

Diff of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log | View Patch

-revision 1.12 by wakaba,
Sun Apr 27 11:21:09 2008 UTC
+revision 1.14 by wakaba,
Tue Apr 29 02:50:00 2008 UTC
 Line 2
  <html lang=en>
  <head>
  <title>Live Scripting HTML Parser</title>
+ <link rel=author href="http://suika.fam.cx/~wakaba/who?">
+ <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
+     title="GNU GPL2 or later">
  <style>
-   h1, h2 {
+   h1 {
+     margin: 0;
+     font-size: 150%;
+   }
+   h2 {
      margin: 0;
      font-size: 100%;
    }
-   p, pre {
+   p {
-     margin: 0;
+     margin: 0 1em;
    }
    textarea {
      width: 100%;
-Line 85
+Line 92
    Parser.prototype.getNextToken = function () {
      var p = this;
      var i = this.input;
-     if (this.parseMode == 'script') {
+     if (this.parseMode == 'cdata') {
+       var tagName = this.endTagName;
        var token;
        if (p.insertionPoint <= 0) {
          return {type: 'abort'};
-Line 103
+Line 111
          return '';
        });
        if (token) return token;
-       i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
+       var pattern = new RegExp ('^</' + tagName + '>', 'i');
+       i.s = i.s.replace (pattern, function (s) {
          if (p.insertionPoint < s.length) {
            token = {type: 'abort'};
            return s;
          }
-         token = {type: 'end-tag', value: 'script'};
+         token = {type: 'end-tag', value: tagName};
          p.insertionPoint -= s.length;
          return '';
        });
        if (token) return token;
        var m;
-       if ((p.insertionPoint < '</script'.length) &&
+       if ((p.insertionPoint < ('</' + tagName).length) &&
-           (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
+           (m = i.s.match (/^<\/([A-Za-z]+)/))) {
          var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
-         if (v == 'script'.substring (0, p.insertionPoint - '</'.length)) {
+         if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
            return {type: 'abort'};
          }
        }
-Line 222
+Line 231
            el.manakaiParserInserted = true;
            // 3. Switch the tokeniser's content model flag to the CDATA state.
-           this.parseMode = 'script';
+           this.parseMode = 'cdata';
+           this.endTagName = 'script';
            // 4.1. Collect all the character tokens.
            while (true) {
-Line 236
+Line 246
              // 4.2. Until it returns a token that is not a character token, or
              // until it stops tokenising.
              } else if (token.type == 'eof' ||
-                        (token.type == 'end-tag' && token.value == 'script') ||
+                        token.type == 'end-tag' ||
                         token.type == 'abort') {
                // 6. Switched back to the PCDATA state.
                this.parseMode = 'pcdata';
                // 7.1. If the next token is not an end tag token with ...
-               if (token.type != 'end-tag') {
+               if (!(token.type == 'end-tag' && token.value == 'script')) {
                  // 7.2. This is a parse error.
                  log ('Parse error: no </' + 'script>');
-Line 306
+Line 316
                //
              }
            }
+         } else if (token.value == 'style' ||
+                    token.value == 'noscript' ||
+                    token.value == 'xmp') {
+           // 1. Create an element for the token in the HTML namespace.
+           var el = new JSElement (this.doc, token.value);
+           // 2. Append the new element to the current node.
+           this.openElements[this.openElements.length - 1].appendChild (el);
+           // 3. Switch the tokeniser's content model flag to the CDATA state.
+           this.parseMode = 'cdata';
+           this.endTagName = token.value;
+           // 4.1. Collect all the character tokens.
+           while (true) {
+             var token = this.getNextToken ();
+             log ('token: ' + token.type + ' "' + token.value + '"');
+             if (token.type == 'char') {
+               // 5. Append a single Text node to the new element node.
+               el.manakaiAppendText (token.value);
+             // 4.2. Until it returns a token that is not a character token, or
+             // until it stops tokenising.
+             } else if (token.type == 'eof' ||
+                        token.type == 'end-tag' ||
+                        token.type == 'abort') {
+               // 6. Switched back to the PCDATA state.
+               this.parseMode = 'pcdata';
+               // 7.1. If the next token is not an end tag token with ...
+               if (!(token.type == 'end-tag' &&
+                     token.value == this.endTagName)) {
+                 // 7.2. This is a parse error.
+                 log ('Parse error: no </' + this.endTagName + '>');
+                 // 7.3. Mark the element as "already executed".
+                 el.manakaiAlreadyExecuted = true;
+               } else {
+                 // 7.4. Ignore it.
+                 //
+               }
+               break;
+             }
+           }
          } else {
            var el = new JSElement (this.doc, token.value);
            this.openElements[this.openElements.length - 1].appendChild (el);
-Line 390
+Line 445
      log ('DOMContentLoaded event fired');
-     // "delays tha load event" things has completed:
+     // "delays the load event" things have completed:
      // readyState = 'complete'
      log ('load event fired');
-Line 791: algorithm.  Especially, this parser:
+Line 846: algorithm.  Especially, this parser:
  algorithm, and so on.
  <li>Does not raise parse errors for invalid attribute specifications in start
  or end tags.
- <li>Does not support CDATA/PCDATA element other than <code>script</code>.
+ <li>Does not support PCDATA elements (<code>title</code> and
+ <code>textarea</code>).
+ <li>Does not strip the first newline in <code>pre</code> elements.
  <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
  in <code>script</code> element.
  <li>Does not support foreign (SVG or MathML) elements.
-Line 831: when it is inserted into the document, n
+Line 888: when it is inserted into the document, n
  executed.  Currently it does not matter, since we don't allow dynamic
  modification to the |src| content/DOM attribute value yet. -->
- <!-- TODO: license -->
  </body>
- </html>
+ </html>
+ <!-- $Date$ -->
+ <!--
+ Copyright 2008 Wakaba <w@suika.fam.cx>
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ -->

 Legend:
-Removed from v.1.12
 changed lines
+Added in v.1.14

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24