/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Diff of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.11 by wakaba, Sun Apr 27 10:44:36 2008 UTC revision 1.14 by wakaba, Tue Apr 29 02:50:00 2008 UTC
# Line 2  Line 2 
2  <html lang=en>  <html lang=en>
3  <head>  <head>
4  <title>Live Scripting HTML Parser</title>  <title>Live Scripting HTML Parser</title>
5    <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6    <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7        title="GNU GPL2 or later">
8  <style>  <style>
9    h1, h2 {    h1 {
10        margin: 0;
11        font-size: 150%;
12      }
13      h2 {
14      margin: 0;      margin: 0;
15      font-size: 100%;      font-size: 100%;
16    }    }
17    p, pre {    p {
18      margin: 0;      margin: 0 1em;
19    }    }
20    textarea {    textarea {
21      width: 100%;      width: 100%;
# Line 79  Line 86 
86      this.input = i;      this.input = i;
87      this.scriptsExecutedAfterParsing = [];      this.scriptsExecutedAfterParsing = [];
88      this.scriptsExecutedSoon = [];      this.scriptsExecutedSoon = [];
89        this.scriptsExecutedAsynchronously = [];
90    } // Parser    } // Parser
91    
92    Parser.prototype.getNextToken = function () {    Parser.prototype.getNextToken = function () {
93      var p = this;      var p = this;
94      var i = this.input;      var i = this.input;
95      if (this.parseMode == 'script') {      if (this.parseMode == 'cdata') {
96          var tagName = this.endTagName;
97        var token;        var token;
98        if (p.insertionPoint <= 0) {        if (p.insertionPoint <= 0) {
99          return {type: 'abort'};          return {type: 'abort'};
# Line 102  Line 111 
111          return '';          return '';
112        });        });
113        if (token) return token;        if (token) return token;
114        i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {        var pattern = new RegExp ('^</' + tagName + '>', 'i');
115          i.s = i.s.replace (pattern, function (s) {
116          if (p.insertionPoint < s.length) {          if (p.insertionPoint < s.length) {
117            token = {type: 'abort'};            token = {type: 'abort'};
118            return s;            return s;
119          }          }
120          token = {type: 'end-tag', value: 'script'};          token = {type: 'end-tag', value: tagName};
121          p.insertionPoint -= s.length;          p.insertionPoint -= s.length;
122          return '';          return '';
123        });        });
124        if (token) return token;        if (token) return token;
125        var m;        var m;
126        if ((p.insertionPoint < '</script'.length) &&        if ((p.insertionPoint < ('</' + tagName).length) &&
127            (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {            (m = i.s.match (/^<\/([A-Za-z]+)/))) {
128          var v = m[1].substring (0, p.insertionPoint).toLowerCase ();          var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
129          if (v == 'script'.substring (0, p.insertionPoint - '</'.length)) {          if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
130            return {type: 'abort'};            return {type: 'abort'};
131          }          }
132        }        }
# Line 221  Line 231 
231            el.manakaiParserInserted = true;            el.manakaiParserInserted = true;
232    
233            // 3. Switch the tokeniser's content model flag to the CDATA state.            // 3. Switch the tokeniser's content model flag to the CDATA state.
234            this.parseMode = 'script';            this.parseMode = 'cdata';
235              this.endTagName = 'script';
236    
237            // 4.1. Collect all the character tokens.            // 4.1. Collect all the character tokens.
238            while (true) {            while (true) {
# Line 235  Line 246 
246              // 4.2. Until it returns a token that is not a character token, or              // 4.2. Until it returns a token that is not a character token, or
247              // until it stops tokenising.              // until it stops tokenising.
248              } else if (token.type == 'eof' ||              } else if (token.type == 'eof' ||
249                         (token.type == 'end-tag' && token.value == 'script') ||                         token.type == 'end-tag' ||
250                         token.type == 'abort') {                         token.type == 'abort') {
251                // 6. Switched back to the PCDATA state.                // 6. Switched back to the PCDATA state.
252                this.parseMode = 'pcdata';                this.parseMode = 'pcdata';
253    
254                // 7.1. If the next token is not an end tag token with ...                // 7.1. If the next token is not an end tag token with ...
255                if (token.type != 'end-tag') {                if (!(token.type == 'end-tag' && token.value == 'script')) {
256                  // 7.2. This is a parse error.                  // 7.2. This is a parse error.
257                  log ('Parse error: no </' + 'script>');                  log ('Parse error: no </' + 'script>');
258    
# Line 305  Line 316 
316                //                //
317              }              }
318            }            }
319            } else if (token.value == 'style' ||
320                       token.value == 'noscript' ||
321                       token.value == 'xmp') {
322              // 1. Create an element for the token in the HTML namespace.
323              var el = new JSElement (this.doc, token.value);
324    
325              // 2. Append the new element to the current node.
326              this.openElements[this.openElements.length - 1].appendChild (el);
327    
328              // 3. Switch the tokeniser's content model flag to the CDATA state.
329              this.parseMode = 'cdata';
330              this.endTagName = token.value;
331    
332              // 4.1. Collect all the character tokens.
333              while (true) {
334                var token = this.getNextToken ();
335                log ('token: ' + token.type + ' "' + token.value + '"');
336    
337                if (token.type == 'char') {
338                  // 5. Append a single Text node to the script element node.
339                  el.manakaiAppendText (token.value);
340    
341                // 4.2. Until it returns a token that is not a character token, or
342                // until it stops tokenising.
343                } else if (token.type == 'eof' ||
344                           token.type == 'end-tag' ||
345                           token.type == 'abort') {
346                  // 6. Switched back to the PCDATA state.
347                  this.parseMode = 'pcdata';
348    
349                  // 7.1. If the next token is not an end tag token with ...
350                  if (!(token.type == 'end-tag' &&
351                        token.value == this.endTagName)) {
352                    // 7.2. This is a parse error.
353                    log ('Parse error: no </' + this.endTagName + '>');
354    
355                    // 7.3. Mark the script element as "already executed".
356                    el.manakaiAlreadyExecuted = true;
357                  } else {
358                    // 7.4. Ignore it.
359                    //
360                  }
361                  break;
362                }
363              }
364          } else {          } else {
365            var el = new JSElement (this.doc, token.value);            var el = new JSElement (this.doc, token.value);
366            this.openElements[this.openElements.length - 1].appendChild (el);            this.openElements[this.openElements.length - 1].appendChild (el);
# Line 335  Line 391 
391    
392      // "When a script completes loading" rules start applying.      // "When a script completes loading" rules start applying.
393    
394      // List of scripts that will execute as soon as possible      while (this.scriptsExecutedSoon.length > 0 ||
395      for (var i = 0; i < this.scriptsExecutedSoon.length; i++) {             this.scriptsExecutedAsynchronously.length > 0) {
396        var e = this.scriptsExecutedSoon[i];        // Handle "list of scripts that will execute as soon as possible".
397          while (this.scriptsExecutedSoon.length > 0) {
398        // If it has completed loading          var e = this.scriptsExecutedSoon.shift ();
399        log ('Execute an external script not inserted by parser...');    
400        executeScript (this.doc, e);          // If it has completed loading
401            log ('Execute an external script not inserted by parser...');
402            executeScript (this.doc, e);
403    
404            // NOTE: It MAY be executed before the end of the parsing, according
405            // to the spec.
406            this.hasAsyncScript = true;
407          }
408    
409          // Handle "list of scripts that will execute asynchronously".
410          while (this.scriptsExecutedAsynchronously.length > 0) {
411            var e = this.scriptsExecutedAsynchronously.shift ();
412    
413            // Step 1.
414            // We assume that all scripts have been loaded at this time.
415      
416            // Step 2.
417            log ('Execute an asynchronous script...');
418            executeScript (this.doc, e);
419    
420            // Step 3.
421            //
422    
423            // Step 4.
424            //
425    
426        // NOTE: It MAY be executed before the end of the parsing, according          this.hasAsyncScript = true;
427        // to the spec.        }
       this.hasAsyncScript = true;  
428      }      }
429    
     // TODO: Handles  
     // "list of scripts that will execute asynchronously"  
   
430      // Handle "list of scripts that will execute when the document has finished      // Handle "list of scripts that will execute when the document has finished
431      // parsing".      // parsing".
432      var list = this.scriptsExecutedAfterParsing;      var list = this.scriptsExecutedAfterParsing;
# Line 369  Line 445 
445    
446      log ('DOMContentLoaded event fired');      log ('DOMContentLoaded event fired');
447    
448      // "delays tha load event" things has completed:      // "delays the load event" things has completed:
449      // readyState = 'complete'      // readyState = 'complete'
450      log ('load event fired');      log ('load event fired');
451    
# Line 444  Line 520 
520          p.scriptsExecutedAfterParsing.push (e);          p.scriptsExecutedAfterParsing.push (e);
521          log ('Running a script: aborted (defer)');          log ('Running a script: aborted (defer)');
522        } else if (e.async && e.src != null) {        } else if (e.async && e.src != null) {
523          // TODO          p.scriptsExecutedAsynchronously.push (e);
524        } else if (e.async && e.src == null          log ('Running a script: aborted (async src)');
525                   /* && list of scripts that will execute asynchronously is not empty */) {        } else if (e.async && e.src == null &&
526          // TODO                   p.scriptsExecutedAsynchronously.length > 0) {
527            p.scriptsExecutedAsynchronously.push (e);
528            log ('Running a script: aborted (async)');
529            // ISSUE: What is the difference with the case above?
530        } else if (e.src != null && e.manakaiParserInserted) {        } else if (e.src != null && e.manakaiParserInserted) {
531          if (p.scriptExecutedWhenParserResumes) {          if (p.scriptExecutedWhenParserResumes) {
532            log ('Error: There is a script that will execute as soon as the parser resumes.');            log ('Error: There is a script that will execute as soon as the parser resumes.');
# Line 767  algorithm.  Especially, this parser: Line 846  algorithm.  Especially, this parser:
846  algorithm, and so on.  algorithm, and so on.
847  <li>Does not raise parse errors for invalid attribute specifications in start  <li>Does not raise parse errors for invalid attribute specifications in start
848  or end tags.  or end tags.
849  <li>Does not support CDATA/PCDATA element other than <code>script</code>.  <li>Does not support PCDATA elements (<code>title</code> and
850    <code>textarea</code>).
851    <li>Does not strip the first newline in <code>pre</code> elements.
852  <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule  <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
853  in <code>script</code> element.  in <code>script</code> element.
854  <li>Does not support foreign (SVG or MathML) elements.  <li>Does not support foreign (SVG or MathML) elements.
# Line 792  the <abbr title="Uniform Resource Identi Line 873  the <abbr title="Uniform Resource Identi
873  the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.  the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
874  <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript  <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
875  string literals.  string literals.
876    <li>Does not handle <i>stop parsing</i> phase correctly if the document is
877    replaced by <code>document.open ()</code> call.  In other words, delayed
878    (deferred or asynchronous) script executions and event firings might be
879    treated in a wrong way if a <code>document.open ()</code> invocation
880    is implicitly done by <code>document.write ()</code> in a delayed script.
881  </ul>  </ul>
882    
883  <p>For some reason, this parser does not work in browsers that do  <p>For some reason, this parser does not work in browsers that do
884  not support JavaScript 1.5.  not support JavaScript 1.5.
885    
886  <!-- TODO: license -->  <!-- TODO: |src| attribute value should refer to the value at the time
887    when it is inserted into the document, not the value when the script is
888    executed.  Currently it does not matter, since we don't allow dynamic
889    modification to the |src| content/DOM attribute value yet. -->
890    
891  </body>  </body>
 </html>  
892    </html>
893    <!-- $Date$ -->
894    <!--
895    
896    Copyright 2008 Wakaba <w@suika.fam.cx>
897    
898    This program is free software; you can redistribute it and/or
899    modify it under the terms of the GNU General Public License
900    as published by the Free Software Foundation; either version 2
901    of the License, or (at your option) any later version.
902    
903    This program is distributed in the hope that it will be useful,
904    but WITHOUT ANY WARRANTY; without even the implied warranty of
905    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
906    GNU General Public License for more details.
907    
908    You should have received a copy of the GNU General Public License
909    along with this program; if not, write to the Free Software
910    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
911    
912    -->

Legend:
Removed from v.1.11  
changed lines
  Added in v.1.14

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24