--- markup/html/scripting-parser/parser.html 2008/04/27 11:27:04 1.13 +++ markup/html/scripting-parser/parser.html 2008/08/31 09:50:49 1.19 @@ -65,9 +65,11 @@ var logIndentLevel = 0; function log (s) { + var indent = ''; for (var i = 0; i < logIndentLevel; i++) { - s = ' ' + s; + indent += ' '; } + s = indent + s.replace (/\n/g, "\n" + indent); document.logElement.appendChild (document.createTextNode (s + "\n")); } // log @@ -81,6 +83,7 @@ doc = new JSDocument (this); doc.manakaiIsHTML = true; } + this.nextToken = []; this.doc = doc; this.openElements = [doc]; this.input = i; @@ -90,9 +93,14 @@ } // Parser Parser.prototype.getNextToken = function () { + if (this.nextToken.length) { + return this.nextToken.shift (); + } + var p = this; var i = this.input; - if (this.parseMode == 'script') { + if (this.parseMode == 'cdata') { + var tagName = this.endTagName; var token; if (p.insertionPoint <= 0) { return {type: 'abort'}; @@ -110,21 +118,22 @@ return ''; }); if (token) return token; - i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) { + var pattern = new RegExp ('^', 'i'); + i.s = i.s.replace (pattern, function (s) { if (p.insertionPoint < s.length) { token = {type: 'abort'}; return s; } - token = {type: 'end-tag', value: 'script'}; + token = {type: 'end-tag', value: tagName}; p.insertionPoint -= s.length; return ''; }); if (token) return token; var m; - if ((p.insertionPoint < ']+)(?:>|$)/, function (s, e) { if (p.insertionPoint < s.length || (p.insertionPoint <= s.length && - s.substring (s.length - 1, 1) != '>')) { + s.substring (s.length - 1, s.length) != '>')) { token = {type: 'abort'}; return s; } @@ -154,7 +163,7 @@ i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) { if (p.insertionPoint < s.length || (p.insertionPoint <= s.length && - s.substring (s.length - 1, 1) != '>')) { + s.substring (s.length - 1, s.length) != '>')) { token = {type: 'abort'}; return s; } @@ -217,6 +226,24 @@ var token = this.getNextToken (); log ('token: ' + token.type + ' "' + token.value + '"'); + if (this.cdataEndTagRequired) { + // Generic CDATA parsing algorithm + + if (token.type != 'abort') { + // 7. + if (token.type == 'end-tag' && token.value == this.endTagName) { + // 7.1. Ignores it. + // + } else { + // 7.2. Parse error. + log ('Parse error: no '); + this.nextToken.unshift (token); + } + this.cdataEndTagRequired = false; + continue; + } + } + if (token.type == 'start-tag') { if (token.value == 'script') { // 1. Create an element for the token in the HTML namespace. @@ -229,7 +256,8 @@ el.manakaiParserInserted = true; // 3. Switch the tokeniser's content model flag to the CDATA state. - this.parseMode = 'script'; + this.parseMode = 'cdata'; + this.endTagName = 'script'; // 4.1. Collect all the character tokens. while (true) { @@ -243,15 +271,16 @@ // 4.2. Until it returns a token that is not a character token, or // until it stops tokenising. } else if (token.type == 'eof' || - (token.type == 'end-tag' && token.value == 'script') || + token.type == 'end-tag' || token.type == 'abort') { // 6. Switched back to the PCDATA state. this.parseMode = 'pcdata'; // 7.1. If the next token is not an end tag token with ... - if (token.type != 'end-tag') { + if (!(token.type == 'end-tag' && token.value == 'script')) { // 7.2. This is a parse error. log ('Parse error: no '); + this.nextToken.unshift (token); // 7.3. Mark the script element as "already executed". el.manakaiAlreadyExecuted = true; @@ -283,8 +312,8 @@ oldInsertionPoint += this.insertionPoint; this.setInsertionPoint (oldInsertionPoint); - // 12. If there is a script that will execute as soon as ... - while (this.scriptExecutedWhenParserResumes) { + // 12. If there is a pending external script + while (this.pendingExternalScript) { // 12.1. If the tree construction stage is being called reentrantly if (this.reentrant) { log ('parse: abort (reentrance)'); @@ -294,8 +323,8 @@ // 12.2. Otherwise } else { // 1. - var script = this.scriptExecutedWhenParserResumes; - this.scriptExecutedWhenParserResumes = null; + var script = this.pendingExternalScript; + this.pendingExternalScript = null; // 2. Pause until the script has completed loading. // @@ -313,6 +342,57 @@ // } } + } else if (token.value == 'style' || + token.value == 'noscript' || + token.value == 'xmp') { + // 1. Create an element for the token in the HTML namespace. + var el = new JSElement (this.doc, token.value); + + // 2. Append the new element to the current node. + this.openElements[this.openElements.length - 1].appendChild (el); + + // 3. Switch the tokeniser's content model flag to the CDATA state. + this.parseMode = 'cdata'; + this.endTagName = token.value; + + // 4.1. Collect all the character tokens. + while (true) { + var token = this.getNextToken (); + log ('token: ' + token.type + ' "' + token.value + '"'); + + if (token.type == 'char') { + // 5. Append a single Text node to the script element node. + el.manakaiAppendText (token.value); + + // 4.2. Until it returns a token that is not a character token, or + // until it stops tokenising. + } else if (token.type == 'eof' || + token.type == 'end-tag' || + token.type == 'abort') { + // 6. Switched back to the PCDATA state. + this.parseMode = 'pcdata'; + + if (token.type == 'abort') { + this.cdataEndTagRequired = true; + break; + } + + // 7.1. If the next token is not an end tag token with ... + if (!(token.type == 'end-tag' && + token.value == this.endTagName)) { + // 7.2. This is a parse error. + log ('Parse error: no '); + this.nextToken.unshift (token); + + // 7.3. Mark the script element as "already executed". + el.manakaiAlreadyExecuted = true; + } else { + // 7.4. Ignore it. + // + } + break; + } + } } else { var el = new JSElement (this.doc, token.value); this.openElements[this.openElements.length - 1].appendChild (el); @@ -397,7 +477,7 @@ log ('DOMContentLoaded event fired'); - // "delays tha load event" things has completed: + // "delays the load event" things has completed: // readyState = 'complete' log ('load event fired'); @@ -453,7 +533,7 @@ // 2.4. If the script element has its "already executed" flag set if (e.manakaiAlreadyExecuted) { // 2.5. Abort these steps at this point. - log ('Running a script: aborted'); + log ('Running a script: aborted (already executed)'); logIndentLevel--; return e; } @@ -480,10 +560,10 @@ log ('Running a script: aborted (async)'); // ISSUE: What is the difference with the case above? } else if (e.src != null && e.manakaiParserInserted) { - if (p.scriptExecutedWhenParserResumes) { - log ('Error: There is a script that will execute as soon as the parser resumes.'); + if (p.pendingExternalScript) { + log ('Error: There is a pending external script.'); } - p.scriptExecutedWhenParserResumes = e; + p.pendingExternalScript = e; log ('Running a script: aborted (src parser-inserted)'); } else if (e.src != null) { p.scriptsExecutedSoon.push (e); @@ -518,7 +598,6 @@ } // If the load was successful - log ('load event fired at the script element'); if (true) { // Scripting is enabled, Document.designMode is disabled, @@ -527,6 +606,8 @@ parseAndRunScript (doc, s); } + log ('load event fired at the script element'); + log ('executing a script block: end'); } // executeScript @@ -564,11 +645,22 @@ doc.write.apply (doc, args); return ''; }); - s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/, + var noDocumentElement = false; + s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/, function (s, t, u) { matched = true; var args = [unescapeJSLiteral (t ? t : u)]; - doc._insertExternalScript.apply (doc, args); + noDocumentElement = !doc._insertExternalScript.apply (doc, args); + return ''; + }); + if (noDocumentElement) { + log ('Script error: documentElement is null'); + break; + } + s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/, + function (s, t) { + matched = true; + log (dumpTree (doc, '')); return ''; }); if (s == '') break; @@ -653,6 +745,7 @@ }; // document.open JSDocument.prototype.write = function () { + log ('document.write: start'); logIndentLevel++; var p = this._parser; @@ -672,10 +765,11 @@ + p.input.s.substring (p.insertionPoint, p.input.s.length); p.insertionPoint += s.length; - // 3. If there is a script that will execute as soon as the parser resumes - if (p.scriptExecutedAfterParserResumes) { + // 3. If there is a pending external script + if (p.pendingExternalScript) { log ('document.write: processed later (there is an unprocessed