--- markup/html/scripting-parser/parser.html 2008/04/27 11:27:04 1.13
+++ markup/html/scripting-parser/parser.html 2010/06/20 03:39:12 1.21
@@ -65,9 +65,11 @@
var logIndentLevel = 0;
function log (s) {
+ var indent = '';
for (var i = 0; i < logIndentLevel; i++) {
- s = ' ' + s;
+ indent += ' ';
}
+ s = indent + s.replace (/\n/g, "\n" + indent);
document.logElement.appendChild (document.createTextNode (s + "\n"));
} // log
@@ -81,6 +83,7 @@
doc = new JSDocument (this);
doc.manakaiIsHTML = true;
}
+ this.nextToken = [];
this.doc = doc;
this.openElements = [doc];
this.input = i;
@@ -90,9 +93,14 @@
} // Parser
Parser.prototype.getNextToken = function () {
+ if (this.nextToken.length) {
+ return this.nextToken.shift ();
+ }
+
var p = this;
var i = this.input;
- if (this.parseMode == 'script') {
+ if (this.parseMode == 'cdata') {
+ var tagName = this.endTagName;
var token;
if (p.insertionPoint <= 0) {
return {type: 'abort'};
@@ -110,21 +118,22 @@
return '';
});
if (token) return token;
- i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
+ var pattern = new RegExp ('^' + tagName + '>', 'i');
+ i.s = i.s.replace (pattern, function (s) {
if (p.insertionPoint < s.length) {
token = {type: 'abort'};
return s;
}
- token = {type: 'end-tag', value: 'script'};
+ token = {type: 'end-tag', value: tagName};
p.insertionPoint -= s.length;
return '';
});
if (token) return token;
var m;
- if ((p.insertionPoint < ']+)(?:>|$)/, function (s, e) {
if (p.insertionPoint < s.length ||
(p.insertionPoint <= s.length &&
- s.substring (s.length - 1, 1) != '>')) {
+ s.substring (s.length - 1, s.length) != '>')) {
token = {type: 'abort'};
return s;
}
@@ -154,7 +163,7 @@
i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
if (p.insertionPoint < s.length ||
(p.insertionPoint <= s.length &&
- s.substring (s.length - 1, 1) != '>')) {
+ s.substring (s.length - 1, s.length) != '>')) {
token = {type: 'abort'};
return s;
}
@@ -217,6 +226,24 @@
var token = this.getNextToken ();
log ('token: ' + token.type + ' "' + token.value + '"');
+ if (this.cdataEndTagRequired) {
+ // Generic CDATA parsing algorithm
+
+ if (token.type != 'abort') {
+ // 7.
+ if (token.type == 'end-tag' && token.value == this.endTagName) {
+ // 7.1. Ignores it.
+ //
+ } else {
+ // 7.2. Parse error.
+ log ('Parse error: no ' + this.endTagName + '>');
+ this.nextToken.unshift (token);
+ }
+ this.cdataEndTagRequired = false;
+ continue;
+ }
+ }
+
if (token.type == 'start-tag') {
if (token.value == 'script') {
// 1. Create an element for the token in the HTML namespace.
@@ -229,7 +256,8 @@
el.manakaiParserInserted = true;
// 3. Switch the tokeniser's content model flag to the CDATA state.
- this.parseMode = 'script';
+ this.parseMode = 'cdata';
+ this.endTagName = 'script';
// 4.1. Collect all the character tokens.
while (true) {
@@ -243,15 +271,16 @@
// 4.2. Until it returns a token that is not a character token, or
// until it stops tokenising.
} else if (token.type == 'eof' ||
- (token.type == 'end-tag' && token.value == 'script') ||
+ token.type == 'end-tag' ||
token.type == 'abort') {
// 6. Switched back to the PCDATA state.
this.parseMode = 'pcdata';
// 7.1. If the next token is not an end tag token with ...
- if (token.type != 'end-tag') {
+ if (!(token.type == 'end-tag' && token.value == 'script')) {
// 7.2. This is a parse error.
log ('Parse error: no ' + 'script>');
+ this.nextToken.unshift (token);
// 7.3. Mark the script element as "already executed".
el.manakaiAlreadyExecuted = true;
@@ -266,7 +295,7 @@
// 8.1. If the parser were originally created for the ...
if (this.fragmentParsingMode) {
// 8.2. Mark the script element as "already executed" and ...
- el.alreadyExecuted = true;
+ el.manakaiAlreadyExecuted = true;
continue;
}
@@ -283,8 +312,8 @@
oldInsertionPoint += this.insertionPoint;
this.setInsertionPoint (oldInsertionPoint);
- // 12. If there is a script that will execute as soon as ...
- while (this.scriptExecutedWhenParserResumes) {
+ // 12. If there is a pending external script
+ while (this.pendingExternalScript) {
// 12.1. If the tree construction stage is being called reentrantly
if (this.reentrant) {
log ('parse: abort (reentrance)');
@@ -294,8 +323,8 @@
// 12.2. Otherwise
} else {
// 1.
- var script = this.scriptExecutedWhenParserResumes;
- this.scriptExecutedWhenParserResumes = null;
+ var script = this.pendingExternalScript;
+ this.pendingExternalScript = null;
// 2. Pause until the script has completed loading.
//
@@ -313,6 +342,57 @@
//
}
}
+ } else if (token.value == 'style' ||
+ token.value == 'noscript' ||
+ token.value == 'xmp') {
+ // 1. Create an element for the token in the HTML namespace.
+ var el = new JSElement (this.doc, token.value);
+
+ // 2. Append the new element to the current node.
+ this.openElements[this.openElements.length - 1].appendChild (el);
+
+ // 3. Switch the tokeniser's content model flag to the CDATA state.
+ this.parseMode = 'cdata';
+ this.endTagName = token.value;
+
+ // 4.1. Collect all the character tokens.
+ while (true) {
+ var token = this.getNextToken ();
+ log ('token: ' + token.type + ' "' + token.value + '"');
+
+ if (token.type == 'char') {
+ // 5. Append a single Text node to the new element node.
+ el.manakaiAppendText (token.value);
+
+ // 4.2. Until it returns a token that is not a character token, or
+ // until it stops tokenising.
+ } else if (token.type == 'eof' ||
+ token.type == 'end-tag' ||
+ token.type == 'abort') {
+ // 6. Switched back to the PCDATA state.
+ this.parseMode = 'pcdata';
+
+ if (token.type == 'abort') {
+ this.cdataEndTagRequired = true;
+ break;
+ }
+
+ // 7.1. If the next token is not an end tag token with ...
+ if (!(token.type == 'end-tag' &&
+ token.value == this.endTagName)) {
+ // 7.2. This is a parse error.
+ log ('Parse error: no ' + this.endTagName + '>');
+ this.nextToken.unshift (token);
+
+ // 7.3. Mark the element as "already executed".
+ el.manakaiAlreadyExecuted = true;
+ } else {
+ // 7.4. Ignore it.
+ //
+ }
+ break;
+ }
+ }
} else {
var el = new JSElement (this.doc, token.value);
this.openElements[this.openElements.length - 1].appendChild (el);
@@ -397,7 +477,7 @@
log ('DOMContentLoaded event fired');
- // "delays tha load event" things has completed:
+ // "delays the load event" things have completed:
// readyState = 'complete'
log ('load event fired');
@@ -441,54 +521,60 @@
var doc = this.ownerDocument || this;
var p = doc._parser;
- // 1. Script type
+ // 1. The script's type
+ //
+
+ // 2. The script's character encoding
//
- // 2.1. If scripting is disabled
+ // 3.1. If without script
//
// 2.2. If the script element was created by an XML ... innerHTML ...
//
// 2.3. If the user agent does not support the scripting language ...
//
- // 2.4. If the script element has its "already executed" flag set
- if (e.manakaiAlreadyExecuted) {
+ if (false) {
// 2.5. Abort these steps at this point.
- log ('Running a script: aborted');
+ log ('Running a script: aborted (noscript)');
logIndentLevel--;
return e;
}
- // 3. Set the element's "already executed" flag.
+ // 4. Set the element's "already executed" flag.
e.manakaiAlreadyExecuted = true;
- // 4. If the element has a src attribute, then a load for ...
+ // 5. If the element has a src attribute, then a load for ...
// TODO: load an external resource
// 5. The first of the following options:
- // 5.1.
if (/* TODO: If the document is still being parsed && */
e.defer && !e.async) {
+ // 6.1.
p.scriptsExecutedAfterParsing.push (e);
log ('Running a script: aborted (defer)');
} else if (e.async && e.src != null) {
+ // 6.2.
p.scriptsExecutedAsynchronously.push (e);
log ('Running a script: aborted (async src)');
} else if (e.async && e.src == null &&
p.scriptsExecutedAsynchronously.length > 0) {
+ // 6.3.
p.scriptsExecutedAsynchronously.push (e);
log ('Running a script: aborted (async)');
- // ISSUE: What is the difference with the case above?
} else if (e.src != null && e.manakaiParserInserted) {
- if (p.scriptExecutedWhenParserResumes) {
- log ('Error: There is a script that will execute as soon as the parser resumes.');
+ // 6.4.
+ if (p.pendingExternalScript) {
+ log ('Error: There is a pending external script.');
}
- p.scriptExecutedWhenParserResumes = e;
+ p.pendingExternalScript = e;
log ('Running a script: aborted (src parser-inserted)');
} else if (e.src != null) {
+ // 6.5.
p.scriptsExecutedSoon.push (e);
log ('Running a script: aborted (src)');
} else {
+ // 6.6.
executeScript (doc, e); // even if other scripts are already executing.
}
@@ -518,7 +604,6 @@
}
// If the load was successful
- log ('load event fired at the script element');
if (true) {
// Scripting is enabled, Document.designMode is disabled,
@@ -527,6 +612,8 @@
parseAndRunScript (doc, s);
}
+ log ('load event fired at the script element');
+
log ('executing a script block: end');
} // executeScript
@@ -564,11 +651,22 @@
doc.write.apply (doc, args);
return '';
});
- s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
+ var noDocumentElement = false;
+ s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
function (s, t, u) {
matched = true;
var args = [unescapeJSLiteral (t ? t : u)];
- doc._insertExternalScript.apply (doc, args);
+ noDocumentElement = !doc._insertExternalScript.apply (doc, args);
+ return '';
+ });
+ if (noDocumentElement) {
+ log ('Script error: documentElement is null');
+ break;
+ }
+ s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
+ function (s, t) {
+ matched = true;
+ log (dumpTree (doc, ''));
return '';
});
if (s == '') break;
@@ -653,6 +751,7 @@
}; // document.open
JSDocument.prototype.write = function () {
+ log ('document.write: start');
logIndentLevel++;
var p = this._parser;
@@ -672,10 +771,11 @@
+ p.input.s.substring (p.insertionPoint, p.input.s.length);
p.insertionPoint += s.length;
- // 3. If there is a script that will execute as soon as the parser resumes
- if (p.scriptExecutedAfterParserResumes) {
+ // 3. If there is a pending external script
+ if (p.pendingExternalScript) {
log ('document.write: processed later (there is an unprocessed