--- markup/html/scripting-parser/parser.html 2008/04/25 13:42:51 1.6
+++ markup/html/scripting-parser/parser.html 2008/08/31 09:46:14 1.18
@@ -1,14 +1,26 @@
-Demo of HTML5 Parsing Algorithm with Scripting Enabled
+Live Scripting HTML Parser
+
+
]+)(?:>|$)/, function (s, e) {
if (p.insertionPoint < s.length ||
(p.insertionPoint <= s.length &&
- s.substring (s.length - 1, 1) != '>')) {
+ s.substring (s.length - 1, s.length) != '>')) {
token = {type: 'abort'};
return s;
}
@@ -115,7 +163,7 @@
i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
if (p.insertionPoint < s.length ||
(p.insertionPoint <= s.length &&
- s.substring (s.length - 1, 1) != '>')) {
+ s.substring (s.length - 1, s.length) != '>')) {
token = {type: 'abort'};
return s;
}
@@ -125,14 +173,19 @@
tagName = v.toLowerCase ();
return '';
});
- e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/,
- function (x, attrName, attrValue1, attrValue2, attrValue3) {
- v = attrValue1 || attrValue2 || attrValue3;
- v = v.replace (/"/g, '"').replace (/'/g, "'")
- .replace (/&/g, '&');
- attrs[attrName.toLowerCase ()] = v;
- return '';
- });
+ while (true) {
+ var m = false;
+ e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
+ function (x, attrName, attrValue1, attrValue2, attrValue3) {
+ v = attrValue1 || attrValue2 || attrValue3;
+ v = v.replace (/"/g, '"').replace (/'/g, "'")
+ .replace (/&/g, '&');
+ attrs[attrName.toLowerCase ()] = v;
+ m = true;
+ return '';
+ });
+ if (!m) break;
+ }
if (e.length) {
log ('Broken start tag: "' + e + '"');
}
@@ -173,6 +226,24 @@
var token = this.getNextToken ();
log ('token: ' + token.type + ' "' + token.value + '"');
+ if (this.cdataEndTagRequired) {
+ // Generic CDATA parsing algorithm
+
+ if (token.type != 'abort') {
+ // 7.
+ if (token.type == 'end-tag' && token.value == this.endTagName) {
+ // 7.1. Ignore it.
+ //
+ } else {
+ // 7.2. Parse error.
+ log ('Parse error: no ' + this.endTagName + '>');
+ this.nextToken.unshift (token);
+ }
+ this.cdataEndTagRequired = false;
+ continue;
+ }
+ }
+
if (token.type == 'start-tag') {
if (token.value == 'script') {
// 1. Create an element for the token in the HTML namespace.
@@ -185,7 +256,8 @@
el.manakaiParserInserted = true;
// 3. Switch the tokeniser's content model flag to the CDATA state.
- this.parseMode = 'script';
+ this.parseMode = 'cdata';
+ this.endTagName = 'script';
// 4.1. Collect all the character tokens.
while (true) {
@@ -199,15 +271,16 @@
// 4.2. Until it returns a token that is not a character token, or
// until it stops tokenising.
} else if (token.type == 'eof' ||
- (token.type == 'end-tag' && token.value == 'script') ||
+ token.type == 'end-tag' ||
token.type == 'abort') {
// 6. Switched back to the PCDATA state.
this.parseMode = 'pcdata';
// 7.1. If the next token is not an end tag token with ...
- if (token.type != 'end-tag') {
+ if (!(token.type == 'end-tag' && token.value == 'script')) {
// 7.2. This is a parse error.
log ('Parse error: no ' + 'script>');
+ this.nextToken.unshift (token);
// 7.3. Mark the script element as "already executed".
el.manakaiAlreadyExecuted = true;
@@ -235,11 +308,12 @@
this.openElements[this.openElements.length - 1].appendChild (el);
// 11. Let the insertion point have the value of the old ...
+
oldInsertionPoint += this.insertionPoint;
this.setInsertionPoint (oldInsertionPoint);
- // 12. If there is a script that will execute as soon as ...
- while (this.scriptExecutedWhenParserResumes) {
+ // 12. If there is a pending external script
+ while (this.pendingExternalScript) {
// 12.1. If the tree construction stage is being called reentrantly
if (this.reentrant) {
log ('parse: abort (reentrance)');
@@ -249,8 +323,8 @@
// 12.2. Otherwise
} else {
// 1.
- var script = this.scriptExecutedWhenParserResumes;
- this.scriptExecutedWhenParserResumes = null;
+ var script = this.pendingExternalScript;
+ this.pendingExternalScript = null;
// 2. Pause until the script has completed loading.
//
@@ -268,6 +342,57 @@
//
}
}
+ } else if (token.value == 'style' ||
+ token.value == 'noscript' ||
+ token.value == 'xmp') {
+ // 1. Create an element for the token in the HTML namespace.
+ var el = new JSElement (this.doc, token.value);
+
+ // 2. Append the new element to the current node.
+ this.openElements[this.openElements.length - 1].appendChild (el);
+
+ // 3. Switch the tokeniser's content model flag to the CDATA state.
+ this.parseMode = 'cdata';
+ this.endTagName = token.value;
+
+ // 4.1. Collect all the character tokens.
+ while (true) {
+ var token = this.getNextToken ();
+ log ('token: ' + token.type + ' "' + token.value + '"');
+
+ if (token.type == 'char') {
+ // 5. Append a single Text node to the new element node.
+ el.manakaiAppendText (token.value);
+
+ // 4.2. Until it returns a token that is not a character token, or
+ // until it stops tokenising.
+ } else if (token.type == 'eof' ||
+ token.type == 'end-tag' ||
+ token.type == 'abort') {
+ // 6. Switched back to the PCDATA state.
+ this.parseMode = 'pcdata';
+
+ if (token.type == 'abort') {
+ this.cdataEndTagRequired = true;
+ break;
+ }
+
+ // 7.1. If the next token is not an end tag token with ...
+ if (!(token.type == 'end-tag' &&
+ token.value == this.endTagName)) {
+ // 7.2. This is a parse error.
+ log ('Parse error: no ' + this.endTagName + '>');
+ this.nextToken.unshift (token);
+
+ // 7.3. Mark the element as "already executed".
+ el.manakaiAlreadyExecuted = true;
+ } else {
+ // 7.4. Ignore it.
+ //
+ }
+ break;
+ }
+ }
} else {
var el = new JSElement (this.doc, token.value);
this.openElements[this.openElements.length - 1].appendChild (el);
@@ -298,8 +423,41 @@
// "When a script completes loading" rules start applying.
- // TODO: Handles "list of scripts that will execute as soon as possible"
- // and "list of scripts that will execute asynchronously"
+ while (this.scriptsExecutedSoon.length > 0 ||
+ this.scriptsExecutedAsynchronously.length > 0) {
+ // Handle "list of scripts that will execute as soon as possible".
+ while (this.scriptsExecutedSoon.length > 0) {
+ var e = this.scriptsExecutedSoon.shift ();
+
+ // If it has completed loading
+ log ('Execute an external script not inserted by parser...');
+ executeScript (this.doc, e);
+
+ // NOTE: It MAY be executed before the end of the parsing, according
+ // to the spec.
+ this.hasAsyncScript = true;
+ }
+
+ // Handle "list of scripts that will execute asynchronously".
+ while (this.scriptsExecutedAsynchronously.length > 0) {
+ var e = this.scriptsExecutedAsynchronously.shift ();
+
+ // Step 1.
+ // We assume that all scripts have been loaded at this time.
+
+ // Step 2.
+ log ('Execute an asynchronous script...');
+ executeScript (this.doc, e);
+
+ // Step 3.
+ //
+
+ // Step 4.
+ //
+
+ this.hasAsyncScript = true;
+ }
+ }
// Handle "list of scripts that will execute when the document has finished
// parsing".
@@ -319,7 +477,7 @@
log ('DOMContentLoaded event fired');
- // "delays tha load event" things has completed:
+ // "delays the load event" things have completed:
// readyState = 'complete'
log ('load event fired');
@@ -330,12 +488,12 @@
if (ip == undefined || ip == null || isNaN (ip)) {
log ('insertion point: set to undefined');
this.insertionPoint = undefined;
- } else if (ip == this.in.s.length) {
+ } else if (ip == this.input.s.length) {
log ('insertion point: end of file');
this.insertionPoint = ip;
} else {
log ('insertion point: set to ' + ip +
- ' (before "' + this.in.s.substring (0, 10) + '")');
+ ' (before "' + this.input.s.substring (0, 10) + '")');
this.insertionPoint = ip;
}
}; // setInsertionPoint
@@ -375,7 +533,7 @@
// 2.4. If the script element has its "already executed" flag set
if (e.manakaiAlreadyExecuted) {
// 2.5. Abort these steps at this point.
- log ('Running a script: aborted');
+ log ('Running a script: aborted (already executed)');
logIndentLevel--;
return e;
}
@@ -394,18 +552,22 @@
p.scriptsExecutedAfterParsing.push (e);
log ('Running a script: aborted (defer)');
} else if (e.async && e.src != null) {
- // TODO
- } else if (e.async && e.src == null
- /* && list of scripts that will execute asynchronously is not empty */) {
- // TODO
+ p.scriptsExecutedAsynchronously.push (e);
+ log ('Running a script: aborted (async src)');
+ } else if (e.async && e.src == null &&
+ p.scriptsExecutedAsynchronously.length > 0) {
+ p.scriptsExecutedAsynchronously.push (e);
+ log ('Running a script: aborted (async)');
+ // ISSUE: What is the difference with the case above?
} else if (e.src != null && e.manakaiParserInserted) {
- if (p.scriptExecutedWhenParserResumes) {
- log ('Error: There is a script that will execute as soon as the parser resumes.');
+ if (p.pendingExternalScript) {
+ log ('Error: There is a pending external script.');
}
- p.scriptExecutedWhenParserResumes = e;
- log ('Running a script: aborted (src)');
+ p.pendingExternalScript = e;
+ log ('Running a script: aborted (src parser-inserted)');
} else if (e.src != null) {
- // TODO
+ p.scriptsExecutedSoon.push (e);
+ log ('Running a script: aborted (src)');
} else {
executeScript (doc, e); // even if other scripts are already executing.
}
@@ -436,7 +598,6 @@
}
// If the load was successful
- log ('load event fired at the script element');
if (true) {
// Scripting is enabled, Document.designMode is disabled,
@@ -445,6 +606,8 @@
parseAndRunScript (doc, s);
}
+ log ('load event fired at the script element');
+
log ('executing a script block: end');
} // executeScript
@@ -453,9 +616,9 @@
var m;
if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) {
if (m[1]) {
- return m[1];
+ return unescapeJSLiteral (m[1]);
} else if (m[2]) {
- return m[2];
+ return unescapeJSLiteral (m[2]);
} else {
return null;
}
@@ -476,12 +639,30 @@
matched = true;
var args = [];
t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
- args.push (v.substring (1, v.length - 1));
+ args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
return '';
});
doc.write.apply (doc, args);
return '';
});
+ var noDocumentElement = false;
+ s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
+ function (s, t, u) {
+ matched = true;
+ var args = [unescapeJSLiteral (t ? t : u)];
+ noDocumentElement = !doc._insertExternalScript.apply (doc, args);
+ return '';
+ });
+ if (noDocumentElement) {
+ log ('Script error: documentElement is null');
+ break;
+ }
+ s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
+ function (s, t) {
+ matched = true;
+ log (dumpTree (doc, ''));
+ return '';
+ });
if (s == '') break;
if (!matched) {
log ('Script parse error: "' + s + '"');
@@ -490,6 +671,12 @@
}
} // parseAndRunScript
+ function unescapeJSLiteral (s) { // Returns s with every \uXXXX escape (exactly four hex digits) replaced by the character it denotes; no other escape form is matched.
+ return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) { // t: the whole matched escape, v: its four hex digits
+ return String.fromCharCode (parseInt ('0x' + v)); // the '0x' prefix makes parseInt read v as hexadecimal
+ });
+ } // unescapeJSLiteral
+
function JSText (data) {
this.data = data;
} // JSText
@@ -517,7 +704,7 @@
// Step 3.
if (this._parser &&
!this._parser.scriptCreated &&
- this._parser.in.insertionPoint != undefined) {
+ this._parser.input.insertionPoint != undefined) {
log ('document.open () in parsing mode is ignored');
return this;
}
@@ -551,13 +738,14 @@
}
// Step 11.
- this._parser.setInsertionPoint (this._parser.in.s.length);
+ this._parser.setInsertionPoint (this._parser.input.s.length);
// Step 12.
return this;
}; // document.open
JSDocument.prototype.write = function () {
+ log ('document.write: start');
logIndentLevel++;
var p = this._parser;
@@ -571,15 +759,17 @@
// 2. ... inserted into the input stream just before the insertion point.
var s = Array.join (arguments, '');
log ('document.write: insert "' + s + '"' +
- ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
- p.in.s = p.in.s.substring (0, p.insertionPoint) + s
- + p.in.s.substring (p.insertionPoint, p.in.s.length);
+ ' before "' +
+ p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
+ p.input.s = p.input.s.substring (0, p.insertionPoint) + s
+ + p.input.s.substring (p.insertionPoint, p.input.s.length);
p.insertionPoint += s.length;
- // 3. If there is a script that will execute as soon as the parser resumes
- if (p.scriptExecutedAfterParserResumes) {
+ // 3. If there is a pending external script
+ if (p.pendingExternalScript) {
log ('document.write: processed later (there is an unprocessed