--- markup/html/scripting-parser/parser.html 2008/04/20 06:07:24 1.1
+++ markup/html/scripting-parser/parser.html 2008/04/25 11:40:56 1.5
@@ -13,15 +13,17 @@
output {
display: block;
font-family: monospace;
- white-space: pre;
+ white-space: -moz-pre-wrap; /* vendor-prefixed spelling; presumably a fallback for Gecko builds that lack pre-wrap */
+ white-space: pre-wrap; /* declared last so it wins wherever the standard value is understood */
}
]+)>/, function (s, e) {
+ i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
+ if (p.insertionPoint < s.length ||
+ (p.insertionPoint <= s.length &&
+ s.substring (s.length - 1, 1) != '>')) {
+ token = {type: 'abort'};
+ return s;
+ }
token = {type: 'end-tag', value: e.toLowerCase ()};
+ p.insertionPoint -= s.length;
return '';
});
if (token) return token;
- i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {
- token = {type: 'start-tag', value: e.toLowerCase ()};
+ i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
+ if (p.insertionPoint < s.length ||
+ (p.insertionPoint <= s.length &&
+ s.substring (s.length - 1, 1) != '>')) {
+ token = {type: 'abort'};
+ return s;
+ }
+ var tagName;
+ var attrs = {};
+ e = e.replace (/^[\S]+/, function (v) {
+ tagName = v.toLowerCase ();
+ return '';
+ });
+ e = e.replace (/^\s*(\S+)\s*(?:=\s*"([^"]*)"|'([^']*)'|([^"']+))?/,
+ function (x, attrName, attrValue1, attrValue2, attrValue3) {
+ attrs[attrName] = attrValue1 || attrValue2 || attrValue3;
+ return '';
+ });
+ token = {type: 'start-tag', value: tagName, attrs: attrs};
+ p.insertionPoint -= s.length;
return '';
});
if (token) return token;
+ if (p.insertionPoint <= 0) {
+ return {type: 'abort'};
+ }
i.s = i.s.replace (/^[^<]+/, function (s) {
+ if (p.insertionPoint < s.length) {
+ token = {type: 'char', value: s.substring (0, p.insertionPoint)};
+ var ip = p.insertionPoint;
+ p.insertionPoint = 0;
+ return s.substring (ip, s.length);
+ }
token = {type: 'char', value: s};
+ p.insertionPoint -= s.length;
return '';
});
if (token) return token;
i.s = i.s.replace (/^[\s\S]/, function (s) {
token = {type: 'char', value: s};
+ p.insertionPoint -= s.length;
return '';
});
if (token) return token;
return {type: 'eof'};
} // getNextToken
- Parser.prototype.parse = function (i) {
+ Parser.prototype.parse = function () {
log ('start parsing');
while (true) {
- var token = this.getNextToken (i);
+ var token = this.getNextToken ();
log ('token: ' + token.type + ' "' + token.value + '"');
if (token.type == 'start-tag') {
- var el = new JSElement (token.value);
if (token.value == 'script') {
+ // 1. Create an element for the token in the HTML namespace.
+ var el = new JSElement (this.doc, token.value);
+ if (token.attrs.async != null) el.async = true;
+ if (token.attrs.defer != null) el.defer = true;
+ if (token.attrs.src != null) el.src = token.attrs.src;
+
+ // 2. Mark the element as being "parser-inserted".
+ el.manakaiParserInserted = true;
+
+ // 3. Switch the tokeniser's content model flag to the CDATA state.
this.parseMode = 'script';
+ // 4.1. Collect all the character tokens.
while (true) {
- var token = this.getNextToken (i);
+ var token = this.getNextToken ();
log ('token: ' + token.type + ' "' + token.value + '"');
if (token.type == 'char') {
+ // 5. Append a single Text node to the script element node.
el.manakaiAppendText (token.value);
+
+ // 4.2. Until it returns a token that is not a character token, or
+ // until it stops tokenising.
} else if (token.type == 'eof' ||
- (token.type == 'end-tag' && token.value == 'script')) {
+ (token.type == 'end-tag' && token.value == 'script') ||
+ token.type == 'abort') {
+ // 6. Switched back to the PCDATA state.
this.parseMode = 'pcdata';
+
+ // 7.1. If the next token is not an end tag token with ...
+ if (token.type != 'end-tag') {
+ // 7.2. This is a parse error.
+ log ('Parse error: no ' + 'script>');
+
+ // 7.3. Mark the script element as "already executed".
+ el.manakaiAlreadyExecuted = true;
+ } else {
+ // 7.4. Ignore it.
+ //
+ }
break;
}
}
+ // 8.1. If the parser were originally created for the ...
+ if (this.fragmentParsingMode) {
+ // 8.2. Mark the script element as "already executed" and ...
+ el.alreadyExecuted = true;
+ continue;
+ }
+
+ // 9.1. Let the old insertion point have the same value as the ...
+ var oldInsertionPoint = this.insertionPoint;
+ // 9.2. Let the insertion point be just before the next input ...
+ this.setInsertionPoint (0);
+
+ // 10. Append the new element to the current node.
this.openElements[this.openElements.length - 1].appendChild (el);
+
+ // 11. Let the insertion point have the value of the old ...
+ oldInsertionPoint += this.insertionPoint;
+ this.setInsertionPoint (oldInsertionPoint);
+
+ // 12. If there is a script that will execute as soon as ...
+
+
} else {
+ var el = new JSElement (this.doc, token.value);
this.openElements[this.openElements.length - 1].appendChild (el);
this.openElements.push (el);
}
@@ -116,20 +242,71 @@
} else {
log ('parse error: unmatched end tag: ' + token.value);
}
+ } else if (token.type == 'char') {
+ this.openElements[this.openElements.length - 1].manakaiAppendText
+ (token.value);
} else if (token.type == 'eof') {
break;
+ } else if (token.type == 'abort') {
+ log ('parse: abort');
+ return;
}
}
log ('stop parsing');
+
+ // readyState = 'interactive'
+
+ // "When a script completes loading" rules start applying.
+
+ // TODO: Handles "list of scripts that will execute as soon as possible"
+ // and "list of scripts that will execute asynchronously"
+
+ // Handle "list of scripts that will execute when the document has finished
+ // parsing".
+ var list = this.scriptsExecutedAfterParsing;
+ while (list.length > 0) {
+ // TODO: break unless completed loading
+
+ // Step 1.
+ //
+
+ // Step 2. and Step 3.
+ log ('Executing a |defer|red script...');
+ executeScript (this.doc, list.shift ());
+
+ // Step 4.
+ }
+
+ log ('DOMContentLoaded event fired');
+
+ // "delays the load event" things have completed:
+ // readyState = 'complete'
+ log ('load event fired');
} // parse
- function JSDocument () {
+ Parser.prototype.setInsertionPoint = function (ip) {
+ if (ip == undefined || ip == null || isNaN (ip)) {
+ log ('insertion point: set to undefined');
+ this.insertionPoint = undefined;
+ } else if (ip == this.in.s.length) {
+ log ('insertion point: end of file');
+ this.insertionPoint = ip;
+ } else {
+ log ('insertion point: set to ' + ip +
+ ' (before "' + this.in.s.substring (0, 10) + '")');
+ this.insertionPoint = ip;
+ }
+ }; // setInsertionPoint
+
+ function JSDocument (p) { // p: parser to associate with this document (open () later stores a new Parser in the same slot)
this.childNodes = []; // a new document starts with no children
+ this._parser = p; // read back as _parser by appendChild, open () and write ()
} // JSDocument
- function JSElement (localName) {
+ function JSElement (doc, localName) { // doc: owning document; localName: element name (the parser passes the lower-cased tag name)
this.localName = localName;
+ this.ownerDocument = doc; // appendChild follows this link to reach the document's parser
this.childNodes = []; // a new element starts with no children
} // JSElement
@@ -137,9 +314,107 @@
function (e) {
this.childNodes.push (e);
e.parentNode = this;
+
+ if (e.localName == 'script') {
+ log ('Running a script: start');
+
+ var doc = this.ownerDocument || this;
+ var p = doc._parser;
+
+ // 1. Script type
+ //
+
+ // 2.1. If scripting is disabled
+ //
+ // 2.2. If the script element was created by an XML ... innerHTML ...
+ //
+ // 2.3. If the user agent does not support the scripting language ...
+ //
+ // 2.4. If the script element has its "already executed" flag set
+ if (e.manakaiAlreadyExecuted) {
+ // 2.5. Abort these steps at this point.
+ log ('Running a script: aborted');
+ return e;
+ }
+
+ // 3. Set the element's "already executed" flag.
+ e.manakaiAlreadyExecuted = true;
+
+ // 4. If the element has a src attribute, then a load for ...
+ // TODO: load an external resource
+
+ // 5. The first of the following options:
+
+ // 5.1.
+ if (/* TODO: If the document is still being parsed && */
+ e.defer && !e.async) {
+ p.scriptsExecutedAfterParsing.push (e);
+ log ('Running a script: aborted (defer)');
+ } else if (e.async && e.src != null) {
+ // TODO
+ } else if (e.async && e.src == null
+ /* && list of scripts that will execute asynchronously is not empty */) {
+ // TODO
+ } else if (e.src != null && e.manakaiParserInserted) {
+ // TODO
+ } else if (e.src != null) {
+ // TODO
+ } else {
+ executeScript (doc, e); // even if other scripts are already executing.
+ }
+
+ log ('Running a script: end');
+ }
+
return e;
}; // appendChild
+ function executeScript (doc, e) {
+ log ('executing a script block: start');
+
+ // If the load resulted in an error, then ... firing an error event ...
+
+ // If the load was successful
+ log ('load event fired at the script element');
+
+ if (true) {
+ // Scripting is enabled, Document.designMode is disabled,
+ // Document is the active document in its browsing context
+
+ var s;
+ if (e.src != null) {
+ // TODO: from external file
+ } else {
+ s = e.text;
+ }
+
+ parseAndRunScript (doc, s);
+ }
+
+ log ('executing a script block: end');
+ } // executeScript
+
+ function parseAndRunScript (doc, s) {
+ while (true) {
+ var matched = false;
+ s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (s, t) {
+ matched = true;
+ var args = [];
+ t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
+ args.push (v.substring (1, v.length - 1));
+ return '';
+ });
+ doc.write.apply (doc, args);
+ return '';
+ });
+ if (s == '') break;
+ if (!matched) {
+ log ('Script parse error: "' + s + '"');
+ break;
+ }
+ }
+ } // parseAndRunScript
+
// Text node constructor: stores the given character data in |data|.
function JSText (data) {
  var node = this;
  node.data = data;
} // JSText
@@ -155,12 +430,105 @@
}
}; // manakaiAppendText
+ JSDocument.prototype.open = function () {
+ // Two or fewer arguments
+
+ // Step 1.
+ var type = arguments[0] || 'text/html';
+
+ // Step 2.
+ var replace = arguments[1] == 'replace';
+
+ // Step 3.
+ if (this._parser &&
+ !this._parser.scriptCreated &&
+ this._parser.in.insertionPoint != undefined) {
+ log ('document.open () in parsing mode is ignored');
+ return this;
+ }
+
+ // Step 4.
+ log ('onbeforeunload event fired');
+ log ('onunload event fired');
+
+ // Step 5.
+ if (this._parser) {
+ // Discard the parser.
+ }
+
+ // Step 6.
+ log ('document cleared by document.open ()');
+ this.childNodes = [];
+
+ // Step 7.
+ this._parser = new Parser (new InputStream (''), this);
+ this._parser.scriptCreated = true;
+
+ // Step 8.
+ this.manakaiIsHTML = true;
+
+ // Step 9.
+ // If not text/html, ...
+
+ // Step 10.
+ if (!replace) {
+ // History
+ }
+
+ // Step 11.
+ this._parser.setInsertionPoint (this._parser.in.s.length);
+
+ // Step 12.
+ return this;
+ }; // document.open
+
+ JSDocument.prototype.write = function () {
+ var p = this._parser;
+
+ // 1. If the insertion point is undefined, the open() method must be ...
+ if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
+ this.open ();
+ p = this._parser;
+ }
+
+ // 2. ... inserted into the input stream just before the insertion point.
+ var s = Array.join (arguments, '');
+ log ('document.write: insert "' + s + '"' +
+ ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
+ p.in.s = p.in.s.substring (0, p.insertionPoint) + s
+ + p.in.s.substring (p.insertionPoint, p.in.s.length);
+ p.insertionPoint += s.length;
+
+ // 3. If there is a script that will execute as soon as the parser resumes
+ // TODO
+
+ // 4. Process the characters that were inserted, ...
+ p.parse ();
+
+ // 5. Return
+ log ('document.write: return');
+ return;
+ }; // document.write
+
+ JSElement.prototype.__defineGetter__ ('text', function () {
+ var r = '';
+ for (var i = 0; i < this.childNodes.length; i++) {
+ if (this.childNodes[i] instanceof JSText) {
+ r += this.childNodes[i].data;
+ }
+ }
+ return r;
+ });
+
function dumpTree (n, indent) {
var r = '';
for (var i = 0; i < n.childNodes.length; i++) {
var node = n.childNodes[i];
if (node instanceof JSElement) {
r += '| ' + indent + node.localName + '\n';
+ if (node.async) r += '| ' + indent + ' async=""\n';
+ if (node.defer) r += '| ' + indent + ' defer=""\n';
+ if (node.src) r += '| ' + indent + ' src="' + node.src + '"\n';
r += dumpTree (node, indent + ' ');
} else if (node instanceof JSText) {
r += '| ' + indent + '"' + node.data + '"\n';
@@ -182,7 +550,7 @@
<head></head><body>
<p>
<script>
-document.write ('aaaaaaa</p>\n<script>\ndocument.write("cccccc")\n</', 'script>\nbbbbbb');
+document.write ('aaaaaaa</p><script>document.write("cccccc");</', 'script>bbbbbb');
</script>
<p>