]+)(?:>|$)/, function (s, e) { if (p.insertionPoint < s.length || (p.insertionPoint <= s.length && s.substring (s.length - 1, 1) != '>')) { token = {type: 'abort'}; return s; } token = {type: 'end-tag', value: e.toLowerCase ()}; p.insertionPoint -= s.length; return ''; }); if (token) return token; i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) { if (p.insertionPoint < s.length || (p.insertionPoint <= s.length && s.substring (s.length - 1, 1) != '>')) { token = {type: 'abort'}; return s; } var tagName; var attrs = {}; e = e.replace (/^[\S]+/, function (v) { tagName = v.toLowerCase (); return ''; }); while (true) { var m = false; e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/, function (x, attrName, attrValue1, attrValue2, attrValue3) { v = attrValue1 || attrValue2 || attrValue3; v = v.replace (/"/g, '"').replace (/'/g, "'") .replace (/&/g, '&'); attrs[attrName.toLowerCase ()] = v; m = true; return ''; }); if (!m) break; } if (e.length) { log ('Broken start tag: "' + e + '"'); } token = {type: 'start-tag', value: tagName, attrs: attrs}; p.insertionPoint -= s.length; return ''; }); if (token) return token; if (p.insertionPoint <= 0) { return {type: 'abort'}; } i.s = i.s.replace (/^[^<]+/, function (s) { if (p.insertionPoint < s.length) { token = {type: 'char', value: s.substring (0, p.insertionPoint)}; var ip = p.insertionPoint; p.insertionPoint = 0; return s.substring (ip, s.length); } token = {type: 'char', value: s}; p.insertionPoint -= s.length; return ''; }); if (token) return token; i.s = i.s.replace (/^[\s\S]/, function (s) { token = {type: 'char', value: s}; p.insertionPoint -= s.length; return ''; }); if (token) return token; return {type: 'eof'}; } // getNextToken Parser.prototype.parse = function () { logIndentLevel++; log ('parse: start'); while (true) { var token = this.getNextToken (); log ('token: ' + token.type + ' "' + token.value + '"'); if (token.type == 'start-tag') { if (token.value == 'script') { // 1. 
Create an element for the token in the HTML namespace. var el = new JSElement (this.doc, token.value); if (token.attrs.async != null) el.async = true; if (token.attrs.defer != null) el.defer = true; if (token.attrs.src != null) el.src = token.attrs.src; // 2. Mark the element as being "parser-inserted". el.manakaiParserInserted = true; // 3. Switch the tokeniser's content model flag to the CDATA state. this.parseMode = 'script'; // 4.1. Collect all the character tokens. while (true) { var token = this.getNextToken (); log ('token: ' + token.type + ' "' + token.value + '"'); if (token.type == 'char') { // 5. Append a single Text node to the script element node. el.manakaiAppendText (token.value); // 4.2. Until it returns a token that is not a character token, or // until it stops tokenising. } else if (token.type == 'eof' || (token.type == 'end-tag' && token.value == 'script') || token.type == 'abort') { // 6. Switched back to the PCDATA state. this.parseMode = 'pcdata'; // 7.1. If the next token is not an end tag token with ... if (token.type != 'end-tag') { // 7.2. This is a parse error. log ('Parse error: no ' + 'script>'); // 7.3. Mark the script element as "already executed". el.manakaiAlreadyExecuted = true; } else { // 7.4. Ignore it. // } break; } } // 8.1. If the parser were originally created for the ... if (this.fragmentParsingMode) { // 8.2. Mark the script element as "already executed" and ... el.alreadyExecuted = true; continue; } // 9.1. Let the old insertion point have the same value as the ... var oldInsertionPoint = this.insertionPoint; // 9.2. Let the insertion point be just before the next input ... this.setInsertionPoint (0); // 10. Append the new element to the current node. this.openElements[this.openElements.length - 1].appendChild (el); // 11. Let the insertion point have the value of the old ... oldInsertionPoint += this.insertionPoint; this.setInsertionPoint (oldInsertionPoint); // 12. If there is a script that will execute as soon as ... 
while (this.scriptExecutedWhenParserResumes) { // 12.1. If the tree construction stage is being called reentrantly if (this.reentrant) { log ('parse: abort (reentrance)'); logIndentLevel--; return; // 12.2. Otherwise } else { // 1. var script = this.scriptExecutedWhenParserResumes; this.scriptExecutedWhenParserResumes = null; // 2. Pause until the script has completed loading. // // 3. Let the insertion point to just before the next input char. this.setInsertionPoint (0); // 4. Execute the script. executeScript (this.doc, script); // 5. Let the insertion point be undefined again. this.setInsertionPoint (undefined); // 6. If there is once again a script that will execute ... // } } } else { var el = new JSElement (this.doc, token.value); this.openElements[this.openElements.length - 1].appendChild (el); this.openElements.push (el); } } else if (token.type == 'end-tag') { if (this.openElements[this.openElements.length - 1].localName == token.value) { this.openElements.pop (); } else { log ('parse error: unmatched end tag: ' + token.value); } } else if (token.type == 'char') { this.openElements[this.openElements.length - 1].manakaiAppendText (token.value); } else if (token.type == 'eof') { break; } else if (token.type == 'abort') { log ('parse: abort'); logIndentLevel--; return; } } log ('stop parsing'); // readyState = 'interactive' // "When a script completes loading" rules start applying. // List of scripts that will execute as soon as possible for (var i = 0; i < this.scriptsExecutedSoon.length; i++) { var e = this.scriptsExecutedSoon[i]; // If it has completed loading log ('Execute an external script not inserted by parser...'); executeScript (this.doc, e); // NOTE: It MAY be executed before the end of the parsing, according // to the spec. this.hasAsyncScript = true; } // TODO: Handles // "list of scripts that will execute asynchronously" // Handle "list of scripts that will execute when the document has finished // parsing". 
var list = this.scriptsExecutedAfterParsing; while (list.length > 0) { // TODO: break unless completed loading // Step 1. // // Step 2. and Step 3. log ('Executing a |defer|red script...'); executeScript (this.doc, list.shift ()); // Step 4. } log ('DOMContentLoaded event fired'); // "delays tha load event" things has completed: // readyState = 'complete' log ('load event fired'); logIndentLevel--; } // parse Parser.prototype.setInsertionPoint = function (ip) { if (ip == undefined || ip == null || isNaN (ip)) { log ('insertion point: set to undefined'); this.insertionPoint = undefined; } else if (ip == this.input.s.length) { log ('insertion point: end of file'); this.insertionPoint = ip; } else { log ('insertion point: set to ' + ip + ' (before "' + this.input.s.substring (0, 10) + '")'); this.insertionPoint = ip; } }; // setInsertionPoint function JSDocument (p) { this.childNodes = []; this._parser = p; } // JSDocument function JSElement (doc, localName) { this.localName = localName; this.ownerDocument = doc; this.childNodes = []; } // JSElement JSDocument.prototype.appendChild = JSElement.prototype.appendChild = function (e) { this.childNodes.push (e); e.parentNode = this; if (e.localName == 'script') { logIndentLevel++; log ('Running a script: start'); var doc = this.ownerDocument || this; var p = doc._parser; // 1. Script type // // 2.1. If scripting is disabled // // 2.2. If the script element was created by an XML ... innerHTML ... // // 2.3. If the user agent does not support the scripting language ... // // 2.4. If the script element has its "already executed" flag set if (e.manakaiAlreadyExecuted) { // 2.5. Abort these steps at this point. log ('Running a script: aborted'); logIndentLevel--; return e; } // 3. Set the element's "already executed" flag. e.manakaiAlreadyExecuted = true; // 4. If the element has a src attribute, then a load for ... // TODO: load an external resource // 5. The first of the following options: // 5.1. 
if (/* TODO: If the document is still being parsed && */ e.defer && !e.async) { p.scriptsExecutedAfterParsing.push (e); log ('Running a script: aborted (defer)'); } else if (e.async && e.src != null) { // TODO } else if (e.async && e.src == null /* && list of scripts that will execute asynchronously is not empty */) { // TODO } else if (e.src != null && e.manakaiParserInserted) { if (p.scriptExecutedWhenParserResumes) { log ('Error: There is a script that will execute as soon as the parser resumes.'); } p.scriptExecutedWhenParserResumes = e; log ('Running a script: aborted (src parser-inserted)'); } else if (e.src != null) { p.scriptsExecutedSoon.push (e); log ('Running a script: aborted (src)'); } else { executeScript (doc, e); // even if other scripts are already executing. } log ('Running a script: end'); logIndentLevel--; } return e; }; // appendChild function executeScript (doc, e) { log ('executing a script block: start'); var s; if (e.src != null) { s = getExternalScript (e.src); // If the load resulted in an error, then ... firing an error event ... 
if (s == null) { log ('error event fired at the script element'); return; } log ('External script loaded: "' + s + '"'); } else { s = e.text; } // If the load was successful log ('load event fired at the script element'); if (true) { // Scripting is enabled, Document.designMode is disabled, // Document is the active document in its browsing context parseAndRunScript (doc, s); } log ('executing a script block: end'); } // executeScript function getExternalScript (uri) { if (uri.match (/^javascript:/i)) { var m; if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) { if (m[1]) { return m[1].replace (/\\u([0-9A-F]{4})/g, function (s, v) { return String.fromCharCode (parseInt ('0x' + v)); }); } else if (m[2]) { return m[2].replace (/\\u([0-9A-F]{4})/g, function (s, v) { return String.fromCharCode (parseInt ('0x' + v)); }); } else { return null; } } else { log ('Complex javascript: URI is not supported: <' + uri + '>'); return null; } } else { log ('URI scheme not supported: <' + uri + '>'); return null; } } // getExternalScript function parseAndRunScript (doc, s) { while (true) { var matched = false; s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (s, t) { matched = true; var args = []; t.replace (/('[^']*'|"[^"]*")/g, function (s, v) { args.push (v.substring (1, v.length - 1)); return ''; }); doc.write.apply (doc, args); return ''; }); s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/, function (s, t, u) { matched = true; var args = [t ? 
t : u]; doc._insertExternalScript.apply (doc, args); return ''; }); if (s == '') break; if (!matched) { log ('Script parse error: "' + s + '"'); break; } } } // parseAndRunScript function JSText (data) { this.data = data; } // JSText JSDocument.prototype.manakaiAppendText = JSElement.prototype.manakaiAppendText = function (s) { if (this.childNodes.length > 0 && this.childNodes[this.childNodes.length - 1] instanceof JSText) { this.childNodes[this.childNodes.length - 1].data += s; } else { this.childNodes.push (new JSText (s)); } }; // manakaiAppendText JSDocument.prototype.open = function () { // Two or fewer arguments // Step 1. var type = arguments[0] || 'text/html'; // Step 2. var replace = arguments[1] == 'replace'; // Step 3. if (this._parser && !this._parser.scriptCreated && this._parser.input.insertionPoint != undefined) { log ('document.open () in parsing mode is ignored'); return this; } // Step 4. log ('onbeforeunload event fired'); log ('onunload event fired'); // Step 5. if (this._parser) { // Discard the parser. } // Step 6. log ('document cleared by document.open ()'); this.childNodes = []; // Step 7. this._parser = new Parser (new InputStream (''), this); this._parser.scriptCreated = true; // Step 8. this.manakaiIsHTML = true; // Step 9. // If not text/html, ... // Step 10. if (!replace) { // History } // Step 11. this._parser.setInsertionPoint (this._parser.input.s.length); // Step 12. return this; }; // document.open JSDocument.prototype.write = function () { logIndentLevel++; var p = this._parser; // 1. If the insertion point is undefined, the open() method must be ... if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) { this.open (); p = this._parser; } // 2. ... inserted into the input stream just before the insertion point. 
var s = Array.join (arguments, ''); log ('document.write: insert "' + s + '"' + ' before "' + p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"'); p.input.s = p.input.s.substring (0, p.insertionPoint) + s + p.input.s.substring (p.insertionPoint, p.input.s.length); p.insertionPoint += s.length; // 3. If there is a script that will execute as soon as the parser resumes if (p.scriptExecutedAfterParserResumes) { log ('document.write: processed later (there is an unprocessed
This is a simplified implementation of the HTML5 parsing algorithm. It implements only the script-related part of the algorithm. In particular, this parser:
DOCTYPE
and comment tokens.
&quot;
,
&apos;
, and &amp;
in script
src
attribute value.
script
.
<!--
..-->
parsing rule
in script
element.
script
type
text/javascript
. type
and language
attributes are ignored.
document.write ("string", ["string", ...]);
.
var s = document.createElement ("script");
s.src = "string";
document.documentElement.appendChild (s);
'
s instead of
"
s.
javascript:
URI scheme in the
src
attribute of the script
element. In addition,
the URI must conform to
the regular expression ^javascript:\s*(?:"[^"]*"|'[^']*')\s*$
.
\uHHHH
escapes only in
javascript:
URI.
For some reason, this parser does not work in browsers that do not support JavaScript 1.5.