| 47 |
var p = new Parser (new InputStream (v)); |
var p = new Parser (new InputStream (v)); |
| 48 |
var doc = p.doc; |
var doc = p.doc; |
| 49 |
p.parse (); |
p.parse (); |
| 50 |
|
|
| 51 |
log (dumpTree (doc, '')); |
log (dumpTree (doc, '')); |
| 52 |
|
|
| 53 |
|
if (p.hasAsyncScript) { |
| 54 |
|
log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors'); |
| 55 |
|
} |
| 56 |
} |
} |
| 57 |
} // update2 |
} // update2 |
| 58 |
|
|
| 78 |
this.openElements = [doc]; |
this.openElements = [doc]; |
| 79 |
this.input = i; |
this.input = i; |
| 80 |
this.scriptsExecutedAfterParsing = []; |
this.scriptsExecutedAfterParsing = []; |
| 81 |
|
this.scriptsExecutedSoon = []; |
| 82 |
|
this.scriptsExecutedAsynchronously = []; |
| 83 |
} // Parser |
} // Parser |
| 84 |
|
|
| 85 |
Parser.prototype.getNextToken = function () { |
Parser.prototype.getNextToken = function () { |
| 157 |
tagName = v.toLowerCase (); |
tagName = v.toLowerCase (); |
| 158 |
return ''; |
return ''; |
| 159 |
}); |
}); |
| 160 |
e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/, |
while (true) { |
| 161 |
function (x, attrName, attrValue1, attrValue2, attrValue3) { |
var m = false; |
| 162 |
v = attrValue1 || attrValue2 || attrValue3; |
e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/, |
| 163 |
v = v.replace (/"/g, '"').replace (/'/g, "'") |
function (x, attrName, attrValue1, attrValue2, attrValue3) { |
| 164 |
.replace (/&/g, '&'); |
v = attrValue1 || attrValue2 || attrValue3; |
| 165 |
attrs[attrName.toLowerCase ()] = v; |
v = v.replace (/"/g, '"').replace (/'/g, "'") |
| 166 |
return ''; |
.replace (/&/g, '&'); |
| 167 |
}); |
attrs[attrName.toLowerCase ()] = v; |
| 168 |
|
m = true; |
| 169 |
|
return ''; |
| 170 |
|
}); |
| 171 |
|
if (!m) break; |
| 172 |
|
} |
| 173 |
if (e.length) { |
if (e.length) { |
| 174 |
log ('Broken start tag: "' + e + '"'); |
log ('Broken start tag: "' + e + '"'); |
| 175 |
} |
} |
| 336 |
|
|
| 337 |
// "When a script completes loading" rules start applying. |
// "When a script completes loading" rules start applying. |
| 338 |
|
|
| 339 |
// TODO: Handles "list of scripts that will execute as soon as possible" |
while (this.scriptsExecutedSoon.length > 0 || |
| 340 |
// and "list of scripts that will execute asynchronously" |
this.scriptsExecutedAsynchronously.length > 0) { |
| 341 |
|
// Handle "list of scripts that will execute as soon as possible". |
| 342 |
|
while (this.scriptsExecutedSoon.length > 0) { |
| 343 |
|
var e = this.scriptsExecutedSoon.shift (); |
| 344 |
|
|
| 345 |
|
// If it has completed loading |
| 346 |
|
log ('Execute an external script not inserted by parser...'); |
| 347 |
|
executeScript (this.doc, e); |
| 348 |
|
|
| 349 |
|
// NOTE: It MAY be executed before the end of the parsing, according |
| 350 |
|
// to the spec. |
| 351 |
|
this.hasAsyncScript = true; |
| 352 |
|
} |
| 353 |
|
|
| 354 |
|
// Handle "list of scripts that will execute asynchronously". |
| 355 |
|
while (this.scriptsExecutedAsynchronously.length > 0) { |
| 356 |
|
var e = this.scriptsExecutedAsynchronously.shift (); |
| 357 |
|
|
| 358 |
|
// Step 1. |
| 359 |
|
// We assume that all scripts have been loaded at this time. |
| 360 |
|
|
| 361 |
|
// Step 2. |
| 362 |
|
log ('Execute an asynchronous script...'); |
| 363 |
|
executeScript (this.doc, e); |
| 364 |
|
|
| 365 |
|
// Step 3. |
| 366 |
|
// |
| 367 |
|
|
| 368 |
|
// Step 4. |
| 369 |
|
// |
| 370 |
|
|
| 371 |
|
this.hasAsyncScript = true; |
| 372 |
|
} |
| 373 |
|
} |
| 374 |
|
|
| 375 |
// Handle "list of scripts that will execute when the document has finished |
// Handle "list of scripts that will execute when the document has finished |
| 376 |
// parsing". |
// parsing". |
| 465 |
p.scriptsExecutedAfterParsing.push (e); |
p.scriptsExecutedAfterParsing.push (e); |
| 466 |
log ('Running a script: aborted (defer)'); |
log ('Running a script: aborted (defer)'); |
| 467 |
} else if (e.async && e.src != null) { |
} else if (e.async && e.src != null) { |
| 468 |
// TODO |
p.scriptsExecutedAsynchronously.push (e); |
| 469 |
} else if (e.async && e.src == null |
log ('Running a script: aborted (async src)'); |
| 470 |
/* && list of scripts that will execute asynchronously is not empty */) { |
} else if (e.async && e.src == null && |
| 471 |
// TODO |
p.scriptsExecutedAsynchronously.length > 0) { |
| 472 |
|
p.scriptsExecutedAsynchronously.push (e); |
| 473 |
|
log ('Running a script: aborted (async)'); |
| 474 |
|
// ISSUE: What is the difference with the case above? |
| 475 |
} else if (e.src != null && e.manakaiParserInserted) { |
} else if (e.src != null && e.manakaiParserInserted) { |
| 476 |
if (p.scriptExecutedWhenParserResumes) { |
if (p.scriptExecutedWhenParserResumes) { |
| 477 |
log ('Error: There is a script that will execute as soon as the parser resumes.'); |
log ('Error: There is a script that will execute as soon as the parser resumes.'); |
| 478 |
} |
} |
| 479 |
p.scriptExecutedWhenParserResumes = e; |
p.scriptExecutedWhenParserResumes = e; |
| 480 |
log ('Running a script: aborted (src)'); |
log ('Running a script: aborted (src parser-inserted)'); |
| 481 |
} else if (e.src != null) { |
} else if (e.src != null) { |
| 482 |
// TODO |
p.scriptsExecutedSoon.push (e); |
| 483 |
|
log ('Running a script: aborted (src)'); |
| 484 |
} else { |
} else { |
| 485 |
executeScript (doc, e); // even if other scripts are already executing. |
executeScript (doc, e); // even if other scripts are already executing. |
| 486 |
} |
} |
| 528 |
var m; |
var m; |
| 529 |
if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) { |
if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) { |
| 530 |
if (m[1]) { |
if (m[1]) { |
| 531 |
return m[1]; |
return unescapeJSLiteral (m[1]); |
| 532 |
} else if (m[2]) { |
} else if (m[2]) { |
| 533 |
return m[2]; |
return unescapeJSLiteral (m[2]); |
| 534 |
} else { |
} else { |
| 535 |
return null; |
return null; |
| 536 |
} |
} |
| 551 |
matched = true; |
matched = true; |
| 552 |
var args = []; |
var args = []; |
| 553 |
t.replace (/('[^']*'|"[^"]*")/g, function (s, v) { |
t.replace (/('[^']*'|"[^"]*")/g, function (s, v) { |
| 554 |
args.push (v.substring (1, v.length - 1)); |
args.push (unescapeJSLiteral (v.substring (1, v.length - 1))); |
| 555 |
return ''; |
return ''; |
| 556 |
}); |
}); |
| 557 |
doc.write.apply (doc, args); |
doc.write.apply (doc, args); |
| 558 |
return ''; |
return ''; |
| 559 |
}); |
}); |
| 560 |
|
s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/, |
| 561 |
|
function (s, t, u) { |
| 562 |
|
matched = true; |
| 563 |
|
var args = [unescapeJSLiteral (t ? t : u)]; |
| 564 |
|
doc._insertExternalScript.apply (doc, args); |
| 565 |
|
return ''; |
| 566 |
|
}); |
| 567 |
if (s == '') break; |
if (s == '') break; |
| 568 |
if (!matched) { |
if (!matched) { |
| 569 |
log ('Script parse error: "' + s + '"'); |
log ('Script parse error: "' + s + '"'); |
| 572 |
} |
} |
| 573 |
} // parseAndRunScript |
} // parseAndRunScript |
| 574 |
|
|
| 575 |
|
/**
 * Decodes the escape sequences found in the body of a JavaScript string
 * literal.
 *
 * Only the four-hex-digit Unicode form (backslash, "u", four hex digits) is
 * recognized; each such sequence is replaced by the UTF-16 code unit it
 * names.  Any other text, including other backslash sequences, is returned
 * unchanged.
 *
 * @param {string} s  Text taken from between the quotes of a string literal.
 * @return {string}   The same text with the Unicode escapes decoded.
 */
function unescapeJSLiteral (s) {
  return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
    // The capture is always exactly four hex digits, so parse it with an
    // explicit radix instead of relying on a '0x' prefix.
    return String.fromCharCode (parseInt (v, 16));
  });
} // unescapeJSLiteral
| 580 |
|
|
| 581 |
/**
 * Constructor for text nodes of the parser's document tree.
 *
 * @constructor
 * @param {string} data  Character data stored on the node as |data|.
 */
function JSText (data) {
  this.data = data;
} // JSText
| 688 |
return; |
return; |
| 689 |
}; // document.write |
}; // document.write |
| 690 |
|
|
| 691 |
|
/**
 * Creates a new |script| element owned by this document, sets its |src|
 * to |uri| and appends it to the document element.
 *
 * NOTE: The |documentElement| getter can return null (no element child);
 * in that case the |appendChild| call throws, exactly as before.
 *
 * @param {string} uri  Value assigned to the new element's |src|.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var s = new JSElement (this, 'script');
  s.src = uri;
  this.documentElement.appendChild (s);
}; // _insertExternalScript
| 696 |
|
|
| 697 |
|
/**
 * |documentElement| getter: returns the first child of the document that
 * is a |JSElement|, or null if the document has no element child.
 */
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var cn = this.childNodes;
  for (var i = 0; i < cn.length; i++) {
    if (cn[i] instanceof JSElement) {
      return cn[i];
    }
  }
  return null;
}); // documentElement
| 706 |
|
|
| 707 |
JSElement.prototype.__defineGetter__ ('text', function () { |
JSElement.prototype.__defineGetter__ ('text', function () { |
| 708 |
var r = ''; |
var r = ''; |
| 709 |
for (var i = 0; i < this.childNodes.length; i++) { |
for (var i = 0; i < this.childNodes.length; i++) { |
| 722 |
r += '| ' + indent + node.localName + '\n'; |
r += '| ' + indent + node.localName + '\n'; |
| 723 |
if (node.async) r += '| ' + indent + ' async=""\n'; |
if (node.async) r += '| ' + indent + ' async=""\n'; |
| 724 |
if (node.defer) r += '| ' + indent + ' defer=""\n'; |
if (node.defer) r += '| ' + indent + ' defer=""\n'; |
| 725 |
if (node.src) r += '| ' + indent + ' src="' + node.src + '"\n'; |
if (node.src != null) { |
| 726 |
|
r += '| ' + indent + ' src="' + node.src + '"\n'; |
| 727 |
|
} |
| 728 |
r += dumpTree (node, indent + ' '); |
r += dumpTree (node, indent + ' '); |
| 729 |
} else if (node instanceof JSText) { |
} else if (node instanceof JSText) { |
| 730 |
r += '| ' + indent + '"' + node.data + '"\n'; |
r += '| ' + indent + '"' + node.data + '"\n'; |
| 773 |
<p> |
<p> |
| 774 |
</textarea> |
</textarea> |
| 775 |
|
|
| 776 |
<h2>Log</h2> |
<h2 id=log>Log</h2> |
| 777 |
<p><output></output> |
<p><output></output> |
| 778 |
|
|
| 779 |
<h2>Note</h2> |
<h2 id=notes>Notes</h2> |
| 780 |
|
|
| 781 |
<p>This is a <em>simplified</em> implementation of |
<p>This is a <em>simplified</em> implementation of |
| 782 |
<a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5 |
<a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5 |
| 798 |
<li>Only supports <code>script</code> <code>type</code> |
<li>Only supports <code>script</code> <code>type</code> |
| 799 |
<code>text/javascript</code>. <code>type</code> and <code>language</code> |
<code>text/javascript</code>. <code>type</code> and <code>language</code> |
| 800 |
attributes are ignored. |
attributes are ignored. |
| 801 |
<li>Only supports <code>document.write</code>. |
<li>Only supports limited statements. It must consist of zero or more |
| 802 |
The script code must match the regular expression |
statements looking similar to the following statements, possibly |
| 803 |
<code>^\s*(?:document\.write\s*\(<var>v</var>\s*(?:,\s*<var>v</var>\s*)*\)\s*;\s*)*$</code> |
preceded, followed, or separated by white space characters: |
| 804 |
where <var>v</var> is <code>"[^"]*"|'[^']*'</code>. |
<ul> |
| 805 |
|
<li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>. |
| 806 |
|
<li><code>var s = document.createElement ("script"); |
| 807 |
|
s.src = "<var>string</var>"; |
| 808 |
|
document.documentElement.appendChild (s);</code> |
| 809 |
|
</ul> |
| 810 |
|
Note that strings may be delimited by <code>'</code>s instead of |
| 811 |
|
<code>"</code>s. |
| 812 |
<li>Only supports <code>javascript:</code> |
<li>Only supports <code>javascript:</code> |
| 813 |
<abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the |
<abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the |
| 814 |
<code>src</code> attribute of the <code>script</code> element. In addition, |
<code>src</code> attribute of the <code>script</code> element. In addition, |
| 815 |
the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to |
the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to |
| 816 |
the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>. |
the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>. |
| 817 |
|
<li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript |
| 818 |
|
string literals. |
| 819 |
|
<li>Does not handle the <i>stop parsing</i> phase correctly if the document is |
| 820 |
|
replaced by a <code>document.open ()</code> call. In other words, delayed |
| 821 |
|
(deferred or asynchronous) script executions and event firings might be |
| 822 |
|
treated in a wrong way if a <code>document.open ()</code> invocation |
| 823 |
|
is implicitly done by <code>document.write ()</code> in a delayed script. |
| 824 |
</ul> |
</ul> |
| 825 |
|
|
| 826 |
<p>For some reason, this parser does not work in browsers that do |
<p>For some reason, this parser does not work in browsers that do |
| 827 |
not support JavaScript 1.5. |
not support JavaScript 1.5. |
| 828 |
|
|
| 829 |
<!-- TODO: multiple attributes are not supported yet --> |
<!-- TODO: |src| attribute value should refer to the value at the time |
| 830 |
|
when it is inserted into the document, not the value when the script is |
| 831 |
|
executed. Currently it does not matter, since we don't allow dynamic |
| 832 |
|
modification to the |src| content/DOM attribute value yet. --> |
| 833 |
|
|
| 834 |
|
<!-- TODO: license --> |
| 835 |
|
|
| 836 |
</body> |
</body> |
| 837 |
</html> |
</html> |