| 2 |
<html lang=en> |
<html lang=en> |
| 3 |
<head> |
<head> |
| 4 |
<title>Live Scripting HTML Parser</title> |
<title>Live Scripting HTML Parser</title> |
| 5 |
|
<link rel=author href="http://suika.fam.cx/~wakaba/who?"> |
| 6 |
|
<link rel=license href="http://suika.fam.cx/c/gnu/gpl" |
| 7 |
|
title="GNU GPL2 or later"> |
| 8 |
<style> |
<style> |
| 9 |
h1, h2 { |
h1 { |
| 10 |
|
margin: 0; |
| 11 |
|
font-size: 150%; |
| 12 |
|
} |
| 13 |
|
h2 { |
| 14 |
margin: 0; |
margin: 0; |
| 15 |
font-size: 100%; |
font-size: 100%; |
| 16 |
} |
} |
| 17 |
p, pre { |
p { |
| 18 |
margin: 0; |
margin: 0 1em; |
| 19 |
} |
} |
| 20 |
textarea { |
textarea { |
| 21 |
width: 100%; |
width: 100%; |
| 54 |
var p = new Parser (new InputStream (v)); |
var p = new Parser (new InputStream (v)); |
| 55 |
var doc = p.doc; |
var doc = p.doc; |
| 56 |
p.parse (); |
p.parse (); |
| 57 |
|
|
| 58 |
log (dumpTree (doc, '')); |
log (dumpTree (doc, '')); |
| 59 |
|
|
| 60 |
|
if (p.hasAsyncScript) { |
| 61 |
|
log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors'); |
| 62 |
|
} |
| 63 |
} |
} |
| 64 |
} // update2 |
} // update2 |
| 65 |
|
|
| 85 |
this.openElements = [doc]; |
this.openElements = [doc]; |
| 86 |
this.input = i; |
this.input = i; |
| 87 |
this.scriptsExecutedAfterParsing = []; |
this.scriptsExecutedAfterParsing = []; |
| 88 |
|
this.scriptsExecutedSoon = []; |
| 89 |
|
this.scriptsExecutedAsynchronously = []; |
| 90 |
} // Parser |
} // Parser |
| 91 |
|
|
| 92 |
Parser.prototype.getNextToken = function () { |
Parser.prototype.getNextToken = function () { |
| 343 |
|
|
| 344 |
// "When a script completes loading" rules start applying. |
// "When a script completes loading" rules start applying. |
| 345 |
|
|
| 346 |
// TODO: Handles "list of scripts that will execute as soon as possible" |
while (this.scriptsExecutedSoon.length > 0 || |
| 347 |
// and "list of scripts that will execute asynchronously" |
this.scriptsExecutedAsynchronously.length > 0) { |
| 348 |
|
// Handle "list of scripts that will execute as soon as possible". |
| 349 |
|
while (this.scriptsExecutedSoon.length > 0) { |
| 350 |
|
var e = this.scriptsExecutedSoon.shift (); |
| 351 |
|
|
| 352 |
|
// If it has completed loading |
| 353 |
|
log ('Execute an external script not inserted by parser...'); |
| 354 |
|
executeScript (this.doc, e); |
| 355 |
|
|
| 356 |
|
// NOTE: It MAY be executed before the end of the parsing, according |
| 357 |
|
// to the spec. |
| 358 |
|
this.hasAsyncScript = true; |
| 359 |
|
} |
| 360 |
|
|
| 361 |
|
// Handle "list of scripts that will execute asynchronously". |
| 362 |
|
while (this.scriptsExecutedAsynchronously.length > 0) { |
| 363 |
|
var e = this.scriptsExecutedAsynchronously.shift (); |
| 364 |
|
|
| 365 |
|
// Step 1. |
| 366 |
|
// We assume that all scripts have been loaded at this time. |
| 367 |
|
|
| 368 |
|
// Step 2. |
| 369 |
|
log ('Execute an asynchronous script...'); |
| 370 |
|
executeScript (this.doc, e); |
| 371 |
|
|
| 372 |
|
// Step 3. |
| 373 |
|
// |
| 374 |
|
|
| 375 |
|
// Step 4. |
| 376 |
|
// |
| 377 |
|
|
| 378 |
|
this.hasAsyncScript = true; |
| 379 |
|
} |
| 380 |
|
} |
| 381 |
|
|
| 382 |
// Handle "list of scripts that will execute when the document has finished |
// Handle "list of scripts that will execute when the document has finished |
| 383 |
// parsing". |
// parsing". |
| 472 |
p.scriptsExecutedAfterParsing.push (e); |
p.scriptsExecutedAfterParsing.push (e); |
| 473 |
log ('Running a script: aborted (defer)'); |
log ('Running a script: aborted (defer)'); |
| 474 |
} else if (e.async && e.src != null) { |
} else if (e.async && e.src != null) { |
| 475 |
// TODO |
p.scriptsExecutedAsynchronously.push (e); |
| 476 |
} else if (e.async && e.src == null |
log ('Running a script: aborted (async src)'); |
| 477 |
/* && list of scripts that will execute asynchronously is not empty */) { |
} else if (e.async && e.src == null && |
| 478 |
// TODO |
p.scriptsExecutedAsynchronously.length > 0) { |
| 479 |
|
p.scriptsExecutedAsynchronously.push (e); |
| 480 |
|
log ('Running a script: aborted (async)'); |
| 481 |
|
// ISSUE: What is the difference with the case above? |
| 482 |
} else if (e.src != null && e.manakaiParserInserted) { |
} else if (e.src != null && e.manakaiParserInserted) { |
| 483 |
if (p.scriptExecutedWhenParserResumes) { |
if (p.scriptExecutedWhenParserResumes) { |
| 484 |
log ('Error: There is a script that will execute as soon as the parser resumes.'); |
log ('Error: There is a script that will execute as soon as the parser resumes.'); |
| 485 |
} |
} |
| 486 |
p.scriptExecutedWhenParserResumes = e; |
p.scriptExecutedWhenParserResumes = e; |
| 487 |
log ('Running a script: aborted (src)'); |
log ('Running a script: aborted (src parser-inserted)'); |
| 488 |
} else if (e.src != null) { |
} else if (e.src != null) { |
| 489 |
// TODO |
p.scriptsExecutedSoon.push (e); |
| 490 |
|
log ('Running a script: aborted (src)'); |
| 491 |
} else { |
} else { |
| 492 |
executeScript (doc, e); // even if other scripts are already executing. |
executeScript (doc, e); // even if other scripts are already executing. |
| 493 |
} |
} |
| 535 |
var m; |
var m; |
| 536 |
if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) { |
if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) { |
| 537 |
if (m[1]) { |
if (m[1]) { |
| 538 |
return m[1]; |
return unescapeJSLiteral (m[1]); |
| 539 |
} else if (m[2]) { |
} else if (m[2]) { |
| 540 |
return m[2]; |
return unescapeJSLiteral (m[2]); |
| 541 |
} else { |
} else { |
| 542 |
return null; |
return null; |
| 543 |
} |
} |
| 558 |
matched = true; |
matched = true; |
| 559 |
var args = []; |
var args = []; |
| 560 |
t.replace (/('[^']*'|"[^"]*")/g, function (s, v) { |
t.replace (/('[^']*'|"[^"]*")/g, function (s, v) { |
| 561 |
args.push (v.substring (1, v.length - 1)); |
args.push (unescapeJSLiteral (v.substring (1, v.length - 1))); |
| 562 |
return ''; |
return ''; |
| 563 |
}); |
}); |
| 564 |
doc.write.apply (doc, args); |
doc.write.apply (doc, args); |
| 565 |
return ''; |
return ''; |
| 566 |
}); |
}); |
| 567 |
|
s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/, |
| 568 |
|
function (s, t, u) { |
| 569 |
|
matched = true; |
| 570 |
|
var args = [unescapeJSLiteral (t ? t : u)]; |
| 571 |
|
doc._insertExternalScript.apply (doc, args); |
| 572 |
|
return ''; |
| 573 |
|
}); |
| 574 |
if (s == '') break; |
if (s == '') break; |
| 575 |
if (!matched) { |
if (!matched) { |
| 576 |
log ('Script parse error: "' + s + '"'); |
log ('Script parse error: "' + s + '"'); |
| 579 |
} |
} |
| 580 |
} // parseAndRunScript |
} // parseAndRunScript |
| 581 |
|
|
| 582 |
|
// Decodes the \uHHHH escape sequences in the text of a JavaScript string
// literal.
//
// Only escapes of the form backslash, "u", exactly four hexadecimal digits
// are replaced; every other character (including any other backslash
// sequence) is copied through unchanged.
//
//   s       - the string to decode.
//   returns - |s| with each \uHHHH replaced by the character whose UTF-16
//             code unit is 0xHHHH.
function unescapeJSLiteral (s) {
  return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
    // |v| is the four hex digits captured by the pattern; parse it with an
    // explicit radix rather than relying on "0x" prefix detection.
    return String.fromCharCode (parseInt (v, 16));
  });
} // unescapeJSLiteral
| 587 |
|
|
| 588 |
function JSText (data) { |
function JSText (data) { |
| 589 |
this.data = data; |
this.data = data; |
| 590 |
} // JSText |
} // JSText |
| 695 |
return; |
return; |
| 696 |
}; // document.write |
}; // document.write |
| 697 |
|
|
| 698 |
|
// Creates a |script| element owned by this document, sets its |src| to
// |uri|, and appends it to the document element.
//
//   uri - value assigned to the new element's |src| property.
JSDocument.prototype._insertExternalScript = function (uri) {
  var scriptElement = new JSElement (this, 'script');
  scriptElement.src = uri;

  // The root is looked up only after the element has been built, exactly
  // as a script doing |document.documentElement.appendChild (s)| would.
  var root = this.documentElement;
  root.appendChild (scriptElement);
}; // _insertExternalScript
| 703 |
|
|
| 704 |
|
// |documentElement| getter: returns the first entry of this document's
// |childNodes| that is a |JSElement|, or |null| when there is none.
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var index = 0;
  while (index < children.length) {
    var candidate = children[index];
    if (candidate instanceof JSElement) {
      return candidate;
    }
    index++;
  }
  return null;
});
| 713 |
|
|
| 714 |
JSElement.prototype.__defineGetter__ ('text', function () { |
JSElement.prototype.__defineGetter__ ('text', function () { |
| 715 |
var r = ''; |
var r = ''; |
| 716 |
for (var i = 0; i < this.childNodes.length; i++) { |
for (var i = 0; i < this.childNodes.length; i++) { |
| 780 |
<p> |
<p> |
| 781 |
</textarea> |
</textarea> |
| 782 |
|
|
| 783 |
<h2>Log</h2> |
<h2 id=log>Log</h2> |
| 784 |
<p><output></output> |
<p><output></output> |
| 785 |
|
|
| 786 |
<h2>Note</h2> |
<h2 id=notes>Notes</h2> |
| 787 |
|
|
| 788 |
<p>This is a <em>simplified</em> implementation of |
<p>This is a <em>simplified</em> implementation of |
| 789 |
<a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5 |
<a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5 |
| 805 |
<li>Only supports <code>script</code> <code>type</code> |
<li>Only supports <code>script</code> <code>type</code> |
| 806 |
<code>text/javascript</code>. <code>type</code> and <code>language</code> |
<code>text/javascript</code>. <code>type</code> and <code>language</code> |
| 807 |
attributes are ignored. |
attributes are ignored. |
| 808 |
<li>Only supports <code>document.write</code>. |
<li>Only supports limited statements. It must consist of zero or more |
| 809 |
The script code must be match to the regular expression |
statements similar to the following, possibly |
| 810 |
<code>^\s*(?:document\.write\s*\(<var>v</var>\s*(?:,\s*<var>v</var>\s*)*\)\s*;\s*)*$</code> |
introduced, followed, or separated by white space characters: |
| 811 |
where <var>v</var> is <code>"[^"]*"|'[^']*'</code>. |
<ul> |
| 812 |
|
<li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>. |
| 813 |
|
<li><code>var s = document.createElement ("script"); |
| 814 |
|
s.src = "<var>string</var>"; |
| 815 |
|
document.documentElement.appendChild (s);</code> |
| 816 |
|
</ul> |
| 817 |
|
Note that strings may be delimited by <code>'</code>s instead of |
| 818 |
|
<code>"</code>s. |
| 819 |
<li>Only supports <code>javascript:</code> |
<li>Only supports <code>javascript:</code> |
| 820 |
<abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the |
<abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the |
| 821 |
<code>src</code> attribute of the <code>script</code> element. In addition, |
<code>src</code> attribute of the <code>script</code> element. In addition, |
| 822 |
the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to |
the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to |
| 823 |
the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>. |
the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>. |
| 824 |
|
<li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript |
| 825 |
|
string literals. |
| 826 |
|
<li>Does not handle <i>stop parsing</i> phase correctly if the document is |
| 827 |
|
replaced by <code>document.open ()</code> call. In other words, delayed |
| 828 |
|
(deferred or asynchronous) script executions and event firings might be |
| 829 |
|
treated in a wrong way if a <code>document.open ()</code> invocation |
| 830 |
|
is implicitly done by <code>document.write ()</code> in a delayed script. |
| 831 |
</ul> |
</ul> |
| 832 |
|
|
| 833 |
<p>For some reason, this parser does not work in browsers that do |
<p>For some reason, this parser does not work in browsers that do |
| 834 |
not support JavaScript 1.5. |
not support JavaScript 1.5. |
| 835 |
|
|
| 836 |
|
<!-- TODO: |src| attribute value should refer to the value at the time |
| 837 |
|
when it is inserted into the document, not the value when the script is |
| 838 |
|
executed. Currently it does not matter, since we don't allow dynamic |
| 839 |
|
modification to the |src| content/DOM attribute value yet. --> |
| 840 |
|
|
| 841 |
</body> |
</body> |
|
</html> |
|
| 842 |
|
</html> |
| 843 |
|
<!-- $Date$ --> |
| 844 |
|
<!-- |
| 845 |
|
|
| 846 |
|
Copyright 2008 Wakaba <w@suika.fam.cx> |
| 847 |
|
|
| 848 |
|
This program is free software; you can redistribute it and/or |
| 849 |
|
modify it under the terms of the GNU General Public License |
| 850 |
|
as published by the Free Software Foundation; either version 2 |
| 851 |
|
of the License, or (at your option) any later version. |
| 852 |
|
|
| 853 |
|
This program is distributed in the hope that it will be useful, |
| 854 |
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 855 |
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 856 |
|
GNU General Public License for more details. |
| 857 |
|
|
| 858 |
|
You should have received a copy of the GNU General Public License |
| 859 |
|
along with this program; if not, write to the Free Software |
| 860 |
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 861 |
|
|
| 862 |
|
--> |