2 |
<html lang=en> |
<html lang=en> |
3 |
<head> |
<head> |
4 |
<title>Live Scripting HTML Parser</title> |
<title>Live Scripting HTML Parser</title> |
5 |
|
<link rel=author href="http://suika.fam.cx/~wakaba/who?"> |
6 |
|
<link rel=license href="http://suika.fam.cx/c/gnu/gpl" |
7 |
|
title="GNU GPL2 or later"> |
8 |
<style> |
<style> |
9 |
h1, h2 { |
h1 { |
10 |
|
margin: 0; |
11 |
|
font-size: 150%; |
12 |
|
} |
13 |
|
h2 { |
14 |
margin: 0; |
margin: 0; |
15 |
font-size: 100%; |
font-size: 100%; |
16 |
} |
} |
17 |
p, pre { |
p { |
18 |
margin: 0; |
margin: 0 1em; |
19 |
} |
} |
20 |
textarea { |
textarea { |
21 |
width: 100%; |
width: 100%; |
54 |
var p = new Parser (new InputStream (v)); |
var p = new Parser (new InputStream (v)); |
55 |
var doc = p.doc; |
var doc = p.doc; |
56 |
p.parse (); |
p.parse (); |
57 |
|
|
58 |
log (dumpTree (doc, '')); |
log (dumpTree (doc, '')); |
59 |
|
|
60 |
|
if (p.hasAsyncScript) { |
61 |
|
log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors'); |
62 |
|
} |
63 |
} |
} |
64 |
} // update2 |
} // update2 |
65 |
|
|
85 |
this.openElements = [doc]; |
this.openElements = [doc]; |
86 |
this.input = i; |
this.input = i; |
87 |
this.scriptsExecutedAfterParsing = []; |
this.scriptsExecutedAfterParsing = []; |
88 |
|
this.scriptsExecutedSoon = []; |
89 |
|
this.scriptsExecutedAsynchronously = []; |
90 |
} // Parser |
} // Parser |
91 |
|
|
92 |
Parser.prototype.getNextToken = function () { |
Parser.prototype.getNextToken = function () { |
343 |
|
|
344 |
// "When a script completes loading" rules start applying. |
// "When a script completes loading" rules start applying. |
345 |
|
|
346 |
// TODO: Handles "list of scripts that will execute as soon as possible" |
while (this.scriptsExecutedSoon.length > 0 || |
347 |
// and "list of scripts that will execute asynchronously" |
this.scriptsExecutedAsynchronously.length > 0) { |
348 |
|
// Handle "list of scripts that will execute as soon as possible". |
349 |
|
while (this.scriptsExecutedSoon.length > 0) { |
350 |
|
var e = this.scriptsExecutedSoon.shift (); |
351 |
|
|
352 |
|
// If it has completed loading |
353 |
|
log ('Execute an external script not inserted by parser...'); |
354 |
|
executeScript (this.doc, e); |
355 |
|
|
356 |
|
// NOTE: It MAY be executed before the end of the parsing, according |
357 |
|
// to the spec. |
358 |
|
this.hasAsyncScript = true; |
359 |
|
} |
360 |
|
|
361 |
|
// Handle "list of scripts that will execute asynchronously". |
362 |
|
while (this.scriptsExecutedAsynchronously.length > 0) { |
363 |
|
var e = this.scriptsExecutedAsynchronously.shift (); |
364 |
|
|
365 |
|
// Step 1. |
366 |
|
// We assume that all scripts have been loaded at this time. |
367 |
|
|
368 |
|
// Step 2. |
369 |
|
log ('Execute an asynchronous script...'); |
370 |
|
executeScript (this.doc, e); |
371 |
|
|
372 |
|
// Step 3. |
373 |
|
// |
374 |
|
|
375 |
|
// Step 4. |
376 |
|
// |
377 |
|
|
378 |
|
this.hasAsyncScript = true; |
379 |
|
} |
380 |
|
} |
381 |
|
|
382 |
// Handle "list of scripts that will execute when the document has finished |
// Handle "list of scripts that will execute when the document has finished |
383 |
// parsing". |
// parsing". |
472 |
p.scriptsExecutedAfterParsing.push (e); |
p.scriptsExecutedAfterParsing.push (e); |
473 |
log ('Running a script: aborted (defer)'); |
log ('Running a script: aborted (defer)'); |
474 |
} else if (e.async && e.src != null) { |
} else if (e.async && e.src != null) { |
475 |
// TODO |
p.scriptsExecutedAsynchronously.push (e); |
476 |
} else if (e.async && e.src == null |
log ('Running a script: aborted (async src)'); |
477 |
/* && list of scripts that will execute asynchronously is not empty */) { |
} else if (e.async && e.src == null && |
478 |
// TODO |
p.scriptsExecutedAsynchronously.length > 0) { |
479 |
|
p.scriptsExecutedAsynchronously.push (e); |
480 |
|
log ('Running a script: aborted (async)'); |
481 |
|
// ISSUE: What is the difference with the case above? |
482 |
} else if (e.src != null && e.manakaiParserInserted) { |
} else if (e.src != null && e.manakaiParserInserted) { |
483 |
if (p.scriptExecutedWhenParserResumes) { |
if (p.scriptExecutedWhenParserResumes) { |
484 |
log ('Error: There is a script that will execute as soon as the parser resumes.'); |
log ('Error: There is a script that will execute as soon as the parser resumes.'); |
485 |
} |
} |
486 |
p.scriptExecutedWhenParserResumes = e; |
p.scriptExecutedWhenParserResumes = e; |
487 |
log ('Running a script: aborted (src)'); |
log ('Running a script: aborted (src parser-inserted)'); |
488 |
} else if (e.src != null) { |
} else if (e.src != null) { |
489 |
// TODO |
p.scriptsExecutedSoon.push (e); |
490 |
|
log ('Running a script: aborted (src)'); |
491 |
} else { |
} else { |
492 |
executeScript (doc, e); // even if other scripts are already executing. |
executeScript (doc, e); // even if other scripts are already executing. |
493 |
} |
} |
535 |
var m; |
var m; |
536 |
if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) { |
if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) { |
537 |
if (m[1]) { |
if (m[1]) { |
538 |
return m[1]; |
return unescapeJSLiteral (m[1]); |
539 |
} else if (m[2]) { |
} else if (m[2]) { |
540 |
return m[2]; |
return unescapeJSLiteral (m[2]); |
541 |
} else { |
} else { |
542 |
return null; |
return null; |
543 |
} |
} |
558 |
matched = true; |
matched = true; |
559 |
var args = []; |
var args = []; |
560 |
t.replace (/('[^']*'|"[^"]*")/g, function (s, v) { |
t.replace (/('[^']*'|"[^"]*")/g, function (s, v) { |
561 |
args.push (v.substring (1, v.length - 1)); |
args.push (unescapeJSLiteral (v.substring (1, v.length - 1))); |
562 |
return ''; |
return ''; |
563 |
}); |
}); |
564 |
doc.write.apply (doc, args); |
doc.write.apply (doc, args); |
565 |
return ''; |
return ''; |
566 |
}); |
}); |
567 |
|
s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/, |
568 |
|
function (s, t, u) { |
569 |
|
matched = true; |
570 |
|
var args = [unescapeJSLiteral (t ? t : u)]; |
571 |
|
doc._insertExternalScript.apply (doc, args); |
572 |
|
return ''; |
573 |
|
}); |
574 |
if (s == '') break; |
if (s == '') break; |
575 |
if (!matched) { |
if (!matched) { |
576 |
log ('Script parse error: "' + s + '"'); |
log ('Script parse error: "' + s + '"'); |
579 |
} |
} |
580 |
} // parseAndRunScript |
} // parseAndRunScript |
581 |
|
|
582 |
|
// Decode the |\uHHHH| escape sequences found in the body of a JavaScript
// string literal.  Each escape (exactly four hexadecimal digits) is replaced
// by the character with that code unit; all other text, including any other
// kind of backslash escape, is returned unchanged.
function unescapeJSLiteral (s) {
  var unicodeEscape = /\\u([0-9A-Fa-f]{4})/g;

  // |hex| is the captured four-digit group; the '0x' prefix makes
  // |parseInt| read it as hexadecimal.
  function decodeEscape (whole, hex) {
    var codeUnit = parseInt ('0x' + hex);
    return String.fromCharCode (codeUnit);
  }

  return s.replace (unicodeEscape, decodeEscape);
} // unescapeJSLiteral
587 |
|
|
588 |
function JSText (data) { |
function JSText (data) { |
589 |
this.data = data; |
this.data = data; |
590 |
} // JSText |
} // JSText |
695 |
return; |
return; |
696 |
}; // document.write |
}; // document.write |
697 |
|
|
698 |
|
// Construct a |script| JSElement for this document, set its |src| to |uri|,
// and append it to the document element.
// NOTE(review): |documentElement| is read without a null check, so the
// |appendChild| call presumably throws if the document has no element
// child yet -- confirm whether callers can reach that state.
JSDocument.prototype._insertExternalScript = function (uri) {
  var script = new JSElement (this, 'script');
  script.src = uri;

  var root = this.documentElement;
  root.appendChild (script);
}; // _insertExternalScript
703 |
|
|
704 |
|
// |documentElement| getter: the first entry of |childNodes| that is a
// JSElement, or |null| if there is no such child.
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var index = 0;
  while (index < children.length) {
    var node = children[index];
    if (node instanceof JSElement) {
      return node;
    }
    index++;
  }
  return null;
});
713 |
|
|
714 |
JSElement.prototype.__defineGetter__ ('text', function () { |
JSElement.prototype.__defineGetter__ ('text', function () { |
715 |
var r = ''; |
var r = ''; |
716 |
for (var i = 0; i < this.childNodes.length; i++) { |
for (var i = 0; i < this.childNodes.length; i++) { |
780 |
<p> |
<p> |
781 |
</textarea> |
</textarea> |
782 |
|
|
783 |
<h2>Log</h2> |
<h2 id=log>Log</h2> |
784 |
<p><output></output> |
<p><output></output> |
785 |
|
|
786 |
<h2>Note</h2> |
<h2 id=notes>Notes</h2> |
787 |
|
|
788 |
<p>This is a <em>simplified</em> implementation of |
<p>This is a <em>simplified</em> implementation of |
789 |
<a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5 |
<a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5 |
805 |
<li>Only supports <code>script</code> <code>type</code> |
<li>Only supports <code>script</code> <code>type</code> |
806 |
<code>text/javascript</code>. <code>type</code> and <code>language</code> |
<code>text/javascript</code>. <code>type</code> and <code>language</code> |
807 |
attributes are ignored. |
attributes are ignored. |
808 |
<li>Only supports <code>document.write</code>. |
<li>Only supports limited statements. It must consist of zero or more |
809 |
The script code must be match to the regular expression |
statements similar to the following, possibly |
810 |
<code>^\s*(?:document\.write\s*\(<var>v</var>\s*(?:,\s*<var>v</var>\s*)*\)\s*;\s*)*$</code> |
preceded, followed, or separated by white space characters: |
811 |
where <var>v</var> is <code>"[^"]*"|'[^']*'</code>. |
<ul> |
812 |
|
<li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>. |
813 |
|
<li><code>var s = document.createElement ("script"); |
814 |
|
s.src = "<var>string</var>"; |
815 |
|
document.documentElement.appendChild (s);</code> |
816 |
|
</ul> |
817 |
|
Note that strings may be delimited by <code>'</code>s instead of |
818 |
|
<code>"</code>s. |
819 |
<li>Only supports <code>javascript:</code> |
<li>Only supports <code>javascript:</code> |
820 |
<abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the |
<abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the |
821 |
<code>src</code> attribute of the <code>script</code> element. In addition, |
<code>src</code> attribute of the <code>script</code> element. In addition, |
822 |
the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to |
the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to |
823 |
the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>. |
the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>. |
824 |
|
<li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript |
825 |
|
string literals. |
826 |
|
<li>Does not handle <i>stop parsing</i> phase correctly if the document is |
827 |
|
replaced by a <code>document.open ()</code> call. In other words, delayed |
828 |
|
(deferred or asynchronous) script executions and event firings might be |
829 |
|
treated in a wrong way if a <code>document.open ()</code> invocation |
830 |
|
is implicitly done by <code>document.write ()</code> in a delayed script. |
831 |
</ul> |
</ul> |
832 |
|
|
833 |
<p>For some reason, this parser does not work in browsers that do |
<p>For some reason, this parser does not work in browsers that do |
834 |
not support JavaScript 1.5. |
not support JavaScript 1.5. |
835 |
|
|
836 |
|
<!-- TODO: |src| attribute value should refer to the value at the time |
837 |
|
when it is inserted into the document, not the value when the script is |
838 |
|
executed. Currently it does not matter, since we don't allow dynamic |
839 |
|
modification to the |src| content/DOM attribute value yet. --> |
840 |
|
|
841 |
</body> |
</body> |
|
</html> |
|
842 |
|
</html> |
843 |
|
<!-- $Date$ --> |
844 |
|
<!-- |
845 |
|
|
846 |
|
Copyright 2008 Wakaba <w@suika.fam.cx> |
847 |
|
|
848 |
|
This program is free software; you can redistribute it and/or |
849 |
|
modify it under the terms of the GNU General Public License |
850 |
|
as published by the Free Software Foundation; either version 2 |
851 |
|
of the License, or (at your option) any later version. |
852 |
|
|
853 |
|
This program is distributed in the hope that it will be useful, |
854 |
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
855 |
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
856 |
|
GNU General Public License for more details. |
857 |
|
|
858 |
|
You should have received a copy of the GNU General Public License |
859 |
|
along with this program; if not, write to the Free Software |
860 |
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
861 |
|
|
862 |
|
--> |