// Copyright (C) 2006 Google Inc.␊ |
//␊ |
// Licensed under the Apache License, Version 2.0 (the "License");␊ |
// you may not use this file except in compliance with the License.␊ |
// You may obtain a copy of the License at␊ |
//␊ |
// http://www.apache.org/licenses/LICENSE-2.0␊ |
//␊ |
// Unless required by applicable law or agreed to in writing, software␊ |
// distributed under the License is distributed on an "AS IS" BASIS,␊ |
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.␊ |
// See the License for the specific language governing permissions and␊ |
// limitations under the License.␊ |
␊ |
␊ |
/**␊ |
* @fileoverview␊ |
* some functions for browser-side pretty printing of code contained in html.␊ |
*␊ |
* The lexer should work on a number of languages including C and friends,␊ |
* Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles.␊ |
* It works passably on Ruby, PHP and Awk and a decent subset of Perl, but,␊ |
* because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or␊ |
* CAML-like languages.␊ |
*␊ |
* If there's a language not mentioned here, then I don't know it, and don't␊ |
* know whether it works. If it has a C-like, Bash-like, or XML-like syntax␊ |
* then it should work passably.␊ |
*␊ |
* Usage:␊ |
* 1) include this source file in an html page via␊ |
* <script type="text/javascript" src="/path/to/prettify.js"></script>␊ |
* 2) define style rules. See the example page for examples.␊ |
* 3) mark the <pre> and <code> tags in your source with class=prettyprint.␊ |
* You can also use the (html deprecated) <xmp> tag, but the pretty printer␊ |
* needs to do more substantial DOM manipulations to support that, so some␊ |
* css styles may not be preserved.␊ |
* That's it. I wanted to keep the API as simple as possible, so there's no␊ |
* need to specify which language the code is in.␊ |
*␊ |
* Change log:␊ |
* cbeust, 2006/08/22␊ |
* Java annotations (start with "@") are now captured as literals ("lit")␊ |
*/␊ |
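// An illustrative (not normative) sketch of steps 2 and 3 above, using the
// token style class names defined later in this file; the colors are
// placeholder values:
//   <style>
//     .str { color: #080; }  .kwd { color: #008; }  .com { color: #800; }
//     .typ { color: #606; }  .lit { color: #066; }  .pun { color: #660; }
//     .pln { color: #000; }  .tag { color: #008; }  .atn { color: #606; }
//     .atv { color: #080; }
//   </style>
//   <pre class="prettyprint">var x = 1;  // sample code</pre>
// prettyPrint() (defined at the bottom of this file) must then be invoked,
// e.g. from the page's onload handler, to rewrite the marked elements.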
␊ |
var PR_keywords = new Object();␊ |
/** initialize the keyword list for our target languages. */␊ |
(function () {␊ |
var CPP_KEYWORDS = (␊ |
"bool break case catch char class const const_cast continue default " +␊ |
"delete deprecated dllexport dllimport do double dynamic_cast else enum " +␊ |
"explicit extern false float for friend goto if inline int long mutable " +␊ |
"naked namespace new noinline noreturn nothrow novtable operator private " +␊ |
"property protected public register reinterpret_cast return selectany " +␊ |
"short signed sizeof static static_cast struct switch template this " +␊ |
"thread throw true try typedef typeid typename union unsigned using " +␊ |
"declaration, using directive uuid virtual void volatile while typeof");␊ |
var JAVA_KEYWORDS = (␊ |
"abstract default goto package synchronized boolean do if private this " +␊ |
"break double implements protected throw byte else import public throws " +␊ |
"case enum instanceof return transient catch extends int short try char " +␊ |
"final interface static void class finally long strictfp volatile const " +␊ |
"float native super while continue for new switch");␊ |
var PYTHON_KEYWORDS = (␊ |
"and assert break class continue def del elif else except exec finally " +␊ |
"for from global if import in is lambda not or pass print raise return " +␊ |
"try while yield False True None");␊ |
var JSCRIPT_KEYWORDS = (␊ |
"abstract boolean break byte case catch char class const continue " +␊ |
"debugger default delete do double else enum export extends false final " +␊ |
"finally float for function goto if implements import in instanceof int " +␊ |
"interface long native new null package private protected public return " +␊ |
"short static super switch synchronized this throw throws transient " +␊ |
"true try typeof var void volatile while with NaN Infinity");␊ |
var PERL_KEYWORDS = (␊ |
"foreach require sub unless until use elsif BEGIN END");␊ |
var SH_KEYWORDS = (␊ |
"if then do else fi end");␊ |
var RUBY_KEYWORDS = (␊ |
"if then elsif else end begin do rescue ensure while for class module " +␊ |
"def yield raise until unless and or not when case super undef break " +␊ |
"next redo retry in return alias defined");␊ |
var KEYWORDS = [CPP_KEYWORDS, JAVA_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,␊ |
PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];␊ |
for (var k = 0; k < KEYWORDS.length; k++) {␊ |
var kw = KEYWORDS[k].split(' ');␊ |
for (var i = 0; i < kw.length; i++) {␊ |
if (kw[i]) { PR_keywords[kw[i]] = true; }␊ |
}␊ |
}␊ |
}).call(this);␊ |
␊ |
// token style names. correspond to css classes␊ |
/** token style for a string literal */␊ |
var PR_STRING = 'str';␊ |
/** token style for a keyword */␊ |
var PR_KEYWORD = 'kwd';␊ |
/** token style for a comment */␊ |
var PR_COMMENT = 'com';␊ |
/** token style for a type */␊ |
var PR_TYPE = 'typ';␊ |
/** token style for a literal value. e.g. 1, null, true. */␊ |
var PR_LITERAL = 'lit';␊ |
/** token style for a punctuation string. */␊ |
var PR_PUNCTUATION = 'pun';␊ |
/** token style for plain text. */
var PR_PLAIN = 'pln';␊ |
␊ |
/** token style for an sgml tag. */␊ |
var PR_TAG = 'tag';␊ |
/** token style for a markup declaration such as a DOCTYPE. */␊ |
var PR_DECLARATION = 'dec';␊ |
/** token style for embedded source. */␊ |
var PR_SOURCE = 'src';␊ |
/** token style for an sgml attribute name. */␊ |
var PR_ATTRIB_NAME = 'atn';␊ |
/** token style for an sgml attribute value. */␊ |
var PR_ATTRIB_VALUE = 'atv';␊ |
␊ |
/** the position of the end of a token during lexing. A division of a string
 * into n tokens can be represented as a series of n - 1 token ends, as long
 * as runs of whitespace warrant their own token.
* @private␊ |
*/␊ |
function PR_TokenEnd(end, style) {␊ |
if (undefined === style) { throw new Error('BAD'); }␊ |
if ('number' != typeof(end)) { throw new Error('BAD'); }␊ |
this.end = end;␊ |
this.style = style;␊ |
}␊ |
PR_TokenEnd.prototype.toString = function () {␊ |
return '[PR_TokenEnd ' + this.end +␊ |
(this.style ? ':' + this.style : '') + ']';␊ |
};␊ |
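// Illustrative example: splitting 'a + b' into the tokens 'a', ' + ', 'b'
// can be recorded as PR_TokenEnds at positions 1 and 4; in practice the
// lexers below also push a trailing end at the string length.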
␊ |
␊ |
/** a chunk of text with a style. These are used to represent both the output␊ |
* from the lexing functions as well as intermediate results.␊ |
* @constructor␊ |
* @param token the token text␊ |
* @param style one of the token styles defined in designdoc-template, or null␊ |
* for a styleless token, such as an embedded html tag.␊ |
* @private␊ |
*/␊ |
function PR_Token(token, style) {␊ |
if (undefined === style) { throw new Error('BAD'); }␊ |
this.token = token;␊ |
this.style = style;␊ |
}␊ |
␊ |
PR_Token.prototype.toString = function () {␊ |
return '[PR_Token ' + this.token + (this.style ? ':' + this.style : '') + ']';␊ |
};␊ |
␊ |
␊ |
/** a helper class that decodes common html entities used to escape source and␊ |
* markup punctuation characters in html.␊ |
* @constructor␊ |
* @private␊ |
*/␊ |
function PR_DecodeHelper() {␊ |
this.next = 0;␊ |
this.ch = '\0';␊ |
}␊ |
␊ |
PR_DecodeHelper.prototype.decode = function (s, i) {␊ |
var next = i + 1;␊ |
var ch = s.charAt(i);␊ |
if ('&' == ch) {␊ |
var semi = s.indexOf(';', next);␊ |
    if (semi >= 0 && semi < next + 5) {
var entityName = s.substring(next, semi).toLowerCase();␊ |
next = semi + 1;␊ |
if ('lt' == entityName) {␊ |
ch = '<';␊ |
} else if ('gt' == entityName) {␊ |
ch = '>';␊ |
} else if ('quot' == entityName) {␊ |
ch = '"';␊ |
} else if ('apos' == entityName) {␊ |
ch = '\'';␊ |
} else if ('amp' == entityName) {␊ |
ch = '&';␊ |
} else {␊ |
next = i + 1;␊ |
}␊ |
}␊ |
}␊ |
this.next = next;␊ |
this.ch = ch;␊ |
return this.ch;␊ |
}␊ |
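// Illustrative usage (not executed here): decoding the entity in 'a&lt;b'
// starting at index 1 yields the decoded character and the index just past it:
//   var dh = new PR_DecodeHelper();
//   dh.decode('a&lt;b', 1);  // dh.ch == '<', dh.next == 5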
␊ |
␊ |
// some string utilities␊ |
function PR_isWordChar(ch) {␊ |
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');␊ |
}␊ |
␊ |
function PR_isIdentifierStart(ch) {␊ |
return PR_isWordChar(ch) || ch == '_' || ch == '$' || ch == '@';␊ |
}␊ |
␊ |
function PR_isIdentifierPart(ch) {␊ |
return PR_isIdentifierStart(ch) || PR_isDigitChar(ch);␊ |
}␊ |
␊ |
function PR_isSpaceChar(ch) {␊ |
return "\t \r\n".indexOf(ch) >= 0;␊ |
}␊ |
␊ |
function PR_isDigitChar(ch) {␊ |
return ch >= '0' && ch <= '9';␊ |
}␊ |
␊ |
function PR_trim(s) {␊ |
var i = 0, j = s.length - 1;␊ |
while (i <= j && PR_isSpaceChar(s.charAt(i))) { ++i; }␊ |
while (j > i && PR_isSpaceChar(s.charAt(j))) { --j; }␊ |
return s.substring(i, j + 1);␊ |
}␊ |
␊ |
function PR_startsWith(s, prefix) {␊ |
return s.length >= prefix.length && prefix == s.substring(0, prefix.length);␊ |
}␊ |
␊ |
function PR_endsWith(s, suffix) {␊ |
return s.length >= suffix.length &&␊ |
suffix == s.substring(s.length - suffix.length, s.length);␊ |
}␊ |
␊ |
/** true iff prefix matches the first prefix.length characters in chars[0:len].
* @private␊ |
*/␊ |
function PR_prefixMatch(chars, len, prefix) {␊ |
if (len < prefix.length) { return false; }␊ |
for (var i = 0, n = prefix.length; i < n; ++i) {␊ |
if (prefix.charAt(i) != chars[i]) { return false; }␊ |
}␊ |
return true;␊ |
}␊ |
␊ |
/** used to convert html special characters embedded in XMP tags into html. */␊ |
function PR_textToHtml(str) {␊ |
  return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}␊ |
␊ |
␊ |
/** split markup into chunks of html tags (style null) and␊ |
* plain text (style {@link #PR_PLAIN}).␊ |
*␊ |
* @param s a String of html.␊ |
* @return an Array of PR_Tokens of style PR_PLAIN and null.␊ |
* @private␊ |
*/␊ |
function PR_chunkify(s) {␊ |
var chunks = new Array();␊ |
var state = 0;␊ |
var start = 0;␊ |
var pos = -1;␊ |
for (var i = 0, n = s.length; i < n; ++i) {␊ |
var ch = s.charAt(i);␊ |
switch (state) {␊ |
case 0:␊ |
if ('<' == ch) { state = 1; }␊ |
break;␊ |
case 1:␊ |
pos = i - 1;␊ |
if ('/' == ch) { state = 2; }␊ |
else if (PR_isWordChar(ch)) { state = 3; }␊ |
else if ('<' == ch) { state = 1; }␊ |
else { state = 0; }␊ |
break;␊ |
case 2:␊ |
if (PR_isWordChar(ch)) { state = 3; }␊ |
else if ('<' == ch) { state = 1; }␊ |
else { state = 0; }␊ |
break;␊ |
case 3:␊ |
if ('>' == ch) {␊ |
if (pos > start) {␊ |
chunks.push(new PR_Token(s.substring(start, pos), PR_PLAIN));␊ |
}␊ |
chunks.push(new PR_Token(s.substring(pos, i + 1), null));␊ |
start = i + 1;␊ |
pos = -1;␊ |
state = 0;␊ |
}␊ |
break;␊ |
}␊ |
}␊ |
if (s.length > start) {␊ |
chunks.push(new PR_Token(s.substring(start, s.length), PR_PLAIN));␊ |
}␊ |
return chunks;␊ |
}␊ |
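// Illustrative example: PR_chunkify('x &lt; <b>y</b>') yields the tokens
//   'x &lt; ' (PR_PLAIN), '<b>' (null), 'y' (PR_PLAIN), '</b>' (null)
// i.e. html tags are separated from the text around them.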
␊ |
/** splits chunks around entities.␊ |
* @private␊ |
*/␊ |
function PR_splitEntities(chunks) {␊ |
var chunksOut = new Array();␊ |
var state = 0;␊ |
for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {␊ |
var chunk = chunks[ci];␊ |
if (PR_PLAIN != chunk.style) {␊ |
chunksOut.push(chunk);␊ |
continue;␊ |
}␊ |
var s = chunk.token;␊ |
var pos = 0;␊ |
var start;␊ |
for (var i = 0; i < s.length; ++i) {␊ |
var ch = s.charAt(i);␊ |
switch (state) {␊ |
case 0:␊ |
if ('&' == ch) { state = 1; }␊ |
break;␊ |
case 1:␊ |
if ('#' == ch || PR_isWordChar(ch)) {␊ |
start = i - 1;␊ |
state = 2;␊ |
} else {␊ |
state = 0;␊ |
}␊ |
break;␊ |
case 2:␊ |
if (';' == ch) {␊ |
if (start > pos) {␊ |
chunksOut.push(␊ |
new PR_Token(s.substring(pos, start), chunk.style));␊ |
}␊ |
chunksOut.push(new PR_Token(s.substring(start, i + 1), null));␊ |
pos = i + 1;␊ |
state = 0;␊ |
}␊ |
break;␊ |
}␊ |
}␊ |
if (s.length > pos) {␊ |
chunksOut.push(pos ?␊ |
new PR_Token(s.substring(pos, s.length), chunk.style) :␊ |
chunk);␊ |
}␊ |
}␊ |
return chunksOut;␊ |
}␊ |
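// Illustrative example: a PR_PLAIN chunk 'a &amp; b' is split into
//   'a ' (PR_PLAIN), '&amp;' (null), ' b' (PR_PLAIN)
// so that the entity is never broken across token boundaries.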
␊ |
/** walk the tokenEnds list and the chunk list in parallel to generate a list␊ |
* of split tokens.␊ |
* @private␊ |
*/␊ |
function PR_splitChunks(chunks, tokenEnds) {␊ |
var tokens = new Array(); // the output␊ |
␊ |
var ci = 0; // index into chunks␊ |
  // absolute position (across all chunks) of the output emitted so far.
  var posAbs = 0;
  // position within the current chunk of the output emitted so far.
  var posChunk = 0;
␊ |
// current chunk␊ |
var chunk = new PR_Token('', null);␊ |
␊ |
for (var ei = 0, ne = tokenEnds.length; ei < ne; ++ei) {␊ |
var tokenEnd = tokenEnds[ei];␊ |
var end = tokenEnd.end;␊ |
␊ |
var tokLen = end - posAbs;␊ |
var remainingInChunk = chunk.token.length - posChunk;␊ |
␊ |
    // FIX (Nicolas LASSALLE): remainingInChunk and tokLen can be equal,
    // which would make the while loop below spin forever, so guard against it.
if (remainingInChunk != tokLen) {␊ |
while (remainingInChunk <= tokLen) {␊ |
if (remainingInChunk > 0) {␊ |
tokens.push(␊ |
new PR_Token(chunk.token.substring(posChunk, chunk.token.length),␊ |
null == chunk.style ? null : tokenEnd.style));␊ |
}␊ |
posAbs += remainingInChunk;␊ |
posChunk = 0;␊ |
if (ci < chunks.length) { chunk = chunks[ci++]; }␊ |
␊ |
tokLen = end - posAbs;␊ |
remainingInChunk = chunk.token.length - posChunk;␊ |
}␊ |
}␊ |
␊ |
if (tokLen) {␊ |
tokens.push(␊ |
new PR_Token(chunk.token.substring(posChunk, posChunk + tokLen),␊ |
tokenEnd.style));␊ |
posAbs += tokLen;␊ |
posChunk += tokLen;␊ |
}␊ |
}␊ |
␊ |
return tokens;␊ |
}␊ |
␊ |
/** splits markup tokens into declarations, tags, and source chunks.␊ |
* @private␊ |
*/␊ |
function PR_splitMarkup(chunks) {␊ |
// A state machine to split out declarations, tags, etc.␊ |
// This state machine deals with absolute space in the text, indexed by k,␊ |
// and position in the current chunk, indexed by pos and tokenStart to␊ |
// generate a list of the ends of tokens.␊ |
// Absolute space is calculated by considering the chunks as appended into␊ |
// one big string, as they were before being split.␊ |
␊ |
// Known failure cases␊ |
// Server side scripting sections such as <?...?> in attributes.␊ |
// i.e. <span class="<? foo ?>">␊ |
// Handling this would require a stack, and we don't use PHP.␊ |
␊ |
// The output: a list of pairs of PR_TokenEnd instances␊ |
var tokenEnds = new Array();␊ |
␊ |
var state = 0; // FSM state variable␊ |
var k = 0; // position in absolute space of the start of the current chunk␊ |
var tokenStart = -1; // the start of the current token␊ |
␊ |
// Try to find a closing tag for any open <style> or <script> tags␊ |
// We can't do this at a later stage because then the following case␊ |
// would fail:␊ |
// <script>document.writeln('<!--');</script>␊ |
␊ |
// We use tokenChars[:tokenCharsI] to accumulate the tag name so that we␊ |
// can check whether to enter into a no scripting section when the tag ends.␊ |
var tokenChars = new Array(12);␊ |
var tokenCharsI = 0;␊ |
// if non null, the tag prefix that we need to see to break out.␊ |
var endScriptTag = null;␊ |
var decodeHelper = new PR_DecodeHelper();␊ |
␊ |
for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {␊ |
var chunk = chunks[ci];␊ |
if (PR_PLAIN != chunk.style) {␊ |
k += chunk.token.length;␊ |
continue;␊ |
}␊ |
␊ |
var s = chunk.token;␊ |
var pos = 0; // the position past the last character processed so far in s␊ |
␊ |
for (var i = 0, n = s.length; i < n; /* i = next at bottom */) {␊ |
decodeHelper.decode(s, i);␊ |
var ch = decodeHelper.ch;␊ |
var next = decodeHelper.next;␊ |
␊ |
var tokenStyle = null;␊ |
switch (state) {␊ |
case 0:␊ |
if ('<' == ch) { state = 1; }␊ |
break;␊ |
case 1:␊ |
tokenCharsI = 0;␊ |
if ('/' == ch) { // only consider close tags if we're in script/style␊ |
state = 7;␊ |
} else if (null == endScriptTag) {␊ |
if ('!' == ch) {␊ |
state = 2;␊ |
} else if (PR_isWordChar(ch)) {␊ |
state = 8;␊ |
} else if ('?' == ch) {␊ |
state = 9;␊ |
} else if ('%' == ch) {␊ |
state = 11;␊ |
} else if ('<' != ch) {␊ |
state = 0;␊ |
}␊ |
} else if ('<' != ch) {␊ |
state = 0;␊ |
}␊ |
break;␊ |
case 2:␊ |
if ('-' == ch) {␊ |
state = 4;␊ |
} else if (PR_isWordChar(ch)) {␊ |
state = 3;␊ |
} else if ('<' == ch) {␊ |
state = 1;␊ |
} else {␊ |
state = 0;␊ |
}␊ |
break;␊ |
case 3:␊ |
if ('>' == ch) {␊ |
state = 0;␊ |
tokenStyle = PR_DECLARATION;␊ |
}␊ |
break;␊ |
case 4:␊ |
if ('-' == ch) { state = 5; }␊ |
break;␊ |
case 5:␊ |
if ('-' == ch) { state = 6; }␊ |
break;␊ |
case 6:␊ |
if ('>' == ch) {␊ |
state = 0;␊ |
tokenStyle = PR_COMMENT;␊ |
} else if ('-' == ch) {␊ |
state = 6;␊ |
} else {␊ |
state = 4;␊ |
}␊ |
break;␊ |
case 7:␊ |
if (PR_isWordChar(ch)) {␊ |
state = 8;␊ |
} else if ('<' == ch) {␊ |
state = 1;␊ |
} else {␊ |
state = 0;␊ |
}␊ |
break;␊ |
case 8:␊ |
if ('>' == ch) {␊ |
state = 0;␊ |
tokenStyle = PR_TAG;␊ |
}␊ |
break;␊ |
case 9:␊ |
if ('?' == ch) { state = 10; }␊ |
break;␊ |
case 10:␊ |
if ('>' == ch) {␊ |
state = 0;␊ |
tokenStyle = PR_SOURCE;␊ |
} else if ('?' != ch) {␊ |
state = 9;␊ |
}␊ |
break;␊ |
case 11:␊ |
if ('%' == ch) { state = 12; }␊ |
break;␊ |
case 12:␊ |
if ('>' == ch) {␊ |
state = 0;␊ |
tokenStyle = PR_SOURCE;␊ |
} else if ('%' != ch) {␊ |
state = 11;␊ |
}␊ |
break;␊ |
}␊ |
␊ |
if (tokenCharsI < tokenChars.length) {␊ |
tokenChars[tokenCharsI++] = ch.toLowerCase();␊ |
}␊ |
if (1 == state) { tokenStart = k + i; }␊ |
i = next;␊ |
      if (tokenStyle != null) {
        // only tag tokens can open or close a script, style, or xmp section
        if (PR_TAG == tokenStyle) {
if (endScriptTag) {␊ |
if (PR_prefixMatch(tokenChars, tokenCharsI, endScriptTag)) {␊ |
endScriptTag = null;␊ |
}␊ |
} else {␊ |
if (PR_prefixMatch(tokenChars, tokenCharsI, 'script')) {␊ |
endScriptTag = '/script';␊ |
} else if (PR_prefixMatch(tokenChars, tokenCharsI, 'style')) {␊ |
endScriptTag = '/style';␊ |
} else if (PR_prefixMatch(tokenChars, tokenCharsI, 'xmp')) {␊ |
endScriptTag = '/xmp';␊ |
}␊ |
}␊ |
// disallow the tag if endScriptTag is set and this was not an open␊ |
// tag.␊ |
if (endScriptTag && tokenCharsI && '/' == tokenChars[0]) {␊ |
tokenStyle = null;␊ |
}␊ |
}␊ |
if (null != tokenStyle) {␊ |
tokenEnds.push(new PR_TokenEnd(tokenStart, PR_PLAIN));␊ |
tokenEnds.push(new PR_TokenEnd(k + next, tokenStyle));␊ |
}␊ |
}␊ |
}␊ |
k += chunk.token.length;␊ |
}␊ |
tokenEnds.push(new PR_TokenEnd(k, PR_PLAIN));␊ |
␊ |
return tokenEnds;␊ |
}␊ |
␊ |
/** splits the given string into comment, string, and "other" tokens.␊ |
* @return an array of PR_Tokens with style in␊ |
* (PR_STRING, PR_COMMENT, PR_PLAIN, null)␊ |
* The result array may contain spurious zero length tokens. Ignore them.␊ |
*␊ |
* @private␊ |
*/␊ |
function PR_splitStringAndCommentTokens(chunks) {␊ |
// a state machine to split out comments, strings, and other stuff␊ |
var tokenEnds = new Array(); // positions of ends of tokens in absolute space␊ |
var state = 0; // FSM state variable␊ |
var delim = -1; // string delimiter␊ |
var k = 0; // absolute position of beginning of current chunk␊ |
for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {␊ |
var chunk = chunks[ci];␊ |
var s = chunk.token;␊ |
if (PR_PLAIN == chunk.style) {␊ |
for (var i = 0, n = s.length; i < n; ++i) {␊ |
var ch = s.charAt(i);␊ |
if (0 == state) {␊ |
if (ch == '"' || ch == '\'' || ch == '`') {␊ |
tokenEnds.push(new PR_TokenEnd(k + i, PR_PLAIN));␊ |
state = 1;␊ |
delim = ch;␊ |
} else if (ch == '/') {␊ |
state = 3;␊ |
} else if (ch == '#') {␊ |
tokenEnds.push(new PR_TokenEnd(k + i, PR_PLAIN));␊ |
state = 4;␊ |
}␊ |
} else if (1 == state) {␊ |
if (ch == delim) {␊ |
state = 0;␊ |
tokenEnds.push(new PR_TokenEnd(k + i + 1, PR_STRING));␊ |
} else if (ch == '\\') {␊ |
state = 2;␊ |
}␊ |
} else if (2 == state) {␊ |
state = 1;␊ |
} else if (3 == state) {␊ |
if (ch == '/') {␊ |
state = 4;␊ |
tokenEnds.push(new PR_TokenEnd(k + i - 1, PR_PLAIN));␊ |
} else if (ch == '*') {␊ |
state = 5;␊ |
tokenEnds.push(new PR_TokenEnd(k + i - 1, PR_PLAIN));␊ |
} else {␊ |
state = 0;␊ |
            // next loop will reenter state 0 with the same value of i, so
            // ch will be reconsidered as the start of a new token.
--i;␊ |
}␊ |
} else if (4 == state) {␊ |
if (ch == '\r' || ch == '\n') {␊ |
state = 0;␊ |
tokenEnds.push(new PR_TokenEnd(k + i, PR_COMMENT));␊ |
}␊ |
} else if (5 == state) {␊ |
if (ch == '*') {␊ |
state = 6;␊ |
}␊ |
} else if (6 == state) {␊ |
if (ch == '/') {␊ |
state = 0;␊ |
tokenEnds.push(new PR_TokenEnd(k + i + 1, PR_COMMENT));␊ |
} else if (ch != '*') {␊ |
state = 5;␊ |
}␊ |
}␊ |
}␊ |
}␊ |
k += s.length;␊ |
}␊ |
tokenEnds.push(new PR_TokenEnd(k, PR_PLAIN)); // a token ends at the end␊ |
␊ |
return PR_splitChunks(chunks, tokenEnds);␊ |
}␊ |
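// Illustrative example: given a single PR_PLAIN chunk containing
//   'x = "a"; // done\n'
// this yields roughly
//   'x = ' (PR_PLAIN), '"a"' (PR_STRING), '; ' (PR_PLAIN),
//   '// done' (PR_COMMENT), '\n' (PR_PLAIN).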
␊ |
/** used by lexSource to split a non string, non comment token.␊ |
* @private␊ |
*/␊ |
function PR_splitNonStringNonCommentToken(s, outlist) {␊ |
var pos = 0;␊ |
var state = 0;␊ |
for (var i = 0; i <= s.length; i++) {␊ |
var ch = s.charAt(i);␊ |
// the next state.␊ |
// if set to -1 then it will cause a reentry to state 0 without consuming␊ |
// another character.␊ |
var nstate = state;␊ |
␊ |
if (i == s.length) {␊ |
// nstate will not be equal to state, so it will append the token␊ |
nstate = -2;␊ |
} else {␊ |
switch (state) {␊ |
case 0: // whitespace state␊ |
if (PR_isIdentifierStart(ch)) {␊ |
nstate = 1;␊ |
} else if (PR_isDigitChar(ch)) {␊ |
nstate = 2;␊ |
} else if (!PR_isSpaceChar(ch)) {␊ |
nstate = 3;␊ |
}␊ |
if (nstate && pos < i) {␊ |
var t = s.substring(pos, i);␊ |
outlist.push(new PR_Token(t, PR_PLAIN));␊ |
pos = i;␊ |
}␊ |
break;␊ |
case 1: // identifier state␊ |
if (!PR_isIdentifierPart(ch)) {␊ |
nstate = -1;␊ |
}␊ |
break;␊ |
case 2: // number literal state␊ |
// handle numeric literals like␊ |
// 0x7f 300UL 100_000␊ |
␊ |
// this does not treat floating point values as a single literal␊ |
// 0.1 and 3e-6␊ |
// are each split into multiple tokens␊ |
if (!(PR_isDigitChar(ch) || PR_isWordChar(ch) || ch == '_')) {␊ |
nstate = -1;␊ |
}␊ |
break;␊ |
case 3: // punctuation state␊ |
if (PR_isIdentifierStart(ch) || PR_isDigitChar(ch) ||␊ |
PR_isSpaceChar(ch)) {␊ |
nstate = -1;␊ |
}␊ |
break;␊ |
}␊ |
}␊ |
␊ |
if (nstate != state) {␊ |
if (nstate < 0) {␊ |
if (i > pos) {␊ |
var t = s.substring(pos, i);␊ |
var ch0 = t.charAt(0);␊ |
var style;␊ |
if (PR_isIdentifierStart(ch0)) {␊ |
if (PR_keywords[t]) {␊ |
style = PR_KEYWORD;␊ |
} else if (ch0 === '@') {␊ |
style = PR_LITERAL;␊ |
} else {␊ |
                // Treat as a type any word that starts with an uppercase
                // character and contains at least one lowercase character,
                // or that ends with _t.
// This works perfectly for Java, pretty well for C++, and␊ |
// passably for Python. The _t catches C structs.␊ |
var isType = false;␊ |
if (ch0 >= 'A' && ch0 <= 'Z') {␊ |
for (var j = 1; j < t.length; j++) {␊ |
var ch1 = t.charAt(j);␊ |
if (ch1 >= 'a' && ch1 <= 'z') {␊ |
isType = true;␊ |
break;␊ |
}␊ |
}␊ |
if (!isType && t.length >= 2 &&␊ |
t.substring(t.length - 2) == '_t') {␊ |
isType = true;␊ |
}␊ |
}␊ |
style = isType ? PR_TYPE : PR_PLAIN;␊ |
}␊ |
} else if (PR_isDigitChar(ch0)) {␊ |
style = PR_LITERAL;␊ |
} else if (!PR_isSpaceChar(ch0)) {␊ |
style = PR_PUNCTUATION;␊ |
} else {␊ |
style = PR_PLAIN;␊ |
}␊ |
pos = i;␊ |
outlist.push(new PR_Token(t, style));␊ |
}␊ |
␊ |
state = 0;␊ |
if (nstate == -1) {␊ |
// don't increment. This allows us to use state 0 to redispatch based␊ |
// on the current character.␊ |
i--;␊ |
continue;␊ |
}␊ |
}␊ |
state = nstate;␊ |
}␊ |
}␊ |
}␊ |
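// Illustrative example: PR_splitNonStringNonCommentToken('for (i=0;', out)
// appends to out the tokens
//   'for' (PR_KEYWORD), ' ' (PR_PLAIN), '(' (PR_PUNCTUATION), 'i' (PR_PLAIN),
//   '=' (PR_PUNCTUATION), '0' (PR_LITERAL), ';' (PR_PUNCTUATION).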
␊ |
/** split a group of chunks of markup.␊ |
* @private␊ |
*/␊ |
function PR_tokenizeMarkup(chunks) {␊ |
if (!(chunks && chunks.length)) { return chunks; }␊ |
␊ |
var tokenEnds = PR_splitMarkup(chunks);␊ |
return PR_splitChunks(chunks, tokenEnds);␊ |
}␊ |
␊ |
/** split tag attributes and their values out from the tag name, and
* recursively lex source chunks.␊ |
* @private␊ |
*/␊ |
function PR_splitTagAttributes(tokens) {␊ |
var tokensOut = new Array();␊ |
var state = 0;␊ |
var stateStyle = PR_TAG;␊ |
var delim = null; // attribute delimiter for quoted value state.␊ |
var decodeHelper = new PR_DecodeHelper();␊ |
for (var ci = 0; ci < tokens.length; ++ci) {␊ |
var tok = tokens[ci];␊ |
if (PR_TAG == tok.style) {␊ |
var s = tok.token;␊ |
var start = 0;␊ |
for (var i = 0; i < s.length; /* i = next at bottom */) {␊ |
decodeHelper.decode(s, i);␊ |
var ch = decodeHelper.ch;␊ |
var next = decodeHelper.next;␊ |
␊ |
var emitEnd = null; // null or position of end of chunk to emit.␊ |
var nextStyle = null; // null or next value of stateStyle␊ |
if (ch == '>') {␊ |
if (PR_TAG != stateStyle) {␊ |
emitEnd = i;␊ |
nextStyle = PR_TAG;␊ |
}␊ |
} else {␊ |
switch (state) {␊ |
case 0:␊ |
if ('<' == ch) { state = 1; }␊ |
break;␊ |
case 1:␊ |
if (PR_isSpaceChar(ch)) { state = 2; }␊ |
break;␊ |
case 2:␊ |
if (!PR_isSpaceChar(ch)) {␊ |
nextStyle = PR_ATTRIB_NAME;␊ |
emitEnd = i;␊ |
state = 3;␊ |
}␊ |
break;␊ |
case 3:␊ |
if ('=' == ch) {␊ |
emitEnd = i;␊ |
nextStyle = PR_TAG;␊ |
state = 5;␊ |
} else if (PR_isSpaceChar(ch)) {␊ |
emitEnd = i;␊ |
nextStyle = PR_TAG;␊ |
state = 4;␊ |
}␊ |
break;␊ |
case 4:␊ |
if ('=' == ch) {␊ |
state = 5;␊ |
} else if (!PR_isSpaceChar(ch)) {␊ |
emitEnd = i;␊ |
nextStyle = PR_ATTRIB_NAME;␊ |
state = 3;␊ |
}␊ |
break;␊ |
case 5:␊ |
if ('"' == ch || '\'' == ch) {␊ |
emitEnd = i;␊ |
nextStyle = PR_ATTRIB_VALUE;␊ |
state = 6;␊ |
delim = ch;␊ |
} else if (!PR_isSpaceChar(ch)) {␊ |
emitEnd = i;␊ |
nextStyle = PR_ATTRIB_VALUE;␊ |
state = 7;␊ |
}␊ |
break;␊ |
case 6:␊ |
if (ch == delim) {␊ |
emitEnd = next;␊ |
nextStyle = PR_TAG;␊ |
state = 2;␊ |
}␊ |
break;␊ |
case 7:␊ |
if (PR_isSpaceChar(ch)) {␊ |
emitEnd = i;␊ |
nextStyle = PR_TAG;␊ |
state = 2;␊ |
}␊ |
break;␊ |
}␊ |
}␊ |
if (emitEnd) {␊ |
if (emitEnd > start) {␊ |
tokensOut.push(␊ |
new PR_Token(s.substring(start, emitEnd), stateStyle));␊ |
start = emitEnd;␊ |
}␊ |
stateStyle = nextStyle;␊ |
}␊ |
i = next;␊ |
}␊ |
if (s.length > start) {␊ |
tokensOut.push(new PR_Token(s.substring(start, s.length), stateStyle));␊ |
}␊ |
} else {␊ |
if (tok.style) {␊ |
state = 0;␊ |
stateStyle = PR_TAG;␊ |
}␊ |
tokensOut.push(tok);␊ |
}␊ |
}␊ |
return tokensOut;␊ |
}␊ |
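// Illustrative example: a PR_TAG token '<a href="x">' is split into
//   '<a ' (PR_TAG), 'href' (PR_ATTRIB_NAME), '=' (PR_TAG),
//   '"x"' (PR_ATTRIB_VALUE), '>' (PR_TAG).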
␊ |
/** identify regions of markup that are really source code, and recursively
* lex them.␊ |
* @private␊ |
*/␊ |
function PR_splitSourceNodes(tokens) {␊ |
var tokensOut = new Array();␊ |
// when we see a <script> tag, store '/' here so that we know to end the␊ |
// source processing␊ |
var endScriptTag = null;␊ |
var decodeHelper = new PR_DecodeHelper();␊ |
␊ |
var sourceChunks = null;␊ |
␊ |
for (var ci = 0, nc = tokens.length; ci < nc; ++ci) {␊ |
var tok = tokens[ci];␊ |
if (null == tok.style) {␊ |
      tokensOut.push(tok);
continue;␊ |
}␊ |
␊ |
var s = tok.token;␊ |
␊ |
if (null == endScriptTag) {␊ |
if (PR_SOURCE == tok.style) {␊ |
// split off any starting and trailing <?, <%␊ |
if ('<' == decodeHelper.decode(s, 0)) {␊ |
decodeHelper.decode(s, decodeHelper.next);␊ |
if ('%' == decodeHelper.ch || '?' == decodeHelper.ch) {␊ |
endScriptTag = decodeHelper.ch;␊ |
tokensOut.push(new PR_Token(s.substring(0, decodeHelper.next),␊ |
PR_TAG));␊ |
s = s.substring(decodeHelper.next, s.length);␊ |
}␊ |
}␊ |
} else if (PR_TAG == tok.style) {␊ |
if ('<' == decodeHelper.decode(s, 0) &&␊ |
'/' != s.charAt(decodeHelper.next)) {␊ |
var tagContent = s.substring(decodeHelper.next).toLowerCase();␊ |
          // FIXME(msamuel): this does not mirror exactly the code in
          // PR_splitMarkup that defers splitting tags inside script and
// style blocks.␊ |
if (PR_startsWith(tagContent, 'script') ||␊ |
PR_startsWith(tagContent, 'style') ||␊ |
PR_startsWith(tagContent, 'xmp')) {␊ |
endScriptTag = '/';␊ |
}␊ |
}␊ |
}␊ |
}␊ |
␊ |
if (null != endScriptTag) {␊ |
var endTok = null;␊ |
if (PR_SOURCE == tok.style) {␊ |
if (endScriptTag == '%' || endScriptTag == '?') {␊ |
var pos = s.lastIndexOf(endScriptTag);␊ |
if (pos >= 0 && '>' == decodeHelper.decode(s, pos + 1) &&␊ |
s.length == decodeHelper.next) {␊ |
endTok = new PR_Token(s.substring(pos, s.length), PR_TAG);␊ |
s = s.substring(0, pos);␊ |
}␊ |
}␊ |
if (null == sourceChunks) { sourceChunks = new Array(); }␊ |
sourceChunks.push(new PR_Token(s, PR_PLAIN));␊ |
} else if (PR_PLAIN == tok.style) {␊ |
if (null == sourceChunks) { sourceChunks = new Array(); }␊ |
sourceChunks.push(tok);␊ |
} else if (PR_TAG == tok.style) {␊ |
// if it starts with </ then it must be the end tag.␊ |
if ('<' == decodeHelper.decode(tok.token, 0) &&␊ |
tok.token.length > decodeHelper.next &&␊ |
'/' == decodeHelper.decode(tok.token, decodeHelper.next)) {␊ |
endTok = tok;␊ |
} else {␊ |
tokensOut.push(tok);␊ |
}␊ |
} else {␊ |
if (sourceChunks) {␊ |
sourceChunks.push(tok);␊ |
} else {␊ |
// push remaining tag and attribute tokens from the opening tag␊ |
tokensOut.push(tok);␊ |
}␊ |
}␊ |
if (endTok) {␊ |
if (sourceChunks) {␊ |
var sourceTokens = PR_lexSource(sourceChunks);␊ |
tokensOut.push(new PR_Token('<span class=embsrc>', null));␊ |
for (var si = 0, ns = sourceTokens.length; si < ns; ++si) {␊ |
tokensOut.push(sourceTokens[si]);␊ |
}␊ |
tokensOut.push(new PR_Token('</span>', null));␊ |
sourceChunks = null;␊ |
}␊ |
tokensOut.push(endTok);␊ |
endScriptTag = null;␊ |
}␊ |
} else {␊ |
tokensOut.push(tok);␊ |
}␊ |
}␊ |
return tokensOut;␊ |
}␊ |
␊ |
/** splits the quotes from an attribute value.␊ |
* ['"foo"'] -> ['"', 'foo', '"']␊ |
* @private␊ |
*/␊ |
function PR_splitAttributeQuotes(tokens) {␊ |
var firstPlain = null, lastPlain = null;␊ |
for (var i = 0; i < tokens.length; ++i) {␊ |
if (PR_PLAIN == tokens[i].style) {␊ |
firstPlain = i;␊ |
break;␊ |
}␊ |
}␊ |
for (var i = tokens.length; --i >= 0;) {␊ |
if (PR_PLAIN == tokens[i].style) {␊ |
lastPlain = i;␊ |
break;␊ |
}␊ |
}␊ |
if (null == firstPlain) { return tokens; }␊ |
␊ |
var decodeHelper = new PR_DecodeHelper();␊ |
var fs = tokens[firstPlain].token;␊ |
var fc = decodeHelper.decode(fs, 0);␊ |
if ('"' != fc && '\'' != fc) {␊ |
return tokens;␊ |
}␊ |
var fpos = decodeHelper.next;␊ |
␊ |
var ls = tokens[lastPlain].token;␊ |
var lpos = ls.lastIndexOf('&');␊ |
if (lpos < 0) { lpos = ls.length - 1; }␊ |
var lc = decodeHelper.decode(ls, lpos);␊ |
if (lc != fc || decodeHelper.next != ls.length) {␊ |
lc = null;␊ |
lpos = ls.length;␊ |
}␊ |
␊ |
var tokensOut = new Array();␊ |
for (var i = 0; i < firstPlain; ++i) {␊ |
tokensOut.push(tokens[i]);␊ |
}␊ |
tokensOut.push(new PR_Token(fs.substring(0, fpos), PR_ATTRIB_VALUE));␊ |
if (lastPlain == firstPlain) {␊ |
tokensOut.push(new PR_Token(fs.substring(fpos, lpos), PR_PLAIN));␊ |
} else {␊ |
tokensOut.push(new PR_Token(fs.substring(fpos, fs.length), PR_PLAIN));␊ |
for (var i = firstPlain + 1; i < lastPlain; ++i) {␊ |
tokensOut.push(tokens[i]);␊ |
}␊ |
if (lc) {␊ |
      tokensOut.push(new PR_Token(ls.substring(0, lpos), PR_PLAIN));
    } else {
      tokensOut.push(tokens[lastPlain]);
}␊ |
}␊ |
if (lc) {␊ |
tokensOut.push(new PR_Token(ls.substring(lpos, ls.length), PR_PLAIN));␊ |
}␊ |
for (var i = lastPlain + 1; i < tokens.length; ++i) {␊ |
tokensOut.push(tokens[i]);␊ |
}␊ |
return tokensOut;␊ |
}␊ |
␊ |
/** identify attribute values that really contain source code and recursively␊ |
* lex them.␊ |
* @private␊ |
*/␊ |
function PR_splitSourceAttributes(tokens) {␊ |
var tokensOut = new Array();␊ |
␊ |
var sourceChunks = null;␊ |
var inSource = false;␊ |
var name = '';␊ |
␊ |
for (var ci = 0, nc = tokens.length; ci < nc; ++ci) {␊ |
var tok = tokens[ci];␊ |
var outList = tokensOut;␊ |
if (PR_TAG == tok.style) {␊ |
if (inSource) {␊ |
inSource = false;␊ |
name = '';␊ |
if (sourceChunks) {␊ |
tokensOut.push(new PR_Token('<span class=embsrc>', null));␊ |
var sourceTokens =␊ |
PR_lexSource(PR_splitAttributeQuotes(sourceChunks));␊ |
for (var si = 0, ns = sourceTokens.length; si < ns; ++si) {␊ |
tokensOut.push(sourceTokens[si]);␊ |
}␊ |
tokensOut.push(new PR_Token('</span>', null));␊ |
sourceChunks = null;␊ |
}␊ |
} else if (name && tok.token.indexOf('=') >= 0) {␊ |
var nameLower = name.toLowerCase();␊ |
if (PR_startsWith(nameLower, 'on') || 'style' == nameLower) {␊ |
inSource = true;␊ |
}␊ |
} else {␊ |
name = '';␊ |
}␊ |
} else if (PR_ATTRIB_NAME == tok.style) {␊ |
name += tok.token;␊ |
} else if (PR_ATTRIB_VALUE == tok.style) {␊ |
if (inSource) {␊ |
if (null == sourceChunks) { sourceChunks = new Array(); }␊ |
outList = sourceChunks;␊ |
tok = new PR_Token(tok.token, PR_PLAIN);␊ |
}␊ |
} else {␊ |
if (sourceChunks) {␊ |
outList = sourceChunks;␊ |
}␊ |
}␊ |
outList.push(tok);␊ |
}␊ |
return tokensOut;␊ |
}␊ |
␊ |
/** returns a list of PR_Token objects given chunks of source code.␊ |
*␊ |
 * This code assumes that < characters in the source are html escaped (as
 * &lt;) but quotes are not. It will do a reasonable job with &lt;, but will
 * not recognize an &quot; as starting a string.
*␊ |
* This code treats ", ', and ` as string delimiters, and \ as a string escape.␊ |
* It does not recognize double delimiter escapes, or perl's qq() style␊ |
* strings.␊ |
*␊ |
* It recognizes C, C++, and shell style comments.␊ |
*␊ |
* @param chunks PR_Tokens with style in (null, PR_PLAIN)␊ |
*/␊ |
function PR_lexSource(chunks) {␊ |
// positions of ends of tokens in order␊ |
var tokensIn = PR_splitStringAndCommentTokens(chunks);␊ |
␊ |
  // split entities out so that we know to treat them as single units.
tokensIn = PR_splitEntities(tokensIn);␊ |
␊ |
// split non comment|string tokens on whitespace and word boundaries␊ |
var tokensOut = new Array();␊ |
for (var i = 0; i < tokensIn.length; ++i) {␊ |
var tok = tokensIn[i];␊ |
var t = tok.token;␊ |
var s = tok.style;␊ |
␊ |
if (PR_PLAIN == s) {␊ |
PR_splitNonStringNonCommentToken(t, tokensOut);␊ |
continue;␊ |
}␊ |
tokensOut.push(tok);␊ |
}␊ |
␊ |
return tokensOut;␊ |
}␊ |
␊ |
/** returns a list of PR_Token objects given a string of markup.␊ |
*␊ |
 * This code assumes that < characters in the source are html escaped (as
 * &lt;) but quotes are not. It will do a reasonable job with &lt;, but will
 * not recognize an &quot; as starting a string.
*␊ |
* This code recognizes a number of constructs.␊ |
* <!-- ... --> comment␊ |
* <!\w ... > declaration␊ |
* <\w ... > tag␊ |
* </\w ... > tag␊ |
* <?...?> embedded source␊ |
* &[#\w]...; entity␊ |
*␊ |
 * It does not recognize %foo; entities.
*␊ |
* It will recurse into any <style>, <script>, and on* attributes using␊ |
* PR_lexSource.␊ |
*/␊ |
function PR_lexMarkup(chunks) {␊ |
// This function works as follows:␊ |
// 1) Start by splitting the markup into text and tag chunks␊ |
// Input: String s␊ |
// Output: List<PR_Token> where style in (PR_PLAIN, null)␊ |
// 2) Then split the text chunks further into comments, declarations,␊ |
// tags, etc.␊ |
// After each split, consider whether the token is the start of an␊ |
// embedded source section, i.e. is an open <script> tag. If it is,␊ |
// find the corresponding close token, and don't bother to lex in between.␊ |
  //    Input: List<PR_Token>
// Output: List<PR_Token> with style in (PR_TAG, PR_PLAIN, PR_SOURCE, null)␊ |
// 3) Finally go over each tag token and split out attribute names and values.␊ |
// Input: List<PR_Token>␊ |
// Output: List<PR_Token> where style in␊ |
  //    (PR_TAG, PR_PLAIN, PR_SOURCE, PR_ATTRIB_NAME, PR_ATTRIB_VALUE, null)
var tokensOut = PR_tokenizeMarkup(chunks);␊ |
tokensOut = PR_splitTagAttributes(tokensOut);␊ |
tokensOut = PR_splitSourceNodes(tokensOut);␊ |
tokensOut = PR_splitSourceAttributes(tokensOut);␊ |
return tokensOut;␊ |
}␊ |
␊ |
/** classify the string as either source or markup and lex appropriately. */␊ |
function PR_lexOne(s) {␊ |
var chunks = PR_chunkify(s);␊ |
// treat it as markup if the first non whitespace character is a < and the␊ |
// last non-whitespace character is a >␊ |
var isMarkup = false;␊ |
for (var i = 0; i < chunks.length; ++i) {␊ |
if (PR_PLAIN == chunks[i].style) {␊ |
if (PR_startsWith(PR_trim(chunks[i].token), '<')) {␊ |
for (var j = chunks.length; --j >= 0;) {␊ |
if (PR_PLAIN == chunks[j].style) {␊ |
isMarkup = PR_endsWith(PR_trim(chunks[j].token), '>');␊ |
break;␊ |
}␊ |
}␊ |
}␊ |
break;␊ |
}␊ |
}␊ |
return isMarkup ? PR_lexMarkup(chunks) : PR_lexSource(chunks);␊ |
}␊ |
␊ |
/** pretty print a chunk of code.␊ |
*␊ |
* @param s code as html␊ |
* @return code as html, but prettier␊ |
*/␊ |
function prettyPrintOne(s) {␊ |
try {␊ |
var tokens = PR_lexOne(s);␊ |
var out = '';␊ |
var lastStyle = null;␊ |
for (var i = 0; i < tokens.length; i++) {␊ |
var t = tokens[i];␊ |
if (t.style != lastStyle) {␊ |
if (lastStyle != null) {␊ |
out += '</span>';␊ |
}␊ |
if (t.style != null) {␊ |
out += '<span class=' + t.style + '>';␊ |
}␊ |
lastStyle = t.style;␊ |
}␊ |
var html = t.token;␊ |
if (null != t.style) {␊ |
        // This interacts badly with the wiki which introduces paragraph tags
        // into pre blocks for some strange reason.
        // It's necessary for IE though, which seems to lose the
        // preformattedness of <pre> tags when their innerHTML is assigned.
        html = html.replace(/(?:\r\n?)|\n/g, '<br>').replace(/ /g, '&nbsp;');
}␊ |
out += html;␊ |
}␊ |
if (lastStyle != null) {␊ |
out += '</span>';␊ |
}␊ |
return out;␊ |
} catch (e) {␊ |
//alert(e.stack); // DISABLE in production␊ |
return s;␊ |
}␊ |
}␊ |
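// Illustrative usage (assuming the input is already html-escaped):
//   var pretty = prettyPrintOne('if (a &lt; b) { return "x"; }');
//   // pretty is the same code wrapped in <span class=...> style markers,
//   // ready to be assigned to the innerHTML of a styled <pre> block.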
␊ |
/** find all the <pre> and <code> tags in the DOM with class=prettyprint and
* prettify them.␊ |
*/␊ |
function prettyPrint() {␊ |
// fetch a list of nodes to rewrite␊ |
var codeSegments = [␊ |
document.getElementsByTagName('pre'),␊ |
document.getElementsByTagName('code'),␊ |
document.getElementsByTagName('xmp') ];␊ |
var elements = [];␊ |
for (var i = 0; i < codeSegments.length; ++i) {␊ |
for (var j = 0; j < codeSegments[i].length; ++j) {␊ |
elements.push(codeSegments[i][j]);␊ |
}␊ |
}␊ |
codeSegments = null;␊ |
␊ |
// the loop is broken into a series of continuations to make sure that we␊ |
// don't make the browser unresponsive when rewriting a large page.␊ |
var k = 0;␊ |
␊ |
function doWork() {␊ |
var endTime = new Date().getTime() + 250;␊ |
for (; k < elements.length && new Date().getTime() < endTime; k++) {␊ |
var cs = elements[k];␊ |
if (cs.className && cs.className.indexOf('prettyprint') >= 0) {␊ |
␊ |
// make sure this is not nested in an already prettified element␊ |
var nested = false;␊ |
for (var p = cs.parentNode; p != null; p = p.parentNode) {␊ |
        if ((p.tagName == 'PRE' || p.tagName == 'CODE' ||
             p.tagName == 'XMP') &&
p.className && p.className.indexOf('prettyprint') >= 0) {␊ |
nested = true;␊ |
break;␊ |
}␊ |
}␊ |
if (!nested) {␊ |
// XMP tags contain unescaped entities so require special handling.␊ |
var isRawContent = 'XMP' == cs.tagName;␊ |
␊ |
// fetch the content as a snippet of properly escaped HTML␊ |
var content = cs.innerHTML;␊ |
if (isRawContent) {␊ |
content = PR_textToHtml(content);␊ |
}␊ |
␊ |
// do the pretty printing␊ |
var newContent = prettyPrintOne(content);␊ |
␊ |
// push the prettified html back into the tag.␊ |
if (!isRawContent) {␊ |
// just replace the old html with the new␊ |
cs.innerHTML = newContent;␊ |
} else {␊ |
// we need to change the tag to a <pre> since <xmp>s do not allow␊ |
// embedded tags such as the span tags used to attach styles to␊ |
// sections of source code.␊ |
var pre = document.createElement('PRE');␊ |
for (var i = 0; i < cs.attributes.length; ++i) {␊ |
var a = cs.attributes[i];␊ |
if (a.specified) {␊ |
pre.setAttribute(a.name, a.value);␊ |
}␊ |
}␊ |
pre.innerHTML = newContent;␊ |
// remove the old␊ |
cs.parentNode.replaceChild(pre, cs);␊ |
}␊ |
}␊ |
}␊ |
}␊ |
if (k < elements.length) {␊ |
// finish up in a continuation␊ |
setTimeout(doWork, 250);␊ |
}␊ |
}␊ |
␊ |
doWork();␊ |
}␊ |
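// Typical hookup (illustrative): run prettyPrint once the document has loaded,
// e.g. via <body onload="prettyPrint()"> or, equivalently:
//   window.onload = prettyPrint;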
␊ |