Mercurial > zeropaste
comparison lib/assets/javascripts/prettify.js @ 58:33b47c2160a3
Use mainline prettify instead of stealing from bootstrap.
author | Edho Arief <edho@myconan.net> |
---|---|
date | Thu, 04 Oct 2012 06:53:04 +0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
57:85e9dc5df11c | 58:33b47c2160a3 |
---|---|
1 // Copyright (C) 2006 Google Inc. | |
2 // | |
3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
4 // you may not use this file except in compliance with the License. | |
5 // You may obtain a copy of the License at | |
6 // | |
7 // http://www.apache.org/licenses/LICENSE-2.0 | |
8 // | |
9 // Unless required by applicable law or agreed to in writing, software | |
10 // distributed under the License is distributed on an "AS IS" BASIS, | |
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 // See the License for the specific language governing permissions and | |
13 // limitations under the License. | |
14 | |
15 | |
16 /** | |
17 * @fileoverview | |
18 * some functions for browser-side pretty printing of code contained in html. | |
19 * | |
20 * <p> | |
21 * For a fairly comprehensive set of languages see the | |
22 * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> | |
23 * file that came with this source. At a minimum, the lexer should work on a | |
24 * number of languages including C and friends, Java, Python, Bash, SQL, HTML, | |
25 * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk | |
26 * and a subset of Perl, but, because of commenting conventions, doesn't work on | |
27 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. | |
28 * <p> | |
29 * Usage: <ol> | |
30 * <li> include this source file in an html page via | |
31 * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} | |
32 * <li> define style rules. See the example page for examples. | |
33 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with | |
34 * {@code class=prettyprint.} | |
35 * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty | |
36 * printer needs to do more substantial DOM manipulations to support that, so | |
37 * some css styles may not be preserved. | |
38 * </ol> | |
39 * That's it. I wanted to keep the API as simple as possible, so there's no | |
40 * need to specify which language the code is in, but if you wish, you can add | |
41 * another class to the {@code <pre>} or {@code <code>} element to specify the | |
42 * language, as in {@code <pre class="prettyprint lang-java">}. Any class that | |
43 * starts with "lang-" followed by a file extension, specifies the file type. | |
44 * See the "lang-*.js" files in this directory for code that implements | |
45 * per-language file handlers. | |
46 * <p> | |
47 * Change log:<br> | |
48 * cbeust, 2006/08/22 | |
49 * <blockquote> | |
50 * Java annotations (start with "@") are now captured as literals ("lit") | |
51 * </blockquote> | |
52 * @requires console | |
53 */ | |
54 | |
55 // JSLint declarations | |
56 /*global console, document, navigator, setTimeout, window */ | |
57 | |
58 /** | |
59 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with | |
60 * UI events. | |
61 * If set to {@code false}, {@code prettyPrint()} is synchronous. | |
62 */ | |
63 window['PR_SHOULD_USE_CONTINUATION'] = true; | |
64 | |
65 (function () { | |
// Keyword lists for various languages.
// We use things that coerce to strings to make them compact when minified
// and to defeat aggressive optimizers that fold large string constants.
// A list is either a comma-separated string or an array whose elements are
// other lists; coercing an array to a string joins its (nested) elements
// with commas, so every list flattens to one comma-separated keyword string.
var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "auto,case,char,const,default," +
    "double,enum,extern,float,goto,int,long,register,short,signed,sizeof," +
    "static,struct,switch,typedef,union,unsigned,void,volatile"];
var COMMON_KEYWORDS = [C_KEYWORDS, "catch,class,delete,false,import," +
    "new,operator,private,protected,public,this,throw,true,try,typeof"];
var CPP_KEYWORDS = [COMMON_KEYWORDS, "alignof,align_union,asm,axiom,bool," +
    "concept,concept_map,const_cast,constexpr,decltype," +
    "dynamic_cast,explicit,export,friend,inline,late_check," +
    "mutable,namespace,nullptr,reinterpret_cast,static_assert,static_cast," +
    "template,typeid,typename,using,virtual,where"];
var JAVA_KEYWORDS = [COMMON_KEYWORDS,
    "abstract,boolean,byte,extends,final,finally,implements,import," +
    "instanceof,null,native,package,strictfp,super,synchronized,throws," +
    "transient"];
var CSHARP_KEYWORDS = [JAVA_KEYWORDS,
    "as,base,by,checked,decimal,delegate,descending,dynamic,event," +
    "fixed,foreach,from,group,implicit,in,interface,internal,into,is,lock," +
    "object,out,override,orderby,params,partial,readonly,ref,sbyte,sealed," +
    "stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort,var"];
var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
    "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
    "true,try,unless,until,when,while,yes";
var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
    "debugger,eval,export,function,get,null,set,undefined,var,with," +
    "Infinity,NaN"];
var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
    "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
    "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
    "elif,except,exec,finally,from,global,import,in,is,lambda," +
    "nonlocal,not,or,pass,print,raise,try,with,yield," +
    "False,True,None"];
var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
    "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
    "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
    "BEGIN,END"];
var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
    "function,in,local,set,then,until"];
// Union of the lists above.  Every list must be its own array element:
// joining two of them with `+` coerces both to strings without a separating
// comma, which fuses the last keyword of one list with the first keyword of
// the next (PERL "...,END" + PYTHON "break,..." became "ENDbreak").
var ALL_KEYWORDS = [
    CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
    PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
// Prefix pattern for well-known C/C++ type names (stdio handles, STL
// containers and iterators, and sized int/float names such as uint32).
var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)/;
112 | |
// Token style names.  Each value corresponds to a CSS class.

/**
 * Style applied to string literals.
 * @const
 */
var PR_STRING = 'str';

/**
 * Style applied to keywords.
 * @const
 */
var PR_KEYWORD = 'kwd';

/**
 * Style applied to comments.
 * @const
 */
var PR_COMMENT = 'com';

/**
 * Style applied to type names.
 * @const
 */
var PR_TYPE = 'typ';

/**
 * Style applied to literal values, e.g. 1, null, true.
 * @const
 */
var PR_LITERAL = 'lit';

/**
 * Style applied to punctuation.
 * @const
 */
var PR_PUNCTUATION = 'pun';

/**
 * Style applied to plain text, i.e. tokens that get no more specific style.
 * @const
 */
var PR_PLAIN = 'pln';

/**
 * Style applied to an sgml tag.
 * @const
 */
var PR_TAG = 'tag';

/**
 * Style applied to a markup declaration such as a DOCTYPE.
 * @const
 */
var PR_DECLARATION = 'dec';

/**
 * Style applied to embedded source.
 * @const
 */
var PR_SOURCE = 'src';

/**
 * Style applied to an sgml attribute name.
 * @const
 */
var PR_ATTRIB_NAME = 'atn';

/**
 * Style applied to an sgml attribute value.
 * @const
 */
var PR_ATTRIB_VALUE = 'atv';

/**
 * Class name marking a section of markup that is not code, e.g. to allow
 * line numbers to be embedded within code listings.
 * @const
 */
var PR_NOCODE = 'nocode';
182 | |
183 | |
184 | |
185 /** | |
186 * A set of tokens that can precede a regular expression literal in | |
187 * javascript | |
188 * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html | |
189 * has the full list, but I've removed ones that might be problematic when | |
190 * seen in languages that don't support regular expression literals. | |
191 * | |
192 * <p>Specifically, I've removed any keywords that can't precede a regexp | |
193 * literal in a syntactically legal javascript program, and I've removed the | |
194 * "in" keyword since it's not a keyword in many languages, and might be used | |
195 * as a count of inches. | |
196 * | |
197 * <p>The link a above does not accurately describe EcmaScript rules since | |
198 * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works | |
199 * very well in practice. | |
200 * | |
201 * @private | |
202 * @const | |
203 */ | |
204 var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|\\!|\\!=|\\!==|\\#|\\%|\\%=|&|&&|&&=|&=|\\(|\\*|\\*=|\\+=|\\,|\\-=|\\->|\\/|\\/=|:|::|\\;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\@|\\[|\\^|\\^=|\\^\\^|\\^\\^=|\\{|\\||\\|=|\\|\\||\\|\\|=|\\~|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*'; | |
205 | |
206 // CAVEAT: this does not properly handle the case where a regular | |
207 // expression immediately follows another since a regular expression may | |
208 // have flags for case-sensitivity and the like. Having regexp tokens | |
209 // adjacent is not valid in any language I'm aware of, so I'm punting. | |
210 // TODO: maybe style special characters inside a regexp as punctuation. | |
211 | |
212 | |
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExps.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * <p>Capturing groups that are never back-referenced are rewritten as
 * non-capturing groups; the groups that remain, and the back-references to
 * them, are renumbered so they stay consistent inside the combined pattern.
 * When case-insensitive inputs are mixed with a case-sensitive input that
 * contains letters, the case-insensitive inputs are expanded letter by
 * letter (latin letters only) and the combined pattern is case-sensitive.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any of the inputs is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Count of capturing groups emitted so far, shared across all inputs so
  // that group numbers are unique in the combined pattern.
  var capturedGroupIndex = 0;

  // Decide whether the combined regex can simply carry the 'i' flag or
  // whether case-insensitive inputs have to be expanded by hand.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      // A case-sensitive input with literal letters: the 'i' flag would
      // change what it matches, so fold case manually instead.
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  var escapeCharToCodeUnit = {
    'b': 8,
    't': 9,
    'n': 0xa,
    'v': 0xb,
    'f': 0xc,
    'r': 0xd
  };

  /**
   * @param {string} charsetPart a single character or escape sequence taken
   *     from inside a character set.
   * @return {number} the code unit it denotes.
   */
  function decodeEscape(charsetPart) {
    var cc0 = charsetPart.charCodeAt(0);
    if (cc0 !== 92 /* \\ */) {
      return cc0;
    }
    var c1 = charsetPart.charAt(1);
    cc0 = escapeCharToCodeUnit[c1];
    if (cc0) {
      return cc0;
    } else if ('0' <= c1 && c1 <= '7') {
      return parseInt(charsetPart.substring(1), 8);
    } else if (c1 === 'u' || c1 === 'x') {
      return parseInt(charsetPart.substring(2), 16);
    } else {
      return charsetPart.charCodeAt(1);
    }
  }

  /**
   * @param {number} charCode a code unit.
   * @return {string} text that denotes charCode inside a character set.
   */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    // '^' must be escaped too: emitted first in a set it would otherwise
    // turn the set into a negated one.
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']' || ch === '^') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * @param {string} charSet the source text of a character set, including
   *     the surrounding square brackets.
   * @return {string} an equivalent set that also matches the other case of
   *     every latin letter in the input.
   */
  function caseFoldCharset(charSet) {
    // An empty set ("[]") has no parts and match() returns null for it.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
        groups.push(p);
      } else {
        var start = decodeEscape(p);
        var end;
        if (i + 2 < n && '-' === charsetParts[i + 1]) {
          end = decodeEscape(charsetParts[i + 2]);
          i += 2;
        } else {
          end = start;
        }
        ranges.push([start, end]);
        // If the range might intersect letters, then expand it.
        // This case handling is too simplistic.
        // It does not deal with non-latin case folding.
        // It works for latin source code identifiers though.
        if (!(end < 65 || start > 122)) {
          if (!(end < 65 || start > 90)) {
            // Upper-case portion -> add the lower-case equivalent.
            ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
          }
          if (!(end < 97 || start > 122)) {
            // Lower-case portion -> add the upper-case equivalent.
            ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
          }
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    var lastRange = [NaN, NaN];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        out.push('-', encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * @param {RegExp} regex one of the inputs.
   * @return {string} pattern source with prefix anchors removed, groups
   *     renumbered and, if required, case folded.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look up the group the back-reference points at, not the group
          // that happens to have been opened most recently.
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
448 | |
449 | |
/**
 * Flattens a DOM subtree into one string of source code plus an array that
 * maps ranges of that string back to the nodes the text came from.
 *
 * <p>
 * For example the DOM structure</p>
 * <pre>
 * (Element "p"
 *   (Element "b"
 *     (Text "print "))       ; #1
 *   (Text "'Hello '")        ; #2
 *   (Element "br")           ; #3
 *   (Text " + 'World';"))    ; #4
 * </pre>
 * <p>
 * which corresponds to the HTML
 * {@code <p><b>print </b>'Hello '<br> + 'World';</p>}, yields</p>
 * <pre>
 * {
 *   sourceCode: "print 'Hello '\n + 'World';",
 *   //                     1          2
 *   //           012345678901234 5678901234567
 *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
 * }
 * </pre>
 * <p>
 * where #1 refers to the {@code "print "} text node, and so on.</p>
 *
 * <p>
 * The spans array holds pairs: even elements are start indices of
 * substrings, odd elements are the text nodes (or BR/LI elements) that hold
 * the text of those substrings.  A substring runs until the next start
 * index or the end of the source.  Elements whose class contains
 * {@code nocode} are skipped together with their descendants.</p>
 *
 * @param {Node} node an HTML DOM subtree containing source-code.
 * @return {Object} source code and the text nodes in which they occur.
 */
function extractSourceSpans(node) {
  var nocodePattern = /(?:^|\s)nocode(?:\s|$)/;

  var pieces = [];      // text contributed by each recorded node
  var spanPairs = [];   // [start, node, start, node, ...]
  var offset = 0;       // running length of the concatenated pieces
  var count = 0;        // number of recorded nodes

  // Look up the effective white-space style of the root.
  var whiteSpaceStyle;
  if (node.currentStyle) {
    whiteSpaceStyle = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    whiteSpaceStyle = document.defaultView.getComputedStyle(node, null)
        .getPropertyValue('white-space');
  }
  var isPreformatted =
      whiteSpaceStyle && 'pre' === whiteSpaceStyle.substring(0, 3);

  // Appends text to the source and remembers which node it belongs to.
  function record(text, owner) {
    pieces[count] = text;
    spanPairs[count * 2] = offset;
    spanPairs[count * 2 + 1] = owner;
    offset += text.length;
    ++count;
  }

  function visit(current) {
    var kind = current.nodeType;
    if (kind === 1) {  // Element
      if (nocodePattern.test(current.className)) { return; }
      var child = current.firstChild;
      while (child) {
        visit(child);
        child = child.nextSibling;
      }
      var tagName = current.nodeName;
      if ('BR' === tagName || 'LI' === tagName) {
        // Line-breaking elements contribute a newline after their content.
        record('\n', current);
      }
    } else if (kind === 3 || kind === 4) {  // Text or CDATA section
      var text = current.nodeValue;
      if (text.length) {
        text = isPreformatted
            ? text.replace(/\r\n?/g, '\n')  // Normalize newlines.
            : text.replace(/[ \t\r\n]+/g, ' ');
        // TODO: handle tabs here?
        record(text, current);
      }
    }
  }

  visit(node);

  var result = {};
  result.sourceCode = pieces.join('').replace(/\n$/, '');
  result.spans = spanPairs;
  return result;
}
549 | |
550 | |
/**
 * Runs a language handler over a piece of source code and appends the
 * decorations it produces to out.  Does nothing when sourceCode is empty.
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with a job object holding
 *    sourceCode and basePos; the decorations are read from the job's
 *    decorations property once the handler returns.
 * @param {Array} out receives the decorations.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (sourceCode) {
    var job = {};
    job.sourceCode = sourceCode;
    job.basePos = basePos;
    langHandler(job);
    var produced = job.decorations;
    out.push.apply(out, produced);
  }
}
566 | |
/** Matches any non-space character. */
var notWs = /\S/;

/**
 * Finds the single child element that wraps all of an element's content.
 * <p>
 * If the element has exactly one child element, and every text node among
 * its children consists of space characters only, that child element is
 * returned.  In every other case the result is undefined.
 * <p>
 * The intent is to find the CODE element in {@code <pre><code ...>}, while
 * returning nothing when there are several child elements, as in
 * {@code <pre><code>...</code><code>...</code></pre>}, or when the element
 * has textual content of its own.
 */
function childContentWrapper(element) {
  // undefined: nothing seen yet; element itself: disqualified.
  var candidate;
  for (var child = element.firstChild; child; child = child.nextSibling) {
    var kind = child.nodeType;
    if (kind === 1) {  // Element Node
      // A second child element disqualifies the first.
      candidate = candidate ? element : child;
    } else if (kind === 3 && notWs.test(child.nodeValue)) {  // Text Node
      candidate = element;
    }
  }
  if (candidate === element) { return undefined; }
  return candidate;
}
592 | |
/** Given triples of [style, pattern, context] returns a lexing function.
  * The lexing function uses the patterns to find token boundaries and
  * produces a decoration list of the form
  * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
  * where index_n is an index into the sourceCode, and style_n is a style
  * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
  * all characters in sourceCode[index_n-1:index_n].
  *
  * Each element of a stylePatterns list has the form
  * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
  *
  * Style is a style constant like PR_PLAIN, or a string of the form
  * 'lang-FOO', where FOO is a language extension describing the language of
  * the portion of the token captured in group 1 of the pattern.  E.g., if
  * style is 'lang-lisp' and group 1 contains '(hello (world))', that portion
  * of the token is handed to the handler registered for lisp.  The text
  * before and after group 1 is restyled with this same decorator, so rules
  * must be written so that this cannot recurse forever; typically group 1
  * is required to be non-empty so the rule cannot match its own left or
  * right context again.
  *
  * Pattern must only match prefixes, and if it matches a prefix, then that
  * match is considered a token with the same style.
  *
  * Context is applied to the last non-whitespace, non-comment token
  * recognized.
  *
  * Shortcut is an optional string of characters, any of which, if the first
  * character, guarantee that this pattern and only this pattern matches.
  *
  * @param {Array} shortcutStylePatterns patterns that always start with
  *   a known character.  Must have a shortcut string.
  * @param {Array} fallthroughStylePatterns patterns that will be tried in
  *   order if the shortcut ones fail.  May have shortcuts.
  *
  * @return {function (Object)} a
  *   function that takes source code and returns a list of decorations.
  */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a first character to the one rule that can match tokens starting
  // with it.
  var shortcuts = {};
  var fallthroughCount = fallthroughStylePatterns.length;

  // Collect every distinct pattern, registering shortcuts along the way,
  // and combine the patterns into a single tokenizing regex.
  var everyRule = shortcutStylePatterns.concat(fallthroughStylePatterns);
  var distinctRegexs = [];
  var seenRegexKeys = {};
  for (var ri = 0, ruleCount = everyRule.length; ri < ruleCount; ++ri) {
    var rule = everyRule[ri];
    var shortcutChars = rule[3];
    if (shortcutChars) {
      for (var ci = shortcutChars.length - 1; ci >= 0; --ci) {
        shortcuts[shortcutChars.charAt(ci)] = rule;
      }
    }
    var ruleRegex = rule[1];
    var regexKey = '' + ruleRegex;
    if (!seenRegexKeys.hasOwnProperty(regexKey)) {
      distinctRegexs.push(ruleRegex);
      seenRegexKeys[regexKey] = null;
    }
  }
  // Catch-all so that every character ends up in some token.
  distinctRegexs.push(/[\0-\uffff]/);
  var tokenizer = combinePrefixPatterns(distinctRegexs);

  /**
   * Lexes job.sourceCode and stores on job.decorations an array of style
   * classes, each preceded by the position at which it starts in
   * job.sourceCode, in order.
   *
   * @param {Object} job an object like <pre>{
   *    sourceCode: {string} sourceText plain text,
   *    basePos: {int} position of job.sourceCode in the larger chunk of
   *        sourceCode.
   * }</pre>
   */
  var decorate = function (job) {
    var sourceCode = job.sourceCode;
    var basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var tokens = sourceCode.match(tokenizer) || [];
    // Styles already worked out for identical token text.
    var styleCache = {};
    var pos = 0;  // index into sourceCode

    for (var ti = 0; ti < tokens.length; ++ti) {
      var token = tokens[ti];
      var tokenStart = pos;
      pos += token.length;

      var style = styleCache[token];
      var match = undefined;
      var isEmbedded = false;

      if (typeof style !== 'string') {
        var candidate = shortcuts[token.charAt(0)];
        if (candidate) {
          match = token.match(candidate[1]);
          style = candidate[0];
        } else {
          for (var fi = 0; fi < fallthroughCount; ++fi) {
            candidate = fallthroughStylePatterns[fi];
            match = token.match(candidate[1]);
            if (match) {
              style = candidate[0];
              break;
            }
          }
          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // An embedding rule without a captured body: style as source.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
        continue;
      }

      // Treat group 1 as an embedded block of source code.
      var embeddedSource = match[1];
      var embeddedSourceStart = token.indexOf(embeddedSource);
      var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
      if (match[2]) {
        // If embeddedSource can be blank, then it would match at the
        // beginning which would cause us to infinitely recurse on the
        // entire token, so we catch the right context in match[2].
        embeddedSourceEnd = token.length - match[2].length;
        embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
      }
      var tokenBase = basePos + tokenStart;
      // Decorate the left of the embedded source
      appendDecorations(
          tokenBase,
          token.substring(0, embeddedSourceStart),
          decorate, decorations);
      // Decorate the embedded source
      appendDecorations(
          tokenBase + embeddedSourceStart,
          embeddedSource,
          langHandlerForExtension(style.substring(5), embeddedSource),
          decorations);
      // Decorate the right of the embedded section
      appendDecorations(
          tokenBase + embeddedSourceEnd,
          token.substring(embeddedSourceEnd),
          decorate, decorations);
    }
    job.decorations = decorations;
  };
  return decorate;
}
766 | |
/**
 * Returns a function that produces a list of decorations from source text.
 *
 * The generated lexer treats ", ', and (with multiLineStrings) ` as string
 * delimiters and \ as a string escape.  It does not recognize perl's qq()
 * style strings.  It has no special handling for double delimiter escapes as
 * in basic, so in that case a single string literal may be broken up into
 * multiple adjacent string literals.
 *
 * Options read from {@code options}:
 * <ul>
 * <li>tripleQuotedStrings: recognize '''...''' and """...""" in addition to
 *     single and double quoted strings.  Takes precedence over
 *     multiLineStrings.
 * <li>multiLineStrings: quoted strings (including backquoted ones) may span
 *     lines.  When neither string option is set, strings end at a line break.
 * <li>verbatimStrings: recognize C# style @"..." literals.
 * <li>hashComments: truthy to treat # as a comment start.  Combined with
 *     cStyleComments, a value greater than 1 enables ### block comments,
 *     otherwise C preprocessor directives and &lt;header.h&gt; names are
 *     recognized.
 * <li>cStyleComments: recognize // and slash-star comments.
 * <li>regexLiterals: recognize /.../ regular expression literals.
 * <li>types: an optional RegExp matching type names.
 * <li>keywords: a comma or space separated list of keywords.
 * </ul>
 *
 * @param {Object} options a set of optional parameters.
 * @return {function (Object)} a function that examines the source code
 *     in the input job and builds the decoration list.
 */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING, /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING, /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      // Header names such as <stdio.h> or <vector> are styled as strings.
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options['regexLiterals']) {
    /**
     * @const
     */
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  var types = options['types'];
  if (types) {
    fallthroughStylePatterns.push([PR_TYPE, types]);
  }

  // Strip one leading/trailing space, then turn the separator runs into a
  // regular expression alternation.
  var keywords = ("" + options['keywords']).replace(/^ | $/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
         null]);
  }

  shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']);
  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE, /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
      [PR_PLAIN, /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number, including fractions written
           // without a leading digit such as .5 (the quantifier here used to
           // be an escaped, literal '+', so such numbers never matched),
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.  See issue 144.
      [PR_PLAIN, /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
892 | |
// Language-agnostic decorator used when no specific handler applies: it knows
// every keyword list, both comment styles, multi-line strings and regular
// expression literals.
var defaultCodeOptions = {};
defaultCodeOptions['keywords'] = ALL_KEYWORDS;
defaultCodeOptions['hashComments'] = true;
defaultCodeOptions['cStyleComments'] = true;
defaultCodeOptions['multiLineStrings'] = true;
defaultCodeOptions['regexLiterals'] = true;
var decorateSource = sourceDecorator(defaultCodeOptions);
900 | |
/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * Lines are split at {@code <BR>} elements and, when the node is
 * preformatted, at line breaks inside text nodes.  Elements whose class
 * includes {@code nocode} are left untouched.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 * @param {number|boolean=} opt_startLineNum when an integer, used as the
 *     {@code value} of the first list item and as the offset of the
 *     {@code L0}..{@code L9} row classes.
 */
function numberLines(node, opt_startLineNum) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreak = /\r\n?|\n/;

  var document = node.ownerDocument;

  var whitespace;
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else {
    // Ask the node's own document for its view instead of testing one window
    // and calling through another; a document without a view has no computed
    // styles and must not throw.
    var view = document.defaultView;
    var computed = view && view.getComputedStyle
        ? view.getComputedStyle(node, null)
        : null;
    if (computed) {
      whitespace = computed.getPropertyValue('white-space');
    }
  }
  // If it's preformatted, then we need to split lines on line breaks
  // in addition to <BR>s.
  // A node that is not attached to a rendered document (such as the container
  // created by prettyPrintOne) can report no white-space value at all, so
  // fall back on the tag name in that case.
  var isPreformatted = whitespace
      ? 'pre' === whitespace.substring(0, 3)
      : /^pre$/i.test(node.nodeName);

  var li = document.createElement('LI');
  while (node.firstChild) {
    li.appendChild(node.firstChild);
  }
  // An array of lines.  We split below, so this is initialized to one
  // un-split line.
  var listItems = [li];

  function walk(node) {
    switch (node.nodeType) {
      case 1:  // Element
        if (nocode.test(node.className)) { break; }
        if ('BR' === node.nodeName) {
          breakAfter(node);
          // Discard the <BR> since it is now flush against a </LI>.
          if (node.parentNode) {
            node.parentNode.removeChild(node);
          }
        } else {
          // breakAfter moves everything right of a break into a later list
          // item, so this loop naturally stops at the first break it causes.
          for (var child = node.firstChild; child; child = child.nextSibling) {
            walk(child);
          }
        }
        break;
      case 3: case 4:  // Text
        if (isPreformatted) {
          var text = node.nodeValue;
          var match = text.match(lineBreak);
          if (match) {
            var firstLine = text.substring(0, match.index);
            node.nodeValue = firstLine;
            var tail = text.substring(match.index + match[0].length);
            if (tail) {
              var parent = node.parentNode;
              parent.insertBefore(
                  document.createTextNode(tail), node.nextSibling);
            }
            breakAfter(node);
            if (!firstLine) {
              // Don't leave blank text nodes in the DOM.
              node.parentNode.removeChild(node);
            }
          }
        }
        break;
    }
  }

  // Split a line after the given node.
  function breakAfter(lineEndNode) {
    // If there's nothing to the right, then we can skip ending the line
    // here, and move root-wards since splitting just before an end-tag
    // would require us to create a bunch of empty copies.
    while (!lineEndNode.nextSibling) {
      lineEndNode = lineEndNode.parentNode;
      if (!lineEndNode) { return; }
    }

    function breakLeftOf(limit, copy) {
      // Clone shallowly if this node needs to be on both sides of the break.
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var parent = limit.parentNode;
      if (parent) {
        // We clone the parent chain.
        // This helps us resurrect important styling elements that cross lines.
        // E.g. in <i>Foo<br>Bar</i>
        // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
        var parentClone = breakLeftOf(parent, 1);
        // Move the clone and everything to the right of the original
        // onto the cloned parent.
        var next = limit.nextSibling;
        parentClone.appendChild(rightSide);
        for (var sibling = next; sibling; sibling = next) {
          next = sibling.nextSibling;
          parentClone.appendChild(sibling);
        }
      }
      return rightSide;
    }

    var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

    // Walk the parent chain until we reach an unattached LI.
    for (var parent;
         // Check nodeType since IE invents document fragments.
         (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
      copiedListItem = parent;
    }
    // Put it on the list of lines for later processing.
    listItems.push(copiedListItem);
  }

  // Split lines while there are lines left to split.
  for (var i = 0;  // Number of lines that have been split so far.
       i < listItems.length;  // length updated by breakAfter calls.
       ++i) {
    walk(listItems[i]);
  }

  // Make sure numeric indices show correctly.
  if (opt_startLineNum === (opt_startLineNum|0)) {
    listItems[0].setAttribute('value', opt_startLineNum);
  }

  var ol = document.createElement('OL');
  ol.className = 'linenums';
  var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
  for (var i = 0, n = listItems.length; i < n; ++i) {
    li = listItems[i];
    // Stick a class on the LIs so that stylesheets can
    // color odd/even rows, or any other row pattern that
    // is co-prime with 10.
    li.className = 'L' + ((i + offset) % 10);
    if (!li.firstChild) {
      // Keep empty lines from collapsing.
      li.appendChild(document.createTextNode('\xA0'));
    }
    ol.appendChild(li);
  }

  node.appendChild(ol);
}
1048 | |
/**
 * Breaks {@code job.sourceCode} around style boundaries in
 * {@code job.decorations} and modifies {@code job.sourceNode} in place by
 * wrapping each styled run of text in a {@code <SPAN>} whose class is the
 * decoration's style.
 *
 * {@code job.decorations} is normalized in place first: zero-length
 * decorations are dropped and adjacent decorations with the same style are
 * merged.
 *
 * @param {Object} job like <pre>{
 *    sourceCode: {string} source as plain text,
 *    spans: {Array.<number|Node>} alternating span start indices into source
 *       and the text node or element (e.g. {@code <BR>}) corresponding to that
 *       span.
 *    decorations: {Array.<number|string>} an array of style classes preceded
 *       by the position at which they start in job.sourceCode in order
 * }</pre>
 * @private
 */
function recombineTagsAndDecorations(job) {
  var isIE = /\bMSIE\b/.test(navigator.userAgent);
  var newlineRe = /\n/g;

  var source = job.sourceCode;
  var sourceLength = source.length;
  // Index into source after the last code-unit recombined.
  var sourceIndex = 0;

  var spans = job.spans;
  var nSpans = spans.length;
  // Index into spans after the last span which ends at or before sourceIndex.
  var spanIndex = 0;

  var decorations = job.decorations;
  var nDecorations = decorations.length;
  // Index into decorations after the last decoration which ends at or before
  // sourceIndex.
  var decorationIndex = 0;

  // Remove all zero-length decorations.  The sentinel lets the last
  // decoration be compared against the end of the source.
  decorations[nDecorations] = sourceLength;
  var decPos, i;
  for (i = decPos = 0; i < nDecorations;) {
    if (decorations[i] !== decorations[i + 2]) {
      decorations[decPos++] = decorations[i++];
      decorations[decPos++] = decorations[i++];
    } else {
      i += 2;
    }
  }
  nDecorations = decPos;

  // Simplify decorations.
  for (i = decPos = 0; i < nDecorations;) {
    var startPos = decorations[i];
    // Conflate all adjacent decorations that use the same style.
    var startDec = decorations[i + 1];
    var runEnd = i + 2;
    while (runEnd + 2 <= nDecorations
           && decorations[runEnd + 1] === startDec) {
      runEnd += 2;
    }
    decorations[decPos++] = startPos;
    decorations[decPos++] = startDec;
    i = runEnd;
  }

  // Truncating also discards the sentinel written above.
  nDecorations = decorations.length = decPos;

  while (spanIndex < nSpans) {
    var spanEnd = spans[spanIndex + 2] || sourceLength;
    var decEnd = decorations[decorationIndex + 2] || sourceLength;

    // Emit text up to whichever boundary comes first.
    var end = Math.min(spanEnd, decEnd);

    var textNode = spans[spanIndex + 1];
    var styledText;
    if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
        // Don't introduce spans around empty text nodes.
        && (styledText = source.substring(sourceIndex, end))) {
      // This may seem bizarre, and it is.  Emitting LF on IE causes the
      // code to display with spaces instead of line breaks.
      // Emitting Windows standard issue linebreaks (CRLF) causes a blank
      // space to appear at the beginning of every line but the first.
      // Emitting an old Mac OS 9 line separator makes everything spiffy.
      if (isIE) { styledText = styledText.replace(newlineRe, '\r'); }
      textNode.nodeValue = styledText;
      var document = textNode.ownerDocument;
      var span = document.createElement('SPAN');
      span.className = decorations[decorationIndex + 1];
      var parentNode = textNode.parentNode;
      parentNode.replaceChild(span, textNode);
      span.appendChild(textNode);
      // Split off a text node only when part of this span is still
      // unstyled.  (Comparing sourceIndex here was always true and left an
      // empty text node behind every span that finished a chunk.)
      if (end < spanEnd) {
        spans[spanIndex + 1] = textNode
            = document.createTextNode(source.substring(end, spanEnd));
        parentNode.insertBefore(textNode, span.nextSibling);
      }
    }

    sourceIndex = end;

    if (sourceIndex >= spanEnd) {
      spanIndex += 2;
    }
    if (sourceIndex >= decEnd) {
      decorationIndex += 2;
    }
  }
}
1157 | |
1158 | |
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
/**
 * Register a language handler for the given file extensions.
 *
 * An extension that already has a handler keeps it; the attempt is reported
 * through {@code console.warn} when a console is available.
 *
 * @param {function (Object)} handler a function from source code to a list
 *      of decorations.  Takes a single argument job which describes the
 *      state of the computation.   The single parameter has the form
 *      {@code {
 *        sourceCode: {string} as plain text.
 *        decorations: {Array.<number|string>} an array of style classes
 *                     preceded by the position at which they start in
 *                     job.sourceCode in order.
 *                     The language handler should assign this field.
 *        basePos: {int} the position of source in the larger source chunk.
 *                 All positions in the output decorations array are relative
 *                 to the larger source chunk.
 *      } }
 * @param {Array.<string>} fileExtensions
 */
function registerLangHandler(handler, fileExtensions) {
  // Borrow the method from Object.prototype: the registry is keyed by
  // caller-supplied names, so an extension called "hasOwnProperty" would
  // otherwise replace the very method used for this check.
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    if (!hasOwn.call(langHandlerRegistry, ext)) {
      langHandlerRegistry[ext] = handler;
    } else if (window['console']) {
      console['warn']('cannot override language handler %s', ext);
    }
  }
}
/**
 * Looks up the language handler registered for an extension.
 *
 * @param {string=} extension a language name such as 'cpp'; may be absent.
 * @param {string} source the plain text to be decorated.  Only consulted
 *     when no handler is registered for {@code extension}.
 * @return {function (Object)} the registered handler, or else the
 *     'default-markup' handler when the first non-whitespace character of
 *     {@code source} is a {@code <}, or else the 'default-code' handler.
 */
function langHandlerForExtension(extension, source) {
  // Use the Object.prototype method directly so that a registered extension
  // named "hasOwnProperty" cannot hijack the lookup.
  var isRegistered = extension
      && Object.prototype.hasOwnProperty.call(langHandlerRegistry, extension);
  if (!isRegistered) {
    // Treat it as markup if the first non whitespace character is a <.
    extension = /^\s*</.test(source)
        ? 'default-markup'
        : 'default-code';
  }
  return langHandlerRegistry[extension];
}
// Fallback for anything that does not look like markup.
registerLangHandler(decorateSource, ['default-code']);

// Markup: splits a document into text, declarations, comments, embedded
// foreign-language sections and tags.  Tags are handed to the 'in.tag'
// handler registered below.
var markupLexer = createSimpleLexer(
    [],
    [
     [PR_PLAIN,       /^[^<?]+/],
     [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
     [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
     // Unescaped content in an unknown language
     ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
     ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
     [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
     ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
     // Unescaped content in javascript.  (Or possibly vbscript).
     ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
     // Contains unescaped stylesheet content
     ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
     ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
    ]);
registerLangHandler(
    markupLexer,
    ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);

// Inside a tag: tag name, attribute names and values.  Event handler and
// style attribute values are delegated to the js and css handlers, and
// unquoted values to the 'uq.val' handler.
var tagInteriorLexer = createSimpleLexer(
    [
     [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
     [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
    ],
    [
     [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
     [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
     ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
     [PR_PUNCTUATION,  /^[=<>\/]+/],
     ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
     ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
     ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
     ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
     ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
     ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
    ]);
registerLangHandler(tagInteriorLexer, ['in.tag']);

// An unquoted attribute value is styled as a whole.
var unquotedValueLexer =
    createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]);
registerLangHandler(unquotedValueLexer, ['uq.val']);
// Handlers for specific programming languages.  Each one is a
// sourceDecorator configured with that language's keyword list and the
// comment/string/regex conventions it uses.

// C, C++ and Objective-C.
registerLangHandler(sourceDecorator({
      'keywords': CPP_KEYWORDS,
      'hashComments': true,
      'cStyleComments': true,
      'types': C_TYPES
    }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
      'keywords': 'null,true,false'
    }), ['json']);
registerLangHandler(sourceDecorator({
      'keywords': CSHARP_KEYWORDS,
      'hashComments': true,
      'cStyleComments': true,
      'verbatimStrings': true,
      'types': C_TYPES
    }), ['cs']);
registerLangHandler(sourceDecorator({
      'keywords': JAVA_KEYWORDS,
      'cStyleComments': true
    }), ['java']);
registerLangHandler(sourceDecorator({
      'keywords': SH_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true
    }), ['bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
      'keywords': PYTHON_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'tripleQuotedStrings': true
    }), ['cv', 'py']);
registerLangHandler(sourceDecorator({
      'keywords': PERL_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'regexLiterals': true
    }), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
      'keywords': RUBY_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'regexLiterals': true
    }), ['rb']);
registerLangHandler(sourceDecorator({
      'keywords': JSCRIPT_KEYWORDS,
      'cStyleComments': true,
      'regexLiterals': true
    }), ['js']);
registerLangHandler(sourceDecorator({
      'keywords': COFFEE_KEYWORDS,
      'hashComments': 3,  // ### style block comments
      'cStyleComments': true,
      // This key was spelled 'multilineStrings', which sourceDecorator never
      // reads.  sourceDecorator checks tripleQuotedStrings first, so the
      // correction documents intent rather than changing today's output.
      'multiLineStrings': true,
      'tripleQuotedStrings': true,
      'regexLiterals': true
    }), ['coffee']);
// Regular expression literals found by the 'lang-regex' pattern are styled
// as a single string.
registerLangHandler(createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
1295 | |
/**
 * Runs one pretty printing job: extracts plain text from
 * {@code job.sourceNode}, lets the language handler chosen for
 * {@code job.langExtension} compute decorations, and writes the styled result
 * back into the node.
 *
 * Any error raised along the way is logged to the console, when there is
 * one, instead of being propagated.
 *
 * @param {Object} job has {@code sourceNode} and optionally
 *     {@code langExtension}; gains {@code sourceCode}, {@code spans} and
 *     {@code basePos}.
 */
function applyDecorator(job) {
  var langExtension = job.langExtension;

  try {
    // Pull the tags apart from the text so the lexers see plain text only.
    var extracted = extractSourceSpans(job.sourceNode);
    /** Plain text.  @type {string} */
    var plainText = extracted.sourceCode;
    job.sourceCode = plainText;
    job.spans = extracted.spans;
    job.basePos = 0;

    // Let the matching language handler fill in the decorations.
    var handler = langHandlerForExtension(langExtension, plainText);
    handler(job);

    // Merge decorations and original tags back together, in place.
    recombineTagsAndDecorations(job);
  } catch (e) {
    if (!('console' in window)) { return; }
    var report = e;
    if (e && e['stack']) { report = e['stack']; }
    console['log'](report);
  }
}
1320 | |
/**
 * Pretty prints a chunk of HTML and returns the decorated HTML.
 *
 * @param sourceCodeHtml {string} The HTML to pretty print.
 * @param opt_langExtension {string} The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param opt_numberLines {number|boolean} True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} the inner HTML of the scratch element after decoration.
 */
function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  // Work on a scratch <PRE> that is never attached to the page.
  var scratch = document.createElement('PRE');
  // This could cause images to load and onload listeners to fire.
  // E.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // We assume that the inner HTML is from a trusted source.
  scratch.innerHTML = sourceCodeHtml;

  var wantsLineNumbers = !!opt_numberLines;
  if (wantsLineNumbers) {
    numberLines(scratch, opt_numberLines);
  }

  var job = {};
  job.langExtension = opt_langExtension;
  job.numberLines = opt_numberLines;
  job.sourceNode = scratch;
  applyDecorator(job);

  return scratch.innerHTML;
}
1346 | |
/**
 * Finds all the {@code <pre>}, {@code <code>} and {@code <xmp>} elements in
 * the document whose class includes {@code prettyprint} and prettifies them.
 *
 * When {@code window.PR_SHOULD_USE_CONTINUATION} is truthy the work is done
 * in slices of about 250ms separated by timeouts so that a large page does
 * not make the browser unresponsive; otherwise everything is done before
 * this function returns.
 *
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
function prettyPrint(opt_whenDone) {
  function byTagName(tn) { return document.getElementsByTagName(tn); }
  // fetch a list of nodes to rewrite
  var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return +(new Date); } };
  }

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;
  var prettyPrintingJob;

  var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
  // Matches the class as a whole word so that unrelated classes which merely
  // contain the text "prettyprint" do not opt an element in.
  var prettyPrintRe = /\bprettyprint\b/;
  // tagName is upper case in HTML documents and lower case in XHTML ones.
  var preformattedTagNameRe = /^(?:pre|code|xmp)$/i;

  function doWork() {
    var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                   clock['now']() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock['now']() < endTime; k++) {
      var cs = elements[k];
      var className = cs.className;
      if (prettyPrintRe.test(className)) {
        // If the classes includes a language extensions, use it.
        // Language extensions can be specified like
        //     <pre class="prettyprint lang-cpp">
        // the language extension "cpp" is used to find a language handler as
        // passed to PR.registerLangHandler.
        // HTML5 recommends that a language be specified using "language-"
        // as the prefix instead.  Google Code Prettify supports both.
        // http://dev.w3.org/html5/spec-author-view/the-code-element.html
        var langExtension = className.match(langExtensionRe);
        // Support <pre class="prettyprint"><code class="language-c">
        var wrapper;
        if (!langExtension && (wrapper = childContentWrapper(cs))
            && "CODE" === wrapper.tagName) {
          langExtension = wrapper.className.match(langExtensionRe);
        }

        if (langExtension) {
          langExtension = langExtension[1];
        }

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p; p = p.parentNode) {
          if (preformattedTagNameRe.test(p.tagName) &&
              p.className && prettyPrintRe.test(p.className)) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // Look for a class like linenums or linenums:<n> where <n> is the
          // 1-indexed number of the first line.
          var lineNums = cs.className.match(/\blinenums\b(?::(\d+))?/);
          lineNums = lineNums
              ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true
              : false;
          if (lineNums) { numberLines(cs, lineNums); }

          // do the pretty printing
          prettyPrintingJob = {
            langExtension: langExtension,
            sourceNode: cs,
            numberLines: lineNums
          };
          applyDecorator(prettyPrintingJob);
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}
1439 | |
/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 */
window['prettyPrintOne'] = prettyPrintOne;
/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 *
 * @param {Function?} opt_whenDone if specified, called when the last entry
 * has been finished.
 */
window['prettyPrint'] = prettyPrint;
/**
 * Contains functions for creating and registering new language handlers,
 * along with the style class constants those handlers use.
 * @type {Object}
 */
var publicApi = {};
publicApi['createSimpleLexer'] = createSimpleLexer;
publicApi['registerLangHandler'] = registerLangHandler;
publicApi['sourceDecorator'] = sourceDecorator;
publicApi['PR_ATTRIB_NAME'] = PR_ATTRIB_NAME;
publicApi['PR_ATTRIB_VALUE'] = PR_ATTRIB_VALUE;
publicApi['PR_COMMENT'] = PR_COMMENT;
publicApi['PR_DECLARATION'] = PR_DECLARATION;
publicApi['PR_KEYWORD'] = PR_KEYWORD;
publicApi['PR_LITERAL'] = PR_LITERAL;
publicApi['PR_NOCODE'] = PR_NOCODE;
publicApi['PR_PLAIN'] = PR_PLAIN;
publicApi['PR_PUNCTUATION'] = PR_PUNCTUATION;
publicApi['PR_SOURCE'] = PR_SOURCE;
publicApi['PR_STRING'] = PR_STRING;
publicApi['PR_TAG'] = PR_TAG;
publicApi['PR_TYPE'] = PR_TYPE;
window['PR'] = publicApi;
1477 })(); |