comparison lib/assets/javascripts/prettify.js @ 58:33b47c2160a3

Use mainline prettify instead of stealing from bootstrap.
author Edho Arief <edho@myconan.net>
date Thu, 04 Oct 2012 06:53:04 +0700
parents
children
comparison
equal deleted inserted replaced
57:85e9dc5df11c 58:33b47c2160a3
1 // Copyright (C) 2006 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15
16 /**
17 * @fileoverview
18 * some functions for browser-side pretty printing of code contained in html.
19 *
20 * <p>
21 * For a fairly comprehensive set of languages see the
22 * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a>
23 * file that came with this source. At a minimum, the lexer should work on a
24 * number of languages including C and friends, Java, Python, Bash, SQL, HTML,
25 * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk
26 * and a subset of Perl, but, because of commenting conventions, doesn't work on
27 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class.
28 * <p>
29 * Usage: <ol>
30 * <li> include this source file in an html page via
31 * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>}
32 * <li> define style rules. See the example page for examples.
33 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with
34 * {@code class=prettyprint.}
35 * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty
36 * printer needs to do more substantial DOM manipulations to support that, so
37 * some css styles may not be preserved.
38 * </ol>
39 * That's it. I wanted to keep the API as simple as possible, so there's no
40 * need to specify which language the code is in, but if you wish, you can add
41 * another class to the {@code <pre>} or {@code <code>} element to specify the
42 * language, as in {@code <pre class="prettyprint lang-java">}. Any class that
43 * starts with "lang-" followed by a file extension, specifies the file type.
44 * See the "lang-*.js" files in this directory for code that implements
45 * per-language file handlers.
46 * <p>
47 * Change log:<br>
48 * cbeust, 2006/08/22
49 * <blockquote>
50 * Java annotations (start with "@") are now captured as literals ("lit")
51 * </blockquote>
52 * @requires console
53 */
54
55 // JSLint declarations
56 /*global console, document, navigator, setTimeout, window */
57
/**
 * Global switch read by {@code prettyPrint}.
 * When {@code true}, the work is split across several timeouts so that it
 * does not interfere with UI events; when {@code false},
 * {@code prettyPrint()} runs synchronously.
 * The property is set by string key so that its name is spelled out
 * literally in the source.
 */
window["PR_SHOULD_USE_CONTINUATION"] = true;
64
65 (function () {
// Keyword lists for various languages.
// Each list is either a comma-separated string or an array whose string
// coercion is a comma-separated string (nested arrays flatten with commas).
// We use things that coerce to strings to make them compact when minified
// and to defeat aggressive optimizers that fold large string constants.
var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "auto,case,char,const,default," +
    "double,enum,extern,float,goto,int,long,register,short,signed,sizeof," +
    "static,struct,switch,typedef,union,unsigned,void,volatile"];
var COMMON_KEYWORDS = [C_KEYWORDS, "catch,class,delete,false,import," +
    "new,operator,private,protected,public,this,throw,true,try,typeof"];
var CPP_KEYWORDS = [COMMON_KEYWORDS, "alignof,align_union,asm,axiom,bool," +
    "concept,concept_map,const_cast,constexpr,decltype," +
    "dynamic_cast,explicit,export,friend,inline,late_check," +
    "mutable,namespace,nullptr,reinterpret_cast,static_assert,static_cast," +
    "template,typeid,typename,using,virtual,where"];
var JAVA_KEYWORDS = [COMMON_KEYWORDS,
    "abstract,boolean,byte,extends,final,finally,implements,import," +
    "instanceof,null,native,package,strictfp,super,synchronized,throws," +
    "transient"];
var CSHARP_KEYWORDS = [JAVA_KEYWORDS,
    "as,base,by,checked,decimal,delegate,descending,dynamic,event," +
    "fixed,foreach,from,group,implicit,in,interface,internal,into,is,lock," +
    "object,out,override,orderby,params,partial,readonly,ref,sbyte,sealed," +
    "stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort,var"];
var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
    "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
    "true,try,unless,until,when,while,yes";
var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
    "debugger,eval,export,function,get,null,set,undefined,var,with," +
    "Infinity,NaN"];
var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
    "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
    "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
    "elif,except,exec,finally,from,global,import,in,is,lambda," +
    "nonlocal,not,or,pass,print,raise,try,with,yield," +
    "False,True,None"];
var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
    "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
    "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
    "BEGIN,END"];
var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
    "function,in,local,set,then,until"];
// The Perl and Python lists must be separate array elements: joining them
// with `+` glues the last Perl keyword to the first Python keyword and
// produces the bogus keyword "ENDbreak".
var ALL_KEYWORDS = [
    CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
    PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
// Well-known C/C++ type names. The trailing \b keeps the pattern from
// matching a mere prefix of a longer identifier (e.g. "list" in "listing").
var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)\b/;
112
// Token style names. Each value is the CSS class applied to tokens of that
// kind.

/** Style for a string literal. @const */
var PR_STRING = "str";

/** Style for a keyword. @const */
var PR_KEYWORD = "kwd";

/** Style for a comment. @const */
var PR_COMMENT = "com";

/** Style for a type name. @const */
var PR_TYPE = "typ";

/** Style for a literal value, e.g. 1, null, true. @const */
var PR_LITERAL = "lit";

/** Style for a punctuation string. @const */
var PR_PUNCTUATION = "pun";

/** Style for plain text that has no more specific style. @const */
var PR_PLAIN = "pln";

/** Style for an sgml tag. @const */
var PR_TAG = "tag";

/** Style for a markup declaration such as a DOCTYPE. @const */
var PR_DECLARATION = "dec";

/** Style for embedded source. @const */
var PR_SOURCE = "src";

/** Style for an sgml attribute name. @const */
var PR_ATTRIB_NAME = "atn";

/** Style for an sgml attribute value. @const */
var PR_ATTRIB_VALUE = "atv";

/**
 * Class name that marks a section of markup as not being code, e.g. to
 * allow line numbers to be embedded within code listings.
 * @const
 */
var PR_NOCODE = "nocode";
182
183
184
/**
 * Regular-expression source (a string, not a RegExp) matching the tokens
 * that may precede a regular expression literal in javascript, followed by
 * optional whitespace.
 * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html
 * has the full list; entries that might be problematic in languages
 * without regular expression literals have been removed.
 *
 * <p>Specifically, keywords that can't precede a regexp literal in a
 * syntactically legal javascript program are left out, and so is the "in"
 * keyword, since it is not a keyword in many languages and might be used
 * as a count of inches.
 *
 * <p>The link above does not accurately describe EcmaScript rules since
 * it fails to distinguish between (a=++/b/i) and (a++/b/i), but it works
 * very well in practice.
 *
 * @private
 * @const
 */
var REGEXP_PRECEDER_PATTERN =
    // start of input and punctuation up to ';'
    '(?:^^\\.?|[+-]|\\!|\\!=|\\!==|\\#|\\%|\\%=|&|&&|&&=|&=|\\(|\\*|\\*=|\\+=|\\,|\\-=|\\->|\\/|\\/=|:|::|\\;'
    // comparison, shift, bracket and bitwise punctuation
    + '|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\@|\\[|\\^|\\^=|\\^\\^|\\^\\^=|\\{|\\||\\|=|\\|\\||\\|\\|=|\\~'
    // keywords, then trailing whitespace
    + '|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*';
205
206 // CAVEAT: this does not properly handle the case where a regular
207 // expression immediately follows another since a regular expression may
208 // have flags for case-sensitivity and the like. Having regexp tokens
209 // adjacent is not valid in any language I'm aware of, so I'm punting.
210 // TODO: maybe style special characters inside a regexp as punctuation.
211
212
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExps.
 * Since it matches globally, a single start-of-input anchor (/^.../) in an
 * input is dropped for the purposes of unioning; a doubled anchor (^^) is
 * left in place.
 *
 * <p>Capturing groups that are not the target of a back-reference are
 * turned into non-capturing groups, and the remaining groups and their
 * back-references are renumbered so that they stay consistent in the
 * combined expression. When case-sensitive and case-insensitive inputs are
 * mixed, letters in the case-insensitive inputs are expanded to explicit
 * [Xx] sets and the result is case-sensitive.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups emitted so far across all inputs.
  var capturedGroupIndex = 0;

  // The output can only use the "i" flag if no case-sensitive input
  // contains a letter outside of an escape sequence; otherwise the
  // case-insensitive inputs have to be folded by hand.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  var escapeCharToCodeUnit = {
    'b': 8,
    't': 9,
    'n': 0xa,
    'v': 0xb,
    'f': 0xc,
    'r': 0xd
  };

  /** Returns the code unit denoted by one (possibly escaped) charset part. */
  function decodeEscape(charsetPart) {
    var cc0 = charsetPart.charCodeAt(0);
    if (cc0 !== 92 /* \\ */) {
      return cc0;
    }
    var c1 = charsetPart.charAt(1);
    cc0 = escapeCharToCodeUnit[c1];
    if (cc0) {
      return cc0;
    } else if ('0' <= c1 && c1 <= '7') {
      return parseInt(charsetPart.substring(1), 8);
    } else if (c1 === 'u' || c1 === 'x') {
      return parseInt(charsetPart.substring(2), 16);
    } else {
      return charsetPart.charCodeAt(1);
    }
  }

  /** Returns text that denotes the given code unit inside a charset. */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    // '^' must be escaped too: emitted first in a set it would otherwise
    // negate the whole set.
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']'
        || ch === '^') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * Rewrites a character set such as "[a-c_]" so that it matches both cases
   * of every latin letter it contains.
   */
  function caseFoldCharset(charSet) {
    // An empty set yields no match; treat it as having no parts.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var pi = inverse ? 1 : 0, nParts = charsetParts.length;
         pi < nParts; ++pi) {
      var p = charsetParts[pi];
      if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
        groups.push(p);
      } else {
        var start = decodeEscape(p);
        var end;
        if (pi + 2 < nParts && '-' === charsetParts[pi + 1]) {
          end = decodeEscape(charsetParts[pi + 2]);
          pi += 2;
        } else {
          end = start;
        }
        ranges.push([start, end]);
        // If the range might intersect letters, then expand it.
        // This case handling is too simplistic.
        // It does not deal with non-latin case folding.
        // It works for latin source code identifiers though.
        if (!(end < 65 || start > 122)) {
          if (!(end < 65 || start > 90)) {
            ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
          }
          if (!(end < 97 || start > 122)) {
            ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
          }
        }
      }
    }

    // Merge overlapping and adjacent ranges:
    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    var lastRange = [NaN, NaN];
    for (var ri = 0; ri < ranges.length; ++ri) {
      var range = ranges[ri];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var ci = 0; ci < consolidatedRanges.length; ++ci) {
      var merged = consolidatedRanges[ci];
      out.push(encodeEscape(merged[0]));
      if (merged[1] > merged[0]) {
        out.push('-', encodeEscape(merged[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * Returns the source of the given regex rewritten so that it can be one
   * alternative of the combined expression: unanchored, with groups
   * renumbered, and case-folded when required.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var g = 1; g < capturedGroups.length; ++g) {
      if (-1 === capturedGroups[g]) {
        capturedGroups[g] = ++capturedGroupIndex;
      }
    }
    for (var j = 0, seenGroups = 0; j < n; ++j) {
      var part = parts[j];
      if (part === '(') {
        ++seenGroups;
        if (capturedGroups[seenGroups] === undefined) {
          parts[j] = '(?:';
        }
      } else if ('\\' === part.charAt(0)) {
        var referenced = +part.substring(1);
        if (referenced && referenced <= seenGroups) {
          // Look up the group the back-reference points at, not the
          // number of groups opened so far.
          parts[j] = '\\' + capturedGroups[referenced];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var a = 0; a < n; ++a) {
      if ('^' === parts[a] && '^' !== parts[a + 1]) { parts[a] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var f = 0; f < n; ++f) {
        var piece = parts[f];
        var ch0 = piece.charAt(0);
        if (piece.length >= 2 && ch0 === '[') {
          parts[f] = caseFoldCharset(piece);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[f] = piece.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var k = 0, nRegexs = regexs.length; k < nRegexs; ++k) {
    var input = regexs[k];
    if (input.global || input.multiline) { throw new Error('' + input); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(input) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
448
449
/**
 * Flattens a DOM subtree into one string of source code plus an array that
 * maps ranges of that string back to the nodes they came from.
 *
 * <p>
 * For example, the DOM structure</p>
 * <pre>
 * (Element "p"
 *   (Element "b"
 *     (Text "print "))       ; #1
 *   (Text "'Hello '")        ; #2
 *   (Element "br")           ; #3
 *   (Text " + 'World';"))    ; #4
 * </pre>
 * <p>
 * which corresponds to the HTML
 * {@code <p><b>print </b>'Hello '<br> + 'World';</p>},
 * produces</p>
 * <pre>
 * {
 *   sourceCode: "print 'Hello '\n + 'World';",
 *   //                     1          2
 *   //           012345678901234 5678901234567
 *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
 * }
 * </pre>
 * <p>
 * where #1 refers to the {@code "print "} text node, and so on.
 * </p>
 *
 * <p>
 * The {@code spans} array is a flat list of pairs: each even element is the
 * start index of a substring and the following odd element is the text node
 * (or BR/LI element) that supplies it. A substring runs until the next
 * start index or the end of the source.
 * Elements whose class includes "nocode" are skipped entirely. Whitespace
 * runs are collapsed to a single space unless the computed white-space
 * style of {@code node} starts with "pre", in which case only newlines are
 * normalized. One trailing newline is removed from the result.
 * </p>
 *
 * @param {Node} node an HTML DOM subtree containing source-code.
 * @return {Object} source code and the text nodes in which they occur.
 */
function extractSourceSpans(node) {
  var nocodeClass = /(?:^|\s)nocode(?:\s|$)/;

  var pieces = [];   // text contributed by each recorded node, in order
  var spans = [];    // flat [start, node, start, node, ...] list
  var offset = 0;    // length of the source collected so far

  var whiteSpaceStyle;
  if (node.currentStyle) {
    whiteSpaceStyle = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    whiteSpaceStyle = document.defaultView.getComputedStyle(node, null)
        .getPropertyValue('white-space');
  }
  var isPreformatted =
      whiteSpaceStyle && whiteSpaceStyle.substring(0, 3) === 'pre';

  // Appends one piece of text and remembers which node it belongs to.
  function record(text, owner) {
    pieces.push(text);
    spans.push(offset, owner);
    offset += text.length;
  }

  function visit(current) {
    var type = current.nodeType;
    if (type === 1) {  // Element
      if (nocodeClass.test(current.className)) { return; }
      var child = current.firstChild;
      while (child) {
        visit(child);
        child = child.nextSibling;
      }
      var tag = current.nodeName;
      if (tag === 'BR' || tag === 'LI') {
        // Line-ending elements contribute a newline of their own.
        record('\n', current);
      }
    } else if (type === 3 || type === 4) {  // Text or CDATA
      var text = current.nodeValue;
      if (text.length) {
        text = isPreformatted
            ? text.replace(/\r\n?/g, '\n')  // Normalize newlines.
            : text.replace(/[ \t\r\n]+/g, ' ');
        // TODO: handle tabs here?
        record(text, current);
      }
    }
  }

  visit(node);

  return {
    sourceCode: pieces.join('').replace(/\n$/, ''),
    spans: spans
  };
}
549
550
/**
 * Runs the given language handler over sourceCode and appends the
 * decorations it produces to out. Does nothing when sourceCode is empty.
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with a job object holding
 *    sourceCode and basePos; its job.decorations is read afterwards.
 * @param {Array} out the decoration list that is extended in place.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (sourceCode) {
    var request = {
      sourceCode: sourceCode,
      basePos: basePos
    };
    langHandler(request);
    out.push.apply(out, request.decorations);
  }
}
566
/** Matches any string that contains a non-whitespace character. */
var notWs = /\S/;

/**
 * Returns the sole child element of the given element, provided that every
 * text node among its children contains only space characters.
 * Returns undefined when there is no child element, when there is more than
 * one, or when a text child has non-space content.
 * <p>
 * This finds the CODE element in {@code <pre><code ...>} when that single
 * child holds all of the non-space textual content, and deliberately finds
 * nothing for {@code <pre><code>...</code><code>...</code></pre>} or when
 * the element has textual content of its own.
 */
function childContentWrapper(element) {
  // Holds the only child element seen so far; `element` itself is used as
  // the marker for "no unique wrapper exists".
  var candidate;
  var node = element.firstChild;
  while (node) {
    if (node.nodeType === 1) {  // Element Node
      candidate = candidate ? element : node;
    } else if (node.nodeType === 3 && notWs.test(node.nodeValue)) {
      // Text Node with real content
      candidate = element;
    }
    node = node.nextSibling;
  }
  if (candidate === element) { return undefined; }
  return candidate;
}
592
/**
 * Builds a lexing function from lists of [style, pattern, context,
 * shortcut] rules.
 * The lexing function uses the patterns to find token boundaries and
 * produces a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * Each rule has the form
 * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 *
 * Style is a style constant like PR_PLAIN, or a string of the form
 * 'lang-FOO', where FOO is a language extension describing the language of
 * the portion of the token captured in $1.
 * E.g., if style is 'lang-lisp', and group 1 contains the text
 * '(hello (world))', then that portion of the token is passed to the
 * registered lisp handler for formatting.
 * The text before and after group 1 is restyled using this same decorator,
 * so rules must make sure that this cannot recurse forever.  For example,
 * an HTML rule for SCRIPT elements looks something like
 * ['lang-js', /<[s]cript>(.+?)<\/script>/].  It may match
 * '<script>foo()<\/script>'; the decorator is then called again with
 * '<script>', which cannot match the same rule because group 1 must not be
 * empty, so the generic tag rule styles it as PR_TAG.  The handler
 * registered for 'js' is called with 'foo()', and the closing tag is
 * handled like the opening one.
 * If a 'lang-' rule matches without a string in group 1, the token is
 * styled as PR_SOURCE instead.
 *
 * Pattern must only match prefixes, and if it matches a prefix, then that
 * match is considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantees that this pattern and only this pattern matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (Object)} a
 *   function that takes source code and returns a list of decorations.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a first character to the one rule that handles tokens starting
  // with it.
  var shortcuts = {};

  // Collect each distinct pattern once and index the shortcut characters.
  var everyRule = shortcutStylePatterns.concat(fallthroughStylePatterns);
  var distinctRegexs = [];
  var seenRegexs = {};
  for (var ruleIndex = 0; ruleIndex < everyRule.length; ++ruleIndex) {
    var candidate = everyRule[ruleIndex];
    var firstChars = candidate[3];
    if (firstChars) {
      for (var ci = 0; ci < firstChars.length; ++ci) {
        shortcuts[firstChars.charAt(ci)] = candidate;
      }
    }
    var candidateRegex = candidate[1];
    var regexKey = '' + candidateRegex;
    if (!seenRegexs.hasOwnProperty(regexKey)) {
      distinctRegexs.push(candidateRegex);
      seenRegexs[regexKey] = null;
    }
  }
  // Catch-all alternative so that every character lands in some token.
  distinctRegexs.push(/[\0-\uffff]/);
  var tokenizer = combinePrefixPatterns(distinctRegexs);

  var nPatterns = fallthroughStylePatterns.length;

  /**
   * Lexes job.sourceCode and stores on job.decorations an array of style
   * classes, each preceded by the position at which it starts in
   * job.sourceCode, in order.
   *
   * @param {Object} job an object like <pre>{
   *    sourceCode: {string} sourceText plain text,
   *    basePos: {int} position of job.sourceCode in the larger chunk of
   *        sourceCode.
   * }</pre>
   */
  var decorate = function (job) {
    var text = job.sourceCode;
    var offset = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd
      * entries are style markers (e.g., PR_COMMENT) that run from that
      * position until the next one.
      * @type {Array.<number|string>}
      */
    var result = [offset, PR_PLAIN];
    var cursor = 0;  // index into text
    var lexemes = text.match(tokenizer) || [];
    // Styles already worked out for identical token text.
    var knownStyles = {};

    for (var t = 0, nLexemes = lexemes.length; t < nLexemes; ++t) {
      var lexeme = lexemes[t];
      var style = knownStyles[lexeme];
      var found = void 0;
      var embedded = false;

      if (typeof style !== 'string') {
        var rule = shortcuts[lexeme.charAt(0)];
        if (rule) {
          found = lexeme.match(rule[1]);
          style = rule[0];
        } else {
          for (var r = 0; r < nPatterns; ++r) {
            rule = fallthroughStylePatterns[r];
            found = lexeme.match(rule[1]);
            if (found) {
              style = rule[0];
              break;
            }
          }

          if (!found) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        embedded = style.length >= 5 && style.substring(0, 5) === 'lang-';
        if (embedded && !(found && typeof found[1] === 'string')) {
          embedded = false;
          style = PR_SOURCE;
        }

        // Embedded tokens need their match data, so only plain styles are
        // cached.
        if (!embedded) { knownStyles[lexeme] = style; }
      }

      var lexemeStart = cursor;
      cursor += lexeme.length;

      if (!embedded) {
        result.push(offset + lexemeStart, style);
        continue;
      }

      // Treat group 1 as an embedded block of source code.
      var inner = found[1];
      var innerStart = lexeme.indexOf(inner);
      var innerEnd = innerStart + inner.length;
      if (found[2]) {
        // If the embedded source can be blank, then it would match at the
        // beginning which would cause us to infinitely recurse on the
        // entire token, so we catch the right context in group 2.
        innerEnd = lexeme.length - found[2].length;
        innerStart = innerEnd - inner.length;
      }
      var lang = style.substring(5);
      // Decorate the left of the embedded source
      appendDecorations(
          offset + lexemeStart,
          lexeme.substring(0, innerStart),
          decorate, result);
      // Decorate the embedded source
      appendDecorations(
          offset + lexemeStart + innerStart,
          inner,
          langHandlerForExtension(lang, inner),
          result);
      // Decorate the right of the embedded section
      appendDecorations(
          offset + lexemeStart + innerEnd,
          lexeme.substring(innerEnd),
          decorate, result);
    }
    job.decorations = result;
  };
  return decorate;
}
766
/** returns a function that produces a list of decorations from source text.
  *
  * This code treats ", ', and ` as string delimiters, and \ as a string
  * escape.  It does not recognize perl's qq() style strings.
  * It has no special handling for double delimiter escapes as in basic, or
  * the tripled delimiters used in python, but should work on those regardless
  * although in those cases a single string literal may be broken up into
  * multiple adjacent string literals.
  *
  * It recognizes C, C++, and shell style comments.
  *
  * Options read from the options object:
  * 'tripleQuotedStrings', 'multiLineStrings', 'verbatimStrings',
  * 'hashComments' (a value greater than 1 together with 'cStyleComments'
  * enables multi-line ### comments), 'cStyleComments', 'regexLiterals',
  * 'types' (a RegExp for type names) and 'keywords' (anything that coerces
  * to a comma or space separated keyword list).
  *
  * @param {Object} options a set of optional parameters.
  * @return {function (Object)} a function that examines the source code
  *     in the input job and builds the decoration list.
  */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      // #include <header.h> style names are styled as strings.
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options['regexLiterals']) {
    /**
     * @const
     */
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  var types = options['types'];
  if (types) {
    fallthroughStylePatterns.push([PR_TYPE, types]);
  }

  // A missing keyword list must not be coerced to the word "undefined" (or
  // "null"), which would then be highlighted as a keyword.
  var keywordOption = options['keywords'];
  var keywords = (keywordOption === undefined || keywordOption === null)
      ? ''
      : ("" + keywordOption).replace(/^ | $/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
         null]);
  }

  shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);
  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE,        /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
      [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number; the second alternative is a
           // fraction with no leading digit such as ".5" (one or more
           // digits after the dot, not a literal "+").
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.  See issue 144.
      [PR_PLAIN,       /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
892
// General-purpose decorator built from the ALL_KEYWORDS list with hash
// comments, C-style comments, multi-line strings and regex literals all
// switched on.  It is what gets registered as the 'default-code' handler.
var decorateSource = sourceDecorator({
  'hashComments': true,
  'cStyleComments': true,
  'multiLineStrings': true,
  'regexLiterals': true,
  'keywords': ALL_KEYWORDS
});
900
/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 * @param {number|boolean=} opt_startLineNum when this is an integer it is
 *     written to the {@code value} attribute of the first list item and
 *     offsets the L0..L9 row classes; any other value (e.g. {@code true})
 *     leaves the list numbered from its default start.
 */
function numberLines(node, opt_startLineNum) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreak = /\r\n?|\n/;

  var document = node.ownerDocument;

  var whitespace;
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else {
    // Guard on the very object we call through: a document that is not shown
    // in a window has a null defaultView, and the global window may belong to
    // a different document than the node's owner.
    var view = document.defaultView;
    if (view && view.getComputedStyle) {
      whitespace = view.getComputedStyle(node, null)
          .getPropertyValue('white-space');
    }
  }
  // If it's preformatted, then we need to split lines on line breaks
  // in addition to <BR>s.
  var isPreformatted = whitespace && 'pre' === whitespace.substring(0, 3);

  var li = document.createElement('LI');
  while (node.firstChild) {
    li.appendChild(node.firstChild);
  }
  // An array of lines.  We split below, so this is initialized to one
  // un-split line.
  var listItems = [li];

  // Finds the first line break under the given node and splits there.  The
  // content after the break lands in a new list item that is visited later.
  function walk(node) {
    switch (node.nodeType) {
      case 1:  // Element
        if (nocode.test(node.className)) { break; }
        if ('BR' === node.nodeName) {
          breakAfter(node);
          // Discard the <BR> since it is now flush against a </LI>.
          if (node.parentNode) {
            node.parentNode.removeChild(node);
          }
        } else {
          for (var child = node.firstChild; child; child = child.nextSibling) {
            walk(child);
          }
        }
        break;
      case 3: case 4:  // Text
        if (isPreformatted) {
          var text = node.nodeValue;
          var match = text.match(lineBreak);
          if (match) {
            var firstLine = text.substring(0, match.index);
            node.nodeValue = firstLine;
            var tail = text.substring(match.index + match[0].length);
            if (tail) {
              var parent = node.parentNode;
              parent.insertBefore(
                  document.createTextNode(tail), node.nextSibling);
            }
            breakAfter(node);
            if (!firstLine) {
              // Don't leave blank text nodes in the DOM.
              node.parentNode.removeChild(node);
            }
          }
        }
        break;
    }
  }

  // Split a line after the given node.
  function breakAfter(lineEndNode) {
    // If there's nothing to the right, then we can skip ending the line
    // here, and move root-wards since splitting just before an end-tag
    // would require us to create a bunch of empty copies.
    while (!lineEndNode.nextSibling) {
      lineEndNode = lineEndNode.parentNode;
      if (!lineEndNode) { return; }
    }

    function breakLeftOf(limit, copy) {
      // Clone shallowly if this node needs to be on both sides of the break.
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var parent = limit.parentNode;
      if (parent) {
        // We clone the parent chain.
        // This helps us resurrect important styling elements that cross lines.
        // E.g. in <i>Foo<br>Bar</i>
        // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
        var parentClone = breakLeftOf(parent, 1);
        // Move the clone and everything to the right of the original
        // onto the cloned parent.
        var next = limit.nextSibling;
        parentClone.appendChild(rightSide);
        for (var sibling = next; sibling; sibling = next) {
          next = sibling.nextSibling;
          parentClone.appendChild(sibling);
        }
      }
      return rightSide;
    }

    var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

    // Walk the parent chain until we reach an unattached LI.
    for (var parent;
         // Check nodeType since IE invents document fragments.
         (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
      copiedListItem = parent;
    }
    // Put it on the list of lines for later processing.
    listItems.push(copiedListItem);
  }

  // Split lines while there are lines left to split.
  for (var i = 0;  // Number of lines that have been split so far.
       i < listItems.length;  // length updated by breakAfter calls.
       ++i) {
    walk(listItems[i]);
  }

  // Make sure numeric indices show correctly.
  if (opt_startLineNum === (opt_startLineNum|0)) {
    listItems[0].setAttribute('value', opt_startLineNum);
  }

  var ol = document.createElement('OL');
  ol.className = 'linenums';
  var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
  for (var k = 0, n = listItems.length; k < n; ++k) {
    li = listItems[k];
    // Stick a class on the LIs so that stylesheets can
    // color odd/even rows, or any other row pattern that
    // is co-prime with 10.
    li.className = 'L' + ((k + offset) % 10);
    if (!li.firstChild) {
      li.appendChild(document.createTextNode('\xA0'));
    }
    ol.appendChild(li);
  }

  node.appendChild(ol);
}
1048
/**
 * Breaks {@code job.sourceCode} around style boundaries in
 * {@code job.decorations} and modifies {@code job.sourceNode} in place.
 * {@code job.decorations} is also compacted in place: zero-length entries are
 * dropped and adjacent entries with the same style are merged.
 * @param {Object} job like <pre>{
 *    sourceCode: {string} source as plain text,
 *    spans: {Array.<number|Node>} alternating span start indices into source
 *       and the text node or element (e.g. {@code <BR>}) corresponding to that
 *       span.
 *    decorations: {Array.<number|string>} an array of style classes preceded
 *       by the position at which they start in job.sourceCode in order
 * }</pre>
 * @private
 */
function recombineTagsAndDecorations(job) {
  var isIE = /\bMSIE\b/.test(navigator.userAgent);
  var newlineRe = /\n/g;

  var source = job.sourceCode;
  var sourceLength = source.length;
  // Index into source after the last code-unit recombined.
  var sourceIndex = 0;

  var spans = job.spans;
  var nSpans = spans.length;
  // Index into spans after the last span which ends at or before sourceIndex.
  var spanIndex = 0;

  var decorations = job.decorations;
  var nDecorations = decorations.length;
  // Index into decorations after the last decoration which ends at or before
  // sourceIndex.
  var decorationIndex = 0;

  // Remove all zero-length decorations.  The sentinel written past the end
  // lets the last real entry be compared against the end of the source.
  decorations[nDecorations] = sourceLength;
  var decPos, i;
  for (i = decPos = 0; i < nDecorations;) {
    if (decorations[i] !== decorations[i + 2]) {
      decorations[decPos++] = decorations[i++];
      decorations[decPos++] = decorations[i++];
    } else {
      i += 2;
    }
  }
  nDecorations = decPos;

  // Simplify decorations.
  for (i = decPos = 0; i < nDecorations;) {
    var startPos = decorations[i];
    // Conflate all adjacent decorations that use the same style.
    var startDec = decorations[i + 1];
    var runEnd = i + 2;
    while (runEnd + 2 <= nDecorations && decorations[runEnd + 1] === startDec) {
      runEnd += 2;
    }
    decorations[decPos++] = startPos;
    decorations[decPos++] = startDec;
    i = runEnd;
  }

  // Truncating the array also discards the sentinel added above.
  nDecorations = decorations.length = decPos;

  while (spanIndex < nSpans) {
    var spanEnd = spans[spanIndex + 2] || sourceLength;
    var decEnd = decorations[decorationIndex + 2] || sourceLength;

    var end = Math.min(spanEnd, decEnd);

    var textNode = spans[spanIndex + 1];
    var styledText;
    if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
        // Don't introduce spans around empty text nodes.
        && (styledText = source.substring(sourceIndex, end))) {
      // This may seem bizarre, and it is.  Emitting LF on IE causes the
      // code to display with spaces instead of line breaks.
      // Emitting Windows standard issue linebreaks (CRLF) causes a blank
      // space to appear at the beginning of every line but the first.
      // Emitting an old Mac OS 9 line separator makes everything spiffy.
      if (isIE) { styledText = styledText.replace(newlineRe, '\r'); }
      textNode.nodeValue = styledText;
      var doc = textNode.ownerDocument;
      var span = doc.createElement('SPAN');
      span.className = decorations[decorationIndex + 1];
      var parentNode = textNode.parentNode;
      parentNode.replaceChild(span, textNode);
      span.appendChild(textNode);
      // Split off a text node only when part of this span is still
      // undecorated.  Once the span is fully consumed the loop moves on to
      // the next span, so a split would just leave an empty text node behind.
      if (end < spanEnd) {
        spans[spanIndex + 1] = textNode
            // TODO: Possibly optimize by using '' if there's no flicker.
            = doc.createTextNode(source.substring(end, spanEnd));
        parentNode.insertBefore(textNode, span.nextSibling);
      }
    }

    sourceIndex = end;

    if (sourceIndex >= spanEnd) {
      spanIndex += 2;
    }
    if (sourceIndex >= decEnd) {
      decorationIndex += 2;
    }
  }
}
1157
1158
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};

/** Register a language handler for the given file extensions.
  * An extension that already has a handler keeps it; the attempt is reported
  * through {@code console.warn} when {@code window.console} is available.
  * @param {function (Object)} handler a function from source code to a list
  *      of decorations.  Takes a single argument job which describes the
  *      state of the computation.   The single parameter has the form
  *      {@code {
  *        sourceCode: {string} as plain text.
  *        decorations: {Array.<number|string>} an array of style classes
  *                     preceded by the position at which they start in
  *                     job.sourceCode in order.
  *                     The language handler should assign this field.
  *        basePos: {int} the position of source in the larger source chunk.
  *                 All positions in the output decorations array are relative
  *                 to the larger source chunk.
  *      } }
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  // Call through Object.prototype so that a registered extension named
  // 'hasOwnProperty' cannot shadow the membership test.
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    if (!hasOwn.call(langHandlerRegistry, ext)) {
      langHandlerRegistry[ext] = handler;
    } else if (window['console']) {
      console['warn']('cannot override language handler %s', ext);
    }
  }
}
/**
 * Looks up the handler registered for an extension, falling back to a
 * default chosen from the content when the extension is missing or unknown.
 * @param {string=} extension a registered extension such as 'cpp', if any.
 * @param {string} source the plain text that will be decorated.
 * @return {function (Object)} the entry stored in langHandlerRegistry.
 */
function langHandlerForExtension(extension, source) {
  // Call through Object.prototype so that a registered extension named
  // 'hasOwnProperty' cannot shadow the membership test.
  if (!(extension
        && Object.prototype.hasOwnProperty.call(
            langHandlerRegistry, extension))) {
    // Treat it as markup if the first non-whitespace character is a <.
    extension = /^\s*</.test(source)
        ? 'default-markup'
        : 'default-code';
  }
  return langHandlerRegistry[extension];
}
// Built-in handler registrations.

// Fallback for code in an unrecognized language.
registerLangHandler(decorateSource, ['default-code']);

// Markup: splits a document into tags, comments, declarations and embedded
// chunks that are delegated to other handlers through 'lang-*' styles.
registerLangHandler(
    createSimpleLexer(
        [],
        [
         [PR_PLAIN,       /^[^<?]+/],
         [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
         [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
         // Unescaped content in an unknown language
         ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
         ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
         [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
         ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
         // Unescaped content in javascript.  (Or possibly vbscript).
         ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
         // Contains unescaped stylesheet content
         ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
         ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
        ]),
    ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);

// The inside of a single tag: name, attributes, and attribute values, with
// on* and style attribute values delegated to the js and css handlers.
registerLangHandler(
    createSimpleLexer(
        [
         [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
         [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
         ],
        [
         [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
         [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
         ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
         [PR_PUNCTUATION,  /^[=<>\/]+/],
         ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
         ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
         ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
         ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
         ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
         ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
         ]),
    ['in.tag']);

// An unquoted attribute value is styled as a whole.
registerLangHandler(
    createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);

registerLangHandler(sourceDecorator({
        'keywords': CPP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'types': C_TYPES
      }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
        'keywords': 'null,true,false'
      }), ['json']);
registerLangHandler(sourceDecorator({
        'keywords': CSHARP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'verbatimStrings': true,
        'types': C_TYPES
      }), ['cs']);
registerLangHandler(sourceDecorator({
        'keywords': JAVA_KEYWORDS,
        'cStyleComments': true
      }), ['java']);
registerLangHandler(sourceDecorator({
        'keywords': SH_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true
      }), ['bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
        'keywords': PYTHON_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'tripleQuotedStrings': true
      }), ['cv', 'py']);
registerLangHandler(sourceDecorator({
        'keywords': PERL_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
        'keywords': RUBY_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['rb']);
registerLangHandler(sourceDecorator({
        'keywords': JSCRIPT_KEYWORDS,
        'cStyleComments': true,
        'regexLiterals': true
      }), ['js']);
registerLangHandler(sourceDecorator({
        'keywords': COFFEE_KEYWORDS,
        'hashComments': 3,  // ### style block comments
        'cStyleComments': true,
        // Spelled like every other registration here; the option key is
        // case-sensitive, so the former 'multilineStrings' matched nothing.
        'multiLineStrings': true,
        'tripleQuotedStrings': true,
        'regexLiterals': true
      }), ['coffee']);

// Regex literals handed over via 'lang-regex' are styled as strings.
registerLangHandler(createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
1295
/**
 * Decorates {@code job.sourceNode} in place: extracts its plain text and
 * spans, runs the language handler chosen for {@code job.langExtension}, and
 * folds the resulting decorations back into the DOM.  Anything thrown along
 * the way is caught and, when {@code window} has a console, logged.
 * @param {Object} job carries sourceNode and optionally langExtension; its
 *     sourceCode, spans and basePos fields are filled in here.
 */
function applyDecorator(job) {
  var requestedLang = job.langExtension;

  try {
    // Pull the tags apart from the text so handlers only see plain text.
    var extracted = extractSourceSpans(job.sourceNode);
    /** Plain text. @type {string} */
    var plainText = extracted.sourceCode;
    job.sourceCode = plainText;
    job.spans = extracted.spans;
    job.basePos = 0;

    // Pick the language handler and let it decorate the job.
    var handler = langHandlerForExtension(requestedLang, plainText);
    handler(job);

    // Weave decorations and the original tags back together, rewriting
    // the sourceNode in place.
    recombineTagsAndDecorations(job);
  } catch (e) {
    if ('console' in window) {
      console['log'](e && e['stack'] ? e['stack'] : e);
    }
  }
}
1320
/**
 * Pretty prints a chunk of HTML and returns the decorated HTML.
 * @param sourceCodeHtml {string} The HTML to pretty print.
 * @param opt_langExtension {string} The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param opt_numberLines {number|boolean} True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} the innerHTML of the scratch PRE after decoration.
 */
function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  // Work on a scratch PRE element created from the global document.
  var scratch = document.createElement('PRE');
  // This could cause images to load and onload listeners to fire.
  // E.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // We assume that the inner HTML is from a trusted source.
  scratch.innerHTML = sourceCodeHtml;

  if (opt_numberLines) {
    numberLines(scratch, opt_numberLines);
  }

  applyDecorator({
    langExtension: opt_langExtension,
    numberLines: opt_numberLines,
    sourceNode: scratch
  });

  return scratch.innerHTML;
}
1346
/**
 * Finds the {@code <pre>}, {@code <code>} and {@code <xmp>} elements in the
 * document whose class includes {@code prettyprint} and decorates each one
 * that is not nested inside another such element.
 *
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
function prettyPrint(opt_whenDone) {
  function byTagName(tn) { return document.getElementsByTagName(tn); }
  // fetch a list of nodes to rewrite
  var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return +(new Date); } };
  }

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;
  var prettyPrintingJob;

  var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
  var prettyPrintRe = /\bprettyprint\b/;
  // tagName is upper-case in HTML documents and keeps its source case in
  // XML ones, so the comparison has to ignore case.
  var preCodeXmpRe = /^(?:pre|code|xmp)$/i;

  function doWork() {
    var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                   clock['now']() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock['now']() < endTime; k++) {
      var cs = elements[k];
      var className = cs.className;
      if (prettyPrintRe.test(className)) {
        // If the classes includes a language extensions, use it.
        // Language extensions can be specified like
        //     <pre class="prettyprint lang-cpp">
        // the language extension "cpp" is used to find a language handler as
        // passed to PR.registerLangHandler.
        // HTML5 recommends that a language be specified using "language-"
        // as the prefix instead.  Google Code Prettify supports both.
        // http://dev.w3.org/html5/spec-author-view/the-code-element.html
        var langExtension = className.match(langExtensionRe);
        // Support <pre class="prettyprint"><code class="language-c">
        var wrapper;
        if (!langExtension && (wrapper = childContentWrapper(cs))
            && "CODE" === wrapper.tagName) {
          langExtension = wrapper.className.match(langExtensionRe);
        }

        if (langExtension) {
          langExtension = langExtension[1];
        }

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p; p = p.parentNode) {
          if (preCodeXmpRe.test(p.tagName) &&
              p.className && prettyPrintRe.test(p.className)) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // Look for a class like linenums or linenums:<n> where <n> is the
          // 1-indexed number of the first line.
          var lineNums = className.match(/\blinenums\b(?::(\d+))?/);
          lineNums = lineNums
                ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true
                : false;
          if (lineNums) { numberLines(cs, lineNums); }

          // do the pretty printing
          prettyPrintingJob = {
            langExtension: langExtension,
            sourceNode: cs,
            numberLines: lineNums
          };
          applyDecorator(prettyPrintingJob);
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}
1439
/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 */
window['prettyPrintOne'] = prettyPrintOne;
/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 *
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
window['prettyPrint'] = prettyPrint;
/**
 * Contains functions for creating and registering new language handlers.
 * @type {Object}
 */
window['PR'] = {
      'createSimpleLexer': createSimpleLexer,
      'registerLangHandler': registerLangHandler,
      'sourceDecorator': sourceDecorator,
      'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
      'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
      'PR_COMMENT': PR_COMMENT,
      'PR_DECLARATION': PR_DECLARATION,
      'PR_KEYWORD': PR_KEYWORD,
      'PR_LITERAL': PR_LITERAL,
      'PR_NOCODE': PR_NOCODE,
      'PR_PLAIN': PR_PLAIN,
      'PR_PUNCTUATION': PR_PUNCTUATION,
      'PR_SOURCE': PR_SOURCE,
      'PR_STRING': PR_STRING,
      'PR_TAG': PR_TAG,
      'PR_TYPE': PR_TYPE
    };
1477 })();