/**
 * Utils.js
 *
 * Copyright, Moxiecode Systems AB
 * Released under LGPL License.
 *
 * License: http://www.tinymce.com/license
 * Contributing: http://www.tinymce.com/contributing
 */

/**
 * This class contains various utility functions for the paste plugin.
 *
 * @class tinymce.pasteplugin.Utils
 */
define("tinymce/pasteplugin/Utils", [
  "tinymce/util/Tools",
  "tinymce/html/DomParser",
  "tinymce/html/Schema"
], function(Tools, DomParser, Schema) {
  /**
   * Applies a list of replacements to the content, in the order given.
   *
   * Each item is either a RegExp, whose matches are removed, or a two item
   * array of [pattern, replacement] that is passed straight to
   * String.prototype.replace.
   *
   * @param {String} content Content to run the replacements on.
   * @param {Array} items RegExp or [pattern, replacement] entries to apply.
   * @return {String} Content with all replacements applied.
   */
  function filter(content, items) {
    var result = content;

    Tools.each(items, function(item) {
      if (item instanceof RegExp) {
        result = result.replace(item, '');
      } else {
        result = result.replace(item[0], item[1]);
      }
    });

    return result;
  }

  /**
   * Gets the innerText of the specified element. It will handle edge cases
   * and works better than textContent on Gecko.
   *
   * @param {String} html HTML string to get text from.
   * @return {String} String of text with line feeds.
   */
  function innerText(html) {
    var schema = new Schema();
    var domParser = new DomParser({}, schema);
    var shortEndedElements = schema.getShortEndedElements();
    var ignoreElements = Tools.makeMap('script noscript style textarea video audio iframe object', ' ');
    var blockElements = schema.getBlockElements();
    var text = '';

    // Appends the text representation of the node and its descendants to `text`.
    function walk(node) {
      var name = node.name;
      var child;

      if (name === 'br') {
        text += '\n';
        return;
      }

      // img/input/hr
      if (shortEndedElements[name]) {
        text += ' ';
      }

      // Ignore script, video contents
      if (ignoreElements[name]) {
        text += ' ';
        return;
      }

      if (node.type === 3) {
        text += node.value;
      }

      // Walk all children
      if (!node.shortEnded) {
        for (child = node.firstChild; child; child = child.next) {
          walk(child);
        }
      }

      // Add \n or \n\n for blocks or P, but only when something follows the
      // block so the resulting text gets no trailing line feeds.
      if (blockElements[name] && node.next) {
        text += name === 'p' ? '\n\n' : '\n';
      }
    }

    html = filter(html, [
      /<!\[[^\]]+\]>/g // Conditional comments
    ]);

    walk(domParser.parse(html));

    return text;
  }

  /**
   * Trims the specified HTML by removing all WebKit fragments, all elements wrapping the body, trailing BR elements etc.
   *
   * @param {String} html Html string to trim contents on.
   * @return {String} Html contents that got trimmed.
   */
  function trimHtml(html) {
    // Replacement callback for the Apple-converted-space pattern; s1 and s2
    // are the optional plain spaces captured before and after the span.
    function trimSpaces(all, s1, s2) {
      // WebKit meant to preserve multiple spaces but instead inserted &nbsp; around all inline tags,
      // including the spans with inline styles created on paste
      if (!s1 && !s2) {
        return ' ';
      }

      return '\u00a0';
    }

    return filter(html, [
      // Remove anything but the contents within the BODY element. Tag names are
      // matched case insensitively so <BODY> wrappers are stripped as well.
      /^[\s\S]*<body[^>]*>\s*|\s*<\/body[^>]*>[\s\S]*$/gi,
      /<!--StartFragment-->|<!--EndFragment-->/g, // Inner fragments (tables from excel on mac)
      [/( ?)<span class="Apple-converted-space">\u00a0<\/span>( ?)/g, trimSpaces],
      /<br>$/i // Trailing BR elements
    ]);
  }

  return {
    filter: filter,
    innerText: innerText,
    trimHtml: trimHtml
  };
});