1 /** |
|
2 * Entities.js |
|
3 * |
|
4 * Copyright, Moxiecode Systems AB |
|
5 * Released under LGPL License. |
|
6 * |
|
7 * License: http://www.tinymce.com/license |
|
8 * Contributing: http://www.tinymce.com/contributing |
|
9 */ |
|
10 |
|
11 /*jshint bitwise:false */ |
|
12 /*eslint no-bitwise:0 */ |
|
13 |
|
14 /** |
|
15 * Entity encoder class. |
|
16 * |
|
17 * @class tinymce.html.Entities |
|
18 * @static |
|
19 * @version 3.4 |
|
20 */ |
|
21 define("tinymce/html/Entities", [ |
|
22 "tinymce/util/Tools" |
|
23 ], function(Tools) { |
|
24 var makeMap = Tools.makeMap; |
|
25 |
|
// Lookup tables and matchers shared by the encoders and the decoder.
// namedEntities is only declared here; it is assigned further down in this module.
var namedEntities, baseEntities, reverseEntities,
    // Characters encoded inside attribute values: & < > " ` and everything from U+007E up
    // (excluding lone surrogates), plus complete surrogate pairs matched as one unit.
    attrsCharsRegExp = /[&<>\"\u0060\u007E-\uD7FF\uE000-\uFFEF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
    // Characters encoded inside text nodes: same as above but without " and `.
    textCharsRegExp = /[<>&\u007E-\uD7FF\uE000-\uFFEF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
    // Minimal set of markup sensitive characters used by encodeAllRaw.
    rawCharsRegExp = /[<>&\"\']/g,
    // Group 1: body of a numeric reference (the semicolon is optional).
    // Group 2: name of a named reference (the semicolon is required).
    entityRegExp = /&#([a-z0-9]+);?|&([a-z0-9]+);/gi,
    // Numeric references in the 128-159 range are decoded to these characters
    // (the values match the Windows-1252 code page) instead of control characters.
    asciiMap = {
        128: "\u20AC", 130: "\u201A", 131: "\u0192", 132: "\u201E", 133: "\u2026", 134: "\u2020",
        135: "\u2021", 136: "\u02C6", 137: "\u2030", 138: "\u0160", 139: "\u2039", 140: "\u0152",
        142: "\u017D", 145: "\u2018", 146: "\u2019", 147: "\u201C", 148: "\u201D", 149: "\u2022",
        150: "\u2013", 151: "\u2014", 152: "\u02DC", 153: "\u2122", 154: "\u0161", 155: "\u203A",
        156: "\u0153", 158: "\u017E", 159: "\u0178"
    };

// Raw entities: character -> entity. Every value must be the escaped form,
// otherwise the encoders would hand the character back unchanged.
baseEntities = {
    '\"': '&quot;', // Needs to be escaped since the YUI compressor would otherwise break the code
    "'": '&#39;',
    '<': '&lt;',
    '>': '&gt;',
    '&': '&amp;',
    '\u0060': '&#96;'
};

// Reverse lookup table for raw entities: entity -> character.
reverseEntities = {
    '&lt;': '<',
    '&gt;': '>',
    '&amp;': '&',
    '&quot;': '"',
    '&apos;': "'"
};
|
57 |
|
/**
 * Decodes text by letting the browser parse it as HTML inside a detached
 * div element and reading the resulting text back.
 *
 * @private
 * @param {String} text Entity text to decode.
 * @return {String} The element's textContent, else its innerText, else the
 * input itself when both are empty.
 */
function nativeDecode(text) {
    var container = document.createElement("div");

    container.innerHTML = text;

    return container.textContent || container.innerText || text;
}
|
67 |
|
/**
 * Builds a two way lookup table (character -> entity and entity -> character)
 * from a packed list of alternating character codes and entity names,
 * for example "160,nbsp,169,copy".
 *
 * Pairs are skipped when the character already has a base entity, when the
 * code is not a valid code point in the given radix, or when the name is
 * missing (such as a trailing code without a name).
 *
 * @private
 * @param {String} items Comma separated "code,name,code,name,..." list.
 * @param {Number} radix Optional radix of the codes, defaults to 10.
 * @return {Object} Lookup table, or undefined when items is empty or not specified.
 */
function buildEntitiesLookup(items, radix) {
    var i, parts, code, name, chr, entity, lookup = {};

    // Callers rely on the undefined result to fall back to the built in entities
    if (!items) {
        return;
    }

    parts = items.split(',');
    radix = radix || 10;

    // A trailing code without a name is ignored instead of producing "&undefined;"
    for (i = 0; i + 1 < parts.length; i += 2) {
        code = parseInt(parts[i], radix);
        name = parts[i + 1];

        // An unparsable code would otherwise register U+0000 for the entity
        if (!name || isNaN(code) || code < 0 || code > 0x10FFFF) {
            continue;
        }

        if (code > 0xFFFF) {
            // String.fromCharCode truncates to 16 bits so build the surrogate pair by hand
            code -= 0x10000;
            chr = String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
        } else {
            chr = String.fromCharCode(code);
        }

        // Only add non base entities
        if (!baseEntities[chr]) {
            entity = '&' + name + ';';
            lookup[chr] = entity;
            lookup[entity] = chr;
        }
    }

    return lookup;
}
|
91 |
|
// Built in named entities, packed as "code,name" pairs. The codes are written
// in radix 32 to keep the literal short and are unpacked into a two way lookup.
namedEntities = buildEntitiesLookup([
    '50,nbsp,51,iexcl,52,cent,53,pound,54,curren,55,yen,56,brvbar,57,sect,58,uml,59,copy,',
    '5a,ordf,5b,laquo,5c,not,5d,shy,5e,reg,5f,macr,5g,deg,5h,plusmn,5i,sup2,5j,sup3,5k,acute,',
    '5l,micro,5m,para,5n,middot,5o,cedil,5p,sup1,5q,ordm,5r,raquo,5s,frac14,5t,frac12,5u,frac34,',
    '5v,iquest,60,Agrave,61,Aacute,62,Acirc,63,Atilde,64,Auml,65,Aring,66,AElig,67,Ccedil,',
    '68,Egrave,69,Eacute,6a,Ecirc,6b,Euml,6c,Igrave,6d,Iacute,6e,Icirc,6f,Iuml,6g,ETH,6h,Ntilde,',
    '6i,Ograve,6j,Oacute,6k,Ocirc,6l,Otilde,6m,Ouml,6n,times,6o,Oslash,6p,Ugrave,6q,Uacute,',
    '6r,Ucirc,6s,Uuml,6t,Yacute,6u,THORN,6v,szlig,70,agrave,71,aacute,72,acirc,73,atilde,74,auml,',
    '75,aring,76,aelig,77,ccedil,78,egrave,79,eacute,7a,ecirc,7b,euml,7c,igrave,7d,iacute,7e,icirc,',
    '7f,iuml,7g,eth,7h,ntilde,7i,ograve,7j,oacute,7k,ocirc,7l,otilde,7m,ouml,7n,divide,7o,oslash,',
    '7p,ugrave,7q,uacute,7r,ucirc,7s,uuml,7t,yacute,7u,thorn,7v,yuml,ci,fnof,sh,Alpha,si,Beta,',
    'sj,Gamma,sk,Delta,sl,Epsilon,sm,Zeta,sn,Eta,so,Theta,sp,Iota,sq,Kappa,sr,Lambda,ss,Mu,',
    'st,Nu,su,Xi,sv,Omicron,t0,Pi,t1,Rho,t3,Sigma,t4,Tau,t5,Upsilon,t6,Phi,t7,Chi,t8,Psi,',
    't9,Omega,th,alpha,ti,beta,tj,gamma,tk,delta,tl,epsilon,tm,zeta,tn,eta,to,theta,tp,iota,',
    'tq,kappa,tr,lambda,ts,mu,tt,nu,tu,xi,tv,omicron,u0,pi,u1,rho,u2,sigmaf,u3,sigma,u4,tau,',
    'u5,upsilon,u6,phi,u7,chi,u8,psi,u9,omega,uh,thetasym,ui,upsih,um,piv,812,bull,816,hellip,',
    '81i,prime,81j,Prime,81u,oline,824,frasl,88o,weierp,88h,image,88s,real,892,trade,89l,alefsym,',
    '8cg,larr,8ch,uarr,8ci,rarr,8cj,darr,8ck,harr,8dl,crarr,8eg,lArr,8eh,uArr,8ei,rArr,8ej,dArr,',
    '8ek,hArr,8g0,forall,8g2,part,8g3,exist,8g5,empty,8g7,nabla,8g8,isin,8g9,notin,8gb,ni,8gf,prod,',
    '8gh,sum,8gi,minus,8gn,lowast,8gq,radic,8gt,prop,8gu,infin,8h0,ang,8h7,and,8h8,or,8h9,cap,8ha,cup,',
    '8hb,int,8hk,there4,8hs,sim,8i5,cong,8i8,asymp,8j0,ne,8j1,equiv,8j4,le,8j5,ge,8k2,sub,8k3,sup,8k4,',
    'nsub,8k6,sube,8k7,supe,8kl,oplus,8kn,otimes,8l5,perp,8m5,sdot,8o8,lceil,8o9,rceil,8oa,lfloor,8ob,',
    'rfloor,8p9,lang,8pa,rang,9ea,loz,9j0,spades,9j3,clubs,9j5,hearts,9j6,diams,ai,OElig,aj,oelig,b0,',
    'Scaron,b1,scaron,bo,Yuml,m6,circ,ms,tilde,802,ensp,803,emsp,809,thinsp,80c,zwnj,80d,zwj,80e,lrm,',
    '80f,rlm,80j,ndash,80k,mdash,80o,lsquo,80p,rsquo,80q,sbquo,80s,ldquo,80t,rdquo,80u,bdquo,810,dagger,',
    '811,Dagger,81g,permil,81p,lsaquo,81q,rsaquo,85c,euro'
].join(''), 32);
|
119 |
|
/**
 * Formats one encoder match as a decimal numeric character reference.
 * A two code unit match is a surrogate pair and is combined into the single
 * code point it represents before formatting.
 *
 * @private
 * @param {String} chr Single UTF-16 code unit or a surrogate pair.
 * @return {String} Numeric entity, for example "&#229;".
 */
function toNumericEntity(chr) {
    var code = chr.charCodeAt(0);

    if (chr.length > 1) {
        code = ((code - 0xD800) * 0x400) + (chr.charCodeAt(1) - 0xDC00) + 0x10000;
    }

    return '&#' + code + ';';
}

var Entities = {
    /**
     * Encodes the specified string using raw entities. This means only the required XML base entities will be encoded.
     *
     * @method encodeRaw
     * @param {String} text Text to encode.
     * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
     * @return {String} Entity encoded text.
     */
    encodeRaw: function(text, attr) {
        return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
            // The matchers also hit non ascii characters, those are passed through untouched
            return baseEntities[chr] || chr;
        });
    },

    /**
     * Encodes the specified text with both the attribute and text entities. This function will produce larger text contents
     * since it doesn't know if the context is within an attribute or text node. This was added for compatibility
     * and is exposed as the DOMUtils.encode function.
     *
     * @method encodeAllRaw
     * @param {String} text Text to encode, non string values are converted to strings.
     * @return {String} Entity encoded text.
     */
    encodeAllRaw: function(text) {
        return ('' + text).replace(rawCharsRegExp, function(chr) {
            return baseEntities[chr] || chr;
        });
    },

    /**
     * Encodes the specified string using numeric entities. The core entities will be
     * encoded as named ones but all non lower ascii characters will be encoded into numeric entities.
     *
     * @method encodeNumeric
     * @param {String} text Text to encode.
     * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
     * @return {String} Entity encoded text.
     */
    encodeNumeric: function(text, attr) {
        return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
            // Surrogate pairs never have a base entity so they always end up as one numeric entity
            return baseEntities[chr] || toNumericEntity(chr);
        });
    },

    /**
     * Encodes the specified string using named entities. The core entities will be encoded
     * as named ones and all non lower ascii characters that have a name will be encoded into
     * named entities. Characters without a name are left as is.
     *
     * @method encodeNamed
     * @param {String} text Text to encode.
     * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
     * @param {Object} entities Optional parameter with entities to use.
     * @return {String} Entity encoded text.
     */
    encodeNamed: function(text, attr, entities) {
        entities = entities || namedEntities;

        return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
            return baseEntities[chr] || entities[chr] || chr;
        });
    },

    /**
     * Returns an encode function based on the name(s) and its optional entities.
     *
     * @method getEncodeFunc
     * @param {String} name Comma separated list of encoders for example named,numeric.
     * @param {String} entities Optional parameter with entities to use instead of the built in set.
     * @return {function} Encode function to be used.
     */
    getEncodeFunc: function(name, entities) {
        // Falls back to the built in set when no custom list was specified
        entities = buildEntitiesLookup(entities) || namedEntities;

        function encodeNamedAndNumeric(text, attr) {
            return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
                // Prefer a name and fall back to a numeric entity. Surrogate pairs are
                // combined into one code point rather than emitting the high surrogate alone.
                return baseEntities[chr] || entities[chr] || toNumericEntity(chr);
            });
        }

        function encodeCustomNamed(text, attr) {
            return Entities.encodeNamed(text, attr, entities);
        }

        // Replace + with , to be compatible with previous TinyMCE versions
        name = makeMap(name.replace(/\+/g, ','));

        // Named and numeric encoder
        if (name.named && name.numeric) {
            return encodeNamedAndNumeric;
        }

        // Named encoder, entities is always set at this point so the bound variant is used
        if (name.named) {
            return encodeCustomNamed;
        }

        // Numeric
        if (name.numeric) {
            return Entities.encodeNumeric;
        }

        // Raw encoder
        return Entities.encodeRaw;
    },

    /**
     * Decodes the specified string, this will replace entities with raw UTF characters.
     * Numeric references that can't be parsed as a number are left untouched.
     *
     * @method decode
     * @param {String} text Text to entity decode.
     * @return {String} Entity decoded string.
     */
    decode: function(text) {
        return text.replace(entityRegExp, function(all, numeric) {
            var code;

            if (numeric) {
                if (numeric.charAt(0).toLowerCase() === 'x') {
                    code = parseInt(numeric.slice(1), 16);
                } else {
                    code = parseInt(numeric, 10);
                }

                // Not a number for example "&#zz;", keep the text instead of producing U+0000
                if (isNaN(code)) {
                    return all;
                }

                // Support upper UTF
                if (code > 0xFFFF) {
                    code -= 0x10000;

                    return String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
                }

                return asciiMap[code] || String.fromCharCode(code);
            }

            // Named reference: raw entities first, then the built in names, then the browser
            return reverseEntities[all] || namedEntities[all] || nativeDecode(all);
        });
    }
};
|
266 |
|
267 return Entities; |
|
268 }); |
|