changeset 566 a1707c607eec
parent 565 318533413200
child 567 bca1726b1d85
--- a/src/pyams_skin/resources/js/ext/tinymce/dev/plugins/paste/classes/WordFilter.js	Sun Jul 19 02:02:20 2020 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,498 +0,0 @@
-/**
- * WordFilter.js
- *
- * Copyright, Moxiecode Systems AB
- * Released under LGPL License.
- *
- * License:
- * Contributing:
- */
-/**
- * This class parses Word HTML into proper TinyMCE markup.
- *
- * @class tinymce.pasteplugin.WordFilter
- * @private
- */
-define("tinymce/pasteplugin/WordFilter", [
-	"tinymce/util/Tools",
-	"tinymce/html/DomParser",
-	"tinymce/html/Schema",
-	"tinymce/html/Serializer",
-	"tinymce/html/Node",
-	"tinymce/pasteplugin/Utils"
-], function(Tools, DomParser, Schema, Serializer, Node, Utils) {
-	/**
-	 * Checks if the specified content is from any of the following sources: MS Word/Office 365/Google docs.
-	 */
-	function isWordContent(content) {
-		return (
-			(/<font face="Times New Roman"|class="?Mso|style="[^"]*\bmso-|style='[^'']*\bmso-|w:WordDocument/i).test(content) ||
-			(/class="OutlineElement/).test(content) ||
-			(/id="?docs\-internal\-guid\-/.test(content))
-		);
-	}
-	/**
-	 * Checks if the specified text starts with "1. " or "a. " etc.
-	 */
-	function isNumericList(text) {
-		var found, patterns;
-		patterns = [
-			/^[IVXLMCD]{1,2}\.[ \u00a0]/,  // Roman upper case
-			/^[ivxlmcd]{1,2}\.[ \u00a0]/,  // Roman lower case
-			/^[a-z]{1,2}[\.\)][ \u00a0]/,  // Alphabetical a-z
-			/^[A-Z]{1,2}[\.\)][ \u00a0]/,  // Alphabetical A-Z
-			/^[0-9]+\.[ \u00a0]/,          // Numeric lists
-			/^[\u3007\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d]+\.[ \u00a0]/, // Japanese
-			/^[\u58f1\u5f10\u53c2\u56db\u4f0d\u516d\u4e03\u516b\u4e5d\u62fe]+\.[ \u00a0]/  // Chinese
-		];
-		text = text.replace(/^[\u00a0 ]+/, '');
-		Tools.each(patterns, function(pattern) {
-			if (pattern.test(text)) {
-				found = true;
-				return false;
-			}
-		});
-		return found;
-	}
-	function isBulletList(text) {
-		return /^[\s\u00a0]*[\u2022\u00b7\u00a7\u25CF]\s*/.test(text);
-	}
-	function WordFilter(editor) {
-		var settings = editor.settings;
-		editor.on('BeforePastePreProcess', function(e) {
-			var content = e.content, retainStyleProperties, validStyles;
-			// Remove google docs internal guid markers
-			content = content.replace(/<b[^>]+id="?docs-internal-[^>]*>/gi, '');
-			content = content.replace(/<br class="?Apple-interchange-newline"?>/gi, '');
-			retainStyleProperties = settings.paste_retain_style_properties;
-			if (retainStyleProperties) {
-				validStyles = Tools.makeMap(retainStyleProperties.split(/[, ]/));
-			}
-			/**
-			 * Converts fake bullet and numbered lists to real semantic OL/UL.
-			 *
-			 * @param {tinymce.html.Node} node Root node to convert children of.
-			 */
-			function convertFakeListsToProperLists(node) {
-				var currentListNode, prevListNode, lastLevel = 1;
-				function getText(node) {
-					var txt = '';
-					if (node.type === 3) {
-						return node.value;
-					}
-					if ((node = node.firstChild)) {
-						do {
-							txt += getText(node);
-						} while ((node =;
-					}
-					return txt;
-				}
-				function trimListStart(node, regExp) {
-					if (node.type === 3) {
-						if (regExp.test(node.value)) {
-							node.value = node.value.replace(regExp, '');
-							return false;
-						}
-					}
-					if ((node = node.firstChild)) {
-						do {
-							if (!trimListStart(node, regExp)) {
-								return false;
-							}
-						} while ((node =;
-					}
-					return true;
-				}
-				function removeIgnoredNodes(node) {
-					if (node._listIgnore) {
-						node.remove();
-						return;
-					}
-					if ((node = node.firstChild)) {
-						do {
-							removeIgnoredNodes(node);
-						} while ((node =;
-					}
-				}
-				function convertParagraphToLi(paragraphNode, listName, start) {
-					var level = paragraphNode._listLevel || lastLevel;
-					// Handle list nesting
-					if (level != lastLevel) {
-						if (level < lastLevel) {
-							// Move to parent list
-							if (currentListNode) {
-								currentListNode = currentListNode.parent.parent;
-							}
-						} else {
-							// Create new list
-							prevListNode = currentListNode;
-							currentListNode = null;
-						}
-					}
-					if (!currentListNode || != listName) {
-						prevListNode = prevListNode || currentListNode;
-						currentListNode = new Node(listName, 1);
-						if (start > 1) {
-							currentListNode.attr('start', '' + start);
-						}
-						paragraphNode.wrap(currentListNode);
-					} else {
-						currentListNode.append(paragraphNode);
-					}
- = 'li';
-					// Append list to previous list if it exists
-					if (level > lastLevel && prevListNode) {
-						prevListNode.lastChild.append(currentListNode);
-					}
-					lastLevel = level;
-					// Remove start of list item "1. " or "&middot; " etc
-					removeIgnoredNodes(paragraphNode);
-					trimListStart(paragraphNode, /^\u00a0+/);
-					trimListStart(paragraphNode, /^\s*([\u2022\u00b7\u00a7\u25CF]|\w+\.)/);
-					trimListStart(paragraphNode, /^\u00a0+/);
-				}
-				// Build a list of all root level elements before we start
-				// altering them in the loop below.
-				var elements = [], child = node.firstChild;
-				while (typeof child !== 'undefined' && child !== null) {
-					elements.push(child);
-					child = child.walk();
-					if (child !== null) {
-						while (typeof child !== 'undefined' && child.parent !== node) {
-							child = child.walk();
-						}
-					}
-				}
-				for (var i = 0; i < elements.length; i++) {
-					node = elements[i];
-					if ( == 'p' && node.firstChild) {
-						// Find first text node in paragraph
-						var nodeText = getText(node);
-						// Detect unordered lists look for bullets
-						if (isBulletList(nodeText)) {
-							convertParagraphToLi(node, 'ul');
-							continue;
-						}
-						// Detect ordered lists 1., a. or ixv.
-						if (isNumericList(nodeText)) {
-							// Parse OL start number
-							var matches = /([0-9]+)\./.exec(nodeText);
-							var start = 1;
-							if (matches) {
-								start = parseInt(matches[1], 10);
-							}
-							convertParagraphToLi(node, 'ol', start);
-							continue;
-						}
-						// Convert paragraphs marked as lists but doesn't look like anything
-						if (node._listLevel) {
-							convertParagraphToLi(node, 'ul', 1);
-							continue;
-						}
-						currentListNode = null;
-					} else {
-						// If the root level element isn't a p tag which can be
-						// processed by convertParagraphToLi, it interrupts the
-						// lists, causing a new list to start instead of having
-						// elements from the next list inserted above this tag.
-						prevListNode = currentListNode;
-						currentListNode = null;
-					}
-				}
-			}
-			function filterStyles(node, styleValue) {
-				var outputStyles = {}, matches, styles = editor.dom.parseStyle(styleValue);
-				Tools.each(styles, function(value, name) {
-					// Convert various MS styles to W3C styles
-					switch (name) {
-						case 'mso-list':
-							// Parse out list indent level for lists
-							matches = /\w+ \w+([0-9]+)/i.exec(styleValue);
-							if (matches) {
-								node._listLevel = parseInt(matches[1], 10);
-							}
-							// Remove these nodes <span style="mso-list:Ignore">o</span>
-							// Since the span gets removed we mark the text node and the span
-							if (/Ignore/i.test(value) && node.firstChild) {
-								node._listIgnore = true;
-								node.firstChild._listIgnore = true;
-							}
-							break;
-						case "horiz-align":
-							name = "text-align";
-							break;
-						case "vert-align":
-							name = "vertical-align";
-							break;
-						case "font-color":
-						case "mso-foreground":
-							name = "color";
-							break;
-						case "mso-background":
-						case "mso-highlight":
-							name = "background";
-							break;
-						case "font-weight":
-						case "font-style":
-							if (value != "normal") {
-								outputStyles[name] = value;
-							}
-							return;
-						case "mso-element":
-							// Remove track changes code
-							if (/^(comment|comment-list)$/i.test(value)) {
-								node.remove();
-								return;
-							}
-							break;
-					}
-					if (name.indexOf('mso-comment') === 0) {
-						node.remove();
-						return;
-					}
-					// Never allow mso- prefixed names
-					if (name.indexOf('mso-') === 0) {
-						return;
-					}
-					// Output only valid styles
-					if (retainStyleProperties == "all" || (validStyles && validStyles[name])) {
-						outputStyles[name] = value;
-					}
-				});
-				// Convert bold style to "b" element
-				if (/(bold)/i.test(outputStyles["font-weight"])) {
-					delete outputStyles["font-weight"];
-					node.wrap(new Node("b", 1));
-				}
-				// Convert italic style to "i" element
-				if (/(italic)/i.test(outputStyles["font-style"])) {
-					delete outputStyles["font-style"];
-					node.wrap(new Node("i", 1));
-				}
-				// Serialize the styles and see if there is something left to keep
-				outputStyles = editor.dom.serializeStyle(outputStyles,;
-				if (outputStyles) {
-					return outputStyles;
-				}
-				return null;
-			}
-			if (settings.paste_enable_default_filters === false) {
-				return;
-			}
-			// Detect is the contents is Word junk HTML
-			if (isWordContent(e.content)) {
-				e.wordContent = true; // Mark it for other processors
-				// Remove basic Word junk
-				content = Utils.filter(content, [
-					// Word comments like conditional comments etc
-					/<!--[\s\S]+?-->/gi,
-					// Remove comments, scripts (e.g., msoShowComment), XML tag, VML content,
-					// MS Office namespaced tags, and a few other tags
-					/<(!|script[^>]*>.*?<\/script(?=[>\s])|\/?(\?xml(:\w+)?|img|meta|link|style|\w:\w+)(?=[\s\/>]))[^>]*>/gi,
-					// Convert <s> into <strike> for line-though
-					[/<(\/?)s>/gi, "<$1strike>"],
-					// Replace nsbp entites to char since it's easier to handle
-					[/&nbsp;/gi, "\u00a0"],
-					// Convert <span style="mso-spacerun:yes">___</span> to string of alternating
-					// breaking/non-breaking spaces of same length
-					[/<span\s+style\s*=\s*"\s*mso-spacerun\s*:\s*yes\s*;?\s*"\s*>([\s\u00a0]*)<\/span>/gi,
-						function(str, spaces) {
-							return (spaces.length > 0) ?
-								spaces.replace(/./, " ").slice(Math.floor(spaces.length / 2)).split("").join("\u00a0") : "";
-						}
-					]
-				]);
-				var validElements = settings.paste_word_valid_elements;
-				if (!validElements) {
-					validElements = (
-						'-strong/b,-em/i,-u,-span,-p,-ol,-ul,-li,-h1,-h2,-h3,-h4,-h5,-h6,' +
-						'-p/div,-a[href|name],sub,sup,strike,br,del,table[width],tr,' +
-						'td[colspan|rowspan|width],th[colspan|rowspan|width],thead,tfoot,tbody'
-					);
-				}
-				// Setup strict schema
-				var schema = new Schema({
-					valid_elements: validElements,
-					valid_children: '-li[p]'
-				});
-				// Add style/class attribute to all element rules since the user might have removed them from
-				// paste_word_valid_elements config option and we need to check them for properties
-				Tools.each(schema.elements, function(rule) {
-					/*eslint dot-notation:0*/
-					if (!rule.attributes["class"]) {
-						rule.attributes["class"] = {};
-						rule.attributesOrder.push("class");
-					}
-					if (! {
- = {};
-						rule.attributesOrder.push("style");
-					}
-				});
-				// Parse HTML into DOM structure
-				var domParser = new DomParser({}, schema);
-				// Filter styles to remove "mso" specific styles and convert some of them
-				domParser.addAttributeFilter('style', function(nodes) {
-					var i = nodes.length, node;
-					while (i--) {
-						node = nodes[i];
-						node.attr('style', filterStyles(node, node.attr('style')));
-						// Remove pointess spans
-						if ( == 'span' && node.parent && !node.attributes.length) {
-							node.unwrap();
-						}
-					}
-				});
-				// Check the class attribute for comments or del items and remove those
-				domParser.addAttributeFilter('class', function(nodes) {
-					var i = nodes.length, node, className;
-					while (i--) {
-						node = nodes[i];
-						className = node.attr('class');
-						if (/^(MsoCommentReference|MsoCommentText|msoDel)$/i.test(className)) {
-							node.remove();
-						}
-						node.attr('class', null);
-					}
-				});
-				// Remove all del elements since we don't want the track changes code in the editor
-				domParser.addNodeFilter('del', function(nodes) {
-					var i = nodes.length;
-					while (i--) {
-						nodes[i].remove();
-					}
-				});
-				// Keep some of the links and anchors
-				domParser.addNodeFilter('a', function(nodes) {
-					var i = nodes.length, node, href, name;
-					while (i--) {
-						node = nodes[i];
-						href = node.attr('href');
-						name = node.attr('name');
-						if (href && href.indexOf('#_msocom_') != -1) {
-							node.remove();
-							continue;
-						}
-						if (href && href.indexOf('file://') === 0) {
-							href = href.split('#')[1];
-							if (href) {
-								href = '#' + href;
-							}
-						}
-						if (!href && !name) {
-							node.unwrap();
-						} else {
-							// Remove all named anchors that aren't specific to TOC, Footnotes or Endnotes
-							if (name && !/^_?(?:toc|edn|ftn)/i.test(name)) {
-								node.unwrap();
-								continue;
-							}
-							node.attr({
-								href: href,
-								name: name
-							});
-						}
-					}
-				});
-				// Parse into DOM structure
-				var rootNode = domParser.parse(content);
-				// Process DOM
-				if (settings.paste_convert_word_fake_lists !== false) {
-					convertFakeListsToProperLists(rootNode);
-				}
-				// Serialize DOM back to HTML
-				e.content = new Serializer({}, schema).serialize(rootNode);
-			}
-		});
-	}
-	// Attach the detection helper to the WordFilter function itself
-	WordFilter.isWordContent = isWordContent;
-	// WordFilter is the value returned by the module factory function
-	return WordFilter;