--- a/src/pyams_skin/resources/js/ext/tinymce/dev/plugins/paste/classes/WordFilter.js Sun Jul 19 02:02:20 2020 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,498 +0,0 @@
-/**
- * WordFilter.js
- *
- * Copyright, Moxiecode Systems AB
- * Released under LGPL License.
- *
- * License: http://www.tinymce.com/license
- * Contributing: http://www.tinymce.com/contributing
- */
-
-/**
- * This class parses word HTML into proper TinyMCE markup.
- *
- * @class tinymce.pasteplugin.WordFilter
- * @private
- */
-define("tinymce/pasteplugin/WordFilter", [
- "tinymce/util/Tools",
- "tinymce/html/DomParser",
- "tinymce/html/Schema",
- "tinymce/html/Serializer",
- "tinymce/html/Node",
- "tinymce/pasteplugin/Utils"
-], function(Tools, DomParser, Schema, Serializer, Node, Utils) {
- /**
- * Checks if the specified content is from any of the following sources: MS Word/Office 365/Google docs.
- */
- function isWordContent(content) {
- return (
- (/<font face="Times New Roman"|class="?Mso|style="[^"]*\bmso-|style='[^'']*\bmso-|w:WordDocument/i).test(content) ||
- (/class="OutlineElement/).test(content) ||
- (/id="?docs\-internal\-guid\-/.test(content))
- );
- }
-
- /**
- * Checks if the specified text starts with "1. " or "a. " etc.
- */
- function isNumericList(text) {
- var found, patterns;
-
- patterns = [
- /^[IVXLMCD]{1,2}\.[ \u00a0]/, // Roman upper case
- /^[ivxlmcd]{1,2}\.[ \u00a0]/, // Roman lower case
- /^[a-z]{1,2}[\.\)][ \u00a0]/, // Alphabetical a-z
- /^[A-Z]{1,2}[\.\)][ \u00a0]/, // Alphabetical A-Z
- /^[0-9]+\.[ \u00a0]/, // Numeric lists
- /^[\u3007\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d]+\.[ \u00a0]/, // Japanese
- /^[\u58f1\u5f10\u53c2\u56db\u4f0d\u516d\u4e03\u516b\u4e5d\u62fe]+\.[ \u00a0]/ // Chinese
- ];
-
- text = text.replace(/^[\u00a0 ]+/, '');
-
- Tools.each(patterns, function(pattern) {
- if (pattern.test(text)) {
- found = true;
- return false;
- }
- });
-
- return found;
- }
-
- function isBulletList(text) {
- return /^[\s\u00a0]*[\u2022\u00b7\u00a7\u25CF]\s*/.test(text);
- }
-
- function WordFilter(editor) {
- var settings = editor.settings;
-
- editor.on('BeforePastePreProcess', function(e) {
- var content = e.content, retainStyleProperties, validStyles;
-
- // Remove google docs internal guid markers
- content = content.replace(/<b[^>]+id="?docs-internal-[^>]*>/gi, '');
- content = content.replace(/<br class="?Apple-interchange-newline"?>/gi, '');
-
- retainStyleProperties = settings.paste_retain_style_properties;
- if (retainStyleProperties) {
- validStyles = Tools.makeMap(retainStyleProperties.split(/[, ]/));
- }
-
- /**
- * Converts fake bullet and numbered lists to real semantic OL/UL.
- *
- * @param {tinymce.html.Node} node Root node to convert children of.
- */
- function convertFakeListsToProperLists(node) {
- var currentListNode, prevListNode, lastLevel = 1;
-
- function getText(node) {
- var txt = '';
-
- if (node.type === 3) {
- return node.value;
- }
-
- if ((node = node.firstChild)) {
- do {
- txt += getText(node);
- } while ((node = node.next));
- }
-
- return txt;
- }
-
- function trimListStart(node, regExp) {
- if (node.type === 3) {
- if (regExp.test(node.value)) {
- node.value = node.value.replace(regExp, '');
- return false;
- }
- }
-
- if ((node = node.firstChild)) {
- do {
- if (!trimListStart(node, regExp)) {
- return false;
- }
- } while ((node = node.next));
- }
-
- return true;
- }
-
- function removeIgnoredNodes(node) {
- if (node._listIgnore) {
- node.remove();
- return;
- }
-
- if ((node = node.firstChild)) {
- do {
- removeIgnoredNodes(node);
- } while ((node = node.next));
- }
- }
-
- function convertParagraphToLi(paragraphNode, listName, start) {
- var level = paragraphNode._listLevel || lastLevel;
-
- // Handle list nesting
- if (level != lastLevel) {
- if (level < lastLevel) {
- // Move to parent list
- if (currentListNode) {
- currentListNode = currentListNode.parent.parent;
- }
- } else {
- // Create new list
- prevListNode = currentListNode;
- currentListNode = null;
- }
- }
-
- if (!currentListNode || currentListNode.name != listName) {
- prevListNode = prevListNode || currentListNode;
- currentListNode = new Node(listName, 1);
-
- if (start > 1) {
- currentListNode.attr('start', '' + start);
- }
-
- paragraphNode.wrap(currentListNode);
- } else {
- currentListNode.append(paragraphNode);
- }
-
- paragraphNode.name = 'li';
-
- // Append list to previous list if it exists
- if (level > lastLevel && prevListNode) {
- prevListNode.lastChild.append(currentListNode);
- }
-
- lastLevel = level;
-
- // Remove start of list item "1. " or "· " etc
- removeIgnoredNodes(paragraphNode);
- trimListStart(paragraphNode, /^\u00a0+/);
- trimListStart(paragraphNode, /^\s*([\u2022\u00b7\u00a7\u25CF]|\w+\.)/);
- trimListStart(paragraphNode, /^\u00a0+/);
- }
-
- // Build a list of all root level elements before we start
- // altering them in the loop below.
- var elements = [], child = node.firstChild;
- while (typeof child !== 'undefined' && child !== null) {
- elements.push(child);
-
- child = child.walk();
- if (child !== null) {
- while (typeof child !== 'undefined' && child.parent !== node) {
- child = child.walk();
- }
- }
- }
-
- for (var i = 0; i < elements.length; i++) {
- node = elements[i];
-
- if (node.name == 'p' && node.firstChild) {
- // Find first text node in paragraph
- var nodeText = getText(node);
-
- // Detect unordered lists look for bullets
- if (isBulletList(nodeText)) {
- convertParagraphToLi(node, 'ul');
- continue;
- }
-
- // Detect ordered lists 1., a. or ixv.
- if (isNumericList(nodeText)) {
- // Parse OL start number
- var matches = /([0-9]+)\./.exec(nodeText);
- var start = 1;
- if (matches) {
- start = parseInt(matches[1], 10);
- }
-
- convertParagraphToLi(node, 'ol', start);
- continue;
- }
-
- // Convert paragraphs marked as lists but doesn't look like anything
- if (node._listLevel) {
- convertParagraphToLi(node, 'ul', 1);
- continue;
- }
-
- currentListNode = null;
- } else {
- // If the root level element isn't a p tag which can be
- // processed by convertParagraphToLi, it interrupts the
- // lists, causing a new list to start instead of having
- // elements from the next list inserted above this tag.
- prevListNode = currentListNode;
- currentListNode = null;
- }
- }
- }
-
- function filterStyles(node, styleValue) {
- var outputStyles = {}, matches, styles = editor.dom.parseStyle(styleValue);
-
- Tools.each(styles, function(value, name) {
- // Convert various MS styles to W3C styles
- switch (name) {
- case 'mso-list':
- // Parse out list indent level for lists
- matches = /\w+ \w+([0-9]+)/i.exec(styleValue);
- if (matches) {
- node._listLevel = parseInt(matches[1], 10);
- }
-
- // Remove these nodes <span style="mso-list:Ignore">o</span>
- // Since the span gets removed we mark the text node and the span
- if (/Ignore/i.test(value) && node.firstChild) {
- node._listIgnore = true;
- node.firstChild._listIgnore = true;
- }
-
- break;
-
- case "horiz-align":
- name = "text-align";
- break;
-
- case "vert-align":
- name = "vertical-align";
- break;
-
- case "font-color":
- case "mso-foreground":
- name = "color";
- break;
-
- case "mso-background":
- case "mso-highlight":
- name = "background";
- break;
-
- case "font-weight":
- case "font-style":
- if (value != "normal") {
- outputStyles[name] = value;
- }
- return;
-
- case "mso-element":
- // Remove track changes code
- if (/^(comment|comment-list)$/i.test(value)) {
- node.remove();
- return;
- }
-
- break;
- }
-
- if (name.indexOf('mso-comment') === 0) {
- node.remove();
- return;
- }
-
- // Never allow mso- prefixed names
- if (name.indexOf('mso-') === 0) {
- return;
- }
-
- // Output only valid styles
- if (retainStyleProperties == "all" || (validStyles && validStyles[name])) {
- outputStyles[name] = value;
- }
- });
-
- // Convert bold style to "b" element
- if (/(bold)/i.test(outputStyles["font-weight"])) {
- delete outputStyles["font-weight"];
- node.wrap(new Node("b", 1));
- }
-
- // Convert italic style to "i" element
- if (/(italic)/i.test(outputStyles["font-style"])) {
- delete outputStyles["font-style"];
- node.wrap(new Node("i", 1));
- }
-
- // Serialize the styles and see if there is something left to keep
- outputStyles = editor.dom.serializeStyle(outputStyles, node.name);
- if (outputStyles) {
- return outputStyles;
- }
-
- return null;
- }
-
- if (settings.paste_enable_default_filters === false) {
- return;
- }
-
- // Detect is the contents is Word junk HTML
- if (isWordContent(e.content)) {
- e.wordContent = true; // Mark it for other processors
-
- // Remove basic Word junk
- content = Utils.filter(content, [
- // Word comments like conditional comments etc
- /<!--[\s\S]+?-->/gi,
-
- // Remove comments, scripts (e.g., msoShowComment), XML tag, VML content,
- // MS Office namespaced tags, and a few other tags
- /<(!|script[^>]*>.*?<\/script(?=[>\s])|\/?(\?xml(:\w+)?|img|meta|link|style|\w:\w+)(?=[\s\/>]))[^>]*>/gi,
-
- // Convert <s> into <strike> for line-though
- [/<(\/?)s>/gi, "<$1strike>"],
-
- // Replace nsbp entites to char since it's easier to handle
- [/ /gi, "\u00a0"],
-
- // Convert <span style="mso-spacerun:yes">___</span> to string of alternating
- // breaking/non-breaking spaces of same length
- [/<span\s+style\s*=\s*"\s*mso-spacerun\s*:\s*yes\s*;?\s*"\s*>([\s\u00a0]*)<\/span>/gi,
- function(str, spaces) {
- return (spaces.length > 0) ?
- spaces.replace(/./, " ").slice(Math.floor(spaces.length / 2)).split("").join("\u00a0") : "";
- }
- ]
- ]);
-
- var validElements = settings.paste_word_valid_elements;
- if (!validElements) {
- validElements = (
- '-strong/b,-em/i,-u,-span,-p,-ol,-ul,-li,-h1,-h2,-h3,-h4,-h5,-h6,' +
- '-p/div,-a[href|name],sub,sup,strike,br,del,table[width],tr,' +
- 'td[colspan|rowspan|width],th[colspan|rowspan|width],thead,tfoot,tbody'
- );
- }
-
- // Setup strict schema
- var schema = new Schema({
- valid_elements: validElements,
- valid_children: '-li[p]'
- });
-
- // Add style/class attribute to all element rules since the user might have removed them from
- // paste_word_valid_elements config option and we need to check them for properties
- Tools.each(schema.elements, function(rule) {
- /*eslint dot-notation:0*/
- if (!rule.attributes["class"]) {
- rule.attributes["class"] = {};
- rule.attributesOrder.push("class");
- }
-
- if (!rule.attributes.style) {
- rule.attributes.style = {};
- rule.attributesOrder.push("style");
- }
- });
-
- // Parse HTML into DOM structure
- var domParser = new DomParser({}, schema);
-
- // Filter styles to remove "mso" specific styles and convert some of them
- domParser.addAttributeFilter('style', function(nodes) {
- var i = nodes.length, node;
-
- while (i--) {
- node = nodes[i];
- node.attr('style', filterStyles(node, node.attr('style')));
-
- // Remove pointess spans
- if (node.name == 'span' && node.parent && !node.attributes.length) {
- node.unwrap();
- }
- }
- });
-
- // Check the class attribute for comments or del items and remove those
- domParser.addAttributeFilter('class', function(nodes) {
- var i = nodes.length, node, className;
-
- while (i--) {
- node = nodes[i];
-
- className = node.attr('class');
- if (/^(MsoCommentReference|MsoCommentText|msoDel)$/i.test(className)) {
- node.remove();
- }
-
- node.attr('class', null);
- }
- });
-
- // Remove all del elements since we don't want the track changes code in the editor
- domParser.addNodeFilter('del', function(nodes) {
- var i = nodes.length;
-
- while (i--) {
- nodes[i].remove();
- }
- });
-
- // Keep some of the links and anchors
- domParser.addNodeFilter('a', function(nodes) {
- var i = nodes.length, node, href, name;
-
- while (i--) {
- node = nodes[i];
- href = node.attr('href');
- name = node.attr('name');
-
- if (href && href.indexOf('#_msocom_') != -1) {
- node.remove();
- continue;
- }
-
- if (href && href.indexOf('file://') === 0) {
- href = href.split('#')[1];
- if (href) {
- href = '#' + href;
- }
- }
-
- if (!href && !name) {
- node.unwrap();
- } else {
- // Remove all named anchors that aren't specific to TOC, Footnotes or Endnotes
- if (name && !/^_?(?:toc|edn|ftn)/i.test(name)) {
- node.unwrap();
- continue;
- }
-
- node.attr({
- href: href,
- name: name
- });
- }
- }
- });
-
- // Parse into DOM structure
- var rootNode = domParser.parse(content);
-
- // Process DOM
- if (settings.paste_convert_word_fake_lists !== false) {
- convertFakeListsToProperLists(rootNode);
- }
-
- // Serialize DOM back to HTML
- e.content = new Serializer({}, schema).serialize(rootNode);
- }
- });
- }
-
- WordFilter.isWordContent = isWordContent;
-
- return WordFilter;
-});