diff -r 318533413200 -r a1707c607eec src/pyams_skin/resources/js/ext/tinymce/dev/plugins/paste/classes/WordFilter.js --- a/src/pyams_skin/resources/js/ext/tinymce/dev/plugins/paste/classes/WordFilter.js Sun Jul 19 02:02:20 2020 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,498 +0,0 @@ -/** - * WordFilter.js - * - * Copyright, Moxiecode Systems AB - * Released under LGPL License. - * - * License: http://www.tinymce.com/license - * Contributing: http://www.tinymce.com/contributing - */ - -/** - * This class parses word HTML into proper TinyMCE markup. - * - * @class tinymce.pasteplugin.WordFilter - * @private - */ -define("tinymce/pasteplugin/WordFilter", [ - "tinymce/util/Tools", - "tinymce/html/DomParser", - "tinymce/html/Schema", - "tinymce/html/Serializer", - "tinymce/html/Node", - "tinymce/pasteplugin/Utils" -], function(Tools, DomParser, Schema, Serializer, Node, Utils) { - /** - * Checks if the specified content is from any of the following sources: MS Word/Office 365/Google docs. - */ - function isWordContent(content) { - return ( - (/]+id="?docs-internal-[^>]*>/gi, ''); - content = content.replace(/
/gi, ''); - - retainStyleProperties = settings.paste_retain_style_properties; - if (retainStyleProperties) { - validStyles = Tools.makeMap(retainStyleProperties.split(/[, ]/)); - } - - /** - * Converts fake bullet and numbered lists to real semantic OL/UL. - * - * @param {tinymce.html.Node} node Root node to convert children of. - */ - function convertFakeListsToProperLists(node) { - var currentListNode, prevListNode, lastLevel = 1; - - function getText(node) { - var txt = ''; - - if (node.type === 3) { - return node.value; - } - - if ((node = node.firstChild)) { - do { - txt += getText(node); - } while ((node = node.next)); - } - - return txt; - } - - function trimListStart(node, regExp) { - if (node.type === 3) { - if (regExp.test(node.value)) { - node.value = node.value.replace(regExp, ''); - return false; - } - } - - if ((node = node.firstChild)) { - do { - if (!trimListStart(node, regExp)) { - return false; - } - } while ((node = node.next)); - } - - return true; - } - - function removeIgnoredNodes(node) { - if (node._listIgnore) { - node.remove(); - return; - } - - if ((node = node.firstChild)) { - do { - removeIgnoredNodes(node); - } while ((node = node.next)); - } - } - - function convertParagraphToLi(paragraphNode, listName, start) { - var level = paragraphNode._listLevel || lastLevel; - - // Handle list nesting - if (level != lastLevel) { - if (level < lastLevel) { - // Move to parent list - if (currentListNode) { - currentListNode = currentListNode.parent.parent; - } - } else { - // Create new list - prevListNode = currentListNode; - currentListNode = null; - } - } - - if (!currentListNode || currentListNode.name != listName) { - prevListNode = prevListNode || currentListNode; - currentListNode = new Node(listName, 1); - - if (start > 1) { - currentListNode.attr('start', '' + start); - } - - paragraphNode.wrap(currentListNode); - } else { - currentListNode.append(paragraphNode); - } - - paragraphNode.name = 'li'; - - // Append list to previous list if it exists - if (level > lastLevel && prevListNode) { - prevListNode.lastChild.append(currentListNode); - } - - lastLevel = level; - - // Remove start of list item "1. " or "· " etc - removeIgnoredNodes(paragraphNode); - trimListStart(paragraphNode, /^\u00a0+/); - trimListStart(paragraphNode, /^\s*([\u2022\u00b7\u00a7\u25CF]|\w+\.)/); - trimListStart(paragraphNode, /^\u00a0+/); - } - - // Build a list of all root level elements before we start - // altering them in the loop below. - var elements = [], child = node.firstChild; - while (typeof child !== 'undefined' && child !== null) { - elements.push(child); - - child = child.walk(); - if (child !== null) { - while (typeof child !== 'undefined' && child.parent !== node) { - child = child.walk(); - } - } - } - - for (var i = 0; i < elements.length; i++) { - node = elements[i]; - - if (node.name == 'p' && node.firstChild) { - // Find first text node in paragraph - var nodeText = getText(node); - - // Detect unordered lists look for bullets - if (isBulletList(nodeText)) { - convertParagraphToLi(node, 'ul'); - continue; - } - - // Detect ordered lists 1., a. or ixv. - if (isNumericList(nodeText)) { - // Parse OL start number - var matches = /([0-9]+)\./.exec(nodeText); - var start = 1; - if (matches) { - start = parseInt(matches[1], 10); - } - - convertParagraphToLi(node, 'ol', start); - continue; - } - - // Convert paragraphs marked as lists but doesn't look like anything - if (node._listLevel) { - convertParagraphToLi(node, 'ul', 1); - continue; - } - - currentListNode = null; - } else { - // If the root level element isn't a p tag which can be - // processed by convertParagraphToLi, it interrupts the - // lists, causing a new list to start instead of having - // elements from the next list inserted above this tag. - prevListNode = currentListNode; - currentListNode = null; - } - } - } - - function filterStyles(node, styleValue) { - var outputStyles = {}, matches, styles = editor.dom.parseStyle(styleValue); - - Tools.each(styles, function(value, name) { - // Convert various MS styles to W3C styles - switch (name) { - case 'mso-list': - // Parse out list indent level for lists - matches = /\w+ \w+([0-9]+)/i.exec(styleValue); - if (matches) { - node._listLevel = parseInt(matches[1], 10); - } - - // Remove these nodes o - // Since the span gets removed we mark the text node and the span - if (/Ignore/i.test(value) && node.firstChild) { - node._listIgnore = true; - node.firstChild._listIgnore = true; - } - - break; - - case "horiz-align": - name = "text-align"; - break; - - case "vert-align": - name = "vertical-align"; - break; - - case "font-color": - case "mso-foreground": - name = "color"; - break; - - case "mso-background": - case "mso-highlight": - name = "background"; - break; - - case "font-weight": - case "font-style": - if (value != "normal") { - outputStyles[name] = value; - } - return; - - case "mso-element": - // Remove track changes code - if (/^(comment|comment-list)$/i.test(value)) { - node.remove(); - return; - } - - break; - } - - if (name.indexOf('mso-comment') === 0) { - node.remove(); - return; - } - - // Never allow mso- prefixed names - if (name.indexOf('mso-') === 0) { - return; - } - - // Output only valid styles - if (retainStyleProperties == "all" || (validStyles && validStyles[name])) { - outputStyles[name] = value; - } - }); - - // Convert bold style to "b" element - if (/(bold)/i.test(outputStyles["font-weight"])) { - delete outputStyles["font-weight"]; - node.wrap(new Node("b", 1)); - } - - // Convert italic style to "i" element - if (/(italic)/i.test(outputStyles["font-style"])) { - delete outputStyles["font-style"]; - node.wrap(new Node("i", 1)); - } - - // Serialize the styles and see if there is something left to keep - outputStyles = editor.dom.serializeStyle(outputStyles, node.name); - if (outputStyles) { - return outputStyles; - } - - return null; - } - - if (settings.paste_enable_default_filters === false) { - return; - } - - // Detect is the contents is Word junk HTML - if (isWordContent(e.content)) { - e.wordContent = true; // Mark it for other processors - - // Remove basic Word junk - content = Utils.filter(content, [ - // Word comments like conditional comments etc - //gi, - - // Remove comments, scripts (e.g., msoShowComment), XML tag, VML content, - // MS Office namespaced tags, and a few other tags - /<(!|script[^>]*>.*?<\/script(?=[>\s])|\/?(\?xml(:\w+)?|img|meta|link|style|\w:\w+)(?=[\s\/>]))[^>]*>/gi, - - // Convert into for line-though - [/<(\/?)s>/gi, "<$1strike>"], - - // Replace nsbp entites to char since it's easier to handle - [/ /gi, "\u00a0"], - - // Convert ___ to string of alternating - // breaking/non-breaking spaces of same length - [/([\s\u00a0]*)<\/span>/gi, - function(str, spaces) { - return (spaces.length > 0) ? - spaces.replace(/./, " ").slice(Math.floor(spaces.length / 2)).split("").join("\u00a0") : ""; - } - ] - ]); - - var validElements = settings.paste_word_valid_elements; - if (!validElements) { - validElements = ( - '-strong/b,-em/i,-u,-span,-p,-ol,-ul,-li,-h1,-h2,-h3,-h4,-h5,-h6,' + - '-p/div,-a[href|name],sub,sup,strike,br,del,table[width],tr,' + - 'td[colspan|rowspan|width],th[colspan|rowspan|width],thead,tfoot,tbody' - ); - } - - // Setup strict schema - var schema = new Schema({ - valid_elements: validElements, - valid_children: '-li[p]' - }); - - // Add style/class attribute to all element rules since the user might have removed them from - // paste_word_valid_elements config option and we need to check them for properties - Tools.each(schema.elements, function(rule) { - /*eslint dot-notation:0*/ - if (!rule.attributes["class"]) { - rule.attributes["class"] = {}; - rule.attributesOrder.push("class"); - } - - if (!rule.attributes.style) { - rule.attributes.style = {}; - rule.attributesOrder.push("style"); - } - }); - - // Parse HTML into DOM structure - var domParser = new DomParser({}, schema); - - // Filter styles to remove "mso" specific styles and convert some of them - domParser.addAttributeFilter('style', function(nodes) { - var i = nodes.length, node; - - while (i--) { - node = nodes[i]; - node.attr('style', filterStyles(node, node.attr('style'))); - - // Remove pointess spans - if (node.name == 'span' && node.parent && !node.attributes.length) { - node.unwrap(); - } - } - }); - - // Check the class attribute for comments or del items and remove those - domParser.addAttributeFilter('class', function(nodes) { - var i = nodes.length, node, className; - - while (i--) { - node = nodes[i]; - - className = node.attr('class'); - if (/^(MsoCommentReference|MsoCommentText|msoDel)$/i.test(className)) { - node.remove(); - } - - node.attr('class', null); - } - }); - - // Remove all del elements since we don't want the track changes code in the editor - domParser.addNodeFilter('del', function(nodes) { - var i = nodes.length; - - while (i--) { - nodes[i].remove(); - } - }); - - // Keep some of the links and anchors - domParser.addNodeFilter('a', function(nodes) { - var i = nodes.length, node, href, name; - - while (i--) { - node = nodes[i]; - href = node.attr('href'); - name = node.attr('name'); - - if (href && href.indexOf('#_msocom_') != -1) { - node.remove(); - continue; - } - - if (href && href.indexOf('file://') === 0) { - href = href.split('#')[1]; - if (href) { - href = '#' + href; - } - } - - if (!href && !name) { - node.unwrap(); - } else { - // Remove all named anchors that aren't specific to TOC, Footnotes or Endnotes - if (name && !/^_?(?:toc|edn|ftn)/i.test(name)) { - node.unwrap(); - continue; - } - - node.attr({ - href: href, - name: name - }); - } - } - }); - - // Parse into DOM structure - var rootNode = domParser.parse(content); - - // Process DOM - if (settings.paste_convert_word_fake_lists !== false) { - convertFakeListsToProperLists(rootNode); - } - - // Serialize DOM back to HTML - e.content = new Serializer({}, schema).serialize(rootNode); - } - }); - } - - WordFilter.isWordContent = isWordContent; - - return WordFilter; -});