Open steffimueller opened 9 years ago
+1 @pulges, could you please give a description of that? A wiki page would also help.
+1 @pulges
How can we remove this "comment" part ?
Moreover, I always get strange characters at the end of the text.
Between "" and the strange characters, all the content seems perfect.
Thanks for your help
I don't think this works as expected. @pulges thinks that there are some issues here. @hmillet, do you have "comments: 1" set in your current rules config? You can remove that setting to get rid of all the comments in your copy-paste.
No I don't.
Here's my config file :
/**
 * Parser rules published on window.wysihtml5ParserRules.
 * Reference: https://github.com/Voog/wysihtml/wiki/Supported-Commands
 */
window.wysihtml5ParserRules = {
  // CSS classes that are allowed to pass through the parser.
  "classes": {
    "wysiwyg-text-align-left": 1,
    "wysiwyg-text-align-center": 1,
    "wysiwyg-text-align-right": 1,
    "wysiwyg-text-align-justify": 1
  },
  // Classes to reject.
  // NOTE(review): presumably only honoured when "classes" is "any" — confirm against the parser.
  "classes_blacklist": {
    "Apple-interchange-newline": 1,
    "MsoNormal": 1,
    "MsoPlainText": 1
  },
  // Per-tag rules: an empty object keeps the tag as-is, "rename_tag" renames it,
  // and "remove" drops the element together with its content.
  "tags": {
    "strong": { "rename_tag": "b" },
    "b": {},
    "i": {},
    "em": { "rename_tag": "i" },
    "u": {},
    "br": {},
    "p": {},
    "div": {},
    "span": {},
    "ul": {},
    "ol": {},
    "li": {},
    "comment": { "remove": 1 }
  }
};
/**
 * Derives the paste-cleanup rulesets from window.wysihtml5ParserRules and
 * publishes them as window.wysihtml5ParserPasteRulesets.
 *
 * Each entry is { condition?: RegExp, set: rules }; the entry without a
 * condition is the fallback set.
 *
 * The base rules are only copied (via clone(true)), never modified in place.
 * Optional sub-objects of the "div" rule are created on demand so that a
 * minimal config such as `div: {}` no longer throws a TypeError here.
 */
(function(wysihtml5) {
  // Paste cleanup rules universal for all rules (also applied to content copied from editor)
  var commonRules = wysihtml5.lang.object(window.wysihtml5ParserRules).clone(true);
  commonRules.comments = false;
  commonRules.selectors = { "a u": "unwrap"};
  // A rules object without a "tags" map would make the assignments below throw.
  commonRules.tags = commonRules.tags || {};
  commonRules.tags.style = { "remove": 1 };
  commonRules.tags.script = { "remove": 1 };
  commonRules.tags.head = { "remove": 1 };

  // Paste cleanup for unidentified source
  var universalRules = wysihtml5.lang.object(commonRules).clone(true);
  var divRule = universalRules.tags.div;
  var spanRule = universalRules.tags.span;

  // Only tighten the div/span rules when the base config defines them;
  // creating them here would silently allow tags the base config rejects.
  if (divRule) {
    divRule.one_of_type = divRule.one_of_type || {};
    divRule.one_of_type.alignment_object = 1;
    divRule.remove_action = "unwrap";
    divRule.check_attributes = divRule.check_attributes || {};
    divRule.check_attributes.style = false;
    divRule.keep_styles = {
      "textAlign": /^((left)|(right)|(center)|(justify))$/i,
      "float": 1
    };
  }
  if (spanRule) {
    spanRule.keep_styles = false;
  }

  // Paste cleanup for MS Office
  // TODO: should be extended to stricter ruleset, as current set will probably not cover all Office bizarreness
  var msOfficeRules = wysihtml5.lang.object(universalRules).clone(true);
  msOfficeRules.classes = {};

  window.wysihtml5ParserPasteRulesets = [
    {
      // Markers of MS Office / Google Docs clipboard HTML.
      condition: /<font face="Times New Roman"|class="?Mso|style="[^"]*\bmso-|style='[^'']*\bmso-|w:WordDocument|class="OutlineElement|id="?docs\-internal\-guid\-/i,
      set: msOfficeRules
    },{
      // Content copied out of the editor itself.
      condition: /<meta name="copied-from" content="wysihtml5">/i,
      set: commonRules
    },{
      // Fallback for everything else.
      set: universalRules
    }
  ];
})(window.wysihtml5);
I don't have paste parser rules set. Did you try without setting the paste config while creating editor instance?
Yes I tried without the part :
(function(wysihtml5) {
../..
})(window.wysihtml5);
Result is the same.
Here's my instantiation code:
// Create the editor instance, passing the parser rules published on
// window.wysihtml5ParserRules.
// NOTE(review): the toolbar value is spelled 'proposa-...' while the editor
// value is 'proposal-...' — confirm it matches the real toolbar element id.
// NOTE(review): `myEditor` is assigned without a declaration here; presumably
// it is declared elsewhere — otherwise this creates an implicit global.
myEditor = new wysihtml5.Editor('proposal-synthesis-editor', {
toolbar: 'proposa-synthesis-toolbar',
parserRules: window.wysihtml5ParserRules,
handleTabKey: true,
handleTables: true,
useLineBreaks: false,
});
Is this something to do with the environment? I see no comments on copy-paste in the latest Chrome on Mac. This is the config I use. You can give this a try.
/**
 * Reusable rule fragments that the main rules object assigns to several tags.
 */
var wysihtml5ParserRulesDefaults = {
  // Rule for block-level elements that keep their own tag name.
  "blockLevelEl": {
    "keep_styles": {
      "textAlign": /^((left)|(right)|(center)|(justify))$/i,
      "float": 1
    },
    "add_style": { "align": "align_text" },
    "check_attributes": { "id": "any" }
  },

  // Rule for elements that are renamed to <div>.
  "makeDiv": {
    "rename_tag": "div",
    "one_of_type": { "alignment_object": 1 },
    "remove_action": "unwrap",
    "keep_styles": {
      "textAlign": 1,
      "float": 1
    },
    "add_style": { "align": "align_text" },
    "check_attributes": { "id": "any" }
  }
};
/**
 * Full parser rule set for the editor: class white-list/blacklist, reusable
 * type definitions and the per-tag rule table.
 *
 * Reads wysihtml5ParserRulesDefaults (declared earlier in the same snippet)
 * for the shared "makeDiv" / "blockLevelEl" rule fragments.
 */
var wysihtml5ParserRules = {
  /**
   * CSS Class white-list
   * Following CSS classes won't be removed when parsed by the wysihtml5 HTML parser
   * If all classes should pass "any" as classes value. Ex: "classes": "any"
   */
  "classes": "any",

  /* blacklist of classes is only available if classes is set to any */
  "classes_blacklist": {
    "Apple-interchange-newline": 1,
    "MsoNormal": 1,
    "MsoPlainText": 1
  },

  "type_definitions": {
    "alignment_object": {
      "classes": {
        "wysiwyg-text-align-center": 1,
        "wysiwyg-text-align-justify": 1,
        "wysiwyg-text-align-left": 1,
        "wysiwyg-text-align-right": 1,
        "wysiwyg-float-left": 1,
        "wysiwyg-float-right": 1
      },
      "styles": {
        "float": ["left", "right"],
        "text-align": ["left", "right", "center"]
      }
    },

    "valid_image_src": {
      "attrs": {
        // Accept any non-empty src that does not start with "data:".
        // The previous pattern /^[^data\:]/i was a character class, so it
        // rejected every src whose first character was d, a, t, ":" or "\"
        // (e.g. "assets/logo.png") instead of rejecting data: URIs.
        "src": /^(?!data:)[\s\S]/i
      }
    },

    "text_color_object": {
      "styles": {
        "color": true,
        "background-color": true
      }
    },

    "text_fontsize_object": {
      "styles": {
        "font-size": true
      }
    },

    "text_formatting_object": {
      "classes": {
        "wysiwyg-color-aqua": 1,
        "wysiwyg-color-black": 1,
        "wysiwyg-color-blue": 1,
        "wysiwyg-color-fuchsia": 1,
        "wysiwyg-color-gray": 1,
        "wysiwyg-color-green": 1,
        "wysiwyg-color-lime": 1,
        "wysiwyg-color-maroon": 1,
        "wysiwyg-color-navy": 1,
        "wysiwyg-color-olive": 1,
        "wysiwyg-color-purple": 1,
        "wysiwyg-color-red": 1,
        "wysiwyg-color-silver": 1,
        "wysiwyg-color-teal": 1,
        "wysiwyg-color-white": 1,
        "wysiwyg-color-yellow": 1,
        "wysiwyg-font-size-large": 1,
        "wysiwyg-font-size-larger": 1,
        "wysiwyg-font-size-medium": 1,
        "wysiwyg-font-size-small": 1,
        "wysiwyg-font-size-smaller": 1,
        "wysiwyg-font-size-x-large": 1,
        "wysiwyg-font-size-x-small": 1,
        "wysiwyg-font-size-xx-large": 1,
        "wysiwyg-font-size-xx-small": 1
      }
    }
  },

  // "comments": 1, // if set allows comments to pass

  /**
   * Tag list
   *
   * The following options are available:
   *
   * - add_class: converts and deletes the given HTML4 attribute (align, clear, ...) via the given method to a css class
   *   The following methods are implemented in wysihtml5.dom.parse:
   *   - align_text: converts align attribute values (right/left/center/justify) to their corresponding css class "wysiwyg-text-align-*")
   *     <p align="center">foo</p> ... becomes ... <p class="wysiwyg-text-align-center">foo</p>
   *   - clear_br: converts clear attribute values left/right/all/both to their corresponding css class "wysiwyg-clear-*"
   *     <br clear="all"> ... becomes ... <br class="wysiwyg-clear-both">
   *   - align_img: converts align attribute values (right/left) on <img> to their corresponding css class "wysiwyg-float-*"
   *
   * - remove: removes the element and its content
   *
   * - unwrap: removes the element but leaves its content
   *
   * - rename_tag: renames the element to the given tag
   *
   * - set_class: adds the given class to the element (note: make sure that the class is in the "classes" white list above)
   *
   * - set_attributes: sets/overrides the given attributes
   *
   * - check_attributes: checks the given HTML attribute via the given method
   *   - url: allows only valid urls (starting with http:// or https://)
   *   - src: allows something like "/foobar.jpg", "http://google.com", ...
   *   - href: allows something like "mailto:bert@foo.com", "http://google.com", "/foobar.jpg"
   *   - alt: strips unwanted characters. if the attribute is not set, then it gets set (to ensure valid and compatible HTML)
   *   - numbers: ensures that the attribute only contains numeric characters
   *   - any: allows anything to pass
   */
  "tags": {
    "tr": { "unwrap": 1 },
    "strike": { "unwrap": 1 },
    "form": { "unwrap": 1 },
    "rt": { "rename_tag": "span" },
    "code": {},
    "acronym": { "rename_tag": "span" },
    "br": {
      "add_class": {
        "clear": "clear_br"
      }
    },
    "details": { "unwrap": 1 },
    "em": {},
    "title": { "remove": 1 },
    "multicol": { "unwrap": 1 },
    "figure": { "unwrap": 1 },
    "xmp": { "unwrap": 1 },
    "small": {
      "rename_tag": "span",
      "set_class": "wysiwyg-font-size-smaller",
      "check_attributes": {
        "id": "numbers"
      }
    },
    "area": { "remove": 1 },
    "time": { "unwrap": 1 },
    "dir": { "rename_tag": "ul" },
    "bdi": { "unwrap": 1 },
    "command": { "unwrap": 1 },
    "ul": {
      "check_attributes": {
        "id": "any"
      }
    },
    "progress": { "rename_tag": "span" },
    "dfn": { "unwrap": 1 },
    "iframe": { "remove": 1 },
    "figcaption": { "unwrap": 1 },
    "a": { "unwrap": 1 },
    // NOTE(review): "remove_tag" is not among the options documented above;
    // "remove" or "unwrap" may have been intended — confirm. Left unchanged.
    "img": { "remove_tag": 1 },
    "rb": { "unwrap": 1 },
    "footer": wysihtml5ParserRulesDefaults.makeDiv,
    "noframes": { "remove": 1 },
    "abbr": { "unwrap": 1 },
    "u": {},
    "bgsound": { "remove": 1 },
    "sup": { "unwrap": 1 },
    "address": { "unwrap": 1 },
    "basefont": { "remove": 1 },
    "nav": { "unwrap": 1 },
    "h1": { "rename_tag": "strong" },
    "h2": { "rename_tag": "strong" },
    "h3": { "rename_tag": "strong" },
    "h4": { "rename_tag": "strong" },
    "h5": { "rename_tag": "strong" },
    "h6": { "rename_tag": "strong" },
    "head": { "unwrap": 1 },
    "tbody": { "unwrap": 1 },
    "dd": { "unwrap": 1 },
    "s": { "unwrap": 1 },
    "li": {},
    "td": { "unwrap": 1 },
    "object": { "remove": 1 },
    "div": {
      "one_of_type": {
        "alignment_object": 1
      },
      "remove_action": "unwrap",
      "keep_styles": {
        "textAlign": 1,
        "float": 1
      },
      "add_style": {
        "align": "align_text"
      },
      "check_attributes": {
        "id": "any",
        "contenteditable": "any"
      }
    },
    "option": { "remove": 1 },
    "select": { "remove": 1 },
    "i": {},
    "track": { "remove": 1 },
    "wbr": { "remove": 1 },
    "fieldset": { "unwrap": 1 },
    "big": {
      "rename_tag": "span",
      "set_class": "wysiwyg-font-size-larger"
    },
    "button": { "remove": 1 },
    "noscript": { "remove": 1 },
    "svg": { "remove": 1 },
    "input": { "remove": 1 },
    "table": { "unwrap": 1 },
    "keygen": { "remove": 1 },
    "meta": { "remove": 1 },
    "map": { "remove": 1 },
    "isindex": { "remove": 1 },
    "mark": { "unwrap": 1 },
    "caption": wysihtml5ParserRulesDefaults.blockLevelEl,
    "tfoot": wysihtml5ParserRulesDefaults.blockLevelEl,
    "base": { "remove": 1 },
    "video": { "remove": 1 },
    "strong": {},
    "canvas": { "remove": 1 },
    "output": { "unwrap": 1 },
    "marquee": { "unwrap": 1 },
    "b": {},
    "q": {
      "check_attributes": {
        "cite": "url",
        "id": "any"
      }
    },
    "applet": { "remove": 1 },
    "rp": { "unwrap": 1 },
    "spacer": { "remove": 1 },
    "source": { "remove": 1 },
    "aside": wysihtml5ParserRulesDefaults.makeDiv,
    "frame": { "remove": 1 },
    "section": wysihtml5ParserRulesDefaults.makeDiv,
    "body": { "unwrap": 1 },
    "ol": {},
    "nobr": { "unwrap": 1 },
    "html": { "unwrap": 1 },
    "summary": { "unwrap": 1 },
    "var": { "unwrap": 1 },
    "del": { "unwrap": 1 },
    "blockquote": {
      "keep_styles": {
        "textAlign": 1,
        "float": 1
      },
      "add_style": {
        "align": "align_text"
      },
      "check_attributes": {
        "cite": "url",
        "id": "any"
      }
    },
    "device": { "remove": 1 },
    "meter": { "unwrap": 1 },
    "textarea": { "unwrap": 1 },
    "embed": { "remove": 1 },
    "hgroup": { "unwrap": 1 },
    "font": {
      "rename_tag": "span",
      "add_class": {
        "size": "size_font"
      }
    },
    "tt": { "unwrap": 1 },
    "noembed": { "remove": 1 },
    "thead": {
      "add_style": {
        "align": "align_text"
      },
      "check_attributes": {
        "id": "any"
      }
    },
    "blink": { "unwrap": 1 },
    "plaintext": { "unwrap": 1 },
    "xml": { "remove": 1 },
    "style": { "remove": 1 },
    "param": { "remove": 1 },
    "th": {
      "check_attributes": {
        "rowspan": "numbers",
        "colspan": "numbers",
        "valign": "any",
        "align": "any",
        "id": "any"
      },
      "keep_styles": {
        "backgroundColor": 1,
        "width": 1,
        "height": 1
      },
      "add_style": {
        "align": "align_text"
      }
    },
    "legend": { "unwrap": 1 },
    "hr": {},
    "label": { "unwrap": 1 },
    "dl": { "unwrap": 1 },
    "kbd": { "unwrap": 1 },
    "listing": { "unwrap": 1 },
    "dt": { "unwrap": 1 },
    "nextid": { "remove": 1 },
    "pre": {},
    "center": wysihtml5ParserRulesDefaults.makeDiv,
    "audio": { "remove": 1 },
    "datalist": { "unwrap": 1 },
    "samp": { "unwrap": 1 },
    "col": { "remove": 1 },
    "article": wysihtml5ParserRulesDefaults.makeDiv,
    "cite": {},
    "link": { "remove": 1 },
    "script": { "remove": 1 },
    "bdo": { "unwrap": 1 },
    "menu": { "rename_tag": "ul" },
    "colgroup": { "remove": 1 },
    "ruby": { "unwrap": 1 },
    "ins": { "unwrap": 1 },
    "p": {
      "add_class": {
        "align": "align_text"
      }
    },
    "span": {
      "rename_tag": "span",
      "check_attributes": {
        "id": "any",
        "contenteditable": "any"
      }
    },
    "sub": { "unwrap": 1 },
    "comment": { "remove": 1 },
    "frameset": { "remove": 1 },
    "optgroup": { "unwrap": 1 },
    "header": wysihtml5ParserRulesDefaults.makeDiv
  }
};
It works with your config !!
Thanks, it proves there's a way.
I will try to understand which part of your config file resolves my problem.
That is cool. Please share when you find it.
I got it !!
Adding
style: { remove: 1 },
to my config file does the trick.
I also removed the window.wysihtml5ParserPasteRulesets
definition, that seems useless.
Finally, I've got :
/**
 * Final parser rules published on window.wysihtml5ParserRules.
 * Reference: https://github.com/Voog/wysihtml/wiki/Supported-Commands
 */
window.wysihtml5ParserRules = {
  // Let every CSS class through, except the ones blacklisted below.
  "classes": "any",
  "classes_blacklist": {
    "Apple-interchange-newline": 1,
    "MsoNormal": 1,
    "MsoPlainText": 1
  },
  // Per-tag rules: an empty object keeps the tag as-is, "rename_tag" renames it,
  // and "remove" drops the element together with its content.
  "tags": {
    "strong": { "rename_tag": "b" },
    "b": {},
    "i": {},
    "em": { "rename_tag": "i" },
    "u": {},
    "br": {},
    "p": {},
    "div": {},
    "span": {},
    "ul": {},
    "ol": {},
    "li": {},
    "comment": { "remove": 1 },
    // Drops <style> elements and their content.
    "style": { "remove": 1 }
  }
};
My last trouble with the paste from word is special characters at the end, like
l|s�@�
Hi friend, I am using parser_rules/advanced_and_extended.js as my parser rules. It works fine in Firefox and IE, but in Chrome I have the same issue: pasting from Word leaves special characters at the end. Do you have any solution for this issue?
Using the methods outlined in this thread I was able to get rid of most word styles. But I'm facing the same problem as @hmillet. There are some cryptic characters left at the end.
Are there any known workarounds for this problem?
Hi all, does anyone have a solution for the following problem from @hmillet?
"My last trouble with the paste from word is special characters at the end, like
l��|s�@�� "
@jmaicher @Baukino @pratap-vijay @hmillet were you able to figure out the solution for the above mentioned problem of yours?
Unfortunately not, still an open issue.
Still a current issue for me too.
This solved the issue for me: https://github.com/jhollingworth/bootstrap-wysihtml5/issues/7
I have the same problem as @hmillet: special characters are written at the end when copying and pasting from Word: "My last trouble with the paste from word is special characters at the end, like
l��|s�@��"
I solved this problem by adding this override when loading wysihtml5:
/**
 * Override of wysihtml5.dom.getPastedHtml that reads only the 'text/plain'
 * entry of the paste event's clipboard data.
 *
 * @param {Object} event - paste event; its clipboardData property is read if present
 * @returns {string|undefined} the 'text/plain' clipboard data, or undefined
 *     when the event carries no clipboardData
 */
wysihtml5.dom.getPastedHtml = function(event) {
  if (!event.clipboardData) {
    return undefined;
  }
  return event.clipboardData.getData('text/plain');
};
https://github.com/jhollingworth/bootstrap-wysihtml5/issues/314
http://stackoverflow.com/questions/15046225/wysihtml5-copying-text-from-a-word-document-to-the-editor
This solution only gets the plain text and does not keep any styles. If someone finds another solution, I'm happy to hear it.
@jvrodrigo-scnby, what I observed was that, when copying from Word, there is some extra content outside of the html tag. I think this is the reason for the random characters at the end of the pasted content, so I removed all the content after the html tag. I have faced this problem only once, hence I am not sure if this is the right solution.
Hi @Manoj51, thanks for the reply. How did you remove all the content after the html tag?
@jvrodrigo-scnby , I removed the extra content after the html tag in the cleanPastedHTML function in the code.
Hi @Manoj51, do you mean this? https://code.msdn.microsoft.com/ASPNET-MVC-Application-b4b0dc3f/sourcecode?fileId=148365&pathId=2125821849 /**
@author Christopher Blum */ wysihtml5.quirks.cleanPastedHTML = (function() {
/**
 * Builds a case-insensitive RegExp that matches every string EXCEPT the
 * given style value (after trimming).
 *
 * The backslashes of the escaping pattern had been stripped, which left an
 * invalid regular expression ("()*+") and a replacement that no longer
 * escaped anything; both are restored here.
 *
 * @param {string} styleStr - style value to exclude
 * @returns {RegExp} pattern that fails only for a string equal to the trimmed styleStr (ignoring case)
 */
var styleToRegex = function (styleStr) {
  var trimmedStr = wysihtml5.lang.string(styleStr).trim(),
      // Escape regex metacharacters so the value is matched literally;
      // "\\$&" inserts a backslash in front of each matched character.
      escapedStr = trimmedStr.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");

  // Negative lookahead at every position: only a string that is exactly
  // the escaped value cannot be consumed.
  return new RegExp("^((?!^" + escapedStr + "$).)*$", "i");
};
/**
 * Returns a copy of `rules` (made with clone(true)) in which every
 * keep_styles entry that has a truthy counterpart in `exceptStyles` is
 * replaced by the regex produced by styleToRegex for that value.
 * The `rules` argument itself is not assigned to.
 *
 * @param {Object} rules - parser rules; a missing "tags" map is tolerated
 * @param {Object} exceptStyles - style name -> style value to exclude
 * @returns {Object} the adjusted copy of the rules
 */
var extendRulesWithStyleExceptions = function (rules, exceptStyles) {
  var newRules = wysihtml5.lang.object(rules).clone(true),
      tagName,
      styleName,
      keptStyles;

  for (tagName in newRules.tags) {
    if (!newRules.tags.hasOwnProperty(tagName)) {
      continue;
    }
    keptStyles = newRules.tags[tagName].keep_styles;
    if (!keptStyles) {
      continue;
    }
    for (styleName in keptStyles) {
      // Only own entries that actually have an exception value are rewritten.
      if (keptStyles.hasOwnProperty(styleName) && exceptStyles[styleName]) {
        keptStyles[styleName] = styleToRegex(exceptStyles[styleName]);
      }
    }
  }

  return newRules;
};
/**
 * Chooses the rule set to apply to a piece of pasted HTML.
 *
 * Entries are checked in order; the first entry whose `condition` regex
 * matches `html` wins. If none matches, the `set` of the last entry seen
 * without a condition is returned.
 *
 * @param {Array} ruleset - entries of the form { condition?: RegExp, set: Object }
 * @param {string} html - pasted HTML to test the conditions against
 * @returns {Object|null|undefined} the chosen set; null when `ruleset` is
 *     falsy; undefined when nothing matches and no default entry exists
 */
var pickRuleset = function(ruleset, html) {
  var fallbackSet, entry, idx, count;

  if (!ruleset) {
    return null;
  }

  for (idx = 0, count = ruleset.length; idx < count; idx++) {
    entry = ruleset[idx];
    if (!entry.condition) {
      // Unconditional entry: remember it as the default.
      fallbackSet = entry.set;
    } else if (entry.condition.test(html)) {
      return entry.set;
    }
  }

  return fallbackSet;
};
return function(html, options) { var exceptStyles = { 'color': wysihtml5.dom.getStyle("color").from(options.referenceNode), 'fontSize': wysihtml5.dom.getStyle("font-size").from(options.referenceNode) }, rules = extendRulesWithStyleExceptions(pickRuleset(options.rules, html) || {}, exceptStyles), newHtml;
newHtml = wysihtml5.dom.parse(html, {
"rules": rules,
"cleanUp": true, // <span> elements, empty or without attributes, should be removed/replaced with their content
"context": options.referenceNode.ownerDocument,
"uneditableClass": options.uneditableClass,
"clearInternals" : true, // don't paste temprorary selection and other markings
"unjoinNbsps" : true
});
return newHtml;
};
})(); It doesn't work on Windows (Chrome + Word still adds l��|s�@�� strings); on Ubuntu (Chrome + LibreOffice) it works. Thanks for the reply.
I looked into the advanced.js parser rules, but I do not understand how to subsequently add tags that should be allowed.
Assume I start from the simple.js rules, where everything is blocked. How do I know which tags I have to add when copying something from Word?
With the simple rule I get:
What does this tell me to add? Thanks for help.