Hopding / pdf-lib

Create and modify PDF documents in any JavaScript environment
https://pdf-lib.js.org
MIT License
6.77k stars 647 forks source link

Error: WinAnsi cannot encode #548

Closed MarcGodard closed 4 years ago

MarcGodard commented 4 years ago

Hi, I noticed the following error in my error logs.

Error: WinAnsi cannot encode "
"
    at Encoding.encodeUnicodeCodePoint (/app/node_modules/@pdf-lib/standard-fonts/lib/Encoding.js:22:23)
    at StandardFontEmbedder.encodeTextAsGlyphs (/app/node_modules/pdf-lib/cjs/core/embedders/StandardFontEmbedder.js:82:41)
    at StandardFontEmbedder.widthOfTextAtSize (/app/node_modules/pdf-lib/cjs/core/embedders/StandardFontEmbedder.js:35:27)
    at PDFFont.widthOfTextAtSize (/app/node_modules/pdf-lib/cjs/api/PDFFont.js:53:30)
    at splitIfBigger (/app/api/services/shipping-logs/shipping-logs.class.js:192:35)
    at Object.get (/app/api/services/shipping-logs/shipping-logs.class.js:141:5)
    at runMicrotasks (<anonymous>)
    at processTicksAndRejections (internal/process/task_queues.js:97:5)
    at async /app/api/services/orders/hooks/orders.send-email.js:194:37

I assume it is because someone tried to add a non-existing font by copying and pasting text into an input field. What could be the solution?

Hopding commented 4 years ago

@MarcGodard You can access the characters supported by a given font with PDFFont.getCharacterSet(). You could then filter out any input characters that are not supported by your font before attempting to draw text with it. You could also substitute them, throw errors, or any number of things. Really depends on your use case.

I hope this helps!

MarcGodard commented 3 years ago

@Hopding I have implemented a system that checks the chars and removes ones (replaces with a "?") that are not in it, but I am still getting this error occasionally:

Error: WinAnsi cannot encode "
" (0x000a)
    at Encoding.encodeUnicodeCodePoint (/app/node_modules/@pdf-lib/standard-fonts/lib/Encoding.js:23:23)
    at StandardFontEmbedder.encodeTextAsGlyphs (/app/node_modules/pdf-lib/cjs/core/embedders/StandardFontEmbedder.js:88:41)
    at StandardFontEmbedder.widthOfTextAtSize (/app/node_modules/pdf-lib/cjs/core/embedders/StandardFontEmbedder.js:36:27)
    at PDFFont.widthOfTextAtSize (/app/node_modules/pdf-lib/cjs/api/PDFFont.js:53:30)

Any idea why?

MarcelloTheArcane commented 3 years ago

@MarcGodard That's the Unicode character for a newline (I know because it just happened to me).

You can try replacing it with a space character, or else split the text and add the paragraphs separately:

// Split on '\n' and hand each paragraph to drawText on its own, so the
// newline character itself is never passed to drawText.
for (const paragraph of string.split('\n')) {
  page.drawText(paragraph)
}
Gricardov commented 3 years ago

@Hopding I have implemented a system that checks the chars and removes ones (replaces with a "?") that are not in it, but I am still getting this error occasionally:

Error: WinAnsi cannot encode "
" (0x000a)
    at Encoding.encodeUnicodeCodePoint (/app/node_modules/@pdf-lib/standard-fonts/lib/Encoding.js:23:23)
    at StandardFontEmbedder.encodeTextAsGlyphs (/app/node_modules/pdf-lib/cjs/core/embedders/StandardFontEmbedder.js:88:41)
    at StandardFontEmbedder.widthOfTextAtSize (/app/node_modules/pdf-lib/cjs/core/embedders/StandardFontEmbedder.js:36:27)
    at PDFFont.widthOfTextAtSize (/app/node_modules/pdf-lib/cjs/api/PDFFont.js:53:30)

Any idea why?

Hi. Could you please share your implementation? I'm having the same problem

michchan commented 2 years ago

I also encountered that here. I got a workaround for that (mentioned in the issue).

MarcGodard commented 1 year ago

This is how I filter. Any suggested improvements?

/**
 * Replaces every character that the given font cannot encode with "?".
 *
 * A character is kept only if it is reported by `font.getCharacterSet()`,
 * is outside the Unicode private-use area (U+E000-U+F8FF), and passes the
 * allow-list regex below. Anything else becomes a single "?".
 *
 * @param {string} string - Text to sanitise. `null`/`undefined` yield ''.
 * @param {{ getCharacterSet: function(): Array<number|string> }} font -
 *   Font-like object. pdf-lib's PDFFont.getCharacterSet() is documented as
 *   returning numeric Unicode code points; single-character strings are
 *   accepted as well.
 * @returns {string} A new string containing only characters that passed
 *   every check, with "?" substituted for the rest.
 */
function filterCharSet (string, font) {
  const text = string == null ? '' : String(string)

  // A Set gives O(1) membership tests instead of Array#includes per character.
  const supported = new Set(font.getCharacterSet())

  // Strings are immutable: assigning to string[i] does nothing (and throws in
  // strict mode), so the result has to be accumulated into a new string.
  let filtered = ''
  // for...of walks whole code points, so an astral character (e.g. an emoji)
  // is looked up once and collapses to a single '?'.
  for (const char of text) {
    const isSupported = supported.has(char.codePointAt(0)) || supported.has(char)
    filtered += isSupported ? char : '?'
  }

  filtered = filtered.replace(/[\uE000-\uF8FF]/g, '?')
  // Note: inside this class `)-;` is a range (U+0029..U+003B), so it also
  // admits * + , - . / 0-9 and : in addition to the characters spelled out.
  filtered = filtered.replace(/[^\w\s!?{}()-;:"'*@#$%&+=]/g, '?')
  return filtered
}
Subtletree commented 1 year ago

Based on this https://stackoverflow.com/a/18391901/2511083

Here's what I use; I think the only thing I've added is the final entry, the 'arabic comma'.

// Lookup table used to build the character -> replacement map below.
// Each entry pairs a `base` replacement string with `letters`, a string whose
// individual characters should all be rewritten to that base. The base may be
// more than one character long (e.g. 'AE', 'oe'). The last three entries map
// curly single quotes, curly double quotes and the Arabic comma (U+060C) to
// their plain ASCII counterparts.
// Object.freeze is shallow: the outer array is frozen, the entry objects are not.
const defaultDiacriticsRemovalMap = Object.freeze([
  { base: 'A',  letters: '\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F' },
  { base: 'AA', letters: '\uA732' },
  { base: 'AE', letters: '\u00C6\u01FC\u01E2' },
  { base: 'AO', letters: '\uA734' },
  { base: 'AU', letters: '\uA736' },
  { base: 'AV', letters: '\uA738\uA73A' },
  { base: 'AY', letters: '\uA73C' },
  { base: 'B',  letters: '\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181' },
  { base: 'C',  letters: '\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E' },
  { base: 'D',  letters: '\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779\u00D0' },
  { base: 'DZ', letters: '\u01F1\u01C4' },
  { base: 'Dz', letters: '\u01F2\u01C5' },
  { base: 'E',  letters: '\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E' },
  { base: 'F',  letters: '\u0046\u24BB\uFF26\u1E1E\u0191\uA77B' },
  { base: 'G',  letters: '\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E' },
  { base: 'H',  letters: '\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D' },
  { base: 'I',  letters: '\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197' },
  { base: 'J',  letters: '\u004A\u24BF\uFF2A\u0134\u0248' },
  { base: 'K',  letters: '\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2' },
  { base: 'L',  letters: '\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780' },
  { base: 'LJ', letters: '\u01C7' },
  { base: 'Lj', letters: '\u01C8' },
  { base: 'M',  letters: '\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C' },
  { base: 'N',  letters: '\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4' },
  { base: 'NJ', letters: '\u01CA' },
  { base: 'Nj', letters: '\u01CB' },
  { base: 'O',  letters: '\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C' },
  { base: 'OI', letters: '\u01A2' },
  { base: 'OO', letters: '\uA74E' },
  { base: 'OU', letters: '\u0222' },
  { base: 'OE', letters: '\u008C\u0152' },
  { base: 'oe', letters: '\u009C\u0153' },
  { base: 'P',  letters: '\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754' },
  { base: 'Q',  letters: '\u0051\u24C6\uFF31\uA756\uA758\u024A' },
  { base: 'R',  letters: '\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782' },
  { base: 'S',  letters: '\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784' },
  { base: 'T',  letters: '\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786' },
  { base: 'TZ', letters: '\uA728' },
  { base: 'U',  letters: '\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244' },
  { base: 'V',  letters: '\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245' },
  { base: 'VY', letters: '\uA760' },
  { base: 'W',  letters: '\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72' },
  { base: 'X',  letters: '\u0058\u24CD\uFF38\u1E8A\u1E8C' },
  { base: 'Y',  letters: '\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE' },
  { base: 'Z',  letters: '\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762' },
  { base: 'a',  letters: '\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250' },
  { base: 'aa', letters: '\uA733' },
  { base: 'ae', letters: '\u00E6\u01FD\u01E3' },
  { base: 'ao', letters: '\uA735' },
  { base: 'au', letters: '\uA737' },
  { base: 'av', letters: '\uA739\uA73B' },
  { base: 'ay', letters: '\uA73D' },
  { base: 'b',  letters: '\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253' },
  { base: 'c',  letters: '\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184' },
  { base: 'd',  letters: '\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A' },
  { base: 'dz', letters: '\u01F3\u01C6' },
  { base: 'e',  letters: '\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD' },
  { base: 'f',  letters: '\u0066\u24D5\uFF46\u1E1F\u0192\uA77C' },
  { base: 'g',  letters: '\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F' },
  { base: 'h',  letters: '\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265' },
  { base: 'hv', letters: '\u0195' },
  { base: 'i',  letters: '\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131' },
  { base: 'j',  letters: '\u006A\u24D9\uFF4A\u0135\u01F0\u0249' },
  { base: 'k',  letters: '\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3' },
  { base: 'l',  letters: '\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747' },
  { base: 'lj', letters: '\u01C9' },
  { base: 'm',  letters: '\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F' },
  { base: 'n',  letters: '\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5' },
  { base: 'nj', letters: '\u01CC' },
  { base: 'o',  letters: '\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275' },
  { base: 'oi', letters: '\u01A3' },
  { base: 'ou', letters: '\u0223' },
  { base: 'oo', letters: '\uA74F' },
  { base: 'p',  letters: '\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755' },
  { base: 'q',  letters: '\u0071\u24E0\uFF51\u024B\uA757\uA759' },
  { base: 'r',  letters: '\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783' },
  { base: 's',  letters: '\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B' },
  { base: 't',  letters: '\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787' },
  { base: 'tz', letters: '\uA729' },
  { base: 'u',  letters:  '\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289' },
  { base: 'v',  letters: '\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C' },
  { base: 'vy', letters: '\uA761' },
  { base: 'w',  letters: '\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73' },
  { base: 'x',  letters: '\u0078\u24E7\uFF58\u1E8B\u1E8D' },
  { base: 'y',  letters: '\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF' },
  { base: 'z',  letters: '\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763' },
  { base: "'",  letters: '\u2018\u2019' },
  { base: '"',  letters: '\u201C\u201D' },
  { base: ',',  letters: '\u060C' }
]);

// Flatten the removal table into a single-character -> base lookup so that
// each replacement is one property read. Later table entries overwrite
// earlier ones if a character is listed more than once.
const diacriticsMap = {};
for (const { base, letters } of defaultDiacriticsRemovalMap) {
  // split('') walks UTF-16 code units, matching index-based access.
  for (const unit of letters.split('')) {
    diacriticsMap[unit] = base;
  }
}
Object.freeze(diacriticsMap);

/**
 * Rewrites characters found in `diacriticsMap` to their mapped replacement.
 *
 * Only UTF-16 code units outside U+0000-U+007E are looked up; units with no
 * entry in the map are left untouched.
 *
 * @param {string} str - Text to normalise.
 * @returns {string} A copy of `str` with mapped characters replaced.
 */
export function removeDiacritics (str) {
  const outsideAscii = /[^\u0000-\u007E]/g; //eslint-disable-line no-control-regex
  return str.replace(outsideAscii, (unit) => diacriticsMap[unit] || unit);
}