RealRaven2000 / FiltaQuilla

Adds many new mail filter actions to Thunderbird
http://quickfilters.quickfolders.org/filtaquilla.html
GNU General Public License v3.0
88 stars 17 forks source link

Save to file: Allow special characters in file name, e.g. German Umlauts #216

Open salguri opened 1 year ago

salguri commented 1 year ago

Use Filtquilla for a long time. For me it is the best.

My problem When using "save to file" it saves the files (examples) Bericht Altes Forsthaus - Woche 27. Februar - 5. März 2023.pdf Bericht Alle Websites - Freitag 17. März 2023.pdf

as Bericht Altes Forsthaus - Woche 27. Februar - 5. Maerz 2023.pdf Bericht Alle Websites - Freitag 17. Maerz 2023.pdf

and change "ä" to "ae"

Can you please fix it?

RealRaven2000 commented 1 year ago

it was a safety feature for file systems that may break with certain characters, below is the relevant code. The question is - should it be turned off completely risking crashes and Thunderbird hanging if a filter encounters a bad letter and FQ tries to save an illegal file? Or is there another approach - maybe by adding a whitelist of characters you absolutely want to keep?

  /**
   * Removes invalid file name characters
   *
   * @returns a sanitized name to be used as a filename, or a random name
   *          if a sanitized name cannot be obtained (if aName contains
   *          no valid characters).
   */
  function _sanitizeName(aName, includesExtension=false) {
    const prefs = Services.prefs.getBranch("extensions.filtaquilla."); 
    let chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789()_-+'!%" + (includesExtension ? "." : ""),
        maxLength = prefs.getIntPref("fileNames.maxLength") || 60;

    let spaceChar = fileNamesSpaceCharacter.substring(0,1);
    if (!chars.includes(spaceChar)) {
      chars += spaceChar;
    }

    let str = aName; // .toLowerCase();
    // diacritics
    str = str.replace(/Ä/g, 'Ae');
    str = str.replace(/æ|ǽ|ä/g, 'ae');
    str = str.replace(/À|Á|Â|Ã|Å|Ǻ|Ā|Ă|Ą|Ǎ|А/g, 'A');
    str = str.replace(/à|á|â|ã|å|ǻ|ā|ă|ą|ǎ|ª|а/g, 'a');
    str = str.replace(/Б/g, 'B');
    str = str.replace(/б/g, 'b');
    str = str.replace(/Ç|Ć|Ĉ|Ċ|Č|Ц/g, 'C');
    str = str.replace(/ç|ć|ĉ|ċ|č|ц/g, 'c');
    str = str.replace(/Ð|Ď|Đ/g, 'Dj');
    str = str.replace(/ð|ď|đ/g, 'dj');
    str = str.replace(/Д/g, 'D');
    str = str.replace(/д/g, 'd');
    str = str.replace(/È|É|Ê|Ë|Ē|Ĕ|Ė|Ę|Ě|Е|Ё|Э/g, 'E');
    str = str.replace(/è|é|ê|ë|ē|ĕ|ė|ę|ě|е|ё|э/g, 'e');
    str = str.replace(/Ф/g, 'F');
    str = str.replace(/ƒ|ф/g, 'f');
    str = str.replace(/Ĝ|Ğ|Ġ|Ģ|Г/g, 'G');
    str = str.replace(/ĝ|ğ|ġ|ģ|г/g, 'g');
    str = str.replace(/Ĥ|Ħ|Х/g, 'H');
    str = str.replace(/ĥ|ħ|х/g, 'h');
    str = str.replace(/Ì|Í|Î|Ï|Ĩ|Ī|Ĭ|Ǐ|Į|İ|И/g, 'I');
    str = str.replace(/ì|í|î|ï|ĩ|ī|ĭ|ǐ|į|ı|и/g, 'i');
    str = str.replace(/Ĵ|Й/g, 'J');
    str = str.replace(/ĵ|й/g, 'j');
    str = str.replace(/Ķ|К/g, 'K');
    str = str.replace(/ķ|к/g, 'k');
    str = str.replace(/Ĺ|Ļ|Ľ|Ŀ|Ł|Л/g, 'L');
    str = str.replace(/ĺ|ļ|ľ|ŀ|ł|л/g, 'l');
    str = str.replace(/М/g, 'M');
    str = str.replace(/м/g, 'm');
    str = str.replace(/Ñ|Ń|Ņ|Ň|Н/g, 'N');
    str = str.replace(/ñ|ń|ņ|ň|ʼn|н/g, 'n');
    str = str.replace(/Ö/g, 'Oe');
    str = str.replace(/œ|ö/g, 'oe');
    str = str.replace(/Ò|Ó|Ô|Õ|Ō|Ŏ|Ǒ|Ő|Ơ|Ø|Ǿ|О/g, 'O');
    str = str.replace(/ò|ó|ô|õ|ō|ŏ|ǒ|ő|ơ|ø|ǿ|º|о/g, 'o');
    str = str.replace(/П/g, 'P');
    str = str.replace(/п/g, 'p');
    str = str.replace(/Ŕ|Ŗ|Ř|Р/g, 'R');
    str = str.replace(/ŕ|ŗ|ř|р/g, 'r');
    str = str.replace(/Ś|Ŝ|Ş|Ș|Š|С/g, 'S');
    str = str.replace(/ś|ŝ|ş|ș|š|ſ|с/g, 's');
    str = str.replace(/Ţ|Ț|Ť|Ŧ|Т/g, 'T');
    str = str.replace(/ţ|ț|ť|ŧ|т/g, 't');
    str = str.replace(/Ü/g, 'Ue');
    str = str.replace(/ü/g, 'ue');
    str = str.replace(/Ù|Ú|Û|Ũ|Ū|Ŭ|Ů|Ű|Ų|Ư|Ǔ|Ǖ|Ǘ|Ǚ|Ǜ|У/g, 'U');
    str = str.replace(/ù|ú|û|ũ|ū|ŭ|ů|ű|ų|ư|ǔ|ǖ|ǘ|ǚ|ǜ|у/g, 'u');
    str = str.replace(/В/g, 'V');
    str = str.replace(/в/g, 'v');
    str = str.replace(/Ý|Ÿ|Ŷ|Ы/g, 'Y');
    str = str.replace(/ý|ÿ|ŷ|ы/g, 'y');
    str = str.replace(/Ŵ/g, 'W');
    str = str.replace(/ŵ/g, 'w');
    str = str.replace(/Ź|Ż|Ž|З/g, 'Z');
    str = str.replace(/ź|ż|ž|з/g, 'z');
    str = str.replace(/Æ|Ǽ/g, 'AE');
    str = str.replace(/ß/g, 'ss');
    str = str.replace(/IJ/g, 'IJ');
    str = str.replace(/ij/g, 'ij');
    str = str.replace(/Œ/g, 'OE');
    str = str.replace(/Ч/g, 'Ch');
    str = str.replace(/ч/g, 'ch');
    str = str.replace(/Ю/g, 'Ju');
    str = str.replace(/ю/g, 'ju');
    str = str.replace(/Я/g, 'Ja');
    str = str.replace(/я/g, 'ja');
    str = str.replace(/Ш/g, 'Sh');
    str = str.replace(/ш/g, 'sh');
    str = str.replace(/Щ/g, 'Shch');
    str = str.replace(/щ/g, 'shch');
    str = str.replace(/Ж/g, 'Zh');
    str = str.replace(/ж/g, 'zh');  
    str = str.replace("&","+"); // improve readability
    // special characters    
    let name = str.trim().replace(/ /g, spaceChar); // used to be "-"
    name = name.replace(/[@:\|\/\\\*\?]/g, "-");
    name = name.replace(/[\$"<>,]/g, "").trim();
    name = name.split("").filter(function (el) {
                                   return chars.indexOf(el) != -1;
                                 }).join("");

    if (!name) {
      // Our input had no valid characters - use a random name
      let cl = chars.length - 1;
      for (let i = 0; i < 8; ++i)
        name += chars.charAt(Math.round(Math.random() * cl));
    }

    if (name.length > maxLength) {
      let ext;
      if (includesExtension) {
        let i = name.lastIndexOf(".");
        if (i>0) {
          ext = name.substr(i);
        }
      }
      if (ext) {
        name = name.substring(0, maxLength-ext.length) + ext;
      }
      else {
        name = name.substring(0, maxLength);
      }
    }

    return name;
  }
salguri commented 1 year ago

I think turning off security for filesystems that can't handle certain characters in general isn't a good idea. If exceptions could be defined in a separate whitelist, that would be the way to go. Can I create such a list today?

Otherwise I can live with the "problem". There is only one month with an umlaut in German.

RealRaven2000 commented 1 year ago

I think turning off security for filesystems that can't handle certain characters in general isn't a good idea. If exceptions could be defined in a separate whitelist, that would be the way to go. Can I create such a list today?

Well a whitelist would only be useful if there was a way to add it via the FiltaQuilla Preference dialog... I would just need to rework the replacing code with a map and then remove the whitelisted characters in real time.

salguri commented 1 year ago

That would of course be a perfect solution! If that's possible, I'm very happy - and I'm sure other users too

RealRaven2000 commented 1 year ago

Here is a trial version that implements the white list feature. All allowed letters can be added to the whitelist string (no delimiters needed)

filtaquilla-3.6.2pre35.zip

I am storing the setting in the new config setting extensions.filtaquilla.fileNames.whiteList

image

But I added the UI for it right here:

image

To try out, download the zip file and drag it into Thunderbird Add-ons Manager to install it.

salguri commented 1 year ago

Thanks for the preview:

Changed to:

_filtaquilla_2023-03-19_143233

But: Same result: "ä" is converted to "ae". still the same.

can not find the dialog "Advanded, fileName.white.List" (your picture 2)

RealRaven2000 commented 1 year ago

Maybe try restarting, with pre 35 I managed to save an attachment named "Frühe Äpfel - Laebs.txt":

image

The original name was (note how æ was replaced with ae): Frühe Äpfel - Læbs.txt

My whitelisted letters: "Äü"

RealRaven2000 commented 1 year ago

PS: the screenshot of the preferences in about:config is from Thunderbird Settings / General / [Config Editor...] (bottom right)

RealRaven2000 commented 1 year ago

Tested again this time with the lowercase Umlaut ä, for some reason it behaves differently here and replaces it:

image

RealRaven2000 commented 1 year ago

Tested again this time with the lowercase Umlaut ä, for some reason it behaves differently here and replaces it:

* so there must be a reason why it works with Umlaut `ü` but not with `ä` ....

no sorry I tested again, it works for me... strange one.

RealRaven2000 commented 1 year ago

Can you add all Umlauts ÄÖÜäüö to the whitelist and test again please?

RealRaven2000 commented 1 year ago

I did one more test, with these settings:

image

and saved 2 attachments:

image

But maybe you use a different path - is it "Save Message as File" - and does this use the subject line??

RealRaven2000 commented 1 year ago

Saving message as file seems to work as well:

image

salguri commented 1 year ago

Now it works perfect. Used "ÄÖÜäüö" Thank you for making Filtaquilla to be the best for filter cases!

doncherry commented 1 year ago

Just for the sake of completeness: While your list covers the lowercase sharp s <ß>, it's missing the (admittedly rarely used) uppercase sharp s <ẞ>, which should be replaced by \<SS>.

RealRaven2000 commented 1 year ago

yes will do. never knew there was such a thing - back in the day I learned from a friend who worked in typesetting before that you never ever are allowed to is the sharp s in an uppercase heading on account of It looking too much like an uppercase B. the more you know!