EC-Nordbund / denomailer

A SMTP-Client implementation for deno (to send mails!)
https://deno.land/x/denomailer
MIT License
50 stars 16 forks source link

RFC2045 Quoted Printable Encoding. Proposal. #1

Closed suchislife801 closed 2 years ago

suchislife801 commented 2 years ago

Hello. I would like to propose that the body of each message be encoded in Quoted-Printable format. All major email clients do this.

Here is a prototype:

String to Quoted Printable

/**
 * Encode a string as quoted-printable text (RFC 2045, section 6.7).
 *
 * - "=" and every character outside printable ASCII (0x21-0x7E plus space)
 *   is written as "=XX" with upper-case hex digits. A CR or LF that is not
 *   part of a CRLF pair is escaped as well.
 * - CRLF pairs are kept as hard line breaks. A space directly in front of a
 *   CRLF is written as "=20" so that it cannot be stripped in transit.
 * - Each encoded line is folded with "=\r\n" soft breaks into pieces of at
 *   most 75 characters, so no output line exceeds 76 characters including
 *   the trailing "=". An "=XX" escape is never split across two pieces.
 *
 * NOTE(review): only the low 8 bits of each UTF-16 code unit are emitted, so
 * characters above U+00FF cannot be represented faithfully. Switching to
 * UTF-8 bytes would also require the decoder to change - confirm the charset
 * the caller declares before relying on this for non-Latin-1 text.
 *
 * @param {string} str - Text to encode.
 * @returns {string} The quoted-printable representation of `str`.
 */
function str2qp(str) {
  const HEX_DIGITS = '0123456789ABCDEF';

  // Alternatives are tried in order: a space right before CRLF, a CRLF hard
  // break, then any single character that must be escaped.
  const needsEscape = / \r\n|\r\n|[^!-<>-~ ]/g;
  const escaped = str.replace(needsEscape, (match) => {
    if (match.length > 1) {
      // " \r\n" -> "=20\r\n"; a plain "\r\n" is returned untouched.
      return match.replace(' ', '=20');
    }
    const code = match.charCodeAt(0);
    return '=' + HEX_DIGITS[(code >>> 4) & 15] + HEX_DIGITS[code & 15];
  });

  // After escaping, every remaining CRLF is a hard break, so each line can be
  // folded on its own. A piece is 72 characters plus up to 3 more that are not
  // "=", which keeps a piece from ending inside an "=XX" escape.
  const foldPiece = /.{1,72}[^=]{0,3}/g;
  const foldLine = (line) => (line.match(foldPiece) || []).join('=\r\n');

  // Folding per line (instead of over the whole text) keeps CRLF pairs intact
  // and never appends a soft break that would have to be stripped afterwards,
  // so text ending in CRLF is preserved in full.
  return escaped.split('\r\n').map(foldLine).join('\r\n');
}

Quoted Printable to String

/**
 * Decode quoted-printable text (RFC 2045, section 6.7) back into a string.
 *
 * Soft line breaks are removed first, then every "=XX" escape is replaced by
 * the character whose code is the hex value XX (one UTF-16 code unit per
 * escape, via String.fromCharCode).
 *
 * @param {string} qp - Quoted-printable encoded text.
 * @returns {string} The decoded text.
 */
function qp2str(qp) {
  // A soft line break is "=" at the end of an encoded line. Accept CRLF as
  // well as a bare LF, and tolerate spaces/tabs between "=" and the line
  // break, so text whose line endings were normalised or padded in transit
  // does not keep stray "=" characters.
  const softBreak = /=[ \t]*\r?\n/g;

  // "=" followed by two hex digits. The RFC asks for upper-case digits, but
  // lower-case ones are accepted too, following apparent PHP behavior.
  const hexEscape = /=([0-9A-F]{2})/gi;

  return qp
    .replace(softBreak, '')
    .replace(hexEscape, (_match, hex) => String.fromCharCode(Number.parseInt(hex, 16)));
}

// Sample text/plain message body used by the demo statements further down,
// which print it raw, quoted-printable encoded, and decoded again.
// It is a template literal, so its content is everything between the
// backticks exactly as written, including the line breaks.
let textPlainMimeBody = 
`Good morning, Guest.

This is the text/plain MIME body.
Each text/plain line is no more than 75 characters in length.
That is, 76 in total. 75 + an "=" character used in quoted-printable 
encoding as a soft line break at the very end of each line that
wasn't intentionally meant to break.
75||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||75

Please reach out if you have any questions.

Thank you,

User1
Systems Administrator
XYZ Inc.
P | (555) 555-5555
F | (777) 777-7777
E | user1@example.com`;

// Sample text/html message body used by the demo statements further down,
// which print it raw, quoted-printable encoded, and decoded again.
// It is a template literal, so its content is everything between the
// backticks exactly as written, including the line breaks.
let textHtmlMimeBody = 
`<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
Good morning, Guest.<br/>
<br/>
This is the text/html MIME body.<br/>
Each text/html line is no more than 75 characters in length.<br/>
That is, 76 in total. 75 + an "=" character used in quoted-printable<br/>
encoding as a soft line break at the very end of each line that<br/>
wasn't intentionally meant to break.
75||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||75
<br/>
<br/>
Please reach out if you have any questions.<br/>
<br/>
<br/>
Thank you,<br/><br/>
User1<br/>
Systems Administrator<br/>
XYZ Inc.<br/>
P | (555) 555-5555<br/>
F | (777) 777-7777<br/>
E | user1@example.com
</body>
</html>`;

// Demo driver: prints each sample body as-is, then quoted-printable encoded,
// then encoded and decoded again. Every entry pairs a banner with a function
// producing the text, so each value is computed only when its turn comes and
// the output order matches the banners.
for (const [banner, produceText] of [
  ['\r\n- - - - - - - - - - - - - - - - - - - - - - - - -\r\nDefault: text/plain\r\n', () => textPlainMimeBody],
  ['\r\n- - - - - - - - - - - - - - - - - - - - - - - - -\r\nDefault: text/html\r\n', () => textHtmlMimeBody],
  ['\r\n- - - - - - - - - - - - - - - - - - - - - - - - -\r\nEncoded: text/plain\r\n', () => str2qp(textPlainMimeBody)],
  ['\r\n- - - - - - - - - - - - - - - - - - - - - - - - -\r\nEncoded: text/html\r\n', () => str2qp(textHtmlMimeBody)],
  ['\r\n- - - - - - - - - - - - - - - - - - - - - - - - -\r\nDecoded: text/plain\r\n', () => qp2str(str2qp(textPlainMimeBody))],
  ['\r\n- - - - - - - - - - - - - - - - - - - - - - - - -\r\nDecoded: text/html\r\n', () => qp2str(str2qp(textHtmlMimeBody))],
]) {
  console.log(banner);
  console.log(produceText());
}
mathe42 commented 2 years ago

Thanks! This is one of the things I wanted to look at. Currently there is an option content_encoding which does nothing. Do you think using Quoted Printable in all cases is a good idea? Or are there cases where you want different encodings?

Your code looks good to me, but I need to take a deeper look into this encoding. I currently don't have much time, so a PR would be quite nice! Otherwise, expect an implementation for this in the next 1-2 weeks.

suchislife801 commented 2 years ago

No hurry. Take your time. I'm just glad someone revived this very cool Deno project.

As for using quoted printable in all cases, most email providers will process the message and do the encoding for you, but not all of them do. Every programmer should learn at least one protocol and this is the one I remember choosing.

There is an ocean of RFCs for email and SMTP, but they are consolidated in later revisions.

mathe42 commented 2 years ago

released in 0.10.0