RustCrypto / traits

Collection of cryptography-related traits
581 stars 190 forks source link

aead::stream API is very awkward #1305

Closed AndreKR closed 1 year ago

AndreKR commented 1 year ago

I used aead::stream and it was quite painful.

To help with those issues I wrote an encryptor/decryptor pair that wraps aead::stream::EncryptorBE32/DecryptorBE32 and implements Write (for encryption) and Read (for decryption).

Here they are:

Code ```rust use std::cmp::min; use std::collections::VecDeque; use std::io::{ErrorKind, Read, Write}; use anyhow::Result; use chacha20poly1305::aead::stream::{DecryptorBE32, EncryptorBE32}; use chacha20poly1305::KeyInit; use chacha20poly1305::XChaCha20Poly1305; use hkdf::Hkdf; use rand::rngs::OsRng; use rand::RngCore; use sha2::Sha256; struct XChaCha20Poly1305StreamEncryptor { sink: W, chunk_size: u64, encryptor: EncryptorBE32, chunk_buf: Vec, } impl XChaCha20Poly1305StreamEncryptor { // The stream module doesn't seem to have a generate_nonce() function to generate the required // 19-byte nonce fn generate_nonce() -> [u8; 19] { let mut nonce = [0u8; 19]; OsRng.fill_bytes(&mut nonce); nonce } } impl XChaCha20Poly1305StreamEncryptor { pub fn new(mut sink: W, chunk_size: u64, key: &[u8]) -> Result { // Derive an XChaCha20 key let hk = Hkdf::::new(None, key); let mut derived_key = [0u8; 32]; hk.expand(&[], &mut derived_key).expect("invalid length"); // Initialize encryption let cipher = XChaCha20Poly1305::new(&derived_key.into()); let nonce = XChaCha20Poly1305StreamEncryptor::::generate_nonce(); let encryptor = EncryptorBE32::from_aead(cipher, &nonce.into()); // Prepend ciphertext with the chunk size sink.write(&(chunk_size as u64).to_be_bytes())?; // Prepend ciphertext with the nonce sink.write(&nonce)?; Ok(Self { sink, encryptor, chunk_size, chunk_buf: Vec::with_capacity(chunk_size as usize), }) } fn emit_chunk(&mut self) -> std::io::Result<()> { let encrypted = self .encryptor .encrypt_next(self.chunk_buf.as_slice()) .map_err(|e| std::io::Error::new(ErrorKind::Other, e))?; self.sink.write_all(&encrypted)?; self.chunk_buf.clear(); Ok(()) } } impl Write for XChaCha20Poly1305StreamEncryptor { fn write(&mut self, buf: &[u8]) -> std::io::Result { // We only process what fits in one chunk because I believe it's recommended to create // backpressure instead of buffering arbitrary amounts of data let to_read = min(self.chunk_size as usize - self.chunk_buf.len(), buf.len()); self.chunk_buf.extend(&buf[0..to_read]); debug_assert_eq!(self.chunk_buf.capacity(), self.chunk_size as usize); if self.chunk_buf.len() == self.chunk_size as usize { self.emit_chunk()?; } Ok(to_read as usize) } fn flush(&mut self) -> std::io::Result<()> { self.emit_chunk() } } struct XChaCha20Poly1305StreamDecryptor { source: R, chunk_size: u64, decryptor: DecryptorBE32, decrypted_chunk_buf: VecDeque, } impl XChaCha20Poly1305StreamDecryptor { pub fn new(mut source: R, key: &[u8]) -> Result { // Read chunk size and nonce back let mut buf = [0u8; 8]; source.read_exact(&mut buf)?; let chunk_size = u64::from_be_bytes(buf); let mut nonce = [0u8; 19]; source.read_exact(&mut nonce)?; // Derive an XChaCha20 key let hk = Hkdf::::new(None, key); let mut derived_key = [0u8; 32]; hk.expand(&[], &mut derived_key).expect("invalid length"); // Initialize decryption let cipher = XChaCha20Poly1305::new(&derived_key.into()); let decryptor = DecryptorBE32::from_aead(cipher, &nonce.into()); Ok(Self { source, chunk_size, decryptor, decrypted_chunk_buf: VecDeque::with_capacity(chunk_size as usize * 2), }) } } impl Read for XChaCha20Poly1305StreamDecryptor { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let read_chunk_size = self.chunk_size + 16; // Refill the chunk buffer if it holds less than a full chunk (pretty arbitrary) if (self.decrypted_chunk_buf.len() as u64) < self.chunk_size { let mut encrypted_chunk = Vec::with_capacity(read_chunk_size as usize); let _ = (&mut self.source) .take(read_chunk_size) .read_to_end(&mut encrypted_chunk) .map_err(|e| std::io::Error::new(ErrorKind::Other, e))?; // Decrypt if not EOF if encrypted_chunk.len() > 0 { let decrypted = self .decryptor .decrypt_next(encrypted_chunk.as_slice()) .map_err(|e| std::io::Error::new(ErrorKind::Other, e))?; self.decrypted_chunk_buf.extend(decrypted.iter()); } } let drain_count = min(buf.len(), self.decrypted_chunk_buf.len()); for (i, b) in self.decrypted_chunk_buf.drain(0..drain_count).enumerate() { buf[i] = b; } Ok(drain_count) } } #[cfg(test)] mod tests { use std::io::{Cursor, Read, Seek, Write}; use anyhow::Result; use chacha20poly1305::aead::stream::{DecryptorBE32, EncryptorBE32}; use chacha20poly1305::KeyInit; use chacha20poly1305::XChaCha20Poly1305; use hkdf::Hkdf; use sha2::Sha256; use crate::cryptostream::{XChaCha20Poly1305StreamDecryptor, XChaCha20Poly1305StreamEncryptor}; #[test] fn stream_test() -> Result<()> { let passphrase = "correct horse battery staple"; let cleartext = "Das Pferd frisst keinen Gurkensalat"; let chunk_size = 1000; let mut ciphertext = Cursor::new(Vec::new()); let mut encryptor = XChaCha20Poly1305StreamEncryptor::new(&mut ciphertext, chunk_size, passphrase.as_bytes())?; encryptor.write_all(cleartext.as_bytes())?; encryptor.flush()?; ciphertext.rewind()?; let mut ciphertext_decrypted = Vec::new(); let mut decryptor = XChaCha20Poly1305StreamDecryptor::new(ciphertext, passphrase.as_bytes())?; let _ = decryptor.read_to_end(&mut ciphertext_decrypted)?; assert_eq!(ciphertext_decrypted, cleartext.as_bytes()); Ok(()) } // This shows how to use aead::stream without using the wrappers above #[test] fn aead_stream_demo() -> Result<()> { let passphrase = "correct horse battery staple"; let cleartext = "Das Pferd frisst keinen Gurkensalat"; let chunk_size = 10; let mut ciphertext = Cursor::new(Vec::new()); // Derive an XChaCha20 key let hk = Hkdf::::new(None, passphrase.as_bytes()); let mut key = [0u8; 32]; hk.expand(&[], &mut key).expect("invalid length"); // Initialize encryption let cipher = XChaCha20Poly1305::new(&key.into()); let nonce = XChaCha20Poly1305StreamEncryptor::<()>::generate_nonce(); let mut stream_encryptor = EncryptorBE32::from_aead(cipher, &nonce.into()); // Prepend ciphertext with the chunk size ciphertext.write(&(chunk_size as u64).to_be_bytes())?; // Prepend ciphertext with the nonce ciphertext.write(&nonce)?; // Run encryption let mut data = Cursor::new(cleartext.as_bytes()); let mut buf = Vec::with_capacity(chunk_size); loop { buf.clear(); let read_count = (&mut data).take(chunk_size as u64).read_to_end(&mut buf)?; if read_count == 0 { // this indicates EOF break; } // This works - contrary to examples I found on the internet - because: // - Calling `encrypt_last()` is not actually necessary, `encrypt_next()` until the end // will do. // - Encrypting empty chunks is fine. // - Calling `decrypt_last()` is not necessary either, `decrypt_next()` until the end is // fine. // If those conditions weren't true, usage would be much more complicated. // And by the way `encrypt()` and `decrypt()` should never be called I think, I don't // know why they exist. let encrypted = stream_encryptor.encrypt_next(buf.as_slice())?; ciphertext.write(&encrypted)?; } ciphertext.rewind()?; let mut ciphertext_decrypted = Vec::new(); // Read chunk size and nonce back let mut buf = [0u8; 8]; ciphertext.read_exact(&mut buf)?; let chunk_size = u64::from_be_bytes(buf); let mut nonce = [0u8; 19]; ciphertext.read_exact(&mut nonce)?; // Derive an XChaCha20 key let hk = Hkdf::::new(None, passphrase.as_bytes()); let mut key = [0u8; 32]; hk.expand(&[], &mut key).expect("invalid length"); // Initialize decryption let cipher = XChaCha20Poly1305::new(&key.into()); let mut stream_decryptor = DecryptorBE32::from_aead(cipher, &nonce.into()); let mut buf = Vec::with_capacity((chunk_size + 16) as usize); loop { buf.clear(); let read_count = (&mut ciphertext).take(chunk_size + 16).read_to_end(&mut buf)?; if read_count == 0 { // We have processed the last chunk, there are no more chunks in the ciphertext break; } let mut decrypted = stream_decryptor.decrypt_next(buf.as_slice())?; ciphertext_decrypted.append(&mut decrypted); } assert_eq!(ciphertext_decrypted, cleartext.as_bytes()); Ok(()) } } ```

A couple of things of note:

Is this something that could have a place as part of RustCrypto? Or does it make more sense to publish it as a crate of my own? Or is it just a bad idea in general? (I might be missing something here.)

tarcieri commented 1 year ago

You've more or less reinvented https://github.com/RustCrypto/nacl-compat/tree/master/crypto_secretstream

There are some pretty big drawbacks to that design though which make STREAM much more flexible: when used with fixed-sized segments, STREAM permits random access with zero framing overhead. This is not possible with a crypto_secretstream-like protocol, which is more like a sequence of framed packets which can only be processed in-order.

As another point of feedback: the reason aead::stream API doesn't provide a generate_nonce function is because the nonce prefixes are so small they risk potential collisions and nonce reuse, which is catastrophic with any online authentication scheme.

I guess we've discussed patterns for initializing STREAM quite a bit on Zulip but don't have a tracking issue for it. Both the Tink paper and https://eprint.iacr.org/2020/1019.pdf discuss methods of deriving a unique key per stream, which would help address this problem and allow for the use of a random nonce.

tarcieri commented 1 year ago

I opened https://github.com/RustCrypto/traits/issues/1306 to track improving STREAM initialization.

Otherwise, I would suggest using crypto_secretstream if you want a ready-made packetized protocol, especially since it has multiple implementations in many different languages already.

AndreKR commented 1 year ago

You've more or less reinvented https://github.com/RustCrypto/nacl-compat/tree/master/crypto_secretstream

Indeed this didn't come up in my initial research. Probably because I didn't look under nacl-compat because I don't use NaCl and also because the About section really just describes AEAD and nothing about the nature of the streaming it provides.

There are some pretty big drawbacks to that design though which make STREAM much more flexible: when used with fixed-sized segments, STREAM permits random access with zero framing overhead. This is not possible with a crypto_secretstream-like protocol, which is more like a sequence of framed packets which can only be processed in-order.

If the underlying En-/DecryptorBE32 allow random access, my wrappers could easily implement Seek but I don't think they do - encrypt_next() increments a position counter. If there were encrypt/decrypt functions that take a position parameter and carry documentation that tells the user how to calculate the chunks (+ 16 bytes), it could be a suitable API for random access.

TBH, implementing Read and Write wasn't even my main intention when writing the wrappers, my main intention was to encapsulate all the knowledge that is required to use aead::stream:

As another point of feedback: the reason aead::stream API doesn't provide a generate_nonce function is because the nonce prefixes are so small they risk potential collisions and nonce reuse, which is catastrophic with any online authentication scheme.

It was my understanding that if you use XChaCha20Poly1305 as the cipher you can safely use random nonces because they are long enough? Is that negated by aead::stream?

tarcieri commented 1 year ago

If the underlying En-/DecryptorBE32 allow random access, my wrappers could easily implement Seek but I don't think they do - encrypt_next() increments a position counter.

As the rustdoc notes, these types implement the 𝒟 decryptor and ℰ encryptor objects as described in the STREAM paper, which are explicitly designed to manage the counter for you.

Random access is possible via the StreamPrimitive trait.

That you don't actually need to use encrypt_last() and decrypt_last()

You're adding an empty segment as a simplification. While that works, particularly for the packet-framed case, it's not a zero-cost abstraction: it adds an additional MAC tag, and if you're framing the packets, an additional length prefix.

These may be undesirable in e.g. the file decryption case.

It was my understanding that if you use XChaCha20Poly1305 as the cipher you can safely use random nonces because they are long enough?

It's fine with a cipher with an extended nonce, but not fine for any e.g. IETF AEAD, which is why as #1306 notes it isn't something that should be made into a general-purpose pattern.

AndreKR commented 1 year ago

That you don't actually need to use encrypt_last() and decrypt_last()

You're adding an empty segment as a simplification. While that works, particularly for the packet-framed case, it's not a zero-cost abstraction: it adds an additional MAC tag, and if you're framing the packets, an additional length prefix.

I (in my wrappers) don't actually do that, I detect the end of the stream by the fact that the encrypted stream has no more chunks. In fact that's my point: If you wanted to use decrypt_last() (because it's there and the docs kind of imply you should use it), then you would have to somehow frame or pad your segments because otherwise how would you know that you have arrived at the last one?

tarcieri commented 1 year ago

In the file encryption case, you can either know you're at EOF via the filesystem API, or if you have a footer it can tell you when the encrypted ends

tarcieri commented 1 year ago

I'm going to close this issue.

Your code example is misusing STREAM. The "last block" flag in the nonce is very much a deliberate design decision directly from the STREAM paper: it's used to prevent truncation attacks, where an attacker can trick you into accepting a STREAM which is shorter than the original.

Yes, that makes the API a bit painful, but it's there for a reason.

tarcieri commented 1 year ago

No, it controls how the nonce is computed, so it needs to be known in advance prior to decryption.

Instead, you need to use EOF to detect it, or failing that some outer framing.

It might still be possible to build the sort of abstraction you want on top of STREAM (essentially a buffered Encryptor/Decryptor which operates over fixed-sized segments) but it MUST properly set the last block flag to prevent truncation attacks.

AndreKR commented 1 year ago

Here's an updated version that uses encrypt_last() and decrypt_last() for the last chunk before EOF:

Code ```rust use std::cmp::min; use std::collections::VecDeque; use std::io::{ErrorKind, Read, Write}; use anyhow::Result; use chacha20poly1305::aead::stream::{DecryptorBE32, EncryptorBE32}; use chacha20poly1305::KeyInit; use chacha20poly1305::XChaCha20Poly1305; use hkdf::Hkdf; use rand::rngs::OsRng; use rand::RngCore; use sha2::Sha256; struct XChaCha20Poly1305StreamEncryptor { sink: W, chunk_size: u64, encryptor: Option>, chunk_buf: Vec, } impl XChaCha20Poly1305StreamEncryptor { // The stream module doesn't seem to have a generate_nonce() function to generate the required // 19-byte nonce fn generate_nonce() -> [u8; 19] { let mut nonce = [0u8; 19]; OsRng.fill_bytes(&mut nonce); nonce } } impl XChaCha20Poly1305StreamEncryptor { pub fn new(mut sink: W, chunk_size: u64, key: &[u8]) -> Result { // Derive an XChaCha20 key let hk = Hkdf::::new(None, key); let mut derived_key = [0u8; 32]; hk.expand(&[], &mut derived_key).expect("invalid length"); // Initialize encryption let cipher = XChaCha20Poly1305::new(&derived_key.into()); let nonce = XChaCha20Poly1305StreamEncryptor::::generate_nonce(); let encryptor = EncryptorBE32::from_aead(cipher, &nonce.into()); // Prepend ciphertext with the chunk size sink.write(&(chunk_size as u64).to_be_bytes())?; // Prepend ciphertext with the nonce sink.write(&nonce)?; Ok(Self { sink, encryptor: Some(encryptor), chunk_size, chunk_buf: Vec::with_capacity(chunk_size as usize), }) } fn emit_chunk(&mut self, last: bool) -> std::io::Result<()> { let mut e = self.encryptor.take().unwrap(); let encrypted = if last { e.encrypt_last(self.chunk_buf.as_slice()) .map_err(|e| std::io::Error::new(ErrorKind::Other, e))? } else { let encrypted = e .encrypt_next(self.chunk_buf.as_slice()) .map_err(|e| std::io::Error::new(ErrorKind::Other, e))?; self.encryptor.replace(e); encrypted }; self.sink.write_all(&encrypted)?; self.chunk_buf.clear(); Ok(()) } } impl Write for XChaCha20Poly1305StreamEncryptor { fn write(&mut self, buf: &[u8]) -> std::io::Result { // We only process what fits in one chunk because I believe it's recommended to create // backpressure instead of buffering arbitrary amounts of data let to_read = min(self.chunk_size as usize - self.chunk_buf.len(), buf.len()); self.chunk_buf.extend(&buf[0..to_read]); debug_assert_eq!(self.chunk_buf.capacity(), self.chunk_size as usize); if self.chunk_buf.len() == self.chunk_size as usize { self.emit_chunk(false)?; } Ok(to_read as usize) } fn flush(&mut self) -> std::io::Result<()> { self.emit_chunk(true) } } struct XChaCha20Poly1305StreamDecryptor { source: R, chunk_size: u64, decryptor: Option>, decrypted_chunk_buf: VecDeque, chunk_stash: Vec, } impl XChaCha20Poly1305StreamDecryptor { pub fn new(mut source: R, key: &[u8]) -> Result { // Read chunk size and nonce back let mut buf = [0u8; 8]; source.read_exact(&mut buf)?; let chunk_size = u64::from_be_bytes(buf); let mut nonce = [0u8; 19]; source.read_exact(&mut nonce)?; // Derive an XChaCha20 key let hk = Hkdf::::new(None, key); let mut derived_key = [0u8; 32]; hk.expand(&[], &mut derived_key).expect("invalid length"); // Initialize decryption let cipher = XChaCha20Poly1305::new(&derived_key.into()); let decryptor = DecryptorBE32::from_aead(cipher, &nonce.into()); let mut instance = Self { source, chunk_size, decryptor: Some(decryptor), decrypted_chunk_buf: VecDeque::with_capacity(chunk_size as usize * 2), chunk_stash: Vec::with_capacity(chunk_size as usize + 16), }; // Read the first chunk into the stash instance.chunk_stash = XChaCha20Poly1305StreamDecryptor::read_chunk(&mut instance, chunk_size + 16)?; Ok(instance) } fn read_chunk(&mut self, read_chunk_size: u64) -> Result> { let mut encrypted_chunk = Vec::with_capacity(read_chunk_size as usize); let _ = (&mut self.source) .take(read_chunk_size) .read_to_end(&mut encrypted_chunk)?; Ok(encrypted_chunk) } } impl Read for XChaCha20Poly1305StreamDecryptor { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let read_chunk_size = self.chunk_size + 16; // Refill the chunk buffer if it holds less than a full chunk (pretty arbitrary) if (self.decrypted_chunk_buf.len() as u64) < self.chunk_size { // Already read the next chunk so we can take a peek let next_chunk = self .read_chunk(read_chunk_size) .map_err(|e| std::io::Error::new(ErrorKind::Other, e))?; // Decrypt if not EOF if self.chunk_stash.len() > 0 { let mut d = self.decryptor.take().unwrap(); let decrypted = if next_chunk.len() == 0 { // This is the last chunk before EOF d.decrypt_last(self.chunk_stash.as_slice()) .map_err(|e| std::io::Error::new(ErrorKind::Other, e))? } else { let decrypted = d .decrypt_next(self.chunk_stash.as_slice()) .map_err(|e| std::io::Error::new(ErrorKind::Other, e))?; self.decryptor.replace(d); decrypted }; self.decrypted_chunk_buf.extend(decrypted.iter()); } self.chunk_stash = next_chunk; } let drain_count = min(buf.len(), self.decrypted_chunk_buf.len()); for (i, b) in self.decrypted_chunk_buf.drain(0..drain_count).enumerate() { buf[i] = b; } Ok(drain_count) } } #[cfg(test)] mod tests { use std::io::{Cursor, Read, Seek, Write}; use anyhow::Result; use chacha20poly1305::aead::stream::{DecryptorBE32, EncryptorBE32}; use chacha20poly1305::KeyInit; use chacha20poly1305::XChaCha20Poly1305; use hkdf::Hkdf; use sha2::Sha256; use crate::cryptostream::{XChaCha20Poly1305StreamDecryptor, XChaCha20Poly1305StreamEncryptor}; #[test] fn stream_test() -> Result<()> { let passphrase = "correct horse battery staple"; let cleartext = "Das Pferd frisst keinen Gurkensalat"; let chunk_size = 1000; let mut ciphertext = Cursor::new(Vec::new()); let mut encryptor = XChaCha20Poly1305StreamEncryptor::new(&mut ciphertext, chunk_size, passphrase.as_bytes())?; encryptor.write_all(cleartext.as_bytes())?; encryptor.flush()?; ciphertext.rewind()?; let mut ciphertext_decrypted = Vec::new(); let mut decryptor = XChaCha20Poly1305StreamDecryptor::new(ciphertext, passphrase.as_bytes())?; let _ = decryptor.read_to_end(&mut ciphertext_decrypted)?; assert_eq!(ciphertext_decrypted, cleartext.as_bytes()); Ok(()) } } ```
rjzak commented 1 year ago

I also feel the stream API is really awkward. I have some code using the aead crate, but trying to use streams is confusing. Especially the nonce part, it seems to be circular. You need a nonce for the given stream type, but can't make the stream type without the nonce already. It would be really nice to see an example usage in the docs.

tarcieri commented 1 year ago

Please see #1306 for alternative stream initialization designs