I would like to have a `.lines()` method available on `&WStr`. I have already implemented this in my codebase, so feel free to steal my code (below) and pull it into your codebase. I can also sign a licensing agreement if you need me to.
/// Extension methods for UTF-16 strings, generic over the byte order of the encoding.
pub(crate) trait Utf16StringExtensions<Endianness: ByteOrder + 'static> {
    /// Returns an iterator that yields each line of the string as a borrowed slice.
    ///
    /// A line ends at a line feed (`\n`), optionally preceded by a carriage return
    /// (`\r\n`). The terminator itself is never part of the yielded slice.
    ///
    /// A carriage return that is not immediately followed by a line feed does not
    /// end a line; it stays inside the line that contains it.
    ///
    /// A trailing line ending is optional: a string with a final line ending yields
    /// the same lines as the same string without it.
    fn lines(&self) -> impl Iterator<Item = &WStr<Endianness>>;
}
impl<Endianness> Utf16StringExtensions<Endianness> for WStr<Endianness>
where
    Endianness: ByteOrder + 'static,
{
    /// Returns an iterator over the lines of this borrowed string slice.
    ///
    /// Implemented on the slice type so that any `&WStr` (including the slices
    /// produced by `lines()` itself) can be split without first being copied into
    /// an owned `WString`. The splitting rules are those documented on the trait
    /// method.
    fn lines(&self) -> impl Iterator<Item = &WStr<Endianness>> {
        Lines::new(self)
    }
}

impl<Endianness> Utf16StringExtensions<Endianness> for WString<Endianness>
where
    Endianness: ByteOrder + 'static,
{
    /// Returns an iterator over the lines of this owned string.
    ///
    /// The yielded slices borrow from `self`. The splitting rules are those
    /// documented on the trait method.
    fn lines(&self) -> impl Iterator<Item = &WStr<Endianness>> {
        // `&WString` is coerced to `&WStr` here, so both impls share the same iterator.
        Lines::new(self)
    }
}
/// Iterator state for splitting a UTF-16 string into lines.
///
/// The slices it yields borrow from the original string for `'source`.
struct Lines<'source, Endianness: ByteOrder + 'static> {
    /// The complete string being split; yielded lines are sub-slices of it.
    source: &'source WStr<Endianness>,
    /// Cursor over the `(index, char)` pairs of `source`; the indices are used as
    /// slice bounds into `source`.
    char_indices: WStrCharIndices<'source, Endianness>,
}
impl<'source, Endianness: ByteOrder + 'static> Lines<'source, Endianness> {
    /// Creates a line iterator positioned at the first character of `source`.
    fn new(source: &'source WStr<Endianness>) -> Self {
        let char_indices = source.char_indices();
        Self {
            source,
            char_indices,
        }
    }
}
impl<'source, Endianness> Iterator for Lines<'source, Endianness>
where
    Endianness: ByteOrder + 'static,
{
    type Item = &'source WStr<Endianness>;

    /// Returns the next line without its terminator, or `None` once every
    /// character of the source has been consumed.
    ///
    /// A line ends at `\n`; if that `\n` is immediately preceded by `\r`, the `\r`
    /// is excluded as well. A `\r` that is not followed by `\n` stays in the line.
    /// Empty lines (two consecutive terminators) are yielded as empty slices, and
    /// a final line without a terminator is yielded in full, even when it is a
    /// single character long.
    fn next(&mut self) -> Option<Self::Item> {
        // The indices produced by `char_indices` are treated as byte offsets into
        // `source`, so stepping back over a `\r` means stepping back one UTF-16
        // code unit, i.e. two bytes.
        const BYTE_WIDTH_OF_CARRIAGE_RETURN: usize = 2;

        // Nothing left to read: the previous line (if any) consumed the final
        // terminator, so no trailing empty line is produced.
        let (start, mut ch) = self.char_indices.next()?;
        let mut ch_index = start;
        let mut previous_was_carriage_return = false;

        loop {
            // The first character of the line is inspected like every other one;
            // otherwise a line starting with `\n` or `\r\n` would swallow its own
            // terminator and a one-character final line would be lost.
            match ch {
                '\n' => {
                    let end = if previous_was_carriage_return {
                        // A preceding `\r` lies inside this line, so
                        // `ch_index >= start + 2` and the subtraction cannot underflow.
                        ch_index - BYTE_WIDTH_OF_CARRIAGE_RETURN
                    } else {
                        ch_index
                    };
                    return Some(&self.source[start..end]);
                }
                '\r' => previous_was_carriage_return = true,
                _ => previous_was_carriage_return = false,
            }

            match self.char_indices.next() {
                Some((next_index, next_ch)) => {
                    ch_index = next_index;
                    ch = next_ch;
                }
                // End of input without a terminator: the rest of the string is the
                // final line (a trailing lone `\r` is kept, as documented).
                None => return Some(&self.source[start..self.source.len()]),
            }
        }
    }
}
#[cfg(test)]
mod lines_tests {
    use byteorder::{ByteOrder, BE, LE};
    use utf16string::{WStr, WString};

    use super::Utf16StringExtensions;

    #[test]
    fn can_handle_unix_newlines() {
        let input = "This is a string with a \n newline";
        let expected = ["This is a string with a ", " newline"];
        assert_lines::<BE>(input, &expected);
        assert_lines::<LE>(input, &expected);
    }

    #[test]
    fn doesnt_include_ending_unix_newline() {
        let input = "This is a string \n that terminates in a \\n character. The ending should not be included. \n";
        let expected = [
            "This is a string ",
            " that terminates in a \\n character. The ending should not be included. ",
        ];
        assert_lines::<BE>(input, &expected);
        assert_lines::<LE>(input, &expected);
    }

    #[test]
    fn can_handle_windows_newlines() {
        let input = "This is a string with a \r\n newline";
        let expected = ["This is a string with a ", " newline"];
        assert_lines::<BE>(input, &expected);
        assert_lines::<LE>(input, &expected);
    }

    #[test]
    fn doesnt_include_ending_windows_newline() {
        let input = "This is a string \r\n that terminates in a \\r\\n character. The ending should not be included. \r\n";
        let expected = [
            "This is a string ",
            " that terminates in a \\r\\n character. The ending should not be included. ",
        ];
        assert_lines::<BE>(input, &expected);
        assert_lines::<LE>(input, &expected);
    }

    /// Encodes `input` as UTF-16 with the given byte order, splits it with
    /// `lines()`, and asserts that the result matches `expected` line by line.
    fn assert_lines<Endianness: ByteOrder + 'static>(input: &str, expected: &[&str]) {
        let wstr = WString::<Endianness>::from(input);
        let lines = wstr.lines().collect::<Vec<_>>();
        assert_eq!(
            expected.len(),
            lines.len(),
            "unexpected number of lines for {input:?}"
        );
        for (line_number, (expected_line, line)) in expected.iter().zip(lines).enumerate() {
            assert!(
                are_equal(expected_line, line),
                "line {line_number} of {input:?} does not equal {expected_line:?}"
            );
        }
    }

    /// Returns `true` when `str` and `wstr` consist of the same sequence of characters.
    fn are_equal<Endianness: ByteOrder + 'static>(str: &str, wstr: &WStr<Endianness>) -> bool {
        // `Iterator::eq` compares element-wise and also fails on a length mismatch.
        str.chars().eq(wstr.chars())
    }
}
I would like to have a `.lines()` method available on `&WStr`. I have already implemented this in my codebase, so feel free to steal my code (above) and pull it into your codebase. I can also sign a licensing agreement if you need me to.