diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml
index f8363c81dc645e9c6c0afcf521c0822eb7734fd5..5b2fdfea8a6c03caf3c7c6cf4a9e0607dec97f47 100644
--- a/src/rust/Cargo.toml
+++ b/src/rust/Cargo.toml
@@ -12,6 +12,7 @@ members = [
     "kurisu_api",
     "commons",
     "getset",
+    "smallstring",
 
     # Clients
     "amadeus",
diff --git a/src/rust/smallstring/Cargo.toml b/src/rust/smallstring/Cargo.toml
new file mode 100644
index 0000000000000000000000000000000000000000..d88521f65e28a96f95b545de767a419f256e3414
--- /dev/null
+++ b/src/rust/smallstring/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "smallstring"
+version = "0.1.3"
+authors = ["Jack Fransham <moonfudgeman@hotmail.co.uk>", "Maƫl MARTIN"]
+description = "'Small string' optimization: store small strings on the stack using smallvec"
+license = "MIT"
+
+edition.workspace = true
+
+[dependencies]
+smallvec = { version = "^1", default-features = false }
diff --git a/src/rust/smallstring/src/lib.rs b/src/rust/smallstring/src/lib.rs
new file mode 100644
index 0000000000000000000000000000000000000000..9fb47d7588420d860adec073f1db39aecc4a3abd
--- /dev/null
+++ b/src/rust/smallstring/src/lib.rs
@@ -0,0 +1,170 @@
+//! `SmallString`: a UTF-8 string type whose bytes are stored in a
+//! [`SmallVec`], following the "small string" optimisation.
+
+use smallvec::{Array, SmallVec};
+use std::{
+    borrow::Borrow,
+    ffi::OsStr,
+    iter::{FromIterator, IntoIterator},
+    ops::Deref,
+    str,
+};
+
+/// A UTF-8 string backed by a [`SmallVec`] of bytes.
+///
+/// Invariant: `buffer` always contains valid UTF-8. It is only ever filled
+/// from a `&str`, a `String`, or UTF-8 encoded `char`s, and it is never
+/// handed out mutably; the `unsafe` conversions in this file rely on that.
+#[derive(Clone, Default)]
+pub struct SmallString<B: Array<Item = u8> = [u8; 8]> {
+    buffer: SmallVec<B>,
+}
+
+impl<B: Array<Item = u8>> std::hash::Hash for SmallString<B> {
+    /// Hashes the contents as a `str`, which keeps the hash in agreement
+    /// with the `Borrow<str>` implementation.
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.deref().hash(state)
+    }
+}
+
+impl<B: Array<Item = u8>> std::cmp::PartialEq for SmallString<B> {
+    fn eq(&self, other: &Self) -> bool {
+        self.deref() == other.deref()
+    }
+}
+
+impl<B: Array<Item = u8>> std::cmp::Eq for SmallString<B> {}
+
+impl<'a, B: Array<Item = u8>> PartialEq<SmallString<B>> for &'a str {
+    fn eq(&self, other: &SmallString<B>) -> bool {
+        *self == other.deref()
+    }
+}
+
+impl<B: Array<Item = u8>> std::fmt::Display for SmallString<B> {
+    fn fmt(&self, fm: &mut std::fmt::Formatter) -> std::fmt::Result {
+        std::fmt::Display::fmt(self.deref(), fm)
+    }
+}
+
+impl<B: Array<Item = u8>> std::fmt::Debug for SmallString<B> {
+    fn fmt(&self, fm: &mut std::fmt::Formatter) -> std::fmt::Result {
+        std::fmt::Debug::fmt(self.deref(), fm)
+    }
+}
+
+impl<'a, B: Array<Item = u8>> From<&'a str> for SmallString<B> {
+    fn from(s: &'a str) -> Self {
+        Self {
+            buffer: SmallVec::from_slice(s.as_bytes()),
+        }
+    }
+}
+
+impl<B: Array<Item = u8>> Deref for SmallString<B> {
+    type Target = str;
+
+    fn deref(&self) -> &str {
+        // SAFETY: `buffer` is only ever filled with the bytes of valid
+        // strings (see the invariant documented on `SmallString`).
+        unsafe { str::from_utf8_unchecked(&self.buffer) }
+    }
+}
+
+impl AsRef<str> for SmallString {
+    fn as_ref(&self) -> &str {
+        // Deref coercion: reuses the single `unsafe` block in `Deref`.
+        self
+    }
+}
+
+/// Adapter that turns an iterator of `char`s into an iterator over their
+/// UTF-8 bytes.
+///
+/// Field `0` is the source of `char`s; field `1` holds the bytes of the most
+/// recently encoded `char` that have not been yielded yet.
+struct Utf8Iterator<I>(I, Option<smallvec::IntoIter<[u8; 4]>>);
+
+impl<I: Iterator<Item = char>> Utf8Iterator<I> {
+    pub fn new<In: IntoIterator<IntoIter = I, Item = char>>(into: In) -> Self {
+        Utf8Iterator(into.into_iter(), None)
+    }
+}
+
+impl<I: Iterator<Item = char>> Iterator for Utf8Iterator<I> {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Finish the bytes left over from the previous `char` first.
+        if let Some(byte) = self.1.as_mut().and_then(|rest| rest.next()) {
+            return Some(byte);
+        }
+
+        // Encode the next `char`: its first byte is returned right away, the
+        // remaining ones are kept for the following calls.
+        let chr = self.0.next()?;
+        let mut dest = [0u8; 4];
+        let encoded = chr.encode_utf8(&mut dest);
+        let mut bytes = SmallVec::<[u8; 4]>::from_slice(encoded.as_bytes()).into_iter();
+        let first = bytes.next();
+        self.1 = Some(bytes);
+        first
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        // Bytes of the current `char` that are still waiting to be yielded.
+        let pending = self.1.as_ref().map_or(0, |rest| rest.len());
+        let (lower, upper) = self.0.size_hint();
+
+        // Every remaining `char` encodes to between 1 and 4 bytes. An upper
+        // bound that does not fit in a `usize` is reported as unknown rather
+        // than overflowing.
+        (
+            lower.saturating_add(pending),
+            upper
+                .and_then(|n| n.checked_mul(4))
+                .and_then(|n| n.checked_add(pending)),
+        )
+    }
+}
+
+impl FromIterator<char> for SmallString {
+    fn from_iter<T: IntoIterator<Item = char>>(into_iter: T) -> Self {
+        // The UTF-8 bytes are streamed straight into the buffer, so no
+        // intermediate `String` is allocated.
+        SmallString {
+            buffer: Utf8Iterator::new(into_iter).collect(),
+        }
+    }
+}
+
+impl AsRef<OsStr> for SmallString {
+    fn as_ref(&self) -> &OsStr {
+        OsStr::new(self.deref())
+    }
+}
+
+impl Borrow<str> for SmallString {
+    fn borrow(&self) -> &str {
+        // Deref coercion: reuses the single `unsafe` block in `Deref`.
+        self
+    }
+}
+
+impl From<String> for SmallString {
+    fn from(s: String) -> SmallString {
+        // A `String` is valid UTF-8, so its bytes uphold the invariant.
+        SmallString {
+            buffer: SmallVec::from_vec(s.into_bytes()),
+        }
+    }
+}
+
+impl From<SmallString> for String {
+    fn from(s: SmallString) -> String {
+        // SAFETY: `buffer` is only ever filled with the bytes of valid
+        // strings (see the invariant documented on `SmallString`).
+        unsafe { String::from_utf8_unchecked(s.buffer.into_vec()) }
+    }
+}