From c5c98fcf602505e452dab8f8f87aff3a47592af4 Mon Sep 17 00:00:00 2001
From: Kubat <mael.martin31@gmail.com>
Date: Thu, 26 Jan 2023 07:15:31 +0100
Subject: [PATCH] RUST: Add the smallstring crate + makes it 2021 edition

---
Note: the content of lib.rs below was revised after the commit above was
created, so the blob id on its `index` line no longer matches the content.

 src/rust/Cargo.toml             |   1 +
 src/rust/smallstring/Cargo.toml |  11 +++
 src/rust/smallstring/src/lib.rs | 200 ++++++++++++++++++++++++++++++++
 3 files changed, 212 insertions(+)
 create mode 100644 src/rust/smallstring/Cargo.toml
 create mode 100644 src/rust/smallstring/src/lib.rs

diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml
index f8363c81..5b2fdfea 100644
--- a/src/rust/Cargo.toml
+++ b/src/rust/Cargo.toml
@@ -12,6 +12,7 @@ members = [
     "kurisu_api",
     "commons",
     "getset",
+    "smallstring",

     # Clients
     "amadeus",
diff --git a/src/rust/smallstring/Cargo.toml b/src/rust/smallstring/Cargo.toml
new file mode 100644
index 00000000..d88521f6
--- /dev/null
+++ b/src/rust/smallstring/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "smallstring"
+version = "0.1.3"
+authors = ["Jack Fransham <moonfudgeman@hotmail.co.uk>", "Maël MARTIN"]
+description = "'Small string' optimization: store small strings on the stack using smallvec"
+license = "MIT"
+
+edition.workspace = true
+
+[dependencies]
+smallvec = { version = "^1", default-features = false }
diff --git a/src/rust/smallstring/src/lib.rs b/src/rust/smallstring/src/lib.rs
new file mode 100644
index 00000000..9fb47d75
--- /dev/null
+++ b/src/rust/smallstring/src/lib.rs
@@ -0,0 +1,200 @@
+//! `SmallString`: a UTF-8 string whose bytes live in a [`smallvec::SmallVec`],
+//! so short strings can be stored inline instead of in a heap allocation.
+
+use smallvec::{Array, SmallVec};
+use std::{
+    borrow::Borrow,
+    ffi::OsStr,
+    iter::{FromIterator, IntoIterator},
+    ops::Deref,
+    str,
+};
+
+// TODO: FromIterator without having to allocate a String
+/// A string stored as UTF-8 bytes in a [`SmallVec`] with inline buffer type `B`.
+///
+/// Invariant: `buffer` always holds valid UTF-8. The field is private and every
+/// constructor in this file fills it from a `str`, a `String` or encoded
+/// `char`s; the `unsafe` conversions below rely on that.
+pub struct SmallString<B: Array<Item = u8> = [u8; 8]> {
+    buffer: SmallVec<B>,
+}
+
+// `Clone` and `Default` are implemented by hand: deriving them would add
+// `B: Clone` and `B: Default` bounds, and std only implements `Default` for
+// arrays of up to 32 elements, which would exclude larger inline buffers.
+impl<B: Array<Item = u8>> Clone for SmallString<B> {
+    fn clone(&self) -> Self {
+        Self {
+            buffer: self.buffer.clone(),
+        }
+    }
+}
+
+impl<B: Array<Item = u8>> Default for SmallString<B> {
+    /// Creates an empty string.
+    fn default() -> Self {
+        Self {
+            buffer: SmallVec::new(),
+        }
+    }
+}
+
+impl<B: Array<Item = u8>> std::hash::Hash for SmallString<B> {
+    /// Hashes the contents as a `str`, keeping `Hash` consistent with the
+    /// `Borrow<str>` implementation.
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.deref().hash(state)
+    }
+}
+
+impl<B: Array<Item = u8>> std::cmp::PartialEq for SmallString<B> {
+    /// Two small strings are equal when their string contents are equal.
+    fn eq(&self, other: &Self) -> bool {
+        self.deref() == other.deref()
+    }
+}
+
+impl<B: Array<Item = u8>> std::cmp::Eq for SmallString<B> {}
+
+impl<'a, B: Array<Item = u8>> PartialEq<SmallString<B>> for &'a str {
+    /// Allows `"literal" == small_string` comparisons.
+    fn eq(&self, other: &SmallString<B>) -> bool {
+        *self == other.deref()
+    }
+}
+
+impl<B: Array<Item = u8>> std::fmt::Display for SmallString<B> {
+    /// Formats exactly like the underlying `str`.
+    fn fmt(&self, fm: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        std::fmt::Display::fmt(self.deref(), fm)
+    }
+}
+
+impl<B: Array<Item = u8>> std::fmt::Debug for SmallString<B> {
+    /// Delegates to `str`'s `Debug` implementation.
+    fn fmt(&self, fm: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        std::fmt::Debug::fmt(self.deref(), fm)
+    }
+}
+
+impl<'a, B: Array<Item = u8>> From<&'a str> for SmallString<B> {
+    fn from(s: &'a str) -> Self {
+        Self {
+            // Copy the bytes as one slice rather than one item at a time.
+            buffer: SmallVec::from_slice(s.as_bytes()),
+        }
+    }
+}
+
+impl<B: Array<Item = u8>> Deref for SmallString<B> {
+    type Target = str;
+
+    fn deref(&self) -> &str {
+        // SAFETY: `buffer` is only ever filled from a `str`, a `String` or
+        // UTF-8 encoded `char`s, so its bytes are valid UTF-8.
+        unsafe { str::from_utf8_unchecked(&self.buffer) }
+    }
+}
+
+impl<B: Array<Item = u8>> AsRef<str> for SmallString<B> {
+    fn as_ref(&self) -> &str {
+        self.deref()
+    }
+}
+
+/// Adapter turning an iterator of `char`s into an iterator over their UTF-8
+/// bytes.
+struct Utf8Iterator<I> {
+    /// Source of characters.
+    chars: I,
+    /// UTF-8 encoding of the character currently being emitted.
+    buf: [u8; 4],
+    /// Index in `buf` of the next byte to yield.
+    pos: usize,
+    /// Number of bytes of `buf` used by the current character.
+    len: usize,
+}
+
+impl<I: Iterator<Item = char>> Utf8Iterator<I> {
+    pub fn new<In: IntoIterator<IntoIter = I, Item = char>>(into: In) -> Self {
+        Utf8Iterator {
+            chars: into.into_iter(),
+            buf: [0; 4],
+            pos: 0,
+            len: 0,
+        }
+    }
+}
+
+impl<I: Iterator<Item = char>> Iterator for Utf8Iterator<I> {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.pos == self.len {
+            // Current character fully emitted: encode the next one.
+            let chr = self.chars.next()?;
+            self.len = chr.encode_utf8(&mut self.buf).len();
+            self.pos = 0;
+        }
+
+        // `encode_utf8` always writes at least one byte, so `pos < len` here.
+        let byte = self.buf[self.pos];
+        self.pos += 1;
+        Some(byte)
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        // Bytes of the current character that have not been yielded yet.
+        let pending = self.len - self.pos;
+        let (lower, upper) = self.chars.size_hint();
+
+        // Every char encodes to between 1 and 4 bytes. Checked arithmetic
+        // turns an upper bound that would overflow into `None`.
+        (
+            lower.saturating_add(pending),
+            upper
+                .and_then(|n| n.checked_mul(4))
+                .and_then(|n| n.checked_add(pending)),
+        )
+    }
+}
+
+impl FromIterator<char> for SmallString {
+    // Kept on the default buffer type (like `From<String>` below) so that
+    // call sites which leave `B` unannotated keep inferring it.
+    fn from_iter<T: IntoIterator<Item = char>>(into_iter: T) -> Self {
+        SmallString {
+            buffer: Utf8Iterator::new(into_iter).collect(),
+        }
+    }
+}
+
+impl<B: Array<Item = u8>> AsRef<OsStr> for SmallString<B> {
+    fn as_ref(&self) -> &OsStr {
+        OsStr::new(self.deref())
+    }
+}
+
+impl<B: Array<Item = u8>> Borrow<str> for SmallString<B> {
+    fn borrow(&self) -> &str {
+        self.deref()
+    }
+}
+
+impl From<String> for SmallString {
+    /// Takes over the string's bytes via `SmallVec::from_vec`.
+    fn from(s: String) -> SmallString {
+        SmallString {
+            buffer: SmallVec::from_vec(s.into_bytes()),
+        }
+    }
+}
+
+impl<B: Array<Item = u8>> From<SmallString<B>> for String {
+    fn from(s: SmallString<B>) -> String {
+        // SAFETY: `buffer` is only ever filled from a `str`, a `String` or
+        // UTF-8 encoded `char`s, so its bytes are valid UTF-8.
+        unsafe { String::from_utf8_unchecked(s.buffer.into_vec()) }
+    }
+}
--
GitLab