From e6566653f323f347b44f06c74ed1088af6e62a93 Mon Sep 17 00:00:00 2001 From: Kubat <maelle.martin@proton.me> Date: Fri, 18 Oct 2024 18:35:07 +0200 Subject: [PATCH] NKDB: Specialized structures & db Storage update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Specialized structure for tags: we use a dedicated structure for the tags, which handles the fact that some known tags can only appear once. We also try to avoid allocations when a tag has only one value. - SmallVec to store languages, authors and owners in karas and playlists: there won't be many of them, so we try to avoid allocations as much as possible. - Clean up the code a bit, and use something other than a macro to implement reading and checking a folder of hash-verified files… --- Cargo.lock | 89 ++--- Cargo.toml | 53 ++- amadeus/src/app/bottom_bar.rs | 4 +- amadeus/src/app/kard.rs | 5 +- kurisu_api/src/hash.rs | 47 +-- lektor_nkdb/Cargo.toml | 34 +- lektor_nkdb/src/database/kara.rs | 132 ------- lektor_nkdb/src/database/mod.rs | 1 - lektor_nkdb/src/database/pool.rs | 5 - lektor_nkdb/src/database/update.rs | 72 ++-- lektor_nkdb/src/kara/mod.rs | 87 +++++ lektor_nkdb/src/kara/status.rs | 43 +++ lektor_nkdb/src/kara/tags.rs | 302 +++++++++++++++ lektor_nkdb/src/kara/timestamps.rs | 15 + lektor_nkdb/src/lib.rs | 21 +- lektor_nkdb/src/playlists/mod.rs | 14 +- lektor_nkdb/src/playlists/playlist.rs | 86 ++++- lektor_nkdb/src/storage/disk_storage.rs | 451 ++++++++--------------- lektor_nkdb/src/storage/folder_reader.rs | 115 ++++++ lektor_nkdb/src/storage/mod.rs | 4 + lektor_nkdb/src/storage/test_storage.rs | 7 +- lektor_payloads/src/lib.rs | 2 +- 22 files changed, 960 insertions(+), 629 deletions(-) delete mode 100644 lektor_nkdb/src/database/kara.rs create mode 100644 lektor_nkdb/src/kara/mod.rs create mode 100644 lektor_nkdb/src/kara/status.rs create mode 100644 lektor_nkdb/src/kara/tags.rs create mode 100644 lektor_nkdb/src/kara/timestamps.rs create mode 100644 lektor_nkdb/src/storage/folder_reader.rs diff --git a/Cargo.lock b/Cargo.lock index 16b7f847..24b1948d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1197,7 +1197,7 @@ dependencies = [ [[package]] name = "cosmic-config" version = "0.1.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "atomicwrites", "cosmic-config-derive", @@ -1219,7 +1219,7 @@ dependencies = [ [[package]] name = "cosmic-config-derive" version = "0.1.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "quote", "syn 1.0.109", @@ -1259,7 +1259,7 @@ dependencies = [ [[package]] name = "cosmic-theme" version = "0.1.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "almost", "cosmic-config", @@ -1768,7 +1768,7 @@ checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" dependencies = [ "cfg-if", "libc", - "libredox 0.1.3", + "libredox", "windows-sys 0.59.0", ] @@ -2297,7 +2297,6 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", "allocator-api2", - "serde", ] [[package]] @@ -2411,9
@@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" dependencies = [ "bytes", "futures-channel", @@ -2563,7 +2562,7 @@ dependencies = [ [[package]] name = "iced" version = "0.12.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "dnd", "iced_accessibility", @@ -2581,7 +2580,7 @@ dependencies = [ [[package]] name = "iced_accessibility" version = "0.1.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "accesskit", "accesskit_winit", @@ -2590,7 +2589,7 @@ dependencies = [ [[package]] name = "iced_core" version = "0.12.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "bitflags 2.6.0", "dnd", @@ -2610,7 +2609,7 @@ dependencies = [ [[package]] name = "iced_futures" version = "0.12.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "futures", "iced_core", @@ -2623,7 +2622,7 @@ dependencies = [ [[package]] name = "iced_graphics" version = "0.12.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "bitflags 2.6.0", "bytemuck", @@ -2647,7 +2646,7 @@ dependencies = [ [[package]] name = "iced_renderer" version = "0.12.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "iced_graphics", "iced_tiny_skia", @@ -2659,7 +2658,7 @@ dependencies = [ [[package]] name = "iced_runtime" version = "0.12.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "dnd", "iced_core", @@ -2671,7 +2670,7 @@ dependencies = [ [[package]] name = "iced_style" version = "0.12.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "iced_core", "once_cell", @@ -2681,7 +2680,7 @@ dependencies = [ [[package]] name = "iced_tiny_skia" version = "0.12.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "bytemuck", "cosmic-text", @@ -2698,7 +2697,7 @@ dependencies = [ [[package]] name = "iced_wgpu" version = "0.12.0" -source = 
"git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "as-raw-xcb-connection", "bitflags 2.6.0", @@ -2727,7 +2726,7 @@ dependencies = [ [[package]] name = "iced_widget" version = "0.12.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "dnd", "iced_renderer", @@ -2743,7 +2742,7 @@ dependencies = [ [[package]] name = "iced_winit" version = "0.12.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "dnd", "iced_graphics", @@ -3082,10 +3081,10 @@ dependencies = [ "lektor_utils", "log", "rand", - "regex", "serde", "serde_json", "sha256", + "smallvec", "tokio", "tokio-stream", "url", @@ -3139,7 +3138,7 @@ version = "8.0.1" dependencies = [ "aho-corasick", "futures", - "hashbrown 0.14.5", + "hashbrown 0.15.0", "lektor_payloads", "log", ] @@ -3188,14 +3187,14 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.159" +version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "libcosmic" version = "0.1.0" -source = "git+https://github.com/pop-os/libcosmic.git#8da25f94e9c363c8c6b93284adf4cf6e07bc3a45" +source = "git+https://github.com/pop-os/libcosmic.git#9c62f19e4b80b6bcffde024698015d3a533cb944" dependencies = [ "apply", "ashpd 0.9.2", @@ -3259,17 +3258,6 @@ version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" -[[package]] -name = "libredox" -version = "0.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3af92c55d7d839293953fcd0fda5ecfe93297cfde6ffbdec13b41d99c0ba6607" -dependencies = [ - "bitflags 2.6.0", - "libc", - "redox_syscall 0.4.1", -] - [[package]] name = "libredox" version = "0.1.3" @@ -3843,11 +3831,11 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "orbclient" -version = "0.3.47" +version = "0.3.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52f0d54bde9774d3a51dcf281a5def240c71996bc6ca05d2c847ec8b2b216166" +checksum = "ba0b26cec2e24f08ed8bb31519a9333140a6599b867dac464bb150bdb796fd43" dependencies = [ - "libredox 0.0.2", + "libredox", ] [[package]] @@ -4181,9 +4169,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.87" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3e4daa0dcf6feba26f985457cdf104d4b4256fc5a09547140f3631bb076b19a" +checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" dependencies = [ "unicode-ident", ] @@ -4396,7 +4384,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom", - "libredox 0.1.3", + "libredox", "thiserror", ] @@ -4662,9 +4650,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.14" +version = "0.23.15" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "415d9944693cb90382053259f89fbb077ea730ad7273047ec63b19bc9b160ba8" +checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" dependencies = [ "once_cell", "ring", @@ -4685,9 +4673,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" [[package]] name = "rustls-webpki" @@ -4702,9 +4690,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "rustybuzz" @@ -4816,9 +4804,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.129" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "6dbcf9b78a125ee667ae19388837dd12294b858d101fdd393cb9d5501ef09eb2" dependencies = [ "indexmap", "itoa", @@ -4973,6 +4961,9 @@ name = "smallvec" version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +dependencies = [ + "serde", +] [[package]] name = "smithay-client-toolkit" diff --git a/Cargo.toml b/Cargo.toml index 1e3e7846..5851db28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,17 +44,16 @@ zbus = { version = "*", default-features = false, features = ["tokio"] } chrono = { version = "*", default-features = false, features = ["clock"] } sha256 = { version = "*", default-features = false, features = ["async"] } anyhow = { version = "*", default-features = false, features = ["std"] } -regex = { version = "*", default-features = false, features = ["std"] } -log = "*" -rand = "*" -base64 = "*" -dirs = "*" -open = "*" +log = { version = "*" } +rand = { version = "*" } +base64 = { version = "*" } +dirs = { version = "*" } +open = { version = "*" } derive_more = { version = "*", features = ["full"] } -i18n-embed-fl = "0.9.1" -rust-embed = "8.5.0" +i18n-embed-fl = { version = "*" } +rust-embed = { version = "*" } i18n-embed = { version = "*", features = ["fluent-system", "desktop-requester"] } # Local crates @@ -65,24 +64,22 @@ lektor_payloads = { path = "lektor_payloads" } lektor_procmacros = { path = "lektor_procmacros" } lektor_nkdb = { path = "lektor_nkdb" } lektor_search = { path = "lektor_search" } +kurisu_api = { path = "kurisu_api" } # Data Structures -hashbrown = { version = "*", features = ["serde"] } -async-channel = { version = "*", default-features = false } aho-corasick = { version = "*" } +async-channel = { version = "*", default-features = false } +hashbrown = { version = "*", features = ["serde"] } +smallvec = { version = "*", features = ["serde", "union", "const_new", "const_generics"] } # Serialization & Deserialization -toml = "*" -serde_json = { version = "*", default-features = false, features = [ - "std", "preserve_order" -] } -serde = { version = "*", default-features = false, features = [ - "rc", "std", "derive" -] } +toml = { version = "*" } +serde_json = { version = "*", default-features = false, features = ["std", 
"preserve_order"] } +serde = { version = "*", default-features = false, features = ["rc", "std", "derive"] } # Async stuff -async-trait = "*" -futures-util = "*" +async-trait = { version = "*" } +futures-util = { version = "*" } futures = { version = "*", default-features = false, features = ["std", "async-await"] } tokio-stream = { version = "*", default-features = false, features = ["net"]} tokio = { version = "*", default-features = false, features = [ @@ -90,29 +87,21 @@ tokio = { version = "*", default-features = false, features = [ ] } # Web stuff -reqwest = { version = "*", default-features = false, features = [ - "rustls-tls", - "json", -] } -axum = { version = "*", default-features = false, features = [ - "http1", - "json", - "macros", - "tokio", -] } +reqwest = { version = "*", default-features = false, features = ["rustls-tls", "json"] } +axum = { version = "*", default-features = false, features = ["http1", "json", "macros", "tokio"] } tower = { version = "*", default-features = false, features = ["util"] } hyper-util = { version = "*", default-features = false, features = ["http1", "tokio", "server-auto"] } hyper = { version = "*", default-features = false, features = ["http1", "server"] } # Arguments -roff = "*" +roff = { version = "*" } clap_complete = { version = "*", default-features = false } clap = { version = "*", default-features = false, features = [ "usage", "help", "std", "wrap_help", "derive" ] } # Proc macro things -syn = "*" +syn = { version = "*" } quote = { version = "*", default-features = false } proc-macro2 = { version = "*", default-features = false } proc-macro-error = { version = "*", default-features = false } @@ -125,5 +114,5 @@ proc-macro-error = { version = "*", default-features = false } [workspace.dependencies.libcosmic] git = "https://github.com/pop-os/libcosmic.git" default-features = false -features = [ "dbus-config", "tokio", "winit", "wgpu", "single-instance" ] +features = ["dbus-config", "tokio", "winit", "wgpu", "single-instance"] diff --git a/amadeus/src/app/bottom_bar.rs b/amadeus/src/app/bottom_bar.rs index f4d15607..0510670c 100644 --- a/amadeus/src/app/bottom_bar.rs +++ b/amadeus/src/app/bottom_bar.rs @@ -15,7 +15,7 @@ use cosmic::{ style, theme, widget::{self, tooltip::Position}, }; -use lektor_payloads::{KId, Kara}; +use lektor_payloads::{KId, Kara, Tags}; fn view_right_part<'a>(kara: &Kara) -> Element<'a, Message> { let Spacing { @@ -75,7 +75,7 @@ fn view_left_part<'a>(kara: &Kara) -> Element<'a, Message> { )) .wrap(Wrap::None); - let source = (kara.tags.get(Kara::TAG_NUMBER).and_then(|num| num.first())) + let source = (kara.tags.get_value(Tags::key_number())) .map(|num| format!("{}{num} - {}", kara.song_type, kara.song_source)) .unwrap_or_else(|| format!("{} - {}", kara.song_type, kara.song_source)) .apply(widget::text::title4) diff --git a/amadeus/src/app/kard.rs b/amadeus/src/app/kard.rs index 3bdb3c05..4889cac5 100644 --- a/amadeus/src/app/kard.rs +++ b/amadeus/src/app/kard.rs @@ -13,14 +13,15 @@ use cosmic::{ style, theme, widget::{self, tooltip::Position}, }; -use lektor_payloads::Kara; +use lektor_payloads::{Kara, Tags}; const KARD_HEIGHT: f32 = 50.0; fn kara_title<'a>(kara: &Kara) -> Element<'a, Message> { widget::column::with_children(vec![ widget::text::title4(kara.song_title.clone()).into(), - (kara.tags.get(Kara::TAG_NUMBER).and_then(|num| num.first())) + kara.tags + .get_value(Tags::key_number()) .map(|num| format!("{}{num} - {}", kara.song_type, kara.song_source)) .unwrap_or_else(|| format!("{} - {}", kara.song_type, 
kara.song_source)) .apply(widget::text::text) diff --git a/kurisu_api/src/hash.rs b/kurisu_api/src/hash.rs index 253479f0..ddda0e49 100644 --- a/kurisu_api/src/hash.rs +++ b/kurisu_api/src/hash.rs @@ -1,14 +1,24 @@ //! Custom sha256 representation to be able to copy the damn thing and don't use stupid strings. -use std::{borrow::Cow, str::FromStr}; +use derive_more::{Debug, Display}; +use std::{borrow::Cow, fmt, str::FromStr}; /// Represent a sha256 digest to try to avoid allocating strings everywhere. -#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, Display, Debug)] +#[display("{:32x}{:32x}", self.higher, self.lower)] +#[debug("{self}")] pub struct SHA256 { higher: u128, lower: u128, } +#[derive(Clone, Display, Debug)] +#[display("{_0}")] +#[debug("{_0}")] +pub struct SHA256Error(Cow<'static, str>); + +impl std::error::Error for SHA256Error {} + impl SHA256 { /// Breaks the sha256 into the higher and lower parts. pub fn into_parts(self) -> (u128, u128) { @@ -23,22 +33,22 @@ impl From<(u128, u128)> for SHA256 { } impl TryFrom<String> for SHA256 { - type Error = Cow<'static, str>; + type Error = SHA256Error; fn try_from(value: String) -> Result<Self, Self::Error> { value.as_str().parse() } } impl FromStr for SHA256 { - type Err = Cow<'static, str>; + type Err = SHA256Error; fn from_str(s: &str) -> Result<Self, Self::Err> { if s.len() != 64 { - return Err(Cow::Borrowed( + return Err(SHA256Error(Cow::Borrowed( "invalid sha256 digest: should be a 64 character long string", - )); + ))); } let (h, l) = s.split_at(32); - let handle = |err| Cow::Owned(format!("invalid sha256 digest: {err}")); + let handle = |err| SHA256Error(Cow::Owned(format!("invalid sha256 digest: {err}"))); Ok(Self { higher: u128::from_str_radix(h, 16).map_err(handle)?, lower: u128::from_str_radix(l, 16).map_err(handle)?, @@ -46,21 +56,9 @@ impl FromStr for SHA256 { } } -impl std::fmt::Display for SHA256 { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:32x}{:32x}", self.higher, self.lower) - } -} - -impl std::fmt::Debug for SHA256 { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:32x}{:32x}", self.higher, self.lower) - } -} - impl serde::Serialize for SHA256 { fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { - serializer.serialize_str(&format!("{self}")) + serializer.serialize_str(&self.to_string()) } } @@ -71,7 +69,7 @@ impl<'de> serde::Deserialize<'de> for SHA256 { impl<'de> Visitor<'de> for Hex64StringOrU126Pair { type Value = SHA256; - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a 64 hex character long string or a u128 pair") } @@ -83,13 +81,6 @@ impl<'de> serde::Deserialize<'de> for SHA256 { } fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> { - static ERROR_MSG: &str = "there should be two u128 in the sequence for the hash"; - if let Some(len) = seq.size_hint() { - if len != 2 { - return Err(serde::de::Error::invalid_length(len, &ERROR_MSG)); - } - } - match (seq.next_element()?, seq.next_element()?) 
{ (Some(higher), Some(lower)) => Ok(SHA256 { higher, lower }), _ => Err(serde::de::Error::custom("expected two u128 in the sequece")), diff --git a/lektor_nkdb/Cargo.toml b/lektor_nkdb/Cargo.toml index e7eaef27..a2ce6311 100644 --- a/lektor_nkdb/Cargo.toml +++ b/lektor_nkdb/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "lektor_nkdb" +name = "lektor_nkdb" description = "New database implementation for lektord (New Kara DataBase)" rust-version.workspace = true @@ -9,20 +9,24 @@ authors.workspace = true license.workspace = true [dependencies] -serde.workspace = true -serde_json.workspace = true -log.workspace = true -url.workspace = true -rand.workspace = true -regex.workspace = true -chrono.workspace = true -sha256.workspace = true -anyhow.workspace = true -hashbrown.workspace = true -derive_more.workspace = true +serde.workspace = true +serde_json.workspace = true +derive_more.workspace = true + tokio.workspace = true futures.workspace = true tokio-stream.workspace = true -kurisu_api = { path = "../kurisu_api" } -lektor_utils = { path = "../lektor_utils" } -lektor_procmacros = { path = "../lektor_procmacros" } + +log.workspace = true +url.workspace = true +rand.workspace = true +chrono.workspace = true +sha256.workspace = true +anyhow.workspace = true + +hashbrown.workspace = true +smallvec.workspace = true + +kurisu_api.workspace = true +lektor_utils.workspace = true +lektor_procmacros.workspace = true diff --git a/lektor_nkdb/src/database/kara.rs b/lektor_nkdb/src/database/kara.rs deleted file mode 100644 index 31747df3..00000000 --- a/lektor_nkdb/src/database/kara.rs +++ /dev/null @@ -1,132 +0,0 @@ -//! Base kara structures definitions. - -use crate::*; -use hashbrown::HashSet; -use kurisu_api::{ - v2::{SongOrigin, SongType}, - SHA256, -}; -use serde::{Deserialize, Serialize}; -use std::sync::Arc; - -/// The status of a kara, it can either be virtual or physical to help with searches in the -/// database and with suggestions. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, Hash)] -pub enum KaraStatus { - /// Is this kara virtual? E.g. a control kara without a concrete existence? If so, then the - /// kara has no file and can't be added to the queue or a playlist, but can be usefull when - /// searching a kara. - Virtual, - - /// If a kara is physical then it has a file that is playable, it can be added to the queue or - /// to a playlist. - Physical { - /// The size of the kara. - filesize: u64, - - /// The hash of the kara file. - hash: SHA256, - }, -} - -/// Time stamps for a kara. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, Hash)] -pub struct KaraTimeStamps { - /// Unix timestamp of the kara creation. - pub created_at: i64, - - /// Unix timestamp of the kara's last significant modification. - pub updated_at: i64, - - /// Epoch of the kara, two karas can't share the same epoch, i.e. we can sort by epoch every - /// object from the database and two can't be from the same epoch. - pub epoch: u64, -} - -/// The kara's data. To ensure backward compatibility with databases versions, every kara should be -/// constructible from this struct definition. -/// -/// TODO: Replace things by short vecs, to avoid too much allocations… -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Kara { - /// The local ID of the kara - pub id: KId, - - /// The remote ID of the kara. - pub remote: RemoteKId, - - /// The title of the song. - pub song_title: String, - - /// The name of the anime, vn, etc from where the song is. 
- pub song_source: String, - - /// A list of kara makers / authors and contributors to the kara. - /// - /// TODO: Replace by a short vec or a tri, or specialized structure. - pub kara_makers: HashSet<Arc<str>>, - - /// The type of the song. - pub song_type: SongType, - - /// The origin of the source. - pub song_origin: SongOrigin, - - /// All languages present in the kara. - /// - /// TODO: Replace by a short vec, or tri, or specialized structure. - pub language: HashSet<Arc<str>>, - - /// The status of the kara, can be usefull to decide if we can insert it into the queue or not. - pub kara_status: KaraStatus, - - /// Time stamps of the kara. - pub timestamps: KaraTimeStamps, - - /// A list of tag. Tags can be value-less, have one value or multiple ones. - /// - /// TODO: Replace by a short vec. Better, replace by a specialized structure… - pub tags: HashMap<Arc<str>, Vec<Arc<str>>>, -} - -impl Kara { - /// Should two kara have the same video file? different from the [PartialEq] because two - /// virtual karas can't share the same file even if the structs are equal. - pub(crate) fn same_file_as(&self, other: &Kara) -> bool { - use KaraStatus::*; - matches!((&self.kara_status, &other.kara_status), - (Physical { hash: h1, .. }, Physical { hash: h2, .. }) if h1.eq(h2) - ) - } - - pub const TAG_NUMBER: &str = "number"; - pub const TAG_VERSION: &str = "version"; -} - -impl std::fmt::Display for Kara { - /// Format for a kara, see if we can do better... - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Self { - song_title, - song_source, - kara_makers, - song_type, - song_origin, - language, - .. - } = self; - - let mut language = language.iter().cloned().collect::<Vec<_>>(); - language.sort_by(|a, b| PartialOrd::partial_cmp(a, b).unwrap()); - let language = language.join(","); - - let mut kara_makers = kara_makers.iter().cloned().collect::<Vec<_>>(); - kara_makers.sort_by(|a, b| PartialOrd::partial_cmp(a, b).unwrap()); - let kara_makers = kara_makers.join(","); - - write!( - f, - "{song_origin} - {language} - {song_source}/{song_title} - {song_type} [{kara_makers}]" - ) - } -} diff --git a/lektor_nkdb/src/database/mod.rs b/lektor_nkdb/src/database/mod.rs index 357442d3..b1a8f763 100644 --- a/lektor_nkdb/src/database/mod.rs +++ b/lektor_nkdb/src/database/mod.rs @@ -2,6 +2,5 @@ //! only a small, but fundational, part of the database system. pub(crate) mod epoch; -pub(crate) mod kara; pub(crate) mod pool; pub(crate) mod update; diff --git a/lektor_nkdb/src/database/pool.rs b/lektor_nkdb/src/database/pool.rs index 5af0585d..9c96a493 100644 --- a/lektor_nkdb/src/database/pool.rs +++ b/lektor_nkdb/src/database/pool.rs @@ -73,11 +73,6 @@ impl Pool { .into_iter() .map(|author| self.get_str_sync(author)) .collect(); - kara.tags = HashMap::from_iter(kara.tags.into_iter().map(|(key, values)| { - let key = self.get_str_sync(key); - let values = values.into_iter().map(|v| self.get_str_sync(v)); - (key, values.collect()) - })); (kid, kara) }); let _ = std::mem::replace(data, EpochData::from_iter(content)); diff --git a/lektor_nkdb/src/database/update.rs b/lektor_nkdb/src/database/update.rs index 80eafe08..ecd83dbc 100644 --- a/lektor_nkdb/src/database/update.rs +++ b/lektor_nkdb/src/database/update.rs @@ -2,10 +2,9 @@ use crate::*; use anyhow::Result; -use futures::future::join_all; -use hashbrown::HashMap; +use futures::prelude::*; use kurisu_api::SHA256; -use std::{cell::RefCell, sync::Arc}; +use std::{cell::RefCell, marker}; /// A pool handle. Used to add new karas to the pool. 
The update logic is as follows: /// - if a kara was not present, we add it and we download the file. @@ -16,34 +15,31 @@ use std::{cell::RefCell, marker}; /// /// The last case is if a kara was present but nothing changed, we don't need to do anything, juste /// mark the kara as present again. -/// -/// NOTE: The handler is not [Send] and should be called from multiple threads. If it is supported -/// one day: -/// ```not_rust -/// impl<'a, Storage: DatabaseStorage> !Send for UpdateHandler<'a, Storage> {} -/// impl<'a, Storage: DatabaseStorage> !Sync for UpdateHandler<'a, Storage> {} -/// ``` #[derive(Debug)] pub struct UpdateHandler<'a, Storage: DatabaseStorage> { pool: &'a Pool, storage: &'a Storage, new_epoch: RefCell<PushVecMutNode<Epoch>>, last_epoch: Option<&'a Epoch>, + + /// This type is not send or sync! + _not_send: marker::PhantomData<*mut ()>, } impl<'a, Storage: DatabaseStorage> UpdateHandler<'a, Storage> { /// Create a new update handler. - pub(crate) async fn new( + pub(crate) fn new( pool: &'a Pool, storage: &'a Storage, node: PushVecMutNode<Epoch>, - last: Option<&'a Epoch>, + last_epoch: Option<&'a Epoch>, ) -> UpdateHandler<'a, Storage> { Self { pool, storage, + last_epoch, new_epoch: RefCell::new(node), - last_epoch: last, + _not_send: marker::PhantomData, } } @@ -134,43 +130,35 @@ impl<'a, Storage: DatabaseStorage> UpdateHandler<'a, Storage> { .get_from_remote(RemoteKId::new(kara.id, repo)) .await; + let get_str = |str: String| self.pool.get_str(str); let (language, kara_makers) = tokio::join!( - join_all((kara.language.into_iter()).map(|str| self.pool.get_str(str))), - join_all((kara.kara_makers.into_iter()).map(|str| self.pool.get_str(str))) + future::join_all((kara.language.into_iter()).map(get_str)), + future::join_all((kara.kara_makers.into_iter()).map(get_str)) ); - let mut tags = HashMap::<Arc<str>, Vec<Arc<str>>>::default(); - for [key, value] in kara.tags { - if value.is_empty() { - tags.entry(self.pool.get_str(key).await) - .or_insert_with(Vec::new); - } else { - let (key, value) = tokio::join!(self.pool.get_str(key), self.pool.get_str(value)); - tags.entry(key).or_insert_with(Vec::new).push(value); - } + // Detect inconsistencies between the virtual flag and the file hash. + if kara.is_virtual && kara.file_hash.is_some() { + log::warn!("kara {remote} has virtual flag and a file hash"); + } else if !kara.is_virtual && kara.file_hash.is_none() { + log::error!("the kara {remote} is not virtual but doesn't have a file hash"); } - let kara_status = match (kara.is_virtual, kara.file_hash) { - (true, None) => KaraStatus::Virtual, - (true, Some(hash)) => { - log::warn!("kara {remote} has virtual flag but a file hash: {hash}"); - KaraStatus::Virtual - } - (false, None) => { - log::error!("the kara {remote} is not virtual but don't have a file hash"); - KaraStatus::Virtual - } - (false, Some(hash)) => KaraStatus::Physical { - filesize: kara.filesize, - hash, - }, - }; - + // Build and add the kara.
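As an aside on the `_not_send: marker::PhantomData<*mut ()>` field introduced above: embedding a raw pointer in `PhantomData` is the usual stable-Rust way to opt a type out of the `Send` and `Sync` auto-traits without storing any data, since negative impls like the ones in the deleted `not_rust` snippet are still unstable. A minimal sketch of the pattern (the type name is illustrative, not from the patch):

```rust
use std::marker::PhantomData;

/// Raw pointers are neither `Send` nor `Sync`, so this zero-sized
/// marker field strips both auto-traits from the containing type.
struct NotThreadSafe {
    _not_send: PhantomData<*mut ()>,
}

fn assert_send<T: Send>() {}
// assert_send::<NotThreadSafe>(); // does not compile, as intended
```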
self.add_kara(Kara { id: id.unwrap_or_else(|| self.pool.next_kid()), - tags, remote, - kara_status, + tags: (kara.tags.into_iter()) + .map(|[key, value]| (key, value)) + .collect(), + kara_status: (kara.is_virtual) + .then_some(KaraStatus::Virtual) + .or_else(|| { + kara.file_hash.map(|hash| KaraStatus::Physical { + filesize: kara.filesize, + hash, + }) + }) + .unwrap_or_default(), song_type: kara.song_type, song_title: kara.song_title, song_source: kara.song_source, diff --git a/lektor_nkdb/src/kara/mod.rs b/lektor_nkdb/src/kara/mod.rs new file mode 100644 index 00000000..bc85dca5 --- /dev/null +++ b/lektor_nkdb/src/kara/mod.rs @@ -0,0 +1,87 @@ +//! Base kara structures definitions. + +pub(crate) mod status; +pub(crate) mod tags; +pub(crate) mod timestamps; + +use crate::*; +use kurisu_api::v2::{SongOrigin, SongType}; +use serde::{Deserialize, Serialize}; +use smallvec::SmallVec; +use std::sync::Arc; + +/// The kara's data. To ensure backward compatibility with database versions, every kara should be +/// constructible from this struct definition. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Kara { + /// The local ID of the kara + pub id: KId, + + /// The remote ID of the kara. + pub remote: RemoteKId, + + /// The title of the song. + pub song_title: String, + + /// The name of the anime, vn, etc from where the song is. + pub song_source: String, + + /// A list of kara makers / authors and contributors to the kara. + pub kara_makers: SmallVec<[Arc<str>; 2]>, + + /// The type of the song. + pub song_type: SongType, + + /// The origin of the source. + pub song_origin: SongOrigin, + + /// All languages present in the kara. + pub language: SmallVec<[Arc<str>; 2]>, + + /// The status of the kara, can be useful to decide if we can insert it into the queue or not. + pub kara_status: KaraStatus, + + /// Time stamps of the kara. + pub timestamps: KaraTimeStamps, + + /// A list of tags. Tags can be value-less, have one value or multiple ones. + pub tags: Tags, +} + +impl Kara { + /// Should two karas have the same video file? Different from the [PartialEq] because two + /// virtual karas can't share the same file even if the structs are equal. + pub(crate) fn same_file_as(&self, other: &Kara) -> bool { + use KaraStatus::*; + matches!((&self.kara_status, &other.kara_status), + (Physical { hash: h1, .. }, Physical { hash: h2, .. }) if h1.eq(h2) + ) + } +} + +impl std::fmt::Display for Kara { + /// Format for a kara, see if we can do better... + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { + song_title, + song_source, + song_type, + song_origin, + .. + } = self; + write!( + f, + "{song_origin} - {language} - {song_source}/{song_title} - {song_type} [{kara_makers}]", + language = { + let mut language = self.language.iter().cloned().collect::<Vec<_>>(); + language.sort_by(|a, b| PartialOrd::partial_cmp(a, b).unwrap()); + language.join(",") + }, + kara_makers = { + let mut kara_makers = self.kara_makers.iter().cloned().collect::<Vec<_>>(); + kara_makers.sort_by(|a, b| PartialOrd::partial_cmp(a, b).unwrap()); + kara_makers.join(",") + }, + ) + } +} diff --git a/lektor_nkdb/src/kara/status.rs b/lektor_nkdb/src/kara/status.rs new file mode 100644 index 00000000..41299335 --- /dev/null +++ b/lektor_nkdb/src/kara/status.rs @@ -0,0 +1,43 @@ +use kurisu_api::SHA256; +use serde::{Deserialize, Serialize}; + +/// The status of a kara, it can either be virtual or physical to help with searches in the +/// database and with suggestions.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, Hash, Default)] +pub enum KaraStatus { + /// Is this kara virtual? E.g. a control kara without a concrete existence? If so, then the + /// kara has no file and can't be added to the queue or a playlist, but can be useful when + /// searching a kara. + #[default] + Virtual, + + /// If a kara is physical then it has a file that is playable, it can be added to the queue or + /// to a playlist. + Physical { + /// The size of the kara. + filesize: u64, + + /// The hash of the kara file. + hash: SHA256, + }, +} + +impl KaraStatus { + /// Get the file size of the kara in bytes if [KaraStatus::Physical]. If it's a + /// [KaraStatus::Virtual], returns [None]. + pub fn file_size(&self) -> Option<u64> { + match self { + KaraStatus::Virtual => None, + KaraStatus::Physical { filesize, .. } => Some(*filesize), + } + } + + /// Get the file hash of the kara if [KaraStatus::Physical]. If it's a [KaraStatus::Virtual], + /// returns [None]. + pub fn file_hash(&self) -> Option<SHA256> { + match self { + KaraStatus::Virtual => None, + KaraStatus::Physical { hash, .. } => Some(*hash), + } + } +} diff --git a/lektor_nkdb/src/kara/tags.rs b/lektor_nkdb/src/kara/tags.rs new file mode 100644 index 00000000..203248b9 --- /dev/null +++ b/lektor_nkdb/src/kara/tags.rs @@ -0,0 +1,302 @@ +//! Contains everything to implement tags for [crate::kara::Kara]. + +use derive_more::Display; +use hashbrown::HashMap; +use serde::{Deserialize, Serialize}; +use std::{borrow, mem, sync::Arc}; + +/// Contains multiple tags. +/// +/// Note that the keys are always in lowercase English. It is up to the frontend to translate them +/// if needed and title-case them. +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct Tags(HashMap<TagKey, TagValue>); + +impl Tags { + /// Get the key for the `Number` tag. + pub fn key_number() -> &'static str { + TagKey::Number.as_str() + } + + /// Get the key for the `Version` tag. + pub fn key_version() -> &'static str { + TagKey::Version.as_str() + } + + /// Tells whether a tag is present or not. + pub fn has_tag(&self, tag: impl AsRef<str>) -> bool { + self.0.contains_key(tag.as_ref()) + } + + /// Remove a value from the values. Some conditions apply, like whether the value may be + /// removed at all. + pub fn remove_value(&mut self, tag: impl AsRef<str>, value: impl AsRef<str>) { + if let Some(values) = self.0.get_mut(tag.as_ref()) { + values.remove(value); + } + } + + /// Add a value to the values. Some validity conditions apply. + pub fn add_value(&mut self, tag: impl AsRef<str>, value: impl ToString) { + if let Some(values) = self.0.get_mut(tag.as_ref()) { + return values.add(value); + } + + let tag = TagKey::from(tag.as_ref()); + let value = match &tag { + TagKey::Other(_) => TagValue(TagValueVariant::One(value.to_string())), + _ => TagValue(TagValueVariant::Single(value.to_string())), + }; + _ = self.0.insert(tag, value); + } + + /// Mark a tag as present. We will only allocate its key, with no values attached. + pub fn set_flag(&mut self, tag: impl AsRef<str>) { + _ = self.0.entry(tag.as_ref().into()).or_default() + } + + /// Iterate over the values of a tag. + pub fn iter_values(&self, tag: impl AsRef<str>) -> impl Iterator<Item = &str> { + self.0 + .get(tag.as_ref()) + .map(TagValueIter::from) + .into_iter() + .flatten() + } + + /// Get the value of the tag. Will return [None] if there are zero values, or two or more.
+ pub fn get_value(&self, tag: impl AsRef<str>) -> Option<&str> { + self.0.get(tag.as_ref()).and_then(|values| match &values.0 { + TagValueVariant::Single(value) => Some(value.as_ref()), + _ => None, + }) + } +} + +impl<'a> FromIterator<(&'a str, &'a str)> for Tags { + fn from_iter<T: IntoIterator<Item = (&'a str, &'a str)>>(iter: T) -> Self { + iter.into_iter() + .map(|(key, value)| (TagKey::from(key), value.to_string())) + .collect() + } +} + +impl FromIterator<(String, String)> for Tags { + fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self { + iter.into_iter() + .map(|(key, value)| (TagKey::from(key.as_str()), value)) + .collect() + } +} + +impl FromIterator<(TagKey, String)> for Tags { + fn from_iter<T: IntoIterator<Item = (TagKey, String)>>(iter: T) -> Self { + let mut tags = Self::default(); + (iter.into_iter()).for_each(|(key, value)| match value.is_empty() { + true => tags.set_flag(key), + false => tags.add_value(key, value), + }); + tags + } +} + +impl IntoIterator for Tags { + type Item = (Arc<str>, String); + type IntoIter = TagsIntoIter; + + fn into_iter(self) -> Self::IntoIter { + TagsIntoIter::from(self) + } +} + +pub struct TagsIntoIter { + tags: Tags, +} + +impl From<Tags> for TagsIntoIter { + fn from(value: Tags) -> Self { + Self { tags: value } + } +} + +impl Iterator for TagsIntoIter { + type Item = (Arc<str>, String); + + fn next(&mut self) -> Option<Self::Item> { + let next = self.tags.0.keys().next()?.clone(); + match &mut self.tags.0.get_mut(&next)?.0 { + TagValueVariant::Empty => { + _ = self.tags.0.remove(&next); + None + } + TagValueVariant::One(value) | TagValueVariant::Single(value) => { + let value = mem::take(value); + _ = self.tags.0.remove(&next); + Some((next.into(), value)) + } + TagValueVariant::Many(vec) => match vec.pop() { + Some(value) => Some((next.into(), value)), + None => { + _ = self.tags.0.remove(&next); + None + } + }, + } + } +} + +/// The value of a tag. We have some logic if the value can be multiple or not. +#[derive(Default, Clone, Debug, Serialize, Deserialize)] +#[repr(transparent)] +struct TagValue(TagValueVariant); + +/// The value of a tag, but we don't leak the enum. +#[derive(Default, Clone, Debug, Serialize, Deserialize)] +enum TagValueVariant { + /// The tag has no values, but can have as many as needed. + #[default] + Empty, + + /// One value. But the tag can have multiple ones. + One(String), + + /// Many values. + Many(Vec<String>), + + /// Only one value, and one at most. + Single(String), +} + +impl TagValue { + /// Remove a value from the values. Some conditions apply, like if we can remove the value or + /// not (see [TagValueVariant::Single]). + fn remove(&mut self, tag: impl AsRef<str>) { + use TagValueVariant::*; + match &mut self.0 { + One(other) if *other == tag.as_ref() => self.0 = Empty, + Many(vec) => { + let tag = tag.as_ref().to_lowercase(); + _ = (vec.iter().enumerate()) + .find_map(|(idx, other)| (other.to_lowercase() == tag).then_some(idx)) + .map(|idx| vec.remove(idx)); + if let [single] = &mut vec[..] { + self.0 = One(mem::take(single)); + } + } + Single(single) if *single == tag.as_ref() => { + log::error!("can't remove a single tag: {single}") + } + _ => {} + } + } + + /// Add a value to the values. Some conditions apply, like for [TagValueVariant::Single]. 
+ fn add(&mut self, tag: impl ToString) { + use TagValueVariant::*; + let tag = tag.to_string(); + match &mut self.0 { + Empty => self.0 = One(tag), + One(other) if *other != tag => self.0 = Many(vec![mem::take(other), tag]), + Many(vec) if !vec.contains(&tag) => vec.push(tag), + Single(_) => log::error!("can't add '{tag}' to a tag that can have only one value"), + _ => {} + } + } +} + +/// An iterator to iterate over all the values. Don't leak this type. +struct TagValueIter<'a> { + values: &'a TagValueVariant, + index: usize, +} + +impl<'a> From<&'a TagValue> for TagValueIter<'a> { + fn from(value: &'a TagValue) -> Self { + Self::from(&value.0) + } +} + +impl<'a> From<&'a TagValueVariant> for TagValueIter<'a> { + fn from(value: &'a TagValueVariant) -> Self { + Self { + values: value, + index: 0, + } + } +} + +impl<'a> Iterator for TagValueIter<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<Self::Item> { + use TagValueVariant::*; + match self.values { + Empty => None, + Many(vec) => vec + .get(self.index) + .inspect(|_| self.index += 1) + .map(AsRef::as_ref), + One(value) | Single(value) => (self.index == 0).then(|| { + self.index += 1; + value.as_str() + }), + } + } +} + +/// The key of a tag. Don't leak this type. Note that the keys are always in lowercase. +#[derive(Clone, Display, Hash, PartialEq, Eq, Debug, Serialize, Deserialize)] +#[display("{}", self.as_str())] +enum TagKey { + /// Can only have one number per kara. + Number, + + /// Can only have one version per kara. + Version, + + /// Other keys that don't have specific rules. + Other(Arc<str>), +} + +impl AsRef<str> for TagKey { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl borrow::Borrow<str> for TagKey { + fn borrow(&self) -> &str { + self.as_str() + } +} + +impl From<&str> for TagKey { + fn from(value: &str) -> Self { + if TagKey::Number.as_str() == value { + TagKey::Number + } else if TagKey::Version.as_str() == value { + TagKey::Version + } else { + TagKey::Other(Arc::from(value.to_lowercase())) + } + } +} + +impl From<TagKey> for Arc<str> { + fn from(value: TagKey) -> Self { + if let TagKey::Other(arc) = value { + arc + } else { + Arc::from(value.as_str()) + } + } +} + +impl TagKey { + fn as_str(&self) -> &str { + match self { + TagKey::Number => "number", + TagKey::Version => "version", + TagKey::Other(arc) => arc.as_ref(), + } + } +} diff --git a/lektor_nkdb/src/kara/timestamps.rs b/lektor_nkdb/src/kara/timestamps.rs new file mode 100644 index 00000000..bcf50ccc --- /dev/null +++ b/lektor_nkdb/src/kara/timestamps.rs @@ -0,0 +1,15 @@ +use serde::{Deserialize, Serialize}; + +/// Time stamps for a kara. +#[derive(Debug, Serialize, Deserialize, Clone, Copy, Hash)] +pub struct KaraTimeStamps { + /// Unix timestamp of the kara creation. + pub created_at: i64, + + /// Unix timestamp of the kara's last significant modification. + pub updated_at: i64, + + /// Epoch of the kara. Two karas can't share the same epoch, i.e. every object in the + /// database can be sorted by its epoch. + pub epoch: u64, +} diff --git a/lektor_nkdb/src/lib.rs b/lektor_nkdb/src/lib.rs index 80e25d79..b744897d 100644 --- a/lektor_nkdb/src/lib.rs +++ b/lektor_nkdb/src/lib.rs @@ -1,25 +1,23 @@ //! A new implementation of a database to store informations about karas, playlists and such.
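To make the tag rules above concrete, here is a hypothetical usage sketch of the new `Tags` API (the tag names are illustrative, not from the patch): `number` resolves to the single-valued `TagKey::Number`, so it keeps at most one value, while unknown keys accept several values and avoid the `Vec` allocation as long as they hold a single one:

```rust
// Assumes `use lektor_nkdb::Tags;` in a dependent crate.
let mut tags = Tags::default();
tags.add_value("number", 12);      // known single-valued key -> Single
tags.add_value("singer", "Alice"); // unknown key, one value -> One (no Vec)
tags.add_value("singer", "Bob");   // second value -> promoted to Many
tags.set_flag("long");             // key only, no value attached

assert!(tags.has_tag("long"));
assert_eq!(tags.get_value("number"), Some("12"));
assert_eq!(tags.iter_values("singer").count(), 2);
```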
pub use crate::{ - database::{ - epoch::Epoch, - kara::{Kara, KaraStatus, KaraTimeStamps}, - update::UpdateHandler, - }, + database::{epoch::Epoch, update::UpdateHandler}, id::{KId, RemoteKId}, + kara::{status::KaraStatus, tags::Tags, timestamps::KaraTimeStamps, Kara}, playlists::playlist::{Playlist, PlaylistInfo}, storage::{DatabaseDiskStorage, DatabaseStorage}, }; pub use kurisu_api::v2::{SongOrigin, SongType, SONGORIGIN_LENGTH, SONGTYPE_LENGTH}; use crate::database::{epoch::EpochData, pool::Pool}; -use anyhow::{anyhow, Context as _, Result}; +use anyhow::Context as _; use hashbrown::HashMap; use lektor_utils::pushvec::*; use playlists::{Playlists, PlaylistsHandle}; mod database; mod id; +mod kara; mod playlists; mod storage; mod strings; @@ -35,10 +33,10 @@ pub struct Database<Storage: DatabaseStorage = DatabaseDiskStorage> { impl<Storage: DatabaseStorage> Database<Storage> { /// Create a new database with the correspondig prefix. - pub async fn new(prefix: impl Into<Storage::Prefix>) -> Result<Self> { + pub async fn new(prefix: impl Into<Storage::Prefix>) -> anyhow::Result<Self> { let storage = Storage::load_from_prefix(prefix.into()) .await - .with_context(|| "failed to load database storage")?; + .context("failed to load database storage")?; log::info!("load the last epoch and populate the pool without factorization"); let epochs: PushVec<Epoch> = Default::default(); @@ -58,7 +56,7 @@ impl<Storage: DatabaseStorage> Database<Storage> { storage .read_playlists() .await - .with_context(|| "failed to read playlists")?, + .context("failed to read playlists")?, ), pool, epochs, @@ -80,13 +78,12 @@ impl<Storage: DatabaseStorage> Database<Storage> { self.epochs.push_default().await, self.last_epoch().await, ) - .await } /// Get a [Kara] by its [KId] representation in the last epoch. Should be more efficient than /// the [Self::get_kara_by_id] because we dirrectly use the hash thing and we don't iterate /// over all the items in the [HashMap]. - pub async fn get_kara_by_kid(&self, id: KId) -> Result<&Kara> { + pub async fn get_kara_by_kid(&self, id: KId) -> anyhow::Result<&Kara> { self.last_epoch() .await .context("empty epoch")? @@ -101,7 +98,7 @@ impl<Storage: DatabaseStorage> Database<Storage> { /// Get the path to a kara, try to get the absolute path, so here we append the relative path /// to the prefix of the database if possible. - pub fn get_kara_uri(&self, id: KId) -> Result<url::Url> { + pub fn get_kara_uri(&self, id: KId) -> anyhow::Result<url::Url> { self.storage.get_kara_uri(id) } } diff --git a/lektor_nkdb/src/playlists/mod.rs b/lektor_nkdb/src/playlists/mod.rs index 290c47d1..94595a47 100644 --- a/lektor_nkdb/src/playlists/mod.rs +++ b/lektor_nkdb/src/playlists/mod.rs @@ -16,40 +16,50 @@ pub(crate) struct Playlists { epoch: AtomicU64, } +/// A handle to read or write a [Playlist]. It can also be used to track the epoch (i.e. the +/// number of modifications) of all the playlists. pub struct PlaylistsHandle<'a, Storage: DatabaseStorage + Sized> { playlists: &'a Playlists, storage: &'a Storage, } impl<'a, Storage: DatabaseStorage + Sized> PlaylistsHandle<'a, Storage> { + /// Create a new handle. pub(crate) fn new(playlists: &'a Playlists, storage: &'a Storage) -> Self { Self { playlists, storage } } + /// Get the number of [Playlist] modifications since startup, i.e. the epoch. pub async fn epoch(&self) -> u64 { self.playlists.epoch.load(Ordering::Acquire) } + /// Tells whether any [Playlist] is stored.
pub async fn is_empty(&self) -> bool { self.playlists.content.read().await.is_empty() } + /// Get the number of stored [Playlist]. pub async fn len(&self) -> usize { self.playlists.content.read().await.len() } + /// Read a [Playlist]. pub async fn read<T>(&self, plt: KId, cb: impl FnOnce(&Playlist) -> T) -> Result<T> { Ok(cb((self.playlists.content.read().await) .get(&plt) .context("playlist not found")?)) } + /// Get the names and [KId] of the stored [Playlist]. pub async fn list(&self) -> Vec<(KId, String)> { (self.playlists.content.read().await.iter()) .map(|(&id, playlist)| (id, playlist.name().to_string())) .collect() } + /// Write a [Playlist]. This will increment the playlists' epoch, even if you do nothing inside + /// the callback, prompting all the clients to refresh the playlists. pub async fn write<T>( &self, plt: KId, @@ -69,7 +79,7 @@ impl<'a, Storage: DatabaseStorage + Sized> PlaylistsHandle<'a, Storage> { Ok(res) } - /// Add a new playlist if it didn't already exists. Returns whever the creation operation was + /// Add a new [Playlist] if it didn't already exist. Returns whether the creation operation was /// successfull or not. pub async fn create( &self, @@ -85,6 +95,7 @@ impl<'a, Storage: DatabaseStorage + Sized> PlaylistsHandle<'a, Storage> { Ok(()) } + /// Delete a [Playlist]. pub async fn delete(&self, plt: KId, user: impl AsRef<str>, admin: bool) -> Result<()> { match self.playlists.content.write().await.entry(plt) { hash_map::Entry::Vacant(_) => bail!("playlist not found"), @@ -103,6 +114,7 @@ impl<'a, Storage: DatabaseStorage + Sized> PlaylistsHandle<'a, Storage> { } impl Playlists { + /// Create a new [Playlists] store, initialized from an initial list of [Playlist]. pub(crate) fn new(iter: impl IntoIterator<Item = Playlist>) -> Self { Self { content: RwLock::new(iter.into_iter().map(|plt| (plt.local_id(), plt)).collect()), diff --git a/lektor_nkdb/src/playlists/playlist.rs b/lektor_nkdb/src/playlists/playlist.rs index 78437750..e8ae512d 100644 --- a/lektor_nkdb/src/playlists/playlist.rs +++ b/lektor_nkdb/src/playlists/playlist.rs @@ -4,6 +4,7 @@ use crate::{strings, KId, RemoteKId}; use hashbrown::HashSet; use rand::seq::SliceRandom as _; use serde::{Deserialize, Serialize}; +use smallvec::SmallVec; use std::sync::Arc; /// Informations about a playlist. @@ -17,7 +18,7 @@ pub struct Playlist { /// The name of the owners of the playlist. If the user who created the playlist wasn't /// identified, then it's an anonymous playlist. The users can't be empty strings. - owners: HashSet<Arc<str>>, + owners: SmallVec<[Arc<str>; 2]>, /// The name of the playlist. Note that this is not a primary key, multiple playlists can have /// the same name... Must be only in ascii characters. @@ -39,6 +40,7 @@ pub struct Playlist { // Builder-lite interface impl Playlist { + /// Create a new playlist with an associated [KId]. pub(crate) fn new(id: KId, name: impl ToString) -> Self { Self { name: name.to_string(), @@ -54,69 +56,95 @@ impl Playlist { } } + /// Add an owner to the playlist, in a sync way. This function may block. pub fn with_owner_sync(mut self, user: impl AsRef<str>) -> Self { - self.owners.insert(strings::CACHE.get_sync(user.as_ref())); + if !(self.owners.iter()).any(|owner| owner.as_ref() == user.as_ref()) { + self.owners.push(strings::CACHE.get_sync(user.as_ref())); + } self.updated_now() } + /// Add an owner to the playlist, in an async way. This function may yield.
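Since `write` above bumps the epoch on every call, clients can detect stale views with a cheap poll instead of re-downloading everything; a hypothetical consumer sketch (this helper is illustrative, not part of the patch):

```rust
// Refetch the playlist list only when the epoch moved since last poll.
async fn refresh_if_stale<S: DatabaseStorage>(
    handle: &PlaylistsHandle<'_, S>,
    last_seen: &mut u64,
) -> Option<Vec<(KId, String)>> {
    let epoch = handle.epoch().await;
    if epoch == *last_seen {
        return None; // nothing changed, keep the cached view
    }
    *last_seen = epoch;
    Some(handle.list().await)
}
```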
pub async fn with_owner(mut self, user: impl AsRef<str>) -> Self { - self.owners.insert(strings::CACHE.get(user.as_ref()).await); + if !(self.owners.iter()).any(|owner| owner.as_ref() == user.as_ref()) { + self.owners.push(strings::CACHE.get(user.as_ref()).await); + } self.updated_now() } + /// Add a description to the playlist. pub fn with_description(mut self, desc: impl ToString) -> Self { let description = desc.to_string(); self.description = (!description.is_empty()).then_some(description); self.updated_now() } - pub(super) fn updated_now(mut self) -> Self { - self.updated_at = chrono::Utc::now().timestamp(); - self + /// Update the [Self::updated_at] field. + pub(super) fn updated_now(self) -> Self { + Self { + updated_at: chrono::Utc::now().timestamp(), + ..self + } } } // Setters impl Playlist { + /// Change the name of the playlist. pub fn set_name(&mut self, name: impl ToString) -> &mut Self { self.name = name.to_string(); self } + /// Add an owner to the playlist. + /// + /// TODO: Note that here we won't try to reuse memory… pub fn add_owner(&mut self, name: impl AsRef<str>) -> &mut Self { - self.owners.insert(Arc::from(name.as_ref())); + if !(self.owners.iter()).any(|owner| owner.as_ref() == name.as_ref()) { + self.owners.push(Arc::from(name.as_ref())); + } self } + /// Remove an owner from the playlist. pub fn remove_owner(&mut self, name: impl AsRef<str>) -> &mut Self { - self.owners.remove(name.as_ref()); + _ = (self.owners.iter().enumerate()) + .find_map(|(idx, user)| (user.as_ref() == name.as_ref()).then_some(idx)) + .map(|idx| self.owners.remove(idx)); self } + /// Tells whether someone is authorized to modify the playlist: they must be in the + /// [Self::owners] list, or be an admin. pub(crate) fn authorize_writes( &mut self, user: impl AsRef<str>, admin: bool, ) -> Option<&mut Self> { - (admin || self.owners.contains(user.as_ref())).then_some(self) + (admin || (self.owners.iter()).any(|owner| owner.as_ref() == user.as_ref())).then_some(self) } + /// Add a [crate::Kara] by its [KId] to the playlist. pub fn push(&mut self, id: KId) { self.content.push(id) } + /// Remove a [crate::Kara] by its [KId] from the playlist. pub fn remove(&mut self, id: KId) { self.retain(|other| *other != id) } + /// Select which [crate::Kara] are kept in the playlist by running a callback on each [KId]. pub fn retain(&mut self, cb: impl FnMut(&KId) -> bool) { self.content.retain(cb) } + /// Add a [Vec] of [crate::Kara] by their [KId] to the playlist. pub fn append(&mut self, ids: &mut Vec<KId>) { self.content.append(ids) } + /// Shuffle the content of the playlist. pub fn shuffle(&mut self) { self.content.shuffle(&mut rand::thread_rng()) } @@ -124,46 +152,63 @@ impl Playlist { // Getters impl Playlist { + /// Get the local [KId] of the playlist. Note that this id can be different on each lektord + /// instance. pub fn local_id(&self) -> KId { self.local_id } + /// Tells whether the playlist contains a given [crate::Kara] by its [KId]. pub fn contains(&self, id: KId) -> bool { self.content.contains(&id) } + /// Get the [RemoteKId] of a playlist. This id is shared by all the lektord instances that + /// are synchronized with a given source. pub fn remote_id(&self) -> Option<RemoteKId> { self.remote_id.clone() } + /// Get all the owners of the playlist. pub fn owners(&self) -> impl Iterator<Item = &str> { self.owners.iter().map(Arc::as_ref) } + /// Get the name of the playlist. A playlist name is not unique, and two playlists can have the + /// same name.
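The owner handling above trades the old `HashSet` for a `SmallVec<[Arc<str>; 2]>` kept duplicate-free by linear scan, as in `add_owner`/`remove_owner`; with at most a handful of owners this avoids hashing entirely and, for up to two entries, any heap allocation for the container. A standalone sketch of the pattern (the helper name is illustrative):

```rust
use smallvec::SmallVec;
use std::sync::Arc;

// Insert a name only if it is absent: a linear scan over a few entries
// is cheaper than a HashSet, and two inline slots avoid the heap.
fn insert_unique(owners: &mut SmallVec<[Arc<str>; 2]>, name: &str) {
    if !owners.iter().any(|owner| owner.as_ref() == name) {
        owners.push(Arc::from(name));
    }
}
```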
pub fn name(&self) -> &str { self.name.as_ref() } + /// Get the optional description of the playlist. pub fn description(&self) -> Option<&str> { self.description.as_deref() } + /// Get the [chrono::DateTime] at which the playlist was created for *this lektord instance*. pub fn created_at(&self) -> chrono::DateTime<chrono::Utc> { chrono::DateTime::from_timestamp(self.created_at, 0).unwrap_or_default() } + /// Get the [chrono::DateTime] at which the playlist was updated for the last time for *this + /// lektord instance*. pub fn updated_at(&self) -> chrono::DateTime<chrono::Utc> { chrono::DateTime::from_timestamp(self.updated_at, 0).unwrap_or_default() } + /// Iterate over the content of the playlist, in sequence: the order matters and there can + /// be duplicates. pub fn iter_seq_content(&self) -> impl Iterator<Item = KId> + '_ { self.content.iter().copied() } + /// Iterate over the content of the playlist, but in any order and without duplicates. Note + /// that this function will *allocate a lot…* pub fn iter_uniq_content(&self) -> impl Iterator<Item = KId> + '_ { HashSet::<KId>::from_iter(self.content.iter().copied()).into_iter() } + /// Get the information about the playlist. pub fn get_infos(&self) -> PlaylistInfo { PlaylistInfo { local_id: self.local_id, @@ -177,6 +222,7 @@ } impl Extend<KId> for Playlist { + /// Add any iterator of [crate::Kara] by their [KId] to the playlist. fn extend<T: IntoIterator<Item = KId>>(&mut self, iter: T) { self.content.extend(iter) } @@ -186,35 +232,55 @@ /// see the ones in [Playlist]. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] pub struct PlaylistInfo { + /// [Playlist::local_id] local_id: KId, - owners: HashSet<Arc<str>>, + + /// [Playlist::owners] + owners: SmallVec<[Arc<str>; 2]>, + + /// [Playlist::name] name: String, + + /// [Playlist::description] description: Option<String>, + + /// [Playlist::created_at] created_at: i64, + + /// [Playlist::updated_at] updated_at: i64, } impl PlaylistInfo { + /// Get the local [KId] of the [Playlist]. Note that this id can be different on each lektord + /// instance. pub fn local_id(&self) -> KId { self.local_id } + /// Get all the owners of the [Playlist]. pub fn owners(&self) -> impl Iterator<Item = &str> { self.owners.iter().map(Arc::as_ref) } + /// Get the name of the [Playlist]. A playlist name is not unique, and two playlists can have + /// the same name. pub fn name(&self) -> &str { self.name.as_ref() } + /// Get the optional description of the [Playlist]. pub fn description(&self) -> Option<&str> { self.description.as_deref() } + /// Get the [chrono::DateTime] at which the [Playlist] was created for *this lektord instance*. pub fn created_at(&self) -> chrono::DateTime<chrono::Utc> { chrono::DateTime::from_timestamp(self.created_at, 0).unwrap_or_default() } + /// Get the [chrono::DateTime] at which the [Playlist] was updated for the last time for *this + /// lektord instance*. pub fn updated_at(&self) -> chrono::DateTime<chrono::Utc> { chrono::DateTime::from_timestamp(self.updated_at, 0).unwrap_or_default() } diff --git a/lektor_nkdb/src/storage/disk_storage.rs b/lektor_nkdb/src/storage/disk_storage.rs index 7917e517..cb40a6f5 100644 --- a/lektor_nkdb/src/storage/disk_storage.rs +++ b/lektor_nkdb/src/storage/disk_storage.rs @@ -1,22 +1,18 @@ //! We store things in the disk.
diff --git a/lektor_nkdb/src/storage/disk_storage.rs b/lektor_nkdb/src/storage/disk_storage.rs
index 7917e517..cb40a6f5 100644
--- a/lektor_nkdb/src/storage/disk_storage.rs
+++ b/lektor_nkdb/src/storage/disk_storage.rs
@@ -1,22 +1,18 @@
 //! We store things in the disk.
 
-use crate::*;
-use anyhow::{bail, ensure, Result};
-use futures::{
-    stream::{self, FuturesUnordered},
-    StreamExt,
-};
-use hashbrown::HashSet;
+use crate::{storage::folder_reader::FolderReader, *};
+use anyhow::ensure;
+use core::fmt;
+use futures::{prelude::*, stream::FuturesUnordered};
 use kurisu_api::SHA256;
-use regex::Regex;
 use std::{
+    cell::RefCell,
     path::{Path, PathBuf},
-    str::FromStr,
-    sync::{atomic::AtomicU64, LazyLock},
+    sync::atomic::{AtomicU64, Ordering},
 };
 use tokio::{
-    fs::{create_dir_all, read, read_dir, write, OpenOptions},
-    io::AsyncWriteExt,
+    fs,
+    io::AsyncWriteExt as _,
     sync::mpsc::{channel, Receiver, Sender},
 };
 
@@ -25,9 +21,10 @@ use tokio::{
 /// From the prefix we have mainly two folders:
 /// - $prefix/data: Here we store the files pointed by the database
 ///   - $prefix/data/$num.mkv: The kara files
+///   - $prefix/data/$num.ok: Contains the hash of the file
 /// - $prefix/playlists: Here we store the playlists files.
-///   - $prefix/playlists/$name.json: Contains the serialized data
-///   - $prefix/playlists/$name.ok: Only written after the playlist file was saved, contains
+///   - $prefix/playlists/$num.json: Contains the serialized data
+///   - $prefix/playlists/$num.ok: Only written after the playlist file was saved, contains
 ///     the hash of the data file for validity check.
 /// - $prefix/epochs: Here we stores files for the epochs.
 ///   - $prefix/epochs/$num.json: Contains the epoch in a serialized format.
@@ -47,20 +44,49 @@ enum PlaylistWriteEvent {
     Delete(KId),
 }
 
-macro_rules! regex {
-    ($regex:literal) => {
-        LazyLock::new(|| Regex::new($regex).unwrap())
-    };
-}
-
-fn get_regex_ok() -> &'static Regex {
-    static REGEX: LazyLock<Regex> = regex!(r"^([0123456789]+).ok$");
-    &REGEX
+/// Here we will write the playlists one at a time, so no race conditions!
+async fn pool_playlist_write_events(prefix: PathBuf, mut recv: Receiver<PlaylistWriteEvent>) {
+    while let Some(event) = recv.recv().await {
+        match event {
+            PlaylistWriteEvent::Write(id, plt) => match serde_json::to_string(&plt) {
+                Err(e) => log::error!("failed to write playlist {id} to disk: {e:?}"),
+                Ok(data) => {
+                    _ = write_json_from_root(&prefix, id, data)
+                        .await
+                        .map_err(|err| log::error!("can't write playlist {id}: {err:?}"))
+                }
+            },
+
+            PlaylistWriteEvent::Delete(id) => {
+                _ = fs::remove_file(prefix.join(format!("{id}.json")))
+                    .await
+                    .map_err(|err| log::error!("failed to delete playlist {id}: {err:?}"));
+                _ = fs::remove_file(prefix.join(format!("{id}.ok")))
+                    .await
+                    .map_err(|err| log::error!("failed to delete hash file: {err:?}"));
+            }
+        }
+    }
 }
 
-fn get_regex_json() -> &'static Regex {
-    static REGEX: LazyLock<Regex> = regex!(r"^([0123456789]+).json$");
-    &REGEX
+/// Write a json file in an async way from a [String], writing its sha256 hash alongside it
+/// when finished.
+async fn write_json_from_root(
+    root: impl AsRef<Path>,
+    from_root: impl fmt::Display,
+    data: impl ToString,
+) -> anyhow::Result<()> {
+    let path = root.as_ref().join(format!("{from_root}.json"));
+    fs::write(&path, data.to_string())
+        .await
+        .context("failed to write json file")?;
+
+    fs::write(
+        root.as_ref().join(format!("{from_root}.ok")),
+        sha256::try_async_digest(path).await?,
+    )
+    .await
+    .context("failed to write hash file")
 }
 
 impl DatabaseDiskStorage {
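`write_json_from_root` above writes the payload first and the `.ok` hash file last, so a crash between the two writes leaves a data file that fails validation on the next load. Here is a synchronous sketch of the same write-then-mark idea, assuming the blocking `sha256::try_digest` from the same crate the patch uses asynchronously; `write_with_marker` is a hypothetical name:

```rust
use std::{fs, io, path::Path};

/// Hypothetical sketch: write `data` to `<stem>.json`, then store its sha256
/// digest in `<stem>.ok`. The `.ok` file is written last, so it doubles as a
/// commit marker: a crash in between leaves a data file that fails validation.
fn write_with_marker(root: &Path, stem: &str, data: &str) -> io::Result<()> {
    let json = root.join(format!("{stem}.json"));
    fs::write(&json, data)?;
    let digest = sha256::try_digest(&json)?;
    fs::write(root.join(format!("{stem}.ok")), digest)
}

fn main() -> io::Result<()> {
    let dir = std::env::temp_dir();
    write_with_marker(&dir, "42", r#"{"name":"demo"}"#)?;

    // On load, recompute the digest and compare it with the stored one.
    let json = dir.join("42.json");
    let valid = sha256::try_digest(&json)? == fs::read_to_string(dir.join("42.ok"))?;
    assert!(valid);
    Ok(())
}
```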
@@ -72,291 +98,113 @@ impl DatabaseDiskStorage {
 
     /// Get the id of the next epoch, incrementing the next id counter.
     fn next_epoch(&self) -> u64 {
-        self.next_epoch
-            .fetch_add(1, std::sync::atomic::Ordering::SeqCst)
-    }
-
-    /// Create a new kara file with a specific extension. If a file with the same path exists it
-    /// will be overwritten.
-    async fn new_kara_file(&self, id: KId) -> Result<(PathBuf, tokio::fs::File)> {
-        let path = self.path_from_root(format!("data/{id}.mkv"));
-        if path.exists() {
-            log::warn!("overwriting file {}", path.to_string_lossy())
-        }
-        let file = OpenOptions::new()
-            .create(true)
-            .write(true)
-            .truncate(true)
-            .open(&path)
-            .await?;
-        Ok((path, file))
-    }
-
-    /// Write a json file in an async way from a [String] and its sha256 hash belong it when
-    /// finished.
-    async fn write_json_from_root(
-        root: impl AsRef<Path>,
-        from_root: impl ToString,
-        data: impl ToString,
-    ) -> Result<()> {
-        let from = from_root.to_string();
-
-        // Write JSON
-        let path = root.as_ref().join(format!("{from}.json"));
-        let data = data.to_string();
-        write(&path, data).await?;
-
-        // Write digest
-        let digest = sha256::try_async_digest(&path).await?;
-        let path = root.as_ref().join(format!("{from}.ok"));
-        write(&path, digest).await?;
-
-        Ok(())
-    }
-
-    /// Write a json file in an async way from a [String] and its sha256 hash belong it when
-    /// finished.
-    async fn write_json(&self, from_root: impl ToString, data: impl ToString) -> Result<()> {
-        let from = from_root.to_string();
-
-        // Write JSON
-        let path = self.path_from_root(format!("{from}.json"));
-        let data = data.to_string();
-        write(&path, data).await?;
-
-        // Write digest
-        let digest = sha256::try_async_digest(&path).await?;
-        let path = self.path_from_root(format!("{from}.ok"));
-        write(&path, digest).await?;
-
-        Ok(())
-    }
-
-    /// From a set of json files ("${self.root}/${folder}/T.{json,ok}"), we keep only the valid
-    /// json files, i.e. the hash of the json file correspond the the hash written in the ok file.
-    async fn keep_valid_json<'a, 'b, T: std::fmt::Display + std::hash::Hash + std::cmp::Eq + 'b>(
-        &'a self,
-        folder: impl AsRef<str>,
-        set: impl IntoIterator<Item = &'b T>,
-    ) -> Result<HashSet<&'b T>>
-    where
-        'a: 'b,
-    {
-        Ok(stream::iter(set.into_iter().map(|item| {
-            let folder = folder.as_ref();
-            let json = self.path_from_root(format!("{folder}/{item}.json"));
-            let ok = self.path_from_root(format!("{folder}/{item}.ok"));
-            (json, ok, item)
-        }))
-        .then(|(json, ok, item)| async move {
-            let json_digest = sha256::try_async_digest(json)
-                .await?
-                .parse::<SHA256>()
-                .map_err(|err| anyhow!("{err}"))?;
-            let ok_digest = tokio::fs::read_to_string(ok)
-                .await?
-                .parse::<SHA256>()
-                .map_err(|err| anyhow!("{err}"))?;
-            Ok::<_, anyhow::Error>((json_digest, ok_digest, item))
-        })
-        .collect::<FuturesUnordered<_>>()
-        .await
-        .into_iter()
-        .filter_map(|res| match res {
-            Ok((json, ok, item)) if ok.eq(&json) => Some(item),
-            Ok((json, ok, item)) => {
-                log::error!("invalid data for {item}, expected hash {ok} but got {json}");
-                None
-            }
-            Err(err) => {
-                log::error!("{err}");
-                None
-            }
-        })
-        .collect())
-    }
-
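The new `next_epoch` body, shown just below, boils down to a single `fetch_add`: it returns the previous value and increments atomically, so concurrent callers always get distinct, monotonically increasing ids. A self-contained sketch of that counter pattern (`EpochCounter` is a hypothetical name, not from the patch):

```rust
use std::sync::atomic::{AtomicU64, Ordering};

/// Hypothetical wrapper illustrating the pattern: `fetch_add` returns the
/// previous value and increments atomically, so two concurrent callers never
/// see the same id.
struct EpochCounter(AtomicU64);

impl EpochCounter {
    /// Start right after the highest epoch number found on disk.
    fn new(last_on_disk: u64) -> Self {
        Self(AtomicU64::new(last_on_disk + 1))
    }

    fn next(&self) -> u64 {
        self.0.fetch_add(1, Ordering::SeqCst)
    }
}

fn main() {
    let counter = EpochCounter::new(41);
    assert_eq!(counter.next(), 42);
    assert_eq!(counter.next(), 43);
}
```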
-    /// Here we will write the playlists one at a time, so no race conditions!
-    async fn pool_playlist_write_events(prefix: PathBuf, mut recv: Receiver<PlaylistWriteEvent>) {
-        let prefix = prefix.as_path();
-        let write = |name, plt| async move {
-            match serde_json::to_string(&plt) {
-                Ok(data) => {
-                    if let Err(err) = Self::write_json_from_root(prefix, &name, data).await {
-                        log::error!("can't write playlist {name}: {err:?}")
-                    }
-                }
-                Err(e) => log::error!("failed to write playlist {name} to disk: {e:?}"),
-            };
-        };
-        let delete = |id: KId| async move {
-            if let Err(err) = tokio::fs::remove_file(prefix.join(format!("{id}.json"))).await {
-                log::error!("failed to delete playlist {id}: {err:?}")
-            }
-        };
-
-        while let Some(event) = recv.recv().await {
-            match event {
-                PlaylistWriteEvent::Write(id, plt) => write(id, plt).await,
-                PlaylistWriteEvent::Delete(id) => delete(id).await,
-            }
-        }
+        self.next_epoch.fetch_add(1, Ordering::SeqCst)
     }
 }
 
-/// Read json data from a folder and only keep valid data. The things returned from the regex_ok
-/// and regex_json regexes must be parsable as the Names type, or we will panic.
-macro_rules! read_json_folder {
-    (
-        ($self: expr) $folder: literal [$ok: expr, $json: expr] => $ty: ty;
-        $ok_valid: ident => $body: expr
-    ) => {
-        let mut sets: [HashSet<$ty>; 2] = Default::default();
-        let regx = [$ok, $json];
-        let folder = $self.path_from_root($folder);
-        let mut dir = read_dir(&folder).await?;
-
-        while let Some(entry) = dir.next_entry().await? {
-            let name = entry.file_name();
-            let name = name.to_string_lossy();
-            for i in 0..2 {
-                let Some(capture) = regx[i].captures(&name).into_iter().next() else {
-                    continue;
-                };
-                let Some(what) = capture.get(1) else {
-                    log::error!("invalid name for '{name}'");
-                    continue;
-                };
-                match what.as_str().parse() {
-                    Err(err) => bail!("invalid regex, problem with file: {name}: {err}"),
-                    Ok(name) => _ = sets[i].insert(name),
-                }
-            }
-        }
-
-        let $ok_valid = $self
-            .keep_valid_json($folder, sets[0].intersection(&sets[1]))
-            .await?;
-        $body
-    };
-}
-
 impl DatabaseStorage for DatabaseDiskStorage {
     type Prefix = PathBuf;
-    type File = (PathBuf, tokio::fs::File);
-
-    async fn load_from_prefix(prefix: PathBuf) -> Result<Self> {
-        let regex = get_regex_ok();
+    type File = (PathBuf, fs::File);
 
+    async fn load_from_prefix(prefix: PathBuf) -> anyhow::Result<Self> {
         // Prepare folders.
-        for folder in ["data", "epoch", "playlists"] {
-            let folder = prefix.join(folder);
-            match create_dir_all(&folder).await {
-                Err(err) if err.kind() != std::io::ErrorKind::AlreadyExists => {
-                    return Err(err)
-                        .with_context(|| format!("failed to create folder {}", folder.display()))
-                }
-                _ => {}
+        for folder in ["data", "epoch", "playlists"].map(|folder| prefix.join(folder)) {
+            if let Err(err) = fs::create_dir_all(&folder).await {
+                ensure!(
+                    err.kind() == std::io::ErrorKind::AlreadyExists,
+                    "failed to create folder {}: {err}",
+                    folder.display()
+                );
             }
-            write(folder.join(".touch"), chrono::Local::now().to_string()).await?;
         }
 
         // We find the maximal epoch number for this computer, to be sure to not override any
         // previous epoch.
-        let mut max = 0;
-        let epoch = prefix.join("epoch");
-        let mut epoch = read_dir(&epoch).await?;
-        while let Some(entry) = epoch.next_entry().await? {
-            let name = entry.file_name();
-            let name = name.to_string_lossy();
-            let Some(capture) = regex.captures(&name).into_iter().next() else {
-                continue;
-            };
-            let Some(integer) = capture.get(1) else {
-                log::error!("invalid name for '{name}'");
-                continue;
-            };
-            match integer.as_str().parse::<u64>() {
-                Ok(integer) => max = std::cmp::max(max, integer),
-                Err(err) => {
-                    log::error!("invalid name for '{name}': {err}");
-                    continue;
-                }
-            };
-        }
+        let last_epoch = FolderReader::new(prefix.join("epoch"))
+            .await?
+            .unfold_entries::<u64>()
+            .collect::<FuturesUnordered<_>>()
+            .await
+            .into_iter()
+            .max()
+            .unwrap_or_default();
 
         // Here we will write the playlists one at a time, so no race conditions!
         let (send, recv) = channel::<PlaylistWriteEvent>(10);
-        tokio::spawn(Self::pool_playlist_write_events(
-            prefix.join("playlists"),
-            recv,
-        ));
+        tokio::spawn(pool_playlist_write_events(prefix.join("playlists"), recv));
 
         Ok(Self {
             prefix,
-            next_epoch: AtomicU64::new(max + 1),
+            next_epoch: AtomicU64::new(last_epoch + 1),
             playlist_pipeline: send,
         })
     }
 
-    async fn read_last_epoch(&self) -> Result<Option<(EpochData, u64)>> {
-        read_json_folder! {
-            (self) "epoch" [get_regex_ok(), get_regex_json()] => u64;
-            valid_epochs => { match valid_epochs.into_iter().max() {
-                None => Ok(None),
-                Some(id) => {
-                    let path = self.path_from_root(format!("epoch/{id}.json"));
-                    let mut data = serde_json::from_slice::<EpochData>(&read(&path).await?)?;
-                    stream::iter(data.iter()).then(|(id, kara): (&KId, &Kara)| async move {
-                        let KaraStatus::Physical { hash, .. } = &kara.kara_status else {
-                            return None;
-                        };
-                        match sha256::try_async_digest(self.path_from_root(format!("data/{id}.mkv"))).await
-                            .context("failed to digest file")
-                            .map(|str| SHA256::from_str(&str).map_err(|err| anyhow!("{err}")))
-                        {
-                            Ok(Ok(file_hash)) => (file_hash != *hash).then_some(*id),
-                            Ok(Err(err)) | Err(err) => {
-                                log::error!("kara with id {id} is corrupted: {err}");
-                                Some(*id)
-                            }
-                        }
-                    })
-                    .collect::<FuturesUnordered<_>>().await
-                    .into_iter().flatten().for_each(|kid: KId| { data.remove(&kid); });
-                    log::info!("load epoch {id} with {} karas from path: {}", data.len(), path.to_string_lossy());
-                    Ok(Some((data, *id)))
-                }
-            }}
-        }
+    async fn read_last_epoch(&self) -> anyhow::Result<Option<(EpochData, u64)>> {
+        // Get the ID of the last epoch
+        let Some(id) = FolderReader::new(self.prefix.join("epoch"))
+            .await?
+            .unfold_entries()
+            .collect::<FuturesUnordered<_>>()
+            .await
+            .into_iter()
+            .max()
+        else {
+            return Ok(None);
+        };
+
+        // Read the last epoch
+        let data = RefCell::new(serde_json::from_slice::<EpochData>(
+            &fs::read(self.path_from_root(format!("epoch/{id}.json"))).await?,
+        )?);
+
+        // Remove invalid karas from the epoch
+        FolderReader::new(self.prefix.join("data"))
+            .await?
+            .with_file_extension(".mkv")
+            .unfold_entries_with_check(|id: KId, sha_mkv, sha_ok| {
+                (sha_mkv != sha_ok)
+                    .then(|| _ = data.borrow_mut().remove(&id))
+                    .is_none()
+            })
+            .collect::<FuturesUnordered<_>>()
+            .await
+            .into_iter()
+            .for_each(drop);
+
+        // Ok, return things
+        log::info!("load epoch {id} with {} karas", data.borrow().len());
+        Ok(Some((data.into_inner(), id)))
     }
 
-    async fn read_playlists(&self) -> Result<Vec<Playlist>> {
-        read_json_folder! {
-            (self) "playlists" [get_regex_ok(), get_regex_json()] => KId;
-            valid_playlists => {
-                stream::iter(valid_playlists).then(|id: &KId| async move {
-                    let path = self.path_from_root(format!("playlists/{id}.json"));
-                    log::info!("load playlist from path: {}", path.to_string_lossy());
-                    let json: Playlist = serde_json::from_slice(&read(path).await?)?;
-                    ensure!(json.local_id() == *id, "");
-                    Ok(json)
-                })
-                .collect::<FuturesUnordered<_>>().await
-                .into_iter().collect::<Result<Vec<_>, _>>()
-            }
-        }
+    async fn read_playlists(&self) -> anyhow::Result<Vec<Playlist>> {
+        FolderReader::new(self.prefix.join("playlists"))
+            .await?
+            .unfold_entries::<KId>()
+            .then(|id| async move {
+                let path = self.path_from_root(format!("playlists/{id}.json"));
+                log::info!("try to load playlist from path: {}", path.display());
+                Ok(serde_json::from_slice(&fs::read(path).await?)?)
+            })
+            .collect::<FuturesUnordered<_>>()
+            .await
+            .into_iter()
+            .collect::<anyhow::Result<Vec<Playlist>>>()
     }
 
-    async fn write_epoch(&self, data: &EpochData) -> Result<()> {
+    async fn write_epoch(&self, data: &EpochData) -> anyhow::Result<()> {
         let num = self.next_epoch();
-        self.write_json(format!("epoch/{num}"), serde_json::to_string(data)?)
-            .await?;
+        write_json_from_root(
+            &self.prefix,
+            format!("epoch/{num}"),
+            serde_json::to_string(data)?,
+        )
+        .await?;
         log::info!("wrote epoch {num} to disk");
         Ok(())
     }
 
-    async fn write_playlist(&self, playlist: &Playlist) -> Result<()> {
+    async fn write_playlist(&self, playlist: &Playlist) -> anyhow::Result<()> {
         self.playlist_pipeline
             .send(PlaylistWriteEvent::Write(
                 playlist.local_id(),
@@ -366,35 +214,56 @@ impl DatabaseStorage for DatabaseDiskStorage {
             .context("failed to send event")
     }
 
-    async fn delete_playlist(&self, id: KId) -> Result<()> {
+    async fn delete_playlist(&self, id: KId) -> anyhow::Result<()> {
         self.playlist_pipeline
             .send(PlaylistWriteEvent::Delete(id))
             .await
             .context("failed to send event")
     }
 
-    async fn prepare_kara(&self, id: KId) -> Result<Self::File> {
-        self.new_kara_file(id).await
+    async fn prepare_kara(&self, id: KId) -> anyhow::Result<Self::File> {
+        let path = self.path_from_root(format!("data/{id}.mkv"));
+        if path.exists() {
+            log::warn!("overwriting file {}", path.to_string_lossy())
+        }
+        let file = fs::OpenOptions::new()
+            .create(true)
+            .write(true)
+            .truncate(true)
+            .open(&path)
+            .await?;
+        Ok((path, file))
     }
 
-    async fn write_kara(&self, (_, file): &mut Self::File, data: &[u8]) -> Result<()> {
-        let _ = file.write(data).await?;
+    async fn write_kara(&self, (_, file): &mut Self::File, data: &[u8]) -> anyhow::Result<()> {
+        _ = file.write(data).await?;
         Ok(())
     }
 
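All playlist writes and deletes above are funnelled through `playlist_pipeline` into the single spawned `pool_playlist_write_events` task, so two saves of the same playlist can never interleave on disk. A minimal sketch of that single-writer pattern, with a hypothetical `WriteEvent` standing in for `PlaylistWriteEvent` and `println!` standing in for the filesystem work:

```rust
use tokio::sync::mpsc;

#[derive(Debug)]
enum WriteEvent {
    Write(u64, String), // hypothetical: playlist id + serialized payload
    Delete(u64),
}

#[tokio::main]
async fn main() {
    // One receiver, one task: events are handled strictly in order, so two
    // writes touching the same file can never race each other.
    let (send, mut recv) = mpsc::channel::<WriteEvent>(10);
    let writer = tokio::spawn(async move {
        while let Some(event) = recv.recv().await {
            match event {
                WriteEvent::Write(id, data) => println!("write {id}: {data}"),
                WriteEvent::Delete(id) => println!("delete {id}"),
            }
        }
    });

    send.send(WriteEvent::Write(1, "{}".into())).await.unwrap();
    send.send(WriteEvent::Delete(1)).await.unwrap();
    drop(send); // closing the channel ends the writer loop
    writer.await.unwrap();
}
```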
-    async fn submit_kara(&self, (path, _): Self::File, id: KId, hash: SHA256) -> Result<()> {
-        let digest = SHA256::try_from(sha256::try_async_digest(&path).await?)
-            .map_err(|err| anyhow!("invalid sha256 computed: {err}"))?;
+    async fn submit_kara(
+        &self,
+        (mut path, _): Self::File,
+        id: KId,
+        hash: SHA256,
+    ) -> anyhow::Result<()> {
+        let digest = sha256::try_async_digest(&path).await?;
+
         ensure!(
-            digest == hash,
+            hash == digest
+                .parse::<SHA256>()
+                .context("invalid sha256 computed")?,
             "invalid digest for {}, expected {hash}, got {digest}",
             path.display()
         );
-        log::info!("finished writing file for kara {id:?}: {}", path.display());
+
+        path.set_extension("ok");
+        fs::write(path, digest).await?;
+
+        log::info!("finished writing file for kara {id}");
         Ok(())
     }
 
-    fn get_kara_uri(&self, id: KId) -> Result<url::Url> {
+    fn get_kara_uri(&self, id: KId) -> anyhow::Result<url::Url> {
         let path = self.path_from_root(format!("data/{id}.mkv"));
         Ok(url::Url::parse(&format!("file://{}", path.display()))?)
     }
diff --git a/lektor_nkdb/src/storage/folder_reader.rs b/lektor_nkdb/src/storage/folder_reader.rs
new file mode 100644
index 00000000..60eac502
--- /dev/null
+++ b/lektor_nkdb/src/storage/folder_reader.rs
@@ -0,0 +1,115 @@
+//! Contains things to read a folder of json files stored alongside their hash.
+
+use anyhow::{anyhow, Context as _};
+use futures::prelude::*;
+use kurisu_api::SHA256;
+use std::{
+    fmt,
+    path::{Path, PathBuf},
+    str::FromStr,
+};
+use tokio::fs;
+
+/// Reader struct to read a folder that contains json files stored alongside their sha256
+/// hash. The name of a json file follows `^([0123456789]+).json$` and its hash file has a name
+/// following `^([0123456789]+).ok$`.
+pub struct FolderReader<'a> {
+    read_dir: fs::ReadDir,
+    suffix: &'a str,
+}
+
+fn default_check<Id>(_: Id, json_digest: SHA256, ok_digest: SHA256) -> bool {
+    json_digest == ok_digest
+}
+
+impl<'a> FolderReader<'a> {
+    /// Create a new reader. By default read `.json` files.
+    pub async fn new(root: impl AsRef<Path>) -> anyhow::Result<Self> {
+        Ok(Self {
+            read_dir: fs::read_dir(root.as_ref()).await?,
+            suffix: ".json",
+        })
+    }
+
+    /// Read files other than `.json` files, like `.mkv` for video files.
+    pub fn with_file_extension(self, suffix: &'a str) -> Self {
+        Self { suffix, ..self }
+    }
+
+    /// Get a stream that reads the next entries from the folder. May fail… Here we let the user
+    /// implement their own validity check.
+    pub fn unfold_entries_with_check<Id, FnCheck>(
+        self,
+        check: FnCheck,
+    ) -> impl Stream<Item = Id> + use<'a, Id, FnCheck>
+    where
+        Id: FromStr<Err: std::error::Error> + fmt::Display + Copy,
+        FnCheck: (Fn(Id, SHA256, SHA256) -> bool) + Copy + 'a,
+    {
+        stream::unfold(self, move |mut reader| async move {
+            let id = (reader.next_entry::<Id, _>(check).await)
+                .map_err(|err| log::error!("{err}"))
+                .ok()??;
+            Some((id, reader))
+        })
+    }
+
+    /// Get a stream over the entries of the folder, with the default hash check. May fail…
+    pub fn unfold_entries<Id>(self) -> impl Stream<Item = Id> + use<'a, Id>
+    where
+        Id: FromStr<Err: std::error::Error> + fmt::Display + Copy + 'a,
+    {
+        self.unfold_entries_with_check(default_check)
+    }
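`unfold_entries` wraps the stateful `next_entry` call in `futures::stream::unfold`, which turns a seed value plus an async step function into a `Stream`: return `Some((item, next_seed))` to yield, `None` to stop. A tiny standalone example of the combinator, unrelated to the patch's types:

```rust
use futures::{stream, StreamExt};

#[tokio::main]
async fn main() {
    // Each step receives the current seed and yields (item, next_seed),
    // or None to terminate the stream.
    let ids = stream::unfold(0u64, |state| async move {
        (state < 3).then_some((state * 10, state + 1))
    });
    assert_eq!(ids.collect::<Vec<_>>().await, vec![0, 10, 20]);
}
```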
+
+    /// Get the next entry from the folder. May fail… This is a generator (just an async
+    /// iterator). Here we let the user implement their own validity check.
+    async fn next_entry<Id, FnCheck>(&mut self, check: FnCheck) -> anyhow::Result<Option<Id>>
+    where
+        Id: FromStr<Err: std::error::Error> + fmt::Display + Copy,
+        FnCheck: Fn(Id, SHA256, SHA256) -> bool,
+    {
+        while let Some(entry) = (self.read_dir.next_entry())
+            .await
+            .context("failed to read root folder")?
+        {
+            // Parse the name of the file
+            let entry = match entry.file_type().await {
+                Ok(file_type) if file_type.is_file() => entry.file_name(),
+                _ => continue, // Only consider files
+            };
+            let Some(id) = entry.to_str().and_then(|str| str.strip_suffix(self.suffix)) else {
+                continue; // Only consider json files
+            };
+            let id = Id::from_str(id).map_err(|err| anyhow!("invalid json file name: {err}"))?;
+
+            // Check for hash validity
+            let digest = sha256::try_async_digest(&entry)
+                .await
+                .with_context(|| format!("failed to read `{}` file", self.suffix))?
+                .parse::<SHA256>()?;
+
+            let mut entry_ok = PathBuf::from(&entry);
+            entry_ok.set_extension("ok");
+            let ok_digest = fs::read_to_string(&entry_ok)
+                .await
+                .context("failed to read `.ok` file")?
+                .parse::<SHA256>()?;
+
+            match check(id, digest, ok_digest) {
+                false => log::error!(
+                    "hash of `{}` file {} didn't match the one stored in `.ok` file {}",
+                    self.suffix,
+                    PathBuf::from(&entry).display(),
+                    entry_ok.display()
+                ),
+
+                // Everything is Ok, return the parsed id.
+                true => return Ok(Some(id)),
+            }
+        }
+
+        // Hit end of directory.
+        Ok(None)
+    }
+}
diff --git a/lektor_nkdb/src/storage/mod.rs b/lektor_nkdb/src/storage/mod.rs
index ad841439..a333da79 100644
--- a/lektor_nkdb/src/storage/mod.rs
+++ b/lektor_nkdb/src/storage/mod.rs
@@ -6,9 +6,13 @@ use crate::*;
 use anyhow::Result;
 use kurisu_api::SHA256;
 
+mod folder_reader;
+
+// The disk storage.
 mod disk_storage;
 pub use disk_storage::DatabaseDiskStorage;
 
+// Test things.
 #[cfg(test)]
 mod test_storage;
 #[cfg(test)]
diff --git a/lektor_nkdb/src/storage/test_storage.rs b/lektor_nkdb/src/storage/test_storage.rs
index b9e30c60..3d1d6e30 100644
--- a/lektor_nkdb/src/storage/test_storage.rs
+++ b/lektor_nkdb/src/storage/test_storage.rs
@@ -2,12 +2,7 @@
 //! tests and we don't depend on the fs to do things... Note that the [Uri] returned by this
 //! storage are in `void://` which is not something usable!
 
-use crate::{
-    database::{epoch::*, kara::*},
-    id::*,
-    storage::DatabaseStorage,
-    Playlist,
-};
+use crate::{database::epoch::*, id::*, kara::*, storage::DatabaseStorage, Playlist};
 use anyhow::{Context, Result};
 use futures::future;
 use hashbrown::HashMap;
diff --git a/lektor_payloads/src/lib.rs b/lektor_payloads/src/lib.rs
index 56cf7eaa..46dac1d7 100644
--- a/lektor_payloads/src/lib.rs
+++ b/lektor_payloads/src/lib.rs
@@ -18,7 +18,7 @@ pub use crate::{
 };
 pub use lektor_nkdb::{
     KId, Kara, KaraStatus, KaraTimeStamps, Playlist, PlaylistInfo, RemoteKId, SongOrigin, SongType,
-    SONGORIGIN_LENGTH, SONGTYPE_LENGTH,
+    Tags, SONGORIGIN_LENGTH, SONGTYPE_LENGTH,
 };
 
 use anyhow::{anyhow, ensure};
-- 
GitLab