From 03bf78b2a3113d20bae5c45501da1cf2093bfa81 Mon Sep 17 00:00:00 2001
From: Kubat <maelle.martin@proton.me>
Date: Fri, 18 Oct 2024 18:35:07 +0200
Subject: [PATCH] SEARCH: Implement the base of matching karas

---
 Cargo.lock                        |  22 +++-
 Cargo.toml                        |   2 +
 amadeus/src/app/pages/search.rs   |   4 +-
 kurisu_api/Cargo.toml             |  25 ++--
 kurisu_api/src/v2.rs              |   9 +-
 lektor_nkdb/src/database/kara.rs  |   9 --
 lektor_nkdb/src/id.rs             |   8 +-
 lektor_nkdb/src/lib.rs            |   9 +-
 lektor_nkdb/src/search/kara_by.rs | 144 ---------------------
 lektor_nkdb/src/search/mod.rs     |  83 ------------
 lektor_payloads/src/filter.rs     |  16 +++
 lektor_payloads/src/lib.rs        |   8 +-
 lektor_payloads/src/search.rs     | 145 +++++++++++++++++++--
 lektor_search/Cargo.toml          |  16 +++
 lektor_search/src/batch.rs        | 204 ++++++++++++++++++++++++++++++
 lektor_search/src/lib.rs          |  45 +++++++
 lektor_search/src/search.rs       | 194 ++++++++++++++++++++++++++++
 lektor_search/src/traits.rs       |  37 ++++++
 lektord/Cargo.toml                |   3 +-
 19 files changed, 701 insertions(+), 282 deletions(-)
 delete mode 100644 lektor_nkdb/src/search/kara_by.rs
 delete mode 100644 lektor_nkdb/src/search/mod.rs
 create mode 100644 lektor_payloads/src/filter.rs
 create mode 100644 lektor_search/Cargo.toml
 create mode 100644 lektor_search/src/batch.rs
 create mode 100644 lektor_search/src/lib.rs
 create mode 100644 lektor_search/src/search.rs
 create mode 100644 lektor_search/src/traits.rs

diff --git a/Cargo.lock b/Cargo.lock
index 2055200c..16b7f847 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -812,9 +812,9 @@ checksum = "64fa3c856b712db6612c019f14756e64e4bcea13337a6b33b696333a9eaa2d06"
 
 [[package]]
 name = "bytemuck"
-version = "1.18.0"
+version = "1.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae"
+checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d"
 dependencies = [
  "bytemuck_derive",
 ]
@@ -2297,6 +2297,7 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
 dependencies = [
  "ahash",
  "allocator-api2",
+ "serde",
 ]
 
 [[package]]
@@ -3018,6 +3019,7 @@ version = "8.0.1"
 dependencies = [
  "derive_more",
  "hashbrown 0.15.0",
+ "lektor_procmacros",
  "lektor_utils",
  "log",
  "serde",
@@ -3131,6 +3133,17 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "lektor_search"
+version = "8.0.1"
+dependencies = [
+ "aho-corasick",
+ "futures",
+ "hashbrown 0.14.5",
+ "lektor_payloads",
+ "log",
+]
+
 [[package]]
 name = "lektor_utils"
 version = "8.0.1"
@@ -3162,6 +3175,7 @@ dependencies = [
  "lektor_nkdb",
  "lektor_payloads",
  "lektor_repo",
+ "lektor_search",
  "lektor_utils",
  "log",
  "rand",
@@ -4176,9 +4190,9 @@ dependencies = [
 
 [[package]]
 name = "profiling"
-version = "1.0.15"
+version = "1.0.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43d84d1d7a6ac92673717f9f6d1518374ef257669c24ebc5ac25d5033828be58"
+checksum = "afbdc74edc00b6f6a218ca6a5364d6226a259d4b8ea1af4a0ea063f27e179f4d"
 
 [[package]]
 name = "qoi"
diff --git a/Cargo.toml b/Cargo.toml
index 638ca35a..1e3e7846 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -64,10 +64,12 @@ lektor_mpris      = { path = "lektor_mpris" }
 lektor_payloads   = { path = "lektor_payloads" }
 lektor_procmacros = { path = "lektor_procmacros" }
 lektor_nkdb       = { path = "lektor_nkdb" }
+lektor_search     = { path = "lektor_search" }
 
 # Data Structures
 hashbrown     = { version = "*", features = ["serde"] }
 async-channel = { version = "*", default-features = false } 
+aho-corasick  = { version = "*" }
 
 # Serialization & Deserialization
 toml = "*"
diff --git a/amadeus/src/app/pages/search.rs b/amadeus/src/app/pages/search.rs
index b427ed70..86d1f49a 100644
--- a/amadeus/src/app/pages/search.rs
+++ b/amadeus/src/app/pages/search.rs
@@ -48,8 +48,8 @@ impl FilterAtom {
         let (icon, text) = match &self.1 {
             KaraBy::Id(id) => (icon!(HASHTAG), id.to_string()),
             KaraBy::Query(query) => (icon!(FILTER), query.clone()),
-            KaraBy::Tag((name, None)) => (icon!(TAG), name.clone()),
-            KaraBy::Tag((name, Some(value))) => (icon!(TAGS), format!("{name}:{value}")),
+            KaraBy::Tag(name, None) => (icon!(TAG), name.clone()),
+            KaraBy::Tag(name, Some(value)) => (icon!(TAGS), format!("{name}:{value}")),
             KaraBy::SongType(song_type) => (icon!(HASHTAG), song_type.to_string()),
             KaraBy::SongOrigin(song_origin) => (icon!(HASHTAG), song_origin.to_string()),
             KaraBy::Author(author) => (icon!(USER), author.clone()),
diff --git a/kurisu_api/Cargo.toml b/kurisu_api/Cargo.toml
index 5c6996e5..a962735c 100644
--- a/kurisu_api/Cargo.toml
+++ b/kurisu_api/Cargo.toml
@@ -1,23 +1,24 @@
 [package]
-name = "kurisu_api"
+name        = "kurisu_api"
 description = "Crate used to deserialize what Kurisu returns"
-rust-version.workspace = true
 
-version.workspace = true
-edition.workspace = true
-authors.workspace = true
-license.workspace = true
+rust-version.workspace = true
+version.workspace      = true
+edition.workspace      = true
+authors.workspace      = true
+license.workspace      = true
 
 [lib]
 doctest = false
 
 [dependencies]
-log.workspace = true
-serde.workspace = true
-sha256.workspace = true
-hashbrown.workspace = true
-derive_more.workspace = true
-lektor_utils = { path = "../lektor_utils" }
+log.workspace               = true
+serde.workspace             = true
+sha256.workspace            = true
+hashbrown.workspace         = true
+derive_more.workspace       = true
+lektor_utils.workspace      = true
+lektor_procmacros.workspace = true
 
 [dev-dependencies]
 serde_json.workspace = true
diff --git a/kurisu_api/src/v2.rs b/kurisu_api/src/v2.rs
index 35c9cae8..6fd2504b 100644
--- a/kurisu_api/src/v2.rs
+++ b/kurisu_api/src/v2.rs
@@ -3,6 +3,7 @@
 use crate::{error::Error, SHA256};
 use derive_more::Display;
 use hashbrown::{HashMap, HashSet};
+use lektor_procmacros::EnumVariantCount;
 use serde::{Deserialize, Serialize};
 use std::{borrow, cmp, collections::BTreeSet, str::FromStr};
 
@@ -155,7 +156,9 @@ impl Infos {
 }
 
 /// The type of a song. One the the following, one per kara.
-#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Copy, Hash, Display)]
+#[derive(
+    Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Copy, Hash, Display, EnumVariantCount,
+)]
 #[serde(rename_all = "UPPERCASE")]
 #[display("{}", self.as_str())]
 pub enum SongType {
@@ -167,7 +170,9 @@ pub enum SongType {
 }
 
 /// The origin of a song's source. One the the following, one per kara.
-#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Copy, Hash, Display)]
+#[derive(
+    Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Copy, Hash, Display, EnumVariantCount,
+)]
 #[serde(rename_all = "lowercase")]
 #[display("{}", self.as_str())]
 pub enum SongOrigin {
diff --git a/lektor_nkdb/src/database/kara.rs b/lektor_nkdb/src/database/kara.rs
index aeab4641..31747df3 100644
--- a/lektor_nkdb/src/database/kara.rs
+++ b/lektor_nkdb/src/database/kara.rs
@@ -99,15 +99,6 @@ impl Kara {
         )
     }
 
-    /// Get the source/title string to use for regex match.
-    pub(crate) fn to_title_string(&self) -> String {
-        let mut ret = String::with_capacity(self.song_title.len() + self.song_source.len() + 3);
-        ret.push_str(&self.song_source.to_lowercase());
-        ret.push_str(" / ");
-        ret.push_str(&self.song_title.to_lowercase());
-        ret
-    }
-
     pub const TAG_NUMBER: &str = "number";
     pub const TAG_VERSION: &str = "version";
 }
diff --git a/lektor_nkdb/src/id.rs b/lektor_nkdb/src/id.rs
index 0dea9e69..3fdc8a92 100644
--- a/lektor_nkdb/src/id.rs
+++ b/lektor_nkdb/src/id.rs
@@ -13,6 +13,12 @@ use std::{borrow, num, str::FromStr, sync::Arc};
 #[display("{_0}")]
 pub struct KId(pub(crate) u64);
 
+impl KId {
+    pub const fn from_u64(id: u64) -> Self {
+        Self(id)
+    }
+}
+
 impl PartialEq<KId> for u64 {
     fn eq(&self, other: &KId) -> bool {
         other.0 == *self
@@ -35,7 +41,7 @@ impl FromStr for KId {
 
 impl From<u64> for KId {
     fn from(value: u64) -> Self {
-        Self(value)
+        Self::from_u64(value)
     }
 }
 
diff --git a/lektor_nkdb/src/lib.rs b/lektor_nkdb/src/lib.rs
index f9e94341..80e25d79 100644
--- a/lektor_nkdb/src/lib.rs
+++ b/lektor_nkdb/src/lib.rs
@@ -8,15 +8,11 @@ pub use crate::{
     },
     id::{KId, RemoteKId},
     playlists::playlist::{Playlist, PlaylistInfo},
-    search::{KaraBy, SearchFrom},
     storage::{DatabaseDiskStorage, DatabaseStorage},
 };
-pub use kurisu_api::v2::{SongOrigin, SongType};
+pub use kurisu_api::v2::{SongOrigin, SongType, SONGORIGIN_LENGTH, SONGTYPE_LENGTH};
 
-use crate::{
-    database::{epoch::EpochData, pool::Pool},
-    search::*,
-};
+use crate::database::{epoch::EpochData, pool::Pool};
 use anyhow::{anyhow, Context as _, Result};
 use hashbrown::HashMap;
 use lektor_utils::pushvec::*;
@@ -25,7 +21,6 @@ use playlists::{Playlists, PlaylistsHandle};
 mod database;
 mod id;
 mod playlists;
-mod search;
 mod storage;
 mod strings;
 
diff --git a/lektor_nkdb/src/search/kara_by.rs b/lektor_nkdb/src/search/kara_by.rs
deleted file mode 100644
index ae375cbc..00000000
--- a/lektor_nkdb/src/search/kara_by.rs
+++ /dev/null
@@ -1,144 +0,0 @@
-use crate::*;
-use lektor_utils::either;
-use regex::{Regex, RegexBuilder};
-use serde::{Deserialize, Serialize};
-use std::{borrow::Cow, convert::Infallible, fmt, str::FromStr};
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
-pub enum KaraBy {
-    Id(u64),
-    Query(String),
-    Tag((String, Option<String>)),
-    SongType(SongType),
-    SongOrigin(SongOrigin),
-    Author(String),
-    Playlist(String),
-}
-
-/// Get the index if the character is not an alphanumeric or space one.
-fn non_alphanumspace_char((i, c): (usize, char)) -> Option<usize> {
-    (!(c.is_ascii_digit() || c.is_alphanumeric() || c.is_whitespace())).then_some(i)
-}
-
-/// Trim a string in-place.
-fn trim_in_place(value: &mut String) {
-    const SPACE: &[char] = &[' ', '\t', '\r', '\n'];
-    while value.starts_with(SPACE) {
-        value.remove(0);
-    }
-    while value.ends_with(SPACE) {
-        value.pop();
-    }
-}
-
-fn build_regex_for_cow(value: Cow<'_, str>) -> Result<Regex> {
-    let mut fuzzy = value.trim().replace(' ', r".+").to_lowercase();
-    fuzzy.insert_str(0, r".*");
-    fuzzy.push_str(r".*");
-    Ok(RegexBuilder::new(&fuzzy)
-        .nest_limit(32)
-        .swap_greed(true)
-        .case_insensitive(false)
-        .build()?)
-}
-
-impl fmt::Display for KaraBy {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let str: Cow<str> = self.into();
-        f.write_str(&str)
-    }
-}
-
-impl<'a> From<&'a KaraBy> for Cow<'a, str> {
-    fn from(value: &'a KaraBy) -> Self {
-        match value {
-            KaraBy::Id(id) => Cow::Owned(id.to_string()),
-            KaraBy::Tag((tag, Some(value))) => Cow::Owned(format!("{tag}:{value}")),
-            KaraBy::Tag((tag, None)) => Cow::Borrowed(tag.as_str()),
-            KaraBy::SongType(ty) => Cow::Borrowed(ty.as_str()),
-            KaraBy::SongOrigin(ori) => Cow::Borrowed(ori.as_str()),
-
-            KaraBy::Author(str) | KaraBy::Playlist(str) | KaraBy::Query(str) => {
-                Cow::Borrowed(str.as_str())
-            }
-        }
-    }
-}
-
-impl From<String> for KaraBy {
-    fn from(mut value: String) -> Self {
-        trim_in_place(&mut value);
-
-        if value.starts_with('@') {
-            value.remove(0);
-            trim_in_place(&mut value);
-            Self::Author(value)
-        } else if value.starts_with('#') {
-            value.remove(0);
-            trim_in_place(&mut value);
-            Self::Playlist(value)
-        } else if let Ok(value) = value.parse::<u64>() {
-            Self::Id(value)
-        } else if let Ok(value) = value.parse::<SongType>() {
-            Self::SongType(value)
-        } else if let Ok(value) = value.parse::<SongOrigin>() {
-            Self::SongOrigin(value)
-        } else if let Some((tag, value)) = value.split_once(':') {
-            if tag.is_empty() {
-                Self::Tag((value.trim().to_string(), None))
-            } else {
-                let value = value.trim();
-                let value = either!(value.is_empty() => None; Some(value.to_string()));
-                Self::Tag((tag.trim().to_string(), value))
-            }
-        } else {
-            Self::Query(value)
-        }
-    }
-}
-
-impl FromStr for KaraBy {
-    type Err = Infallible;
-
-    fn from_str(value: &str) -> Result<Self, Self::Err> {
-        Ok(value.trim().to_string().into())
-    }
-}
-
-impl TryFrom<KaraBy> for Regex {
-    type Error = anyhow::Error;
-
-    fn try_from(value: KaraBy) -> std::result::Result<Self, Self::Error> {
-        use KaraBy::*;
-        match &value {
-            Id(_) | SongType(_) | SongOrigin(_) => {}
-            regex @ Author(_) | regex @ Playlist(_) | regex @ Tag(_) | regex @ Query(_) => {
-                if let Some(idx) = Into::<Cow<'_, _>>::into(regex)
-                    .char_indices()
-                    .find_map(non_alphanumspace_char)
-                {
-                    let regex = regex.to_string();
-                    anyhow::bail!("invalid char at index {idx} in regex: {regex}")
-                }
-            }
-        }
-
-        build_regex_for_cow(Into::<Cow<str>>::into(&value))
-    }
-}
-
-impl TryFrom<KaraBy> for SearchBy {
-    type Error = anyhow::Error;
-
-    fn try_from(value: KaraBy) -> std::result::Result<Self, Self::Error> {
-        match value {
-            KaraBy::Query(query) => build_regex_for_cow(Cow::Owned(query)).map(SearchBy::Query),
-            KaraBy::Id(id) => Ok(SearchBy::Id(id)),
-            KaraBy::Tag((tag, value)) => Ok(SearchBy::Tag((tag, value))),
-            KaraBy::SongType(ty) => Ok(SearchBy::SongType(ty)),
-            KaraBy::SongOrigin(ori) => Ok(SearchBy::SongOrigin(ori)),
-            KaraBy::Author(auth) => Ok(SearchBy::Author(auth)),
-            KaraBy::Playlist(plt) => Ok(SearchBy::Playlist(plt)),
-        }
-    }
-}
diff --git a/lektor_nkdb/src/search/mod.rs b/lektor_nkdb/src/search/mod.rs
deleted file mode 100644
index 6c7b416b..00000000
--- a/lektor_nkdb/src/search/mod.rs
+++ /dev/null
@@ -1,83 +0,0 @@
-//! Utilities to search the database, the playlists, the history, the queue, etc, in a single
-//! consistent way.
-
-mod kara_by;
-
-pub use kara_by::*;
-
-use crate::{KId, Kara};
-use anyhow::Result;
-use kurisu_api::v2::{SongOrigin, SongType};
-use regex::Regex;
-use serde::{Deserialize, Serialize};
-
-/// Structure to tell from which KId set we are searching.
-#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
-pub enum SearchFrom {
-    Queue,
-    Database,
-    History,
-    Playlist(KId),
-}
-
-/// Structure used to tell how to do the search, either by a regex, or by applying another way
-/// (author, tag, etc), or a list (intersection) of multiple things.
-#[derive(Debug, Clone)]
-pub enum SearchBy {
-    Query(Regex),
-    Id(u64),
-    Tag((String, Option<String>)),
-    SongType(SongType),
-    SongOrigin(SongOrigin),
-    Author(String),
-    Playlist(String),
-    Multiple(Vec<SearchBy>),
-}
-
-impl FromIterator<SearchBy> for SearchBy {
-    fn from_iter<T: IntoIterator<Item = SearchBy>>(iter: T) -> Self {
-        SearchBy::Multiple(iter.into_iter().collect())
-    }
-}
-
-impl SearchBy {
-    pub(crate) fn new(regex: KaraBy) -> Result<Self> {
-        regex.try_into()
-    }
-
-    /// Get the list of playlist that are needed for the kara to match. This is the only
-    /// informations that is not present in the epoch and thus need to be handled differently...
-    pub(crate) fn into_needed_playlists(self) -> Vec<String> {
-        match self {
-            SearchBy::Playlist(plt) => vec![plt],
-            SearchBy::Multiple(searches) => searches
-                .into_iter()
-                .flat_map(Self::into_needed_playlists)
-                .collect(),
-            _ => vec![],
-        }
-    }
-
-    /// A match function.
-    pub(crate) fn matches(&self, kara: &Kara) -> bool {
-        match &self {
-            SearchBy::Query(regex) => regex.is_match(&kara.to_title_string()),
-            SearchBy::Id(id) => kara.id == *id,
-            SearchBy::SongType(ty) => kara.song_type.eq(ty),
-            SearchBy::SongOrigin(ori) => kara.song_origin.eq(ori),
-            SearchBy::Author(author) => kara.kara_makers.contains(author.as_str()),
-            SearchBy::Tag((key, None)) => kara.tags.contains_key(key.as_str()),
-            SearchBy::Tag((key, Some(value))) => kara
-                .tags
-                .get(key.as_str())
-                .map(|v| v.iter().any(|v| v.as_ref().eq(value.as_str())))
-                .unwrap_or_default(),
-
-            // Recursive thing to apply multiple filters.
-            SearchBy::Multiple(filters) => filters.iter().all(|filter| filter.matches(kara)),
-
-            // Handled after...
-            SearchBy::Playlist(_) => true,
-        }
-    }
-}
diff --git a/lektor_payloads/src/filter.rs b/lektor_payloads/src/filter.rs
new file mode 100644
index 00000000..3d91d343
--- /dev/null
+++ b/lektor_payloads/src/filter.rs
@@ -0,0 +1,16 @@
+use lektor_nkdb::KId;
+use serde::{Deserialize, Serialize};
+
+/// Add to something (playlist/queue/...), or remove something. Sometimes we can decide to shuffle
+/// the set of karas/the playlist before adding it. When removing, the shuffle flag is ignored.
+#[derive(Debug, Serialize, Deserialize)]
+pub enum KaraFilter {
+    /// A single kara.
+    KId(KId),
+
+    /// A set of karas.
+    List(bool, Vec<KId>),
+
+    /// The content of a playlist.
+    Playlist(bool, KId),
+}
diff --git a/lektor_payloads/src/lib.rs b/lektor_payloads/src/lib.rs
index 8825f98a..56cf7eaa 100644
--- a/lektor_payloads/src/lib.rs
+++ b/lektor_payloads/src/lib.rs
@@ -1,6 +1,7 @@
 //! Crate containing structs/enums that are used as payloads to communicate with the lektord
 //! daemon. Some things are re-exports.
 
+mod filter;
 mod play_state;
 mod priority;
 mod range;
@@ -8,15 +9,16 @@ mod search;
 mod userid;
 
 pub use crate::{
+    filter::*,
+    play_state::*,
     priority::{Priority, PRIORITY_LENGTH, PRIORITY_VALUES},
-    play_state::PlayState,
     range::*,
     search::*,
     userid::LektorUser,
 };
 pub use lektor_nkdb::{
-    KId, Kara, KaraBy, KaraStatus, KaraTimeStamps, Playlist, PlaylistInfo, RemoteKId, SearchFrom,
-    SongOrigin, SongType,
+    KId, Kara, KaraStatus, KaraTimeStamps, Playlist, PlaylistInfo, RemoteKId, SongOrigin, SongType,
+    SONGORIGIN_LENGTH, SONGTYPE_LENGTH,
 };
 
 use anyhow::{anyhow, ensure};
diff --git a/lektor_payloads/src/search.rs b/lektor_payloads/src/search.rs
index 4468c995..11b4d485 100644
--- a/lektor_payloads/src/search.rs
+++ b/lektor_payloads/src/search.rs
@@ -1,5 +1,50 @@
-use crate::*;
+use anyhow::Result;
+use lektor_nkdb::{KId, SongOrigin, SongType};
 use serde::{Deserialize, Serialize};
+use std::{borrow::Cow, convert::Infallible, fmt, str::FromStr};
+
+/// A single search criterion: either a free-text query, or a structured filter (id, tag, author,
+/// etc). A search is done with a list of them, the results being their intersection.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
+pub enum KaraBy {
+    /// A kara with a particular Id.
+    Id(u64),
+
+    /// Query by strings.
+    Query(String),
+
+    /// A tag, with an optional value. If the second element is [Some], one of the values of the
+    /// tag must contain the string, if it is [None] then the tag must just be present.
+    Tag(String, Option<String>),
+
+    /// Karas with a specific [SongType]
+    SongType(SongType),
+
+    /// Karas with a specific [SongOrigin]
+    SongOrigin(SongOrigin),
+
+    /// Karas made by a specific author.
+    Author(String),
+
+    /// Karas that are contained in a specific playlist, by its name.
+    Playlist(String),
+}
+
+/// Structure to tell from which KId set we are searching.
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
+pub enum SearchFrom {
+    /// Search from the queue.
+    Queue,
+
+    /// Search from the whole database.
+    Database,
+
+    /// Search from the history.
+    History,
+
+    /// Search from a specific playlist.
+    Playlist(KId),
+}
 
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
 pub struct SearchData {
@@ -7,18 +52,91 @@ pub struct SearchData {
     pub regex: Vec<KaraBy>,
 }
 
-/// Add to something (playlist/queue/...), or remove something. Some times we can decide to shuffle
-/// the set of kara/the playlist before adding it. For the removing the shuffle flag is ignored.
-#[derive(Debug, Serialize, Deserialize)]
-pub enum KaraFilter {
-    /// A single kara.
-    KId(KId),
+/// Trim a string and return it.
+fn take_and_trim(mut value: String) -> String {
+    trim_in_place(&mut value);
+    value
+}
+
+/// Trim a string in-place: strip the leading and trailing spaces, tabs, `\r` and `\n`.
+fn trim_in_place(value: &mut String) {
+    const SPACE: &[char] = &[' ', '\t', '\r', '\n'];
+    // Cut the tail first, so that the front shift below moves as few bytes as possible.
+    let kept = value.trim_end_matches(SPACE).len();
+    value.truncate(kept);
+    // A single drain instead of repeated `remove(0)`, which shifted the whole buffer per char.
+    let leading = value.len() - value.trim_start_matches(SPACE).len();
+    value.drain(..leading);
+}
+
+impl fmt::Display for KaraBy {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(Cow::from(self).as_ref())
+    }
+}
+
+impl<'a> From<&'a KaraBy> for Cow<'a, str> {
+    fn from(value: &'a KaraBy) -> Self {
+        match value {
+            KaraBy::Id(id) => Cow::Owned(id.to_string()),
+
+            KaraBy::Tag(tag, Some(value)) => Cow::Owned(format!("{tag}:{value}")),
+            KaraBy::Tag(tag, None) => Cow::Borrowed(tag.as_str()),
+
+            KaraBy::SongType(ty) => Cow::Borrowed(ty.as_str()),
+            KaraBy::SongOrigin(ori) => Cow::Borrowed(ori.as_str()),
+            KaraBy::Author(str) | KaraBy::Playlist(str) | KaraBy::Query(str) => {
+                Cow::Borrowed(str.as_str())
+            }
+        }
+    }
+}
+
+impl From<String> for KaraBy {
+    fn from(mut value: String) -> Self {
+        trim_in_place(&mut value);
+
+        if value.starts_with('@') {
+            value.remove(0);
+            return Self::Author(take_and_trim(value));
+        }
 
-    /// A set of karas.
-    List(bool, Vec<KId>),
+        if value.starts_with('#') {
+            value.remove(0);
+            return Self::Playlist(take_and_trim(value));
+        }
 
-    /// The content of a playlist.
-    Playlist(bool, KId),
+        if let Ok(value) = value.parse::<u64>() {
+            return Self::Id(value);
+        }
+
+        if let Ok(value) = value.parse::<SongType>() {
+            return Self::SongType(value);
+        }
+
+        if let Ok(value) = value.parse::<SongOrigin>() {
+            return Self::SongOrigin(value);
+        }
+
+        let Some(idx) = value.find(':') else {
+            return Self::Query(value);
+        };
+        let tag_value = take_and_trim(value.split_off(idx + 1)); // what follows the first ':'
+        value.pop(); // `value` is now "<tag>:", drop the separator to keep only the tag name
+        let tag = take_and_trim(value);
+        match tag.is_empty() {
+            true => Self::Tag(tag_value, None),
+            false => Self::Tag(tag, (!tag_value.is_empty()).then_some(tag_value)),
+        }
+    }
+}
+
+impl FromStr for KaraBy {
+    type Err = Infallible;
+
+    fn from_str(value: &str) -> Result<Self, Self::Err> {
+        Ok(value.trim().to_string().into())
+    }
 }
 
 #[cfg(test)]
@@ -33,8 +151,7 @@ mod test {
     fn assert_serde<T: Serialize + for<'de> Deserialize<'de> + std::fmt::Debug + std::cmp::Eq>(
         obj: T,
     ) -> Result<()> {
-        let res = serde_json::from_str(&serde_json::to_string(&obj)?)?;
-        assert_eq!(obj, res);
+        assert_eq!(obj, serde_json::from_str(&serde_json::to_string(&obj)?)?);
         Ok(())
     }
 
@@ -45,7 +162,7 @@ mod test {
             regex: vec![KaraBy::Id(42)],
         })?;
         assert_serde(SearchData {
-            from: SearchFrom::Playlist("jibun".parse().unwrap()),
+            from: SearchFrom::Playlist("1".parse().unwrap()),
             regex: vec![KaraBy::Query("Chicka".to_string())],
         })?;
 
diff --git a/lektor_search/Cargo.toml b/lektor_search/Cargo.toml
new file mode 100644
index 00000000..153ae2af
--- /dev/null
+++ b/lektor_search/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name        = "lektor_search"
+description = "Search traits, functions, utilities, to search the database and the queue for matching karas."
+
+rust-version.workspace = true
+version.workspace      = true
+edition.workspace      = true
+authors.workspace      = true
+license.workspace      = true
+
+[dependencies]
+lektor_payloads.workspace = true
+futures.workspace         = true
+hashbrown.workspace       = true
+aho-corasick.workspace    = true
+log.workspace             = true
diff --git a/lektor_search/src/batch.rs b/lektor_search/src/batch.rs
new file mode 100644
index 00000000..bfc7a10b
--- /dev/null
+++ b/lektor_search/src/batch.rs
@@ -0,0 +1,204 @@
+use std::mem::MaybeUninit;
+
+/// A batch of elements containing at most `SIZE` elements. It implements [Iterator], so you
+/// can pull elements out of the batch to process them.
+///
+/// The `Item` type must be [Copy], so that the manipulation is easier: we don't need to handle
+/// dropping the items, etc.
+#[derive(Clone, Copy)]
+pub struct Batch<const SIZE: usize, Item: Copy> {
+    /// The content of the batch. Items after the `current + count` position won't be initialized.
+    content: [MaybeUninit<Item>; SIZE],
+
+    /// The base index in content, i.e. the position of the next item to yield.
+    current: usize,
+
+    /// The number of items left in the batch. Note that `current + count` never exceeds the
+    /// const SIZE parameter.
+    count: usize,
+}
+
+impl<const SIZE: usize, Item: Copy> Batch<SIZE, Item> {
+    /// Create a new batch from a count and a content.
+    ///
+    /// # Safety
+    /// The passed count must correspond to the number of initialized values at the beginning of
+    /// the passed array.
+    const unsafe fn new(count: usize, content: [MaybeUninit<Item>; SIZE]) -> Self {
+        Self {
+            current: 0,
+            content,
+            count,
+        }
+    }
+
+    /// Get the capacity of the batch.
+    pub const fn capacity() -> usize {
+        SIZE
+    }
+
+    /// Create a complete batch out of an array of items.
+    pub const fn from_array(value: [Item; SIZE]) -> Self {
+        let mut content = [MaybeUninit::<Item>::uninit(); SIZE];
+        let mut i = 0;
+        while i < SIZE {
+            content[i] = MaybeUninit::new(value[i]);
+            i += 1;
+        }
+
+        // SAFETY: the loop above wrote every one of the `SIZE` slots.
+        unsafe { Self::new(SIZE, content) }
+    }
+
+    /// Create a batch out of the [Some] items of the array, keeping their relative order. The
+    /// [None] holes may be anywhere in the array: they are skipped, so the batch may be
+    /// incomplete.
+    pub const fn from_array_maybe(value: [Option<Item>; SIZE]) -> Self {
+        let mut content = [MaybeUninit::<Item>::uninit(); SIZE];
+        let mut count: usize = 0;
+        let mut i: usize = 0;
+        while i < SIZE {
+            // Pack the present items at the front rather than assuming that all the holes are
+            // at the end of the array: only the `count` first slots are ever read back.
+            if let Some(item) = value[i] {
+                content[count] = MaybeUninit::new(item);
+                count += 1;
+            }
+            i += 1;
+        }
+
+        // SAFETY: exactly the first `count` slots were written by the loop above.
+        unsafe { Self::new(count, content) }
+    }
+
+    /// Move things out of the batch and returns an array of the items. The remaining items are
+    /// packed at the beginning of the array, the rest of it is filled with [None].
+    pub fn into_array(self) -> [Option<Item>; SIZE] {
+        let mut ret: [Option<Item>; SIZE] = [None; SIZE];
+        (self.content.into_iter())
+            .skip(self.current)
+            .take(self.count)
+            .enumerate()
+            // SAFETY: the `count` slots starting at `current` are initialized.
+            .for_each(|(idx, id)| ret[idx] = Some(unsafe { id.assume_init() }));
+        ret
+    }
+}
+
+impl<const SIZE: usize, Item: Copy> From<[Option<Item>; SIZE]> for Batch<SIZE, Item> {
+    fn from(value: [Option<Item>; SIZE]) -> Self {
+        Self::from_array_maybe(value)
+    }
+}
+
+impl<const SIZE: usize, Item: Copy> From<[Item; SIZE]> for Batch<SIZE, Item> {
+    fn from(value: [Item; SIZE]) -> Self {
+        Self::from_array(value)
+    }
+}
+
+impl<const SIZE: usize, Item: Copy> Iterator for Batch<SIZE, Item> {
+    type Item = Item;
+
+    /// Yield the item at `current`, if any is left.
+    fn next(&mut self) -> Option<Self::Item> {
+        debug_assert!(self.count + self.current <= SIZE, "invalid size...");
+        self.count = self.count.checked_sub(1)?;
+        let ret = self.content[self.current];
+        self.current += 1;
+        // SAFETY: `ret` comes from the `count` initialized slots starting at `current`.
+        Some(unsafe { ret.assume_init() })
+    }
+
+    /// Skip `n` items and yield the following one. Like the default implementation, asking past
+    /// the end exhausts the batch and returns [None].
+    fn nth(&mut self, n: usize) -> Option<Self::Item> {
+        debug_assert!(self.count + self.current <= SIZE, "invalid size...");
+        let skipped = n.min(self.count);
+        self.current += skipped;
+        self.count -= skipped;
+        self.next()
+    }
+
+    fn last(self) -> Option<Self::Item> {
+        debug_assert!(self.count + self.current <= SIZE, "invalid size...");
+        let offset = self.count.checked_sub(1)?;
+        // SAFETY: the `count` slots starting at `current` are initialized.
+        Some(unsafe { self.content[self.current + offset].assume_init() })
+    }
+
+    fn count(self) -> usize {
+        debug_assert!(self.count + self.current <= SIZE, "invalid size...");
+        self.count
+    }
+
+    /// The number of remaining items is known exactly.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        debug_assert!(self.count + self.current <= SIZE, "invalid size...");
+        (self.count, Some(self.count))
+    }
+}
+
+#[test]
+#[allow(clippy::iter_nth_zero)]
+fn test_batch() {
+    const BATCH_01: Batch<10, u64> = Batch::from_array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+
+    assert_eq!(BATCH_01.last(), Some(9));
+    assert_eq!(BATCH_01.clone().nth(9), Some(9));
+    assert_eq!(BATCH_01.clone().nth(10), None);
+
+    let mut batch = BATCH_01;
+    assert_eq!(batch.size_hint().0, 10);
+    assert_eq!(batch.size_hint().1, Some(10));
+    assert_eq!(batch.count(), 10);
+    assert_eq!(batch.nth(9), Some(9));
+    assert_eq!(batch.count(), 0);
+    assert_eq!(batch.nth(0), None);
+    assert_eq!(batch.last(), None);
+
+    let mut batch = BATCH_01;
+    assert_eq!(batch.nth(0), Some(0));
+    assert_eq!(batch.nth(0), Some(1));
+    assert_eq!(batch.nth(0), Some(2));
+    assert_eq!(batch.nth(0), Some(3));
+    assert_eq!(batch.count(), 6);
+    assert_eq!(batch.last(), Some(9));
+
+    let mut batch = BATCH_01.enumerate();
+    for (i, id) in batch.by_ref() {
+        assert_eq!(i as u64, id);
+        assert!(i < 10);
+    }
+    assert_eq!(batch.next(), None);
+
+    const BATCH_02: Batch<10, u64> = Batch::from_array_maybe([
+        Some(0),
+        None,
+        None,
+        None,
+        None,
+        None,
+        None,
+        None,
+        None,
+        None,
+    ]);
+
+    assert_eq!(BATCH_02.count(), 1);
+    assert_eq!(
+        BATCH_02.into_array(),
+        [
+            Some(0),
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+        ]
+    )
+}
diff --git a/lektor_search/src/lib.rs b/lektor_search/src/lib.rs
new file mode 100644
index 00000000..5c7878a4
--- /dev/null
+++ b/lektor_search/src/lib.rs
@@ -0,0 +1,45 @@
+mod batch;
+mod search;
+mod traits;
+
+use futures::{prelude::*, stream::FuturesUnordered};
+use lektor_payloads::{Kara, KaraBy};
+
+pub use crate::{batch::*, traits::*};
+
+/// Search the karas of a [KaraStore] whose ids are yielded by a [KaraIdExtractor], keeping
+/// the ones that pass all the filters built from the [KaraBy]. Empty if they can't be built.
+pub async fn search<const BATCH_SIZE: usize>(
+    store: &impl KaraStore,
+    extractor: impl KaraIdExtractor,
+    search: Vec<KaraBy>,
+) -> Vec<&Kara> {
+    let Some(search) = search::Search::new(search) else {
+        return Default::default();
+    };
+
+    stream::unfold(extractor, |state| async move {
+        if state.is_empty().await {
+            return None;
+        }
+        Some((state.next_id_batch::<BATCH_SIZE>().await, state)) // Get the chunks of IDs
+    })
+    //
+    // Get the karas out of the ids, one batch at a time.
+    //
+    .then(|ids| store.get_kara_batch(ids))
+    .map(Batch::into_array)
+    //
+    // Keep a kara only if it is matched by the search, the other slots become `None`.
+    //
+    .map(|karas| karas.map(|maybe| maybe.and_then(|kara| search.matches_and_map(kara))))
+    // Drain the stream and build the return vector: the first flatten walks the slots of each
+    // array, the second one drops the empty slots. NOTE(review): the items are arrays, not
+    // futures, so [FuturesUnordered] only acts as a container here, a `Vec` would likely do.
+    .collect::<FuturesUnordered<_>>()
+    .await
+    .into_iter()
+    .flatten()
+    .flatten()
+    .collect()
+}
diff --git a/lektor_search/src/search.rs b/lektor_search/src/search.rs
new file mode 100644
index 00000000..9f8a8d0d
--- /dev/null
+++ b/lektor_search/src/search.rs
@@ -0,0 +1,194 @@
+use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
+use hashbrown::{HashMap, HashSet};
+use lektor_payloads::{
+    KId, Kara, KaraBy, SongOrigin, SongType, SONGORIGIN_LENGTH, SONGTYPE_LENGTH,
+};
+
+/// Precomputed filters, to see if a kara matches.
+#[derive(Default)]
+pub(crate) struct Search {
+    only_id: Option<KId>, // When set, the kara must have this id.
+    tag_has_value: HashMap<String, Vec<String>>, // Tag name -> wanted values, not checked yet.
+    made_by_authors: Vec<String>, // Lowercased names, one of them must be a kara maker.
+    present_in_playlists: Vec<String>, // Not checked yet, rejects every kara when not empty.
+    queries: Option<AhoCorasick>, // When set, must match the song title or the song source.
+    types: [Option<SongType>; SONGTYPE_LENGTH], // Accepted song types, filled front to back.
+    origins: [Option<SongOrigin>; SONGORIGIN_LENGTH], // Accepted origins, filled front to back.
+}
+
+#[derive(Default)] // Every filter starts empty, see `SearchBuilder::new`.
+struct SearchBuilder {
+    only_id: Option<KId>, // First id found in the list of [KaraBy].
+    has_tag: Vec<String>, // Tags given without a value.
+    tag_has_value: Vec<(String, String)>, // Tags given with a value, as (tag, value).
+    query: Vec<String>, // Patterns handed over to the query automaton.
+    present_in_playlists: Vec<String>, // Playlist names, lowercased when the search is built.
+    made_by: Vec<String>, // Author names, lowercased when the search is built.
+    // A kara matches when it has any of the types, same thing for the origins.
+    union_types: Vec<SongType>,
+    union_origin: Vec<SongOrigin>,
+}
+
+impl Search {
+    /// Create the search, precompute filters. Returns [None] if they can't be built.
+    pub fn new(content: Vec<KaraBy>) -> Option<Self> {
+        SearchBuilder::new(content).build()
+    }
+
+    /// See if we match a kara or not: it must pass every filter that was set, a filter that
+    /// was not set accepts any kara. The playlist and tag filters are not implemented yet.
+    fn matches(&self, kara: &Kara) -> bool {
+        macro_rules! ensure {
+            ($expr:expr) => {{
+                if !($expr) {
+                    return false;
+                }
+            }};
+        }
+
+        if let Some(id) = self.only_id {
+            ensure!(kara.id == id);
+        }
+
+        // Fixed-size arrays are never empty, the filter is set once a slot is filled.
+        if self.types.iter().any(Option::is_some) {
+            ensure!((self.types.iter().flatten()).any(|ty| kara.song_type == *ty));
+        }
+
+        if self.origins.iter().any(Option::is_some) {
+            ensure!((self.origins.iter().flatten()).any(|origin| kara.song_origin == *origin));
+        }
+
+        if !self.made_by_authors.is_empty() {
+            let lowercase: HashSet<String> = (kara.kara_makers.iter())
+                .map(|author| author.to_lowercase())
+                .collect();
+            ensure!((self.made_by_authors.iter()).any(|author| lowercase.contains(author)));
+        }
+
+        if !self.present_in_playlists.is_empty() {
+            log::error!("implement the present in playlist check");
+            return false;
+        }
+
+        if let Some(queries) = self.queries.as_ref() {
+            ensure!(queries.is_match(&kara.song_title) || queries.is_match(&kara.song_source));
+        }
+
+        if !self.tag_has_value.is_empty() {
+            log::error!("implement the tag search thing");
+            return false;
+        }
+
+        true
+    }
+
+    /// If we match a kara (see [Self::matches]), then we return said kara.
+    pub fn matches_and_map<'a>(&self, kara: &'a Kara) -> Option<&'a Kara> {
+        self.matches(kara).then_some(kara)
+    }
+
+    /// Build the automaton for the queries, returns [None] (and logs) if it can't be built.
+    /// Without queries the filter stays unset: an automaton without patterns never matches.
+    fn with_queries(mut self, queries: Vec<String>) -> Option<Self> {
+        if queries.is_empty() {
+            return Some(self);
+        }
+        let automaton = AhoCorasickBuilder::new()
+            .ascii_case_insensitive(true)
+            .prefilter(true)
+            .build(queries)
+            .map_err(|err| log::error!("{err}"))
+            .inspect(|aho| {
+                let bytes = aho.memory_usage();
+                log::info!("memory usage for the query automaton: {bytes} bytes")
+            })
+            .ok()?;
+        self.queries = Some(automaton);
+        Some(self)
+    }
+
+    /// Register the tags in the tag filter, without adding any value to them.
+    fn with_tags(mut self, tags: Vec<String>) -> Self {
+        (tags.into_iter()).for_each(|tag| _ = self.tag_has_value.entry(tag).or_default());
+        self
+    }
+
+    /// Register the values of the tags in the tag filter, a value is stored once per tag.
+    fn with_tag_with_values(mut self, tags_n_values: Vec<(String, String)>) -> Self {
+        for (tag, value) in tags_n_values {
+            let values = self.tag_has_value.entry(tag).or_default();
+            if !values.contains(&value) {
+                values.push(value);
+            }
+        }
+        self
+    }
+
+    /// Accept the karas having any of the song types.
+    fn with_types(self, tys: Vec<SongType>) -> Self {
+        tys.into_iter().fold(self, Self::with_type)
+    }
+
+    /// Accept the karas having any of the song origins.
+    fn with_origins(self, origines: Vec<SongOrigin>) -> Self {
+        origines.into_iter().fold(self, Self::with_origin)
+    }
+
+    /// Accept the karas having the song type, on top of the already accepted types.
+    fn with_type(mut self, ty: SongType) -> Self {
+        Self::insert_once(&mut self.types, ty);
+        self
+    }
+
+    /// Accept the karas having the song origin, on top of the already accepted origins.
+    fn with_origin(mut self, origin: SongOrigin) -> Self {
+        Self::insert_once(&mut self.origins, origin);
+        self
+    }
+
+    /// Store the value in the first free slot, unless it is already stored.
+    fn insert_once<T: PartialEq>(slots: &mut [Option<T>], value: T) {
+        if slots.iter().flatten().any(|stored| *stored == value) {
+            return; // Already stored.
+        }
+        if let Some(free) = slots.iter_mut().find(|slot| slot.is_none()) {
+            *free = Some(value);
+        }
+    }
+}
+
+impl SearchBuilder {
+    /// Dispatch every [KaraBy] to the filter it belongs to, only the first id is kept.
+    fn new(content: Vec<KaraBy>) -> Self {
+        let mut ret = SearchBuilder::default();
+        content.into_iter().for_each(|kara_by| match kara_by {
+            KaraBy::Id(id) => ret.only_id = ret.only_id.or(Some(id.into())),
+            KaraBy::Playlist(name) => ret.present_in_playlists.push(name),
+            KaraBy::Query(query) => ret.query.push(query),
+            KaraBy::Tag(tag, None) => ret.has_tag.push(tag),
+            KaraBy::Tag(tag, Some(value)) => ret.tag_has_value.push((tag, value)),
+            KaraBy::Author(author) => ret.made_by.push(author),
+            KaraBy::SongType(tiipe) => ret.union_types.push(tiipe),
+            KaraBy::SongOrigin(origin) => ret.union_origin.push(origin),
+        });
+        ret
+    }
+
+    /// Build the [Search] or [None] on failure, authors are lowercased the way karas are matched.
+    fn build(mut self) -> Option<Search> {
+        log::error!("make_titlecase instead of make_ascii_lowercase");
+        (self.present_in_playlists.iter_mut()).for_each(|str| str.make_ascii_lowercase());
+        Search {
+            only_id: self.only_id,
+            made_by_authors: (self.made_by.iter()).map(|name| name.to_lowercase()).collect(),
+            present_in_playlists: self.present_in_playlists,
+            ..Default::default()
+        }
+        .with_origins(self.union_origin)
+        .with_types(self.union_types)
+        .with_tags(self.has_tag)
+        .with_tag_with_values(self.tag_has_value)
+        .with_queries(self.query)
+    }
+}
diff --git a/lektor_search/src/traits.rs b/lektor_search/src/traits.rs
new file mode 100644
index 00000000..b1fa5a02
--- /dev/null
+++ b/lektor_search/src/traits.rs
@@ -0,0 +1,37 @@
+use crate::batch::Batch;
+use lektor_payloads::{KId, Kara};
+
+#[allow(async_fn_in_trait)]
+pub trait KaraIdExtractor {
+    /// Extract the next kara id.
+    async fn next_id(&self) -> Option<KId>;
+
+    /// Extract the next kara ids as a single batch, to reduce any lock usage.
+    async fn next_id_batch<const SIZE: usize>(&self) -> Batch<SIZE, KId>;
+
+    /// Number of karas left to process before the extractor is empty.
+    async fn count(&self) -> usize;
+
+    /// Whether the extractor is empty, by default when [Self::count] reports zero.
+    async fn is_empty(&self) -> bool {
+        matches!(self.count().await, 0)
+    }
+}
+
+#[allow(async_fn_in_trait)]
+pub trait KaraStore {
+    /// Fetch a kara from its [KId].
+    async fn get_kara(&self, id: KId) -> Option<&Kara>;
+
+    /// Fetch the karas for a [Batch] of [KId], by default one by one with [Self::get_kara].
+    async fn get_kara_batch<const SIZE: usize>(
+        &self,
+        batch: Batch<SIZE, KId>,
+    ) -> Batch<SIZE, &Kara> {
+        let mut karas = [None; SIZE];
+        for (slot, id) in karas.iter_mut().zip(batch.into_array().into_iter().flatten()) {
+            *slot = self.get_kara(id).await;
+        }
+        Batch::<SIZE, &Kara>::from_array_maybe(karas)
+    }
+}
diff --git a/lektord/Cargo.toml b/lektord/Cargo.toml
index 9b51010e..84f35257 100644
--- a/lektord/Cargo.toml
+++ b/lektord/Cargo.toml
@@ -31,7 +31,8 @@ lektor_nkdb = { path = "../lektor_nkdb" }
 lektor_repo = { path = "../lektor_repo" }
 lektor_utils = { path = "../lektor_utils" }
 lektor_mpris = { path = "../lektor_mpris" }
-lektor_payloads = { path = "../lektor_payloads" }
+lektor_payloads.workspace = true
+lektor_search.workspace   = true
 
 [build-dependencies]
 anyhow.workspace = true
-- 
GitLab