Skip to content

Commit

Permalink
Refactor index management and transaction structures for improved typ…
Browse files Browse the repository at this point in the history
…e handling
  • Loading branch information
sachaarbonel committed Jan 10, 2025
1 parent 221a67c commit 556794d
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 115 deletions.
4 changes: 2 additions & 2 deletions src/deadlock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl DeadlockDetector {

pub fn detect_deadlock<S, FTS>(&self, transactions: &[&Transaction<S, FTS>]) -> Option<u64>
where
S: Storage + IndexManager<()> + Clone,
S: Storage + IndexManager + Clone,
FTS: Search + Clone,
FTS::NewArgs: Clone,
{
Expand Down Expand Up @@ -99,7 +99,7 @@ impl DeadlockDetector {

fn select_victim<S, FTS>(&self, cycle: &[u64], transactions: &[&Transaction<S, FTS>]) -> u64
where
S: Storage + IndexManager<()> + Clone,
S: Storage + IndexManager + Clone,
FTS: Search + Clone,
FTS::NewArgs: Clone,
{
Expand Down
34 changes: 21 additions & 13 deletions src/indexes/disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ use std::collections::HashMap;
use super::index_manager::{IndexType, IndexManager};

#[derive(Debug, Serialize, Deserialize)]
pub struct OnDiskIndexManager<T> {
pub struct OnDiskIndexManager {
file_path: String,
indexes: HashMap<String, HashMap<String, IndexType<T>>>,
indexes: HashMap<String, HashMap<String, IndexType>>,
}

impl<T: Clone> Clone for OnDiskIndexManager<T> {
impl Clone for OnDiskIndexManager {
fn clone(&self) -> Self {
OnDiskIndexManager {
file_path: self.file_path.clone(),
Expand All @@ -20,7 +20,7 @@ impl<T: Clone> Clone for OnDiskIndexManager<T> {
}
}

impl<T: Clone> OnDiskIndexManager<T> {
impl OnDiskIndexManager {
pub fn new(file_path: String) -> Self {
let path = Path::new(&file_path);
let mut indexes = HashMap::new();
Expand Down Expand Up @@ -58,8 +58,8 @@ impl<T: Clone> OnDiskIndexManager<T> {
}
}

impl<T: Clone> super::index_manager::IndexManager<T> for OnDiskIndexManager<T> {
fn create_index(&mut self, table: &str, column: &str, index_type: IndexType<T>) {
impl IndexManager for OnDiskIndexManager {
fn create_index(&mut self, table: &str, column: &str, index_type: IndexType) {
self.indexes
.entry(table.to_string())
.or_insert_with(HashMap::new)
Expand All @@ -74,18 +74,26 @@ impl<T: Clone> super::index_manager::IndexManager<T> for OnDiskIndexManager<T> {
}
}

fn get_index(&self, table: &str, column: &str) -> Option<&IndexType<T>> {
fn get_index(&self, table: &str, column: &str) -> Option<&IndexType> {
self.indexes
.get(table)
.and_then(|table_indexes| table_indexes.get(column))
}

fn update_index(&mut self, table: &str, column: &str, old_value: Vec<u8>, new_value: Vec<u8>, row_id: usize) {
if let Some(table_indexes) = self.indexes.get_mut(table) {
if let Some(IndexType::BTree(index)) = table_indexes.get_mut(column) {
index.remove_entry(old_value, row_id);
index.add_entry(new_value, row_id);
self.save().unwrap();
if let Some(index) = table_indexes.get_mut(column) {
match index {
IndexType::BTree(btree) => {
btree.remove_entry(old_value, row_id);
btree.add_entry(new_value, row_id);
self.save().unwrap();
}
IndexType::GIN(gin) => {
// GIN indexes don't support direct value updates
// They are updated through add_document/remove_document
}
}
}
}
}
Expand All @@ -104,7 +112,7 @@ mod tests {

// Create and populate index manager
{
let mut manager: OnDiskIndexManager<()> = OnDiskIndexManager::new(file_path.clone());
let mut manager = OnDiskIndexManager::new(file_path.clone());
let mut btree = BTreeIndex::new();
btree.add_entry(vec![1, 2, 3], 1);
btree.add_entry(vec![4, 5, 6], 2);
Expand All @@ -114,7 +122,7 @@ mod tests {

// Create new manager instance and verify persistence
{
let manager: OnDiskIndexManager<()> = OnDiskIndexManager::new(file_path);
let manager = OnDiskIndexManager::new(file_path);
if let Some(IndexType::BTree(index)) = manager.get_index("users", "age") {
assert!(index.search(vec![1, 2, 3]).unwrap().contains(&1));
assert!(index.search(vec![4, 5, 6]).unwrap().contains(&2));
Expand Down
150 changes: 65 additions & 85 deletions src/indexes/index_manager.rs
Original file line number Diff line number Diff line change
@@ -1,113 +1,60 @@
use super::btree::BTreeIndex;
use crate::fts::search::Search;
use std::collections::HashMap;
use std::collections::HashSet;
use std::marker::PhantomData;
use std::fmt::Debug;
use std::collections::HashSet;
use serde::{Serialize, Deserialize};
use crate::fts::default::DefaultSearchIdx;
use crate::fts::tokenizers::tokenizer::Tokenizer;
use crate::fts::tokenizers::default::DefaultTokenizer;
use crate::indexes::gin::GinIndex;
use crate::indexes::btree::BTreeIndex;
use crate::fts::search::Search;

pub trait SearchIndex: Debug {
type NewArgs: Clone;
fn search(&self, table: &str, column: &str, query: &str) -> HashSet<usize>;
fn add_column(&mut self, table: &str, column: &str);
fn add_document(&mut self, table: &str, column: &str, row_id: usize, text: &str);
fn remove_document(&mut self, table: &str, column: &str, row_id: usize);
fn update_document(&mut self, table: &str, column: &str, row_id: usize, text: &str);
}

impl<T: Search + Debug> SearchIndex for T
where
T::NewArgs: Clone
{
type NewArgs = T::NewArgs;
fn search(&self, table: &str, column: &str, query: &str) -> HashSet<usize> {
T::search(self, table, column, query)
}
fn add_column(&mut self, table: &str, column: &str) {
T::add_column(self, table, column)
}
fn add_document(&mut self, table: &str, column: &str, row_id: usize, text: &str) {
T::add_document(self, table, column, row_id, text)
}
fn remove_document(&mut self, table: &str, column: &str, row_id: usize) {
T::remove_document(self, table, column, row_id)
}
fn update_document(&mut self, table: &str, column: &str, row_id: usize, text: &str) {
T::update_document(self, table, column, row_id, text)
}
}

pub trait IndexManager<T> {
fn create_index(&mut self, table: &str, column: &str, index_type: IndexType<T>);
fn drop_index(&mut self, table: &str, column: &str);
fn get_index(&self, table: &str, column: &str) -> Option<&IndexType<T>>;
fn update_index(&mut self, table: &str, column: &str, old_value: Vec<u8>, new_value: Vec<u8>, row_id: usize);
}

#[derive(Debug)]
pub enum IndexType<T> {
#[derive(Debug, Serialize, Deserialize)]
pub enum IndexType {
BTree(BTreeIndex),
GIN(Box<dyn SearchIndex<NewArgs = T>>),
}

impl<T> Serialize for IndexType<T> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match self {
IndexType::BTree(btree) => btree.serialize(serializer),
IndexType::GIN(_) => unimplemented!("GIN index serialization not supported"),
}
}
}

impl<'de, T> Deserialize<'de> for IndexType<T> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let btree = BTreeIndex::deserialize(deserializer)?;
Ok(IndexType::BTree(btree))
}
GIN(GinIndex<DefaultTokenizer>),
}

impl<T> Clone for IndexType<T> {
impl Clone for IndexType {
fn clone(&self) -> Self {
match self {
IndexType::BTree(btree) => IndexType::BTree(btree.clone()),
IndexType::GIN(_) => unimplemented!("GIN index cloning not supported"),
IndexType::GIN(gin) => IndexType::GIN(gin.clone()),
}
}
}

pub trait IndexManager {
fn create_index(&mut self, table: &str, column: &str, index_type: IndexType);
fn drop_index(&mut self, table: &str, column: &str);
fn get_index(&self, table: &str, column: &str) -> Option<&IndexType>;
fn update_index(&mut self, table: &str, column: &str, old_value: Vec<u8>, new_value: Vec<u8>, row_id: usize);
}

#[derive(Debug)]
pub struct DefaultIndexManager<T> {
indexes: HashMap<String, HashMap<String, IndexType<T>>>,
_phantom: PhantomData<T>,
pub struct DefaultIndexManager {
indexes: HashMap<String, HashMap<String, IndexType>>,
}

impl<T> Clone for DefaultIndexManager<T> {
impl Clone for DefaultIndexManager {
fn clone(&self) -> Self {
DefaultIndexManager {
indexes: self.indexes.clone(),
_phantom: PhantomData,
}
}
}

impl<T> DefaultIndexManager<T> {
pub fn new() -> DefaultIndexManager<T> {
impl DefaultIndexManager {
pub fn new() -> DefaultIndexManager {
DefaultIndexManager {
indexes: HashMap::new(),
_phantom: PhantomData,
}
}
}

impl<T> IndexManager<T> for DefaultIndexManager<T> {
fn create_index(&mut self, table: &str, column: &str, index_type: IndexType<T>) {
impl IndexManager for DefaultIndexManager {
fn create_index(&mut self, table: &str, column: &str, index_type: IndexType) {
self.indexes
.entry(table.to_string())
.or_insert_with(HashMap::new)
Expand All @@ -120,17 +67,25 @@ impl<T> IndexManager<T> for DefaultIndexManager<T> {
}
}

fn get_index(&self, table: &str, column: &str) -> Option<&IndexType<T>> {
fn get_index(&self, table: &str, column: &str) -> Option<&IndexType> {
self.indexes
.get(table)
.and_then(|table_indexes| table_indexes.get(column))
}

fn update_index(&mut self, table: &str, column: &str, old_value: Vec<u8>, new_value: Vec<u8>, row_id: usize) {
if let Some(table_indexes) = self.indexes.get_mut(table) {
if let Some(IndexType::BTree(index)) = table_indexes.get_mut(column) {
index.remove_entry(old_value, row_id);
index.add_entry(new_value, row_id);
if let Some(index) = table_indexes.get_mut(column) {
match index {
IndexType::BTree(btree) => {
btree.remove_entry(old_value, row_id);
btree.add_entry(new_value, row_id);
}
IndexType::GIN(gin) => {
// GIN indexes don't support direct value updates
// They are updated through add_document/remove_document
}
}
}
}
}
Expand All @@ -139,11 +94,10 @@ impl<T> IndexManager<T> for DefaultIndexManager<T> {
#[cfg(test)]
mod tests {
use super::*;
use crate::fts::default::DefaultSearchIdx;

#[test]
fn test_btree_index() {
let mut manager: DefaultIndexManager<()> = DefaultIndexManager::new();
let mut manager = DefaultIndexManager::new();
let mut btree = BTreeIndex::new();

// Add some test data
Expand All @@ -161,7 +115,7 @@ mod tests {

#[test]
fn test_index_crud() {
let mut manager: DefaultIndexManager<()> = DefaultIndexManager::new();
let mut manager = DefaultIndexManager::new();
let btree = BTreeIndex::new();

// Create
Expand All @@ -175,4 +129,30 @@ mod tests {
manager.drop_index("users", "age");
assert!(manager.get_index("users", "age").is_none());
}

#[test]
fn test_gin_index_serialization() {
let mut gin = GinIndex::<DefaultTokenizer>::new();
gin.add_column("users", "bio");
gin.add_document("users", "bio", 1, "Hello world");
gin.add_document("users", "bio", 2, "Goodbye world");

let index_type = IndexType::GIN(gin);

// Serialize
let serialized = bincode::serialize(&index_type).unwrap();

// Deserialize
let deserialized: IndexType = bincode::deserialize(&serialized).unwrap();

// Verify
if let IndexType::GIN(gin) = deserialized {
let results = gin.search("users", "bio", "world");
assert_eq!(results.len(), 2);
assert!(results.contains(&1));
assert!(results.contains(&2));
} else {
panic!("Expected GIN index");
}
}
}
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ impl OnDiskReefDB {
}

#[derive(Clone)]
pub struct ReefDB<S: Storage + IndexManager<()> + Clone + Any, FTS: Search + Clone>
pub struct ReefDB<S: Storage + IndexManager + Clone + Any, FTS: Search + Clone>
where
FTS::NewArgs: Clone + Default,
{
Expand All @@ -98,7 +98,7 @@ where
pub(crate) current_transaction_id: Option<u64>,
}

impl<S: Storage + IndexManager<()> + Clone + Any, FTS: Search + Clone> ReefDB<S, FTS>
impl<S: Storage + IndexManager + Clone + Any, FTS: Search + Clone> ReefDB<S, FTS>
where
FTS::NewArgs: Clone + Default,
{
Expand Down
8 changes: 4 additions & 4 deletions src/storage/disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use crate::indexes::{IndexManager, IndexType, disk::OnDiskIndexManager};
pub struct OnDiskStorage {
file_path: String,
tables: HashMap<String, (Vec<ColumnDef>, Vec<Vec<DataValue>>)>,
index_manager: OnDiskIndexManager<()>,
index_manager: OnDiskIndexManager,
}

impl OnDiskStorage {
Expand Down Expand Up @@ -301,16 +301,16 @@ impl Storage for OnDiskStorage {
}
}

impl IndexManager<()> for OnDiskStorage {
fn create_index(&mut self, table: &str, column: &str, index_type: IndexType<()>) {
impl IndexManager for OnDiskStorage {
fn create_index(&mut self, table: &str, column: &str, index_type: IndexType) {
self.index_manager.create_index(table, column, index_type);
}

fn drop_index(&mut self, table: &str, column: &str) {
self.index_manager.drop_index(table, column);
}

fn get_index(&self, table: &str, column: &str) -> Option<&IndexType<()>> {
fn get_index(&self, table: &str, column: &str) -> Option<&IndexType> {
self.index_manager.get_index(table, column)
}

Expand Down
Loading

0 comments on commit 556794d

Please sign in to comment.