
Commit

Enhance database functionality with transaction management and table storage

- Added transaction management capabilities, including support for isolation levels and locking mechanisms.
- Introduced a new TableStorage struct to manage tables and their data more effectively.
- Implemented Write Ahead Logging (WAL) for improved data integrity and recovery.
- Refactored SQL execution to support transactions, including commit and rollback functionality (see the sketch below).
- Updated the .gitignore to exclude test database files and WAL logs.
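
This excerpt of the diff does not include the new transaction or WAL code itself, so the following is only a minimal sketch of how a begin/commit/rollback path over a write-ahead log could be wired up. Every name in it (WalEntry, Wal, Transaction, Database, begin, put) is an illustrative assumption rather than the commit's actual API, and isolation levels and locking are omitted for brevity.

```rust
use std::collections::HashMap;
use std::fs::OpenOptions;
use std::io::Write;

// Hypothetical log record types; the commit's real WAL format is not shown here.
#[derive(Debug)]
enum WalEntry {
    Begin(u64),
    Put { tx_id: u64, key: String, value: String },
    Commit(u64),
    Rollback(u64),
}

struct Wal {
    path: String,
}

impl Wal {
    // Append the entry to the log file before any in-memory state changes
    // (a production WAL would also fsync for durability).
    fn append(&self, entry: &WalEntry) -> std::io::Result<()> {
        let mut file = OpenOptions::new().create(true).append(true).open(&self.path)?;
        writeln!(file, "{:?}", entry)
    }
}

struct Transaction {
    id: u64,
    pending: HashMap<String, String>, // writes buffered until commit
}

struct Database {
    wal: Wal,
    data: HashMap<String, String>,
    next_tx: u64,
}

impl Database {
    fn begin(&mut self) -> std::io::Result<Transaction> {
        self.next_tx += 1;
        self.wal.append(&WalEntry::Begin(self.next_tx))?;
        Ok(Transaction { id: self.next_tx, pending: HashMap::new() })
    }

    fn put(&mut self, tx: &mut Transaction, key: &str, value: &str) -> std::io::Result<()> {
        // Log first, then buffer the write inside the transaction.
        self.wal.append(&WalEntry::Put {
            tx_id: tx.id,
            key: key.to_string(),
            value: value.to_string(),
        })?;
        tx.pending.insert(key.to_string(), value.to_string());
        Ok(())
    }

    // Record the commit, then publish the buffered writes to the table data.
    fn commit(&mut self, tx: Transaction) -> std::io::Result<()> {
        self.wal.append(&WalEntry::Commit(tx.id))?;
        self.data.extend(tx.pending);
        Ok(())
    }

    // Buffered writes are simply dropped; the WAL records the rollback.
    fn rollback(&mut self, tx: Transaction) -> std::io::Result<()> {
        self.wal.append(&WalEntry::Rollback(tx.id))
    }
}

fn main() -> std::io::Result<()> {
    let mut db = Database {
        wal: Wal { path: "reef.wal".into() },
        data: HashMap::new(),
        next_tx: 0,
    };
    let mut tx = db.begin()?;
    db.put(&mut tx, "users:1", "alice")?;
    db.commit(tx)?;
    assert_eq!(db.data.get("users:1").map(String::as_str), Some("alice"));
    Ok(())
}
```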
sachaarbonel committed Jan 7, 2025
1 parent 58025e9 commit 6f3222f
Showing 14 changed files with 846 additions and 248 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,2 +1,4 @@
/target
/Cargo.lock
test_db
reef.wal
2 changes: 1 addition & 1 deletion src/indexes/fts/disk.rs
@@ -11,7 +11,7 @@ use super::memory::InvertedIndex;
use super::search::Search;
use super::tokenizers::tokenizer::Tokenizer;

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct OnDiskInvertedIndex<T: Tokenizer> {
index: InvertedIndex<T>,
file_path: String,
34 changes: 28 additions & 6 deletions src/indexes/fts/memory.rs
@@ -45,7 +45,7 @@ impl<T: Tokenizer> InvertedIndex<T> {

for word in self.tokenizer.tokenize(text) {
let word_entry = column_entry
.entry(word.to_string())
.entry(word.to_lowercase())
.or_insert(HashSet::new());
word_entry.insert(row_id);
}
@@ -62,22 +62,44 @@ impl<T: Tokenizer> InvertedIndex<T> {
}

pub fn search(&self, table: &str, column: &str, query: &str) -> HashSet<usize> {
let mut results = HashSet::new();
if let Some(table_entry) = self.index.get(table) {
if let Some(column_entry) = table_entry.get(column) {
for word in self.tokenizer.tokenize(query) {
if let Some(word_entry) = column_entry.get(word) {
results.extend(word_entry);
let query_tokens: Vec<String> = self.tokenizer.tokenize(query)
.map(|s| s.to_lowercase())
.collect();

if query_tokens.is_empty() {
return HashSet::new();
}

// Get results for first token
let mut results = match column_entry.get(&query_tokens[0]) {
Some(word_entry) => word_entry.clone(),
None => return HashSet::new(),
};

// Intersect with results for remaining tokens
for token in query_tokens.iter().skip(1) {
if let Some(word_entry) = column_entry.get(token) {
results.retain(|id| word_entry.contains(id));
} else {
return HashSet::new();
}
}

results
} else {
HashSet::new()
}
} else {
HashSet::new()
}
results
}
}

impl<T: Tokenizer + Serialize + for<'de> Deserialize<'de>> Search for InvertedIndex<T> {
type NewArgs = ();

fn new(_: Self::NewArgs) -> Self {
InvertedIndex::new()
}
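
For context on the hunk above: the refactored search lowercases the query tokens and intersects the per-token row-id sets, so a multi-word query only matches rows that contain every token. Below is a standalone sketch of that intersection step using plain HashMap/HashSet rather than the crate's index types; the function name and shapes are illustrative, not part of the commit.

```rust
use std::collections::{HashMap, HashSet};

// `postings` maps a lowercased token to the set of row ids containing it.
fn intersect_postings(
    postings: &HashMap<String, HashSet<usize>>,
    query_tokens: &[String],
) -> HashSet<usize> {
    let mut tokens = query_tokens.iter();
    // Seed with the first token's rows; an empty query or unknown token yields no results.
    let mut results = match tokens.next().and_then(|t| postings.get(t)) {
        Some(rows) => rows.clone(),
        None => return HashSet::new(),
    };
    // Keep only row ids that also appear for every remaining token.
    for token in tokens {
        match postings.get(token) {
            Some(rows) => results.retain(|id| rows.contains(id)),
            None => return HashSet::new(),
        }
    }
    results
}

fn main() {
    let mut postings = HashMap::new();
    postings.insert("rust".to_string(), HashSet::from([1, 2]));
    postings.insert("database".to_string(), HashSet::from([1]));
    // Only row 1 contains both "rust" and "database".
    let hits = intersect_postings(&postings, &["rust".into(), "database".into()]);
    assert_eq!(hits, HashSet::from([1]));
}
```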
2 changes: 1 addition & 1 deletion src/indexes/fts/search.rs
@@ -1,7 +1,7 @@
use std::collections::HashSet;

pub trait Search {
type NewArgs;
type NewArgs: Default;
fn new(args: Self::NewArgs) -> Self;
fn search(&self, table: &str, column: &str, query: &str) -> HashSet<usize>;

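
The new `Default` bound on `NewArgs` presumably lets generic code construct a searcher without threading an explicit argument value. A small hypothetical illustration follows (the trait is abridged to the methods shown in the hunk, and `open_search` is not part of the commit).

```rust
use std::collections::HashSet;

// Trait as shown in this hunk, abridged.
pub trait Search {
    type NewArgs: Default;
    fn new(args: Self::NewArgs) -> Self;
    fn search(&self, table: &str, column: &str, query: &str) -> HashSet<usize>;
}

// Hypothetical helper the bound enables: build any implementor from its
// default construction arguments without naming the argument type.
pub fn open_search<S: Search>() -> S {
    S::new(Default::default())
}
```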
14 changes: 12 additions & 2 deletions src/indexes/fts/tokenizers/default.rs
@@ -7,7 +7,11 @@ pub struct DefaultTokenizer;

impl Tokenizer for DefaultTokenizer {
fn tokenize<'a>(&self, text: &'a str) -> Box<dyn Iterator<Item = &'a str> + 'a> {
Box::new(text.split_whitespace())
Box::new(text
.split(|c: char| !c.is_alphanumeric())
.filter(|s| !s.is_empty())
.map(|s| s.trim())
.filter(|s| !s.is_empty()))
}

fn new() -> Self {
@@ -21,7 +25,13 @@ mod tests {
fn tokenizer_test() {
use super::Tokenizer;
let tokenizer = super::DefaultTokenizer::new();
let tokens: Vec<&str> = tokenizer.tokenize("Hello World").collect();
let tokens: Vec<&str> = tokenizer.tokenize("Hello, World!").collect();
assert_eq!(tokens, vec!["Hello", "World"]);

let tokens: Vec<&str> = tokenizer.tokenize("Computer Science").collect();
assert_eq!(tokens, vec!["Computer", "Science"]);

let tokens: Vec<&str> = tokenizer.tokenize("Artificial Intelligence").collect();
assert_eq!(tokens, vec!["Artificial", "Intelligence"]);
}
}
