forknet: Drop unneeded columns in the fork-network command (#12921)
`fork-network finalize` deletes data in every column except the ones we
need to start the network, but it does so with a DB delete transaction.
That only logically deletes the values and does not free up the disk
space, which happens only after a compaction. So here we instead remove
the unneeded columns by calling `drop_cf()`, which actually removes the
sst files and frees up the space. This reduces the size of the images
used for forknet by quite a bit.
marcelo-gonzalez authored Feb 14, 2025
1 parent 557bff1 commit 489303d
Showing 4 changed files with 92 additions and 19 deletions.
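
To illustrate the difference between logical and physical deletion that the commit message describes, here is a minimal standalone sketch using the `rocksdb` crate directly. This is not code from the patch; the path, column family name, and key are hypothetical:

    use rocksdb::{Options, DB};

    fn main() -> Result<(), rocksdb::Error> {
        let mut opts = Options::default();
        opts.create_if_missing(true);
        opts.create_missing_column_families(true);
        // Hypothetical path and column family name, for illustration only.
        let mut db = DB::open_cf(&opts, "/tmp/example-db", ["default", "unwanted"])?;

        // Logical delete: writes a tombstone into the column family. The sst
        // files keep the old data on disk until a compaction rewrites them.
        let cf = db.cf_handle("unwanted").unwrap();
        db.delete_cf(cf, b"some-key")?;

        // Physical delete: dropping the column family removes its sst files
        // immediately, reclaiming disk space without waiting for a compaction.
        db.drop_cf("unwanted")?;
        Ok(())
    }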
10 changes: 10 additions & 0 deletions core/store/src/db/rocksdb.rs
@@ -684,6 +684,16 @@ impl RocksDB {
             }
         }
     }
+
+    /// Deletes all data in `cols`. This should also remove all sst files.
+    pub fn clear_cols(&mut self, cols: &[DBCol]) -> anyhow::Result<()> {
+        for col in cols {
+            self.db
+                .drop_cf(col_name(*col))
+                .with_context(|| format!("failed to drop column family {:?}", col))?;
+        }
+        Ok(())
+    }
 }
 
 impl Drop for RocksDB {
3 changes: 2 additions & 1 deletion core/store/src/lib.rs
@@ -65,7 +65,8 @@ pub mod trie;
 
 pub use crate::config::{Mode, StoreConfig};
 pub use crate::opener::{
-    checkpoint_hot_storage_and_cleanup_columns, StoreMigrator, StoreOpener, StoreOpenerError,
+    checkpoint_hot_storage_and_cleanup_columns, clear_columns, StoreMigrator, StoreOpener,
+    StoreOpenerError,
 };
 
 /// Specifies temperature of a storage.
52 changes: 44 additions & 8 deletions core/store/src/opener.rs
@@ -252,14 +252,10 @@ impl<'a> StoreOpener<'a> {
         self.open_in_mode(Mode::ReadWrite)
     }
 
-    /// Opens the RocksDB database(s) for hot and cold (if configured) storages.
-    ///
-    /// When opening in read-only mode, verifies that the database version is
-    /// what the node expects and fails if it isn’t. If database doesn’t exist,
-    /// creates a new one unless mode is [`Mode::ReadWriteExisting`]. On the
-    /// other hand, if mode is [`Mode::Create`], fails if the database already
-    /// exists.
-    pub fn open_in_mode(&self, mode: Mode) -> Result<crate::NodeStorage, StoreOpenerError> {
+    fn open_dbs(
+        &self,
+        mode: Mode,
+    ) -> Result<(RocksDB, Snapshot, Option<RocksDB>, Snapshot), StoreOpenerError> {
         {
             let hot_path = self.hot.path.display().to_string();
             let cold_path = match &self.cold {
@@ -290,7 +286,18 @@ impl<'a> StoreOpener<'a> {
             .map(|cold| cold.open(mode, DB_VERSION))
             .transpose()?
             .map(|(db, _)| db);
+        Ok((hot_db, hot_snapshot, cold_db, cold_snapshot))
+    }
+
+    /// Opens the RocksDB database(s) for hot and cold (if configured) storages.
+    ///
+    /// When opening in read-only mode, verifies that the database version is
+    /// what the node expects and fails if it isn’t. If database doesn’t exist,
+    /// creates a new one unless mode is [`Mode::ReadWriteExisting`]. On the
+    /// other hand, if mode is [`Mode::Create`], fails if the database already
+    /// exists.
+    pub fn open_in_mode(&self, mode: Mode) -> Result<crate::NodeStorage, StoreOpenerError> {
+        let (hot_db, hot_snapshot, cold_db, cold_snapshot) = self.open_dbs(mode)?;
         let storage = NodeStorage::from_rocksdb(hot_db, cold_db);
 
         hot_snapshot.remove()?;
@@ -636,6 +643,35 @@ pub fn checkpoint_hot_storage_and_cleanup_columns(
     Ok(node_storage)
 }
 
+/// Deletes all data in the columns in `cols` from the rocksdb data dir in
+/// `home_dir`. This actually removes all the sst files rather than just
+/// logically deleting all keys with a transaction, which would give space
+/// savings only after a compaction. It is meant to be used only in tools
+/// where certain large columns don't need to be kept around.
+///
+/// For example, when preparing a database for use in a forknet with the
+/// fork-network tool, we only need the state and flat state, and a few
+/// other small columns. So getting rid of everything else saves quite a
+/// bit of the disk space needed for each node.
+pub fn clear_columns<'a>(
+    home_dir: &std::path::Path,
+    config: &StoreConfig,
+    archival_config: Option<ArchivalConfig<'a>>,
+    cols: &[DBCol],
+) -> anyhow::Result<()> {
+    let opener = StoreOpener::new(home_dir, config, archival_config);
+    let (mut hot_db, _hot_snapshot, cold_db, _cold_snapshot) =
+        opener.open_dbs(Mode::ReadWriteExisting)?;
+    hot_db.clear_cols(cols)?;
+    if let Some(mut cold) = cold_db {
+        cold.clear_cols(cols)?;
+    }
+    drop(hot_db);
+    // Here we call open_dbs() to recreate the dropped columns, which should now be empty.
+    let _ = opener.open_dbs(Mode::ReadWriteExisting)?;
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
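As a side note on the reopen trick in `clear_columns()` above, here is a minimal sketch of why dropping a column family and then reopening the DB yields the same column, empty. It again uses the `rocksdb` crate directly, with a hypothetical path and column family name, and is not code from the patch:

    use rocksdb::{Options, DB};

    fn main() -> Result<(), rocksdb::Error> {
        let path = "/tmp/drop-then-reopen"; // hypothetical path
        let mut opts = Options::default();
        opts.create_if_missing(true);
        opts.create_missing_column_families(true);

        let mut db = DB::open_cf(&opts, path, ["default", "big"])?;
        let cf = db.cf_handle("big").unwrap();
        db.put_cf(cf, b"key", b"value")?;
        // drop_cf() removes the column family and its sst files from disk.
        db.drop_cf("big")?;
        drop(db);

        // Reopening with create_missing_column_families recreates "big" as an
        // empty column family, mirroring the second open_dbs() call above.
        let db = DB::open_cf(&opts, path, ["default", "big"])?;
        let cf = db.cf_handle("big").unwrap();
        assert!(db.get_cf(cf, b"key")?.is_none());
        Ok(())
    }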
46 changes: 36 additions & 10 deletions tools/fork-network/src/cli.rs
@@ -142,6 +142,13 @@ fn make_state_roots_key(shard_id: ShardId) -> Vec<u8> {
     format!("{FORKED_ROOTS_KEY_PREFIX}{shard_id}").into_bytes()
 }
 
+/// The minimum set of columns that will be needed to start a node after the `finalize` command runs.
+const COLUMNS_TO_KEEP: &[DBCol] = &[DBCol::DbVersion, DBCol::Misc, DBCol::State, DBCol::FlatState];
+
+/// Extra columns needed in the setup before the `finalize` command.
+const SETUP_COLUMNS_TO_KEEP: &[DBCol] =
+    &[DBCol::EpochInfo, DBCol::FlatStorageStatus, DBCol::ChunkExtra];
+
 #[derive(clap::Parser)]
 struct ResetCmd;
 
@@ -265,7 +272,7 @@ impl ForkNetworkCommand {
     // Snapshots the DB.
     // Determines parameters that will be used to initialize the new chain.
     // After this completes, almost every DB column can be removed; however, this command doesn't delete anything itself.
-    fn init(&self, near_config: &mut NearConfig, home_dir: &Path) -> anyhow::Result<()> {
+    fn write_fork_info(&self, near_config: &mut NearConfig, home_dir: &Path) -> anyhow::Result<()> {
         // Open storage with migration
         let storage = open_storage(&home_dir, near_config).unwrap();
         let store = storage.get_hot_store();
@@ -337,6 +344,24 @@ impl ForkNetworkCommand {
         Ok(())
     }
 
+    fn init(&self, near_config: &mut NearConfig, home_dir: &Path) -> anyhow::Result<()> {
+        self.write_fork_info(near_config, home_dir)?;
+        let mut unwanted_cols = Vec::new();
+        for col in DBCol::iter() {
+            if !COLUMNS_TO_KEEP.contains(&col) && !SETUP_COLUMNS_TO_KEEP.contains(&col) {
+                unwanted_cols.push(col);
+            }
+        }
+        near_store::clear_columns(
+            home_dir,
+            &near_config.config.store,
+            near_config.config.archival_config(),
+            &unwanted_cols,
+        )
+        .context("failed deleting unwanted columns")?;
+        Ok(())
+    }
+
     /// Creates a DB snapshot, then
     /// Updates the state to ensure every account has a full access key that is known to us.
     fn amend_access_keys(
@@ -472,19 +497,20 @@ impl ForkNetworkCommand {
 
     /// Deletes DB columns that are not needed in the new chain.
     fn finalize(&self, near_config: &mut NearConfig, home_dir: &Path) -> anyhow::Result<()> {
-        // Open storage with migration
-        let storage = open_storage(&home_dir, near_config).unwrap();
-        let store = storage.get_hot_store();
-
-        tracing::info!("Delete unneeded columns in the original DB");
-        let mut update = store.store_update();
+        let mut unwanted_cols = Vec::new();
         for col in DBCol::iter() {
-            match col {
-                DBCol::DbVersion | DBCol::Misc | DBCol::State | DBCol::FlatState => {}
-                _ => update.delete_all(col),
+            if !COLUMNS_TO_KEEP.contains(&col) {
+                unwanted_cols.push(col);
             }
         }
-        update.commit()?;
+        near_store::clear_columns(
+            home_dir,
+            &near_config.config.store,
+            near_config.config.archival_config(),
+            &unwanted_cols,
+        )
+        .context("failed deleting unwanted columns")?;
         Ok(())
     }

