diff --git a/core/store/src/db/rocksdb.rs b/core/store/src/db/rocksdb.rs index 50db5740aaf..4d0eca8e5cc 100644 --- a/core/store/src/db/rocksdb.rs +++ b/core/store/src/db/rocksdb.rs @@ -684,6 +684,16 @@ impl RocksDB { } } } + + /// Deletes all data in `cols`. This should also remove all sst files + pub fn clear_cols(&mut self, cols: &[DBCol]) -> anyhow::Result<()> { + for col in cols { + self.db + .drop_cf(col_name(*col)) + .with_context(|| format!("failed to drop column family {:?}", col,))?; + } + Ok(()) + } } impl Drop for RocksDB { diff --git a/core/store/src/lib.rs b/core/store/src/lib.rs index bbe8e6f83ff..9d05593d968 100644 --- a/core/store/src/lib.rs +++ b/core/store/src/lib.rs @@ -65,7 +65,8 @@ pub mod trie; pub use crate::config::{Mode, StoreConfig}; pub use crate::opener::{ - checkpoint_hot_storage_and_cleanup_columns, StoreMigrator, StoreOpener, StoreOpenerError, + checkpoint_hot_storage_and_cleanup_columns, clear_columns, StoreMigrator, StoreOpener, + StoreOpenerError, }; /// Specifies temperature of a storage. diff --git a/core/store/src/opener.rs b/core/store/src/opener.rs index 9fd30b49f41..ef843aec90c 100644 --- a/core/store/src/opener.rs +++ b/core/store/src/opener.rs @@ -252,14 +252,10 @@ impl<'a> StoreOpener<'a> { self.open_in_mode(Mode::ReadWrite) } - /// Opens the RocksDB database(s) for hot and cold (if configured) storages. - /// - /// When opening in read-only mode, verifies that the database version is - /// what the node expects and fails if it isn’t. If database doesn’t exist, - /// creates a new one unless mode is [`Mode::ReadWriteExisting`]. On the - /// other hand, if mode is [`Mode::Create`], fails if the database already - /// exists. 
- pub fn open_in_mode(&self, mode: Mode) -> Result<NodeStorage, StoreOpenerError> { + fn open_dbs( + &self, + mode: Mode, + ) -> Result<(RocksDB, Snapshot, Option<RocksDB>, Snapshot), StoreOpenerError> { { let hot_path = self.hot.path.display().to_string(); let cold_path = match &self.cold { @@ -290,7 +286,18 @@ impl<'a> StoreOpener<'a> { .map(|cold| cold.open(mode, DB_VERSION)) .transpose()? .map(|(db, _)| db); + Ok((hot_db, hot_snapshot, cold_db, cold_snapshot)) + } + /// Opens the RocksDB database(s) for hot and cold (if configured) storages. + /// + /// When opening in read-only mode, verifies that the database version is + /// what the node expects and fails if it isn’t. If database doesn’t exist, + /// creates a new one unless mode is [`Mode::ReadWriteExisting`]. On the + /// other hand, if mode is [`Mode::Create`], fails if the database already + /// exists. + pub fn open_in_mode(&self, mode: Mode) -> Result<NodeStorage, StoreOpenerError> { + let (hot_db, hot_snapshot, cold_db, cold_snapshot) = self.open_dbs(mode)?; let storage = NodeStorage::from_rocksdb(hot_db, cold_db); hot_snapshot.remove()?; @@ -636,6 +643,35 @@ pub fn checkpoint_hot_storage_and_cleanup_columns( Ok(node_storage) } +/// Deletes all data in the columns in `cols` from the rocksdb data +/// dir in `home_dir`. This actually removes all the sst files rather than +/// just logically deleting all keys with a transaction, which would +/// only give space savings after a compaction. This is meant to be +/// used only in tools where certain large columns don't need to be kept around. +/// +/// For example, when preparing a database for use in a forknet +/// with the fork-network tool, we only need the state and +/// flat state, and a few other small columns. So getting rid of +/// everything else saves quite a bit on the disk space needed for each node. 
+pub fn clear_columns<'a>( + home_dir: &std::path::Path, + config: &StoreConfig, + archival_config: Option<ArchivalConfig<'a>>, + cols: &[DBCol], +) -> anyhow::Result<()> { + let opener = StoreOpener::new(home_dir, config, archival_config); + let (mut hot_db, _hot_snapshot, cold_db, _cold_snapshot) = + opener.open_dbs(Mode::ReadWriteExisting)?; + hot_db.clear_cols(cols)?; + if let Some(mut cold) = cold_db { + cold.clear_cols(cols)?; + } + drop(hot_db); + // Here we call open_dbs() to recreate the dropped columns, which should now be empty. + let _ = opener.open_dbs(Mode::ReadWriteExisting)?; + Ok(()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/tools/fork-network/src/cli.rs b/tools/fork-network/src/cli.rs index 618cc2ffdfe..f8f051abd59 100644 --- a/tools/fork-network/src/cli.rs +++ b/tools/fork-network/src/cli.rs @@ -142,6 +142,13 @@ fn make_state_roots_key(shard_id: ShardId) -> Vec<u8> { format!("{FORKED_ROOTS_KEY_PREFIX}{shard_id}").into_bytes() } +/// The minimum set of columns that will be needed to start a node after the `finalize` command runs +const COLUMNS_TO_KEEP: &[DBCol] = &[DBCol::DbVersion, DBCol::Misc, DBCol::State, DBCol::FlatState]; + +/// Extra columns needed in the setup before the `finalize` command +const SETUP_COLUMNS_TO_KEEP: &[DBCol] = + &[DBCol::EpochInfo, DBCol::FlatStorageStatus, DBCol::ChunkExtra]; + #[derive(clap::Parser)] struct ResetCmd; @@ -265,7 +272,7 @@ impl ForkNetworkCommand { // Snapshots the DB. // Determines parameters that will be used to initialize the new chain. // After this completes, almost every DB column can be removed, however this command doesn't delete anything itself. 
- fn init(&self, near_config: &mut NearConfig, home_dir: &Path) -> anyhow::Result<()> { + fn write_fork_info(&self, near_config: &mut NearConfig, home_dir: &Path) -> anyhow::Result<()> { // Open storage with migration let storage = open_storage(&home_dir, near_config).unwrap(); let store = storage.get_hot_store(); @@ -337,6 +344,24 @@ impl ForkNetworkCommand { Ok(()) } + fn init(&self, near_config: &mut NearConfig, home_dir: &Path) -> anyhow::Result<()> { + self.write_fork_info(near_config, home_dir)?; + let mut unwanted_cols = Vec::new(); + for col in DBCol::iter() { + if !COLUMNS_TO_KEEP.contains(&col) && !SETUP_COLUMNS_TO_KEEP.contains(&col) { + unwanted_cols.push(col); + } + } + near_store::clear_columns( + home_dir, + &near_config.config.store, + near_config.config.archival_config(), + &unwanted_cols, + ) + .context("failed deleting unwanted columns")?; + Ok(()) + } + /// Creates a DB snapshot, then /// Updates the state to ensure every account has a full access key that is known to us. fn amend_access_keys( @@ -458,19 +483,20 @@ impl ForkNetworkCommand { /// Deletes DB columns that are not needed in the new chain. fn finalize(&self, near_config: &mut NearConfig, home_dir: &Path) -> anyhow::Result<()> { - // Open storage with migration - let storage = open_storage(&home_dir, near_config).unwrap(); - let store = storage.get_hot_store(); - tracing::info!("Delete unneeded columns in the original DB"); - let mut update = store.store_update(); + let mut unwanted_cols = Vec::new(); for col in DBCol::iter() { - match col { - DBCol::DbVersion | DBCol::Misc | DBCol::State | DBCol::FlatState => {} - _ => update.delete_all(col), + if !COLUMNS_TO_KEEP.contains(&col) { + unwanted_cols.push(col); } } - update.commit()?; + near_store::clear_columns( + home_dir, + &near_config.config.store, + near_config.config.archival_config(), + &unwanted_cols, + ) + .context("failed deleting unwanted columns")?; Ok(()) }