Skip to content

Commit

Permalink
empty_df
Browse files Browse the repository at this point in the history
  • Loading branch information
lukaszkolodziejczyk committed Jan 20, 2025
1 parent c04468f commit cfbc578
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions mostlyai/qa/_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ def load_meta(self) -> dict:
def store_bins(self, bins: dict[str, list]) -> None:
    """Write the given bins to ``self.bins_dir`` as parquet files.

    The mapping is turned into a two-column frame (``column``, ``bins``) with
    one row per key. Each row is exploded on ``bins`` and written to its own
    file, named after the row's positional index zero-padded to five digits.
    A zero-row ``empty.parquet`` with the same two columns is always written.

    Args:
        bins: Mapping from column name to that column's list of bins.
    """
    bins_df = pd.Series(bins).to_frame("bins").reset_index().rename(columns={"index": "column"})
    target_dir = self.bins_dir
    target_dir.mkdir(exist_ok=True, parents=True)
    # Zero-row placeholder carrying only the column names.
    # NOTE(review): presumably this lets readers of the directory cope with an
    # empty `bins` mapping - confirm against the loading code.
    placeholder = pd.DataFrame(columns=["column", "bins"])
    placeholder.to_parquet(target_dir / "empty.parquet")
    for idx, record in bins_df.iterrows():
        # One file per column: the list in `bins` becomes one row per element.
        exploded = pd.DataFrame([record]).explode("bins")
        exploded.to_parquet(target_dir / f"{idx:05}.parquet")
Expand Down Expand Up @@ -170,6 +172,8 @@ def store_numeric_uni_kdes(self, trn_kdes: dict[str, pd.Series]) -> None:
columns=["column", "x", "y"],
)
self.numeric_kdes_uni_dir.mkdir(exist_ok=True, parents=True)
empty_df = pd.DataFrame(columns=["column", "x", "y"])
empty_df.to_parquet(self.numeric_kdes_uni_dir / "empty.parquet")
for i, row in trn_kdes.iterrows():
row_df = pd.DataFrame([row]).explode(["x", "y"])
row_df.to_parquet(self.numeric_kdes_uni_dir / f"{i:05}.parquet")
Expand All @@ -195,6 +199,8 @@ def store_categorical_uni_counts(self, trn_cnts_uni: dict[str, pd.Series]) -> No
columns=["column", "cat", "count"],
)
self.categorical_counts_uni_dir.mkdir(exist_ok=True, parents=True)
empty_df = pd.DataFrame(columns=["column", "cat", "count"])
empty_df.to_parquet(self.categorical_counts_uni_dir / "empty.parquet")
for i, row in trn_cnts_uni.iterrows():
row_df = pd.DataFrame([row]).explode(["cat", "count"])
row_df.to_parquet(self.categorical_counts_uni_dir / f"{i:05}.parquet")
Expand Down

0 comments on commit cfbc578

Please sign in to comment.