Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Picking columns to export CSV #260

Merged
merged 10 commits into from
Apr 12, 2024
1 change: 1 addition & 0 deletions post-processing/config_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def __init__(self, config: dict):
self.filters = config.get("filters")
self.series = config.get("series")
self.column_types = config.get("column_types")
self.extra_columns = config.get("additional_columns_to_csv")

# parse filter information
self.and_filters = []
Expand Down
40 changes: 10 additions & 30 deletions post-processing/post_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def __init__(self, log_path: Path, debug=False, verbose=False):
# for re-running post-processing with front-end
# dataframe filters
self.mask = pd.Series(self.df.index.notnull())
self.log_path = log_path

def run_post_processing(self, config: ConfigHandler):
"""
Expand Down Expand Up @@ -50,6 +51,10 @@ def run_post_processing(self, config: ConfigHandler):
# FIXME (#issue #255): have an option to put this into a file (-s / --save flag?)
print("Selected dataframe:")
print(self.df[self.mask][config.plot_columns])
if self.debug:
print("CSV dataframe:")
print(self.df[self.mask][config.plot_columns + config.extra_columns])
ilectra marked this conversation as resolved.
Show resolved Hide resolved
self.df[self.mask][config.plot_columns + config.extra_columns].to_csv(str(self.log_path)+'/output.csv', index=True) # Set index=False to exclude the DataFrame index from the CSV
ilectra marked this conversation as resolved.
Show resolved Hide resolved
ilectra marked this conversation as resolved.
Show resolved Hide resolved

# call a plotting script
plot_generic(
Expand All @@ -62,36 +67,11 @@ def run_post_processing(self, config: ConfigHandler):
print("Full dataframe:")
print(self.df.to_json(orient="columns", indent=2))

df = pd.DataFrame()
df_csv_export = pd.DataFrame()
# put all perflog information in one dataframe
for file in log_files:
try:
temp = read_perflog(file)
df = pd.concat([df, temp], ignore_index=True)
except KeyError as e:
if self.debug:
print("Discarding %s:" %os.path.basename(file),
type(e).__name__ + ":", e.args[0], e.args[1])
print("")
if df.empty:
raise FileNotFoundError(errno.ENOENT, "Could not find a valid perflog in path", log_path)
# specify columns to export from dataframe to csv
if config.get("csv_export") is None:
raise KeyError("Missing csv_export (specify an empty list [] if none are required).")
else:
for col in config["csv_export"]:
df_csv_export = pd.concat([df_csv_export, df[col]], axis=1, join='outer')
df_csv_export.to_csv(log_path+'/output.csv', index=False) # Set index=False to exclude the DataFrame index from the CSV
if self.debug:
print("Selected dataframe to export CSV file:")
print(df_csv_export)
# get axis columns
columns = [config["x_axis"]["value"], config["y_axis"]["value"]]
if config["x_axis"]["units"].get("column"):
columns.insert(1, config["x_axis"]["units"]["column"])
if config["y_axis"]["units"].get("column"):
columns.append(config["y_axis"]["units"]["column"])
return self.df[config.plot_columns][self.mask]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume this mess is left over from merging with the refactored version?


def check_df_columns(self, all_columns):
"""
Check that all columns listed in the config exist in the dataframe.

Args:
all_columns: list, names of all columns mentioned in the config.
Expand Down
6 changes: 2 additions & 4 deletions post-processing/post_processing_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ column_types:
cpus_per_task: "int"

# Specify additional columns to export to the csv file, alongside the plotted columns
csv_export:
[tasks,
flops_value,
Triad_unit,
additional_columns_to_csv:
[flops_value,
ilectra marked this conversation as resolved.
Show resolved Hide resolved
cpus_per_task]
Loading