Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: microsoft/RD-Agent
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: e2802d5667dfa5e23b55028843a250f78c82a693
Choose a base ref
..
head repository: microsoft/RD-Agent
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: c3f88afa35c0c7f42d0250180dced9949f4d9b32
Choose a head ref
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Changelog

## [0.4.0](https://github.com/microsoft/RD-Agent/compare/v0.3.0...v0.4.0) (2025-01-20)
## [0.4.0](https://github.com/microsoft/RD-Agent/compare/v0.3.0...v0.4.0) (2025-01-21)


### Features
Original file line number Diff line number Diff line change
@@ -55,6 +55,7 @@ def implement_one_task(
# return a workspace with "load_data.py", "spec/load_data.md" inside
# assign the implemented code to the new workspace.
competition_info = self.scen.get_scenario_all_desc()
runtime_environment = self.scen.get_runtime_environment()
data_folder_info = self.scen.processed_data_folder_description
data_loader_task_info = target_task.get_task_information()

@@ -88,6 +89,7 @@ def implement_one_task(
# TODO: We may move spec into a separated COSTEER task
if "spec/data_loader.md" not in workspace.file_dict: # Only generate the spec once
system_prompt = T(".prompts:spec.system").r(
runtime_environment=runtime_environment,
task_desc=data_loader_task_info,
competition_info=competition_info,
folder_spec=data_folder_info,
Original file line number Diff line number Diff line change
@@ -6,6 +6,9 @@ spec:
Currently, you are working on a Kaggle competition project.
This project involves analyzing data and building models to beat other competitors, with the code being generated by large language models.
The runtime environment you are working in includes the following libraries and their respective versions:
{{ runtime_environment }}
Your overall task is provided below:
{{ task_desc }}
14 changes: 14 additions & 0 deletions rdagent/scenarios/data_science/scen/__init__.py
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
from PIL import Image, TiffTags

from rdagent.app.data_science.conf import DS_RD_SETTING
from rdagent.core.experiment import FBWorkspace
from rdagent.core.scenario import Scenario
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_utils import APIBackend
@@ -14,6 +15,7 @@
leaderboard_scores,
)
from rdagent.utils.agent.tpl import T
from rdagent.utils.env import DockerEnv, DSDockerConf


def read_csv_head(file_path, indent=0, lines=5, max_col_width=100):
@@ -304,6 +306,18 @@ def get_scenario_all_desc(self) -> str:
metric_direction=self.metric_direction,
)

def get_runtime_environment(self) -> str:
    """Run the bundled ``runtime_info.py`` script in a Docker environment.

    The script that sits next to this module is injected into a fresh
    ``FBWorkspace`` under the name ``temp.py`` and executed with a
    ``DockerEnv`` built from a default ``DSDockerConf``.

    Returns:
        Whatever ``FBWorkspace.execute`` returns for that run, passed
        through unchanged (presumably the script's stdout -- confirm
        against ``FBWorkspace.execute``).
    """
    # TODO: add it into base class. The environment (i.e. `DSDockerConf`) should be part of the scenario class.
    env = DockerEnv(conf=DSDockerConf())
    workspace = FBWorkspace()
    fname = "temp.py"
    script_path = Path(__file__).absolute().resolve().parent / "runtime_info.py"
    workspace.inject_files(**{fname: script_path.read_text()})
    return workspace.execute(env=env, entry=f"python {fname}")

def _get_data_folder_description(self) -> str:
    """Return ``describe_data_folder`` output for this competition's data folder.

    The folder is ``DS_RD_SETTING.local_data_path`` joined with
    ``self.competition``.
    """
    competition_dir = Path(DS_RD_SETTING.local_data_path) / self.competition
    return describe_data_folder(competition_dir)

40 changes: 40 additions & 0 deletions rdagent/scenarios/data_science/scen/runtime_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import platform
import sys
from importlib.metadata import distributions


def print_runtime_info():
    """Print one line with the Python version and the OS name and release."""
    python_version = sys.version
    os_name = platform.system()
    os_release = platform.release()
    print(f"Python {python_version} on {os_name} {os_release}")


def get_installed_packages():
    """Map every discoverable distribution to its installed version.

    Returns:
        A dict whose keys are lower-cased distribution names (the ``Name``
        metadata field) and whose values are the version strings reported
        by ``importlib.metadata``.

    Distributions whose metadata carries no ``Name`` (for example a
    half-removed or corrupted ``*.dist-info`` directory) are skipped rather
    than aborting the whole listing.
    """
    packages = {}
    for dist in distributions():
        # ``.get`` avoids both the ``None`` result and the ``KeyError`` that
        # subscripting yields (depending on the Python version) when the
        # ``Name`` header is missing.
        name = dist.metadata.get("Name")
        if not name:
            continue
        # ``distributions()`` walks ``sys.path`` in order, so the first hit
        # for a name is the one a lookup by name resolves to; do not let a
        # shadowed copy further down the path overwrite it.
        packages.setdefault(name.lower(), dist.version)
    return packages


def print_filtered_packages(installed_packages, filtered_packages):
    """Print ``name==version`` for each requested package that is installed.

    Args:
        installed_packages: Mapping of lower-cased package name to version.
        filtered_packages: Iterable of package names to report, in order.

    Names are looked up case-insensitively but printed as given. Packages
    that are missing, or whose version is falsy, are silently skipped.
    """
    for wanted in filtered_packages:
        found = installed_packages.get(wanted.lower())
        if not found:
            continue
        print(f"{wanted}=={found}")


if __name__ == "__main__":
    # Script entry point: print the interpreter/OS header line, then the
    # versions of the libraries listed below that are actually installed.
    print_runtime_info()

    installed_packages = get_installed_packages()

    # Libraries worth reporting; anything not installed is simply omitted.
    filtered_packages = [
        # deep learning
        "transformers",
        "accelerate",
        "torch",
        "tensorflow",
        # data handling and classic ML
        "pandas",
        "numpy",
        "scikit-learn",
        "scipy",
        "lightgbm",
        # imaging, visualisation and the rest
        "vtk",
        "opencv-python",
        "keras",
        "matplotlib",
        "pydicom",
    ]

    print_filtered_packages(
        installed_packages=installed_packages,
        filtered_packages=filtered_packages,
    )