diff --git a/rdagent/app/kaggle/conf.py b/rdagent/app/kaggle/conf.py
index 13a4d5db..1063d987 100644
--- a/rdagent/app/kaggle/conf.py
+++ b/rdagent/app/kaggle/conf.py
@@ -72,6 +72,9 @@ class Config:
     auto_submit: bool = False
     """Automatically upload and submit each experiment result to Kaggle platform"""
 
+    mle_submit: bool = False
+    """Automatically upload and submit each experiment result to mlebench"""
+
     mini_case: bool = False
     """Enable mini-case study for experiments"""
 
diff --git a/rdagent/app/kaggle/loop.py b/rdagent/app/kaggle/loop.py
index 3839a4eb..7c3b0fb2 100644
--- a/rdagent/app/kaggle/loop.py
+++ b/rdagent/app/kaggle/loop.py
@@ -113,6 +113,28 @@ def running(self, prev_out: dict[str, Any]):
             except Exception as e:
                 logger.error(f"Other exception when use kaggle api:\n{e}")
 
+        if KAGGLE_IMPLEMENT_SETTING.mle_submit:
+            csv_path = exp.experiment_workspace.workspace_path / "submission.csv"
+            try:
+                result = subprocess.run(
+                    [
+                        "mlebench",
+                        "grade-sample",
+                        str(csv_path.absolute()),
+                        KAGGLE_IMPLEMENT_SETTING.competition,
+                    ],
+                    check=True,
+                    capture_output=True,
+                    text=True,
+                )
+                with open(exp.experiment_workspace.workspace_path / "mle_submission_report.txt", "w") as f:
+                    f.write(result.stdout)
+                    f.write(result.stderr)
+            except subprocess.CalledProcessError as e:
+                logger.error(f"Auto submission failed: \n{e}")
+            except Exception as e:
+                logger.error(f"Other exception when use mle api:\n{e}")
+
         return exp
 
     skip_loop_error = (ModelEmptyError, FactorEmptyError)
diff --git a/rdagent/scenarios/kaggle/developer/runner.py b/rdagent/scenarios/kaggle/developer/runner.py
index a1868f6a..4bc5ba6d 100644
--- a/rdagent/scenarios/kaggle/developer/runner.py
+++ b/rdagent/scenarios/kaggle/developer/runner.py
@@ -71,6 +71,35 @@ def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
         return exp
 
 
+class MLEModelRunner(KGCachedRunner[KGModelExperiment]):
+    @cache_with_pickle(KGCachedRunner.get_cache_key, KGCachedRunner.assign_cached_result)
+    def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
+        if exp.based_experiments and exp.based_experiments[-1].result is None:
+            exp.based_experiments[-1] = self.init_develop(exp.based_experiments[-1])
+
+        sub_ws = exp.sub_workspace_list[0]
+        if sub_ws is not None:
+            # TODO: There's a possibility of generating a hybrid model (lightgbm + xgboost), which results in having two items in the model_type list.
+            model_type = sub_ws.target_task.model_type
+
+            if sub_ws.code_dict == {}:
+                raise ModelEmptyError("No model is implemented.")
+            else:
+                model_file_name = f"model/model_{model_type.lower()}.py"
+                exp.experiment_workspace.inject_code(**{model_file_name: sub_ws.code_dict["model.py"]})
+        env_to_use = {"PYTHONPATH": "./"}
+
+        result = exp.experiment_workspace.execute(run_env=env_to_use)
+
+        if result is None:
+            raise CoderError("No result is returned from the experiment workspace")
+
+        report_path = exp.experiment_workspace.workspace_path / "mle_submission_report.txt"
+        with open(report_path, "r") as f:
+            exp.result = f.read()
+
+        return exp
+
 
 class KGFactorRunner(KGCachedRunner[KGFactorExperiment]):
     @cache_with_pickle(KGCachedRunner.get_cache_key, KGCachedRunner.assign_cached_result)
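
The mle_submit branch in loop.py shells out to the mlebench CLI (mlebench grade-sample <submission> <competition-id>) and persists both stdout and stderr to mle_submission_report.txt, which MLEModelRunner.develop then reads back as the experiment result. A minimal standalone sketch of that grading step follows; the workspace directory and competition id are illustrative placeholders, not values taken from this change.

    import subprocess
    from pathlib import Path

    workspace = Path("./workspace")      # placeholder for exp.experiment_workspace.workspace_path
    competition = "some-competition-id"  # placeholder for KAGGLE_IMPLEMENT_SETTING.competition
    csv_path = workspace / "submission.csv"

    # Grade a single submission file with mlebench; check=True raises CalledProcessError
    # on a non-zero exit, and capture_output=True collects stdout/stderr as text.
    result = subprocess.run(
        ["mlebench", "grade-sample", str(csv_path.absolute()), competition],
        check=True,
        capture_output=True,
        text=True,
    )

    # Persist the full report so a later step (here, MLEModelRunner.develop) can read it back.
    with open(workspace / "mle_submission_report.txt", "w") as f:
        f.write(result.stdout)
        f.write(result.stderr)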