diff --git a/docs/scens/data_agent_fin.rst b/docs/scens/data_agent_fin.rst
index e9c42f13..b2d8013d 100644
--- a/docs/scens/data_agent_fin.rst
+++ b/docs/scens/data_agent_fin.rst
@@ -131,7 +131,7 @@ The following environment variables can be set in the `.env` file to customize t
     :settings-show-field-summary: False
     :exclude-members: Config
 
-.. autopydantic_settings:: rdagent.components.coder.factor_coder.config.FactorImplementSettings
+.. autopydantic_settings:: rdagent.components.coder.factor_coder.config.FactorCoSTEERSettings
     :settings-show-field-summary: False
     :members: coder_use_cache, data_folder, data_folder_debug, file_based_execution_timeout, select_method, select_threshold, max_loop, knowledge_base_path, new_knowledge_base_path
     :exclude-members: Config, fail_task_trial_limit, v1_query_former_trace_limit, v1_query_similar_success_limit, v2_query_component_limit, v2_query_error_limit, v2_query_former_trace_limit, v2_error_summary, v2_knowledge_sampler
diff --git a/docs/scens/data_copilot_fin.rst b/docs/scens/data_copilot_fin.rst
index 89280c61..28a34310 100644
--- a/docs/scens/data_copilot_fin.rst
+++ b/docs/scens/data_copilot_fin.rst
@@ -157,7 +157,7 @@ The following environment variables can be set in the `.env` file to customize t
     :show-inheritance:
     :exclude-members: Config
 
-.. autopydantic_settings:: rdagent.components.coder.factor_coder.config.FactorImplementSettings
+.. autopydantic_settings:: rdagent.components.coder.factor_coder.config.FactorCoSTEERSettings
     :settings-show-field-summary: False
    :members: coder_use_cache, data_folder, data_folder_debug, file_based_execution_timeout, select_method, select_threshold, max_loop, knowledge_base_path, new_knowledge_base_path
    :exclude-members: Config, python_bin, fail_task_trial_limit, v1_query_former_trace_limit, v1_query_similar_success_limit, v2_query_component_limit, v2_query_error_limit, v2_query_former_trace_limit, v2_error_summary, v2_knowledge_sampler
diff --git a/docs/scens/kaggle_agent.rst b/docs/scens/kaggle_agent.rst
index a44971ee..df0435f2 100644
--- a/docs/scens/kaggle_agent.rst
+++ b/docs/scens/kaggle_agent.rst
@@ -265,7 +265,7 @@ The following environment variables can be set in the `.env` file to customize t
     :settings-show-field-summary: False
     :exclude-members: Config
 
-.. autopydantic_settings:: rdagent.components.coder.factor_coder.config.FactorImplementSettings
+.. autopydantic_settings:: rdagent.components.coder.factor_coder.config.FactorCoSTEERSettings
     :settings-show-field-summary: False
     :members: coder_use_cache, file_based_execution_timeout, select_method, max_loop
     :exclude-members: Config, fail_task_trial_limit, v1_query_former_trace_limit, v1_query_similar_success_limit, v2_query_component_limit, v2_query_error_limit, v2_query_former_trace_limit, v2_error_summary, v2_knowledge_sampler, v2_add_fail_attempt_to_latest_successful_execution, new_knowledge_base_path, knowledge_base_path, data_folder, data_folder_debug, select_threshold
diff --git a/rdagent/app/benchmark/model/eval.py b/rdagent/app/benchmark/model/eval.py
index 0e1a7cbc..0182e9a8 100644
--- a/rdagent/app/benchmark/model/eval.py
+++ b/rdagent/app/benchmark/model/eval.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-from rdagent.components.coder.model_coder.CoSTEER import ModelCoSTEER
+from rdagent.components.coder.model_coder import ModelCoSTEER
 from rdagent.components.loader.task_loader import ModelTaskLoaderJson, ModelWsLoader
 from rdagent.scenarios.qlib.experiment.model_experiment import (
     QlibModelExperiment,
diff --git a/rdagent/app/data_mining/conf.py b/rdagent/app/data_mining/conf.py
index 5cd12d75..45b1ef35 100644
--- a/rdagent/app/data_mining/conf.py
+++ b/rdagent/app/data_mining/conf.py
@@ -1,16 +1,11 @@
 from pathlib import Path
 
-from pydantic_settings import BaseSettings
-
 from rdagent.components.workflow.conf import BasePropSetting
+from rdagent.core.conf import ExtendedSettingsConfigDict
 
 
 class MedBasePropSetting(BasePropSetting):
-    class Config:
-        env_prefix = "DM_"
-        """Use `DM_` as prefix for environment variables"""
-        protected_namespaces = ()
-        """Add 'model_' to the protected namespaces"""
+    model_config = ExtendedSettingsConfigDict(env_prefix="DM_", protected_namespaces=())
 
     # 1) overriding the default
     scen: str = "rdagent.scenarios.data_mining.experiment.model_experiment.DMModelScenario"
diff --git a/rdagent/app/kaggle/conf.py b/rdagent/app/kaggle/conf.py
index 6cc9f52a..b5204730 100644
--- a/rdagent/app/kaggle/conf.py
+++ b/rdagent/app/kaggle/conf.py
@@ -1,14 +1,9 @@
-from pydantic_settings import BaseSettings
-
 from rdagent.components.workflow.conf import BasePropSetting
+from rdagent.core.conf import ExtendedSettingsConfigDict
 
 
 class KaggleBasePropSetting(BasePropSetting):
-    class Config:
-        env_prefix = "KG_"
-        """Use `KG_` as prefix for environment variables"""
-        protected_namespaces = ()
-        """Do not allow overriding of these namespaces"""
+    model_config = ExtendedSettingsConfigDict(env_prefix="KG_", protected_namespaces=())
 
     # 1) overriding the default
     scen: str = "rdagent.scenarios.kaggle.experiment.scenario.KGScenario"
diff --git a/rdagent/app/qlib_rd_loop/conf.py b/rdagent/app/qlib_rd_loop/conf.py
index b30bee41..e6a91351 100644
--- a/rdagent/app/qlib_rd_loop/conf.py
+++ b/rdagent/app/qlib_rd_loop/conf.py
@@ -1,14 +1,9 @@
-from pydantic_settings import BaseSettings
-
 from rdagent.components.workflow.conf import BasePropSetting
+from rdagent.core.conf import ExtendedSettingsConfigDict
 
 
 class ModelBasePropSetting(BasePropSetting):
-    class Config:
-        env_prefix = "QLIB_MODEL_"
-        """Use `QLIB_MODEL_` as prefix for environment variables"""
-        protected_namespaces = ()
-        """Add 'model_' to the protected namespaces"""
+    model_config = ExtendedSettingsConfigDict(env_prefix="QLIB_MODEL_", protected_namespaces=())
 
     # 1) override base settings
     scen: str = "rdagent.scenarios.qlib.experiment.model_experiment.QlibModelScenario"
@@ -34,11 +29,7 @@ class Config:
 
 
 class FactorBasePropSetting(BasePropSetting):
-    class Config:
-        env_prefix = "QLIB_FACTOR_"
-        """Use `QLIB_FACTOR_` as prefix for environment variables"""
-        protected_namespaces = ()
-        """Add 'factor_' to the protected namespaces"""
+    model_config = ExtendedSettingsConfigDict(env_prefix="QLIB_FACTOR_", protected_namespaces=())
 
     # 1) override base settings
     scen: str = "rdagent.scenarios.qlib.experiment.factor_experiment.QlibFactorScenario"
diff --git a/rdagent/components/benchmark/conf.py b/rdagent/components/benchmark/conf.py
index 0a0ad418..f0eccec0 100644
--- a/rdagent/components/benchmark/conf.py
+++ b/rdagent/components/benchmark/conf.py
@@ -2,12 +2,12 @@
 from pathlib import Path
 from typing import Optional
 
-from pydantic_settings import BaseSettings
+from rdagent.core.conf import ExtendedBaseSettings
 
 DIRNAME = Path("./")
 
 
-class BenchmarkSettings(BaseSettings):
+class BenchmarkSettings(ExtendedBaseSettings):
     class Config:
         env_prefix = "BENCHMARK_"
         """Use `BENCHMARK_` as prefix for environment variables"""
@@ -24,7 +24,7 @@ class Config:
     bench_test_case_n: Optional[int] = None
     """how many test cases to run; If not given, all test cases will be run"""
 
-    bench_method_cls: str = "rdagent.components.coder.factor_coder.CoSTEER.FactorCoSTEER"
+    bench_method_cls: str = "rdagent.components.coder.CoSTEER.FactorCoSTEER"
     """method to be used for test cases"""
 
     bench_method_extra_kwargs: dict = field(
diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py
index 173af377..5194b968 100644
--- a/rdagent/components/benchmark/eval_method.py
+++ b/rdagent/components/benchmark/eval_method.py
@@ -5,8 +5,8 @@
 import pandas as pd
 from tqdm import tqdm
 
-from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS
-from rdagent.components.coder.factor_coder.CoSTEER.evaluators import (
+from rdagent.components.coder.factor_coder.config import FACTOR_COSTEER_SETTINGS
+from rdagent.components.coder.factor_coder.eva_utils import (
     FactorCorrelationEvaluator,
     FactorEqualValueRatioEvaluator,
     FactorEvaluator,
diff --git a/rdagent/components/coder/CoSTEER/__init__.py b/rdagent/components/coder/CoSTEER/__init__.py
new file mode 100644
index 00000000..7dddbfe2
--- /dev/null
+++ b/rdagent/components/coder/CoSTEER/__init__.py
@@ -0,0 +1,108 @@
+import pickle
+from pathlib import Path
+
+from rdagent.components.coder.CoSTEER.config import CoSTEERSettings
+from rdagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem
+from rdagent.components.coder.CoSTEER.evolving_agent import FilterFailedRAGEvoAgent
+from rdagent.components.coder.CoSTEER.knowledge_management import (
+    CoSTEERKnowledgeBaseV1,
+    CoSTEERKnowledgeBaseV2,
+    CoSTEERRAGStrategyV1,
+    CoSTEERRAGStrategyV2,
+)
+from rdagent.core.developer import Developer
+from rdagent.core.evaluation import Evaluator
+from rdagent.core.evolving_agent import EvolvingStrategy
+from rdagent.core.experiment import Experiment
+from rdagent.log import rdagent_logger as logger
+
+
+class CoSTEER(Developer[Experiment]):
+    def __init__(
+        self,
+        settings: CoSTEERSettings,
+        eva: Evaluator,
+        es: EvolvingStrategy,
+        evolving_version: int,
+        *args,
+        with_knowledge: bool = True,
+        with_feedback: bool = True,
+        knowledge_self_gen: bool = True,
+        filter_final_evo: bool = True,
+        **kwargs,
+    ) -> None:
+        super().__init__(*args, **kwargs)
+        self.max_loop = settings.max_loop
+        self.knowledge_base_path = (
+            Path(settings.knowledge_base_path) if settings.knowledge_base_path is not None else None
+        )
+        self.new_knowledge_base_path = (
+            Path(settings.new_knowledge_base_path) if settings.new_knowledge_base_path is not None else None
+        )
+
+        self.with_knowledge = with_knowledge
+        self.with_feedback = with_feedback
+        self.knowledge_self_gen = knowledge_self_gen
+        self.filter_final_evo = filter_final_evo
+        self.evolving_strategy = es
+        self.evaluator = eva
+        self.evolving_version = evolving_version
+
+        # init knowledge base
+        self.knowledge_base = self.load_or_init_knowledge_base(
+            former_knowledge_base_path=self.knowledge_base_path,
+            component_init_list=[],
+        )
+        # init rag method
+        self.rag = (
+            CoSTEERRAGStrategyV2(self.knowledge_base, settings=settings)
+            if self.evolving_version == 2
+            else CoSTEERRAGStrategyV1(self.knowledge_base, settings=settings)
+        )
+
+    def load_or_init_knowledge_base(self, former_knowledge_base_path: Path = None, component_init_list: list = []):
+        if former_knowledge_base_path is not None and former_knowledge_base_path.exists():
+            knowledge_base = pickle.load(open(former_knowledge_base_path, "rb"))
+            if self.evolving_version == 1 and not isinstance(knowledge_base, CoSTEERKnowledgeBaseV1):
+                raise ValueError("The former knowledge base is not compatible with the current version")
+            elif self.evolving_version == 2 and not isinstance(
+                knowledge_base,
+                CoSTEERKnowledgeBaseV2,
+            ):
+                raise ValueError("The former knowledge base is not compatible with the current version")
+        else:
+            knowledge_base = (
+                CoSTEERKnowledgeBaseV2(
+                    init_component_list=component_init_list,
+                )
+                if self.evolving_version == 2
+                else CoSTEERKnowledgeBaseV1()
+            )
+        return knowledge_base
+
+    def develop(self, exp: Experiment) -> Experiment:
+
+        # init intermediate items
+        experiment = EvolvingItem.from_experiment(exp)
+
+        self.evolve_agent = FilterFailedRAGEvoAgent(
+            max_loop=self.max_loop,
+            evolving_strategy=self.evolving_strategy,
+            rag=self.rag,
+            with_knowledge=self.with_knowledge,
+            with_feedback=self.with_feedback,
+            knowledge_self_gen=self.knowledge_self_gen,
+        )
+
+        experiment = self.evolve_agent.multistep_evolve(
+            experiment,
+            self.evaluator,
+            filter_final_evo=self.filter_final_evo,
+        )
+
+        # save new knowledge base
+        if self.new_knowledge_base_path is not None:
+            pickle.dump(self.knowledge_base, open(self.new_knowledge_base_path, "wb"))
+            logger.info(f"New knowledge base saved to {self.new_knowledge_base_path}")
+        exp.sub_workspace_list = experiment.sub_workspace_list
+        return exp
diff --git a/rdagent/components/coder/CoSTEER/config.py b/rdagent/components/coder/CoSTEER/config.py
new file mode 100644
index 00000000..ee5f1856
--- /dev/null
+++ b/rdagent/components/coder/CoSTEER/config.py
@@ -0,0 +1,39 @@
+from typing import Union
+
+from rdagent.core.conf import ExtendedBaseSettings
+
+
+class CoSTEERSettings(ExtendedBaseSettings):
+    """CoSTEER settings. This class is not supposed to be used directly!"""
+
+    class Config:
+        env_prefix = "CoSTEER_"
+
+    coder_use_cache: bool = False
+    """Indicates whether to use cache for the coder"""
+
+    max_loop: int = 10
+    """Maximum number of task implementation loops"""
+
+    fail_task_trial_limit: int = 20
+
+    v1_query_former_trace_limit: int = 5
+    v1_query_similar_success_limit: int = 5
+
+    v2_query_component_limit: int = 1
+    v2_query_error_limit: int = 1
+    v2_query_former_trace_limit: int = 1
+    v2_add_fail_attempt_to_latest_successful_execution: bool = False
+    v2_error_summary: bool = False
+    v2_knowledge_sampler: float = 1.0
+
+    knowledge_base_path: Union[str, None] = None
+    """Path to the knowledge base"""
+
+    new_knowledge_base_path: Union[str, None] = None
+    """Path to the new knowledge base"""
+
+    select_threshold: int = 10
+
+
+CoSTEER_SETTINGS = CoSTEERSettings()
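+
+# A minimal usage sketch (an illustrative addition, not part of the original
+# file): assuming ExtendedBaseSettings keeps pydantic-settings' default
+# environment handling, the `CoSTEER_` env_prefix above lets any field be
+# overridden from the environment, e.g.
+#
+#     export CoSTEER_MAX_LOOP=5
+#
+# after which `CoSTEERSettings().max_loop == 5`.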
+ # So we should adjust the interface of other factors + @abstractmethod + def evaluate( + self, + target_task: Task, + implementation: Workspace, + gt_implementation: Workspace, + **kwargs, + ) -> CoSTEERSingleFeedback: + raise NotImplementedError("Please implement the `evaluator` method") + + +class CoSTEERMultiEvaluator(Evaluator): + def __init__(self, single_evaluator: CoSTEEREvaluator, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.single_evaluator = single_evaluator + + def evaluate( + self, + evo: EvolvingItem, + queried_knowledge: QueriedKnowledge = None, + **kwargs, + ) -> CoSTEERMultiFeedback: + multi_implementation_feedback = multiprocessing_wrapper( + [ + ( + self.single_evaluator.evaluate, + ( + evo.sub_tasks[index], + evo.sub_workspace_list[index], + evo.sub_gt_implementations[index] if evo.sub_gt_implementations is not None else None, + queried_knowledge, + ), + ) + for index in range(len(evo.sub_tasks)) + ], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + + final_decision = [ + None if single_feedback is None else single_feedback.final_decision + for single_feedback in multi_implementation_feedback + ] + logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}") + + for index in range(len(evo.sub_tasks)): + if final_decision[index]: + evo.sub_tasks[index].factor_implementation = True + + return multi_implementation_feedback diff --git a/rdagent/components/coder/factor_coder/CoSTEER/evolvable_subjects.py b/rdagent/components/coder/CoSTEER/evolvable_subjects.py similarity index 67% rename from rdagent/components/coder/factor_coder/CoSTEER/evolvable_subjects.py rename to rdagent/components/coder/CoSTEER/evolvable_subjects.py index e45baed5..91d90d42 100644 --- a/rdagent/components/coder/factor_coder/CoSTEER/evolvable_subjects.py +++ b/rdagent/components/coder/CoSTEER/evolvable_subjects.py @@ -1,23 +1,20 @@ -from rdagent.components.coder.factor_coder.factor import ( - FactorExperiment, - FactorFBWorkspace, - FactorTask, -) from rdagent.core.evolving_framework import EvolvableSubjects +from rdagent.core.experiment import Experiment, FBWorkspace +from rdagent.core.scenario import Task from rdagent.log import rdagent_logger as logger -class FactorEvolvingItem(FactorExperiment, EvolvableSubjects): +class EvolvingItem(Experiment, EvolvableSubjects): """ Intermediate item of factor implementation. 
""" def __init__( self, - sub_tasks: list[FactorTask], - sub_gt_implementations: list[FactorFBWorkspace] = None, + sub_tasks: list[Task], + sub_gt_implementations: list[FBWorkspace] = None, ): - FactorExperiment.__init__(self, sub_tasks=sub_tasks) + Experiment.__init__(self, sub_tasks=sub_tasks) self.corresponding_selection: list = None if sub_gt_implementations is not None and len( sub_gt_implementations, @@ -30,7 +27,7 @@ def __init__( self.sub_gt_implementations = sub_gt_implementations @classmethod - def from_experiment(cls, exp: FactorExperiment) -> "FactorExperiment": + def from_experiment(cls, exp: Experiment) -> Experiment: ei = cls(sub_tasks=exp.sub_tasks) ei.based_experiments = exp.based_experiments ei.experiment_workspace = exp.experiment_workspace diff --git a/rdagent/components/coder/CoSTEER/evolving_agent.py b/rdagent/components/coder/CoSTEER/evolving_agent.py new file mode 100644 index 00000000..70c09729 --- /dev/null +++ b/rdagent/components/coder/CoSTEER/evolving_agent.py @@ -0,0 +1,18 @@ +from rdagent.components.coder.CoSTEER.evaluators import CoSTEERSingleFeedback +from rdagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem +from rdagent.core.evolving_agent import RAGEvoAgent +from rdagent.core.evolving_framework import EvolvableSubjects + + +class FilterFailedRAGEvoAgent(RAGEvoAgent): + def filter_evolvable_subjects_by_feedback( + self, evo: EvolvableSubjects, feedback: CoSTEERSingleFeedback + ) -> EvolvableSubjects: + assert isinstance(evo, EvolvingItem) + assert isinstance(feedback, list) + assert len(evo.sub_workspace_list) == len(feedback) + + for index in range(len(evo.sub_workspace_list)): + if evo.sub_workspace_list[index] is not None and feedback[index] and not feedback[index].final_decision: + evo.sub_workspace_list[index].clear() + return evo diff --git a/rdagent/components/coder/CoSTEER/evolving_strategy.py b/rdagent/components/coder/CoSTEER/evolving_strategy.py new file mode 100644 index 00000000..c7126e7f --- /dev/null +++ b/rdagent/components/coder/CoSTEER/evolving_strategy.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from abc import abstractmethod +from pathlib import Path + +from rdagent.components.coder.CoSTEER.config import CoSTEERSettings +from rdagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem +from rdagent.components.coder.CoSTEER.knowledge_management import ( + CoSTEERQueriedKnowledge, +) +from rdagent.components.coder.CoSTEER.scheduler import random_select +from rdagent.core.conf import RD_AGENT_SETTINGS +from rdagent.core.evaluation import Scenario +from rdagent.core.evolving_framework import EvolvingStrategy, QueriedKnowledge +from rdagent.core.experiment import Workspace +from rdagent.core.prompts import Prompts +from rdagent.core.scenario import Task +from rdagent.core.utils import multiprocessing_wrapper + +implement_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + + +class MultiProcessEvolvingStrategy(EvolvingStrategy): + def __init__(self, scen: Scenario, settings: CoSTEERSettings): + super().__init__(scen) + self.settings = settings + + @abstractmethod + def implement_one_task( + self, + target_task: Task, + queried_knowledge: QueriedKnowledge = None, + ) -> Workspace: + raise NotImplementedError + + def select_one_round_tasks( + self, + to_be_finished_task_index: list, + evo: EvolvingItem, + selected_num: int, + queried_knowledge: CoSTEERQueriedKnowledge, + scen: Scenario, + ) -> list: + """Since scheduler is not essential, we implement a simple random selection 
here.""" + return random_select(to_be_finished_task_index, evo, selected_num, queried_knowledge, scen) + + @abstractmethod + def assign_code_list_to_evo(self, code_list: list, evo: EvolvingItem) -> None: + """ + Assign the code list to the evolving item. + + The code list is aligned with the evolving item's sub-tasks. + If a task is not implemented, put a None in the list. + """ + raise NotImplementedError + + def evolve( + self, + *, + evo: EvolvingItem, + queried_knowledge: CoSTEERQueriedKnowledge | None = None, + **kwargs, + ) -> EvolvingItem: + # 1.找出需要evolve的task + to_be_finished_task_index = [] + for index, target_task in enumerate(evo.sub_tasks): + target_task_desc = target_task.get_task_information() + if target_task_desc in queried_knowledge.success_task_to_knowledge_dict: + evo.sub_workspace_list[index] = queried_knowledge.success_task_to_knowledge_dict[ + target_task_desc + ].implementation + elif ( + target_task_desc not in queried_knowledge.success_task_to_knowledge_dict + and target_task_desc not in queried_knowledge.failed_task_info_set + ): + to_be_finished_task_index.append(index) + + # 2. 选择selection方法 + # if the number of factors to be implemented is larger than the limit, we need to select some of them + + if self.settings.select_threshold < len(to_be_finished_task_index): + # Select a fixed number of factors if the total exceeds the threshold + to_be_finished_task_index = self.select_one_round_tasks( + to_be_finished_task_index, evo, self.settings.select_threshold, queried_knowledge, self.scen + ) + + result = multiprocessing_wrapper( + [ + (self.implement_one_task, (evo.sub_tasks[target_index], queried_knowledge)) + for target_index in to_be_finished_task_index + ], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + code_list = [None for _ in range(len(evo.sub_tasks))] + for index, target_index in enumerate(to_be_finished_task_index): + code_list[target_index] = result[index] + + evo = self.assign_code_list_to_evo(code_list, evo) + evo.corresponding_selection = to_be_finished_task_index + + return evo diff --git a/rdagent/components/coder/factor_coder/CoSTEER/knowledge_management.py b/rdagent/components/coder/CoSTEER/knowledge_management.py similarity index 75% rename from rdagent/components/coder/factor_coder/CoSTEER/knowledge_management.py rename to rdagent/components/coder/CoSTEER/knowledge_management.py index 577d897c..d67215e5 100644 --- a/rdagent/components/coder/factor_coder/CoSTEER/knowledge_management.py +++ b/rdagent/components/coder/CoSTEER/knowledge_management.py @@ -10,15 +10,13 @@ from jinja2 import Environment, StrictUndefined -from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS -from rdagent.components.coder.factor_coder.CoSTEER.evaluators import ( - FactorSingleFeedback, -) -from rdagent.components.coder.factor_coder.factor import FactorTask +from rdagent.components.coder.CoSTEER.config import CoSTEERSettings +from rdagent.components.coder.CoSTEER.evaluators import CoSTEERSingleFeedback from rdagent.components.knowledge_management.graph import ( UndirectedGraph, UndirectedNode, ) +from rdagent.core.evolving_agent import Feedback from rdagent.core.evolving_framework import ( EvolvableSubjects, EvolvingKnowledgeBase, @@ -27,8 +25,9 @@ QueriedKnowledge, RAGStrategy, ) -from rdagent.core.experiment import Workspace +from rdagent.core.experiment import FBWorkspace from rdagent.core.prompts import Prompts +from rdagent.core.scenario import Task from rdagent.log import rdagent_logger as logger from rdagent.oai.llm_utils import ( 
diff --git a/rdagent/components/coder/factor_coder/CoSTEER/knowledge_management.py b/rdagent/components/coder/CoSTEER/knowledge_management.py
similarity index 75%
rename from rdagent/components/coder/factor_coder/CoSTEER/knowledge_management.py
rename to rdagent/components/coder/CoSTEER/knowledge_management.py
index 577d897c..d67215e5 100644
--- a/rdagent/components/coder/factor_coder/CoSTEER/knowledge_management.py
+++ b/rdagent/components/coder/CoSTEER/knowledge_management.py
@@ -10,15 +10,13 @@
 
 from jinja2 import Environment, StrictUndefined
 
-from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS
-from rdagent.components.coder.factor_coder.CoSTEER.evaluators import (
-    FactorSingleFeedback,
-)
-from rdagent.components.coder.factor_coder.factor import FactorTask
+from rdagent.components.coder.CoSTEER.config import CoSTEERSettings
+from rdagent.components.coder.CoSTEER.evaluators import CoSTEERSingleFeedback
 from rdagent.components.knowledge_management.graph import (
     UndirectedGraph,
     UndirectedNode,
 )
+from rdagent.core.evolving_agent import Feedback
 from rdagent.core.evolving_framework import (
     EvolvableSubjects,
     EvolvingKnowledgeBase,
@@ -27,8 +25,9 @@
     QueriedKnowledge,
     RAGStrategy,
 )
-from rdagent.core.experiment import Workspace
+from rdagent.core.experiment import FBWorkspace
 from rdagent.core.prompts import Prompts
+from rdagent.core.scenario import Task
 from rdagent.log import rdagent_logger as logger
 from rdagent.oai.llm_utils import (
     APIBackend,
@@ -36,66 +35,64 @@
 )
 
 
-class FactorKnowledge(Knowledge):
+class CoSTEERKnowledge(Knowledge):
     def __init__(
         self,
-        target_task: FactorTask,
-        implementation: Workspace,
-        feedback: FactorSingleFeedback,
+        target_task: Task,
+        implementation: FBWorkspace,
+        feedback: Feedback,
     ) -> None:
-        """
-        Initialize a FactorKnowledge object. The FactorKnowledge object is used to store a factor implementation without the ground truth code and value.
-
-        Args:
-            factor (Factor): The factor object associated with the KnowledgeManagement.
-
-        Returns:
-            None
-        """
         self.target_task = target_task
         self.implementation = implementation.copy()
         self.feedback = feedback
 
     def get_implementation_and_feedback_str(self) -> str:
-        return f"""------------------Factor implementation code:------------------
+        return f"""------------------implementation code:------------------
 {self.implementation.code}
-------------------Factor implementation feedback:------------------
+------------------implementation feedback:------------------
 {self.feedback!s}
 """
 
 
-class FactorQueriedKnowledge(QueriedKnowledge):
+class CoSTEERQueriedKnowledge(QueriedKnowledge):
     def __init__(self, success_task_to_knowledge_dict: dict = {}, failed_task_info_set: set = set()) -> None:
         self.success_task_to_knowledge_dict = success_task_to_knowledge_dict
         self.failed_task_info_set = failed_task_info_set
 
 
-class FactorKnowledgeBaseV1(EvolvingKnowledgeBase):
+class CoSTEERKnowledgeBaseV1(EvolvingKnowledgeBase):
     def __init__(self, path: str | Path = None) -> None:
-        self.implementation_trace: dict[str, FactorKnowledge] = dict()
+        self.implementation_trace: dict[str, CoSTEERKnowledge] = dict()
         self.success_task_info_set: set[str] = set()
 
         self.task_to_embedding = dict()
         super().__init__(path)
 
-    def query(self) -> QueriedKnowledge | None:
+    def query(self) -> CoSTEERQueriedKnowledge | None:
         """
         Query the knowledge base to get the queried knowledge. So far is handled in RAG strategy.
         """
         raise NotImplementedError
 
 
-class FactorQueriedKnowledgeV1(FactorQueriedKnowledge):
-    def __init__(self) -> None:
-        self.working_task_to_former_failed_knowledge_dict = dict()
-        self.working_task_to_similar_successful_knowledge_dict = dict()
-        super().__init__()
+class CoSTEERQueriedKnowledgeV1(CoSTEERQueriedKnowledge):
+    def __init__(
+        self,
+        *args,
+        task_to_former_failed_traces: dict = {},
+        task_to_similar_task_successful_knowledge: dict = {},
+        **kwargs,
+    ) -> None:
+        self.task_to_former_failed_traces = task_to_former_failed_traces
+        self.task_to_similar_task_successful_knowledge = task_to_similar_task_successful_knowledge
+        super().__init__(*args, **kwargs)
 
 
-class FactorRAGStrategyV1(RAGStrategy):
-    def __init__(self, knowledgebase: FactorKnowledgeBaseV1) -> None:
+class CoSTEERRAGStrategyV1(RAGStrategy):
+    def __init__(self, knowledgebase: CoSTEERKnowledgeBaseV1, settings: CoSTEERSettings) -> None:
         super().__init__(knowledgebase)
         self.current_generated_trace_count = 0
+        self.settings = settings
 
     def generate_knowledge(
         self,
@@ -103,6 +100,9 @@
         *,
         return_knowledge: bool = False,
     ) -> Knowledge | None:
+        raise NotImplementedError(
+            "This method is deliberately left unimplemented; please use the v2 strategy instead."
+        )
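+        # NOTE: everything below this raise is unreachable; the v1 knowledge
+        # generation below is kept only as a reference implementation.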
         if len(evolving_trace) == self.current_generated_trace_count:
             return
         else:
@@ -120,7 +120,7 @@ def generate_knowledge(
                 single_feedback = feedback[task_index]
                 if single_feedback is None:
                     continue
-                single_knowledge = FactorKnowledge(
+                single_knowledge = CoSTEERKnowledge(
                     target_task=target_task,
                     implementation=implementation,
                     feedback=single_feedback,
@@ -141,32 +141,35 @@ def generate_knowledge(
         self,
         evo: EvolvableSubjects,
         evolving_trace: list[EvoStep],
-    ) -> QueriedKnowledge | None:
-        v1_query_former_trace_limit = FACTOR_IMPLEMENT_SETTINGS.v1_query_former_trace_limit
-        v1_query_similar_success_limit = FACTOR_IMPLEMENT_SETTINGS.v1_query_similar_success_limit
-        fail_task_trial_limit = FACTOR_IMPLEMENT_SETTINGS.fail_task_trial_limit
-
-        queried_knowledge = FactorQueriedKnowledgeV1()
-        for target_factor_task in evo.sub_tasks:
-            target_factor_task_information = target_factor_task.get_task_information()
-            if target_factor_task_information in self.knowledgebase.success_task_info_set:
-                queried_knowledge.success_task_to_knowledge_dict[target_factor_task_information] = (
-                    self.knowledgebase.implementation_trace[target_factor_task_information][-1]
+    ) -> CoSTEERQueriedKnowledge | None:
+        raise NotImplementedError(
+            "This method is deliberately left unimplemented; please use the v2 strategy instead."
+        )
+        v1_query_former_trace_limit = self.settings.v1_query_former_trace_limit
+        v1_query_similar_success_limit = self.settings.v1_query_similar_success_limit
+        fail_task_trial_limit = self.settings.fail_task_trial_limit
+
+        queried_knowledge = CoSTEERQueriedKnowledgeV1()
+        for target_task in evo.sub_tasks:
+            target_task_information = target_task.get_task_information()
+            if target_task_information in self.knowledgebase.success_task_info_set:
+                queried_knowledge.success_task_to_knowledge_dict[target_task_information] = (
+                    self.knowledgebase.implementation_trace[target_task_information][-1]
                 )
             elif (
                 len(
                     self.knowledgebase.implementation_trace.setdefault(
-                        target_factor_task_information,
+                        target_task_information,
                         [],
                     ),
                 )
                 >= fail_task_trial_limit
             ):
-                queried_knowledge.failed_task_info_set.add(target_factor_task_information)
+                queried_knowledge.failed_task_info_set.add(target_task_information)
             else:
-                queried_knowledge.working_task_to_former_failed_knowledge_dict[target_factor_task_information] = (
+                queried_knowledge.task_to_former_failed_traces[target_task_information] = (
                     self.knowledgebase.implementation_trace.setdefault(
-                        target_factor_task_information,
+                        target_task_information,
                         [],
                     )[-v1_query_former_trace_limit:]
                 )
@@ -175,7 +178,7 @@ def query(
                     self.knowledgebase.success_task_info_set,
                 )
                 similarity = calculate_embedding_distance_between_str_list(
-                    [target_factor_task_information],
+                    [target_task_information],
                     knowledge_base_success_task_list,
                 )[0]
                 similar_indexes = sorted(
@@ -190,33 +193,36 @@ def query(
                     )[-1]
                     for index in similar_indexes
                 ]
-                queried_knowledge.working_task_to_similar_successful_knowledge_dict[target_factor_task_information] = (
+                queried_knowledge.task_to_similar_task_successful_knowledge[target_task_information] = (
                     similar_successful_knowledge
                 )
         return queried_knowledge
 
 
-class FactorQueriedGraphKnowledge(FactorQueriedKnowledge):
+class CoSTEERQueriedKnowledgeV2(CoSTEERQueriedKnowledgeV1):
     # Aggregation of knowledge
     def __init__(
         self,
-        former_traces: dict = {},
-        component_with_success_task: dict = {},
-        error_with_success_task: dict = {},
+        task_to_former_failed_traces: dict = {},
+        task_to_similar_task_successful_knowledge: dict = {},
+        task_to_similar_error_successful_knowledge: dict = {},
         **kwargs,
     ) -> None:
-        self.former_traces = former_traces
-        self.component_with_success_task = component_with_success_task
-        self.error_with_success_task = error_with_success_task
-        super().__init__(**kwargs)
+        self.task_to_similar_error_successful_knowledge = task_to_similar_error_successful_knowledge
+        super().__init__(
+            task_to_former_failed_traces=task_to_former_failed_traces,
+            task_to_similar_task_successful_knowledge=task_to_similar_task_successful_knowledge,
+            **kwargs,
+        )
 
 
-class FactorGraphRAGStrategy(RAGStrategy):
-    prompt = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")
+class CoSTEERRAGStrategyV2(RAGStrategy):
+    prompt = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
 
-    def __init__(self, knowledgebase: FactorGraphKnowledgeBase) -> None:
+    def __init__(self, knowledgebase: CoSTEERKnowledgeBaseV2, settings: CoSTEERSettings) -> None:
         super().__init__(knowledgebase)
         self.current_generated_trace_count = 0
+        self.settings = settings
 
     def generate_knowledge(
         self,
@@ -233,14 +239,13 @@ def generate_knowledge(
                 implementations = evo_step.evolvable_subjects
                 feedback = evo_step.feedback
                 for task_index in range(len(implementations.sub_tasks)):
-                    single_feedback = feedback[task_index]
                     target_task = implementations.sub_tasks[task_index]
                     target_task_information = target_task.get_task_information()
                     implementation = implementations.sub_workspace_list[task_index]
-                    single_feedback = feedback[task_index]
-                    if single_feedback is None:
+                    single_feedback: CoSTEERSingleFeedback = feedback[task_index]
+                    if implementation is None or single_feedback is None:
                         continue
-                    single_knowledge = FactorKnowledge(
+                    single_knowledge = CoSTEERKnowledge(
                         target_task=target_task,
                         implementation=implementation,
                         feedback=single_feedback,
@@ -271,7 +276,7 @@ def generate_knowledge(
                             )
                         else:
                             error_analysis_result = self.analyze_error(
-                                single_feedback.factor_value_feedback,
+                                single_feedback.value_feedback,
                                 feedback_type="value",
                             )
                         self.knowledgebase.working_trace_error_analysis.setdefault(
@@ -284,35 +289,35 @@ def generate_knowledge(
         self.current_generated_trace_count = len(evolving_trace)
         return None
 
-    def query(self, evo: EvolvableSubjects, evolving_trace: list[EvoStep]) -> QueriedKnowledge | None:
-        conf_knowledge_sampler = FACTOR_IMPLEMENT_SETTINGS.v2_knowledge_sampler
-        factor_implementation_queried_graph_knowledge = FactorQueriedGraphKnowledge(
+    def query(self, evo: EvolvableSubjects, evolving_trace: list[EvoStep]) -> CoSTEERQueriedKnowledge | None:
+        conf_knowledge_sampler = self.settings.v2_knowledge_sampler
+        queried_knowledge_v2 = CoSTEERQueriedKnowledgeV2(
            success_task_to_knowledge_dict=self.knowledgebase.success_task_to_knowledge_dict,
         )
 
-        factor_implementation_queried_graph_knowledge = self.former_trace_query(
+        queried_knowledge_v2 = self.former_trace_query(
             evo,
-            factor_implementation_queried_graph_knowledge,
-            FACTOR_IMPLEMENT_SETTINGS.v2_query_former_trace_limit,
-            FACTOR_IMPLEMENT_SETTINGS.v2_add_fail_attempt_to_latest_successful_execution,
+            queried_knowledge_v2,
+            self.settings.v2_query_former_trace_limit,
+            self.settings.v2_add_fail_attempt_to_latest_successful_execution,
         )
-        factor_implementation_queried_graph_knowledge = self.component_query(
+        queried_knowledge_v2 = self.component_query(
             evo,
-            factor_implementation_queried_graph_knowledge,
-            FACTOR_IMPLEMENT_SETTINGS.v2_query_component_limit,
+            queried_knowledge_v2,
+            self.settings.v2_query_component_limit,
             knowledge_sampler=conf_knowledge_sampler,
         )
-        factor_implementation_queried_graph_knowledge = self.error_query(
+        queried_knowledge_v2 = self.error_query(
             evo,
-            factor_implementation_queried_graph_knowledge,
-            FACTOR_IMPLEMENT_SETTINGS.v2_query_error_limit,
+            queried_knowledge_v2,
+            self.settings.v2_query_error_limit,
             knowledge_sampler=conf_knowledge_sampler,
         )
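+        # The three sub-queries above populate task_to_former_failed_traces,
+        # task_to_similar_task_successful_knowledge, and
+        # task_to_similar_error_successful_knowledge, respectively.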
# because we think this order means a deterioration of the trial (like a wrong gradient descent) @@ -434,47 +437,44 @@ def former_trace_query( # When the last successful execution is not the last one in the working trace, it means we have tried to correct it. We should tell the agent this fail trial to avoid endless loop in the future. if ( len(former_trace_knowledge) > 0 - and len(self.knowledgebase.working_trace_knowledge[target_factor_task_information]) > 1 - and self.knowledgebase.working_trace_knowledge[target_factor_task_information].index( + and len(self.knowledgebase.working_trace_knowledge[target_task_information]) > 1 + and self.knowledgebase.working_trace_knowledge[target_task_information].index( former_trace_knowledge[-1] ) - < len(self.knowledgebase.working_trace_knowledge[target_factor_task_information]) - 1 + < len(self.knowledgebase.working_trace_knowledge[target_task_information]) - 1 ): - latest_attempt = self.knowledgebase.working_trace_knowledge[target_factor_task_information][-1] + latest_attempt = self.knowledgebase.working_trace_knowledge[target_task_information][-1] - factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = ( + queried_knowledge_v2.task_to_former_failed_traces[target_task_information] = ( former_trace_knowledge[-v2_query_former_trace_limit:], latest_attempt, ) else: - factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = ([], None) + queried_knowledge_v2.task_to_former_failed_traces[target_task_information] = ([], None) - return factor_implementation_queried_graph_knowledge + return queried_knowledge_v2 def component_query( self, evo: EvolvableSubjects, - factor_implementation_queried_graph_knowledge: FactorQueriedGraphKnowledge, + queried_knowledge_v2: CoSTEERQueriedKnowledgeV2, v2_query_component_limit: int = 5, knowledge_sampler: float = 1.0, - ) -> QueriedKnowledge | None: - # queried_component_knowledge = FactorQueriedGraphComponentKnowledge() - for target_factor_task in evo.sub_tasks: - target_factor_task_information = target_factor_task.get_task_information() + ) -> CoSTEERQueriedKnowledge | None: + for target_task in evo.sub_tasks: + target_task_information = target_task.get_task_information() if ( - target_factor_task_information in self.knowledgebase.success_task_to_knowledge_dict - or target_factor_task_information in factor_implementation_queried_graph_knowledge.failed_task_info_set + target_task_information in self.knowledgebase.success_task_to_knowledge_dict + or target_task_information in queried_knowledge_v2.failed_task_info_set ): - factor_implementation_queried_graph_knowledge.component_with_success_task[ - target_factor_task_information - ] = [] + queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information] = [] else: - if target_factor_task_information not in self.knowledgebase.task_to_component_nodes: - self.knowledgebase.task_to_component_nodes[target_factor_task_information] = self.analyze_component( - target_factor_task_information, + if target_task_information not in self.knowledgebase.task_to_component_nodes: + self.knowledgebase.task_to_component_nodes[target_task_information] = self.analyze_component( + target_task_information, ) - component_analysis_result = self.knowledgebase.task_to_component_nodes[target_factor_task_information] + component_analysis_result = self.knowledgebase.task_to_component_nodes[target_task_information] if len(component_analysis_result) > 1: task_des_node_list = 
self.knowledgebase.graph_query_by_intersection( @@ -485,9 +485,7 @@ def component_query( else: task_des_node_list = [] single_component_constraint = v2_query_component_limit - factor_implementation_queried_graph_knowledge.component_with_success_task[ - target_factor_task_information - ] = [] + queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information] = [] for component_node in component_analysis_result: # Reverse iterate, a trade-off with intersection search count = 0 @@ -518,19 +516,19 @@ def component_query( ] if ( target_knowledge - not in factor_implementation_queried_graph_knowledge.component_with_success_task[ - target_factor_task_information + not in queried_knowledge_v2.task_to_similar_task_successful_knowledge[ + target_task_information ] ): - factor_implementation_queried_graph_knowledge.component_with_success_task[ - target_factor_task_information + queried_knowledge_v2.task_to_similar_task_successful_knowledge[ + target_task_information ].append(target_knowledge) # finally add embedding related knowledge knowledge_base_success_task_list = list(self.knowledgebase.success_task_to_knowledge_dict) similarity = calculate_embedding_distance_between_str_list( - [target_factor_task_information], + [target_task_information], knowledge_base_success_task_list, )[0] similar_indexes = sorted( @@ -545,28 +543,24 @@ def component_query( for knowledge in embedding_similar_successful_knowledge: if ( knowledge - not in factor_implementation_queried_graph_knowledge.component_with_success_task[ - target_factor_task_information - ] + not in queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information] ): - factor_implementation_queried_graph_knowledge.component_with_success_task[ - target_factor_task_information - ].append(knowledge) + queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information].append( + knowledge + ) if knowledge_sampler > 0: - factor_implementation_queried_graph_knowledge.component_with_success_task[ - target_factor_task_information - ] = [ + queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information] = [ knowledge - for knowledge in factor_implementation_queried_graph_knowledge.component_with_success_task[ - target_factor_task_information + for knowledge in queried_knowledge_v2.task_to_similar_task_successful_knowledge[ + target_task_information ] if random.uniform(0, 1) <= knowledge_sampler ] # Make sure no less than half of the knowledge are from GT - queried_knowledge_list = factor_implementation_queried_graph_knowledge.component_with_success_task[ - target_factor_task_information + queried_knowledge_list = queried_knowledge_v2.task_to_similar_task_successful_knowledge[ + target_task_information ] queried_from_gt_knowledge_list = [ knowledge @@ -582,51 +576,43 @@ def component_query( min((v2_query_component_limit // 2 + 1), len(queried_from_gt_knowledge_list)), v2_query_component_limit - len(queried_without_gt_knowledge_list), ) - factor_implementation_queried_graph_knowledge.component_with_success_task[ - target_factor_task_information - ] = ( + queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information] = ( queried_from_gt_knowledge_list[:queried_from_gt_knowledge_count] + queried_without_gt_knowledge_list[: v2_query_component_limit - queried_from_gt_knowledge_count] ) - return factor_implementation_queried_graph_knowledge + return queried_knowledge_v2 def error_query( self, evo: EvolvableSubjects, - 
factor_implementation_queried_graph_knowledge: FactorQueriedGraphKnowledge, + queried_knowledge_v2: CoSTEERQueriedKnowledgeV2, v2_query_error_limit: int = 5, knowledge_sampler: float = 1.0, - ) -> QueriedKnowledge | None: - # queried_error_knowledge = FactorQueriedGraphErrorKnowledge() - for task_index, target_factor_task in enumerate(evo.sub_tasks): - target_factor_task_information = target_factor_task.get_task_information() - factor_implementation_queried_graph_knowledge.error_with_success_task[target_factor_task_information] = {} + ) -> CoSTEERQueriedKnowledge | None: + for task_index, target_task in enumerate(evo.sub_tasks): + target_task_information = target_task.get_task_information() + queried_knowledge_v2.task_to_similar_error_successful_knowledge[target_task_information] = [] if ( - target_factor_task_information in self.knowledgebase.success_task_to_knowledge_dict - or target_factor_task_information in factor_implementation_queried_graph_knowledge.failed_task_info_set + target_task_information in self.knowledgebase.success_task_to_knowledge_dict + or target_task_information in queried_knowledge_v2.failed_task_info_set ): - factor_implementation_queried_graph_knowledge.error_with_success_task[ - target_factor_task_information - ] = [] + queried_knowledge_v2.task_to_similar_error_successful_knowledge[target_task_information] = [] else: - factor_implementation_queried_graph_knowledge.error_with_success_task[ - target_factor_task_information - ] = [] + queried_knowledge_v2.task_to_similar_error_successful_knowledge[target_task_information] = [] if ( - target_factor_task_information in self.knowledgebase.working_trace_error_analysis - and len(self.knowledgebase.working_trace_error_analysis[target_factor_task_information]) > 0 - and len(factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information]) - > 0 + target_task_information in self.knowledgebase.working_trace_error_analysis + and len(self.knowledgebase.working_trace_error_analysis[target_task_information]) > 0 + and len(queried_knowledge_v2.task_to_former_failed_traces[target_task_information]) > 0 ): - queried_last_trace = factor_implementation_queried_graph_knowledge.former_traces[ - target_factor_task_information - ][0][-1] - target_index = self.knowledgebase.working_trace_knowledge[target_factor_task_information].index( + queried_last_trace = queried_knowledge_v2.task_to_former_failed_traces[target_task_information][0][ + -1 + ] + target_index = self.knowledgebase.working_trace_knowledge[target_task_information].index( queried_last_trace, ) last_knowledge_error_analysis_result = self.knowledgebase.working_trace_error_analysis[ - target_factor_task_information + target_task_information ][target_index] else: last_knowledge_error_analysis_result = [] @@ -719,14 +705,14 @@ def error_query( ] same_error_success_knowledge_pair_list = same_error_success_knowledge_pair_list[:v2_query_error_limit] - factor_implementation_queried_graph_knowledge.error_with_success_task[ - target_factor_task_information - ] = same_error_success_knowledge_pair_list + queried_knowledge_v2.task_to_similar_error_successful_knowledge[target_task_information] = ( + same_error_success_knowledge_pair_list + ) - return factor_implementation_queried_graph_knowledge + return queried_knowledge_v2 -class FactorGraphKnowledgeBase(EvolvingKnowledgeBase): +class CoSTEERKnowledgeBaseV2(EvolvingKnowledgeBase): def __init__(self, init_component_list=None, path: str | Path = None) -> None: """ Load knowledge, offer brief information of 
knowledge and common handle interfaces @@ -749,7 +735,7 @@ def __init__(self, init_component_list=None, path: str | Path = None) -> None: # Add already success task self.success_task_to_knowledge_dict = {} - # key:node_id(for task trace and success implement), value:knowledge instance(aka 'FactorKnowledge') + # key:node_id(for task trace and success implement), value:knowledge instance(aka 'CoSTEERKnowledge') self.node_to_implementation_knowledge_dict = {} # store the task description to component nodes diff --git a/rdagent/components/coder/CoSTEER/prompts.yaml b/rdagent/components/coder/CoSTEER/prompts.yaml new file mode 100644 index 00000000..78f4b627 --- /dev/null +++ b/rdagent/components/coder/CoSTEER/prompts.yaml @@ -0,0 +1,10 @@ + +analyze_component_prompt_v1_system: |- + User is getting a new task that might consist of the components below (given in component_index: component_description): + {{all_component_content}} + + You should find out what components does the new task have, and put their indices in a list. + Please response the critic in the json format. Here is an example structure for the JSON output, please strictly follow the format: + { + "component_no_list": the list containing indices of components. + } \ No newline at end of file diff --git a/rdagent/components/coder/CoSTEER/scheduler.py b/rdagent/components/coder/CoSTEER/scheduler.py new file mode 100644 index 00000000..a9b18aa3 --- /dev/null +++ b/rdagent/components/coder/CoSTEER/scheduler.py @@ -0,0 +1,25 @@ +import random + +from rdagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem +from rdagent.components.coder.CoSTEER.knowledge_management import ( + CoSTEERQueriedKnowledge, +) +from rdagent.core.evaluation import Scenario +from rdagent.log import rdagent_logger as logger + + +def random_select( + to_be_finished_task_index: list, + evo: EvolvingItem, + selected_num: int, + queried_knowledge: CoSTEERQueriedKnowledge, + scen: Scenario, +): + + to_be_finished_task_index = random.sample( + to_be_finished_task_index, + selected_num, + ) + + logger.info(f"The random selection is: {to_be_finished_task_index}") + return to_be_finished_task_index diff --git a/rdagent/components/coder/CoSTEER/task.py b/rdagent/components/coder/CoSTEER/task.py new file mode 100644 index 00000000..aaa38a4f --- /dev/null +++ b/rdagent/components/coder/CoSTEER/task.py @@ -0,0 +1,7 @@ +from rdagent.core.experiment import Task + + +class CoSTEERTask(Task): + def __init__(self, base_code: str = None, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.base_code = base_code diff --git a/rdagent/components/coder/data_science/ensemble/__init__.py b/rdagent/components/coder/data_science/ensemble/__init__.py new file mode 100644 index 00000000..40d21d3e --- /dev/null +++ b/rdagent/components/coder/data_science/ensemble/__init__.py @@ -0,0 +1,19 @@ +# from rdagent.components.coder.CoSTEER import CoSTEER +# from rdagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +# from rdagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +# from rdagent.core.scenario import Scenario + + +# class ModelEnsembleCoSTEER(CoSTEER): +# def __init__( +# self, +# scen: Scenario, +# *args, +# **kwargs, +# ) -> None: +# eva = CoSTEERMultiEvaluator( +# ModelEnsembleCoSTEEREvaluator(scen=scen), scen=scen +# ) # Please specify whether you agree running your eva in parallel or not +# es = ModelEnsembleMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + +# super().__init__(*args, 
settings=CoSTEER_SETTINGS, eva=eva, es=es, evolving_version=1, scen=scen, **kwargs) diff --git a/rdagent/components/coder/data_science/feature_process/__init__.py b/rdagent/components/coder/data_science/feature_process/__init__.py new file mode 100644 index 00000000..68a1ee6b --- /dev/null +++ b/rdagent/components/coder/data_science/feature_process/__init__.py @@ -0,0 +1,19 @@ +# from rdagent.components.coder.CoSTEER import CoSTEER +# from rdagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +# from rdagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +# from rdagent.core.scenario import Scenario + + +# class FeatureCoSTEER(CoSTEER): +# def __init__( +# self, +# scen: Scenario, +# *args, +# **kwargs, +# ) -> None: +# eva = CoSTEERMultiEvaluator( +# FeatureCoSTEEREvaluator(scen=scen), scen=scen +# ) # Please specify whether you agree running your eva in parallel or not +# es = FeatureMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + +# super().__init__(*args, settings=CoSTEER_SETTINGS, eva=eva, es=es, evolving_version=1, scen=scen, **kwargs) diff --git a/rdagent/components/coder/data_science/model/__init__.py b/rdagent/components/coder/data_science/model/__init__.py new file mode 100644 index 00000000..7d4020cf --- /dev/null +++ b/rdagent/components/coder/data_science/model/__init__.py @@ -0,0 +1,19 @@ +# from rdagent.components.coder.CoSTEER import CoSTEER +# from rdagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +# from rdagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +# from rdagent.core.scenario import Scenario + + +# class ModelCoSTEER(CoSTEER): +# def __init__( +# self, +# scen: Scenario, +# *args, +# **kwargs, +# ) -> None: +# eva = CoSTEERMultiEvaluator( +# ModelCoSTEEREvaluator(scen=scen), scen=scen +# ) # Please specify whether you agree running your eva in parallel or not +# es = ModelMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + +# super().__init__(*args, settings=CoSTEER_SETTINGS, eva=eva, es=es, evolving_version=1, scen=scen, **kwargs) diff --git a/rdagent/components/coder/data_science/raw_data_loader/__init__.py b/rdagent/components/coder/data_science/raw_data_loader/__init__.py new file mode 100644 index 00000000..22ed405d --- /dev/null +++ b/rdagent/components/coder/data_science/raw_data_loader/__init__.py @@ -0,0 +1,19 @@ +# from rdagent.components.coder.CoSTEER import CoSTEER +# from rdagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +# from rdagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +# from rdagent.core.scenario import Scenario + + +# class DataLoaderCoSTEER(CoSTEER): +# def __init__( +# self, +# scen: Scenario, +# *args, +# **kwargs, +# ) -> None: +# eva = CoSTEERMultiEvaluator( +# DataLoaderCoSTEEREvaluator(scen=scen), scen=scen +# ) # Please specify whether you agree running your eva in parallel or not +# es = DataLoaderMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + +# super().__init__(*args, settings=CoSTEER_SETTINGS, eva=eva, es=es, evolving_version=1, scen=scen, **kwargs) diff --git a/rdagent/components/coder/data_science/workflow/__init__.py b/rdagent/components/coder/data_science/workflow/__init__.py new file mode 100644 index 00000000..90961567 --- /dev/null +++ b/rdagent/components/coder/data_science/workflow/__init__.py @@ -0,0 +1,19 @@ +# from rdagent.components.coder.CoSTEER import CoSTEER +# from rdagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +# from 
rdagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +# from rdagent.core.scenario import Scenario + + +# class WorkflowCoSTEER(CoSTEER): +# def __init__( +# self, +# scen: Scenario, +# *args, +# **kwargs, +# ) -> None: +# eva = CoSTEERMultiEvaluator( +# WorkflowCoSTEEREvaluator(scen=scen), scen=scen +# ) # Please specify whether you agree running your eva in parallel or not +# es = WorkflowMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + +# super().__init__(*args, settings=CoSTEER_SETTINGS, eva=eva, es=es, evolving_version=1, scen=scen, **kwargs) diff --git a/rdagent/components/coder/factor_coder/CoSTEER/__init__.py b/rdagent/components/coder/factor_coder/CoSTEER/__init__.py deleted file mode 100644 index f2edc1d3..00000000 --- a/rdagent/components/coder/factor_coder/CoSTEER/__init__.py +++ /dev/null @@ -1,113 +0,0 @@ -import pickle -from pathlib import Path - -from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS -from rdagent.components.coder.factor_coder.CoSTEER.evaluators import ( - FactorEvaluatorForCoder, - FactorMultiEvaluator, -) -from rdagent.components.coder.factor_coder.CoSTEER.evolvable_subjects import ( - FactorEvolvingItem, -) -from rdagent.components.coder.factor_coder.CoSTEER.evolving_agent import ( - FactorRAGEvoAgent, -) -from rdagent.components.coder.factor_coder.CoSTEER.evolving_strategy import ( - FactorEvolvingStrategyWithGraph, -) -from rdagent.components.coder.factor_coder.CoSTEER.knowledge_management import ( - FactorGraphKnowledgeBase, - FactorGraphRAGStrategy, - FactorKnowledgeBaseV1, -) -from rdagent.components.coder.factor_coder.factor import FactorExperiment -from rdagent.core.developer import Developer -from rdagent.core.evolving_agent import RAGEvoAgent -from rdagent.core.scenario import Scenario -from rdagent.log import rdagent_logger as logger - - -class FactorCoSTEER(Developer[FactorExperiment]): - def __init__( - self, - *args, - with_knowledge: bool = True, - with_feedback: bool = True, - knowledge_self_gen: bool = True, - filter_final_evo: bool = True, - **kwargs, - ) -> None: - super().__init__(*args, **kwargs) - self.max_loop = FACTOR_IMPLEMENT_SETTINGS.max_loop - self.knowledge_base_path = ( - Path(FACTOR_IMPLEMENT_SETTINGS.knowledge_base_path) - if FACTOR_IMPLEMENT_SETTINGS.knowledge_base_path is not None - else None - ) - self.new_knowledge_base_path = ( - Path(FACTOR_IMPLEMENT_SETTINGS.new_knowledge_base_path) - if FACTOR_IMPLEMENT_SETTINGS.new_knowledge_base_path is not None - else None - ) - self.with_knowledge = with_knowledge - self.with_feedback = with_feedback - self.knowledge_self_gen = knowledge_self_gen - self.filter_final_evo = filter_final_evo - self.evolving_strategy = FactorEvolvingStrategyWithGraph(scen=self.scen) - # declare the factor evaluator - self.factor_evaluator = FactorMultiEvaluator(FactorEvaluatorForCoder(scen=self.scen), scen=self.scen) - self.evolving_version = 2 - - def load_or_init_knowledge_base(self, former_knowledge_base_path: Path = None, component_init_list: list = []): - if former_knowledge_base_path is not None and former_knowledge_base_path.exists(): - factor_knowledge_base = pickle.load(open(former_knowledge_base_path, "rb")) - if self.evolving_version == 1 and not isinstance(factor_knowledge_base, FactorKnowledgeBaseV1): - raise ValueError("The former knowledge base is not compatible with the current version") - elif self.evolving_version == 2 and not isinstance( - factor_knowledge_base, - FactorGraphKnowledgeBase, - ): - raise 
ValueError("The former knowledge base is not compatible with the current version") - else: - factor_knowledge_base = ( - FactorGraphKnowledgeBase( - init_component_list=component_init_list, - ) - if self.evolving_version == 2 - else FactorKnowledgeBaseV1() - ) - return factor_knowledge_base - - def develop(self, exp: FactorExperiment) -> FactorExperiment: - # init knowledge base - factor_knowledge_base = self.load_or_init_knowledge_base( - former_knowledge_base_path=self.knowledge_base_path, - component_init_list=[], - ) - # init rag method - self.rag = FactorGraphRAGStrategy(factor_knowledge_base) - - # init intermediate items - factor_experiment = FactorEvolvingItem.from_experiment(exp) - - self.evolve_agent = FactorRAGEvoAgent( - max_loop=self.max_loop, - evolving_strategy=self.evolving_strategy, - rag=self.rag, - with_knowledge=self.with_knowledge, - with_feedback=self.with_feedback, - knowledge_self_gen=self.knowledge_self_gen, - ) - - factor_experiment = self.evolve_agent.multistep_evolve( - factor_experiment, - self.factor_evaluator, - filter_final_evo=self.filter_final_evo, - ) - - # save new knowledge base - if self.new_knowledge_base_path is not None: - pickle.dump(factor_knowledge_base, open(self.new_knowledge_base_path, "wb")) - logger.info(f"New knowledge base saved to {self.new_knowledge_base_path}") - exp.sub_workspace_list = factor_experiment.sub_workspace_list - return exp diff --git a/rdagent/components/coder/factor_coder/CoSTEER/evolving_agent.py b/rdagent/components/coder/factor_coder/CoSTEER/evolving_agent.py deleted file mode 100644 index 75661805..00000000 --- a/rdagent/components/coder/factor_coder/CoSTEER/evolving_agent.py +++ /dev/null @@ -1,19 +0,0 @@ -from rdagent.components.coder.factor_coder.CoSTEER.evaluators import FactorMultiFeedback -from rdagent.components.coder.factor_coder.CoSTEER.evolvable_subjects import ( - FactorEvolvingItem, -) -from rdagent.core.evaluation import Feedback -from rdagent.core.evolving_agent import RAGEvoAgent -from rdagent.core.evolving_framework import EvolvableSubjects - - -class FactorRAGEvoAgent(RAGEvoAgent): - def filter_evolvable_subjects_by_feedback(self, evo: EvolvableSubjects, feedback: Feedback) -> EvolvableSubjects: - assert isinstance(evo, FactorEvolvingItem) - assert isinstance(feedback, list) - assert len(evo.sub_workspace_list) == len(feedback) - - for index in range(len(evo.sub_workspace_list)): - if feedback[index] and not feedback[index].final_decision: - evo.sub_workspace_list[index].clear() - return evo diff --git a/rdagent/components/coder/factor_coder/CoSTEER/evolving_strategy.py b/rdagent/components/coder/factor_coder/CoSTEER/evolving_strategy.py deleted file mode 100644 index 6ef51e8c..00000000 --- a/rdagent/components/coder/factor_coder/CoSTEER/evolving_strategy.py +++ /dev/null @@ -1,331 +0,0 @@ -from __future__ import annotations - -import json -from abc import abstractmethod -from copy import deepcopy -from pathlib import Path -from typing import TYPE_CHECKING - -from jinja2 import Environment, StrictUndefined - -from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS -from rdagent.components.coder.factor_coder.CoSTEER.evolvable_subjects import ( - FactorEvolvingItem, -) -from rdagent.components.coder.factor_coder.CoSTEER.scheduler import ( - LLMSelect, - RandomSelect, -) -from rdagent.components.coder.factor_coder.factor import FactorFBWorkspace, FactorTask -from rdagent.core.conf import RD_AGENT_SETTINGS -from rdagent.core.evolving_framework import EvolvingStrategy, 
QueriedKnowledge -from rdagent.core.experiment import Workspace -from rdagent.core.prompts import Prompts -from rdagent.core.utils import multiprocessing_wrapper -from rdagent.oai.llm_conf import LLM_SETTINGS -from rdagent.oai.llm_utils import APIBackend - -if TYPE_CHECKING: - from rdagent.components.coder.factor_coder.CoSTEER.knowledge_management import ( - FactorQueriedKnowledge, - FactorQueriedKnowledgeV1, - ) - -implement_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml") - - -class MultiProcessEvolvingStrategy(EvolvingStrategy): - @abstractmethod - def implement_one_factor( - self, - target_task: FactorTask, - queried_knowledge: QueriedKnowledge = None, - ) -> Workspace: - raise NotImplementedError - - def evolve( - self, - *, - evo: FactorEvolvingItem, - queried_knowledge: FactorQueriedKnowledge | None = None, - **kwargs, - ) -> FactorEvolvingItem: - # 1. Find the factors that need to evolve - to_be_finished_task_index = [] - for index, target_factor_task in enumerate(evo.sub_tasks): - target_factor_task_desc = target_factor_task.get_task_information() - if target_factor_task_desc in queried_knowledge.success_task_to_knowledge_dict: - evo.sub_workspace_list[index] = queried_knowledge.success_task_to_knowledge_dict[ - target_factor_task_desc - ].implementation - elif ( - target_factor_task_desc not in queried_knowledge.success_task_to_knowledge_dict - and target_factor_task_desc not in queried_knowledge.failed_task_info_set - ): - to_be_finished_task_index.append(index) - - # 2. Choose the selection method - # if the number of factors to be implemented is larger than the limit, we need to select some of them - - if FACTOR_IMPLEMENT_SETTINGS.select_threshold < len(to_be_finished_task_index): - # Select a fixed number of factors if the total exceeds the threshold - if FACTOR_IMPLEMENT_SETTINGS.select_method == "random": - to_be_finished_task_index = RandomSelect( - to_be_finished_task_index, - FACTOR_IMPLEMENT_SETTINGS.select_threshold, - ) - - if FACTOR_IMPLEMENT_SETTINGS.select_method == "scheduler": - to_be_finished_task_index = LLMSelect( - to_be_finished_task_index, - FACTOR_IMPLEMENT_SETTINGS.select_threshold, - evo, - queried_knowledge.former_traces, - self.scen, - ) - - result = multiprocessing_wrapper( - [ - (self.implement_one_factor, (evo.sub_tasks[target_index], queried_knowledge)) - for target_index in to_be_finished_task_index - ], - n=RD_AGENT_SETTINGS.multi_proc_n, - ) - from rdagent.components.coder.factor_coder.factor import FactorFBWorkspace - - for index, target_index in enumerate(to_be_finished_task_index): - if evo.sub_workspace_list[target_index] is None: - evo.sub_workspace_list[target_index] = FactorFBWorkspace(target_task=evo.sub_tasks[target_index]) - evo.sub_workspace_list[target_index].inject_code(**{"factor.py": result[index]}) - - evo.corresponding_selection = to_be_finished_task_index - - return evo - - -class FactorEvolvingStrategy(MultiProcessEvolvingStrategy): - def implement_one_factor( - self, - target_task: FactorTask, - queried_knowledge: FactorQueriedKnowledgeV1 = None, - ) -> str: - factor_information_str = target_task.get_task_information() - - if queried_knowledge is not None and factor_information_str in queried_knowledge.success_task_to_knowledge_dict: - return queried_knowledge.success_task_to_knowledge_dict[factor_information_str].implementation - elif queried_knowledge is not None and factor_information_str in queried_knowledge.failed_task_info_set: - return None - else: - queried_similar_successful_knowledge = ( - queried_knowledge.working_task_to_similar_successful_knowledge_dict[factor_information_str] - if queried_knowledge is not None - else [] - ) - queried_former_failed_knowledge = ( - queried_knowledge.working_task_to_former_failed_knowledge_dict[factor_information_str] - if queried_knowledge is not None - else [] - ) - - queried_former_failed_knowledge_to_render = queried_former_failed_knowledge - - system_prompt = ( - Environment(undefined=StrictUndefined) - .from_string( - implement_prompts["evolving_strategy_factor_implementation_v1_system"], - ) - .render( - scenario=self.scen.get_scenario_all_desc(target_task), - queried_former_failed_knowledge=queried_former_failed_knowledge_to_render, - ) - ) - session = APIBackend(use_chat_cache=FACTOR_IMPLEMENT_SETTINGS.coder_use_cache).build_chat_session( - session_system_prompt=system_prompt, - ) - - queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge - for _ in range(10): # max attempt to reduce the length of user_prompt - user_prompt = ( - Environment(undefined=StrictUndefined) - .from_string( - implement_prompts["evolving_strategy_factor_implementation_v1_user"], - ) - .render( - factor_information_str=factor_information_str, - queried_similar_successful_knowledge=queried_similar_successful_knowledge_to_render, - queried_former_failed_knowledge=queried_former_failed_knowledge_to_render, - ) - .strip("\n") - ) - if ( - session.build_chat_completion_message_and_calculate_token( - user_prompt, - ) - < LLM_SETTINGS.chat_token_limit - ): - break - elif len(queried_former_failed_knowledge_to_render) > 1: - queried_former_failed_knowledge_to_render = queried_former_failed_knowledge_to_render[1:] - elif len(queried_similar_successful_knowledge_to_render) > 1: - queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge_to_render[1:] - - code = json.loads( - session.build_chat_completion( - user_prompt=user_prompt, - json_mode=True, - ), - )["code"] - - return code - - -class FactorEvolvingStrategyWithGraph(MultiProcessEvolvingStrategy): - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - self.num_loop = 0 - self.haveSelected = False - - def implement_one_factor( - self, - target_task: FactorTask, - queried_knowledge, - ) -> str: - error_summary = FACTOR_IMPLEMENT_SETTINGS.v2_error_summary - # 1. Extract the background information of the factor - target_factor_task_information = target_task.get_task_information() - - # 2. Check whether this factor still needs work (already done correctly, or failed too many times) - if ( - queried_knowledge is not None - and target_factor_task_information in queried_knowledge.success_task_to_knowledge_dict - ): - return queried_knowledge.success_task_to_knowledge_dict[target_factor_task_information].implementation - elif queried_knowledge is not None and target_factor_task_information in queried_knowledge.failed_task_info_set: - return None - else: - # 3. Retrieve the experience data from the knowledge base (similar success, similar error, former_trace) - queried_similar_component_knowledge = ( - queried_knowledge.component_with_success_task[target_factor_task_information] - if queried_knowledge is not None - else [] - ) # A list, [success task implement knowledge] - - queried_similar_error_knowledge = ( - queried_knowledge.error_with_success_task[target_factor_task_information] - if queried_knowledge is not None - else {} - ) # A dict, {{error_type:[[error_imp_knowledge, success_imp_knowledge],...]},...} - - queried_former_failed_knowledge = ( - queried_knowledge.former_traces[target_factor_task_information][0] - if queried_knowledge is not None - else [] - ) - - queried_former_failed_knowledge_to_render = queried_former_failed_knowledge - - latest_attempt_to_latest_successful_execution = queried_knowledge.former_traces[ - target_factor_task_information - ][1] - - system_prompt = ( - Environment(undefined=StrictUndefined) - .from_string( - implement_prompts["evolving_strategy_factor_implementation_v1_system"], - ) - .render( - scenario=self.scen.get_scenario_all_desc(target_task, filtered_tag="feature"), - queried_former_failed_knowledge=queried_former_failed_knowledge_to_render, - ) - ) - - session = APIBackend(use_chat_cache=FACTOR_IMPLEMENT_SETTINGS.coder_use_cache).build_chat_session( - session_system_prompt=system_prompt, - ) - - queried_similar_component_knowledge_to_render = queried_similar_component_knowledge - queried_similar_error_knowledge_to_render = queried_similar_error_knowledge - error_summary_critics = "" - # dynamically prevent the prompt from becoming too long - for _ in range(10): # max attempt to reduce the length of user_prompt - # summarize the errors (optional) - if ( - error_summary - and len(queried_similar_error_knowledge_to_render) != 0 - and len(queried_former_failed_knowledge_to_render) != 0 - ): - error_summary_system_prompt = ( - Environment(undefined=StrictUndefined) - .from_string(implement_prompts["evolving_strategy_error_summary_v2_system"]) - .render( - scenario=self.scen.get_scenario_all_desc(target_task), - factor_information_str=target_factor_task_information, - code_and_feedback=queried_former_failed_knowledge_to_render[ - -1 - ].get_implementation_and_feedback_str(), - ) - .strip("\n") - ) - session_summary = APIBackend( - use_chat_cache=FACTOR_IMPLEMENT_SETTINGS.coder_use_cache - ).build_chat_session( - session_system_prompt=error_summary_system_prompt, - ) - for _ in range(10): # max attempt to reduce the length of error_summary_user_prompt - error_summary_user_prompt = ( - Environment(undefined=StrictUndefined) - .from_string(implement_prompts["evolving_strategy_error_summary_v2_user"]) - .render( - queried_similar_component_knowledge=queried_similar_component_knowledge_to_render, - ) - .strip("\n") - ) - if ( - session_summary.build_chat_completion_message_and_calculate_token(error_summary_user_prompt) - < LLM_SETTINGS.chat_token_limit - ): - break - elif len(queried_similar_error_knowledge_to_render) > 0: - queried_similar_error_knowledge_to_render = queried_similar_error_knowledge_to_render[:-1] - error_summary_critics = session_summary.build_chat_completion( - user_prompt=error_summary_user_prompt, - json_mode=False, - ) - # build the user_prompt and start writing the code - user_prompt = ( - Environment(undefined=StrictUndefined) - .from_string( - implement_prompts["evolving_strategy_factor_implementation_v2_user"], - ) - .render( - factor_information_str=target_factor_task_information, - queried_similar_component_knowledge=queried_similar_component_knowledge_to_render, - 
queried_similar_error_knowledge=queried_similar_error_knowledge_to_render, - error_summary=error_summary, - error_summary_critics=error_summary_critics, - latest_attempt_to_latest_successful_execution=latest_attempt_to_latest_successful_execution, - ) - .strip("\n") - ) - if ( - session.build_chat_completion_message_and_calculate_token( - user_prompt, - ) - < LLM_SETTINGS.chat_token_limit - ): - break - elif len(queried_former_failed_knowledge_to_render) > 1: - queried_former_failed_knowledge_to_render = queried_former_failed_knowledge_to_render[1:] - elif len(queried_similar_component_knowledge_to_render) > len( - queried_similar_error_knowledge_to_render, - ): - queried_similar_component_knowledge_to_render = queried_similar_component_knowledge_to_render[:-1] - elif len(queried_similar_error_knowledge_to_render) > 0: - queried_similar_error_knowledge_to_render = queried_similar_error_knowledge_to_render[:-1] - - response = session.build_chat_completion( - user_prompt=user_prompt, - json_mode=True, - ) - code = json.loads(response)["code"] - return code diff --git a/rdagent/components/coder/factor_coder/CoSTEER/scheduler.py b/rdagent/components/coder/factor_coder/CoSTEER/scheduler.py deleted file mode 100644 index ffb1bfc4..00000000 --- a/rdagent/components/coder/factor_coder/CoSTEER/scheduler.py +++ /dev/null @@ -1,88 +0,0 @@ -import json -from pathlib import Path -from typing import Dict - -from jinja2 import Environment, StrictUndefined - -from rdagent.components.coder.factor_coder.CoSTEER.evolvable_subjects import ( - FactorEvolvingItem, -) -from rdagent.core.prompts import Prompts -from rdagent.core.scenario import Scenario -from rdagent.log import rdagent_logger as logger -from rdagent.oai.llm_conf import LLM_SETTINGS -from rdagent.oai.llm_utils import APIBackend - -scheduler_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml") - - -def RandomSelect(to_be_finished_task_index, implementation_factors_per_round): - import random - - to_be_finished_task_index = random.sample( - to_be_finished_task_index, - implementation_factors_per_round, - ) - - logger.info(f"The random selection is: {to_be_finished_task_index}") - return to_be_finished_task_index - - -def LLMSelect( - to_be_finished_task_index, - implementation_factors_per_round, - evo: FactorEvolvingItem, - former_trace: Dict, - scen: Scenario, -): - tasks = [] - for i in to_be_finished_task_index: - # find corresponding former trace for each task - target_factor_task_information = evo.sub_tasks[i].get_task_information() - if target_factor_task_information in former_trace: - tasks.append((i, evo.sub_tasks[i], former_trace[target_factor_task_information][0])) - - system_prompt = ( - Environment(undefined=StrictUndefined) - .from_string( - scheduler_prompts["select_implementable_factor_system"], - ) - .render( - scenario=scen.get_scenario_all_desc(), - ) - ) - - for _ in range(10): # max attempt to reduce the length of user_prompt - user_prompt = ( - Environment(undefined=StrictUndefined) - .from_string( - scheduler_prompts["select_implementable_factor_user"], - ) - .render( - factor_num=implementation_factors_per_round, - sub_tasks=tasks, - ) - ) - if ( - APIBackend().build_messages_and_calculate_token( - user_prompt=user_prompt, - system_prompt=system_prompt, - ) - < LLM_SETTINGS.chat_token_limit - ): - break - - response = APIBackend().build_messages_and_create_chat_completion( - user_prompt=user_prompt, - system_prompt=system_prompt, - json_mode=True, - ) - try: - selection = 
json.loads(response)["selected_factor"] - if not isinstance(selection, list): - return to_be_finished_task_index - selection_index = [x for x in selection if isinstance(x, int)] - except: - return to_be_finished_task_index - - return selection_index diff --git a/rdagent/components/coder/factor_coder/__init__.py b/rdagent/components/coder/factor_coder/__init__.py new file mode 100644 index 00000000..be80b121 --- /dev/null +++ b/rdagent/components/coder/factor_coder/__init__.py @@ -0,0 +1,22 @@ +from rdagent.components.coder.CoSTEER import CoSTEER +from rdagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +from rdagent.components.coder.factor_coder.config import FACTOR_COSTEER_SETTINGS +from rdagent.components.coder.factor_coder.evaluators import FactorEvaluatorForCoder +from rdagent.components.coder.factor_coder.evolving_strategy import ( + FactorMultiProcessEvolvingStrategy, +) +from rdagent.core.scenario import Scenario + + +class FactorCoSTEER(CoSTEER): + def __init__( + self, + scen: Scenario, + *args, + **kwargs, + ) -> None: + setting = FACTOR_COSTEER_SETTINGS + eva = CoSTEERMultiEvaluator(FactorEvaluatorForCoder(scen=scen), scen=scen) + es = FactorMultiProcessEvolvingStrategy(scen=scen, settings=FACTOR_COSTEER_SETTINGS) + + super().__init__(*args, settings=setting, eva=eva, es=es, evolving_version=2, scen=scen, **kwargs) diff --git a/rdagent/components/coder/factor_coder/config.py b/rdagent/components/coder/factor_coder/config.py index 45488cfa..6f100da6 100644 --- a/rdagent/components/coder/factor_coder/config.py +++ b/rdagent/components/coder/factor_coder/config.py @@ -1,18 +1,9 @@ -from pathlib import Path -from typing import Literal, Union +from rdagent.components.coder.CoSTEER.config import CoSTEERSettings +from rdagent.core.conf import ExtendedSettingsConfigDict -from pydantic_settings import BaseSettings -SELECT_METHOD = Literal["random", "scheduler"] - - -class FactorImplementSettings(BaseSettings): - class Config: - env_prefix = "FACTOR_CODER_" - """Use `FACTOR_CODER_` as prefix for environment variables""" - - coder_use_cache: bool = False - """Indicates whether to use cache for the coder""" +class FactorCoSTEERSettings(CoSTEERSettings): + model_config = ExtendedSettingsConfigDict(env_prefix="FACTOR_CoSTEER_") data_folder: str = "git_ignore_folder/factor_implementation_source_data" """Path to the folder containing financial data (default is fundamental data in Qlib)""" @@ -20,21 +11,6 @@ class Config: data_folder_debug: str = "git_ignore_folder/factor_implementation_source_data_debug" """Path to the folder containing partial financial data (for debugging)""" - # TODO: the factor implement specific settings should not appear in this settings - # Evolving should have a method specific settings - # evolving related config - fail_task_trial_limit: int = 20 - - v1_query_former_trace_limit: int = 5 - v1_query_similar_success_limit: int = 5 - - v2_query_component_limit: int = 1 - v2_query_error_limit: int = 1 - v2_query_former_trace_limit: int = 1 - v2_add_fail_attempt_to_latest_successful_execution: bool = False - v2_error_summary: bool = False - v2_knowledge_sampler: float = 1.0 - simple_background: bool = False """Whether to use simple background information for code feedback""" @@ -44,20 +20,8 @@ class Config: select_method: str = "random" """Method for the selection of factors implementation""" - select_threshold: int = 10 - """Threshold for the number of factor selections""" - - max_loop: int = 10 - """Maximum number of task implementation loops""" - - 
knowledge_base_path: Union[str, None] = None - """Path to the knowledge base""" - - new_knowledge_base_path: Union[str, None] = None - """Path to the new knowledge base""" - python_bin: str = "python" """Path to the Python binary""" -FACTOR_IMPLEMENT_SETTINGS = FactorImplementSettings() +FACTOR_COSTEER_SETTINGS = FactorCoSTEERSettings()
diff --git a/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py b/rdagent/components/coder/factor_coder/eva_utils.py similarity index 69% rename from rdagent/components/coder/factor_coder/CoSTEER/evaluators.py rename to rdagent/components/coder/factor_coder/eva_utils.py index 9a047915..8efb2096 100644 --- a/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py +++ b/rdagent/components/coder/factor_coder/eva_utils.py @@ -1,35 +1,28 @@ import io import json -import re from abc import abstractmethod from pathlib import Path -from typing import List, Tuple +from typing import Tuple import pandas as pd from jinja2 import Environment, StrictUndefined -from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS -from rdagent.components.coder.factor_coder.CoSTEER.evolvable_subjects import ( - FactorEvolvingItem, -) +from rdagent.components.coder.factor_coder.config import FACTOR_COSTEER_SETTINGS from rdagent.components.coder.factor_coder.factor import FactorTask -from rdagent.core.conf import RD_AGENT_SETTINGS -from rdagent.core.evaluation import Evaluator, Feedback -from rdagent.core.evolving_framework import QueriedKnowledge from rdagent.core.experiment import Task, Workspace from rdagent.core.prompts import Prompts -from rdagent.core.utils import multiprocessing_wrapper -from rdagent.log import rdagent_logger as logger from rdagent.oai.llm_conf import LLM_SETTINGS from rdagent.oai.llm_utils import APIBackend -evaluate_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml") +evaluate_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") -class FactorEvaluator(Evaluator): - # TODO: - # I think we should have unified interface for all evaluates, for examples. - # So we should adjust the interface of other factors +class FactorEvaluator: + """Although the __init__ method is the same as Evaluator's, we keep a separate class to emphasize that they are different""" + + def __init__(self, scen=None) -> None: + self.scen = scen + @abstractmethod def evaluate( self, @@ -81,7 +74,7 @@ def evaluate( target_task: FactorTask, implementation: Workspace, execution_feedback: str, - factor_value_feedback: str = "", + value_feedback: str = "", gt_implementation: Workspace = None, **kwargs, ): @@ -96,7 +89,7 @@ def evaluate( self.scen.get_scenario_all_desc( target_task, filtered_tag="feature", - simple_background=FACTOR_IMPLEMENT_SETTINGS.simple_background, + simple_background=FACTOR_COSTEER_SETTINGS.simple_background, ) if self.scen is not None else "No scenario description." 
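[Editor's note] The config.py change above establishes the pattern that every CoSTEER coder now follows: subclass CoSTEERSettings, set an environment prefix via ExtendedSettingsConfigDict, and declare only coder-specific fields. A minimal sketch of a hypothetical new coder's settings (the class name, prefix, and field below are illustrative, not part of this patch):

from rdagent.components.coder.CoSTEER.config import CoSTEERSettings
from rdagent.core.conf import ExtendedSettingsConfigDict

class MyCoSTEERSettings(CoSTEERSettings):
    # Shared knobs (max_loop, select_threshold, file_based_execution_timeout, ...)
    # are inherited from CoSTEERSettings; only the prefix and extra fields live here.
    model_config = ExtendedSettingsConfigDict(env_prefix="MY_CoSTEER_")

    data_folder: str = "git_ignore_folder/my_source_data"  # illustrative coder-specific field

MY_COSTEER_SETTINGS = MyCoSTEERSettings()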
@@ -115,7 +108,7 @@ def evaluate( factor_information=factor_information, code=code, execution_feedback=execution_feedback_to_render, - factor_value_feedback=factor_value_feedback, + value_feedback=value_feedback, gt_code=gt_implementation.code if gt_implementation else None, ) ) @@ -503,7 +496,7 @@ def evaluate( return conclusion_str, decision_from_value_check -class FactorFinalDecisionEvaluator(Evaluator): +class FactorFinalDecisionEvaluator(FactorEvaluator): def evaluate( self, target_task: FactorTask, @@ -535,7 +528,7 @@ def evaluate( factor_information=target_task.get_task_information(), execution_feedback=execution_feedback_to_render, code_feedback=code_feedback, - factor_value_feedback=( + value_feedback=( value_feedback if value_feedback is not None else "No Ground Truth Value provided, so no evaluation on value is performed." @@ -585,198 +578,3 @@ def evaluate( ) from e return None, None - - -class FactorSingleFeedback: - """This class is a feedback to single implementation which is generated from an evaluator.""" - - def __init__( - self, - execution_feedback: str = None, - value_generated_flag: bool = False, - code_feedback: str = None, - factor_value_feedback: str = None, - final_decision: bool = None, - final_feedback: str = None, - final_decision_based_on_gt: bool = None, - ) -> None: - self.execution_feedback = execution_feedback - self.value_generated_flag = value_generated_flag - self.code_feedback = code_feedback - self.factor_value_feedback = factor_value_feedback - self.final_decision = final_decision - self.final_feedback = final_feedback - self.final_decision_based_on_gt = final_decision_based_on_gt - - def __str__(self) -> str: - return f"""------------------Factor Execution Feedback------------------ -{self.execution_feedback} -------------------Factor Code Feedback------------------ -{self.code_feedback} -------------------Factor Value Feedback------------------ -{self.factor_value_feedback} -------------------Factor Final Feedback------------------ -{self.final_feedback} -------------------Factor Final Decision------------------ -This implementation is {'SUCCESS' if self.final_decision else 'FAIL'}. -""" - - -class FactorMultiFeedback( - Feedback, - List[FactorSingleFeedback], -): - """Feedback contains a list, each element is the corresponding feedback for each factor implementation.""" - - -class FactorEvaluatorForCoder(FactorEvaluator): - """This class is the v1 version of evaluator for a single factor implementation. - It calls several evaluators in share modules to evaluate the factor implementation. 
- """ - - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - self.value_evaluator = FactorValueEvaluator(self.scen) - self.code_evaluator = FactorCodeEvaluator(self.scen) - self.final_decision_evaluator = FactorFinalDecisionEvaluator(self.scen) - - def evaluate( - self, - target_task: FactorTask, - implementation: Workspace, - gt_implementation: Workspace = None, - queried_knowledge: QueriedKnowledge = None, - **kwargs, - ) -> FactorSingleFeedback: - if implementation is None: - return None - - target_task_information = target_task.get_task_information() - if ( - queried_knowledge is not None - and target_task_information in queried_knowledge.success_task_to_knowledge_dict - ): - return queried_knowledge.success_task_to_knowledge_dict[target_task_information].feedback - elif queried_knowledge is not None and target_task_information in queried_knowledge.failed_task_info_set: - return FactorSingleFeedback( - execution_feedback="This task has failed too many times, skip implementation.", - value_generated_flag=False, - code_feedback="This task has failed too many times, skip code evaluation.", - factor_value_feedback="This task has failed too many times, skip value evaluation.", - final_decision=False, - final_feedback="This task has failed too many times, skip final decision evaluation.", - final_decision_based_on_gt=False, - ) - else: - factor_feedback = FactorSingleFeedback() - - # 1. Get factor execution feedback to generated implementation and remove the long list of numbers in execution feedback - ( - execution_feedback, - gen_df, - ) = implementation.execute() - - execution_feedback = re.sub(r"(?<=\D)(,\s+-?\d+\.\d+){50,}(?=\D)", ", ", execution_feedback) - factor_feedback.execution_feedback = "\n".join( - [line for line in execution_feedback.split("\n") if "warning" not in line.lower()] - ) - - # 2. Get factor value feedback - if gen_df is None: - factor_feedback.factor_value_feedback = "No factor value generated, skip value evaluation." - factor_feedback.value_generated_flag = False - decision_from_value_check = None - else: - factor_feedback.value_generated_flag = True - ( - factor_feedback.factor_value_feedback, - decision_from_value_check, - ) = self.value_evaluator.evaluate( - implementation=implementation, gt_implementation=gt_implementation, version=target_task.version - ) - - factor_feedback.final_decision_based_on_gt = gt_implementation is not None - - if decision_from_value_check is not None and decision_from_value_check is True: - # To avoid confusion, when same_value_or_high_correlation is True, we do not need code feedback - factor_feedback.code_feedback = "Final decision is True and there are no code critics." - factor_feedback.final_decision = decision_from_value_check - factor_feedback.final_feedback = "Value evaluation passed, skip final decision evaluation." - elif decision_from_value_check is not None and decision_from_value_check is False: - factor_feedback.code_feedback, _ = self.code_evaluator.evaluate( - target_task=target_task, - implementation=implementation, - execution_feedback=factor_feedback.execution_feedback, - factor_value_feedback=factor_feedback.factor_value_feedback, - gt_implementation=gt_implementation, - ) - factor_feedback.final_decision = decision_from_value_check - factor_feedback.final_feedback = "Value evaluation failed, skip final decision evaluation." 
- else: - factor_feedback.code_feedback, _ = self.code_evaluator.evaluate( - target_task=target_task, - implementation=implementation, - execution_feedback=factor_feedback.execution_feedback, - factor_value_feedback=factor_feedback.factor_value_feedback, - gt_implementation=gt_implementation, - ) - ( - factor_feedback.final_decision, - factor_feedback.final_feedback, - ) = self.final_decision_evaluator.evaluate( - target_task=target_task, - execution_feedback=factor_feedback.execution_feedback, - value_feedback=factor_feedback.factor_value_feedback, - code_feedback=factor_feedback.code_feedback, - ) - return factor_feedback - - -class FactorMultiEvaluator(Evaluator): - def __init__(self, single_evaluator, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - self.single_factor_implementation_evaluator = single_evaluator - - def evaluate( - self, - evo: FactorEvolvingItem, - queried_knowledge: QueriedKnowledge = None, - **kwargs, - ) -> FactorMultiFeedback: - multi_implementation_feedback = multiprocessing_wrapper( - [ - ( - self.single_factor_implementation_evaluator.evaluate, - ( - evo.sub_tasks[index], - evo.sub_workspace_list[index], - evo.sub_gt_implementations[index] if evo.sub_gt_implementations is not None else None, - queried_knowledge, - ), - ) - for index in range(len(evo.sub_tasks)) - ], - n=RD_AGENT_SETTINGS.multi_proc_n, - ) - - final_decision = [ - None if single_feedback is None else single_feedback.final_decision - for single_feedback in multi_implementation_feedback - ] - logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}") - - for index in range(len(evo.sub_tasks)): - if final_decision[index]: - evo.sub_tasks[index].factor_implementation = True - - return multi_implementation_feedback - - -# TODO: -def shorten_prompt(tpl: str, render_kwargs: dict, shorten_key: str, max_trail: int = 10) -> str: - """When the prompt is too long. We have to shorten it. - But we should not truncate the prompt directly, so we should find the key we want to shorten and then shorten it. - """ - # TODO: this should replace most of code in - # - FactorFinalDecisionEvaluator.evaluate - # - FactorCodeEvaluator.evaluate diff --git a/rdagent/components/coder/factor_coder/evaluators.py b/rdagent/components/coder/factor_coder/evaluators.py new file mode 100644 index 00000000..c45a6c73 --- /dev/null +++ b/rdagent/components/coder/factor_coder/evaluators.py @@ -0,0 +1,131 @@ +import re + +from rdagent.components.coder.CoSTEER.evaluators import ( + CoSTEEREvaluator, + CoSTEERMultiFeedback, + CoSTEERSingleFeedback, +) +from rdagent.components.coder.factor_coder.eva_utils import ( + FactorCodeEvaluator, + FactorFinalDecisionEvaluator, + FactorValueEvaluator, +) +from rdagent.components.coder.factor_coder.factor import FactorTask +from rdagent.core.evolving_framework import QueriedKnowledge +from rdagent.core.experiment import Workspace + +FactorSingleFeedback = CoSTEERSingleFeedback +FactorMultiFeedback = CoSTEERMultiFeedback + + +class FactorEvaluatorForCoder(CoSTEEREvaluator): + """This class is the v1 version of evaluator for a single factor implementation. + It calls several evaluators in share modules to evaluate the factor implementation. 
+ """ + + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.value_evaluator = FactorValueEvaluator(self.scen) + self.code_evaluator = FactorCodeEvaluator(self.scen) + self.final_decision_evaluator = FactorFinalDecisionEvaluator(self.scen) + + def evaluate( + self, + target_task: FactorTask, + implementation: Workspace, + gt_implementation: Workspace = None, + queried_knowledge: QueriedKnowledge = None, + **kwargs, + ) -> FactorSingleFeedback: + if implementation is None: + return None + + target_task_information = target_task.get_task_information() + if ( + queried_knowledge is not None + and target_task_information in queried_knowledge.success_task_to_knowledge_dict + ): + return queried_knowledge.success_task_to_knowledge_dict[target_task_information].feedback + elif queried_knowledge is not None and target_task_information in queried_knowledge.failed_task_info_set: + return FactorSingleFeedback( + execution_feedback="This task has failed too many times, skip implementation.", + value_generated_flag=False, + code_feedback="This task has failed too many times, skip code evaluation.", + value_feedback="This task has failed too many times, skip value evaluation.", + final_decision=False, + final_feedback="This task has failed too many times, skip final decision evaluation.", + final_decision_based_on_gt=False, + ) + else: + factor_feedback = FactorSingleFeedback() + + # 1. Get factor execution feedback to generated implementation and remove the long list of numbers in execution feedback + ( + execution_feedback, + gen_df, + ) = implementation.execute() + + execution_feedback = re.sub(r"(?<=\D)(,\s+-?\d+\.\d+){50,}(?=\D)", ", ", execution_feedback) + factor_feedback.execution_feedback = "\n".join( + [line for line in execution_feedback.split("\n") if "warning" not in line.lower()] + ) + + # 2. Get factor value feedback + if gen_df is None: + factor_feedback.value_feedback = "No factor value generated, skip value evaluation." + factor_feedback.value_generated_flag = False + decision_from_value_check = None + else: + factor_feedback.value_generated_flag = True + ( + factor_feedback.value_feedback, + decision_from_value_check, + ) = self.value_evaluator.evaluate( + implementation=implementation, gt_implementation=gt_implementation, version=target_task.version + ) + + factor_feedback.final_decision_based_on_gt = gt_implementation is not None + + if decision_from_value_check is not None and decision_from_value_check is True: + # To avoid confusion, when same_value_or_high_correlation is True, we do not need code feedback + factor_feedback.code_feedback = "Final decision is True and there are no code critics." + factor_feedback.final_decision = decision_from_value_check + factor_feedback.final_feedback = "Value evaluation passed, skip final decision evaluation." + elif decision_from_value_check is not None and decision_from_value_check is False: + factor_feedback.code_feedback, _ = self.code_evaluator.evaluate( + target_task=target_task, + implementation=implementation, + execution_feedback=factor_feedback.execution_feedback, + value_feedback=factor_feedback.value_feedback, + gt_implementation=gt_implementation, + ) + factor_feedback.final_decision = decision_from_value_check + factor_feedback.final_feedback = "Value evaluation failed, skip final decision evaluation." 
+ else: + factor_feedback.code_feedback, _ = self.code_evaluator.evaluate( + target_task=target_task, + implementation=implementation, + execution_feedback=factor_feedback.execution_feedback, + value_feedback=factor_feedback.value_feedback, + gt_implementation=gt_implementation, + ) + ( + factor_feedback.final_decision, + factor_feedback.final_feedback, + ) = self.final_decision_evaluator.evaluate( + target_task=target_task, + execution_feedback=factor_feedback.execution_feedback, + value_feedback=factor_feedback.value_feedback, + code_feedback=factor_feedback.code_feedback, + ) + return factor_feedback + + +# TODO: +def shorten_prompt(tpl: str, render_kwargs: dict, shorten_key: str, max_trail: int = 10) -> str: + """When the prompt is too long. We have to shorten it. + But we should not truncate the prompt directly, so we should find the key we want to shorten and then shorten it. + """ + # TODO: this should replace most of code in + # - FactorFinalDecisionEvaluator.evaluate + # - FactorCodeEvaluator.evaluate diff --git a/rdagent/components/coder/factor_coder/evolving_strategy.py b/rdagent/components/coder/factor_coder/evolving_strategy.py new file mode 100644 index 00000000..c751996c --- /dev/null +++ b/rdagent/components/coder/factor_coder/evolving_strategy.py @@ -0,0 +1,177 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from jinja2 import Environment, StrictUndefined + +from rdagent.components.coder.CoSTEER.evolving_strategy import ( + MultiProcessEvolvingStrategy, +) +from rdagent.components.coder.CoSTEER.knowledge_management import ( + CoSTEERQueriedKnowledge, + CoSTEERQueriedKnowledgeV2, +) +from rdagent.components.coder.factor_coder.config import FACTOR_COSTEER_SETTINGS +from rdagent.components.coder.factor_coder.factor import FactorFBWorkspace, FactorTask +from rdagent.core.prompts import Prompts +from rdagent.oai.llm_conf import LLM_SETTINGS +from rdagent.oai.llm_utils import APIBackend + +implement_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + + +class FactorMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy): + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.num_loop = 0 + self.haveSelected = False + + def error_summary( + self, + target_task: FactorTask, + queried_former_failed_knowledge_to_render: list, + queried_similar_error_knowledge_to_render: list, + ) -> str: + error_summary_system_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(implement_prompts["evolving_strategy_error_summary_v2_system"]) + .render( + scenario=self.scen.get_scenario_all_desc(target_task), + factor_information_str=target_task.get_task_information(), + code_and_feedback=queried_former_failed_knowledge_to_render[-1].get_implementation_and_feedback_str(), + ) + .strip("\n") + ) + for _ in range(10): # max attempt to reduce the length of error_summary_user_prompt + error_summary_user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(implement_prompts["evolving_strategy_error_summary_v2_user"]) + .render( + queried_similar_error_knowledge=queried_similar_error_knowledge_to_render, + ) + .strip("\n") + ) + if ( + APIBackend().build_messages_and_calculate_token( + user_prompt=error_summary_user_prompt, system_prompt=error_summary_system_prompt + ) + < LLM_SETTINGS.chat_token_limit + ): + break + elif len(queried_similar_error_knowledge_to_render) > 0: + queried_similar_error_knowledge_to_render = queried_similar_error_knowledge_to_render[:-1] + error_summary_critics = 
APIBackend( + use_chat_cache=FACTOR_COSTEER_SETTINGS.coder_use_cache + ).build_messages_and_create_chat_completion( + user_prompt=error_summary_user_prompt, system_prompt=error_summary_system_prompt, json_mode=False + ) + return error_summary_critics + + def implement_one_task( + self, + target_task: FactorTask, + queried_knowledge: CoSTEERQueriedKnowledge, + ) -> str: + target_factor_task_information = target_task.get_task_information() + + queried_similar_successful_knowledge = ( + queried_knowledge.task_to_similar_task_successful_knowledge[target_factor_task_information] + if queried_knowledge is not None + else [] + ) # A list, [success task implement knowledge] + + if isinstance(queried_knowledge, CoSTEERQueriedKnowledgeV2): + queried_similar_error_knowledge = ( + queried_knowledge.task_to_similar_error_successful_knowledge[target_factor_task_information] + if queried_knowledge is not None + else {} + ) # A dict, {{error_type:[[error_imp_knowledge, success_imp_knowledge],...]},...} + else: + queried_similar_error_knowledge = {} + + queried_former_failed_knowledge = ( + queried_knowledge.task_to_former_failed_traces[target_factor_task_information][0] + if queried_knowledge is not None + else [] + ) + + queried_former_failed_knowledge_to_render = queried_former_failed_knowledge + + latest_attempt_to_latest_successful_execution = queried_knowledge.task_to_former_failed_traces[ + target_factor_task_information + ][1] + + system_prompt = ( + Environment(undefined=StrictUndefined) + .from_string( + implement_prompts["evolving_strategy_factor_implementation_v1_system"], + ) + .render( + scenario=self.scen.get_scenario_all_desc(target_task, filtered_tag="feature"), + queried_former_failed_knowledge=queried_former_failed_knowledge_to_render, + ) + ) + queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge + queried_similar_error_knowledge_to_render = queried_similar_error_knowledge + # dynamically prevent the prompt from becoming too long + for _ in range(10): # max attempt to reduce the length of user_prompt + # summarize the errors (optional) + if ( + isinstance(queried_knowledge, CoSTEERQueriedKnowledgeV2) + and FACTOR_COSTEER_SETTINGS.v2_error_summary + and len(queried_similar_error_knowledge_to_render) != 0 + and len(queried_former_failed_knowledge_to_render) != 0 + ): + error_summary_critics = self.error_summary( + target_task, + queried_former_failed_knowledge_to_render, + queried_similar_error_knowledge_to_render, + ) + else: + error_summary_critics = None + # build the user_prompt and start writing the code + user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string( + implement_prompts["evolving_strategy_factor_implementation_v2_user"], + ) + .render( + factor_information_str=target_factor_task_information, + queried_similar_successful_knowledge=queried_similar_successful_knowledge_to_render, + queried_similar_error_knowledge=queried_similar_error_knowledge_to_render, + error_summary_critics=error_summary_critics, + latest_attempt_to_latest_successful_execution=latest_attempt_to_latest_successful_execution, + ) + .strip("\n") + ) + if ( + APIBackend().build_messages_and_calculate_token(user_prompt=user_prompt, system_prompt=system_prompt) + < LLM_SETTINGS.chat_token_limit + ): + break + elif len(queried_former_failed_knowledge_to_render) > 1: + queried_former_failed_knowledge_to_render = queried_former_failed_knowledge_to_render[1:] + elif len(queried_similar_successful_knowledge_to_render) > len( + queried_similar_error_knowledge_to_render, + ): + queried_similar_successful_knowledge_to_render = 
queried_similar_successful_knowledge_to_render[:-1] + elif len(queried_similar_error_knowledge_to_render) > 0: + queried_similar_error_knowledge_to_render = queried_similar_error_knowledge_to_render[:-1] + code = json.loads( + APIBackend( + use_chat_cache=FACTOR_COSTEER_SETTINGS.coder_use_cache + ).build_messages_and_create_chat_completion( + user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True + ) + )["code"] + return code + + def assign_code_list_to_evo(self, code_list, evo): + for index in range(len(evo.sub_tasks)): + if code_list[index] is None: + continue + if evo.sub_workspace_list[index] is None: + evo.sub_workspace_list[index] = FactorFBWorkspace(target_task=evo.sub_tasks[index]) + evo.sub_workspace_list[index].inject_code(**{"factor.py": code_list[index]}) + return evo diff --git a/rdagent/components/coder/factor_coder/factor.py b/rdagent/components/coder/factor_coder/factor.py index 723db08a..edb7a9ce 100644 --- a/rdagent/components/coder/factor_coder/factor.py +++ b/rdagent/components/coder/factor_coder/factor.py @@ -1,6 +1,5 @@ from __future__ import annotations -import pickle import subprocess import uuid from pathlib import Path @@ -10,15 +9,15 @@ from filelock import FileLock from rdagent.app.kaggle.conf import KAGGLE_IMPLEMENT_SETTING -from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS +from rdagent.components.coder.CoSTEER.task import CoSTEERTask +from rdagent.components.coder.factor_coder.config import FACTOR_COSTEER_SETTINGS from rdagent.core.exception import CodeFormatError, CustomRuntimeError, NoOutputError -from rdagent.core.experiment import Experiment, FBWorkspace, Task +from rdagent.core.experiment import Experiment, FBWorkspace from rdagent.core.utils import cache_with_pickle -from rdagent.log import rdagent_logger as logger from rdagent.oai.llm_utils import md5_hash -class FactorTask(Task): +class FactorTask(CoSTEERTask): # TODO: generalized the attributes into the Task # - factor_* -> * def __init__( @@ -124,11 +123,11 @@ def execute(self, data_type: str = "Debug") -> Tuple[str, pd.DataFrame]: if self.target_task.version == 1: source_data_path = ( Path( - FACTOR_IMPLEMENT_SETTINGS.data_folder_debug, + FACTOR_COSTEER_SETTINGS.data_folder_debug, ) if data_type == "Debug" # FIXME: (yx) don't think we should use a debug tag for this. else Path( - FACTOR_IMPLEMENT_SETTINGS.data_folder, + FACTOR_COSTEER_SETTINGS.data_folder, ) ) elif self.target_task.version == 2: @@ -152,11 +151,11 @@ def execute(self, data_type: str = "Debug") -> Tuple[str, pd.DataFrame]: try: subprocess.check_output( - f"{FACTOR_IMPLEMENT_SETTINGS.python_bin} {execution_code_path}", + f"{FACTOR_COSTEER_SETTINGS.python_bin} {execution_code_path}", shell=True, cwd=self.workspace_path, stderr=subprocess.STDOUT, - timeout=FACTOR_IMPLEMENT_SETTINGS.file_based_execution_timeout, + timeout=FACTOR_COSTEER_SETTINGS.file_based_execution_timeout, ) execution_success = True except subprocess.CalledProcessError as e: @@ -176,7 +175,7 @@ def execute(self, data_type: str = "Debug") -> Tuple[str, pd.DataFrame]: else: execution_error = CustomRuntimeError(execution_feedback) except subprocess.TimeoutExpired: - execution_feedback += f"Execution timeout error and the timeout is set to {FACTOR_IMPLEMENT_SETTINGS.file_based_execution_timeout} seconds." + execution_feedback += f"Execution timeout error and the timeout is set to {FACTOR_COSTEER_SETTINGS.file_based_execution_timeout} seconds." 
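# [Editor's aside] Given the FACTOR_CoSTEER_ env prefix set in config.py above, this timeout
# should be overridable per run through the environment, assuming the usual pydantic-settings
# loading applies; the value below is illustrative:
#
#     import os
#     os.environ["FACTOR_CoSTEER_file_based_execution_timeout"] = "600"  # seconds, illustrative
#     from rdagent.components.coder.factor_coder.config import FactorCoSTEERSettings
#     print(FactorCoSTEERSettings().file_based_execution_timeout)  # expected: 600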
if self.raise_exception: raise CustomRuntimeError(execution_feedback) else: diff --git a/rdagent/components/coder/factor_coder/prompts.yaml b/rdagent/components/coder/factor_coder/prompts.yaml index 53d571ee..94a0c02b 100644 --- a/rdagent/components/coder/factor_coder/prompts.yaml +++ b/rdagent/components/coder/factor_coder/prompts.yaml @@ -28,9 +28,9 @@ evaluator_code_feedback_v1_user: |- {{ code }} --------------Execution feedback:--------------- {{ execution_feedback }} - {% if factor_value_feedback is not none %} + {% if value_feedback is not none %} --------------Factor value feedback:--------------- - {{ factor_value_feedback }} + {{ value_feedback }} {% endif %} {% if gt_code is not none %} --------------Ground truth Python code:--------------- @@ -62,36 +62,12 @@ evolving_strategy_factor_implementation_v1_system: |- "code": "The Python code as a string." } -evolving_strategy_factor_implementation_v1_user: |- - --------------Target factor information:--------------- - {{ factor_information_str }} - - {% if queried_similar_successful_knowledge|length != 0 %} - --------------Correct code to similar factors:--------------- - {% for similar_successful_knowledge in queried_similar_successful_knowledge %} - =====Factor {{loop.index}}:===== - {{ similar_successful_knowledge.target_task.get_task_information() }} - =====Code:===== - {{ similar_successful_knowledge.implementation.code }} - {% endfor %} - {% endif %} - - {% if queried_former_failed_knowledge|length != 0 %} - --------------Former failed code:--------------- - {% for former_failed_knowledge in queried_former_failed_knowledge %} - =====Code to implementation {{ loop.index }}===== - {{ former_failed_knowledge.implementation.code }} - =====Feedback to implementation {{ loop.index }}===== - {{ former_failed_knowledge.feedback }} - {% endfor %} - {% endif %} - evolving_strategy_factor_implementation_v2_user: |- --------------Target factor information:--------------- {{ factor_information_str }} {% if queried_similar_error_knowledge|length != 0 %} - {% if not error_summary %} + {% if error_summary_critics is none %} Recall your last failure, your implementation met some errors. When doing other tasks, you met some similar errors but you finally solve them. 
Here are some examples: {% for error_content, similar_error_knowledge in queried_similar_error_knowledge %} @@ -108,14 +84,14 @@ evolving_strategy_factor_implementation_v2_user: |- {{error_summary_critics}} {% endif %} {% endif %} - {% if queried_similar_component_knowledge|length != 0 %} + {% if queried_similar_successful_knowledge|length != 0 %} Here are some success implements of similar component tasks, take them as references: --------------Correct code to similar factors:--------------- - {% for similar_component_knowledge in queried_similar_component_knowledge %} + {% for similar_successful_knowledge in queried_similar_successful_knowledge %} =====Factor {{loop.index}}:===== - {{ similar_component_knowledge.target_task.get_task_information() }} + {{ similar_successful_knowledge.target_task.get_task_information() }} =====Code:===== - {{ similar_component_knowledge.implementation.code }} + {{ similar_successful_knowledge.implementation.code }} {% endfor %} {% endif %} {% if latest_attempt_to_latest_successful_execution is not none %} @@ -189,16 +165,6 @@ select_implementable_factor_user: |- {% endif %} {% endfor %} -analyze_component_prompt_v1_system: |- - User is getting a new task that might consist of the components below (given in component_index: component_description): - {{all_component_content}} - - You should find out what components does the new task have, and put their indices in a list. - Please response the critic in the json format. Here is an example structure for the JSON output, please strictly follow the format: - { - "component_no_list": the list containing indices of components. - } - evaluator_output_format_system: |- User is trying to implement some factors in the following scenario: {{ scenario }} @@ -235,4 +201,4 @@ evaluator_final_decision_v1_user: |- --------------Code feedback:--------------- {{ code_feedback }} --------------Factor value feedback:--------------- - {{ factor_value_feedback }} + {{ value_feedback }} diff --git a/rdagent/components/coder/model_coder/CoSTEER/__init__.py b/rdagent/components/coder/model_coder/CoSTEER/__init__.py deleted file mode 100644 index 8841d125..00000000 --- a/rdagent/components/coder/model_coder/CoSTEER/__init__.py +++ /dev/null @@ -1,94 +0,0 @@ -import pickle -from pathlib import Path - -from rdagent.components.coder.model_coder.conf import MODEL_IMPL_SETTINGS -from rdagent.components.coder.model_coder.CoSTEER.evaluators import ( - ModelCoderMultiEvaluator, -) -from rdagent.components.coder.model_coder.CoSTEER.evolvable_subjects import ( - ModelEvolvingItem, -) -from rdagent.components.coder.model_coder.CoSTEER.evolving_agent import ModelRAGEvoAgent -from rdagent.components.coder.model_coder.CoSTEER.evolving_strategy import ( - ModelCoderEvolvingStrategy, -) -from rdagent.components.coder.model_coder.CoSTEER.knowledge_management import ( - ModelKnowledgeBase, - ModelRAGStrategy, -) -from rdagent.components.coder.model_coder.model import ModelExperiment -from rdagent.core.developer import Developer -from rdagent.core.evolving_agent import RAGEvoAgent - - -class ModelCoSTEER(Developer[ModelExperiment]): - def __init__( - self, - *args, - with_knowledge: bool = True, - with_feedback: bool = True, - knowledge_self_gen: bool = True, - filter_final_evo: bool = True, - **kwargs, - ) -> None: - super().__init__(*args, **kwargs) - self.max_loop = MODEL_IMPL_SETTINGS.max_loop - self.knowledge_base_path = ( - Path(MODEL_IMPL_SETTINGS.knowledge_base_path) - if MODEL_IMPL_SETTINGS.knowledge_base_path is not None - else None - ) - 
self.new_knowledge_base_path = ( - Path(MODEL_IMPL_SETTINGS.new_knowledge_base_path) - if MODEL_IMPL_SETTINGS.new_knowledge_base_path is not None - else None - ) - self.with_knowledge = with_knowledge - self.with_feedback = with_feedback - self.knowledge_self_gen = knowledge_self_gen - self.filter_final_evo = filter_final_evo - self.evolving_strategy = ModelCoderEvolvingStrategy(scen=self.scen) - self.model_evaluator = ModelCoderMultiEvaluator(scen=self.scen) - - def load_or_init_knowledge_base(self, former_knowledge_base_path: Path = None, component_init_list: list = []): - if former_knowledge_base_path is not None and former_knowledge_base_path.exists(): - model_knowledge_base = pickle.load(open(former_knowledge_base_path, "rb")) - if not isinstance(model_knowledge_base, ModelKnowledgeBase): - raise ValueError("The former knowledge base is not compatible with the current version") - else: - model_knowledge_base = ModelKnowledgeBase() - - return model_knowledge_base - - def develop(self, exp: ModelExperiment) -> ModelExperiment: - # init knowledge base - model_knowledge_base = self.load_or_init_knowledge_base( - former_knowledge_base_path=self.knowledge_base_path, - component_init_list=[], - ) - # init rag method - self.rag = ModelRAGStrategy(model_knowledge_base) - - # init intermediate items - model_experiment = ModelEvolvingItem.from_experiment(exp) - - self.evolve_agent = ModelRAGEvoAgent( - max_loop=self.max_loop, - evolving_strategy=self.evolving_strategy, - rag=self.rag, - with_knowledge=self.with_knowledge, - with_feedback=self.with_feedback, - knowledge_self_gen=self.knowledge_self_gen, - ) - - model_experiment = self.evolve_agent.multistep_evolve( - model_experiment, - self.model_evaluator, - filter_final_evo=self.filter_final_evo, - ) - - # save new knowledge base - if self.new_knowledge_base_path is not None: - pickle.dump(model_knowledge_base, open(self.new_knowledge_base_path, "wb")) - exp.sub_workspace_list = model_experiment.sub_workspace_list - return exp diff --git a/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py b/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py deleted file mode 100644 index 2be2d826..00000000 --- a/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py +++ /dev/null @@ -1,36 +0,0 @@ -from rdagent.components.coder.model_coder.model import ( - ModelExperiment, - ModelFBWorkspace, - ModelTask, -) -from rdagent.core.evolving_framework import EvolvableSubjects -from rdagent.log import rdagent_logger as logger - - -class ModelEvolvingItem(ModelExperiment, EvolvableSubjects): - """ - Intermediate item of model implementation. 
- """ - - def __init__( - self, - sub_tasks: list[ModelTask], - sub_gt_implementations: list[ModelFBWorkspace] = None, - ): - ModelExperiment.__init__(self, sub_tasks=sub_tasks) - if sub_gt_implementations is not None and len( - sub_gt_implementations, - ) != len(self.sub_tasks): - self.sub_gt_implementations = None - logger.warning( - "The length of sub_gt_implementations is not equal to the length of sub_tasks, set sub_gt_implementations to None", - ) - else: - self.sub_gt_implementations = sub_gt_implementations - - @classmethod - def from_experiment(cls, exp: ModelExperiment) -> "ModelEvolvingItem": - ei = cls(sub_tasks=exp.sub_tasks) - ei.based_experiments = exp.based_experiments - ei.experiment_workspace = exp.experiment_workspace - return ei diff --git a/rdagent/components/coder/model_coder/CoSTEER/evolving_agent.py b/rdagent/components/coder/model_coder/CoSTEER/evolving_agent.py deleted file mode 100644 index 34efa7cd..00000000 --- a/rdagent/components/coder/model_coder/CoSTEER/evolving_agent.py +++ /dev/null @@ -1,19 +0,0 @@ -from rdagent.components.coder.model_coder.CoSTEER.evaluators import ModelCoderFeedback -from rdagent.components.coder.model_coder.CoSTEER.evolvable_subjects import ( - ModelEvolvingItem, -) -from rdagent.core.evaluation import Feedback -from rdagent.core.evolving_agent import RAGEvoAgent -from rdagent.core.evolving_framework import EvolvableSubjects - - -class ModelRAGEvoAgent(RAGEvoAgent): - def filter_evolvable_subjects_by_feedback(self, evo: EvolvableSubjects, feedback: Feedback) -> EvolvableSubjects: - assert isinstance(evo, ModelEvolvingItem) - assert isinstance(feedback, list) - assert len(evo.sub_workspace_list) == len(feedback) - - for index in range(len(evo.sub_workspace_list)): - if not feedback[index].final_decision: - evo.sub_workspace_list[index].clear() - return evo diff --git a/rdagent/components/coder/model_coder/CoSTEER/evolving_strategy.py b/rdagent/components/coder/model_coder/CoSTEER/evolving_strategy.py deleted file mode 100644 index a4d6776e..00000000 --- a/rdagent/components/coder/model_coder/CoSTEER/evolving_strategy.py +++ /dev/null @@ -1,158 +0,0 @@ -import json -from copy import deepcopy -from pathlib import Path - -from jinja2 import Environment, StrictUndefined - -from rdagent.components.coder.model_coder.conf import MODEL_IMPL_SETTINGS -from rdagent.components.coder.model_coder.CoSTEER.evolvable_subjects import ( - ModelEvolvingItem, -) -from rdagent.components.coder.model_coder.CoSTEER.knowledge_management import ( - ModelQueriedKnowledge, -) -from rdagent.components.coder.model_coder.model import ( - ModelExperiment, - ModelFBWorkspace, - ModelTask, -) -from rdagent.core.conf import RD_AGENT_SETTINGS -from rdagent.core.evolving_framework import EvolvingStrategy -from rdagent.core.prompts import Prompts -from rdagent.core.utils import multiprocessing_wrapper -from rdagent.oai.llm_conf import LLM_SETTINGS -from rdagent.oai.llm_utils import APIBackend -from rdagent.scenarios.kaggle.experiment.kaggle_experiment import KG_MODEL_MAPPING - -coder_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml") - - -class ModelCoderEvolvingStrategy(EvolvingStrategy): - def implement_one_model( - self, - target_task: ModelTask, - queried_knowledge: ModelQueriedKnowledge = None, - current_exp: ModelExperiment = None, # Add this parameter - ) -> str: - model_information_str = target_task.get_task_information() - model_type = target_task.model_type - - if len(current_exp.based_experiments) == 0: - current_code = None - else: - 
current_code = "" - sota_exp_code_dict = current_exp.based_experiments[-1].experiment_workspace.code_dict - if target_task.version == 2: - if model_type in KG_MODEL_MAPPING: - current_code = sota_exp_code_dict.get(KG_MODEL_MAPPING[model_type], None) - elif "model.py" in sota_exp_code_dict: - current_code = sota_exp_code_dict["model.py"] - else: - current_code = None - elif target_task.version == 1: - current_code = sota_exp_code_dict.get("model.py", None) - - if queried_knowledge is not None and model_information_str in queried_knowledge.success_task_to_knowledge_dict: - return queried_knowledge.success_task_to_knowledge_dict[model_information_str].implementation - elif queried_knowledge is not None and model_information_str in queried_knowledge.failed_task_info_set: - return None - else: - queried_similar_successful_knowledge = ( - queried_knowledge.working_task_to_similar_successful_knowledge_dict[model_information_str] - if queried_knowledge is not None - else [] - ) - queried_former_failed_knowledge = ( - queried_knowledge.working_task_to_former_failed_knowledge_dict[model_information_str] - if queried_knowledge is not None - else [] - ) - - queried_former_failed_knowledge_to_render = queried_former_failed_knowledge - - system_prompt = ( - Environment(undefined=StrictUndefined) - .from_string( - coder_prompts["evolving_strategy_model_coder"]["system"], - ) - .render( - scenario=self.scen.get_scenario_all_desc(filtered_tag=target_task.model_type), - queried_former_failed_knowledge=queried_former_failed_knowledge_to_render, - current_code=current_code, - ) - ) - - queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge - for _ in range(10): # max attempt to reduce the length of user_prompt - user_prompt = ( - Environment(undefined=StrictUndefined) - .from_string( - coder_prompts["evolving_strategy_model_coder"]["user"], - ) - .render( - model_information_str=model_information_str, - queried_similar_successful_knowledge=queried_similar_successful_knowledge_to_render, - queried_former_failed_knowledge=queried_former_failed_knowledge_to_render, - ) - .strip("\n") - ) - if ( - APIBackend().build_messages_and_calculate_token( - user_prompt=user_prompt, - system_prompt=system_prompt, - ) - < LLM_SETTINGS.chat_token_limit - ): - break - elif len(queried_former_failed_knowledge_to_render) > 1: - queried_former_failed_knowledge_to_render = queried_former_failed_knowledge_to_render[1:] - elif len(queried_similar_successful_knowledge_to_render) > 1: - queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge_to_render[1:] - - code = json.loads( - APIBackend( - use_chat_cache=MODEL_IMPL_SETTINGS.coder_use_cache - ).build_messages_and_create_chat_completion( - user_prompt=user_prompt, - system_prompt=system_prompt, - json_mode=True, - ), - )["code"] - return code - - def evolve( - self, - *, - evo: ModelEvolvingItem, - queried_knowledge: ModelQueriedKnowledge | None = None, - **kwargs, - ) -> ModelEvolvingItem: - # 1.找出需要evolve的model - to_be_finished_task_index = [] - for index, target_model_task in enumerate(evo.sub_tasks): - target_model_task_desc = target_model_task.get_task_information() - if target_model_task_desc in queried_knowledge.success_task_to_knowledge_dict: - evo.sub_workspace_list[index] = queried_knowledge.success_task_to_knowledge_dict[ - target_model_task_desc - ].implementation - elif ( - target_model_task_desc not in queried_knowledge.success_task_to_knowledge_dict - and target_model_task_desc not in 
queried_knowledge.failed_task_info_set - ): - to_be_finished_task_index.append(index) - - result = multiprocessing_wrapper( - [ - (self.implement_one_model, (evo.sub_tasks[target_index], queried_knowledge, evo)) - for target_index in to_be_finished_task_index - ], - n=RD_AGENT_SETTINGS.multi_proc_n, - ) - - for index, target_index in enumerate(to_be_finished_task_index): - evo.sub_workspace_list[target_index] = ModelFBWorkspace(target_task=evo.sub_tasks[target_index]) - evo.sub_workspace_list[target_index].inject_code(**{"model.py": result[index]}) - - evo.corresponding_selection = to_be_finished_task_index - - return evo diff --git a/rdagent/components/coder/model_coder/CoSTEER/knowledge_management.py b/rdagent/components/coder/model_coder/CoSTEER/knowledge_management.py deleted file mode 100644 index a2647b58..00000000 --- a/rdagent/components/coder/model_coder/CoSTEER/knowledge_management.py +++ /dev/null @@ -1,171 +0,0 @@ -from pathlib import Path - -from rdagent.components.coder.model_coder.conf import MODEL_IMPL_SETTINGS -from rdagent.components.coder.model_coder.CoSTEER.evaluators import ModelCoderFeedback -from rdagent.components.coder.model_coder.model import ModelTask -from rdagent.core.evolving_framework import ( - EvolvableSubjects, - EvolvingKnowledgeBase, - EvoStep, - Knowledge, - QueriedKnowledge, - RAGStrategy, -) -from rdagent.core.experiment import Workspace -from rdagent.oai.llm_utils import calculate_embedding_distance_between_str_list - - -class ModelKnowledge(Knowledge): - def __init__( - self, - target_task: ModelTask, - implementation: Workspace, - feedback: ModelCoderFeedback, - ) -> None: - """ - Initialize a ModelKnowledge object. The ModelKnowledge object is used to store a model implementation without the ground truth code and value. - - Args: - model (Model): The model object associated with the KnowledgeManagement. - - Returns: - None - """ - self.target_task = target_task - self.implementation = implementation.copy() - self.feedback = feedback - - def get_implementation_and_feedback_str(self) -> str: - return f"""------------------Model implementation code:------------------ -{self.implementation.code} -------------------Model implementation feedback:------------------ -{self.feedback!s} -""" - - -class ModelQueriedKnowledge(QueriedKnowledge): - def __init__(self, success_task_to_knowledge_dict: dict = {}, failed_task_info_set: set = set()) -> None: - self.success_task_to_knowledge_dict = success_task_to_knowledge_dict - self.failed_task_info_set = failed_task_info_set - self.working_task_to_former_failed_knowledge_dict = dict() - self.working_task_to_similar_successful_knowledge_dict = dict() - - -class ModelKnowledgeBase(EvolvingKnowledgeBase): - def __init__(self, path: str | Path = None) -> None: - self.implementation_trace: dict[str, ModelKnowledge] = dict() - self.success_task_info_set: set[str] = set() - - self.task_to_embedding = dict() - - super().__init__(path) - - def query(self) -> QueriedKnowledge | None: - """ - Query the knowledge base to get the queried knowledge. So far is handled in RAG strategy. 
- """ - raise NotImplementedError - - -class ModelRAGStrategy(RAGStrategy): - def __init__(self, knowledgebase: ModelKnowledgeBase) -> None: - super().__init__(knowledgebase) - self.current_generated_trace_count = 0 - - def generate_knowledge( - self, - evolving_trace: list[EvoStep], - *, - return_knowledge: bool = False, - ) -> Knowledge | None: - if len(evolving_trace) == self.current_generated_trace_count: - return - else: - for trace_index in range( - self.current_generated_trace_count, - len(evolving_trace), - ): - evo_step = evolving_trace[trace_index] - implementations = evo_step.evolvable_subjects - feedback = evo_step.feedback - for task_index in range(len(implementations.sub_tasks)): - target_task = implementations.sub_tasks[task_index] - target_task_information = target_task.get_task_information() - implementation = implementations.sub_workspace_list[task_index] - single_feedback = feedback[task_index] - if single_feedback is None: - continue - single_knowledge = ModelKnowledge( - target_task=target_task, - implementation=implementation, - feedback=single_feedback, - ) - if target_task_information not in self.knowledgebase.success_task_info_set: - self.knowledgebase.implementation_trace.setdefault( - target_task_information, - [], - ).append(single_knowledge) - - if single_feedback.final_decision == True: - self.knowledgebase.success_task_info_set.add( - target_task_information, - ) - self.current_generated_trace_count = len(evolving_trace) - - def query( - self, - evo: EvolvableSubjects, - evolving_trace: list[EvoStep], - ) -> QueriedKnowledge | None: - query_former_trace_limit = MODEL_IMPL_SETTINGS.query_former_trace_limit - query_similar_success_limit = MODEL_IMPL_SETTINGS.query_similar_success_limit - fail_task_trial_limit = MODEL_IMPL_SETTINGS.fail_task_trial_limit - - queried_knowledge = ModelQueriedKnowledge() - for target_model_task in evo.sub_tasks: - target_model_task_information = target_model_task.get_task_information() - if target_model_task_information in self.knowledgebase.success_task_info_set: - queried_knowledge.success_task_to_knowledge_dict[target_model_task_information] = ( - self.knowledgebase.implementation_trace[target_model_task_information][-1] - ) - elif ( - len( - self.knowledgebase.implementation_trace.setdefault( - target_model_task_information, - [], - ), - ) - >= fail_task_trial_limit - ): - queried_knowledge.failed_task_info_set.add(target_model_task_information) - else: - queried_knowledge.working_task_to_former_failed_knowledge_dict[target_model_task_information] = ( - self.knowledgebase.implementation_trace.setdefault( - target_model_task_information, - [], - )[-query_former_trace_limit:] - ) - - knowledge_base_success_task_list = list( - self.knowledgebase.success_task_info_set, - ) - similarity = calculate_embedding_distance_between_str_list( - [target_model_task_information], - knowledge_base_success_task_list, - )[0] - similar_indexes = sorted( - range(len(similarity)), - key=lambda i: similarity[i], - reverse=True, - )[:query_similar_success_limit] - similar_successful_knowledge = [ - self.knowledgebase.implementation_trace.setdefault( - knowledge_base_success_task_list[index], - [], - )[-1] - for index in similar_indexes - ] - queried_knowledge.working_task_to_similar_successful_knowledge_dict[target_model_task_information] = ( - similar_successful_knowledge - ) - return queried_knowledge diff --git a/rdagent/components/coder/model_coder/__init__.py b/rdagent/components/coder/model_coder/__init__.py new file mode 100644 index 
00000000..32bec9fd --- /dev/null +++ b/rdagent/components/coder/model_coder/__init__.py @@ -0,0 +1,21 @@ +from rdagent.components.coder.CoSTEER import CoSTEER +from rdagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +from rdagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +from rdagent.components.coder.model_coder.evaluators import ModelCoSTEEREvaluator +from rdagent.components.coder.model_coder.evolving_strategy import ( + ModelMultiProcessEvolvingStrategy, +) +from rdagent.core.scenario import Scenario + + +class ModelCoSTEER(CoSTEER): + def __init__( + self, + scen: Scenario, + *args, + **kwargs, + ) -> None: + eva = CoSTEERMultiEvaluator(ModelCoSTEEREvaluator(scen=scen), scen=scen) + es = ModelMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + + super().__init__(*args, settings=CoSTEER_SETTINGS, eva=eva, es=es, evolving_version=2, scen=scen, **kwargs) diff --git a/rdagent/components/coder/model_coder/conf.py b/rdagent/components/coder/model_coder/conf.py deleted file mode 100644 index c600c699..00000000 --- a/rdagent/components/coder/model_coder/conf.py +++ /dev/null @@ -1,23 +0,0 @@ -from pathlib import Path -from typing import Union - -from pydantic_settings import BaseSettings - - -class ModelImplSettings(BaseSettings): - class Config: - env_prefix = "MODEL_CODER_" # Use MODEL_CODER_ as prefix for environment variables - - coder_use_cache: bool = False - - knowledge_base_path: Union[str, None] = None - new_knowledge_base_path: Union[str, None] = None - - max_loop: int = 10 - - query_former_trace_limit: int = 5 - query_similar_success_limit: int = 5 - fail_task_trial_limit: int = 20 - - -MODEL_IMPL_SETTINGS = ModelImplSettings() diff --git a/rdagent/components/coder/model_coder/CoSTEER/evaluators.py b/rdagent/components/coder/model_coder/eva_utils.py similarity index 50% rename from rdagent/components/coder/model_coder/CoSTEER/evaluators.py rename to rdagent/components/coder/model_coder/eva_utils.py index 3634f807..8b2869d8 100644 --- a/rdagent/components/coder/model_coder/CoSTEER/evaluators.py +++ b/rdagent/components/coder/model_coder/eva_utils.py @@ -1,27 +1,18 @@ import json -import random from pathlib import Path -from typing import List, Tuple +from typing import Tuple import numpy as np from jinja2 import Environment, StrictUndefined -from rdagent.components.coder.model_coder.conf import MODEL_IMPL_SETTINGS -from rdagent.components.coder.model_coder.CoSTEER.evolvable_subjects import ( - ModelEvolvingItem, -) from rdagent.components.coder.model_coder.model import ModelFBWorkspace, ModelTask -from rdagent.core.conf import RD_AGENT_SETTINGS from rdagent.core.evaluation import Evaluator -from rdagent.core.evolving_framework import QueriedKnowledge from rdagent.core.experiment import Task, Workspace from rdagent.core.prompts import Prompts -from rdagent.core.utils import multiprocessing_wrapper -from rdagent.log import rdagent_logger as logger from rdagent.oai.llm_conf import LLM_SETTINGS from rdagent.oai.llm_utils import APIBackend -evaluate_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml") +evaluate_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") def shape_evaluator(prediction: np.ndarray, target_shape: Tuple = None) -> Tuple[str, bool]: @@ -193,154 +184,3 @@ def evaluate( final_evaluation_dict["final_feedback"], final_evaluation_dict["final_decision"], ) - - -class ModelCoderFeedback: - """This feedback includes all the content to the model coder""" - - def __init__( - self, - 
execution_feedback: str, - shape_feedback: str, - value_feedback: str, - code_feedback: str, - final_feedback: str, - final_decision: bool, - ): - self.execution_feedback: str = execution_feedback - self.shape_feedback: str = shape_feedback - self.value_feedback: str = value_feedback - self.code_feedback: str = code_feedback - self.final_feedback: str = final_feedback - self.final_decision: str = final_decision - - def __str__(self) -> str: - return f"""------------------Model Execution Feedback------------------ -{self.execution_feedback} -------------------Model Shape Feedback------------------ -{self.shape_feedback} -------------------Model Value Feedback------------------ -{self.value_feedback} -------------------Model Code Feedback------------------ -{self.code_feedback} -------------------Model Final Feedback------------------ -{self.final_feedback} -------------------Model Final Decision------------------ -This implementation is {'SUCCESS' if self.final_decision else 'FAIL'}. -""" - - -class ModelCoderEvaluator(Evaluator): - def evaluate( - self, - target_task: Task, - implementation: Workspace, - gt_implementation: Workspace, - queried_knowledge: QueriedKnowledge = None, - **kwargs, - ) -> ModelCoderFeedback: - target_task_information = target_task.get_task_information() - if ( - queried_knowledge is not None - and target_task_information in queried_knowledge.success_task_to_knowledge_dict - ): - return queried_knowledge.success_task_to_knowledge_dict[target_task_information].feedback - elif queried_knowledge is not None and target_task_information in queried_knowledge.failed_task_info_set: - return ModelCoderFeedback( - execution_feedback="This task has failed too many times, skip implementation.", - shape_feedback="This task has failed too many times, skip implementation.", - value_feedback="This task has failed too many times, skip implementation.", - code_feedback="This task has failed too many times, skip implementation.", - final_feedback="This task has failed too many times, skip implementation.", - final_decision=False, - ) - assert isinstance(target_task, ModelTask) - - # NOTE: Use fixed input to test the model to avoid randomness - batch_size = 8 - num_features = 30 - num_timesteps = 40 - input_value = 0.4 - param_init_value = 0.6 - - assert isinstance(implementation, ModelFBWorkspace) - model_execution_feedback, gen_np_array = implementation.execute( - batch_size=batch_size, - num_features=num_features, - num_timesteps=num_timesteps, - input_value=input_value, - param_init_value=param_init_value, - ) - if gt_implementation is not None: - assert isinstance(gt_implementation, ModelFBWorkspace) - _, gt_np_array = gt_implementation.execute( - batch_size=batch_size, - num_features=num_features, - num_timesteps=num_timesteps, - input_value=input_value, - param_init_value=param_init_value, - ) - else: - gt_np_array = None - - shape_feedback, shape_decision = shape_evaluator( - gen_np_array, - (batch_size, self.scen.model_output_channel if hasattr(self.scen, "model_output_channel") else 1), - ) - value_feedback, value_decision = value_evaluator(gen_np_array, gt_np_array) - code_feedback, _ = ModelCodeEvaluator(scen=self.scen).evaluate( - target_task=target_task, - implementation=implementation, - gt_implementation=gt_implementation, - model_execution_feedback=model_execution_feedback, - model_value_feedback="\n".join([shape_feedback, value_feedback]), - ) - final_feedback, final_decision = ModelFinalEvaluator(scen=self.scen).evaluate( - target_task=target_task, - 
implementation=implementation, - gt_implementation=gt_implementation, - model_execution_feedback=model_execution_feedback, - model_value_feedback=value_feedback, - model_code_feedback=code_feedback, - ) - - return ModelCoderFeedback( - execution_feedback=model_execution_feedback, - shape_feedback=shape_feedback, - value_feedback=value_feedback, - code_feedback=code_feedback, - final_feedback=final_feedback, - final_decision=final_decision, - ) - - -class ModelCoderMultiEvaluator(Evaluator): - def evaluate( - self, - evo: ModelEvolvingItem, - queried_knowledge: QueriedKnowledge = None, - **kwargs, - ) -> List[ModelCoderFeedback]: - multi_implementation_feedback = multiprocessing_wrapper( - [ - ( - ModelCoderEvaluator(scen=self.scen).evaluate, - ( - evo.sub_tasks[index], - evo.sub_workspace_list[index], - evo.sub_gt_implementations[index] if evo.sub_gt_implementations is not None else None, - queried_knowledge, - ), - ) - for index in range(len(evo.sub_tasks)) - ], - n=RD_AGENT_SETTINGS.multi_proc_n, - ) - - final_decision = [ - None if single_feedback is None else single_feedback.final_decision - for single_feedback in multi_implementation_feedback - ] - logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}") - - return multi_implementation_feedback diff --git a/rdagent/components/coder/model_coder/evaluators.py b/rdagent/components/coder/model_coder/evaluators.py new file mode 100644 index 00000000..926c2e6a --- /dev/null +++ b/rdagent/components/coder/model_coder/evaluators.py @@ -0,0 +1,103 @@ +from rdagent.components.coder.CoSTEER.evaluators import ( + CoSTEEREvaluator, + CoSTEERMultiFeedback, + CoSTEERSingleFeedback, +) +from rdagent.components.coder.model_coder.eva_utils import ( + ModelCodeEvaluator, + ModelFinalEvaluator, + shape_evaluator, + value_evaluator, +) +from rdagent.components.coder.model_coder.model import ModelFBWorkspace, ModelTask +from rdagent.core.evolving_framework import QueriedKnowledge +from rdagent.core.experiment import Task, Workspace + +ModelSingleFeedback = CoSTEERSingleFeedback +ModelMultiFeedback = CoSTEERMultiFeedback + + +class ModelCoSTEEREvaluator(CoSTEEREvaluator): + def evaluate( + self, + target_task: Task, + implementation: Workspace, + gt_implementation: Workspace, + queried_knowledge: QueriedKnowledge = None, + **kwargs, + ) -> ModelSingleFeedback: + target_task_information = target_task.get_task_information() + if ( + queried_knowledge is not None + and target_task_information in queried_knowledge.success_task_to_knowledge_dict + ): + return queried_knowledge.success_task_to_knowledge_dict[target_task_information].feedback + elif queried_knowledge is not None and target_task_information in queried_knowledge.failed_task_info_set: + return ModelSingleFeedback( + execution_feedback="This task has failed too many times, skip implementation.", + shape_feedback="This task has failed too many times, skip implementation.", + value_feedback="This task has failed too many times, skip implementation.", + code_feedback="This task has failed too many times, skip implementation.", + final_feedback="This task has failed too many times, skip implementation.", + final_decision=False, + ) + assert isinstance(target_task, ModelTask) + + # NOTE: Use fixed input to test the model to avoid randomness + batch_size = 8 + num_features = 30 + num_timesteps = 40 + input_value = 0.4 + param_init_value = 0.6 + + assert isinstance(implementation, ModelFBWorkspace) + model_execution_feedback, gen_np_array = implementation.execute( + 
batch_size=batch_size, + num_features=num_features, + num_timesteps=num_timesteps, + input_value=input_value, + param_init_value=param_init_value, + ) + if gt_implementation is not None: + assert isinstance(gt_implementation, ModelFBWorkspace) + _, gt_np_array = gt_implementation.execute( + batch_size=batch_size, + num_features=num_features, + num_timesteps=num_timesteps, + input_value=input_value, + param_init_value=param_init_value, + ) + else: + gt_np_array = None + + shape_feedback, shape_decision = shape_evaluator( + gen_np_array, + (batch_size, self.scen.model_output_channel if hasattr(self.scen, "model_output_channel") else 1), + ) + value_feedback, value_decision = value_evaluator(gen_np_array, gt_np_array) + code_feedback, _ = ModelCodeEvaluator(scen=self.scen).evaluate( + target_task=target_task, + implementation=implementation, + gt_implementation=gt_implementation, + model_execution_feedback=model_execution_feedback, + model_value_feedback="\n".join([shape_feedback, value_feedback]), + ) + final_feedback, final_decision = ModelFinalEvaluator(scen=self.scen).evaluate( + target_task=target_task, + implementation=implementation, + gt_implementation=gt_implementation, + model_execution_feedback=model_execution_feedback, + model_value_feedback=value_feedback, + model_code_feedback=code_feedback, + ) + + return ModelSingleFeedback( + execution_feedback=model_execution_feedback, + shape_feedback=shape_feedback, + value_feedback=value_feedback, + code_feedback=code_feedback, + final_feedback=final_feedback, + final_decision=final_decision, + value_generated_flag=(gen_np_array is not None), + final_decision_based_on_gt=(gt_implementation is not None), + ) diff --git a/rdagent/components/coder/model_coder/evolving_strategy.py b/rdagent/components/coder/model_coder/evolving_strategy.py new file mode 100644 index 00000000..b980508f --- /dev/null +++ b/rdagent/components/coder/model_coder/evolving_strategy.py @@ -0,0 +1,106 @@ +import json +from pathlib import Path + +from jinja2 import Environment, StrictUndefined + +from rdagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +from rdagent.components.coder.CoSTEER.evolving_strategy import ( + MultiProcessEvolvingStrategy, +) +from rdagent.components.coder.CoSTEER.knowledge_management import ( + CoSTEERQueriedKnowledge, + CoSTEERQueriedKnowledgeV2, +) +from rdagent.components.coder.model_coder.model import ( + ModelExperiment, + ModelFBWorkspace, + ModelTask, +) +from rdagent.core.prompts import Prompts +from rdagent.oai.llm_conf import LLM_SETTINGS +from rdagent.oai.llm_utils import APIBackend + +coder_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + + +class ModelMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy): + def implement_one_task( + self, + target_task: ModelTask, + queried_knowledge: CoSTEERQueriedKnowledge = None, + ) -> str: + model_information_str = target_task.get_task_information() + + queried_similar_successful_knowledge = ( + queried_knowledge.task_to_similar_task_successful_knowledge[model_information_str] + if queried_knowledge is not None + else [] + ) + queried_former_failed_knowledge = ( + queried_knowledge.task_to_former_failed_traces[model_information_str] + if queried_knowledge is not None + else [] + ) + + queried_former_failed_knowledge_to_render = ( + queried_former_failed_knowledge[0] + if isinstance(queried_knowledge, CoSTEERQueriedKnowledgeV2) + else queried_former_failed_knowledge + ) + + system_prompt = ( + Environment(undefined=StrictUndefined) + .from_string( + 
coder_prompts["evolving_strategy_model_coder"]["system"], + ) + .render( + scenario=self.scen.get_scenario_all_desc(filtered_tag=target_task.model_type), + queried_former_failed_knowledge=queried_former_failed_knowledge_to_render, + current_code=target_task.base_code, + ) + ) + + queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge + for _ in range(10): # max attempt to reduce the length of user_prompt + user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string( + coder_prompts["evolving_strategy_model_coder"]["user"], + ) + .render( + model_information_str=model_information_str, + queried_similar_successful_knowledge=queried_similar_successful_knowledge_to_render, + queried_former_failed_knowledge=queried_former_failed_knowledge_to_render, + ) + .strip("\n") + ) + if ( + APIBackend().build_messages_and_calculate_token( + user_prompt=user_prompt, + system_prompt=system_prompt, + ) + < LLM_SETTINGS.chat_token_limit + ): + break + elif len(queried_former_failed_knowledge_to_render) > 1: + queried_former_failed_knowledge_to_render = queried_former_failed_knowledge_to_render[1:] + elif len(queried_similar_successful_knowledge_to_render) > 1: + queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge_to_render[1:] + + code = json.loads( + APIBackend(use_chat_cache=CoSTEER_SETTINGS.coder_use_cache).build_messages_and_create_chat_completion( + user_prompt=user_prompt, + system_prompt=system_prompt, + json_mode=True, + ), + )["code"] + return code + + def assign_code_list_to_evo(self, code_list, evo): + for index in range(len(evo.sub_tasks)): + if code_list[index] is None: + continue + if evo.sub_workspace_list[index] is None: + evo.sub_workspace_list[index] = ModelFBWorkspace(target_task=evo.sub_tasks[index]) + evo.sub_workspace_list[index].inject_code(**{"model.py": code_list[index]}) + return evo diff --git a/rdagent/components/coder/model_coder/model.py b/rdagent/components/coder/model_coder/model.py index 75071c6c..b5a3e3ac 100644 --- a/rdagent/components/coder/model_coder/model.py +++ b/rdagent/components/coder/model_coder/model.py @@ -4,14 +4,14 @@ from pathlib import Path from typing import Dict, Optional -from rdagent.components.coder.model_coder.conf import MODEL_IMPL_SETTINGS -from rdagent.core.experiment import Experiment, FBWorkspace, Task +from rdagent.components.coder.CoSTEER.task import CoSTEERTask +from rdagent.core.experiment import Experiment, FBWorkspace from rdagent.core.utils import cache_with_pickle from rdagent.oai.llm_utils import md5_hash from rdagent.utils.env import KGDockerEnv, QTDockerEnv -class ModelTask(Task): +class ModelTask(CoSTEERTask): def __init__( self, name: str, diff --git a/rdagent/components/coder/model_coder/prompts.yaml b/rdagent/components/coder/model_coder/prompts.yaml index 4d12007f..fa6d0212 100644 --- a/rdagent/components/coder/model_coder/prompts.yaml +++ b/rdagent/components/coder/model_coder/prompts.yaml @@ -83,7 +83,7 @@ evolving_strategy_model_coder: --------------Correct code to similar models:--------------- {% for similar_successful_knowledge in queried_similar_successful_knowledge %} =====Model {{loop.index}}:===== - {{ similar_successful_knowledge.target_task.get_model_information() }} + {{ similar_successful_knowledge.target_task.get_task_information() }} =====Code:===== {{ similar_successful_knowledge.implementation.code }} {% endfor %} diff --git a/rdagent/components/workflow/conf.py b/rdagent/components/workflow/conf.py index 77ed0698..d6e87960 100644 --- 
a/rdagent/components/workflow/conf.py +++ b/rdagent/components/workflow/conf.py @@ -1,16 +1,10 @@ -from pydantic_settings import BaseSettings +from rdagent.core.conf import ExtendedBaseSettings -class BasePropSetting(BaseSettings): +class BasePropSetting(ExtendedBaseSettings): """ The common part of the config for RD Loop to propose and development You can add following config in the subclass to distinguish the environment variables. - - .. code-block:: python - - class Config: - env_prefix = "DM_MODEL_" # Use MODEL_CODER_ as prefix for environment variables - protected_namespaces = () # Add 'model_' to the protected namespaces """ scen: str = "" diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py index 9f57d32d..eddf5547 100644 --- a/rdagent/core/conf.py +++ b/rdagent/core/conf.py @@ -1,14 +1,57 @@ from __future__ import annotations +# TODO: use pydantic for other modules in Qlib from pathlib import Path +from typing import TYPE_CHECKING, Any -from pydantic_settings import BaseSettings +if TYPE_CHECKING: + from pydantic.fields import FieldInfo -# TODO: use pydantic for other modules in Qlib -# from pydantic_settings import BaseSettings +from pydantic_settings import ( + BaseSettings, + EnvSettingsSource, + PydanticBaseSettingsSource, + SettingsConfigDict, +) + + +class ExtendedEnvSettingsSource(EnvSettingsSource): + def get_field_value(self, field: FieldInfo, field_name: str) -> tuple[Any, str, bool]: + # Dynamically gather prefixes from the current and parent classes + prefixes = [self.config.get("env_prefix", "")] + if hasattr(self.settings_cls, "__bases__"): + for base in self.settings_cls.__bases__: + if hasattr(base, "model_config"): + parent_prefix = base.model_config.get("env_prefix") + if parent_prefix and parent_prefix not in prefixes: + prefixes.append(parent_prefix) + for prefix in prefixes: + self.env_prefix = prefix + env_val, field_key, value_is_complex = super().get_field_value(field, field_name) + if env_val is not None: + return env_val, field_key, value_is_complex + + return super().get_field_value(field, field_name) + + +class ExtendedSettingsConfigDict(SettingsConfigDict, total=False): ... + + +class ExtendedBaseSettings(BaseSettings): + + @classmethod + def settings_customise_sources( + cls, + settings_cls: type[BaseSettings], + init_settings: PydanticBaseSettingsSource, # noqa + env_settings: PydanticBaseSettingsSource, # noqa + dotenv_settings: PydanticBaseSettingsSource, # noqa + file_secret_settings: PydanticBaseSettingsSource, # noqa + ) -> tuple[PydanticBaseSettingsSource, ...]: + return (ExtendedEnvSettingsSource(settings_cls),) -class RDAgentSettings(BaseSettings): +class RDAgentSettings(ExtendedBaseSettings): # TODO: (xiao) I think LLMSetting may be a better name. 
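ExtendedEnvSettingsSource is what makes the `model_config = ExtendedSettingsConfigDict(env_prefix=...)` migrations in this patch backward compatible: a field is resolved under the subclass prefix first, then under each parent class's prefix. A hedged sketch of the behaviour; the demo classes and variable names are hypothetical:

import os

from rdagent.core.conf import ExtendedBaseSettings, ExtendedSettingsConfigDict


class DemoBase(ExtendedBaseSettings):
    model_config = ExtendedSettingsConfigDict(env_prefix="RD_DEMO_")
    scen: str = "default"


class DemoChild(DemoBase):
    model_config = ExtendedSettingsConfigDict(env_prefix="KG_DEMO_")


os.environ["RD_DEMO_SCEN"] = "from-parent-prefix"
# KG_DEMO_SCEN is unset, so the source falls back to the parent's RD_DEMO_ prefix
assert DemoChild().scen == "from-parent-prefix"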
# TODO: (xiao) I think most of the config should be in oai.config # Log configs diff --git a/rdagent/core/evolving_framework.py b/rdagent/core/evolving_framework.py index 46c378e9..24c7c6ae 100644 --- a/rdagent/core/evolving_framework.py +++ b/rdagent/core/evolving_framework.py @@ -80,7 +80,7 @@ class RAGStrategy(ABC): """Retrieval Augmentation Generation Strategy""" def __init__(self, knowledgebase: EvolvingKnowledgeBase) -> None: - self.knowledgebase = knowledgebase + self.knowledgebase: EvolvingKnowledgeBase = knowledgebase @abstractmethod def query( diff --git a/rdagent/core/experiment.py b/rdagent/core/experiment.py index 0cf49c90..2fda6af9 100644 --- a/rdagent/core/experiment.py +++ b/rdagent/core/experiment.py @@ -206,7 +206,7 @@ def __init__( sub_tasks: Sequence[ASpecificTask], based_experiments: Sequence[ASpecificWSForExperiment] = [], ) -> None: - self.sub_tasks = sub_tasks + self.sub_tasks: Sequence[ASpecificTask] = sub_tasks self.sub_workspace_list: list[ASpecificWSForSubTasks | None] = [None] * len(self.sub_tasks) self.based_experiments: Sequence[ASpecificWSForExperiment] = based_experiments self.result: object = None # The result of the experiment, can be different types in different scenarios. diff --git a/rdagent/core/knowledge_base.py b/rdagent/core/knowledge_base.py index ceca8ab0..cc3ba6fe 100644 --- a/rdagent/core/knowledge_base.py +++ b/rdagent/core/knowledge_base.py @@ -15,9 +15,9 @@ def load(self) -> None: with self.path.open("rb") as f: loaded = pickle.load(f) if isinstance(loaded, dict): - self.__dict__.update({k: v for k, v in loaded.items() if not k == "path"}) + self.__dict__.update({k: v for k, v in loaded.items() if k != "path"}) else: - self.__dict__.update({k: v for k, v in loaded.__dict__.items() if not k == "path"}) + self.__dict__.update({k: v for k, v in loaded.__dict__.items() if k != "path"}) def dump(self) -> None: if self.path is not None: diff --git a/rdagent/log/ui/app.py b/rdagent/log/ui/app.py index 57f13327..7e47869d 100644 --- a/rdagent/log/ui/app.py +++ b/rdagent/log/ui/app.py @@ -14,11 +14,9 @@ from streamlit import session_state as state from streamlit_theme import st_theme -from rdagent.components.coder.factor_coder.CoSTEER.evaluators import ( - FactorSingleFeedback, -) +from rdagent.components.coder.factor_coder.evaluators import FactorSingleFeedback from rdagent.components.coder.factor_coder.factor import FactorFBWorkspace, FactorTask -from rdagent.components.coder.model_coder.CoSTEER.evaluators import ModelCoderFeedback +from rdagent.components.coder.model_coder.evaluators import ModelSingleFeedback from rdagent.components.coder.model_coder.model import ModelFBWorkspace, ModelTask from rdagent.core.proposal import Hypothesis, HypothesisFeedback from rdagent.core.scenario import Scenario @@ -256,7 +254,7 @@ def refresh(same_trace: bool = False): state.times = defaultdict(lambda: defaultdict(list)) -def evolving_feedback_window(wsf: FactorSingleFeedback | ModelCoderFeedback): +def evolving_feedback_window(wsf: FactorSingleFeedback | ModelSingleFeedback): if isinstance(wsf, FactorSingleFeedback): ffc, efc, cfc, vfc = st.tabs( ["**Final Feedback🏁**", "Execution Feedback🖥️", "Code Feedback📄", "Value Feedback🔢"] @@ -268,8 +266,8 @@ def evolving_feedback_window(wsf: FactorSingleFeedback | ModelCoderFeedback): with cfc: st.markdown(wsf.code_feedback) with vfc: - st.markdown(wsf.factor_value_feedback) - elif isinstance(wsf, ModelCoderFeedback): + st.markdown(wsf.value_feedback) + elif isinstance(wsf, ModelSingleFeedback): ffc, efc, cfc, msfc, vfc 
= st.tabs( [ "**Final Feedback🏁**", diff --git a/rdagent/log/ui/web.py b/rdagent/log/ui/web.py index c4bbe5ee..eb4862b4 100644 --- a/rdagent/log/ui/web.py +++ b/rdagent/log/ui/web.py @@ -9,11 +9,9 @@ import streamlit as st from streamlit.delta_generator import DeltaGenerator -from rdagent.components.coder.factor_coder.CoSTEER.evaluators import ( - FactorSingleFeedback, -) +from rdagent.components.coder.factor_coder.evaluators import FactorSingleFeedback from rdagent.components.coder.factor_coder.factor import FactorFBWorkspace, FactorTask -from rdagent.components.coder.model_coder.CoSTEER.evaluators import ModelCoderFeedback +from rdagent.components.coder.model_coder.evaluators import ModelSingleFeedback from rdagent.components.coder.model_coder.model import ModelFBWorkspace, ModelTask from rdagent.core.proposal import Hypothesis, HypothesisFeedback, Trace from rdagent.log.base import Message, Storage, View @@ -233,7 +231,7 @@ def consume_msg(self, msg: Message | FactorSingleFeedback): ### :blue[Factor Code Feedback] {fb.code_feedback} ### :blue[Factor Value Feedback] -{fb.factor_value_feedback} +{fb.value_feedback} ### :blue[Factor Final Feedback] {fb.final_feedback} ### :blue[Factor Final Decision] @@ -243,8 +241,8 @@ def consume_msg(self, msg: Message | FactorSingleFeedback): class ModelFeedbackWindow(StWindow): - def consume_msg(self, msg: Message | ModelCoderFeedback): - mb: ModelCoderFeedback = msg.content if isinstance(msg, Message) else msg + def consume_msg(self, msg: Message | ModelSingleFeedback): + mb: ModelSingleFeedback = msg.content if isinstance(msg, Message) else msg self.container.markdown( f"""### :blue[Model Execution Feedback] @@ -425,7 +423,7 @@ def consume_msg(self, msg: Message): inner_class=FactorFeedbackWindow, tab_names=self.evolving_tasks, ) - elif isinstance(msg.content[0], ModelCoderFeedback): + elif isinstance(msg.content[0], ModelSingleFeedback): self.current_win = ObjectsTabsWindow( self.container.expander("Model Feedbacks"), inner_class=ModelFeedbackWindow, @@ -519,7 +517,7 @@ def consume_msg(self, msg: Message): ObjectsTabsWindow( self.container.container(), inner_class=FactorFeedbackWindow, tab_names=self.evolving_tasks ).consume_msg(msg) - elif isinstance(msg.content[0], ModelCoderFeedback): + elif isinstance(msg.content[0], ModelSingleFeedback): self.container.markdown("**Model Feedbacks🔍**") ObjectsTabsWindow( self.container.container(), inner_class=ModelFeedbackWindow, tab_names=self.evolving_tasks diff --git a/rdagent/oai/llm_conf.py b/rdagent/oai/llm_conf.py index 500d05b8..15bce4ed 100644 --- a/rdagent/oai/llm_conf.py +++ b/rdagent/oai/llm_conf.py @@ -2,10 +2,10 @@ from pathlib import Path -from pydantic_settings import BaseSettings +from rdagent.core.conf import ExtendedBaseSettings -class LLMSettings(BaseSettings): +class LLMSettings(ExtendedBaseSettings): log_llm_chat_content: bool = True use_azure: bool = False diff --git a/rdagent/scenarios/data_mining/developer/model_coder.py b/rdagent/scenarios/data_mining/developer/model_coder.py index 95e9a2ca..1011a968 100644 --- a/rdagent/scenarios/data_mining/developer/model_coder.py +++ b/rdagent/scenarios/data_mining/developer/model_coder.py @@ -1,3 +1,3 @@ -from rdagent.components.coder.model_coder.CoSTEER import ModelCoSTEER +from rdagent.components.coder.model_coder import ModelCoSTEER DMModelCoSTEER = ModelCoSTEER diff --git a/rdagent/scenarios/kaggle/developer/coder.py b/rdagent/scenarios/kaggle/developer/coder.py index 3c02f552..71c390bb 100644 --- a/rdagent/scenarios/kaggle/developer/coder.py 
+++ b/rdagent/scenarios/kaggle/developer/coder.py @@ -3,8 +3,8 @@ from jinja2 import Environment, StrictUndefined -from rdagent.components.coder.factor_coder.CoSTEER import FactorCoSTEER -from rdagent.components.coder.model_coder.CoSTEER import ModelCoSTEER +from rdagent.components.coder.factor_coder import FactorCoSTEER +from rdagent.components.coder.model_coder import ModelCoSTEER from rdagent.core.developer import Developer from rdagent.core.prompts import Prompts from rdagent.oai.llm_utils import APIBackend diff --git a/rdagent/scenarios/kaggle/developer/runner.py b/rdagent/scenarios/kaggle/developer/runner.py index b7cc4815..51890086 100644 --- a/rdagent/scenarios/kaggle/developer/runner.py +++ b/rdagent/scenarios/kaggle/developer/runner.py @@ -78,6 +78,8 @@ def develop(self, exp: KGModelExperiment) -> KGModelExperiment: else: model_file_name = f"model/model_{model_type.lower()}.py" exp.experiment_workspace.inject_code(**{model_file_name: sub_ws.code_dict["model.py"]}) + else: + raise ModelEmptyError("No model is implemented.") env_to_use = {"PYTHONPATH": "./"} result = exp.experiment_workspace.execute(run_env=env_to_use) diff --git a/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/fea_share_preprocess.py b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/fea_share_preprocess.py new file mode 100644 index 00000000..edf96a4a --- /dev/null +++ b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/fea_share_preprocess.py @@ -0,0 +1,109 @@ +import os + +import pandas as pd +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer +from sklearn.model_selection import train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import LabelEncoder + + +def prepreprocess(): + """ + This method loads the data, drops the unnecessary columns, and splits it into train and validation sets. + """ + # Load and preprocess the data + data_df = pd.read_csv("/kaggle/input/train.csv") + data_df = data_df.drop(["PassengerId"], axis=1) + + X = data_df.drop(["Transported"], axis=1) + y = data_df["Transported"] + + label_encoder = LabelEncoder() + y = label_encoder.fit_transform(y) # Convert class labels to numeric + + # Split the data into training and validation sets + X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.10, random_state=42) + + return X_train, X_valid, y_train, y_valid + + +def preprocess_fit(X_train: pd.DataFrame): + """ + Fits the preprocessor on the training data and returns the fitted preprocessor. + """ + # Identify numerical and categorical features + numerical_cols = [cname for cname in X_train.columns if X_train[cname].dtype in ["int64", "float64"]] + categorical_cols = [cname for cname in X_train.columns if X_train[cname].dtype == "object"] + + # Define preprocessors for numerical and categorical features + label_encoders = {col: LabelEncoder().fit(X_train[col]) for col in categorical_cols} + + numerical_transformer = Pipeline(steps=[("imputer", SimpleImputer(strategy="mean"))]) + + # Combine preprocessing steps + preprocessor = ColumnTransformer( + transformers=[ + ("num", numerical_transformer, numerical_cols), + ], + remainder="passthrough", + ) + + # Fit the preprocessor on the training data + preprocessor.fit(X_train) + + return preprocessor, label_encoders + + +def preprocess_transform(X: pd.DataFrame, preprocessor, label_encoders): + """ + Transforms the given DataFrame using the fitted preprocessor. 
+ Ensures the processed data has consistent features across train, validation, and test sets. + """ + # Encode categorical features + for col, le in label_encoders.items(): + # Handle unseen labels by setting them to a default value (e.g., -1) + X[col] = X[col].apply(lambda x: le.transform([x])[0] if x in le.classes_ else -1) + + # Transform the data using the fitted preprocessor + X_array = preprocessor.transform(X) + + # Convert arrays back to DataFrames + X_transformed = pd.DataFrame(X_array, columns=X.columns, index=X.index) + + return X_transformed + + +def preprocess_script(): + """ + This method applies the preprocessing steps to the training, validation, and test datasets. + """ + if os.path.exists("/kaggle/input/X_train.pkl"): + X_train = pd.read_pickle("/kaggle/input/X_train.pkl") + X_valid = pd.read_pickle("/kaggle/input/X_valid.pkl") + y_train = pd.read_pickle("/kaggle/input/y_train.pkl") + y_valid = pd.read_pickle("/kaggle/input/y_valid.pkl") + X_test = pd.read_pickle("/kaggle/input/X_test.pkl") + others = pd.read_pickle("/kaggle/input/others.pkl") + y_train = pd.Series(y_train).reset_index(drop=True) + y_valid = pd.Series(y_valid).reset_index(drop=True) + + return X_train, X_valid, y_train, y_valid, X_test, *others + X_train, X_valid, y_train, y_valid = prepreprocess() + y_train = pd.Series(y_train).reset_index(drop=True) + y_valid = pd.Series(y_valid).reset_index(drop=True) + + # Fit the preprocessor on the training data + preprocessor, label_encoders = preprocess_fit(X_train) + + # Preprocess the train, validation, and test data + X_train = preprocess_transform(X_train, preprocessor, label_encoders) + X_valid = preprocess_transform(X_valid, preprocessor, label_encoders) + + # Load and preprocess the test data + submission_df = pd.read_csv("/kaggle/input/test.csv") + passenger_ids = submission_df["PassengerId"] + submission_df = submission_df.drop(["PassengerId"], axis=1) + X_test = preprocess_transform(submission_df, preprocessor, label_encoders) + + return X_train, X_valid, y_train, y_valid, X_test, passenger_ids diff --git a/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/feature/feature.py b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/feature/feature.py new file mode 100644 index 00000000..8ae043ac --- /dev/null +++ b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/feature/feature.py @@ -0,0 +1,23 @@ +import pandas as pd + +""" +Here is the feature engineering code for each task, with a class that has a fit and transform method. +Remember +""" + + +class IdentityFeature: + def fit(self, train_df: pd.DataFrame): + """ + Fit the feature engineering model to the training data. + """ + pass + + def transform(self, X: pd.DataFrame): + """ + Transform the input data. + """ + return X + + +feature_engineering_cls = IdentityFeature diff --git a/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/model_randomforest.py b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/model_randomforest.py new file mode 100644 index 00000000..f0bca61c --- /dev/null +++ b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/model_randomforest.py @@ -0,0 +1,34 @@ +""" +Motivation of the model: +The Random Forest model is chosen for its robustness and ability to handle large datasets with higher dimensionality. +It reduces overfitting by averaging multiple decision trees and typically performs well out of the box, making it a good +baseline model for many classification tasks. 
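preprocess_transform above guards LabelEncoder against categories that first appear at inference time by mapping anything outside le.classes_ to -1. The guard on its own, with illustrative category values:

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder().fit(["Earth", "Europa", "Mars"])


def encode(value: str) -> int:
    return int(le.transform([value])[0]) if value in le.classes_ else -1


assert [encode(v) for v in ["Mars", "Pluto"]] == [2, -1]  # "Pluto" was never seen at fit time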
+""" + +import pandas as pd +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import accuracy_score + + +def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame, y_valid: pd.Series): + """ + Define and train the Random Forest model. Merge feature selection into the pipeline. + """ + # Initialize the Random Forest model + model = RandomForestClassifier(n_estimators=100, random_state=32, n_jobs=-1) + + # Fit the model + model.fit(X_train, y_train) + + return model + + +def predict(model, X): + """ + Keep feature selection's consistency and make predictions. + """ + # Predict using the trained model + y_pred_prob = model.predict_proba(X)[:, 1] + + # Apply threshold to get boolean predictions + return y_pred_prob.reshape(-1, 1) diff --git a/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/model_xgboost.py b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/model_xgboost.py new file mode 100644 index 00000000..83d82afb --- /dev/null +++ b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/model_xgboost.py @@ -0,0 +1,33 @@ +""" +motivation of the model +""" + +import pandas as pd +import xgboost as xgb + + +def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_valid: pd.DataFrame): + """Define and train the model. Merge feature_select""" + dtrain = xgb.DMatrix(X_train, label=y_train) + dvalid = xgb.DMatrix(X_valid, label=y_valid) + + params = { + "nthread": -1, + "tree_method": "gpu_hist", + "device": "cuda", + } + num_round = 100 + + evallist = [(dtrain, "train"), (dvalid, "eval")] + bst = xgb.train(params, dtrain, num_round, evallist) + + return bst + + +def predict(model, X): + """ + Keep feature select's consistency. + """ + dtest = xgb.DMatrix(X) + y_pred_prob = model.predict(dtest) + return y_pred_prob.reshape(-1, 1) diff --git a/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_lightgbm.py b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_lightgbm.py new file mode 100644 index 00000000..f230f130 --- /dev/null +++ b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_lightgbm.py @@ -0,0 +1,12 @@ +import pandas as pd + + +def select(X: pd.DataFrame) -> pd.DataFrame: + """ + Select relevant features. To be used in fit & predict function. + """ + # For now, we assume all features are relevant. This can be expanded to feature selection logic. + if X.columns.nlevels == 1: + return X + X.columns = ["_".join(str(i) for i in col).strip() for col in X.columns.values] + return X diff --git a/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_nn.py b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_nn.py new file mode 100644 index 00000000..f230f130 --- /dev/null +++ b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_nn.py @@ -0,0 +1,12 @@ +import pandas as pd + + +def select(X: pd.DataFrame) -> pd.DataFrame: + """ + Select relevant features. To be used in fit & predict function. + """ + # For now, we assume all features are relevant. This can be expanded to feature selection logic. 
+ if X.columns.nlevels == 1: + return X + X.columns = ["_".join(str(i) for i in col).strip() for col in X.columns.values] + return X diff --git a/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_randomforest.py b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_randomforest.py new file mode 100644 index 00000000..f230f130 --- /dev/null +++ b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_randomforest.py @@ -0,0 +1,12 @@ +import pandas as pd + + +def select(X: pd.DataFrame) -> pd.DataFrame: + """ + Select relevant features. To be used in fit & predict function. + """ + # For now, we assume all features are relevant. This can be expanded to feature selection logic. + if X.columns.nlevels == 1: + return X + X.columns = ["_".join(str(i) for i in col).strip() for col in X.columns.values] + return X diff --git a/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_xgboost.py b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_xgboost.py new file mode 100644 index 00000000..f230f130 --- /dev/null +++ b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_xgboost.py @@ -0,0 +1,12 @@ +import pandas as pd + + +def select(X: pd.DataFrame) -> pd.DataFrame: + """ + Select relevant features. To be used in fit & predict function. + """ + # For now, we assume all features are relevant. This can be expanded to feature selection logic. + if X.columns.nlevels == 1: + return X + X.columns = ["_".join(str(i) for i in col).strip() for col in X.columns.values] + return X diff --git a/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/train.py b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/train.py new file mode 100644 index 00000000..6f696e25 --- /dev/null +++ b/rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/train.py @@ -0,0 +1,141 @@ +import importlib.util +import random +from pathlib import Path + +import numpy as np +import pandas as pd +from fea_share_preprocess import preprocess_script +from sklearn.metrics import accuracy_score + +# Set random seed for reproducibility +SEED = 42 +random.seed(SEED) +np.random.seed(SEED) +DIRNAME = Path(__file__).absolute().resolve().parent + + +# support various method for metrics calculation +def compute_metrics_for_classification(y_true, y_pred): + """Compute accuracy metric for classification.""" + accuracy = accuracy_score(y_true, y_pred) + return accuracy + + +def import_module_from_path(module_name, module_path): + spec = importlib.util.spec_from_file_location(module_name, module_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +# 1) Preprocess the data +X_train, X_valid, y_train, y_valid, X_test, passenger_ids = preprocess_script() + +# 2) Auto feature engineering +X_train_l, X_valid_l = [], [] +X_test_l = [] + +for f in DIRNAME.glob("feature/feat*.py"): + cls = import_module_from_path(f.stem, f).feature_engineering_cls() + cls.fit(X_train) + X_train_f = cls.transform(X_train.copy()) + X_valid_f = cls.transform(X_valid.copy()) + X_test_f = cls.transform(X_test.copy()) + + if X_train_f.shape[-1] == X_valid_f.shape[-1] and X_train_f.shape[-1] == X_test_f.shape[-1]: + X_train_l.append(X_train_f) + X_valid_l.append(X_valid_f) + X_test_l.append(X_test_f) + +X_train = pd.concat(X_train_l, axis=1, keys=[f"feature_{i}" for i in range(len(X_train_l))]) +X_valid = pd.concat(X_valid_l, axis=1, keys=[f"feature_{i}" for i 
in range(len(X_valid_l))]) +X_test = pd.concat(X_test_l, axis=1, keys=[f"feature_{i}" for i in range(len(X_test_l))]) + +print(X_train.shape, X_valid.shape, X_test.shape) + +# Handle inf and -inf values +X_train.replace([np.inf, -np.inf], np.nan, inplace=True) +X_valid.replace([np.inf, -np.inf], np.nan, inplace=True) +X_test.replace([np.inf, -np.inf], np.nan, inplace=True) + +from sklearn.impute import SimpleImputer + +imputer = SimpleImputer(strategy="mean") + +X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=X_train.columns) +X_valid = pd.DataFrame(imputer.transform(X_valid), columns=X_valid.columns) +X_test = pd.DataFrame(imputer.transform(X_test), columns=X_test.columns) + +# Remove duplicate columns +X_train = X_train.loc[:, ~X_train.columns.duplicated()] +X_valid = X_valid.loc[:, ~X_valid.columns.duplicated()] +X_test = X_test.loc[:, ~X_test.columns.duplicated()] + + +# 3) Train the model +model_l = [] # list[tuple[model, predict_func,]] +for f in DIRNAME.glob("model/model*.py"): + select_python_path = f.with_name(f.stem.replace("model", "select") + f.suffix) + select_m = import_module_from_path(select_python_path.stem, select_python_path) + X_train_selected = select_m.select(X_train.copy()) + X_valid_selected = select_m.select(X_valid.copy()) + + m = import_module_from_path(f.stem, f) + model_l.append((m.fit(X_train_selected, y_train, X_valid_selected, y_valid), m.predict, select_m)) + +# 4) Evaluate the model on the validation set +# metrics_all = [] +# for model, predict_func, select_m in model_l: +# X_valid_selected = select_m.select(X_valid.copy()) +# y_valid_pred = predict_func(model, X_valid_selected) +# y_valid_pred = (y_valid_pred > 0.5).astype(int) +# metrics = compute_metrics_for_classification(y_valid, y_valid_pred) +# print(f"Accuracy on valid set: {metrics}") +# metrics_all.append(metrics) + +# 4) Use random search to find the best ensemble weights +valid_pred_list = [] +for model, predict_func, select_m in model_l: + X_valid_selected = select_m.select(X_valid.copy()) + y_valid_pred = predict_func(model, X_valid_selected) + valid_pred_list.append(y_valid_pred) + +metrics_all = [] +weight_list = [] +searched_set = set() +for i in range(1000): + weight = np.random.randint(0, high=10, size=(len(valid_pred_list),), dtype="i") + if str(weight.tolist()) in searched_set or weight.sum() == 0: + continue + weight = weight / weight.sum() + searched_set.add(str(weight.tolist())) + y_valid_pred = np.zeros_like(valid_pred_list[0]) + for j in range(len(valid_pred_list)): + y_valid_pred += valid_pred_list[j] * weight[j] + y_valid_pred = (y_valid_pred > 0.5).astype(int) + metrics = compute_metrics_for_classification(y_valid, y_valid_pred) + metrics_all.append(metrics) + weight_list.append(weight) + + +# 5) Save the validation accuracy +max_index = np.argmax(metrics_all) +pd.Series(data=[metrics_all[max_index]], index=["MCC"]).to_csv("submission_score.csv") +print(f"Accuracy on valid set: {metrics_all[max_index]}") + +# 6) Make predictions on the test set and save them +test_pred_list = [] +for model, predict_func, select_m in model_l: + X_test_selected = select_m.select(X_test.copy()) + y_test_pred = predict_func(model, X_test_selected) + test_pred_list.append(y_test_pred) +y_test_pred = np.zeros_like(test_pred_list[0]) +for j in range(len(test_pred_list)): + y_test_pred += test_pred_list[j] * weight_list[max_index][j] +y_test_pred = (y_test_pred > 0.5).astype(bool) +y_test_pred = y_test_pred.ravel()
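The ensemble search above draws random integer weight vectors, normalizes them, scores the blended validation predictions, and later reuses the best weights on the test set. The blend-and-threshold step for two models, on toy arrays:

import numpy as np

valid_pred_list = [np.array([0.2, 0.8, 0.6]), np.array([0.4, 0.4, 0.9])]
weight = np.array([1, 3], dtype=float)
weight = weight / weight.sum()  # -> [0.25, 0.75]

blended = np.zeros_like(valid_pred_list[0])
for pred, w in zip(valid_pred_list, weight):
    blended += pred * w
labels = (blended > 0.5).astype(int)  # threshold the blended probabilities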
"Transported": y_test_pred}) + +# 8) Submit predictions for the test set +submission_result.to_csv("submission.csv", index=False) diff --git a/rdagent/scenarios/kaggle/proposal/proposal.py b/rdagent/scenarios/kaggle/proposal/proposal.py index 1628c77b..273860bb 100644 --- a/rdagent/scenarios/kaggle/proposal/proposal.py +++ b/rdagent/scenarios/kaggle/proposal/proposal.py @@ -17,6 +17,7 @@ from rdagent.core.prompts import Prompts from rdagent.core.proposal import Hypothesis, Scenario, Trace from rdagent.scenarios.kaggle.experiment.kaggle_experiment import ( + KG_MODEL_MAPPING, KG_SELECT_MAPPING, KGFactorExperiment, KGModelExperiment, @@ -397,22 +398,29 @@ def convert_model_experiment(self, response: str, trace: Trace) -> KGModelExperi f"Invalid model type '{model_type}'. Allowed model types are: {', '.join(KG_SELECT_MAPPING)}." ) + based_experiments = [KGModelExperiment(sub_tasks=[], source_feature_size=trace.scen.input_shape[-1])] + [ + t[1] for t in trace.hist if t[2] + ] + model_type = response_dict.get("model_type", "Model type not provided") + if model_type in KG_MODEL_MAPPING: + base_code = based_experiments[-1].experiment_workspace.code_dict.get(KG_MODEL_MAPPING[model_type], None) + else: + base_code = None + tasks.append( ModelTask( name=response_dict.get("model_name", "Model name not provided"), description=response_dict.get("description", "Description not provided"), architecture=response_dict.get("architecture", "Architecture not provided"), hyperparameters=response_dict.get("hyperparameters", "Hyperparameters not provided"), - model_type=response_dict.get("model_type", "Model type not provided"), + model_type=model_type, version=2, + base_code=base_code, ) ) exp = KGModelExperiment( sub_tasks=tasks, - based_experiments=( - [KGModelExperiment(sub_tasks=[], source_feature_size=trace.scen.input_shape[-1])] - + [t[1] for t in trace.hist if t[2]] - ), + based_experiments=based_experiments, ) return exp diff --git a/rdagent/scenarios/qlib/developer/factor_coder.py b/rdagent/scenarios/qlib/developer/factor_coder.py index 0facb840..04ee81c5 100644 --- a/rdagent/scenarios/qlib/developer/factor_coder.py +++ b/rdagent/scenarios/qlib/developer/factor_coder.py @@ -1,4 +1,3 @@ -from rdagent.components.coder.factor_coder.CoSTEER import FactorCoSTEER +from rdagent.components.coder.factor_coder import FactorCoSTEER QlibFactorCoSTEER = FactorCoSTEER -# TODO: This is a placeholder. 
diff --git a/rdagent/scenarios/qlib/developer/model_coder.py b/rdagent/scenarios/qlib/developer/model_coder.py
index bfe80a94..0f305587 100644
--- a/rdagent/scenarios/qlib/developer/model_coder.py
+++ b/rdagent/scenarios/qlib/developer/model_coder.py
@@ -1,3 +1,3 @@
-from rdagent.components.coder.model_coder.CoSTEER import ModelCoSTEER
+from rdagent.components.coder.model_coder import ModelCoSTEER

 QlibModelCoSTEER = ModelCoSTEER
diff --git a/rdagent/scenarios/qlib/experiment/utils.py b/rdagent/scenarios/qlib/experiment/utils.py
index 690d7bd5..0a4cac2d 100644
--- a/rdagent/scenarios/qlib/experiment/utils.py
+++ b/rdagent/scenarios/qlib/experiment/utils.py
@@ -8,7 +8,7 @@
 # render it with jinja
 from jinja2 import Environment, StrictUndefined

-from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS
+from rdagent.components.coder.factor_coder.config import FACTOR_COSTEER_SETTINGS
 from rdagent.utils.env import QTDockerEnv

@@ -30,24 +30,24 @@ def generate_data_folder_from_qlib():
         Path(__file__).parent / "factor_data_template" / "daily_pv_debug.h5"
     ).exists(), "daily_pv_debug.h5 is not generated."

-    Path(FACTOR_IMPLEMENT_SETTINGS.data_folder).mkdir(parents=True, exist_ok=True)
+    Path(FACTOR_COSTEER_SETTINGS.data_folder).mkdir(parents=True, exist_ok=True)
     shutil.copy(
         Path(__file__).parent / "factor_data_template" / "daily_pv_all.h5",
-        Path(FACTOR_IMPLEMENT_SETTINGS.data_folder) / "daily_pv.h5",
+        Path(FACTOR_COSTEER_SETTINGS.data_folder) / "daily_pv.h5",
     )
     shutil.copy(
         Path(__file__).parent / "factor_data_template" / "README.md",
-        Path(FACTOR_IMPLEMENT_SETTINGS.data_folder) / "README.md",
+        Path(FACTOR_COSTEER_SETTINGS.data_folder) / "README.md",
     )

-    Path(FACTOR_IMPLEMENT_SETTINGS.data_folder_debug).mkdir(parents=True, exist_ok=True)
+    Path(FACTOR_COSTEER_SETTINGS.data_folder_debug).mkdir(parents=True, exist_ok=True)
     shutil.copy(
         Path(__file__).parent / "factor_data_template" / "daily_pv_debug.h5",
-        Path(FACTOR_IMPLEMENT_SETTINGS.data_folder_debug) / "daily_pv.h5",
+        Path(FACTOR_COSTEER_SETTINGS.data_folder_debug) / "daily_pv.h5",
     )
     shutil.copy(
         Path(__file__).parent / "factor_data_template" / "README.md",
-        Path(FACTOR_IMPLEMENT_SETTINGS.data_folder_debug) / "README.md",
+        Path(FACTOR_COSTEER_SETTINGS.data_folder_debug) / "README.md",
     )

@@ -142,14 +142,14 @@ def get_data_folder_intro(fname_reg: str = ".*", flags=0, variable_mapping=None)
     """
     if (
-        not Path(FACTOR_IMPLEMENT_SETTINGS.data_folder).exists()
-        or not Path(FACTOR_IMPLEMENT_SETTINGS.data_folder_debug).exists()
+        not Path(FACTOR_COSTEER_SETTINGS.data_folder).exists()
+        or not Path(FACTOR_COSTEER_SETTINGS.data_folder_debug).exists()
     ):
         # FIXME: (xiao) I think this is writing in a hard-coded way.
         # get data folder intro does not imply that we are generating the data folder.
         generate_data_folder_from_qlib()
     content_l = []
-    for p in Path(FACTOR_IMPLEMENT_SETTINGS.data_folder_debug).iterdir():
+    for p in Path(FACTOR_COSTEER_SETTINGS.data_folder_debug).iterdir():
         if re.match(fname_reg, p.name, flags) is not None:
             if variable_mapping:
                 content_l.append(get_file_desc(p, variable_mapping.get(p.stem, [])))
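After the rename, callers read the factor data folders from FACTOR_COSTEER_SETTINGS exactly as they previously did from FACTOR_IMPLEMENT_SETTINGS. A small usage sketch mirroring the folder handling in the hunk above (the printed file names are illustrative, not guaranteed contents):

from pathlib import Path

from rdagent.components.coder.factor_coder.config import FACTOR_COSTEER_SETTINGS

# data_folder / data_folder_debug are path-like fields on the settings object;
# callers wrap them in Path before touching the filesystem
debug_dir = Path(FACTOR_COSTEER_SETTINGS.data_folder_debug)
debug_dir.mkdir(parents=True, exist_ok=True)
print(sorted(p.name for p in debug_dir.iterdir()))  # e.g. ['README.md', 'daily_pv.h5']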
diff --git a/rdagent/utils/env.py b/rdagent/utils/env.py
index 66439e72..d6f81784 100644
--- a/rdagent/utils/env.py
+++ b/rdagent/utils/env.py
@@ -11,25 +11,22 @@
 import os
 import pickle
 import subprocess
-import sys
 import uuid
-import zipfile
 from abc import abstractmethod
-from concurrent.futures import ThreadPoolExecutor, TimeoutError
 from pathlib import Path
-from typing import Dict, Generic, Optional, TypeVar
+from typing import Generic, Optional, TypeVar

 import docker
 import docker.models
 import docker.models.containers
 from pydantic import BaseModel
-from pydantic_settings import BaseSettings
 from rich import print
 from rich.console import Console
 from rich.progress import Progress, SpinnerColumn, TextColumn
 from rich.rule import Rule
 from rich.table import Table

+from rdagent.core.conf import ExtendedBaseSettings, ExtendedSettingsConfigDict
 from rdagent.log import rdagent_logger as logger

 ASpecificBaseModel = TypeVar("ASpecificBaseModel", bound=BaseModel)
@@ -37,8 +34,8 @@ class Env(Generic[ASpecificBaseModel]):
     """
-    We use BaseModel as the setting due to the featurs it provides
-    - It provides base typing and checking featurs.
+    We use BaseModel as the setting due to the features it provides
+    - It provides base typing and checking features.
     - loading and dumping the information will be easier: for example, we can use package like `pydantic-yaml`
     """
@@ -121,7 +118,7 @@ def run(self, entry: str | None = None, local_path: Optional[str] = None, env: d
 ## Docker Environment -----
-class DockerConf(BaseSettings):
+class DockerConf(ExtendedBaseSettings):
     build_from_dockerfile: bool = False
     dockerfile_folder_path: Optional[Path] = (
         None  # the path to the dockerfile; an optional path provided when build_from_dockerfile is False
     )
@@ -143,8 +140,7 @@ class DockerConf(BaseSettings):
 class QlibDockerConf(DockerConf):
-    class Config:
-        env_prefix = "QLIB_DOCKER_"  # Use QLIB_DOCKER_ as prefix for environment variables
+    model_config = ExtendedSettingsConfigDict(env_prefix="QLIB_DOCKER_")

     build_from_dockerfile: bool = True
     dockerfile_folder_path: Path = Path(__file__).parent.parent / "scenarios" / "qlib" / "docker"
@@ -157,9 +153,7 @@ class Config:
 class DMDockerConf(DockerConf):
-    # Data Mining Docker
-    class Config:
-        env_prefix = "DM_DOCKER_"
+    model_config = ExtendedSettingsConfigDict(env_prefix="DM_DOCKER_")

     build_from_dockerfile: bool = True
     dockerfile_folder_path: Path = Path(__file__).parent.parent / "scenarios" / "data_mining" / "docker"
@@ -175,8 +169,7 @@ class Config:
 class KGDockerConf(DockerConf):
-    class Config:
-        env_prefix = "KG_DOCKER_"
+    model_config = ExtendedSettingsConfigDict(env_prefix="KG_DOCKER_")

     build_from_dockerfile: bool = True
     dockerfile_folder_path: Path = Path(__file__).parent.parent / "scenarios" / "kaggle" / "docker" / "kaggle_docker"
@@ -196,8 +189,7 @@ class Config:
 class MLEBDockerConf(DockerConf):
-    class Config:
-        env_prefix = "MLEB_DOCKER_"
+    model_config = ExtendedSettingsConfigDict(env_prefix="MLEB_DOCKER_")

     build_from_dockerfile: bool = True
     dockerfile_folder_path: Path = Path(__file__).parent.parent / "scenarios" / "kaggle" / "docker" / "mle_bench_docker"
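The class Config to model_config swap throughout env.py follows the pydantic v2 settings convention: a config dict with env_prefix makes prefixed environment variables populate the subclass fields. A minimal sketch of the equivalent pattern using stock pydantic-settings (ExtendedBaseSettings and ExtendedSettingsConfigDict are RD-Agent wrappers; BaseSettings and SettingsConfigDict below are the upstream names, and the image field is a hypothetical example):

import os

from pydantic_settings import BaseSettings, SettingsConfigDict


class DockerConfSketch(BaseSettings):
    build_from_dockerfile: bool = False
    image: str = "local_qlib:latest"  # hypothetical field for illustration


class QlibDockerConfSketch(DockerConfSketch):
    # env vars such as QLIB_DOCKER_IMAGE override the defaults below
    model_config = SettingsConfigDict(env_prefix="QLIB_DOCKER_")

    build_from_dockerfile: bool = True


os.environ["QLIB_DOCKER_IMAGE"] = "my_qlib:dev"
print(QlibDockerConfSketch().image)  # -> "my_qlib:dev"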