Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Archive] Code cot #350

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 6 additions & 16 deletions rdagent/components/coder/factor_coder/CoSTEER/evolving_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from rdagent.core.prompts import Prompts
from rdagent.core.utils import multiprocessing_wrapper
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.agent.tpl import T

if TYPE_CHECKING:
from rdagent.components.coder.factor_coder.CoSTEER.knowledge_management import (
Expand Down Expand Up @@ -127,34 +128,22 @@ def implement_one_factor(

queried_former_failed_knowledge_to_render = queried_former_failed_knowledge

system_prompt = (
Environment(undefined=StrictUndefined)
.from_string(
implement_prompts["evolving_strategy_factor_implementation_v1_system"],
)
.render(
system_prompt = T(".prompts:evolving_strategy_factor_implementation_v1_system").r(
scenario=self.scen.get_scenario_all_desc(),
queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
)
enable_code_cot=FACTOR_IMPLEMENT_SETTINGS.enable_code_cot,
)
session = APIBackend(use_chat_cache=FACTOR_IMPLEMENT_SETTINGS.coder_use_cache).build_chat_session(
session_system_prompt=system_prompt,
)

queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge
for _ in range(10): # max attempt to reduce the length of user_prompt
user_prompt = (
Environment(undefined=StrictUndefined)
.from_string(
implement_prompts["evolving_strategy_factor_implementation_v1_user"],
)
.render(
user_prompt = T(".prompts:evolving_strategy_factor_implementation_v1_user").r(
factor_information_str=factor_information_str,
queried_similar_successful_knowledge=queried_similar_successful_knowledge_to_render,
queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
)
.strip("\n")
)
).strip("\n")
if (
session.build_chat_completion_message_and_calculate_token(
user_prompt,
Expand Down Expand Up @@ -228,6 +217,7 @@ def implement_one_factor(
.render(
scenario=self.scen.get_scenario_all_desc(),
queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
enable_code_cot=FACTOR_IMPLEMENT_SETTINGS.enable_code_cot,
)
)

Expand Down
3 changes: 3 additions & 0 deletions rdagent/components/coder/factor_coder/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ class Config:
max_loop: int = 10
"""Maximum number of task implementation loops"""

enable_code_cot: bool = False
"""Indicates whether to enable code cot"""

knowledge_base_path: Union[str, None] = None
"""Path to the knowledge base"""

Expand Down
40 changes: 39 additions & 1 deletion rdagent/components/coder/factor_coder/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,46 @@ evolving_strategy_factor_implementation_v1_system: |-
{{ queried_former_failed_knowledge[-1].feedback }}
{% endif %}

{% if enable_code_cot %}
-------------- **The code standard** ----------------
You must write code with detailed comments to explain your thought process!!!
Even if other example code does not follow this, you should strictly adhere to this code standard.
If data processing is involved, include comments to describe the index and columns at each step.
Here is an example:
```python
import pandas as pd
focused_fields = ["NET_PROFIT_INCL_MIN_INT_INC"]

df = pd.read_hdf("ASHAREINCOME.h5", key="data") # type: pd.DataFrame; index with multi-level [datetime, instrument]; columns with single level that contains values like [REPORT_PERIOD, STATEMENT_TYPE, ..., NET_PROFIT_INCL_MIN_INT_INC]

# filter only part of the report statement
df = df[df["STATEMENT_TYPE"] == "408006000"] # type: pd.DataFrame; index with multi-level [datetime, instrument]; columns with single level that contains values like [REPORT_PERIOD, STATEMENT_TYPE, ..., NET_PROFIT_INCL_MIN_INT_INC]
df = df[["REPORT_PERIOD"] + focused_fields] # type: pd.DataFrame; index with multi-level [datetime, instrument]; columns with single level that contains values like [REPORT_PERIOD, NET_PROFIT_INCL_MIN_INT_INC]
df.columns.name = "field" # type: pd.DataFrame; index with multi-level [datetime, instrument]; columns level [field] that contains values like [REPORT_PERIOD, NET_PROFIT_INCL_MIN_INT_INC]
df = df.set_index("REPORT_PERIOD", append=True).unstack(level="REPORT_PERIOD") # type: pd.DataFrame; index with multi-level [datetime, instrument]; columns multi-level [field, REPORT_PERIOD]
df = df.unstack("instrument") # type: pd.DataFrame; index with level [datetime]; columns multi-level [field, REPORT_PERIOD, instrument]
# forward-fill the previously published value to later release dates
df = df.sort_index().ffill(axis=0) # type: pd.DataFrame; index with level [datetime]; columns multi-level [field, REPORT_PERIOD, instrument]

pit_f_final = {}
for idx, row in df.iterrows():
pit_df = row.unstack("field").sort_index() # ensure the report_period is in ascending order # type: pd.DataFrame; index with level [REPORT_PERIOD, instrument]; columns multi-level [field]
# 1) collapse all the columns
pit_f = pit_df["NET_PROFIT_INCL_MIN_INT_INC"] # type: pd.Series; index with level [REPORT_PERIOD, instrument]
# 2)
pit_f_df = pit_f.unstack("instrument") # type: pd.DataFrame; index with level [REPORT_PERIOD]; columns multi-level [instrument]
pit_f_ttm = pit_f_df.iloc[-4:, :].sum() # this for TTM # type: pd.Series; index with level [instrument]
pit_f_final[idx] = pit_f_ttm

pit_f_final = pd.DataFrame(pit_f_final).T # type: pd.DataFrame; index with level [datetime]; columns multi-level [instrument]
pit_f_final = pit_f_final.stack() # type: pd.Series; index with level [datetime, instrument]
pit_f_final.to_frame("net_profit_ttm").to_hdf('result.h5', key='data', mode='w')
```
{% endif %}

Please respond with the code in the following JSON format. Here is an example structure for the JSON output:
{
"code": "The Python code as a string."
"code": "The Python code as a string that follows the code standard."
}

evolving_strategy_factor_implementation_v1_user: |-
Expand All @@ -86,6 +123,7 @@ evolving_strategy_factor_implementation_v1_user: |-
{% endfor %}
{% endif %}


evolving_strategy_factor_implementation_v2_user: |-
--------------Target factor information:---------------
{{ factor_information_str }}
Expand Down
2 changes: 2 additions & 0 deletions rdagent/core/evolving_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ def multistep_evolve(
)
# TODO: Due to design issues, we have chosen to ignore this mypy error.
logger.log_object(evo.sub_workspace_list, tag="evolving code") # type: ignore[attr-defined]
for sw in evo.sub_workspace_list: # type: ignore[attr-defined]
logger.info(f"evolving code workspace: {sw}")

# 4. Pack evolve results
es = EvoStep(evo, queried_knowledge)
Expand Down
3 changes: 3 additions & 0 deletions rdagent/core/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ def execute(self) -> object | None:
self.inject_code(**self.code_dict)
return None

# Short human-readable summary of the workspace: always shows the workspace
# path, and appends the target task's name when a target task is set.
def __str__(self) -> str:
# The f-string `=` specifier renders each field as `<expression>=<repr(value)>`;
# the closing bracket is added in whichever branch ends the string.
return f"Workspace[{self.workspace_path=}" + ("]" if self.target_task is None else f",{self.target_task.name=}]")


ASpecificWSForExperiment = TypeVar("ASpecificWSForExperiment", bound=Workspace)
ASpecificWSForSubTasks = TypeVar("ASpecificWSForSubTasks", bound=Workspace)
Expand Down
8 changes: 8 additions & 0 deletions rdagent/utils/agent/tpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@
PROJ_PATH = DIRNAME.parent.parent


# TODO: Consider folding the Truncator class into the T class so it can be reused.
# class Truncator:
# def __init__(self, limit: int):
# self.limit = limit
#
# def render_with_trunc(self, render_func, context: dict):


# class T(SingletonBaseClass): TODO: singleton does not support args now.
class T:
"""Use the simplest way to (C)reate a Template and (r)ender it!!"""
Expand Down
Loading