diff --git a/docs/_static/RD2bench.json b/docs/_static/RD2bench.json
new file mode 100644
index 00000000..eb54dc56
--- /dev/null
+++ b/docs/_static/RD2bench.json
@@ -0,0 +1,332 @@
+{
+    "alpha053_15": {
+        "description": "Reversal class factor, negative delta of a ratio involving close, low, and high prices over 15 days.",
+        "formulation": "-1 \\times \\Delta\\left(\\frac{(\\text{close} - \\text{low}) - (\\text{high} - \\text{close})}{\\text{close} - \\text{low}}, 15\\right)",
+        "variables": {
+            "\\Delta(x, d)": "Change in 'x' over 'd' days.",
+            "\\text{close}": "Closing price of the stock.",
+            "\\text{low}": "Lowest price of the stock for the day.",
+            "\\text{high}": "Highest price of the stock for the day."
+        },
+        "Category": "Volume&Price",
+        "Difficulty": "Easy",
+        "gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha053\nnew_df['ratio'] = (new_df['$close'] - new_df['$low'] - (new_df['$high'] - new_df['$close'])) / (new_df['$close'] - new_df['$low'])\n# the change of ratio in new_df over the 15 days\nnew_df['result']=-new_df['ratio'].diff(15)\n# transfer the result to series\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "liquidity_imbalance": {
+        "description": "liquidity_imbalance=std(minute trading liquidity_imbalance)/mean(minute trading liquidity_imbalance).",
+        "formulation": "liquidity_imbalance = \\frac{\\text{std}(\\text{minute trading liquidity_imbalance})}{\\text{mean}(\\text{minute liquidity_imbalance})}",
+        "variables": {
+            "std(minute liquidity_imbalance)": "Standard deviation of trading liquidity_imbalance for each minute of the trading day.",
+            "mean(minute liquidity_imbalance)": "Mean of trading liquidity_imbalance for each minute of the trading day.",
+            "liquidity_imbalance": "(bid_size-ask_size)/(bid_size+ask_size), we use something like bidV for the size"
+        },
+        "Category": "High-Frequency",
+        "Difficulty": "Medium",
+        "gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['liquidity_imbalance']=(sample_df['bidV']-sample_df['askV'])/(sample_df['bidV']+sample_df['askV'])\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['liquidity_imbalance']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['liquidity_imbalance'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['liquidity_imbalance']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
+    },
+    "liquidity_imbalance_2": {
+        "description": "liquidity_imbalance=std(minute trading liquidity_imbalance)/mean(minute trading liquidity_imbalance).",
+        "formulation": "liquidity_imbalance = \\frac{\\text{std}(\\text{minute trading liquidity_imbalance})}{\\text{mean}(\\text{minute liquidity_imbalance})}",
+        "variables": {
+            "std(minute liquidity_imbalance)": "Standard deviation of trading liquidity_imbalance for each minute of the trading day.",
+            "mean(minute liquidity_imbalance)": "Mean of trading liquidity_imbalance for each minute of the trading day.",
+            "liquidity_imbalance": "(bid_size-ask_size)/2*(bid_size+ask_size), we use something like bidV for the size"
+        },
+        "Category": "High-Frequency",
+        "Difficulty": "Medium",
+        "gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['liquidity_imbalance']=(sample_df['bidV']-sample_df['askV'])/((sample_df['bidV']+sample_df['askV'])*2)\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['liquidity_imbalance']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['liquidity_imbalance'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['liquidity_imbalance']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
+    },
+    "liquidity_imbalance_3": {
+        "description": "liquidity_imbalance=std(minute trading liquidity_imbalance)/mean(minute trading liquidity_imbalance).",
+        "formulation": "liquidity_imbalance = \\frac{\\text{std}(\\text{minute trading liquidity_imbalance})}{\\text{mean}(\\text{minute liquidity_imbalance})}",
+        "variables": {
+            "std(minute liquidity_imbalance)": "Standard deviation of trading liquidity_imbalance for each minute of the trading day.",
+            "mean(minute liquidity_imbalance)": "Mean of trading liquidity_imbalance for each minute of the trading day.",
+            "liquidity_imbalance": "(bid_size-ask_size)/3*(bid_size+ask_size), we use something like bidV for the size"
+        },
+        "Category": "High-Frequency",
+        "Difficulty": "Medium",
+        "gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['liquidity_imbalance']=(sample_df['bidV']-sample_df['askV'])/((sample_df['bidV']+sample_df['askV'])*3)\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['liquidity_imbalance']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['liquidity_imbalance'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['liquidity_imbalance']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
+    },
+    "micro_price": {
+        "description": "micro_price=std(minute trading micro_price)/mean(minute trading micro_price).",
+        "formulation": "micro_price = \\frac{\\text{std}(\\text{minute trading micro_price})}{\\text{mean}(\\text{minute micro_price})}",
+        "variables": {
+            "std(minute micro_price)": "Standard deviation of trading micro_price for each minute of the trading day.",
+            "mean(minute micro_price)": "Mean of trading micro_price for each minute of the trading day.",
+            "micro_price": "((df['bid_price'] * df['ask_size']) + (df['ask_price'] * df['bid_size'])) / (df['bid_size'] + df['ask_size'])"
+        },
+        "Category": "High-Frequency",
+        "Difficulty": "Hard",
+        "gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['micro_price']=(sample_df['bid']*sample_df['askV']+sample_df['ask']*sample_df['bidV'])/(sample_df['bidV']+sample_df['askV'])\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['micro_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['micro_price'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['micro_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
+    },
+    "micro_price_2": {
+        "description": "micro_price_2=std(minute trading micro_price)/mean(minute trading micro_price).",
+        "formulation": "micro_price_2 = \\frac{\\text{std}(\\text{minute trading micro_price})}{\\text{mean}(\\text{minute micro_price})}",
+        "variables": {
+            "std(minute micro_price)": "Standard deviation of trading micro_price for each minute of the trading day.",
+            "mean(minute micro_price)": "Mean of trading micro_price for each minute of the trading day.",
+            "micro_price": "((df['bid_price'] * df['ask_size']) + (df['ask_price'] * df['bid_size'])) / 2*(df['bid_size'] + df['ask_size']), we use something like bidV for the size"
+        },
+        "Category": "High-Frequency",
+        "Difficulty": "Hard",
+        "gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['micro_price']=(sample_df['bid']*sample_df['askV']+sample_df['ask']*sample_df['bidV'])/((sample_df['bidV']+sample_df['askV'])*2)\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['micro_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['micro_price'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['micro_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
+    },
+    "micro_price_3": {
+        "description": "micro_price_3=std(minute trading micro_price)/mean(minute trading micro_price).",
+        "formulation": "micro_price_3 = \\frac{\\text{std}(\\text{minute trading micro_price})}{\\text{mean}(\\text{minute micro_price})}",
+        "variables": {
+            "std(minute micro_price)": "Standard deviation of trading micro_price for each minute of the trading day.",
+            "mean(minute micro_price)": "Mean of trading micro_price for each minute of the trading day.",
+            "micro_price": "((df['bid_price'] * df['ask_size']) + (df['ask_price'] * df['bid_size'])) / 3*(df['bid_size'] + df['ask_size']), we use something like bidV for the size"
+        },
+        "Category": "High-Frequency",
+        "Difficulty": "Hard",
+        "gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['micro_price']=(sample_df['bid']*sample_df['askV']+sample_df['ask']*sample_df['bidV'])/((sample_df['bidV']+sample_df['askV'])*3)\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['micro_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['micro_price'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['micro_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
+    },
+    "mid_price": {
+        "description": "mid_price=std(minute trading mid_price)/mean(minute trading mid_price).",
+        "formulation": "mid_price = \\frac{\\text{std}(\\text{minute trading mid price})}{\\text{mean}(\\text{minute mid price})}",
+        "variables": {
+            "std(minute mid_price)": "Standard deviation of trading mid_price for each minute of the trading day.",
+            "mean(minute mid_price)": "Mean of trading mid_price for each minute of the trading day.",
+            "mid_price": "The average of the bid and ask prices."
+        },
+        "Category": "High-Frequency",
+        "Difficulty": "Easy",
+        "gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['mid_price']=(sample_df['bid']+sample_df['ask'])/2\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['mid_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\nstats['mid_price'] = stats['std'] / stats['mean']\nresult=stats['mid_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
+    },
+    "mid_price_2": {
+        "description": "mid_price=std(minute trading mid_price)/mean(minute trading mid_price).",
+        "formulation": "mid_price = \\frac{\\text{std}(\\text{minute trading mid price})}{\\text{mean}(\\text{minute mid price})}",
+        "variables": {
+            "std(minute mid_price)": "Standard deviation of trading mid_price for each minute of the trading day.",
+            "mean(minute mid_price)": "Mean of trading mid_price for each minute of the trading day.",
+            "mid_price_2": "the average of the bid and ask prices plus the average of the bid and ask size (bidV and askV)."
+        },
+        "Category": "High-Frequency",
+        "Difficulty": "Easy",
+        "gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['mid_price']=(sample_df['bid']+sample_df['ask'])/2+(sample_df['bidV']+sample_df['askV'])/2\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['mid_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\nstats['mid_price'] = stats['std'] / stats['mean']\nresult=stats['mid_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
+    },
+    "mid_price_3": {
+        "description": "mid_price=std(minute trading mid_price)/mean(minute trading mid_price).",
+        "formulation": "mid_price = \\frac{\\text{std}(\\text{minute trading mid price})}{\\text{mean}(\\text{minute mid price})}",
+        "variables": {
+            "std(minute mid_price)": "Standard deviation of trading mid_price for each minute of the trading day.",
+            "mean(minute mid_price)": "Mean of trading mid_price for each minute of the trading day.",
+            "mid_price_3": "The coefficient of variation (CV) of the mid-price for each minute of the trading day, calculated as the standard deviation of the mid-price divided by the mean mid-price."
+        },
+        "Category": "High-Frequency",
+        "Difficulty": "Easy",
+        "gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['mid_price']=(sample_df['bid']+sample_df['ask'])/3\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['mid_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\nstats['mid_price'] = stats['std'] / stats['mean']\nresult=stats['mid_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
+    },
+    "PB_ROE": {
+        "description": "Constructed using the ranking difference between PB and ROE, with regression versions of PB and ROE replacing original PB and ROE to obtain reconstructed factor values.",
+        "formulation": "\\text{rank}(PB_t) - \\text{rank}(ROE_t)",
+        "variables": {
+            "\\text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
+            "\\text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
+        },
+        "Category": "Fundamentals",
+        "Difficulty": "Easy",
+        "gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\ndata = data_f.reset_index()\n# Calculate the rank of PB and ROE\ndata['PB_rank'] = data.groupby('datetime')['B/P'].rank()\ndata['ROE_rank'] = data.groupby('datetime')['ROE'].rank()\n# Calculate the difference between the ranks\ndata['PB_ROE'] = data['PB_rank'] - data['ROE_rank']\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(data['PB_ROE']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "PB_ROE_2": {
+        "description": "Constructed using the ranking difference between PB/2 and ROE, with regression versions of PB and ROE replacing original PB and ROE to obtain reconstructed factor values.",
+        "formulation": "\\text{rank}(PB_t)/2 - \\text{rank}(ROE_t)",
+        "variables": {
+            "\\text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
+            "\\text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
+        },
+        "Category": "Fundamentals",
+        "Difficulty": "Easy",
+        "gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\ndata = data_f.reset_index()\n# Calculate the rank of PB and ROE\ndata['PB_rank'] = data.groupby('datetime')['B/P'].rank()\ndata['ROE_rank'] = data.groupby('datetime')['ROE'].rank()\n# Calculate the difference between the ranks\ndata['PB_ROE'] = data['PB_rank']/2 - data['ROE_rank']\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(data['PB_ROE']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "PB_ROE_3": {
+        "description": "Constructed using the ranking difference between PB/3 and ROE, with regression versions of PB and ROE replacing original PB and ROE to obtain reconstructed factor values.",
+        "formulation": "\\text{rank}(PB_t)/3 - \\text{rank}(ROE_t)",
+        "variables": {
+            "\\text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
+            "\\text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
+        },
+        "Category": "Fundamentals",
+        "Difficulty": "Easy",
+        "gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\ndata = data_f.reset_index()\n# Calculate the rank of PB and ROE\ndata['PB_rank'] = data.groupby('datetime')['B/P'].rank()\ndata['ROE_rank'] = data.groupby('datetime')['ROE'].rank()\n# Calculate the difference between the ranks\ndata['PB_ROE'] = data['PB_rank']/3 - data['ROE_rank']\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(data['PB_ROE']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "PB_ROE_movement": {
+        "description": "PB_ROE_movement=five day PB_ROE movement indicator(-1 and 1 or 0).",
+        "formulation": "PB_ROE_movement = 5_day_movement(PB_ROE), PB_ROE = \\text{rank}(PB_t) - \\text{rank}(ROE_t)",
+        "variables": {
+            "PB_ROE": "the ranking difference between PB and ROE.",
+            "5_day_PB_ROE_movement": "1 if PB_ROE is higher than the PB_ROE 5 days ago, -1 if PB_ROE is lower than the PB_ROE 5 days ago, 0 if PB_ROE is the same as the PB_ROE 5 days ago.",
+            "\\text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
+            "\\text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
+        },
+        "Category": "Fundamentals",
+        "Difficulty": "Hard",
+        "gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Calculate the rank of PB and ROE\nsample_df['PB_rank'] = sample_df.groupby('datetime')['B/P'].rank()\nsample_df['ROE_rank'] = sample_df.groupby('datetime')['ROE'].rank()\nsample_df['PB_ROE'] = sample_df['PB_rank'] - sample_df['ROE_rank']\n# Group by instrument and date\nsample_df['PB_ROE_movement'] = sample_df['PB_ROE'].diff(periods=5).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['PB_ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE_movement']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "PB_ROE_movement_10": {
+        "description": "PB_ROE_movement=10 days PB_ROE movement indicator(-1 and 1 or 0).",
+        "formulation": "PB_ROE_movement = 10_day_movement(PB_ROE), PB_ROE = \\text{rank}(PB_t) - \\text{rank}(ROE_t)",
+        "variables": {
+            "PB_ROE": "the ranking difference between PB and ROE.",
+            "10_day_PB_ROE_movement": "1 if PB_ROE is higher than the PB_ROE 10 days ago, -1 if PB_ROE is lower than the PB_ROE 10 days ago, 0 if PB_ROE is the same as the PB_ROE 10 days ago.",
+            "\\text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
+            "\\text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
+        },
+        "Category": "Fundamentals",
+        "Difficulty": "Hard",
+        "gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Calculate the rank of PB and ROE\nsample_df['PB_rank'] = sample_df.groupby('datetime')['B/P'].rank()\nsample_df['ROE_rank'] = sample_df.groupby('datetime')['ROE'].rank()\nsample_df['PB_ROE'] = sample_df['PB_rank'] - sample_df['ROE_rank']\n# Group by instrument and date\nsample_df['PB_ROE_movement'] = sample_df['PB_ROE'].diff(periods=10).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['PB_ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE_movement']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "PB_ROE_movement_20": {
+        "description": "PB_ROE_movement=20 days PB_ROE movement indicator(-1 and 1 or 0).",
+        "formulation": "PB_ROE_movement = 20_day_movement(PB_ROE), PB_ROE = \\text{rank}(PB_t) - \\text{rank}(ROE_t)",
+        "variables": {
+            "PB_ROE": "the ranking difference between PB and ROE.",
+            "20_day_PB_ROE_movement": "1 if PB_ROE is higher than the PB_ROE 20 days ago, -1 if PB_ROE is lower than the PB_ROE 20 days ago, 0 if PB_ROE is the same as the PB_ROE 20 days ago.",
+            "\\text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
+            "\\text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
+        },
+        "Category": "Fundamentals",
+        "Difficulty": "Hard",
+        "gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Calculate the rank of PB and ROE\nsample_df['PB_rank'] = sample_df.groupby('datetime')['B/P'].rank()\nsample_df['ROE_rank'] = sample_df.groupby('datetime')['ROE'].rank()\nsample_df['PB_ROE'] = sample_df['PB_rank'] - sample_df['ROE_rank']\n# Group by instrument and date\nsample_df['PB_ROE_movement'] = sample_df['PB_ROE'].diff(periods=20).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['PB_ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE_movement']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "ROE_movement": {
+        "description": "ROE_movement=five day ROE movement indicator(-1 and 1 or 0).",
+        "formulation": "ROE_movement = 5_day_movement(ROE)",
+        "variables": {
+            "ROE": "ROE in fundamental statistics.",
+            "5_day_ROE_movement": "1 if ROE is higher than the ROE 5 days ago, -1 if ROE is lower than the ROE 5 days ago, 0 if ROE is the same as the ROE 5 days ago."
+        },
+        "Category": "Fundamentals",
+        "Difficulty": "Medium",
+        "gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Group by instrument and date\nsample_df['ROE_movement'] = sample_df['ROE'].diff(periods=5).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['ROE_movement']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "ROE_movement_10": {
+        "description": "ROE_movement_10=ten day ROE movement indicator(-1 and 1 or 0).",
+        "formulation": "ROE_movement = 10_day_movement(ROE)",
+        "variables": {
+            "ROE": "ROE in fundamental statistics.",
+            "10_day_ROE_movement": "1 if ROE is higher than the ROE 10 days ago, -1 if ROE is lower than the ROE 10 days ago, 0 if ROE is the same as the ROE 10 days ago."
+        },
+        "Category": "Fundamentals",
+        "Difficulty": "Medium",
+        "gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Group by instrument and date\nsample_df['ROE_movement'] = sample_df['ROE'].diff(periods=10).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['ROE_movement']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "ROE_movement_20": {
+        "description": "ROE_movement_20=20 day ROE movement indicator(-1 and 1 or 0).",
+        "formulation": "ROE_movement_20 = 20_day_movement(ROE)",
+        "variables": {
+            "ROE": "ROE in fundamental statistics.",
+            "20_day_ROE_movement": "1 if ROE is higher than the ROE 20 days ago, -1 if ROE is lower than the ROE 20 days ago, 0 if ROE is the same as the ROE 20 days ago."
+        },
+        "Category": "Fundamentals",
+        "Difficulty": "Medium",
+        "gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Group by instrument and date\nsample_df['ROE_movement'] = sample_df['ROE'].diff(periods=20).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['ROE_movement']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "alpha_pv_diff": {
+        "description": "alpha_pv_diff is defined as the ratio of the difference between close prices 10 days change and open prices 10 days change to the sum of the highest minus lowest prices plus a small constant.",
+        "formulation": "\\frac{(\\text{close_diff10} - \\text{open_diff10})}{(\\text{high} - \\text{low} + 0.001)}",
+        "variables": {
+            "close": "Closing price of the stock",
+            "open": "Opening price of the stock",
+            "high": "Highest price of the stock during the day",
+            "low": "Lowest price of the stock during the day"
+        },
+        "Category": "Volume&Price",
+        "Difficulty": "Medium",
+        "gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha101\nnew_df['result'] = (new_df['$close'].diff(10) - new_df['$open'].diff(10)) / (new_df['$high'] - new_df['$low'] + 0.001)\n# keep the index of the original dataframe\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\n# transfer the result to series\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "alpha_pv_diff_15": {
+        "description": "alpha_pv_diff is defined as the ratio of the difference between close prices 15 days change and open prices 15 days change to the sum of the highest minus lowest prices plus a small constant.",
+        "formulation": "\\frac{(\\text{close_diff15} - \\text{open_diff15})}{(\\text{high} - \\text{low} + 0.001)}",
+        "variables": {
+            "close": "Closing price of the stock",
+            "open": "Opening price of the stock",
+            "high": "Highest price of the stock during the day",
+            "low": "Lowest price of the stock during the day"
+        },
+        "Category": "Volume&Price",
+        "Difficulty": "Medium",
+        "gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha101\nnew_df['result'] = (new_df['$close'].diff(15) - new_df['$open'].diff(15)) / (new_df['$high'] - new_df['$low'] + 0.001)\n# keep the index of the original dataframe\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\n# transfer the result to series\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "alpha_pv_diff_20": {
+        "description": "alpha_pv_diff is defined as the ratio of the difference between close prices 20 days change and open prices 20 days change to the sum of the highest minus lowest prices plus a small constant.",
+        "formulation": "\\frac{(\\text{close_diff20} - \\text{open_diff20})}{(\\text{high} - \\text{low} + 0.001)}",
+        "variables": {
+            "close": "Closing price of the stock",
+            "open": "Opening price of the stock",
+            "high": "Highest price of the stock during the day",
+            "low": "Lowest price of the stock during the day"
+        },
+        "Category": "Volume&Price",
+        "Difficulty": "Medium",
+        "gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha101\nnew_df['result'] = (new_df['$close'].diff(20) - new_df['$open'].diff(20)) / (new_df['$high'] - new_df['$low'] + 0.001)\n# keep the index of the original dataframe\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\n# transfer the result to series\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "alpha_pv_diff_pct": {
+        "description": "alpha_pv is defined as the ratio of the difference between close prices 10 days change and open prices 10 days change to the sum of the highest prices 10 days change ratio minus lowest prices 10 days change ratio plus a small constant.",
+        "formulation": "\\frac{(\\text{close_diff10} - \\text{open_diff10})}{(\\text{high_pct10} - \\text{low_pct10} + 0.001)}",
+        "variables": {
+            "close": "Closing price of the stock",
+            "open": "Opening price of the stock",
+            "high": "Highest price of the stock during the day",
+            "low": "Lowest price of the stock during the day"
+        },
+        "Category": "Volume&Price",
+        "Difficulty": "Hard",
+        "gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha101\nnew_df['result'] = (new_df['$close'].diff(10) - new_df['$open'].diff(10)) / (new_df['$high'].pct_change(10) - new_df['$low'].pct_change(10) + 0.001)\n# keep the index of the original dataframe\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\n# transfer the result to series\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "alpha_pv_diff_pct_15": {
+        "description": "alpha_pv is defined as the ratio of the difference between close prices 15 days change and open prices 15 days change to the sum of the highest prices 10 days change ratio minus lowest prices 10 days change ratio plus a small constant.",
+        "formulation": "\\frac{(\\text{close_diff15} - \\text{open_diff15})}{(\\text{high_pct10} - \\text{low_pct10} + 0.001)}",
+        "variables": {
+            "close": "Closing price of the stock",
+            "open": "Opening price of the stock",
+            "high": "Highest price of the stock during the day",
+            "low": "Lowest price of the stock during the day"
+        },
+        "Category": "Volume&Price",
+        "Difficulty": "Hard",
+        "gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha101\nnew_df['result'] = (new_df['$close'].diff(15) - new_df['$open'].diff(15)) / (new_df['$high'].pct_change(10) - new_df['$low'].pct_change(10) + 0.001)\n# keep the index of the original dataframe\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\n# transfer the result to series\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "alpha_pv_diff_pct_20": {
+        "description": "alpha_pv is defined as the ratio of the difference between close prices 20 days change and open prices 20 days change to the sum of the highest prices 10 days change ratio minus lowest prices 10 days change ratio plus a small constant.",
+        "formulation": "\\frac{(\\text{close_diff20} - \\text{open_diff20})}{(\\text{high_pct10} - \\text{low_pct10} + 0.001)}",
+        "variables": {
+            "close": "Closing price of the stock",
+            "open": "Opening price of the stock",
+            "high": "Highest price of the stock during the day",
+            "low": "Lowest price of the stock during the day"
+        },
+        "Category": "Volume&Price",
+        "Difficulty": "Hard",
+        "gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha101\nnew_df['result'] = (new_df['$close'].diff(20) - new_df['$open'].diff(20)) / (new_df['$high'].pct_change(10) - new_df['$low'].pct_change(10) + 0.001)\n# keep the index of the original dataframe\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\n# transfer the result to series\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "alpha053": {
+        "description": "Reversal class factor, negative delta of a ratio involving close, low, and high prices over 9 days.",
+        "formulation": "-1 \\times \\Delta\\left(\\frac{(\\text{close} - \\text{low}) - (\\text{high} - \\text{close})}{\\text{close} - \\text{low}}, 9\\right)",
+        "variables": {
+            "\\Delta(x, d)": "Change in 'x' over 'd' days.",
+            "\\text{close}": "Closing price of the stock.",
+            "\\text{low}": "Lowest price of the stock for the day.",
+            "\\text{high}": "Highest price of the stock for the day."
+        },
+        "Category": "Volume&Price",
+        "Difficulty": "Easy",
+        "gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha053\nnew_df['ratio'] = (new_df['$close'] - new_df['$low'] - (new_df['$high'] - new_df['$close'])) / (new_df['$close'] - new_df['$low'])\n# the change of ratio in new_df over the 9 days\nnew_df['result']=-new_df['ratio'].diff(9)\n# transfer the result to series\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
+    },
+    "alpha053_5": {
+        "description": "Reversal class factor, negative delta of a ratio involving close, low, and high prices over 5 days.",
+        "formulation": "-1 \\times \\Delta\\left(\\frac{(\\text{close} - \\text{low}) - (\\text{high} - \\text{close})}{\\text{close} - \\text{low}}, 5\\right)",
+        "variables": {
+            "\\Delta(x, d)": "Change in 'x' over 'd' days.",
+            "\\text{close}": "Closing price of the stock.",
+            "\\text{low}": "Lowest price of the stock for the day.",
+            "\\text{high}": "Highest price of the stock for the day."
+        },
+        "Category": "Volume&Price",
+        "Difficulty": "Easy",
+        "gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha053\nnew_df['ratio'] = (new_df['$close'] - new_df['$low'] - (new_df['$high'] - new_df['$close'])) / (new_df['$close'] - new_df['$low'])\n# the change of ratio in new_df over the 5 days\nnew_df['result']=-new_df['ratio'].diff(5)\n# transfer the result to series\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
+    }
+}
diff --git a/docs/research/benchmark.rst b/docs/research/benchmark.rst
index e212d454..15279d56 100644
--- a/docs/research/benchmark.rst
+++ b/docs/research/benchmark.rst
@@ -5,21 +5,12 @@ Benchmark
 
 Introduction
 =============
-
-Benchmarking the capabilities of the R&D is a very important research problem of the research area.
-
-Currently we are continuously exploring how to benchmark them.
-
-The current benchmarks are listed in this page
-
+Benchmarking the capabilities of R&D is a crucial research problem in this area. We are continuously exploring methods to benchmark these capabilities. The current benchmarks are listed on this page.
 
 Development Capability Benchmarking
 ===================================
-
-Benchmark is used to evaluate the effectiveness of factors with fixed data.
-
-It mainly includes the following steps:
+Benchmarking is used to evaluate the effectiveness of factors with fixed data. It mainly includes the following steps:
 
 1. :ref:`read and prepare the eval_data <data>`
 
 2. :ref:`declare the method to be tested and pass the arguments <config>`
@@ -27,34 +18,31 @@ It mainly includes the following steps:
 
 3. :ref:`declare the eval method and pass the arguments <config>`
 
-4. :ref:`run the eval <run>` 
+4. :ref:`run the eval <run>`
 
-5. :ref:`save and show the result <show>` 
+5. :ref:`save and show the result <show>`
 
-Configuration 
+Configuration
 -------------
 .. _config:
 
 .. autopydantic_settings:: rdagent.components.benchmark.conf.BenchmarkSettings
 
 Example
-++++++++
++++++++
 .. _example:
 
-The default value for ``bench_test_round`` is 10, and it will take about 2 hours to run 10 rounds.
-To modify it from ``10`` to ``2`` you can adjust this by adding environment variables in the .env file as shown below.
+The default value for ``bench_test_round`` is 10, which takes about 2 hours to run. To modify it from ``10`` to ``2``, adjust the environment variables in the .env file as shown below.
 
 .. code-block:: Properties
 
-    BENCHMARK_BENCH_TEST_ROUND=1
+    BENCHMARK_BENCH_TEST_ROUND=2
 
 Data Format
 -------------
 .. _data:
 
-The sample data in ``bench_data_path`` is a dictionary where each key represents a factor name.
-
-The value associated with each key is factor data containing the following information:
+The sample data in ``bench_data_path`` is a dictionary where each key represents a factor name. The value associated with each key is factor data containing the following information:
 
 - **description**: A textual description of the factor.
 - **formulation**: A LaTeX formula representing the model's formulation.
@@ -63,22 +51,24 @@ The value associated with each key is factor data containing the following infor
 - **Difficulty**: The difficulty level of implementing or understanding the factor.
 - **gt_code**: A piece of code associated with the factor.
 
-Here is the example of this data format:
+Here is an example of this data format:
 
 .. literalinclude:: ../../rdagent/components/benchmark/example.json
     :language: json
 
+Ensure the data is placed in the ``FACTOR_COSTEER_SETTINGS.data_folder_debug``. The data files should be in ``.h5`` or ``.md`` format and must not be stored in any subfolders. LLM-Agents will review the file content and implement the tasks.
+
+.. TODO: Add a script to automatically generate the data in the `rdagent/app/quant_factor_benchmark/data` folder.
+
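+A quick way to sanity-check a prepared data file is to load it and confirm that every factor entry carries the fields listed above. The following is a minimal sketch (it uses the default ``bench_data_path`` shown above; adjust the path for your own data):
+
+.. code-block:: Python
+
+    import json
+
+    # Load the benchmark data and check that each factor entry is complete.
+    with open("rdagent/components/benchmark/example.json") as f:
+        factors = json.load(f)
+
+    required = {"description", "formulation", "variables", "Category", "Difficulty", "gt_code"}
+    for name, entry in factors.items():
+        missing = required - entry.keys()
+        if missing:
+            print(f"{name} is missing fields: {missing}")
+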
 Run Benchmark
 -------------
 .. _run:
 
-Start benchmark after finishing the :doc:`../installation_and_configuration`.
+Start the benchmark after completing the :doc:`../installation_and_configuration`.
 
 .. code-block:: Properties
 
-    python rdagent/app/quant_factor_benchmark/eval.py
-
-
+    dotenv run -- python rdagent/app/benchmark/factor/eval.py
 
 Once completed, a pkl file will be generated, and its path will be printed on the last line of the console.
 
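+If you want to inspect the result object yourself, it can be loaded back with the standard ``pickle`` module. The following is a minimal sketch (the path is only the default from ``analysis.py``; substitute the path printed by your run):
+
+.. code-block:: Python
+
+    import pickle
+
+    # Load the evaluation result saved at the end of the benchmark run.
+    with open("git_ignore_folder/eval_results/res_promptV220240724-060037.pkl", "rb") as f:
+        res = pickle.load(f)
+
+    print(type(res))
+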
@@ -86,18 +76,16 @@ Show Result
 -------------
 .. _show:
 
-The ``analysis.py`` script is used to read data from pkl and convert it to an image.
-Modify the python code in ``rdagent/app/quant_factor_benchmark/analysis.py`` to specify the path to the pkl file and the output path for the png file.
+The ``analysis.py`` script reads data from the pkl file and converts it to an image. Modify the Python code in ``rdagent/app/quant_factor_benchmark/analysis.py`` to specify the path to the pkl file and the output path for the png file.
 
 .. code-block:: Properties
 
-    python rdagent/app/quant_factor_benchmark/analysis.py
+    dotenv run -- python rdagent/app/benchmark/factor/analysis.py
 
 A png file will be saved to the designated path as shown below.
 
 .. image:: ../_static/benchmark.png
 
-
 Related Paper
 -------------
 
@@ -116,3 +104,6 @@ Related Paper
 }
 
 .. image:: https://github.com/user-attachments/assets/494f55d3-de9e-4e73-ba3d-a787e8f9e841
+
+To replicate the benchmark detailed in the paper, please consult the factors listed in the following file: `RD2bench.json <../_static/RD2bench.json>`_.
+Please note that ``only_correct_format=False`` should be used when evaluating the results.
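+For example, set ``BENCHMARK_BENCH_DATA_PATH=docs/_static/RD2bench.json`` in your ``.env`` file (the ``BENCHMARK_`` prefix maps the variable onto ``bench_data_path`` in ``rdagent/components/benchmark/conf.py``), run the benchmark as described above, and then analyze the resulting pkl file. The following sketch shows the analysis call (the pkl path is a placeholder for your own result file):
+
+.. code-block:: Python
+
+    from rdagent.app.benchmark.factor.analysis import main
+
+    # only_correct_format=False keeps all results rather than only those
+    # whose output format was judged correct.
+    main(path="git_ignore_folder/eval_results/your_result.pkl", only_correct_format=False)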
diff --git a/rdagent/app/benchmark/factor/analysis.py b/rdagent/app/benchmark/factor/analysis.py
index f3412b40..e41e490a 100644
--- a/rdagent/app/benchmark/factor/analysis.py
+++ b/rdagent/app/benchmark/factor/analysis.py
@@ -13,9 +13,10 @@
 
 
 class BenchmarkAnalyzer:
-    def __init__(self, settings):
+    def __init__(self, settings, only_correct_format=False):
         self.settings = settings
         self.index_map = self.load_index_map()
+        self.only_correct_format = only_correct_format
 
     def load_index_map(self):
        index_map = {}
@@ -119,11 +120,13 @@ def analyze_data(self, sum_df):
         format_succ_rate_f = self.reformat_index(format_succ_rate)
 
         corr = sum_df_clean["FactorCorrelationEvaluator"].fillna(0.0)
-        corr = corr.unstack().T.mean(axis=0).to_frame("corr(only success)")
-        corr_res = self.reformat_index(corr)
-        corr_max = sum_df_clean["FactorCorrelationEvaluator"]
+        if self.only_correct_format:
+            corr = corr.loc[format_issue == 1.0]
 
-        corr_max = corr_max.unstack().T.max(axis=0).to_frame("corr(only success)")
+        corr_res = corr.unstack().T.mean(axis=0).to_frame("corr(only success)")
+        corr_res = self.reformat_index(corr_res)
+
+        corr_max = corr.unstack().T.max(axis=0).to_frame("corr(only success)")
         corr_max_res = self.reformat_index(corr_max)
 
         value_max = sum_df_clean["FactorEqualValueRatioEvaluator"]
@@ -150,9 +153,15 @@ def analyze_data(self, sum_df):
             axis=1,
         )
 
-        df = result_all.sort_index(axis=1, key=self.result_all_key_order)
+        df = result_all.sort_index(axis=1, key=self.result_all_key_order).sort_index(axis=0)
         print(df)
 
+        print()
+        print(df.groupby("Category").mean())
+
+        print()
+        print(df.mean())
+
         # Calculate the mean of each column
         mean_values = df.fillna(0.0).mean()
         mean_df = pd.DataFrame(mean_values).T
@@ -196,9 +205,10 @@ def main(
     path="git_ignore_folder/eval_results/res_promptV220240724-060037.pkl",
     round=1,
     title="Comparison of Different Methods",
+    only_correct_format=False,
 ):
     settings = BenchmarkSettings()
-    benchmark = BenchmarkAnalyzer(settings)
+    benchmark = BenchmarkAnalyzer(settings, only_correct_format=only_correct_format)
     results = {
         f"{round} round experiment": path,
     }
diff --git a/rdagent/app/benchmark/factor/eval.py b/rdagent/app/benchmark/factor/eval.py
index b26c09bf..d041eacc 100644
--- a/rdagent/app/benchmark/factor/eval.py
+++ b/rdagent/app/benchmark/factor/eval.py
@@ -1,16 +1,9 @@
-import os
-import pickle
-import time
-from pathlib import Path
-from pprint import pprint
-
 from rdagent.app.qlib_rd_loop.conf import FACTOR_PROP_SETTING
 from rdagent.components.benchmark.conf import BenchmarkSettings
 from rdagent.components.benchmark.eval_method import FactorImplementEval
 from rdagent.core.scenario import Scenario
 from rdagent.core.utils import import_class
 from rdagent.log import rdagent_logger as logger
-from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorScenario
 from rdagent.scenarios.qlib.factor_experiment_loader.json_loader import (
     FactorTestCaseLoaderFromJsonFile,
 )
@@ -25,7 +18,7 @@
 
     # 3.declare the method to be tested and pass the arguments.
     scen: Scenario = import_class(FACTOR_PROP_SETTING.scen)()
-    generate_method = import_class(bs.bench_method_cls)(scen=scen)
+    generate_method = import_class(bs.bench_method_cls)(scen=scen, **bs.bench_method_extra_kwargs)
 
     # 4.declare the eval method and pass the arguments.
     eval_method = FactorImplementEval(
         method=generate_method,
@@ -36,7 +29,7 @@
     )
 
     # 5.run the eval
-    res = eval_method.eval()
+    res = eval_method.eval(eval_method.develop())
 
     # 6.save the result
     logger.log_object(res)
diff --git a/rdagent/components/benchmark/conf.py b/rdagent/components/benchmark/conf.py
index f0eccec0..8ff428ef 100644
--- a/rdagent/components/benchmark/conf.py
+++ b/rdagent/components/benchmark/conf.py
@@ -12,9 +12,6 @@ class Config:
         env_prefix = "BENCHMARK_"
         """Use `BENCHMARK_` as prefix for environment variables"""
 
-    ground_truth_dir: Path = DIRNAME / "ground_truth"
-    """ground truth dir"""
-
     bench_data_path: Path = DIRNAME / "example.json"
     """data for benchmark"""
 
@@ -24,7 +21,7 @@ class Config:
     bench_test_case_n: Optional[int] = None
     """how many test cases to run; If not given, all test cases will be run"""
 
-    bench_method_cls: str = "rdagent.components.coder.CoSTEER.FactorCoSTEER"
+    bench_method_cls: str = "rdagent.components.coder.factor_coder.FactorCoSTEER"
     """method to be used for test cases"""
 
     bench_method_extra_kwargs: dict = field(
diff --git a/rdagent/components/coder/factor_coder/eva_utils.py b/rdagent/components/coder/factor_coder/eva_utils.py
index 8efb2096..48d7fe4f 100644
--- a/rdagent/components/coder/factor_coder/eva_utils.py
+++ b/rdagent/components/coder/factor_coder/eva_utils.py
@@ -221,15 +221,11 @@ def evaluate(
                     str(resp_dict["output_format_feedback"]),
                     resp_dict["output_format_decision"],
                 )
-
-            except json.JSONDecodeError as e:
-                raise ValueError("Failed to decode JSON response from API.") from e
-
-            except KeyError as e:
+            except (KeyError, json.JSONDecodeError) as e:
                 attempts += 1
                 if attempts >= max_attempts:
                     raise KeyError(
-                        "Response from API is missing 'output_format_decision' or 'output_format_feedback' key after multiple attempts."
+                        "Wrong JSON Response or missing 'output_format_decision' or 'output_format_feedback' key after multiple attempts."
                     ) from e
 
         return "Failed to evaluate output format after multiple attempts.", False
diff --git a/rdagent/components/coder/factor_coder/evolving_strategy.py b/rdagent/components/coder/factor_coder/evolving_strategy.py
index c751996c..5fbc8145 100644
--- a/rdagent/components/coder/factor_coder/evolving_strategy.py
+++ b/rdagent/components/coder/factor_coder/evolving_strategy.py
@@ -158,14 +158,20 @@ def implement_one_task(
                 queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge_to_render[:-1]
             elif len(queried_similar_error_knowledge_to_render) > 0:
                 queried_similar_error_knowledge_to_render = queried_similar_error_knowledge_to_render[:-1]
-        code = json.loads(
-            APIBackend(
-                use_chat_cache=FACTOR_COSTEER_SETTINGS.coder_use_cache
-            ).build_messages_and_create_chat_completion(
-                user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
-            )
-        )["code"]
-        return code
+        for _ in range(10):
+            try:
+                code = json.loads(
+                    APIBackend(
+                        use_chat_cache=FACTOR_COSTEER_SETTINGS.coder_use_cache
+                    ).build_messages_and_create_chat_completion(
+                        user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
+                    )
+                )["code"]
+                return code
+            except json.decoder.JSONDecodeError:
+                pass
+        else:
+            return ""  # return empty code if failed to get code after 10 attempts
 
     def assign_code_list_to_evo(self, code_list, evo):
         for index in range(len(evo.sub_tasks)):