feat: Enabling automation of experiments running v2.0 #469
Open
xisen-w wants to merge 24 commits into main from automated-evaluation (base: main)
Commits (24):
8726ab8 (xisen-w) Revising to enable automation of experiments running v1.0
b44bef5 (xisen-w) Any new updates
c100876 (xisen-w) Revising to enable automation of experiments running v1.0
18370d4 (xisen-w) Any new updates
21a99d2 (you-n-g) Add template
86ae0b2 (xisen-w) Stoping tracking additional env
f94dbff (xisen-w) Merge branch 'automated-evaluation' of https://github.com/microsoft/R…
66ffd6d (xisen-w) Uploading relevant envs
0ef80a5 (xisen-w) Adding tests
907d980 (xisen-w) Updating
51388d1 (xisen-w) Updated collect.py to extract result from trace
af6220e (xisen-w) Update .gitignore to remove the unecessary ones
54c3c6d (xisen-w) "Remove unnecessary files"
78708e4 (xisen-w) Merge branch 'automated-evaluation' of https://github.com/microsoft/R…
3f131f3 (xisen-w) Merge branch 'main' into automated-evaluation
38bb9e6 (xisen-w) Updated to enable automatic collection of experiment result information
10b0053 (xisen-w) Updating the env files & Upading test_system file
238f492 (xisen-w) Updated relevant env for better testing
68ca63a (xisen-w) Updated README.md
8b18fad (xisen-w) reverting gitignore back
2395dc5 (xisen-w) Updates
b7cc98e (xisen-w) README update
0b5a09d (xisen-w) Updates on env README
24cd0c2 (xisen-w) Updating collect.py
@@ -0,0 +1,38 @@
# Introduction

This document outlines the environment configurations for the ablation studies. Each environment file corresponds to a specific experimental case, with some cases currently unavailable for implementation.
| Name     | .env         | Description                   | Available? |
|----------|--------------|-------------------------------|------------|
| basic    | basic.env    | Standard case of RDAgent      | Yes        |
| minicase | minicase.env | Enables minicase and DS-Agent | Yes        |
| pro      | pro.env      | Standard case with vector RAG | Yes        |
| max      | max.env      | Enables all features          | No         |
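For a quick illustration (the paths are assumptions taken from the examples in `scripts/exp/tools/README.md` elsewhere in this PR), one of these configurations can be applied to the kaggle loop through the `run_envs.sh` helper added in this PR:

```bash
# Run the kaggle loop once per .env file in the ablation folder (assumed paths)
dotenv run -- ./scripts/exp/tools/run_envs.sh -d scripts/exp/ablation/env -j 1 -- \
    python rdagent/app/kaggle/loop.py
```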
## Notes

- Each `.env` file represents a distinct case for experimentation. Future implementations will include the unavailable cases.
- There is potential for integrating `CHAT_MODEL` in the future to facilitate comparisons between different models in experiments.
## Common Environment Variables

| Variable Name | Description |
|---------------|-------------|
| `MINICASE` | Set to `True` to enable the previous implementation of DS-Agent. |
| `IF_USING_MLE_DATA` | Set to `True` to use MLE benchmark data; requires `KG_LOCAL_DATA_PATH=/data/userdata/share/mle_kaggle`. |
| `KG_IF_USING_VECTOR_RAG` | Set to `True` to enable vector RAG. |
| `KG_IF_USING_GRAPH_RAG` | Set to `False` to disable graph RAG. |
| `KG_IF_ACTION_CHOOSING_BASED_ON_UCB` | Set to `True` to enable action selection based on UCB. |
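For illustration, a hypothetical `.env` combining these variables could look like the following; the values are examples only and do not correspond to one of the shipped configurations:

```bash
# Hypothetical ablation configuration (illustrative values only)
MINICASE=True
IF_USING_MLE_DATA=True
KG_LOCAL_DATA_PATH=/data/userdata/share/mle_kaggle
KG_IF_USING_VECTOR_RAG=True
KG_IF_USING_GRAPH_RAG=False
KG_IF_ACTION_CHOOSING_BASED_ON_UCB=True
```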
## Future Work

- Implement additional environment configurations as needed.
- Explore the integration of different models for comparative analysis in ablation studies.
@@ -0,0 +1,3 @@
KG_IF_USING_VECTOR_RAG=False
KG_IF_USING_GRAPH_RAG=False
KG_IF_ACTION_CHOOSING_BASED_ON_UCB=False
@@ -0,0 +1,4 @@
KG_IF_USING_VECTOR_RAG=False
KG_IF_USING_GRAPH_RAG=True
KG_IF_ACTION_CHOOSING_BASED_ON_UCB=True
#KG_KNOWLEDGE_BASE_PATH= TODO: Specify Your Knowledge Base Path
@@ -0,0 +1,5 @@
KG_IF_USING_VECTOR_RAG=True
KG_IF_USING_GRAPH_RAG=False
KG_IF_ACTION_CHOOSING_BASED_ON_UCB=True
# MIGHT BE LEGACY
@@ -0,0 +1,4 @@
KG_IF_USING_VECTOR_RAG=True
KG_IF_USING_GRAPH_RAG=False
KG_IF_ACTION_CHOOSING_BASED_ON_UCB=True
# MIGHT BE LEGACY
@@ -0,0 +1,125 @@
# Tools Directory

This directory provides scripts to run experiments with different environment configurations, collect results, and demonstrate usage through an example script.

## Directory Structure

```
scripts/exp/tools/
├── run_envs.sh      # Script for running experiments
├── collect.py       # Results collection and summary
├── test_system.sh   # Usage script for rdagent kaggle loop
└── README.md        # This documentation
```
## Tools Overview

1. **run_envs.sh**: Executes experiments with different environment configurations in parallel.
2. **collect.py**: Collects and summarizes experiment results into a single file.
3. **test_system.sh**: Demonstrates how to use the above tools together for experiment execution and result collection (for the rdagent kaggle loop).

## Getting Started

### Prerequisites

Place your `.env` files in the directory you intend to use for environment configurations.
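For example, with the ablation configurations shipped in this PR, the directory could look as follows (path and file names are taken from other parts of this PR and may differ in your setup):

```bash
# List the .env files that run_envs.sh will pick up (assumed location)
ls scripts/exp/ablation/env
# basic.env  max.env  minicase.env  pro.env
```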
## Usage

### 1. Running Experiments with Different Environments

The `run_envs.sh` script allows running a command with multiple environment configurations in parallel.

**Command Syntax:**

```bash
./run_envs.sh -d <dir_to_.envfiles> -j <number_of_parallel_processes> -- <command>
```

**Example Usage:**

- Basic example:

```bash
./run_envs.sh -d env_files -j 1 -- echo "Hello"
```

- Practical example (running the kaggle loop file):

```bash
dotenv run -- ./run_envs.sh -d RD-Agent/scripts/exp/ablation/env -j 1 -- python RD-Agent/rdagent/app/kaggle/loop.py
```

**Explanation:**

| Option | Description |
|--------|-------------|
| `-d` | Specifies the directory containing `.env` files. |
| `-j` | Number of parallel processes to run (e.g., 1 for sequential execution). |
| `--` | Separates script options from the command to execute. |
| `<command>` | The command to execute with the environment variables loaded. |
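To sanity-check that each configuration is actually loaded before the command runs, a quick probe such as the following can be used (illustrative; substitute any variable defined in your `.env` files):

```bash
# Print one variable per .env file; run_envs.sh exports the file before executing the command
./run_envs.sh -d env_files -j 1 -- printenv KG_IF_USING_VECTOR_RAG
```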
### 2. Collecting Results

The `collect.py` script processes logs and generates a summary JSON file.

**Command Syntax:**

```bash
python collect.py --log_path <path_to_logs> --output_name <summary_filename>
```

**Example Usage:**

Collect results from logs:

```bash
python collect.py --log_path logs --output_name summary.json
```

**Explanation:**

| Option | Description |
|--------|-------------|
| `--log_path` | Required. Specifies the directory containing experiment logs. |
| `--output_name` | Optional. The name of the output summary file (default: `summary.json`). |
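The summary is written into the log directory; per `collect.py` in this PR it contains `configs`, `best_result`, and `timestamp` fields. One way to inspect it (shown here only as an example) is:

```bash
# Pretty-print the generated summary file
python -m json.tool logs/summary.json
```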
### 3. Example Workflow (for the rdagent kaggle loop)

Use the `test_system.sh` script to demonstrate a complete workflow.

**Steps:**

1. Run the test system:

   ```bash
   ./scripts/exp/tools/test_system.sh
   ```

   This will:
   1. Load environment configurations from `.env` files.
   2. Execute experiments using the configurations.

2. Find your logs in the `logs` directory.

3. Use the `collect.py` script to summarize results:

   ```bash
   python collect.py --log_path logs --output_name summary.json
   ```
## Create Your Own Workflow

- Create the ablation environments under a specified folder.
- Revise the `test_system.sh` template to adjust the path and relevant commands for execution.
- Run `test_system.sh` to execute the environments through the different configurations.
- Keep track of your log path and use `collect.py` to collect the results at scale, as in the sketch below.
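A minimal sketch of such a workflow, assuming the ablation env folder and log path used elsewhere in this PR (adjust the paths, the competition, and the `-j` value to your setup):

```bash
# 1. Run the kaggle loop once per .env configuration (two configurations in parallel)
dotenv run -- ./scripts/exp/tools/run_envs.sh -d scripts/exp/ablation/env -j 2 -- \
    python rdagent/app/kaggle/loop.py --competition "spaceship-titanic"

# 2. Summarize the resulting logs into a single JSON file
python ./scripts/exp/tools/collect.py --log_path logs --output_name summary.json
```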
## Notes

- Scale parallel processes as needed using the `-j` parameter.
- Avoid errors by ensuring `.env` files are correctly formatted.
- Modify `test_system.sh` to meet your project's specific needs.
- Add other metrics of interest in `collect.py` to summarize them automatically.

For further assistance, refer to the comments within the scripts or reach out to the development team.
@@ -0,0 +1,92 @@
import os
import json
import argparse
from pathlib import Path
from datetime import datetime
from rdagent.log.storage import FileStorage
from rdagent.scenarios.kaggle.kaggle_crawler import (
    leaderboard_scores,
)
import pandas as pd


def collect_results(log_path) -> list[dict]:
    summary = []
    log_storage = FileStorage(Path(log_path))
    evaluation_metric_direction = None
    # Extract score from trace using the same approach as UI
    for msg in log_storage.iter_msg():
        if "scenario" in msg.tag:
            competition_name = msg.content.competition  # Find the competition name
            leaderboard = leaderboard_scores(competition_name)
            evaluation_metric_direction = float(leaderboard[0]) > float(leaderboard[-1])

        if "runner result" in msg.tag:
            if msg.content.result is not None:
                score = msg.content.result
                summary.append({
                    "competition_name": competition_name,
                    "score": score,
                    "workspace": msg.content.experiment_workspace.workspace_path,
                    "evaluation_metric_direction": evaluation_metric_direction
                })
    return summary


def generate_summary(results, output_path):
    summary = {
        "configs": {},  # TODO: add config?
        "best_result": {"competition_name": None, "score": None},
        "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
        # Add other metrics that we want to track in the future (eg. is there successive increase?)
    }
    for result in results:
        # Update best result
        if result["evaluation_metric_direction"]:
            if (result["score"] is not None and
                    (summary["best_result"]["score"] is None or
                     (result["score"].iloc[0] > summary["best_result"]["score"]))):
                summary["best_result"].update({
                    "score": result["score"].iloc[0] if isinstance(result["score"], pd.Series) else result["score"],
                    "competition_name": result["competition_name"]
                })
        else:
            if (result["score"] is not None and
                    (summary["best_result"]["score"] is None or
                     (result["score"].iloc[0] < summary["best_result"]["score"]))):
                summary["best_result"].update({
                    "score": result["score"].iloc[0] if isinstance(result["score"], pd.Series) else result["score"],
                    "competition_name": result["competition_name"]
                })

    # Convert Series to scalar or list if necessary
    for key, value in summary.items():
        if isinstance(value, pd.Series):
            summary[key] = value.tolist()  # Convert Series to list
        elif isinstance(value, dict):
            for sub_key, sub_value in value.items():
                if isinstance(sub_value, pd.Series):
                    value[sub_key] = sub_value.tolist()  # Convert Series to list

    with open(output_path, "w") as f:
        json.dump(summary, f, indent=4)


def parse_args():
    parser = argparse.ArgumentParser(description='Collect and summarize experiment results')
    parser.add_argument('--log_path', type=str, required=True,
                        help='Path to the log directory containing experiment results')
    parser.add_argument('--output_name', type=str, default='summary.json',
                        help='Name of the output summary file (default: summary.json)')
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    log_path = Path(args.log_path)

    # Verify the log path exists
    if not log_path.exists():
        raise FileNotFoundError(f"Log path does not exist: {log_path}")

    results = collect_results(log_path)
    output_path = log_path / args.output_name
    generate_summary(results, output_path)
    print("Summary generated successfully at", output_path)
@@ -0,0 +1,51 @@
#!/bin/sh
cat << "EOF" > /dev/null
Given a directory with *.env files. Run each one.

usage for example:

1) directly run command without extra shared envs
./run_envs.sh -d <dir_to_*.envfiles> -j <number of parallel process> -- <command>

2) load shared envs `.env` before running command with different envs.
dotenv run -- ./run_envs.sh -d <dir_to_*.envfiles> -j <number of parallel process> -- <command>

EOF

# Function to display usage
usage() {
    echo "Usage: $0 -d <dir_to_*.envfiles> -j <number of parallel process> -- <command>"
    exit 1
}

# Parse command line arguments
while getopts "d:j:" opt; do
    case $opt in
        d) DIR=$OPTARG ;;
        j) JOBS=$OPTARG ;;
        *) usage ;;
    esac
done

# Shift to get the command
shift $((OPTIND -1))

# Check if directory and jobs are set
if [ -z "$DIR" ] || [ -z "$JOBS" ] || [ $# -eq 0 ]; then
    usage
fi

COMMAND="$@"

# Before running commands
echo "Running experiments with following env files:"
find "$DIR" -name "*.env" -exec echo "{}" \;

# Export and run each .env file in parallel
find "$DIR" -name "*.env" | xargs -n 1 -P "$JOBS" -I {} sh -c "
set -a
. {}
set +a
$COMMAND
"
@@ -0,0 +1,19 @@
#!/bin/bash

# Test directory setup
TEST_DIR="test_run"
mkdir -p "$TEST_DIR/results"
mkdir -p "$TEST_DIR/logs"

# Define relative paths inside the folder RDAgent
ENV_DIR="scripts/exp/ablation/env"          # The folder of environments to apply
PYTHON_SCRIPT="rdagent/app/kaggle/loop.py"  # The main file for running

# Run the experiment
echo "Running experiments..."
dotenv run -- ./scripts/exp/tools/run_envs.sh -d "$ENV_DIR" -j 4 -- \
    python "$PYTHON_SCRIPT" \
    --competition "spaceship-titanic"

# Cleanup (optional - comment out if you want to keep results)
# rm -rf "$TEST_DIR"
Review comment: Will the env name (e.g. basic, max, pro) be displayed in the collected results?