-
Notifications
You must be signed in to change notification settings - Fork 12
/
construct_verifiable_medical_problems.py
212 lines (170 loc) · 9.03 KB
/
construct_verifiable_medical_problems.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import os
import random
import json
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from retrying import retry
import argparse
import traceback
import re
import requests
class GPT:
def __init__(self, model_name, api_url, api_key):
self.model_name = model_name
self.api_url = api_url
self.api_key = api_key
print(f"Using model: {self.model_name}")
def call(self, content, additional_args={}):
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {self.api_key}"
}
payload = {
"model": self.model_name,
"messages": [{'role': 'user', 'content': content}],
**additional_args,
}
response = requests.post(self.api_url, headers=headers, json=payload)
response_data = response.json()
if 'error' in response_data:
raise ValueError(f"API Error: {response_data}")
return response_data['choices'][0]['message']['content']
@retry(wait_fixed=3000, stop_max_attempt_number=3)
def retry_call(self, content, additional_args={"max_tokens": 8192}):
return self.call(content, additional_args)
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument("--data_path", type=str, required=True, help="Path to the input JSON data file.")
parser.add_argument("--filter_data", action='store_true', help="Enable filtering of questions with LLMs.")
parser.add_argument("--model_name", type=str, default="gpt-4", help="Name of the GPT model to use.")
parser.add_argument("--api_key", type=str, required=True, help="OpenAI API key.")
parser.add_argument("--api_url", type=str, default="https://api.openai.com/v1/chat/completions", help="OpenAI API URL.")
parser.add_argument("--num_process", type=int, default=10, help="Number of parallel processes.")
parser.add_argument("--limit_num", type=int, help="Limit the number of processed items.")
return parser.parse_args()
def extract_bracket_content(text):
# Extract content between the first '{' and the last '}'
match = re.search(r'\{.*\}', text, re.DOTALL)
return match.group(0) if match else None
def parse_gpt_response(response):
try:
if not response.startswith('{'):
response = extract_bracket_content(response)
parsed_data = json.loads(response.replace('\n', ''))
assert len(parsed_data) == 2, "Response JSON should contain exactly two keys."
assert isinstance(parsed_data["Open-ended Verifiable Question"], str), "Open-ended Question must be a string."
assert isinstance(parsed_data["Ground-True Answer"], str), "Ground-True Answer must be a string."
return True, parsed_data
except Exception as e:
print(f"Error parsing GPT response: {e}")
return False, None
def process_single_item(item, gpt_instance, save_directory, filter_prompt, reformat_prompt, filter_enabled):
try:
max_retries = 2
save_path = os.path.join(save_directory, f"{item['process_id']}.json")
# Generate options string for the question
item['options_str'] = '\n'.join([f"{key}. {value}" for key, value in item['options'].items()])
question_text = f"{item['question']}\n{item['options_str']}"
# Filter questions if enabled
if filter_enabled:
filter_query = filter_prompt.format(question_text, item['answer'])
item['gpt_filter_query'] = filter_query
response = gpt_instance.retry_call(filter_query)
item['gpt_filter_response'] = response
if 'pass' not in response.lower():
with open(save_path, 'w', encoding='utf-8') as file:
json.dump(item, file, ensure_ascii=False, indent=2)
return 1
# Reformat questions into open-ended format
reformat_query = reformat_prompt.format(question_text, item['answer'])
item['gpt_reformat_query'] = reformat_query
for _ in range(max_retries):
response = gpt_instance.retry_call(reformat_query)
item['gpt_reformat_response'] = response
valid, parsed_data = parse_gpt_response(response)
if valid:
item["Open-ended Verifiable Question"] = parsed_data["Open-ended Verifiable Question"]
item["Ground-True Answer"] = parsed_data["Ground-True Answer"]
break
with open(save_path, 'w', encoding='utf-8') as file:
json.dump(item, file, ensure_ascii=False, indent=2)
except Exception as e:
print(f"Error processing item {item['process_id']}: {e}")
return 1
def merge_saved_files(directory):
_, _, filenames = next(os.walk(directory))
json_files = [f for f in filenames if f.endswith('.json')]
merged_data = []
for file in json_files:
try:
with open(os.path.join(directory, file), 'r', encoding='utf-8') as f:
data = json.load(f)
assert 'Open-ended Verifiable Question' in data or 'gpt_filter_response' in data or 'gpt4_response_filter' in data
merged_data.append(data)
except Exception as e:
# traceback.print_exc()
print(f"Error merging file {file}: {e}")
return merged_data
def deduplicate_data(data, processed_data):
processed_ids = {item['process_id'] for item in processed_data}
return [item for item in data if item['process_id'] not in processed_ids]
def main():
args = parse_arguments()
# Load input data
with open(args.data_path, 'r') as file:
input_data = json.load(file)
# Assign unique process IDs to each item
for idx, item in enumerate(input_data, start=1):
item['process_id'] = idx
if args.limit_num:
input_data = input_data[:args.limit_num]
print(f"Loaded {len(input_data)} items.")
# Define task and save directory
task_name = os.path.splitext(os.path.basename(args.data_path))[0]
save_directory = os.path.join('output_data', task_name)
os.makedirs(save_directory, exist_ok=True)
gpt_instance = GPT(model_name=args.model_name, api_url=args.api_url, api_key=args.api_key)
filter_prompt = """<Multiple-choice Question>
{}
Correct Answer: {}
</Multiple-choice Question>
You are an expert in filtering and evaluating multiple-choice questions for advanced reasoning tasks. Your job is to evaluate a given question and determine whether it meets the following criteria:
1. **Depth of Reasoning:** The question should require deeper reasoning. If the question appears too simple, mark it as "Too Simple".
2. **Unambiguous Correct Answer:** The question must have a unique and unambiguous correct answer. If the question asks for "incorrect options" or allows for multiple correct answers, mark it as "Ambiguous Answer".
3. **Open-Ended Reformulation Feasibility:** The question should be suitable for reformatting into an open-ended format. If the question cannot be easily reformulated into an open-ended problem and a clear ground-truth answer, mark it as "Not Reformulatable".
For each question, provide one of the following evaluations:
- "Pass" (The question meets all the criteria.)
- "Too Simple"
- "Ambiguous Answer"
- "Not Reformulatable" """
reformat_prompt = """I will provide you with a multiple-choice question, and your task is to rewrite it into an open-ended question, along with a Ground-True Answer. The requirements are:
1. The question must be specific, targeting the point being tested in the original multiple-choice question. Ensure it is open-ended, meaning no options are provided, but there must be a definitive Ground-True Answer.
2. Based on the correct answer from the original question, provide a concise Ground-True Answer. The answer should allow for precise matching to determine whether the model's response is correct.
Here is the multiple-choice question for you to rewrite:
<Multiple-choice Question>
{}
Correct Answer: {}
</Multiple-choice Question>
Please output the result in the following JSON format:
```json
{{
"Open-ended Verifiable Question": "...",
"Ground-True Answer": "..."
}}
```"""
# Merge previously processed files
processed_data = merge_saved_files(save_directory)
print(f"Previously processed items: {len(processed_data)}")
input_data = deduplicate_data(input_data, processed_data)
print(f"Items remaining for processing: {len(input_data)}")
# Process data using a thread pool
with ThreadPoolExecutor(max_workers=args.num_process) as executor:
list(tqdm(executor.map(lambda item: process_single_item(item, gpt_instance, save_directory, filter_prompt, reformat_prompt, args.filter_data), input_data), total=len(input_data), desc="Processing Items", unit="item"))
# Merge and save final output
final_data = merge_saved_files(save_directory)
output_path = f"{task_name}_final_{len(final_data)}.json"
print(f"Processed {len(final_data)} items. Saving to {output_path}")
with open(output_path, 'w', encoding='utf-8') as file:
json.dump(final_data, file, ensure_ascii=False, indent=2)
if __name__ == '__main__':
main()