import re
import copy
import json

from loguru import logger


def extract_section(text, section_name):
    # Define a pattern to extract the named section along with its content
    section_pattern = rf'#### {section_name}\n(.*?)(?=####|$)'

    # Find the specific section content
    section_content = re.search(section_pattern, text, re.DOTALL)

    if section_content:
        # If the section is found, extract the content and strip the leading/trailing whitespace
        # This will also remove leading/trailing newlines
        content = section_content.group(1).strip()

        # Return the cleaned content
        return content
    else:
        # If the section is not found, return an empty string
        return ""
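
# Illustrative usage of extract_section (the markdown reply below is a made-up
# example, not taken from this repository's prompts):
#
#   text = "#### Task\nBuild a CLI tool.\n\n#### Notes\nNone yet."
#   extract_section(text, "Task")     # -> "Build a CLI tool."
#   extract_section(text, "Missing")  # -> ""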


def parse_section(text, section_name):
    # Define a pattern to extract the named section along with its content
    section_pattern = rf'#### {section_name}\n(.*?)(?=####|$)'

    # Find the specific section content
    section_content = re.search(section_pattern, text, re.DOTALL)

    if section_content:
        # If the section is found, extract the content
        content = section_content.group(1)

        # Define a pattern to find segments that follow the format **xx:**
        segments_pattern = r'\*\*([^*]+):\*\*'

        # Use findall method to extract all matches in the section content
        segments = re.findall(segments_pattern, content)

        return segments
    else:
        # If the section is not found, return an empty list
        return []
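
# Illustrative usage of parse_section (hypothetical input): only the bold
# "**name:**" labels are returned, not the text that follows them.
#
#   text = "#### Files\n**main.py:** entry point\n**utils.py:** helper functions"
#   parse_section(text, "Files")  # -> ['main.py', 'utils.py']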


def parse_text_to_dict(text):
    # Define a regular expression pattern to capture the key and value
    main_pattern = r"\*\*(.+?):\*\*\s*(.*?)\s*(?=\*\*|$)"
    list_pattern = r'```python\n(.*?)```'
    plan_pattern = r'(\[\s*.*?\s*\])'

    # Use re.findall to find all main matches in the text
    main_matches = re.findall(main_pattern, text, re.DOTALL)

    # Convert main matches to a dictionary
    parsed_dict = {key.strip(): value.strip() for key, value in main_matches}

    # Only the PLAN value is post-processed: it is expected to hold a Python list
    # literal, either inside a fenced ```python/```list block or as a bare [...]
    for k, v in parsed_dict.items():
        if k != "PLAN":
            continue
        v = v.replace("```list", "```python")
        for pattern in [list_pattern, plan_pattern]:
            match_value = re.search(pattern, v, re.DOTALL)
            if match_value:
                # Evaluate the captured list literal and store it in place of the raw text
                parsed_dict[k] = eval(match_value.group(1).strip())
                break

    return parsed_dict
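
# Illustrative usage of parse_text_to_dict (hypothetical LLM reply): the PLAN
# value is evaluated into a Python list, other values stay as stripped strings.
#
#   reply = "**TASK:** Clean the data\n**PLAN:** ```list\n['load data', 'drop nulls']\n```"
#   parse_text_to_dict(reply)
#   # -> {'TASK': 'Clean the data', 'PLAN': ['load data', 'drop nulls']}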


def parse_dict_to_dict(parsed_dict) -> dict:
    code_pattern = r'```python\n(.*?)```'
    tool_pattern = r'```json\n(.*?)```'

    pattern_dict = {"code": code_pattern, "json": tool_pattern}
    spec_parsed_dict = copy.deepcopy(parsed_dict)
    for key, pattern in pattern_dict.items():
        for k, text in parsed_dict.items():
            # Search each string value for the first matching fenced block
            if not isinstance(text, str):
                continue
            _match = re.search(pattern, text, re.DOTALL)
            if _match:
                # Store the extracted block under the "code" or "json" key
                try:
                    spec_parsed_dict[key] = json.loads(_match.group(1).strip())
                except json.JSONDecodeError:
                    # Fall back to the raw string if the block is not valid JSON
                    spec_parsed_dict[key] = _match.group(1).strip()
                break
    return spec_parsed_dict
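
# Illustrative usage of parse_dict_to_dict (hypothetical input): the first
# ```python block is copied under "code" and the first ```json block, decoded,
# under "json"; the original keys are kept unchanged.
#
#   d = {"Codes": "```python\nprint('hi')\n```", "Tool": '```json\n{"name": "search"}\n```'}
#   parse_dict_to_dict(d)["code"]  # -> "print('hi')"
#   parse_dict_to_dict(d)["json"]  # -> {'name': 'search'}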


def prompt_cost(model_type: str, num_prompt_tokens: float, num_completion_tokens: float):
    # Prices in USD per 1,000 prompt tokens
    input_cost_map = {
        "gpt-3.5-turbo": 0.0015,
        "gpt-3.5-turbo-16k": 0.003,
        "gpt-3.5-turbo-0613": 0.0015,
        "gpt-3.5-turbo-16k-0613": 0.003,
        "gpt-4": 0.03,
        "gpt-4-0613": 0.03,
        "gpt-4-32k": 0.06,
    }

    # Prices in USD per 1,000 completion tokens
    output_cost_map = {
        "gpt-3.5-turbo": 0.002,
        "gpt-3.5-turbo-16k": 0.004,
        "gpt-3.5-turbo-0613": 0.002,
        "gpt-3.5-turbo-16k-0613": 0.004,
        "gpt-4": 0.06,
        "gpt-4-0613": 0.06,
        "gpt-4-32k": 0.12,
    }

    # Unknown models are signalled with -1 rather than an exception
    if model_type not in input_cost_map or model_type not in output_cost_map:
        return -1

    return num_prompt_tokens * input_cost_map[model_type] / 1000.0 + num_completion_tokens * output_cost_map[model_type] / 1000.0
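

# Minimal smoke test, run only when the module is executed directly. The sample
# reply and token counts below are invented for illustration only.
if __name__ == "__main__":
    sample = "#### Plan\n**PLAN:** ```list\n['inspect data', 'train model']\n```"
    logger.info("plan section: {}", extract_section(sample, "Plan"))
    logger.info("parsed plan: {}", parse_text_to_dict(extract_section(sample, "Plan")))
    # 1200 prompt + 300 completion tokens on gpt-3.5-turbo ~= 0.0024 USD
    logger.info("cost: {} USD", prompt_cost("gpt-3.5-turbo", num_prompt_tokens=1200, num_completion_tokens=300))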