# File: codefuse-chatbot/examples/agent_examples/codeGenDoc_example.py
# Listing metadata: 507 lines, 20 KiB, Python (viewer links: Raw, Normal View, History)

import os, sys, json
from loguru import logger
src_dir = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
)
sys.path.append(src_dir)
from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, CB_ROOT_PATH
from configs.server_config import SANDBOX_SERVER
from coagent.llm_models.llm_config import EmbedConfig, LLMConfig
from coagent.connector.phase import BasePhase
from coagent.connector.agents import BaseAgent
from coagent.connector.schema import Message
from coagent.tools import CodeRetrievalSingle
from coagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler
import importlib
# Define a new agent class
class CodeGenDocer(BaseAgent):
    """Agent that retrieves the code to document before the prediction step."""

    def start_action_step(self, message: Message) -> Message:
        """Run the pre-prediction action: attach retrieved code to ``message``.

        Calls ``CodeRetrievalSingle.run`` with the message's code engine name
        and original query, using the "tag" search type, then stores the
        result's ``"code"`` entry under ``customed_kargs["Code Snippet"]`` and
        its ``"vertex"`` entry under ``customed_kargs["Current_Vertex"]``.

        Args:
            message: The incoming message; it is mutated in place.

        Returns:
            The same ``message`` object, with the two extra entries set.
        """
        # Look up the code snippet and vertex information for the query.
        retrieved = CodeRetrievalSingle.run(
            message.code_engine_name,
            message.origin_query,
            llm_config=self.llm_config,
            embed_config=self.embed_config,
            local_graph_path=message.local_graph_path,
            use_nh=message.use_nh,
            search_type="tag",
        )
        vertex = retrieved["vertex"]
        message.customed_kargs["Code Snippet"] = retrieved["code"]
        message.customed_kargs["Current_Vertex"] = vertex
        return message
# Expose the custom agent class as an attribute of the coagent agents module.
# NOTE(review): presumably this lets coagent resolve the agent type by name -- confirm.
agent_module = importlib.import_module("coagent.connector.agents")
agent_module.CodeGenDocer = CodeGenDocer

# Log level: print the prompt and the LLM prediction.
os.environ["log_verbose"] = "1"

phase_name = "code2DocsGroup"

# LLM settings; both environment variables must be set or a KeyError is raised.
llm_config = LLMConfig(
    model_name="gpt-4",
    api_key=os.environ["OPENAI_API_KEY"],
    api_base_url=os.environ["API_BASE_URL"],
    temperature=0.3,
)

# Embedding settings; the model is loaded from a directory under src_dir.
embed_config = EmbedConfig(
    embed_engine="model",
    embed_model="text2vec-base-chinese",
    embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese"),
)
# Initialize the codebase: delete any existing copy, import it again, then
# build the handler and phase used by the documentation loop.
codebase_name = 'client_local'
code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"

# Arguments shared by every CodeBaseHandler constructed in this section.
_handler_kwargs = dict(
    crawl_type='dir',
    local_graph_path=CB_ROOT_PATH,
    llm_config=llm_config,
    embed_config=embed_config,
)

# Step 1: delete the codebase.
# NOTE(review): deletion runs with use_nh=False while the import below uses
# use_nh=True -- confirm this difference is intended.
use_nh = False
cbh = CodeBaseHandler(codebase_name, code_path, use_nh=use_nh, **_handler_kwargs)
cbh.delete_codebase(codebase_name=codebase_name)

# Step 2: load (import) the codebase.
use_nh = True
do_interpret = True
cbh = CodeBaseHandler(codebase_name, code_path, use_nh=use_nh, **_handler_kwargs)
cbh.import_code(do_interpret=do_interpret)

# Step 3: construct a fresh handler on top of the codebase loaded above.
cbh = CodeBaseHandler(codebase_name, code_path, use_nh=use_nh, **_handler_kwargs)

phase = BasePhase(
    phase_name,
    sandbox_server=SANDBOX_SERVER,
    jupyter_work_path=JUPYTER_WORK_PATH,
    embed_config=embed_config,
    llm_config=llm_config,
    kb_root_path=KB_ROOT_PATH,
)
# For each vertex type, ask the phase to document every vertex and dump the
# collected outputs to <CB_ROOT_PATH>/docs/raw_<vertex_type>.json.
for vertex_type in ("class", "method"):
    vertexes = cbh.search_vertices(vertex_type=vertex_type)
    logger.info(f"vertexes={vertexes}")

    # Round 1: one phase step per vertex, each with re-initialized memory.
    raw_docs = []
    for vertex in vertexes:
        # The part after the first "-" holds the method's parameters; drop it.
        vertex_name = vertex.split("-")[0]
        query_content = f"{vertex_type}节点 {vertex_name}生成文档"
        query = Message(
            role_name="human",
            role_type="user",
            role_content=query_content,
            input_query=query_content,
            origin_query=query_content,
            code_engine_name="client_local",
            score_threshold=1.0,
            top_k=3,
            cb_search_type="tag",
            use_nh=use_nh,
            local_graph_path=CB_ROOT_PATH,
        )
        _, output_memory = phase.step(query, reinit_memory=True)
        # Debug aid:
        # print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list"))
        raw_docs.append(output_memory.get_spec_parserd_output())

    # Persist this vertex type's results before moving on to the next type.
    docs_dir = f"{CB_ROOT_PATH}/docs"
    os.makedirs(docs_dir, exist_ok=True)
    with open(f"{docs_dir}/raw_{vertex_type}.json", "w") as fp:
        json.dump(raw_docs, fp)
# Convert the generated documentation records into Markdown text and write
# one <class>.md file per documented class under <CB_ROOT_PATH>/docs.
#
# The helpers are imported by name (instead of a wildcard import) so it is
# visible which names this section relies on.
from coagent.utils.code2doc_util import (
    class_info_decode,
    class_text_md,
    encode2md,
    method_info_decode,
    method_text_md,
)
import json

# Load the raw outputs written by the generation loop.
with open(f"{CB_ROOT_PATH}/docs/raw_method.json", "r", encoding="utf-8") as f:
    method_raw_data = json.load(f)
with open(f"{CB_ROOT_PATH}/docs/raw_class.json", "r", encoding="utf-8") as f:
    class_raw_data = json.load(f)

# Decode the raw records, then render each group to Markdown.
method_data = method_info_decode(method_raw_data)
class_data = class_info_decode(class_raw_data)
method_mds = encode2md(method_data, method_text_md)
class_mds = encode2md(class_data, class_text_md)

docs_dict = {}
for k, v in class_mds.items():
    method_textmds = method_mds.get(k, [])

    # In theory there is exactly one class-level entry per key; if there are
    # several, the last one is used. A key with no entries is skipped so that
    # text_md is never unbound or left over from the previous class.
    class_sections = list(v)
    if not class_sections:
        continue
    text_md = class_sections[-1]

    # Append every method section for this class, separated by a line break tag.
    text_md += "".join("\n<br>" + method_textmd for method_textmd in method_textmds)
    docs_dict.setdefault(k, []).append(text_md)

    # Write as UTF-8 explicitly: the generated text may contain non-ASCII
    # characters, and the platform default encoding may not be able to encode them.
    with open(f"{CB_ROOT_PATH}/docs/{k}.md", "w", encoding="utf-8") as f:
        f.write(text_md)
####################################################
######## Full reproduction process follows #########
####################################################
# import os, sys, requests
# from loguru import logger
# src_dir = os.path.join(
# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# )
# sys.path.append(src_dir)
# from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, CB_ROOT_PATH
# from configs.server_config import SANDBOX_SERVER
# from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS
# from coagent.llm_models.llm_config import EmbedConfig, LLMConfig
# from coagent.connector.phase import BasePhase
# from coagent.connector.agents import BaseAgent, SelectorAgent
# from coagent.connector.chains import BaseChain
# from coagent.connector.schema import (
# Message, Memory, load_role_configs, load_phase_configs, load_chain_configs, ActionStatus
# )
# from coagent.connector.memory_manager import BaseMemoryManager
# from coagent.connector.configs import AGETN_CONFIGS, CHAIN_CONFIGS, PHASE_CONFIGS, BASE_PROMPT_CONFIGS
# from coagent.connector.prompt_manager.prompt_manager import PromptManager
# from coagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler
# import importlib
# from loguru import logger
# from coagent.tools import CodeRetrievalSingle, RelatedVerticesRetrival, Vertex2Code
# # update new agent configs
# codeGenDocGroup_PROMPT = """#### Agent Profile
# Your goal is to response according the Context Data's information with the role that will best facilitate a solution, taking into account all relevant context (Context) provided.
# When you need to select the appropriate role for handling a user's query, carefully read the provided role names, role descriptions and tool list.
# ATTENTION: response carefully referenced "Response Output Format" in format.
# #### Input Format
# #### Response Output Format
# **Code Path:** Extract the paths for the class/method/function that need to be addressed from the context
# **Role:** Select the role from agent names
# """
# classGenDoc_PROMPT = """#### Agent Profile
# As an advanced code documentation generator, you are proficient in translating class definitions into comprehensive documentation with a focus on instantiation parameters.
# Your specific task is to parse the given code snippet of a class, extract information regarding its instantiation parameters.
# ATTENTION: response carefully in "Response Output Format".
# #### Input Format
# **Code Snippet:** Provide the full class definition, including the constructor and any parameters it may require for instantiation.
# #### Response Output Format
# **Class Base:** Specify the base class or interface from which the current class extends, if any.
# **Class Description:** Offer a brief description of the class's purpose and functionality.
# **Init Parameters:** List each parameter from construct. For each parameter, provide:
# - `param`: The parameter name
# - `param_description`: A concise explanation of the parameter's purpose.
# - `param_type`: The data type of the parameter, if explicitly defined.
# ```json
# [
# {
# "param": "parameter_name",
# "param_description": "A brief description of what this parameter is used for.",
# "param_type": "The data type of the parameter"
# },
# ...
# ]
# ```
# If no parameter for construct, return
# ```json
# []
# ```
# """
# funcGenDoc_PROMPT = """#### Agent Profile
# You are a high-level code documentation assistant, skilled at extracting information from function/method code into detailed and well-structured documentation.
# ATTENTION: response carefully in "Response Output Format".
# #### Input Format
# **Code Path:** Provide the code path of the function or method you wish to document.
# This name will be used to identify and extract the relevant details from the code snippet provided.
# **Code Snippet:** A segment of code that contains the function or method to be documented.
# #### Response Output Format
# **Class Description:** Offer a brief description of the method(function)'s purpose and functionality.
# **Parameters:** Extract parameter for the specific function/method Code from Code Snippet. For parameter, provide:
# - `param`: The parameter name
# - `param_description`: A concise explanation of the parameter's purpose.
# - `param_type`: The data type of the parameter, if explicitly defined.
# ```json
# [
# {
# "param": "parameter_name",
# "param_description": "A brief description of what this parameter is used for.",
# "param_type": "The data type of the parameter"
# },
# ...
# ]
# ```
# If no parameter for function/method, return
# ```json
# []
# ```
# **Return Value Description:** Describe what the function/method returns upon completion.
# **Return Type:** Indicate the type of data the function/method returns (e.g., string, integer, object, void).
# """
# CODE_GENERATE_GROUP_PROMPT_CONFIGS = [
# {"field_name": 'agent_profile', "function_name": 'handle_agent_profile', "is_context": False},
# {"field_name": 'agent_infomation', "function_name": 'handle_agent_data', "is_context": False, "omit_if_empty": False},
# # {"field_name": 'tool_information',"function_name": 'handle_tool_data', "is_context": False},
# {"field_name": 'context_placeholder', "function_name": '', "is_context": True},
# # {"field_name": 'reference_documents', "function_name": 'handle_doc_info'},
# {"field_name": 'session_records', "function_name": 'handle_session_records'},
# {"field_name": 'Specific Objective', "function_name": 'handle_specific_objective'},
# {"field_name": 'Code Snippet', "function_name": 'handle_code_snippet'},
# {"field_name": 'output_format', "function_name": 'handle_output_format', 'title': 'Response Output Format', "is_context": False},
# {"field_name": 'begin!!!', "function_name": 'handle_response', "is_context": False, "omit_if_empty": False}
# ]
# CODE_GENERATE_DOC_PROMPT_CONFIGS = [
# {"field_name": 'agent_profile', "function_name": 'handle_agent_profile', "is_context": False},
# # {"field_name": 'tool_information',"function_name": 'handle_tool_data', "is_context": False},
# {"field_name": 'context_placeholder', "function_name": '', "is_context": True},
# # {"field_name": 'reference_documents', "function_name": 'handle_doc_info'},
# {"field_name": 'session_records', "function_name": 'handle_session_records'},
# {"field_name": 'Specific Objective', "function_name": 'handle_specific_objective'},
# {"field_name": 'Code Snippet', "function_name": 'handle_code_snippet'},
# {"field_name": 'output_format', "function_name": 'handle_output_format', 'title': 'Response Output Format', "is_context": False},
# {"field_name": 'begin!!!', "function_name": 'handle_response', "is_context": False, "omit_if_empty": False}
# ]
# class CodeGenDocPM(PromptManager):
# def handle_code_snippet(self, **kwargs) -> str:
# if 'previous_agent_message' not in kwargs:
# return ""
# previous_agent_message: Message = kwargs['previous_agent_message']
# code_snippet = previous_agent_message.customed_kargs.get("Code Snippet", "")
# current_vertex = previous_agent_message.customed_kargs.get("Current_Vertex", "")
# instruction = "A segment of code that contains the function or method to be documented.\n"
# return instruction + "\n" + f"name: {current_vertex}\n{code_snippet}"
# def handle_specific_objective(self, **kwargs) -> str:
# if 'previous_agent_message' not in kwargs:
# return ""
# previous_agent_message: Message = kwargs['previous_agent_message']
# specific_objective = previous_agent_message.parsed_output.get("Code Path")
# instruction = "Provide the code path of the function or method you wish to document.\n"
# s = instruction + f"\n{specific_objective}"
# return s
# from coagent.tools import CodeRetrievalSingle
# # 定义一个新的agent类
# class CodeGenDocer(BaseAgent):
# def start_action_step(self, message: Message) -> Message:
# '''do action before agent predict '''
# # 根据问题获取代码片段和节点信息
# action_json = CodeRetrievalSingle.run(message.code_engine_name, message.origin_query,
# llm_config=self.llm_config, embed_config=self.embed_config, local_graph_path=message.local_graph_path, use_nh=message.use_nh,search_type="tag")
# current_vertex = action_json['vertex']
# message.customed_kargs["Code Snippet"] = action_json["code"]
# message.customed_kargs['Current_Vertex'] = current_vertex
# return message
# # add agent or prompt_manager class
# agent_module = importlib.import_module("coagent.connector.agents")
# prompt_manager_module = importlib.import_module("coagent.connector.prompt_manager")
# setattr(agent_module, 'CodeGenDocer', CodeGenDocer)
# setattr(prompt_manager_module, 'CodeGenDocPM', CodeGenDocPM)
# AGETN_CONFIGS.update({
# "classGenDoc": {
# "role": {
# "role_prompt": classGenDoc_PROMPT,
# "role_type": "assistant",
# "role_name": "classGenDoc",
# "role_desc": "",
# "agent_type": "CodeGenDocer"
# },
# "prompt_config": CODE_GENERATE_DOC_PROMPT_CONFIGS,
# "prompt_manager_type": "CodeGenDocPM",
# "chat_turn": 1,
# "focus_agents": [],
# "focus_message_keys": [],
# },
# "funcGenDoc": {
# "role": {
# "role_prompt": funcGenDoc_PROMPT,
# "role_type": "assistant",
# "role_name": "funcGenDoc",
# "role_desc": "",
# "agent_type": "CodeGenDocer"
# },
# "prompt_config": CODE_GENERATE_DOC_PROMPT_CONFIGS,
# "prompt_manager_type": "CodeGenDocPM",
# "chat_turn": 1,
# "focus_agents": [],
# "focus_message_keys": [],
# },
# "codeGenDocsGrouper": {
# "role": {
# "role_prompt": codeGenDocGroup_PROMPT,
# "role_type": "assistant",
# "role_name": "codeGenDocsGrouper",
# "role_desc": "",
# "agent_type": "SelectorAgent"
# },
# "prompt_config": CODE_GENERATE_GROUP_PROMPT_CONFIGS,
# "group_agents": ["classGenDoc", "funcGenDoc"],
# "chat_turn": 1,
# },
# })
# # update new chain configs
# CHAIN_CONFIGS.update({
# "codeGenDocsGroupChain": {
# "chain_name": "codeGenDocsGroupChain",
# "chain_type": "BaseChain",
# "agents": ["codeGenDocsGrouper"],
# "chat_turn": 1,
# "do_checker": False,
# "chain_prompt": ""
# }
# })
# # update phase configs
# PHASE_CONFIGS.update({
# "codeGenDocsGroup": {
# "phase_name": "codeGenDocsGroup",
# "phase_type": "BasePhase",
# "chains": ["codeGenDocsGroupChain"],
# "do_summary": False,
# "do_search": False,
# "do_doc_retrieval": False,
# "do_code_retrieval": False,
# "do_tool_retrieval": False,
# },
# })
# role_configs = load_role_configs(AGETN_CONFIGS)
# chain_configs = load_chain_configs(CHAIN_CONFIGS)
# phase_configs = load_phase_configs(PHASE_CONFIGS)
# # log-levelprint prompt和llm predict
# os.environ["log_verbose"] = "1"
# phase_name = "codeGenDocsGroup"
# llm_config = LLMConfig(
# model_name="gpt-4", api_key=os.environ["OPENAI_API_KEY"],
# api_base_url=os.environ["API_BASE_URL"], temperature=0.3
# )
# embed_config = EmbedConfig(
# embed_engine="model", embed_model="text2vec-base-chinese",
# embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese")
# )
# # initialize codebase
# # delete codebase
# codebase_name = 'client_local'
# code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"
# use_nh = False
# cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
# llm_config=llm_config, embed_config=embed_config)
# cbh.delete_codebase(codebase_name=codebase_name)
# # load codebase
# codebase_name = 'client_local'
# code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"
# use_nh = False
# do_interpret = True
# cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
# llm_config=llm_config, embed_config=embed_config)
# cbh.import_code(do_interpret=do_interpret)
# phase = BasePhase(
# phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH,
# embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH,
# )
# for vertex_type in ["class", "method"]:
# vertexes = cbh.search_vertices(vertex_type=vertex_type)
# logger.info(f"vertexes={vertexes}")
# # round-1
# docs = []
# for vertex in vertexes:
# vertex = vertex.split("-")[0] # -为method的参数
# query_content = f"为{vertex_type}节点 {vertex}生成文档"
# query = Message(
# role_name="human", role_type="user",
# role_content=query_content, input_query=query_content, origin_query=query_content,
# code_engine_name="client_local", score_threshold=1.0, top_k=3, cb_search_type="tag", use_nh=use_nh,
# local_graph_path=CB_ROOT_PATH,
# )
# output_message, output_memory = phase.step(query, reinit_memory=True)
# # print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list"))
# docs.append(output_memory.get_spec_parserd_output())
# import json
# os.makedirs("/home/user/code_base/docs", exist_ok=True)
# with open(f"/home/user/code_base/docs/raw_{vertex_type}.json", "w") as f:
# json.dump(docs, f)
# # 下面把生成的文档信息转换成markdown文本
# from coagent.utils.code2doc_util import *
# import json
# with open(f"/home/user/code_base/docs/raw_method.json", "r") as f:
# method_raw_data = json.load(f)
# with open(f"/home/user/code_base/docs/raw_class.json", "r") as f:
# class_raw_data = json.load(f)
# method_data = method_info_decode(method_raw_data)
# class_data = class_info_decode(class_raw_data)
# method_mds = encode2md(method_data, method_text_md)
# class_mds = encode2md(class_data, class_text_md)
# docs_dict = {}
# for k,v in class_mds.items():
# method_textmds = method_mds.get(k, [])
# for vv in v:
# # 理论上只有一个
# text_md = vv
# for method_textmd in method_textmds:
# text_md += "\n<br>" + method_textmd
# docs_dict.setdefault(k, []).append(text_md)
# with open(f"/home/user/code_base/docs/{k}.md", "w") as f:
# f.write(text_md)