codefuse-chatbot/examples/agent_examples/codeGenDoc_example.py

507 lines
20 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os, sys, json
from loguru import logger
src_dir = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
)
sys.path.append(src_dir)
from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, CB_ROOT_PATH
from configs.server_config import SANDBOX_SERVER
from coagent.llm_models.llm_config import EmbedConfig, LLMConfig
from coagent.connector.phase import BasePhase
from coagent.connector.agents import BaseAgent
from coagent.connector.schema import Message
from coagent.tools import CodeRetrievalSingle
from coagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler
import importlib
# Define a new agent class
class CodeGenDocer(BaseAgent):
    """Agent that looks up the code for a query before it predicts."""

    def start_action_step(self, message: Message) -> Message:
        '''Do the pre-prediction action: attach retrieved code to ``message``.

        Runs ``CodeRetrievalSingle`` with a ``"tag"`` search on the message's
        original query, then stores the ``"code"`` and ``"vertex"`` entries of
        the result in ``message.customed_kargs`` under ``"Code Snippet"`` and
        ``"Current_Vertex"``. The same message object is returned.
        '''
        # Fetch the code snippet and vertex information for the query.
        retrieved = CodeRetrievalSingle.run(
            message.code_engine_name,
            message.origin_query,
            llm_config=self.llm_config,
            embed_config=self.embed_config,
            local_graph_path=message.local_graph_path,
            use_nh=message.use_nh,
            search_type="tag",
        )
        vertex = retrieved['vertex']
        snippet = retrieved["code"]

        extras = message.customed_kargs
        extras["Code Snippet"] = snippet
        extras['Current_Vertex'] = vertex
        return message
# Add the agent (or prompt_manager) class: expose CodeGenDocer as an
# attribute of the agents module under its own name.
agent_module = importlib.import_module("coagent.connector.agents")
agent_module.CodeGenDocer = CodeGenDocer

# log level: print the prompt and the LLM prediction
os.environ["log_verbose"] = "1"

phase_name = "code2DocsGroup"

# LLM settings; the API key and base URL are read from the environment and
# a missing variable raises KeyError here.
llm_config = LLMConfig(
    model_name="gpt-4",
    api_key=os.environ["OPENAI_API_KEY"],
    api_base_url=os.environ["API_BASE_URL"],
    temperature=0.3,
)

# Embedding settings; the model directory is resolved relative to src_dir.
embed_model_path = os.path.join(src_dir, "embedding_models/text2vec-base-chinese")
embed_config = EmbedConfig(
    embed_engine="model",
    embed_model="text2vec-base-chinese",
    embed_model_path=embed_model_path,
)
# initialize codebase
codebase_name = 'client_local'
code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"

# Keyword arguments shared by every CodeBaseHandler built below.
handler_kwargs = dict(
    crawl_type='dir',
    local_graph_path=CB_ROOT_PATH,
    llm_config=llm_config,
    embed_config=embed_config,
)

# delete codebase
use_nh = False
cbh = CodeBaseHandler(codebase_name, code_path, use_nh=use_nh, **handler_kwargs)
cbh.delete_codebase(codebase_name=codebase_name)

# load codebase
use_nh = True
do_interpret = True
cbh = CodeBaseHandler(codebase_name, code_path, use_nh=use_nh, **handler_kwargs)
cbh.import_code(do_interpret=do_interpret)

# Initialize the handler again, based on the load step above.
cbh = CodeBaseHandler(codebase_name, code_path, use_nh=use_nh, **handler_kwargs)

phase = BasePhase(
    phase_name,
    sandbox_server=SANDBOX_SERVER,
    jupyter_work_path=JUPYTER_WORK_PATH,
    embed_config=embed_config,
    llm_config=llm_config,
    kb_root_path=KB_ROOT_PATH,
)
# Run the phase once for every class/method vertex of the codebase and dump
# the raw parsed outputs to <CB_ROOT_PATH>/docs/raw_<vertex_type>.json.

# Create the output directory up front so that a bad path fails before any
# LLM call is made rather than after a whole round of them.
docs_dir = f"{CB_ROOT_PATH}/docs"
os.makedirs(docs_dir, exist_ok=True)

for vertex_type in ["class", "method"]:
    vertexes = cbh.search_vertices(vertex_type=vertex_type)
    logger.info(f"vertexes={vertexes}")

    # round-1
    docs = []
    for vertex in vertexes:
        # Keep only the text before the first "-"; per the original note,
        # what follows "-" is the method's parameters.
        vertex = vertex.split("-")[0]
        query_content = f"{vertex_type}节点 {vertex}生成文档"
        query = Message(
            role_name="human", role_type="user",
            role_content=query_content, input_query=query_content, origin_query=query_content,
            # Use the name of the codebase loaded above instead of repeating
            # the literal, so the two cannot drift apart.
            code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="tag", use_nh=use_nh,
            local_graph_path=CB_ROOT_PATH,
        )
        output_message, output_memory = phase.step(query, reinit_memory=True)
        # print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list"))
        docs.append(output_memory.get_spec_parserd_output())

    # One JSON file per vertex type; the conversion step reads these back.
    with open(f"{docs_dir}/raw_{vertex_type}.json", "w", encoding="utf-8") as f:
        json.dump(docs, f)
# Next, convert the generated documentation info into markdown text.
# NOTE(review): method_info_decode, class_info_decode, encode2md,
# method_text_md and class_text_md are presumably provided by this star
# import; no other import in view defines them.
from coagent.utils.code2doc_util import *
import json

with open(f"{CB_ROOT_PATH}/docs/raw_method.json", "r", encoding="utf-8") as f:
    method_raw_data = json.load(f)

with open(f"{CB_ROOT_PATH}/docs/raw_class.json", "r", encoding="utf-8") as f:
    class_raw_data = json.load(f)

method_data = method_info_decode(method_raw_data)
class_data = class_info_decode(class_raw_data)
method_mds = encode2md(method_data, method_text_md)
class_mds = encode2md(class_data, class_text_md)

# Build one markdown document per class (class text followed by the text of
# each of its methods) and write it to <CB_ROOT_PATH>/docs/<class key>.md.
docs_dict = {}
for k, v in class_mds.items():
    # In theory there is only one class text per key; if there are several,
    # the last one is used.
    text_md = None
    for vv in v:
        text_md = vv
    if text_md is None:
        # No class text for this key: skip it rather than hit an unbound
        # name or reuse the text left over from the previous class.
        continue

    for method_textmd in method_mds.get(k, []):
        text_md += "\n<br>" + method_textmd

    docs_dict.setdefault(k, []).append(text_md)

    # Explicit UTF-8 so non-ASCII documentation text is written the same way
    # regardless of the platform's default encoding.
    with open(f"{CB_ROOT_PATH}//docs/{k}.md", "w", encoding="utf-8") as f:
        f.write(text_md)
####################################
######## The complete reproduction process follows ########
####################################
# import os, sys, requests
# from loguru import logger
# src_dir = os.path.join(
# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# )
# sys.path.append(src_dir)
# from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, CB_ROOT_PATH
# from configs.server_config import SANDBOX_SERVER
# from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS
# from coagent.llm_models.llm_config import EmbedConfig, LLMConfig
# from coagent.connector.phase import BasePhase
# from coagent.connector.agents import BaseAgent, SelectorAgent
# from coagent.connector.chains import BaseChain
# from coagent.connector.schema import (
# Message, Memory, load_role_configs, load_phase_configs, load_chain_configs, ActionStatus
# )
# from coagent.connector.memory_manager import BaseMemoryManager
# from coagent.connector.configs import AGETN_CONFIGS, CHAIN_CONFIGS, PHASE_CONFIGS, BASE_PROMPT_CONFIGS
# from coagent.connector.prompt_manager.prompt_manager import PromptManager
# from coagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler
# import importlib
# from loguru import logger
# from coagent.tools import CodeRetrievalSingle, RelatedVerticesRetrival, Vertex2Code
# # update new agent configs
# codeGenDocGroup_PROMPT = """#### Agent Profile
# Your goal is to response according the Context Data's information with the role that will best facilitate a solution, taking into account all relevant context (Context) provided.
# When you need to select the appropriate role for handling a user's query, carefully read the provided role names, role descriptions and tool list.
# ATTENTION: response carefully referenced "Response Output Format" in format.
# #### Input Format
# #### Response Output Format
# **Code Path:** Extract the paths for the class/method/function that need to be addressed from the context
# **Role:** Select the role from agent names
# """
# classGenDoc_PROMPT = """#### Agent Profile
# As an advanced code documentation generator, you are proficient in translating class definitions into comprehensive documentation with a focus on instantiation parameters.
# Your specific task is to parse the given code snippet of a class, extract information regarding its instantiation parameters.
# ATTENTION: response carefully in "Response Output Format".
# #### Input Format
# **Code Snippet:** Provide the full class definition, including the constructor and any parameters it may require for instantiation.
# #### Response Output Format
# **Class Base:** Specify the base class or interface from which the current class extends, if any.
# **Class Description:** Offer a brief description of the class's purpose and functionality.
# **Init Parameters:** List each parameter from construct. For each parameter, provide:
# - `param`: The parameter name
# - `param_description`: A concise explanation of the parameter's purpose.
# - `param_type`: The data type of the parameter, if explicitly defined.
# ```json
# [
# {
# "param": "parameter_name",
# "param_description": "A brief description of what this parameter is used for.",
# "param_type": "The data type of the parameter"
# },
# ...
# ]
# ```
# If no parameter for construct, return
# ```json
# []
# ```
# """
# funcGenDoc_PROMPT = """#### Agent Profile
# You are a high-level code documentation assistant, skilled at extracting information from function/method code into detailed and well-structured documentation.
# ATTENTION: response carefully in "Response Output Format".
# #### Input Format
# **Code Path:** Provide the code path of the function or method you wish to document.
# This name will be used to identify and extract the relevant details from the code snippet provided.
# **Code Snippet:** A segment of code that contains the function or method to be documented.
# #### Response Output Format
# **Class Description:** Offer a brief description of the method(function)'s purpose and functionality.
# **Parameters:** Extract parameter for the specific function/method Code from Code Snippet. For parameter, provide:
# - `param`: The parameter name
# - `param_description`: A concise explanation of the parameter's purpose.
# - `param_type`: The data type of the parameter, if explicitly defined.
# ```json
# [
# {
# "param": "parameter_name",
# "param_description": "A brief description of what this parameter is used for.",
# "param_type": "The data type of the parameter"
# },
# ...
# ]
# ```
# If no parameter for function/method, return
# ```json
# []
# ```
# **Return Value Description:** Describe what the function/method returns upon completion.
# **Return Type:** Indicate the type of data the function/method returns (e.g., string, integer, object, void).
# """
# CODE_GENERATE_GROUP_PROMPT_CONFIGS = [
# {"field_name": 'agent_profile', "function_name": 'handle_agent_profile', "is_context": False},
# {"field_name": 'agent_infomation', "function_name": 'handle_agent_data', "is_context": False, "omit_if_empty": False},
# # {"field_name": 'tool_information',"function_name": 'handle_tool_data', "is_context": False},
# {"field_name": 'context_placeholder', "function_name": '', "is_context": True},
# # {"field_name": 'reference_documents', "function_name": 'handle_doc_info'},
# {"field_name": 'session_records', "function_name": 'handle_session_records'},
# {"field_name": 'Specific Objective', "function_name": 'handle_specific_objective'},
# {"field_name": 'Code Snippet', "function_name": 'handle_code_snippet'},
# {"field_name": 'output_format', "function_name": 'handle_output_format', 'title': 'Response Output Format', "is_context": False},
# {"field_name": 'begin!!!', "function_name": 'handle_response', "is_context": False, "omit_if_empty": False}
# ]
# CODE_GENERATE_DOC_PROMPT_CONFIGS = [
# {"field_name": 'agent_profile', "function_name": 'handle_agent_profile', "is_context": False},
# # {"field_name": 'tool_information',"function_name": 'handle_tool_data', "is_context": False},
# {"field_name": 'context_placeholder', "function_name": '', "is_context": True},
# # {"field_name": 'reference_documents', "function_name": 'handle_doc_info'},
# {"field_name": 'session_records', "function_name": 'handle_session_records'},
# {"field_name": 'Specific Objective', "function_name": 'handle_specific_objective'},
# {"field_name": 'Code Snippet', "function_name": 'handle_code_snippet'},
# {"field_name": 'output_format', "function_name": 'handle_output_format', 'title': 'Response Output Format', "is_context": False},
# {"field_name": 'begin!!!', "function_name": 'handle_response', "is_context": False, "omit_if_empty": False}
# ]
# class CodeGenDocPM(PromptManager):
# def handle_code_snippet(self, **kwargs) -> str:
# if 'previous_agent_message' not in kwargs:
# return ""
# previous_agent_message: Message = kwargs['previous_agent_message']
# code_snippet = previous_agent_message.customed_kargs.get("Code Snippet", "")
# current_vertex = previous_agent_message.customed_kargs.get("Current_Vertex", "")
# instruction = "A segment of code that contains the function or method to be documented.\n"
# return instruction + "\n" + f"name: {current_vertex}\n{code_snippet}"
# def handle_specific_objective(self, **kwargs) -> str:
# if 'previous_agent_message' not in kwargs:
# return ""
# previous_agent_message: Message = kwargs['previous_agent_message']
# specific_objective = previous_agent_message.parsed_output.get("Code Path")
# instruction = "Provide the code path of the function or method you wish to document.\n"
# s = instruction + f"\n{specific_objective}"
# return s
# from coagent.tools import CodeRetrievalSingle
# # define a new agent class
# class CodeGenDocer(BaseAgent):
# def start_action_step(self, message: Message) -> Message:
# '''do action before agent predict '''
# # fetch the code snippet and vertex information for the query
# action_json = CodeRetrievalSingle.run(message.code_engine_name, message.origin_query,
# llm_config=self.llm_config, embed_config=self.embed_config, local_graph_path=message.local_graph_path, use_nh=message.use_nh,search_type="tag")
# current_vertex = action_json['vertex']
# message.customed_kargs["Code Snippet"] = action_json["code"]
# message.customed_kargs['Current_Vertex'] = current_vertex
# return message
# # add agent or prompt_manager class
# agent_module = importlib.import_module("coagent.connector.agents")
# prompt_manager_module = importlib.import_module("coagent.connector.prompt_manager")
# setattr(agent_module, 'CodeGenDocer', CodeGenDocer)
# setattr(prompt_manager_module, 'CodeGenDocPM', CodeGenDocPM)
# AGETN_CONFIGS.update({
# "classGenDoc": {
# "role": {
# "role_prompt": classGenDoc_PROMPT,
# "role_type": "assistant",
# "role_name": "classGenDoc",
# "role_desc": "",
# "agent_type": "CodeGenDocer"
# },
# "prompt_config": CODE_GENERATE_DOC_PROMPT_CONFIGS,
# "prompt_manager_type": "CodeGenDocPM",
# "chat_turn": 1,
# "focus_agents": [],
# "focus_message_keys": [],
# },
# "funcGenDoc": {
# "role": {
# "role_prompt": funcGenDoc_PROMPT,
# "role_type": "assistant",
# "role_name": "funcGenDoc",
# "role_desc": "",
# "agent_type": "CodeGenDocer"
# },
# "prompt_config": CODE_GENERATE_DOC_PROMPT_CONFIGS,
# "prompt_manager_type": "CodeGenDocPM",
# "chat_turn": 1,
# "focus_agents": [],
# "focus_message_keys": [],
# },
# "codeGenDocsGrouper": {
# "role": {
# "role_prompt": codeGenDocGroup_PROMPT,
# "role_type": "assistant",
# "role_name": "codeGenDocsGrouper",
# "role_desc": "",
# "agent_type": "SelectorAgent"
# },
# "prompt_config": CODE_GENERATE_GROUP_PROMPT_CONFIGS,
# "group_agents": ["classGenDoc", "funcGenDoc"],
# "chat_turn": 1,
# },
# })
# # update new chain configs
# CHAIN_CONFIGS.update({
# "codeGenDocsGroupChain": {
# "chain_name": "codeGenDocsGroupChain",
# "chain_type": "BaseChain",
# "agents": ["codeGenDocsGrouper"],
# "chat_turn": 1,
# "do_checker": False,
# "chain_prompt": ""
# }
# })
# # update phase configs
# PHASE_CONFIGS.update({
# "codeGenDocsGroup": {
# "phase_name": "codeGenDocsGroup",
# "phase_type": "BasePhase",
# "chains": ["codeGenDocsGroupChain"],
# "do_summary": False,
# "do_search": False,
# "do_doc_retrieval": False,
# "do_code_retrieval": False,
# "do_tool_retrieval": False,
# },
# })
# role_configs = load_role_configs(AGETN_CONFIGS)
# chain_configs = load_chain_configs(CHAIN_CONFIGS)
# phase_configs = load_phase_configs(PHASE_CONFIGS)
# # log level: print the prompt and the LLM prediction
# os.environ["log_verbose"] = "1"
# phase_name = "codeGenDocsGroup"
# llm_config = LLMConfig(
# model_name="gpt-4", api_key=os.environ["OPENAI_API_KEY"],
# api_base_url=os.environ["API_BASE_URL"], temperature=0.3
# )
# embed_config = EmbedConfig(
# embed_engine="model", embed_model="text2vec-base-chinese",
# embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese")
# )
# # initialize codebase
# # delete codebase
# codebase_name = 'client_local'
# code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"
# use_nh = False
# cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
# llm_config=llm_config, embed_config=embed_config)
# cbh.delete_codebase(codebase_name=codebase_name)
# # load codebase
# codebase_name = 'client_local'
# code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"
# use_nh = False
# do_interpret = True
# cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
# llm_config=llm_config, embed_config=embed_config)
# cbh.import_code(do_interpret=do_interpret)
# phase = BasePhase(
# phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH,
# embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH,
# )
# for vertex_type in ["class", "method"]:
# vertexes = cbh.search_vertices(vertex_type=vertex_type)
# logger.info(f"vertexes={vertexes}")
# # round-1
# docs = []
# for vertex in vertexes:
# vertex = vertex.split("-")[0] # the part after "-" is the method's parameters
# query_content = f"为{vertex_type}节点 {vertex}生成文档"
# query = Message(
# role_name="human", role_type="user",
# role_content=query_content, input_query=query_content, origin_query=query_content,
# code_engine_name="client_local", score_threshold=1.0, top_k=3, cb_search_type="tag", use_nh=use_nh,
# local_graph_path=CB_ROOT_PATH,
# )
# output_message, output_memory = phase.step(query, reinit_memory=True)
# # print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list"))
# docs.append(output_memory.get_spec_parserd_output())
# import json
# os.makedirs("/home/user/code_base/docs", exist_ok=True)
# with open(f"/home/user/code_base/docs/raw_{vertex_type}.json", "w") as f:
# json.dump(docs, f)
# # next, convert the generated documentation info into markdown text
# from coagent.utils.code2doc_util import *
# import json
# with open(f"/home/user/code_base/docs/raw_method.json", "r") as f:
# method_raw_data = json.load(f)
# with open(f"/home/user/code_base/docs/raw_class.json", "r") as f:
# class_raw_data = json.load(f)
# method_data = method_info_decode(method_raw_data)
# class_data = class_info_decode(class_raw_data)
# method_mds = encode2md(method_data, method_text_md)
# class_mds = encode2md(class_data, class_text_md)
# docs_dict = {}
# for k,v in class_mds.items():
# method_textmds = method_mds.get(k, [])
# for vv in v:
# # in theory there is only one
# text_md = vv
# for method_textmd in method_textmds:
# text_md += "\n<br>" + method_textmd
# docs_dict.setdefault(k, []).append(text_md)
# with open(f"/home/user/code_base/docs/{k}.md", "w") as f:
# f.write(text_md)