import os, sys, json

from loguru import logger

src_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
)
sys.path.append(src_dir)

from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, CB_ROOT_PATH
from configs.server_config import SANDBOX_SERVER
from coagent.llm_models.llm_config import EmbedConfig, LLMConfig

from coagent.connector.phase import BasePhase
from coagent.connector.agents import BaseAgent
from coagent.connector.schema import Message
from coagent.tools import CodeRetrievalSingle
from coagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler

import importlib

# Define a new agent class
class CodeGenDocer(BaseAgent):

    def start_action_step(self, message: Message) -> Message:
        '''do action before agent predict'''
        # Retrieve the code snippet and vertex information for the query
        action_json = CodeRetrievalSingle.run(
            message.code_engine_name, message.origin_query,
            llm_config=self.llm_config, embed_config=self.embed_config,
            local_graph_path=message.local_graph_path, use_nh=message.use_nh, search_type="tag",
        )
        current_vertex = action_json['vertex']
        message.customed_kargs["Code Snippet"] = action_json["code"]
        message.customed_kargs['Current_Vertex'] = current_vertex
        return message


# Register the new agent (or prompt_manager) class so the configs can reference it by name
agent_module = importlib.import_module("coagent.connector.agents")
setattr(agent_module, 'CodeGenDocer', CodeGenDocer)
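
# Illustration (an added note, not part of the original script): after setattr, the module
# presumably exposes the class under its name, which is how the config-driven "agent_type"
# string "CodeGenDocer" can be resolved back to this class.
assert getattr(agent_module, "CodeGenDocer") is CodeGenDocer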


# log level: print the prompt and the LLM prediction
os.environ["log_verbose"] = "1"

phase_name = "code2DocsGroup"
llm_config = LLMConfig(
    model_name="gpt-4", api_key=os.environ["OPENAI_API_KEY"],
    api_base_url=os.environ["API_BASE_URL"], temperature=0.3
)
embed_config = EmbedConfig(
    embed_engine="model", embed_model="text2vec-base-chinese",
    embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese")
)
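
# Optional sanity check (a sketch added here, not part of the original script): the embedding
# model is assumed to have been downloaded to embed_model_path beforehand; warn early if missing.
if not os.path.isdir(os.path.join(src_dir, "embedding_models/text2vec-base-chinese")):
    logger.warning("text2vec-base-chinese not found under embedding_models/; download it before running")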


# initialize codebase
# delete codebase
codebase_name = 'client_local'
code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"
use_nh = False
cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
                      llm_config=llm_config, embed_config=embed_config)
cbh.delete_codebase(codebase_name=codebase_name)
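# Note (an added comment, not from the original script): deleting the codebase first clears any
# previously imported graph/index for this codebase_name, so the import below starts from a clean state.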


# load codebase
codebase_name = 'client_local'
code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"
use_nh = True
do_interpret = True
cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
                      llm_config=llm_config, embed_config=embed_config)
cbh.import_code(do_interpret=do_interpret)
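
# Optional sanity check (a sketch added here, not part of the original script): confirm the import
# produced class/method vertices before running the doc-generation phase; search_vertices is the
# same call used in the generation loop below.
for _vt in ["class", "method"]:
    logger.info(f"imported {_vt} vertices: {len(list(cbh.search_vertices(vertex_type=_vt)))}")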

# Re-initialize the handler based on the load step above
cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
                      llm_config=llm_config, embed_config=embed_config)
phase = BasePhase(
    phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH,
    embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH,
)

for vertex_type in ["class", "method"]:
    vertexes = cbh.search_vertices(vertex_type=vertex_type)
    logger.info(f"vertexes={vertexes}")

    # round-1
    docs = []
    for vertex in vertexes:
        vertex = vertex.split("-")[0]  # the part after "-" is the method's parameter signature
        query_content = f"为{vertex_type}节点 {vertex}生成文档"  # i.e. "generate documentation for {vertex_type} vertex {vertex}"
        query = Message(
            role_name="human", role_type="user",
            role_content=query_content, input_query=query_content, origin_query=query_content,
            code_engine_name="client_local", score_threshold=1.0, top_k=3, cb_search_type="tag", use_nh=use_nh,
            local_graph_path=CB_ROOT_PATH,
        )
        output_message, output_memory = phase.step(query, reinit_memory=True)
        # print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list"))
        docs.append(output_memory.get_spec_parserd_output())

    os.makedirs(f"{CB_ROOT_PATH}/docs", exist_ok=True)
    with open(f"{CB_ROOT_PATH}/docs/raw_{vertex_type}.json", "w") as f:
        json.dump(docs, f)
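
    # Progress log (a sketch added here, not part of the original script): record how many raw
    # docs were generated for this vertex type before moving on.
    logger.info(f"generated {len(docs)} raw docs for vertex_type={vertex_type}")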


# Convert the generated documentation info into markdown text
from coagent.utils.code2doc_util import *
import json

with open(f"{CB_ROOT_PATH}/docs/raw_method.json", "r") as f:
    method_raw_data = json.load(f)

with open(f"{CB_ROOT_PATH}/docs/raw_class.json", "r") as f:
    class_raw_data = json.load(f)


method_data = method_info_decode(method_raw_data)
class_data = class_info_decode(class_raw_data)
method_mds = encode2md(method_data, method_text_md)
class_mds = encode2md(class_data, class_text_md)


docs_dict = {}
for k, v in class_mds.items():
    method_textmds = method_mds.get(k, [])
    for vv in v:
        # in theory there is only one entry per class
        text_md = vv

    for method_textmd in method_textmds:
        text_md += "\n<br>" + method_textmd

    docs_dict.setdefault(k, []).append(text_md)

    with open(f"{CB_ROOT_PATH}/docs/{k}.md", "w") as f:
        f.write(text_md)
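
# Quick check of the generated artifacts (a sketch added here, not part of the original script):
# list the per-class markdown files written above so the run can be verified at a glance.
for _name in sorted(os.listdir(f"{CB_ROOT_PATH}/docs")):
    if _name.endswith(".md"):
        logger.info(f"generated doc: {_name}")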



####################################
###### Full reproduction steps ######
####################################

# import os, sys, requests
# from loguru import logger
# src_dir = os.path.join(
#     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# )
# sys.path.append(src_dir)

# from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, CB_ROOT_PATH
# from configs.server_config import SANDBOX_SERVER
# from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS
# from coagent.llm_models.llm_config import EmbedConfig, LLMConfig

# from coagent.connector.phase import BasePhase
# from coagent.connector.agents import BaseAgent, SelectorAgent
# from coagent.connector.chains import BaseChain
# from coagent.connector.schema import (
#     Message, Memory, load_role_configs, load_phase_configs, load_chain_configs, ActionStatus
# )
# from coagent.connector.memory_manager import BaseMemoryManager
# from coagent.connector.configs import AGETN_CONFIGS, CHAIN_CONFIGS, PHASE_CONFIGS, BASE_PROMPT_CONFIGS
# from coagent.connector.prompt_manager.prompt_manager import PromptManager
# from coagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler

# import importlib
# from loguru import logger


# from coagent.tools import CodeRetrievalSingle, RelatedVerticesRetrival, Vertex2Code


# # update new agent configs
# codeGenDocGroup_PROMPT = """#### Agent Profile

# Your goal is to response according the Context Data's information with the role that will best facilitate a solution, taking into account all relevant context (Context) provided.

# When you need to select the appropriate role for handling a user's query, carefully read the provided role names, role descriptions and tool list.

# ATTENTION: response carefully referenced "Response Output Format" in format.

# #### Input Format

# #### Response Output Format

# **Code Path:** Extract the paths for the class/method/function that need to be addressed from the context

# **Role:** Select the role from agent names
# """

# classGenDoc_PROMPT = """#### Agent Profile
# As an advanced code documentation generator, you are proficient in translating class definitions into comprehensive documentation with a focus on instantiation parameters.
# Your specific task is to parse the given code snippet of a class, extract information regarding its instantiation parameters.

# ATTENTION: response carefully in "Response Output Format".

# #### Input Format

# **Code Snippet:** Provide the full class definition, including the constructor and any parameters it may require for instantiation.

# #### Response Output Format
# **Class Base:** Specify the base class or interface from which the current class extends, if any.

# **Class Description:** Offer a brief description of the class's purpose and functionality.

# **Init Parameters:** List each parameter from construct. For each parameter, provide:
# - `param`: The parameter name
# - `param_description`: A concise explanation of the parameter's purpose.
# - `param_type`: The data type of the parameter, if explicitly defined.

# ```json
# [
#     {
#         "param": "parameter_name",
#         "param_description": "A brief description of what this parameter is used for.",
#         "param_type": "The data type of the parameter"
#     },
#     ...
# ]
# ```


# If no parameter for construct, return
# ```json
# []
# ```
# """

# funcGenDoc_PROMPT = """#### Agent Profile
# You are a high-level code documentation assistant, skilled at extracting information from function/method code into detailed and well-structured documentation.

# ATTENTION: response carefully in "Response Output Format".


# #### Input Format
# **Code Path:** Provide the code path of the function or method you wish to document.
# This name will be used to identify and extract the relevant details from the code snippet provided.

# **Code Snippet:** A segment of code that contains the function or method to be documented.

# #### Response Output Format

# **Class Description:** Offer a brief description of the method(function)'s purpose and functionality.

# **Parameters:** Extract parameter for the specific function/method Code from Code Snippet. For parameter, provide:
# - `param`: The parameter name
# - `param_description`: A concise explanation of the parameter's purpose.
# - `param_type`: The data type of the parameter, if explicitly defined.
# ```json
# [
#     {
#         "param": "parameter_name",
#         "param_description": "A brief description of what this parameter is used for.",
#         "param_type": "The data type of the parameter"
#     },
#     ...
# ]
# ```

# If no parameter for function/method, return
# ```json
# []
# ```

# **Return Value Description:** Describe what the function/method returns upon completion.

# **Return Type:** Indicate the type of data the function/method returns (e.g., string, integer, object, void).
# """

# CODE_GENERATE_GROUP_PROMPT_CONFIGS = [
#     {"field_name": 'agent_profile', "function_name": 'handle_agent_profile', "is_context": False},
#     {"field_name": 'agent_infomation', "function_name": 'handle_agent_data', "is_context": False, "omit_if_empty": False},
#     # {"field_name": 'tool_information',"function_name": 'handle_tool_data', "is_context": False},
#     {"field_name": 'context_placeholder', "function_name": '', "is_context": True},
#     # {"field_name": 'reference_documents', "function_name": 'handle_doc_info'},
#     {"field_name": 'session_records', "function_name": 'handle_session_records'},
#     {"field_name": 'Specific Objective', "function_name": 'handle_specific_objective'},
#     {"field_name": 'Code Snippet', "function_name": 'handle_code_snippet'},
#     {"field_name": 'output_format', "function_name": 'handle_output_format', 'title': 'Response Output Format', "is_context": False},
#     {"field_name": 'begin!!!', "function_name": 'handle_response', "is_context": False, "omit_if_empty": False}
# ]

# CODE_GENERATE_DOC_PROMPT_CONFIGS = [
#     {"field_name": 'agent_profile', "function_name": 'handle_agent_profile', "is_context": False},
#     # {"field_name": 'tool_information',"function_name": 'handle_tool_data', "is_context": False},
#     {"field_name": 'context_placeholder', "function_name": '', "is_context": True},
#     # {"field_name": 'reference_documents', "function_name": 'handle_doc_info'},
#     {"field_name": 'session_records', "function_name": 'handle_session_records'},
#     {"field_name": 'Specific Objective', "function_name": 'handle_specific_objective'},
#     {"field_name": 'Code Snippet', "function_name": 'handle_code_snippet'},
#     {"field_name": 'output_format', "function_name": 'handle_output_format', 'title': 'Response Output Format', "is_context": False},
#     {"field_name": 'begin!!!', "function_name": 'handle_response', "is_context": False, "omit_if_empty": False}
# ]


# class CodeGenDocPM(PromptManager):
#     def handle_code_snippet(self, **kwargs) -> str:
#         if 'previous_agent_message' not in kwargs:
#             return ""
#         previous_agent_message: Message = kwargs['previous_agent_message']
#         code_snippet = previous_agent_message.customed_kargs.get("Code Snippet", "")
#         current_vertex = previous_agent_message.customed_kargs.get("Current_Vertex", "")
#         instruction = "A segment of code that contains the function or method to be documented.\n"
#         return instruction + "\n" + f"name: {current_vertex}\n{code_snippet}"

#     def handle_specific_objective(self, **kwargs) -> str:
#         if 'previous_agent_message' not in kwargs:
#             return ""
#         previous_agent_message: Message = kwargs['previous_agent_message']
#         specific_objective = previous_agent_message.parsed_output.get("Code Path")

#         instruction = "Provide the code path of the function or method you wish to document.\n"
#         s = instruction + f"\n{specific_objective}"
#         return s


# from coagent.tools import CodeRetrievalSingle

# # Define a new agent class
# class CodeGenDocer(BaseAgent):

#     def start_action_step(self, message: Message) -> Message:
#         '''do action before agent predict'''
#         # Retrieve the code snippet and vertex information for the query
#         action_json = CodeRetrievalSingle.run(message.code_engine_name, message.origin_query,
#             llm_config=self.llm_config, embed_config=self.embed_config, local_graph_path=message.local_graph_path, use_nh=message.use_nh, search_type="tag")
#         current_vertex = action_json['vertex']
#         message.customed_kargs["Code Snippet"] = action_json["code"]
#         message.customed_kargs['Current_Vertex'] = current_vertex
#         return message

# # add agent or prompt_manager class
# agent_module = importlib.import_module("coagent.connector.agents")
# prompt_manager_module = importlib.import_module("coagent.connector.prompt_manager")

# setattr(agent_module, 'CodeGenDocer', CodeGenDocer)
# setattr(prompt_manager_module, 'CodeGenDocPM', CodeGenDocPM)




# AGETN_CONFIGS.update({
#     "classGenDoc": {
#         "role": {
#             "role_prompt": classGenDoc_PROMPT,
#             "role_type": "assistant",
#             "role_name": "classGenDoc",
#             "role_desc": "",
#             "agent_type": "CodeGenDocer"
#         },
#         "prompt_config": CODE_GENERATE_DOC_PROMPT_CONFIGS,
#         "prompt_manager_type": "CodeGenDocPM",
#         "chat_turn": 1,
#         "focus_agents": [],
#         "focus_message_keys": [],
#     },
#     "funcGenDoc": {
#         "role": {
#             "role_prompt": funcGenDoc_PROMPT,
#             "role_type": "assistant",
#             "role_name": "funcGenDoc",
#             "role_desc": "",
#             "agent_type": "CodeGenDocer"
#         },
#         "prompt_config": CODE_GENERATE_DOC_PROMPT_CONFIGS,
#         "prompt_manager_type": "CodeGenDocPM",
#         "chat_turn": 1,
#         "focus_agents": [],
#         "focus_message_keys": [],
#     },
#     "codeGenDocsGrouper": {
#         "role": {
#             "role_prompt": codeGenDocGroup_PROMPT,
#             "role_type": "assistant",
#             "role_name": "codeGenDocsGrouper",
#             "role_desc": "",
#             "agent_type": "SelectorAgent"
#         },
#         "prompt_config": CODE_GENERATE_GROUP_PROMPT_CONFIGS,
#         "group_agents": ["classGenDoc", "funcGenDoc"],
#         "chat_turn": 1,
#     },
# })

# # update new chain configs
# CHAIN_CONFIGS.update({
#     "codeGenDocsGroupChain": {
#         "chain_name": "codeGenDocsGroupChain",
#         "chain_type": "BaseChain",
#         "agents": ["codeGenDocsGrouper"],
#         "chat_turn": 1,
#         "do_checker": False,
#         "chain_prompt": ""
#     }
# })

# # update phase configs
# PHASE_CONFIGS.update({
#     "codeGenDocsGroup": {
#         "phase_name": "codeGenDocsGroup",
#         "phase_type": "BasePhase",
#         "chains": ["codeGenDocsGroupChain"],
#         "do_summary": False,
#         "do_search": False,
#         "do_doc_retrieval": False,
#         "do_code_retrieval": False,
#         "do_tool_retrieval": False,
#     },
# })


# role_configs = load_role_configs(AGETN_CONFIGS)
# chain_configs = load_chain_configs(CHAIN_CONFIGS)
# phase_configs = load_phase_configs(PHASE_CONFIGS)

# # log level: print the prompt and the LLM prediction
# os.environ["log_verbose"] = "1"

# phase_name = "codeGenDocsGroup"
# llm_config = LLMConfig(
#     model_name="gpt-4", api_key=os.environ["OPENAI_API_KEY"],
#     api_base_url=os.environ["API_BASE_URL"], temperature=0.3
# )
# embed_config = EmbedConfig(
#     embed_engine="model", embed_model="text2vec-base-chinese",
#     embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese")
# )


# # initialize codebase
# # delete codebase
# codebase_name = 'client_local'
# code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"
# use_nh = False
# cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
#                       llm_config=llm_config, embed_config=embed_config)
# cbh.delete_codebase(codebase_name=codebase_name)


# # load codebase
# codebase_name = 'client_local'
# code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"
# use_nh = False
# do_interpret = True
# cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
#                       llm_config=llm_config, embed_config=embed_config)
# cbh.import_code(do_interpret=do_interpret)


# phase = BasePhase(
#     phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH,
#     embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH,
# )

# for vertex_type in ["class", "method"]:
#     vertexes = cbh.search_vertices(vertex_type=vertex_type)
#     logger.info(f"vertexes={vertexes}")

#     # round-1
#     docs = []
#     for vertex in vertexes:
#         vertex = vertex.split("-")[0]  # the part after "-" is the method's parameter signature
#         query_content = f"为{vertex_type}节点 {vertex}生成文档"  # i.e. "generate documentation for {vertex_type} vertex {vertex}"
#         query = Message(
#             role_name="human", role_type="user",
#             role_content=query_content, input_query=query_content, origin_query=query_content,
#             code_engine_name="client_local", score_threshold=1.0, top_k=3, cb_search_type="tag", use_nh=use_nh,
#             local_graph_path=CB_ROOT_PATH,
#         )
#         output_message, output_memory = phase.step(query, reinit_memory=True)
#         # print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list"))
#         docs.append(output_memory.get_spec_parserd_output())

#     import json
#     os.makedirs("/home/user/code_base/docs", exist_ok=True)
#     with open(f"/home/user/code_base/docs/raw_{vertex_type}.json", "w") as f:
#         json.dump(docs, f)


# # Convert the generated documentation info into markdown text
# from coagent.utils.code2doc_util import *

# import json
# with open(f"/home/user/code_base/docs/raw_method.json", "r") as f:
#     method_raw_data = json.load(f)

# with open(f"/home/user/code_base/docs/raw_class.json", "r") as f:
#     class_raw_data = json.load(f)


# method_data = method_info_decode(method_raw_data)
# class_data = class_info_decode(class_raw_data)
# method_mds = encode2md(method_data, method_text_md)
# class_mds = encode2md(class_data, class_text_md)

# docs_dict = {}
# for k, v in class_mds.items():
#     method_textmds = method_mds.get(k, [])
#     for vv in v:
#         # in theory there is only one entry per class
#         text_md = vv

#     for method_textmd in method_textmds:
#         text_md += "\n<br>" + method_textmd

#     docs_dict.setdefault(k, []).append(text_md)

#     with open(f"/home/user/code_base/docs/{k}.md", "w") as f:
#         f.write(text_md)