"""Generate documentation for the class and method vertices of a local codebase.

The script registers a custom ``CodeGenDocer`` agent, rebuilds the
``client_local`` codebase (delete, then import), runs the ``code2DocsGroup``
phase once per vertex, dumps the parsed outputs to
``<CB_ROOT_PATH>/docs/raw_<vertex_type>.json`` and finally renders one
markdown file per class into the same directory.
"""
import importlib
import json
import os
import sys

from loguru import logger

src_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
)
# The project packages below are resolved relative to the repository root.
sys.path.append(src_dir)

from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, CB_ROOT_PATH
from configs.server_config import SANDBOX_SERVER
from coagent.llm_models.llm_config import EmbedConfig, LLMConfig
from coagent.connector.phase import BasePhase
from coagent.connector.agents import BaseAgent
from coagent.connector.schema import Message
from coagent.tools import CodeRetrievalSingle
from coagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler
# Imported explicitly (instead of ``import *``) and up front so that a missing
# helper fails before any LLM call is made.
from coagent.utils.code2doc_util import (
    class_info_decode,
    class_text_md,
    encode2md,
    method_info_decode,
    method_text_md,
)


# Define a new agent class
class CodeGenDocer(BaseAgent):
    """Agent that attaches the code to document to the message before predicting."""

    def start_action_step(self, message: Message) -> Message:
        """Do the retrieval action before the agent predicts.

        Runs ``CodeRetrievalSingle`` with ``search_type="tag"`` for the
        message's original query and stores the result in
        ``message.customed_kargs`` under ``"Code Snippet"`` and
        ``"Current_Vertex"``.

        Args:
            message: The incoming message; it is mutated in place.

        Returns:
            The same message object, enriched with the retrieved code.
        """
        # Fetch the code snippet and vertex information for the query.
        action_json = CodeRetrievalSingle.run(
            message.code_engine_name,
            message.origin_query,
            llm_config=self.llm_config,
            embed_config=self.embed_config,
            local_graph_path=message.local_graph_path,
            use_nh=message.use_nh,
            search_type="tag",
        )
        current_vertex = action_json["vertex"]
        message.customed_kargs["Code Snippet"] = action_json["code"]
        message.customed_kargs["Current_Vertex"] = current_vertex
        return message


# add agent or prompt_manager class
agent_module = importlib.import_module("coagent.connector.agents")
setattr(agent_module, "CodeGenDocer", CodeGenDocer)


# log level: print the prompt and the LLM prediction
os.environ["log_verbose"] = "1"

phase_name = "code2DocsGroup"
llm_config = LLMConfig(
    model_name="gpt-4",
    api_key=os.environ["OPENAI_API_KEY"],
    api_base_url=os.environ["API_BASE_URL"],
    temperature=0.3,
)
embed_config = EmbedConfig(
    embed_engine="model",
    embed_model="text2vec-base-chinese",
    embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese"),
)


# initialize codebase
codebase_name = "client_local"
code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"

# delete codebase
use_nh = False
cbh = CodeBaseHandler(
    codebase_name,
    code_path,
    crawl_type="dir",
    use_nh=use_nh,
    local_graph_path=CB_ROOT_PATH,
    llm_config=llm_config,
    embed_config=embed_config,
)
cbh.delete_codebase(codebase_name=codebase_name)

# load codebase
use_nh = True
do_interpret = True
cbh = CodeBaseHandler(
    codebase_name,
    code_path,
    crawl_type="dir",
    use_nh=use_nh,
    local_graph_path=CB_ROOT_PATH,
    llm_config=llm_config,
    embed_config=embed_config,
)
cbh.import_code(do_interpret=do_interpret)

# Re-initialize the handler based on the load performed above.
cbh = CodeBaseHandler(
    codebase_name,
    code_path,
    crawl_type="dir",
    use_nh=use_nh,
    local_graph_path=CB_ROOT_PATH,
    llm_config=llm_config,
    embed_config=embed_config,
)

phase = BasePhase(
    phase_name,
    sandbox_server=SANDBOX_SERVER,
    jupyter_work_path=JUPYTER_WORK_PATH,
    embed_config=embed_config,
    llm_config=llm_config,
    kb_root_path=KB_ROOT_PATH,
)

# Every artifact (raw JSON and rendered markdown) goes into this directory.
docs_dir = os.path.join(CB_ROOT_PATH, "docs")
os.makedirs(docs_dir, exist_ok=True)

for vertex_type in ["class", "method"]:
    vertexes = cbh.search_vertices(vertex_type=vertex_type)
    logger.info(f"vertexes={vertexes}")

    # round-1
    docs = []
    for vertex in vertexes:
        vertex = vertex.split("-")[0]  # "-" introduces the method's parameters
        query_content = f"为{vertex_type}节点 {vertex}生成文档"
        query = Message(
            role_name="human",
            role_type="user",
            role_content=query_content,
            input_query=query_content,
            origin_query=query_content,
            code_engine_name=codebase_name,
            score_threshold=1.0,
            top_k=3,
            cb_search_type="tag",
            use_nh=use_nh,
            local_graph_path=CB_ROOT_PATH,
        )
        _, output_memory = phase.step(query, reinit_memory=True)
        # print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list"))
        docs.append(output_memory.get_spec_parserd_output())

    raw_path = os.path.join(docs_dir, f"raw_{vertex_type}.json")
    with open(raw_path, "w", encoding="utf-8") as f:
        json.dump(docs, f)


# Convert the generated documentation info into markdown text.
with open(os.path.join(docs_dir, "raw_method.json"), "r", encoding="utf-8") as f:
    method_raw_data = json.load(f)

with open(os.path.join(docs_dir, "raw_class.json"), "r", encoding="utf-8") as f:
    class_raw_data = json.load(f)

method_data = method_info_decode(method_raw_data)
class_data = class_info_decode(class_raw_data)
method_mds = encode2md(method_data, method_text_md)
class_mds = encode2md(class_data, class_text_md)

# NOTE(review): in the source this separator literal was split across a raw
# line break ("\n" followed by a newline character); it is encoded here with
# an explicit second "\n" -- confirm against the upstream file.
METHOD_SEPARATOR = "\n\n"

docs_dict = {}
for k, v in class_mds.items():
    class_texts = list(v)
    if not class_texts:
        # Without this guard the text of the previously processed class would
        # be reused (or a NameError raised for the very first class).
        logger.warning(f"no class markdown generated for {k}, skipped")
        continue

    # In theory there is only one entry; as before, the last one wins.
    text_md = METHOD_SEPARATOR.join([class_texts[-1], *method_mds.get(k, [])])
    docs_dict.setdefault(k, []).append(text_md)

    # Generated docs may contain non-ASCII text, so do not rely on the
    # platform default encoding.
    with open(os.path.join(docs_dir, f"{k}.md"), "w", encoding="utf-8") as f:
        f.write(text_md)


####################################
######## Below is the complete reproduction process ########
####################################

# import os, sys, requests
# from loguru import logger
# src_dir = os.path.join(
#     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# )
# sys.path.append(src_dir)
# from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, CB_ROOT_PATH
# from configs.server_config import SANDBOX_SERVER
# from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS
# from coagent.llm_models.llm_config import EmbedConfig, LLMConfig
# from coagent.connector.phase import BasePhase
# from coagent.connector.agents import BaseAgent, SelectorAgent
# from coagent.connector.chains import BaseChain
# from coagent.connector.schema import (
#     Message, Memory, load_role_configs, load_phase_configs, load_chain_configs, ActionStatus
# )
# from coagent.connector.memory_manager import BaseMemoryManager
# from coagent.connector.configs import AGETN_CONFIGS, CHAIN_CONFIGS, PHASE_CONFIGS, BASE_PROMPT_CONFIGS
# from coagent.connector.prompt_manager.prompt_manager import PromptManager
# from coagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler
# import importlib
# from loguru import logger
# from coagent.tools import CodeRetrievalSingle, RelatedVerticesRetrival, Vertex2Code
# # update new agent configs
# codeGenDocGroup_PROMPT = """#### Agent Profile
# Your goal is to response according the Context Data's information with the role that will best facilitate a solution, taking into account all relevant context (Context) provided.
# When you need to select the appropriate role for handling a user's query, carefully read the provided role names, role descriptions and tool list.
# ATTENTION: response carefully referenced "Response Output Format" in format.
# #### Input Format
# #### Response Output Format
# **Code Path:** Extract the paths for the class/method/function that need to be addressed from the context
# **Role:** Select the role from agent names
# """
# classGenDoc_PROMPT = """#### Agent Profile
# As an advanced code documentation generator, you are proficient in translating class definitions into comprehensive documentation with a focus on instantiation parameters.
# Your specific task is to parse the given code snippet of a class, extract information regarding its instantiation parameters.
# ATTENTION: response carefully in "Response Output Format".
# #### Input Format
# **Code Snippet:** Provide the full class definition, including the constructor and any parameters it may require for instantiation.
# #### Response Output Format
# **Class Base:** Specify the base class or interface from which the current class extends, if any.
# **Class Description:** Offer a brief description of the class's purpose and functionality.
# **Init Parameters:** List each parameter from construct. For each parameter, provide:
# - `param`: The parameter name
# - `param_description`: A concise explanation of the parameter's purpose.
# - `param_type`: The data type of the parameter, if explicitly defined.
# ```json
# [
#     {
#         "param": "parameter_name",
#         "param_description": "A brief description of what this parameter is used for.",
#         "param_type": "The data type of the parameter"
#     },
#     ...
# ]
# ```
# If no parameter for construct, return
# ```json
# []
# ```
# """
# funcGenDoc_PROMPT = """#### Agent Profile
# You are a high-level code documentation assistant, skilled at extracting information from function/method code into detailed and well-structured documentation.
# ATTENTION: response carefully in "Response Output Format".
# #### Input Format
# **Code Path:** Provide the code path of the function or method you wish to document.
# This name will be used to identify and extract the relevant details from the code snippet provided.
# **Code Snippet:** A segment of code that contains the function or method to be documented.
# #### Response Output Format
# **Class Description:** Offer a brief description of the method(function)'s purpose and functionality.
# **Parameters:** Extract parameter for the specific function/method Code from Code Snippet. For parameter, provide:
# - `param`: The parameter name
# - `param_description`: A concise explanation of the parameter's purpose.
# - `param_type`: The data type of the parameter, if explicitly defined.
# ```json
# [
#     {
#         "param": "parameter_name",
#         "param_description": "A brief description of what this parameter is used for.",
#         "param_type": "The data type of the parameter"
#     },
#     ...
# ]
# ```
# If no parameter for function/method, return
# ```json
# []
# ```
# **Return Value Description:** Describe what the function/method returns upon completion.
# **Return Type:** Indicate the type of data the function/method returns (e.g., string, integer, object, void).
# """
# CODE_GENERATE_GROUP_PROMPT_CONFIGS = [
#     {"field_name": 'agent_profile', "function_name": 'handle_agent_profile', "is_context": False},
#     {"field_name": 'agent_infomation', "function_name": 'handle_agent_data', "is_context": False, "omit_if_empty": False},
#     # {"field_name": 'tool_information',"function_name": 'handle_tool_data', "is_context": False},
#     {"field_name": 'context_placeholder', "function_name": '', "is_context": True},
#     # {"field_name": 'reference_documents', "function_name": 'handle_doc_info'},
#     {"field_name": 'session_records', "function_name": 'handle_session_records'},
#     {"field_name": 'Specific Objective', "function_name": 'handle_specific_objective'},
#     {"field_name": 'Code Snippet', "function_name": 'handle_code_snippet'},
#     {"field_name": 'output_format', "function_name": 'handle_output_format', 'title': 'Response Output Format', "is_context": False},
#     {"field_name": 'begin!!!', "function_name": 'handle_response', "is_context": False, "omit_if_empty": False}
# ]
# CODE_GENERATE_DOC_PROMPT_CONFIGS = [
#     {"field_name": 'agent_profile', "function_name": 'handle_agent_profile', "is_context": False},
#     # {"field_name": 'tool_information',"function_name": 'handle_tool_data', "is_context": False},
#     {"field_name": 'context_placeholder', "function_name": '', "is_context": True},
#     # {"field_name": 'reference_documents', "function_name": 'handle_doc_info'},
#     {"field_name": 'session_records', "function_name": 'handle_session_records'},
#     {"field_name": 'Specific Objective', "function_name": 'handle_specific_objective'},
#     {"field_name": 'Code Snippet', "function_name": 'handle_code_snippet'},
#     {"field_name": 'output_format', "function_name": 'handle_output_format', 'title': 'Response Output Format', "is_context": False},
#     {"field_name": 'begin!!!', "function_name": 'handle_response', "is_context": False, "omit_if_empty": False}
# ]
# class CodeGenDocPM(PromptManager):
#     def handle_code_snippet(self, **kwargs) -> str:
#         if 'previous_agent_message' not in kwargs:
#             return ""
#         previous_agent_message: Message = kwargs['previous_agent_message']
#         code_snippet = previous_agent_message.customed_kargs.get("Code Snippet", "")
#         current_vertex = previous_agent_message.customed_kargs.get("Current_Vertex", "")
#         instruction = "A segment of code that contains the function or method to be documented.\n"
#         return instruction + "\n" + f"name: {current_vertex}\n{code_snippet}"
#     def handle_specific_objective(self, **kwargs) -> str:
#         if 'previous_agent_message' not in kwargs:
#             return ""
#         previous_agent_message: Message = kwargs['previous_agent_message']
#         specific_objective = previous_agent_message.parsed_output.get("Code Path")
#         instruction = "Provide the code path of the function or method you wish to document.\n"
#         s = instruction + f"\n{specific_objective}"
#         return s
# from coagent.tools import CodeRetrievalSingle
# # Define a new agent class
# class CodeGenDocer(BaseAgent):
#     def start_action_step(self, message: Message) -> Message:
#         '''do action before agent predict '''
#         # Fetch the code snippet and vertex information for the query
#         action_json = CodeRetrievalSingle.run(message.code_engine_name, message.origin_query,
#             llm_config=self.llm_config, embed_config=self.embed_config, local_graph_path=message.local_graph_path, use_nh=message.use_nh,search_type="tag")
#         current_vertex = action_json['vertex']
#         message.customed_kargs["Code Snippet"] = action_json["code"]
#         message.customed_kargs['Current_Vertex'] = current_vertex
#         return message
# # add agent or prompt_manager class
# agent_module = importlib.import_module("coagent.connector.agents")
# prompt_manager_module = importlib.import_module("coagent.connector.prompt_manager")
# setattr(agent_module, 'CodeGenDocer', CodeGenDocer)
# setattr(prompt_manager_module, 'CodeGenDocPM', CodeGenDocPM)
# AGETN_CONFIGS.update({
#     "classGenDoc": {
#         "role": {
#             "role_prompt": classGenDoc_PROMPT,
#             "role_type": "assistant",
#             "role_name": "classGenDoc",
#             "role_desc": "",
#             "agent_type": "CodeGenDocer"
#         },
#         "prompt_config": CODE_GENERATE_DOC_PROMPT_CONFIGS,
#         "prompt_manager_type": "CodeGenDocPM",
#         "chat_turn": 1,
#         "focus_agents": [],
#         "focus_message_keys": [],
#     },
#     "funcGenDoc": {
#         "role": {
#             "role_prompt": funcGenDoc_PROMPT,
#             "role_type": "assistant",
#             "role_name": "funcGenDoc",
#             "role_desc": "",
#             "agent_type": "CodeGenDocer"
#         },
#         "prompt_config": CODE_GENERATE_DOC_PROMPT_CONFIGS,
#         "prompt_manager_type": "CodeGenDocPM",
#         "chat_turn": 1,
#         "focus_agents": [],
#         "focus_message_keys": [],
#     },
#     "codeGenDocsGrouper": {
#         "role": {
#             "role_prompt": codeGenDocGroup_PROMPT,
#             "role_type": "assistant",
#             "role_name": "codeGenDocsGrouper",
#             "role_desc": "",
#             "agent_type": "SelectorAgent"
#         },
#         "prompt_config": CODE_GENERATE_GROUP_PROMPT_CONFIGS,
#         "group_agents": ["classGenDoc", "funcGenDoc"],
#         "chat_turn": 1,
#     },
# })
# # update new chain configs
# CHAIN_CONFIGS.update({
#     "codeGenDocsGroupChain": {
#         "chain_name": "codeGenDocsGroupChain",
#         "chain_type": "BaseChain",
#         "agents": ["codeGenDocsGrouper"],
#         "chat_turn": 1,
#         "do_checker": False,
#         "chain_prompt": ""
#     }
# })
# # update phase configs
# PHASE_CONFIGS.update({
#     "codeGenDocsGroup": {
#         "phase_name": "codeGenDocsGroup",
#         "phase_type": "BasePhase",
#         "chains": ["codeGenDocsGroupChain"],
#         "do_summary": False,
#         "do_search": False,
#         "do_doc_retrieval": False,
#         "do_code_retrieval": False,
#         "do_tool_retrieval": False,
#     },
# })
# role_configs = load_role_configs(AGETN_CONFIGS)
# chain_configs = load_chain_configs(CHAIN_CONFIGS)
# phase_configs = load_phase_configs(PHASE_CONFIGS)
# # log level: print the prompt and the LLM prediction
# os.environ["log_verbose"] = "1"
# phase_name = "codeGenDocsGroup"
# llm_config = LLMConfig(
#     model_name="gpt-4", api_key=os.environ["OPENAI_API_KEY"],
#     api_base_url=os.environ["API_BASE_URL"], temperature=0.3
# )
# embed_config = EmbedConfig(
#     embed_engine="model", embed_model="text2vec-base-chinese",
#     embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese")
# )
# # initialize codebase
# # delete codebase
# codebase_name = 'client_local'
# code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"
# use_nh = False
# cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
#                       llm_config=llm_config, embed_config=embed_config)
# cbh.delete_codebase(codebase_name=codebase_name)
# # load codebase
# codebase_name = 'client_local'
# code_path = "D://chromeDownloads/devopschat-bot/client_v2/client"
# use_nh = False
# do_interpret = True
# cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH,
#                       llm_config=llm_config, embed_config=embed_config)
# cbh.import_code(do_interpret=do_interpret)
# phase = BasePhase(
#     phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH,
#     embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH,
# )
# for vertex_type in ["class", "method"]:
#     vertexes = cbh.search_vertices(vertex_type=vertex_type)
#     logger.info(f"vertexes={vertexes}")
#     # round-1
#     docs = []
#     for vertex in vertexes:
#         vertex = vertex.split("-")[0] # "-" introduces the method's parameters
#         query_content = f"为{vertex_type}节点 {vertex}生成文档"
#         query = Message(
#             role_name="human", role_type="user",
#             role_content=query_content, input_query=query_content, origin_query=query_content,
#             code_engine_name="client_local", score_threshold=1.0, top_k=3, cb_search_type="tag", use_nh=use_nh,
#             local_graph_path=CB_ROOT_PATH,
#         )
#         output_message, output_memory = phase.step(query, reinit_memory=True)
#         # print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list"))
#         docs.append(output_memory.get_spec_parserd_output())
#     import json
#     os.makedirs("/home/user/code_base/docs", exist_ok=True)
#     with open(f"/home/user/code_base/docs/raw_{vertex_type}.json", "w") as f:
#         json.dump(docs, f)
# # Convert the generated documentation info into markdown text
# from coagent.utils.code2doc_util import *
# import json
# with open(f"/home/user/code_base/docs/raw_method.json", "r") as f:
#     method_raw_data = json.load(f)
# with open(f"/home/user/code_base/docs/raw_class.json", "r") as f:
#     class_raw_data = json.load(f)
# method_data = method_info_decode(method_raw_data)
# class_data = class_info_decode(class_raw_data)
# method_mds = encode2md(method_data, method_text_md)
# class_mds = encode2md(class_data, class_text_md)
# docs_dict = {}
# for k,v in class_mds.items():
#     method_textmds = method_mds.get(k, [])
#     for vv in v:
#         # In theory there is only one
#         text_md = vv
#     for method_textmd in method_textmds:
#         text_md += "\n\n" + method_textmd
#     docs_dict.setdefault(k, []).append(text_md)
#     with open(f"/home/user/code_base/docs/{k}.md", "w") as f:
#         f.write(text_md)