codefuse-chatbot/dev_opsgpt/codebase_handler/parser/java_paraser/java_parser.py

107 lines
3.1 KiB
Python
Raw Normal View History

# encoding: utf-8
'''
@author: 温进
@file: java_parser.py
@time: 2023/10/23 5:03 PM
@desc: extract package, class, method and import names from java source code with javalang
'''
import json
import javalang
import glob
import os
from loguru import logger
class JavaParser:
def __init__(self):
pass
def parse(self, java_code_list):
'''
parse java code and extract entity
'''
tree_dict = self.preparse(java_code_list)
res = self.multi_java_code_parse(tree_dict)
return res
def preparse(self, java_code_dict):
'''
preparse by javalang
< dict of java_code and tree
'''
tree_dict = {}
for fp, java_code in java_code_dict.items():
try:
tree = javalang.parse.parse(java_code)
except Exception as e:
continue
if tree.package is not None:
tree_dict[java_code] = tree
logger.info('success parse {} files'.format(len(tree_dict)))
return tree_dict
def single_java_code_parse(self, tree):
'''
parse single code file
> tree: javalang parse result
< {pac_name: '', class_name_list: [], func_name_list: [], import_pac_name_list: []]}
'''
import_pac_name_list = []
# get imports
import_list = tree.imports
for import_pac in import_list:
import_pac_name = import_pac.path
import_pac_name_list.append(import_pac_name)
pac_name = tree.package.name
class_name_list = []
func_name_dict = {}
for node in tree.types:
if type(node) in (javalang.tree.ClassDeclaration, javalang.tree.InterfaceDeclaration):
class_name = pac_name + '.' + node.name
class_name_list.append(class_name)
for node_inner in node.body:
if type(node_inner) is javalang.tree.MethodDeclaration:
func_name = class_name + '.' + node_inner.name
# add params name to func_name
params_list = node_inner.parameters
for params in params_list:
params_name = params.type.name
func_name = func_name + '_' + params_name
if class_name not in func_name_dict:
func_name_dict[class_name] = []
func_name_dict[class_name].append(func_name)
res = {
'pac_name': pac_name,
'class_name_list': class_name_list,
'func_name_dict': func_name_dict,
'import_pac_name_list': import_pac_name_list
}
return res
def multi_java_code_parse(self, tree_dict):
'''
parse multiple java code
> tree_list
< parse_result_dict
'''
res_dict = {}
for java_code, tree in tree_dict.items():
try:
res_dict[java_code] = self.single_java_code_parse(tree)
except Exception as e:
logger.debug(java_code)
raise ImportError
return res_dict