# encoding: utf-8
'''
@author: 温进
@file: java_parser.py
@time: 2023/10/23 17:03
@desc: Extract package, class, method and import names from Java source code via javalang.
'''
|
||
|
import json
|
||
|
import javalang
|
||
|
import glob
|
||
|
import os
|
||
|
from loguru import logger
|
||
|
|
||
|
|
||
|
class JavaParser:
    '''
    Extract package, class, method and import names from Java source code.

    Parsing itself is delegated to ``javalang``; this class walks the
    resulting trees and collects fully qualified names.
    '''

    def __init__(self):
        pass

    def parse(self, java_code_list):
        '''
        Parse Java sources and extract their entities.

        > java_code_list: despite the name, a dict of {file path: java source
          text}; it is handed to ``preparse``, which iterates it with
          ``.items()``.
        < dict of {java source text: result of ``single_java_code_parse``}
          for every source that parsed and declares a package.
        '''
        tree_dict = self.preparse(java_code_list)
        return self.multi_java_code_parse(tree_dict)

    def preparse(self, java_code_dict):
        '''
        Parse every source with javalang.

        > java_code_dict: dict of {file path: java source text}
        < dict of {java source text: javalang tree}. Sources that fail to
          parse, or that have no package declaration, are left out.
        '''
        tree_dict = {}
        for fp, java_code in java_code_dict.items():
            try:
                tree = javalang.parse.parse(java_code)
            except Exception as e:
                # Best effort: one unparsable file must not abort the whole
                # batch, but record which file was skipped and why.
                logger.warning('failed to parse {}: {!r}'.format(fp, e))
                continue

            # The package name is required later to build qualified names.
            if tree.package is not None:
                tree_dict[java_code] = tree

        logger.info('success parse {} files'.format(len(tree_dict)))
        return tree_dict

    def single_java_code_parse(self, tree):
        '''
        Extract entities from a single parsed file.

        > tree: javalang parse result; ``tree.package`` must not be None
        < {pac_name: '', class_name_list: [], func_name_dict: {}, import_pac_name_list: []}
          - class_name_list: '<package>.<type>' for each top-level class or
            interface
          - func_name_dict: {class name: ['<class name>.<method>_<ParamType>...']};
            a class appears as a key only if it declares at least one method
          - import_pac_name_list: the path of every import statement
        '''
        import_pac_name_list = [import_pac.path for import_pac in tree.imports]

        pac_name = tree.package.name
        class_name_list = []
        func_name_dict = {}

        for node in tree.types:
            # Only top-level classes and interfaces are collected.
            if not isinstance(node, (javalang.tree.ClassDeclaration,
                                     javalang.tree.InterfaceDeclaration)):
                continue

            class_name = pac_name + '.' + node.name
            class_name_list.append(class_name)

            for node_inner in node.body:
                if not isinstance(node_inner, javalang.tree.MethodDeclaration):
                    continue

                # Append the parameter type names so that overloaded methods
                # get distinct entries.
                name_parts = [class_name + '.' + node_inner.name]
                name_parts.extend(param.type.name for param in node_inner.parameters)
                func_name = '_'.join(name_parts)

                func_name_dict.setdefault(class_name, []).append(func_name)

        res = {
            'pac_name': pac_name,
            'class_name_list': class_name_list,
            'func_name_dict': func_name_dict,
            'import_pac_name_list': import_pac_name_list
        }
        return res

    def multi_java_code_parse(self, tree_dict):
        '''
        Extract entities from several parsed files.

        > tree_dict: dict of {java source text: javalang tree}
        < dict of {java source text: result of ``single_java_code_parse``}

        Raises ImportError (chained to the original exception) as soon as one
        tree cannot be processed; the offending source is logged at debug
        level first.
        '''
        res_dict = {}
        for java_code, tree in tree_dict.items():
            try:
                res_dict[java_code] = self.single_java_code_parse(tree)
            except Exception as e:
                logger.debug(java_code)
                # Keep the ImportError type callers may rely on, but carry the
                # real cause instead of raising a bare, message-less error.
                raise ImportError(
                    'failed to extract entities from java code: {!r}'.format(e)
                ) from e

        return res_dict
|