2023-12-07 20:17:21 +08:00
|
|
|
# encoding: utf-8
|
|
|
|
'''
|
|
|
|
@author: 温进
|
|
|
|
@file: dir_crawler.py
|
|
|
|
@time: 2023/11/22 下午2:54
|
|
|
|
@desc:
|
|
|
|
'''
|
|
|
|
from loguru import logger
|
|
|
|
import os
|
|
|
|
import glob
|
|
|
|
|
|
|
|
|
|
|
|
class DirCrawler:
|
|
|
|
@staticmethod
|
|
|
|
def crawl(path: str, suffix: str):
|
|
|
|
'''
|
|
|
|
read local java file in path
|
|
|
|
> path: path to crawl, must be absolute path like A/B/C
|
|
|
|
< dict of java code string
|
|
|
|
'''
|
|
|
|
java_file_list = glob.glob('{path}{sep}**{sep}*.{suffix}'.format(path=path, sep=os.path.sep, suffix=suffix),
|
|
|
|
recursive=True)
|
|
|
|
java_code_dict = {}
|
|
|
|
|
|
|
|
logger.info(path)
|
|
|
|
logger.info('number of file={}'.format(len(java_file_list)))
|
|
|
|
logger.info(java_file_list)
|
|
|
|
|
|
|
|
for java_file in java_file_list:
|
2024-03-12 15:31:06 +08:00
|
|
|
with open(java_file, encoding="utf-8") as f:
|
2023-12-07 20:17:21 +08:00
|
|
|
java_code = ''.join(f.readlines())
|
|
|
|
java_code_dict[java_file] = java_code
|
|
|
|
return java_code_dict
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
path = '/Users/bingxu/Desktop/工作/大模型/chatbot/test_code_repo/middleware-alipay-starters-parent'
|
|
|
|
suffix = 'java'
|
|
|
|
DirCrawler.crawl(path, suffix)
|