codefuse-chatbot/dev_opsgpt/codechat/code_crawler/dir_crawler.py

39 lines
1.1 KiB
Python
Raw Permalink Normal View History

# encoding: utf-8
'''
@author: 温进
@file: dir_crawler.py
@time: 2023/11/22 下午2:54
@desc:
'''
from loguru import logger
import os
import glob
class DirCrawler:
@staticmethod
def crawl(path: str, suffix: str):
'''
read local java file in path
> path: path to crawl, must be absolute path like A/B/C
< dict of java code string
'''
java_file_list = glob.glob('{path}{sep}**{sep}*.{suffix}'.format(path=path, sep=os.path.sep, suffix=suffix),
recursive=True)
java_code_dict = {}
logger.info(path)
logger.info('number of file={}'.format(len(java_file_list)))
logger.info(java_file_list)
for java_file in java_file_list:
with open(java_file) as f:
java_code = ''.join(f.readlines())
java_code_dict[java_file] = java_code
return java_code_dict
if __name__ == '__main__':
path = '/Users/bingxu/Desktop/工作/大模型/chatbot/test_code_repo/middleware-alipay-starters-parent'
suffix = 'java'
DirCrawler.crawl(path, suffix)