codefuse-chatbot/dev_opsgpt/service/service_factory.py

115 lines
3.8 KiB
Python

from typing import List, Union, Dict
import os
from configs.model_config import EMBEDDING_MODEL
from .faiss_db_service import FaissKBService
from .base_service import KBService, SupportedVSType
from dev_opsgpt.orm.commands import *
from dev_opsgpt.utils.path_utils import *
class KBServiceFactory:
@staticmethod
def get_service(kb_name: str,
vector_store_type: Union[str, SupportedVSType],
embed_model: str = EMBEDDING_MODEL,
) -> KBService:
if isinstance(vector_store_type, str):
vector_store_type = getattr(SupportedVSType, vector_store_type.upper())
if SupportedVSType.FAISS == vector_store_type:
return FaissKBService(kb_name, embed_model=embed_model)
# if SupportedVSType.PG == vector_store_type:
# from server.knowledge_base.kb_service.pg_kb_service import PGKBService
# return PGKBService(kb_name, embed_model=embed_model)
# elif SupportedVSType.MILVUS == vector_store_type:
# from server.knowledge_base.kb_service.milvus_kb_service import MilvusKBService
# return MilvusKBService(kb_name, embed_model=embed_model) # other milvus parameters are set in model_config.kbs_config
# elif SupportedVSType.DEFAULT == vector_store_type: # kb_exists of default kbservice is False, to make validation easier.
# from server.knowledge_base.kb_service.default_kb_service import DefaultKBService
# return DefaultKBService(kb_name)
@staticmethod
def get_service_by_name(kb_name: str
) -> KBService:
_, vs_type, embed_model = load_kb_from_db(kb_name)
if vs_type is None and os.path.isdir(get_kb_path(kb_name)): # faiss knowledge base not in db
vs_type = "faiss"
return KBServiceFactory.get_service(kb_name, vs_type, embed_model)
@staticmethod
def get_default():
return KBServiceFactory.get_service("default", SupportedVSType.DEFAULT)
def get_kb_details() -> List[Dict]:
kbs_in_folder = list_kbs_from_folder()
kbs_in_db = KBService.list_kbs()
result = {}
for kb in kbs_in_folder:
result[kb] = {
"kb_name": kb,
"vs_type": "",
"embed_model": "",
"file_count": 0,
"create_time": None,
"in_folder": True,
"in_db": False,
}
for kb in kbs_in_db:
kb_detail = get_kb_detail(kb)
if kb_detail:
kb_detail["in_db"] = True
if kb in result:
result[kb].update(kb_detail)
else:
kb_detail["in_folder"] = False
result[kb] = kb_detail
data = []
for i, v in enumerate(result.values()):
v['No'] = i + 1
data.append(v)
return data
def get_kb_doc_details(kb_name: str) -> List[Dict]:
kb = KBServiceFactory.get_service_by_name(kb_name)
docs_in_folder = list_docs_from_folder(kb_name)
docs_in_db = kb.list_docs()
result = {}
for doc in docs_in_folder:
result[doc] = {
"kb_name": kb_name,
"file_name": doc,
"file_ext": os.path.splitext(doc)[-1],
"file_version": 0,
"document_loader": "",
"text_splitter": "",
"create_time": None,
"in_folder": True,
"in_db": False,
}
for doc in docs_in_db:
doc_detail = get_file_detail(kb_name, doc)
if doc_detail:
doc_detail["in_db"] = True
if doc in result:
result[doc].update(doc_detail)
else:
doc_detail["in_folder"] = False
result[doc] = doc_detail
data = []
for i, v in enumerate(result.values()):
v['No'] = i + 1
data.append(v)
return data