UnisKB/apps/common/config/tokenizer_manage_config.py

25 lines
646 B
Python
Raw Normal View History

2025-04-17 10:01:33 +00:00
# coding=utf-8
"""
@project: maxkb
@Author:
@file: tokenizer_manage_config.py
@date: 2024/4/28 10:17
@desc: Lazily loads and caches the shared BERT tokenizer.
"""
class TokenizerManage:
    """Holder for a lazily created, class-level BERT tokenizer.

    The tokenizer is built on the first call to ``get_tokenizer`` and stored
    on the class attribute ``tokenizer``; later calls return that same object.
    """

    # Cached tokenizer instance; stays ``None`` until the first successful load.
    tokenizer = None

    @staticmethod
    def get_tokenizer():
        """Return the shared ``bert-base-cased`` tokenizer, loading it once.

        On the first call the tokenizer is created with
        ``BertTokenizer.from_pretrained`` using the cache directory
        ``/opt/maxkb-app/model/tokenizer`` and stored in
        ``TokenizerManage.tokenizer``. Subsequent calls return the stored
        object without touching ``transformers`` again.

        Returns:
            The cached tokenizer object.

        Raises:
            Whatever the ``transformers`` import or ``from_pretrained`` raises
            on the first load. In that case nothing is cached, so the next
            call attempts the load again.
        """
        if TokenizerManage.tokenizer is None:
            # Imported only on the loading path: the cached path then needs
            # neither the import machinery nor the `transformers` package.
            from transformers import BertTokenizer

            TokenizerManage.tokenizer = BertTokenizer.from_pretrained(
                'bert-base-cased',
                cache_dir="/opt/maxkb-app/model/tokenizer",
                # Per the transformers docs, these flags limit loading to
                # files already present locally and disable (re-)downloading.
                local_files_only=True,
                resume_download=False,
                force_download=False,
            )
        return TokenizerManage.tokenizer