2025-04-17 10:01:33 +00:00
|
|
|
|
# coding=utf-8
|
|
|
|
|
|
"""
|
|
|
|
|
|
@project: maxkb
|
|
|
|
|
|
@Author:虎
|
|
|
|
|
|
@file: tokenizer_manage_config.py
|
|
|
|
|
|
@date:2024/4/28 10:17
|
|
|
|
|
|
@desc: Lazily loads and caches a single shared BERT tokenizer (bert-base-cased).
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TokenizerManage:
    """Holder for a lazily created, class-level cached BERT tokenizer.

    The tokenizer is built on the first call to ``get_tokenizer`` and stored
    on the class attribute ``tokenizer``; later calls return that same object.
    """

    # Cached tokenizer instance; stays ``None`` until the first load.
    tokenizer = None

    @staticmethod
    def get_tokenizer():
        """Return the shared ``bert-base-cased`` tokenizer, loading it on first use.

        On the first call the tokenizer is created with
        ``BertTokenizer.from_pretrained`` using the cache directory
        ``/opt/maxkb-app/model/tokenizer`` and ``local_files_only=True``, then
        stored on ``TokenizerManage.tokenizer``.

        Returns:
            The object stored in ``TokenizerManage.tokenizer``.
        """
        if TokenizerManage.tokenizer is None:
            # Import only when a load is actually needed, so calls that hit
            # the cache never execute the transformers import.
            from transformers import BertTokenizer

            # NOTE(review): ``resume_download`` appears to be deprecated in
            # recent huggingface_hub releases -- confirm whether it can be
            # dropped for the pinned transformers version.
            TokenizerManage.tokenizer = BertTokenizer.from_pretrained(
                'bert-base-cased',
                cache_dir="/opt/maxkb-app/model/tokenizer",
                local_files_only=True,
                resume_download=False,
                force_download=False)
        return TokenizerManage.tokenizer
|