refactor: change model path in MKTokenizer.
parent
f457588cd5
commit
f9c1742b43
|
|
@ -6,11 +6,8 @@
|
||||||
@date:2024/4/28 10:17
|
@date:2024/4/28 10:17
|
||||||
@desc:
|
@desc:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
|
||||||
|
|
||||||
|
|
||||||
class MKTokenizer:
|
class MKTokenizer:
|
||||||
def __init__(self, tokenizer):
|
def __init__(self, tokenizer):
|
||||||
|
|
@ -27,6 +24,7 @@ class TokenizerManage:
|
||||||
def get_tokenizer():
|
def get_tokenizer():
|
||||||
from tokenizers import Tokenizer
|
from tokenizers import Tokenizer
|
||||||
# 创建Tokenizer
|
# 创建Tokenizer
|
||||||
s = os.path.join(BASE_DIR.parent, 'tokenizer', 'bert-base-cased', 'tokenizer.json')
|
model_path = os.path.join("/opt/maxkb-app", "model", "tokenizer", "models--bert-base-cased")
|
||||||
TokenizerManage.tokenizer = Tokenizer.from_file(s)
|
with open(f"{model_path}/refs/main", encoding="utf-8") as f: snapshot = f.read()
|
||||||
|
TokenizerManage.tokenizer = Tokenizer.from_file(f"{model_path}/snapshots/{snapshot}/tokenizer.json")
|
||||||
return MKTokenizer(TokenizerManage.tokenizer)
|
return MKTokenizer(TokenizerManage.tokenizer)
|
||||||
|
|
|
||||||
|
|
@ -1,23 +0,0 @@
|
||||||
{
|
|
||||||
"architectures": [
|
|
||||||
"BertForMaskedLM"
|
|
||||||
],
|
|
||||||
"attention_probs_dropout_prob": 0.1,
|
|
||||||
"gradient_checkpointing": false,
|
|
||||||
"hidden_act": "gelu",
|
|
||||||
"hidden_dropout_prob": 0.1,
|
|
||||||
"hidden_size": 768,
|
|
||||||
"initializer_range": 0.02,
|
|
||||||
"intermediate_size": 3072,
|
|
||||||
"layer_norm_eps": 1e-12,
|
|
||||||
"max_position_embeddings": 512,
|
|
||||||
"model_type": "bert",
|
|
||||||
"num_attention_heads": 12,
|
|
||||||
"num_hidden_layers": 12,
|
|
||||||
"pad_token_id": 0,
|
|
||||||
"position_embedding_type": "absolute",
|
|
||||||
"transformers_version": "4.6.0.dev0",
|
|
||||||
"type_vocab_size": 2,
|
|
||||||
"use_cache": true,
|
|
||||||
"vocab_size": 28996
|
|
||||||
}
|
|
||||||
File diff suppressed because one or more lines are too long
|
|
@ -1 +0,0 @@
|
||||||
{"do_lower_case": false, "model_max_length": 512}
|
|
||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue