安裝 Google sentencepiece
# Build and install Google sentencepiece from source (macOS / Homebrew).
brew install cmake
git clone https://github.com/google/sentencepiece.git
# BUG FIX: must enter the cloned repo before creating the build directory,
# otherwise cmake configures the wrong tree and 'cd ../python' below fails.
cd sentencepiece
mkdir build && cd build && cmake ..
# 'nproc' is Linux-only; fall back to sysctl for the core count on macOS.
make -j "$(nproc 2>/dev/null || sysctl -n hw.ncpu)"
sudo make install
# Refresh the dynamic linker's shared cache so the new library is found (macOS).
sudo update_dyld_shared_cache
# Build and install the Python bindings against the freshly installed library.
cd ../python
python setup.py bdist_wheel
pip install dist/sentencepiece*.whl
安裝依賴
# Python dependencies for the KoBERT example below.
pip install torch transformers gluonnlp pandas tqdm sentencepiece protobuf
運行 KoBERT 範例
from transformers import AutoModel, AutoTokenizer
import torch

# Fetch KoBERT and its tokenizer from the Hugging Face hub.
# trust_remote_code=True is required because KoBERT ships a custom tokenizer.
model = AutoModel.from_pretrained("monologg/kobert")
tokenizer = AutoTokenizer.from_pretrained("monologg/kobert", trust_remote_code=True)

# Sample sentence in Korean.
text = "안녕하세요, KoBERT를 사용해봅시다."

# Encode the text as PyTorch tensors and run a forward pass
# with gradient tracking disabled (inference only).
encoded = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    result = model(**encoded)

# Show the raw model outputs.
print(result)