add vllm implement in README

This commit is contained in:
xinpeng-zhang 2024-07-09 20:12:49 +08:00 committed by GitHub
parent 85e1557ec5
commit b07f58b5a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -39,7 +39,7 @@ with torch.no_grad():
print(tokenizer.decode(outputs[0], skip_special_tokens=True)) print(tokenizer.decode(outputs[0], skip_special_tokens=True))
``` ```
Use vllm==0.5.1 to quickly launch Use `vllm==0.5.1` to quickly launch
``` ```
from transformers import AutoTokenizer from transformers import AutoTokenizer
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
@ -58,7 +58,7 @@ llm = LLM(
max_model_len=max_model_len, max_model_len=max_model_len,
trust_remote_code=True, trust_remote_code=True,
enforce_eager=True, enforce_eager=True,
# GLM-4-9B-Chat-1M 如果遇见 OOM 现象,建议开启下述参数 # If OOM occurs, try using the following parameters
# enable_chunked_prefill=True, # enable_chunked_prefill=True,
# max_num_batched_tokens=8192 # max_num_batched_tokens=8192
) )