From b07f58b5a8bc79707d41ab0168a348dbc1a0d0c8 Mon Sep 17 00:00:00 2001
From: xinpeng-zhang <53964972+xinpeng-zhang@users.noreply.github.com>
Date: Tue, 9 Jul 2024 20:12:49 +0800
Subject: [PATCH] add vLLM implementation in README

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 394126b..09ac060 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ with torch.no_grad():
     print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
 
-Use vllm==0.5.1 to quickly launch
+Use `vllm==0.5.1` to quickly launch the model:
 ```
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
@@ -58,7 +58,7 @@ llm = LLM(
     max_model_len=max_model_len,
     trust_remote_code=True,
     enforce_eager=True,
-    # GLM-4-9B-Chat-1M 如果遇见 OOM 现象，建议开启下述参数
+    # If OOM occurs, try enabling the following parameters
     # enable_chunked_prefill=True,
     # max_num_batched_tokens=8192
 )
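
For reference, the two hunks above only show the edges of the README's vLLM snippet; the middle of the block (model setup, sampling parameters, generation call) falls outside the diff context. Below is a minimal runnable sketch of what a complete `vllm==0.5.1` launch of this snippet could look like. The model id `THUDM/glm-4-9b-chat`, the tensor-parallel size, the stop-token ids, and the sampling settings are illustrative assumptions, not taken from this patch; check the README and model card for the actual values.

```
# Minimal sketch of launching GLM-4-9B-Chat with vllm==0.5.1.
# Assumed (not from the patch): model id, tp_size, stop-token ids,
# and sampling settings are illustrative placeholders.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

model_name = "THUDM/glm-4-9b-chat"  # assumed Hugging Face repo id
max_model_len, tp_size = 131072, 1

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
llm = LLM(
    model=model_name,
    tensor_parallel_size=tp_size,
    max_model_len=max_model_len,
    trust_remote_code=True,
    enforce_eager=True,
    # If OOM occurs, try enabling the following parameters
    # enable_chunked_prefill=True,
    # max_num_batched_tokens=8192
)

# Example stop-token ids for GLM-4; verify against the model card.
sampling_params = SamplingParams(
    temperature=0.95,
    max_tokens=1024,
    stop_token_ids=[151329, 151336, 151338],
)

# Build a chat prompt with the model's chat template, then generate.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    tokenize=False,
    add_generation_prompt=True,
)
outputs = llm.generate(prompts=prompt, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)
```

On the commented-out parameters: `enable_chunked_prefill` splits a long prefill into smaller batches, each capped at `max_num_batched_tokens`, which lowers peak memory at some cost in latency; that is why the patch suggests them as the first thing to try when a long-context run hits OOM.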