Mirror of https://github.com/JasonYANG170/CodeGeeX4.git (synced 2024-11-23 12:16:33 +00:00)
add vllm implementation in README
commit b07f58b5a8 (parent 85e1557ec5)
@@ -39,7 +39,7 @@ with torch.no_grad():
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
 
-Use vllm==0.5.1 to quickly launch
+Use `vllm==0.5.1` to quickly launch
 ```
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
@@ -58,7 +58,7 @@ llm = LLM(
     max_model_len=max_model_len,
     trust_remote_code=True,
     enforce_eager=True,
-    # GLM-4-9B-Chat-1M: if you encounter OOM, it is recommended to enable the parameters below
+    # If OOM occurs, try using the following parameters
     # enable_chunked_prefill=True,
     # max_num_batched_tokens=8192
 )
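The two hunks above patch one continuous snippet in the README. For reference, a minimal end-to-end sketch of that snippet is below. Only the imports, the `LLM(...)` keyword arguments shown in the diff, and the `vllm==0.5.1` pin come from the commit itself; the model ID `THUDM/codegeex4-all-9b`, the `max_model_len` and `tensor_parallel_size` values, and the sampling settings are illustrative assumptions.

```
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Assumed values for illustration; the diff only shows the LLM(...) kwargs below.
model_name = "THUDM/codegeex4-all-9b"  # assumption: CodeGeeX4 model ID on Hugging Face
max_model_len = 8192                   # assumption: raise for long-context use
tp_size = 1                            # assumption: tensor-parallel degree

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
llm = LLM(
    model=model_name,
    tensor_parallel_size=tp_size,
    max_model_len=max_model_len,
    trust_remote_code=True,
    enforce_eager=True,  # skip CUDA-graph capture to cut GPU memory overhead
    # If OOM occurs, try using the following parameters
    # enable_chunked_prefill=True,
    # max_num_batched_tokens=8192
)

# Assumed sampling settings; tune for your workload.
sampling_params = SamplingParams(temperature=0.2, max_tokens=256)

# Build a chat-formatted prompt and generate.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Write a quicksort in Python."}],
    tokenize=False,
    add_generation_prompt=True,
)
outputs = llm.generate(prompts=prompt, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)
```

On the OOM hint in the second hunk: `enforce_eager=True` trades some throughput for lower memory by disabling CUDA graphs, and the commented `enable_chunked_prefill`/`max_num_batched_tokens` pair splits long prefills into bounded token batches, capping peak memory on long prompts.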