Mirror of https://github.com/JasonYANG170/CodeGeeX4.git, synced 2024-11-23 12:16:33 +00:00
fix errors when using auto-completion
This commit is contained in:
parent d624f752c2
commit 38342ab02b
@@ -29,16 +29,21 @@ class CodegeexChatModel:
 
     def stream_chat(self, request: ChatCompletionRequest):
         try:
+            inputs = self.tokenizer.apply_chat_template(
+                conversation=[msg.model_dump() for msg in request.messages],
+                add_generation_prompt=True,
+                return_tensors="pt",
+                return_dict=True
+            ).to(self.model.device)
+            gen_configs = {
+                "max_new_tokens": request.max_tokens,
+                "temperature": request.temperature,
+                "top_p": request.top_p,
+                "repetition_penalty": request.presence_penalty
+            }
             length = 0
-            for i, (response, _) in enumerate(self.model.stream_chat(
-                self.tokenizer,
-                query=request.messages[-1].content,
-                history=[msg.model_dump() for msg in request.messages[:-1]],
-                max_new_tokens=request.max_tokens,
-                temperature=request.temperature,
-                top_p=request.top_p,
-                repetition_penalty=request.presence_penalty
-            )):
+            for i, outputs in enumerate(self.model.stream_generate(**inputs, **gen_configs)):
+                response = self.tokenizer.decode(outputs.tolist()[0][len(inputs["input_ids"][0]):-1])
                 resp = ChatCompletionStreamResponse()
                 resp.choices[0].index = i
                 resp.choices[0].delta.content = response[length:]
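For context, the new path builds the prompt with the tokenizer's chat template instead of passing query/history to stream_chat. Below is a minimal, hedged sketch of that call in isolation; the checkpoint name THUDM/codegeex4-all-9b and the one-message conversation are assumptions for illustration, not taken from this diff.

from transformers import AutoTokenizer

# Assumed checkpoint name for illustration; the server loads its own tokenizer/model.
tokenizer = AutoTokenizer.from_pretrained("THUDM/codegeex4-all-9b", trust_remote_code=True)

messages = [{"role": "user", "content": "write a binary search in python"}]

# Same call shape as in the diff: chat template -> tensor dict ready for generation.
inputs = tokenizer.apply_chat_template(
    conversation=messages,
    add_generation_prompt=True,   # append the assistant turn so generation continues from it
    return_tensors="pt",
    return_dict=True              # dict with input_ids (and attention_mask) instead of a bare tensor
)
print(inputs["input_ids"].shape)  # (1, prompt_length)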
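The streaming response still emits only the text produced since the previous chunk via response[length:], with length presumably advanced later in the loop, outside this hunk. A self-contained sketch of that delta pattern, with a stand-in generator in place of model.stream_generate, might look like this:

def fake_stream():
    # Stand-in for model.stream_generate(...): each iteration yields the full
    # decoded response so far, just like tokenizer.decode(...) in the handler.
    text = ""
    for piece in ["def add", "(a, b):", "\n    ret", "urn a + b"]:
        text += piece
        yield text

length = 0
for i, response in enumerate(fake_stream()):
    delta = response[length:]   # only the newly generated suffix goes into this chunk
    length = len(response)      # advance the cursor for the next iteration
    print(f"chunk {i}: {delta!r}")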