CodeGeeX4/repodemo/llm/api/codegeex4.py

40 lines
1.2 KiB
Python
Raw Normal View History

2024-07-05 01:33:53 +00:00
import requests
import json
2024-07-09 03:37:30 +00:00
URL = ""  # Endpoint of the deployed CodeGeeX service; codegeex4() POSTs to this address, so set it before use.
2024-07-05 01:33:53 +00:00
def codegeex4(prompt, temperature=0.8, top_p=0.8, timeout=None):
    """Stream generated token texts from the CodeGeeX service at ``URL``.

    This is a generator: the HTTP request is only sent once iteration
    starts, and the connection is released when iteration finishes, stops
    at an end-of-text token, or the generator is closed early.

    Args:
        prompt: Text sent to the service as the ``inputs`` field.
        temperature: Sampling temperature forwarded in ``parameters``.
        top_p: Nucleus-sampling value forwarded in ``parameters``.
        timeout: Optional timeout passed straight to ``requests.post``.
            ``None`` (the default) keeps the previous behavior of waiting
            indefinitely.

    Yields:
        The ``token.text`` string of each streamed JSON event. Events that
        carry no usable token text yield an empty string; lines that are
        not valid JSON are skipped.

    Notes:
        On a non-200 response nothing is yielded and the status code is
        printed, matching the original behavior.
    """
    headers = {"Content-Type": "application/json"}
    data = {
        "inputs": prompt,
        "parameters": {
            "best_of": 1,
            "do_sample": True,
            "max_new_tokens": 4012,
            "temperature": temperature,
            "top_p": top_p,
            "stop": ["<|endoftext|>", "<|user|>", "<|observation|>", "<|assistant|>"],
        },
    }
    prefix = "data:"

    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept so existing deployments (e.g. self-signed certificates) keep
    # working, but it should be removed if the endpoint has a valid
    # certificate.
    with requests.post(
        URL,
        json=data,
        headers=headers,
        verify=False,
        stream=True,
        timeout=timeout,
    ) as response:
        if response.status_code != 200:
            print("请求失败:", response.status_code)
            return

        for line in response.iter_lines():
            if not line:
                continue

            decoded_line = line.decode("utf-8").strip()
            # Strip only the leading event-field prefix. Replacing every
            # "data:" occurrence would also mangle generated text that
            # happens to contain that substring.
            if decoded_line.startswith(prefix):
                decoded_line = decoded_line[len(prefix):].strip()
            if not decoded_line:
                continue

            try:
                content = json.loads(decoded_line)
            except json.JSONDecodeError:
                continue

            # Tolerate payloads that are not objects, or whose "token" or
            # "text" entries are missing or null, instead of raising
            # mid-stream.
            token = content.get("token") if isinstance(content, dict) else None
            token_text = token.get("text", "") if isinstance(token, dict) else ""
            if not isinstance(token_text, str):
                token_text = ""

            if "<|endoftext|>" in token_text:
                break
            yield token_text