Merge pull request #25 from XingYu-Zhong/main

optimization repodome
This commit is contained in:
Qinkai 2024-07-12 18:19:08 +08:00 committed by GitHub
commit 00d5465908
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 447 additions and 390 deletions

4
repodemo/.env Normal file
View File

@ -0,0 +1,4 @@
openai_api_key = ""
openai_api_base = "https://open.bigmodel.cn/api/paas/v4/"
model_name = "codegeex-4"
bing_api_key = ""

View File

@ -1,37 +1,50 @@
# CodeGeeX
## Welcome to My Chat Demo Application
# Welcome to My Chat Demo Application
This is a simple demonstration application.
## Instructions
1. Enter your question
2. Wait for a reply
1. Enter your question.
2. Wait for a response.
3. Enjoy the conversation!
## Features
- Supports multi-turn conversations
- Supports internet-connected Q&A
- Allows uploading local zip project files for project-related Q&A and modifications
- Supports multi-turn conversations.
- Supports online Q&A.
- Supports uploading local zip packages for project Q&A and modifications.
- Supports inputting GitHub project links for project Q&A and modifications.
## Installation
1. Clone the repository to your local machine
2. Set up the model; you can choose between a local model or an API model. If using a local model, set `local_model_path` in `run_local.py`
3. For internet-connected Q&A, set the Bing Search API key in `utils/bingsearch.py` (`bingsearch_api_key`)
4. Install dependencies: `pip install -r requirements.txt`
5. Run the application: `chainlit run run.py --port 8888`. For local use: `chainlit run run_local.py --port 8888`
1. Clone the repository locally.
2. Start the model. You can deploy it with vLLM or Ollama, which expose an OpenAI-compatible API, and set the deployed `api_base` and `api_key`. Alternatively, visit [CodeGeeX API](https://open.bigmodel.cn/dev/api#codegeex-4) to obtain an API key.
## Notes
```shell
#use open.bigmodel.cn api
openai_api_key = "<|apikey|>"
openai_api_base = "https://open.bigmodel.cn/api/paas/v4/"
model_name = "codegeex-4"
#use vllm
openai_api_key = "EMPTY"
openai_api_base = "http://xxxx:xxxx/v1"
model_name = "codegeex4-all-9b"
```
Ensure that your network environment can access the CodeGeeX API.
3. Fill in the corresponding model information and `bing_search_api` (if you want to experience online search) in the `.env` file.
4. Install dependencies: `pip install -r requirements.txt`.
5. Run the application: `chainlit run run.py --port 8899`.
## Note
Please ensure your network environment can access the CodeGeeX API.
## Disclaimer
This application is for educational and research purposes only. It must not be used for any commercial purposes. The developer is not responsible for any loss or damage caused by the use of this application.
This application is for educational and research purposes only and should not be used for any commercial purposes. The developer is not responsible for any loss or damage caused by the use of this application.
## Acknowledgements
Thank you for using our application. If you have any questions or suggestions, please feel free to contact us. We look forward to your feedback and are committed to providing better service.
Thank you for using our application. If you have any questions or suggestions, please feel free to contact us. We look forward to your feedback and are committed to providing you with better service.

View File

@ -15,14 +15,27 @@
- 支持多轮对话
- 支持联网问答
- 支持上传本地zip压缩包项目可以进行项目问答和对项目进行修改
- 支持输入GitHub链接项目可以进行项目问答和对项目进行修改。
## 安装
1. 克隆仓库到本地
2. 设置模型可以选择本地模型或者api模型,如果使用本地模型需要到run_local.py里设置local_model_path
3. 如果要用联网问答需要设置bingsearch API在utils/bingsearch.py中设置bingsearch_api_key
2. 启动模型可以通过vllm或者ollama部署模型提供openai的请求格式设置部署的api_base和api_key或者访问[CodeGeeX API](https://open.bigmodel.cn/dev/api#codegeex-4)获取apikey.
```shell
#use open.bigmodel.cn api
openai_api_key = "<|apikey|>"
openai_api_base = "https://open.bigmodel.cn/api/paas/v4/"
model_name = "codegeex-4"
#use vllm
openai_api_key = "EMPTY"
openai_api_base = "http://xxxx:xxxx/v1"
model_name = "codegeex4-all-9b"
```
3. 到.env文件里填写对应模型信息和bing_search_api(如果需要体验联网查询)
4. 安装依赖:`pip install -r requirements.txt`
5. 运行应用:`chainlit run run.py --port 8888` 如果用本地:`chainlit run run_local.py --port 8888`
5. 运行应用:`chainlit run run.py --port 8899`
## 注意

View File

@ -1,40 +1,22 @@
from openai import OpenAI
import os


def codegeex4(messages_list, temperature=0.2, top_p=0.95):
    """Send a chat-completion request to the configured CodeGeeX4 endpoint.

    The old requests-based streaming implementation (URL + manual SSE parsing)
    is superseded by the OpenAI-compatible client below.

    Args:
        messages_list: List of {"role": ..., "content": ...} chat messages.
        temperature: Sampling temperature passed to the endpoint.
        top_p: Nucleus-sampling parameter passed to the endpoint.

    Returns:
        str: The assistant's reply text (first choice).
    """
    # Endpoint configuration is read from the environment (populated via .env);
    # see the repo README for the open.bigmodel.cn vs. local-vLLM settings.
    openai_api_key = os.getenv("openai_api_key")
    openai_api_base = os.getenv("openai_api_base")
    model_name = os.getenv("model_name")

    client = OpenAI(
        api_key=openai_api_key,
        base_url=openai_api_base,
    )
    chat_response = client.chat.completions.create(
        model=model_name,
        messages=messages_list,
        temperature=temperature,
        top_p=top_p,
    )
    return chat_response.choices[0].message.content

View File

@ -1,50 +0,0 @@
import torch
from pydantic import Field
from transformers import AutoModel, AutoTokenizer


class CodegeexChatModel:
    """Local CodeGeeX4 chat model loaded through Hugging Face transformers."""

    # NOTE(review): these pydantic `Field(...)` declarations have no effect —
    # the class does not inherit from `pydantic.BaseModel`, and every attribute
    # is plainly reassigned in `__init__`. Kept as-is for byte-compatibility.
    device: str = Field(description="device to load the model")
    tokenizer = Field(description="model's tokenizer")
    model = Field(description="Codegeex model")
    temperature: float = Field(description="temperature to use for the model.")

    def __init__(self, model_name_or_path):
        """Load tokenizer and weights from `model_name_or_path`, preferring GPU."""
        super().__init__()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path, trust_remote_code=True
        )
        # eval() disables dropout etc.; the model is used for inference only.
        self.model = (
            AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True)
            .to(self.device)
            .eval()
        )
        print("Model has been initialized.")

    def chat(self, prompt, temperature=0.2, top_p=0.95):
        """Return one complete reply for `prompt`.

        Errors are reported in-band: on any exception the string
        "error: <exc>" is returned instead of raising.
        """
        try:
            response, _ = self.model.chat(
                self.tokenizer,
                query=prompt,
                max_length=120000,
                temperature=temperature,
                top_p=top_p,
            )
            return response
        except Exception as e:
            return f"error: {e}"

    def stream_chat(self, prompt, temperature=0.2, top_p=0.95):
        """Yield successive (cumulative) response snapshots for `prompt`.

        On failure a single "error: <exc>" string is yielded instead of raising.
        """
        try:
            for response, _ in self.model.stream_chat(
                self.tokenizer,
                query=prompt,
                max_length=120000,
                temperature=temperature,
                top_p=top_p,
            ):
                yield response
        except Exception as e:
            yield f"error: {e}"

View File

@ -2,13 +2,7 @@ base_system_prompt = """<|system|>\n你是一位智能编程助手你叫CodeG
repo_system_prompt = """<|system|>\n你是一位智能编程助手你叫CodeGeeX。你会为用户回答关于编程、代码、计算机方面的任何问题并提供格式规范、可以执行、准确安全的代码。请根据用户给出的项目仓库中的代码以及用户提出的需求生成新的代码或者更改已有代码。输出格式\n\n###PATH:{PATH}\n{CODE}"""
judge_task_prompt = """<|system|>\n你是一位任务分类专家,请你对用户的输入进行分类(问答/修改/正常),如果用户的输入是对项目进行提问则只需要输出问答两个字,如果用户的输入是对项目进行修改或增加则只需要输出修改两个字,如果用户输入的是一个与项目无关的问题则只需要输出正常两个字。<|user|>\n{user_input}<|assistant|>\n"""
web_judge_task_prompt = """<|system|>\n你是一位智能编程助手你叫CodeGeeX。你会为用户回答关于编程、代码、计算机方面的任何问题并提供格式规范、可以执行、准确安全的代码并在必要时提供详细的解释。<|user|>\n{user_input}\n这个问题需要进行联网来回答吗?仅回答“是”或者“否”。<|assistant|>\n"""
# judge_task_prompt = """<|system|>\n你是一位任务分类专家请你对用户的输入进行分类问答/修改),如果用户的输入是对项目进行提问则只需要输出问答两个字,如果用户的输入是对项目进行修改或增加则只需要输出修改两个字。<|user|>\n{user_input}<|assistant|>\n"""
web_search_prompy = """
你将接收到一个用户提出的问题并请撰写清晰简洁且准确的答案
web_search_prompy = """你将接收到一个用户提出的问题,并请撰写清晰、简洁且准确的答案。
# Note
- 您将获得与问题相关的多个上下文片段每个上下文都以引用编号开头例如[[citation:x]]其中x是一个数字如果适用请使用上下文并在每个句子的末尾引用上下文
@ -19,13 +13,171 @@ web_search_prompy = """
- 除了代码和特定的名称和引用外您的答案必须使用与问题相同的语言来撰写
""".lstrip()
tools_choose_prompt = """<|user|>\nAs a tool selector, you'll provide users with suggestions on tool selection. Depending on the provided tool summary (tools_summary) and user input (input_text), you'll need to follow these steps:
1. Read and understand the tool summary (tools_summary):
- Understand the features, suitcases, and limitations of each tool.
2. Analyze User Input (input_text):
- Understand the user's needs or problems.
- Identify keywords or phrases to determine which tool best suits the user's needs.
3. Decision-making logic:
- Recommend a tool if the user's needs correspond to the tool's functionality.
- If the user's needs are not suitable for any tool, or if the information is not sufficient to make a judgment, no tool is recommended.
4. Output:
- If a tool is recommended, output the tool name (toolname).
- If no tool is recommended, the output is empty.
Note that recommendations for tool selection should be based on the user's needs and refer to the tool summary provided. Follow the steps above and make sure to provide accurate tool selection suggestions in the output.
Here is some examples about tools choosing:
Input:
tools_summary: {
"online_query": "Questions need to be queried on the Internet to ensure accurate answers",
"project_qa": "Questions need to be answered specific to the project",
"project_modify": "The problem is that we need to modify the project"
}
input_text: "今天星期几"
Output:
{
"thoughts": {
"text": "用户想知道今天是星期几。",
"reasoning": "根据工具概要,'online_query' 是用来在互联网上查询问题以确保准确答案,这与用户的需求相符。",
"criticism": "没有其他工具适合回答这类问题,因为这是一个需要实时信息的查询。",
"speak": "让我在网上查一下今天是星期几。"
},
"tool": {
"name": ["online_query"]
}
}
Input:
tools_summary: {
"online_query": "Questions need to be queried on the Internet to ensure accurate answers",
"project_qa": "Questions need to be answered specific to the project",
"project_modify": "The problem is that we need to modify the project"
}
input_text: "请你帮我把项目的readme改成韩文"
Output:
{
"thoughts": {
"text": "用户需要将项目的readme文件翻译成韩文。",
"reasoning": "根据工具概要project_modify专用于项目修改这与用户的需求相符。",
"criticism": "需要确保用户对翻译后的韩文内容满意,因为翻译质量可能影响项目的整体感受。",
"speak": "我们将使用project_modify来修改项目的readme文件。请确认您希望使用的韩文翻译内容。"
},
"tool": {
"name": ["project_modify"]
}
}
Input:
tools_summary: {
"online_query": "Questions need to be queried on the Internet to ensure accurate answers",
"project_qa": "Questions need to be answered specific to the project",
"project_modify": "The problem is that we need to modify the project"
}
input_text: "你是谁"
Output:
{
"thoughts": {
"text": "用户问“你是谁”。",
"reasoning": "用户的提问是一个通用问题,不涉及具体的工具功能需求。",
"criticism": "这个问题不需要使用任何工具来回答,只需直接回答用户的问题即可。",
"speak": "我是一个人工智能助手,随时为您提供帮助。"
},
"tool": {
"name": []
}
}
Input:
tools_summary: {
"online_query": "Questions need to be queried on the Internet to ensure accurate answers",
"project_qa": "Questions need to be answered specific to the project",
"project_modify": "The problem is that we need to modify the project"
}
input_text: "解释一下项目"
Output:
{
"thoughts": {
"text": "用户需要对项目进行解释。",
"reasoning": "用户的需求是需要对项目进行解释,这通常涉及到具体项目的细节和背景。",
"criticism": "目前的工具概要中只有project_qa适用于与项目相关的问题解答。",
"speak": "您能提供更多关于项目的信息吗?这将有助于提供更准确的解释。"
},
"tool": {
"name": ["project_qa"]
}
}
You should only respond in JSON format as described below
Response Format:
{
"thoughts": {
"text": "your thoughts in the current context",
"reasoning": "reasoning for tool selection and input content",
"criticism": "critical thinking on tool selection and input in current context",
"speak": "words you want to speak to the user",
},
"tool": {
"name": ['tool_name'],
}
}
The strings corresponding to "text", "reasoning", "criticism", and "speak" in JSON should be described in Chinese.
If you don't need to use a tool(like solely chat scene), or have already reasoned the final answer associated with user input from the tool, You must abide by the following rules:
1. The tool's name in json is [].
Do not output any other information and do not contain quotation marks, such as `, \", \' and so on.
Ensure the output can be parsed by Python json.loads.
Don't output in markdown format, something like ```json or ```,just output in the corresponding string format.
Input:
tools_summary: {
"online_query": "Questions need to be queried on the Internet to ensure accurate answers",
"project_qa": "Questions need to be answered specific to the project",
"project_modify": "The problem is that we need to modify the project"
}
"""
tools_input_prompt = """
input_text: "{input_text}"
Output:
<|assistant|>\n"""
def build_message_list(result):
    """Convert a '<|role|>'-tagged prompt string into a chat message list.

    The input is split on the '<|' marker; each segment beginning with a
    recognised role tag ('system|>', 'user|>', 'assistant|>') becomes a
    {"role", "content"} dict, where the content is everything after the tag
    (including any leading newline). Segments without a known tag are dropped.
    """
    role_tags = (
        ("system", "system|>"),
        ("user", "user|>"),
        ("assistant", "assistant|>"),
    )
    messages = []
    for chunk in result.split("<|"):
        for role, tag in role_tags:
            if chunk.startswith(tag):
                messages.append({"role": role, "content": chunk[len(tag):]})
                break
    return messages
def get_cur_base_user_prompt(message_history, index_prompt=None, judge_context=""):
user_prompt_tmp = """<|user|>\n{user_input}"""
assistant_prompt_tmp = """<|assistant|>\n{assistant_input}"""
history_prompt = ""
for i, message in enumerate(message_history):
if message["role"] == "user":
if message["role"] == "user" or message["role"] == "tool":
if i == 0 and index_prompt is not None:
history_prompt += "<|user|>\n" + index_prompt + message["content"]
else:
@ -35,11 +187,12 @@ def get_cur_base_user_prompt(message_history, index_prompt=None, judge_context="
assistant_input=message["content"]
)
# print("修改" not in judge_context)
# print(judge_context)
if "修改" not in judge_context:
if "project_modify" not in judge_context:
result = base_system_prompt + history_prompt + """<|assistant|>\n"""
else:
result = repo_system_prompt + history_prompt + """<|assistant|>\n"""
print(result)
return result
message_list = build_message_list(result)
# print(message_list)
return message_list

View File

@ -1,38 +1,50 @@
# CodeGeeX
# 欢迎使用我的chat demo应用
# Welcome to My Chat Demo Application
这是一个简单的演示应用程序。
This is a simple demonstration application.
## 使用说明
## Instructions
1. 输入您的问题
2. 等待回复
3. 享受对话!
1. Enter your question.
2. Wait for a response.
3. Enjoy the conversation!
## 功能
## Features
- 支持多轮对话
- 支持联网问答
- 支持上传本地zip压缩包项目可以进行项目问答和对项目进行修改
- Supports multi-turn conversations.
- Supports online Q&A.
- Supports uploading local zip packages for project Q&A and modifications.
- Supports inputting GitHub project links for project Q&A and modifications.
## 安装
## Installation
1. 克隆仓库到本地
2. 设置模型可以选择本地模型或者api模型,如果使用本地模型需要到run_local.py里设置local_model_path
3. 如果要用联网问答需要设置bingsearch API在utils/bingsearch.py中设置bingsearch_api_key
4. 安装依赖:`pip install -r requirements.txt`
5. 运行应用:`chainlit run run.py --port 8888` 如果用本地:`chainlit run run_local.py --port 8888`
1. Clone the repository locally.
2. Start the model. You can deploy the model using vllm or ollama, provide the OpenAI request format, and set the deployed `api_base` and `api_key`. Alternatively, visit [CodeGeeX API](https://open.bigmodel.cn/dev/api#codegeex-4) to get the API key.
```shell
#use open.bigmodel.cn api
openai_api_key = "<|apikey|>"
openai_api_base = "https://open.bigmodel.cn/api/paas/v4/"
model_name = "codegeex-4"
#use vllm
openai_api_key = "EMPTY"
openai_api_base = "http://xxxx:xxxx/v1"
model_name = "codegeex4-all-9b"
```
## 注意
3. Fill in the corresponding model information and `bing_search_api` (if you want to experience online search) in the `.env` file.
4. Install dependencies: `pip install -r requirements.txt`.
5. Run the application: `chainlit run run.py --port 8899`.
请确保您的网络环境可以访问CodeGeeX的API。
## Note
## 免责声明
Please ensure your network environment can access the CodeGeeX API.
本应用仅供学习和研究使用,不得用于任何商业用途。开发者不对因使用本应用而导致的任何损失或损害负责。
## Disclaimer
## 感谢
This application is for educational and research purposes only and should not be used for any commercial purposes. The developer is not responsible for any loss or damage caused by the use of this application.
感谢您使用我们的应用。如果您有任何问题或建议,请随时联系我们。我们期待您的反馈,并致力于为您提供更好的服务。
## Acknowledgements
Thank you for using our application. If you have any questions or suggestions, please feel free to contact us. We look forward to your feedback and are committed to providing you with better service.

View File

@ -1,7 +1,4 @@
chainlit==1.1.305
beautifulsoup4
#local
accelerate==0.31.0
tiktoken==0.7.0
torch==2.3.1
transformers==4.39.0
python-dotenv
gitpython

View File

@ -1,22 +1,56 @@
import chainlit as cl
from chainlit.input_widget import Slider
from chainlit.input_widget import Slider,Switch
import json
import re
from llm.api.codegeex4 import codegeex4
from prompts.base_prompt import (
judge_task_prompt,
get_cur_base_user_prompt,
web_judge_task_prompt,
build_message_list,
tools_choose_prompt,
tools_input_prompt
)
from utils.bingsearch import bing_search_prompt
from utils.tools import unzip_file, get_project_files_with_content
from utils.tools import unzip_file, get_project_files_with_content,clone_repo,is_valid_json
def tools_choose_agent(input_text):
    """Ask the LLM which tool(s) fit `input_text`; return the tool-name list.

    Builds the tool-selection prompt, queries codegeex4, and retries (up to 10
    additional attempts) until the reply contains parseable JSON. The reply is
    expected to carry the selection under response["tool"]["name"] (possibly
    an empty list when no tool applies).

    Raises:
        json.JSONDecodeError: if no parseable JSON was produced after all attempts.
        KeyError: if the parsed JSON lacks the "tool"/"name" fields.
    """
    tools_prompt = tools_choose_prompt + tools_input_prompt.format(input_text=input_text)
    message_list = build_message_list(tools_prompt)

    def query_model():
        # "".join handles both a plain string reply and a streamed chunk
        # iterable, and replaces the previously duplicated accumulate loops.
        return "".join(
            codegeex4(messages_list=message_list, temperature=0.2, top_p=0.95)
        )

    judge_context = query_model()
    max_attempts = 10
    attempt = 1
    while not is_valid_json(judge_context) and attempt <= max_attempts:
        judge_context = query_model()
        attempt += 1

    # Prefer the outermost {...} span: the model sometimes wraps JSON in prose.
    match = re.search(r"\{.*\}", judge_context, re.DOTALL)
    payload = match.group() if match else judge_context
    response = json.loads(payload)
    return response["tool"]["name"]
@cl.set_chat_profiles
async def chat_profile():
return [
cl.ChatProfile(
name="chat聊天",
markdown_description="聊天demo支持多轮对话。",
name="联网聊天",
markdown_description="聊天demo支持多轮对话。支持联网回答用户问题。默认联网,如不联网在输入框左边关闭联网功能。",
starters=[
cl.Starter(
label="请你用python写一个快速排序。",
@ -37,16 +71,19 @@ async def chat_profile():
],
),
cl.ChatProfile(
name="联网问答",
markdown_description="联网能力demo支持联网回答用户问题。",
),
cl.ChatProfile(
name="上传本地项目",
markdown_description="项目级能力demo支持上传本地zip压缩包项目可以进行项目问答和对项目进行修改。",
name="项目问答",
markdown_description="项目级能力demo支持上传本地zip压缩包项目支持输入GitHub链接项目可以进行项目问答和对项目进行修改。",
),
]
@cl.on_settings_update
async def setup_agent(settings):
temperature = settings["temperature"]
top_p = settings["top_p"]
is_online = settings["is_online"]
cl.user_session.set("temperature", temperature)
cl.user_session.set("top_p", top_p)
cl.user_session.set("is_online", is_online)
@cl.on_chat_start
async def start():
settings = await cl.ChatSettings(
@ -67,92 +104,117 @@ async def start():
max=1,
step=0.1,
),
Switch(
id="is_online",
label="CodeGeeX4 - is_online",
initial=True
),
]
).send()
temperature = settings["temperature"]
top_p = settings["top_p"]
is_online = settings["is_online"]
cl.user_session.set("temperature", temperature)
cl.user_session.set("top_p", top_p)
cl.user_session.set("is_online", is_online)
cl.user_session.set("message_history", [])
chat_profile = cl.user_session.get("chat_profile")
extract_dir = "repodata"
if chat_profile == "chat聊天":
pass
elif chat_profile == "上传本地项目":
files = None
while files == None:
files = await cl.AskFileMessage(
content="请上传项目zip压缩文件!",
accept={"application/zip": [".zip"]},
max_size_mb=50,
).send()
if chat_profile == "项目问答":
res = await cl.AskActionMessage(
content="请选择项目上传方式",
actions=[
cl.Action(name="zip", value="zip", label="本地上传zip文件"),
cl.Action(name="url", value="url", label="上传GitHub链接"),
],
).send()
if res.get("value") == "url":
repo_path =None
while repo_path == None:
res = await cl.AskUserMessage(content="请你在下面消息框中提供GitHub仓库URL? exhttps://github.com/THUDM/CodeGeeX4", timeout=3600).send()
if res:
repo_path = clone_repo(res['output'],extract_dir)
if repo_path is None:
await cl.Message(
content=f"您的github链接无法正常下载请检查项目链接或github网络连通情况。",
).send()
text_file = files[0]
extracted_path = unzip_file(text_file.path, extract_dir)
files_list = get_project_files_with_content(extracted_path)
cl.user_session.set("project_index", files_list)
if len(files_list) > 0:
await cl.Message(
content=f"已成功上传,您可以开始对项目进行提问!",
).send()
files_list = get_project_files_with_content(repo_path)
cl.user_session.set("project_index", files_list)
if len(files_list) > 0:
await cl.Message(
content=f"已成功上传,您可以开始对项目进行提问!",
).send()
elif res.get("value") == "zip":
files = None
while files == None:
files = await cl.AskFileMessage(
content="请上传项目zip压缩文件!",
accept={"application/zip": [".zip"]},
max_size_mb=50,
).send()
text_file = files[0]
extracted_path = unzip_file(text_file.path, extract_dir)
files_list = get_project_files_with_content(extracted_path)
cl.user_session.set("project_index", files_list)
if len(files_list) > 0:
await cl.Message(
content=f"已成功上传,您可以开始对项目进行提问!",
).send()
@cl.step(type="tool")
async def bing_search_tool(search_text):
current_step = cl.context.current_step
# Simulate a running task
current_step.input = search_text
prompt_tmp = bing_search_prompt(search_text)
current_step.output = prompt_tmp
return prompt_tmp
@cl.on_message
async def main(message: cl.Message):
chat_profile = cl.user_session.get("chat_profile")
message_history = cl.user_session.get("message_history")
message_history.append({"role": "user", "content": message.content})
if chat_profile == "chat聊天":
tool_name = tools_choose_agent(message.content)
is_online = cl.user_session.get("is_online")
if chat_profile == "联网聊天":
if "online_query" in tool_name and is_online:
prompt_tmp = await bing_search_tool(message.content)
message_history.append({"role": "tool", "content": prompt_tmp})
message_history.append({"role": "user", "content": message.content})
prompt_content = get_cur_base_user_prompt(message_history=message_history)
elif chat_profile == "联网问答":
judge_tmp = codegeex4(
web_judge_task_prompt.format(user_input=message.content),
temperature=0.2,
top_p=0.95,
)
judge_context = "\n".join(judge_tmp)
print(judge_context)
message_history.pop()
if "" in judge_context:
prompt_tmp = bing_search_prompt(message.content)
message_history.append({"role": "user", "content": prompt_tmp})
else:
message_history.append({"role": "user", "content": message.content})
prompt_content = get_cur_base_user_prompt(message_history=message_history)
elif chat_profile == "上传本地项目":
judge_tmp = codegeex4(
judge_task_prompt.format(user_input=message.content),
temperature=0.2,
top_p=0.95,
)
judge_context = ""
for part in judge_tmp:
judge_context += part
elif chat_profile == "项目问答":
message_history.append({"role": "user", "content": message.content})
project_index = cl.user_session.get("project_index")
index_prompt = ""
index_tmp = """###PATH:{path}\n{code}\n"""
for index in project_index:
index_prompt += index_tmp.format(path=index["path"], code=index["content"])
print(judge_context)
prompt_content = (
get_cur_base_user_prompt(
message_history=message_history,
index_prompt=index_prompt,
judge_context=judge_context,
)
if "正常" not in judge_context
else get_cur_base_user_prompt(message_history=message_history)
)
if len(tool_name)>0:
prompt_content = get_cur_base_user_prompt(
message_history=message_history,
index_prompt=index_prompt,
judge_context=tool_name[0],
)
else:
prompt_content = get_cur_base_user_prompt(message_history=message_history)
msg = cl.Message(content="")
await msg.send()
temperature = cl.user_session.get("temperature")
top_p = cl.user_session.get("top_p")
if len(prompt_content) / 4 < 120000:
stream = codegeex4(prompt_content, temperature=temperature, top_p=top_p)

View File

@ -1,175 +0,0 @@
import chainlit as cl
from chainlit.input_widget import Slider
from llm.local.codegeex4 import CodegeexChatModel
from prompts.base_prompt import (
judge_task_prompt,
get_cur_base_user_prompt,
web_judge_task_prompt,
)
from utils.bingsearch import bing_search_prompt
from utils.tools import unzip_file, get_project_files_with_content
local_model_path = "<your_local_model_path>"
llm = CodegeexChatModel(local_model_path)
class StreamProcessor:
    """Turn cumulative streaming snapshots into incremental deltas.

    Local-model streaming yields ever-growing full responses; this tracks the
    last snapshot seen and exposes only the newly appended suffix each call.
    """

    def __init__(self):
        # Snapshot of the most recent string; the next call diffs against it.
        self.previous_str = ""

    def get_new_part(self, new_str):
        """Return the portion of `new_str` beyond the previously seen text."""
        seen = len(self.previous_str)
        self.previous_str = new_str
        return new_str[seen:]
@cl.set_chat_profiles
async def chat_profile():
    """Declare the selectable chat profiles shown in the Chainlit UI.

    Three modes: plain multi-turn chat (with clickable starter prompts),
    web-augmented Q&A, and Q&A over an uploaded local project archive.
    The profile names are matched by string elsewhere (start()/main()).
    """
    return [
        cl.ChatProfile(
            name="chat聊天",
            markdown_description="聊天demo支持多轮对话。",
            starters=[
                cl.Starter(
                    label="请你用python写一个快速排序。",
                    message="请你用python写一个快速排序。",
                ),
                cl.Starter(
                    label="请你介绍一下自己。",
                    message="请你介绍一下自己。",
                ),
                cl.Starter(
                    label="用 Python 编写一个脚本来自动发送每日电子邮件报告,并指导我如何进行设置。",
                    message="用 Python 编写一个脚本来自动发送每日电子邮件报告,并指导我如何进行设置。",
                ),
                cl.Starter(
                    label="我是一个python初学者请你告诉我怎么才能学好python。",
                    message="我是一个python初学者请你告诉我怎么才能学好python。",
                ),
            ],
        ),
        cl.ChatProfile(
            name="联网问答",
            markdown_description="联网能力demo支持联网回答用户问题。",
        ),
        cl.ChatProfile(
            name="上传本地项目",
            markdown_description="项目级能力demo支持上传本地zip压缩包项目可以进行项目问答和对项目进行修改。",
        ),
    ]
@cl.on_chat_start
async def start():
    """Initialise a chat session.

    Shows the temperature/top_p sliders, stores their values plus an empty
    message history in the user session, and — for the project profile —
    blocks until a project .zip is uploaded, then indexes its files.
    """
    settings = await cl.ChatSettings(
        [
            Slider(
                id="temperature",
                label="CodeGeeX4 - Temperature",
                initial=0.2,
                min=0,
                max=1,
                step=0.1,
            ),
            Slider(
                id="top_p",
                label="CodeGeeX4 - top_p",
                initial=0.95,
                min=0,
                max=1,
                step=0.1,
            ),
        ]
    ).send()
    temperature = settings["temperature"]
    top_p = settings["top_p"]
    cl.user_session.set("temperature", temperature)
    cl.user_session.set("top_p", top_p)
    cl.user_session.set("message_history", [])
    chat_profile = cl.user_session.get("chat_profile")
    # Directory where uploaded project archives are extracted.
    extract_dir = "repodata"
    if chat_profile == "chat聊天":
        pass
    elif chat_profile == "上传本地项目":
        # Keep prompting until the user actually uploads a .zip file.
        files = None
        while files == None:
            files = await cl.AskFileMessage(
                content="请上传项目zip压缩文件!",
                accept={"application/zip": [".zip"]},
                max_size_mb=50,
            ).send()
        text_file = files[0]
        extracted_path = unzip_file(text_file.path, extract_dir)
        # Index path+content of every project file so prompts can embed them.
        files_list = get_project_files_with_content(extracted_path)
        cl.user_session.set("project_index", files_list)
        if len(files_list) > 0:
            await cl.Message(
                content=f"已成功上传,您可以开始对项目进行提问!",
            ).send()
@cl.on_message
async def main(message: cl.Message):
    """Handle one user message per the active profile and stream the reply.

    Builds `prompt_content` differently for plain chat, web Q&A (model first
    judges whether a search is needed), and project Q&A (model classifies the
    request to decide whether to embed the project index), then streams the
    local model's response back to the UI.
    """
    chat_profile = cl.user_session.get("chat_profile")
    message_history = cl.user_session.get("message_history")
    message_history.append({"role": "user", "content": message.content})
    if chat_profile == "chat聊天":
        prompt_content = get_cur_base_user_prompt(message_history=message_history)
    elif chat_profile == "联网问答":
        # Ask the model whether this question needs a web search ("是"/"否").
        judge_context = llm.chat(
            web_judge_task_prompt.format(user_input=message.content), temperature=0.2
        )
        print(judge_context)
        # Pop the raw question; it is re-appended below, possibly augmented.
        message_history.pop()
        if "" in judge_context:
            # Replace the raw question with a Bing-search-augmented prompt.
            prompt_tmp = bing_search_prompt(message.content)
            message_history.append({"role": "user", "content": prompt_tmp})
        else:
            message_history.append({"role": "user", "content": message.content})
        prompt_content = get_cur_base_user_prompt(message_history=message_history)
    elif chat_profile == "上传本地项目":
        # Classify the request (问答/修改/正常) to pick the prompt template.
        judge_context = llm.chat(
            judge_task_prompt.format(user_input=message.content), temperature=0.2
        )
        project_index = cl.user_session.get("project_index")
        index_prompt = ""
        index_tmp = """###PATH:{path}\n{code}\n"""
        for index in project_index:
            index_prompt += index_tmp.format(path=index["path"], code=index["content"])
        print(judge_context)
        # "正常" (ordinary question) skips embedding the project index.
        prompt_content = (
            get_cur_base_user_prompt(
                message_history=message_history,
                index_prompt=index_prompt,
                judge_context=judge_context,
            )
            if "正常" not in judge_context
            else get_cur_base_user_prompt(message_history=message_history)
        )
    msg = cl.Message(content="")
    await msg.send()
    temperature = cl.user_session.get("temperature")
    top_p = cl.user_session.get("top_p")
    # Rough 4-chars-per-token guard against exceeding the model context window.
    if len(prompt_content) / 4 < 120000:
        stream = llm.stream_chat(prompt_content, temperature=temperature, top_p=top_p)
        stream_processor = StreamProcessor()
        for part in stream:
            # stream_chat yields cumulative snapshots; emit only the new suffix.
            if isinstance(part, str):
                text = stream_processor.get_new_part(part)
            elif isinstance(part, dict):
                text = stream_processor.get_new_part(part["name"] + part["content"])
            if token := (text or " "):
                await msg.stream_token(token)
    else:
        await msg.stream_token("项目太大了,请换小一点的项目。")
    message_history.append({"role": "assistant", "content": msg.content})
    await msg.update()

View File

@ -1,7 +1,8 @@
import requests
from bs4 import BeautifulSoup as BS4
import os
BING_API_KEY = "<your_bing_api_key>"
BING_API_KEY = os.getenv("bing_api_key")
def search_with_bing(query: str, search_timeout=30, top_k=6) -> list[dict]:

View File

@ -1,8 +1,53 @@
import json
import os
import zipfile
import git
import urllib.parse
import re
def is_valid_json(json_string):
    """Return True if `json_string` contains parseable JSON, else False.

    If the text contains a '{...}' span (e.g. JSON embedded in surrounding
    prose), that span is what gets validated; otherwise the whole string is
    parsed as-is.
    """
    try:
        match = re.search(r"\{.*\}", json_string, re.DOTALL)
        candidate = match.group() if match else json_string
        json.loads(candidate)
        return True
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError; TypeError additionally makes
        # non-string input (e.g. None) report False instead of raising.
        return False
def clone_repo(repo_url, clone_to):
    """Clone a GitHub repository into a subdirectory of `clone_to`.

    Args:
        repo_url (str): URL of the repository to clone.
        clone_to (str): Local parent directory to clone into (created if missing).

    Returns:
        str | None: Path of the local clone on success (an existing checkout
        with the same name is reused as-is); None if cloning fails.
        (The original docstring claimed an empty string on failure, but the
        code has always returned None — callers check `is None`.)
    """
    try:
        os.makedirs(clone_to, exist_ok=True)
        # Derive the target directory name from the last URL path segment.
        repo_name = urllib.parse.urlparse(repo_url).path.split('/')[-1]
        cloned_path = os.path.join(clone_to, repo_name)
        if os.path.exists(cloned_path):
            # Already cloned earlier; reuse the existing checkout.
            return cloned_path
        git.Repo.clone_from(repo_url, cloned_path)
        print(f"Repository cloned to {cloned_path}")
        return cloned_path
    except Exception as e:
        print(f"Failed to clone repository: {e}")
        return None
def unzip_file(zip_path, extract_dir):
"""
解压zip文件到指定目录并在指定目录下创建一个新的目录存放解压后的文件