Merge pull request #14 from XingYu-Zhong/main

Eliminate PEP 8 warnings
2024-11-23 12:16:33 +00:00 · 2024-07-09 11:48:38 +08:00 · 2024-07-09 11:48:38 +08:00 · 73c3720049
commit 73c3720049
parent 48fff1449b 25870dc0a1
7 changed files with 289 additions and 262 deletions
--- a/repodemo/llm/api/codegeex4.py
+++ b/repodemo/llm/api/codegeex4.py
@ -2,40 +2,38 @@ import requests
 import json

 URL = ""  # the url you deploy codegeex service
+
+
 def codegeex4(prompt, temperature=0.8, top_p=0.8):
    url = URL
-    headers = {
-        'Content-Type': 'application/json'
-    }
+    headers = {"Content-Type": "application/json"}
    data = {
-        'inputs': prompt,
-        'parameters': {
-            'best_of':1,
-            'do_sample': True,
-            'max_new_tokens': 4012,
-            'temperature': temperature,
-            'top_p': top_p,
-            'stop': ["<|endoftext|>", "<|user|>", "<|observation|>", "<|assistant|>"],
-        }
+        "inputs": prompt,
+        "parameters": {
+            "best_of": 1,
+            "do_sample": True,
+            "max_new_tokens": 4012,
+            "temperature": temperature,
+            "top_p": top_p,
+            "stop": ["<|endoftext|>", "<|user|>", "<|observation|>", "<|assistant|>"],
+        },
    }
    response = requests.post(url, json=data, headers=headers, verify=False, stream=True)

    if response.status_code == 200:
        for line in response.iter_lines():
            if line:
-                decoded_line = line.decode('utf-8').replace('data:', '').strip()
+                decoded_line = line.decode("utf-8").replace("data:", "").strip()
                if decoded_line:
                    try:

                        content = json.loads(decoded_line)

-                        token_text = content.get('token', {}).get('text', '')
-                        if '<|endoftext|>' in token_text:
+                        token_text = content.get("token", {}).get("text", "")
+                        if "<|endoftext|>" in token_text:
                            break
                        yield token_text
                    except json.JSONDecodeError:
                        continue
    else:
-        print('请求失败:', response.status_code)
-
-
+        print("请求失败:", response.status_code)
--- a/repodemo/llm/local/codegeex4.py
+++ b/repodemo/llm/local/codegeex4.py
@ -3,7 +3,8 @@ from transformers import AutoModel, AutoTokenizer
 from typing import Iterator
 import torch

-class CodegeexChatModel():
+
+class CodegeexChatModel:
    device: str = Field(description="device to load the model")
    tokenizer = Field(description="model's tokenizer")
    model = Field(description="Codegeex model")
@ -12,11 +13,14 @@ class CodegeexChatModel():
    def __init__(self, model_name_or_path):
        super().__init__()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
-        self.model = AutoModel.from_pretrained(
-            model_name_or_path,
-            trust_remote_code=True
-        ).to(self.device).eval()
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_name_or_path, trust_remote_code=True
+        )
+        self.model = (
+            AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True)
+            .to(self.device)
+            .eval()
+        )
        print("Model has been initialized.")

    def chat(self, prompt, temperature=0.2, top_p=0.95):
@ -26,7 +30,7 @@ class CodegeexChatModel():
                query=prompt,
                max_length=120000,
                temperature=temperature,
-                top_p=top_p
+                top_p=top_p,
            )
            return response
        except Exception as e:
@ -40,8 +44,8 @@ class CodegeexChatModel():
                query=prompt,
                max_length=120000,
                temperature=temperature,
-                    top_p=top_p
+                top_p=top_p,
            ):
                yield response
        except Exception as e:
-            yield f'error: {e}'
+            yield f"error: {e}"
--- a/repodemo/prompts/base_prompt.py
+++ b/repodemo/prompts/base_prompt.py
@ -19,18 +19,21 @@ web_search_prompy = """
 - 除了代码和特定的名称和引用外，您的答案必须使用与问题相同的语言来撰写。
 """.lstrip()

+
 def get_cur_base_user_prompt(message_history, index_prompt=None, judge_context=""):
    user_prompt_tmp = """<|user|>\n{user_input}"""
    assistant_prompt_tmp = """<|assistant|>\n{assistant_input}"""
    history_prompt = ""
    for i, message in enumerate(message_history):
-        if message['role'] == 'user':
+        if message["role"] == "user":
            if i == 0 and index_prompt is not None:
-                history_prompt += "<|user|>\n"+index_prompt+message['content']
+                history_prompt += "<|user|>\n" + index_prompt + message["content"]
            else:
-                history_prompt += user_prompt_tmp.format(user_input=message['content'])
-        elif message['role'] ==  'assistant':
-            history_prompt += assistant_prompt_tmp.format(assistant_input=message['content'])
+                history_prompt += user_prompt_tmp.format(user_input=message["content"])
+        elif message["role"] == "assistant":
+            history_prompt += assistant_prompt_tmp.format(
+                assistant_input=message["content"]
+            )

    # print("修改" not in judge_context)
    # print(judge_context)
--- a/repodemo/run.py
+++ b/repodemo/run.py
@ -1,7 +1,11 @@
 import chainlit as cl
 from chainlit.input_widget import Slider
 from llm.api.codegeex4 import codegeex4
-from prompts.base_prompt import judge_task_prompt,get_cur_base_user_prompt,web_judge_task_prompt
+from prompts.base_prompt import (
+    judge_task_prompt,
+    get_cur_base_user_prompt,
+    web_judge_task_prompt,
+)
 from utils.tools import unzip_file, get_project_files_with_content
 from utils.bingsearch import bing_search_prompt

@ -16,37 +20,29 @@ async def chat_profile():
                cl.Starter(
                    label="请你用python写一个快速排序。",
                    message="请你用python写一个快速排序。",
-               
                ),
-
                cl.Starter(
                    label="请你介绍一下自己。",
                    message="请你介绍一下自己。",
-               
                ),
                cl.Starter(
                    label="用 Python 编写一个脚本来自动发送每日电子邮件报告，并指导我如何进行设置。",
                    message="用 Python 编写一个脚本来自动发送每日电子邮件报告，并指导我如何进行设置。",
-                
                ),
                cl.Starter(
                    label="我是一个python初学者，请你告诉我怎么才能学好python。",
                    message="我是一个python初学者，请你告诉我怎么才能学好python。",
-                
-                )
-            ]
-      
+                ),
+            ],
        ),
        cl.ChatProfile(
            name="联网问答",
-            markdown_description="联网能力dome：支持联网回答用户问题。",
-            
+            markdown_description="联网能力demo：支持联网回答用户问题。",
        ),
        cl.ChatProfile(
            name="上传本地项目",
-            markdown_description="项目级能力dome：支持上传本地zip压缩包项目，可以进行项目问答和对项目进行修改。",
-            
-        )
+            markdown_description="项目级能力demo：支持上传本地zip压缩包项目，可以进行项目问答和对项目进行修改。",
+        ),
    ]


@ -74,21 +70,20 @@ async def start():
    ).send()
    temperature = settings["temperature"]
    top_p = settings["top_p"]
-    cl.user_session.set('temperature',temperature)
-    cl.user_session.set('top_p',top_p)
-    cl.user_session.set(
-        "message_history",
-        []
-    )
+    cl.user_session.set("temperature", temperature)
+    cl.user_session.set("top_p", top_p)
+    cl.user_session.set("message_history", [])
    chat_profile = cl.user_session.get("chat_profile")
-    extract_dir = 'repodata'
+    extract_dir = "repodata"
    if chat_profile == "chat聊天":
        pass
    elif chat_profile == "上传本地项目":
        files = None
        while files == None:
            files = await cl.AskFileMessage(
-                content="请上传项目zip压缩文件!", accept={"application/zip": [".zip"]},max_size_mb=50
+                content="请上传项目zip压缩文件!",
+                accept={"application/zip": [".zip"]},
+                max_size_mb=50,
            ).send()

        text_file = files[0]
@ -101,7 +96,6 @@ async def start():
            ).send()


-
@cl.on_message
 async def main(message: cl.Message):
    chat_profile = cl.user_session.get("chat_profile")
@ -111,12 +105,16 @@ async def main(message: cl.Message):
        prompt_content = get_cur_base_user_prompt(message_history=message_history)

    elif chat_profile == "联网问答":
-        judge_tmp = codegeex4(web_judge_task_prompt.format(user_input=message.content),temperature=0.2,top_p = 0.95)
-        judge_context = '\n'.join(judge_tmp)
+        judge_tmp = codegeex4(
+            web_judge_task_prompt.format(user_input=message.content),
+            temperature=0.2,
+            top_p=0.95,
+        )
+        judge_context = "\n".join(judge_tmp)
        print(judge_context)
        message_history.pop()

-        if '是' in judge_context:
+        if "是" in judge_context:
            prompt_tmp = bing_search_prompt(message.content)
            message_history.append({"role": "user", "content": prompt_tmp})
        else:
@ -124,8 +122,12 @@ async def main(message: cl.Message):
        prompt_content = get_cur_base_user_prompt(message_history=message_history)

    elif chat_profile == "上传本地项目":
-        judge_tmp = codegeex4(judge_task_prompt.format(user_input=message.content),temperature=0.2,top_p = 0.95)
-        judge_context = ''
+        judge_tmp = codegeex4(
+            judge_task_prompt.format(user_input=message.content),
+            temperature=0.2,
+            top_p=0.95,
+        )
+        judge_context = ""
        for part in judge_tmp:
            judge_context += part

@ -133,16 +135,22 @@ async def main(message: cl.Message):
        index_prompt = ""
        index_tmp = """###PATH:{path}\n{code}\n"""
        for index in project_index:
-            index_prompt+=index_tmp.format(path=index['path'],code=index['content'])
+            index_prompt += index_tmp.format(path=index["path"], code=index["content"])
        print(judge_context)
-        prompt_content = get_cur_base_user_prompt(message_history=message_history,index_prompt=index_prompt,judge_context=judge_context) if '正常' not in judge_context else get_cur_base_user_prompt(message_history=message_history)
-    
-    
+        prompt_content = (
+            get_cur_base_user_prompt(
+                message_history=message_history,
+                index_prompt=index_prompt,
+                judge_context=judge_context,
+            )
+            if "正常" not in judge_context
+            else get_cur_base_user_prompt(message_history=message_history)
+        )

    msg = cl.Message(content="")
    await msg.send()
    temperature = cl.user_session.get("temperature")
-    top_p = cl.user_session.get('top_p')
+    top_p = cl.user_session.get("top_p")

    if len(prompt_content) / 4 < 120000:
        stream = codegeex4(prompt_content, temperature=temperature, top_p=top_p)
--- a/repodemo/run_local.py
+++ b/repodemo/run_local.py
@ -1,13 +1,19 @@
 import chainlit as cl
 from chainlit.input_widget import Slider
 from llm.api.codegeex4 import codegeex4
-from prompts.base_prompt import judge_task_prompt,get_cur_base_user_prompt,web_judge_task_prompt
+from prompts.base_prompt import (
+    judge_task_prompt,
+    get_cur_base_user_prompt,
+    web_judge_task_prompt,
+)
 from utils.tools import unzip_file, get_project_files_with_content
 from utils.bingsearch import bing_search_prompt
 from llm.local.codegeex4 import CodegeexChatModel
-local_model_path = '<your_local_model_path>'
+
+local_model_path = "<your_local_model_path>"
 llm = CodegeexChatModel(local_model_path)

+
 class StreamProcessor:
    def __init__(self):
        self.previous_str = ""
@ -17,6 +23,7 @@ class StreamProcessor:
        self.previous_str = new_str
        return new_part

+
@cl.set_chat_profiles
 async def chat_profile():
    return [
@ -27,37 +34,29 @@ async def chat_profile():
                cl.Starter(
                    label="请你用python写一个快速排序。",
                    message="请你用python写一个快速排序。",
-               
                ),
-
                cl.Starter(
                    label="请你介绍一下自己。",
                    message="请你介绍一下自己。",
-               
                ),
                cl.Starter(
                    label="用 Python 编写一个脚本来自动发送每日电子邮件报告，并指导我如何进行设置。",
                    message="用 Python 编写一个脚本来自动发送每日电子邮件报告，并指导我如何进行设置。",
-                
                ),
                cl.Starter(
                    label="我是一个python初学者，请你告诉我怎么才能学好python。",
                    message="我是一个python初学者，请你告诉我怎么才能学好python。",
-                
-                )
-            ]
-      
+                ),
+            ],
        ),
        cl.ChatProfile(
            name="联网问答",
-            markdown_description="联网能力dome：支持联网回答用户问题。",
-            
+            markdown_description="联网能力demo：支持联网回答用户问题。",
        ),
        cl.ChatProfile(
            name="上传本地项目",
-            markdown_description="项目级能力dome：支持上传本地zip压缩包项目，可以进行项目问答和对项目进行修改。",
-            
-        )
+            markdown_description="项目级能力demo：支持上传本地zip压缩包项目，可以进行项目问答和对项目进行修改。",
+        ),
    ]


@ -85,21 +84,20 @@ async def start():
    ).send()
    temperature = settings["temperature"]
    top_p = settings["top_p"]
-    cl.user_session.set('temperature',temperature)
-    cl.user_session.set('top_p',top_p)
-    cl.user_session.set(
-        "message_history",
-        []
-    )
+    cl.user_session.set("temperature", temperature)
+    cl.user_session.set("top_p", top_p)
+    cl.user_session.set("message_history", [])
    chat_profile = cl.user_session.get("chat_profile")
-    extract_dir = 'repodata'
+    extract_dir = "repodata"
    if chat_profile == "chat聊天":
        pass
    elif chat_profile == "上传本地项目":
        files = None
        while files == None:
            files = await cl.AskFileMessage(
-                content="请上传项目zip压缩文件!", accept={"application/zip": [".zip"]},max_size_mb=50
+                content="请上传项目zip压缩文件!",
+                accept={"application/zip": [".zip"]},
+                max_size_mb=50,
            ).send()

        text_file = files[0]
@ -112,7 +110,6 @@ async def start():
            ).send()


-
@cl.on_message
 async def main(message: cl.Message):
    chat_profile = cl.user_session.get("chat_profile")
@ -122,11 +119,13 @@ async def main(message: cl.Message):
        prompt_content = get_cur_base_user_prompt(message_history=message_history)

    elif chat_profile == "联网问答":
-        judge_context = llm.chat(web_judge_task_prompt.format(user_input=message.content),temperature=0.2)
+        judge_context = llm.chat(
+            web_judge_task_prompt.format(user_input=message.content), temperature=0.2
+        )
        print(judge_context)
        message_history.pop()

-        if '是' in judge_context:
+        if "是" in judge_context:
            prompt_tmp = bing_search_prompt(message.content)
            message_history.append({"role": "user", "content": prompt_tmp})
        else:
@ -134,23 +133,30 @@ async def main(message: cl.Message):
        prompt_content = get_cur_base_user_prompt(message_history=message_history)

    elif chat_profile == "上传本地项目":
-        judge_context = llm.chat(judge_task_prompt.format(user_input=message.content),temperature=0.2)
-        
+        judge_context = llm.chat(
+            judge_task_prompt.format(user_input=message.content), temperature=0.2
+        )

        project_index = cl.user_session.get("project_index")
        index_prompt = ""
        index_tmp = """###PATH:{path}\n{code}\n"""
        for index in project_index:
-            index_prompt+=index_tmp.format(path=index['path'],code=index['content'])
+            index_prompt += index_tmp.format(path=index["path"], code=index["content"])
        print(judge_context)
-        prompt_content = get_cur_base_user_prompt(message_history=message_history,index_prompt=index_prompt,judge_context=judge_context) if '正常' not in judge_context else get_cur_base_user_prompt(message_history=message_history)
-    
-    
+        prompt_content = (
+            get_cur_base_user_prompt(
+                message_history=message_history,
+                index_prompt=index_prompt,
+                judge_context=judge_context,
+            )
+            if "正常" not in judge_context
+            else get_cur_base_user_prompt(message_history=message_history)
+        )

    msg = cl.Message(content="")
    await msg.send()
    temperature = cl.user_session.get("temperature")
-    top_p = cl.user_session.get('top_p')
+    top_p = cl.user_session.get("top_p")

    if len(prompt_content) / 4 < 120000:
        stream = llm.stream_chat(prompt_content, temperature=temperature, top_p=top_p)
@ -159,7 +165,7 @@ async def main(message: cl.Message):
            if isinstance(part, str):
                text = stream_processor.get_new_part(part)
            elif isinstance(part, dict):
-                text = stream_processor.get_new_part(part['name']+part['content'])
+                text = stream_processor.get_new_part(part["name"] + part["content"])
            if token := (text or " "):
                await msg.stream_token(token)
    else:
--- a/repodemo/utils/bingsearch.py
+++ b/repodemo/utils/bingsearch.py
@ -2,7 +2,9 @@ import requests
 from bs4 import BeautifulSoup as BS4
 import requests

-BING_API_KEY = '<your_bing_api_key>'
+BING_API_KEY = "<your_bing_api_key>"
+
+
 def search_with_bing(query: str, search_timeout=30, top_k=6) -> list[dict]:
    """
    Search with bing and return the contexts.
@ -13,9 +15,9 @@ def search_with_bing(query: str, search_timeout=30, top_k=6) -> list[dict]:
        headers={"Ocp-Apim-Subscription-Key": BING_API_KEY},
        params={
            "q": query,
-            "responseFilter": ['webpages'],
-            "freshness": 'month',
-            "mkt": 'zh-CN'
+            "responseFilter": ["webpages"],
+            "freshness": "month",
+            "mkt": "zh-CN",
        },
        timeout=search_timeout,
    )
@ -30,18 +32,22 @@ def search_with_bing(query: str, search_timeout=30, top_k=6) -> list[dict]:
        print(f"搜索失败，错误原因: {e}")
        return []

+
 def fetch_url(url):
    response = requests.get(url)
    # use beautifulsoup4 to parse html
-    soup = BS4(response.text, 'html.parser')
+    soup = BS4(response.text, "html.parser")
    plain_text = soup.get_text()
    return plain_text

+
 def bing_search_prompt(input):
    contents = search_with_bing(input, search_timeout=5, top_k=6)
    citations = "\n\n".join(
-        [f"[[citation:{i + 1}]]\n```markdown\n{item['snippet']}\n```" for i, item in enumerate(contents)]
+        [
+            f"[[citation:{i + 1}]]\n```markdown\n{item['snippet']}\n```"
+            for i, item in enumerate(contents)
+        ]
    )
    prompt = f"[引用]\n{citations}\n问：{input}\n"
    return prompt
-
--- a/repodemo/utils/tools.py
+++ b/repodemo/utils/tools.py
@ -2,6 +2,7 @@ import zipfile
 import os
 import json

+
 def unzip_file(zip_path, extract_dir):
    """
    解压zip文件到指定目录，并在指定目录下创建一个新的目录存放解压后的文件
@ -23,7 +24,7 @@ def unzip_file(zip_path, extract_dir):
    if not os.path.exists(new_extract_dir):
        os.makedirs(new_extract_dir)

-    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(new_extract_dir)

    return new_extract_dir
@ -48,14 +49,15 @@ def get_project_files_with_content(project_dir):
                relative_path = os.path.relpath(file_path, project_dir)
                if "__MACOSX" in relative_path:
                    continue
-                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                    content = f.read()
-                files_list.append({'path': relative_path, 'content': content})
+                files_list.append({"path": relative_path, "content": content})
            else:
                continue

    return files_list

+
 def filter_data(obj):
    LANGUAGE_TAG = {
        "c++": "// C++",
@ -89,7 +91,6 @@ def filter_data(obj):
        "tex": f"% TeX",
        "typescript": "// TypeScript",
        "vue": "<!-- Vue -->",
-
        "assembly": "; Assembly",
        "dart": "// Dart",
        "perl": "# Perl",
@ -124,7 +125,9 @@ def filter_data(obj):
        "dockerfile": "# Dockerfile",
    }

-    programming_languages_to_file_extensions = json.load(open('utils/programming-languages-to-file-extensions.json'))
+    programming_languages_to_file_extensions = json.load(
+        open("utils/programming-languages-to-file-extensions.json")
+    )
    need2del = []
    for key in programming_languages_to_file_extensions.keys():
        if key.lower() not in LANGUAGE_TAG:
@ -140,8 +143,8 @@ def filter_data(obj):
            ext_to_programming_languages[item] = key
            want_languages.append(item)

-    ext = '.'+obj.split('.')[-1]
-    with open('utils/keep.txt', 'r') as f:
+    ext = "." + obj.split(".")[-1]
+    with open("utils/keep.txt", "r") as f:
        keep_files = f.readlines()
        keep_files = [l.strip() for l in keep_files]
    # print(ext)
@ -151,4 +154,3 @@ def filter_data(obj):
        return False
    else:
        return True
-