from typing import Iterator

import torch
from pydantic import Field
from transformers import AutoModel, AutoTokenizer


class CodegeexChatModel:
    """Wrapper that loads a chat checkpoint and exposes blocking and streaming chat.

    The tokenizer and model are loaded with ``trust_remote_code=True``; the
    ``chat`` / ``stream_chat`` methods called on the model are therefore
    provided by the checkpoint's own code, not by this class.
    """

    # Default generation budget forwarded as ``max_length`` to the model.
    DEFAULT_MAX_LENGTH = 120000

    # NOTE(review): this class has no base class, so the ``Field(...)`` values
    # below are stored as ordinary class attributes rather than validated
    # fields. ``device``, ``tokenizer`` and ``model`` are shadowed by the
    # instance attributes set in ``__init__``; ``temperature`` is never
    # assigned on the instance. Kept as-is for backward compatibility.
    device: str = Field(description="device to load the model")
    tokenizer = Field(description="model's tokenizer")
    model = Field(description="Codegeex model")
    temperature: float = Field(description="temperature to use for the model.")

    def __init__(self, model_name_or_path):
        """Load the tokenizer and model and move the model to the chosen device.

        Args:
            model_name_or_path: Identifier or path forwarded unchanged to
                ``AutoTokenizer.from_pretrained`` and
                ``AutoModel.from_pretrained``.
        """
        super().__init__()
        # Use the GPU when torch reports one, otherwise fall back to the CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path, trust_remote_code=True
        )
        self.model = (
            AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True)
            .to(self.device)
            .eval()
        )
        print("Model has been initialized.")

    def chat(
        self,
        prompt: str,
        temperature: float = 0.2,
        top_p: float = 0.95,
        max_length: int = DEFAULT_MAX_LENGTH,
    ) -> str:
        """Run one blocking chat turn and return the model's response.

        Args:
            prompt: Query passed to the model as ``query``.
            temperature: Sampling temperature forwarded to the model.
            top_p: Nucleus-sampling threshold forwarded to the model.
            max_length: Length limit forwarded to the model; defaults to
                ``DEFAULT_MAX_LENGTH``.

        Returns:
            The first element of the pair returned by ``self.model.chat``
            (presumably the response text -- defined by the checkpoint's
            remote code). If the call raises, the string ``"error:<exc>"``
            is returned instead of propagating the exception.
        """
        try:
            response, _ = self.model.chat(
                self.tokenizer,
                query=prompt,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
            )
            return response
        except Exception as e:
            # Failures are reported in-band as a string; existing callers rely
            # on never seeing an exception from this method.
            return f"error:{e}"

    def stream_chat(
        self,
        prompt: str,
        temperature: float = 0.2,
        top_p: float = 0.95,
        max_length: int = DEFAULT_MAX_LENGTH,
    ) -> Iterator[str]:
        """Stream a chat turn, yielding each response the model produces.

        Args:
            prompt: Query passed to the model as ``query``.
            temperature: Sampling temperature forwarded to the model.
            top_p: Nucleus-sampling threshold forwarded to the model.
            max_length: Length limit forwarded to the model; defaults to
                ``DEFAULT_MAX_LENGTH``.

        Yields:
            The first element of every pair produced by
            ``self.model.stream_chat`` (presumably response text -- defined
            by the checkpoint's remote code). If the call or the iteration
            raises, a final ``"error: <exc>"`` string is yielded and the
            stream ends.
        """
        try:
            for response, _ in self.model.stream_chat(
                self.tokenizer,
                query=prompt,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
            ):
                yield response
        except Exception as e:
            # Same in-band error convention as ``chat``; note the message
            # format differs by one space ("error: " vs "error:"), preserved
            # here because callers may match on it.
            yield f"error: {e}"