"""Interactive command-line chat demo for the ChatGLM-6B model.

Type a message to chat. Two inputs are treated as commands:

* ``clear`` - forget the conversation history and clear the screen.
* ``stop``  - exit the program.

Pressing Ctrl-C while a reply is being generated interrupts that reply
without terminating the program.
"""
import os
import platform
import signal

from transformers import AutoTokenizer, AutoModel

# NOTE: the model is loaded at import time, so importing this module needs
# the `transformers` package and a CUDA device (`.half().cuda()`).
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
model = model.eval()

os_name = platform.system()
# Shell command used to wipe the terminal before each redraw.
clear_command = 'cls' if os_name == 'Windows' else 'clear'
# Set to True by `signal_handler`; polled by `main` between streamed chunks.
stop_stream = False

# Banner shown at start-up, after `clear`, and at the top of every redraw.
WELCOME_MESSAGE = "欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序"


def build_prompt(history):
    """Render the welcome banner followed by the whole conversation.

    Args:
        history: iterable of ``(query, response)`` pairs, oldest first.

    Returns:
        A single string: the banner, then each turn as a user line and a
        ChatGLM-6B line, every entry preceded by a blank line.
    """
    parts = [WELCOME_MESSAGE]
    for query, response in history:
        parts.append(f"\n\n用户:{query}")
        parts.append(f"\n\nChatGLM-6B:{response}")
    return "".join(parts)


def signal_handler(signal, frame):
    """SIGINT handler: ask the streaming loop in `main` to stop.

    The parameter names are kept for backward compatibility; note that
    ``signal`` shadows the module of the same name inside this function.
    Both arguments are supplied by the interpreter and are unused.
    """
    global stop_stream
    stop_stream = True


def main():
    """Run the read-generate-print loop until the user types ``stop``."""
    global stop_stream
    history = []

    # Register the handler once, before any generation starts. Registering
    # it from inside the streaming loop left a window (the first chunks of
    # a reply) in which Ctrl-C raised KeyboardInterrupt and killed the
    # program instead of just interrupting the reply.
    signal.signal(signal.SIGINT, signal_handler)

    print(WELCOME_MESSAGE)
    while True:
        query = input("\n用户:")
        command = query.strip()
        if command == "stop":
            break
        if command == "clear":
            history = []
            os.system(clear_command)
            print(WELCOME_MESSAGE)
            continue

        # Drop any Ctrl-C that arrived while idle at the prompt; otherwise
        # the stale flag would abort this reply on its very first chunk.
        stop_stream = False
        count = 0
        for _response, history in model.stream_chat(tokenizer, query, history=history):
            if stop_stream:
                stop_stream = False
                break
            count += 1
            # Redraw only every 8th chunk to limit terminal flicker.
            if count % 8 == 0:
                os.system(clear_command)
                print(build_prompt(history), flush=True)

        # Final redraw so the complete (or interrupted) reply is visible.
        os.system(clear_command)
        print(build_prompt(history), flush=True)


if __name__ == "__main__":
    main()