"""Interactive command-line chat demo for the ChatGLM-6B (slim) model.

Type a message to chat, ``clear`` to reset the conversation history and
``stop`` to quit. Pressing Ctrl-C while a reply is streaming interrupts that
reply without ending the session.
"""

import os
import platform
import signal

from transformers import AutoTokenizer, AutoModel

# The tokenizer and model are loaded at import time. The weights are cast to
# half precision and moved to the GPU, so a CUDA device must be available.
tokenizer = AutoTokenizer.from_pretrained("silver/chatglm-6b-slim", trust_remote_code=True)
model = AutoModel.from_pretrained("silver/chatglm-6b-slim", trust_remote_code=True).half().cuda()
model = model.eval()

os_name = platform.system()
clear_command = 'cls' if os_name == 'Windows' else 'clear'

# Set by the SIGINT handler; polled (and cleared) by the streaming loop.
stop_stream = False

# Banner shown at start-up, after ``clear`` and at the top of every redraw.
WELCOME_MESSAGE = "欢迎使用 ChatGLM-6B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序"


def build_prompt(history):
    """Render the banner followed by the whole conversation as one string.

    Args:
        history: Iterable of ``(query, response)`` pairs, oldest first.

    Returns:
        The text to print after the screen has been cleared.
    """
    parts = [WELCOME_MESSAGE]
    for query, response in history:
        parts.append(f"\n\n用户：{query}")
        parts.append(f"\n\nChatGLM-6B：{response}")
    return "".join(parts)


def signal_handler(signum, frame):
    """Handle SIGINT by asking the streaming loop in ``main`` to stop.

    The first parameter is deliberately not called ``signal`` so that it does
    not shadow the ``signal`` module.
    """
    global stop_stream
    stop_stream = True


def main():
    """Run the read-generate-print loop until the user types ``stop`` or EOF."""
    global stop_stream
    history = []

    # Install the handler once, before any generation starts, so that Ctrl-C
    # interrupts a reply from its very first chunk instead of killing the
    # process with KeyboardInterrupt.
    signal.signal(signal.SIGINT, signal_handler)

    print(WELCOME_MESSAGE)
    while True:
        try:
            query = input("\n用户：")
        except EOFError:
            # stdin was closed (Ctrl-D / end of piped input): exit cleanly.
            break

        command = query.strip()
        if command == "stop":
            break
        if command == "clear":
            history = []
            os.system(clear_command)
            print(WELCOME_MESSAGE)
            continue

        # A Ctrl-C pressed while waiting at the prompt sets the flag as well;
        # drop it so that it does not cut off the reply that is about to start.
        stop_stream = False

        chunks = model.stream_chat(tokenizer, query, history=history)
        for count, (_response, history) in enumerate(chunks, start=1):
            if stop_stream:
                stop_stream = False
                break
            # Redraw only every 8th chunk to limit screen flicker.
            if count % 8 == 0:
                os.system(clear_command)
                print(build_prompt(history), flush=True)

        # Final redraw so the complete (or interrupted) reply is visible.
        os.system(clear_command)
        print(build_prompt(history), flush=True)


if __name__ == "__main__":
    main()