
Commit

fix params
jstzwj committed Jun 7, 2024
1 parent aedb056 commit 000edcb
Showing 18 changed files with 422 additions and 16 deletions.
58 changes: 58 additions & 0 deletions convert.py
@@ -0,0 +1,58 @@

import argparse
import re
import logging
from typing import Any
import transformers # noqa: F401
import os
import json
from transformers import pipeline, set_seed
from transformers import AutoConfig, OPTForCausalLM, AutoModelForCausalLM, AutoTokenizer

from chatproto.conversation.history import ConversationHistory
from chatproto.registry import get_conv_settings

from katheryne.tools.chatbot import get_generator, get_model_response, get_user_input, stop_response

settings = get_conv_settings("openbuddy")
generator = get_generator("llm_trainer/lightning_logs/version_0/huggingface_format/checkpoint-step-931", settings, device="cuda:1")
set_seed(42)

instruction = """A chat between a curious user and an artificial intelligence assistant.
The assistant gives helpful, detailed, and polite answers to the user's questions."""
instruction = """Consider a conversation between User (a human) and Assistant (named Buddy).
Buddy can fluently speak the user's language (e.g. English, Chinese).
Buddy possesses vast knowledge about the world, history, and culture."""

num_rounds = 0
history = ConversationHistory(
    system=instruction,
    messages=[],
    offset=0,
    settings=settings,
)
# history.append_message(settings.roles[0], "你好")
# history.append_message(settings.roles[1], "你好呀")

while True:
    num_rounds += 1
    user_input, quit, clear = get_user_input()
    history.append_message(settings.roles[0], user_input)
    history.append_message(settings.roles[1], None)

    if quit:
        break
    elif clear:
        user_input, num_rounds = "", 0
        history.messages.clear()
        continue

    prompt = history.get_prompt()
    response = get_model_response(generator, prompt, 1024)[0]['generated_text']
    response = response[len(prompt):]
    output = stop_response(response, stop_str="\nUser: ")
    history.messages[-1][1] = output

    print("-" * 30 + f" Round {num_rounds} " + "-" * 30)
    print(f"{output}")
    # user_input = f"{output}\n\n"
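The loop above unpacks three values from get_user_input. The actual helper lives in katheryne.tools.chatbot; a minimal sketch of the contract the loop assumes (one line of input, plus hypothetical "quit" and "clear" command words mapped to flags) would be:

def get_user_input():
    # Sketch only: the real katheryne implementation may use different
    # command words or richer input handling.
    text = input("User: ").strip()
    return text, text.lower() == "quit", text.lower() == "clear"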
50 changes: 50 additions & 0 deletions convert_file.py
@@ -0,0 +1,50 @@

import argparse
import re
import logging
from typing import Any
import transformers # noqa: F401
import os
import json
from transformers import pipeline, set_seed
from transformers import AutoConfig, OPTForCausalLM, AutoModelForCausalLM, AutoTokenizer

from chatproto.conversation.history import ConversationHistory
from chatproto.registry import get_conv_settings

from katheryne.tools.chatbot import get_generator, get_model_response, get_user_input, stop_response

settings = get_conv_settings("openbuddy")
generator = get_generator("llm_trainer/lightning_logs/version_0/huggingface_format/checkpoint-step-931", settings, device="cuda:1")
set_seed(42)

instruction = """A chat between a curious user and an artificial intelligence assistant.
The assistant gives helpful, detailed, and polite answers to the user's questions."""
instruction = """Consider a conversation between User (a human) and Assistant (named Buddy).
Buddy can fluently speak the user's language (e.g. English, Chinese).
Buddy possesses vast knowledge about the world, history, and culture."""
instruction = ""

num_rounds = 0
history = ConversationHistory(
    system=instruction,
    messages=[],
    offset=0,
    settings=settings,
)
with open("a.java", "r", encoding="utf-8") as f:
    user_input = f.read()

history.append_message(settings.roles[0], "Please translate the following code into cangjie:\n```\n" + user_input + "\n```")
history.append_message(settings.roles[1], None)

prompt = history.get_prompt()
response = get_model_response(generator, prompt, 2048)[0]['generated_text']
response = response[len(prompt):]
output = response
# output = stop_response(response)
history.messages[-1][1] = output

print("-" * 30 + f" Round {num_rounds} " + "-" * 30)
print(f"{output}")
# user_input = f"{output}\n\n"
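The stop_response call is commented out here, while convert.py uses it with stop_str="\nUser: ". Assuming it simply truncates the generation at the first occurrence of the stop string (so the model does not keep writing the user's next turn), a sketch would be:

def stop_response(response: str, stop_str: str = "\nUser: ") -> str:
    # Sketch under that assumption; the real function is in katheryne.tools.chatbot.
    idx = response.find(stop_str)
    return response[:idx] if idx != -1 else response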
34 changes: 34 additions & 0 deletions hparams/hparams_chat_codeqwen1.5_7b_cj.json
@@ -0,0 +1,34 @@
{
    "conv_format": "openbuddy",
    "end_of_conversation": 151643,
    "data_path": [
        "/data/wangjun/github/cangjie/",
        "Vtuber-plan/sharegpt-cleaned"
    ],
    "data_output_path": "./tmp/data_files/",
    "model_name_or_path": "/data/wangjun/models/CodeQwen1.5-7B",
    "atten_class": "eager",
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 8,
    "accumulate_grad_batches": 64,
    "max_seq_len": 1024,
    "checkpoint_every_n_train_steps": 100,
    "log_every_n_steps": 1,
    "val_check_interval": 0.25,
    "limit_val_batches": 0.1,
    "learning_rate": 8e-6,
    "betas": [0.9, 0.95],
    "eps": 8e-6,
    "lr_decay": 0.999875,
    "lr_scheduler_type": "cosine",
    "num_warmup_steps": 100,
    "max_epochs": 300,
    "disable_dropout": true,
    "model_torch_dtype": "auto",
    "bf16": true,
    "gradient_checkpointing": true,
    "weight_decay": 0.0,
    "gradient_clip_algorithm": "norm",
    "gradient_clip_val": 1.0,
    "strategy": null
}
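These hparams files are plain JSON, so a trainer can consume them directly. A minimal loading sketch (katheryne's actual loader may differ):

import json

with open("hparams/hparams_chat_codeqwen1.5_7b_cj.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

lr = hparams["learning_rate"]      # 8e-6
beta1, beta2 = hparams["betas"]    # 0.9, 0.95, e.g. for torch.optim.AdamW
# Effective per-device batch: 2 * 64 = 128 samples between optimizer steps.
effective_batch = hparams["per_device_train_batch_size"] * hparams["accumulate_grad_batches"]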
45 changes: 45 additions & 0 deletions hparams/hparams_chat_qwen1.5_14b_cj_lora.json
@@ -0,0 +1,45 @@
{
    "conv_format": "openbuddy",
    "end_of_conversation": 151643,
    "data_path": [
        {
            "path": "/data/wangjun/github/cangjie/",
            "sample": 1.0
        },
        {
            "path": "Vtuber-plan/sharegpt-cleaned",
            "sample": 0.01
        }

    ],
    "data_output_path": "./tmp/data_files/",
    "model_name_or_path": "/data/wangjun/models/Qwen1.5-14B",
    "atten_class": "eager",
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 8,
    "accumulate_grad_batches": 32,
    "max_seq_len": 1024,
    "checkpoint_every_n_train_steps": 100,
    "log_every_n_steps": 1,
    "val_check_interval": 0.25,
    "limit_val_batches": 0.1,
    "learning_rate": 8e-6,
    "betas": [0.9, 0.95],
    "eps": 8e-6,
    "lr_decay": 0.999875,
    "lr_scheduler_type": "cosine",
    "num_warmup_steps": 100,
    "max_epochs": 300,
    "disable_dropout": true,
    "model_torch_dtype": "auto",
    "bf16": true,
    "gradient_checkpointing": true,
    "weight_decay": 0.0,
    "gradient_clip_algorithm": "norm",
    "gradient_clip_val": 1.0,
    "strategy": null,
    "lora": {
        "r": 64,
        "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "lm_head"]
    }
}
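The "lora" block maps naturally onto a peft LoraConfig. A sketch under that assumption (lora_alpha and dropout are left at peft defaults, since this file does not set them):

from peft import LoraConfig

lora_cfg = LoraConfig(
    r=64,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj", "lm_head"],
    task_type="CAUSAL_LM",
)
# model = get_peft_model(base_model, lora_cfg)  # base_model: the Qwen1.5-14B checkpoint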
40 changes: 40 additions & 0 deletions hparams/hparams_chat_qwen1.5_4b.json
@@ -0,0 +1,40 @@
{
    "conv_format": "openbuddy",
    "end_of_conversation": 151643,
    "data_path": [
        {
            "path": "/data/wangjun/github/cangjie/",
            "sample": 1.0
        },
        {
            "path": "Vtuber-plan/sharegpt-cleaned",
            "sample": 0.01
        }

    ],
    "data_output_path": "./tmp/data_files/",
    "model_name_or_path": "/data/wangjun/models/Qwen1.5-4B",
    "atten_class": "eager",
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 8,
    "accumulate_grad_batches": 64,
    "max_seq_len": 1024,
    "checkpoint_every_n_train_steps": 100,
    "log_every_n_steps": 1,
    "val_check_interval": 0.25,
    "limit_val_batches": 0.1,
    "learning_rate": 8e-6,
    "betas": [0.9, 0.95],
    "eps": 8e-6,
    "lr_decay": 0.999875,
    "lr_scheduler_type": "cosine",
    "num_warmup_steps": 100,
    "max_epochs": 300,
    "disable_dropout": true,
    "model_torch_dtype": "auto",
    "bf16": true,
    "gradient_checkpointing": true,
    "weight_decay": 0.0,
    "gradient_clip_algorithm": "norm",
    "gradient_clip_val": 1.0
}
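One plausible reading of the {"path", "sample"} entries is per-source subsampling: keep each example with probability equal to its "sample" weight, so sharegpt-cleaned contributes roughly 1% of its rows alongside the full cangjie set. A sketch of that interpretation (katheryne's real sampling logic may differ):

import random

def subsample(examples, sample_ratio, seed=42):
    # Keep each example independently with probability sample_ratio.
    rng = random.Random(seed)
    return [ex for ex in examples if rng.random() < sample_ratio]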
2 changes: 1 addition & 1 deletion hparams/hparams_chat_qwen1.5_7b_chat_cj.json
@@ -10,7 +10,7 @@
     "per_device_train_batch_size": 2,
     "per_device_eval_batch_size": 8,
     "accumulate_grad_batches": 64,
-    "max_seq_len": 2048,
+    "max_seq_len": 512,
     "checkpoint_every_n_train_steps": 100,
     "log_every_n_steps": 1,
     "val_check_interval": 0.25,
37 changes: 37 additions & 0 deletions hparams/hparams_chat_qwen1.5_7b_chat_cj_deepspeed.json
@@ -0,0 +1,37 @@
{
    "conv_format": "qwen",
    "end_of_conversation": 151643,
    "data_path": [
        "/data/wangjun/github/cangjie/"
    ],
    "data_output_path": "./tmp/data_files/",
    "model_name_or_path": "/data/wangjun/models/Qwen1.5-7B-Chat",
    "atten_class": "eager",
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 8,
    "accumulate_grad_batches": 64,
    "max_seq_len": 2048,
    "checkpoint_every_n_train_steps": 100,
    "log_every_n_steps": 1,
    "val_check_interval": 0.25,
    "limit_val_batches": 0.1,
    "learning_rate": 8e-6,
    "betas": [0.9, 0.95],
    "eps": 8e-6,
    "lr_decay": 0.999875,
    "lr_scheduler_type": "cosine",
    "num_warmup_steps": 100,
    "max_epochs": 300,
    "disable_dropout": true,
    "model_torch_dtype": "auto",
    "bf16": true,
    "gradient_checkpointing": true,
    "weight_decay": 0.0,
    "gradient_clip_algorithm": "norm",
    "gradient_clip_val": 1.0,
    "strategy": "deepspeed",
    "strategy_params": {
        "offload": false,
        "zero_stage": 3
    }
}
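"strategy": "deepspeed" with these strategy_params would correspond, in Lightning terms, to something like the following (the exact wiring inside katheryne's trainer is an assumption):

from lightning.pytorch.strategies import DeepSpeedStrategy

strategy = DeepSpeedStrategy(
    stage=3,                  # "zero_stage": 3 -> ZeRO stage 3 parameter sharding
    offload_optimizer=False,  # "offload": false -> keep optimizer states on GPU
)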
2 changes: 1 addition & 1 deletion hparams/hparams_chat_qwen1.5_7b_chat_cj_lora.json
@@ -34,5 +34,5 @@
         "r": 128,
         "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "lm_head"]
     },
-    "peft_merge": false
+    "peft_merge": true
 }
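Assuming "peft_merge": true means folding the trained adapter back into the base weights after training (an inference from the flag name), the corresponding peft call is merge_and_unload():

from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("/data/wangjun/models/Qwen1.5-7B-Chat")
model = PeftModel.from_pretrained(base, "path/to/adapter")  # adapter path is hypothetical
model = model.merge_and_unload()  # returns a plain transformers model with LoRA deltas merged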
33 changes: 33 additions & 0 deletions hparams/hparams_chat_qwen1.5_7b_cj.json
@@ -0,0 +1,33 @@
{
    "conv_format": "qwen",
    "end_of_conversation": 151643,
    "data_path": [
        "/data/wangjun/github/cangjie/"
    ],
    "data_output_path": "./tmp/data_files/",
    "model_name_or_path": "/data/wangjun/models/Qwen1.5-7B",
    "atten_class": "eager",
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 8,
    "accumulate_grad_batches": 64,
    "max_seq_len": 512,
    "checkpoint_every_n_train_steps": 100,
    "log_every_n_steps": 1,
    "val_check_interval": 0.25,
    "limit_val_batches": 0.1,
    "learning_rate": 8e-6,
    "betas": [0.9, 0.95],
    "eps": 8e-6,
    "lr_decay": 0.999875,
    "lr_scheduler_type": "cosine",
    "num_warmup_steps": 100,
    "max_epochs": 300,
    "disable_dropout": true,
    "model_torch_dtype": "auto",
    "bf16": true,
    "gradient_checkpointing": true,
    "weight_decay": 0.0,
    "gradient_clip_algorithm": "norm",
    "gradient_clip_val": 1.0,
    "strategy": null
}
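"end_of_conversation": 151643 appears to be the id of Qwen's "<|endoftext|>" token. A quick sanity-check sketch:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("/data/wangjun/models/Qwen1.5-7B")
assert tok.convert_tokens_to_ids("<|endoftext|>") == 151643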
18 changes: 13 additions & 5 deletions hparams/hparams_chat_qwen1.5_7b_cj_lora.json
@@ -1,13 +1,21 @@
 {
-    "conv_format": "qwen",
+    "conv_format": "openbuddy",
     "end_of_conversation": 151643,
     "data_path": [
-        "/data/wangjun/github/cangjie/"
+        {
+            "path": "/data/wangjun/github/cangjie/",
+            "sample": 1.0
+        },
+        {
+            "path": "Vtuber-plan/sharegpt-cleaned",
+            "sample": 0.01
+        }
+
     ],
     "data_output_path": "./tmp/data_files/",
     "model_name_or_path": "/data/wangjun/models/Qwen1.5-7B",
     "atten_class": "eager",
-    "per_device_train_batch_size": 4,
+    "per_device_train_batch_size": 2,
     "per_device_eval_batch_size": 8,
     "accumulate_grad_batches": 32,
     "max_seq_len": 2048,
@@ -31,7 +39,7 @@
     "gradient_clip_val": 1.0,
     "strategy": null,
     "lora": {
-        "r": 128,
-        "target_modules": ["q_proj", "v_proj"]
+        "r": 256,
+        "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "lm_head"]
     }
 }