support ia3
Signed-off-by: ssbuild <[email protected]>
ssbuild committed Sep 28, 2023
1 parent ca759f7 commit 203f71b
Showing 3 changed files with 55 additions and 30 deletions.
37 changes: 17 additions & 20 deletions config/constant_map.py
@@ -3,7 +3,18 @@
# @Author: tk
# @File:model_maps


from aigc_zoo.constants.define import (TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING,
TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING,
TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING)

__all__ = [
"TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING",
"TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING",
"TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING",
"TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING",
"train_model_config"
]

train_info_models = {
'ChatYuan-large-v2': {
@@ -29,24 +40,10 @@
}


# 'target_modules': ['query_key_value'], # bloom,gpt_neox
# 'target_modules': ["q_proj", "v_proj"], #llama,opt,gptj,gpt_neo
# 'target_modules': ['c_attn'], #gpt2
# 'target_modules': ['project_q','project_v'] # cpmant

train_target_modules_maps = {
't5': ['q', 'v'],
'moss': ['qkv_proj'],
'chatglm': ['query_key_value'],
'bloom' : ['query_key_value'],
'gpt_neox' : ['query_key_value'],
'llama' : ["q_proj", "v_proj"],
'opt' : ["q_proj", "v_proj"],
'gptj' : ["q_proj", "v_proj"],
'gpt_neo' : ["q_proj", "v_proj"],
'gpt2' : ['c_attn'],
'cpmant' : ['project_q','project_v'],
'rwkv' : ['key','value','receptance'],
}
# Modify as needed:
# TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING
# TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING
# TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING
# TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING

train_model_config = train_info_models['ChatYuan-large-v2']
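
The imported mappings replace the hand-maintained train_target_modules_maps above. A minimal sketch of the "modify as needed" step, assuming the imported names are plain dicts keyed by model_type (as train_target_modules_maps was); the 'rwkv' module lists are illustrative, not taken from this commit:

from aigc_zoo.constants.define import (TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING,
                                       TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING)

# Override or add an entry before train_model_config is consumed; PEFT-style IA3
# configs expect feedforward_modules to be a subset of target_modules.
TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING['rwkv'] = ['key', 'value', 'receptance']
TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING['rwkv'] = ['key']
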
11 changes: 7 additions & 4 deletions config/main.py
@@ -63,22 +63,25 @@ def patch_args(train_info_args):
# check whether lora or adalora is enabled
if 'lora' not in train_info_args and 'adalora' not in train_info_args:
raise ValueError('please config lora or adalora')
if train_info_args.get('lora',{}).get('with_lora',False) and train_info_args.get('adalora',{}).get('with_lora',False):
raise Exception('lora and adalora can set one at same time !')
assert train_info_args.get('lora',{}).get('with_lora',False) + \
train_info_args.get('adalora',{}).get('with_lora',False) + \
train_info_args.get('ia3',{}).get('with_lora',False) == 1 , ValueError('only one of lora, adalora and ia3 can be enabled at a time')

train_info_args.pop('prompt', None)
elif global_args["enable_ptv2"]:
train_info_args.pop('lora', None)
train_info_args.pop('adalora', None)
train_info_args.pop('ia3', None)
if hasattr(train_info_args,"gradient_checkpointing"):
train_info_args.gradient_checkpointing = False
else:
train_info_args.pop('lora',None)
train_info_args.pop('adalora', None)
train_info_args.pop('prompt', None)
train_info_args.pop('ia3', None)

# preprocessing
if 'rwkv' in train_info_args[ 'tokenizer_name' ].lower():
if 'rwkv' in (train_info_args[ 'tokenizer_name' ] or train_info_args[ 'model_name_or_path' ]).lower():
train_info_args[ 'use_fast_tokenizer' ] = True
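
The new assert relies on Python booleans summing as integers, so exactly one of the three with_lora flags must be True. A self-contained sketch of the same check (the helper name is illustrative, not part of this commit):

def check_one_adapter_enabled(train_info_args: dict) -> None:
    # Booleans add as 0/1, so the sum counts how many adapters are enabled.
    enabled = (train_info_args.get('lora', {}).get('with_lora', False)
               + train_info_args.get('adalora', {}).get('with_lora', False)
               + train_info_args.get('ia3', {}).get('with_lora', False))
    if enabled != 1:
        raise ValueError('only one of lora, adalora and ia3 can be enabled at a time')

check_one_adapter_enabled({'lora': {'with_lora': True}, 'adalora': {}, 'ia3': {}})  # passes
# check_one_adapter_enabled({'lora': {'with_lora': True}, 'ia3': {'with_lora': True}})  # raises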


@@ -97,7 +100,7 @@ def get_deepspeed_config(precision='fp16'):
precision = str(precision).lower()
# choose the deepspeed config file
is_need_update_config = False
if global_args["enable_lora"]:
if global_args["enable_lora"] or global_args["enable_ptv2"]:
is_need_update_config = True
filename = os.path.join(os.path.dirname(__file__), 'deepspeed_offload.json')
else:
37 changes: 31 additions & 6 deletions config/sft_config_lora.py
@@ -2,39 +2,53 @@
# @Time : 2023/5/24 15:53
import json
import os

import torch
from transformers import BitsAndBytesConfig
from config.constant_map import train_model_config, train_target_modules_maps
from config.constant_map import (train_model_config,
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING,
TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING,
TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING)


# lora-related modules are disabled by default; only one of lora, adalora and ia3 can be enabled at a time
lora_info_args = {
'with_lora': True, # whether to enable the lora module
'lora_type': 'lora',
'r': 8,
'target_modules': train_target_modules_maps[train_model_config['model_type']],
'target_modules': TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[train_model_config['model_type']],
'lora_alpha': 32,
'lora_dropout': 0.1,
'fan_in_fan_out': False,
'bias': 'none', # Bias type for LoRA. Can be 'none', 'all' or 'lora_only'
'modules_to_save' : None, # "help": "List of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint. "
'layers_to_transform': None,
'layers_pattern': None,
'rank_pattern': {
# "The mapping from layer names or regexp expression to ranks which are different from the default rank specified by `r`. "
# "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 8`}"
},
'alpha_pattern': {
# "The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `lora_alpha`. "
# "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 32`}"
},

}

adalora_info_args = {
'with_lora': False, # whether to enable the adalora module
'lora_type': 'adalora',
'r': 8,
'target_modules': train_target_modules_maps[train_model_config['model_type']],
'target_modules': TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING[train_model_config['model_type']],
'lora_alpha': 32,
'lora_dropout': 0.1,
'fan_in_fan_out': False,
'bias': 'none', # Bias type for LoRA. Can be 'none', 'all' or 'lora_only'
'modules_to_save' : None, # "help": "List of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint. "
'layers_to_transform': None,
'layers_pattern': None,
'alpha_pattern': {
# "The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `lora_alpha`. "
# "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 32`}"
},

'target_r':8, # Target Lora matrix dimension.
'init_r': 12, # Initial Lora matrix dimension.
Expand All @@ -49,6 +63,15 @@
}


ia3_info_args = {
'with_lora': False, # whether to enable the ia3 module
'target_modules': TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING[train_model_config['model_type']],
'feedforward_modules': TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING[train_model_config['model_type']],
'fan_in_fan_out': False,
'modules_to_save' : None, # "help": "List of modules apart from IA3 layers to be set as trainable and saved in the final checkpoint. "
'init_ia3_weights': True,
}
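
The defaults above leave LoRA enabled. Since the check in config/main.py requires exactly one adapter to be active, switching to IA3 amounts to flipping the with_lora flags, for example (equivalently, edit the values in the dict literals directly):

# Enable IA3 and disable the other adapters so the exclusivity check passes.
lora_info_args['with_lora'] = False
adalora_info_args['with_lora'] = False
ia3_info_args['with_lora'] = True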



train_info_args = {
@@ -109,6 +132,7 @@
############## lora modules
'lora': lora_info_args,
'adalora': adalora_info_args,
'ia3': ia3_info_args,

}

@@ -167,6 +191,7 @@
############## lora modules
'lora': lora_info_args,
'adalora': adalora_info_args,
'ia3': ia3_info_args,

}
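
For reference, the ia3_info_args keys (target_modules, feedforward_modules, fan_in_fan_out, modules_to_save, init_ia3_weights) mirror the fields of PEFT's IA3Config. A sketch of how such a dict could be turned into a PEFT config, assuming the training stack consumes it roughly this way (it may build the config differently):

from peft import IA3Config, TaskType

def build_ia3_config(info: dict) -> IA3Config:
    # 'with_lora' is this repo's enable flag, not an IA3Config field, so drop it.
    kwargs = {k: v for k, v in info.items() if k != 'with_lora'}
    # ChatYuan-large-v2 is T5-based, so a seq2seq task type is assumed here.
    return IA3Config(task_type=TaskType.SEQ_2_SEQ_LM, **kwargs)

build_ia3_config(ia3_info_args)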

