Skip to content

Commit

Permalink
file glob
Browse files Browse the repository at this point in the history
Signed-off-by: ssbuild <[email protected]>
  • Loading branch information
ssbuild committed Nov 3, 2023
1 parent 007ca53 commit bea540f
Show file tree
Hide file tree
Showing 8 changed files with 11 additions and 5 deletions.
3 changes: 3 additions & 0 deletions data_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# @Time : 2023/1/22 16:22
# @Author : tk
# @FileName: data_utils.py

import sys
import os
sys.path.append(os.path.abspath(os.path.dirname(__file__)))

import glob
import copy
import json
import random
Expand Down Expand Up @@ -142,6 +144,7 @@ def _get_messages(self, lines):
# Read the files
def on_get_corpus(self, files: typing.List, mode: str):
D = []
files = sum([glob.glob(file) for file in files], [])
for file in files:
with open(file, mode='r', encoding='utf-8', newline='\n') as f:
lines = f.readlines()
Expand Down
3 changes: 2 additions & 1 deletion scripts/config/train_ac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ seed: 42
fp16: true
do_train: true
train_file:
- ../data/finetune_train_examples.json
- ../data/*.json

do_eval: false
do_predict: false
per_device_train_batch_size: 2
Expand Down
2 changes: 1 addition & 1 deletion scripts/config/train_cl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ seed: 42
fp16: true
do_train: true
train_file:
- ../data/finetune_train_examples.json
- ../data/*.json
do_eval: false
do_predict: false
per_device_train_batch_size: 2
Expand Down
2 changes: 1 addition & 1 deletion scripts/config/train_hf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ seed: 42
fp16: true
do_train: true
train_file:
- ../data/finetune_train_examples.json
- ../data/*.json
do_eval: false
do_predict: false
per_device_train_batch_size: 2
Expand Down
2 changes: 1 addition & 1 deletion scripts/config/train_pl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ data_backend: parquet
convert_onnx: false
do_train: true
train_file:
- ../data/finetune_train_examples.json
- ../data/*.json
max_epochs: 20
max_steps: -1

Expand Down
1 change: 1 addition & 0 deletions scripts/train_full.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export enable_ptv2=false
export enable_lora=false
export load_in_bit=0

#export CUDA_VISIBLE_DEVICES="0,1,2,3"



Expand Down
1 change: 1 addition & 0 deletions scripts/train_lora.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export enable_ptv2=false
export enable_lora=true
export load_in_bit=4

#export CUDA_VISIBLE_DEVICES="0,1,2,3"

# Print the accepted command-line syntax to stderr and abort with status 1.
usage() {
    echo "Usage: $0 [-m <train|dataset>]" >&2
    exit 1
}

Expand Down
2 changes: 1 addition & 1 deletion scripts/train_ptv2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export enable_ptv2=true
export enable_lora=false
export load_in_bit=0


#export CUDA_VISIBLE_DEVICES="0,1,2,3"

# Show how the script is invoked (written to stderr), then exit with status 1.
usage() {
    echo "Usage: $0 [-m <train|dataset>]" >&2
    exit 1
}

Expand Down

0 comments on commit bea540f

Please sign in to comment.