chore: _
train.py (14 changed lines)
@@ -45,7 +45,7 @@ def main():
     max_seq_length = 16384  # Choose any! We auto support RoPE Scaling internally!
     dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
-    load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.
+    load_in_4bit = False  # Use 4bit quantization to reduce memory usage. Can be False.

     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name=args.base_model,
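For context, a rough sketch of how these loader settings are typically wired into Unsloth's FastLanguageModel.from_pretrained. Only the values shown in the diff come from the commit; the model name below is a placeholder for the script's args.base_model, and the surrounding code is an assumption, not the actual file:

    from unsloth import FastLanguageModel

    # Values as of this commit: the model is now loaded in 16-bit precision
    # rather than with 4-bit quantized weights.
    max_seq_length = 16384  # RoPE scaling is handled internally by Unsloth
    dtype = None            # auto-detect: bfloat16 on Ampere+, float16 on T4/V100
    load_in_4bit = False    # was True; disabling it uses more memory but avoids quantization

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/llama-3-8b",  # placeholder; train.py passes args.base_model here
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )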
@@ -114,18 +114,18 @@ def main():
         max_seq_length=max_seq_length,
         packing=False,
         args=TrainingArguments(
-            per_device_train_batch_size=2,
-            gradient_accumulation_steps=4,
+            per_device_train_batch_size=16,
+            gradient_accumulation_steps=2,
             warmup_ratio=0.05,
             max_grad_norm=1.0,
             num_train_epochs=1,
-            learning_rate=2e-5,
+            learning_rate=1e-4,
             fp16=not torch.cuda.is_bf16_supported(),
             bf16=torch.cuda.is_bf16_supported(),
-            logging_steps=10,
+            logging_steps=50,
             optim="adamw_8bit",
-            weight_decay=0.1,
-            lr_scheduler_type="linear",
+            weight_decay=0.05,
+            lr_scheduler_type="cosine",
             seed=3407,
             output_dir="/output/",
             report_to=None,
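Taken together, the trainer changes raise the effective per-device batch size from 2 × 4 = 8 to 16 × 2 = 32 sequences per optimizer step, bump the learning rate from 2e-5 to 1e-4, and swap the linear schedule for a cosine one. A minimal sketch of the resulting TrainingArguments block (values come from the diff; how it is passed to the trainer is an assumption based on the context lines above):

    import torch
    from transformers import TrainingArguments

    # Hyperparameters after this commit; comments note the previous values.
    training_args = TrainingArguments(
        per_device_train_batch_size=16,   # was 2
        gradient_accumulation_steps=2,    # was 4 -> effective batch 16 * 2 = 32 (previously 2 * 4 = 8)
        warmup_ratio=0.05,
        max_grad_norm=1.0,
        num_train_epochs=1,
        learning_rate=1e-4,               # was 2e-5
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=50,                 # was 10
        optim="adamw_8bit",               # 8-bit AdamW (requires bitsandbytes)
        weight_decay=0.05,                # was 0.1
        lr_scheduler_type="cosine",       # was "linear"
        seed=3407,
        output_dir="/output/",
        report_to=None,
    )
    # In train.py this object is passed to the trainer via args=..., alongside
    # max_seq_length=max_seq_length and packing=False, as shown in the hunk above.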