chore: _
train.py: 14 changed lines
@@ -45,7 +45,7 @@ def main():

     max_seq_length = 16384  # Choose any! We auto support RoPE Scaling internally!
     dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
-    load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.
+    load_in_4bit = False  # Use 4bit quantization to reduce memory usage. Can be False.

     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name=args.base_model,
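Note (not part of the commit): a rough, hedged sketch of what flipping load_in_4bit from True to False means for base-weight memory. The 7B parameter count below is an assumption for illustration; the actual model is whatever args.base_model points to and is not named in this diff.

# Back-of-the-envelope only, assuming a hypothetical 7B-parameter base model.
params = 7e9                     # assumed parameter count, not from the repo
bf16_bytes = params * 2          # load_in_4bit=False -> 16-bit weights (dtype=None auto-picks bf16 on Ampere+)
nf4_bytes = params * 0.5         # load_in_4bit=True  -> roughly 4 bits per weight
print(f"16-bit weights: ~{bf16_bytes / 1e9:.1f} GB")   # ~14.0 GB
print(f"4-bit weights:  ~{nf4_bytes / 1e9:.1f} GB")    # ~3.5 GB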
@@ -114,18 +114,18 @@ def main():
         max_seq_length=max_seq_length,
         packing=False,
         args=TrainingArguments(
-            per_device_train_batch_size=2,
-            gradient_accumulation_steps=4,
+            per_device_train_batch_size=16,
+            gradient_accumulation_steps=2,
             warmup_ratio=0.05,
             max_grad_norm=1.0,
             num_train_epochs=1,
-            learning_rate=2e-5,
+            learning_rate=1e-4,
             fp16=not torch.cuda.is_bf16_supported(),
             bf16=torch.cuda.is_bf16_supported(),
-            logging_steps=10,
+            logging_steps=50,
             optim="adamw_8bit",
-            weight_decay=0.1,
-            lr_scheduler_type="linear",
+            weight_decay=0.05,
+            lr_scheduler_type="cosine",
             seed=3407,
             output_dir="/output/",
             report_to=None,
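Note (not part of the commit): the batch-size and accumulation changes interact, so here is a quick, hedged check of the effective batch size per optimizer step. The single-GPU assumption is mine; the diff does not say how many devices are used.

# Effective batch size per optimizer step, before and after this commit.
num_gpus = 1                                    # assumption, not specified in the diff
old = 2 * 4 * num_gpus                          # per_device_train_batch_size=2, gradient_accumulation_steps=4
new = 16 * 2 * num_gpus                         # per_device_train_batch_size=16, gradient_accumulation_steps=2
print(f"effective batch size: {old} -> {new}")  # 8 -> 32 on one GPU

A 4x larger effective batch is commonly paired with a higher peak learning rate, which is consistent with the 2e-5 -> 1e-4 change and the move from a linear to a cosine schedule; that reading is an interpretation, not something stated in the commit.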