2025-02-11 20:42:01 +06:00
parent c7609f8328
commit 15b5555ab2


@@ -45,7 +45,7 @@ def main():
     max_seq_length = 16384 # Choose any! We auto support RoPE Scaling internally!
     dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
-    load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
+    load_in_4bit = False # Use 4bit quantization to reduce memory usage. Can be False.
     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name=args.base_model,
@@ -114,18 +114,18 @@ def main():
         max_seq_length=max_seq_length,
         packing=False,
         args=TrainingArguments(
-            per_device_train_batch_size=2,
-            gradient_accumulation_steps=4,
+            per_device_train_batch_size=16,
+            gradient_accumulation_steps=2,
             warmup_ratio=0.05,
             max_grad_norm=1.0,
             num_train_epochs=1,
-            learning_rate=2e-5,
+            learning_rate=1e-4,
             fp16=not torch.cuda.is_bf16_supported(),
             bf16=torch.cuda.is_bf16_supported(),
-            logging_steps=10,
+            logging_steps=50,
             optim="adamw_8bit",
-            weight_decay=0.1,
-            lr_scheduler_type="linear",
+            weight_decay=0.05,
+            lr_scheduler_type="cosine",
             seed=3407,
             output_dir="/output/",
             report_to=None,
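
For context, the first hunk turns off 4-bit loading: with load_in_4bit=False the base weights are loaded in 16-bit precision (bfloat16 on Ampere+ GPUs, float16 otherwise) rather than NF4-quantized, which costs roughly 4x the weight memory but avoids quantization error during fine-tuning. Below is a minimal sketch of the loader call after this change, assuming Unsloth's FastLanguageModel API as shown in the hunk; the model name is a placeholder for the script's args.base_model.

from unsloth import FastLanguageModel

# Sketch of the loader after this commit. "your-base-model" is a placeholder;
# the actual script passes args.base_model from the command line.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="your-base-model",  # hypothetical placeholder for args.base_model
    max_seq_length=16384,          # RoPE scaling is handled internally by Unsloth
    dtype=None,                    # auto-detect: bfloat16 on Ampere+, float16 on T4/V100
    load_in_4bit=False,            # changed from True: full-precision weights, no NF4 quantization
)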
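
The second hunk reworks the optimization schedule: the effective batch size per device goes from 2 × 4 = 8 to 16 × 2 = 32 sequences per optimizer step, the peak learning rate rises from 2e-5 to 1e-4 and now decays on a cosine rather than linear schedule, weight decay is halved to 0.05, and logging drops to every 50 steps. A consolidated sketch of the new TrainingArguments, annotated with the previous values, is shown below.

import torch
from transformers import TrainingArguments

# New training configuration from this commit, with the old values in comments.
training_args = TrainingArguments(
    per_device_train_batch_size=16,            # was 2
    gradient_accumulation_steps=2,             # was 4; effective batch 16 * 2 = 32 (previously 2 * 4 = 8)
    warmup_ratio=0.05,
    max_grad_norm=1.0,
    num_train_epochs=1,
    learning_rate=1e-4,                        # was 2e-5
    fp16=not torch.cuda.is_bf16_supported(),   # fall back to fp16 on pre-Ampere GPUs
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=50,                          # was 10
    optim="adamw_8bit",                        # 8-bit AdamW from bitsandbytes
    weight_decay=0.05,                         # was 0.1
    lr_scheduler_type="cosine",                # was "linear"
    seed=3407,
    output_dir="/output/",
    report_to=None,
)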