commit 15b5555ab2 (parent c7609f8328)
Date: 2025-02-11 20:42:01 +06:00


@@ -45,7 +45,7 @@ def main():
     max_seq_length = 16384  # Choose any! We auto support RoPE Scaling internally!
     dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
-    load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.
+    load_in_4bit = False  # Use 4bit quantization to reduce memory usage. Can be False.
     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name=args.base_model,
@@ -114,18 +114,18 @@ def main():
         max_seq_length=max_seq_length,
         packing=False,
         args=TrainingArguments(
-            per_device_train_batch_size=2,
-            gradient_accumulation_steps=4,
+            per_device_train_batch_size=16,
+            gradient_accumulation_steps=2,
             warmup_ratio=0.05,
             max_grad_norm=1.0,
             num_train_epochs=1,
-            learning_rate=2e-5,
+            learning_rate=1e-4,
             fp16=not torch.cuda.is_bf16_supported(),
             bf16=torch.cuda.is_bf16_supported(),
-            logging_steps=10,
+            logging_steps=50,
             optim="adamw_8bit",
-            weight_decay=0.1,
-            lr_scheduler_type="linear",
+            weight_decay=0.05,
+            lr_scheduler_type="cosine",
             seed=3407,
             output_dir="/output/",
             report_to=None,
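
Net effect of this change: 4-bit quantization is disabled, the per-device batch size grows from 2 to 16 while gradient accumulation drops from 4 to 2 (effective batch per device: 2 × 4 = 8 → 16 × 2 = 32), the learning rate rises from 2e-5 to 1e-4 with a cosine instead of linear schedule, weight decay drops from 0.1 to 0.05, and training metrics are logged every 50 steps instead of every 10. Below is a minimal sketch of the resulting setup, assuming the script follows the usual Unsloth + TRL pattern and a TRL version whose SFTTrainer still accepts max_seq_length/packing directly (as the diff shows); the argparse stub and the stand-in dataset are illustrative placeholders, not the script's actual data pipeline.

import argparse

import torch
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

# Stands in for the script's real CLI parsing; only --base_model is shown here.
parser = argparse.ArgumentParser()
parser.add_argument("--base_model", required=True)
args = parser.parse_args()

max_seq_length = 16384   # RoPE scaling handled internally by Unsloth
dtype = None             # auto-detect: float16 on T4/V100, bfloat16 on Ampere+
load_in_4bit = False     # changed from True: load full-precision weights, no 4-bit quantization

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=args.base_model,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Stand-in dataset with a "text" column; the real script prepares its own training set.
train_dataset = Dataset.from_dict({"text": ["Example instruction and response."]})

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",            # assumed column name for the stand-in dataset
    max_seq_length=max_seq_length,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=16,   # was 2
        gradient_accumulation_steps=2,    # was 4 -> effective batch per device: 32 (was 8)
        warmup_ratio=0.05,
        max_grad_norm=1.0,
        num_train_epochs=1,
        learning_rate=1e-4,               # was 2e-5
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=50,                 # was 10
        optim="adamw_8bit",
        weight_decay=0.05,                # was 0.1
        lr_scheduler_type="cosine",       # was "linear"
        seed=3407,
        output_dir="/output/",
        report_to=None,
    ),
)
trainer.train()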