chore: _

2025-02-15 05:41:51 +06:00
parent 9af06f6382
commit 281f54df25
5 changed files with 28 additions and 22 deletions
--- a/config.py
+++ b/config.py
@@ -9,7 +9,7 @@ class DataConfig:
 Translation:
 {}"""
    train_split: float = 0.95
-    max_samples: int | None = 5000
+    max_samples: int | None = None


@dataclass
@@ -31,11 +31,11 @@ class TrainingConfig:
    base_model: str = "unsloth/Qwen2.5-7B"
    max_seq_length: int = 6144
    dtype: str | None = None
-    load_in_4bit: bool = True
+    load_in_4bit: bool = False

    # LoRA
-    lora_r: int = 64
-    lora_alpha: int = 128
+    lora_r: int = 16
+    lora_alpha: int = 32
    lora_dropout: float = 0
    target_modules: list[str] = field(
        default_factory=lambda: [
@@ -49,18 +49,19 @@ class TrainingConfig:
        ]
    )
    use_gradient_checkpointing: str = "unsloth"
-    random_state: int = 3407
+    random_state: int = 42
    use_rslora: bool = False
    loftq_config: dict | None = None

    # training args
    per_device_train_batch_size: int = 16
-    gradient_accumulation_steps: int = 2
-    warmup_ratio: float = 0.1
+    gradient_accumulation_steps: int = 4
+    # warmup_ratio (previously 0.1) is superseded by the fixed warmup_steps below.
+    warmup_steps: int = 80
    max_grad_norm: float = 1.0
-    num_train_epochs: float = 1
-    learning_rate: float = 5e-4
-    weight_decay: float = 0.01
+    num_train_epochs: float = 3
+    learning_rate: float = 1e-5
+    weight_decay: float = 0.001
    lr_scheduler_type: str = "cosine"
    logging_steps: int = 1

@@ -70,15 +71,15 @@ class TrainingConfig:
    save_total_limit: int | None = 3

    # dataset
-    dataset_num_proc: int = 4
+    dataset_num_proc: int = 8
    packing: bool = True

    # eval
    fp16_full_eval: bool = True
-    per_device_eval_batch_size: int = 8
-    eval_accumulation_steps: int = 2
+    per_device_eval_batch_size: int = 16
+    eval_accumulation_steps: int = 1
    eval_strategy: str = "steps"
-    eval_steps: int = 10
+    eval_steps: int = 100

    # output
    output_dir: str = "/workspace/output/"