Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ checkpoints/
train_data.jsonl
*.ckpt
*.pth
*.jsonl

# Logs
*.log
Expand Down
File renamed without changes.
82 changes: 82 additions & 0 deletions examples/smollm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Axolotl fine-tuning config for SmolLM2-135M-Instruct.
base_model: mlx-community/SmolLM2-135M-Instruct
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

datasets:
  - path: train_data.jsonl
    type: chat_template
    chat_template: tokenizer_default
    field_messages: messages
    message_property_mappings:
      role: role
      content: content
    # Only compute loss on the EOS token that ends each trainable turn.
    train_on_eos: "turn"
    roles:
      assistant:
        - assistant
      user:
        - user

load_in_4bit: false
# NOTE(review): adapter is disabled, so the lora_* keys below are inert
# (this runs as a full fine-tune) — confirm that is intended.
# adapter: qlora
lora_r: 16
lora_alpha: 128
lora_dropout: 0.1
lora_target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj

wandb_project: LexiFreak
wandb_name: advance

val_set_size: 0.01
evals_per_epoch: 10
eval_sample_packing: true
eval_max_new_tokens: 128

lora_modules_to_save:
  - embed_tokens
  - lm_head

bf16: auto
gradient_checkpointing_kwargs:
  use_reentrant: true
# Intentionally null: start a fresh run rather than resuming.
resume_from_checkpoint: null
flash_attention: true

gradient_accumulation_steps: 6
gradient_checkpointing: true
activation_offloading: true
micro_batch_size: 1
num_epochs: 5
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 1e-5
warmup_ratio: 0.03
dataset_prepared_path: ./last_run_prepared

sequence_len: 2048
pad_to_sequence_len: true
sample_packing: true

output_dir: ./output
save_steps: 10000
logging_steps: 10
save_safetensors: true
plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
  - axolotl.integrations.liger.LigerPlugin
liger_rope: true
liger_rms_norm: true
liger_glu_activation: true
liger_layer_norm: true
liger_fused_linear_cross_entropy: true

# NOTE(review): these look like Llama-3 special tokens; SmolLM2-Instruct
# uses ChatML (e.g. <|im_end|>) — confirm against the base tokenizer.
special_tokens:
  pad_token: "<|finetune_right_pad_id|>"
  eos_token: "<|eot_id|>"
15 changes: 11 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ name = "train-mlx"
version = "0.1.0"
description = "LLM training with MLX on Apple Silicon"
authors = [
{name = "Sarah Aronson", email = "vagabond@pingas.org"}
{name = "Sarah Aronson", email = "vagabond@pingas.org"},
{name = "fizz~", email = "fizzarolli@riseup.net"}
]
requires-python = ">=3.10"
requires-python = ">=3.12"
dependencies = [
"mlx>=0.20.0",
"mlx-lm>=0.29.1",
Expand All @@ -27,6 +28,12 @@ dependencies = [
]

[project.optional-dependencies]
cuda12 = [
"mlx[cuda12]>=0.20.0"
]
cuda13 = [
"mlx[cuda13]>=0.20.0"
]
dev = [
"black>=24.0.0",
"ruff>=0.3.0",
Expand All @@ -37,11 +44,11 @@ dev = [

[tool.black]
line-length = 100
target-version = ['py310']
target-version = ['py312']

[tool.ruff]
line-length = 100
target-version = "py310"
target-version = "py312"

[tool.ruff.lint]
select = [
Expand Down
Loading
Loading