Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Baseline anneal experiment: starts from the step-289000 checkpoint given in
# `load_path` and trains for `max_tokens` tokens on the single source listed
# under `dataset.sources`.
name: "baseline-olmo3_7b-step289000-anneal-100B-dolma2"
description: "Baseline: OLMo3 7B step 289000 (~4.9T tokens) anneal to 100B Tokens with dolma2 mix"
budget: "ai2/oe-base"
workspace: "ai2/olmo-3-microanneals"
nodes: 16
gpus: 8
preemptible: true
# 100B tokens, as stated in `description`. Written without `_` digit
# separators: only YAML 1.1 loaders resolve `100_000_000_000` as an integer;
# YAML 1.2 core-schema loaders read it as a string.
max_tokens: 100000000000
# 2097152 = 2**21. NOTE(review): presumably counted in tokens (256 sequences
# of 8192) - confirm against the consumer.
global_batch_size: 2097152
sequence_length: 8192
seed: 1337
# NOTE(review): `name`, `description`, and `load_path` all say OLMo3, while
# this identifier says olmo2 - confirm it is the intended model config.
model: "olmo2_7B_swafix"
tokenizer: "dolma2"
priority: urgent
cluster: ai2/augusta-google-1
# 16384 = 2 * sequence_length. NOTE(review): unit presumably tokens - confirm.
rank_microbatch_size: 16384
scheduler_type: linear
warmup_steps: 0
activation_checkpointing: true
annealing:
  enabled: true
# NOTE(review): `load_path` and `load_state` are kept at top level, as
# siblings of `annealing`; confirm the consumer's schema does not expect them
# nested under `annealing`.
load_path: gs://ai2-llm/checkpoints/OLMo3-7B-swafix/step289000
load_state: false
dataset:
  sources:
    - name: dolma2-0625-v0.1
      # Only one source is listed, and its ratio is the full 1.0.
      target_ratio: 1.0
      # Glob patterns over pre-tokenized .npy shards, one entry per subset.
      paths:
        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/all-dressed-snazzy2/*/*.npy
        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/arxiv/*.npy
        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/finemath-3plus/*.npy
        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/s2pdf/*/*.npy
        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/stack-edu/*/*.npy
        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/wikipedia/*.npy