Commit aa7af32

feat: test Accelerator class
Signed-off-by: Charlie Doern <[email protected]>
1 parent 77e20ca commit aa7af32

File tree

2 files changed: +250 −1 lines changed


src/instructlab/training/main_ds.py

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@ def train(
     metric_logger = logging.getLogger("instructlab.training.metrics")
     base_logger = logging.getLogger("instructlab.training")

-    batch_size = args.effective_batch_size // args.grad_accum
+    batch_size = args.effective_batch_size // accelerator.grad_accum
     samples_seen = 0

     if hasattr(args, "samples_seen"):
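
Note: a minimal sketch of the call pattern this change assumes. The keyword arguments mirror the values exercised in the new unit tests below; the surrounding training-loop variables (args, model, train_loader) are placeholders, not code taken from main_ds.py.

# Hypothetical sketch: batch_size is now derived from the Accelerator
# instance rather than from the raw CLI args, so the two cannot drift apart.
from instructlab.training.accelerator import Accelerator
from instructlab.training.config import DistributedBackend

accelerator = Accelerator(
    model=model,                # placeholder Model instance
    samples_per_gpu=8,
    grad_accum=2,
    train_loader=train_loader,  # placeholder DataLoader
    save_samples=1000,
    distributed_framework=DistributedBackend.FSDP,
    fsdp_sharding_strategy="HYBRID_SHARD",
)
batch_size = args.effective_batch_size // accelerator.grad_accum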

tests/unit/test_accelerator.py

Lines changed: 249 additions & 0 deletions
@@ -0,0 +1,249 @@
# Standard
from unittest.mock import MagicMock, patch
import os

# Third Party
from torch.utils.data import DataLoader
import pytest
import torch

# First Party
from instructlab.training.accelerator import Accelerator
from instructlab.training.config import DeepSpeedOptions, DistributedBackend
from instructlab.training.model import Model


@pytest.fixture
def mock_model():
    model = MagicMock(spec=Model)
    model.model = MagicMock()
    model.lora_config = None
    model._no_split_modules = ["LlamaDecoderLayer"]
    # Add children method to model
    model.children = MagicMock(return_value=[])
    model.model.children = MagicMock(return_value=[])
    # Add get_module_class_from_name method
    model.get_module_class_from_name = MagicMock(return_value=torch.nn.Module)
    return model


@pytest.fixture
def mock_train_loader():
    loader = MagicMock(spec=DataLoader)
    loader.dataset = MagicMock()
    return loader


@pytest.fixture
def mock_optimizer():
    optimizer = MagicMock(spec=torch.optim.Optimizer)
    # Add param_groups attribute with required keys
    optimizer.param_groups = [{"params": [], "lr": 1e-4}]
    return optimizer


@pytest.fixture
def mock_transformers_accel():
    with patch("instructlab.training.accelerator.TransformersAccel") as mock:
        yield mock


def test_accelerator_init_deepspeed(
    mock_model, mock_train_loader, mock_transformers_accel
):
    with patch("torch.distributed.get_world_size", return_value=2):
        accelerator = Accelerator(
            model=mock_model,
            samples_per_gpu=8,
            grad_accum=2,
            train_loader=mock_train_loader,
            save_samples=1000,
            distributed_framework=DistributedBackend.DEEPSPEED,
            deepspeed_cpu_offload_optimizer_ratio=1.0,  # Add default value
        )

    assert accelerator.samples_per_gpu == 8
    assert accelerator.grad_accum == 2
    assert accelerator.model == mock_model
    assert accelerator.distributed_framework == DistributedBackend.DEEPSPEED
    assert accelerator.train_loader == mock_train_loader
    assert accelerator.save_samples == 1000


def test_accelerator_init_fsdp(mock_model, mock_train_loader, mock_transformers_accel):
    with patch("torch.distributed.get_world_size", return_value=2):
        accelerator = Accelerator(
            model=mock_model,
            samples_per_gpu=8,
            grad_accum=2,
            train_loader=mock_train_loader,
            save_samples=1000,
            distributed_framework=DistributedBackend.FSDP,
            fsdp_sharding_strategy="HYBRID_SHARD",
        )

    assert accelerator.samples_per_gpu == 8
    assert accelerator.grad_accum == 2
    assert accelerator.model == mock_model
    assert accelerator.distributed_framework == DistributedBackend.FSDP
    assert accelerator.fsdp_sharding_strategy == "HYBRID_SHARD"


def test_accelerator_prepare_with_optimizer(
    mock_model, mock_train_loader, mock_optimizer, mock_transformers_accel
):
    with patch("torch.distributed.get_world_size", return_value=2):
        accelerator = Accelerator(
            model=mock_model,
            samples_per_gpu=8,
            grad_accum=2,
            train_loader=mock_train_loader,
            save_samples=1000,
            distributed_framework=DistributedBackend.DEEPSPEED,
            deepspeed_cpu_offload_optimizer_ratio=1.0,  # Add default value
        )

    # Mock the accelerator's prepare method
    accelerator.accelerator = MagicMock()
    accelerator.accelerator.prepare.return_value = (
        mock_model.model,
        mock_optimizer,
        mock_train_loader,
        MagicMock(),  # lr_scheduler
    )

    accelerator.prepare_with_optimizer(
        optimizer=mock_optimizer,
        lr_scheduler="cosine",
        num_epochs=3,
        num_warmup_steps=100,
    )

    # Verify that prepare was called with the correct arguments
    accelerator.accelerator.prepare.assert_called_once()
    assert accelerator.optimizer == mock_optimizer


def test_accelerator_deepspeed_cpu_offload(
    mock_model, mock_train_loader, mock_transformers_accel
):
    with patch("torch.distributed.get_world_size", return_value=2):
        accelerator = Accelerator(
            model=mock_model,
            samples_per_gpu=8,
            grad_accum=2,
            train_loader=mock_train_loader,
            save_samples=1000,
            distributed_framework=DistributedBackend.DEEPSPEED,
            deepspeed_cpu_offload_optimizer=True,
            deepspeed_cpu_offload_optimizer_pin_memory=True,
            deepspeed_cpu_offload_optimizer_ratio=0.5,
        )

    assert accelerator.deepspeed_cpu_offload_optimizer is True
    assert accelerator.deepspeed_cpu_offload_optimizer_pin_memory is True
    assert accelerator.deepspeed_cpu_offload_optimizer_ratio == 0.5


def test_accelerator_fsdp_cpu_offload(
    mock_model, mock_train_loader, mock_transformers_accel
):
    with patch("torch.distributed.get_world_size", return_value=2):
        accelerator = Accelerator(
            model=mock_model,
            samples_per_gpu=8,
            grad_accum=2,
            train_loader=mock_train_loader,
            save_samples=1000,
            distributed_framework=DistributedBackend.FSDP,
            fsdp_sharding_strategy="HYBRID_SHARD",
            fsdp_cpu_offload_params=True,
        )

    assert accelerator.fsdp_cpu_offload_params is True


def test_accelerator_getattr(mock_model, mock_train_loader, mock_transformers_accel):
    with patch("torch.distributed.get_world_size", return_value=2):
        accelerator = Accelerator(
            model=mock_model,
            samples_per_gpu=8,
            grad_accum=2,
            train_loader=mock_train_loader,
            save_samples=1000,
            distributed_framework=DistributedBackend.DEEPSPEED,
            deepspeed_cpu_offload_optimizer_ratio=1.0,  # Add default value
        )

    # Mock a method on the underlying accelerator
    mock_method = MagicMock()
    accelerator.accelerator = MagicMock()
    accelerator.accelerator.some_method = mock_method

    # Test that __getattr__ forwards to the underlying accelerator
    result = accelerator.some_method()
    assert result == mock_method.return_value


def test_accelerator_setup_deepspeed_classmethod(
    mock_model, mock_train_loader, mock_transformers_accel
):
    with patch("torch.distributed.get_world_size", return_value=2):
        accelerator = Accelerator.setup_deepspeed(
            model=mock_model,
            samples_per_gpu=8,
            grad_accum=2,
            train_loader=mock_train_loader,
            deepspeed_cpu_offload_optimizer=True,
            deepspeed_cpu_offload_optimizer_pin_memory=True,
            deepspeed_cpu_offload_optimizer_ratio=0.5,
            save_samples=1000,
        )

    assert isinstance(accelerator, Accelerator)
    assert accelerator.distributed_framework == DistributedBackend.DEEPSPEED
    assert accelerator.deepspeed_cpu_offload_optimizer is True


def test_accelerator_setup_fsdp_classmethod(
    mock_model, mock_train_loader, mock_transformers_accel
):
    with patch("torch.distributed.get_world_size", return_value=2):
        accelerator = Accelerator.setup_fsdp(
            model=mock_model,
            samples_per_gpu=8,
            grad_accum=2,
            train_loader=mock_train_loader,
            fsdp_sharding_strategy="HYBRID_SHARD",
            fsdp_cpu_offload_params=True,
            save_samples=1000,
        )

    assert isinstance(accelerator, Accelerator)
    assert accelerator.distributed_framework == DistributedBackend.FSDP
    assert accelerator.fsdp_sharding_strategy == "HYBRID_SHARD"
    assert accelerator.fsdp_cpu_offload_params is True


def test_accelerator_with_lora(mock_model, mock_train_loader, mock_transformers_accel):
    # Set up a mock LoRA config
    mock_model.lora_config = MagicMock()
    mock_model.lora_config.target_modules = ["q_proj", "v_proj"]

    # Mock the fsdp_auto_wrap_policy function
    mock_wrap_policy = MagicMock()
    with patch("peft.utils.other.fsdp_auto_wrap_policy", return_value=mock_wrap_policy):
        with patch("torch.distributed.get_world_size", return_value=2):
            accelerator = Accelerator(
                model=mock_model,
                samples_per_gpu=8,
                grad_accum=2,
                train_loader=mock_train_loader,
                save_samples=1000,
                distributed_framework=DistributedBackend.FSDP,
                fsdp_sharding_strategy="HYBRID_SHARD",
            )

    # Verify that the accelerator was initialized with LoRA config
    assert accelerator.model.lora_config is not None
    assert accelerator.model.lora_config.target_modules == ["q_proj", "v_proj"]
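
Note: the tests above patch torch.distributed.get_world_size and TransformersAccel and use MagicMock fixtures throughout, so they should run on a CPU-only machine. A sketch of one way to run just this file, assuming pytest and the instructlab.training package are importable in the current environment (the CLI form `pytest tests/unit/test_accelerator.py -v` is equivalent):

# Sketch: run only the new Accelerator tests, verbosely.
import sys
import pytest

sys.exit(pytest.main(["tests/unit/test_accelerator.py", "-v"]))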
