17 changes: 17 additions & 0 deletions src/peft/peft_model.py
@@ -125,6 +125,23 @@ def __init__(self, model: PreTrainedModel, peft_config: PeftConfig, adapter_name
if hasattr(self.base_model, "config") and hasattr(self.base_model.config, "pretraining_tp"):
self.base_model.config.pretraining_tp = 1

@property
def _ddp_params_and_buffers_to_ignore(self) -> set[str]:
# Add an attribute _ddp_params_and_buffers_to_ignore. This is sometimes required for DDP, as we may
# want to ignore certain parameters to avoid the error "Parameters which did not receive grad for rank X".
# See issue 899.
ddp_params_and_buffers_to_ignore = set()
for name, module in self.named_modules():
if module is self:
# avoid infinite recursion
continue

module_params_and_buffers_to_ignore = getattr(module, "_ddp_params_and_buffers_to_ignore", [])
Reviewer comment (Contributor), suggested change:

    # before:
    module_params_and_buffers_to_ignore = getattr(module, "_ddp_params_and_buffers_to_ignore", [])
    # after:
    if isinstance(module, BaseTunerLayer):
        module_params_and_buffers_to_ignore = module._ddp_params_and_buffers_to_ignore

Then, inside the if block, perform the for loop.

for param_name in module_params_and_buffers_to_ignore:
ddp_params_and_buffers_to_ignore.add(f"{name}.{param_name}")

return ddp_params_and_buffers_to_ignore
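
For reference, here is a hypothetical sketch of what the lookup could look like with the reviewer's suggestion applied, restricting the search to PEFT tuner layers instead of duck-typing with getattr. It assumes BaseTunerLayer can be imported from peft.tuners.tuners_utils; note that the wrapper in src/peft/utils/other.py (further down in this diff) also defines _ddp_params_and_buffers_to_ignore but is not a tuner layer, so an isinstance-only check would need to account for it as well.

from peft.tuners.tuners_utils import BaseTunerLayer  # assumed import path


def ddp_ignore_names(model) -> set[str]:
    # Hypothetical helper mirroring the reviewer's suggestion: only consult
    # modules that are explicitly PEFT tuner layers.
    names: set[str] = set()
    for name, module in model.named_modules():
        if isinstance(module, BaseTunerLayer):
            for param_name in module._ddp_params_and_buffers_to_ignore:
                names.add(f"{name}.{param_name}")
    return names
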

def save_pretrained(
self,
save_directory: str,
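
To illustrate how this property is consumed, here is a rough, self-contained usage sketch. It assumes this branch of PEFT is installed and sets up a single-process "gloo" process group only so that DDP can be constructed locally; DDP looks up the _ddp_params_and_buffers_to_ignore attribute on the wrapped module, so the property above is picked up automatically. The MLP class is a toy stand-in, analogous to the one used in tests/test_custom_models.py.

import os

import torch
import torch.distributed as dist
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP

from peft import LoraConfig, get_peft_model


class MLP(nn.Module):
    # toy model with two linear layers, mirroring the test setup
    def __init__(self):
        super().__init__()
        self.lin0 = nn.Linear(10, 20)
        self.lin1 = nn.Linear(20, 2)

    def forward(self, x):
        return self.lin1(torch.relu(self.lin0(x)))


# single-process process group, just so DDP can be instantiated for the demo
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)

config = LoraConfig(target_modules=["lin0"], modules_to_save=["lin1"])
peft_model = get_peft_model(MLP(), config)

# The frozen original lin1 parameters are listed here; DDP will exclude them
# from gradient synchronization, avoiding the "did not receive grad" error.
print(peft_model._ddp_params_and_buffers_to_ignore)

ddp_model = DDP(peft_model)  # reads _ddp_params_and_buffers_to_ignore internally
dist.destroy_process_group()
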
29 changes: 29 additions & 0 deletions src/peft/tuners/lora/layer.py
@@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import math
import warnings
@@ -112,6 +113,34 @@ def reset_lora_parameters(self, adapter_name):
nn.init.zeros_(self.lora_embedding_A[adapter_name])
nn.init.normal_(self.lora_embedding_B[adapter_name])

@property
def _ddp_params_and_buffers_to_ignore(self) -> set[str]:
# Parameters to ignore when applying DDP
# See issue 899.
ddp_params_and_buffers_to_ignore: set[str] = set()

inactive_adapters = (
set(self.lora_A) | set(self.lora_B) | set(self.lora_embedding_A) | set(self.lora_embedding_B)
)
if not self.disable_adapters:
# there is an active adapter; it should not be ignored
inactive_adapters -= {self.active_adapter}

def iter_params(adapter_name, lora_module):
# helper function to iterate through all params of given adapter and sub-module
module_dict = getattr(self, lora_module)
if adapter_name in module_dict:
for param_name, _ in module_dict[adapter_name].named_parameters():
yield f"{lora_module}.{adapter_name}.{param_name}"

for inactive_adapter in inactive_adapters:
ddp_params_and_buffers_to_ignore |= set(iter_params(inactive_adapter, "lora_A"))
ddp_params_and_buffers_to_ignore |= set(iter_params(inactive_adapter, "lora_B"))
ddp_params_and_buffers_to_ignore |= set(iter_params(inactive_adapter, "lora_embedding_A"))
ddp_params_and_buffers_to_ignore |= set(iter_params(inactive_adapter, "lora_embedding_B"))

return ddp_params_and_buffers_to_ignore


# Below code is based on https://github.com/microsoft/LoRA/blob/main/loralib/layers.py
# and modified to work with PyTorch FSDP
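
As a quick illustration of the names this property produces, here is a sketch (assuming this branch of PEFT is installed) that attaches two LoRA adapters to the same Linear layer of a toy model; with "default" active, only the inactive adapter1 parameters are reported for DDP to ignore, matching the tests added below.

from collections import OrderedDict

import torch.nn as nn

from peft import LoraConfig, get_peft_model


# toy model with a single named linear layer to target
toy = nn.Sequential(OrderedDict([("lin0", nn.Linear(10, 10))]))

peft_model = get_peft_model(toy, LoraConfig(target_modules=["lin0"]))
peft_model.add_adapter("adapter1", LoraConfig(target_modules=["lin0"]))
peft_model.set_adapter("default")

lora_layer = peft_model.base_model.model.lin0  # the LoRA layer wrapping lin0
# Names are relative to the layer; only the inactive adapter shows up.
print(lora_layer._ddp_params_and_buffers_to_ignore)
# expected: {"lora_A.adapter1.weight", "lora_B.adapter1.weight"}
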
14 changes: 14 additions & 0 deletions src/peft/utils/other.py
@@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import copy
import inspect
import os
@@ -157,6 +159,18 @@ def __init__(self, module_to_save, adapter_name):
self.active_adapter = adapter_name
self.disable_adapters = False

@property
def _ddp_params_and_buffers_to_ignore(self) -> set[str]:
# Parameters to ignore when applying DDP
# See issue 899.
if self.disable_adapters or (self.active_adapter not in self.modules_to_save):
names = set()
for adapter_name, adapter in self.modules_to_save.items():
names |= {f"modules_to_save.{adapter_name}.{name}" for name, _ in adapter.named_parameters()}
else:
names = {f"original_module.{name}" for name, _ in self.original_module.named_parameters()}
return names

def update(self, adapter_name):
self.modules_to_save.update(torch.nn.ModuleDict({adapter_name: copy.deepcopy(self.original_module)}))

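
A short sketch of the two branches above (again assuming this branch of PEFT is installed; the toy model is a stand-in): while the adapter is active, the frozen original module is ignored; while adapters are disabled, the trainable copies in modules_to_save are ignored instead.

from collections import OrderedDict

import torch.nn as nn

from peft import LoraConfig, get_peft_model


toy = nn.Sequential(OrderedDict([("lin0", nn.Linear(10, 10)), ("lin1", nn.Linear(10, 2))]))

config = LoraConfig(target_modules=["lin0"], modules_to_save=["lin1"])
peft_model = get_peft_model(toy, config)
wrapper = peft_model.base_model.model.lin1  # the wrapper defined in this file

# adapter active -> the frozen original module is excluded from DDP syncing
print(wrapper._ddp_params_and_buffers_to_ignore)
# {"original_module.weight", "original_module.bias"}

with peft_model.disable_adapter():
    # adapters disabled -> the copies in modules_to_save are excluded instead
    print(wrapper._ddp_params_and_buffers_to_ignore)
    # {"modules_to_save.default.weight", "modules_to_save.default.bias"}
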
136 changes: 136 additions & 0 deletions tests/test_custom_models.py
@@ -363,3 +363,139 @@ def run_with_disable(config_kwargs, bias):
@parameterized.expand(TEST_CASES)
def test_adding_multiple_adapters_with_bias_raises(self, test_name, model_id, config_cls, config_kwargs):
self._test_adding_multiple_adapters_with_bias_raises(model_id, config_cls, config_kwargs)


class DdpParamsToIgnoreTester(unittest.TestCase):
"""Tests that the _params_and_buffers_to_ignore property works as expected.

# See issue #899.

This is not specifically tied to custom models, it's just easy to test here and testing it on all types of models
would be overkill.

"""

def test_ddp_params_and_buffers_to_ignore_modules_to_save_none(self):
config = LoraConfig(target_modules=["lin0"])
peft_model = get_peft_model(MLP(), config)

params_and_buffers_to_ignore = peft_model._ddp_params_and_buffers_to_ignore
expected = set()
self.assertEqual(params_and_buffers_to_ignore, expected)

def test_ddp_params_and_buffers_to_ignore_modules_to_save_default(self):
config = LoraConfig(target_modules=["lin0"], modules_to_save=["lin1"])
peft_model = get_peft_model(MLP(), config)

params_and_buffers_to_ignore = peft_model._ddp_params_and_buffers_to_ignore
expected = {"base_model.model.lin1.original_module.weight", "base_model.model.lin1.original_module.bias"}
self.assertEqual(params_and_buffers_to_ignore, expected)

# check that those parameters indeed exist on the model
existing_params = set(peft_model.state_dict().keys())
self.assertTrue(params_and_buffers_to_ignore.issubset(existing_params))

def test_ddp_params_and_buffers_to_ignore_modules_to_save_deactivated_adapter(self):
config = LoraConfig(target_modules=["lin0"], modules_to_save=["lin1"])
peft_model = get_peft_model(MLP(), config)

with peft_model.disable_adapter():
params_and_buffers_to_ignore = peft_model._ddp_params_and_buffers_to_ignore
expected = {
"base_model.model.lin1.modules_to_save.default.weight",
"base_model.model.lin1.modules_to_save.default.bias",
}
# we check for subset because we're not interested in checking the ignored lora layers in this test
self.assertTrue(expected.issubset(params_and_buffers_to_ignore))

# check that those parameters indeed exist on the model
existing_params = set(peft_model.state_dict().keys())
self.assertTrue(params_and_buffers_to_ignore.issubset(existing_params))

def test_ddp_params_and_buffers_to_ignore_modules_to_save_multiple_modules_to_save(self):
config = LoraConfig(target_modules=["emb"], modules_to_save=["conv1d", "lin0"])
peft_model = get_peft_model(ModelEmbConv1D(), config)

params_and_buffers_to_ignore = peft_model._ddp_params_and_buffers_to_ignore
expected = {
"base_model.model.conv1d.original_module.weight",
"base_model.model.conv1d.original_module.bias",
"base_model.model.lin0.original_module.weight",
"base_model.model.lin0.original_module.bias",
}
self.assertEqual(params_and_buffers_to_ignore, expected)

# check that those parameters indeed exist on the model
existing_params = set(peft_model.state_dict().keys())
self.assertTrue(params_and_buffers_to_ignore.issubset(existing_params))

def test_ddp_params_and_buffers_to_ignore_multiple_lora_adapters_different_targets(self):
# When there are multiple LoRA adapters on different targets, the inactive one should be ignored.
config0 = LoraConfig(target_modules=["lin0"])
peft_model = get_peft_model(MLP(), config0)

config1 = LoraConfig(target_modules=["lin1"])
peft_model.add_adapter("adapter1", config1)

# set config0 as active
peft_model.set_adapter("default")

params_and_buffers_to_ignore = peft_model._ddp_params_and_buffers_to_ignore
expected = {
"base_model.model.lin1.lora_A.adapter1.weight",
"base_model.model.lin1.lora_B.adapter1.weight",
}
self.assertEqual(params_and_buffers_to_ignore, expected)

# check that those parameters indeed exist on the model
existing_params = set(peft_model.state_dict().keys())
self.assertTrue(params_and_buffers_to_ignore.issubset(existing_params))

# change the active adapter to adapter1
peft_model.set_adapter("adapter1")
params_and_buffers_to_ignore = peft_model._ddp_params_and_buffers_to_ignore
expected = {
"base_model.model.lin0.lora_A.default.weight",
"base_model.model.lin0.lora_B.default.weight",
}
self.assertEqual(params_and_buffers_to_ignore, expected)

# check that those parameters indeed exist on the model
existing_params = set(peft_model.state_dict().keys())
self.assertTrue(params_and_buffers_to_ignore.issubset(existing_params))

def test_ddp_params_and_buffers_to_ignore_multiple_lora_adapters_same_targets(self):
# When there are multiple LoRA adapters on the same target, the inactive one should be ignored. This is similar
# to the previous test, except that adapter1 is also applied to lin0.
config0 = LoraConfig(target_modules=["lin0"])
peft_model = get_peft_model(MLP(), config0)

config1 = LoraConfig(target_modules=["lin0"])
peft_model.add_adapter("adapter1", config1)

# set config0 as active
peft_model.set_adapter("default")

params_and_buffers_to_ignore = peft_model._ddp_params_and_buffers_to_ignore
expected = {
"base_model.model.lin0.lora_A.adapter1.weight",
"base_model.model.lin0.lora_B.adapter1.weight",
}
self.assertEqual(params_and_buffers_to_ignore, expected)

# check that those parameters indeed exist on the model
existing_params = set(peft_model.state_dict().keys())
self.assertTrue(params_and_buffers_to_ignore.issubset(existing_params))

# change the active adapter to adapter1
peft_model.set_adapter("adapter1")
params_and_buffers_to_ignore = peft_model._ddp_params_and_buffers_to_ignore
expected = {
"base_model.model.lin0.lora_A.default.weight",
"base_model.model.lin0.lora_B.default.weight",
}
self.assertEqual(params_and_buffers_to_ignore, expected)

# check that those parameters indeed exist on the model
existing_params = set(peft_model.state_dict().keys())
self.assertTrue(params_and_buffers_to_ignore.issubset(existing_params))
2 changes: 1 addition & 1 deletion tests/testing_common.py
@@ -723,7 +723,7 @@ def _test_weighted_combination_of_adapters(self, model_id, config_cls, config_kw
# AdaLora does not support adding more than 1 adapter
return

adapter_list = ["adapter1", "adapter_2", "adapter_3"]
adapter_list = ["adapter_1", "adapter_2", "adapter_3"]
weight_list = [0.5, 1.5, 1.5]
model = self.transformers_class.from_pretrained(model_id)
config = config_cls(