
Commit 1f90301

Merge branch 'PaddlePaddle:develop' into tensor

2 parents 4c9f968 + ecbee80
File tree

7 files changed: +212 −39 lines

paddle/fluid/distributed/collective/deep_ep/include/event_pool.h

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ namespace deep_ep::detail {
 
 class EventPool {
  public:
-  EventPool() = default;
+  EventPool();
  EventPool(const EventPool&) = delete;
  EventPool(EventPool&&) = delete;
  ~EventPool();

paddle/fluid/distributed/collective/deep_ep/src/event_pool.cc

Lines changed: 10 additions & 0 deletions

@@ -22,6 +22,16 @@ EventPool &EventPool::Instance() {
   return pool;
 }
 
+EventPool::EventPool() {
+  for (size_t i = 0; i < 1000; ++i) {
+    cudaEvent_t new_event;
+    CUDA_CHECK(cudaEventCreate(&new_event));
+
+    cudaEventRecord(new_event, 0);
+    incomplished_events_.push(new_event);
+  }
+}
+
 EventPool::~EventPool() {
   const auto &DestroyEvent = [](cudaEvent_t event) {
     cudaError_t e = cudaEventDestroy(event);
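
The new constructor pre-creates 1000 CUDA events and records each one on the default stream before pushing it into incomplished_events_, so later requests avoid the event-creation cost. Below is a minimal pure-Python sketch of the same pre-allocation pattern; placeholder objects stand in for cudaEvent_t, and the acquire helper is hypothetical, not part of this commit.

import queue


class _Event:
    """Placeholder standing in for a cudaEvent_t handle."""


class EventPoolSketch:
    def __init__(self, prealloc: int = 1000) -> None:
        # Mirror of the new constructor: create events up front and park
        # them in a queue so later requests skip the creation cost.
        self._incomplished_events = queue.Queue()
        for _ in range(prealloc):
            self._incomplished_events.put(_Event())

    def acquire(self) -> _Event:
        # Hypothetical accessor: reuse a pre-created event if available,
        # otherwise fall back to creating a fresh one.
        try:
            return self._incomplished_events.get_nowait()
        except queue.Empty:
            return _Event()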

python/paddle/distributed/fleet/meta_parallel/dualpipev.py

Lines changed: 20 additions & 1 deletion

@@ -37,7 +37,7 @@
     PipelineParallel,
 )
 from .pp_utils.batch_comm_helper import BatchCommHelper
-from .zero_bubble_utils import WeightGradStore
+from .zero_bubble_utils import EventStore, WeightGradStore
 
 __all__ = []

@@ -358,6 +358,10 @@ def _commit_and_wait_comm(
             else 0
         )
         if common_forward_ops_num == 0 and common_backward_ops_num == 0:
+            if EventStore.event is not None:
+                e_t = EventStore.event
+                EventStore.event = None
+                return e_t
             return deep_ep.get_event_from_custom_stream(
                 paddle.device.current_stream().stream_base
             )

@@ -387,13 +391,28 @@ def _commit_and_wait_comm(
                 pp_raw_stream
             )
 
+        backward_outer_event_wait = False
+        if EventStore.event is not None:
+            with paddle.device.stream_guard(
+                paddle.device.Stream(stream_base=pp_raw_stream)
+            ):
+                EventStore.event.current_stream_wait()
+
+            EventStore.set(None)
+            self.pp_group.process_group.set_outer_wait(True)
+
+            backward_outer_event_wait = True
+
         if common_backward_ops_num > 0:
             bwd_reqs = batch_isend_irecv(self.comm_backward_ops)
 
             if not use_stream_wait_event:
                 for req in bwd_reqs:
                     req.wait()
 
+        if backward_outer_event_wait:
+            self.pp_group.process_group.set_outer_wait(False)
+
         if use_stream_wait_event:
             forward_event_to_wait.current_stream_wait()
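
A framework-free sketch of the control flow this change adds to _commit_and_wait_comm: when an event has been parked in EventStore, it is either returned directly (no comm ops pending) or waited on before the backward batch is issued, with the send/recv bracketed by set_outer_wait(True/False). Stream guards, deep_ep, and the real request objects are stubbed out here; only the branching mirrors the diff.

class EventStore:
    # same shape as the class added in zero_bubble_utils.py below
    event = None

    @classmethod
    def set(cls, event):
        cls.event = event


def commit_and_wait_comm(forward_ops, backward_ops, process_group):
    if not forward_ops and not backward_ops:
        if EventStore.event is not None:
            e_t = EventStore.event
            EventStore.event = None
            return e_t
        return "event-from-current-stream"  # stands in for deep_ep.get_event_from_custom_stream

    backward_outer_event_wait = False
    if EventStore.event is not None:
        # In the real code this wait runs under a stream_guard on the pp stream.
        EventStore.event.current_stream_wait()
        EventStore.set(None)
        process_group.set_outer_wait(True)
        backward_outer_event_wait = True

    # ... batch_isend_irecv(backward_ops) and the req.wait() loop happen here ...

    if backward_outer_event_wait:
        process_group.set_outer_wait(False)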

python/paddle/distributed/fleet/meta_parallel/zero_bubble_utils.py

Lines changed: 9 additions & 0 deletions

@@ -54,6 +54,15 @@ def clear(cls) -> None:
         cls.funcs_queue = queue.Queue()
 
 
+class EventStore:
+
+    event = None
+
+    @classmethod
+    def set(cls, event) -> None:
+        cls.event = event
+
+
 def fold_init_dims(tensor):
     # NOTE(zhangyuqin1998): Reshape a rank-3 tensor from P x M x N to (P * M) x N,
     # to keep weight_grad in a correct rank. See phi::FoldInitDims.
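
EventStore is a process-wide, class-level slot holding at most one event: one part of the scheduler parks an event with set(), another reads and clears it. A short usage sketch, assuming the class is importable from its new location and using a dummy event object in place of a real communication event:

from paddle.distributed.fleet.meta_parallel.zero_bubble_utils import EventStore


class _DummyEvent:
    def current_stream_wait(self):
        pass  # a real event would make the current stream wait until it completes


EventStore.set(_DummyEvent())   # producer side: park an event
pending = EventStore.event      # consumer side: pick it up...
EventStore.set(None)            # ...and clear the slot
if pending is not None:
    pending.current_stream_wait()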

python/paddle/nn/functional/activation.py

Lines changed: 23 additions & 4 deletions

@@ -14,7 +14,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
 
 import paddle
 from paddle import _C_ops, in_dynamic_mode

@@ -150,14 +150,18 @@ def elu_(x: Tensor, alpha: float = 1.0, name: str | None = None) -> Tensor:
 
 
 def gelu(
-    x: Tensor, approximate: bool = False, name: str | None = None
+    x: Tensor,
+    approximate: Literal["tanh", "none"] | bool = False,
+    name: str | None = None,
 ) -> Tensor:
     r"""
     gelu activation.
 
     The activation function of Gelu is calculated element by element. More information refers to :ref: `Gaussian Error Linear Units`.
 
-    if approximate is True
+    approximate parameter must be True, False, "tanh", "none".
+
+    if approximate is True or "tanh"
 
     .. math::

@@ -171,7 +175,7 @@ def gelu(
 
     Parameters:
         x (Tensor): The input Tensor with data type float32, float64.
-        approximate (bool, optional): Whether to enable approximation. Default is False.
+        approximate (str|bool, optional): Whether to enable approximation. Default is False.
         name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
 
     Returns:

@@ -194,8 +198,23 @@ def gelu(
             Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
             [[-0.15880796, 0.34571400],
             [ 0.84119201, 1.39957154]])
+            >>> out3 = F.gelu(x, "none")
+            >>> print(out3)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[-0.15865529, 0.34573123],
+            [ 0.84134471, 1.39978933]])
+            >>> out4 = F.gelu(x, "tanh")
+            >>> print(out4)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[-0.15880796, 0.34571400],
+            [ 0.84119201, 1.39957154]])
     """
 
+    if approximate == "tanh":
+        approximate = True
+    elif approximate == "none":
+        approximate = False
+
     if in_dynamic_or_pir_mode():
         return _C_ops.gelu(x, approximate)
     else:
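
The new string spellings are folded onto the existing boolean flag before dispatch, so F.gelu(x, "tanh") matches F.gelu(x, True) and F.gelu(x, "none") matches F.gelu(x, False), as the docstring examples show. A standalone mirror of that normalization (the helper name is illustrative, not a Paddle API):

def normalize_gelu_approximate(approximate):
    # Mirrors the branch added at the top of F.gelu.
    if approximate == "tanh":
        return True
    if approximate == "none":
        return False
    return approximate  # already a bool


assert normalize_gelu_approximate("tanh") is True
assert normalize_gelu_approximate("none") is False
assert normalize_gelu_approximate(False) is False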

python/paddle/nn/layer/activation.py

Lines changed: 26 additions & 4 deletions

@@ -15,7 +15,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
 
 from paddle.framework import get_default_dtype

@@ -176,7 +176,9 @@ class GELU(Layer):
     r"""
     GELU Activation.
 
-    If approximate is True
+    approximate parameter must be True, False, "tanh", "none".
+
+    If approximate is True or "tanh"
 
     .. math::

@@ -189,7 +191,7 @@ class GELU(Layer):
         GELU(x) = 0.5 * x * (1 + erf(\frac{x}{\sqrt{2}}))
 
     Parameters:
-        approximate (bool, optional): Whether to enable approximation. Default is False.
+        approximate (str|bool, optional): Whether to enable approximation. Default is False.
         name (str|None, optional): Name for the operation (optional, default is None).
             For more information, please refer to :ref:`api_guide_Name`.

@@ -208,6 +210,24 @@ class GELU(Layer):
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[-0.15865529, 0.34573123],
            [ 0.84134471, 1.39978933]])
+           >>> m = paddle.nn.GELU(False)
+           >>> out = m(x)
+           >>> print(out)
+           Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+           [[-0.15865529, 0.34573123],
+           [ 0.84134471, 1.39978933]])
+           >>> m = paddle.nn.GELU("none")
+           >>> out = m(x)
+           >>> print(out)
+           Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+           [[-0.15865529, 0.34573123],
+           [ 0.84134471, 1.39978933]])
+           >>> m = paddle.nn.GELU("tanh")
+           >>> out = m(x)
+           >>> print(out)
+           Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+           [[-0.15880796, 0.34571400],
+           [ 0.84119201, 1.39957154]])
            >>> m = paddle.nn.GELU(True)
            >>> out = m(x)
            >>> print(out)

@@ -217,7 +237,9 @@ class GELU(Layer):
     """
 
     def __init__(
-        self, approximate: bool = False, name: str | None = None
+        self,
+        approximate: Literal["tanh", "none"] | bool = False,
+        name: str | None = None,
     ) -> None:
         super().__init__()
         self._approximate = approximate
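
__init__ stores the approximate value unchanged, so no normalization is needed at the layer level once the functional gelu accepts the string forms. A simplified stand-in layer (not Paddle's GELU implementation) illustrating that pass-through, assuming the layer ultimately delegates to F.gelu:

import paddle
import paddle.nn.functional as F


class GELULike(paddle.nn.Layer):
    """Illustrative wrapper only; paddle.nn.GELU is the real class."""

    def __init__(self, approximate=False, name=None):
        super().__init__()
        self._approximate = approximate  # stored as given: bool or "tanh"/"none"
        self._name = name

    def forward(self, x):
        # F.gelu now interprets the string forms, so the stored value
        # can be forwarded as-is.
        return F.gelu(x, self._approximate, self._name)


x = paddle.to_tensor([[-1.0, 0.5], [1.0, 1.5]])
print(GELULike("tanh")(x))  # expected to match paddle.nn.GELU("tanh") on the same input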
