|
13 | 13 | #include <thread> |
14 | 14 |
|
15 | 15 | #include "api.h" |
| 16 | +#include "connection_kernels.hpp" |
16 | 17 | #include "context.hpp" |
17 | 18 | #include "debug.h" |
18 | 19 | #include "endpoint.hpp" |
@@ -141,6 +142,17 @@ void CudaIpcConnection::updateAndSync(RegisteredMemory dst, uint64_t dstOffset, |
141 | 142 | #endif |
142 | 143 | } |
143 | 144 |
|
| 145 | +void CudaIpcConnection::atomicAdd(RegisteredMemory dst, uint64_t dstOffset, uint64_t value) { |
| 146 | + validateTransport(dst, remoteTransport()); |
| 147 | + |
| 148 | + uint64_t* dstPtr = reinterpret_cast<uint64_t*>(reinterpret_cast<char*>(dst.data()) + dstOffset); |
| 149 | + void* args[] = {reinterpret_cast<void**>(&dstPtr), &value}; |
| 150 | + |
| 151 | + stream_->launch(connectionAtomicAddKernelFunc(), dim3(1), dim3(1), args, 0); |
| 152 | + |
| 153 | + INFO(MSCCLPP_P2P, "CudaIpcConnection atomicAdd: value %lu to %p", value, dstPtr); |
| 154 | +} |
| 155 | + |
144 | 156 | void CudaIpcConnection::flush(int64_t timeoutUsec) { |
145 | 157 | #if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_CONN_CUDA_IPC_FLUSH_ENTRY) |
146 | 158 | NpKit::CollectCpuEvent(NPKIT_EVENT_CONN_CUDA_IPC_FLUSH_ENTRY, 0, 0, *NpKit::GetCpuTimestamp(), 0); |
@@ -244,6 +256,19 @@ void IBConnection::updateAndSync(RegisteredMemory dst, uint64_t dstOffset, uint6 |
244 | 256 | #endif |
245 | 257 | } |
246 | 258 |
|
| 259 | +void IBConnection::atomicAdd(RegisteredMemory dst, uint64_t dstOffset, uint64_t value) { |
| 260 | + validateTransport(dst, remoteTransport()); |
| 261 | + auto dstTransportInfo = getImpl(dst).getTransportInfo(remoteTransport()); |
| 262 | + if (dstTransportInfo.ibLocal) { |
| 263 | + throw Error("dst is local, which is not supported", ErrorCode::InvalidUsage); |
| 264 | + } |
| 265 | + |
| 266 | + auto dstMrInfo = dstTransportInfo.ibMrInfo; |
| 267 | + qp_.lock()->stageAtomicAdd(dstTransportInfo_.ibMr, dstMrInfo, /*wrId=*/0, dstOffset, value, /*signaled=*/true); |
| 268 | + qp_.lock()->postSend(); |
| 269 | + INFO(MSCCLPP_NET, "IBConnection atomicAdd: value %lu to %p", value, (uint8_t*)dstMrInfo.addr + dstOffset); |
| 270 | +} |
| 271 | + |
247 | 272 | void IBConnection::flush(int64_t timeoutUsec) { |
248 | 273 | #if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_CONN_IB_FLUSH_ENTRY) |
249 | 274 | NpKit::CollectCpuEvent(NPKIT_EVENT_CONN_IB_FLUSH_ENTRY, 0, 0, *NpKit::GetCpuTimestamp(), 0); |
@@ -409,6 +434,11 @@ void EthernetConnection::updateAndSync(RegisteredMemory dst, uint64_t dstOffset, |
409 | 434 | #endif |
410 | 435 | } |
411 | 436 |
|
| 437 | +void EthernetConnection::atomicAdd([[maybe_unused]] RegisteredMemory dst, [[maybe_unused]] uint64_t dstOffset, |
| 438 | + [[maybe_unused]] uint64_t value) { |
| 439 | + throw mscclpp::Error("EthernetConnection does not support atomicAdd", ErrorCode::InvalidUsage); |
| 440 | +} |
| 441 | + |
412 | 442 | void EthernetConnection::flush(int64_t) { |
413 | 443 | #if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_CONN_ETH_FLUSH_ENTRY) |
414 | 444 | NpKit::CollectCpuEvent(NPKIT_EVENT_CONN_ETH_FLUSH_ENTRY, 0, 0, *NpKit::GetCpuTimestamp(), 0); |
|
0 commit comments