Skip to content

Commit cdeae39

Browse files
authored
Merge pull request #2009 from psychocoderHPC/topic-fieldTmpGatherComm
`FieldTmp` gather support
2 parents a8805ac + 1c2bbab commit cdeae39

8 files changed

Lines changed: 117 additions & 18 deletions

File tree

examples/Bunch/include/simulation_defines/param/memory.param

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,14 @@ constexpr uint32_t BYTES_EDGES = 32 * 1024; //32 kiB;
5656
/** number of scalar fields that are reserved as temporary fields */
5757
constexpr uint32_t fieldTmpNumSlots = 1;
5858

59+
/** can `FieldTmp` gather neighbor information
60+
*
61+
* If `true` it is possible to call the method `asyncCommunicationGather()`
62+
* to copy data from the border of neighboring GPUs into the local guard.
63+
* This is also known as building up a "ghost" or "halo" region in domain
64+
* decomposition and only necessary for specific algorithms that extend
65+
* the basic PIC cycle, e.g. with dependence on derived density or energy fields.
66+
*/
67+
constexpr bool fieldTmpSupportGatherCommunication = true;
68+
5969
}//namespace picongpu

examples/KelvinHelmholtz/include/simulation_defines/param/memory.param

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,14 @@ constexpr uint32_t BYTES_EDGES = 64 * 1024; //64 kiB;
5656
/** number of scalar fields that are reserved as temporary fields */
5757
constexpr uint32_t fieldTmpNumSlots = 1;
5858

59+
/** can `FieldTmp` gather neighbor information
60+
*
61+
* If `true` it is possible to call the method `asyncCommunicationGather()`
62+
* to copy data from the border of neighboring GPUs into the local guard.
63+
* This is also known as building up a "ghost" or "halo" region in domain
64+
* decomposition and only necessary for specific algorithms that extend
65+
* the basic PIC cycle, e.g. with dependence on derived density or energy fields.
66+
*/
67+
constexpr bool fieldTmpSupportGatherCommunication = true;
68+
5969
}//namespace picongpu

examples/LaserWakefield/include/simulation_defines/param/memory.param

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,14 @@ constexpr uint32_t BYTES_EDGES = 32 * 1024; //32 kiB;
5656
/** number of scalar fields that are reserved as temporary fields */
5757
constexpr uint32_t fieldTmpNumSlots = 1;
5858

59+
/** can `FieldTmp` gather neighbor information
60+
*
61+
* If `true` it is possible to call the method `asyncCommunicationGather()`
62+
* to copy data from the border of neighboring GPUs into the local guard.
63+
* This is also known as building up a "ghost" or "halo" region in domain
64+
* decomposition and only necessary for specific algorithms that extend
65+
* the basic PIC cycle, e.g. with dependence on derived density or energy fields.
66+
*/
67+
constexpr bool fieldTmpSupportGatherCommunication = true;
68+
5969
}//namespace picongpu

examples/ThermalTest/include/simulation_defines/param/memory.param

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,14 @@ constexpr uint32_t BYTES_EDGES = 3200 * 1024; //3200 kiB;
5656
/** number of scalar fields that are reserved as temporary fields */
5757
constexpr uint32_t fieldTmpNumSlots = 1;
5858

59+
/** can `FieldTmp` gather neighbor information
60+
*
61+
* If `true` it is possible to call the method `asyncCommunicationGather()`
62+
* to copy data from the border of neighboring GPUs into the local guard.
63+
* This is also known as building up a "ghost" or "halo" region in domain
64+
* decomposition and only necessary for specific algorithms that extend
65+
* the basic PIC cycle, e.g. with dependence on derived density or energy fields.
66+
*/
67+
constexpr bool fieldTmpSupportGatherCommunication = true;
68+
5969
}//namespace picongpu

examples/WeibelTransverse/include/simulation_defines/param/memory.param

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,14 @@ constexpr uint32_t BYTES_EDGES = 8 * 256 * 1024; //2 MiB
5555
/** number of scalar fields that are reserved as temporary fields */
5656
constexpr uint32_t fieldTmpNumSlots = 1;
5757

58+
/** can `FieldTmp` gather neighbor information
59+
*
60+
* If `true` it is possible to call the method `asyncCommunicationGather()`
61+
* to copy data from the border of neighboring GPUs into the local guard.
62+
* This is also known as building up a "ghost" or "halo" region in domain
63+
* decomposition and only necessary for specific algorithms that extend
64+
* the basic PIC cycle, e.g. with dependence on derived density or energy fields.
65+
*/
66+
constexpr bool fieldTmpSupportGatherCommunication = true;
67+
5868
}//namespace picongpu

src/picongpu/include/fields/FieldTmp.hpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,7 @@
2626
#include <vector>
2727

2828
/*pic default*/
29-
#include "pmacc_types.hpp"
3029
#include "simulation_defines.hpp"
31-
#include "simulation_classTypes.hpp"
3230

3331
#include "fields/Fields.def"
3432
#include "fields/SimulationFieldHelper.hpp"
@@ -88,10 +86,22 @@ namespace picongpu
8886

8987
static std::string getName();
9088

91-
uint32_t getCommTag();
92-
89+
/** scatter data to neighboring GPUs
90+
*
91+
* Add data from the local guard of the GPU to the border of the neighboring GPUs.
92+
* This method can be called before or after asyncCommunicationGather; no
93+
* explicit synchronization is needed to avoid race conditions between both methods.
94+
*/
9395
virtual EventTask asyncCommunication( EventTask serialEvent );
9496

97+
/** gather data from neighboring GPUs
98+
*
99+
* Copy data from the border of neighboring GPUs into the local guard.
100+
* This method can be called before or after asyncCommunication; no
101+
* explicit synchronization is needed to avoid race conditions between both methods.
102+
*/
103+
EventTask asyncCommunicationGather( EventTask serialEvent );
104+
95105
void init( );
96106

97107
DataBoxType getDeviceDataBox( );
@@ -125,10 +135,14 @@ namespace picongpu
125135
private:
126136

127137
GridBuffer<ValueType, simDim> *fieldTmp;
138+
GridBuffer<ValueType, simDim>* fieldTmpRecv;
128139

129140
uint32_t m_slotId;
130141

131-
uint32_t m_commTag;
142+
EventTask m_scatterEv;
143+
uint32_t m_commTagScatter;
144+
EventTask m_gatherEv;
145+
uint32_t m_commTagGather;
132146
};
133147

134148

src/picongpu/include/fields/FieldTmp.tpp

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
#pragma once
2323

24-
#include "pmacc_types.hpp"
24+
#include "simulation_defines.hpp"
2525
#include "memory/buffers/GridBuffer.hpp"
2626
#include "mappings/simulation/GridController.hpp"
2727

@@ -61,13 +61,19 @@ namespace picongpu
6161
) :
6262
SimulationFieldHelper<MappingDesc>( cellDescription ),
6363
fieldTmp( nullptr ),
64+
fieldTmpRecv( nullptr ),
6465
m_slotId( slotId )
6566
{
66-
m_commTag =
67+
m_commTagScatter =
6768
++PMacc::traits::detail::GetUniqueTypeId< uint8_t >::counter +
6869
SPECIES_FIRSTTAG;
70+
m_commTagGather = ++PMacc::traits::detail::GetUniqueTypeId< uint8_t >::counter +
71+
SPECIES_FIRSTTAG;
72+
73+
fieldTmp = new GridBuffer <ValueType, simDim >( cellDescription.getGridLayout( ) );
6974

70-
fieldTmp = new GridBuffer<ValueType, simDim > ( cellDescription.getGridLayout( ) );
75+
if( fieldTmpSupportGatherCommunication )
76+
fieldTmpRecv = new GridBuffer< ValueType, simDim >( fieldTmp->getDeviceBuffer(), cellDescription.getGridLayout( ) );
7177

7278
/** \todo The exchange has to be reset and set again regarding the
7379
* temporary "Fill-"Functor we want to use.
@@ -165,9 +171,21 @@ namespace picongpu
165171
};
166172

167173
}
168-
// std::cout << "ex " << i << " x=" << guardingCells[0] << " y=" << guardingCells[1] << " z=" << guardingCells[2] << std::endl;
169-
fieldTmp->addExchangeBuffer( i, guardingCells, m_commTag );
174+
175+
fieldTmp->addExchangeBuffer( i, guardingCells, m_commTagScatter );
176+
177+
if( fieldTmpRecv )
178+
{
179+
/* guarding cells depend on direction
180+
* for negative direction use originGuard else endGuard (relative direction ZERO is ignored)
181+
* don't switch end and origin because this is a read buffer and not send buffer
182+
*/
183+
for ( uint32_t d = 0; d < simDim; ++d )
184+
guardingCells[d] = ( relativMask[d] == -1 ? originGuard[d] : endGuard[d] );
185+
fieldTmpRecv->addExchange( GUARD, i, guardingCells, m_commTagGather );
186+
}
170187
}
188+
171189
}
172190

173191
FieldTmp::~FieldTmp( )
@@ -224,16 +242,29 @@ namespace picongpu
224242
EventTask FieldTmp::asyncCommunication( EventTask serialEvent )
225243
{
226244
EventTask ret;
227-
__startTransaction( serialEvent );
245+
__startTransaction( serialEvent + m_gatherEv + m_scatterEv );
228246
FieldFactory::getInstance( ).createTaskFieldReceiveAndInsert( *this );
229247
ret = __endTransaction( );
230248

231-
__startTransaction( serialEvent );
249+
__startTransaction( serialEvent + m_gatherEv + m_scatterEv);
232250
FieldFactory::getInstance( ).createTaskFieldSend( *this );
233251
ret += __endTransaction( );
252+
m_scatterEv = ret;
234253
return ret;
235254
}
236255

256+
EventTask FieldTmp::asyncCommunicationGather( EventTask serialEvent )
257+
{
258+
PMACC_VERIFY_MSG(
259+
fieldTmpSupportGatherCommunication == true,
260+
"fieldTmpSupportGatherCommunication in memory.param must be set to true"
261+
);
262+
263+
if( fieldTmpRecv != nullptr )
264+
m_gatherEv = fieldTmpRecv->asyncCommunication( serialEvent + m_scatterEv + m_gatherEv );
265+
return m_gatherEv;
266+
}
267+
237268
void FieldTmp::bashField( uint32_t exchangeType )
238269
{
239270
ExchangeMapping<GUARD, MappingDesc> mapper( this->cellDescription, exchangeType );
@@ -315,10 +346,4 @@ namespace picongpu
315346
return "FieldTmp";
316347
}
317348

318-
uint32_t
319-
FieldTmp::getCommTag( )
320-
{
321-
return m_commTag;
322-
}
323-
324349
} // namespace picongpu

src/picongpu/include/simulation_defines/param/memory.param

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,14 @@ namespace picongpu
5656
/** number of scalar fields that are reserved as temporary fields */
5757
constexpr uint32_t fieldTmpNumSlots = 1;
5858

59+
/** can `FieldTmp` gather neighbor information
60+
*
61+
* If `true` it is possible to call the method `asyncCommunicationGather()`
62+
* to copy data from the border of neighboring GPUs into the local guard.
63+
* This is also known as building up a "ghost" or "halo" region in domain
64+
* decomposition and only necessary for specific algorithms that extend
65+
* the basic PIC cycle, e.g. with dependence on derived density or energy fields.
66+
*/
67+
constexpr bool fieldTmpSupportGatherCommunication = true;
68+
5969
} //namespace picongpu

0 commit comments

Comments
 (0)