Skip to content

Commit fca298e

Browse files
committed
use lock_guard, test=develop
1 parent e7efd2c commit fca298e

4 files changed

Lines changed: 256 additions & 231 deletions

File tree

paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc

Lines changed: 3 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -34,29 +34,18 @@ namespace paddle {
3434
namespace memory {
3535
namespace allocation {
3636

37-
struct SpinLockGuard {
38-
explicit SpinLockGuard(MLOCK_T *mutex) {
39-
mutex_ = mutex;
40-
ACQUIRE_LOCK(mutex_);
41-
}
42-
~SpinLockGuard() { RELEASE_LOCK(mutex_); }
43-
MLOCK_T *mutex_;
44-
};
45-
4637
AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator(
4738
const std::shared_ptr<Allocator> &underlying_allocator, size_t alignment,
4839
size_t chunk_size)
4940
: underlying_allocator_(
5041
std::make_shared<AlignedAllocator>(underlying_allocator, alignment)),
5142
alignment_(alignment),
52-
chunk_size_(std::max(AlignedSize(chunk_size, alignment), alignment)) {
53-
INITIAL_LOCK(&mtx_);
54-
}
43+
chunk_size_(std::max(AlignedSize(chunk_size, alignment), alignment)) {}
5544

5645
Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
5746
size = AlignedSize(size, alignment_);
5847

59-
SpinLockGuard guard(&mtx_);
48+
std::lock_guard<SpinLock> guard(spinlock_);
6049
auto iter = free_blocks_.lower_bound(std::make_pair(size, nullptr));
6150
BlockIt block_it;
6251
if (iter != free_blocks_.end()) {
@@ -110,7 +99,7 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
11099
}
111100

112101
void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
113-
SpinLockGuard guard(&mtx_);
102+
std::lock_guard<SpinLock> guard(spinlock_);
114103
auto block_it = static_cast<BlockAllocation *>(allocation)->block_it_;
115104
auto &blocks = block_it->chunk_->blocks_;
116105

paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ class AutoGrowthBestFitAllocator : public Allocator {
8787
size_t alignment_;
8888
size_t chunk_size_;
8989

90-
MLOCK_T mtx_;
90+
SpinLock spinlock_;
9191
};
9292

9393
} // namespace allocation

paddle/fluid/memory/allocation/spin_lock.h

Lines changed: 13 additions & 216 deletions
Original file line number | Diff line number | Diff line change
@@ -12,228 +12,25 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
/*
16-
the spinlock implementation is borrowed from Doug Lea's malloc, released to the
17-
public domain, as explained at
18-
http://creativecommons.org/licenses/publicdomain. Send questions,
comments, complaints, performance data, etc to dl@cs.oswego.edu
20-
*/
21-
2215
#pragma once
2316

24-
#ifndef WIN32
25-
#include <pthread.h>
26-
#if defined(__SVR4) && defined(__sun) /* solaris */
27-
#include <thread.h>
28-
#endif /* solaris */
29-
#else
30-
#ifndef _M_AMD64
31-
/* These are already defined on AMD64 builds */
32-
#ifdef __cplusplus
33-
extern "C" {
34-
#endif /* __cplusplus */
35-
LONG __cdecl _InterlockedCompareExchange(LONG volatile* Dest, LONG Exchange,
36-
LONG Comp);
37-
LONG __cdecl _InterlockedExchange(LONG volatile* Target, LONG Value);
38-
#ifdef __cplusplus
39-
}
40-
#endif /* __cplusplus */
41-
#endif /* _M_AMD64 */
42-
#pragma intrinsic(_InterlockedCompareExchange)
43-
#pragma intrinsic(_InterlockedExchange)
44-
#define interlockedcompareexchange _InterlockedCompareExchange
45-
#define interlockedexchange _InterlockedExchange
46-
#endif /* Win32 */
47-
48-
#ifndef FORCEINLINE
49-
#if defined(__GNUC__)
50-
#define FORCEINLINE __inline __attribute__((always_inline))
51-
#elif defined(_MSC_VER)
52-
#define FORCEINLINE __forceinline
53-
#endif
54-
#endif
55-
#ifndef NOINLINE
56-
#if defined(__GNUC__)
57-
#define NOINLINE __attribute__((noinline))
58-
#elif defined(_MSC_VER)
59-
#define NOINLINE __declspec(noinline)
60-
#else
61-
#define NOINLINE
62-
#endif
63-
#endif
64-
65-
#ifdef __cplusplus
66-
extern "C" {
67-
#ifndef FORCEINLINE
68-
#define FORCEINLINE inline
69-
#endif
70-
#endif /* __cplusplus */
71-
#ifndef FORCEINLINE
72-
#define FORCEINLINE
73-
#endif
74-
#ifdef __cplusplus
75-
}; /* end of extern "C" */
76-
#endif /* __cplusplus */
77-
78-
#ifndef WIN32
79-
80-
/* Custom pthread-style spin locks on x86 and x64 for gcc */
81-
struct pthread_mlock_t {
82-
volatile unsigned int l;
83-
unsigned int c;
84-
pthread_t threadid;
85-
};
86-
#define MLOCK_T struct pthread_mlock_t
87-
#define CURRENT_THREAD pthread_self()
88-
#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
89-
#define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl)
90-
#define RELEASE_LOCK(sl) pthread_release_lock(sl)
91-
#define TRY_LOCK(sl) pthread_try_lock(sl)
92-
#define SPINS_PER_YIELD 63
17+
#include "paddle/fluid/memory/allocation/spin_lock_c.h"
9318

94-
// static MLOCK_T malloc_global_mutex = { 0, 0, 0};
19+
namespace paddle {
20+
namespace memory {
9521

96-
static FORCEINLINE int pthread_acquire_lock(MLOCK_T* sl) {
97-
int spins = 0;
98-
volatile unsigned int* lp = &sl->l;
99-
for (;;) {
100-
if (*lp != 0) {
101-
if (sl->threadid == CURRENT_THREAD) {
102-
++sl->c;
103-
return 0;
104-
}
105-
} else {
106-
/* place args to cmpxchgl in locals to evade oddities in some gccs */
107-
int cmp = 0;
108-
int val = 1;
109-
int ret;
110-
__asm__ __volatile__("lock; cmpxchgl %1, %2"
111-
: "=a"(ret)
112-
: "r"(val), "m"(*(lp)), "0"(cmp)
113-
: "memory", "cc");
114-
if (!ret) {
115-
assert(!sl->threadid);
116-
sl->threadid = CURRENT_THREAD;
117-
sl->c = 1;
118-
return 0;
119-
}
120-
}
121-
if ((++spins & SPINS_PER_YIELD) == 0) {
122-
#if defined(__SVR4) && defined(__sun) /* solaris */
123-
thr_yield();
124-
#else
125-
#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
126-
sched_yield();
127-
#else /* no-op yield on unknown systems */
128-
; // NOLINT
129-
#endif /* __linux__ || __FreeBSD__ || __APPLE__ */
130-
#endif /* solaris */
131-
}
132-
}
133-
}
22+
class SpinLock {
23+
public:
24+
SpinLock() { INITIAL_LOCK(&mlock_); }
13425

135-
static FORCEINLINE void pthread_release_lock(MLOCK_T* sl) {
136-
volatile unsigned int* lp = &sl->l;
137-
assert(*lp != 0);
138-
assert(sl->threadid == CURRENT_THREAD);
139-
if (--sl->c == 0) {
140-
sl->threadid = 0;
141-
int prev = 0;
142-
int ret;
143-
__asm__ __volatile__("lock; xchgl %0, %1"
144-
: "=r"(ret)
145-
: "m"(*(lp)), "0"(prev)
146-
: "memory");
147-
}
148-
}
26+
void lock() { ACQUIRE_LOCK(&mlock_); }
14927

150-
static FORCEINLINE int pthread_try_lock(MLOCK_T* sl) {
151-
volatile unsigned int* lp = &sl->l;
152-
if (*lp != 0) {
153-
if (sl->threadid == CURRENT_THREAD) {
154-
++sl->c;
155-
return 1;
156-
}
157-
} else {
158-
int cmp = 0;
159-
int val = 1;
160-
int ret;
161-
__asm__ __volatile__("lock; cmpxchgl %1, %2"
162-
: "=a"(ret)
163-
: "r"(val), "m"(*(lp)), "0"(cmp)
164-
: "memory", "cc");
165-
if (!ret) {
166-
assert(!sl->threadid);
167-
sl->threadid = CURRENT_THREAD;
168-
sl->c = 1;
169-
return 1;
170-
}
171-
}
172-
return 0;
173-
}
28+
void unlock() { RELEASE_LOCK(&mlock_); }
29+
DISABLE_COPY_AND_ASSIGN(SpinLock);
17430

175-
#else /* WIN32 */
176-
/* Custom win32-style spin locks on x86 and x64 for MSC */
177-
struct win32_mlock_t {
178-
volatile long l; // NOLINT
179-
unsigned int c;
180-
long threadid; // NOLINT
31+
private:
32+
MLOCK_T mlock_;
18133
};
18234

183-
#define MLOCK_T struct win32_mlock_t
184-
#define CURRENT_THREAD GetCurrentThreadId()
185-
#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
186-
#define ACQUIRE_LOCK(sl) win32_acquire_lock(sl)
187-
#define RELEASE_LOCK(sl) win32_release_lock(sl)
188-
#define TRY_LOCK(sl) win32_try_lock(sl)
189-
#define SPINS_PER_YIELD 63
190-
191-
// static MLOCK_T malloc_global_mutex = { 0, 0, 0};
192-
193-
static FORCEINLINE int win32_acquire_lock(MLOCK_T *sl) {
194-
int spins = 0;
195-
for (;;) {
196-
if (sl->l != 0) {
197-
if (sl->threadid == CURRENT_THREAD) {
198-
++sl->c;
199-
return 0;
200-
}
201-
} else {
202-
if (!interlockedexchange(&sl->l, 1)) {
203-
assert(!sl->threadid);
204-
sl->threadid = CURRENT_THREAD;
205-
sl->c = 1;
206-
return 0;
207-
}
208-
}
209-
if ((++spins & SPINS_PER_YIELD) == 0) SleepEx(0, FALSE);
210-
}
211-
}
212-
213-
static FORCEINLINE void win32_release_lock(MLOCK_T *sl) {
214-
assert(sl->threadid == CURRENT_THREAD);
215-
assert(sl->l != 0);
216-
if (--sl->c == 0) {
217-
sl->threadid = 0;
218-
interlockedexchange(&sl->l, 0);
219-
}
220-
}
221-
222-
static FORCEINLINE int win32_try_lock(MLOCK_T *sl) {
223-
if (sl->l != 0) {
224-
if (sl->threadid == CURRENT_THREAD) {
225-
++sl->c;
226-
return 1;
227-
}
228-
} else {
229-
if (!interlockedexchange(&sl->l, 1)) {
230-
assert(!sl->threadid);
231-
sl->threadid = CURRENT_THREAD;
232-
sl->c = 1;
233-
return 1;
234-
}
235-
}
236-
return 0;
237-
}
238-
239-
#endif /* WIN32 */
35+
} // namespace memory
36+
} // namespace paddle

0 commit comments

Comments (0)