PaddlePaddle · QiJune · May 17, 2017 · May 17, 2017 · May 18, 2017 · May 18, 2017
diff --git a/paddle/majel/CMakeLists.txt b/paddle/majel/CMakeLists.txt
@@ -1,5 +1,7 @@
 cc_library(place SRCS place.cc)
 cc_library(ddim SRCS ddim.cc)
+cc_library(malloc SRCS malloc.cc DEPS place)  # malloc.cc calls is_cpu_place()
+cc_library(allocation SRCS allocation.cc DEPS malloc place)  # uses get_place()
 
 if(WITH_TESTING)
     add_subdirectory(test)

diff --git a/paddle/majel/allocation.cc b/paddle/majel/allocation.cc
@@ -0,0 +1,89 @@
+#include "allocation.h"
+#include <boost/variant.hpp>
+#include "malloc.h"
+
+namespace majel {
+namespace detail {
+
+// Visitor that allocates size_ bytes on the place held by a Place variant.
+class Allocator : public boost::static_visitor<void*> {
+public:
+  Allocator(size_t size) : size_(size) {}
+
+  // Request for a CpuPlace, forwarded to majel::malloc::malloc.
+  void* operator()(const CpuPlace& p) const {
+    return majel::malloc::malloc(p, size_);
+  }
+
+  // Request for a GpuPlace, forwarded to majel::malloc::malloc.
+  void* operator()(const GpuPlace& p) const {
+    return majel::malloc::malloc(p, size_);
+  }
+private:
+  size_t size_;  // number of bytes requested
+};
+
+// Visitor that hands ptr_ back through majel::malloc::free for a given place.
+class Deallocator : public boost::static_visitor<> {
+public:
+  Deallocator(void* ptr) : ptr_(ptr) {}
+
+  // A null ptr_ is ignored; otherwise it is passed to majel::malloc::free.
+  void operator()(CpuPlace p) const {
+    if (ptr_ == nullptr) return;
+    majel::malloc::free(p, ptr_);
+  }
+
+  // Same contract as the CpuPlace overload, for a GpuPlace.
+  void operator()(GpuPlace p) const {
+    if (ptr_ == nullptr) return;
+    majel::malloc::free(p, ptr_);
+  }
+private:
+  void* ptr_;  // address to release; may be null
+};
+
+}  // namespace detail
+}  // namespace majel
+
+namespace majel {
+
+// Both constructors delegate to (size, place) with the place from get_place().
+Allocation::Allocation() : Allocation(0, get_place()) {}
+Allocation::Allocation(size_t size) : Allocation(size, get_place()) {}
+
+Allocation::Allocation(size_t size, Place place)
+    : owned_(true), ptr_(nullptr), size_(size), place_(place) {
+  // A zero-byte request performs no allocation and leaves ptr_ null.
+  if (size_ == 0) {
+    return;
+  }
+  majel::detail::Allocator by_place(size_);  // dispatches on the place type
+  ptr_ = boost::apply_visitor(by_place, place_);
+}
+
+Allocation::Allocation(void* ptr, size_t size, Place place)
+    : owned_(false), ptr_(ptr), size_(size), place_(place) {}  // non-owning wrap
+
+Allocation::~Allocation() {
+  // Release only memory this object allocated itself: wrapped external
+  // pointers (owned_ == false) and empty allocations are left untouched.
+  const bool must_release = owned_ && ptr_ != nullptr;
+  if (!must_release) {
+    return;
+  }
+
+  // Dispatch on place_ so the free routine matching the place runs.
+  majel::detail::Deallocator releaser(ptr_);
+  boost::apply_visitor(releaser, place_);
+}
+
+// Accessors: ptr() is the start address, size() the length in bytes and
+// place() the place given at construction.
+void* Allocation::ptr() const { return ptr_; }
+// One-past-the-end address; char* arithmetic avoids needing <cstdint>.
+void* Allocation::end() const { return static_cast<char*>(ptr_) + size_; }
+size_t Allocation::size() const { return size_; }
+Place Allocation::place() const { return place_; }
+
+}  // namespace majel
diff --git a/paddle/majel/allocation.h b/paddle/majel/allocation.h
@@ -0,0 +1,37 @@
+#pragma once
+#include <memory>
+
+#include "place.h"
+
+namespace majel {
+
+class Allocation {
+public:
+  Allocation();                          // zero bytes on the get_place() place
+  Allocation(size_t size);               // size bytes on the get_place() place
+  Allocation(size_t size, Place place);  // size bytes on the given place
+
+  // Wraps memory that was not obtained by this object (a non-owned
+  // allocation); the destructor leaves such memory alone, so the caller
+  // remains responsible for releasing ptr.
+  Allocation(void* ptr, size_t size, Place place);
+
+  ~Allocation();  // frees the memory only when this object owns it
+  // Copy construction is disabled.
+  Allocation(const Allocation&) = delete;
+  // Copy assignment is disabled.
+  Allocation& operator=(const Allocation&) = delete;
+
+  void* ptr() const;    // start address (null for an empty owned allocation)
+  void* end() const;    // ptr() advanced by size() bytes
+  Place place() const;  // place passed at construction
+  size_t size() const;  // size in bytes
+
+private:
+  bool owned_;  // true when the destructor must release ptr_
+  void* ptr_;
+  size_t size_;
+  Place place_;
+};
+
+}  // namespace majel
diff --git a/paddle/majel/buffer.h b/paddle/majel/buffer.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "allocation.h"
+#include "place.h"
+
+namespace majel {
+
+// Pairs a shared Allocation with an optional externally provided address.
+class Buffer {
+public:
+  Buffer()
+      : external_address_(nullptr),
+        allocation_(std::make_shared<Allocation>(0)) {}
+  Buffer(void* address)
+      : external_address_(address),
+        allocation_(std::make_shared<Allocation>(0)) {}
+  Buffer(void* address, Place p)
+      : external_address_(address),
+        allocation_(std::make_shared<Allocation>(0, p)) {}
+  Buffer(std::shared_ptr<Allocation> allocation)
+      : external_address_(nullptr), allocation_(allocation) {}
+
+  // Allocation's address, or the external address when that one is null.
+  void* get_address() const {
+    void* const allocated = allocation_->ptr();
+    return allocated != nullptr ? allocated : external_address_;
+  }
+
+  // Place recorded in the underlying allocation.
+  Place get_place() const { return allocation_->place(); }
+
+  // Shares ownership of the underlying allocation with the caller.
+  std::shared_ptr<Allocation> data() const { return allocation_; }
+
+private:
+  void* external_address_;  // fallback address; never freed by Buffer
+  std::shared_ptr<Allocation> allocation_;
+};
+
+}  // namespace majel
diff --git a/paddle/majel/malloc.cc b/paddle/majel/malloc.cc
@@ -0,0 +1,131 @@
+#include "malloc.h"
+#include <glog/logging.h>
+
+#ifndef PADDLE_ONLY_CPU
+#include <cuda_runtime.h>
+#endif
+
+#define CHECK_CUDA(cudaFunc)                                         \
+  do {                                                               \
+    cudaError_t cudaStat = cudaFunc;                                 \
+    CHECK_EQ(cudaSuccess, cudaStat) << "Cuda Error: "                \
+                                    << cudaGetErrorString(cudaStat); \
+  } while (0)  // CHECK_EQ fails unless cudaFunc evaluated to cudaSuccess
+
+namespace majel {
+namespace malloc {
+namespace detail {
+#ifndef PADDLE_ONLY_CPU
+// Message for the error code reported by cudaGetLastError().
+const char* get_device_error_string() {
+  return cudaGetErrorString(cudaGetLastError());
+}
+// Message for an explicit error code; err is converted to cudaError_t.
+const char* get_device_error_string(size_t err) {
+  return cudaGetErrorString(static_cast<cudaError_t>(err));
+}
+
+void* malloc_device(size_t size) {  // cudaMalloc wrapper returning the pointer
+  void* dest_d;
+  // A zero-byte request is treated as a caller error by the CHECK below.
+  CHECK(size) << __func__ << ": the size for device memory is 0, please check.";
+  CHECK_CUDA(cudaMalloc((void**)&dest_d, size));
+
+  return dest_d;
+}
+
+void free_mem_device(void* dest_d) {
+  CHECK_NOTNULL(dest_d);
+  // cudaErrorCudartUnloading is tolerated in addition to cudaSuccess.
+  cudaError_t err = cudaFree(dest_d);
+  CHECK(cudaSuccess == err || cudaErrorCudartUnloading == err)
+      << get_device_error_string();  // NOTE(review): reads cudaGetLastError(), not err
+}
+#endif
+
+class DefaultAllocator {
+public:
+  static void* malloc(majel::Place place, size_t size);
+  // Both members dispatch on the place through a boost::static_visitor.
+  static void free(majel::Place, void* ptr);
+};
+
+class DefaultAllocatorMallocVisitor : public boost::static_visitor<void*> {
+public:
+  DefaultAllocatorMallocVisitor(size_t size) : size_(size) {}
+  // CpuPlace: size_ bytes from posix_memalign with 32-byte alignment.
+  void* operator()(majel::CpuPlace p) {
+    void* address;
+    CHECK_EQ(posix_memalign(&address, 32ul, size_), 0);
+    CHECK(address) << "Fail to allocate CPU memory: size=" << size_;
+    return address;
+  }
+
+#ifndef PADDLE_ONLY_CPU
+  void* operator()(majel::GpuPlace p) {  // GPU build: uses malloc_device
+    void* address = malloc_device(size_);
+    CHECK(address) << "Fail to allocate GPU memory " << size_ << " bytes";
+    return address;
+  }
+#else
+  void* operator()(majel::GpuPlace p) {  // CPU-only build: no device allocation
+    CHECK(majel::is_cpu_place(p)) << "GPU Place not supported";
+    return nullptr;
+  }
+#endif
+
+private:
+  size_t size_;  // requested size in bytes
+};
+
+class DefaultAllocatorFreeVisitor : public boost::static_visitor<void> {
+public:
+  DefaultAllocatorFreeVisitor(void* ptr) : ptr_(ptr) {}
+  void operator()(majel::CpuPlace p) {  // CpuPlace: released with ::free
+    if (ptr_) {
+      ::free(ptr_);
+    }
+  }
+
+#ifndef PADDLE_ONLY_CPU
+  void operator()(majel::GpuPlace p) {  // GPU build: released by free_mem_device
+    if (ptr_) {
+      free_mem_device(ptr_);
+    }
+  }
+
+#else
+  void operator()(majel::GpuPlace p) {  // CPU-only build: nothing is freed
+    CHECK(majel::is_cpu_place(p)) << "GPU Place not supported";
+  }
+#endif
+
+private:
+  void* ptr_;  // address to release; a null pointer is skipped
+};
+
+void* DefaultAllocator::malloc(majel::Place place, size_t size) {
+  DefaultAllocatorMallocVisitor visitor(size);  // carries the byte count
+  return boost::apply_visitor(visitor, place);  // runs the overload for place
+}
+
+void DefaultAllocator::free(majel::Place place, void* ptr) {
+  DefaultAllocatorFreeVisitor visitor(ptr);  // carries the address to release
+  boost::apply_visitor(visitor, place);      // runs the overload for place
+}
+
+}  // namespace detail
+}  // namespace malloc
+}  // namespace majel
+namespace majel {
+namespace malloc {
+
+void* malloc(majel::Place place, size_t size) {  // public entry point
+  return detail::DefaultAllocator::malloc(place, size);
+}
+
+void free(majel::Place place, void* ptr) {  // public entry point
+  detail::DefaultAllocator::free(place, ptr);
+}
+}  // namespace malloc
+}  // namespace majel
diff --git a/paddle/majel/malloc.h b/paddle/majel/malloc.h
@@ -0,0 +1,11 @@
+#pragma once
+#include "place.h"
+
+namespace majel {
+namespace malloc {
+
+void* malloc(majel::Place place, size_t size);  // allocates size bytes on place
+void free(majel::Place place, void* ptr);       // releases ptr for that place
+
+}  // namespace malloc
+}  // namespace majel
diff --git a/paddle/majel/place.cc b/paddle/majel/place.cc
@@ -1,4 +1,4 @@
-#include "paddle/majel/place.h"
+#include "place.h"
 
 namespace majel {
 
@@ -13,21 +13,14 @@ class PlacePrinter : public boost::static_visitor<> {
 
   void operator()(const CpuPlace&) { os_ << "CpuPlace"; }
 
-  void operator()(const GpuPlace& p) { os_ << "GpuPlace(" << p.device << ")"; }
+  void operator()(const GpuPlace&) { os_ << "GpuPlace"; }
 };
 
 }  // namespace detail
 
 static Place the_default_place;
 
-void set_place(const Place& place) { the_default_place = place; }
-
 const Place& get_place() { return the_default_place; }
-
-const GpuPlace default_gpu() { return GpuPlace(0); }
-
-const CpuPlace default_cpu() { return CpuPlace(); }
-
 bool is_gpu_place(const Place& p) {
   return boost::apply_visitor(IsGpuPlace(), p);
 }

diff --git a/paddle/majel/place.h b/paddle/majel/place.h
@@ -14,33 +14,24 @@ struct CpuPlace {
 };
 
 struct GpuPlace {
-  GpuPlace(int d) : device(d) {}
+  GpuPlace() {}
 
   // needed for variant equality comparison
-  inline bool operator==(const GpuPlace& o) const { return device == o.device; }
+  inline bool operator==(const GpuPlace&) const { return true; }
 
-  inline bool operator!=(const GpuPlace& o) const { return !(*this == o); }
-
-  GpuPlace() : GpuPlace(0) {}
-  int device;
+  inline bool operator!=(const GpuPlace&) const { return false; }
 };
 
 class IsGpuPlace : public boost::static_visitor<bool> {
 public:
   bool operator()(const CpuPlace&) const { return false; }
 
-  bool operator()(const GpuPlace& gpu) const { return true; }
+  bool operator()(const GpuPlace&) const { return true; }
 };
 
-typedef boost::variant<GpuPlace, CpuPlace> Place;
-
-void set_place(const Place&);
+typedef boost::variant<CpuPlace, GpuPlace> Place;
 
 const Place& get_place();
-
-const GpuPlace default_gpu();
-const CpuPlace default_cpu();
-
 bool is_gpu_place(const Place&);
 bool is_cpu_place(const Place&);
 bool places_are_same_class(const Place&, const Place&);

diff --git a/paddle/majel/test/CMakeLists.txt b/paddle/majel/test/CMakeLists.txt
@@ -6,6 +6,10 @@ cc_test(ddim_test
     SRCS ddim_test.cc
     DEPS ddim)
 
+cc_test(allocation_test
+    SRCS allocation_test.cc
+    DEPS allocation malloc place)
+
 if(WITH_GPU)
     nv_test(cuda_test SRCS cuda_test.cu)
     nv_test(dim_test SRCS dim_test.cu DEPS ddim)