diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index 573bd937a3..0cddb95244 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -10,6 +10,7 @@ add_subdirectory(trainer)
 add_subdirectory(scripts)
 add_subdirectory(optimizer)
 add_subdirectory(strings)
+add_subdirectory(memory)
 
 # Do not build go directory until go cmake is working smoothly.
 # if(CMAKE_Go_COMPILER)
diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt
new file mode 100644
index 0000000000..3943c3cfad
--- /dev/null
+++ b/paddle/memory/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(detail)
diff --git a/paddle/memory/README.md b/paddle/memory/README.md
index fd32d07ef4..e5f7880e4c 100644
--- a/paddle/memory/README.md
+++ b/paddle/memory/README.md
@@ -31,7 +31,7 @@ In `paddle/memory/memory.h` we have:
 namespace memory {
 template <typename Place> void* Alloc(Place, size_t);
 template <typename Place> void Free(Place, void*);
-template <typename Place> void Used(Place);
+template <typename Place> size_t Used(Place);
 }  // namespace memory
 ```
 
@@ -39,7 +39,7 @@ These function templates have specializations on either `platform::CPUPlace` or
 
 ```cpp
 template<>
-void Alloc<CPUPlace>(CPUPlace p, size_t size) {
+void* Alloc<CPUPlace>(CPUPlace p, size_t size) {
   return GetCPUBuddyAllocator()->Alloc(size);
 }
 ```
@@ -102,15 +102,11 @@ class BuddyAllocator {
 };
 ```
 
-#### System Allocators
-
-The `GPUAllocator` and `CPUAllocator` are calls *system allocators*.  They work as the fallback allocators of `BuddyAllocator`.  A system allocator holds information about a device, including the amount of memory has been allocated, so we can call
+Because `BuddyAllocator` has the meta-data of each block, it can trace the used memory -- it records the amount returned by `Alloc` and subtracts it again in `Free`.  In contrast, `CPUAllocator` and `GPUAllocator` don't know the size of a freed memory block, so they cannot do this tracing.
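+
+As a hypothetical illustration (not `BuddyAllocator`'s actual implementation), the bookkeeping can be as simple as remembering each block's size at `Alloc` time so that `Free` and `Used` can account for it later:
+
+```cpp
+#include <cstddef>
+#include <cstdlib>
+#include <unordered_map>
+
+// Illustration only: a wrapper that keeps per-block meta-data so that
+// Used() can report the number of bytes currently allocated.
+class TrackingAllocator {
+ public:
+  void* Alloc(size_t size) {
+    void* p = std::malloc(size);
+    if (p != nullptr) {
+      sizes_[p] = size;  // record the block's size as meta-data
+      used_ += size;
+    }
+    return p;
+  }
+  void Free(void* p) {
+    auto it = sizes_.find(p);
+    if (it != sizes_.end()) {
+      used_ -= it->second;  // size recovered from the recorded meta-data
+      sizes_.erase(it);
+    }
+    std::free(p);
+  }
+  size_t Used() const { return used_; }
+
+ private:
+  std::unordered_map<void*, size_t> sizes_;
+  size_t used_ = 0;
+};
+```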
 
-- `GPUAllocator::Used()` and
-- `CPUAllocator::Used()`
-
-to get the amount of memory that has been allocated so far.
+#### System Allocators
 
+The `GPUAllocator` and `CPUAllocator` are called *system allocators*.  They work as the fallback allocators of `BuddyAllocator`.
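+
+For example, the non-staging CPU system allocator added under `paddle/memory/detail/` can be used on its own (mirroring `cpu_allocator_test.cc` in this change):
+
+```cpp
+#include "paddle/memory/detail/cpu_allocator.h"
+
+int main() {
+  // CPUAllocator<false> wraps plain malloc/free; BuddyAllocator falls back
+  // to such a system allocator when it needs more memory from the system.
+  paddle::memory::detail::CPUAllocator<false> a;
+  void* p = a.Alloc(4096);  // ask the system allocator for a 4 KB block
+  // ... use the block ...
+  a.Free(p);
+  return 0;
+}
+```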
 
 ## Justification
 
diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt
new file mode 100644
index 0000000000..fb8a11062d
--- /dev/null
+++ b/paddle/memory/detail/CMakeLists.txt
@@ -0,0 +1 @@
+cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)
diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h
new file mode 100644
index 0000000000..8a872d3800
--- /dev/null
+++ b/paddle/memory/detail/cpu_allocator.h
@@ -0,0 +1,63 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <malloc.h>  // for malloc and free
+#include <stddef.h>  // for size_t
+
+#ifdef WITH_GPU
+#include <cuda_runtime.h>  // for cudaMallocHost and cudaFreeHost
+#endif  // WITH_GPU
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+// CPUAllocator<staging=true> calls cudaMallocHost, which returns
+// pinned and mlocked memory as staging areas for data exchange
+// between host and device.  Allocating too much would reduce the
+// amount of memory available to the system for paging.  So, by
+// default, we should use CPUAllocator<staging=false>.
+template <bool staging>
+class CPUAllocator {
+public:
+  void* Alloc(size_t size);
+  void Free(void* p);
+};
+
+template <>
+class CPUAllocator<false> {
+public:
+  void* Alloc(size_t size) { return malloc(size); }
+  void Free(void* p) { free(p); }
+};
+
+// If CMake macro WITH_GPU is OFF, C++ compiler won't generate the
+// following specialization that depends on the CUDA library.
+#ifdef WITH_GPU
+template <>
+class CPUAllocator<true> {
+public:
+  void* Alloc(size_t size) {
+    void* p;
+    if (cudaMallocHost(&p, size) != cudaSuccess) {
+      return NULL;
+    }
+    return p;
+  }
+
+  void Free(void* p) { cudaFreeHost(p); }
+};
+#endif  // WITH_GPU
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc
new file mode 100644
index 0000000000..0aa33a22fd
--- /dev/null
+++ b/paddle/memory/detail/cpu_allocator_test.cc
@@ -0,0 +1,32 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/memory/detail/cpu_allocator.h"
+#include "gtest/gtest.h"
+
+TEST(CPUAllocator, NonStaging) {
+  paddle::memory::detail::CPUAllocator<false> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p);
+}
+
+#ifdef WITH_GPU
+TEST(CPUAllocator, Staging) {
+  paddle::memory::detail::CPUAllocator<true> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p);
+}
+#endif  // WITH_GPU
diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc
new file mode 100644
index 0000000000..5f1253ede6
--- /dev/null
+++ b/paddle/memory/memory.cc
@@ -0,0 +1,51 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/memory/memory.h"
+
+namespace paddle {
+namespace memory {
+
+template <>
+void* Alloc<platform::CPUPlace>(platform::CPUPlace, size_t size) {
+  return GetCPUBuddyAllocator()->Alloc(size);
+}
+
+template <>
+void* Alloc<platform::GPUPlace>(platform::GPUPlace pl, size_t size) {
+  return GetGPUBuddyAllocator(pl.device)->Alloc(size);
+}
+
+template <>
+void Free<platform::CPUPlace>(platform::CPUPlace, void* p) {
+  return GetCPUBuddyAllocator()->Free(p);
+}
+
+template <>
+void Free<platform::GPUPlace>(platform::GPUPlace pl, void* p) {
+  return GetGPUBuddyAllocator(pl.device)->Free(p);
+}
+
+template <>
+size_t Used<platform::CPUPlace>(platform::CPUPlace) {
+  return GetCPUBuddyAllocator()->Used();
+}
+
+template <>
+size_t Used<platform::GPUPlace>(platform::GPUPlace pl) {
+  return GetGPUBuddyAllocator(pl.device)->Used();
+}
+
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h
new file mode 100644
index 0000000000..ae8ac6ca52
--- /dev/null
+++ b/paddle/memory/memory.h
@@ -0,0 +1,27 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/frameowork/place.h"
+
+namespace paddle {
+namespace memory {
+
+template <typename Place> void* Alloc(Place, size_t);
+template <typename Place> void Free(Place, void*);
+template <typename Place> size_t Used(Place);
+
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/platform/place.cc b/paddle/platform/place.cc
index 1afd03c011..0704820aa0 100644
--- a/paddle/platform/place.cc
+++ b/paddle/platform/place.cc
@@ -8,8 +8,8 @@ namespace detail {
 class PlacePrinter : public boost::static_visitor<> {
  public:
   PlacePrinter(std::ostream &os) : os_(os) {}
-  void operator()(const CpuPlace &) { os_ << "CpuPlace"; }
-  void operator()(const GpuPlace &p) { os_ << "GpuPlace(" << p.device << ")"; }
+  void operator()(const CPUPlace &) { os_ << "CPUPlace"; }
+  void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; }
 
  private:
   std::ostream &os_;
@@ -22,14 +22,14 @@ static Place the_default_place;
 void set_place(const Place &place) { the_default_place = place; }
 const Place &get_place() { return the_default_place; }
 
-const GpuPlace default_gpu() { return GpuPlace(0); }
-const CpuPlace default_cpu() { return CpuPlace(); }
+const GPUPlace default_gpu() { return GPUPlace(0); }
+const CPUPlace default_cpu() { return CPUPlace(); }
 
 bool is_gpu_place(const Place &p) {
-  return boost::apply_visitor(IsGpuPlace(), p);
+  return boost::apply_visitor(IsGPUPlace(), p);
 }
 bool is_cpu_place(const Place &p) {
-  return !boost::apply_visitor(IsGpuPlace(), p);
+  return !boost::apply_visitor(IsGPUPlace(), p);
 }
 
 bool places_are_same_class(const Place &p1, const Place &p2) {
diff --git a/paddle/platform/place.h b/paddle/platform/place.h
index 489572c526..7cead18388 100644
--- a/paddle/platform/place.h
+++ b/paddle/platform/place.h
@@ -1,43 +1,58 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
 #pragma once
+
 #include <boost/variant.hpp>
 #include <iostream>
 
 namespace paddle {
 namespace platform {
 
-struct CpuPlace {
+struct CPUPlace {
   // WORKAROUND: for some reason, omitting this constructor
   // causes errors with boost 1.59 and OSX
-  CpuPlace() {}
+  CPUPlace() {}
 
   // needed for variant equality comparison
-  inline bool operator==(const CpuPlace &) const { return true; }
-  inline bool operator!=(const CpuPlace &) const { return false; }
+  inline bool operator==(const CPUPlace &) const { return true; }
+  inline bool operator!=(const CPUPlace &) const { return false; }
 };
 
-struct GpuPlace {
-  GpuPlace() : GpuPlace(0) {}
-  GpuPlace(int d) : device(d) {}
+struct GPUPlace {
+  GPUPlace() : GPUPlace(0) {}
+  GPUPlace(int d) : device(d) {}
 
   // needed for variant equality comparison
-  inline bool operator==(const GpuPlace &o) const { return device == o.device; }
-  inline bool operator!=(const GpuPlace &o) const { return !(*this == o); }
+  inline bool operator==(const GPUPlace &o) const { return device == o.device; }
+  inline bool operator!=(const GPUPlace &o) const { return !(*this == o); }
 
   int device;
 };
 
-struct IsGpuPlace : public boost::static_visitor<bool> {
-  bool operator()(const CpuPlace &) const { return false; }
-  bool operator()(const GpuPlace &gpu) const { return true; }
+struct IsGPUPlace : public boost::static_visitor<bool> {
+  bool operator()(const CPUPlace &) const { return false; }
+  bool operator()(const GPUPlace &gpu) const { return true; }
 };
 
-typedef boost::variant<GpuPlace, CpuPlace> Place;
+typedef boost::variant<GPUPlace, CPUPlace> Place;
 
 void set_place(const Place &);
 const Place &get_place();
 
-const GpuPlace default_gpu();
-const CpuPlace default_cpu();
+const GPUPlace default_gpu();
+const CPUPlace default_cpu();
 
 bool is_gpu_place(const Place &);
 bool is_cpu_place(const Place &);
diff --git a/paddle/platform/place_test.cc b/paddle/platform/place_test.cc
index 73fccceedf..33e2e5a439 100644
--- a/paddle/platform/place_test.cc
+++ b/paddle/platform/place_test.cc
@@ -3,8 +3,8 @@
 #include "gtest/gtest.h"
 
 TEST(Place, Equality) {
-  paddle::platform::CpuPlace cpu;
-  paddle::platform::GpuPlace g0(0), g1(1), gg0(0);
+  paddle::platform::CPUPlace cpu;
+  paddle::platform::GPUPlace g0(0), g1(1), gg0(0);
 
   EXPECT_EQ(cpu, cpu);
   EXPECT_EQ(g0, g0);
@@ -22,19 +22,19 @@ TEST(Place, Default) {
   EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::default_gpu()));
   EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::default_cpu()));
 
-  paddle::platform::set_place(paddle::platform::CpuPlace());
+  paddle::platform::set_place(paddle::platform::CPUPlace());
   EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::get_place()));
 }
 
 TEST(Place, Print) {
   {
     std::stringstream ss;
-    ss << paddle::platform::GpuPlace(1);
-    EXPECT_EQ("GpuPlace(1)", ss.str());
+    ss << paddle::platform::GPUPlace(1);
+    EXPECT_EQ("GPUPlace(1)", ss.str());
   }
   {
     std::stringstream ss;
-    ss << paddle::platform::CpuPlace();
-    EXPECT_EQ("CpuPlace", ss.str());
+    ss << paddle::platform::CPUPlace();
+    EXPECT_EQ("CPUPlace", ss.str());
   }
 }