You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							196 lines
						
					
					
						
							5.4 KiB
						
					
					
				
			
		
		
	
	
							196 lines
						
					
					
						
							5.4 KiB
						
					
					
				| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 | |
| 
 | |
| Licensed under the Apache License, Version 2.0 (the "License");
 | |
| you may not use this file except in compliance with the License.
 | |
| You may obtain a copy of the License at
 | |
| 
 | |
|     http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
| Unless required by applicable law or agreed to in writing, software
 | |
| distributed under the License is distributed on an "AS IS" BASIS,
 | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| See the License for the specific language governing permissions and
 | |
| limitations under the License. */
 | |
| 
 | |
| /*
 | |
|  execViaCpu is used to do operations on GpuMatrix and/or GpuIVector through
 | |
|  cpu functions. It can automatically make a temporary CPU copy for the
 | |
|  gpu matrix/vector, and copy back after executing the CPU function.
 | |
| 
 | |
|  Examples:
 | |
|  1. For a function, functor or lambda:
 | |
|    r = execViaCpu(&f, mat, vec)
 | |
| 
 | |
|  2. For a member function of CpuMatrix, execViaCpu2 should be used:
 | |
|    execViaCpu2(&CpuMatrix::selectElements, *this, table, ids)
 | |
| */
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| namespace paddle {
 | |
| 
 | |
// Generic pass-through form of CopyToCpu. For an arbitrary argument type no
// copy is made: the wrapper only keeps a reference to the caller's object and
// copiedArg() hands that same object back.
template <typename Arg>
class CopyToCpu {
public:
  // Binds to `arg`; the referenced object is not owned by the wrapper.
  explicit CopyToCpu(Arg& arg) : original_(arg) {}

  // Returns the wrapped argument itself (no copy involved).
  Arg& copiedArg() const { return original_; }

private:
  Arg& original_;
};
 | |
| 
 | |
| template <>
 | |
| class CopyToCpu<Matrix> {
 | |
| public:
 | |
|   explicit CopyToCpu(Matrix& arg) : arg_(arg) {
 | |
|     if (arg.useGpu()) {
 | |
|       CHECK(!arg.isTransposed()) << "Not supported";
 | |
|       copied_ = Matrix::create(arg.getHeight(),
 | |
|                                arg.getWidth(),
 | |
|                                /* trans= */ false,
 | |
|                                /* useGpu= */ false);
 | |
|       copied_->copyFrom(arg);
 | |
|     }
 | |
|   }
 | |
|   ~CopyToCpu() {
 | |
|     if (copied_) {
 | |
|       arg_.copyFrom(*copied_);
 | |
|     }
 | |
|   }
 | |
|   Matrix& copiedArg() const { return copied_ ? *copied_ : arg_; }
 | |
| 
 | |
| private:
 | |
|   Matrix& arg_;
 | |
|   MatrixPtr copied_;
 | |
| };
 | |
| 
 | |
| template <>
 | |
| class CopyToCpu<const Matrix> {
 | |
| public:
 | |
|   explicit CopyToCpu(const Matrix& arg) : arg_(arg) {
 | |
|     if (arg.useGpu()) {
 | |
|       CHECK(!arg.isTransposed()) << "Not supported";
 | |
|       copied_ = Matrix::create(arg.getHeight(),
 | |
|                                arg.getWidth(),
 | |
|                                /* trans= */ false,
 | |
|                                /* useGpu= */ false);
 | |
|       copied_->copyFrom(arg);
 | |
|     }
 | |
|   }
 | |
|   const Matrix& copiedArg() const { return copied_ ? *copied_ : arg_; }
 | |
| 
 | |
| private:
 | |
|   const Matrix& arg_;
 | |
|   MatrixPtr copied_;
 | |
| };
 | |
| 
 | |
| template <>
 | |
| class CopyToCpu<IVector> {
 | |
| public:
 | |
|   explicit CopyToCpu(IVector& arg) : arg_(arg) {
 | |
|     if (arg.useGpu()) {
 | |
|       copied_ = IVector::create(arg.getSize(), /* useGpu= */ false);
 | |
|       copied_->copyFrom(arg);
 | |
|     }
 | |
|   }
 | |
|   ~CopyToCpu() {
 | |
|     if (copied_) {
 | |
|       arg_.copyFrom(*copied_);
 | |
|     }
 | |
|   }
 | |
|   IVector& copiedArg() const { return copied_ ? *copied_ : arg_; }
 | |
| 
 | |
| private:
 | |
|   IVector& arg_;
 | |
|   IVectorPtr copied_;
 | |
| };
 | |
| 
 | |
| template <>
 | |
| class CopyToCpu<const IVector> {
 | |
| public:
 | |
|   explicit CopyToCpu(const IVector& arg) : arg_(arg) {
 | |
|     if (arg.useGpu()) {
 | |
|       copied_ = IVector::create(arg.getSize(), /* useGpu= */ false);
 | |
|       copied_->copyFrom(arg);
 | |
|     }
 | |
|   }
 | |
|   const IVector& copiedArg() const { return copied_ ? *copied_ : arg_; }
 | |
| 
 | |
| private:
 | |
|   const IVector& arg_;
 | |
|   IVectorPtr copied_;
 | |
| };
 | |
| 
 | |
| namespace detail {
 | |
| 
 | |
| template <bool isFunction, bool isFunctionPointer, bool isClass, typename F>
 | |
| class GpuFuncWrapperImp;
 | |
| 
 | |
| template <typename F, typename R, typename... Args>
 | |
| class GpuFuncWrapperBase {
 | |
| public:
 | |
|   typedef R ResultType;
 | |
|   R operator()(F&& f, Args... args) {
 | |
|     return f(CopyToCpu<typename std::remove_reference<Args>::type>(args)
 | |
|                  .copiedArg()...);
 | |
|   }
 | |
| };
 | |
| 
 | |
| // function
 | |
| template <typename R, typename... Args>
 | |
| class GpuFuncWrapperImp<true, false, false, R(Args...)>
 | |
|     : public GpuFuncWrapperBase<R(Args...), R, Args...> {};
 | |
| 
 | |
| // function pointer
 | |
| template <typename R, typename... Args>
 | |
| class GpuFuncWrapperImp<false, true, false, R (*)(Args...)>
 | |
|     : public GpuFuncWrapperBase<R (*)(Args...), R, Args...> {};
 | |
| 
 | |
| template <typename F, typename Op>
 | |
| class GpuFuncWrapperImp2;
 | |
| 
 | |
| template <typename F, typename C, typename R, typename... Args>
 | |
| class GpuFuncWrapperImp2<F, R (C::*)(Args...) const>
 | |
|     : public GpuFuncWrapperBase<F, R, Args...> {};
 | |
| 
 | |
| template <typename F, typename C, typename R, typename... Args>
 | |
| class GpuFuncWrapperImp2<F, R (C::*)(Args...)>
 | |
|     : public GpuFuncWrapperBase<F, R, Args...> {};
 | |
| 
 | |
| // functor or lambda
 | |
| template <typename F>
 | |
| class GpuFuncWrapperImp<false, false, true, F>
 | |
|     : public GpuFuncWrapperImp2<F, decltype(&F::operator())> {};
 | |
| 
 | |
| template <typename F>
 | |
| class GpuFuncWrapper2
 | |
|     : public GpuFuncWrapperImp<
 | |
|           std::is_function<F>::value,
 | |
|           std::is_pointer<F>::value &&
 | |
|               std::is_function<typename std::remove_pointer<F>::type>::value,
 | |
|           std::is_class<F>::value,
 | |
|           F> {};
 | |
| 
 | |
| template <typename F>
 | |
| class GpuFuncWrapper
 | |
|     : public GpuFuncWrapper2<typename std::remove_reference<F>::type> {};
 | |
| 
 | |
| }  // namespace detail
 | |
| 
 | |
| template <typename F, typename... Args>
 | |
| typename detail::GpuFuncWrapper<F>::ResultType execViaCpu(F&& f,
 | |
|                                                           Args&&... args) {
 | |
|   return detail::GpuFuncWrapper<F>()(std::move(f), args...);
 | |
| }
 | |
| 
 | |
| // The second version is for F as member function of CpuMatrix
 | |
| template <typename R, typename... FArgs, typename... Args>
 | |
| R execViaCpu2(R (CpuMatrix::*f)(FArgs...), Args&&... args) {
 | |
|   auto lambda = [](R (CpuMatrix::*f)(FArgs...), Matrix& ths, FArgs... args) {
 | |
|     return (((CpuMatrix&)ths).*f)(args...);
 | |
|   };
 | |
|   return execViaCpu(lambda, f, args...);
 | |
| }
 | |
| 
 | |
| }  // namespace paddle
 |