gpu codex warning fix

pull/2032/head
wilfChen 5 years ago
parent 4df861cb62
commit 3b09299b69

@@ -96,7 +96,7 @@ inline __device__ void BlockReduce(const int& col_dim, T* mean, T* var, T* num,
   __syncthreads();
   if (threadIdx.x == 0) {
-    mean_addr[blockIdx.x] = share_mem[0];  // todo: blockDim.x < row
+    mean_addr[blockIdx.x] = share_mem[0];
     share_mem[1] /= col_dim;
     var_addr[blockIdx.x] = share_mem[1];
   }
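For context, the only change in this hunk is dropping a stale TODO; the surrounding code is the finalize step of a shared-memory block reduction, where thread 0 of each block writes the reduced mean and (after dividing by col_dim) the variance to per-block output slots. A minimal, self-contained sketch of the same pattern, not the MindSpore layer-norm kernel itself (the name RowMeanVar and the power-of-two blockDim assumption are illustrative):

// Illustrative CUDA kernel: one block per row, shared-memory tree reduce into a
// per-row mean and variance, with thread 0 doing the finalize step shown above.
__global__ void RowMeanVar(const float *input, const int col_dim, float *mean_addr, float *var_addr) {
  extern __shared__ float share[];        // first blockDim.x floats: sums, next blockDim.x: sums of squares
  float *sum = share;
  float *sq_sum = share + blockDim.x;

  // Each thread accumulates a strided slice of its row.
  float s = 0.f, sq = 0.f;
  for (int i = threadIdx.x; i < col_dim; i += blockDim.x) {
    float v = input[blockIdx.x * col_dim + i];
    s += v;
    sq += v * v;
  }
  sum[threadIdx.x] = s;
  sq_sum[threadIdx.x] = sq;
  __syncthreads();

  // Tree reduction; assumes blockDim.x is a power of two.
  for (unsigned stride = blockDim.x / 2; stride > 0; stride >>= 1) {
    if (threadIdx.x < stride) {
      sum[threadIdx.x] += sum[threadIdx.x + stride];
      sq_sum[threadIdx.x] += sq_sum[threadIdx.x + stride];
    }
    __syncthreads();
  }

  if (threadIdx.x == 0) {
    float mean = sum[0] / col_dim;
    mean_addr[blockIdx.x] = mean;                              // per-block (per-row) mean
    var_addr[blockIdx.x] = sq_sum[0] / col_dim - mean * mean;  // E[x^2] - E[x]^2
  }
}
// Launch, one block per row: RowMeanVar<<<rows, threads, 2 * threads * sizeof(float)>>>(input, col_dim, mean, var);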

@@ -96,7 +96,8 @@ bool DatasetIteratorKernel::Launch(const std::vector<AddressPtr> &, const std::v
   }
   for (size_t i = 0; i < output_size_list_.size(); i++) {
-    CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(outputs[i]->addr, addr, output_size_list_[i], cudaMemcpyDeviceToDevice,
+    void *output_addr = GetDeviceAddress<void>(outputs, i);
+    CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(output_addr, addr, output_size_list_[i], cudaMemcpyDeviceToDevice,
                                                reinterpret_cast<cudaStream_t>(stream)),
                                "Cuda Memcpy Failed");
     addr = reinterpret_cast<unsigned char *>(addr) + output_size_list_[i];
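The fix here swaps the raw outputs[i]->addr dereference for the GetDeviceAddress helper from MindSpore's GPU kernel utilities, so a bad index or null address is caught before it reaches cudaMemcpyAsync. A rough sketch of what such a checked accessor can look like, with simplified stand-ins for the framework types and illustrative error handling (the real helper's exact signature and logging differ):

#include <memory>
#include <stdexcept>
#include <vector>

// Simplified stand-ins for the framework types, illustration only.
struct Address {
  void *addr = nullptr;
  size_t size = 0;
};
using AddressPtr = std::shared_ptr<Address>;

// Sketch of a checked accessor in the spirit of GetDeviceAddress: validate the index
// and the pointer before handing back a typed device address, instead of letting a
// bad entry surface later as a CUDA memcpy failure.
template <typename T>
T *GetDeviceAddress(const std::vector<AddressPtr> &addr_list, size_t index) {
  if (index >= addr_list.size()) {
    throw std::out_of_range("address index out of range");
  }
  if (addr_list[index] == nullptr || addr_list[index]->addr == nullptr) {
    throw std::runtime_error("device address is null");
  }
  return reinterpret_cast<T *>(addr_list[index]->addr);
}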

@@ -68,14 +68,14 @@ class BroadcastOpGpuKernel : public GpuKernel {
       output_shape_[i] = shape3[i];
       output_num_ *= shape3[i];
     }
-    int offset = shape3.size() - shape1.size();
+    int lhs_offset = shape3.size() - shape1.size();
     for (size_t j = 0; j < shape1.size(); j++) {
-      lhs_shape_[j + offset] = shape1[j];
+      lhs_shape_[j + lhs_offset] = shape1[j];
       input1_num_ *= shape1[j];
     }
-    offset = shape3.size() - shape2.size();
+    int rhs_offset = shape3.size() - shape2.size();
     for (size_t k = 0; k < shape2.size(); k++) {
-      rhs_shape_[k + offset] = shape2[k];
+      rhs_shape_[k + rhs_offset] = shape2[k];
       input2_num_ *= shape2[k];
     }

@@ -74,14 +74,14 @@ class BroadcastOpGradGpuKernel : public GpuKernel {
       dy_shape_[i] = shape3[i];
       output_num_ *= shape3[i];
     }
-    int offset = shape3.size() - shape1.size();
+    int x1_offset = shape3.size() - shape1.size();
     for (size_t i = 0; i < shape1.size(); i++) {
-      x1_shape_[i + offset] = shape1[i];
+      x1_shape_[i + x1_offset] = shape1[i];
       input1_num_ *= shape1[i];
     }
-    offset = shape3.size() - shape2.size();
+    int x2_offset = shape3.size() - shape2.size();
     for (size_t i = 0; i < shape2.size(); i++) {
-      x2_shape_[i + offset] = shape2[i];
+      x2_shape_[i + x2_offset] = shape2[i];
       input2_num_ *= shape2[i];
     }
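Both broadcast hunks make the same rename: the single reused offset becomes lhs_offset/rhs_offset (and x1_offset/x2_offset in the grad kernel), which clears the warning about re-assigning the variable and makes the intent explicit. The arithmetic itself right-aligns each input shape against the output shape, padding the leading dimensions with 1. A small standalone illustration of that alignment, assuming the fixed rank of 4 that the kernels' shape arrays suggest (AlignShape and main are illustrative names, not framework code):

#include <array>
#include <cstddef>
#include <iostream>
#include <vector>

// Right-align an input shape against the output rank, padding leading dimensions
// with 1 so the kernel can broadcast over them. This mirrors the offset arithmetic
// in the hunks: offset = output_rank - input_rank.
std::array<size_t, 4> AlignShape(const std::vector<size_t> &shape, size_t output_rank) {
  std::array<size_t, 4> aligned = {1, 1, 1, 1};
  size_t offset = output_rank - shape.size();
  for (size_t i = 0; i < shape.size(); i++) {
    aligned[i + offset] = shape[i];
  }
  return aligned;
}

int main() {
  // Example: output shape (2, 3, 4, 5), lhs shape (4, 5) -> aligned (1, 1, 4, 5).
  auto lhs = AlignShape({4, 5}, 4);
  for (size_t d : lhs) std::cout << d << ' ';  // prints: 1 1 4 5
  std::cout << '\n';
  return 0;
}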

@@ -68,14 +68,12 @@ void DropoutGpuFwdKernel::DestroyResource() noexcept {}
 void DropoutGpuFwdKernel::InitSizeLists() {
   size_t input_size = num_count_ * sizeof(float);
-  size_t workspace_size = 0;
   input_size_list_.push_back(input_size);
   output_size_list_.push_back(input_size);  // output size: the same with input size
   output_size_list_.push_back(input_size);  // mask size: the same with input size
-  workspace_size_list_.push_back(workspace_size);
 }
-bool DropoutGpuFwdKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+bool DropoutGpuFwdKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                                  const std::vector<AddressPtr> &outputs, void *stream_ptr) {
   if (is_null_input_) {
     return true;

@@ -66,15 +66,13 @@ void DropoutGradGpuFwdKernel::InitSizeLists() {
   size_t dy_size = num_count_ * sizeof(float);
   size_t mask_size = dy_size;
   size_t dx_size = dy_size;
-  size_t workspace_size = 0;
   input_size_list_.push_back(dy_size);
   input_size_list_.push_back(mask_size);
   output_size_list_.push_back(dx_size);
-  workspace_size_list_.push_back(workspace_size);
 }
-bool DropoutGradGpuFwdKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+bool DropoutGradGpuFwdKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                                      const std::vector<AddressPtr> &outputs, void *stream_ptr) {
   if (is_null_input_) {
     return true;
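The two dropout hunks are the same cleanup: neither kernel uses a workspace, so the zero-sized workspace_size_list_ entry goes away and the unused workspace parameter loses its name, which is what triggered the warning. A tiny illustration of the parameter half of that fix, using a placeholder AddressPtr type rather than the framework definition:

#include <vector>

struct AddressPtr {};  // placeholder for the framework's AddressPtr, illustration only

// With -Wunused-parameter (or an equivalent static-analysis rule), a named but
// never-read parameter is flagged.
bool LaunchNamed(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                 const std::vector<AddressPtr> &outputs) {
  (void)inputs;
  (void)outputs;
  return true;  // 'workspace' is never read -> warning fires here
}

// Leaving the parameter unnamed keeps the signature (and any overrides) intact and is
// silent, matching the change to both dropout Launch signatures above.
bool LaunchUnnamed(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                   const std::vector<AddressPtr> &outputs) {
  (void)inputs;
  (void)outputs;
  return true;
}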
