codegen for fused elementwise operation (#19520)
* test=develop codegen for fused elementwise operation * fix test=developsigmoid_bug
parent
25c0eb28cc
commit
ed8f44ea21
@ -0,0 +1,96 @@
|
|||||||
|
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
#include "paddle/fluid/framework/ir/codegen.h"
|
||||||
|
#include <set>
|
||||||
|
#include <sstream>
|
||||||
|
#include "paddle/fluid/framework/ir/codegen_helper.h"
|
||||||
|
namespace paddle {
|
||||||
|
namespace framework {
|
||||||
|
namespace ir {
|
||||||
|
|
||||||
|
// we get the parameter list code for the expression information
|
||||||
|
std::string CodeGen::GetDeclarationCode(
|
||||||
|
std::vector<OperationExpression> expression) {
|
||||||
|
std::stringstream ret;
|
||||||
|
ret << "fuse_kernel";
|
||||||
|
ret << R"((int N )";
|
||||||
|
std::set<int> input_ids;
|
||||||
|
std::set<int> output_ids;
|
||||||
|
std::vector<int> last_output_idis;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < expression.size(); i++) {
|
||||||
|
std::vector<int> tmp_input = expression[i].GetInputIds();
|
||||||
|
for (size_t j = 0; j < tmp_input.size(); j++) {
|
||||||
|
int id = tmp_input[j];
|
||||||
|
input_ids.insert(id);
|
||||||
|
}
|
||||||
|
int tmp_output = expression[i].GetOutputId();
|
||||||
|
output_ids.insert(tmp_output);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::set<int>::iterator it = input_ids.begin();
|
||||||
|
while (it != input_ids.end()) {
|
||||||
|
int var_index = *it;
|
||||||
|
if (output_ids.find(var_index) != output_ids.end()) {
|
||||||
|
input_ids.erase(it++);
|
||||||
|
} else {
|
||||||
|
it++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (it = input_ids.begin(); it != input_ids.end(); it++) {
|
||||||
|
int var_index = *it;
|
||||||
|
ret << R"(, const T* var)" << var_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (it = output_ids.begin(); it != output_ids.end(); it++) {
|
||||||
|
int var_index = *it;
|
||||||
|
ret << R"(, T* var)" << var_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret << R"())";
|
||||||
|
|
||||||
|
return ret.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the single generated statement that defines `offset` from the
// loop variable `idx`, prefixed with `indentation` and ending in a newline.
std::string CodeGen::GetOffsetCode() {
  std::string offset_stmt(indentation);
  offset_stmt += "int offset = idx;";
  offset_stmt += '\n';
  return offset_stmt;
}
|
||||||
|
|
||||||
|
std::string CodeGen::GetComputeCode(
|
||||||
|
std::vector<OperationExpression> expression) {
|
||||||
|
// get the right experssion code using suffix expression
|
||||||
|
std::stringstream ret;
|
||||||
|
for (size_t i = 0; i < expression.size(); i++) {
|
||||||
|
ret << expression[i].GetExpression();
|
||||||
|
}
|
||||||
|
return ret.str();
|
||||||
|
}
|
||||||
|
// in order to get the right result of expression, we need to calculate, we
|
||||||
|
// store the expression as
|
||||||
|
// suffix Expressions using vector
|
||||||
|
std::string CodeGen::GetKernelCode(
|
||||||
|
std::vector<OperationExpression> expression) {
|
||||||
|
auto declaration_code = GetDeclarationCode(expression);
|
||||||
|
auto offset_code = GetOffsetCode();
|
||||||
|
auto compute_code = GetComputeCode(expression);
|
||||||
|
auto cuda_kernel = const_kernel_start + declaration_code + const_kernel_mid +
|
||||||
|
offset_code + compute_code + const_kernel_end;
|
||||||
|
return cuda_kernel;
|
||||||
|
}
|
||||||
|
} // namespace ir
|
||||||
|
} // namespace framework
|
||||||
|
} // namespace paddle
|
@ -0,0 +1,36 @@
|
|||||||
|
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include "paddle/fluid/framework/ir/codegen_helper.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace framework {
|
||||||
|
namespace ir {
|
||||||
|
|
||||||
|
class CodeGen {
|
||||||
|
public:
|
||||||
|
std::string GetKernelCode(std::vector<OperationExpression> expression);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::string GetDeclarationCode(
|
||||||
|
std::vector<paddle::framework::ir::OperationExpression> expression);
|
||||||
|
std::string GetOffsetCode();
|
||||||
|
std::string GetComputeCode(
|
||||||
|
std::vector<paddle::framework::ir::OperationExpression> expression);
|
||||||
|
};
|
||||||
|
} // namespace ir
|
||||||
|
} // namespace framework
|
||||||
|
} // namespace paddle
|
@ -0,0 +1,61 @@
|
|||||||
|
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License. */
|
||||||
|
#include "paddle/fluid/framework/ir/codegen_helper.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
namespace paddle {
|
||||||
|
namespace framework {
|
||||||
|
namespace ir {
|
||||||
|
|
||||||
|
// Stores the operand ids, the result id and the operation name that
// GetExpression() later looks up in operator_cuda_table.
OperationExpression::OperationExpression(std::vector<int> input_ids,
                                         int output_id,
                                         std::string search_operation)
    : input_ids_(input_ids),
      output_id_(output_id),
      search_operation_(search_operation) {}
|
||||||
|
|
||||||
|
// we Traverse the graph and get the group , all input id and output id is
|
||||||
|
// unique for the node which belong the group
|
||||||
|
std::string OperationExpression::GetExpression() {
|
||||||
|
std::stringstream ret;
|
||||||
|
if (operator_cuda_table.find(search_operation_) ==
|
||||||
|
operator_cuda_table.end()) {
|
||||||
|
std::cerr << "Not supportted operation, " << search_operation_ << std::endl;
|
||||||
|
} else {
|
||||||
|
auto rhs = operator_cuda_table[search_operation_];
|
||||||
|
std::string replaced_str = "$";
|
||||||
|
int count = 0;
|
||||||
|
auto pos = rhs.find(replaced_str);
|
||||||
|
while (pos != -1) {
|
||||||
|
auto index = input_ids_[count];
|
||||||
|
rhs.replace(pos, replaced_str.length(),
|
||||||
|
std::to_string(index) + R"([offset])");
|
||||||
|
pos = rhs.find(replaced_str);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
auto lhs = std::string(indentation) + "var" + std::to_string(output_id_) +
|
||||||
|
R"([offset])";
|
||||||
|
auto equal_split = R"( = )";
|
||||||
|
auto semicolon = R"(;)";
|
||||||
|
ret << lhs << equal_split << rhs << semicolon << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret.str();
|
||||||
|
}
|
||||||
|
} // namespace ir
|
||||||
|
} // namespace framework
|
||||||
|
} // namespace paddle
|
@ -0,0 +1,70 @@
|
|||||||
|
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace framework {
|
||||||
|
namespace ir {
|
||||||
|
// Maps an operation name to the right-hand-side template of the statement
// generated for it. Each `$` is a placeholder that
// OperationExpression::GetExpression() replaces, left to right, with
// `<input id>[offset]`.
static std::unordered_map<std::string, std::string> operator_cuda_table = {
    // Binary arithmetic.
    {"elementwise_add", "var$ + var$"},
    {"elementwise_sub", "var$ - var$"},
    {"elementwise_mul", "var$ * var$"},
    {"elementwise_div", "var$ / var$"},
    // Binary min / max.
    {"elementwise_min", "real_min(var$, var$)"},
    {"elementwise_max", "real_max(var$, var$)"},
    // Unary operations.
    {"relu", "real_max(var$, 0)"},
    {"sigmoid", "1.0 / (1.0 + real_exp(-var$))"}};
|
||||||
|
|
||||||
|
// One operation of a fused computation: an operation name, the ids of the
// variables it reads and the id of the variable it writes. A fused kernel is
// described by a sequence of these objects.
class OperationExpression {
 public:
  // @param input_ids         ids of the operand variables, in the order the
  //                          operation's template consumes them.
  // @param output_id         id of the variable that receives the result.
  // @param search_operation  key used to look the operation up in
  //                          operator_cuda_table.
  OperationExpression(std::vector<int> input_ids, int output_id,
                      std::string search_operation);

  // Returns the generated statement for this operation.
  std::string GetExpression();

  // Returns a copy of the operand ids.
  std::vector<int> GetInputIds() const { return input_ids_; }

  // Returns the id of the result variable.
  int GetOutputId() const { return output_id_; }

 private:
  std::vector<int> input_ids_;
  int output_id_;
  std::string search_operation_;
};
|
||||||
|
|
||||||
|
// Whitespace prefix placed in front of every generated statement (used by
// CodeGen::GetOffsetCode and OperationExpression::GetExpression).
// NOTE(review): the whitespace inside the literal may have been collapsed in
// this view; confirm the intended width against the real file.
static const char indentation[] = R"( )";
|
||||||
|
|
||||||
|
// First fragment of the generated kernel source: everything that precedes
// the kernel name emitted by CodeGen::GetDeclarationCode.
// NOTE(review): C++ does not allow a template to have C language linkage, so
// `template <typename T>` followed by `extern "C"` should be checked against
// the compiler that consumes this text.
// NOTE(review): the bare `|` lines inside the literal look like diff-viewer
// residue rather than intended kernel text; confirm against the real file.
static const char const_kernel_start[] = R"(
|
||||||
|
template <typename T>
|
||||||
|
extern "C" __global__ void
|
||||||
|
)";
|
||||||
|
|
||||||
|
// Fragment emitted right after the parameter list: opens the kernel body and
// a grid-stride loop in which `idx` starts at this thread's global index and
// advances by the total thread count while `idx < N`.
// NOTE(review): the bare `|` lines inside the literal look like diff-viewer
// residue rather than intended kernel text; confirm against the real file.
static const char const_kernel_mid[] = R"(
|
||||||
|
{
|
||||||
|
for(int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
idx < N;
|
||||||
|
idx += gridDim.x * blockDim.x) {
|
||||||
|
|
||||||
|
)";
|
||||||
|
|
||||||
|
// Last fragment of the generated kernel source: closes the loop and the
// kernel body opened by const_kernel_mid.
// NOTE(review): the bare `|` lines inside the literal look like diff-viewer
// residue rather than intended kernel text; confirm against the real file.
static const char const_kernel_end[] = R"(
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
} // namespace ir
|
||||||
|
} // namespace framework
|
||||||
|
} // namespace paddle
|
@ -0,0 +1,43 @@
|
|||||||
|
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "paddle/fluid/framework/ir/codegen.h"
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include "paddle/fluid/framework/ir/codegen_helper.h"
|
||||||
|
#ifdef PADDLE_WITH_CUDA
|
||||||
|
// Fuses var3 = var1 * var2, var5 = var3 + var4, var6 = sigmoid(var5) and
// checks the generated kernel text instead of only printing it.
TEST(codegen, cuda) {
  namespace ir = paddle::framework::ir;

  std::vector<int> mul_input{1, 2};
  std::vector<int> add_input{3, 4};
  std::vector<int> sigmoid_input{5};
  int mul_out = 3;
  int add_out = 5;
  int sigmoid_out = 6;

  std::string op1 = "elementwise_mul";
  std::string op2 = "elementwise_add";
  std::string op3 = "sigmoid";
  ir::OperationExpression opexp1(mul_input, mul_out, op1);
  ir::OperationExpression opexp2(add_input, add_out, op2);
  ir::OperationExpression opexp3(sigmoid_input, sigmoid_out, op3);

  std::vector<ir::OperationExpression> fused_op = {opexp1, opexp2, opexp3};
  ir::CodeGen codegen;
  const std::string result = codegen.GetKernelCode(fused_op);
  std::cout << result << std::endl;

  const auto npos = std::string::npos;

  // Declaration: ids 1, 2 and 4 are only read, so they are const inputs;
  // ids 3, 5 and 6 are written, so they are mutable outputs and must not
  // also be listed as const inputs.
  EXPECT_NE(result.find("fuse_kernel"), npos);
  EXPECT_NE(result.find("const T* var1"), npos);
  EXPECT_NE(result.find("const T* var2"), npos);
  EXPECT_NE(result.find("const T* var4"), npos);
  EXPECT_EQ(result.find("const T* var3"), npos);
  EXPECT_EQ(result.find("const T* var5"), npos);
  EXPECT_NE(result.find(", T* var3"), npos);
  EXPECT_NE(result.find(", T* var5"), npos);
  EXPECT_NE(result.find(", T* var6"), npos);

  // Body: one statement per fused operation, placeholders substituted.
  EXPECT_NE(result.find("int offset = idx;"), npos);
  EXPECT_NE(result.find("var3[offset] = var1[offset] * var2[offset];"), npos);
  EXPECT_NE(result.find("var5[offset] = var3[offset] + var4[offset];"), npos);
  EXPECT_NE(
      result.find("var6[offset] = 1.0 / (1.0 + real_exp(-var5[offset]));"),
      npos);
}
|
||||||
|
#endif
|
Loading…
Reference in new issue