codegen code for reconstruction ()

* codegen code for reconstruction test=develop

* fix the cmake test=develop

* fix review advice test=develop
wangchaochaohu 6 years ago committed by GitHub
parent 647ff784e2
commit c9ea317b36

@@ -110,7 +110,9 @@ set(GLOB_PASS_LIB ${PASS_LIBRARY} CACHE INTERNAL "Global PASS library")
cc_library(pass_builder SRCS pass_builder.cc DEPS pass)
cc_test(codegen_test SRCS codegen_test.cc DEPS codegen_helper codegen)
if(WITH_GPU)
cc_test(codegen_test SRCS codegen_test.cc DEPS codegen_helper codegen device_code lod_tensor)
endif()
cc_test(node_test SRCS node_test.cc DEPS node)
cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper)
cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry)

@@ -1,4 +1,4 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -19,76 +19,15 @@ namespace paddle {
namespace framework {
namespace ir {
// Get the parameter list code from the expression information.
std::string CodeGen::GetDeclarationCode(
std::vector<OperationExpression> expression) {
std::stringstream ret;
ret << "fuse_kernel";
ret << R"((int N )";
std::set<int> input_ids;
std::set<int> output_ids;
std::vector<int> last_output_idis;
for (size_t i = 0; i < expression.size(); i++) {
std::vector<int> tmp_input = expression[i].GetInputIds();
for (size_t j = 0; j < tmp_input.size(); j++) {
int id = tmp_input[j];
input_ids.insert(id);
}
int tmp_output = expression[i].GetOutputId();
output_ids.insert(tmp_output);
}
std::set<int>::iterator it = input_ids.begin();
while (it != input_ids.end()) {
int var_index = *it;
if (output_ids.find(var_index) != output_ids.end()) {
input_ids.erase(it++);
} else {
it++;
}
}
for (it = input_ids.begin(); it != input_ids.end(); it++) {
int var_index = *it;
ret << R"(, const T* var)" << var_index;
}
for (it = output_ids.begin(); it != output_ids.end(); it++) {
int var_index = *it;
ret << R"(, T* var)" << var_index;
}
ret << R"())";
return ret.str();
}
std::string CodeGen::GetOffsetCode() {
std::stringstream ret;
ret << indentation << "int offset = idx;" << std::endl;
return ret.str();
CodeGenerator::CodeGenerator(CodeTemplate code_template) {
code_template_ = code_template;
}
std::string CodeGen::GetComputeCode(
std::vector<OperationExpression> expression) {
// Get the right expression code using the suffix expression.
std::stringstream ret;
for (size_t i = 0; i < expression.size(); i++) {
ret << expression[i].GetExpression();
}
return ret.str();
}
// In order to get the right result of the expression, we store the
// expressions as suffix expressions in a vector.
std::string CodeGen::GetKernelCode(
std::vector<OperationExpression> expression) {
auto declaration_code = GetDeclarationCode(expression);
auto offset_code = GetOffsetCode();
auto compute_code = GetComputeCode(expression);
auto cuda_kernel = const_kernel_start + declaration_code + const_kernel_mid +
offset_code + compute_code + const_kernel_end;
std::string CodeGenerator::GenerateCode(TemplateVariable template_var) {
auto cuda_kernel = kernel_function + code_template_.Format(template_var);
return cuda_kernel;
}
} // namespace ir
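
The new GenerateCode path simply prepends the shared kernel_function string to the formatted template (see the line `kernel_function + code_template_.Format(template_var)` above). The actual CodeTemplate/TemplateVariable implementation is not shown in this excerpt; below is a minimal host-side sketch of the substitution idea only, assuming Format replaces each "$key" added via TemplateVariable::Add. The map, the template string, and the key names mirror the "$name"/"$parameter"/"$compute" keys used in codegen_test.cc, but are stand-ins, not the real types.

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  // Hypothetical stand-in for TemplateVariable: key -> replacement text.
  std::map<std::string, std::string> template_var = {
      {"$name", "fused_elementwise_0"},
      {"$parameter",
       "int N, const float* var1, const float* var2, float* var3"},
      {"$compute", "var3[idx] = var1[idx] * var2[idx];"}};
  // Made-up elementwise template (not the actual kernel_elementwise_template).
  std::string tmpl = R"(
extern "C" __global__ void $name($parameter) {
  for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < N;
       idx += gridDim.x * blockDim.x) {
    $compute
  }
})";
  // Naive Format(): replace every occurrence of each "$key" with its value.
  for (const auto& kv : template_var) {
    for (size_t pos = tmpl.find(kv.first); pos != std::string::npos;
         pos = tmpl.find(kv.first, pos + kv.second.size())) {
      tmpl.replace(pos, kv.first.size(), kv.second);
    }
  }
  std::cout << tmpl << std::endl;  // The emitted CUDA kernel source text.
  return 0;
}
```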

@@ -1,4 +1,4 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -20,16 +20,14 @@ namespace paddle {
namespace framework {
namespace ir {
class CodeGen {
class CodeGenerator {
public:
std::string GetKernelCode(std::vector<OperationExpression> expression);
explicit CodeGenerator(CodeTemplate code_template);
std::string GenerateCode(TemplateVariable template_var);
// TODO(wangchao66) std::string GenerateCode(const Graph& graph)
private:
std::string GetDeclarationCode(
std::vector<paddle::framework::ir::OperationExpression> expression);
std::string GetOffsetCode();
std::string GetComputeCode(
std::vector<paddle::framework::ir::OperationExpression> expression);
CodeTemplate code_template_;
};
} // namespace ir
} // namespace framework

@@ -21,41 +21,46 @@ namespace framework {
namespace ir {
OperationExpression::OperationExpression(std::vector<int> input_ids,
int output_id,
std::string search_operation) {
int output_id, std::string op) {
input_ids_ = input_ids;
output_id_ = output_id;
search_operation_ = search_operation;
op_ = op;
}
std::string OperationExpression::GetRHSTemplate() {
std::stringstream ret;
std::string rhs_end = ";";
auto rhs = support_table[op_];
for (size_t i = 0; i < input_ids_.size(); i++) {
auto replaced_str = replaced_element_in_order[i];
auto pos = rhs.find(replaced_str);
auto index = input_ids_[i];
rhs.replace(pos, replaced_str.length(), std::to_string(index) + R"([idx])");
}
ret << rhs << rhs_end;
return ret.str();
}
std::string OperationExpression::GetLHSTemplate() {
std::stringstream ret;
ret << "var" << output_id_ << R"([idx] = )";
return ret.str();
}
bool OperationExpression::SupportState() {
return (support_table.find(op_) == support_table.end());
}
// We traverse the graph and get the group; all input and output ids are
// unique for the nodes that belong to the group.
std::string OperationExpression::GetExpression() {
std::stringstream ret;
if (operator_cuda_table.find(search_operation_) ==
operator_cuda_table.end()) {
std::cerr << "Not supportted operation, " << search_operation_ << std::endl;
} else {
auto rhs = operator_cuda_table[search_operation_];
std::string replaced_str = "$";
int count = 0;
auto pos = rhs.find(replaced_str);
while (pos != -1) {
auto index = input_ids_[count];
rhs.replace(pos, replaced_str.length(),
std::to_string(index) + R"([offset])");
pos = rhs.find(replaced_str);
count++;
}
auto lhs = std::string(indentation) + "var" + std::to_string(output_id_) +
R"([offset])";
auto equal_split = R"( = )";
auto semicolon = R"(;)";
ret << lhs << equal_split << rhs << semicolon << std::endl;
if (!SupportState()) {
ret << GetLHSTemplate() << GetRHSTemplate();
}
return ret.str();
}
} // namespace ir
} // namespace framework
} // namespace paddle
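
GetLHSTemplate and GetRHSTemplate together emit one statement per fused op by splicing the input ids into a per-op pattern from support_table. The table and replaced_element_in_order are defined elsewhere in this change (diff not shown here); as a hedged illustration, assuming support_table["elementwise_mul"] is "var$1 * var$2" and replaced_element_in_order is {"$1", "$2"}, the substitution works roughly like this:

```cpp
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Assumed pattern and placeholder list; the real values live outside
  // this excerpt, so treat them as illustrative only.
  std::string rhs = "var$1 * var$2";                     // support_table entry
  std::vector<std::string> placeholders = {"$1", "$2"};  // replaced_element_in_order
  std::vector<int> input_ids = {1, 2};
  int output_id = 3;

  // Mirror OperationExpression::GetRHSTemplate(): replace each placeholder
  // with "<input_id>[idx]".
  for (size_t i = 0; i < input_ids.size(); ++i) {
    size_t pos = rhs.find(placeholders[i]);
    rhs.replace(pos, placeholders[i].length(),
                std::to_string(input_ids[i]) + "[idx]");
  }
  // Mirror GetLHSTemplate(): "var<output_id>[idx] = ".
  std::string lhs = "var" + std::to_string(output_id) + "[idx] = ";
  std::cout << lhs << rhs << ";" << std::endl;
  // Prints: var3[idx] = var1[idx] * var2[idx];
  return 0;
}
```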

File diff suppressed because it is too large

@@ -1,43 +1,140 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/codegen.h"
#include <gtest/gtest.h>
#include <cmath>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/codegen_helper.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/operators/math.h"
#include "paddle/fluid/platform/device_code.h"
#include "paddle/fluid/platform/init.h"
#ifdef PADDLE_WITH_CUDA
TEST(codegen, cuda) {
std::vector<int> mul_input{1, 2};
std::vector<int> add_input{3, 4};
std::vector<int> sigmod_input{5};
std::vector<int> sub_input{5, 6};
std::vector<int> relu_input{7};
std::vector<int> sigmoid_input{8};
int mul_out = 3;
int add_out = 5;
int sigmod_out = 6;
int sub_out = 7;
int relu_out = 8;
int sigmoid_out = 9;
std::string op1 = "elementwise_mul";
std::string op2 = "elementwise_add";
std::string op3 = "sigmoid";
std::string op3 = "elementwise_sub";
std::string op4 = "relu";
std::string op5 = "sigmoid";
paddle::framework::ir::OperationExpression opexp1(mul_input, mul_out, op1);
paddle::framework::ir::OperationExpression opexp2(add_input, add_out, op2);
paddle::framework::ir::OperationExpression opexp3(sigmod_input, sigmod_out,
op3);
paddle::framework::ir::OperationExpression opexp3(sub_input, sub_out, op3);
paddle::framework::ir::OperationExpression opexp4(relu_input, relu_out, op4);
paddle::framework::ir::OperationExpression opexp5(sigmoid_input, sigmoid_out,
op5);
std::vector<paddle::framework::ir::OperationExpression> fused_op = {
opexp1, opexp2, opexp3};
paddle::framework::ir::CodeGen codegen;
std::string result = codegen.GetKernelCode(fused_op);
std::cout << result << std::endl;
opexp1, opexp2, opexp3, opexp4, opexp5};
paddle::framework::ir::CodeTemplate code_template(
paddle::framework::ir::kernel_elementwise_template);
paddle::framework::ir::CodeGenerator codegen(code_template);
paddle::framework::ir::TemplateVariable template_var;
template_var.Add("$name", EmitUniqueName(fused_op));
template_var.Add("$parameter", EmitDeclarationCode(fused_op, "float"));
template_var.Add("$compute", EmitComputeCode(fused_op));
std::string saxpy_code = codegen.GenerateCode(template_var);
std::cout << saxpy_code << std::endl;
paddle::framework::InitDevices(false, {0});
paddle::platform::CUDAPlace place = paddle::platform::CUDAPlace(0);
paddle::platform::CUDADeviceCode code(place, EmitUniqueName(fused_op),
saxpy_code);
paddle::framework::Tensor cpu_a;
paddle::framework::Tensor cpu_b;
paddle::framework::Tensor cpu_c;
paddle::framework::Tensor cpu_d;
paddle::framework::Tensor cpu_e;
paddle::framework::Tensor cpu_f;
paddle::framework::Tensor cpu_g;
paddle::framework::Tensor cpu_h;
paddle::framework::Tensor cpu_o;
auto dims = paddle::framework::make_ddim(
{static_cast<int64_t>(256), static_cast<int64_t>(1024)});
cpu_a.mutable_data<float>(dims, paddle::platform::CPUPlace());
cpu_b.mutable_data<float>(dims, paddle::platform::CPUPlace());
cpu_c.mutable_data<float>(dims, paddle::platform::CPUPlace());
cpu_d.mutable_data<float>(dims, paddle::platform::CPUPlace());
cpu_e.mutable_data<float>(dims, paddle::platform::CPUPlace());
cpu_f.mutable_data<float>(dims, paddle::platform::CPUPlace());
cpu_g.mutable_data<float>(dims, paddle::platform::CPUPlace());
cpu_o.mutable_data<float>(dims, paddle::platform::CPUPlace());
size_t n = cpu_a.numel();
for (size_t i = 0; i < n; ++i) {
cpu_a.data<float>()[i] = static_cast<float>(i);
}
for (size_t i = 0; i < n; ++i) {
cpu_b.data<float>()[i] = static_cast<float>(0.5);
cpu_d.data<float>()[i] = static_cast<float>(10.0);
cpu_f.data<float>()[i] = static_cast<float>(0.0);
}
paddle::framework::Tensor a;
paddle::framework::Tensor b;
paddle::framework::Tensor c;
paddle::framework::Tensor d;
paddle::framework::Tensor e;
paddle::framework::Tensor f;
paddle::framework::Tensor g;
paddle::framework::Tensor h;
paddle::framework::Tensor o;
float* a_data = a.mutable_data<float>(dims, place);
float* b_data = b.mutable_data<float>(dims, place);
float* c_data = c.mutable_data<float>(dims, place);
float* d_data = d.mutable_data<float>(dims, place);
float* e_data = e.mutable_data<float>(dims, place);
float* f_data = f.mutable_data<float>(dims, place);
float* g_data = g.mutable_data<float>(dims, place);
float* h_data = h.mutable_data<float>(dims, place);
float* o_data = o.mutable_data<float>(dims, place);
TensorCopySync(cpu_a, place, &a);
TensorCopySync(cpu_b, place, &b);
TensorCopySync(cpu_d, place, &d);
TensorCopySync(cpu_f, place, &f);
code.Compile();
std::vector<void*> args = {&n, &a_data, &b_data, &d_data, &f_data,
&c_data, &e_data, &g_data, &h_data, &o_data};
code.SetNumThreads(1024);
code.SetWorkloadPerThread(1);
code.Launch(n, &args);
TensorCopySync(o, paddle::platform::CPUPlace(), &cpu_o);
for (size_t i = 0; i < n; i++) {
float result =
(1.0 / (1.0 + std::exp(-std::max(
0.0, static_cast<float>(i) * 0.5 + 10.0 - 0.0))));
PADDLE_ENFORCE_EQ(cpu_o.data<float>()[i], result);
}
}
#endif
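
For reference, the expected value checked by PADDLE_ENFORCE_EQ follows directly from the fused chain built above: var3 = var1 * var2, var5 = var3 + var4, var7 = var5 - var6, var8 = relu(var7), var9 = sigmoid(var8), with var1 = i, var2 = 0.5, var4 = 10.0, var6 = 0.0. A small host-side sketch of that per-element reference (the same math the test compares against, not part of the PR itself) is:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// Host-side reference for the fused chain in the test:
// c = a * b; e = c + d; g = e - f; h = relu(g); o = sigmoid(h).
float FusedReference(float a, float b, float d, float f) {
  float c = a * b;
  float e = c + d;
  float g = e - f;
  float h = std::max(0.0f, g);          // relu
  return 1.0f / (1.0f + std::exp(-h));  // sigmoid
}

int main() {
  // With a = i, b = 0.5, d = 10.0, f = 0.0 this reproduces
  // sigmoid(relu(0.5 * i + 10.0 - 0.0)) from the test's check loop.
  for (int i = 0; i < 4; ++i) {
    std::printf("i=%d -> %f\n", i,
                FusedReference(static_cast<float>(i), 0.5f, 10.0f, 0.0f));
  }
  return 0;
}
```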
