This reverts commit d834f4e6e8.
			
			
				revert-31068-fix_conv3d_windows
			
			
		
							parent
							
								
									7fbc68a2c0
								
							
						
					
					
						commit
						824a79d383
					
				
											
												
													File diff suppressed because it is too large
													Load Diff
												
											
										
									
								
											
												
													File diff suppressed because it is too large
													Load Diff
												
											
										
									
								@ -1,229 +0,0 @@
 | 
				
			||||
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
 | 
				
			||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
you may not use this file except in compliance with the License.
 | 
				
			||||
You may obtain a copy of the License at
 | 
				
			||||
 | 
				
			||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
 | 
				
			||||
Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
See the License for the specific language governing permissions and
 | 
				
			||||
limitations under the License. */
 | 
				
			||||
 | 
				
			||||
#include "paddle/fluid/platform/mkldnn_reuse.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace operators {
 | 
				
			||||
 | 
				
			||||
using paddle::framework::LoDTensor;
 | 
				
			||||
using paddle::framework::Tensor;
 | 
				
			||||
using paddle::platform::CPUDeviceContext;
 | 
				
			||||
using paddle::platform::CreateKey;
 | 
				
			||||
using paddle::platform::MKLDNNGetDataType;
 | 
				
			||||
using paddle::platform::MKLDNNMemDesc;
 | 
				
			||||
using platform::to_void_cast;
 | 
				
			||||
 | 
				
			||||
// Handler that builds and caches oneDNN RNN forward primitives and their
// memory objects in the device context's blob map.
//   T     - input data type (uint8_t selects the int8 kernel path)
//   T_alg - concrete oneDNN RNN algorithm/primitive type
//   T_out - output data type (defaults to T; may differ for int8 kernels)
template <typename T, typename T_alg, typename T_out = T>
class RNNMKLDNNHandler : public platform::MKLDNNHandlerT<T, T_alg> {
 public:
  // The base-class cache key includes Ti (max sentence length) because the
  // forward primitive and its src/dst memories depend on it; see memory_key_
  // below for the Ti-independent key used for weights/bias/h0.
  RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                   const platform::MKLDNNDeviceContext& dev_ctx,
                   const mkldnn::engine mkldnn_engine,
                   platform::Place cpu_place, const LoDTensor* input,
                   const Tensor* weight_h, const Tensor* h0,
                   const bool is_reverse, const int64_t N, const int64_t Ti,
                   const int64_t IC, const int64_t OC, const int64_t G,
                   const std::string& unique_name)
      : platform::MKLDNNHandlerT<T, T_alg>(
            dev_ctx, dev_ctx.GetEngine(), cpu_place,
            CreateKey(dev_ctx, unique_name, MKLDNNGetDataType<T>(), Ti)),
        N(N),
        Ti(Ti),
        IC(IC),
        OC(OC),
        G(G) {
    // Create memory key without Ti because weights, bias and h0 memories
    // do not depend on Ti size but primitive and input/output memory do
    memory_key_ = platform::ExtendKeyWithThreadInfoIfNeeded(
        dev_ctx, CreateKey(dev_ctx, unique_name, MKLDNNGetDataType<T>()));

    // Is it int8 kernel
    const bool is_INT8 = std::is_same<T, uint8_t>::value;

    if (is_INT8) {
      // Int8 attributes: quantization scales/shift come from op attributes
      // and are attached to the primitive via attr_.
      const float scale_data = ctx.Attr<float>("Scale_data");
      const float shift_data = ctx.Attr<float>("Shift_data");
      const auto scale_weights = ctx.Attr<std::vector<float>>("Scale_weights");

      const int weights_scale_mask =
          0 +
          (1 << 3)  // bit, indicating the unique scales for `g` dim in `ldigo`
          +
          (1 << 4);  // bit, indicating the unique scales for `o` dim in `ldigo`

      attr_.set_rnn_data_qparams(scale_data, shift_data);
      attr_.set_rnn_weights_qparams(weights_scale_mask, scale_weights);
    }
  }

  // True when the primitive chose the batch-major (N, T, C) destination
  // layout; otherwise the time-major (T, N, C) layout is in use.
  bool is_NTC() {
    return (platform::GetMKLDNNFormat(this->fwd_pd_->dst_desc()) ==
            dnnl::memory::format_tag::ntc);
  }

  // Copies between PaddlePaddle's packed LoD layout ([WORDS, C] with per-
  // sequence offsets in `lod`) and the dense oneDNN layouts. For reversed
  // sequences the payload is right-aligned inside each Ti-sized slot
  // (PP_* directions) / read from that right-aligned position (*_PP).
  // NOTE(review): `lod` must have N+1 entries (prefix offsets) — the loops
  // below read lod[n + 1]; confirmed by the callers passing input->lod()[0].
  void reorderRNNdata(void* input_data, void* output_data,
                      std::vector<size_t> lod, const bool is_reverse,
                      platform::RNNReorderType reorder_type) {
    switch (reorder_type) {
      // Reorder input memory [WORDS, C] + LoD -> [N, T, C]
      case platform::RNNReorderType::PP_NTC: {
        auto* input_data_iter = reinterpret_cast<T*>(input_data);
        auto* output_data_iter = reinterpret_cast<T*>(output_data);
        for (int n = 0; n < N; ++n) {
          const auto num_elements = (lod[n + 1] - lod[n]) * IC;
          const auto offset = is_reverse ? (Ti * IC - num_elements) : 0;
          memcpy(output_data_iter + n * Ti * IC + offset, input_data_iter,
                 sizeof(T) * num_elements);
          input_data_iter += num_elements;
        }
      } break;
      // Reorder input memory [WORDS, C] + LoD -> [T, N, C]
      case platform::RNNReorderType::PP_TNC: {
        auto* input_data_iter = reinterpret_cast<T*>(input_data);
        auto* output_data_iter = reinterpret_cast<T*>(output_data);
        for (int n = 0; n < N; ++n) {
          const auto num_elements = (lod[n + 1] - lod[n]);
          const auto offset = is_reverse ? (Ti - num_elements) : 0;
          // Time-major layout: one IC-sized row per timestep, strided by
          // N * IC, so each timestep is copied separately.
          for (size_t t = 0; t < num_elements; ++t) {
            memcpy(output_data_iter + (t + offset) * N * IC + n * IC,
                   input_data_iter, sizeof(T) * IC);
            input_data_iter += IC;
          }
        }
      } break;
      // Reorder output values to PP format [N, T, C] -> [WORDS, C]
      case platform::RNNReorderType::NTC_PP: {
        auto* input_data_iter = reinterpret_cast<T_out*>(input_data);
        auto* output_data_iter = reinterpret_cast<T_out*>(output_data);
        for (int n = 0; n < N; ++n) {
          const auto num_elements = (lod[n + 1] - lod[n]) * OC;
          const auto offset = is_reverse ? (Ti * OC - num_elements) : 0;
          memcpy(output_data_iter, input_data_iter + n * Ti * OC + offset,
                 sizeof(T_out) * num_elements);
          output_data_iter += num_elements;
        }
      } break;
      // Reorder output values to PP format [T, N, C] -> [WORDS, C]
      case platform::RNNReorderType::TNC_PP: {
        auto* input_data_iter = reinterpret_cast<T_out*>(input_data);
        auto* output_data_iter = reinterpret_cast<T_out*>(output_data);
        for (int n = 0; n < N; ++n) {
          const auto num_elements = lod[n + 1] - lod[n];
          const auto offset = is_reverse ? (Ti - num_elements) : 0;
          for (size_t t = 0; t < num_elements; ++t) {
            memcpy(output_data_iter,
                   input_data_iter + (t + offset) * N * OC + n * OC,
                   sizeof(T_out) * OC);
            output_data_iter += OC;
          }
        }
      } break;
    }
  }

  // Returns the (cached) src memory for the forward primitive, with the
  // LoD input reordered into whichever dense layout (ntc/tnc) the
  // primitive descriptor selected. The buffer is zero-filled first so the
  // padding beyond each sequence's length stays zero.
  std::shared_ptr<dnnl::memory> AcquireInputMemoryWithReorder(
      const LoDTensor* input, const bool is_reverse) {
    const auto name = this->key_ + "@input_mem";
    auto memory_p =
        std::static_pointer_cast<dnnl::memory>(this->dev_ctx_.GetBlob(name));

    if (!memory_p) {
      memory_p = std::make_shared<dnnl::memory>(this->fwd_pd_->src_desc(),
                                                this->engine_);
      this->dev_ctx_.SetBlob(name, memory_p);
    }

    const auto& input_lod = input->lod()[0];
    auto* x_data = to_void_cast(input->data<T>());

    auto* x_onednn_data = memory_p->get_data_handle();
    memset(x_onednn_data, 0, sizeof(T) * N * Ti * IC);

    if (platform::GetMKLDNNFormat(this->fwd_pd_->src_desc()) ==
        dnnl::memory::format_tag::ntc) {
      reorderRNNdata(x_data, x_onednn_data, input_lod, is_reverse,
                     platform::RNNReorderType::PP_NTC);
    } else {
      reorderRNNdata(x_data, x_onednn_data, input_lod, is_reverse,
                     platform::RNNReorderType::PP_TNC);
    }
    return memory_p;
  }

  // Returns the (cached) dst memory matching the primitive's dst_desc.
  // The caller is responsible for reordering its contents back to the
  // packed LoD layout (see reorderRNNdata *_PP cases).
  std::shared_ptr<dnnl::memory> AcquireOutputMemory() {
    const auto name = this->key_ + "@output_mem";
    auto memory_p =
        std::static_pointer_cast<dnnl::memory>(this->dev_ctx_.GetBlob(name));

    if (!memory_p) {
      memory_p = std::make_shared<dnnl::memory>(this->fwd_pd_->dst_desc(),
                                                this->engine_);
      this->dev_ctx_.SetBlob(name, memory_p);
    }
    return memory_p;
  }

  // TODO(grygielski) H0 is for now persistable
  // TODO(jczaja) H0 should be updated each iter and of T type (Fusion pass does
  // not support in yet)
  // Returns the (cached, Ti-independent key) initial-hidden-state memory.
  // If h0 is null a zero-filled float buffer is used instead; either way
  // the user data is reordered into the primitive's src_iter layout with
  // the quantization attributes in attr_ applied.
  std::shared_ptr<dnnl::memory> AcquireH0Memory(const Tensor* h0) {
    const std::string h0_key = memory_key_ + "@h0";
    auto memory_p =
        std::static_pointer_cast<dnnl::memory>(this->dev_ctx_.GetBlob(h0_key));

    if (!memory_p) {
      auto user_h0_memory = dnnl::memory();
      if (h0) {
        user_h0_memory =
            dnnl::memory({{1, 1, N, OC},
                          MKLDNNGetDataType<float>(),
                          MKLDNNMemoryFormat::ldnc},
                         this->engine_, to_void_cast(h0->data<float>()));
      } else {
        user_h0_memory = dnnl::memory({{1, 1, N, OC},
                                       MKLDNNGetDataType<float>(),
                                       MKLDNNMemoryFormat::ldnc},
                                      this->engine_);
        memset(user_h0_memory.get_data_handle(), 0, sizeof(float) * N * OC);
      }
      memory_p = std::make_shared<dnnl::memory>(this->fwd_pd_->src_iter_desc(),
                                                this->engine_);

      dnnl::stream astream(this->engine_);
      dnnl::reorder(user_h0_memory, *memory_p, attr_)
          .execute(astream, user_h0_memory, *memory_p);

      this->dev_ctx_.SetBlob(h0_key, memory_p);
    }
    return memory_p;
  }

 protected:
  // RNN dimensions
  // N - Batch Size
  // Ti - Max sentence length
  // IC - Input Channels
  // OC - Output Channels
  // G  - Number of gates
  const int64_t N, Ti, IC, OC, G;

  // Memory size of weights, bias and h0 does not depend
  // on Ti size, thus we need another key to cache them
  std::string memory_key_;
  // Primitive attributes; carries the int8 quantization parameters set in
  // the constructor (empty for fp32).
  dnnl::primitive_attr attr_;
};
 | 
				
			||||
}  // namespace operators
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -1,81 +0,0 @@
 | 
				
			||||
#   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
#
 | 
				
			||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
# you may not use this file except in compliance with the License.
 | 
				
			||||
# You may obtain a copy of the License at
 | 
				
			||||
#
 | 
				
			||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
#
 | 
				
			||||
# Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
# See the License for the specific language governing permissions and
 | 
				
			||||
# limitations under the License.
 | 
				
			||||
 | 
				
			||||
import unittest
 | 
				
			||||
import numpy as np
 | 
				
			||||
from paddle.fluid.tests.unittests.test_fusion_lstm_op import TestFusionLSTMOp
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class TestFusionLSTMONEDNNOp(TestFusionLSTMOp):
    """Base test for the oneDNN (MKL-DNN) fusion_lstm kernel.

    Reuses the reference ``TestFusionLSTMOp`` setup and only flips the
    ``use_mkldnn`` flag so the oneDNN code path is exercised.
    """

    def set_conf(self):
        self.use_mkldnn = True

    def test_check_output(self):
        # Iterate in a fixed order — the original set literal {True, False}
        # has no guaranteed iteration order, which made failures harder to
        # reproduce.
        for use_seq in [False, True]:
            self.attrs['use_seq'] = use_seq
            # "Cell" is excluded from comparison (no_check_set) —
            # presumably not validated for this kernel; confirm against the
            # op implementation.
            self.check_output(check_dygraph=False, no_check_set=["Cell"])
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class TestFusionLSTMONEDNNOpReverse(TestFusionLSTMONEDNNOp):
    """Reversed-direction variant of the oneDNN fusion_lstm test."""

    def set_conf(self):
        # Keep oneDNN enabled and reverse the sequence direction.
        self.use_mkldnn = True
        self.is_reverse = True
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class TestFusionLSTMONEDNNOpInitReverse(TestFusionLSTMONEDNNOp):
    """Reversed-direction variant that also supplies an initial state."""

    def set_conf(self):
        # oneDNN path with a provided initial hidden/cell state, reversed.
        self.use_mkldnn = True
        self.is_reverse = True
        self.has_initial_state = True
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class TestFusionLSTMONEDNNOpMD1(TestFusionLSTMONEDNNOp):
    """Dimension variant: M (input width) 36, D (hidden width) 8."""

    def set_conf(self):
        self.use_mkldnn = True
        self.M, self.D = 36, 8
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class TestFusionLSTMONEDNNOpMD2(TestFusionLSTMONEDNNOp):
    """Dimension variant: square case with M == D == 8."""

    def set_conf(self):
        self.use_mkldnn = True
        self.M, self.D = 8, 8
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class TestFusionLSTMONEDNNOpMD3(TestFusionLSTMONEDNNOp):
    """Dimension variant: M (input width) 15, D (hidden width) 3."""

    def set_conf(self):
        self.use_mkldnn = True
        self.M, self.D = 15, 3
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class TestFusionLSTMONEDNNOpBS1(TestFusionLSTMONEDNNOp):
    """Batch-size-1 variant: a single sequence of length 3, D = 16."""

    def set_conf(self):
        self.use_mkldnn = True
        self.D = 16
        self.lod = [[3]]
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class TestFusionLSTMONEDNNOpPeepholesInit(TestFusionLSTMONEDNNOp):
    """Peephole-connection variant with a provided initial state."""

    def set_conf(self):
        self.use_mkldnn = True
        self.has_initial_state = True
        self.use_peepholes = True
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
if __name__ == '__main__':
    # The tests call check_output(check_dygraph=False), so switch Paddle to
    # static-graph mode before running the suite.
    from paddle import enable_static
    enable_static()
    unittest.main()
 | 
				
			||||
					Loading…
					
					
				
		Reference in new issue