Merge pull request #3337 from tensor-tang/merge
Enable mkldnn_fc for general format

commit 2e87d747bd

@@ -0,0 +1,97 @@
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#include <vector>

#include "mkldnn.hpp"

namespace paddle {

typedef enum {
  MKLDNN_BASE = 1,   // basic info of MKLDNN
  MKLDNN_TESTS = 1,  // gtest info of MKLDNN
  MKLDNN_SIZES = 2,  // size info of MKLDNN
  MKLDNN_FMTS = 3,   // format info of MKLDNN
  MKLDNN_ALL = 4,    // show all info of MKLDNN
} MKLDNN_LOG_LEVEL;

/**
 * @brief MKLDNN CPU engine.
 */
class CPUEngine {
public:
  static CPUEngine& Instance() {
    // Thread-safe in C++11.
    static CPUEngine myInstance;
    return myInstance;
  }

  // Disallow copy or move
  CPUEngine(const CPUEngine&) = delete;             // Copy constructor
  CPUEngine(CPUEngine&&) = delete;                  // Move constructor
  CPUEngine& operator=(const CPUEngine&) = delete;  // Copy assignment
  CPUEngine& operator=(CPUEngine&&) = delete;       // Move assignment

  mkldnn::engine& getEngine() { return cpuEngine_; }

protected:
  CPUEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {}
  // CPUEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {}
  ~CPUEngine() {}

private:
  mkldnn::engine cpuEngine_;
};

/**
 * @brief MKLDNN Stream.
 */
class MKLDNNStream {
public:
  MKLDNNStream() : ready_(false) { resetState(); }

  virtual ~MKLDNNStream() {}

  /**
   * @brief Submit the primitives to the stream.
   * @param prims The primitives vector
   * @param block Whether to wait until the stream completes
   */
  void submit(std::vector<mkldnn::primitive>& prims, bool block = true) {
    resetState();
    stream_->submit(prims).wait(block);
    ready_ = false;
  }

  /**
   * @brief Reset the mkldnn stream.
   */
  void resetState() {
    if (ready_) {
      return;
    }
    // TODO(TJ): change me when mkldnn has a method to reset this state
    // stream_.reset(new mkldnn::stream(mkldnn::stream::kind::lazy));
    stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager));
    ready_ = true;
  }

private:
  bool ready_;
  std::shared_ptr<mkldnn::stream> stream_;
};

}  // namespace paddle
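For orientation, a minimal usage sketch of these two helpers, mirroring how MKLDNNLayer wires them together further below (the singleton engine plus one eager stream that submits a primitive pipeline). The free function runPipeline and the empty pipeline are illustrative assumptions, not code from this PR:

#include <vector>
#include "paddle/gserver/layers/MKLDNNBase.h"

void runPipeline() {
  // The process-wide CPU engine (thread-safe Meyers singleton); primitives
  // and memory objects would be created against this engine.
  mkldnn::engine& eng = paddle::CPUEngine::Instance().getEngine();
  (void)eng;

  // Collect mkldnn primitives, then submit them through one eager stream.
  std::vector<mkldnn::primitive> pipeline;
  // pipeline.push_back(...);  // e.g. an inner_product forward primitive

  paddle::MKLDNNStream stream;
  stream.submit(pipeline, /*block=*/true);  // waits for the stream to finish
}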
File diff suppressed because it is too large

@@ -0,0 +1,80 @@
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "MKLDNNLayer.h"
#include "mkldnn.hpp"

namespace paddle {

/**
 * @brief The MKLDNN fully connected layer, a subclass of MKLDNNLayer.
 *
 * The config file api is mkldnn_fc.
 */
class MKLDNNFcLayer : public MKLDNNLayer {
protected:
  // input layer size; cannot be changed after init
  size_t iLayerSize_;  // == ic * ih * iw

  // whether the weight has already been initialized
  bool hasInitedWgt_;

  // whether the input layer has image size info (ih > 1 && iw > 1)
  bool hasSpatial_;

  // fc weight and bias
  std::unique_ptr<Weight> weight_;
  std::unique_ptr<Weight> biases_;

public:
  explicit MKLDNNFcLayer(const LayerConfig& config)
      : MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {}

  ~MKLDNNFcLayer() {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void convertWeightsFromPaddle() override;

  void convertWeightsToPaddle() override;

  void forward(PassType passType) override;

  void backward(const UpdateCallback& callback) override;

protected:
  /**
   * Reshape the input image sizes, reset the output buffer size,
   * and reset the mkldnn forward pipeline.
   */
  void reshape();

  /**
   * Reset the forward primitives and memory;
   * only called when the input size changes.
   */
  void resetFwd();

  /**
   * Reset the backward primitives and memory for mkldnn fc;
   * only called when needed.
   */
  void resetBwd();
};

}  // namespace paddle
@@ -0,0 +1,132 @@
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#include <vector>
#include "Layer.h"
#include "MKLDNNBase.h"
#include "mkldnn.hpp"

DECLARE_bool(use_mkldnn);
DECLARE_bool(use_mkldnn_wgt);

namespace paddle {

class MKLDNNLayer;
typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr;

/**
 * @brief Base class of MKLDNN layers.
 */
class MKLDNNLayer : public Layer {
protected:
  // batch size
  int bs_;
  // input image channel, height and width
  int ic_, ih_, iw_;
  // output image channel, height and width
  int oc_, oh_, ow_;

  // the backward pipeline also needs to be reset after the forward handle is reset
  bool needResetBwd_;

  // mkldnn engine, stream and primitives
  mkldnn::engine engine_;
  std::shared_ptr<MKLDNNStream> stream_;
  std::shared_ptr<mkldnn::primitive> fwd_;
  std::shared_ptr<mkldnn::primitive> bwdWgt_;
  std::shared_ptr<mkldnn::primitive> bwdData_;
  std::vector<mkldnn::primitive> pipelineFwd_;
  std::vector<mkldnn::primitive> pipelineBwd_;

  // TODO(TJ): change the memory below to MKLDNNMatrixPtr type
  std::shared_ptr<mkldnn::memory> inVal_;
  std::shared_ptr<mkldnn::memory> inGrad_;
  std::shared_ptr<mkldnn::memory> outVal_;
  std::shared_ptr<mkldnn::memory> outGrad_;
  std::shared_ptr<mkldnn::memory> wgtVal_;
  std::shared_ptr<mkldnn::memory> wgtGrad_;
  std::shared_ptr<mkldnn::memory> biasVal_;
  std::shared_ptr<mkldnn::memory> biasGrad_;

public:
  explicit MKLDNNLayer(const LayerConfig& config)
      : Layer(config),
        bs_(0),
        ic_(0),
        ih_(0),
        iw_(0),
        oc_(0),
        oh_(0),
        ow_(0),
        needResetBwd_(true),
        engine_(mkldnn::engine::cpu, 0),
        stream_(nullptr),
        fwd_(nullptr),
        bwdWgt_(nullptr),
        bwdData_(nullptr) {}

  ~MKLDNNLayer() {}

  virtual bool init(const LayerMap& layerMap,
                    const ParameterMap& parameterMap) {
    if (!Layer::init(layerMap, parameterMap)) {
      return false;
    }

    CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn."
                            << "Please set WITH_MKLDNN=ON "
                            << "and set use_mkldnn=True";
    stream_.reset(new MKLDNNStream());
    engine_ = CPUEngine::Instance().getEngine();

    // TODO(TJ): deviceId
    return true;
  }

  /**
   * Convert the weight from Paddle format to mkldnn format;
   * weight_ will be overwritten.
   */
  virtual void convertWeightsFromPaddle() {}

  /**
   * Convert the mkldnn weight to Paddle format;
   * weight_ will be overwritten.
   */
  virtual void convertWeightsToPaddle() {}

  /**
   * Print info about sizes.
   */
  virtual void printSizeInfo() {
    VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_
                       << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_
                       << ", oh: " << oh_ << ", ow: " << ow_;
  }

  // TODO(TJ): move to MKLDNNMatrix
  // create a memory descriptor
  inline mkldnn::memory::desc createMD(
      mkldnn::memory::dims dims,
      mkldnn::memory::format fmt,
      mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) {
    // TODO(TJ): isFmtSupported(fmt)
    return mkldnn::memory::desc(dims, type, fmt);
  }
};

}  // namespace paddle
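As a reference for the createMD helper above, here is a small standalone sketch of how such a descriptor is typically bound to the CPU engine in the mkldnn 0.x API that this code targets. The nchw format and the example dims are illustrative assumptions, not values taken from this PR:

#include "mkldnn.hpp"

int main() {
  // Same construction as MKLDNNLayer::createMD: dims + data type + format.
  mkldnn::memory::dims dims = {2, 3, 4, 4};  // {bs, ic, ih, iw}, example only
  mkldnn::memory::desc md(dims,
                          mkldnn::memory::data_type::f32,
                          mkldnn::memory::format::nchw);

  // Bind the descriptor to the CPU engine; in mkldnn 0.x this constructor
  // allocates the backing buffer internally.
  mkldnn::engine cpu(mkldnn::engine::cpu, 0);
  mkldnn::memory::primitive_desc pd(md, cpu);
  mkldnn::memory mem(pd);
  return 0;
}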
File diff suppressed because it is too large

@@ -0,0 +1,120 @@
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/gserver/layers/MKLDNNBase.h"

namespace paddle {

/**
 * @brief Test the functionality of MKLDNN layers
 * against the corresponding reference Paddle layers.
 */
class MKLDNNTester {
  enum {
    DNN = 0,  // MKLDNN layer
    REF = 1,  // Reference layer
    NUM = 2,  // Total number
  };

protected:
  std::vector<TestConfig> configs_;
  vector<string> layerNames_;
  vector<vector<DataLayerPtr>> dataLayers_;
  vector<vector<Argument>> datas_;
  vector<LayerMap> layerMaps_;
  vector<vector<ParameterPtr>> parameters_;
  vector<LayerPtr> testLayers_;
  LayerPtr dnnLayer_, refLayer_;

  /// run some iterations; all the results should pass
  size_t iter_;
  /// whether to print out the details
  bool log_;
  /// vlog level used to print the matrix details
  int lvl_;
  /// epsilon
  float eps_;
  /// input image size, default 1
  size_t ih_, iw_;

public:
  explicit MKLDNNTester(size_t iter = 3, float epsilon = 1e-4) {
    iter_ = iter;
    eps_ = epsilon;
    log_ = false;
    lvl_ = MKLDNN_ALL;
  }

  ~MKLDNNTester() {}

public:
  void run(const TestConfig& dnn,
           const TestConfig& ref,
           size_t batchSize,
           size_t inputImgH = 1,
           size_t inputImgW = 1,
           size_t iter = 3,
           float epsilon = 1e-4,
           bool log = false,
           int level = MKLDNN_ALL);
  void setLogLevel(int lvl) { lvl_ = lvl; }

private:
  void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize);
  void setInputImgSize();
  void runOnce();

  void randomWgtDatas();
  void randomBotDatas();
  void randomTopDiffs();

  void checkForward();
  void checkBackwardData();
  void checkBackwardWgts();

  void clearWgtDiffs();
  void clearBotDiffs();
  void clearBotDiffs(int n);  // clear specific layer
  void clearTopDatas();

  void printTopDatas();
  void printMatrix(const MatrixPtr& m);
  void printVector(const VectorPtr& v);

  void saveWgt(const vector<ParameterPtr>& from, vector<VectorPtr>& to);
  void restoreWgt(const vector<VectorPtr>& from, vector<ParameterPtr>& to);

  double compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2);
  double compareVector(const VectorPtr& v1, const VectorPtr& v2);

  /**
   * Get the relative delta.
   * If more than failRate of the points are wrong
   * (abs(dnn - ref) / abs(ref) > thres), return max(diff / ref);
   * otherwise return sum(abs(dnn - ref)) / sum(abs(ref)).
   * The return value should be smaller than eps to pass.
   */
  double getDelta(const real* d1,
                  const real* d2,
                  size_t len,
                  const float failRate = 1e-3,
                  const float thres = 0.1);
};

}  // namespace paddle
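The pass criterion documented on getDelta above can be summarized with a short, illustrative reimplementation. This is an assumption-level sketch of the documented rule, not the code from this PR, and it uses plain float pointers instead of Paddle's real type:

#include <cmath>
#include <cstddef>

// Sketch of the documented rule: if more than failRate of the points differ
// from the reference by more than thres (relatively), report the worst
// relative error; otherwise report the overall relative L1 difference.
// A result below eps counts as a pass.
double getDeltaSketch(const float* dnn, const float* ref, size_t len,
                      float failRate = 1e-3f, float thres = 0.1f) {
  double sumAbsDiff = 0.0, sumAbsRef = 0.0, maxRatio = 0.0;
  size_t failCnt = 0;
  for (size_t i = 0; i < len; ++i) {
    double diff = std::fabs(double(dnn[i]) - double(ref[i]));
    double base = std::fabs(double(ref[i]));
    sumAbsDiff += diff;
    sumAbsRef += base;
    double ratio = base > 0.0 ? diff / base : diff;
    if (ratio > thres) {
      ++failCnt;
      maxRatio = ratio > maxRatio ? ratio : maxRatio;
    }
  }
  if (len > 0 && static_cast<double>(failCnt) / len > failRate) {
    return maxRatio;  // too many outliers: report the worst one
  }
  return sumAbsRef > 0.0 ? sumAbsDiff / sumAbsRef : sumAbsDiff;
}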
@@ -0,0 +1,76 @@
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "MKLDNNTester.h"
#include "ModelConfig.pb.h"

using namespace paddle;  // NOLINT

DECLARE_bool(thread_local_rand_use_global_seed);
DECLARE_bool(use_gpu);
DECLARE_bool(use_mkldnn);

struct testFCDesc {
  int bs;
  int ic;
  int oc;
  int ih, iw;  // oh == ow == 1
};

void testFcLayer(const testFCDesc& pm) {
  const std::string compareTypes[] = {"mkldnn_fc", "fc"};
  TestConfig cfg;
  cfg.layerConfig.set_type(compareTypes[0]);
  cfg.layerConfig.set_size(pm.oc);
  cfg.inputDefs.push_back(
      {INPUT_DATA,
       "layer_0",
       /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw),
       /* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)});
  cfg.layerConfig.add_inputs();

  MKLDNNTester tester;
  for (auto biasSize : {pm.oc, 0}) {
    cfg.biasSize = biasSize;
    TestConfig ref = cfg;
    ref.layerConfig.set_type(compareTypes[1]);
    for (auto bs : {pm.bs, 1}) {
      tester.run(cfg, ref, bs, pm.ih, pm.iw);
    }
  }
}

TEST(MKLDNNLayer, FcLayer) {
  testFcLayer({/*bs*/ 2, /*ic*/ 2, /*oc*/ 3, /*ih*/ 1, /*iw*/ 1});
  testFcLayer({/*bs*/ 3, /*ic*/ 7, /*oc*/ 19, /*ih*/ 1, /*iw*/ 1});
  testFcLayer({/*bs*/ 8, /*ic*/ 16, /*oc*/ 32, /*ih*/ 13, /*iw*/ 13});
  testFcLayer({/*bs*/ 4, /*ic*/ 12, /*oc*/ 18, /*ih*/ 13, /*iw*/ 11});
  testFcLayer({/*bs*/ 2, /*ic*/ 64, /*oc*/ 32, /*ih*/ 16, /*iw*/ 16});
  testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16});
}

// TODO(TJ): add branch test

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  FLAGS_use_gpu = false;
  FLAGS_use_mkldnn = true;
  initMain(argc, argv);
  FLAGS_thread_local_rand_use_global_seed = true;
  srand(1);
  return RUN_ALL_TESTS();
}