Fix sparse training for trainer_count=1 (#204)

* Fix sparse training for trainer_count=1

For trainer_count=1, the gradient machine is NeuralNetwork, which does not create the parameter buffer for PARAMETER_GRADIENT for sparse update in Parameter::enableType(). However, the gradient parameter buffer is still used in SgdThreadUpdater.

* Minor update to comment
Authored by emailweixu, committed by Yu Yang
parent b22e50ede3
commit 28bc05b126
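
The idea behind the fix, as a condensed C++ sketch (the Buffer type, simplified signatures, and the initUpdater helper below are hypothetical stand-ins for the real Parameter/SgdThreadUpdater classes, not the actual PaddlePaddle API):

// Minimal sketch of the fix (hypothetical, simplified types).
#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>

enum ParameterType { PARAMETER_VALUE = 0, PARAMETER_GRADIENT, NUM_PARAMETER_TYPES };

// Stand-in for the real parameter buffer (Vector in PaddlePaddle).
struct Buffer {
  explicit Buffer(size_t n) : data(n, 0.0f) {}
  void zeroMem() { std::fill(data.begin(), data.end(), 0.0f); }
  std::vector<float> data;
};

class Parameter {
public:
  Parameter(size_t size, bool sparseUpdate)
      : size_(size), sparseUpdate_(sparseUpdate), bufs_(NUM_PARAMETER_TYPES) {}

  // Mirrors the new Parameter::enableBufType(): create the buffer for `type`
  // only if it does not exist yet, then zero it.
  void enableBufType(ParameterType type) {
    if (bufs_[type]) return;
    bufs_[type] = std::make_shared<Buffer>(size_);
    bufs_[type]->zeroMem();
  }

  bool isGradSparseUpdate() const { return sparseUpdate_; }

private:
  size_t size_;
  bool sparseUpdate_;
  std::vector<std::shared_ptr<Buffer>> bufs_;
};

// Mirrors the new branch in SgdThreadUpdater::init(): with trainer_count == 1
// the gradient machine is NeuralNetwork, which never allocates the
// PARAMETER_GRADIENT buffer for sparse-update parameters, so the updater
// has to create it explicitly before using it.
void initUpdater(std::vector<std::shared_ptr<Parameter>>& parameters,
                 int trainerCount) {
  for (auto& para : parameters) {
    if (para->isGradSparseUpdate() && trainerCount == 1) {
      para->enableBufType(PARAMETER_GRADIENT);
    }
  }
}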

@@ -75,7 +75,6 @@ class ChunkEvaluator : public Evaluator {
 public:
   virtual void init(const EvaluatorConfig& config) {
-    CHECK(!FLAGS_use_gpu) << "Not supported";
     Evaluator::init(config);
     if (config.chunk_scheme() == "IOB") {
       numTagTypes_ = 2;
@@ -137,6 +136,7 @@ public:
     CHECK_EQ(arguments.size(), (size_t)2);
     IVectorPtr& output = arguments[0].ids;
     IVectorPtr& label = arguments[1].ids;
+    CHECK(!output->useGpu() && !label->useGpu()) << "Not supported";
     auto sequenceStartPositions =
         arguments[1].sequenceStartPositions->getVector(false);
     CHECK_EQ(output->getSize(), label->getSize());

@@ -813,7 +813,6 @@ void TrainerThread::mergeGradSparse(
       para->getMat(PARAMETER_GRADIENT).get());
   std::vector<uint32_t>& ids = mainMat->getIds(threadId_);
-  ids.clear();
   for (auto slaveParams : slaveParameters) {
     SparseRowCpuMatrix* mat =
         dynamic_cast<SparseRowCpuMatrix*>((*slaveParams)[pid]

@@ -146,6 +146,12 @@ public:
     }
   }

+  void enableBufType(ParameterType type) {
+    if (bufs_[type]) return;
+    bufs_[type] = Vector::createParallelVector(config_.size(), useGpu_);
+    bufs_[type]->zeroMem();
+  }
+
   void enableIntType(ParameterType type, size_t intStoreSize = 0) {
     if (!intBufs_[type]) {
       SetDevice device(deviceId_);

@@ -20,6 +20,8 @@ limitations under the License. */
 #include "paddle/math/SparseRowMatrix.h"
 #include "paddle/utils/Thread.h"

+P_DECLARE_int32(trainer_count);
+
 namespace paddle {

 SgdThreadUpdater::SgdThreadUpdater(const OptimizationConfig& optConfig)
@@ -48,6 +50,13 @@ void SgdThreadUpdater::init(std::vector<ParameterPtr>& parameters) {
                                          false /*inPserver*/));
     size_t numRows = para->isGradSparseUpdate() ? para->getConfig().dims(0) : 0;
     optimizers_[pid]->init(numRows, &para->getConfig());
+    if (para->isGradSparseUpdate() && FLAGS_trainer_count == 1) {
+      // For trainer_count=1, the gradient machine is NeuralNetwork, which does
+      // not create parameter buf for PARAMETER_GRADIENT for sparse update in
+      // Parameter::enableType(). But gradient parameter buf is still used
+      // in SgdThreadUpdater. We need to explicitly create it.
+      para->enableBufType(PARAMETER_GRADIENT);
+    }
   }
 }
@@ -211,7 +220,7 @@ void SgdThreadUpdater::threadUpdateSparse(
     // From MultiGradientMachine
     SparseRowIdsCpuMatrix* mainMat = dynamic_cast<SparseRowIdsCpuMatrix*>(
         para->getMat(PARAMETER_GRADIENT).get());
-    const std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
+    std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
     for (auto id : sparseIds) {
       // setup sub bufs
@@ -221,6 +230,7 @@ void SgdThreadUpdater::threadUpdateSparse(
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    sparseIds.clear();
   } else if (dynamic_cast<SparseRowCpuMatrix*>(
                  para->getMat(PARAMETER_GRADIENT).get())) {
     // From NeuralNetwork
@@ -246,6 +256,10 @@ void SgdThreadUpdater::threadUpdateSparse(
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    // For numThreads > 1, MultiGradientMachine is used, which goes
+    // to the above branch.
+    CHECK_EQ(numThreads, 1);
+    mainMat->clearIndices();
   } else {
     auto & m = *para->getMat(PARAMETER_GRADIENT).get();
     LOG(FATAL) << "Internal error: " << para->getName() << " "

@@ -191,7 +191,7 @@ void installFailureWriter(void(*callback)(const char*, int));
 }
 #endif  // PADDLE_USE_GLOG
-#ifdef NDEBUG
+#ifndef NDEBUG
 #define DEBUG_LEVEL 5
 #define DBG VLOG(DEBUG_LEVEL)
 #else
