You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							146 lines
						
					
					
						
							4.7 KiB
						
					
					
				
			
		
		
	
	
							146 lines
						
					
					
						
							4.7 KiB
						
					
					
				| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 | |
| 
 | |
| Licensed under the Apache License, Version 2.0 (the "License");
 | |
| you may not use this file except in compliance with the License.
 | |
| You may obtain a copy of the License at
 | |
| 
 | |
|     http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
| Unless required by applicable law or agreed to in writing, software
 | |
| distributed under the License is distributed on an "AS IS" BASIS,
 | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| See the License for the specific language governing permissions and
 | |
| limitations under the License. */
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include "FirstOrderOptimizer.h"
 | |
| 
 | |
| namespace paddle {
 | |
| 
 | |
| // After Optimization, parameter values are further averaged within
 | |
| // time range.
 | |
| class AverageOptimizer : public ParameterOptimizer {
 | |
| public:
 | |
|   // if *useParameterApply* set, use PARAMETER_APPLY to store averaged parameter
 | |
|   // else use PARAMETER_VALUE, and value backup in PARAMETER_GRADIENT
 | |
|   AverageOptimizer(const OptimizationConfig& optConfig,
 | |
|                    ParameterOptimizer* optimizer,
 | |
|                    bool useParameterApply);
 | |
| 
 | |
|   static ParameterOptimizer* create(const OptimizationConfig& optConfig,
 | |
|                                     ParameterOptimizer* optimizer,
 | |
|                                     bool isParameterSparse = false,
 | |
|                                     bool useParameterApply = false);
 | |
| 
 | |
|   virtual void init(size_t numRows, const ParameterConfig* config) {
 | |
|     optimizer_->init(numRows, config);
 | |
|   }
 | |
| 
 | |
|   virtual void startPass() { optimizer_->startPass(); }
 | |
|   virtual void finishPass() {
 | |
|     optimizer_->finishPass();
 | |
|     updateAverageWindowLimit();
 | |
|   }
 | |
| 
 | |
|   virtual void startBatch(int64_t numSamplesProcessed);
 | |
|   virtual void finishBatch();
 | |
|   virtual void update(const VectorPtr vecs[],
 | |
|                       const ParameterConfig& paraConfig,
 | |
|                       size_t sparseId) const {
 | |
|     optimizer_->update(vecs, paraConfig, sparseId);
 | |
|     vecs[PARAMETER_SUM1]->add(*vecs[PARAMETER_VALUE], 1.0f);
 | |
|   }
 | |
| 
 | |
|   virtual TraverseCallback needSpecialTraversal(
 | |
|       const ParameterConfig& config) const;
 | |
| 
 | |
|   virtual TraverseCallback startCatchUpWith() const {
 | |
|     return optimizer_->startCatchUpWith();
 | |
|   }
 | |
|   virtual void finishCatchUpWith() { return optimizer_->finishCatchUpWith(); }
 | |
| 
 | |
|   virtual TraverseCallback apply();
 | |
|   virtual TraverseCallback restore();
 | |
| 
 | |
|   virtual void setNoDecay() { optimizer_->setNoDecay(); }
 | |
| 
 | |
| protected:
 | |
|   std::unique_ptr<ParameterOptimizer> optimizer_;
 | |
|   bool useApply_;
 | |
| 
 | |
|   // should only be called from finishPass()
 | |
|   void updateAverageWindowLimit() {
 | |
|     if (!optConfig_.has_max_average_window()) {
 | |
|       // use the number of batches in the last pass as maxAverageWindow_
 | |
|       CHECK_GT(numUpdates_, prevNumUpdates_);
 | |
|       maxAverageWindow_ = numUpdates_ - prevNumUpdates_;
 | |
|       prevNumUpdates_ = numUpdates_;
 | |
|     }
 | |
|     minAverageWindow_ = std::min(minAverageWindow_, numUpdates_);
 | |
|   }
 | |
| 
 | |
|   bool isAverageWindowTooLong() const {
 | |
|     return numAccumulates_ >= minAverageWindow_ &&
 | |
|            numAccumulates_ >=
 | |
|                std::min<int64_t>(maxAverageWindow_,
 | |
|                                  numUpdates_ * optConfig_.average_window());
 | |
|   }
 | |
| 
 | |
|   static const int64_t kMaxNumAccumulates = 16384;
 | |
|   int64_t numUpdates_;
 | |
|   int64_t prevNumUpdates_;
 | |
|   int64_t numAccumulates_;
 | |
|   int64_t oldNumAccumulates_;
 | |
|   int64_t minAverageWindow_;
 | |
|   int64_t maxAverageWindow_;
 | |
| };
 | |
| 
 | |
| // Average Optimizer with Sparse support.
 | |
| class AverageSparseOptimizer : public AverageOptimizer {
 | |
| public:
 | |
|   AverageSparseOptimizer(const OptimizationConfig& optConfig,
 | |
|                          ParameterOptimizer* optimizer,
 | |
|                          bool useParameterApply)
 | |
|       : AverageOptimizer(optConfig, optimizer, useParameterApply) {}
 | |
| 
 | |
|   virtual void init(size_t numRows, const ParameterConfig* config) {
 | |
|     AverageOptimizer::init(numRows, config);
 | |
| 
 | |
|     t0Vec_.resize(numRows);
 | |
| 
 | |
|     timer_ = 0;
 | |
|     t0Vec_.assign(t0Vec_.size(), 0);
 | |
|   }
 | |
|   virtual void finishBatch() {
 | |
|     AverageOptimizer::finishBatch();
 | |
|     timer_++;
 | |
|   }
 | |
|   virtual void update(const VectorPtr vecs[],
 | |
|                       const ParameterConfig& paraConfig,
 | |
|                       size_t sparseId) const;
 | |
|   void catchUpWith(const VectorPtr vecs[],
 | |
|                    const ParameterConfig& paraConfig,
 | |
|                    size_t sparseId) const;
 | |
|   virtual TraverseCallback startCatchUpWith() const;
 | |
|   virtual void finishCatchUpWith() {
 | |
|     optimizer_->finishCatchUpWith();
 | |
| 
 | |
|     timer_ = 0;
 | |
|     t0Vec_.assign(t0Vec_.size(), 0);
 | |
|   }
 | |
| 
 | |
| protected:
 | |
|   /**
 | |
|    *  counting batches, clear after catch up with
 | |
|    *  t(timer_) is current time,
 | |
|    *  t0(t0Vec_) are last occur time of i rows.
 | |
|    *  if one block is update by multi threads,
 | |
|    *  caller should hash sparse ids to avoid write conflict in t0Vec_.
 | |
|    */
 | |
|   int timer_;
 | |
|   mutable std::vector<int32_t> t0Vec_;
 | |
| };
 | |
| 
 | |
| }  // namespace paddle
 |