public SOM(int x, int y)
{
    Width = x;
    Height = y;
    ConstantLearningRate = 0.5;
    Epoch = 1;
    Map = new Node[x, y];
    _learningRate = new PowerSeriesLearningRate(ConstantLearningRate);
}

public SOM()
{
    Width = 0;
    Height = 0;
    ConstantLearningRate = 0;
    Epoch = 1;
    Map = new Node[Width, Height];
    _learningRate = new PowerSeriesLearningRate(ConstantLearningRate);
}
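The constructors above delegate the schedule to PowerSeriesLearningRate, which is not shown in these snippets. A minimal sketch of what such a power-series schedule might look like, assuming a simple eta_0 / t decay (an illustrative assumption, not the library's actual implementation):

public sealed class PowerSeriesLearningRateSketch
{
    // Hypothetical stand-in for PowerSeriesLearningRate; the real decay form is not shown here.
    private readonly double _initialRate;
    private int _iteration;

    public PowerSeriesLearningRateSketch(double initialRate)
    {
        _initialRate = initialRate;
    }

    // Assumed power-series decay: returns eta_0 / t for iteration t = 1, 2, 3, ...
    public double NextRate()
    {
        _iteration++;
        return _initialRate / _iteration;
    }
}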
/// <summary>
/// Initialize this strategy.
/// </summary>
/// <param name="train">The training algorithm.</param>
public void Init(IMLTrain train)
{
    _train = train;
    _ready = false;
    _setter = (ILearningRate)train;
    _trainingSize = train.Training.Count;
    _currentLearningRate = 1.0d / _trainingSize;
    EncogLogging.Log(EncogLogging.LevelDebug,
        "Starting learning rate: " + _currentLearningRate);
    _setter.LearningRate = _currentLearningRate;
}
/// <summary>
/// Initialize this strategy.
/// </summary>
/// <param name="train">The training algorithm.</param>
public void Init(ITrain train)
{
    this.train = train;
    this.ready = false;
    this.setter = (ILearningRate)train;
    this.trainingSize = DetermineTrainingSize();
    this.currentLearningRate = 1.0 / this.trainingSize;
#if logging
    if (this.logger.IsInfoEnabled)
    {
        this.logger.Info("Starting learning rate: " + this.currentLearningRate);
    }
#endif
    this.setter.LearningRate = this.currentLearningRate;
}
public void Train(int maxTrainingEpoch, ParallelCorpus trainCorpus, ParallelCorpus validCorpus,
    ILearningRate learningRate, List<IMetric> metrics, AdamOptimizer optimizer)
{
    Logger.WriteLine("Start to train...");
    for (int i = 0; i < maxTrainingEpoch; i++)
    {
        // Train one epoch over the given devices. The forward pass is implemented in
        // RunForwardOnSingleDevice below; the backward pass, weight updates, and the rest
        // are implemented in the framework (see BaseSeq2SeqFramework.cs).
        TrainOneEpoch(i, trainCorpus, validCorpus, learningRate, optimizer, metrics,
            m_modelMetaData, RunForwardOnSingleDevice);
    }
}
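Both TrainOneEpoch variants below pull the current rate from the schedule once per weights update via GetCurrentLearningRate(). A hedged sketch of a compatible ILearningRate implementation; only GetCurrentLearningRate() is confirmed by the calls in this code, and the linear warm-up plus inverse-square-root decay below is an assumed schedule, not necessarily the library's own:

using System;

public class WarmupDecayLearningRateSketch : ILearningRate
{
    private readonly float _startRate;
    private readonly int _warmupSteps;
    private int _step;

    public WarmupDecayLearningRateSketch(float startRate, int warmupSteps)
    {
        _startRate = startRate;
        _warmupSteps = warmupSteps;
    }

    // Linear warm-up to _startRate over _warmupSteps updates, then inverse-square-root decay.
    public float GetCurrentLearningRate()
    {
        _step++;
        double scale = Math.Min(_step / (double)_warmupSteps, Math.Sqrt(_warmupSteps) / Math.Sqrt(_step));
        return (float)(_startRate * scale);
    }
}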
public LearningRateConfig(ILearningRate trainer)
{
    this.Trainer = trainer;
    this.InitializeComponent();
}
/// <summary>
/// Initializes a new instance of the <see cref="WeightAdjustment"/> class.
/// </summary>
/// <param name="radiusFunction">The radius function.</param>
/// <param name="neighboorFunction">The neighborhood function.</param>
/// <param name="learningRate">The learning rate.</param>
public WeightAdjustment(IRadiusFunction radiusFunction, INeighborhoodFunction neighboorFunction,
    ILearningRate learningRate)
{
    _radiusFunction = radiusFunction;
    _neighboorFunction = neighboorFunction;
    _learningRate = learningRate;
}
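WeightAdjustment only wires the three collaborators together; their members are not shown in these snippets. For reference, the classic SOM update they typically implement, written as a standalone helper with hypothetical names:

using System;

public static class SomUpdateSketch
{
    // Classic SOM rule: w <- w + eta(t) * h(d, r(t)) * (x - w).
    public static double Adjust(double weight, double input, double learningRate, double neighborhoodInfluence)
    {
        return weight + learningRate * neighborhoodInfluence * (input - weight);
    }

    // A common neighborhood falloff (Gaussian): exp(-d^2 / (2 r^2)); the actual
    // INeighborhoodFunction in this code base may differ.
    public static double GaussianInfluence(double distanceToBmu, double radius)
    {
        return Math.Exp(-(distanceToBmu * distanceToBmu) / (2.0 * radius * radius));
    }
}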
public SOM(int x, int y, double learningRate) : this(x, y)
{
    ConstantLearningRate = learningRate;
    _learningRate = new PowerSeriesLearningRate(ConstantLearningRate);
    Epoch = 1;
}
internal void TrainOneEpoch(int ep, IEnumerable<SntPairBatch> trainCorpus, IEnumerable<SntPairBatch> validCorpus,
    ILearningRate learningRate, AdamOptimizer solver, List<IMetric> metrics, IModelMetaData modelMetaData,
    Func<IComputeGraph, List<List<string>>, List<List<string>>, int, bool, float> ForwardOnSingleDevice)
{
    int processedLineInTotal = 0;
    DateTime startDateTime = DateTime.Now;
    double costInTotal = 0.0;
    long srcWordCntsInTotal = 0;
    long tgtWordCntsInTotal = 0;
    double avgCostPerWordInTotal = 0.0;

    Logger.WriteLine($"Start to process training corpus.");
    List<SntPairBatch> sntPairBatchs = new List<SntPairBatch>();
    foreach (SntPairBatch sntPairBatch in trainCorpus)
    {
        sntPairBatchs.Add(sntPairBatch);
        if (sntPairBatchs.Count == m_deviceIds.Length)
        {
            // Copy weights kept in the default device to all other devices
            CopyWeightsFromDefaultDeviceToAllOtherDevices();

            int batchSplitFactor = 1;
            bool runNetworkSucceeded = false;
            while (runNetworkSucceeded == false)
            {
                try
                {
                    (float cost, int sWordCnt, int tWordCnt, int processedLine) =
                        RunNetwork(ForwardOnSingleDevice, sntPairBatchs, batchSplitFactor);
                    processedLineInTotal += processedLine;
                    srcWordCntsInTotal += sWordCnt;
                    tgtWordCntsInTotal += tWordCnt;

                    // Sum up gradients from all devices and keep them in the default device
                    // for parameter optimization
                    SumGradientsToTensorsInDefaultDevice();

                    // Optimize parameters
                    float lr = learningRate.GetCurrentLearningRate();
                    List<IWeightTensor> models = GetParametersFromDefaultDevice();
                    solver.UpdateWeights(models, processedLine, lr, m_regc, m_weightsUpdateCount + 1);

                    costInTotal += cost;
                    avgCostPerWordInTotal = costInTotal / tgtWordCntsInTotal;
                    m_weightsUpdateCount++;
                    if (IterationDone != null && m_weightsUpdateCount % 100 == 0)
                    {
                        IterationDone(this, new CostEventArg()
                        {
                            LearningRate = lr,
                            CostPerWord = cost / tWordCnt,
                            AvgCostInTotal = avgCostPerWordInTotal,
                            Epoch = ep,
                            Update = m_weightsUpdateCount,
                            ProcessedSentencesInTotal = processedLineInTotal,
                            ProcessedWordsInTotal = srcWordCntsInTotal + tgtWordCntsInTotal,
                            StartDateTime = startDateTime
                        });
                    }

                    runNetworkSucceeded = true;
                }
                catch (AggregateException err)
                {
                    if (err.InnerExceptions != null)
                    {
                        string oomMessage = String.Empty;
                        bool isOutOfMemException = false;
                        bool isArithmeticException = false;
                        foreach (var excep in err.InnerExceptions)
                        {
                            if (excep is OutOfMemoryException)
                            {
                                isOutOfMemException = true;
                                oomMessage = excep.Message;
                                break;
                            }
                            else if (excep is ArithmeticException)
                            {
                                isArithmeticException = true;
                                oomMessage = excep.Message;
                                break;
                            }
                        }

                        if (isOutOfMemException)
                        {
                            batchSplitFactor *= 2;
                            Logger.WriteLine($"Got an exception ('{oomMessage}'), so we increase batch split factor to {batchSplitFactor}, and retry it.");
                            if (batchSplitFactor >= sntPairBatchs[0].BatchSize)
                            {
                                Logger.WriteLine($"Batch split factor is larger than batch size, so ignore current mini-batch.");
                                break;
                            }
                        }
                        else if (isArithmeticException)
                        {
                            Logger.WriteLine($"Arithmetic exception: '{err.Message}'");
                            break;
                        }
                        else
                        {
                            Logger.WriteLine(Logger.Level.err, ConsoleColor.Red, $"Exception: {err.Message}, Call stack: {err.StackTrace}");
                            throw;
                        }
                    }
                    else
                    {
                        Logger.WriteLine(Logger.Level.err, ConsoleColor.Red, $"Exception: {err.Message}, Call stack: {err.StackTrace}");
                        throw;
                    }
                }
                catch (OutOfMemoryException err)
                {
                    batchSplitFactor *= 2;
                    Logger.WriteLine($"Got an exception ('{err.Message}'), so we increase batch split factor to {batchSplitFactor}, and retry it.");
                    if (batchSplitFactor >= sntPairBatchs[0].BatchSize)
                    {
                        Logger.WriteLine($"Batch split factor is larger than batch size, so ignore current mini-batch.");
                        break;
                    }
                }
                catch (ArithmeticException err)
                {
                    Logger.WriteLine($"Arithmetic exception: '{err.Message}'");
                    break;
                }
                catch (Exception err)
                {
                    Logger.WriteLine(Logger.Level.err, ConsoleColor.Red, $"Exception: {err.Message}, Call stack: {err.StackTrace}");
                    throw;
                }
            }

            // Evaluate the model every hour and save it if we get a better one.
            TimeSpan ts = DateTime.Now - m_lastCheckPointDateTime;
            if (ts.TotalHours > 1.0)
            {
                CreateCheckPoint(validCorpus, metrics, modelMetaData, ForwardOnSingleDevice, avgCostPerWordInTotal);
                m_lastCheckPointDateTime = DateTime.Now;
            }

            sntPairBatchs.Clear();
        }
    }

    Logger.WriteLine(Logger.Level.info, ConsoleColor.Green,
        $"Epoch '{ep}' took '{DateTime.Now - startDateTime}' to finish. AvgCost = {avgCostPerWordInTotal.ToString("F6")}, AvgCostInLastEpoch = {m_avgCostPerWordInTotalInLastEpoch.ToString("F6")}");
    // CreateCheckPoint(validCorpus, metrics, modelMetaData, ForwardOnSingleDevice, avgCostPerWordInTotal);
    m_avgCostPerWordInTotalInLastEpoch = avgCostPerWordInTotal;
}
internal void TrainOneEpoch(int ep, ParallelCorpus trainCorpus, ParallelCorpus validCorpus,
    ILearningRate learningRate, AdamOptimizer solver, List<IMetric> metrics, IModelMetaData modelMetaData,
    Func<IComputeGraph, List<List<string>>, List<List<string>>, int, bool, float> ForwardOnSingleDevice)
{
    int processedLineInTotal = 0;
    DateTime startDateTime = DateTime.Now;
    DateTime lastCheckPointDateTime = DateTime.Now;
    double costInTotal = 0.0;
    long srcWordCnts = 0;
    long tgtWordCnts = 0;
    double avgCostPerWordInTotal = 0.0;

    TensorAllocator.FreeMemoryAllDevices();

    Logger.WriteLine($"Start to process training corpus.");
    List<SntPairBatch> sntPairBatchs = new List<SntPairBatch>();
    foreach (SntPairBatch sntPairBatch in trainCorpus)
    {
        sntPairBatchs.Add(sntPairBatch);
        if (sntPairBatchs.Count == m_deviceIds.Length)
        {
            float cost = 0.0f;
            int tlen = 0;
            int processedLine = 0;

            // Copy weights kept in the default device to all other devices
            CopyWeightsFromDefaultDeviceToAllOtherDevices();

            // Run forward and backward on all available processors
            Parallel.For(0, m_deviceIds.Length, i =>
            {
                SntPairBatch sntPairBatch_i = sntPairBatchs[i];

                // Construct sentences for encoding and decoding
                List<List<string>> srcTkns = new List<List<string>>();
                List<List<string>> tgtTkns = new List<List<string>>();
                int sLenInBatch = 0;
                int tLenInBatch = 0;
                for (int j = 0; j < sntPairBatch_i.BatchSize; j++)
                {
                    srcTkns.Add(sntPairBatch_i.SntPairs[j].SrcSnt.ToList());
                    sLenInBatch += sntPairBatch_i.SntPairs[j].SrcSnt.Length;

                    tgtTkns.Add(sntPairBatch_i.SntPairs[j].TgtSnt.ToList());
                    tLenInBatch += sntPairBatch_i.SntPairs[j].TgtSnt.Length;
                }

                float lcost = 0.0f;
                // Create a new computing graph instance
                using (IComputeGraph computeGraph_i = CreateComputGraph(i))
                {
                    // Run the forward part
                    lcost = ForwardOnSingleDevice(computeGraph_i, srcTkns, tgtTkns, i, true);
                    // Run the backward part and compute gradients
                    computeGraph_i.Backward();
                }

                lock (locker)
                {
                    cost += lcost;
                    srcWordCnts += sLenInBatch;
                    tgtWordCnts += tLenInBatch;
                    tlen += tLenInBatch;
                    processedLineInTotal += sntPairBatch_i.BatchSize;
                    processedLine += sntPairBatch_i.BatchSize;
                }
            });

            // Sum up gradients from all devices and keep them in the default device
            // for parameter optimization
            SumGradientsToTensorsInDefaultDevice();

            // Optimize parameters
            float lr = learningRate.GetCurrentLearningRate();
            List<IWeightTensor> models = GetParametersFromDefaultDevice();
            solver.UpdateWeights(models, processedLine, lr, m_regc, m_weightsUpdateCount + 1);

            // Clear gradients on all devices
            ZeroGradientOnAllDevices();

            costInTotal += cost;
            avgCostPerWordInTotal = costInTotal / tgtWordCnts;
            m_weightsUpdateCount++;
            if (IterationDone != null && m_weightsUpdateCount % 100 == 0)
            {
                IterationDone(this, new CostEventArg()
                {
                    LearningRate = lr,
                    CostPerWord = cost / tlen,
                    AvgCostInTotal = avgCostPerWordInTotal,
                    Epoch = ep,
                    Update = m_weightsUpdateCount,
                    ProcessedSentencesInTotal = processedLineInTotal,
                    ProcessedWordsInTotal = srcWordCnts + tgtWordCnts,
                    StartDateTime = startDateTime
                });
            }

            // Evaluate the model every hour and save it if we get a better one.
            TimeSpan ts = DateTime.Now - lastCheckPointDateTime;
            if (ts.TotalHours > 1.0)
            {
                CreateCheckPoint(validCorpus, metrics, modelMetaData, ForwardOnSingleDevice, avgCostPerWordInTotal);
                lastCheckPointDateTime = DateTime.Now;
            }

            sntPairBatchs.Clear();
        }
    }

    Logger.WriteLine(Logger.Level.info, ConsoleColor.Green,
        $"Epoch '{ep}' took '{DateTime.Now - startDateTime}' to finish. AvgCost = {avgCostPerWordInTotal.ToString("F6")}, AvgCostInLastEpoch = {m_avgCostPerWordInTotalInLastEpoch.ToString("F6")}");

    CreateCheckPoint(validCorpus, metrics, modelMetaData, ForwardOnSingleDevice, avgCostPerWordInTotal);
    m_avgCostPerWordInTotalInLastEpoch = avgCostPerWordInTotal;
}
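Both TrainOneEpoch variants follow the same data-parallel update cycle, summarized here using the method names from the code above:

// 1. CopyWeightsFromDefaultDeviceToAllOtherDevices()          -- replicate current parameters
// 2. Forward + Backward per device (RunNetwork / Parallel.For) -- compute per-device gradients
// 3. SumGradientsToTensorsInDefaultDevice()                   -- reduce gradients onto the default device
// 4. solver.UpdateWeights(...)                                -- one optimizer step at the scheduled rate
// 5. ZeroGradientOnAllDevices()                               -- clear gradients (explicit in the second variant above)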