/// <summary>
/// Computes the gradient for the current boosting iteration. When <c>DropoutRate</c> is
/// positive, a random subset of existing trees is "dropped" first: their contribution is
/// removed from the scores at which the gradient is taken, and the dropped trees are
/// rescaled by k/(1+k) (k = number of dropped trees).
/// </summary>
/// <param name="ch">Channel used for tracing; must not be null.</param>
/// <returns>The per-document gradient array produced by the objective function.</returns>
private protected virtual double[] GetGradient(IChannel ch)
{
    Contracts.AssertValue(ch);

    // No dropout requested: take the gradient directly at the current training scores.
    if (DropoutRate <= 0)
        return ObjectiveFunction.GetGradient(ch, TrainingScores.Scores);

    // Lazily allocate, or reset, the scratch buffers sized to the training set.
    if (_droppedScores != null)
        Array.Clear(_droppedScores, 0, _droppedScores.Length);
    else
        _droppedScores = new double[TrainingScores.Scores.Length];
    if (_scores == null)
        _scores = new double[TrainingScores.Scores.Length];

    int treeCount = Ensemble.NumTrees;
    // Drop each existing tree independently with probability DropoutRate.
    int[] dropped = Enumerable.Range(0, treeCount)
        .Where(t => DropoutRng.NextDouble() < DropoutRate)
        .ToArray();
    _numberOfDroppedTrees = dropped.Length;
    if (_numberOfDroppedTrees == 0 && treeCount > 0)
    {
        // Force at least a single tree to be dropped.
        dropped = new int[] { DropoutRng.Next(treeCount) };
        _numberOfDroppedTrees = 1;
    }
    ch.Trace("dropout: Dropping {0} trees of {1} for rate {2}", _numberOfDroppedTrees, treeCount, DropoutRate);

    // Rescale factor k/(1+k) applied to every dropped tree; _numberOfDroppedTrees is
    // fixed for the rest of this call, so the factor is loop-invariant.
    double rescale = _numberOfDroppedTrees / (1.0 + _numberOfDroppedTrees);
    for (int d = 0; d < dropped.Length; d++)
    {
        int treeIndex = dropped[d];
        double[] treeScore = _treeScores[treeIndex];
        for (int doc = 0; doc < _droppedScores.Length; doc++)
        {
            _droppedScores[doc] += treeScore[doc]; // sum up the dropped trees' contributions
            treeScore[doc] *= rescale;             // rescale the cached per-tree scores to match
        }
        Ensemble.GetTreeAt(treeIndex).ScaleOutputsBy(rescale);
    }

    // The gradient is taken at the scores with the dropped trees removed; the running
    // training scores keep only a 1/(1+k) fraction of the dropped contribution.
    for (int doc = 0; doc < _scores.Length; doc++)
    {
        _scores[doc] = TrainingScores.Scores[doc] - _droppedScores[doc];
        TrainingScores.Scores[doc] -= _droppedScores[doc] / (1.0 + _numberOfDroppedTrees);
    }
    return ObjectiveFunction.GetGradient(ch, _scores);
}
/// <summary>
/// Computes a nonlinear conjugate-gradient search direction from the current and
/// previous gradients: d[k] = g[k] + beta * d[k-1], with the Polak-Ribiere beta by
/// default (Fletcher-Reeves when POLAK_RIBIERE_STEP is defined — note the macro name
/// is historically inverted relative to the formulas it selects).
/// </summary>
/// <param name="ch">Channel used for logging; must not be null.</param>
/// <returns>The direction array to fit trees against this iteration.</returns>
private protected override double[] GetGradient(IChannel ch)
{
    Contracts.AssertValue(ch);
    _previousGradient = _currentGradient;
    _currentGradient = ObjectiveFunction.GetGradient(ch, TrainingScores.Scores);
    // We need to copy the gradient because the returned reference is an internal buffer
    // of ObjectiveFunctionBase that is only valid until the next GetGradient call.
    _currentGradient = (double[])_currentGradient.Clone();
    double[] previousDk = _currentDk;
    // First iteration: no previous gradient yet. Aliasing it to the current gradient
    // makes beta evaluate to 0 below, so the direction is the plain gradient.
    if (_previousGradient == null)
    {
        _previousGradient = _currentGradient;
    }
#if !POLAK_RIBIERE_STEP
    // Polak-Ribiere step:
    // Beta[k] = curG[k] * (curG[k] - prevG[k]) / (prevG[k] * prevG[k])
    // TODO: this can be optimized for speed. Keeping it slow but simple for now.
    double beta = VectorUtils.GetDotProduct(_currentGradient, VectorUtils.Subtract(_currentGradient, _previousGradient)) /
        VectorUtils.GetDotProduct(_previousGradient, _previousGradient);
#else
    // Fletcher-Reeves step:
    // Beta[k] = (curG[k] * curG[k]) / (prevG[k] * prevG[k])
    // NOTE(review): this branch previously referenced undefined locals
    // `currentGradient`/`previousGradient` and did not compile when
    // POLAK_RIBIERE_STEP was defined; fixed to use the fields.
    double beta = VectorUtils.GetDotProduct(_currentGradient, _currentGradient) /
        VectorUtils.GetDotProduct(_previousGradient, _previousGradient);
#endif
    // Restart with steepest descent whenever beta goes negative (standard CG safeguard).
    if (beta < 0)
    {
        beta = 0;
    }
    ch.Info("beta: {0}", beta);
    // In-place direction update: previousDk becomes g[k] + beta * d[k-1].
    VectorUtils.MutiplyInPlace(previousDk, beta);
    VectorUtils.AddInPlace(previousDk, _currentGradient);
    _currentDk = previousDk;
    // LeastSquaresRegressionTreeLearner does not destroy the gradient array, so we can
    // return our own reference, which we will need again next iteration.
    if (TreeLearner is LeastSquaresRegressionTreeLearner)
    {
        return _currentDk;
    }
    // Assume other tree learners may destroy the gradient array, so return a copy.
    return (double[])_currentDk.Clone();
}