public virtual void TrainWeightedData(GeneralDataset<L, F> data, float[] dataWeights) {
    // Use LogisticClassifierFactory to train instead.
    if (data.labelIndex.Size() != 2) {
        throw new Exception("LogisticClassifier is only for binary classification!");
    }
    IMinimizer<IDiffFunction> minim;
    LogisticObjectiveFunction lof = null;
    if (data is Dataset<object, object>) {
        lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetLabelsArray(), prior, dataWeights);
    } else if (data is RVFDataset<object, object>) {
        lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetValuesArray(), data.GetLabelsArray(), prior, dataWeights);
    }
    minim = new QNMinimizer(lof);
    weights = minim.Minimize(lof, 1e-4, new double[data.NumFeatureTypes()]);
    featureIndex = data.featureIndex;
    classes[0] = data.labelIndex.Get(0);
    classes[1] = data.labelIndex.Get(1);
}
private NaiveBayesClassifierFactory.NBWeights TrainWeightsUCL(int[][] data, int[] labels, int numFeatures, int numClasses) {
    int[] numValues = NumberValues(data, numFeatures);
    // sumValues[j] counts how many feature-values come before feature j, so each
    // (feature, value) pair maps to a unique flat index; index 0 is reserved.
    int[] sumValues = new int[numFeatures];
    for (int j = 1; j < numFeatures; j++) {
        sumValues[j] = sumValues[j - 1] + numValues[j - 1];
    }
    int[][] newdata = new int[data.Length][];
    for (int i = 0; i < data.Length; i++) {
        newdata[i] = new int[numFeatures + 1]; // each row must be allocated before use
        newdata[i][0] = 0;
        for (int j = 0; j < numFeatures; j++) {
            newdata[i][j + 1] = sumValues[j] + data[i][j] + 1;
        }
    }
    int totalFeatures = sumValues[numFeatures - 1] + numValues[numFeatures - 1] + 1;
    logger.Info("total feats " + totalFeatures);
    LogConditionalObjectiveFunction<L, F> objective = new LogConditionalObjectiveFunction<L, F>(totalFeatures, numClasses, newdata, labels, prior, sigma, 0.0);
    IMinimizer<IDiffFunction> min = new QNMinimizer();
    double[] argmin = min.Minimize(objective, 1e-4, objective.Initial());
    double[][] wts = objective.To2D(argmin);
    System.Console.Out.WriteLine("weights have dimension " + wts.Length);
    return new NaiveBayesClassifierFactory.NBWeights(wts, numValues);
}
public virtual LogisticClassifier<L, F> TrainWeightedData(GeneralDataset<L, F> data, float[] dataWeights) {
    if (data is RVFDataset) {
        ((RVFDataset<L, F>)data).EnsureRealValues();
    }
    if (data.labelIndex.Size() != 2) {
        throw new Exception("LogisticClassifier is only for binary classification!");
    }
    IMinimizer<IDiffFunction> minim;
    LogisticObjectiveFunction lof = null;
    if (data is Dataset<object, object>) {
        lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetLabelsArray(), new LogPrior(LogPrior.LogPriorType.Quadratic), dataWeights);
    } else if (data is RVFDataset<object, object>) {
        lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetValuesArray(), data.GetLabelsArray(), new LogPrior(LogPrior.LogPriorType.Quadratic), dataWeights);
    }
    minim = new QNMinimizer(lof);
    weights = minim.Minimize(lof, 1e-4, new double[data.NumFeatureTypes()]);
    featureIndex = data.featureIndex;
    classes[0] = data.labelIndex.Get(0);
    classes[1] = data.labelIndex.Get(1);
    return new LogisticClassifier<L, F>(weights, featureIndex, classes);
}
private NaiveBayesClassifierFactory.NBWeights TrainWeightsCL(int[][] data, int[] labels, int numFeatures, int numClasses) {
    LogConditionalEqConstraintFunction objective = new LogConditionalEqConstraintFunction(numFeatures, numClasses, data, labels, prior, sigma, 0.0);
    IMinimizer<IDiffFunction> min = new QNMinimizer();
    double[] argmin = min.Minimize(objective, 1e-4, objective.Initial());
    double[][][] wts = objective.To3D(argmin);
    double[] priors = objective.Priors(argmin);
    return new NaiveBayesClassifierFactory.NBWeights(priors, wts);
}
/// <summary>Solves the problem using a quasi-Newton method (L-BFGS).</summary>
/// <remarks>
/// Solves the problem using a quasi-Newton method (L-BFGS). The solution
/// is stored in the <c>lambda</c> array of <c>prob</c>.
/// </remarks>
public virtual void SolveQN() {
    CGRunner.LikelihoodFunction df = new CGRunner.LikelihoodFunction(prob, tol, useGaussianPrior, priorSigmaS, sigmaSquareds);
    CGRunner.MonitorFunction monitor = new CGRunner.MonitorFunction(prob, df, filename);
    IMinimizer<IDiffFunction> cgm = new QNMinimizer(monitor, 10);
    // all parameters are started at 0.0
    prob.lambda = cgm.Minimize(df, tol, new double[df.DomainDimension()]);
    PrintOptimizationResults(df, monitor);
}
public virtual LogisticClassifier<L, F> TrainClassifier(GeneralDataset<L, F> data, double l1reg, double tol, LogPrior prior, bool biased) {
    if (data is RVFDataset) {
        ((RVFDataset<L, F>)data).EnsureRealValues();
    }
    if (data.labelIndex.Size() != 2) {
        throw new Exception("LogisticClassifier is only for binary classification!");
    }
    IMinimizer<IDiffFunction> minim;
    if (!biased) {
        LogisticObjectiveFunction lof = null;
        if (data is Dataset<object, object>) {
            lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetLabelsArray(), prior);
        } else if (data is RVFDataset<object, object>) {
            lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetValuesArray(), data.GetLabelsArray(), prior);
        }
        if (l1reg > 0.0) {
            // OWL-QN handles the non-smooth L1 penalty; loaded by reflection
            minim = ReflectionLoading.LoadByReflection("edu.stanford.nlp.optimization.OWLQNMinimizer", l1reg);
        } else {
            minim = new QNMinimizer(lof);
        }
        weights = minim.Minimize(lof, tol, new double[data.NumFeatureTypes()]);
    } else {
        BiasedLogisticObjectiveFunction lof = new BiasedLogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetLabelsArray(), prior);
        if (l1reg > 0.0) {
            minim = ReflectionLoading.LoadByReflection("edu.stanford.nlp.optimization.OWLQNMinimizer", l1reg);
        } else {
            minim = new QNMinimizer(lof);
        }
        weights = minim.Minimize(lof, tol, new double[data.NumFeatureTypes()]);
    }
    featureIndex = data.featureIndex;
    classes[0] = data.labelIndex.Get(0);
    classes[1] = data.labelIndex.Get(1);
    return new LogisticClassifier<L, F>(weights, featureIndex, classes);
}
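For context, a minimal sketch of how TrainClassifier above might be invoked. The dataset and datum construction here are assumptions based on the surrounding API (GeneralDataset, BasicDatum, LogPrior, and the TrainClassifier signature all appear elsewhere in this listing), not confirmed usage:

// Hypothetical usage sketch: train an L1-regularized binary logistic
// classifier with a quadratic prior.
var data = new Dataset<string, string>();
data.Add(new BasicDatum<string, string>(new[] { "feat1", "feat2" }, "positive"));
data.Add(new BasicDatum<string, string>(new[] { "feat3" }, "negative"));

var factory = new LogisticClassifierFactory<string, string>();
LogisticClassifier<string, string> classifier = factory.TrainClassifier(
    data,
    0.5,                                            // l1reg: OWL-QN is used when > 0
    1e-4,                                           // tolerance
    new LogPrior(LogPrior.LogPriorType.Quadratic),  // prior, as in TrainWeightedData above
    false);                                         // biased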
public virtual void TestQNMinimizerRosenbrock() {
    double[] initial = new double[] { 0.0, 0.0 };
    IDiffFunction rf = new MinimizerTest.RosenbrockFunction();
    QNMinimizer qn = new QNMinimizer();
    double[] answer = qn.Minimize(rf, 1e-10, initial);
    System.Console.Error.WriteLine("Answer is: " + Arrays.ToString(answer));
    NUnit.Framework.Assert.AreEqual(1.0, answer[0], 1e-8);
    NUnit.Framework.Assert.AreEqual(1.0, answer[1], 1e-8);
}
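The RosenbrockFunction used in this test is not shown in the listing. A sketch of what such an IDiffFunction implementation could look like, assuming the DomainDimension/ValueAt/DerivativeAt members seen in the other examples here:

// Hypothetical sketch of the 2-D Rosenbrock test function
// f(x, y) = (1 - x)^2 + 100 * (y - x^2)^2, whose unique minimum is 0 at (1, 1),
// matching the assertions in the test above.
public class RosenbrockFunction : IDiffFunction {
    public int DomainDimension() {
        return 2;
    }

    public double ValueAt(double[] x) {
        double a = 1.0 - x[0];
        double b = x[1] - x[0] * x[0];
        return a * a + 100.0 * b * b;
    }

    public double[] DerivativeAt(double[] x) {
        double b = x[1] - x[0] * x[0];
        return new double[] {
            -2.0 * (1.0 - x[0]) - 400.0 * x[0] * b,  // df/dx
            200.0 * b                                // df/dy
        };
    }
}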
public void TestRosenbrockFunction() {
    var minimizer = new QNMinimizer();
    var f = new RosenbrockFunction();
    var x = minimizer.Minimize(f);
    var minValue = f.ValueAt(x);

    Assert.AreEqual(x[0], 1.0, 1e-5);
    Assert.AreEqual(x[1], 1.0, 1e-5);
    Assert.AreEqual(minValue, 0, 1e-10);
}
public void TestQuadraticFunction() {
    var minimizer = new QNMinimizer();
    var f = new QuadraticFunction();
    var x = minimizer.Minimize(f);
    var minValue = f.ValueAt(x);

    Assert.AreEqual(x[0], 1.0, 1e-6);
    Assert.AreEqual(x[1], 5.0, 1e-6);
    Assert.AreEqual(minValue, 10.0, 1e-6);
}
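The QuadraticFunction is likewise not shown; any convex quadratic with minimum value 10 at (1, 5) satisfies these assertions. A standalone sketch of one such function (the wiring to this library's Minimize(f) overload depends on its function type, which is not shown here):

// Hypothetical quadratic consistent with the test above:
// f(x, y) = (x - 1)^2 + (y - 5)^2 + 10, minimized at (1, 5) with value 10.
public static class QuadraticExample {
    public static double ValueAt(double[] x) {
        double dx = x[0] - 1.0;
        double dy = x[1] - 5.0;
        return dx * dx + dy * dy + 10.0;
    }

    // gradient (2(x - 1), 2(y - 5)) is zero exactly at the minimizer (1, 5)
    public static double[] GradientAt(double[] x) {
        return new double[] { 2.0 * (x[0] - 1.0), 2.0 * (x[1] - 5.0) };
    }
}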
protected internal override double[] TrainWeights(int[][][][] data, int[][] labels, IEvaluator[] evaluators, int pruneFeatureItr, double[][][][] featureVals) {
    CRFLogConditionalObjectiveFloatFunction func = new CRFLogConditionalObjectiveFloatFunction(data, labels, windowSize, classIndex, labelIndices, map, flags.backgroundSymbol, flags.sigma);
    cliquePotentialFunctionHelper = func;
    QNMinimizer minimizer;
    if (flags.interimOutputFreq != 0) {
        IFloatFunction monitor = new ResultStoringFloatMonitor(flags.interimOutputFreq, flags.serializeTo);
        minimizer = new QNMinimizer(monitor);
    } else {
        minimizer = new QNMinimizer();
    }
    if (pruneFeatureItr == 0) {
        minimizer.SetM(flags.QNsize);
    } else {
        minimizer.SetM(flags.QNsize2);
    }
    float[] initialWeights;
    if (flags.initialWeights == null) {
        initialWeights = func.Initial();
    } else {
        try {
            log.Info("Reading initial weights from file " + flags.initialWeights);
            using (DataInputStream dis = new DataInputStream(new BufferedInputStream(new GZIPInputStream(new FileInputStream(flags.initialWeights))))) {
                initialWeights = ConvertByteArray.ReadFloatArr(dis);
            }
        } catch (IOException) {
            throw new Exception("Could not read from float initial weight file " + flags.initialWeights);
        }
    }
    log.Info("numWeights: " + initialWeights.Length);
    float[] weightsArray = minimizer.Minimize(func, (float)flags.tolerance, initialWeights);
    return ArrayMath.FloatArrayToDoubleArray(weightsArray);
}
public virtual void FinishTraining() {
    IntCounter<string> tagCounter = new IntCounter<string>();
    WeightedDataset data = new WeightedDataset(datumCounter.Size());
    foreach (TaggedWord word in datumCounter.KeySet()) {
        int count = datumCounter.GetIntCount(word);
        if (trainOnLowCount && count > trainCountThreshold) {
            continue;
        }
        if (functionWordTags.Contains(word.Word())) {
            continue;
        }
        tagCounter.IncrementCount(word.Tag());
        if (trainByType) {
            count = 1;
        }
        data.Add(new BasicDatum(featExtractor.MakeFeatures(word.Word()), word.Tag()), count);
    }
    datumCounter = null;
    tagDist = Distribution.LaplaceSmoothedDistribution(tagCounter, tagCounter.Size(), 0.5);
    tagCounter = null;
    ApplyThresholds(data);
    Verbose("Making classifier...");
    QNMinimizer minim = new QNMinimizer(); //new ResultStoringMonitor(5, "weights"));
    // minim.shutUp();
    LinearClassifierFactory factory = new LinearClassifierFactory(minim);
    factory.SetTol(tol);
    factory.SetSigma(sigma);
    scorer = factory.TrainClassifier(data);
    Verbose("Done training.");
}
public virtual double[] Minimize(IDiffFunction function, double functionTolerance, double[] initial, int maxIterations) {
    Sayln("SGDToQNMinimizer called on function of " + function.DomainDimension() + " variables;");
    // check for stochastic derivatives
    if (!(function is AbstractStochasticCachingDiffFunction)) {
        throw new NotSupportedException();
    }
    AbstractStochasticCachingDiffFunction dfunction = (AbstractStochasticCachingDiffFunction)function;
    dfunction.method = StochasticCalculateMethods.GradientOnly;
    ScaledSGDMinimizer sgd = new ScaledSGDMinimizer(this.gain, this.bSize, this.SGDPasses, 1, this.outputIterationsToFile);
    QNMinimizer qn = new QNMinimizer(this.QNMem, true);
    double[] x = sgd.Minimize(dfunction, functionTolerance, initial, this.SGDPasses);
    // seed the QN phase with the curvature pairs collected during SGD
    QNMinimizer.QNInfo qnInfo = new QNMinimizer.QNInfo(this, sgd.sList, sgd.yList);
    qnInfo.d = sgd.diag;
    // note: the return value of the QN phase is discarded as written; the SGD
    // solution x is what gets returned (see the "Exiting for Debug" log below)
    qn.Minimize(dfunction, functionTolerance, x, this.QNPasses, qnInfo);
    log.Info(string.Empty);
    log.Info("Minimization complete.");
    log.Info(string.Empty);
    log.Info("Exiting for Debug");
    return x;
}
private double[][] TrainWeights() {
    QNMinimizer minimizer = new QNMinimizer(15, true);
    minimizer.UseOWLQN(true, lambda);
    IDiffFunction objective = new ShiftParamsLogisticObjectiveFunction(data, dataValues, ConvertLabels(labels), numClasses, numFeatures + data.Length, numFeatures, prior);
    double[] augmentedThetas = new double[(numClasses - 1) * (numFeatures + data.Length)];
    augmentedThetas = minimizer.Minimize(objective, 1e-4, augmentedThetas);
    // calculate number of non-zero parameters, for debugging
    int count = 0;
    for (int j = numFeatures; j < augmentedThetas.Length; j++) {
        if (augmentedThetas[j] != 0) {
            count++;
        }
    }
    Redwood.Log("NUM NONZERO PARAMETERS: " + count);
    // the target array must be pre-allocated to (numClasses - 1) rows of
    // (numFeatures + data.Length) entries before unflattening into it
    double[][] thetas = new double[numClasses - 1][];
    for (int k = 0; k < thetas.Length; k++) {
        thetas[k] = new double[numFeatures + data.Length];
    }
    LogisticUtils.Unflatten(augmentedThetas, thetas);
    return thetas;
}
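LogisticUtils.Unflatten itself is not shown in this listing; the pre-allocation above assumes row-by-row copying semantics along these lines (an assumption about its behavior, not the library's actual code):

// Hypothetical sketch of the unflattening step: copy a flat parameter vector
// into a pre-allocated jagged array, filling each row in order.
static void Unflatten(double[] flat, double[][] rows) {
    int offset = 0;
    foreach (double[] row in rows) {
        System.Array.Copy(flat, offset, row, 0, row.Length);
        offset += row.Length;
    }
}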
public virtual void ExecuteOneTrainingBatch(IList<Tree> trainingBatch, IdentityHashMap<Tree, byte[]> compressedParses, double[] sumGradSquare) {
    Timing convertTiming = new Timing();
    convertTiming.Doing("Converting trees");
    IdentityHashMap<Tree, IList<Tree>> topParses = CacheParseHypotheses.ConvertToTrees(trainingBatch, compressedParses, op.trainOptions.trainingThreads);
    convertTiming.Done();
    DVParserCostAndGradient gcFunc = new DVParserCostAndGradient(trainingBatch, topParses, dvModel, op);
    double[] theta = dvModel.ParamsToVector();
    // 1: QNMinimizer, 2: SGD, 3: AdaGrad
    switch (Minimizer) {
        case 1: {
            //maxFuncIter = 10;
            QNMinimizer qn = new QNMinimizer(op.trainOptions.qnEstimates, true);
            qn.UseMinPackSearch();
            qn.UseDiagonalScaling();
            qn.TerminateOnAverageImprovement(true);
            qn.TerminateOnNumericalZero(true);
            qn.TerminateOnRelativeNorm(true);
            theta = qn.Minimize(gcFunc, op.trainOptions.qnTolerance, theta, op.trainOptions.qnIterationsPerBatch);
            break;
        }
        case 2: {
            //Minimizer smd = new SGDMinimizer();
            //double tol = 1e-4;
            //theta = smd.minimize(gcFunc, tol, theta, op.trainOptions.qnIterationsPerBatch);
            double lastCost = 0;
            double currCost = 0;
            bool firstTime = true;
            for (int i = 0; i < op.trainOptions.qnIterationsPerBatch; i++) {
                //gcFunc.calculate(theta);
                double[] grad = gcFunc.DerivativeAt(theta);
                currCost = gcFunc.ValueAt(theta);
                log.Info("batch cost: " + currCost);
                // if (!firstTime) {
                //   if (currCost > lastCost) {
                //     System.out.println("HOW IS FUNCTION VALUE INCREASING????!!! ... still updating theta");
                //   }
                //   if (Math.abs(currCost - lastCost) < 0.0001) {
                //     System.out.println("function value is not decreasing. stop");
                //   }
                // } else {
                //   firstTime = false;
                // }
                lastCost = currCost;
                ArrayMath.AddMultInPlace(theta, grad, -1 * op.trainOptions.learningRate);
            }
            break;
        }
        case 3: {
            // AdaGrad: per-feature learning rates scaled by accumulated squared gradients
            double eps = 1e-3;
            double currCost = 0;
            for (int i = 0; i < op.trainOptions.qnIterationsPerBatch; i++) {
                double[] gradf = gcFunc.DerivativeAt(theta);
                currCost = gcFunc.ValueAt(theta);
                log.Info("batch cost: " + currCost);
                for (int feature = 0; feature < gradf.Length; feature++) {
                    sumGradSquare[feature] = sumGradSquare[feature] + gradf[feature] * gradf[feature];
                    theta[feature] = theta[feature] - (op.trainOptions.learningRate * gradf[feature] / (System.Math.Sqrt(sumGradSquare[feature]) + eps));
                }
            }
            break;
        }
        default: {
            throw new ArgumentException("Unsupported minimizer " + Minimizer);
        }
    }
    dvModel.VectorToParams(theta);
}