Example #1
        public virtual void TrainWeightedData(GeneralDataset <L, F> data, float[] dataWeights)
        {
            //Use LogisticClassifierFactory to train instead.
            if (data.labelIndex.Size() != 2)
            {
                throw new Exception("LogisticClassifier is only for binary classification!");
            }
            IMinimizer <IDiffFunction> minim;
            LogisticObjectiveFunction  lof = null;

            if (data is Dataset <object, object> )
            {
                lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetLabelsArray(), prior, dataWeights);
            }
            else
            {
                if (data is RVFDataset <object, object> )
                {
                    lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetValuesArray(), data.GetLabelsArray(), prior, dataWeights);
                }
            }
            minim        = new QNMinimizer(lof);
            weights      = minim.Minimize(lof, 1e-4, new double[data.NumFeatureTypes()]);
            featureIndex = data.featureIndex;
            classes[0]   = data.labelIndex.Get(0);
            classes[1]   = data.labelIndex.Get(1);
        }
Example #2
        private NaiveBayesClassifierFactory.NBWeights TrainWeightsUCL(int[][] data, int[] labels, int numFeatures, int numClasses)
        {
            int[] numValues = NumberValues(data, numFeatures);
            int[] sumValues = new int[numFeatures];
            // sumValues[j] = how many feature-values come before feature j (prefix sum of numValues)
            for (int j = 1; j < numFeatures; j++)
            {
                sumValues[j] = sumValues[j - 1] + numValues[j - 1];
            }
            int[][] newdata = new int[data.Length][];
            for (int i = 0; i < data.Length; i++)
            {
                // allocate the augmented row (slot 0 is the class-prior feature) before filling it
                newdata[i] = new int[numFeatures + 1];
                newdata[i][0] = 0;
                for (int j_1 = 0; j_1 < numFeatures; j_1++)
                {
                    newdata[i][j_1 + 1] = sumValues[j_1] + data[i][j_1] + 1;
                }
            }
            int totalFeatures = sumValues[numFeatures - 1] + numValues[numFeatures - 1] + 1;

            logger.Info("total feats " + totalFeatures);
            LogConditionalObjectiveFunction <L, F> objective = new LogConditionalObjectiveFunction <L, F>(totalFeatures, numClasses, newdata, labels, prior, sigma, 0.0);
            IMinimizer <IDiffFunction>             min       = new QNMinimizer();

            double[]   argmin = min.Minimize(objective, 1e-4, objective.Initial());
            double[][] wts    = objective.To2D(argmin);
            System.Console.Out.WriteLine("weights have dimension " + wts.Length);
            return(new NaiveBayesClassifierFactory.NBWeights(wts, numValues));
        }
Example #3
        public virtual LogisticClassifier <L, F> TrainWeightedData(GeneralDataset <L, F> data, float[] dataWeights)
        {
            if (data is RVFDataset)
            {
                ((RVFDataset <L, F>)data).EnsureRealValues();
            }
            if (data.labelIndex.Size() != 2)
            {
                throw new Exception("LogisticClassifier is only for binary classification!");
            }
            IMinimizer <IDiffFunction> minim;
            LogisticObjectiveFunction  lof = null;

            if (data is Dataset <object, object> )
            {
                lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetLabelsArray(), new LogPrior(LogPrior.LogPriorType.Quadratic), dataWeights);
            }
            else
            {
                if (data is RVFDataset <object, object> )
                {
                    lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetValuesArray(), data.GetLabelsArray(), new LogPrior(LogPrior.LogPriorType.Quadratic), dataWeights);
                }
            }
            minim        = new QNMinimizer(lof);
            weights      = minim.Minimize(lof, 1e-4, new double[data.NumFeatureTypes()]);
            featureIndex = data.featureIndex;
            classes[0]   = data.labelIndex.Get(0);
            classes[1]   = data.labelIndex.Get(1);
            return(new LogisticClassifier <L, F>(weights, featureIndex, classes));
        }
Example #4
        private NaiveBayesClassifierFactory.NBWeights TrainWeightsCL(int[][] data, int[] labels, int numFeatures, int numClasses)
        {
            LogConditionalEqConstraintFunction objective = new LogConditionalEqConstraintFunction(numFeatures, numClasses, data, labels, prior, sigma, 0.0);
            IMinimizer <IDiffFunction>         min       = new QNMinimizer();

            double[]     argmin = min.Minimize(objective, 1e-4, objective.Initial());
            double[][][] wts    = objective.To3D(argmin);
            double[]     priors = objective.Priors(argmin);
            return(new NaiveBayesClassifierFactory.NBWeights(priors, wts));
        }
Example #5
        /// <summary>Solves the problem using a quasi-newton method (L-BFGS).</summary>
        /// <remarks>
        /// Solves the problem using a quasi-newton method (L-BFGS).  The solution
        /// is stored in the <c>lambda</c> array of <c>prob</c>.
        /// </remarks>
        public virtual void SolveQN()
        {
            CGRunner.LikelihoodFunction df      = new CGRunner.LikelihoodFunction(prob, tol, useGaussianPrior, priorSigmaS, sigmaSquareds);
            CGRunner.MonitorFunction    monitor = new CGRunner.MonitorFunction(prob, df, filename);
            IMinimizer <IDiffFunction>  cgm     = new QNMinimizer(monitor, 10);

            // all parameters are started at 0.0
            prob.lambda = cgm.Minimize(df, tol, new double[df.DomainDimension()]);
            PrintOptimizationResults(df, monitor);
        }
Example #6
        public virtual LogisticClassifier <L, F> TrainClassifier(GeneralDataset <L, F> data, double l1reg, double tol, LogPrior prior, bool biased)
        {
            if (data is RVFDataset)
            {
                ((RVFDataset <L, F>)data).EnsureRealValues();
            }
            if (data.labelIndex.Size() != 2)
            {
                throw new Exception("LogisticClassifier is only for binary classification!");
            }
            IMinimizer <IDiffFunction> minim;

            if (!biased)
            {
                LogisticObjectiveFunction lof = null;
                if (data is Dataset <object, object> )
                {
                    lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetLabelsArray(), prior);
                }
                else
                {
                    if (data is RVFDataset <object, object> )
                    {
                        lof = new LogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetValuesArray(), data.GetLabelsArray(), prior);
                    }
                }
                if (l1reg > 0.0)
                {
                    minim = ReflectionLoading.LoadByReflection("edu.stanford.nlp.optimization.OWLQNMinimizer", l1reg);
                }
                else
                {
                    minim = new QNMinimizer(lof);
                }
                weights = minim.Minimize(lof, tol, new double[data.NumFeatureTypes()]);
            }
            else
            {
                BiasedLogisticObjectiveFunction lof = new BiasedLogisticObjectiveFunction(data.NumFeatureTypes(), data.GetDataArray(), data.GetLabelsArray(), prior);
                if (l1reg > 0.0)
                {
                    minim = ReflectionLoading.LoadByReflection("edu.stanford.nlp.optimization.OWLQNMinimizer", l1reg);
                }
                else
                {
                    minim = new QNMinimizer(lof);
                }
                weights = minim.Minimize(lof, tol, new double[data.NumFeatureTypes()]);
            }
            featureIndex = data.featureIndex;
            classes[0]   = data.labelIndex.Get(0);
            classes[1]   = data.labelIndex.Get(1);
            return(new LogisticClassifier <L, F>(weights, featureIndex, classes));
        }
Example #7
        public virtual void TestQNMinimizerRosenbrock()
        {
            double[]      initial = new double[] { 0.0, 0.0 };
            IDiffFunction rf      = new MinimizerTest.RosenbrockFunction();
            QNMinimizer   qn      = new QNMinimizer();

            double[] answer = qn.Minimize(rf, 1e-10, initial);
            System.Console.Error.WriteLine("Answer is: " + Arrays.ToString(answer));
            NUnit.Framework.Assert.AreEqual(1.0, answer[0], 1e-8);
            NUnit.Framework.Assert.AreEqual(1.0, answer[1], 1e-8);
        }
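The pattern in Example #7 generalizes to any objective that exposes a value and a gradient. Below is a minimal sketch, not taken from the library itself, of plugging a custom function into QNMinimizer; it assumes IDiffFunction exposes DomainDimension(), ValueAt(double[]) and DerivativeAt(double[]) as the snippets on this page use them, and the SimpleQuadratic class and MinimizeSimpleQuadratic method are hypothetical names.

        // Hypothetical convex objective: f(x) = (x0 - 3)^2 + (x1 + 1)^2, minimum at (3, -1).
        public class SimpleQuadratic : IDiffFunction
        {
            public int DomainDimension()
            {
                return 2;
            }

            public double ValueAt(double[] x)
            {
                return (x[0] - 3.0) * (x[0] - 3.0) + (x[1] + 1.0) * (x[1] + 1.0);
            }

            public double[] DerivativeAt(double[] x)
            {
                return new double[] { 2.0 * (x[0] - 3.0), 2.0 * (x[1] + 1.0) };
            }
        }

        public void MinimizeSimpleQuadratic()
        {
            QNMinimizer qn = new QNMinimizer();
            // same call shape as Example #7: function, tolerance, starting point
            double[] answer = qn.Minimize(new SimpleQuadratic(), 1e-8, new double[2]);
            System.Console.Out.WriteLine("x = " + answer[0] + ", y = " + answer[1]);
        }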
Example #8
        public void TestRosenbrockFunction()
        {
            var minimizer = new QNMinimizer();
            var f         = new RosenbrockFunction();
            var x         = minimizer.Minimize(f);
            var minValue  = f.ValueAt(x);

            Assert.AreEqual(x[0], 1.0, 1e-5);
            Assert.AreEqual(x[1], 1.0, 1e-5);
            Assert.AreEqual(minValue, 0, 1e-10);
        }
Example #9
        public void TestQuadraticFunction()
        {
            var minimizer = new QNMinimizer();
            var f         = new QuadraticFunction();
            var x         = minimizer.Minimize(f);
            var minValue  = f.ValueAt(x);

            Assert.AreEqual(x[0], 1.0, 0.000001);
            Assert.AreEqual(x[1], 5.0, 0.000001);
            Assert.AreEqual(minValue, 10.0, 0.000001);
        }
        protected internal override double[] TrainWeights(int[][][][] data, int[][] labels, IEvaluator[] evaluators, int pruneFeatureItr, double[][][][] featureVals)
        {
            CRFLogConditionalObjectiveFloatFunction func = new CRFLogConditionalObjectiveFloatFunction(data, labels, windowSize, classIndex, labelIndices, map, flags.backgroundSymbol, flags.sigma);

            cliquePotentialFunctionHelper = func;
            QNMinimizer minimizer;

            if (flags.interimOutputFreq != 0)
            {
                IFloatFunction monitor = new ResultStoringFloatMonitor(flags.interimOutputFreq, flags.serializeTo);
                minimizer = new QNMinimizer(monitor);
            }
            else
            {
                minimizer = new QNMinimizer();
            }
            if (pruneFeatureItr == 0)
            {
                minimizer.SetM(flags.QNsize);
            }
            else
            {
                minimizer.SetM(flags.QNsize2);
            }
            float[] initialWeights;
            if (flags.initialWeights == null)
            {
                initialWeights = func.Initial();
            }
            else
            {
                try
                {
                    log.Info("Reading initial weights from file " + flags.initialWeights);
                    using (DataInputStream dis = new DataInputStream(new BufferedInputStream(new GZIPInputStream(new FileInputStream(flags.initialWeights)))))
                    {
                        initialWeights = ConvertByteArray.ReadFloatArr(dis);
                    }
                }
                catch (IOException)
                {
                    throw new Exception("Could not read from float initial weight file " + flags.initialWeights);
                }
            }
            log.Info("numWeights: " + initialWeights.Length);
            float[] weightsArray = minimizer.Minimize(func, (float)flags.tolerance, initialWeights);
            return(ArrayMath.FloatArrayToDoubleArray(weightsArray));
        }
Example #11
        public virtual void FinishTraining()
        {
            IntCounter <string> tagCounter = new IntCounter <string>();
            WeightedDataset     data       = new WeightedDataset(datumCounter.Size());

            foreach (TaggedWord word in datumCounter.KeySet())
            {
                int count = datumCounter.GetIntCount(word);
                if (trainOnLowCount && count > trainCountThreshold)
                {
                    continue;
                }
                if (functionWordTags.Contains(word.Word()))
                {
                    continue;
                }
                tagCounter.IncrementCount(word.Tag());
                if (trainByType)
                {
                    count = 1;
                }
                data.Add(new BasicDatum(featExtractor.MakeFeatures(word.Word()), word.Tag()), count);
            }
            datumCounter = null;
            tagDist      = Distribution.LaplaceSmoothedDistribution(tagCounter, tagCounter.Size(), 0.5);
            tagCounter   = null;
            ApplyThresholds(data);
            Verbose("Making classifier...");
            QNMinimizer minim = new QNMinimizer();
            //new ResultStoringMonitor(5, "weights"));
            //    minim.shutUp();
            LinearClassifierFactory factory = new LinearClassifierFactory(minim);

            factory.SetTol(tol);
            factory.SetSigma(sigma);
            scorer = factory.TrainClassifier(data);
            Verbose("Done training.");
        }
Example #12
        public virtual double[] Minimize(IDiffFunction function, double functionTolerance, double[] initial, int maxIterations)
        {
            Sayln("SGDToQNMinimizer called on function of " + function.DomainDimension() + " variables;");
            // check for stochastic derivatives
            if (!(function is AbstractStochasticCachingDiffFunction))
            {
                throw new NotSupportedException();
            }
            AbstractStochasticCachingDiffFunction dfunction = (AbstractStochasticCachingDiffFunction)function;

            dfunction.method = StochasticCalculateMethods.GradientOnly;
            ScaledSGDMinimizer sgd = new ScaledSGDMinimizer(this.gain, this.bSize, this.SGDPasses, 1, this.outputIterationsToFile);
            QNMinimizer        qn  = new QNMinimizer(this.QNMem, true);

            double[]           x      = sgd.Minimize(dfunction, functionTolerance, initial, this.SGDPasses);
            QNMinimizer.QNInfo qnInfo = new QNMinimizer.QNInfo(this, sgd.sList, sgd.yList);
            qnInfo.d = sgd.diag;
            qn.Minimize(dfunction, functionTolerance, x, this.QNPasses, qnInfo);
            log.Info(string.Empty);
            log.Info("Minimization complete.");
            log.Info(string.Empty);
            log.Info("Exiting for Debug");
            return(x);
        }
Example #13
        private double[][] TrainWeights()
        {
            QNMinimizer minimizer = new QNMinimizer(15, true);

            minimizer.UseOWLQN(true, lambda);
            IDiffFunction objective = new ShiftParamsLogisticObjectiveFunction(data, dataValues, ConvertLabels(labels), numClasses, numFeatures + data.Length, numFeatures, prior);

            double[] augmentedThetas = new double[(numClasses - 1) * (numFeatures + data.Length)];
            augmentedThetas = minimizer.Minimize(objective, 1e-4, augmentedThetas);
            // calculate number of non-zero parameters, for debugging
            int count = 0;

            for (int j = numFeatures; j < augmentedThetas.Length; j++)
            {
                if (augmentedThetas[j] != 0)
                {
                    count++;
                }
            }
            Redwood.Log("NUM NONZERO PARAMETERS: " + count);
            // allocate one row per non-reference class to receive the unflattened parameters
            double[][] thetas = new double[numClasses - 1][];
            for (int k = 0; k < thetas.Length; k++)
            {
                thetas[k] = new double[numFeatures + data.Length];
            }
            LogisticUtils.Unflatten(augmentedThetas, thetas);
            return(thetas);
        }
Example #14
        public virtual void ExecuteOneTrainingBatch(IList <Tree> trainingBatch, IdentityHashMap <Tree, byte[]> compressedParses, double[] sumGradSquare)
        {
            Timing convertTiming = new Timing();

            convertTiming.Doing("Converting trees");
            IdentityHashMap <Tree, IList <Tree> > topParses = CacheParseHypotheses.ConvertToTrees(trainingBatch, compressedParses, op.trainOptions.trainingThreads);

            convertTiming.Done();
            DVParserCostAndGradient gcFunc = new DVParserCostAndGradient(trainingBatch, topParses, dvModel, op);

            double[] theta = dvModel.ParamsToVector();
            switch (Minimizer)
            {
            case (1):
            {
                //maxFuncIter = 10;
                // 1: QNMinimizer, 2: SGD
                QNMinimizer qn = new QNMinimizer(op.trainOptions.qnEstimates, true);
                qn.UseMinPackSearch();
                qn.UseDiagonalScaling();
                qn.TerminateOnAverageImprovement(true);
                qn.TerminateOnNumericalZero(true);
                qn.TerminateOnRelativeNorm(true);
                theta = qn.Minimize(gcFunc, op.trainOptions.qnTolerance, theta, op.trainOptions.qnIterationsPerBatch);
                break;
            }

            case 2:
            {
                //Minimizer smd = new SGDMinimizer();       double tol = 1e-4;      theta = smd.minimize(gcFunc,tol,theta,op.trainOptions.qnIterationsPerBatch);
                double lastCost  = 0;
                double currCost  = 0;
                bool   firstTime = true;
                for (int i = 0; i < op.trainOptions.qnIterationsPerBatch; i++)
                {
                    //gcFunc.calculate(theta);
                    double[] grad = gcFunc.DerivativeAt(theta);
                    currCost = gcFunc.ValueAt(theta);
                    log.Info("batch cost: " + currCost);
                    //          if(!firstTime){
                    //              if(currCost > lastCost){
                    //                  System.out.println("HOW IS FUNCTION VALUE INCREASING????!!! ... still updating theta");
                    //              }
                    //              if(Math.abs(currCost - lastCost) < 0.0001){
                    //                  System.out.println("function value is not decreasing. stop");
                    //              }
                    //          }else{
                    //              firstTime = false;
                    //          }
                    lastCost = currCost;
                    ArrayMath.AddMultInPlace(theta, grad, -1 * op.trainOptions.learningRate);
                }
                break;
            }

            case 3:
            {
                // AdaGrad
                double eps      = 1e-3;
                double currCost = 0;
                for (int i = 0; i < op.trainOptions.qnIterationsPerBatch; i++)
                {
                    double[] gradf = gcFunc.DerivativeAt(theta);
                    currCost = gcFunc.ValueAt(theta);
                    log.Info("batch cost: " + currCost);
                    for (int feature = 0; feature < gradf.Length; feature++)
                    {
                        sumGradSquare[feature] = sumGradSquare[feature] + gradf[feature] * gradf[feature];
                        theta[feature]         = theta[feature] - (op.trainOptions.learningRate * gradf[feature] / (System.Math.Sqrt(sumGradSquare[feature]) + eps));
                    }
                }
                break;
            }

            default:
            {
                throw new ArgumentException("Unsupported minimizer " + Minimizer);
            }
            }
            dvModel.VectorToParams(theta);
        }