Example #1
        /** Run KJIT with an importance sampler as the oracle on a number of
         * UCI real datasets. */
        public void RunRealOnlineKEPSampling()
        {
            Rand.Restart(1);

            List <LogisticOpRecords> allRecs = new List <LogisticOpRecords>();

            // Create the Logistic operator instance only once because we want to reuse
            // the same instance across problems (new seeds).
            // stopwatch for measuring inference time for each problem
            Stopwatch watch         = new Stopwatch();
            var       logisticOpIns = new KEPOnlineISLogisticOpIns(
                new LogisticOpRecords(), watch, -8.95);

            logisticOpIns.SetOnlineBatchSizeTrigger(500);
            logisticOpIns.SetImportanceSamplingSize(importanceSamplingSize);
            logisticOpIns.IsRecordMessages       = true;
            logisticOpIns.IsPrintTrueWhenCertain = false;

            // See BayesLinRegFM's BatchLearn() and KEPOnlineISLogisticOpIns().
            // Feature counts to use if the sum kernel is used.
            logisticOpIns.SetFeatures(new int[] { 400, 800 });

            OpControl.Set(typeof(KEPOnlineLogisticOp), logisticOpIns);
            Type logisticOp = typeof(KEPOnlineLogisticOp);

            List <long> allInferTimes       = new List <long>();
            List <long> allOraInferTimes    = new List <long>();
            var         allPosteriors       = new List <VectorGaussian>();
            var         allDotNetPosteriors = new List <VectorGaussian>();

            LogisticOp2.IsCollectLogisticMessages = false;
            LogisticOp2.IsCollectProjMsgs         = false;
            LogisticOp2.IsCollectXMessages        = false;
            string folder = "online_uci/";

            for (int i = 0; i < dataNames.Length; i++)
            {
                Console.WriteLine();
                Console.WriteLine("----------- starting problem {0} --------------", dataAbbrv[i]);
                Console.WriteLine();

                Vector[] X;
                bool[]   Y;
                LoadDataFromMat(dataPaths[i], out X, out Y);
                Console.Write("Y: ");
                StringUtils.PrintArray(Y);

                VectorGaussian    wPost;
                LogisticOpRecords recorder = new LogisticOpRecords();
                // Set a new recorder for a new problem seed
                logisticOpIns.SetRecorder(recorder);
                //			Type logisticOp = typeof(LogisticOp2);

                // start the watch
                watch.Restart();
                // We do not include a bias term, so make sure the datasets are standardized.
                LogisticRegression.InferCoefficientsNoBias(X, Y, out wPost, epIter, logisticOp);
                // stop the watch
                long inferenceTime = watch.ElapsedMilliseconds;
                recorder.inferenceTimes = new List <long>();
                recorder.inferenceTimes.Add(inferenceTime);
                allInferTimes.Add(inferenceTime);

                recorder.postW = MatrixUtils.ToList(wPost);
                allPosteriors.Add(wPost);

                allRecs.Add(recorder);
                // Print a summary of the loaded problem
                Console.WriteLine("n: {0}", X.Length);
                Console.WriteLine("d: {0}", X[0].Count);
                int t = Y.Sum(o => o ? 1 : 0);
                Console.WriteLine("number of true: {0}", t);

                //			Vector meanW = wPost.GetMean();

                Console.WriteLine("Inferred w: ");
                Console.WriteLine(wPost);

                // Run Infer.NET's operator on the same data
                VectorGaussian dotNetPostW;
                Stopwatch      oraWatch = new Stopwatch();
                oraWatch.Start();
                LogisticRegression.InferCoefficientsNoBias(X, Y, out dotNetPostW,
                                                           epIter, typeof(LogisticOp2));
                long oraInferTime = oraWatch.ElapsedMilliseconds;
                allOraInferTimes.Add(oraInferTime);
                recorder.dotNetPostW = MatrixUtils.ToList <VectorGaussian>(dotNetPostW);
                allDotNetPosteriors.Add(dotNetPostW);


                // write the records to a file
                string fname = string.Format("kjit_is{0}_{1}_iter{2}.mat",
                                             importanceSamplingSize, dataAbbrv[i], epIter);
                string recordPath = Config.PathToSavedFile(folder + fname);
                var    extra      = new Dictionary <string, object>();
                // MatlabWriter cannot write int
                extra.Add("d", (double)X[0].Count);
                extra.Add("n", (double)X.Length);
                extra.Add("epIter", (double)epIter);

                extra.Add("X", MatrixUtils.StackColumns(X));
                extra.Add("Y", MatrixUtils.ToDouble(Y));
                recorder.WriteRecords(recordPath, extra);
            }
            // merge all records and write
            LogisticOpRecords merged = LogisticOpRecords.Merge(allRecs.ToArray());

            merged.inferenceTimes = allInferTimes;
            merged.dotNetPostW    = allDotNetPosteriors;
            merged.postW          = allPosteriors;

            string fnameM = string.Format("kjit_is{0}_uci{1}_iter{2}.mat",
                                          importanceSamplingSize, dataAbbrv.Length, epIter);
            string recordPathM = Config.PathToSavedFile(folder + fnameM);
            var    allExtra    = new Dictionary <string, object>();

            double[] oraInferTimesArr = allOraInferTimes.Select(t => (double)t).ToArray();
            allExtra.Add("oraInferTimes", Vector.FromArray(oraInferTimesArr));
            merged.WriteRecords(recordPathM, allExtra);
        }
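
RunRealOnlineKEPSampling() above references several class-level members (dataNames, dataAbbrv, dataPaths, epIter, importanceSamplingSize) that are not part of this snippet. The declarations below are a minimal sketch with hypothetical placeholder values, added only to make the example self-contained; the dataset lists and hyperparameter values used in the repository may differ.

        // Hypothetical fields assumed by RunRealOnlineKEPSampling(); values are placeholders.
        private string[] dataNames = { "banknote", "blood_transfusion", "fertility" };
        private string[] dataAbbrv = { "bank", "blood", "fert" };
        private string[] dataPaths = { "data/banknote.mat", "data/blood_transfusion.mat", "data/fertility.mat" };
        private int      epIter    = 10;      // EP iterations per problem
        private int      importanceSamplingSize = 100000; // samples used by the importance sampling oracle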
Example #2
        public void RunOnlineKEPSampling()
        {
            // Kernel EP with importance sampling.

            /**
             * There is only one W, just like in Ali's paper.
             * In practice, we typically observe multiple sets of observations
             * on which we want to do inference under the same model with the
             * same parameters.
             */
            Rand.Restart(init_fixed_seed);
            Vector w = Vector.Zero(d);

            Rand.Normal(Vector.Zero(d), PositiveDefiniteMatrix.Identity(d), w);

            List <LogisticOpRecords> allRecs = new List <LogisticOpRecords>();

            // Create the Logistic operator instance only once because we want to reuse
            // the same instance across problems (new seeds).
            // stopwatch for measuring inference time for each problem
            Stopwatch watch         = new Stopwatch();
            var       logisticOpIns = new KEPOnlineISLogisticOpIns(
                new LogisticOpRecords(), watch, -8.5);

            logisticOpIns.SetImportanceSamplingSize(importanceSamplingSize);
            logisticOpIns.IsRecordMessages       = true;
            logisticOpIns.IsPrintTrueWhenCertain = false;
            /** Use a mixture proposal or not. */
            logisticOpIns.isGaussianOp.useMixtureProposal = false;
            logisticOpIns.SetFeatures(new int[] { 300, 500 });

            OpControl.Set(typeof(KEPOnlineLogisticOp), logisticOpIns);
            Type logisticOp = typeof(KEPOnlineLogisticOp);


            List <long> allInferTimes       = new List <long>();
            var         allPosteriors       = new List <VectorGaussian>();
            var         allDotNetPosteriors = new List <VectorGaussian>();

            LogisticOp2.IsCollectLogisticMessages = false;
            LogisticOp2.IsCollectProjMsgs         = false;
            LogisticOp2.IsCollectXMessages        = false;
            for (int seed = seed_from; seed <= seed_to; seed++)
            {
                Rand.Restart(seed);
                double b = 0;
                // combine the bias term into W
                Vector[] X;
                bool[]   Y;
                LogisticRegression.GenData(n, w, b, out X, out Y, seed);

                Console.Write("Y: ");
                StringUtils.PrintArray(Y);

                VectorGaussian wPost;

                LogisticOpRecords recorder = new LogisticOpRecords();
                // Set a new recorder for a new problem seed
                logisticOpIns.SetRecorder(recorder);
                //			Type logisticOp = typeof(LogisticOp2);

                // start the watch
                watch.Restart();
                LogisticRegression.InferCoefficientsNoBias(X, Y, out wPost, epIter, logisticOp);
                // stop the watch
                long inferenceTime = watch.ElapsedMilliseconds;
                recorder.inferenceTimes = new List <long>();
                recorder.inferenceTimes.Add(inferenceTime);
                allInferTimes.Add(inferenceTime);

                recorder.postW = MatrixUtils.ToList(wPost);
                allPosteriors.Add(wPost);

                allRecs.Add(recorder);
                // Print a summary of the problem
                Console.WriteLine("n: {0}", n);
                Console.WriteLine("d: {0}", d);
                int t = Y.Sum(o => o ? 1 : 0);
                Console.WriteLine("number of true: {0}", t);
                Console.WriteLine("True bias: {0}", b);
                //			Vector meanW = wPost.GetMean();

                Console.WriteLine("True w: {0}", w);
                Console.WriteLine("Inferred w: ");
                Console.WriteLine(wPost);

                // Run Infer.NET's operator on the same data
                VectorGaussian dotNetPostW;
                LogisticRegression.InferCoefficientsNoBias(X, Y, out dotNetPostW,
                                                           epIter, typeof(LogisticOp2));
                recorder.dotNetPostW = MatrixUtils.ToList <VectorGaussian>(dotNetPostW);
                allDotNetPosteriors.Add(dotNetPostW);
                // write the records to a file
                string fname = string.Format("rec_onlinekep_is{0}_n{1}_logistic_iter{2}_s{3}.mat",
                                             importanceSamplingSize, n, epIter, seed);
                string recordPath = Config.PathToSavedFile(fname);
                var    extra      = new Dictionary <string, object>();
                // MatlabWriter cannot write int
                extra.Add("d", (double)d);
                extra.Add("n", (double)n);
                extra.Add("epIter", (double)epIter);
                extra.Add("trueW", w);
                extra.Add("X", MatrixUtils.StackColumns(X));
                extra.Add("Y", MatrixUtils.ToDouble(Y));
                recorder.WriteRecords(recordPath, extra);
            }
            // merge all records and write
            LogisticOpRecords merged = LogisticOpRecords.Merge(allRecs.ToArray());

            merged.inferenceTimes = allInferTimes;
            merged.dotNetPostW    = allDotNetPosteriors;
            merged.postW          = allPosteriors;

            string fnameM = string.Format("rec_onlinekep_is{0}_n{1}_logistic_iter{2}_sf{3}_st{4}.mat",
                                          importanceSamplingSize, n, epIter, seed_from, seed_to);
            string recordPathM = Config.PathToSavedFile(fnameM);

            merged.WriteRecords(recordPathM);
        }
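
LogisticRegression.GenData(...) is not shown in this snippet. The sketch below illustrates the generative model it is assumed to follow: x_i ~ N(0, I_d) and y_i ~ Bernoulli(sigmoid(w·x_i + b)). This is an illustration only; the repository's generator may differ in details such as input scaling.

        // Sketch of the assumed synthetic-data model behind LogisticRegression.GenData().
        private static void GenDataSketch(int n, Vector w, double b,
                                          out Vector[] X, out bool[] Y)
        {
            int d = w.Count;
            X = new Vector[n];
            Y = new bool[n];
            for (int i = 0; i < n; i++)
            {
                // x_i ~ N(0, I_d)
                X[i] = Vector.Zero(d);
                Rand.Normal(Vector.Zero(d), PositiveDefiniteMatrix.Identity(d), X[i]);
                // y_i ~ Bernoulli(sigmoid(w . x_i + b))
                double p = MMath.Logistic(X[i].Inner(w) + b);
                Y[i] = Rand.Double() < p;
            }
        }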
Example #3
        public static void TestLogisticRegressionNoBias()
        {
            const int seed = 2;

            Rand.Restart(seed);
            const int d      = 10;
            const int n      = 300;
            const int epIter = 10;

            Vector w = Vector.Zero(d);

            Rand.Normal(Vector.Zero(d), PositiveDefiniteMatrix.Identity(d), w);
            double b = 0;

            // combine the bias term into W
            Vector[] X;
            bool[]   Y;
            GenData(n, w, b, out X, out Y);

            Console.Write("Y: ");
            StringUtils.PrintArray(Y);

            VectorGaussian wPost;

            //			string factorOpPath = Config.PathToFactorOperator(
            //				//				"serialFactorOp_fm_kgg_joint_irf500_orf1000_n400_iter5_sf1_st20_ntr5000.mat"
            //				"serialFactorOp_fm_kgg_joint_irf500_orf1000_proj_n400_iter5_sf1_st20_ntr5000.mat"
            //			                      );
            //			KEPLogisticOpInstance opIns = KEPLogisticOpInstance.LoadLogisticOpInstance(factorOpPath);
            //			opIns.SetPrintTrueMessages(true);
            //			OpControl.Add(typeof(KEPLogisticOp), opIns);
            //			Type logisticOp = typeof(KEPLogisticOp);
            LogisticOpRecords records = new LogisticOpRecords();

            OpControl.Add(typeof(KEPOnlineLogisticOp), new KEPOnlineISLogisticOpIns(records));
            Type logisticOp = typeof(KEPOnlineLogisticOp);

            //			Type logisticOp = typeof(LogisticOp2);

            InferCoefficientsNoBias(X, Y, out wPost, epIter, logisticOp);

            // Print a summary of the problem
            Console.WriteLine("n: {0}", n);
            Console.WriteLine("d: {0}", d);
            int t = Y.Sum(o => o ? 1 : 0);

            Console.WriteLine("number of true: {0}", t);
            Console.WriteLine("True bias: {0}", b);
            //			Vector meanW = wPost.GetMean();

            Console.WriteLine("True w: {0}", w);
            Console.WriteLine("Inferred w: ");
            Console.WriteLine(wPost);

            // write the records to a file
            string fname = string.Format("rec_onlinekep_is_logistic_iter{0}_n{1}.mat",
                                         epIter, n);
            string recordPath = Config.PathToSavedFile(fname);
            var    extra      = new Dictionary <string, object>();

            // MatlabWriter cannot write int
            extra.Add("d", (double)d);
            extra.Add("n", (double)n);
            extra.Add("epIter", (double)epIter);
            extra.Add("trueW", w);
            extra.Add("X", MatrixUtils.StackColumns(X));
            extra.Add("Y", MatrixUtils.ToDouble(Y));
            records.WriteRecords(recordPath, extra);
        }
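
InferCoefficientsNoBias(...) itself is not included in this snippet. The sketch below shows the kind of model it is assumed to construct, using the standard Infer.NET modelling API (Variable, InferenceEngine, ExpectationPropagation). It is an assumption-based illustration: the repository's method additionally accepts a logisticOp type so that the logistic factor's messages can be routed through a custom operator, which this sketch omits.

        // Sketch only: EP inference for Bayesian logistic regression without a bias term,
        // assuming the standard Infer.NET modelling API. Not the repository's implementation.
        private static VectorGaussian InferCoefficientsNoBiasSketch(Vector[] X, bool[] Y, int epIter)
        {
            int d = X[0].Count;
            Range item = new Range(X.Length).Named("item");

            // Prior: w ~ N(0, I_d)
            Variable<Vector> w = Variable.VectorGaussianFromMeanAndPrecision(
                Vector.Zero(d), PositiveDefiniteMatrix.Identity(d)).Named("w");

            VariableArray<Vector> x = Variable.Array<Vector>(item).Named("x");
            VariableArray<bool>   y = Variable.Array<bool>(item).Named("y");
            using (Variable.ForEach(item))
            {
                // y_i ~ Bernoulli(sigmoid(w . x_i)), no bias term
                Variable<double> score = Variable.InnerProduct(w, x[item]);
                y[item] = Variable.Bernoulli(Variable.Logistic(score));
            }
            x.ObservedValue = X;
            y.ObservedValue = Y;

            var engine = new InferenceEngine(new ExpectationPropagation());
            engine.NumberOfIterations = epIter;
            return engine.Infer<VectorGaussian>(w);
        }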