public void RunOnlineKEPSampling() {
    // Kernel EP with importance sampling.
    /**
     * Only one W, just like in Ali's paper.
     * In practice, we typically observe multiple sets of observations
     * where we want to do inference on the same model with the same
     * parameter.
     */
    Rand.Restart(init_fixed_seed);
    Vector w = Vector.Zero(d);
    Rand.Normal(Vector.Zero(d), PositiveDefiniteMatrix.Identity(d), w);

    List<LogisticOpRecords> allRecs = new List<LogisticOpRecords>();
    // Create the Logistic operator instance only once because we want to use
    // the same one across problems (new seeds).
    // Stopwatch for measuring the inference time of each problem.
    Stopwatch watch = new Stopwatch();
    var logisticOpIns = new KEPOnlineISLogisticOpIns(
        new LogisticOpRecords(), watch, -8.5);
    logisticOpIns.SetImportanceSamplingSize(importanceSamplingSize);
    logisticOpIns.IsRecordMessages = true;
    logisticOpIns.IsPrintTrueWhenCertain = false;
    /** Use a mixture proposal or not. */
    logisticOpIns.isGaussianOp.useMixtureProposal = false;
    logisticOpIns.SetFeatures(new int[] { 300, 500 });

    OpControl.Set(typeof(KEPOnlineLogisticOp), logisticOpIns);
    Type logisticOp = typeof(KEPOnlineLogisticOp);

    List<long> allInferTimes = new List<long>();
    var allPosteriors = new List<VectorGaussian>();
    var allDotNetPosteriors = new List<VectorGaussian>();

    LogisticOp2.IsCollectLogisticMessages = false;
    LogisticOp2.IsCollectProjMsgs = false;
    LogisticOp2.IsCollectXMessages = false;

    for (int seed = seed_from; seed <= seed_to; seed++) {
        Rand.Restart(seed);
        // Combine the bias term into W.
        double b = 0;
        Vector[] X;
        bool[] Y;
        LogisticRegression.GenData(n, w, b, out X, out Y, seed);

        Console.Write("Y: ");
        StringUtils.PrintArray(Y);

        VectorGaussian wPost;
        LogisticOpRecords recorder = new LogisticOpRecords();
        // Set a new recorder for a new problem seed.
        logisticOpIns.SetRecorder(recorder);
        // Type logisticOp = typeof(LogisticOp2);

        // Start the watch.
        watch.Restart();
        LogisticRegression.InferCoefficientsNoBias(X, Y, out wPost, epIter, logisticOp);
        // Stop the watch.
        long inferenceTime = watch.ElapsedMilliseconds;
        recorder.inferenceTimes = new List<long>();
        recorder.inferenceTimes.Add(inferenceTime);
        allInferTimes.Add(inferenceTime);

        recorder.postW = MatrixUtils.ToList(wPost);
        allPosteriors.Add(wPost);
        allRecs.Add(recorder);

        // Print a summary of the problem and the inferred posterior.
        Console.WriteLine("n: {0}", n);
        Console.WriteLine("d: {0}", d);
        int t = Y.Sum(o => o ? 1 : 0);
        Console.WriteLine("number of true: {0}", t);
        Console.WriteLine("True bias: {0}", b);
        // Vector meanW = wPost.GetMean();
        Console.WriteLine("True w: {0}", w);
        Console.WriteLine("Inferred w: ");
        Console.WriteLine(wPost);

        // Run Infer.NET's operator on the same data.
        VectorGaussian dotNetPostW;
        LogisticRegression.InferCoefficientsNoBias(X, Y, out dotNetPostW,
            epIter, typeof(LogisticOp2));
        recorder.dotNetPostW = MatrixUtils.ToList<VectorGaussian>(dotNetPostW);
        allDotNetPosteriors.Add(dotNetPostW);

        // Write the records to a file.
        string fname = string.Format("rec_onlinekep_is{0}_n{1}_logistic_iter{2}_s{3}.mat",
            importanceSamplingSize, n, epIter, seed);
        string recordPath = Config.PathToSavedFile(fname);
        var extra = new Dictionary<string, object>();
        // MatlabWriter cannot write int.
        extra.Add("d", (double)d);
        extra.Add("n", (double)n);
        extra.Add("epIter", (double)epIter);
        extra.Add("trueW", w);
        extra.Add("X", MatrixUtils.StackColumns(X));
        extra.Add("Y", MatrixUtils.ToDouble(Y));
        recorder.WriteRecords(recordPath, extra);
    }

    // Merge all records and write them out.
    LogisticOpRecords merged = LogisticOpRecords.Merge(allRecs.ToArray());
    merged.inferenceTimes = allInferTimes;
    merged.dotNetPostW = allDotNetPosteriors;
    merged.postW = allPosteriors;

    string fnameM = string.Format("rec_onlinekep_is{0}_n{1}_logistic_iter{2}_sf{3}_st{4}.mat",
        importanceSamplingSize, n, epIter, seed_from, seed_to);
    string recordPathM = Config.PathToSavedFile(fnameM);
    merged.WriteRecords(recordPathM);
}
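/**
 * Minimal sketch, not part of the original experiment code: a hypothetical helper
 * showing one way to compare a per-seed KJIT posterior against the corresponding
 * Infer.NET (LogisticOp2) posterior collected above. It assumes only
 * VectorGaussian.GetMean() (referenced in the commented-out line in the loop)
 * and Vector indexing/Count, which the code above already uses.
 */
private static double PosteriorMeanDistance(VectorGaussian kjitPost, VectorGaussian oraclePost) {
    // Euclidean distance between the two posterior means of the coefficient vector w.
    Vector m1 = kjitPost.GetMean();
    Vector m2 = oraclePost.GetMean();
    double sum = 0;
    for (int i = 0; i < m1.Count; i++) {
        double diff = m1[i] - m2[i];
        sum += diff * diff;
    }
    return Math.Sqrt(sum);
}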
/**
 * Run KJIT with an importance sampler as the oracle on a number of
 * UCI real datasets.
 */
public void RunRealOnlineKEPSampling() {
    Rand.Restart(1);

    List<LogisticOpRecords> allRecs = new List<LogisticOpRecords>();
    // Create the Logistic operator instance only once because we want to use
    // the same one across problems (datasets).
    // Stopwatch for measuring the inference time of each problem.
    Stopwatch watch = new Stopwatch();
    var logisticOpIns = new KEPOnlineISLogisticOpIns(
        new LogisticOpRecords(), watch, -8.95);
    logisticOpIns.SetOnlineBatchSizeTrigger(500);
    logisticOpIns.SetImportanceSamplingSize(importanceSamplingSize);
    logisticOpIns.IsRecordMessages = true;
    logisticOpIns.IsPrintTrueWhenCertain = false;
    // See BayesLinRegFM's BatchLearn() and KEPOnlineISLogisticOpIns().
    // Feature sizes if using the sum kernel.
    logisticOpIns.SetFeatures(new int[] { 400, 800 });

    OpControl.Set(typeof(KEPOnlineLogisticOp), logisticOpIns);
    Type logisticOp = typeof(KEPOnlineLogisticOp);

    List<long> allInferTimes = new List<long>();
    List<long> allOraInferTimes = new List<long>();
    var allPosteriors = new List<VectorGaussian>();
    var allDotNetPosteriors = new List<VectorGaussian>();

    LogisticOp2.IsCollectLogisticMessages = false;
    LogisticOp2.IsCollectProjMsgs = false;
    LogisticOp2.IsCollectXMessages = false;

    string folder = "online_uci/";
    for (int i = 0; i < dataNames.Length; i++) {
        Console.WriteLine();
        Console.WriteLine("----------- starting problem {0} --------------", dataAbbrv[i]);
        Console.WriteLine();

        Vector[] X;
        bool[] Y;
        LoadDataFromMat(dataPaths[i], out X, out Y);

        Console.Write("Y: ");
        StringUtils.PrintArray(Y);

        VectorGaussian wPost;
        LogisticOpRecords recorder = new LogisticOpRecords();
        // Set a new recorder for a new problem.
        logisticOpIns.SetRecorder(recorder);
        // Type logisticOp = typeof(LogisticOp2);

        // Start the watch.
        watch.Restart();
        // We do not include the bias term, so make sure the datasets
        // are standardized.
        LogisticRegression.InferCoefficientsNoBias(X, Y, out wPost, epIter, logisticOp);
        // Stop the watch.
        long inferenceTime = watch.ElapsedMilliseconds;
        recorder.inferenceTimes = new List<long>();
        recorder.inferenceTimes.Add(inferenceTime);
        allInferTimes.Add(inferenceTime);

        recorder.postW = MatrixUtils.ToList(wPost);
        allPosteriors.Add(wPost);
        allRecs.Add(recorder);

        // Print a summary of the problem and the inferred posterior.
        Console.WriteLine("n: {0}", n);
        Console.WriteLine("d: {0}", d);
        int t = Y.Sum(o => o ? 1 : 0);
        Console.WriteLine("number of true: {0}", t);
        // Vector meanW = wPost.GetMean();
        Console.WriteLine("Inferred w: ");
        Console.WriteLine(wPost);

        // Run Infer.NET's operator on the same data.
        VectorGaussian dotNetPostW;
        Stopwatch oraWatch = new Stopwatch();
        oraWatch.Start();
        LogisticRegression.InferCoefficientsNoBias(X, Y, out dotNetPostW,
            epIter, typeof(LogisticOp2));
        long oraInferTime = oraWatch.ElapsedMilliseconds;
        allOraInferTimes.Add(oraInferTime);
        recorder.dotNetPostW = MatrixUtils.ToList<VectorGaussian>(dotNetPostW);
        allDotNetPosteriors.Add(dotNetPostW);

        // Write the records to a file.
        string fname = string.Format("kjit_is{0}_{1}_iter{2}.mat",
            importanceSamplingSize, dataAbbrv[i], epIter);
        string recordPath = Config.PathToSavedFile(folder + fname);
        var extra = new Dictionary<string, object>();
        // MatlabWriter cannot write int.
        extra.Add("d", (double)X[0].Count);
        extra.Add("n", (double)X.Length);
        extra.Add("epIter", (double)epIter);
        extra.Add("X", MatrixUtils.StackColumns(X));
        extra.Add("Y", MatrixUtils.ToDouble(Y));
        recorder.WriteRecords(recordPath, extra);
    }

    // Merge all records and write them out.
    LogisticOpRecords merged = LogisticOpRecords.Merge(allRecs.ToArray());
    merged.inferenceTimes = allInferTimes;
    merged.dotNetPostW = allDotNetPosteriors;
    merged.postW = allPosteriors;

    string fnameM = string.Format("kjit_is{0}_uci{1}_iter{2}.mat",
        importanceSamplingSize, dataAbbrv.Length, epIter);
    string recordPathM = Config.PathToSavedFile(folder + fnameM);
    var allExtra = new Dictionary<string, object>();
    double[] oraInferTimesArr = allOraInferTimes.Select(t => (double)t).ToArray();
    allExtra.Add("oraInferTimes", Vector.FromArray(oraInferTimesArr));
    merged.WriteRecords(recordPathM, allExtra);
}
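/**
 * Minimal sketch, not part of the original code: a hypothetical helper that
 * summarizes the timing lists collected in the methods above (KJIT vs. the
 * Infer.NET oracle), using only standard LINQ over List<long>.
 */
private static void PrintTimingSummary(List<long> kjitTimesMs, List<long> oracleTimesMs) {
    // Report total and average wall-clock inference time in milliseconds.
    Console.WriteLine("KJIT: total {0} ms, avg {1:F1} ms over {2} problems",
        kjitTimesMs.Sum(), kjitTimesMs.Average(), kjitTimesMs.Count);
    Console.WriteLine("Infer.NET oracle: total {0} ms, avg {1:F1} ms over {2} problems",
        oracleTimesMs.Sum(), oracleTimesMs.Average(), oracleTimesMs.Count);
}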