/// <summary>
/// Runs the online experiment: for each resident, sequentially predicts each
/// instance, scores the current model on the holdout set, then retrains the
/// per-resident posterior with the newly revealed label.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="holdoutSet">Holdout set.</param>
/// <param name="priors">Priors.</param>
public void RunOnline(DataSet dataSet, DataSet holdoutSet, Marginals priors)
{
    using (new CodeTimer("Running online experiment: " + Name))
    {
        Console.WriteLine();

        // Reset all result containers; one slot per resident.
        Metrics = new MetricsCollection();
        HoldoutMetrics = new HoldoutMetricsCollection { Metrics = new Metrics[dataSet.NumberOfResidents][] };
        PosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][];
        HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
        IndividualPosteriors = new Marginals[dataSet.NumberOfResidents];

        var accuracy = new double[dataSet.NumberOfResidents][];

        for (int i = 0; i < dataSet.NumberOfResidents; i++)
        {
            var collection = new List<Metrics>();
            HoldoutPosteriorActivities[i] = new Bernoulli[dataSet.NumberOfInstances[i]][];
            accuracy[i] = new double[dataSet.NumberOfInstances[i]];

            // Each resident starts from a fresh copy of the shared priors.
            IndividualPosteriors[i] = new Marginals(priors);
            PosteriorActivities[i] = new Bernoulli[dataSet.NumberOfInstances[i]];

            for (int j = 0; j < dataSet.NumberOfInstances[i]; j++)
            {
                // Predict the activity for the single instance (i, j) under the
                // current per-resident posterior.
                var datum = dataSet.GetSubSet(i, j);
                PosteriorActivities[i][j] = TestModel.Test(datum, IndividualPosteriors[i])[0][0];
                HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSet.GetSubSet(i), IndividualPosteriors[i])[0];

                // Test on holdout set
                var holdoutMetrics = new Metrics { Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i] };
                accuracy[i][j] = holdoutMetrics.AverageAccuracy;

                // PrintPrediction(i, temp[0][0], testLabels[0][i], testScores[0][i]);

                // Now retrain using this label
                IndividualPosteriors[i] = TrainModel.Train(datum, IndividualPosteriors[i], 10);

                collection.Add(holdoutMetrics);
            }

            // PrintPredictions(posteriorActivities.Select(ia => ia[0]).ToArray(), testLabels.Select(ia => ia[0]).ToArray());

            // Online metrics for this resident (computed over the sequential predictions).
            Metrics.Add(new Metrics { Name = Name, Estimates = PosteriorActivities[i], TrueLabels = dataSet.Labels[i] }, true);
            HoldoutMetrics.Metrics[i] = collection.ToArray();

            Console.WriteLine("{0,20}, Resident {1}, Hold out accuracy {2:N2}", Name, i, collection.Average(ia => ia.AverageAccuracy));
        }

        HoldoutMetrics.RecomputeAggregateMetrics();
        Metrics.RecomputeAggregateMetrics();

        // Console.WriteLine("Accuracies " + string.Join(", ", accuracy.ColumnAverage().Select(x => x.ToString("N2"))));
        // Console.WriteLine("Std. dev.  " + string.Join(", ", accuracy.ColumnStandardDeviation().Select(x => x.ToString("N2"))));
        // Console.WriteLine("Accuracies " + string.Join(", ", HoldoutMetrics.AverageAccuracy.Select(x => x.ToString("N2"))));
    }
}
/// <summary>
/// Runs the active experiment: for each resident, repeatedly asks the active
/// learner for the most informative unlabelled instance (by value of
/// information), reveals its label, retrains, and records holdout metrics.
/// </summary>
/// <param name="dataSet">Data set.</param>
/// <param name="holdoutSet">Holdout set.</param>
/// <param name="numberOfSelections">Number of selections.</param>
/// <param name="priors">Priors.</param>
public void RunActive(DataSet dataSet, DataSet holdoutSet, int numberOfSelections, Marginals priors)
{
    if (ActiveLearners == null)
    {
        throw new InvalidOperationException("Active Learner not provided");
    }

    using (new CodeTimer("Running active experiment: " + Name))
    {
        Console.WriteLine();

        // Reset result containers; one slot per resident.
        HoldoutMetrics = new HoldoutMetricsCollection { Metrics = new Metrics[dataSet.NumberOfResidents][] };
        // Metrics = new MetricsCollection(numberOfSelections);
        PosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][];
        HoldoutPosteriorActivities = new Bernoulli[dataSet.NumberOfResidents][][];
        IndividualPosteriors = new Marginals[dataSet.NumberOfResidents];

        var accuracy = new double[dataSet.NumberOfResidents][];

        for (int i = 0; i < dataSet.NumberOfResidents; i++)
        {
            // NOTE(review): this allocation is overwritten by collection.ToArray()
            // after the selection loop below.
            HoldoutMetrics.Metrics[i] = new Metrics[numberOfSelections];
            var collection = new List<Metrics>();

            // Each resident starts from a fresh copy of the shared priors.
            IndividualPosteriors[i] = new Marginals(priors);

            // Test on holdout set
            HoldoutPosteriorActivities[i] = new Bernoulli[numberOfSelections][];
            accuracy[i] = new double[numberOfSelections];

            var dataSetForResident = dataSet.GetSubSet(i);
            var holdoutSetForResident = holdoutSet.GetSubSet(i);

            // ActiveLearners[i].Transfer(i, 1);
            // var individualPosteriors = new Marginals(priors);

            for (int j = 0; j < numberOfSelections; j++)
            {
                // Predict on the full pool and the holdout set under the current posterior.
                PosteriorActivities[i] = TestModel.Test(dataSetForResident, IndividualPosteriors[i])[0];
                HoldoutPosteriorActivities[i][j] = TestModel.Test(holdoutSetForResident, IndividualPosteriors[i])[0];

                // Stop early once there is nothing left to label; collection stays short.
                if (ActiveLearners[i].Unlabelled.Count == 0)
                {
                    Console.WriteLine("Empty unlabelled set");
                    break;
                }

                // int index = ActiveLearner.GetValueOfInformation(i).ArgMax();

                // Pick the unlabelled instance with maximal value of information.
                int index;
                double val;
                ActiveLearners[i].GetArgMaxVOI(PosteriorActivities[i], IndividualPosteriors[i], out index, out val);

                // Console.WriteLine("Index {0,4}, VOI {1:N4}", index, value);

                // Now retrain using this label
                ActiveLearners[i].UpdateModel(index);
                //IndividualPosteriors [i] = TrainModel.Train( dataSet.GetSubSet(i, ActiveLearners [i].Labelled.ToList()), priors, 10);
                IndividualPosteriors[i] = TrainModel.Train(dataSet.GetSubSet(i, index), IndividualPosteriors[i], 50);

                var metrics = new Metrics { Name = Name, Estimates = HoldoutPosteriorActivities[i][j], TrueLabels = holdoutSet.Labels[i] };
                accuracy[i][j] = metrics.AverageAccuracy;
                collection.Add(metrics);
            }

            // PrintPredictions(posteriorActivities.Select(ia => ia[0]).ToArray(), testLabels.Select(ia => ia[0]).ToArray());

            HoldoutMetrics.Metrics[i] = collection.ToArray();

            Console.WriteLine("{0,20}, Resident {1}, \n\t\tClass ratio {5}, \n\t\tHold out accuracy {2:N2}, \n\t\tAccuracies {3} \n\t\tBriers {4}\n",
                Name, i,
                collection.Average(ia => ia.AverageAccuracy).ToString("N2"),
                string.Join(", ", collection.Select(ia => ia.AverageAccuracy.ToString("N2"))),
                string.Join(", ", collection.Select(ia => ia.BrierScore.ToString("N2"))),
                holdoutSet.Labels[i].Average().ToString("N2")
            );
        }

        HoldoutMetrics.RecomputeAggregateMetrics();
    }
}
/// <summary> /// Runs the batch. /// </summary> /// <param name="dataSet">Data set.</param> /// <param name="priors">Priors.</param> public void RunBatch(DataSet dataSet, Marginals priors, int niter = 1) { // Posteriors = priors; // for (int rr = 0; rr < dataSet.NumberOfResidents; ++rr) // { // Posteriors = TrainModel.Train(dataSet.GetSubSet(rr), Posteriors); // } Posteriors = TrainModel.Train(dataSet, priors, niter); }
/// <summary> /// Train the specified dataSet and priors for the specified number of iterations. /// </summary> /// <param name="dataSet">Data set.</param> /// <param name="priors">Priors.</param> /// <param name="numberOfIterations">Number of iterations.</param> public Marginals Train(DataSet dataSet, Marginals priors, int numberOfIterations = 10) { SetObservedVariables( dataSet.Features, DistributionArrayHelpers.Copy(priors.WeightMeans), DistributionArrayHelpers.Copy(priors.WeightPrecisions), dataSet.Labels); #if !USE_PRECOMPILED_ALGORITHM engine.Algorithm.DefaultNumberOfIterations = numberOfIterations; var posteriorWeights = engine.Infer<Gaussian[][]>(weights); var posteriorWeightMeans = engine.Infer<Gaussian[]>(weightMeans); var posteriorWeightPrecisions = engine.Infer<Gamma[]>(weightPrecisions); #else algorithm.Execute(numberOfIterations); var posteriorWeights = algorithm.Marginal<Gaussian[][]>(weights.Name); var posteriorWeightMeans = algorithm.Marginal<Gaussian[]>(weightMeans.Name); var posteriorWeightPrecisions = algorithm.Marginal<Gamma[]>(weightPrecisions.Name); #endif return new Marginals { Weights = posteriorWeights, WeightMeans = posteriorWeightMeans, WeightPrecisions = posteriorWeightPrecisions }; }
public DataSet GetDataSet(IEnumerable<int> subjects, bool addbias, HashSet<int> selected, double keepProportion = 1.0) { //var rng = new Random( 12345 ); var features = new double[subjects.Count()][][]; var labels = new bool[subjects.Count()][]; var inds = new Dictionary<int, int>(); for (int ss = 0; ss < subjects.Count(); ++ss) inds[subjects.ElementAt(ss)] = ss; for (var ss = 0; ss < subjects.Count(); ++ss) { var feats = new List<double[]>(); var labs = new List<bool>(); for (int ii = 0; ii < N; ++ii) { if (inds.ContainsKey(s[ii]) && inds[s[ii]] == ss) { var thisfeat = new List<double>(); for (int ff = 0; ff < x[ii].Count(); ++ff) if (selected == null || !selected.Any() || selected.Contains(ff)) thisfeat.Add(x[ii][ff]); if (addbias) thisfeat.Add(1.0); feats.Add(thisfeat.ToArray()); labs.Add(y[ii]); } } //var order = Enumerable.Range( 0, labs.Count() ).OrderBy( ii => rng.NextDouble() ); var nKeep = Convert.ToInt32(keepProportion * feats.Count()); //features [ss] = order.Select( ii => feats[ii] ).ToArray(); //labels [ss] = order.Select( ii => labs[ii] ).ToArray(); features[ss] = feats.Take(nKeep).ToArray(); labels[ss] = labs.Take(nKeep).ToArray(); } var dataset = new DataSet { Features = features, Labels = labels }; return dataset; }
/// <summary> /// Test the specified dataSet and priors. /// </summary> /// <param name="dataSet">Data set.</param> /// <param name="priors">Priors.</param> public Bernoulli[][] Test(DataSet dataSet, Marginals priors) { SetObservedVariables( dataSet.Features, DistributionArrayHelpers.Copy(priors.WeightMeans), DistributionArrayHelpers.Copy(priors.WeightPrecisions)); #if !USE_PRECOMPILED_ALGORITHM var posteriorActivities = engine.Infer<Bernoulli[][]>(activities); #else algorithm.Execute(1); var posteriorActivities = algorithm.Marginal<Bernoulli[][]>(activities.Name); #endif return posteriorActivities; }
public Bernoulli ComputeEvidence(DataSet dataSet, Marginals priors) { SetObservedVariables( dataSet.Features, DistributionArrayHelpers.Copy(priors.WeightMeans), DistributionArrayHelpers.Copy(priors.WeightPrecisions), dataSet.Labels); engine.Algorithm.DefaultNumberOfIterations = 1; return engine.Infer<Bernoulli>(evidence); }
/// <summary> /// Generate the data using the specified noisy example proportion. /// </summary> /// <param name="noisyExampleProportion">Noisy example proportion.</param> /// <param name="holdout">If set to <c>true</c> holdout.</param> public void Generate(double noisyExampleProportion, int numberOfInstances, bool holdout = false) { if (NumberOfActivities != 2) { throw new InvalidOperationException("This version of the function is for binary data only"); } if (numberOfInstances == 0) { return; } if (Weights == null) { ComputeWeights(); } // int numberOfInstances = holdout ? NumberOfHoldoutInstances : NumberOfInstances; var scores = new double[NumberOfResidents][]; var features = new double[NumberOfResidents][][]; var labels = new bool[NumberOfResidents][]; int numFeaturesIncludingBias = NumberOfFeatures + (UseBias ? 1 : 0); for (int i = 0; i < NumberOfResidents; i++) { features[i] = new double[numberOfInstances][]; scores[i] = new double[numberOfInstances]; labels[i] = new bool[numberOfInstances]; // Generate weight per feature, and then sample from that per user, for this to match the model for (int j = 0; j < numberOfInstances; j++) { bool noisyExample = Rand.Double() > noisyExampleProportion; features[i][j] = new double[numFeaturesIncludingBias]; var products = new double[numFeaturesIncludingBias]; for (int k = 0; k < numFeaturesIncludingBias; k++) { // double feature = Rand.Double() > noisyExampleProportion ? (double)Rand.Int(2) - 0.5 : 0.0; // Rand.Double() - 0.5 : 0.0; double feature = noisyExample ? Rand.Double() - 0.5 : 0.0; // (double)Rand.Int(2); features[i][j][k] = (k == NumberOfFeatures) ? -1 : feature; products[k] = Weights[i][k] * features[i][j][k]; } scores[i][j] = new Gaussian(products.Sum(), 1).Sample(); labels[i][j] = scores[i][j] > 0; } } if (holdout) { HoldoutSet = new DataSet { Features = features, Labels = labels }; } else { DataSet = new DataSet { Features = features, Labels = labels }; } }
public void SplitTrainTest(double trainProportion, out DataSet trainSet, out DataSet testSet) { if (trainProportion < 0.0 || trainProportion > 1.0) { throw new ArgumentOutOfRangeException("trainProportion"); } var counts = Labels.Select(ia => (int)Math.Ceiling(trainProportion * (double)ia.Length)).ToArray(); trainSet = new DataSet { Features = Features.Select((ia, i) => ia.Take(counts[i]).ToArray()).ToArray(), Labels = Labels.Select((ia, i) => ia.Take(counts[i]).ToArray()).ToArray() }; testSet = new DataSet { Features = Features.Select((ia, i) => ia.Skip(counts[i]).ToArray()).ToArray(), Labels = Labels.Select((ia, i) => ia.Skip(counts[i]).ToArray()).ToArray() }; }
/// <summary> /// Gets the subset. /// </summary> /// <returns>The subset.</returns> /// <param name="residents">Residents.</param> /// <param name="indices">Indices.</param> public DataSet GetSubSet(IList<int> residents, IList<int> indices) { var dataSet = new DataSet { Features = new[] { new double[indices.Count][] }, Labels = new[] { new bool[indices.Count] } }; for (int i = 0; i < residents.Count; i++) { for (int j = 0; j < indices.Count; j++) { int resident = residents[i]; int index = indices[j]; dataSet.Features[i][j] = Features[resident][index]; dataSet.Labels[i][j] = Labels[resident][index]; } } return dataSet; }