/// <summary>
/// Creates a copy of <paramref name="srcData"/> restricted to a subset of its children.
/// </summary>
/// <param name="srcData">The data set whose allergen list and test results are copied.</param>
/// <param name="indices">
/// Positions, within <paramref name="srcData"/>, of the children to keep. The array is cloned.
/// When <c>null</c>, every child of the source is included in its original order.
/// </param>
public AllergenData(AllergenData srcData, int[] indices = null)
{
    if (indices == null)
    {
        this.IndicesToIncludedChildren = Util.ArrayInit(srcData.NumChildren, i => i);
    }
    else
    {
        this.IndicesToIncludedChildren = (int[])indices.Clone();
    }

    int childCount = this.IndicesToIncludedChildren.Length;
    this.Allergens = srcData.Allergens;

    // Allocate [year][child][allergen] tables with every entry missing (null);
    // the loops below overwrite them with the source values.
    this.SkinTestData = Util.ArrayInit(
        AllergenData.NumYears,
        y => Util.ArrayInit(childCount, n => Util.ArrayInit(srcData.NumAllergens, a => (int?)null)));
    this.IgeTestData = Util.ArrayInit(
        AllergenData.NumYears,
        y => Util.ArrayInit(childCount, n => Util.ArrayInit(srcData.NumAllergens, a => (int?)null)));

    for (int year = 0; year < AllergenData.NumYears; year++)
    {
        for (int child = 0; child < childCount; child++)
        {
            // Position of this child in the source data set.
            int srcChild = this.IndicesToIncludedChildren[child];

            for (int allergen = 0; allergen < srcData.NumAllergens; allergen++)
            {
                this.SkinTestData[year][child][allergen] = srcData.SkinTestData[year][srcChild][allergen];
                this.IgeTestData[year][child][allergen] = srcData.IgeTestData[year][srcChild][allergen];
            }
        }
    }

    setStatisticsFromData();
}
/// <summary>
/// Builds a new <see cref="AllergenData"/> holding the skin and IgE test results of
/// <paramref name="srcData"/> for every allergen except those listed for removal.
/// </summary>
/// <param name="srcData">The data set to copy test results from.</param>
/// <param name="allergensToRemove">Names of the allergens to leave out of the result.</param>
/// <returns>A new data set with the reduced allergen list and recomputed statistics.</returns>
public static AllergenData WithAllergensRemoved(AllergenData srcData, List<string> allergensToRemove)
{
    AllergenData result = new AllergenData();

    // Map from the source's own allergen list rather than the static AllergensInFile:
    // the copy loop below indexes srcData's arrays by srcData.NumAllergens, so the mapping
    // has to describe exactly those columns (e.g. when srcData was itself already reduced).
    var srcAllergens = srcData.Allergens;
    result.Allergens = srcAllergens.Except(allergensToRemove).ToList();

    // indexMapper[a] is the column of source allergen a in the result, or -1 if it was removed.
    int[] indexMapper = srcAllergens.Select(name => result.Allergens.IndexOf(name)).ToArray();

    // Allocate [year][child][allergen] tables with every entry missing (null).
    result.SkinTestData = Util.ArrayInit(
        AllergenData.NumYears,
        y => Util.ArrayInit(srcData.NumChildren, n => Util.ArrayInit(result.NumAllergens, a => (int?)null)));
    result.IgeTestData = Util.ArrayInit(
        AllergenData.NumYears,
        y => Util.ArrayInit(srcData.NumChildren, n => Util.ArrayInit(result.NumAllergens, a => (int?)null)));

    for (int n = 0; n < srcData.NumChildren; n++)
    {
        for (int a = 0; a < srcData.NumAllergens; a++)
        {
            int mappedAllergenIndex = indexMapper[a];
            if (mappedAllergenIndex < 0)
            {
                // This allergen was removed; it has no column in the result.
                continue;
            }

            for (int y = 0; y < AllergenData.NumYears; y++)
            {
                result.SkinTestData[y][n][mappedAllergenIndex] = srcData.SkinTestData[y][n][a];
                result.IgeTestData[y][n][mappedAllergenIndex] = srcData.IgeTestData[y][n][a];
            }
        }
    }

    result.setStatisticsFromData();
    return result;
}
/// <summary>
/// Sets the observed values of all prior variables of the model.
/// </summary>
/// <param name="data">The data set; supplies the number of allergens.</param>
/// <param name="numVulnerabilities">The number of vulnerability (sensitization) classes.</param>
/// <param name="beliefs">
/// Beliefs whose distributions are used as priors, or <c>null</c> to use the built-in defaults.
/// </param>
private void SetPriors(AllergenData data, int numVulnerabilities, Beliefs beliefs)
{
    int yearCount = AllergenData.NumYears;
    int childCount = data.DataCountChild.Length; // Not used below; the read is retained from the original.
    int allergenCount = data.NumAllergens;
    bool useUniformClassPrior = true;

    if (beliefs != null)
    {
        // Use the supplied beliefs as priors.
        this.probSensClassPrior.ObservedValue = beliefs.ProbVulnerabilityClass;
        probSens1Prior.ObservedValue = Util.ArrayInit(
            allergenCount, numVulnerabilities, (a, v) => beliefs.ProbSensitizationAgeOne[a, v]);
        probGainPrior.ObservedValue = Util.ArrayInit(
            yearCount,
            y => Util.ArrayInit(allergenCount, numVulnerabilities, (a, v) => beliefs.ProbGainSensitization[y][a, v]));
        probRetainPrior.ObservedValue = Util.ArrayInit(
            yearCount,
            y => Util.ArrayInit(allergenCount, numVulnerabilities, (a, v) => beliefs.ProbRetainSensitization[y][a, v]));
        probSkinIfSensPrior.ObservedValue = beliefs.ProbSkinIfSensitized;
        probSkinIfNotSensPrior.ObservedValue = beliefs.ProbSkinIfNotSensitized;
        probIgeIfSensPrior.ObservedValue = beliefs.ProbIgEIfSensitized;
        probIgeIfNotSensPrior.ObservedValue = beliefs.ProbIgEIfNotSensitized;
        return;
    }

    // Default priors. The class prior is either a point mass on equal class
    // probabilities or a symmetric Dirichlet, depending on the flag above.
    Dirichlet classPrior;
    if (useUniformClassPrior)
    {
        classPrior = Dirichlet.PointMass(Vector.Constant(numVulnerabilities, 1.0 / numVulnerabilities));
    }
    else
    {
        classPrior = Dirichlet.Symmetric(numVulnerabilities, 0.1);
    }

    this.probSensClassPrior.ObservedValue = classPrior;

    // Beta(1, 1) priors on the initial, gain and retain sensitization probabilities.
    this.probSens1Prior.ObservedValue = Util.ArrayInit(
        allergenCount, numVulnerabilities, (a, v) => new Beta(1, 1));
    this.probGainPrior.ObservedValue = Util.ArrayInit(
        yearCount,
        y => Util.ArrayInit(allergenCount, numVulnerabilities, (a, v) => new Beta(1, 1)));
    this.probRetainPrior.ObservedValue = Util.ArrayInit(
        yearCount,
        y => Util.ArrayInit(allergenCount, numVulnerabilities, (a, v) => new Beta(1, 1)));

    // Test-outcome priors: Beta(2, 1) when sensitized, Beta(1, 2) when not.
    this.probSkinIfSensPrior.ObservedValue = new Beta(2.0, 1);
    this.probSkinIfNotSensPrior.ObservedValue = new Beta(1, 2.0);
    this.probIgeIfSensPrior.ObservedValue = new Beta(2.0, 1);
    this.probIgeIfNotSensPrior.ObservedValue = new Beta(1, 2.0);
}
/// <summary>
/// Sets observations and priors on the model, runs inference and collects the posteriors.
/// </summary>
/// <param name="data">The allergen test data to observe.</param>
/// <param name="numVulnerabilities">The number of vulnerability (sensitization) classes.</param>
/// <param name="beliefs">Beliefs to use as priors, or <c>null</c> for the default priors.</param>
/// <param name="initializeMessages">
/// Whether to initialize messages before inference; only honoured when <c>BreakSymmetry</c> is set.
/// </param>
/// <param name="showFactorGraph">Value assigned to the engine's <c>ShowFactorGraph</c> setting.</param>
/// <returns>The inferred posterior beliefs.</returns>
public Beliefs Run(AllergenData data, int numVulnerabilities, Beliefs beliefs = null, bool initializeMessages = true, bool showFactorGraph = false)
{
    this.Engine.ShowFactorGraph = showFactorGraph;

    if (initializeMessages && BreakSymmetry)
    {
        this.InitializeMessages(data, numVulnerabilities);
    }

    this.SetObservations(data, numVulnerabilities);
    this.SetPriors(data, numVulnerabilities, beliefs);

    Engine.NumberOfIterations = this.Iterations;

    // Initializer members are evaluated top to bottom, so the inference
    // requests are issued in the same order as before.
    return new Beliefs
    {
        Sensitization = Engine.Infer<Bernoulli[][,]>(this.sensitized),
        ProbSkinIfSensitized = Engine.Infer<Beta>(this.probSkinIfSens),
        ProbSkinIfNotSensitized = Engine.Infer<Beta>(this.probSkinIfNotSens),
        ProbIgEIfSensitized = Engine.Infer<Beta>(this.probIgeIfSens),
        ProbIgEIfNotSensitized = Engine.Infer<Beta>(this.probIgeIfNotSens),
        ProbSensitizationAgeOne = Engine.Infer<Beta[,]>(this.probSens1),
        ProbGainSensitization = Engine.Infer<Beta[][,]>(this.probGain),
        ProbRetainSensitization = Engine.Infer<Beta[][,]>(this.probRetain),
        VulnerabilityClass = Engine.Infer<Discrete[]>(this.sensClass)
    };
}
/// <summary>
/// Gives every child a randomly sampled point-mass initializer for its vulnerability class.
/// </summary>
/// <param name="data">The data set; its <c>DataCountChild</c> length gives the number of children.</param>
/// <param name="numVulnerabilities">The number of vulnerability (sensitization) classes.</param>
public void InitializeMessages(AllergenData data, int numVulnerabilities)
{
    int childCount = data.DataCountChild.Length;
    var classSampler = Discrete.Uniform(numVulnerabilities);

    // Draw one class per child, in child order.
    var initialClasses = new Discrete[childCount];
    for (int child = 0; child < childCount; child++)
    {
        initialClasses[child] = Discrete.PointMass(classSampler.Sample(), numVulnerabilities);
    }

    sensClassInitializer.ObservedValue = Distribution<int>.Array(initialClasses);
}
/// <summary>
/// Trains one asthma model per requested class count, running the models in parallel.
/// </summary>
/// <param name="data">The training data.</param>
/// <param name="numberSensitizationClasses">The number of sensitization classes for each model.</param>
/// <param name="showFactorGraph">Passed through to each model's <c>Run</c> call.</param>
/// <returns>The beliefs of each model, in the order of <paramref name="numberSensitizationClasses"/>.</returns>
static AsthmaModel.Beliefs[] RunTraining(
    AllergenData data,
    int[] numberSensitizationClasses,
    bool showFactorGraph = false)
{
    int runCount = numberSensitizationClasses.Length;
    var beliefsPerRun = new AsthmaModel.Beliefs[runCount];

    AsthmaModel[] models = Util.ArrayInit(
        runCount,
        n => new AsthmaModel("AsthmaTrainingModel_" + numberSensitizationClasses[n])
        {
            // Increase this for more classes.
            Iterations = AsthmaModelIterations
        });

    Console.WriteLine("Model iteration progress");

    var progressNames = Util.ArrayInit(runCount, n => "AsthmaTrainingModel_" + numberSensitizationClasses[n]);
    var progressTotals = Util.ArrayInit(runCount, n => AsthmaModelIterations);
    var reporter = new ConsoleMultiProgressReporter(progressNames, progressTotals);

    // Random message initialization happens here, outside the parallel loop,
    // so that the sequence of random draws stays deterministic.
    for (int run = 0; run < runCount; run++)
    {
        // Copy the loop variable so each handler captures its own index.
        int reporterSlot = run;
        models[run].InitializeMessages(data, numberSensitizationClasses[run]);
        models[run].ProgressChanged += (e, p) => reporter.UpdateProgress(reporterSlot, p.Iteration + 1);
    }

    Parallel.For(
        0,
        runCount,
        run =>
        {
            beliefsPerRun[run] = models[run].Run(
                data,
                numberSensitizationClasses[run],
                initializeMessages: false,
                showFactorGraph: showFactorGraph);
        });

    return beliefsPerRun;
}
/// <summary>
/// Sets the observed sizes, test results and missing-data masks on the model.
/// </summary>
/// <param name="data">The allergen test data to observe.</param>
/// <param name="numVulnerabilities">The number of vulnerability (sensitization) classes.</param>
/// <param name="initialize">Not used by this method.</param>
private void SetObservations(AllergenData data, int numVulnerabilities, bool initialize = true)
{
    int yearCount = AllergenData.NumYears;
    int childCount = data.DataCountChild.Length;
    int allergenCount = data.NumAllergens;

    // Sizes
    NumYears.ObservedValue = yearCount;
    NumChildren.ObservedValue = childCount;
    NumAllergens.ObservedValue = allergenCount;
    NumVulnerabilities.ObservedValue = numVulnerabilities;

    var skinResults = data.SkinTestData;
    var igeResults = data.IgeTestData;

    // A test counts as positive only when its value is 1; a missing (null)
    // value compares unequal to 1 and is therefore recorded as false here.
    skinTest.ObservedValue = Util.ArrayInit(
        yearCount,
        y => Util.ArrayInit(childCount, allergenCount, (n, a) => skinResults[y][n][a] == 1));
    igeTest.ObservedValue = Util.ArrayInit(
        yearCount,
        y => Util.ArrayInit(childCount, allergenCount, (n, a) => igeResults[y][n][a] == 1));

    // Masks marking which results are missing.
    skinTestMissing.ObservedValue = Util.ArrayInit(
        yearCount,
        y => Util.ArrayInit(childCount, allergenCount, (n, a) => !skinResults[y][n][a].HasValue));
    igeTestMissing.ObservedValue = Util.ArrayInit(
        yearCount,
        y => Util.ArrayInit(childCount, allergenCount, (n, a) => !igeResults[y][n][a].HasValue));
}
/// <summary>
/// Collects the per-allergen data counts for every test and year combination.
/// </summary>
/// <param name="data">The data set whose <c>DataCountAllergenTestYear</c> table is read.</param>
/// <returns>
/// A dictionary keyed by allergen name; each value maps a test name concatenated with a
/// year label to the corresponding count.
/// </returns>
public static Dictionary<string, Dictionary<string, double>> GetDataCounts(AllergenData data)
{
    int testCount = AllergenData.NumTests;
    int yearCount = AllergenData.NumYears;
    int allergenCount = data.Allergens.Count;

    var countsByAllergen = new Dictionary<string, Dictionary<string, double>>();
    for (int allergen = 0; allergen < allergenCount; allergen++)
    {
        string allergenName = data.Allergens[allergen];

        // Years form the outer loop and tests the inner one, which yields the
        // same key order as walking a flat index ty with test = ty % nT, year = ty / nT.
        var countsByTestYear = new Dictionary<string, double>();
        for (int year = 0; year < yearCount; year++)
        {
            for (int test = 0; test < testCount; test++)
            {
                countsByTestYear.Add(
                    AllergenData.Tests[test] + AllergenData.Years[year],
                    (double)data.DataCountAllergenTestYear[allergen][test][year]);
            }
        }

        countsByAllergen.Add(allergenName, countsByTestYear);
    }

    return countsByAllergen;
}
/// <summary>
/// For each class and outcome, formats a Beta distribution built from the positive and
/// negative child counts (each incremented by one) using <c>PlusMinusString</c>.
/// </summary>
/// <param name="beliefs">Inferred beliefs that assign each child to a class.</param>
/// <param name="data">The data set holding the outcomes.</param>
/// <param name="outcomeIndicesToPlot">Indices of the outcomes to report.</param>
/// <returns>A dictionary keyed by class label, then by outcome name.</returns>
public static Dictionary<string, Dictionary<string, string>> GetPlusMinusStringChildrenWithOutcome(AsthmaModel.Beliefs beliefs, AllergenData data, int[] outcomeIndicesToPlot)
{
    var countsPerClass = getChildrenCountsPerOutcome(beliefs, data, outcomeIndicesToPlot);

    var textPerClass = new Dictionary<string, Dictionary<string, string>>();
    foreach (var classEntry in countsPerClass)
    {
        var textPerOutcome = new Dictionary<string, string>();
        foreach (var outcomeEntry in classEntry.Value)
        {
            // Item1 is the positive count and Item2 the negative count.
            var counts = outcomeEntry.Value;
            var posterior = new Beta(counts.Item1 + 1, counts.Item2 + 1);
            textPerOutcome.Add(outcomeEntry.Key, PlusMinusString(posterior));
        }

        textPerClass.Add(classEntry.Key, textPerOutcome);
    }

    return textPerClass;
}
/// <summary>
/// For each class and outcome, computes the percentage of children with a positive outcome
/// among those whose outcome is known (positive or negative).
/// </summary>
/// <param name="beliefs">Inferred beliefs that assign each child to a class.</param>
/// <param name="data">The data set holding the outcomes.</param>
/// <param name="outcomeIndicesToPlot">Indices of the outcomes to report.</param>
/// <returns>
/// A dictionary keyed by class label, then by outcome name; the value is 0 when no child
/// in the class has a known outcome.
/// </returns>
public static Dictionary<string, Dictionary<string, double>> GetPercentageChildrenWithOutcome(AsthmaModel.Beliefs beliefs, AllergenData data, int[] outcomeIndicesToPlot)
{
    var countsPerClass = getChildrenCountsPerOutcome(beliefs, data, outcomeIndicesToPlot);

    var percentagesPerClass = new Dictionary<string, Dictionary<string, double>>();
    foreach (var classEntry in countsPerClass)
    {
        percentagesPerClass[classEntry.Key] = classEntry.Value.ToDictionary(
            outcomeEntry => outcomeEntry.Key,
            outcomeEntry =>
            {
                double positives = outcomeEntry.Value.Item1;
                double known = positives + outcomeEntry.Value.Item2;
                if (known == 0.0)
                {
                    // No known outcomes: report 0 rather than dividing by zero.
                    return 0.0;
                }

                return 100.00 * positives / known;
            });
    }

    return percentagesPerClass;
}
/// <summary>
/// Counts, for each non-empty class and each requested outcome, how many children in the
/// class have a positive (1), negative (0) or missing (null) outcome.
/// </summary>
/// <param name="beliefs">
/// Inferred beliefs; each child is assigned to the mode of its vulnerability-class posterior.
/// </param>
/// <param name="data">The data set holding the outcomes and outcome names.</param>
/// <param name="outcomeIndicesToPlot">Indices of the outcomes to count.</param>
/// <returns>
/// A dictionary keyed by "Class k", where k is the class's rank when classes are ordered by
/// descending size, then by outcome name. Each tuple holds the positive, negative and
/// missing counts. Classes with no children are omitted.
/// </returns>
private static Dictionary<string, Dictionary<string, Tuple<double, double, double>>> getChildrenCountsPerOutcome(AsthmaModel.Beliefs beliefs, AllergenData data, int[] outcomeIndicesToPlot)
{
    // Most probable class for each child.
    var childVulnerabilityClass = beliefs.VulnerabilityClass.Select(cl => cl.GetMode()).ToArray();

    // Number of children assigned to each class.
    var classCount = Enumerable.Range(0, beliefs.NumberOfClasses)
        .Select(c => childVulnerabilityClass.Count(childVuln => childVuln == c))
        .ToArray();

    // Class indices ordered from the largest class to the smallest.
    var sortedClassIndices = classCount
        .Select((classCnt, classIndex) => new { classCnt, classIndex })
        .OrderByDescending(ci => ci.classCnt)
        .Select(ic => ic.classIndex)
        .ToArray();

    return Enumerable.Range(0, beliefs.NumberOfClasses)
        .Where(cx => classCount[sortedClassIndices[cx]] > 0)
        .ToDictionary(
            cx => "Class " + cx.ToString(),
            cx =>
            {
                int c = sortedClassIndices[cx];

                // Materialize the membership list once per class. Left as a deferred query it
                // would be re-evaluated (rescanning every child) for each count of each outcome.
                int[] indicesInClass = Enumerable.Range(0, beliefs.NumberOfChildren)
                    .Where(childIndex => childVulnerabilityClass[childIndex] == c)
                    .ToArray();

                return Enumerable.Range(0, outcomeIndicesToPlot.Length).ToDictionary(
                    o => data.OutcomeIndexToOutcomeName[outcomeIndicesToPlot[o]],
                    o =>
                    {
                        int oIdx = outcomeIndicesToPlot[o];
                        int positiveCount = 0;
                        int negativeCount = 0;
                        int nullCount = 0;

                        // Single pass over the class members; values other than
                        // 1, 0 or null are not counted in any bucket.
                        foreach (int childIndex in indicesInClass)
                        {
                            var outcome = data.Outcomes[oIdx][childIndex];
                            if (outcome == null)
                            {
                                nullCount++;
                            }
                            else if (outcome.Value == 1)
                            {
                                positiveCount++;
                            }
                            else if (outcome.Value == 0)
                            {
                                negativeCount++;
                            }
                        }

                        return Tuple.Create((double)positiveCount, (double)negativeCount, (double)nullCount);
                    });
            });
}
/// <summary>
/// Runs the asthma-model and clinical-trial experiments and passes their results to the outputter.
/// </summary>
/// <param name="outputter">A container for experiments output.</param>
/// <param name="numClassesForMultipleClassRuns">Numbers of classes to use in the multi-class training runs.</param>
public static void RunExperiments(Outputter outputter, int[] numClassesForMultipleClassRuns)
{
    const string modelLabel = "Asthma model";
    bool showFactorGraph = false;

    Rand.Restart(2);

    var tryingOutSection = Contents.S2TryingOutTheModel.NumberedName;
    Console.WriteLine($"\n{tryingOutSection}.\n");
    Console.WriteLine("Running asthma model on synthetic data.");
    Console.WriteLine("Results will slightly differ from those in the book.");

    // Synthetic dataset was created using the following call
    //DatasetSynthesizer.Synthesize(
    //    DatasetSynthesizer.DefaultSensitizationClassCollection,
    //    DatasetSynthesizer.DefaultTests,
    //    DatasetSynthesizer.DefaultDataMissingProbabilities,
    //    Path.Combine("Data", "SyntheticDataset.tsv"),
    //    Rand.Int());
    AllergenData fullData = new AllergenData();
    fullData.LoadDataFromTabDelimitedFile(Path.Combine("Data", "SyntheticDataset.tsv"));
    outputter.Out(fullData, tryingOutSection, modelLabel, "Inputs");

    var dataCounts = AsthmaPlotData.GetDataCounts(fullData);
    outputter.Out(dataCounts, tryingOutSection, modelLabel, "DataCounts");

    // Remove mould and peanut allergens from following analysis.
    AllergenData reducedData = AllergenData.WithAllergensRemoved(fullData, new List<string> { "Mould", "Peanut" });

    // Single-class run.
    int[] singleClass = new int[] { 1 };
    AsthmaModel.Beliefs[] singleClassBeliefs = RunTraining(reducedData, singleClass, showFactorGraph);
    outputter.Out(singleClassBeliefs, tryingOutSection, modelLabel, "TrainingResults");

    // Data for plots
    Dictionary<string, Dictionary<string, object>> singleClassPlots =
        BuildPlotsForAsthmaResults(fullData, reducedData, singleClass, singleClassBeliefs);
    outputter.Out(singleClassPlots, tryingOutSection, modelLabel, "Plots");

    // Clinical trial experiment.
    var gatesSection = Contents.S4ModellingWithGates.NumberedName;
    Console.WriteLine($"\n{gatesSection}.\n");
    Rand.Restart(1);
    var trialResult = RunClinicalTrialExperiment(showFactorGraph);
    outputter.Out(trialResult, gatesSection, "ClinicalTrialPlots");

    // Multi-class runs.
    var discoverySection = Contents.S5DiscoveringSensitizationClasses.NumberedName;
    Console.WriteLine($"\n{discoverySection}.\n");
    Console.WriteLine("Running asthma model on synthetic data.");
    Console.WriteLine("Results will slightly differ from those in the book.");
    outputter.Out(fullData, discoverySection, modelLabel, "Inputs");
    outputter.Out(dataCounts, discoverySection, modelLabel, "DataCounts");

    // With this random seed we get exactly 4 classes in models allowing 5 or 6
    // provided numClassesForMultipleClassRuns == new int[] { 2, 3, 4, 5, 6 }
    // With other seeds we may get additional classes containing very few people, which is normal
    Rand.Restart(3);
    AsthmaModel.Beliefs[] multiClassBeliefs = RunTraining(reducedData, numClassesForMultipleClassRuns, showFactorGraph);
    outputter.Out(multiClassBeliefs, discoverySection, modelLabel, "TrainingResults");

    // Data for plots
    Dictionary<string, Dictionary<string, object>> multiClassPlots =
        BuildPlotsForAsthmaResults(fullData, reducedData, numClassesForMultipleClassRuns, multiClassBeliefs);
    outputter.Out(multiClassPlots, discoverySection, modelLabel, "Plots");

    Console.WriteLine("\nCompleted all experiments.");
}
/// <summary>
/// Gathers the plot data produced by <c>AsthmaPlotData</c> for each training result.
/// </summary>
/// <param name="allData">The full data set; used for the "Asthma" outcome lookups.</param>
/// <param name="data">The data set the models were trained on; supplies the allergen list.</param>
/// <param name="numClasses">The class count of each training run; used to build the result keys.</param>
/// <param name="trainingResults">The beliefs of each training run, in the order of <paramref name="numClasses"/>.</param>
/// <returns>
/// A dictionary keyed by "AsthmaResults" followed by the class count; each value maps a
/// plot name to its plot data.
/// </returns>
private static Dictionary<string, Dictionary<string, object>> BuildPlotsForAsthmaResults(AllergenData allData, AllergenData data, int[] numClasses, AsthmaModel.Beliefs[] trainingResults)
{
    // Each plot type is computed for all training results before the next type is started.
    var perAllergen = trainingResults
        .Select(res => AsthmaPlotData.GetSensitizationPerAllergenPerClass(res, data.Allergens))
        .ToArray();
    var perYear = trainingResults
        .Select(res => AsthmaPlotData.GetSensitizationPerYearPerClass(res, data.Allergens))
        .ToArray();
    var inferredCounts = trainingResults
        .Select(res => AsthmaPlotData.GetNumberOfChildrenWithInferredSensitization(res, data.Allergens))
        .ToArray();
    var gainProbabilities = trainingResults
        .Select(res => AsthmaPlotData.GetTransitionProbabilities(res, false, data.Allergens))
        .ToArray();
    var retainProbabilities = trainingResults
        .Select(res => AsthmaPlotData.GetTransitionProbabilities(res, true, data.Allergens))
        .ToArray();
    var positiveTestProbabilities = trainingResults
        .Select(res => AsthmaPlotData.GetConditionalProbsOfPositiveTestAsStrings(res))
        .ToArray();
    var outcomePercentages = trainingResults
        .Select(res => AsthmaPlotData.GetPercentageChildrenWithOutcome(
            res, allData, new int[] { allData.OutcomeNameToOutcomeIndex["Asthma"] }))
        .ToArray();
    var outcomePlusMinus = trainingResults
        .Select(res => AsthmaPlotData.GetPlusMinusStringChildrenWithOutcome(
            res, allData, new int[] { allData.OutcomeNameToOutcomeIndex["Asthma"] }))
        .ToArray();

    var plotsPerRun = new Dictionary<string, Dictionary<string, object>>();
    for (int run = 0; run < numClasses.Length; run++)
    {
        var plots = new Dictionary<string, object>
        {
            ["AllergenSensitizationPlots"] = perAllergen[run],
            ["YearSensitizationPlots"] = perYear[run],
            ["SensitizationPlots"] = inferredCounts[run],
            ["ProbabilityGainingSensitivity"] = gainProbabilities[run],
            ["ProbabilityRetainingSensitivity"] = retainProbabilities[run],
            ["ConditionalProbsOfPositiveTest"] = positiveTestProbabilities[run],
            ["PercentageChildrenWithOutcome"] = outcomePercentages[run],
            ["PlusMinusChildrenWithOutcome"] = outcomePlusMinus[run]
        };

        // Add throws on a repeated class count, as a duplicate key did before.
        plotsPerRun.Add("AsthmaResults" + numClasses[run], plots);
    }

    return plotsPerRun;
}