/// <summary>
/// Computes the log-likelihood ratio of the joint model against the independent model,
/// where the independent model is the sum of the target and predictor scored separately
/// under the null distribution.
/// </summary>
/// <remarks>
/// As a side effect, a tab-delimited summary (predictor score, target score, independent
/// log likelihood, joint score, trailing empty field) is appended to
/// <paramref name="stringBuilder"/>. <paramref name="phyloTree"/> is not read here.
/// </remarks>
/// <returns>Joint log likelihood minus independent log likelihood.</returns>
protected override double ComputeLLR(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    StringBuilder stringBuilder,
    double targetMarginal,
    double predictorMarginal,
    Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction,
    Converter<Leaf, SufficientStatistics> targetDistributionClassFunction)
{
    Converter<Leaf, SufficientStatistics> jointClassFunction =
        CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionClassFunction);

    // Independent model, step 1: the target on its own.
    NullModelDistribution.EmpiricalEquilibrium = targetMarginal;
    MessageInitializer targetInitializer = modelScorer.CreateMessageInitializer(
        predictorDistributionClassFunction, targetDistributionClassFunction, NullModelDistribution);
    Score targetScore = modelScorer.ScoreModel(targetInitializer, false);

    // Independent model, step 2: the predictor on its own (the two maps swap roles).
    NullModelDistribution.EmpiricalEquilibrium = predictorMarginal;
    MessageInitializer predictorInitializer = modelScorer.CreateMessageInitializer(
        targetDistributionClassFunction, predictorDistributionClassFunction, NullModelDistribution);
    Score predictorScore = modelScorer.ScoreModel(predictorInitializer, false);

    // Joint model, seeded with the parameters found for the two independent fits.
    DistributionDiscreteJointBinary jointDistn = (DistributionDiscreteJointBinary)AlternativeModelDistribution;
    jointDistn.SetInitialParams(predictorScore.OptimizationParameters, targetScore.OptimizationParameters);
    MessageInitializer jointInitializer = modelScorer.CreateMessageInitializer(null, jointClassFunction, jointDistn);
    Score jointScore = modelScorer.ScoreModel(jointInitializer, false);

    double independentLogLikelihood = targetScore.Loglikelihood + predictorScore.Loglikelihood;
    double jointLogLikelihood = jointScore.Loglikelihood;

    string summary = SpecialFunctions.CreateTabString(
        predictorScore.ToString(NullModelDistribution),
        targetScore.ToString(NullModelDistribution),
        independentLogLikelihood,
        jointScore.ToString(jointDistn),
        "");
    stringBuilder.Append(summary);

    return jointLogLikelihood - independentLogLikelihood;
}
// !! Add support for ModelTesterGaussian
/// <summary>
/// Creates the <see cref="NullDataGenerator"/> named by <paramref name="generatorName"/>.
/// </summary>
/// <param name="generatorName">
/// Case-insensitive generator name: "PredictorPermutation", "TargetPermutation",
/// "PredictorParametric" or "TargetParametric".
/// </param>
/// <param name="modelScorer">Passed to the parametric generators; unused by the permutation generators.</param>
/// <param name="distribution">Must be a <c>DistributionDiscrete</c> for the parametric generators.</param>
/// <returns>A new generator instance.</returns>
/// <exception cref="ArgumentException">The name is not one of the recognized generators.</exception>
public static NullDataGenerator GetInstance(string generatorName, ModelScorer modelScorer, IDistribution distribution)
{
    // The case labels are fixed ASCII keywords, so lower-case with the invariant culture;
    // a culture-sensitive ToLower() can fail to match them (e.g. under Turkish casing rules).
    switch (generatorName.ToLowerInvariant())
    {
        case "predictorpermutation":
            return new NullDataGeneratorPredictorPermutation();

        case "targetpermutation":
            return new NullDataGeneratorTargetPermutation();

        case "predictorparametric":
            //!!HACK!! We can currently only evolve binary data. But even if modelTester is gaussian, the predictor is still binary,
            // so all we need is a discrete model tester with any distribution (they all have the same null distribution, which is all
            // that's used).
            SpecialFunctions.CheckCondition(distribution is DistributionDiscrete, "Parametric data generation is currently only supported for discrete variables");
            return new NullDataGeneratorPredictorParametric(modelScorer, (DistributionDiscrete)distribution);

        case "targetparametric":
            SpecialFunctions.CheckCondition(distribution is DistributionDiscrete, "Parametric data generation is currently only supported for discrete variables");
            return new NullDataGeneratorTargetParametric(modelScorer, (DistributionDiscrete)distribution);

        default:
            throw new ArgumentException("Cannot parse " + generatorName + " into a NullDataGenerator");
    }
}
/// <summary>
/// Builds the tab-delimited report line for one work item using Fisher's exact test on the
/// two-by-two table formed from the row's predictor and target data.
/// </summary>
/// <remarks>
/// <paramref name="modelScorer"/>, <paramref name="phyloTree"/> and <paramref name="workList"/>
/// are not read by this implementation.
/// </remarks>
/// <returns>
/// This instance, the row/work indices, the null index, the two variable names, the table's
/// counts string and the Fisher exact test value, joined by <c>SpecialFunctions.CreateTabString</c>.
/// </returns>
protected override string CreateReportLine(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RowData rowAndTargetData,
    UniversalWorkList workList,
    int rowIndex,
    int workListCount,
    int workIndex)
{
    Dictionary<string, string> row = rowAndTargetData.Row;
    string predictorVariable = row[PhyloTree.PredictorVariableColumnName]; // e.g. hla
    string targetVariable = row[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
    int nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

    Dictionary<string, SufficientStatistics> caseIdToNonMissingPredictorValue = rowAndTargetData.PredictorData;
    Dictionary<string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

    TwoByTwo fishers2by2 = TwoByTwo.GetInstance(
        SufficientStatisticsMapToIntMap(caseIdToNonMissingPredictorValue),
        SufficientStatisticsMapToIntMap(caseIdToNonMissingTargetValue));

    // Read the p-value once and reuse it, rather than evaluating the property a second time.
    double pValue = fishers2by2.FisherExactTest;

    return SpecialFunctions.CreateTabString(
        this, rowIndex, workListCount, workIndex, nullIndex,
        predictorVariable, targetVariable,
        fishers2by2.CountsString(), pValue);
}
/// <summary>
/// Initializes the evaluator with one null distribution (wrapped in a singleton list for the
/// base class), the conditional alternative distribution and the scorer, and stores the
/// <paramref name="includePredictorInScore"/> flag.
/// </summary>
protected ModelEvaluatorDiscreteConditional(
    DistributionDiscreteSingleVariable nullDistn,
    DistributionDiscreteConditional conditionalDistn,
    ModelScorer scorer,
    bool includePredictorInScore)
    : base(
        SpecialFunctions.CreateSingletonList<IDistributionSingleVariable>(nullDistn),
        conditionalDistn,
        scorer)
{
    this._includePredictorInScore = includePredictorInScore;
}
/// <summary>
/// Looks up the named <see cref="NullDataGenerator"/> and wraps it, together with the null-index
/// range and the predictor/target enumerations, in a <see cref="NullDataCollection"/>.
/// </summary>
protected NullDataCollection CreateNullDataGenerator(
    string nullDataGeneratorName,
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RangeCollection nullIndexRangeCollection,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration)
{
    NullDataGenerator generator =
        NullDataGenerator.GetInstance(nullDataGeneratorName, modelScorer, phyloTree, this);

    NullDataCollection collection = NullDataCollection.GetInstance(
        generator,
        nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration);

    return collection;
}
/// <summary>
/// Maximizes the likelihood of the null and the alternative Gaussian models on the given
/// predictor/target maps and packages both scores with the non-missing leaf counts.
/// </summary>
/// <param name="predMap">Maps each leaf to its predictor statistics.</param>
/// <param name="targMap">Maps each leaf to its target statistics.</param>
/// <returns>An <c>EvaluationResultsGaussian</c> built from the two scores and three counts.</returns>
public override EvaluationResults EvaluateModelOnData(
    Converter<Leaf, SufficientStatistics> predMap,
    Converter<Leaf, SufficientStatistics> targMap)
{
    // Leaf counts reported alongside the scores.
    int nonMissingPredictorCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap);
    int nonMissingTargetCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(targMap);
    int nonMissingBothCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap, targMap);

    // Null model.
    MessageInitializerGaussian nullInitializer = MessageInitializerGaussian.GetInstance(
        predMap,
        targMap,
        (DistributionGaussianConditional)NullDistns[0],
        ModelScorer.PhyloTree.LeafCollection);
    Score nullFit = ModelScorer.MaximizeLikelihood(nullInitializer);

    // Alternative model.
    MessageInitializerGaussian altInitializer = MessageInitializerGaussian.GetInstance(
        predMap,
        targMap,
        (DistributionGaussianConditional)AltDistn,
        ModelScorer.PhyloTree.LeafCollection);
    Score altFit = ModelScorer.MaximizeLikelihood(altInitializer);

    return EvaluationResultsGaussian.GetInstance(
        this, nullFit, altFit,
        nonMissingPredictorCount, nonMissingTargetCount, nonMissingBothCount,
        ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Scores the predictor and the target as single variables (the null models), then scores the
/// joint model, and returns all three scores with the observed two-by-two counts.
/// </summary>
/// <remarks>
/// When either variable is reported invariant by <c>ComputeSingleVariableScore</c>, the joint
/// model is not optimized: its log likelihood is the sum of the two single-variable log
/// likelihoods and its parameters are the generated initial parameters.
/// </remarks>
public override EvaluationResults EvaluateModelOnData(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap)
{
    int[] observedCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

    // The predictor is scored with the roles of the two variables swapped, so its table is the
    // transpose of the observed one (TF and FT exchanged).
    int[] predictorCounts = new int[]
    {
        observedCounts[(int)TwoByTwo.ParameterIndex.TT],
        observedCounts[(int)TwoByTwo.ParameterIndex.FT],
        observedCounts[(int)TwoByTwo.ParameterIndex.TF],
        observedCounts[(int)TwoByTwo.ParameterIndex.FF]
    };
    int[] targetCounts = observedCounts;

#if NAIVE_EQUILIBRIUM
    // Use this for backwards compatibility.
    int[] naivePredictorCounts = ModelScorer.PhyloTree.CountsOfLeaves(predictorMap);
    int[] naiveTargetCounts = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
    predictorCounts = naivePredictorCounts;
    targetCounts = naiveTargetCounts;
#endif

    bool predictorIsInvariant;
    bool targetIsInvariant;
    Score predictorNullScore = ComputeSingleVariableScore(
        targetMap, predictorMap, (DistributionDiscreteSingleVariable)NullDistns[0], predictorCounts, out predictorIsInvariant);
    Score targetNullScore = ComputeSingleVariableScore(
        predictorMap, targetMap, (DistributionDiscreteSingleVariable)NullDistns[1], targetCounts, out targetIsInvariant);

    List<Score> nullScores = new List<Score>(2);
    nullScores.Add(predictorNullScore);
    nullScores.Add(targetNullScore);

    OptimizationParameterList initialJointParams = ((DistributionDiscreteJoint)AltDistn).GenerateInitialParams(
        predictorNullScore.OptimizationParameters, targetNullScore.OptimizationParameters);

    Score jointScore;
    if (!predictorIsInvariant && !targetIsInvariant)
    {
        MessageInitializerDiscrete jointInitializer = MessageInitializerDiscrete.GetInstance(
            CreateJointMap(predictorMap, targetMap),
            (DistributionDiscreteJoint)AltDistn,
            initialJointParams,
            ModelScorer.PhyloTree.LeafCollection);
        jointScore = ModelScorer.MaximizeLikelihood(jointInitializer);
    }
    else
    {
        // Cannot compute parameters in this case. They come directly from the single variable params.
        double independentLogLikelihood = predictorNullScore.Loglikelihood + targetNullScore.Loglikelihood;
        jointScore = Score.GetInstance(independentLogLikelihood, initialJointParams, AltDistn);
    }

    return EvaluationResultsDiscrete.GetInstance(this, nullScores, jointScore, observedCounts, ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Re-evaluates the null and alternative Gaussian models on new data using the parameters
/// stored in <paramref name="previousResults"/> instead of re-optimizing them.
/// </summary>
/// <param name="predMap">Maps each leaf to its predictor statistics.</param>
/// <param name="targMap">Maps each leaf to its target statistics.</param>
/// <param name="previousResults">Supplies the parameters and distributions of the earlier null and alternative scores.</param>
/// <returns>An <c>EvaluationResultsGaussian</c> holding the recomputed log likelihoods.</returns>
public override EvaluationResults EvaluateModelOnDataGivenParams(
    Converter<Leaf, SufficientStatistics> predMap,
    Converter<Leaf, SufficientStatistics> targMap,
    EvaluationResults previousResults)
{
    int nonMissingPredictorCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap);
    int nonMissingTargetCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(targMap);
    int nonMissingBothCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap, targMap);

    // Null model: log likelihood of the new data under the previously fitted parameters.
    MessageInitializerGaussian nullInitializer = MessageInitializerGaussian.GetInstance(
        predMap,
        targMap,
        (DistributionGaussianConditional)NullDistns[0],
        ModelScorer.PhyloTree.LeafCollection);
    double nullLogLikelihood = ModelScorer.ComputeLogLikelihoodModelGivenData(
        nullInitializer, previousResults.NullScores[0].OptimizationParameters);
    Score nullFit = Score.GetInstance(
        nullLogLikelihood,
        previousResults.NullScores[0].OptimizationParameters,
        previousResults.NullScores[0].Distribution);

    // Alternative model: same treatment.
    MessageInitializerGaussian altInitializer = MessageInitializerGaussian.GetInstance(
        predMap,
        targMap,
        (DistributionGaussianConditional)AltDistn,
        ModelScorer.PhyloTree.LeafCollection);
    double altLogLikelihood = ModelScorer.ComputeLogLikelihoodModelGivenData(
        altInitializer, previousResults.AltScore.OptimizationParameters);
    Score altFit = Score.GetInstance(
        altLogLikelihood,
        previousResults.AltScore.OptimizationParameters,
        previousResults.AltScore.Distribution);

    return EvaluationResultsGaussian.GetInstance(
        this, nullFit, altFit,
        nonMissingPredictorCount, nonMissingTargetCount, nonMissingBothCount,
        ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Produces the alternative (conditional) model score for one predictor/target pair, using the
/// two-by-two counts to short-circuit the degenerate cases before falling back to optimization.
/// </summary>
/// <param name="predictorMap">Maps each leaf to its predictor statistics.</param>
/// <param name="targetMap">Maps each leaf to its target statistics.</param>
/// <param name="nullScore">Score of the null model; supplies parameters (and, for an invariant predictor, the log likelihood).</param>
/// <param name="fisherCounts">Two-by-two counts indexed by <c>TwoByTwo.ParameterIndex</c>.</param>
/// <returns>The alternative model's score.</returns>
private Score ComputeConditionalVariableScore(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    Score nullScore,
    int[] fisherCounts)
{
    int tt = fisherCounts[(int)TwoByTwo.ParameterIndex.TT];
    int tf = fisherCounts[(int)TwoByTwo.ParameterIndex.TF];
    int ft = fisherCounts[(int)TwoByTwo.ParameterIndex.FT];
    int total = SpecialFunctions.Sum(fisherCounts);

    int targetTrueCount = tt + ft;
    int predictorTrueCount = tt + tf;

    // Target is always true or always false.
    if (targetTrueCount == total || targetTrueCount == 0)
    {
        // With an empty table nothing is defined, so lambda and the log likelihood become NaN.
        double zeroOrNaN = (total == 0) ? double.NaN : 0;

        OptimizationParameterList degenerateParams = AltDistn.GetParameters();
        degenerateParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor1].Value = 0;
        degenerateParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor2].Value = 0;
        degenerateParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda].Value = zeroOrNaN;
        degenerateParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium].Value = (double)targetTrueCount / total;
        return Score.GetInstance(zeroOrNaN, degenerateParams, AltDistn);
    }

    // Predictor is always true or always false: the alternative model reuses the null model's fit.
    if (predictorTrueCount == 0 || predictorTrueCount == total)
    {
        OptimizationParameterList nullParams = nullScore.OptimizationParameters;
        OptimizationParameterList copiedParams = AltDistn.GetParameters();

        DistributionDiscreteSingleVariable.ParameterIndex[] copiedIndices = new DistributionDiscreteSingleVariable.ParameterIndex[]
        {
            DistributionDiscreteSingleVariable.ParameterIndex.Predictor1,
            DistributionDiscreteSingleVariable.ParameterIndex.Predictor2,
            DistributionDiscreteSingleVariable.ParameterIndex.Lambda,
            DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium
        };
        foreach (DistributionDiscreteSingleVariable.ParameterIndex index in copiedIndices)
        {
            copiedParams[(int)index].Value = nullParams[(int)index].Value;
        }

        return Score.GetInstance(nullScore.Loglikelihood, copiedParams, AltDistn);
    }

    // General case: compute the maximum likelihood fit using ModelScorer.
    MessageInitializerDiscrete altInitializer = MessageInitializerDiscrete.GetInstance(
        predictorMap,
        targetMap,
        (DistributionDiscreteConditional)AltDistn,
        nullScore.OptimizationParameters,
        ModelScorer.PhyloTree.LeafCollection);
    return ModelScorer.MaximizeLikelihood(altInitializer);
}
//public static Converter<Leaf, SufficientStatistics> TESTPRED, TESTTARG;
//public static EvaluationResults TESTEVALRESULTS;

/// <summary>
/// Re-evaluates the null and conditional models on new data using the parameters stored in
/// <paramref name="previousResults"/> rather than re-optimizing.
/// </summary>
/// <remarks>
/// When <c>_includePredictorInScore</c> is set, the previous results hold the predictor's null
/// score at index 0 and the target's at index 1; the predictor's log likelihood is then
/// recomputed too and added to the alternative score. Otherwise only the target's null score
/// (index 0) is used.
/// </remarks>
public override EvaluationResults EvaluateModelOnDataGivenParams(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    EvaluationResults previousResults)
{
    int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

    // No counts are handed to the message initializers here; an empty array is passed instead.
    int[] noCounts = new int[0];

    int targetNullIndex = 0;
    if (_includePredictorInScore)
    {
        targetNullIndex = 1;
    }

    // Target under the null model.
    OptimizationParameterList targetNullParams = previousResults.NullScores[targetNullIndex].OptimizationParameters;
    MessageInitializerDiscrete targetNullInitializer = MessageInitializerDiscrete.GetInstance(
        predictorMap, targetMap, NullDistn, noCounts, ModelScorer.PhyloTree.LeafCollection);
    double targetNullLogLikelihood = ModelScorer.ComputeLogLikelihoodModelGivenData(targetNullInitializer, targetNullParams);
    Score targetNullScore = Score.GetInstance(targetNullLogLikelihood, targetNullParams, NullDistn);

    // Target given predictor under the alternative (conditional) model.
    OptimizationParameterList conditionalParams = previousResults.AltScore.OptimizationParameters;
    MessageInitializerDiscrete conditionalInitializer = MessageInitializerDiscrete.GetInstance(
        predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, noCounts, ModelScorer.PhyloTree.LeafCollection);
    double conditionalLogLikelihood = ModelScorer.ComputeLogLikelihoodModelGivenData(conditionalInitializer, conditionalParams);
    Score altScore = Score.GetInstance(conditionalLogLikelihood, conditionalParams, AltDistn);

    List<Score> nullScores = new List<Score>();
    if (_includePredictorInScore)
    {
        // Predictor under the null model (the two maps swap roles).
        OptimizationParameterList predictorNullParams = previousResults.NullScores[0].OptimizationParameters;
        MessageInitializerDiscrete predictorNullInitializer = MessageInitializerDiscrete.GetInstance(
            targetMap, predictorMap, NullDistn, noCounts, ModelScorer.PhyloTree.LeafCollection);
        double predictorNullLogLikelihood = ModelScorer.ComputeLogLikelihoodModelGivenData(predictorNullInitializer, predictorNullParams);
        Score predictorNullScore = Score.GetInstance(predictorNullLogLikelihood, predictorNullParams, NullDistn);
        nullScores.Add(predictorNullScore);

        // The conditional model's altScore doesn't include the predictor's log likelihood. If we're here,
        // we want to add it to make it comparable to joint or reverseConditional.
        altScore = Score.GetInstance(
            altScore.Loglikelihood + predictorNullScore.Loglikelihood,
            altScore.OptimizationParameters,
            altScore.Distribution);
    }
    nullScores.Add(targetNullScore);

    return EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, fisherCounts, ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Re-evaluates the two single-variable null models and the joint model on new data using the
/// parameters stored in <paramref name="previousResults"/> rather than re-optimizing.
/// </summary>
/// <remarks>
/// Index convention, matching <c>EvaluateModelOnData</c>: entry 0 of both <c>NullDistns</c> and
/// <c>NullScores</c> belongs to the predictor, entry 1 to the target. The original code paired
/// the target's parameters with <c>NullDistns[0]</c> and the predictor's with <c>NullDistns[1]</c>;
/// each set of parameters is now evaluated with the distribution it was fitted under.
/// </remarks>
/// <param name="predictorMap">Maps each leaf to its predictor statistics.</param>
/// <param name="targetMap">Maps each leaf to its target statistics.</param>
/// <param name="previousResults">Supplies the previously fitted parameters and distributions.</param>
/// <returns>An <c>EvaluationResultsDiscrete</c> holding the recomputed scores and the observed counts.</returns>
public override EvaluationResults EvaluateModelOnDataGivenParams(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    EvaluationResults previousResults)
{
    const int PredictorIndex = 0;
    const int TargetIndex = 1;

    // NOTE(review): EvaluateModelOnData hands the predictor a transposed count table, whereas the
    // same untransposed table is used for both variables here. Left unchanged - confirm whether
    // the counts matter when the parameters are supplied.
    int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

    // Target under its null model.
    OptimizationParameterList nullParamsTarg = previousResults.NullScores[TargetIndex].OptimizationParameters;
    MessageInitializerDiscrete nullMessageInitializerTarg = MessageInitializerDiscrete.GetInstance(
        predictorMap, targetMap, (DistributionDiscreteSingleVariable)NullDistns[TargetIndex], fisherCounts, ModelScorer.PhyloTree.LeafCollection);
    double nullLLTarg = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerTarg, nullParamsTarg);
    Score nullScoreTarg = Score.GetInstance(nullLLTarg, nullParamsTarg, previousResults.NullScores[TargetIndex].Distribution);

    // Predictor under its null model (the two maps swap roles).
    OptimizationParameterList nullParamsPred = previousResults.NullScores[PredictorIndex].OptimizationParameters;
    MessageInitializerDiscrete nullMessageInitializerPred = MessageInitializerDiscrete.GetInstance(
        targetMap, predictorMap, (DistributionDiscreteSingleVariable)NullDistns[PredictorIndex], fisherCounts, ModelScorer.PhyloTree.LeafCollection);
    double nullLLPred = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerPred, nullParamsPred);
    Score nullScorePred = Score.GetInstance(nullLLPred, nullParamsPred, previousResults.NullScores[PredictorIndex].Distribution);

    List<Score> nullScores = new List<Score>(new Score[] { nullScorePred, nullScoreTarg });

    OptimizationParameterList altParams = previousResults.AltScore.OptimizationParameters;
    double altLL;
    if (((DistributionDiscreteJoint)AltDistn).ParametersCannotBeEvaluated(altParams))
    {
        // We'll get here only if one of the variables is always (or never) true.
        // In this case, the variables must be independent.
        altLL = nullLLTarg + nullLLPred;
    }
    else
    {
        MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(
            CreateJointMap(predictorMap, targetMap), (DistributionDiscreteJoint)AltDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
        altLL = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, altParams);
    }
    Score altScore = Score.GetInstance(altLL, altParams, previousResults.AltScore.Distribution);

    return EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, fisherCounts, ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Creates the <see cref="ModelEvaluator"/> whose base name prefixes
/// <paramref name="nameAndParameters"/>, passing the remainder of the string to that
/// evaluator's own factory.
/// </summary>
/// <remarks>
/// Matching is case-insensitive: the whole input is lower-cased before matching, so the
/// remainder handed to the specific factory is lower-case as well. Prefixes are tested in a
/// fixed order (cross-validate, discrete-conditional-collection, discrete, Gaussian).
/// Lower-casing uses the invariant culture and the prefix test is ordinal, so the result does
/// not depend on the current thread culture.
/// </remarks>
/// <exception cref="ArgumentException">No known evaluator base name prefixes the input.</exception>
public static ModelEvaluator GetInstance(string nameAndParameters, ModelScorer scorer)
{
    nameAndParameters = nameAndParameters.ToLowerInvariant();

    if (nameAndParameters.StartsWith(ModelEvaluatorCrossValidate.BaseName.ToLowerInvariant(), StringComparison.Ordinal))
    {
        return ModelEvaluatorCrossValidate.GetInstance(
            nameAndParameters.Substring(ModelEvaluatorCrossValidate.BaseName.Length), scorer);
    }

    if (nameAndParameters.StartsWith(ModelEvaluatorDiscreteConditionalCollection.BaseName.ToLowerInvariant(), StringComparison.Ordinal))
    {
        return ModelEvaluatorDiscreteConditionalCollection.GetInstance(
            nameAndParameters.Substring(ModelEvaluatorDiscreteConditionalCollection.BaseName.Length), scorer);
    }

    if (nameAndParameters.StartsWith(ModelEvaluatorDiscrete.BaseName.ToLowerInvariant(), StringComparison.Ordinal))
    {
        return ModelEvaluatorDiscrete.GetInstance(
            nameAndParameters.Substring(ModelEvaluatorDiscrete.BaseName.Length), scorer);
    }

    if (nameAndParameters.StartsWith(ModelEvaluatorGaussian.BaseName.ToLowerInvariant(), StringComparison.Ordinal))
    {
        return ModelEvaluatorGaussian.GetInstance(
            nameAndParameters.Substring(ModelEvaluatorGaussian.BaseName.Length), scorer);
    }

    throw new ArgumentException("ModelEvaluator cannot parse " + nameAndParameters);
}
/// <summary>
/// Scores the model twice with the same message initializer - first with
/// <c>useParameter = false</c>, then with <c>useParameter = true</c> - and returns the
/// difference of the two log likelihoods (second minus first).
/// </summary>
/// <remarks>
/// After each pass the score's text is appended to <paramref name="stringBuilder"/> and the
/// score's parameters are stored in <c>AltModelDistribution.InitialParamVals</c>.
/// <paramref name="phyloTree"/> and <paramref name="predictorMarginal"/> are not read here.
/// </remarks>
protected override double ComputeLLR(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    StringBuilder stringBuilder,
    double targetMarginal,
    double predictorMarginal,
    Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction,
    Converter<Leaf, SufficientStatistics> targetDistributionClassFunction)
{
    NullModelDistribution.EmpiricalEquilibrium = targetMarginal;
    NullModelDistribution.InitialParamVals = null;

    MessageInitializer sharedInitializer = modelScorer.CreateMessageInitializer(
        predictorDistributionClassFunction, targetDistributionClassFunction, NullModelDistribution);

    // Slot 0: pass without the parameter; slot 1: pass with it.
    double[] logLikelihoods = new double[2];
    for (int pass = 0; pass < logLikelihoods.Length; pass++)
    {
        bool useParameter = (pass == 1);

        Score passScore = modelScorer.ScoreModel(sharedInitializer, useParameter);
        string scoreText = passScore.ToString(useParameter ? AlternativeModelDistribution : NullModelDistribution);
        stringBuilder.Append(SpecialFunctions.CreateTabString(scoreText, ""));
        logLikelihoods[pass] = passScore.Loglikelihood;

        AltModelDistribution.InitialParamVals = passScore.OptimizationParameters;
        Debug.WriteLine(SpecialFunctions.CreateTabString(
            "AltModelDistribution.InitialParamVals = score.OptimizationParameters",
            passScore.OptimizationParameters));
    }

    return logLikelihoods[1] - logLikelihoods[0];
}
/// <summary>
/// Scores a single variable under <paramref name="nullDistn"/>. If
/// <c>TryGetSingleVariableScoreFromCounts</c> can supply the score directly, that score is used
/// and the variable is flagged invariant; otherwise the likelihood is maximized.
/// </summary>
/// <param name="predictorMap">First map handed to the message initializer.</param>
/// <param name="targetMap">Second map handed to the message initializer.</param>
/// <param name="nullDistn">The single-variable distribution to score under.</param>
/// <param name="fisherCounts">Two-by-two counts for the pair.</param>
/// <param name="variableIsInvariant">Set to true when the score came from the counts alone.</param>
/// <returns>The single-variable score.</returns>
protected Score ComputeSingleVariableScore(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    DistributionDiscreteSingleVariable nullDistn,
    int[] fisherCounts,
    out bool variableIsInvariant)
{
    MessageInitializerDiscrete initializer = MessageInitializerDiscrete.GetInstance(
        predictorMap, targetMap, nullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);

    // Ratio of TwoByTwo.GetRightSum to the table total (NaN for an empty table).
    int rightSum = TwoByTwo.GetRightSum(fisherCounts);
    double p = (double)rightSum / SpecialFunctions.Sum(fisherCounts);

    Score singleVariableScore;
    variableIsInvariant = TryGetSingleVariableScoreFromCounts(initializer, p, out singleVariableScore);
    if (!variableIsInvariant)
    {
        singleVariableScore = ModelScorer.MaximizeLikelihood(initializer);
    }

    return singleVariableScore;
}
/// <summary>
/// Creates a conditional discrete evaluator from a fresh single-variable null distribution and
/// the conditional distribution named by <paramref name="leafDistributionName"/>.
/// </summary>
/// <param name="leafDistributionName">Name passed to <c>DistributionDiscreteConditional.GetInstance</c>.</param>
/// <param name="modelScorer">Scorer the evaluator will use.</param>
/// <param name="includePredictorInScore">Forwarded unchanged to the constructor.</param>
public static ModelEvaluatorDiscreteConditional GetInstance(
    string leafDistributionName,
    ModelScorer modelScorer,
    bool includePredictorInScore)
{
    DistributionDiscreteSingleVariable singleVariableDistn = DistributionDiscreteSingleVariable.GetInstance();
    DistributionDiscreteConditional conditionalDistn = DistributionDiscreteConditional.GetInstance(leafDistributionName);

    ModelEvaluatorDiscreteConditional evaluator = new ModelEvaluatorDiscreteConditional(
        singleVariableDistn, conditionalDistn, modelScorer, includePredictorInScore);
    return evaluator;
}
/// <summary>
/// Convenience overload: creates a conditional discrete evaluator with
/// <c>includePredictorInScore</c> set to false.
/// </summary>
public new static ModelEvaluatorDiscreteConditional GetInstance(string leafDistributionName, ModelScorer modelScorer)
{
    const bool includePredictorInScore = false;
    return GetInstance(leafDistributionName, modelScorer, includePredictorInScore);
}
/// <summary>
/// Stores the null distributions, the alternative distribution and the scorer used by this evaluator.
/// </summary>
protected ModelEvaluator(
    List<IDistributionSingleVariable> nullDistns,
    IDistribution altDistn,
    ModelScorer scorer)
{
    this._nullDistns = nullDistns;
    this._altDistn = altDistn;
    this._scorer = scorer;
}
/// <summary>
/// Forwards the null distributions, the Gaussian conditional alternative distribution and the
/// scorer to the base constructor; no additional state is initialized here.
/// </summary>
protected ModelEvaluatorGaussian(
    List<IDistributionSingleVariable> nullDistns,
    DistributionGaussianConditional altDistn,
    ModelScorer scorer)
    : base(nullDistns, altDistn, scorer)
{
}
/// <summary>
/// Creates a Gaussian evaluator whose null and alternative distributions are both obtained from
/// <c>DistributionGaussianConditional</c> using <paramref name="nameAndParameters"/>.
/// </summary>
public new static ModelEvaluatorGaussian GetInstance(string nameAndParameters, ModelScorer scorer)
{
    IDistributionSingleVariable singleVariableDistn =
        DistributionGaussianConditional.GetSingleVariableInstance(nameAndParameters);
    DistributionGaussianConditional conditionalDistn =
        DistributionGaussianConditional.GetInstance(nameAndParameters);

    // The base class expects a list of null distributions; there is exactly one here.
    List<IDistributionSingleVariable> nullDistns = SpecialFunctions.CreateSingletonList(singleVariableDistn);

    return new ModelEvaluatorGaussian(nullDistns, conditionalDistn, scorer);
}
/// <summary>
/// Runs the tester over the work list built from the predictor and target sparse files and
/// writes one report line per selected work item to a text file in
/// <paramref name="outputDirectoryName"/>.
/// </summary>
/// <remarks>
/// <para>
/// The output file is named
/// <c>shortName.leafDistributionName.nullDataGeneratorName.nullIndexRange.pieceCount.pieceIndexRange[.SkipN].txt</c>.
/// The path is built with <see cref="Path.Combine(string, string)"/> instead of a hard-coded
/// backslash, so it is also valid where the directory separator is not <c>\</c>.
/// </para>
/// <para>
/// Rows listed in <paramref name="skipRowIndexRangeCollectionOrNull"/> are skipped. The
/// remaining rows are assigned a work index by <c>ExtractWorkIndex</c>, and only those whose
/// work index is in <paramref name="pieceIndexRangeCollection"/> are processed. If creating a
/// report line throws <see cref="OutOfMemoryException"/>, the scorer's cache is cleared and the
/// line is retried once.
/// </para>
/// <para><paramref name="optimizerName"/> is not read by this method.</para>
/// </remarks>
public void Run(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    string predictorSparseFileName,
    string targetSparseFileName,
    string leafDistributionName,
    string nullDataGeneratorName,
    KeepTest<Dictionary<string, string>> keepTest,
    RangeCollection skipRowIndexRangeCollectionOrNull,
    string shortName,
    string outputDirectoryName,
    RangeCollection pieceIndexRangeCollection,
    int pieceCount,
    RangeCollection nullIndexRangeCollection,
    string optimizerName)
{
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    Directory.CreateDirectory(outputDirectoryName);

    string skipSuffix = skipRowIndexRangeCollectionOrNull == null
        ? ""
        : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString();
    string outputFileBaseName = string.Format("{0}.{1}.{2}.{3}.{4}.{5}{6}.txt",
        shortName, leafDistributionName, nullDataGeneratorName,
        nullIndexRangeCollection, pieceCount, pieceIndexRangeCollection,
        skipSuffix);
    string outputFileName = Path.Combine(outputDirectoryName, outputFileBaseName);

    bool speedOverMemory = true;

    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration =
        CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration =
        CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

    NullDataCollection nullDataGenerator = CreateNullDataGenerator(
        nullDataGeneratorName, modelScorer, phyloTree, nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

    UniversalWorkList workList = UniversalWorkList.GetInstance(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataGenerator, nullIndexRangeCollection, keepTest);

    int workListCount = SpecialFunctions.Count(workList.List());

    // Number of rows left once the skipped row indices are removed.
    int effectiveWorkListCount;
    if (skipRowIndexRangeCollectionOrNull == null)
    {
        effectiveWorkListCount = workListCount;
    }
    else
    {
        effectiveWorkListCount = 0;
        for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
        {
            if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
            {
                effectiveWorkListCount++;
            }
        }
    }
    Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

    using (TextWriter textWriter = File.CreateText(outputFileName))
    {
        textWriter.WriteLine(Header);

        int rowIndex = -1;          // position within the full work list
        int effectiveRowIndex = -1; // position among the rows that are not skipped
        foreach (RowData rowAndTargetData in workList.List())
        {
            //!!!make all these parameters and the calculation a class
            ++rowIndex;
            Debug.Assert(rowIndex < workListCount); // real assert

            if (skipRowIndexRangeCollectionOrNull != null && skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
            {
                continue;
            }

            ++effectiveRowIndex;
            int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);
            if (!pieceIndexRangeCollection.Contains(workIndex))
            {
                continue;
            }

            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());

            string reportLine;
            try
            {
                reportLine = CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
            }
            catch (OutOfMemoryException)
            {
                // Best-effort recovery: drop the scorer's cache and retry this row once.
                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                modelScorer.ClearCache();
                reportLine = CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
            }

            textWriter.WriteLine(reportLine);
            textWriter.Flush(); // keep the file current so partial results survive a crash
        }
    }

    stopwatch.Stop();
    Console.WriteLine("Running time: " + stopwatch.Elapsed);
}
/// <summary>
/// Creates the discrete evaluator (conditional, joint or Fisher) selected by
/// <paramref name="nameAndParameters"/>.
/// </summary>
/// <remarks>
/// Matching is case-insensitive. The conditional and joint evaluators are selected by prefix and
/// receive the (lower-cased) remainder of the string; the Fisher evaluator requires an exact
/// match of its base name. Lower-casing uses the invariant culture and prefix tests are ordinal,
/// so the result does not depend on the current thread culture.
/// </remarks>
/// <exception cref="ArgumentException">The input names no known discrete evaluator.</exception>
public new static ModelEvaluatorDiscrete GetInstance(string nameAndParameters, ModelScorer scorer)
{
    nameAndParameters = nameAndParameters.ToLowerInvariant();

    if (nameAndParameters.StartsWith(ModelEvaluatorDiscreteConditional.BaseName.ToLowerInvariant(), StringComparison.Ordinal))
    {
        return ModelEvaluatorDiscreteConditional.GetInstance(
            nameAndParameters.Substring(ModelEvaluatorDiscreteConditional.BaseName.Length), scorer);
    }

    if (nameAndParameters.StartsWith(ModelEvaluatorDiscreteJoint.BaseName.ToLowerInvariant(), StringComparison.Ordinal))
    {
        return ModelEvaluatorDiscreteJoint.GetInstance(
            nameAndParameters.Substring(ModelEvaluatorDiscreteJoint.BaseName.Length), scorer);
    }

    if (nameAndParameters.Equals(ModelEvaluatorDiscreteFisher.BaseName.ToLowerInvariant()))
    {
        return ModelEvaluatorDiscreteFisher.GetInstance(scorer.PhyloTree.LeafCollection);
    }

    throw new ArgumentException("Could not parse " + nameAndParameters + " into a ModelEvaluatorDiscrete.");
}
/// <summary>
/// Prints to the console the log likelihoods of the null model (for the target and for the
/// predictor) and of the alternative model for one predictor/target pair, using the supplied
/// parameter values rather than optimizing.
/// </summary>
/// <remarks>
/// The work list is built from the two sparse files with the "PredictorPermutation" generator and
/// null index -1; every row whose predictor and target names match
/// <paramref name="predictorVariableName"/> and <paramref name="targetVariableName"/> is scored.
/// <paramref name="phyloTree"/> is only passed on to <c>CreateNullDataGenerator</c>.
/// </remarks>
/// <param name="nullModelArgs">Values turned into the null model's parameter list.</param>
/// <param name="altModelArgs">Values turned into the alternative model's parameter list.</param>
public void ScoreTree(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    string predictorSparseFileName,
    string targetSparseFileName,
    string predictorVariableName,
    string targetVariableName,
    double[] nullModelArgs,
    double[] altModelArgs)
{
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration =
        LoadSparseFileEnumeration(predictorSparseFileName);
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration =
        LoadSparseFileEnumeration(targetSparseFileName);

    RangeCollection nullIndexRangeCollection = RangeCollection.GetInstance(-1, -1);
    NullDataCollection nullDataGenerator = CreateNullDataGenerator(
        "PredictorPermutation", modelScorer, phyloTree, nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

    UniversalWorkList workList = UniversalWorkList.GetInstance(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataGenerator, nullIndexRangeCollection,
        AlwaysKeep<Dictionary<string, string>>.GetInstance());

    foreach (RowData rowAndTargetData in workList.List())
    {
        if (rowAndTargetData.Row[PhyloTree.PredictorVariableColumnName] != predictorVariableName ||
            rowAndTargetData.Row[PhyloTree.TargetVariableColumnName] != targetVariableName)
        {
            continue;
        }

        Dictionary<string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData;
        Dictionary<string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

        Converter<Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
        Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);
        Converter<Leaf, SufficientStatistics> altDistributionMap = CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionMap);

        double logLikelihood;
        Score scoreIndTarget, scoreIndPredictor, scoreAlt;
        MessageInitializer messageInitializer;
        OptimizationParameterList nullParams = NullModelDistribution.GetParameters(nullModelArgs);
        OptimizationParameterList altParams = AltModelDistribution.GetParameters(altModelArgs);

        Console.WriteLine(SpecialFunctions.CreateTabString("Variable", nullParams.ToStringHeader(), "LogL"));

        // Null model, target.
        messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
        logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
        scoreIndTarget = Score.GetInstance(logLikelihood, nullParams);
        Console.WriteLine("Target\t" + scoreIndTarget);

        // Null model, predictor (the two maps swap roles). The original repeated this likelihood
        // computation a second time and discarded the result; it is now computed once.
        messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionMap, predictorDistributionClassFunction, NullModelDistribution);
        logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
        scoreIndPredictor = Score.GetInstance(logLikelihood, nullParams);
        Console.WriteLine("Predictor\t" + scoreIndPredictor);

        Console.WriteLine("\n" + SpecialFunctions.CreateTabString("Variable", altParams.ToStringHeader(), "LogL"));

        // Alternative model on the combined map.
        messageInitializer = modelScorer.CreateMessageInitializer(null, altDistributionMap, AltModelDistribution);
        logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, altParams);
        scoreAlt = Score.GetInstance(logLikelihood, altParams);
        Console.WriteLine(SpecialFunctions.CreateTabString(AltModelDistribution, scoreAlt));
    }
}
/// <summary>
/// Produces the report text for a single work-list row. This declaration is abstract, so the
/// actual formatting is supplied by derived classes.
/// </summary>
/// <param name="modelScorer">Scorer made available to the implementation.</param>
/// <param name="phyloTree">Tree made available to the implementation.</param>
/// <param name="rowAndTargetData">The row being reported.</param>
/// <param name="workList">The work list being processed (presumably the one the row came from - confirm with callers).</param>
/// <param name="rowIndex">Caller-supplied row index.</param>
/// <param name="workListCount">Caller-supplied work-list count.</param>
/// <param name="workIndex">Caller-supplied work index.</param>
/// <returns>The report line as a string.</returns>
protected abstract string CreateReportLine( ModelScorer modelScorer, PhyloTree phyloTree, RowData rowAndTargetData, UniversalWorkList workList, int rowIndex, int workListCount, int workIndex);
/// <summary>
/// Factory for <see cref="ModelEvaluatorDiscreteJoint"/>. Hides the inherited
/// <c>GetInstance</c> (hence the <c>new</c> modifier).
/// </summary>
/// <param name="leafDistributionName">Name passed to <c>DistributionDiscreteJoint.GetInstance</c> to obtain the joint distribution.</param>
/// <param name="modelScorer">Scorer handed to the new evaluator.</param>
/// <returns>An evaluator whose null-distribution list holds the same single-variable distribution twice.</returns>
public static new ModelEvaluatorDiscreteJoint GetInstance(string leafDistributionName, ModelScorer modelScorer)
{
    DistributionDiscreteSingleVariable sharedNullDistn = DistributionDiscreteSingleVariable.GetInstance();
    DistributionDiscreteJoint altDistn = DistributionDiscreteJoint.GetInstance(leafDistributionName);

    // Two entries, both referring to the one shared null-distribution instance.
    List<IDistributionSingleVariable> nullDistnPair = new List<IDistributionSingleVariable>(
        new IDistributionSingleVariable[] { sharedNullDistn, sharedNullDistn });

    return new ModelEvaluatorDiscreteJoint(nullDistnPair, altDistn, modelScorer);
}
/// <summary>
/// Creates the generator; both arguments are forwarded unchanged to the base-class constructor
/// and no further initialization happens here.
/// </summary>
/// <param name="modelScorer">Scorer passed to the base constructor.</param>
/// <param name="distribution">Discrete distribution passed to the base constructor.</param>
public NullDataGeneratorTargetParametric(ModelScorer modelScorer, DistributionDiscrete distribution)
    : base(modelScorer, distribution)
{
}
/// <summary>
/// Creates the generator and stores the scorer and the discrete distribution it is given.
/// </summary>
/// <param name="modelScorer">Stored in <c>_modelScorer</c>.</param>
/// <param name="discreteDistn">Stored in <c>_discreteDistribution</c>.</param>
public NullDataGeneratorParametric(ModelScorer modelScorer, DistributionDiscrete discreteDistn)
{
    this._modelScorer = modelScorer;
    this._discreteDistribution = discreteDistn;
}
/// <summary>
/// Builds one tab-delimited report line for a work-list row: identifying columns and leaf
/// counts first, then either the remainder produced by <c>CompleteRowWithNaN</c> (when some
/// predictor count is zero) or two model scores followed by their log-likelihood difference
/// and the p-value from <c>SpecialFunctions.LogLikelihoodRatioTest</c>.
/// </summary>
/// <param name="modelScorer">Scorer used to create the message initializer and score the model.</param>
/// <param name="phyloTree">Tree used for the leaf counts.</param>
/// <param name="rowAndTargetData">Row fields plus the predictor and target data for this row.</param>
/// <param name="workList">Not used by this implementation.</param>
/// <param name="rowIndex">Written to the report line.</param>
/// <param name="workListCount">Written to the report line.</param>
/// <param name="workIndex">Written to the report line.</param>
/// <returns>The completed report line.</returns>
protected override string CreateReportLine(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RowData rowAndTargetData,
    UniversalWorkList workList,
    int rowIndex,
    int workListCount,
    int workIndex)
{
    //!!!there is very similar code in ModelTesterDiscrete.cs
    Dictionary<string, string> rowFields = rowAndTargetData.Row;
    string predictorName = rowFields[PhyloTree.PredictorVariableColumnName];
    string targetName = rowFields[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
    int nullIndex = int.Parse(rowFields[PhyloTree.NullIndexColumnName]);

    Dictionary<string, SufficientStatistics> predictorValuesByCaseId = rowAndTargetData.PredictorData;
    Dictionary<string, SufficientStatistics> targetValuesByCaseId = rowAndTargetData.TargetData;

    Converter<Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(targetValuesByCaseId);
    Converter<Leaf, SufficientStatistics> predictorDistributionMap = CreateSufficientStatisticsMap(predictorValuesByCaseId);

    int[] predictorCounts = phyloTree.CountsOfLeaves(predictorDistributionMap);
    int falseCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
    int trueCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
    int targetNonMissingCount = phyloTree.CountOfNonMissingLeaves(targetValuesByCaseId);
    int globalNonMissingCount = phyloTree.GlobalNonMissingCount(predictorDistributionMap, targetDistributionMap);

    string leadingColumns = SpecialFunctions.CreateTabString(
        this,
        rowIndex,
        workListCount,
        workIndex,
        nullIndex,
        predictorName,
        falseCount,
        trueCount,
        trueCount + falseCount,
        targetName,
        targetNonMissingCount,
        globalNonMissingCount,
        "");
    StringBuilder reportLine = new StringBuilder(leadingColumns);

    // A row with any zero predictor count is not scored; the rest of the line is
    // delegated to CompleteRowWithNaN.
    if (Array.IndexOf(predictorCounts, 0) >= 0)
    {
        CompleteRowWithNaN(reportLine);
        return reportLine.ToString();
    }

    MessageInitializer messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionMap, targetDistributionMap, NullModelDistribution);
    NullModelDistribution.InitialParamVals = null;

    // Pass 0 scores with useParameter == false, pass 1 with useParameter == true. After each
    // pass the fitted parameters become the alternative model's initial values.
    bool[] useParameterByPass = new bool[] { false, true };
    double[] logLikelihoodByPass = new double[useParameterByPass.Length];
    for (int pass = 0; pass < useParameterByPass.Length; pass++)
    {
        Score score = modelScorer.ScoreModel(messageInitializer, useParameterByPass[pass]);
        reportLine.Append(SpecialFunctions.CreateTabString(score, ""));
        Debug.Write(SpecialFunctions.CreateTabString(score, ""));
        logLikelihoodByPass[pass] = score.Loglikelihood;
        AltModelDistribution.InitialParamVals = score.OptimizationParameters;
    }

    // The raw difference is reported as-is; only the value fed to the test is clamped at zero.
    double diff = logLikelihoodByPass[1] - logLikelihoodByPass[0];
    double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);
    reportLine.Append(SpecialFunctions.CreateTabString(diff, pValue));
    Debug.WriteLine(SpecialFunctions.CreateTabString(diff, pValue));

    return reportLine.ToString();
}
/// <summary>
/// Fits the single-variable null model for the target and the conditional alternative model,
/// optionally folds the predictor's own null log-likelihood into the alternative score, and
/// packages everything as an <see cref="EvaluationResults"/>.
/// </summary>
/// <param name="predictorMap">Maps each leaf to its predictor statistics.</param>
/// <param name="targetMap">Maps each leaf to its target statistics.</param>
/// <returns>
/// Results built by <c>EvaluationResultsDiscrete.GetInstance</c> from the null score(s), the
/// alternative score, the Fisher counts and <c>ChiSquareDegreesOfFreedom</c>.
/// </returns>
public override EvaluationResults EvaluateModelOnData(Converter<Leaf, SufficientStatistics> predictorMap, Converter<Leaf, SufficientStatistics> targetMap)
{
    // realFisherCounts always holds the true Fisher counts; fisherCounts is what the scoring
    // calls receive and differs only when NAIVE_EQUILIBRIUM is defined.
    int[] realFisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);
    int[] fisherCounts = realFisherCounts;
#if NAIVE_EQUILIBRIUM
    // Backwards-compatibility mode: score with the target's leaf counts instead.
    fisherCounts = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
#endif

    bool isInvariant;
    Score targetNullScore = ComputeSingleVariableScore(predictorMap, targetMap, NullDistn, fisherCounts, out isInvariant);
    Score altScore = ComputeConditionalVariableScore(predictorMap, targetMap, targetNullScore, fisherCounts);

    List<Score> nullScores = new List<Score>();
    if (_includePredictorInScore)
    {
        // Entries 1 and 2 are swapped for the predictor-as-target fit.
        // NOTE(review): presumably this transposes the 2x2 table - confirm the FisherCounts layout.
        int[] swappedCounts = new int[] { realFisherCounts[0], realFisherCounts[2], realFisherCounts[1], realFisherCounts[3] };
        Score predictorNullScore = ComputeSingleVariableScore(targetMap, predictorMap, NullDistn, swappedCounts, out isInvariant);
        nullScores.Add(predictorNullScore);

        // The conditional model's score does not include the predictor log-likelihood. Add it
        // here so the result is comparable to the joint or reverse-conditional models.
        double combinedLogLikelihood = altScore.Loglikelihood + predictorNullScore.Loglikelihood;
        altScore = Score.GetInstance(combinedLogLikelihood, altScore.OptimizationParameters, altScore.Distribution);
    }
    nullScores.Add(targetNullScore);

    EvaluationResults evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, realFisherCounts, ChiSquareDegreesOfFreedom);

#if DEBUG
    // Debug-only consistency check: recompute both log-likelihoods directly from the fitted
    // parameters and verify they match what the maximization reported.
    MessageInitializerDiscrete nullMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, NullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
    MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, targetNullScore.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
    double nullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializer, targetNullScore.OptimizationParameters);
    double altLL = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, altScore.OptimizationParameters);

    if (_includePredictorInScore)
    {
        int[] swappedCountsCheck = new int[] { realFisherCounts[0], realFisherCounts[2], realFisherCounts[1], realFisherCounts[3] };
        MessageInitializerDiscrete predictorNullInitializer = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap, NullDistn, swappedCountsCheck, ModelScorer.PhyloTree.LeafCollection);
        double predictorNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(predictorNullInitializer, nullScores[0].OptimizationParameters);
        altLL += predictorNullLL;
    }

    EvaluationResults recomputedResults = EvaluateModelOnDataGivenParams(predictorMap, targetMap, evalResults);

    double eps = 1E-10;
    Debug.Assert(ComplexNumber.ApproxEqual(nullLL, targetNullScore.Loglikelihood, eps));
    Debug.Assert(ComplexNumber.ApproxEqual(altLL, altScore.Loglikelihood, eps));
    Debug.Assert(
        ComplexNumber.ApproxEqual(evalResults.NullLL, recomputedResults.NullLL, eps)
            && ComplexNumber.ApproxEqual(evalResults.AltLL, recomputedResults.AltLL, eps),
        "In ModelEvaluatorCond, results of maximizing LL and computing LL from same params are not the same.");
#endif

    return evalResults;
}
/// <summary>
/// Private constructor: forwards all three arguments unchanged to the base class and does
/// nothing else.
/// </summary>
/// <param name="nullDistns">Null distributions passed to the base constructor.</param>
/// <param name="jointDistn">Joint distribution passed to the base constructor as the alternative distribution.</param>
/// <param name="scorer">Scorer passed to the base constructor.</param>
private ModelEvaluatorDiscreteJoint(
    List<IDistributionSingleVariable> nullDistns,
    DistributionDiscreteJoint jointDistn,
    ModelScorer scorer)
    : base(nullDistns, jointDistn, scorer)
{
}
/// <summary>
/// Protected constructor for derived evaluators: forwards all three arguments unchanged to
/// the base class and does nothing else.
/// </summary>
/// <param name="nullDistns">Null distributions passed to the base constructor.</param>
/// <param name="altDistn">Alternative distribution passed to the base constructor.</param>
/// <param name="scorer">Scorer passed to the base constructor.</param>
protected ModelEvaluatorDiscrete(
    List<IDistributionSingleVariable> nullDistns,
    DistributionDiscrete altDistn,
    ModelScorer scorer)
    : base(nullDistns, altDistn, scorer)
{
}