/// <summary>
/// Computes the log-likelihood difference between a joint model of predictor and target
/// and the sum of two independently scored null models (one for the target, one for the predictor).
/// The independent and joint scores are appended to <paramref name="stringBuilder"/> as tab-delimited columns.
/// </summary>
/// <param name="modelScorer">Scorer used to create message initializers and score each model.</param>
/// <param name="phyloTree">Not read by this implementation.</param>
/// <param name="stringBuilder">Receives the predictor score, target score, independent log likelihood and joint score.</param>
/// <param name="targetMarginal">Assigned to NullModelDistribution.EmpiricalEquilibrium before the target is scored.</param>
/// <param name="predictorMarginal">Assigned to NullModelDistribution.EmpiricalEquilibrium before the predictor is scored.</param>
/// <param name="predictorDistributionClassFunction">Maps a leaf to its predictor statistics.</param>
/// <param name="targetDistributionClassFunction">Maps a leaf to its target statistics.</param>
/// <returns>Joint-model log likelihood minus the independent-model log likelihood.</returns>
protected override double ComputeLLR(ModelScorer modelScorer, PhyloTree phyloTree, StringBuilder stringBuilder, double targetMarginal, double predictorMarginal, Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction, Converter<Leaf, SufficientStatistics> targetDistributionClassFunction)
{
    Converter<Leaf, SufficientStatistics> leafToJointClass =
        CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionClassFunction);

    // first score the target.
    NullModelDistribution.EmpiricalEquilibrium = targetMarginal;
    MessageInitializer targetInitializer =
        modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionClassFunction, NullModelDistribution);
    Score scoreIndTarget = modelScorer.ScoreModel(targetInitializer, false);

    // then the predictor, with the two leaf maps swapped.
    NullModelDistribution.EmpiricalEquilibrium = predictorMarginal;
    MessageInitializer predictorInitializer =
        modelScorer.CreateMessageInitializer(targetDistributionClassFunction, predictorDistributionClassFunction, NullModelDistribution);
    Score scoreIndPredictor = modelScorer.ScoreModel(predictorInitializer, false);

    // finally the joint model, seeded with the parameters found by the two independent fits.
    DistributionDiscreteJointBinary jointDistn = (DistributionDiscreteJointBinary)AlternativeModelDistribution;
    jointDistn.SetInitialParams(scoreIndPredictor.OptimizationParameters, scoreIndTarget.OptimizationParameters);
    MessageInitializer jointInitializer = modelScorer.CreateMessageInitializer(null, leafToJointClass, jointDistn);
    Score scoreJoint = modelScorer.ScoreModel(jointInitializer, false);

    double independentLogLikelihood = scoreIndTarget.Loglikelihood + scoreIndPredictor.Loglikelihood;
    double jointLogLikelihood = scoreJoint.Loglikelihood;

    string columns = SpecialFunctions.CreateTabString(
        scoreIndPredictor.ToString(NullModelDistribution),
        scoreIndTarget.ToString(NullModelDistribution),
        independentLogLikelihood,
        scoreJoint.ToString(jointDistn),
        "");
    stringBuilder.Append(columns);

    return jointLogLikelihood - independentLogLikelihood;
}
/// <summary>
/// Adds an edge between the tree nodes created (or looked up) by AddNode for the given parent and child items.
/// </summary>
/// <param name="phyloTree">Tree that receives the edge.</param>
/// <param name="parentNodeSource">Diagram item passed to AddNode for the edge source.</param>
/// <param name="childNodeSource">Diagram item passed to AddNode for the edge target.</param>
/// <param name="weight">Edge weight; the fractional part is discarded by the cast to int.</param>
public static void AddPhyloTreeEdge(PhyloTree phyloTree, IDiagramItem parentNodeSource, IDiagramItem childNodeSource, double weight = 1)
{
    var edges = phyloTree.Edges;
    var parentNode = AddNode(phyloTree, parentNodeSource);
    var childNode = AddNode(phyloTree, childNodeSource);

    var edge = new PhyloEdge(parentNode, childNode);
    edge.Weight = (int)weight;
    edges.Add(edge);
}
/// <summary>
/// Builds one tab-delimited report line containing the 2x2 counts and the Fisher exact test
/// value for the predictor/target pair described by <paramref name="rowAndTargetData"/>.
/// </summary>
/// <param name="modelScorer">Not read by this implementation.</param>
/// <param name="phyloTree">Not read by this implementation.</param>
/// <param name="rowAndTargetData">Supplies the row dictionary plus the predictor and target data.</param>
/// <param name="workList">Not read by this implementation.</param>
/// <param name="rowIndex">Written to the report line as-is.</param>
/// <param name="workListCount">Written to the report line as-is.</param>
/// <param name="workIndex">Written to the report line as-is.</param>
/// <returns>The tab-delimited report line.</returns>
protected override string CreateReportLine(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RowData rowAndTargetData,
    UniversalWorkList workList,
    int rowIndex,
    int workListCount,
    int workIndex)
{
    Dictionary<string, string> row = rowAndTargetData.Row;
    string predictorVariable = row[PhyloTree.PredictorVariableColumnName]; // e.g. hla
    string targetVariable = row[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
    int nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

    Dictionary<string, SufficientStatistics> caseIdToNonMissingPredictorValue = rowAndTargetData.PredictorData;
    Dictionary<string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

    TwoByTwo fishers2by2 = TwoByTwo.GetInstance(
        SufficientStatisticsMapToIntMap(caseIdToNonMissingPredictorValue),
        SufficientStatisticsMapToIntMap(caseIdToNonMissingTargetValue));

    // Read the test value once and reuse it; previously the local was unused and
    // the property was evaluated a second time for the report line.
    double pValue = fishers2by2.FisherExactTest;

    return SpecialFunctions.CreateTabString(
        this, rowIndex, workListCount, workIndex, nullIndex,
        predictorVariable, targetVariable,
        fishers2by2.CountsString(), pValue);
}
/// <summary>
/// Drains <paramref name="nodes"/> breadth-first, builds a PhyloTree with one edge per
/// parent/child pair, and shows it in a maximized modal window.
/// </summary>
/// <param name="nodes">Queue seeded with the nodes to start from; it is emptied by this call.</param>
private void ShowParseTree(Queue<TreeNode> nodes)
{
    var tree = new PhyloTree();
    while (nodes.Count > 0)
    {
        TreeNode treeNode = nodes.Dequeue();
        string parentLabel = treeNode.ToString();
        foreach (TreeNode childNode in treeNode.Nodes)
        {
            // Only nodes that have at least one child reach this point, so only they are filled orange.
            Node node = tree.AddNode(parentLabel);
            node.Attr.FillColor = Microsoft.Msagl.Drawing.Color.Orange;
            tree.AddEdge(parentLabel, childNode.ToString());
            nodes.Enqueue(childNode);
        }
    }

    // A form shown with ShowDialog is not disposed when it closes, so dispose it
    // explicitly; disposing the form also disposes the viewer in its Controls.
    using (var form = new Form())
    {
        form.WindowState = FormWindowState.Maximized;

        GViewer viewer = new GViewer();
        viewer.Graph = tree;

        form.SuspendLayout();
        viewer.Dock = System.Windows.Forms.DockStyle.Fill;
        form.Controls.Add(viewer);
        form.ResumeLayout();

        form.ShowDialog();
    }
}
/// <summary>
/// Creates the ModelScorer matching a leaf distribution name. A leading cross-validate or
/// reverse evaluator base name is stripped first; the remainder must start with the
/// discrete evaluator base name.
/// </summary>
/// <param name="aPhyloTree">Tree handed to the scorer.</param>
/// <param name="leafDistributionName">Distribution name; matched case-insensitively.</param>
/// <param name="optimizerName">Name passed to GridSearch.GetInstance.</param>
/// <returns>A ModelScorerDiscrete for the tree and optimizer.</returns>
/// <exception cref="ArgumentNullException"><paramref name="leafDistributionName"/> is null.</exception>
/// <exception cref="ArgumentException">The name does not identify a known scorer.</exception>
public static ModelScorer GetInstance(PhyloTree aPhyloTree, string leafDistributionName, string optimizerName)
{
    if (leafDistributionName == null)
    {
        throw new ArgumentNullException("leafDistributionName");
    }

    // Identifiers are compared culture-independently: lower-case with the invariant
    // culture and match prefixes ordinally, so the result does not depend on the
    // current thread culture.
    string name = leafDistributionName.ToLowerInvariant();
    GridSearch optimizer = GridSearch.GetInstance(optimizerName);

    if (name.StartsWith(ModelEvaluatorCrossValidate.BaseName.ToLowerInvariant(), StringComparison.Ordinal))
    {
        name = name.Substring(ModelEvaluatorCrossValidate.BaseName.Length);
    }
    else if (name.StartsWith(ModelEvaluatorReverse.BaseName.ToLowerInvariant(), StringComparison.Ordinal))
    {
        name = name.Substring(ModelEvaluatorReverse.BaseName.Length);
    }

    if (name.StartsWith(ModelEvaluatorDiscrete.BaseName.ToLowerInvariant(), StringComparison.Ordinal))
    {
        return new ModelScorerDiscrete(aPhyloTree, optimizer);
    }

    throw new ArgumentException("Cannot parse " + name + " into a valid ModelScorer.");
}
/// <summary>
/// Builds a PhyloTree from a diagram graph: every graph node is added first, then every edge
/// with its endpoints and weight.
/// </summary>
/// <param name="graph">Source graph whose Nodes and Edges are enumerated.</param>
/// <returns>The newly populated tree.</returns>
public static PhyloTree CreatePhyloTrees(Graph<IDiagramItem> graph)
{
    var result = new PhyloTree();

    foreach (var diagramNode in graph.Nodes)
    {
        AddNode(result, diagramNode);
    }

    foreach (var diagramEdge in graph.Edges)
    {
        AddPhyloTreeEdge(result, diagramEdge.From, diagramEdge.To, diagramEdge.Weight);
    }

    return result;
}
/// <summary>
/// Creates the named null-data generator and wraps it, together with the null index range and the
/// predictor/target enumerations, in a NullDataCollection.
/// </summary>
/// <param name="nullDataGeneratorName">Name passed to NullDataGenerator.GetInstance.</param>
/// <param name="modelScorer">Scorer passed to the generator factory.</param>
/// <param name="phyloTree">Tree passed to the generator factory.</param>
/// <param name="nullIndexRangeCollection">Null indexes passed to the collection.</param>
/// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor name / per-case value pairs.</param>
/// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target name / per-case value pairs.</param>
/// <returns>The collection returned by NullDataCollection.GetInstance.</returns>
protected NullDataCollection CreateNullDataGenerator(
    string nullDataGeneratorName,
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RangeCollection nullIndexRangeCollection,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration)
{
    NullDataGenerator generator =
        NullDataGenerator.GetInstance(nullDataGeneratorName, modelScorer, phyloTree, this);

    NullDataCollection collection = NullDataCollection.GetInstance(
        generator,
        nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration);

    return collection;
}
/// <summary>
/// Creates a layered layout engine for the graph and runs the layout. When USE_PHYLOTREE is
/// defined and the graph is a PhyloTree, the phylo-tree layout calculation runs instead of the engine.
/// </summary>
/// <param name="msaglGraph">Graph to lay out.</param>
/// <param name="settings">Sugiyama settings handed to the engine (and to the phylo-tree calculation).</param>
/// <param name="cancelToken">Passed to the engine run only; the phylo-tree calculation is run without it.</param>
/// <returns>The engine that was created, whichever path performed the layout.</returns>
internal static LayeredLayoutEngine CalculateLayout(GeometryGraph msaglGraph, SugiyamaLayoutSettings settings, CancelToken cancelToken)
{
    var layoutEngine = new LayeredLayoutEngine(msaglGraph, settings);

#if USE_PHYLOTREE
    PhyloTree phyloTree = msaglGraph as PhyloTree;
    if (phyloTree != null)
    {
        var phyloCalculation = new PhyloTreeLayoutCalclulation(phyloTree, settings, layoutEngine.IntGraph, layoutEngine.Database);
        phyloCalculation.Run();
        return layoutEngine;
    }
#endif

    layoutEngine.Run(cancelToken);
    return layoutEngine;
}
/// <summary>
/// Click handler that builds a small fixed sample tree (a -> b, c; c -> d, e, f; e -> 0, 1, 2)
/// and assigns it to the viewer.
/// </summary>
void button1_Click(object sender, EventArgs e)
{
    var sampleTree = new PhyloTree();

    // The two edges leaving the root are cast to PhyloEdge, as in the original sample,
    // so that a length could be assigned to them (see the commented lines).
    var rootEdge = (PhyloEdge)sampleTree.AddEdge("a", "b");
    //rootEdge.Length = 0.8;
    rootEdge = (PhyloEdge)sampleTree.AddEdge("a", "c");
    //rootEdge.Length = 0.2;

    string[,] remainingEdges =
    {
        { "c", "d" },
        { "c", "e" },
        { "c", "f" },
        { "e", "0" },
        { "e", "1" },
        { "e", "2" },
    };
    for (int i = 0; i < remainingEdges.GetLength(0); i++)
    {
        sampleTree.AddEdge(remainingEdges[i, 0], remainingEdges[i, 1]);
    }

    viewer.Graph = sampleTree;
}
/// <summary>
/// Scores the same message initializer twice - first with useParameter false, then true -
/// appends each score to <paramref name="stringBuilder"/>, and returns the difference of the two log likelihoods.
/// </summary>
/// <param name="modelScorer">Scorer used to create the message initializer and score the models.</param>
/// <param name="phyloTree">Not read by this implementation.</param>
/// <param name="stringBuilder">Receives one tab-delimited score column group per pass.</param>
/// <param name="targetMarginal">Assigned to NullModelDistribution.EmpiricalEquilibrium.</param>
/// <param name="predictorMarginal">Not read by this implementation.</param>
/// <param name="predictorDistributionClassFunction">Maps a leaf to its predictor statistics.</param>
/// <param name="targetDistributionClassFunction">Maps a leaf to its target statistics.</param>
/// <returns>Log likelihood of the second pass minus that of the first pass.</returns>
protected override double ComputeLLR(ModelScorer modelScorer, PhyloTree phyloTree, StringBuilder stringBuilder, double targetMarginal, double predictorMarginal, Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction, Converter<Leaf, SufficientStatistics> targetDistributionClassFunction)
{
    NullModelDistribution.EmpiricalEquilibrium = targetMarginal;
    NullModelDistribution.InitialParamVals = null;

    MessageInitializer initializer =
        modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionClassFunction, NullModelDistribution);

    // index 0: useParameter == false, index 1: useParameter == true
    double[] logLikelihoods = new double[2];
    for (int pass = 0; pass < logLikelihoods.Length; pass++)
    {
        bool useParameter = (pass == 1);

        Score score = modelScorer.ScoreModel(initializer, useParameter);
        string scoreColumns = SpecialFunctions.CreateTabString(
            score.ToString(useParameter ? AlternativeModelDistribution : NullModelDistribution), "");
        stringBuilder.Append(scoreColumns);
        logLikelihoods[pass] = score.Loglikelihood;

        // Each pass leaves its optimized parameters as the alternative model's starting values.
        AltModelDistribution.InitialParamVals = score.OptimizationParameters;
        Debug.WriteLine(SpecialFunctions.CreateTabString("AltModelDistribution.InitialParamVals = score.OptimizationParameters", score.OptimizationParameters));
    }

    return logLikelihoods[1] - logLikelihoods[0];
}
/// <summary>
/// Generates a random case-id-to-value mapping by fitting the discrete model to the real data
/// and then evolving a binary tree with the fitted equilibrium and lambda.
/// </summary>
/// <param name="realCaseIdToNonMissingValue">Real (non-missing) values keyed by case id.</param>
/// <param name="random">Random source, passed by reference to EvolveBinaryTree.</param>
/// <returns>The evolved mapping converted to the SufficientStatistics base type.</returns>
public override Dictionary<string, SufficientStatistics> GenerateRandomMapping(Dictionary<string, SufficientStatistics> realCaseIdToNonMissingValue, ref Random random)
{
    //!!!! TODO: add a check that the SufficientStatistics values really are BooleanStatistics
    Converter<Leaf, SufficientStatistics> leafToStatistics =
        PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue);

    PhyloTree tree = _modelScorer.PhyloTree;
    MessageInitializer initializer = MessageInitializerDiscrete.GetInstance(
        leafToStatistics, _discreteDistribution, new int[] { 1, 1 }, tree.LeafCollection);
    Score fitted = _modelScorer.MaximizeLikelihood(initializer);

    // Fraction of the tree's leaves that have a non-missing real value.
    double nonMissingLeafCount = (double)tree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue);
    double totalLeafCount = (double)SpecialFunctions.Count(tree.LeafCollection);
    double percentNonMissing = nonMissingLeafCount / totalLeafCount;

    int equilibriumIndex = (int)DistributionDiscreteConditional.ParameterIndex.Equilibrium;
    int lambdaIndex = (int)DistributionDiscreteConditional.ParameterIndex.Lambda;
    double equilibrium = fitted.OptimizationParameters[equilibriumIndex].Value;
    double lambda = fitted.OptimizationParameters[lambdaIndex].Value;

    Dictionary<string, BooleanStatistics> evolved =
        tree.EvolveBinaryTree(equilibrium, lambda, 1 - percentNonMissing, ref random);

    Dictionary<string, SufficientStatistics> result;
    SpecialFunctions.ConvertDictionaryToBaseClasses(evolved, out result);
    return result;
}
/// <summary>
/// Does the work: runs the PhyloD driver for this task inside a temporary sandbox directory,
/// captures everything written to the console while it runs, and stores the produced output
/// file's name and contents in this object's fields. If the expected output file is missing,
/// the task result is marked as failed instead.
/// </summary>
public override void DoWork()
{
    // get our input data and null the field to make sure we don't serialize it back
    InputData inputData = mInputData;
    mInputData = null;

    // get the job-specific names of input files
    FileDefCollection fileDefs = Job.FileDefs;
    string treeFileName = Utility.GetNamedFileDef(fileDefs, Constants.TreeFileDefName).LocalName;
    string predictorFileName = Utility.GetNamedFileDef(fileDefs, Constants.PredictorFileDefName).LocalName;
    string targetFileName = Utility.GetNamedFileDef(fileDefs, Constants.TargetFileDefName).LocalName;
    string skipRowIndexFileName = Utility.GetNamedFileDef(fileDefs, Constants.SkipRowIndexFileDefName).LocalName;

    // construct RangeCollections; an empty skip file means "skip nothing" (null)
    RangeCollection pieceIndexRangeCollection = RangeCollection.Parse(inputData.PieceIndexRange);
    RangeCollection nullIndexRangeCollection = RangeCollection.Parse(inputData.NullIndexRange);
    RangeCollection skipRowIndexRangeCollection = null;
    FileInfo fileInfo = new FileInfo(skipRowIndexFileName);
    if (fileInfo.Length > 0)
    {
        skipRowIndexRangeCollection = RangeCollection.Parse(File.ReadAllText(skipRowIndexFileName));
    }

    // do the rest
    PhyloTree aPhyloTree = PhyloTree.GetInstance(treeFileName, null);
    ModelScorer modelScorer = ModelScorer.GetInstance(aPhyloTree, inputData.LeafDistributionName, inputData.OptimizerName);
    ModelEvaluator modelEvaluator = ModelEvaluator.GetInstance(inputData.LeafDistributionName, modelScorer);
    KeepTest<Dictionary<string, string>> keepTest = KeepTest<Dictionary<string, string>>.GetInstance(null, inputData.KeepTestName);
    PhyloDDriver driver = PhyloDDriver.GetInstance();

    // create a name for the temporary job sandbox. This directory gets created by driver.Run(...)
    string agentOutputDirectoryName = Path.Combine(
        Environment.CurrentDirectory,
        String.Format(CultureInfo.InvariantCulture, "{0}.{1}", Job.JobId, Task.TaskId));

    // Remember the current console writers so they can be put back; otherwise the console
    // would be left pointing at writers that are disposed when this method exits.
    TextWriter originalOut = Console.Out;
    TextWriter originalError = Console.Error;

    // save the standard out and standard error in memory streams
    using (MemoryStream streamOut = new MemoryStream(), streamError = new MemoryStream())
    {
        try
        {
            // redirect the outputs
            using (StreamWriter writerOut = new StreamWriter(streamOut), writerError = new StreamWriter(streamError))
            {
                Console.SetOut(writerOut);
                Console.SetError(writerError);
                try
                {
                    // run the model
                    string outputFileName = driver.Run(
                        modelEvaluator,
                        predictorFileName,
                        targetFileName,
                        inputData.LeafDistributionName,
                        inputData.NullDataGeneratorName,
                        keepTest,
                        skipRowIndexRangeCollection,
                        inputData.NiceName,
                        agentOutputDirectoryName,
                        pieceIndexRangeCollection,
                        inputData.PieceCount,
                        nullIndexRangeCollection,
                        inputData.OptimizerName);

                    // this is the expected output file name -- save this so it can be written on the master side with the same name.
                    mOutputFileName = Path.GetFileName(outputFileName);
                    mLocalOutputFileName = Path.Combine(inputData.LocalOutputDirectoryName, mOutputFileName);

                    // get the output data
                    string fullOutputPath = Path.Combine(agentOutputDirectoryName, mOutputFileName);
                    if (File.Exists(fullOutputPath))
                    {
                        mOutputData = File.ReadAllText(fullOutputPath);
                    }
                    else
                    {
                        // Report the failure and do not try to read the file: opening a missing
                        // file would throw and hide the failure recorded here. The message names
                        // the path that was actually looked for.
                        TaskResult.FailureReason = TaskFailureReason.MissingOutput;
                        TaskResult.FailureMessage = String.Format(CultureInfo.CurrentCulture, "Cannot find output file '{0}'", fullOutputPath);
                        TaskResult.Status = TaskAssignmentStatus.Failed;
                    }
                }
                finally
                {
                    // restore the console before the redirect writers are disposed
                    Console.SetOut(originalOut);
                    Console.SetError(originalError);

                    // get rid of the sandbox; it may not exist if the run failed before creating it,
                    // and deleting a missing directory would mask the original exception
                    if (Directory.Exists(agentOutputDirectoryName))
                    {
                        Directory.Delete(agentOutputDirectoryName, true);
                    }
                }
            }
        }
        finally
        {
            // this finally is to make sure we get console output.
            // ToArray returns only the bytes written (GetBuffer also returns unused capacity),
            // and the bytes are decoded as UTF-8 because that is what StreamWriter wrote.
            Encoding encoding = Encoding.UTF8;
            TaskResult.StandardOutput = encoding.GetString(streamOut.ToArray());
            TaskResult.StandardError = encoding.GetString(streamError.ToArray());
        }
    }
}
/// <summary>
/// Runs the evaluation over the work list built from the predictor and target sparse files and
/// writes one report line per selected row to a text file in <paramref name="outputDirectoryName"/>.
/// Rows in the skip range are ignored; of the rest, only rows whose work index falls in
/// <paramref name="pieceIndexRangeCollection"/> are processed.
/// </summary>
/// <param name="modelScorer">Scorer passed to CreateReportLine; its cache is cleared on an out-of-memory retry.</param>
/// <param name="phyloTree">Tree passed to the null-data generator and to CreateReportLine.</param>
/// <param name="predictorSparseFileName">Predictor input file.</param>
/// <param name="targetSparseFileName">Target input file.</param>
/// <param name="leafDistributionName">Used in the output file name.</param>
/// <param name="nullDataGeneratorName">Selects the null-data generator; also used in the output file name.</param>
/// <param name="keepTest">Filter passed to the work list.</param>
/// <param name="skipRowIndexRangeCollectionOrNull">Row indexes to skip, or null to skip none.</param>
/// <param name="shortName">Used in the output file name.</param>
/// <param name="outputDirectoryName">Directory to create and write into.</param>
/// <param name="pieceIndexRangeCollection">Work indexes this call is responsible for.</param>
/// <param name="pieceCount">Total number of pieces the work is divided into.</param>
/// <param name="nullIndexRangeCollection">Null indexes to evaluate.</param>
/// <param name="optimizerName">Not read by this method.</param>
public void Run(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    string predictorSparseFileName,
    string targetSparseFileName,
    string leafDistributionName,
    string nullDataGeneratorName,
    KeepTest<Dictionary<string, string>> keepTest,
    RangeCollection skipRowIndexRangeCollectionOrNull,
    string shortName,
    string outputDirectoryName,
    RangeCollection pieceIndexRangeCollection,
    int pieceCount,
    RangeCollection nullIndexRangeCollection,
    string optimizerName)
{
    Stopwatch timer = new Stopwatch();
    timer.Start();

    Directory.CreateDirectory(outputDirectoryName);

    string skipSuffix;
    if (skipRowIndexRangeCollectionOrNull == null)
    {
        skipSuffix = "";
    }
    else
    {
        skipSuffix = ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString();
    }
    string outputFileName = string.Format(@"{0}\{1}.{2}.{3}.{4}.{5}.{6}{7}.txt",
        outputDirectoryName, shortName, leafDistributionName, nullDataGeneratorName,
        nullIndexRangeCollection, pieceCount, pieceIndexRangeCollection, skipSuffix);

    bool speedOverMemory = true;
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorEnumeration =
        CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetEnumeration =
        CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

    NullDataCollection nullDataGenerator = CreateNullDataGenerator(
        nullDataGeneratorName, modelScorer, phyloTree, nullIndexRangeCollection,
        predictorEnumeration, targetEnumeration);

    UniversalWorkList workList = UniversalWorkList.GetInstance(
        predictorEnumeration, targetEnumeration,
        nullDataGenerator, nullIndexRangeCollection, keepTest);

    // Count all rows, then count how many of them are not in the skip range.
    int workListCount = SpecialFunctions.Count(workList.List());
    int effectiveWorkListCount = workListCount;
    if (skipRowIndexRangeCollectionOrNull != null)
    {
        effectiveWorkListCount = 0;
        for (int candidate = 0; candidate < workListCount; candidate++)
        {
            if (!skipRowIndexRangeCollectionOrNull.Contains(candidate))
            {
                effectiveWorkListCount++;
            }
        }
    }
    Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

    using (TextWriter reportWriter = File.CreateText(outputFileName))
    {
        reportWriter.WriteLine(Header);

        int rowIndex = -1;           // index over every row of the work list
        int effectiveRowIndex = -1;  // index over the rows that are not skipped
        foreach (RowData rowAndTargetData in workList.List())
        {
            //!!!make all these parameters and the calculation a class
            rowIndex++;
            Debug.Assert(rowIndex < workListCount); // real assert

            bool rowIsSkipped = skipRowIndexRangeCollectionOrNull != null
                && skipRowIndexRangeCollectionOrNull.Contains(rowIndex);
            if (rowIsSkipped)
            {
                continue;
            }

            effectiveRowIndex++;
            int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);
            if (!pieceIndexRangeCollection.Contains(workIndex))
            {
                continue;
            }

            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());
            string reportLine;
            try
            {
                reportLine = CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
            }
            catch (OutOfMemoryException)
            {
                // One retry after dropping the scorer's cache; a second failure propagates.
                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                modelScorer.ClearCache();
                reportLine = CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
            }

            // Flush after every line so finished rows reach the file immediately.
            reportWriter.WriteLine(reportLine);
            reportWriter.Flush();
        }
    }

    timer.Stop();
    Console.WriteLine("Running time: " + timer.Elapsed);
}
/// <summary>
/// Builds one tab-delimited report line for a predictor/target pair: leaf counts, Fisher counts,
/// and - unless one of the counted classes is empty - the model columns written by ComputeLLR
/// followed by the log-likelihood difference and its p-value.
/// </summary>
/// <remarks>
/// There is very similar code in ModelTesterGaussian.cs.
/// We iterate over each predictor (e.g. hla) and each target (e.g. a mer at a sequence position) and ask
/// whether the presence of the predictor influences the probability that the target shows up.
/// The null index specifies whether this is the true data or randomized data.
/// </remarks>
/// <param name="modelScorer">Scorer passed through to ComputeLLR.</param>
/// <param name="phyloTree">Tree used for the leaf counts and passed through to ComputeLLR.</param>
/// <param name="rowAndTargetData">Supplies the row dictionary plus the predictor and target data.</param>
/// <param name="workList">Not read by this implementation.</param>
/// <param name="rowIndex">Written to the report line as-is.</param>
/// <param name="workListCount">Written to the report line as-is.</param>
/// <param name="workIndex">Written to the report line as-is.</param>
/// <returns>The tab-delimited report line.</returns>
protected override string CreateReportLine(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RowData rowAndTargetData,
    UniversalWorkList workList,
    int rowIndex,
    int workListCount,
    int workIndex)
{
    Dictionary<string, string> row = rowAndTargetData.Row;
    string predictorVariable = row[PhyloTree.PredictorVariableColumnName]; // e.g. hla
    string targetVariable = row[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
    int nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

    Dictionary<string, SufficientStatistics> predictorData = rowAndTargetData.PredictorData;
    Dictionary<string, SufficientStatistics> targetData = rowAndTargetData.TargetData;

    // The first predictor value decides whether the predictor is treated as boolean.
    IEnumerator<SufficientStatistics> predictorValues = predictorData.Values.GetEnumerator();
    predictorValues.MoveNext();
    bool predictorIsBoolean = predictorValues.Current is BooleanStatistics;

    Converter<Leaf, SufficientStatistics> targetDistributionClassFunction = CreateSufficientStatisticsMap(targetData);
    Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(predictorData);

    // Non-boolean predictors get all-zero placeholder counts.
    int[] predictorCounts;
    if (predictorIsBoolean)
    {
        predictorCounts = phyloTree.CountsOfLeaves(predictorDistributionClassFunction, NullModelDistribution);
    }
    else
    {
        predictorCounts = new int[2];
    }
    int[] targetCounts = phyloTree.CountsOfLeaves(targetDistributionClassFunction, NullModelDistribution);

    const int falseClass = (int)DistributionDiscreteBinary.DistributionClass.False;
    const int trueClass = (int)DistributionDiscreteBinary.DistributionClass.True;
    int predictorFalseNameCount = predictorCounts[falseClass];
    int predictorTrueNameCount = predictorCounts[trueClass];
    int targetFalseNameCount = targetCounts[falseClass];
    int targetTrueNameCount = targetCounts[trueClass];

    int[] fisherCounts;
    int globalNonMissingCount;
    if (predictorIsBoolean)
    {
        fisherCounts = phyloTree.FisherCounts(predictorDistributionClassFunction, targetDistributionClassFunction);
        globalNonMissingCount = fisherCounts[0] + fisherCounts[1] + fisherCounts[2] + fisherCounts[3];
    }
    else
    {
        fisherCounts = new int[4];
        globalNonMissingCount = phyloTree.GlobalNonMissingCount(predictorDistributionClassFunction, targetDistributionClassFunction);
    }

    StringBuilder line = new StringBuilder(
        SpecialFunctions.CreateTabString(this, rowIndex, workListCount, workIndex, nullIndex,
            predictorVariable, predictorFalseNameCount, predictorTrueNameCount, predictorTrueNameCount + predictorFalseNameCount,
            targetVariable, targetFalseNameCount, targetTrueNameCount, targetTrueNameCount + targetFalseNameCount,
            fisherCounts[0], fisherCounts[1], fisherCounts[2], fisherCounts[3], globalNonMissingCount, ""));

    // A row is ignored when any target class is empty, or when a boolean predictor has an empty class.
    bool predictorHasEmptyClass = predictorIsBoolean && Array.IndexOf(predictorCounts, 0) >= 0;
    bool targetHasEmptyClass = Array.IndexOf(targetCounts, 0) >= 0;
    if (predictorHasEmptyClass || targetHasEmptyClass)
    {
        CompleteRowWithNaN(line);
        return line.ToString();
    }

    double targetMarginal = (double)targetTrueNameCount / (double)(targetTrueNameCount + targetFalseNameCount);
    double predictorMarginal = (double)predictorTrueNameCount / (double)(predictorTrueNameCount + predictorFalseNameCount);

    double diff = ComputeLLR(modelScorer, phyloTree, line, targetMarginal, predictorMarginal,
        predictorDistributionClassFunction, targetDistributionClassFunction);

    // Negative differences are clamped to zero before the ratio test.
    double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);
    line.Append(SpecialFunctions.CreateTabString(diff, pValue));

    return line.ToString();
}
/// <summary>
/// Finds the rows of the work list matching the given predictor and target variable names and, for
/// each, prints to the console the log likelihood of the target and the predictor under the null
/// model at <paramref name="nullModelArgs"/>, and of the alternative model at <paramref name="altModelArgs"/>.
/// No optimization is performed; the supplied parameter values are scored as given.
/// </summary>
/// <param name="modelScorer">Scorer used to create message initializers and compute log likelihoods.</param>
/// <param name="phyloTree">Tree passed to the null-data generator.</param>
/// <param name="predictorSparseFileName">Predictor input file.</param>
/// <param name="targetSparseFileName">Target input file.</param>
/// <param name="predictorVariableName">Predictor variable to score.</param>
/// <param name="targetVariableName">Target variable to score.</param>
/// <param name="nullModelArgs">Values converted to null-model parameters.</param>
/// <param name="altModelArgs">Values converted to alternative-model parameters.</param>
public void ScoreTree(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    string predictorSparseFileName,
    string targetSparseFileName,
    string predictorVariableName,
    string targetVariableName,
    double[] nullModelArgs,
    double[] altModelArgs)
{
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration =
        LoadSparseFileEnumeration(predictorSparseFileName);
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration =
        LoadSparseFileEnumeration(targetSparseFileName);

    // Only null index -1 is requested.
    RangeCollection nullIndexRangeCollection = RangeCollection.GetInstance(-1, -1);
    NullDataCollection nullDataGenerator = CreateNullDataGenerator(
        "PredictorPermutation", modelScorer, phyloTree, nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

    UniversalWorkList workList = UniversalWorkList.GetInstance(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataGenerator,
        nullIndexRangeCollection,
        AlwaysKeep<Dictionary<string, string>>.GetInstance());

    foreach (RowData rowAndTargetData in workList.List())
    {
        if (rowAndTargetData.Row[PhyloTree.PredictorVariableColumnName] == predictorVariableName &&
            rowAndTargetData.Row[PhyloTree.TargetVariableColumnName] == targetVariableName)
        {
            Dictionary<string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData;
            Dictionary<string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

            Converter<Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
            Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);
            Converter<Leaf, SufficientStatistics> altDistributionMap = CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionMap);

            double logLikelihood;
            Score scoreIndTarget, scoreIndPredictor, scoreAlt;
            MessageInitializer messageInitializer;
            OptimizationParameterList nullParams = NullModelDistribution.GetParameters(nullModelArgs);
            OptimizationParameterList altParams = AltModelDistribution.GetParameters(altModelArgs);

            Console.WriteLine(SpecialFunctions.CreateTabString("Variable", nullParams.ToStringHeader(), "LogL"));

            // target under the null model
            messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
            logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
            scoreIndTarget = Score.GetInstance(logLikelihood, nullParams);
            Console.WriteLine("Target\t" + scoreIndTarget);

            // predictor under the null model (leaf maps swapped); the likelihood is computed once -
            // a second identical call whose result was discarded has been removed
            messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionMap, predictorDistributionClassFunction, NullModelDistribution);
            logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
            scoreIndPredictor = Score.GetInstance(logLikelihood, nullParams);
            Console.WriteLine("Predictor\t" + scoreIndPredictor);

            // alternative model
            Console.WriteLine("\n" + SpecialFunctions.CreateTabString("Variable", altParams.ToStringHeader(), "LogL"));
            messageInitializer = modelScorer.CreateMessageInitializer(null, altDistributionMap, AltModelDistribution);
            logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, altParams);
            scoreAlt = Score.GetInstance(logLikelihood, altParams);
            Console.WriteLine(SpecialFunctions.CreateTabString(AltModelDistribution, scoreAlt));
        }
    }
}
/// <summary>
/// Produces the text of one report line for a single work-list row.
/// Derived classes decide which columns the line contains.
/// </summary>
/// <param name="modelScorer">Model scorer available to the implementation.</param>
/// <param name="phyloTree">Phylogenetic tree available to the implementation.</param>
/// <param name="rowAndTargetData">The work-list row being reported.</param>
/// <param name="workList">The work list the row came from.</param>
/// <param name="rowIndex">Index of the row. NOTE(review): presumably zero-based within the whole work list - confirm against the caller.</param>
/// <param name="workListCount">Number of rows in the work list.</param>
/// <param name="workIndex">Work (piece) index assigned to the row by the caller.</param>
/// <returns>The report line. NOTE(review): presumably tab-delimited and without a trailing newline - confirm against implementations.</returns>
protected abstract string CreateReportLine( ModelScorer modelScorer, PhyloTree phyloTree, RowData rowAndTargetData, UniversalWorkList workList, int rowIndex, int workListCount, int workIndex);
/// <summary>
/// Computes the log-likelihood ratio. Called from CreateReportLineDiscrete. Currently, a p-value is computed
/// from this difference, assuming the LLR has a difference of 1 DF.
/// !!!May need to change this in future versions to allow us to specify the DF.
/// ComputeLLR must print the value of any parameters that are specific to it (it should not print the return value).
/// </summary>
/// <param name="modelScorer">Model scorer available to the implementation.</param>
/// <param name="phyloTree">Phylogenetic tree available to the implementation.</param>
/// <param name="stringBuilder">Report line under construction; the implementation appends its own parameter columns here.</param>
/// <param name="targetMarginal">Marginal for the target. NOTE(review): presumably the fraction of "true" target leaves - confirm against the caller.</param>
/// <param name="predictorMarginal">Marginal for the predictor. NOTE(review): presumably the fraction of "true" predictor leaves - confirm against the caller.</param>
/// <param name="predictorDistributionClassFunction">Maps a leaf to its predictor statistics.</param>
/// <param name="targetDistributionClassFunction">Maps a leaf to its target statistics.</param>
/// <returns>The log-likelihood difference; per the summary, the caller turns it into a p-value.</returns>
protected abstract double ComputeLLR(ModelScorer modelScorer, PhyloTree phyloTree, StringBuilder stringBuilder, double targetMarginal, double predictorMarginal, Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction, Converter <Leaf, SufficientStatistics> targetDistributionClassFunction);
/// <summary>
/// Rebuilds <c>Tree</c> from the diagram graph, runs the layout calculator on it, and returns
/// the node position information extracted from the laid-out tree.
/// </summary>
/// <param name="graph">Diagram graph to convert and lay out.</param>
/// <returns>Position information for the graph's nodes.</returns>
public override IEnumerable<PositionInfo<IDiagramItem>> RelayoutGraphNodesPosition(Graph<IDiagramItem> graph)
{
    // Convert first; the layout and the position lookup both work on the stored tree.
    Tree = MsaglGeometryGraphHelpers.CreatePhyloTrees(graph);

    LayoutCalculator.CalculateLayout(Tree);

    var nodePositions = MsaglGeometryGraphHelpers.GetGetNodesPositionInfo(Tree);
    return nodePositions;
}
/// <summary>
/// Builds one tab-delimited report line for a predictor/target pair: predictor leaf counts and
/// non-missing counts, then - unless a predictor class is empty - the scores of two model fits
/// (useParameter false, then true), their log-likelihood difference and its p-value.
/// </summary>
/// <remarks>There is very similar code in ModelTesterDiscrete.cs.</remarks>
/// <param name="modelScorer">Scorer used to create the message initializer and score the models.</param>
/// <param name="phyloTree">Tree used for the leaf counts.</param>
/// <param name="rowAndTargetData">Supplies the row dictionary plus the predictor and target data.</param>
/// <param name="workList">Not read by this implementation.</param>
/// <param name="rowIndex">Written to the report line as-is.</param>
/// <param name="workListCount">Written to the report line as-is.</param>
/// <param name="workIndex">Written to the report line as-is.</param>
/// <returns>The tab-delimited report line.</returns>
protected override string CreateReportLine(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RowData rowAndTargetData,
    UniversalWorkList workList,
    int rowIndex,
    int workListCount,
    int workIndex)
{
    Dictionary<string, string> row = rowAndTargetData.Row;
    string predictorVariable = row[PhyloTree.PredictorVariableColumnName];
    string targetVariable = row[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
    int nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

    Dictionary<string, SufficientStatistics> predictorData = rowAndTargetData.PredictorData;
    Dictionary<string, SufficientStatistics> targetData = rowAndTargetData.TargetData;

    Converter<Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(targetData);
    Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(predictorData);

    int[] predictorCounts = phyloTree.CountsOfLeaves(predictorDistributionClassFunction);
    int predictorFalseNameCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
    int predictorTrueNameCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
    int targetNonMissingCount = phyloTree.CountOfNonMissingLeaves(targetData);
    int globalNonMissingCount = phyloTree.GlobalNonMissingCount(predictorDistributionClassFunction, targetDistributionMap);

    StringBuilder line = new StringBuilder(
        SpecialFunctions.CreateTabString(
            this, rowIndex, workListCount, workIndex, nullIndex,
            predictorVariable, predictorFalseNameCount, predictorTrueNameCount, predictorTrueNameCount + predictorFalseNameCount,
            targetVariable, targetNonMissingCount, globalNonMissingCount, ""));

    // A predictor class with no leaves cannot be scored; fill the remaining columns with NaN.
    bool predictorHasEmptyClass = Array.IndexOf(predictorCounts, 0) >= 0;
    if (predictorHasEmptyClass)
    {
        CompleteRowWithNaN(line);
        return line.ToString();
    }

    MessageInitializer initializer =
        modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
    NullModelDistribution.InitialParamVals = null;

    // index 0: useParameter == false, index 1: useParameter == true
    double[] logLikelihoods = new double[2];
    for (int pass = 0; pass < logLikelihoods.Length; pass++)
    {
        Score score = modelScorer.ScoreModel(initializer, pass == 1);
        line.Append(SpecialFunctions.CreateTabString(score, ""));
        Debug.Write(SpecialFunctions.CreateTabString(score, ""));
        logLikelihoods[pass] = score.Loglikelihood;

        // Each pass leaves its optimized parameters as the alternative model's starting values.
        AltModelDistribution.InitialParamVals = score.OptimizationParameters;
    }

    double diff = logLikelihoods[1] - logLikelihoods[0];
    // Negative differences are clamped to zero before the ratio test.
    double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);
    string resultColumns = SpecialFunctions.CreateTabString(diff, pValue);
    line.Append(resultColumns);
    Debug.WriteLine(SpecialFunctions.CreateTabString(diff, pValue));

    return line.ToString();
}
/// <summary>
/// Builds a sample phylogenetic tree on nodes A..G with fixed edge lengths, lays it out with
/// default Sugiyama settings, then adds two non-tree edges (F-D and G-D) and routes only those.
/// </summary>
/// <returns>The laid-out tree, including the two extra routed edges.</returns>
internal static GeometryGraph CreateAndLayoutGraph()
{
    PhyloTree phyloTree = new PhyloTree();

    double nodeWidth = 40;
    double nodeHeight = 10;
    foreach (string id in "A B C D E F G".Split(' '))
    {
        DrawingUtilsForSamples.AddNode(id, phyloTree, nodeWidth, nodeHeight);
    }

    // Tree edges as parallel tables: parent, child, edge length.
    string[] parents = { "A", "A", "A", "C", "C", "C" };
    string[] children = { "B", "C", "D", "E", "F", "G" };
    double[] lengths = { 2, 2, 3.5, 2, 1.5, 3.5 };
    for (int i = 0; i < parents.Length; i++)
    {
        PhyloEdge treeEdge = new PhyloEdge(
            phyloTree.FindNodeByUserData(parents[i]),
            phyloTree.FindNodeByUserData(children[i]));
        phyloTree.Edges.Add(treeEdge);
        treeEdge.Length = lengths[i];
    }

    // NOTE(review): these settings are only read for NodeSeparation below; the layout call
    // receives its own fresh SugiyamaLayoutSettings instance, as in the original sample.
    var sugiyamaLayoutSettings = new SugiyamaLayoutSettings();

    foreach (var treeEdge in phyloTree.Edges)
    {
        treeEdge.EdgeGeometry.TargetArrowhead = new Arrowhead();
    }

    Microsoft.Msagl.Miscellaneous.LayoutHelpers.CalculateLayout(phyloTree, new SugiyamaLayoutSettings(), null);

    // add a couple of non-tree edges
    Edge fromFToD = new Edge(phyloTree.FindNodeByUserData("F"), phyloTree.FindNodeByUserData("D"));
    fromFToD.EdgeGeometry.SourceArrowhead = new Arrowhead();
    phyloTree.Edges.Add(fromFToD);

    Edge fromGToD = new Edge(phyloTree.FindNodeByUserData("G"), phyloTree.FindNodeByUserData("D"));
    fromGToD.EdgeGeometry.SourceArrowhead = new Arrowhead();
    phyloTree.Edges.Add(fromGToD);

    // route the non-tree edges, every other edge is routed already
    double loosePadding = sugiyamaLayoutSettings.NodeSeparation / 10;
    double tightPadding = sugiyamaLayoutSettings.NodeSeparation / 10;
    double coneAngle = Math.PI / 6;
    var router = new SplineRouter(phyloTree, new[] { fromFToD, fromGToD }, tightPadding, loosePadding, coneAngle, null);
    router.Run();

    return phyloTree;
}
/// <summary>
/// Command-line entry point. Parses the optional flags and the positional arguments,
/// validates them, builds the tree / scorer / evaluator objects and hands everything
/// to <c>PhyloDDriver.Run</c>.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <remarks>
/// Any exception is reported on the console (message, inner message if present, and
/// the usage text) and then rethrown.
/// </remarks>
static void Main(string[] args)
{
    try
    {
        ArgCollection parsedArgs = ArgCollection.GetInstance(args);

        if (parsedArgs.ExtractOptionalFlag("help"))
        {
            Console.WriteLine("");
            Console.WriteLine(UsageMessage);
            Console.WriteLine(HelpMessage);
            return;
        }

        // Optional, named arguments (with their defaults).
        string optimizerName = parsedArgs.ExtractOptional <string>("optimizer", "BrentThenGrid");
        string keepTestName = parsedArgs.ExtractOptional <string>("keepTest", "AlwaysKeep");
        string skipRowIndexFile = parsedArgs.ExtractOptional <string>("skipRowIndexFile", null);
        parsedArgs.CheckNoMoreOptions();

        // Required, positional arguments; they are consumed in exactly this order.
        string treeFile = parsedArgs.ExtractNext <string>("treeFile");
        string predictorFile = parsedArgs.ExtractNext <string>("predictorFile");
        string targetFile = parsedArgs.ExtractNext <string>("targetFile");
        string leafDistribution = parsedArgs.ExtractNext <string>("leafDistribution");
        string nullDataGenerator = parsedArgs.ExtractNext <string>("nullDataGenerator");
        string niceName = parsedArgs.ExtractNext <string>("niceName");
        string outputDirectory = parsedArgs.ExtractNext <string>("outputDirectory");
        RangeCollection pieceIndexRange = parsedArgs.ExtractNext <RangeCollection>("pieceIndexRange");
        int pieceCount = parsedArgs.ExtractNext <int>("pieceCount");
        RangeCollection nullIndexRange = parsedArgs.ExtractNext <RangeCollection>("nullIndexRange");
        parsedArgs.CheckThatEmpty();

        if (!PhyloDDriver.ValidateDistribution(leafDistribution))
        {
            Console.WriteLine("{0} is not a recognized distribution name. Please choose a name from the following list:", leafDistribution);
            foreach (string knownName in PhyloDDriver.GetDistributionNames())
            {
                Console.WriteLine("\t{0}", knownName);
            }
            throw new ArgumentException("Invalid distribution name.");
        }

        // A missing option or the literal text "null" both mean "no rows to skip".
        RangeCollection skipRowIndexRange;
        if (skipRowIndexFile == null || skipRowIndexFile == "null")
        {
            skipRowIndexRange = null;
        }
        else
        {
            skipRowIndexRange = RangeCollection.Parse(File.ReadAllText(skipRowIndexFile));
        }

        KeepTest <Dictionary <string, string> > keepTest = KeepTest <Dictionary <string, string> > .GetInstance(null, keepTestName);

        bool pieceIndexInRange = pieceIndexRange.IsBetween(0, pieceCount - 1);
        SpecialFunctions.CheckCondition(pieceIndexInRange, "pieceIndex must be at least 0 and less than pieceCount");

        bool nullIndexInRange = nullIndexRange.IsBetween(-1, int.MaxValue);
        SpecialFunctions.CheckCondition(nullIndexInRange, "nullIndex must be at least -1");

        PhyloTree tree = PhyloTree.GetInstance(treeFile, null);
        ModelScorer scorer = ModelScorer.GetInstance(tree, leafDistribution, optimizerName);
        ModelEvaluator evaluator = ModelEvaluator.GetInstance(leafDistribution, scorer);

        PhyloDDriver phyloDDriver = PhyloDDriver.GetInstance();
        phyloDDriver.Run(
            evaluator,
            predictorFile,
            targetFile,
            leafDistribution,
            nullDataGenerator,
            keepTest,
            skipRowIndexRange,
            niceName,
            outputDirectory,
            pieceIndexRange,
            pieceCount,
            nullIndexRange,
            optimizerName);
    }
    catch (Exception error)
    {
        // Report the failure and the usage text, then let the exception propagate.
        Console.WriteLine("");
        Console.WriteLine(error.Message);
        if (error.InnerException != null)
        {
            Console.WriteLine(error.InnerException.Message);
        }

        Console.WriteLine("");
        Console.WriteLine(UsageMessage);
        throw;
    }
}
/// <summary>
/// Creates a discrete model scorer; both arguments are forwarded unchanged to the
/// base-class constructor.
/// </summary>
/// <param name="tree">Tree passed to the base constructor.</param>
/// <param name="optimizer">Optimizer passed to the base constructor.</param>
public ModelScorerDiscrete(PhyloTree tree, GridSearch optimizer)
    : base(tree, optimizer)
{
    // Nothing to initialize beyond what the base constructor does.
}
/// <summary>
/// Not supported by this evaluator: the override exists only to satisfy the base
/// class contract and always throws.
/// </summary>
/// <param name="modelScorer">Unused.</param>
/// <param name="phyloTree">Unused.</param>
/// <param name="stringBuilder">Unused.</param>
/// <param name="targetMarginal">Unused.</param>
/// <param name="predictorMarginal">Unused.</param>
/// <param name="predictorDistributionClassFunction">Unused.</param>
/// <param name="targetDistributionClassFunction">Unused.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
protected override double ComputeLLR(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    StringBuilder stringBuilder,
    double targetMarginal,
    double predictorMarginal,
    Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction,
    Converter <Leaf, SufficientStatistics> targetDistributionClassFunction)
{
    // NotImplementedException derives from Exception, so existing catch (Exception)
    // handlers still see it; the message text is kept identical to the previous one.
    throw new NotImplementedException("The method or operation is not implemented.");
}
/// <summary>
/// Base constructor for model scorers: stores the tree and the optimizer in the
/// <c>PhyloTree</c> and <c>GridSearch</c> members.
/// </summary>
/// <param name="tree">Value assigned to <c>PhyloTree</c>.</param>
/// <param name="optimizer">Value assigned to <c>GridSearch</c>.</param>
protected ModelScorer(PhyloTree tree, GridSearch optimizer)
{
    this.PhyloTree = tree;
    this.GridSearch = optimizer;
}
/// <summary>
/// Computes the log likelihood of the model given the data by delegating to
/// <c>PhyloTree.ComputeLogLikelihoodModelGivenDataGaussian</c>.
/// </summary>
/// <param name="messageInitializer">Forwarded to the tree's Gaussian likelihood routine.</param>
/// <param name="paramList">Forwarded to the tree's Gaussian likelihood routine.</param>
/// <param name="useLogMethod">Not used by this override.</param>
/// <returns>The value returned by the tree's Gaussian likelihood routine.</returns>
public override double ComputeLogLikelihoodModelGivenData(
    MessageInitializer messageInitializer,
    OptimizationParameterList paramList,
    bool useLogMethod)
{
    double logLikelihood = PhyloTree.ComputeLogLikelihoodModelGivenDataGaussian(messageInitializer, paramList);
    return logLikelihood;
}
/// <summary>
/// Creates a Gaussian model scorer; both arguments are forwarded unchanged to the
/// base-class constructor.
/// </summary>
/// <param name="tree">Tree passed to the base constructor.</param>
/// <param name="optimizer">Optimizer passed to the base constructor.</param>
public ModelScorerGaussian(PhyloTree tree, GridSearch optimizer)
    : base(tree, optimizer)
{
    // Nothing to initialize beyond what the base constructor does.
}