protected override double ComputeLLR(ModelScorer modelScorer, PhyloTree phyloTree, StringBuilder stringBuilder, double targetMarginal, double predictorMarginal,
                                             Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction, Converter <Leaf, SufficientStatistics> targetDistributionClassFunction)
        {
            Converter <Leaf, SufficientStatistics> LeafToJointDistributionClass =
                CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionClassFunction);

            double             logLikelihoodIndependentModel, logLikelihoodJointModel;
            Score              scoreIndTarget, scoreIndPredictor, scoreJoint;
            MessageInitializer messageInitializer;

            // first score the target.
            NullModelDistribution.EmpiricalEquilibrium = targetMarginal;
            messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionClassFunction, NullModelDistribution);
            scoreIndTarget     = modelScorer.ScoreModel(messageInitializer, false);

            NullModelDistribution.EmpiricalEquilibrium = predictorMarginal;
            messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionClassFunction, predictorDistributionClassFunction, NullModelDistribution);
            scoreIndPredictor  = modelScorer.ScoreModel(messageInitializer, false);

            DistributionDiscreteJointBinary jointDistn = (DistributionDiscreteJointBinary)AlternativeModelDistribution;

            jointDistn.SetInitialParams(scoreIndPredictor.OptimizationParameters, scoreIndTarget.OptimizationParameters);
            messageInitializer = modelScorer.CreateMessageInitializer(null, LeafToJointDistributionClass, jointDistn);
            scoreJoint         = modelScorer.ScoreModel(messageInitializer, false);

            logLikelihoodIndependentModel = scoreIndTarget.Loglikelihood + scoreIndPredictor.Loglikelihood;
            logLikelihoodJointModel       = scoreJoint.Loglikelihood;

            stringBuilder.Append(SpecialFunctions.CreateTabString(scoreIndPredictor.ToString(NullModelDistribution), scoreIndTarget.ToString(NullModelDistribution),
                                                                  logLikelihoodIndependentModel, scoreJoint.ToString(jointDistn), ""));

            double diff = logLikelihoodJointModel - logLikelihoodIndependentModel;

            return(diff);
        }
Exemplo n.º 2
0
 public static void AddPhyloTreeEdge(PhyloTree phyloTree, IDiagramItem parentNodeSource, IDiagramItem childNodeSource, double weight = 1)
 {
     phyloTree.Edges.Add(new PhyloEdge(AddNode(phyloTree, parentNodeSource), AddNode(phyloTree, childNodeSource))
     {
         Weight = (int)weight
     });
 }
Exemplo n.º 3
0
        protected override string CreateReportLine(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            Dictionary <string, string> row = rowAndTargetData.Row;
            string predictorVariable        = row[PhyloTree.PredictorVariableColumnName]; // e.g. hla
            string targetVariable           = row[PhyloTree.TargetVariableColumnName];    // e.g. A@182 (amino acid "A" at position 182)
            int    nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

            //Dictionary<string, bool> caseIdToNonMissingPredictorValue = workList.NullIndexToPredictorToCaseIdToNonMissingValue[nullIndex][predictorVariable];
            Dictionary <string, SufficientStatistics> caseIdToNonMissingPredictorValue = rowAndTargetData.PredictorData; //workList.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariable(nullIndex, predictorVariable);
            Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue    = rowAndTargetData.TargetData;

            TwoByTwo fishers2by2 = TwoByTwo.GetInstance(
                SufficientStatisticsMapToIntMap(caseIdToNonMissingPredictorValue),
                SufficientStatisticsMapToIntMap(caseIdToNonMissingTargetValue));

            double pValue = fishers2by2.FisherExactTest;

            string reportLine = SpecialFunctions.CreateTabString(this, rowIndex, workListCount, workIndex, nullIndex,
                                                                 predictorVariable,
                                                                 targetVariable,
                                                                 fishers2by2.CountsString(),
                                                                 fishers2by2.FisherExactTest);

            return(reportLine);
        }
Exemplo n.º 4
0
        private void ShowParseTree(Queue <TreeNode> nodes)
        {
            var form = new Form();

            form.WindowState = FormWindowState.Maximized;
            GViewer viewer = new GViewer();
            var     tree   = new PhyloTree();


            while (nodes.Count > 0)
            {
                TreeNode treeNode = nodes.Dequeue();
                foreach (TreeNode childNode in treeNode.Nodes)
                {
                    Node node = tree.AddNode(treeNode.ToString());
                    node.Attr.FillColor = Microsoft.Msagl.Drawing.Color.Orange;
                    tree.AddEdge(treeNode.ToString(), childNode.ToString());

                    nodes.Enqueue(childNode);
                }
            }

            viewer.Graph = tree;
            form.SuspendLayout();
            viewer.Dock = System.Windows.Forms.DockStyle.Fill;
            form.Controls.Add(viewer);
            form.ResumeLayout();
            form.ShowDialog();
        }
Exemplo n.º 5
0
        public static ModelScorer GetInstance(PhyloTree aPhyloTree, string leafDistributionName, string optimizerName)
        {
            leafDistributionName = leafDistributionName.ToLower();
            ModelScorer modelScorer;
            GridSearch  optimizer = GridSearch.GetInstance(optimizerName);

            if (leafDistributionName.StartsWith(ModelEvaluatorCrossValidate.BaseName.ToLower()))
            {
                leafDistributionName = leafDistributionName.Substring(ModelEvaluatorCrossValidate.BaseName.Length);
            }
            else if (leafDistributionName.StartsWith(ModelEvaluatorReverse.BaseName.ToLower()))
            {
                leafDistributionName = leafDistributionName.Substring(ModelEvaluatorReverse.BaseName.Length);
            }

            if (leafDistributionName.StartsWith(ModelEvaluatorDiscrete.BaseName.ToLower()))
            {
                modelScorer = new ModelScorerDiscrete(aPhyloTree, optimizer);
            }
            //else if (leafDistributionName.StartsWith(ModelEvaluatorGaussian.BaseName.ToLower()))
            //{
            //    modelScorer = new ModelScorerGaussian(aPhyloTree, optimizer);
            //}
            else
            {
                modelScorer = null;
                throw new ArgumentException("Cannot parse " + leafDistributionName + " into a valid ModelScorer.");
            }

            //modelScorer.GridSearch = GridSearch.GetInstance(optimizerName);
            return(modelScorer);
        }
Exemplo n.º 6
0
        public static PhyloTree CreatePhyloTrees(Graph <IDiagramItem> graph)
        {
            PhyloTree phyloTree = new PhyloTree();

            foreach (var node in graph.Nodes)
            {
                AddNode(phyloTree, node);
            }
            foreach (var edge in graph.Edges)
            {
                AddPhyloTreeEdge(phyloTree, edge.From, edge.To, edge.Weight);
            }
            return(phyloTree);
        }
 protected NullDataCollection CreateNullDataGenerator(
     string nullDataGeneratorName,
     ModelScorer modelScorer,
     PhyloTree phyloTree,
     RangeCollection nullIndexRangeCollection,
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration)
 {
     return(NullDataCollection.GetInstance(
                NullDataGenerator.GetInstance(nullDataGeneratorName, modelScorer, phyloTree, this),
                nullIndexRangeCollection,
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration));
 }
Exemplo n.º 8
0
        internal static LayeredLayoutEngine CalculateLayout(GeometryGraph msaglGraph, SugiyamaLayoutSettings settings, CancelToken cancelToken)
        {
            var engine = new LayeredLayoutEngine(msaglGraph, settings);

#if USE_PHYLOTREE
            PhyloTree phyloTree = msaglGraph as PhyloTree;
            if (phyloTree != null)
            {
                var pc = new PhyloTreeLayoutCalclulation(phyloTree, settings, engine.IntGraph, engine.Database);
                pc.Run();
            }
            else
#endif
            engine.Run(cancelToken);
            return(engine);
        }
Exemplo n.º 9
0
        void button1_Click(object sender, EventArgs e)
        {
            var tree = new PhyloTree();
            var edge = (PhyloEdge)tree.AddEdge("a", "b");

            //edge.Length = 0.8;
            edge = (PhyloEdge)tree.AddEdge("a", "c");
            //edge.Length = 0.2;
            tree.AddEdge("c", "d");
            tree.AddEdge("c", "e");
            tree.AddEdge("c", "f");
            tree.AddEdge("e", "0");
            tree.AddEdge("e", "1");
            tree.AddEdge("e", "2");

            viewer.Graph = tree;
        }
        protected override double ComputeLLR(ModelScorer modelScorer, PhyloTree phyloTree, StringBuilder stringBuilder, double targetMarginal, double predictorMarginal,
                                             Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction, Converter <Leaf, SufficientStatistics> targetDistributionClassFunction)
        {
            NullModelDistribution.EmpiricalEquilibrium = targetMarginal;
            NullModelDistribution.InitialParamVals     = null;

            MessageInitializer messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionClassFunction, NullModelDistribution);

            List <double> logLikelihoodList = new List <double>();

            foreach (bool useParameter in new bool[] { false, true })
            {
                Score score = modelScorer.ScoreModel(messageInitializer, useParameter);

                stringBuilder.Append(SpecialFunctions.CreateTabString(score.ToString(useParameter ? AlternativeModelDistribution : NullModelDistribution), ""));
                logLikelihoodList.Add(score.Loglikelihood);
                AltModelDistribution.InitialParamVals = score.OptimizationParameters;
                Debug.WriteLine(SpecialFunctions.CreateTabString("AltModelDistribution.InitialParamVals = score.OptimizationParameters", score.OptimizationParameters));
            }

            double diff = logLikelihoodList[1] - logLikelihoodList[0];

            return(diff);
        }
Exemplo n.º 11
0
        public override Dictionary <string, SufficientStatistics> GenerateRandomMapping(Dictionary <string, SufficientStatistics> realCaseIdToNonMissingValue, ref Random random)
        {
            //!!!!put check in to make sure ISufficientSTatistics is reall BooleanStatistics
            Converter <Leaf, SufficientStatistics> leafToDistnClassFunction = PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue);

            PhyloTree tree = _modelScorer.PhyloTree;

            MessageInitializer messageInitializer = MessageInitializerDiscrete.GetInstance(leafToDistnClassFunction, _discreteDistribution, new int[] { 1, 1 }, tree.LeafCollection);

            Score score = _modelScorer.MaximizeLikelihood(messageInitializer);

            double percentNonMissing = (double)tree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue) /
                                       (double)SpecialFunctions.Count(tree.LeafCollection);
            double equilibrium = score.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Equilibrium].Value;
            double lambda      = score.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Lambda].Value;

            Dictionary <string, BooleanStatistics> randomCaseIdToNonMissingValue = tree.EvolveBinaryTree(equilibrium, lambda, 1 - percentNonMissing, ref random);

            Dictionary <string, SufficientStatistics> converted;

            SpecialFunctions.ConvertDictionaryToBaseClasses(randomCaseIdToNonMissingValue, out converted);

            return(converted);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Does the work.
        /// </summary>
        public override void DoWork()
        {
            // get our input data and null the field to make sure we don't serialize it back
            InputData inputData = mInputData;

            mInputData = null;

            // get the job-specific names of input files
            FileDefCollection fileDefs             = Job.FileDefs;
            string            treeFileName         = Utility.GetNamedFileDef(fileDefs, Constants.TreeFileDefName).LocalName;
            string            predictorFileName    = Utility.GetNamedFileDef(fileDefs, Constants.PredictorFileDefName).LocalName;
            string            targetFileName       = Utility.GetNamedFileDef(fileDefs, Constants.TargetFileDefName).LocalName;
            string            skipRowIndexFileName = Utility.GetNamedFileDef(fileDefs, Constants.SkipRowIndexFileDefName).LocalName;

            // construct RangeCollections
            RangeCollection pieceIndexRangeCollection = RangeCollection.Parse(inputData.PieceIndexRange);
            RangeCollection nullIndexRangeCollection  = RangeCollection.Parse(inputData.NullIndexRange);
            RangeCollection skipRowIndexRangeCollection;
            FileInfo        fileInfo = new FileInfo(skipRowIndexFileName);

            if (fileInfo.Length > 0)
            {
                skipRowIndexRangeCollection = RangeCollection.Parse(File.ReadAllText(skipRowIndexFileName));
            }
            else
            {
                skipRowIndexRangeCollection = null;
            }

            // do the rest
            PhyloTree   aPhyloTree  = PhyloTree.GetInstance(treeFileName, null);
            ModelScorer modelScorer =
                ModelScorer.GetInstance(aPhyloTree, inputData.LeafDistributionName, inputData.OptimizerName);
            ModelEvaluator modelEvaluator = ModelEvaluator.GetInstance(inputData.LeafDistributionName, modelScorer);
            KeepTest <Dictionary <string, string> > keepTest =
                KeepTest <Dictionary <string, string> > .GetInstance(null, inputData.KeepTestName);

            PhyloDDriver driver = PhyloDDriver.GetInstance();

            // create a name for the temporary job sandbox.  This directory gets created by driver.Run(...)
            string agentOutputDirectoryName =
                Path.Combine(Environment.CurrentDirectory, String.Format(CultureInfo.InvariantCulture, "{0}.{1}", Job.JobId, Task.TaskId));

            // save the standard out and standard error in memory streams
            using (MemoryStream streamOut = new MemoryStream(), streamError = new MemoryStream())
            {
                try
                {
                    // redirect the outputs
                    using (
                        StreamWriter writerOut = new StreamWriter(streamOut),
                        writerError = new StreamWriter(streamError))
                    {
                        Console.SetOut(writerOut);
                        Console.SetError(writerError);

                        try
                        {
                            // run the model
                            string outputFileName = driver.Run(
                                modelEvaluator,
                                predictorFileName, targetFileName,
                                inputData.LeafDistributionName, inputData.NullDataGeneratorName,
                                keepTest, skipRowIndexRangeCollection,
                                inputData.NiceName,
                                agentOutputDirectoryName,
                                pieceIndexRangeCollection, inputData.PieceCount,
                                nullIndexRangeCollection,
                                inputData.OptimizerName);

                            // this is the expected output file name -- save this so it can be written on the master side with the same name.
                            mOutputFileName = Path.GetFileName(outputFileName);


                            mLocalOutputFileName = Path.Combine(inputData.LocalOutputDirectoryName, mOutputFileName);

                            // get the output data
                            string fullOutputPath = Path.Combine(agentOutputDirectoryName, mOutputFileName);
                            if (!File.Exists(fullOutputPath))
                            {
                                TaskResult.FailureReason  = TaskFailureReason.MissingOutput;
                                TaskResult.FailureMessage = String.Format(CultureInfo.CurrentCulture, "Cannot find output file '{0}'", targetFileName);
                                TaskResult.Status         = TaskAssignmentStatus.Failed;
                            }
                            using (StreamReader outputData = new StreamReader(fullOutputPath))
                            {
                                mOutputData = outputData.ReadToEnd();
                            }
                        }
                        finally
                        {
                            // this finally is to make sure we delete the folder
                            // get rid of the sandbox
                            Directory.Delete(agentOutputDirectoryName, true);
                        }
                    }
                }
                finally
                {
                    // this finally is to make sure we get console output
                    Encoding encoding = Encoding.Default;
                    TaskResult.StandardOutput = encoding.GetString(streamOut.GetBuffer());
                    TaskResult.StandardError  = encoding.GetString(streamError.GetBuffer());
                }
            }
        }
        public void Run(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string leafDistributionName,
            string nullDataGeneratorName,
            KeepTest <Dictionary <string, string> > keepTest,
            RangeCollection skipRowIndexRangeCollectionOrNull,
            string shortName,
            string outputDirectoryName,
            RangeCollection pieceIndexRangeCollection, int pieceCount,
            RangeCollection nullIndexRangeCollection,
            string optimizerName)
        {
            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();
            Directory.CreateDirectory(outputDirectoryName);


            string outputFileName = string.Format(@"{0}\{1}.{2}.{3}.{4}.{5}.{6}{7}.txt",
                                                  outputDirectoryName, shortName,
                                                  leafDistributionName, nullDataGeneratorName,
                                                  nullIndexRangeCollection,
                                                  pieceCount,
                                                  pieceIndexRangeCollection,
                                                  skipRowIndexRangeCollectionOrNull == null ? "" : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString()
                                                  );

            #region from PhyloTree refactor
            //Dictionary<string, Dictionary<string, bool>> predictorVariableToCaseIdToRealNonMissingValue = LoadSparseFileInMemory<bool>(predictorSparseFileName);
            //IEnumerable<Pair<string, Dictionary<string, T>>> targetNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration<T>(targetSparseFileName);

            //NullDataCollection nullDataGenerator =
            //    NullDataCollection.GetInstance(this, modelTester, nullIndexRangeCollection, predictorVariableToCaseIdToRealNonMissingValue);

            //UniversalWorkList<T> workList = UniversalWorkList<T>.GetInstance(
            //    predictorVariableToCaseIdToRealNonMissingValue,
            //    targetNameAndCaseIdToNonMissingValueEnumeration,
            //    nullDataGenerator, nullIndexRangeCollection, keepTest);
            #endregion
            bool speedOverMemory = true;

            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            predictorNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            targetNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

            NullDataCollection nullDataGenerator =
                CreateNullDataGenerator(nullDataGeneratorName, modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, keepTest);

            int workListCount = SpecialFunctions.Count(workList.List());

            int effectiveWorkListCount;
            if (skipRowIndexRangeCollectionOrNull == null)
            {
                effectiveWorkListCount = workListCount;
            }
            else
            {
                effectiveWorkListCount = 0;
                for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
                {
                    if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
                    {
                        effectiveWorkListCount++;
                    }
                }
            }
            Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

            using (TextWriter textWriter = File.CreateText(outputFileName))
            {
                textWriter.WriteLine(Header);
                int rowIndex          = -1;
                int effectiveRowIndex = -1;

                foreach (RowData rowAndTargetData in workList.List())
                {
                    //!!!make all these parameters and the calculation a class
                    ++rowIndex;
                    Debug.Assert(rowIndex < workListCount); // real assert

                    if (skipRowIndexRangeCollectionOrNull == null || !skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
                    {
                        ++effectiveRowIndex;

                        int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);

                        if (pieceIndexRangeCollection.Contains(workIndex))
                        {
                            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());
                            string reportLine;
                            try
                            {
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }
                            catch (OutOfMemoryException)
                            {
                                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                                modelScorer.ClearCache();
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }

                            textWriter.WriteLine(reportLine);
                            textWriter.Flush();
                        }
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("Running time: " + stopwatch.Elapsed);
        }
Exemplo n.º 14
0
        //protected override NullDataCollection CreateNullDataGenerator(ModelScorer modelScorer, PhyloTree phyloTree, RangeCollection nullIndexRangeCollection, Dictionary<string, Dictionary<string, BooleanStatistics>> predictorVariableToCaseIdToRealNonMissingValue)
        //{
        //    if (DateTime.Now.Date == new DateTime(2006, 6, 28).Date)  // for testing, force it to use the parametric bootstrap
        //    {
        //        return NullDataCollection.GetInstance(
        //            new NullDataGeneratorAlongTree(modelScorer, phyloTree, (ModelTesterDiscrete)this),
        //            nullIndexRangeCollection,
        //            predictorVariableToCaseIdToRealNonMissingValue);
        //    }


        //    return base.CreateNullDataGenerator(modelScorer, phyloTree, nullIndexRangeCollection, predictorVariableToCaseIdToRealNonMissingValue);
        //}

        //public override Converter<Leaf, SufficientStatistics> CreateTargetSufficientStatisticsMap(Dictionary<string, ISufficientStatistics> caseIdToNonMissingValue)
        //{
        //    return ISufficientStatistics.DictionaryToLeafMap(caseIdToNonMissingValue);
        //}

        //public override Converter<Leaf, SufficientStatistics> CreatePredictorSufficientStatisticsMap(Dictionary<string, BooleanStatistics> caseIdToNonMissingValue)
        //{
        //    return CreateTargetSufficientStatisticsMap(caseIdToNonMissingValue);
        //}

        protected override string CreateReportLine(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            //!!!there is very similar code in ModelTesterGaussian.cs

            // we're iterating over each predictor (e.g. hla), each target (e.g. position in the sequence,
            // and each possible substring at that position).
            // Then we ask the question, Does the presence of predictor (e.g. hla)
            // influence the probability that target (e.g. mer in position n1pos) will show up?
            // nullIndex specifies whether this is the true data or randomized data.
            Dictionary <string, string> row = rowAndTargetData.Row;
            string predictorVariable        = row[PhyloTree.PredictorVariableColumnName]; // e.g. hla
            string targetVariable           = row[PhyloTree.TargetVariableColumnName];    // e.g. A@182 (amino acid "A" at position 182)
            int    nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

            //Dictionary<string, bool> caseIdToNonMissingPredictorValue = workList.NullIndexToPredictorToCaseIdToNonMissingValue[nullIndex][predictorVariable];
            Dictionary <string, SufficientStatistics> caseIdToNonMissingPredictorValue = rowAndTargetData.PredictorData; //workList.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariable(nullIndex, predictorVariable);
            Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue    = rowAndTargetData.TargetData;

            IEnumerator <SufficientStatistics> enumerator = caseIdToNonMissingPredictorValue.Values.GetEnumerator();

            enumerator.MoveNext();
            SufficientStatistics representative = enumerator.Current;
            bool predictorIsBoolean             = representative is BooleanStatistics;

            Converter <Leaf, SufficientStatistics> targetDistributionClassFunction    = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
            Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonMissingPredictorValue);

            int[] predictorCounts = predictorIsBoolean ?
                                    phyloTree.CountsOfLeaves(predictorDistributionClassFunction, NullModelDistribution) : new int[2];
            int[] targetCounts = phyloTree.CountsOfLeaves(targetDistributionClassFunction, NullModelDistribution);


            int predictorFalseNameCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int predictorTrueNameCount  = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
            int targetFalseNameCount    = targetCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int targetTrueNameCount     = targetCounts[(int)DistributionDiscreteBinary.DistributionClass.True];

            int[] fisherCounts = predictorIsBoolean ?
                                 phyloTree.FisherCounts(predictorDistributionClassFunction, targetDistributionClassFunction) : new int[4];

            int globalNonMissingCount = predictorIsBoolean ?
                                        fisherCounts[0] + fisherCounts[1] + fisherCounts[2] + fisherCounts[3] :
                                        phyloTree.GlobalNonMissingCount(predictorDistributionClassFunction, targetDistributionClassFunction);

            StringBuilder stringBuilder = new StringBuilder(
                SpecialFunctions.CreateTabString(this, rowIndex, workListCount, workIndex, nullIndex, predictorVariable,
                                                 predictorFalseNameCount,
                                                 predictorTrueNameCount,
                                                 predictorTrueNameCount + predictorFalseNameCount,
                                                 targetVariable,
                                                 targetFalseNameCount,
                                                 targetTrueNameCount,
                                                 targetTrueNameCount + targetFalseNameCount,
                                                 fisherCounts[0], fisherCounts[1], fisherCounts[2], fisherCounts[3],
                                                 globalNonMissingCount,
                                                 ""));

            bool ignoreRow = false;

            foreach (int[] counts in new int[][] { predictorIsBoolean?predictorCounts : new int[] { 1, 1 }, targetCounts })
            {
                foreach (int count in counts)
                {
                    if (count == 0)
                    {
                        ignoreRow = true;
                    }
                }
            }

            if (ignoreRow)
            {
                CompleteRowWithNaN(stringBuilder);
            }
            else
            {
                double targetMarginal    = (double)targetTrueNameCount / (double)(targetTrueNameCount + targetFalseNameCount);
                double predictorMarginal = (double)predictorTrueNameCount / (double)(predictorTrueNameCount + predictorFalseNameCount);

                double diff = ComputeLLR(modelScorer, phyloTree, stringBuilder, targetMarginal, predictorMarginal, predictorDistributionClassFunction, targetDistributionClassFunction);


                double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);

                stringBuilder.Append(SpecialFunctions.CreateTabString(diff, pValue));
            }

            return(stringBuilder.ToString());
        }
        public void ScoreTree(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string predictorVariableName,
            string targetVariableName,
            double[] nullModelArgs,
            double[] altModelArgs)
        {
            //Dictionary<string, Dictionary<string, SufficientStatistics>> predictorVariableToCaseIdToRealNonMissingValue = LoadSparseFileInMemory(predictorSparseFileName);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration(predictorSparseFileName);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration    = LoadSparseFileEnumeration(targetSparseFileName);

            RangeCollection    nullIndexRangeCollection = RangeCollection.GetInstance(-1, -1);
            NullDataCollection nullDataGenerator        =
                CreateNullDataGenerator("PredictorPermutation", modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                //targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, AlwaysKeep <Dictionary <string, string> > .GetInstance());


            foreach (RowData rowAndTargetData in workList.List())
            {
                if (rowAndTargetData.Row[PhyloTree.PredictorVariableColumnName] == predictorVariableName &&
                    rowAndTargetData.Row[PhyloTree.TargetVariableColumnName] == targetVariableName)
                {
                    Dictionary <string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData;//workList.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariable(-1, predictorVariableName);
                    Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

                    Converter <Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
                    Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);
                    Converter <Leaf, SufficientStatistics> altDistributionMap = CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionMap);
                    double                    logLikelihood;
                    Score                     scoreIndTarget, scoreIndPredictor, scoreAlt;
                    MessageInitializer        messageInitializer;
                    OptimizationParameterList nullParams = NullModelDistribution.GetParameters(nullModelArgs);
                    OptimizationParameterList altParams  = AltModelDistribution.GetParameters(altModelArgs);

                    Console.WriteLine(SpecialFunctions.CreateTabString("Variable", nullParams.ToStringHeader(), "LogL"));
                    messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    scoreIndTarget     = Score.GetInstance(logLikelihood, nullParams);
                    Console.WriteLine("Target\t" + scoreIndTarget);

                    messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionMap, predictorDistributionClassFunction, NullModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    scoreIndPredictor = Score.GetInstance(logLikelihood, nullParams);
                    Console.WriteLine("Predictor\t" + scoreIndPredictor);

                    Console.WriteLine("\n" + SpecialFunctions.CreateTabString("Variable", altParams.ToStringHeader(), "LogL"));
                    messageInitializer = modelScorer.CreateMessageInitializer(null, altDistributionMap, AltModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, altParams);
                    scoreAlt           = Score.GetInstance(logLikelihood, altParams);
                    Console.WriteLine(SpecialFunctions.CreateTabString(AltModelDistribution, scoreAlt));
                }
            }
        }
 protected abstract string CreateReportLine(
     ModelScorer modelScorer,
     PhyloTree phyloTree,
     RowData rowAndTargetData,
     UniversalWorkList workList,
     int rowIndex, int workListCount, int workIndex);
Exemplo n.º 17
0
 /// <summary>
 /// Computes the Log Likelihood ratio. Called from CreateReportLineDiscrete. Currently, a pValue is computed from this diff, assuming
 /// the LLR has a difference of 1 DF. !!!May need to change this in future versions to allow us to specific DF.
 /// ComputeLLR must print the value of any parameters that are specific to it (it should not print the return value).
 /// </summary>
 protected abstract double ComputeLLR(ModelScorer modelScorer, PhyloTree phyloTree, StringBuilder stringBuilder, double targetMarginal, double predictorMarginal,
                                      Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction, Converter <Leaf, SufficientStatistics> targetDistributionClassFunction);
 public override IEnumerable <PositionInfo <IDiagramItem> > RelayoutGraphNodesPosition(Graph <IDiagramItem> graph)
 {
     Tree = MsaglGeometryGraphHelpers.CreatePhyloTrees(graph);
     LayoutCalculator.CalculateLayout(Tree);
     return(MsaglGeometryGraphHelpers.GetGetNodesPositionInfo(Tree));
 }
        protected override string CreateReportLine(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            //!!!there is very similar code in ModelTesterDiscrete.cs

            Dictionary <string, string> row = rowAndTargetData.Row;
            string predictorVariable        = row[PhyloTree.PredictorVariableColumnName];
            string targetVariable           = row[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
            int    nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

            //Dictionary<string, bool> caseIdToNonNullPredictorValue = workList.NullIndexToPredictorToCaseIdToNonMissingValue[nullIndex][predictorVariable];
            Dictionary <string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData; //workList.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariable(nullIndex, predictorVariable);
            Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

            Converter <Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
            Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);

            int[] predictorCounts = phyloTree.CountsOfLeaves(predictorDistributionClassFunction);

            int predictorFalseNameCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int predictorTrueNameCount  = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
            int targetNonMissingCount   = phyloTree.CountOfNonMissingLeaves(caseIdToNonMissingTargetValue);
            int globalNonMissingCount   = phyloTree.GlobalNonMissingCount(predictorDistributionClassFunction, targetDistributionMap);

            StringBuilder stringBuilder = new StringBuilder(
                SpecialFunctions.CreateTabString(
                    this, rowIndex, workListCount, workIndex, nullIndex, predictorVariable,
                    predictorFalseNameCount,
                    predictorTrueNameCount,
                    predictorTrueNameCount + predictorFalseNameCount,
                    targetVariable,
                    targetNonMissingCount,
                    globalNonMissingCount,
                    ""));

            bool ignoreRow = false;

            foreach (int count in predictorCounts)
            {
                if (count == 0)
                {
                    ignoreRow = true;
                }
            }

            if (ignoreRow)
            {
                CompleteRowWithNaN(stringBuilder);
            }
            else
            {
                List <double>      logLikelihoodList  = new List <double>();
                MessageInitializer messageInitializer =
                    modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
                NullModelDistribution.InitialParamVals = null;
                foreach (bool useParameter in new bool[] { false, true })
                {
                    Score score = modelScorer.ScoreModel(messageInitializer, useParameter);
                    stringBuilder.Append(SpecialFunctions.CreateTabString(score, ""));
                    Debug.Write(SpecialFunctions.CreateTabString(score, ""));
                    logLikelihoodList.Add(score.Loglikelihood);
                    AltModelDistribution.InitialParamVals = score.OptimizationParameters;
                }

                double diff   = logLikelihoodList[1] - logLikelihoodList[0];
                double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);

                stringBuilder.Append(SpecialFunctions.CreateTabString(diff, pValue));
                Debug.WriteLine(SpecialFunctions.CreateTabString(diff, pValue));
            }
            return(stringBuilder.ToString());
        }
        internal static GeometryGraph CreateAndLayoutGraph()
        {
            PhyloTree phyloTree = new PhyloTree();
            double    width     = 40;
            double    height    = 10;

            foreach (string id in "A B C D E F G".Split(' '))
            {
                DrawingUtilsForSamples.AddNode(id, phyloTree, width, height);
            }

            PhyloEdge e;
            double    age_of_BC = 2;
            double    age_of_D  = 3.5;
            double    age_of_F  = 1.5;
            double    age_of_G  = 3.5;
            double    age_of_E  = 2;

            phyloTree.Edges.Add(e = new PhyloEdge(phyloTree.FindNodeByUserData("A"), phyloTree.FindNodeByUserData("B")));
            e.Length = age_of_BC;
            phyloTree.Edges.Add(e = new PhyloEdge(phyloTree.FindNodeByUserData("A"), phyloTree.FindNodeByUserData("C")));
            e.Length = age_of_BC;
            phyloTree.Edges.Add(e = new PhyloEdge(phyloTree.FindNodeByUserData("A"), phyloTree.FindNodeByUserData("D")));
            e.Length = age_of_D;
            phyloTree.Edges.Add(e = new PhyloEdge(phyloTree.FindNodeByUserData("C"), phyloTree.FindNodeByUserData("E")));
            e.Length = age_of_E;
            phyloTree.Edges.Add(e = new PhyloEdge(phyloTree.FindNodeByUserData("C"), phyloTree.FindNodeByUserData("F")));
            e.Length = age_of_F;
            phyloTree.Edges.Add(e = new PhyloEdge(phyloTree.FindNodeByUserData("C"), phyloTree.FindNodeByUserData("G")));
            e.Length = age_of_G;
            var sugiyamaLayoutSettings = new SugiyamaLayoutSettings();

            foreach (var edge in phyloTree.Edges)
            {
                edge.EdgeGeometry.TargetArrowhead = new Arrowhead();
            }
            Microsoft.Msagl.Miscellaneous.LayoutHelpers.CalculateLayout(phyloTree, new SugiyamaLayoutSettings(), null);

            // add a couple of  non-tree edges
            Edge e0 = new Edge(phyloTree.FindNodeByUserData("F"), phyloTree.FindNodeByUserData("D"))
            {
                EdgeGeometry = { SourceArrowhead = new Arrowhead() }
            };

            phyloTree.Edges.Add(e0);
            Edge e1 = new Edge(phyloTree.FindNodeByUserData("G"), phyloTree.FindNodeByUserData("D"))
            {
                EdgeGeometry = { SourceArrowhead = new Arrowhead() }
            };

            phyloTree.Edges.Add(e1);

            // route the non-tree edges, every other edge is routed already
            double loosePadding = sugiyamaLayoutSettings.NodeSeparation / 10;
            double tightPadding = sugiyamaLayoutSettings.NodeSeparation / 10;
            double coneAngle    = Math.PI / 6;
            var    router       = new SplineRouter(phyloTree, new[] { e0, e1 }, tightPadding, loosePadding, coneAngle, null);

            router.Run();
            return(phyloTree);
        }
Exemplo n.º 21
0
        static void Main(string[] args)
        {
            try
            {
                ArgCollection argCollection = ArgCollection.GetInstance(args);

                if (argCollection.ExtractOptionalFlag("help"))
                {
                    Console.WriteLine("");
                    Console.WriteLine(UsageMessage);
                    Console.WriteLine(HelpMessage);
                    return;
                }

                string optimizerName = argCollection.ExtractOptional <string>("optimizer", "BrentThenGrid");
                string keepTestName  = argCollection.ExtractOptional <string>("keepTest", "AlwaysKeep");
                string skipRowIndexFileNameOrNull = argCollection.ExtractOptional <string>("skipRowIndexFile", null);

                argCollection.CheckNoMoreOptions();

                string          treeFileName          = argCollection.ExtractNext <string>("treeFile");
                string          predictorFileName     = argCollection.ExtractNext <string>("predictorFile");
                string          targetFileName        = argCollection.ExtractNext <string>("targetFile");
                string          leafDistributionName  = argCollection.ExtractNext <string>("leafDistribution");
                string          nullDataGeneratorName = argCollection.ExtractNext <string>("nullDataGenerator");
                string          niceName                  = argCollection.ExtractNext <string>("niceName");
                string          outputDirectory           = argCollection.ExtractNext <string>("outputDirectory");
                RangeCollection pieceIndexRangeCollection = argCollection.ExtractNext <RangeCollection>("pieceIndexRange");
                int             pieceCount                = argCollection.ExtractNext <int>("pieceCount");
                RangeCollection nullIndexRangeCollection  = argCollection.ExtractNext <RangeCollection>("nullIndexRange");

                argCollection.CheckThatEmpty();

                if (!PhyloDDriver.ValidateDistribution(leafDistributionName))
                {
                    Console.WriteLine("{0} is not a recognized distribution name. Please choose a name from the following list:", leafDistributionName);
                    foreach (string name in PhyloDDriver.GetDistributionNames())
                    {
                        Console.WriteLine("\t{0}", name);
                    }
                    throw new ArgumentException("Invalid distribution name.");
                }
                RangeCollection skipRowIndexRangeCollectionOrNull = (null == skipRowIndexFileNameOrNull) || skipRowIndexFileNameOrNull == "null" ? null : RangeCollection.Parse(File.ReadAllText(skipRowIndexFileNameOrNull));
                KeepTest <Dictionary <string, string> > keepTest  = KeepTest <Dictionary <string, string> > .GetInstance(null, keepTestName);

                SpecialFunctions.CheckCondition(pieceIndexRangeCollection.IsBetween(0, pieceCount - 1), "pieceIndex must be at least 0 and less than pieceCount");
                SpecialFunctions.CheckCondition(nullIndexRangeCollection.IsBetween(-1, int.MaxValue), "nullIndex must be at least -1");

                PhyloTree aPhyloTree = PhyloTree.GetInstance(treeFileName, null);

                ModelScorer    modelScorer    = ModelScorer.GetInstance(aPhyloTree, leafDistributionName, optimizerName);
                ModelEvaluator modelEvaluator = ModelEvaluator.GetInstance(leafDistributionName, modelScorer);
                PhyloDDriver   driver         = PhyloDDriver.GetInstance();

                driver.Run(
                    modelEvaluator,
                    predictorFileName, targetFileName,
                    leafDistributionName, nullDataGeneratorName,
                    keepTest, skipRowIndexRangeCollectionOrNull,
                    niceName,
                    outputDirectory,
                    pieceIndexRangeCollection, pieceCount,
                    nullIndexRangeCollection,
                    optimizerName);

                //Console.Write("Press enter to exist.");
                //Console.Read();
            }
            catch (Exception exception)
            {
                Console.WriteLine("");
                Console.WriteLine(exception.Message);
                if (exception.InnerException != null)
                {
                    Console.WriteLine(exception.InnerException.Message);
                }

                Console.WriteLine("");
                Console.WriteLine(UsageMessage);
                throw;
            }
        }
Exemplo n.º 22
0
 public ModelScorerDiscrete(PhyloTree tree, GridSearch optimizer) : base(tree, optimizer)
 {
 }
Exemplo n.º 23
0
 protected override double ComputeLLR(ModelScorer modelScorer, PhyloTree phyloTree, StringBuilder stringBuilder, double targetMarginal, double predictorMarginal, Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction, Converter <Leaf, SufficientStatistics> targetDistributionClassFunction)
 {
     throw new Exception("The method or operation is not implemented.");
 }
Exemplo n.º 24
0
 protected ModelScorer(PhyloTree tree, GridSearch optimizer)
 {
     PhyloTree  = tree;
     GridSearch = optimizer;
 }
Exemplo n.º 25
0
 public override double ComputeLogLikelihoodModelGivenData(MessageInitializer messageInitializer, OptimizationParameterList paramList, bool useLogMethod)
 {
     return(PhyloTree.ComputeLogLikelihoodModelGivenDataGaussian(messageInitializer, paramList));
 }
Exemplo n.º 26
0
 public ModelScorerGaussian(PhyloTree tree, GridSearch optimizer) : base(tree, optimizer)
 {
 }