コード例 #1
0
        /// <summary>
        /// computes the connected components of this graph (only components with >= 2 elements are returned)
        /// </summary>
        /// <returns>the connected components of this graph (only components with >= 2 elements are returned)
        /// in a Dictionary. The key corresponds to the node.Name and the Value is the set, in which the corresponding node is in.
        /// </returns>
        public Dictionary <string, Set <GraphNode> > GetConnectedComponentsInDictionary()
        {
            IEnumerable <string> fromLabels = (from edge in Edges
                                               select edge.From).Distinct();
            List <string> labels = (from edge in Edges
                                    select edge.To).Distinct().ToList();

            labels.AddRange(fromLabels);
            IEnumerable <GraphNode> connectedNodes = from node in Nodes
                                                     where labels.Contains(node.Name)
                                                     select node;

            Dictionary <string, Set <GraphNode> > connectedComponents = new Dictionary <string, Set <GraphNode> >();

            // at the beginning each node is in its own component
            foreach (GraphNode node in connectedNodes)
            {
                connectedComponents.Add(node.Name, new Set <GraphNode>(node));
            }
            Set <GraphNode> set1     = null;
            Set <GraphNode> set2     = null;
            Set <GraphNode> setToAdd = null;
            Set <GraphNode> set      = null;

            // for each edge we join the components if the corresponding nodes are not already in the same component
            foreach (DirectedGraphEdge edge in Edges)
            {
                if (!Object.ReferenceEquals(connectedComponents[edge.From], connectedComponents[edge.To]))
                {
                    set1 = connectedComponents[edge.From];
                    set2 = connectedComponents[edge.To];
                    // add smaller set to bigger set
                    if (set1.Count > set2.Count)
                    {
                        set      = set1;
                        setToAdd = set2;
                    }
                    else
                    {
                        set      = set2;
                        setToAdd = set1;
                    }
                    set.AddNewRange(setToAdd);
                    // update references in added set
                    foreach (GraphNode tempNode in setToAdd)
                    {
                        connectedComponents[tempNode.Name] = set;
                    }
                }
            }

            return(connectedComponents);
        }
コード例 #2
0
        //static private Dictionary<Pair<NEC, Hla>, bool> CloseHuman = null;

        //GeneratorType.Hla | GeneratorType.Position | GeneratorType.Property | GeneratorType.AndHla | GeneratorType.Zero6Supertype | GeneratorType.AndZero6Supertype
        private static Set <IHashableFeature> GenerateFeatureSet(
            object entity, string supertypeTableSource,
            int?flankSizeOrNull,
            bool includeFlankNECFeatures,
            bool includeChemicalProperties, bool includeAAFeatures,
            bool addEiFeatures
            )
        {
            bool includeAndHlaAndSTWithEpitopeAdjFeatures = false;
            bool subtractSupertypeFeatures         = false;
            bool subtractHlaFeatures               = false;
            bool substractChemAACrissCrossFeatures = false;


            SpecialFunctions.CheckCondition(!includeAndHlaAndSTWithEpitopeAdjFeatures || includeFlankNECFeatures);

            Pair <NEC, Hla> necAndHlaX = (Pair <NEC, Hla>)entity;
            NEC             nec        = (null == flankSizeOrNull) ? necAndHlaX.First : NEC.GetInstance(necAndHlaX.First, (int)flankSizeOrNull);
            Hla             hla        = necAndHlaX.Second;

            Debug.Assert(nec.N.Length == nec.C.Length); // real assert
            Pair <NEC, Hla> necAndHla = new Pair <NEC, Hla>(nec, hla);

            Set <IHashableFeature> hlaishFeatureSet = new Set <IHashableFeature>();

            CreateAndAddHlaFeature(subtractHlaFeatures, hla, necAndHla, ref hlaishFeatureSet);
            CreateAndAddFeatureSupertype(supertypeTableSource, subtractSupertypeFeatures, hla, necAndHla, ref hlaishFeatureSet, Assembly.GetExecutingAssembly(), Predictor.ResourceString);

            Set <IHashableFeature> featureSet = Set <IHashableFeature> .GetInstance(hlaishFeatureSet);

            if (addEiFeatures)
            {
                AddEiFeatures(includeChemicalProperties, includeAAFeatures, substractChemAACrissCrossFeatures, nec, necAndHla, hlaishFeatureSet, featureSet);
            }



            if (includeFlankNECFeatures)
            {
                List <IHashableFeature> aaInNFlankFeatureList = new List <IHashableFeature>(In.GetAASeqInRegionInstance(1, necAndHla, NFlank.GetInstance()));
                DebugCheckThatEvaluatesToTrue(necAndHla, aaInNFlankFeatureList);



                if (includeAAFeatures)
                {
                    featureSet.AddNewRange(aaInNFlankFeatureList);                                                    //AA in N flank
                    featureSet.AddNewRange(In.GetAASeqInRegionInstance(2, necAndHla, NFlank.GetInstance()));          //AA1-AA2 in Nflank
                    featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(NFlank.GetInstance(), false, 1, necAndHla)); //AA@x in N flank (numbering is 5 4 3 2 1)
                    featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(NFlank.GetInstance(), false, 2, necAndHla)); //AA1-AA2@x in Nflank (x is position of AA2, i.e., the smaller number)


                    featureSet.AddNewRange(In.GetAASeqInRegionInstance(1, necAndHla, CFlank.GetInstance()));         //AA in Cflank
                    featureSet.AddNewRange(In.GetAASeqInRegionInstance(2, necAndHla, CFlank.GetInstance()));         //AA1-AA2 in Cflank
                    featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(CFlank.GetInstance(), true, 1, necAndHla)); //AA@x in C flank (numbering is 1 2 3 4 5)
                    featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(CFlank.GetInstance(), true, 2, necAndHla)); //AA1-AA2@x in Cflank (x is position of AA1, i.e., the smaller number)
                }

                if (includeChemicalProperties)
                {
                    featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(1, necAndHla, NFlank.GetInstance()));
                    featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(NFlank.GetInstance(), false, 1, necAndHla));
                    featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(1, necAndHla, CFlank.GetInstance()));
                    featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(CFlank.GetInstance(), true, 1, necAndHla));
                    featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, NFlank.GetInstance()));
                    featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(NFlank.GetInstance(), false, 2, necAndHla));
                    featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, CFlank.GetInstance()));
                    featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(CFlank.GetInstance(), true, 2, necAndHla));
                }
            }
            if (includeFlankNECFeatures)
            {
                if (includeAAFeatures)
                {
                    //EV in Epitope
                    AddFeatureWithOptionalAndHlaAndST(In.GetAASeqInRegionInstance(2, necAndHla, Epitope.GetInstance()), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);//AA1-AA2 in Epitope

                    //RR in Epitope[@1-2]
                    AddFeatureWithOptionalAndHlaAndST(SubSeq.GetInSubSeqEnumeration(Epitope.GetInstance(), true, 2, necAndHla), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);//AA1-AA2@x in Epitope (x is position of AA1, i.e., the smaller number)
                }
                if (includeChemicalProperties)
                {
                    //polar,cyclic in Epitope
                    AddFeatureWithOptionalAndHlaAndST(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, Epitope.GetInstance()), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);
                    //polar,large in Epitope[@8-9]
                    AddFeatureWithOptionalAndHlaAndST(SubSeq.GetInPropertySubSeqEnumeration(Epitope.GetInstance(), true, 2, necAndHla), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);
                }

                //AA1-AA2 in Nflank,Epitope, etc
                if (null != flankSizeOrNull && (int)flankSizeOrNull > 0)
                {
                    string epitope = (string)Epitope.GetInstance().Evaluate(entity);

                    SubSeq lastNAAFeature = SubSeq.GetInstance(1, 1, false, NFlank.GetInstance());
                    string lastNAA        = (string)lastNAAFeature.Evaluate(entity);
                    In     inLastNAA      = In.GetInstance(lastNAA, lastNAAFeature);

                    SubSeq firstEAAFeature = SubSeq.GetInstance(1, 1, true, Epitope.GetInstance());
                    string firstEAA        = (string)firstEAAFeature.Evaluate(entity);
                    Debug.Assert(firstEAA == epitope.Substring(0, 1));// real assert
                    In inFirstEAA = In.GetInstance(firstEAA, firstEAAFeature);

                    SubSeq lastEAAFeature = SubSeq.GetInstance(epitope.Length, epitope.Length, true, Epitope.GetInstance());
                    string lastEAA        = (string)lastEAAFeature.Evaluate(entity);
                    In     inLastEAA      = In.GetInstance(lastEAA, lastEAAFeature);

                    SubSeq firstCAAFeature = SubSeq.GetInstance(1, 1, true, CFlank.GetInstance());
                    string firstCAA        = (string)firstCAAFeature.Evaluate(entity);
                    In     inFirstCAA      = In.GetInstance(firstCAA, firstCAAFeature);

                    if (includeAAFeatures)
                    {
                        And andLastNNAAFirstEAA = And.GetInstance(inLastNAA, inFirstEAA);
                        AddFeatureWithOptionalAndHlaAndST(andLastNNAAFirstEAA, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ true, ref featureSet);

                        And andLastEAAFirstCAA = And.GetInstance(inLastEAA, inFirstCAA);
                        AddFeatureWithOptionalAndHlaAndST(andLastEAAFirstCAA, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ true, ref featureSet);
                    }

                    if (includeChemicalProperties)
                    {
                        foreach (string lastNProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[lastNAA[0]]])
                        {
                            InProperty inLastNProperty = InProperty.GetInstance(lastNProperty, lastNAAFeature);

                            foreach (string firstEProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[firstEAA[0]]])
                            {
                                InProperty inFirstEProperty = InProperty.GetInstance(firstEProperty, firstEAAFeature); //!!!get this out of the loop?
                                And        andLastNPropertyFirstEProperty = And.GetInstance(inLastNProperty, inFirstEProperty);
                                AddFeatureWithOptionalAndHlaAndST(andLastNPropertyFirstEProperty, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ false, ref featureSet);
                                Debug.Assert((bool)andLastNPropertyFirstEProperty.Evaluate(necAndHla));
                            }
                        }
                        foreach (string lastEProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[lastEAA[0]]])
                        {
                            InProperty inlastEProperty = InProperty.GetInstance(lastEProperty, lastEAAFeature);

                            foreach (string firstCProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[firstCAA[0]]])
                            {
                                InProperty infirstCProperty = InProperty.GetInstance(firstCProperty, firstCAAFeature); //!!!get this out of the loop?
                                And        andlastEPropertyfirstCProperty = And.GetInstance(inlastEProperty, infirstCProperty);
                                AddFeatureWithOptionalAndHlaAndST(andlastEPropertyfirstCProperty, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ false, ref featureSet);
                                Debug.Assert((bool)andlastEPropertyfirstCProperty.Evaluate(necAndHla));
                            }
                        }
                    }
                }
            }

            return(featureSet);
        }
        public override EvaluationResults EvaluateModelOnData(Converter <Leaf, SufficientStatistics> v1, Converter <Leaf, SufficientStatistics> v2)
        {
            List <Leaf> nonMissingLeaves = new List <Leaf>(100);
            int         seed             = 0;

            foreach (Leaf leaf in ModelScorer.PhyloTree.LeafCollection)
            {
                SufficientStatistics class1 = v1(leaf);
                SufficientStatistics class2 = v2(leaf);
                if (!class1.IsMissing() && !class2.IsMissing())
                {
                    nonMissingLeaves.Add(leaf);
                    seed ^= (leaf.CaseName + class1.ToString() + class2.ToString()).GetHashCode();
                }
            }

            Random rand = new Random(seed);

            nonMissingLeaves = SpecialFunctions.Shuffle(nonMissingLeaves, ref rand);

            int groupSize = nonMissingLeaves.Count / _crossValidateCount;

            EvaluationResultsCrossValidate combinedResults = null;
            double testAltLLSum  = 0;   // for debugging
            double testNullLLSum = 0;   // for debugging

            for (int i = 0; i < _crossValidateCount; i++)
            {
                int        testStart  = i * groupSize;
                int        trainStart = testStart + groupSize;
                Set <Leaf> trainSet   = new Set <Leaf>(SpecialFunctions.SubList(nonMissingLeaves, trainStart, nonMissingLeaves.Count - trainStart));
                trainSet.AddNewRange(SpecialFunctions.SubList(nonMissingLeaves, 0, testStart));

                Converter <Leaf, SufficientStatistics> v1Train = CreateFilteredMap(v1, trainSet);
                Converter <Leaf, SufficientStatistics> v2Train = CreateFilteredMap(v2, trainSet);

                EvaluationResults trainingResults    = InternalEvaluator.EvaluateModelOnData(v1Train, v2Train);
                EvaluationResults testAndTrainResult = InternalEvaluator.EvaluateModelOnDataGivenParams(v1, v2, trainingResults);
                EvaluationResultsTestGivenTrain testGivenTrainResult = EvaluationResultsTestGivenTrain.GetInstance(this, trainingResults, testAndTrainResult);

                if (combinedResults == null)
                {
                    combinedResults = EvaluationResultsCrossValidate.GetInstance(this, testGivenTrainResult);
                }
                else
                {
                    combinedResults = combinedResults.AddNewResults(testGivenTrainResult);
                }

                if (double.IsInfinity(combinedResults.AltLL))   // no point in continuing...infinity will kill everything.
                {
                    break;
                }
#if DEBUG
                double            eps = 1E-10;
                EvaluationResults testTrainingResults = InternalEvaluator.EvaluateModelOnDataGivenParams(v1Train, v2Train, trainingResults);
                Debug.Assert(ComplexNumber.ApproxEqual(testTrainingResults.AltLL, trainingResults.AltLL, eps) &&
                             ComplexNumber.ApproxEqual(testTrainingResults.NullLL, trainingResults.NullLL, eps));
                //Debug.Assert(testTrainingResults.Equals(trainingResults));

                double newNullLL = testAndTrainResult.NullLL - trainingResults.NullLL;
                double newAltLL  = testAndTrainResult.AltLL - trainingResults.AltLL;

                Debug.Assert(ComplexNumber.ApproxEqual(newNullLL, testGivenTrainResult.NullLL, eps));
                Debug.Assert(ComplexNumber.ApproxEqual(newAltLL, testGivenTrainResult.AltLL, eps));

                testNullLLSum += newNullLL;
                testAltLLSum  += newAltLL;

                Debug.Assert(ComplexNumber.ApproxEqual(testNullLLSum, combinedResults.NullLL, eps), "Combined result has wrong NullLL");
                Debug.Assert(ComplexNumber.ApproxEqual(testAltLLSum, combinedResults.AltLL, eps), "Combined result has wrong AltLL");
#endif
            }
            return(combinedResults);
        }
コード例 #4
0
        //Similar to the other tabulators, but can work with multiple sets of pValues files
        //!!!would be better if could cut off really bad pValues to save memory
        //!!! also would be nice to have filters
        public static void CreateTabulateReport(ICollection <string> inputFilePatternCollection, string outputFileName,
                                                KeepTest <Dictionary <string, string> > keepTest, double maxPValue, bool auditRowIndexValues)
        {
            //SpecialFunctions.CheckCondition(!File.Exists(outputFileName), "Output file already exists: " + outputFileName);
            using (TextWriter textWriter = File.CreateText(outputFileName)) // Do this early so that if it fails, well know
            {
                List <Dictionary <string, string> > realRowCollectionToSort = new List <Dictionary <string, string> >();
                List <double> nullValueCollectionToBeSorted = new List <double>();

                string headerSoFar = null;

                Set <int> broadRealAndNullIndexSetSoFar = null;

                foreach (string broadInputFilePattern in inputFilePatternCollection)
                {
                    Set <int> narrowRealAndNullIndexSetSetSoFar = Set <int> .GetInstance();

                    foreach (string narrowInputFilePattern in broadInputFilePattern.Split('+'))
                    {
                        Set <int> realAndNullIndexSet =
                            CreateTabulateReportInternal(narrowInputFilePattern, keepTest, maxPValue, auditRowIndexValues,
                                                         ref realRowCollectionToSort, ref nullValueCollectionToBeSorted, ref headerSoFar);

                        //Instead of throwing an error, we could filter out the duplicated null indexes
                        SpecialFunctions.CheckCondition(narrowRealAndNullIndexSetSetSoFar.IntersectionIsEmpty(realAndNullIndexSet),
                                                        string.Format("Within inputFilePattern {0}, multiple '+'-connected parts cover the same nullIndex(s), {1}",
                                                                      broadInputFilePattern,
                                                                      narrowRealAndNullIndexSetSetSoFar.Intersection(realAndNullIndexSet)));

                        narrowRealAndNullIndexSetSetSoFar.AddNewRange(realAndNullIndexSet);
                    }

                    SpecialFunctions.CheckCondition(!auditRowIndexValues || narrowRealAndNullIndexSetSetSoFar.Contains(-1),
                                                    string.Format("The 'null' index -1 for the real data was not seen in {0}", broadInputFilePattern));


                    if (broadRealAndNullIndexSetSoFar == null)
                    {
                        broadRealAndNullIndexSetSoFar = narrowRealAndNullIndexSetSetSoFar;
                    }
                    else
                    {
                        SpecialFunctions.CheckCondition(broadRealAndNullIndexSetSoFar.Equals(narrowRealAndNullIndexSetSetSoFar),
                                                        string.Format("The broad inputFilePattern {0} covers a different set of nullIndexes ({1}) than its predecessors ({2})",
                                                                      broadInputFilePattern, narrowRealAndNullIndexSetSetSoFar, broadRealAndNullIndexSetSoFar));
                    }
                }

                double numberOfRandomizationRuns = broadRealAndNullIndexSetSoFar.Count - 1;
                Console.WriteLine("Detected {0} randomized runs relative to the number of real runs.", numberOfRandomizationRuns);
                Dictionary <Dictionary <string, string>, double> qValueList = SpecialFunctions.ComputeQValues(ref realRowCollectionToSort, AccessPValueFromPhylotreeRow, ref nullValueCollectionToBeSorted, numberOfRandomizationRuns);

                //!!!this code is repeated elsewhere
                textWriter.WriteLine(SpecialFunctions.CreateTabString(headerSoFar, "qValue"));
                foreach (Dictionary <string, string> row in realRowCollectionToSort)
                {
                    double qValue = qValueList[row];
                    textWriter.WriteLine(SpecialFunctions.CreateTabString(row[""], qValue));
                }
            }
        }