예제 #1
0
 /// <summary>
 /// Builds a group whose only member is <paramref name="prot"/>.
 /// </summary>
 /// <param name="prot">
 /// Protein that seeds the group. Its <c>DistinctPeptides</c> value is assigned
 /// to <c>MyPeptides</c> as-is; no copy is made here.
 /// </param>
 public ProteinGroup2(MyProtein prot)
 {
     // Start the member list with the seeding protein.
     List <MyProtein> members = new List <MyProtein>();
     members.Add(prot);
     MyProteins = members;

     // The group's peptides are, initially, exactly those of its single member.
     MyPeptides = prot.DistinctPeptides;
 }
예제 #2
0
        /// <summary>
        /// Legacy max-parsimony selection. Walks every group number in
        /// <c>MyProteins.TheGroups</c> and collects the proteins chosen to represent it:
        /// <list type="bullet">
        /// <item><description><c>All</c>, <c>Unique</c> and <c>Single</c> groups contribute their first protein.</description></item>
        /// <item><description><c>Some</c> groups with exactly two proteins contribute both.</description></item>
        /// <item><description>Every other group contributes a greedy set cover of its distinct peptides
        /// (see http://en.wikipedia.org/wiki/Set_cover_problem).</description></item>
        /// </list>
        /// </summary>
        /// <returns>
        /// The selected proteins. They are the same instances held by
        /// <c>MyProteins.MyProteinList</c>; nothing is cloned here.
        /// </returns>
        public List <MyProtein> MaxParsimonyList_Old()
        {
            List <MyProtein> MaxParsimony = new List <MyProtein>(MyProteins.TheGroups.Count);

            foreach (int groupNumber in MyProteins.TheGroups)
            {
                List <MyProtein> proteins = MyProteins.MyProteinList.FindAll(a => a.GroupNo == groupNumber);

                // A group number without members has nothing to contribute; indexing [0] would throw.
                if (proteins.Count == 0)
                {
                    continue;
                }

                var groupType = proteins[0].MyGroupType;

                if (groupType == ProteinGroupType.All || groupType == ProteinGroupType.Unique || groupType == ProteinGroupType.Single)
                {
                    MaxParsimony.Add(proteins[0]);
                    continue;
                }

                if (groupType == ProteinGroupType.Some && proteins.Count == 2)
                {
                    MaxParsimony.AddRange(proteins);
                    continue;
                }

                // Remaining case: pick proteins greedily until every peptide of the group is explained.
                // 'uncovered' holds the peptides not yet explained by a selected protein.
                HashSet <string> uncovered = new HashSet <string>(proteins.SelectMany(p => p.DistinctPeptides));

                // Order by the NUMBER of peptides (the collection itself is not comparable), then by
                // length, so that ties in the greedy step resolve towards the richest / longest protein.
                proteins = proteins.OrderByDescending(p => p.DistinctPeptides.Count)
                                   .ThenByDescending(p => p.Length)
                                   .ToList();

                try
                {
                    // do/while: the group is always represented by at least one protein,
                    // even if it carries no peptides at all.
                    do
                    {
                        MyProtein nextProtein = null;
                        int bestGain = -1;

                        // Strict '>' keeps the first protein in sort order on ties.
                        foreach (MyProtein candidate in proteins)
                        {
                            int gain = candidate.DistinctPeptides.Where(pep => uncovered.Contains(pep)).Distinct().Count();

                            if (gain > bestGain)
                            {
                                bestGain    = gain;
                                nextProtein = candidate;
                            }
                        }

                        MaxParsimony.Add(nextProtein);
                        proteins.Remove(nextProtein);
                        uncovered.ExceptWith(nextProtein.DistinctPeptides);
                    }
                    while (proteins.Count > 0 && uncovered.Count > 0);
                }
                catch (Exception e)
                {
                    // Best effort: report the group and carry on with the next one.
                    Console.WriteLine("Max parsimony problem on group number: " + groupNumber + "\n" + e.Message);
                }
            }

            return MaxParsimony;
        }
예제 #3
0
        /// <summary>
        /// Builds the max-parsimony protein list. For every group number in
        /// <c>MyProteins.TheGroups</c>:
        /// <list type="bullet">
        /// <item><description><c>All</c>, <c>Unique</c> and <c>Single</c> groups are represented by their longest protein.</description></item>
        /// <item><description>Any other group is represented by a greedy set cover of its distinct peptides
        /// (see http://en.wikipedia.org/wiki/Set_cover_problem).</description></item>
        /// </list>
        /// Proteins of the group that were not selected are listed by <c>Locus</c> at the end of the
        /// first selected protein's <c>Description</c>.
        /// </summary>
        /// <returns>
        /// The selected proteins. They come from the list returned by
        /// <c>PatternTools.ObjectCopier.Clone</c>, not from <c>MyProteins.MyProteinList</c> itself.
        /// </returns>
        public List <MyProtein> MaxParsimonyList()
        {
            List <MyProtein> MaxParsimony = new List <MyProtein>(MyProteins.TheGroups.Count);

            // Descriptions are edited below, so work on the cloned list.
            // NOTE(review): presumably Clone yields independent copies of each protein - confirm.
            List <MyProtein> ProteinsTMP = PatternTools.ObjectCopier.Clone(MyProteins.MyProteinList);

            foreach (int groupNumber in MyProteins.TheGroups)
            {
                List <MyProtein> proteins = ProteinsTMP.FindAll(a => a.GroupNo == groupNumber);

                // A group number without members has nothing to contribute; indexing [0] would throw.
                if (proteins.Count == 0)
                {
                    continue;
                }

                List <MyProtein> minumumCoverSet = new List <MyProtein>();

                // Longest protein first: it becomes the representative of simple groups.
                proteins.Sort((a, b) => b.Length.CompareTo(a.Length));

                var groupType = proteins[0].MyGroupType;

                if (groupType == ProteinGroupType.All || groupType == ProteinGroupType.Unique || groupType == ProteinGroupType.Single)
                {
                    minumumCoverSet.Add(proteins[0]);
                    proteins.RemoveAt(0);

                    AppendAdditionalIds(minumumCoverSet[0], proteins);
                    MaxParsimony.AddRange(minumumCoverSet);
                    continue;
                }

                // Remaining case: pick proteins greedily until every peptide of the group is explained.
                // 'uncovered' holds the peptides not yet explained by a selected protein.
                HashSet <string> uncovered = new HashSet <string>(proteins.SelectMany(p => p.DistinctPeptides));

                // Most peptides first, longest first on ties; the greedy step keeps this order on ties.
                proteins = proteins.OrderByDescending(p => p.DistinctPeptides.Count)
                                   .ThenByDescending(p => p.Length)
                                   .ToList();

                try
                {
                    // do/while: the group is always represented by at least one protein,
                    // even if it carries no peptides at all.
                    do
                    {
                        MyProtein nextProtein = null;
                        int bestGain = -1;

                        // Strict '>' keeps the first protein in sort order on ties.
                        foreach (MyProtein candidate in proteins)
                        {
                            int gain = candidate.DistinctPeptides.Where(pep => uncovered.Contains(pep)).Distinct().Count();

                            if (gain > bestGain)
                            {
                                bestGain    = gain;
                                nextProtein = candidate;
                            }
                        }

                        minumumCoverSet.Add(nextProtein);
                        proteins.Remove(nextProtein);
                        uncovered.ExceptWith(nextProtein.DistinctPeptides);

                        // Stop only when nothing is left to pick or nothing is left to cover.
                        // Comparing the covered count with the remaining count (as was done before)
                        // ended the loop early whenever the two numbers happened to match.
                    }
                    while (proteins.Count > 0 && uncovered.Count > 0);
                }
                catch (Exception e)
                {
                    // Best effort: report the group and carry on with whatever was selected so far.
                    Console.WriteLine("Max parsimony problem on group number: " + groupNumber + "\n" + e.Message);
                }

                // The cover set can be empty only if the loop failed before its first pick.
                if (minumumCoverSet.Count > 0)
                {
                    AppendAdditionalIds(minumumCoverSet[0], proteins);
                }

                MaxParsimony.AddRange(minumumCoverSet);
            }

            return MaxParsimony;
        }

        /// <summary>
        /// Appends the <c>Locus</c> of every protein in <paramref name="absorbed"/> to the
        /// <c>Description</c> of <paramref name="representative"/>; does nothing when the list is empty.
        /// </summary>
        private static void AppendAdditionalIds(MyProtein representative, List <MyProtein> absorbed)
        {
            if (absorbed.Count == 0)
            {
                return;
            }

            string add = "; Additional IDs concatenated into MaxParsimony group: ";
            add += string.Join(", ", absorbed.Select(a => a.Locus).ToList());
            representative.Description += add;
        }