/// <summary>
/// Creates a group whose only initial member is <paramref name="prot"/>.
/// </summary>
/// <param name="prot">
/// Protein that seeds the group. Must not be null: its <c>DistinctPeptides</c>
/// member is read here.
/// </param>
public ProteinGroup2(MyProtein prot)
{
    // The group starts out with exactly one member: the seeding protein.
    List<MyProtein> members = new List<MyProtein>();
    members.Add(prot);
    MyProteins = members;

    // Assigned directly, not copied.
    // NOTE(review): if DistinctPeptides is a reference type, the group and the
    // protein share the same collection instance - confirm this is intended.
    MyPeptides = prot.DistinctPeptides;
}
/// <summary>
/// Legacy max-parsimony selection: for every group number in
/// <c>MyProteins.TheGroups</c>, picks the proteins that represent the group
/// and returns them in one flat list.
/// </summary>
/// <remarks>
/// <para>
/// Groups whose first protein is typed <c>All</c>, <c>Unique</c> or
/// <c>Single</c> contribute only that first protein. A <c>Some</c> group with
/// exactly two proteins contributes both. Every other group is reduced with
/// the greedy set-cover heuristic
/// (http://en.wikipedia.org/wiki/Set_cover_problem): start with the protein
/// that has the most distinct peptides (ties broken by greater length), then
/// repeatedly add the protein that covers the most still-uncovered peptides
/// until every peptide of the group is covered.
/// </para>
/// <para>
/// Fixes relative to the previous revision: the candidates are ordered by the
/// <em>number</em> of distinct peptides (the old code used the peptide
/// collection itself as the sort key), the covered-peptide set is now actually
/// maintained so the loop stops once the group is covered instead of adding
/// every protein, and groups with no proteins are skipped.
/// </para>
/// <para>
/// Unlike <c>MaxParsimonyList</c>, this method returns the original protein
/// instances and does not touch their descriptions.
/// </para>
/// </remarks>
/// <returns>The selected proteins, in group order.</returns>
public List<MyProtein> MaxParsimonyList_Old()
{
    List<MyProtein> MaxParsimony = new List<MyProtein>(MyProteins.TheGroups.Count);

    foreach (int groupNumber in MyProteins.TheGroups)
    {
        List<MyProtein> proteins = MyProteins.MyProteinList.FindAll(a => a.GroupNo == groupNumber);

        // A group number without any matching protein has nothing to contribute.
        if (proteins.Count == 0)
        {
            continue;
        }

        ProteinGroupType groupType = proteins[0].MyGroupType;

        if (groupType == ProteinGroupType.All ||
            groupType == ProteinGroupType.Unique ||
            groupType == ProteinGroupType.Single)
        {
            MaxParsimony.Add(proteins[0]);
            continue;
        }

        if (groupType == ProteinGroupType.Some && proteins.Count == 2)
        {
            MaxParsimony.AddRange(proteins);
            continue;
        }

        // We are dealing with a "some group": collect every peptide seen in the group.
        HashSet<string> peptides = new HashSet<string>(proteins.SelectMany(p => p.DistinctPeptides));

        // Most peptides first, longer protein first on ties.
        List<MyProtein> candidates = proteins
            .OrderByDescending(p => p.DistinctPeptides.Count)
            .ThenByDescending(p => p.Length)
            .ToList();

        // Seed the cover with the best-ranked protein.
        MyProtein seed = candidates[0];
        candidates.RemoveAt(0);
        MaxParsimony.Add(seed);
        HashSet<string> coveredPeptides = new HashSet<string>(seed.DistinctPeptides);

        // Now, cycle until the full peptide set is covered.
        try
        {
            while (candidates.Count > 0 && coveredPeptides.Count < peptides.Count)
            {
                // Find the candidate that provides the greatest additional cover.
                // A strict '>' keeps the first candidate in ranking order on ties.
                MyProtein nextProtein = null;
                int bestGain = -1;
                foreach (MyProtein candidate in candidates)
                {
                    int gain = candidate.DistinctPeptides.Except(coveredPeptides).Count();
                    if (gain > bestGain)
                    {
                        bestGain = gain;
                        nextProtein = candidate;
                    }
                }

                // Defensive: no remaining protein adds coverage, so stop.
                if (nextProtein == null || bestGain <= 0)
                {
                    break;
                }

                MaxParsimony.Add(nextProtein);
                candidates.Remove(nextProtein);
                coveredPeptides.UnionWith(nextProtein.DistinctPeptides);
            }
        }
        catch (Exception e)
        {
            // Best effort: report the problem and move on to the next group.
            Console.WriteLine("Max parsimony problem on group number: " + groupNumber + "\n" + e.Message);
        }
    }

    return MaxParsimony;
}
/// <summary>
/// Builds the max-parsimony protein list: for every group number in
/// <c>MyProteins.TheGroups</c>, selects the proteins that represent the group
/// and records the loci of the proteins that were left out in the description
/// of the group's first selected protein.
/// </summary>
/// <remarks>
/// <para>
/// The method works on the result of <c>PatternTools.ObjectCopier.Clone</c>,
/// so the description edits are applied to the cloned proteins
/// (presumably a deep copy - confirm against <c>ObjectCopier</c>).
/// </para>
/// <para>
/// Groups whose longest protein is typed <c>All</c>, <c>Unique</c> or
/// <c>Single</c> are represented by that longest protein alone. Every other
/// group is reduced with the greedy set-cover heuristic
/// (http://en.wikipedia.org/wiki/Set_cover_problem): repeatedly take the
/// protein that covers the most still-uncovered peptides (ties go to the
/// protein with more distinct peptides, then the longer one) until no peptide
/// of the group is left uncovered or no protein remains.
/// </para>
/// <para>
/// Fixes relative to the previous revision: the loop no longer stops early
/// when the number of covered peptides happens to equal the number of
/// uncovered ones, groups with no proteins are skipped, and the description is
/// only annotated when at least one protein was actually selected.
/// </para>
/// </remarks>
/// <returns>The selected (cloned) proteins, in group order.</returns>
public List<MyProtein> MaxParsimonyList()
{
    List<MyProtein> MaxParsimony = new List<MyProtein>(MyProteins.TheGroups.Count);
    List<MyProtein> ProteinsTMP = PatternTools.ObjectCopier.Clone(MyProteins.MyProteinList);

    foreach (int groupNumber in MyProteins.TheGroups)
    {
        List<MyProtein> proteins = ProteinsTMP.FindAll(a => a.GroupNo == groupNumber);

        // A group number without any matching protein has nothing to contribute.
        if (proteins.Count == 0)
        {
            continue;
        }

        List<MyProtein> minumumCoverSet = new List<MyProtein>();

        // Longest protein first.
        proteins.Sort((a, b) => b.Length.CompareTo(a.Length));

        ProteinGroupType groupType = proteins[0].MyGroupType;
        if (groupType == ProteinGroupType.All ||
            groupType == ProteinGroupType.Unique ||
            groupType == ProteinGroupType.Single)
        {
            // The longest protein stands for the whole group.
            minumumCoverSet.Add(proteins[0]);
            proteins.RemoveAt(0);

            AppendAdditionalIds(minumumCoverSet[0], proteins);
            MaxParsimony.AddRange(minumumCoverSet);
            continue;
        }

        // We are dealing with a "some group": these are the peptides still to be covered.
        List<string> peptides = proteins.SelectMany(p => p.DistinctPeptides).Distinct().ToList();

        // Ranking used to break ties: most peptides first, then longest.
        proteins = proteins
            .OrderByDescending(p => p.DistinctPeptides.Count)
            .ThenByDescending(p => p.Length)
            .ToList();

        try
        {
            // At least one protein is always selected, even if the group has no peptides.
            do
            {
                // Find the protein that covers the most still-uncovered peptides.
                // A strict '>' keeps the first protein in ranking order on ties.
                MyProtein nextProtein = null;
                int maxCoverage = -1;
                foreach (MyProtein candidate in proteins)
                {
                    int coverage = candidate.DistinctPeptides.Intersect(peptides).Count();
                    if (coverage > maxCoverage)
                    {
                        maxCoverage = coverage;
                        nextProtein = candidate;
                    }
                }

                minumumCoverSet.Add(nextProtein);
                proteins.Remove(nextProtein);

                // Drop everything the newly selected protein covers.
                peptides = peptides.Except(nextProtein.DistinctPeptides).ToList();
            }
            while (proteins.Count > 0 && peptides.Count > 0);
        }
        catch (Exception e)
        {
            // Best effort: report the problem and keep whatever was selected so far.
            Console.WriteLine("Max parsimony problem on group number: " + groupNumber + "\n" + e.Message);
        }

        // If the loop failed before selecting anything there is no protein to annotate.
        if (minumumCoverSet.Count > 0)
        {
            AppendAdditionalIds(minumumCoverSet[0], proteins);
        }

        MaxParsimony.AddRange(minumumCoverSet);
    }

    return MaxParsimony;
}

/// <summary>
/// Appends the loci of <paramref name="leftovers"/> to the description of
/// <paramref name="representative"/>; does nothing when there are no leftovers.
/// </summary>
private static void AppendAdditionalIds(MyProtein representative, List<MyProtein> leftovers)
{
    if (leftovers.Count == 0)
    {
        return;
    }

    string add = "; Additional IDs concatenated into MaxParsimony group: ";
    add += string.Join(", ", leftovers.Select(a => a.Locus).ToList());
    representative.Description += add;
}