/// <summary>
/// Builds a result in which every distinct peptide key (as produced by
/// <paramref name="func"/>) becomes one protein placed in its own protein group.
/// </summary>
/// <param name="spectra">
/// Spectra to distribute. Spectra with exactly one peptide are grouped by
/// <c>func(spectrum.Peptide)</c>. Spectra with more than one peptide contribute a single
/// peptide (see remarks). Spectra with no peptides are ignored.
/// </param>
/// <param name="func">Maps a peptide to the key used as the protein name.</param>
/// <returns>
/// A result holding one group per key, added in sorted key order, after
/// <c>BuildGroupIndex()</c> has been called on it.
/// </returns>
/// <remarks>
/// For a spectrum with several peptides, the peptide whose key currently has the most
/// collected peptides is chosen; on ties the earliest peptide in the spectrum wins.
/// </remarks>
public static IIdentifiedResult DoBuildGroupByPeptide(List<IIdentifiedSpectrum> spectra, Func<IIdentifiedPeptide, string> func)
{
    IdentifiedResult result = new IdentifiedResult();

    var singlePeptides = (from s in spectra where s.Peptides.Count == 1 select s).GroupBy(m => func(m.Peptide));
    var multiplePeptides = (from s in spectra where s.Peptides.Count > 1 select s).ToList();

    var dic = new Dictionary<string, List<IIdentifiedPeptide>>();
    foreach (var g in singlePeptides)
    {
        dic[g.Key] = new List<IIdentifiedPeptide>(from s in g select s.Peptide);
    }

    foreach (var o in multiplePeptides)
    {
        // Linear scan for the peptide whose key already has the most entries.
        // Strict '>' keeps the first peptide on ties, matching a stable descending sort + First().
        IIdentifiedPeptide bestPeptide = null;
        string bestKey = null;
        List<IIdentifiedPeptide> bestHits = null;
        int bestCount = -1;

        foreach (var p in o.Peptides)
        {
            string key = func(p);

            List<IIdentifiedPeptide> hits;
            int count = dic.TryGetValue(key, out hits) ? hits.Count : 0;

            if (count > bestCount)
            {
                bestPeptide = p;
                bestKey = key;
                bestHits = hits;
                bestCount = count;
            }
        }

        // The dictionary is not modified during the scan, so a null bestHits means the key is new.
        if (bestHits == null)
        {
            bestHits = new List<IIdentifiedPeptide>();
            dic[bestKey] = bestHits;
        }

        bestHits.Add(bestPeptide);
    }

    var keys = new List<string>(dic.Keys);
    keys.Sort();

    foreach (var key in keys)
    {
        List<IIdentifiedPeptide> peptides = dic[key];

        IdentifiedProtein protein = new IdentifiedProtein(key);
        protein.Peptides.AddRange(peptides);
        protein.UniquePeptideCount = 1;
        // The description is taken from the protein names of the first peptide collected for the key.
        protein.Description = peptides[0].Proteins.Merge('/');

        IdentifiedProteinGroup group = new IdentifiedProteinGroup();
        group.Add(protein);

        result.Add(group);
    }

    result.BuildGroupIndex();

    // No result.Sort() is applied here; groups stay in sorted key order.
    return result;
}
/// <summary>
/// Creates a new group holding a clone of every protein in this group and
/// carrying over this group's <c>Index</c> and <c>Enabled</c> values.
/// </summary>
/// <returns>The newly created <c>IdentifiedProteinGroup</c>.</returns>
public object Clone()
{
    var copy = new IdentifiedProteinGroup();

    foreach (IIdentifiedProtein protein in this)
    {
        var clonedProtein = (IIdentifiedProtein)protein.Clone();
        copy.Add(clonedProtein);
    }

    // Properties are assigned after the proteins have been added, as in the original order.
    copy.Index = this.Index;
    copy.Enabled = this.Enabled;

    return copy;
}
/// <summary>
/// Reads an identification result from a protein file and its companion peptide and
/// peptide-to-group link files, whose names are derived from <paramref name="fileName"/>.
/// </summary>
/// <param name="fileName">Path of the protein file.</param>
/// <returns>
/// The result with one protein group per distinct protein <c>GroupIndex</c>, peptides linked
/// to groups, and sequences filled from <c>fileName + ".fasta"</c> when that file exists.
/// </returns>
/// <exception cref="FileNotFoundException">
/// The protein file, the peptide file or the link file does not exist.
/// </exception>
public override IIdentifiedResult ReadFromFile(string fileName)
{
    if (!File.Exists(fileName))
    {
        throw new FileNotFoundException("Protein file not exist : " + fileName);
    }

    string peptideFilename = GetPeptideFileName(fileName);
    if (!File.Exists(peptideFilename))
    {
        throw new FileNotFoundException("Peptide file not exist : " + peptideFilename);
    }

    string linkFileName = GetLinkFileName(fileName);
    if (!File.Exists(linkFileName))
    {
        throw new FileNotFoundException("Peptide2group file not exist : " + linkFileName);
    }

    // Peptide level: read the spectra and remember the format the reader detected.
    var peptideReader = new PeptideTextReader(GetEngineName());
    List<IIdentifiedSpectrum> spectra = peptideReader.ReadFromFile(peptideFilename);
    this.PeptideFormat = peptideReader.PeptideFormat;

    // Protein level: same pattern for the protein file.
    var proteinReader = new ProteinTextReader(GetEngineName());
    List<IIdentifiedProtein> proteins = proteinReader.ReadFromFile(fileName);
    this.ProteinFormat = proteinReader.ProteinFormat;

    var spectrumById = spectra.ToDictionary(m => m.Id);
    var proteinsByGroup = proteins.GroupBy(m => m.GroupIndex);

    IIdentifiedResult result = Allocate();
    foreach (var groupedProteins in proteinsByGroup)
    {
        var group = new IdentifiedProteinGroup();
        foreach (var protein in groupedProteins)
        {
            group.Add(protein);
        }

        result.Add(group);
    }

    var linker = new Peptide2GroupTextReader();
    var groupByIndex = result.ToDictionary(m => m.Index);
    linker.LinkPeptideToGroup(linkFileName, spectrumById, groupByIndex);

    // An optional fasta file next to the protein file supplies the sequences.
    string fastaFile = fileName + ".fasta";
    if (File.Exists(fastaFile))
    {
        IdentifiedResultUtils.FillSequenceFromFasta(fastaFile, result, null);
    }

    return result;
}
/// <summary>
/// Verifies that a filter created with "KERATIN" accepts a group whose only protein
/// has the description "P1 Keratin" (different letter case).
/// </summary>
public void Run()
{
    var filter = new IdentifiedProteinGroupContaminationDescriptionFilter("KERATIN");

    var protein = new IdentifiedProtein("P1");
    protein.Description = "P1 Keratin";

    var group = new IdentifiedProteinGroup();
    group.Add(protein);

    bool accepted = filter.Accept(group);
    Assert.IsTrue(accepted);
}
/// <summary>
/// Filters a result whose single group holds two proteins sharing one spectrum:
/// a filter on sequences containing 'A' keeps only the first protein, and a
/// subsequent filter on 'C' leaves the result empty.
/// </summary>
public void TestFilter2()
{
    var spectrum = new IdentifiedSpectrum();
    spectrum.Query.FileScan.LongFileName = "ABDCDD.12.123.2.dat";

    // Both peptides are created from the same spectrum object.
    var pro1 = new IdentifiedProtein("P1");
    var peptideA = new IdentifiedPeptide(spectrum);
    peptideA.Sequence = "AAAAAAA";
    pro1.Peptides.Add(peptideA);

    var pro2 = new IdentifiedProtein("P2");
    var peptideB = new IdentifiedPeptide(spectrum);
    peptideB.Sequence = "BBBBBBB";
    pro2.Peptides.Add(peptideB);

    var g1 = new IdentifiedProteinGroup();
    g1.Add(pro1);
    g1.Add(pro2);

    var ir = new IdentifiedResult();
    ir.Add(g1);

    // Before filtering: one group, two proteins, one spectrum.
    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(2, ir[0].Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);

    ir.Filter(m => m.Sequence.Contains('A'));

    // Only the protein with the 'A' peptide is left.
    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(1, ir[0].Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);
    Assert.AreSame(pro1, ir[0][0]);

    ir.Filter(m => m.Sequence.Contains('C'));

    Assert.AreEqual(0, ir.Count);
}
/// <summary>
/// Filters a two-group result by peptide spectrum charge and checks the remaining
/// group count, spectrum count and spectrum charges after each filtering step.
/// </summary>
public void TestFilter()
{
    var pro1 = new IdentifiedProtein("P1");
    pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 1 }));
    pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 2 }));

    var pro2 = new IdentifiedProtein("P2");
    pro2.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 3 }));

    var g1 = new IdentifiedProteinGroup();
    g1.Add(pro1);

    var g2 = new IdentifiedProteinGroup();
    g2.Add(pro2);

    IdentifiedResult ir = new IdentifiedResult();
    ir.Add(g1);
    ir.Add(g2);

    Assert.AreEqual(2, ir.Count);
    Assert.AreEqual(3, ir.GetSpectra().Count);

    ir.Filter(m => { return (m.Spectrum.Query.Charge > 1); });

    Assert.AreEqual(2, ir.Count);
    Assert.AreEqual(2, ir.GetSpectra().Count);
    // The result of All(...) was previously discarded; assert it so the check is effective.
    Assert.IsTrue(ir.GetSpectra().All(m => { return (m.Charge > 1); }));

    ir.Filter(m => { return (m.Spectrum.Query.Charge > 2); });

    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);
    Assert.IsTrue(ir.GetSpectra().All(m => { return (m.Charge > 2); }));
    Assert.AreEqual("P2", ir[0][0].Name);
}
/// <summary>
/// Compares this group with another concrete group by forwarding to the
/// <c>CompareTo</c> overload that accepts the interface type.
/// </summary>
/// <param name="other">The group to compare with.</param>
/// <returns>The value returned by the interface-typed overload.</returns>
public int CompareTo(IdentifiedProteinGroup other)
{
    // Typing the argument as the interface selects the other overload instead of recursing.
    IIdentifiedProteinGroup otherGroup = other;
    return CompareTo(otherGroup);
}
/// <summary>
/// Builds protein groups from individual proteins: proteins with identical spectrum sets
/// are merged into one group, groups whose peptides all have <c>GroupCount == 1</c> are set
/// aside as distinct, and among the remaining groups those whose spectra are contained in
/// a larger group are removed.
/// </summary>
/// <param name="proteins">Proteins to group; each starts out in its own group.</param>
/// <returns>
/// The distinct groups followed by the remaining groups, with the proteins in each group
/// sorted by <c>SortByProteinName()</c>.
/// </returns>
public List<IIdentifiedProteinGroup> Build(List<IIdentifiedProtein> proteins)
{
    var result = new List<IIdentifiedProteinGroup>();

    Progress.SetMessage("Initializing protein group/spectra map ...");
    var groupMap = new Dictionary<IIdentifiedProteinGroup, HashSet<IIdentifiedSpectrum>>();
    foreach (IIdentifiedProtein protein in proteins)
    {
        IIdentifiedProteinGroup group = new IdentifiedProteinGroup();
        group.Add(protein);

        var spectraSet = new HashSet<IIdentifiedSpectrum>(protein.GetSpectra());
        groupMap[group] = spectraSet;

        result.Add(group);
    }

    Progress.SetMessage("Sorting protein groups ...");
    // Descending by peptide count, then by unique peptide count, so that groups with equal
    // counts end up adjacent (the merge step below relies on this).
    result.Sort((m1, m2) =>
    {
        int ret = m2[0].PeptideCount.CompareTo(m1[0].PeptideCount);
        if (ret == 0)
        {
            ret = m2[0].UniquePeptideCount.CompareTo(m1[0].UniquePeptideCount);
        }
        return ret;
    });

    Progress.SetMessage("Merging proteins with same peptide-spectrum matches ...");
    Progress.SetRange(0, result.Count);

    // First, merge all groups with identical content.
    for (int i = result.Count - 1; i > 0; i--)
    {
        Progress.SetPosition(result.Count - i);
        HashSet<IIdentifiedSpectrum> iSpectra = groupMap[result[i]];
        for (int j = i - 1; j >= 0; j--)
        {
            if (result[j][0].PeptideCount == result[i][0].PeptideCount &&
                result[j][0].UniquePeptideCount == result[i][0].UniquePeptideCount)
            {
                HashSet<IIdentifiedSpectrum> jSpectra = groupMap[result[j]];
                if (jSpectra.SetEquals(iSpectra))
                {
                    // Same content: merge group i into group j.
                    foreach (IIdentifiedProtein protein in result[i])
                    {
                        result[j].Add(protein);
                    }

                    // Remove group i.
                    result.RemoveAt(i);
                    break;
                }
            }
            else
            {
                // The list is sorted by these counts, so no earlier group can match.
                break;
            }
        }
    }

    Progress.SetMessage("Initializing peptide group count ...");
    InitializePeptideGroupCount(result);

    Progress.SetMessage("Extracting distinct protein groups ...");
    var temp = result;
    result = new List<IIdentifiedProteinGroup>();
    // NOTE(review): the loop stops at i > 0, so temp[0] is never tested for distinctness and
    // always stays in temp. Possibly an off-by-one; left unchanged pending confirmation.
    for (int i = temp.Count - 1; i > 0; i--)
    {
        if (temp[i].GetPeptides().All(m => m.GroupCount == 1))
        {
            result.Add(temp[i]);
            temp.RemoveAt(i);
        }
    }

    // A regular string literal cannot contain a raw line break; the newline is written as an escape.
    Progress.SetMessage("There are {0} distinct and {1} undistinct protein groups. \n", result.Count, temp.Count);

    Progress.SetMessage("Removing redundant protein groups from undistinct protein groups...");
    var oldcount = temp.Count;
    Progress.SetRange(0, oldcount);

    // Remove groups whose spectra are fully contained in another group.
    for (int i = temp.Count - 1; i > 0; i--)
    {
        Progress.SetPosition(oldcount - i);
        HashSet<IIdentifiedSpectrum> iSpectra = groupMap[temp[i]];
        for (int j = i - 1; j >= 0; j--)
        {
            HashSet<IIdentifiedSpectrum> jSpectra = groupMap[temp[j]];
            // Groups with the same number of spectra are never treated as containing each other.
            if (jSpectra.Count == iSpectra.Count)
            {
                continue;
            }

            if (iSpectra.IsSubsetOf(jSpectra))
            {
                // Remove group i.
                temp.RemoveAt(i);
                break;
            }
        }
    }

    // NOTE(review): RemoveUndistinctProteinGroups is defined elsewhere; presumably it prunes
    // further groups from temp in place - confirm against its implementation.
    RemoveUndistinctProteinGroups(temp);

    result.AddRange(temp);

    Progress.SetMessage("Sorting proteins in group ...");
    result.ForEach(m => m.SortByProteinName());

    Progress.SetMessage("Building protein groups done.");
    return result;
}
/// <summary>
/// Reads the next protein group from <paramref name="filein"/>: a run of protein lines
/// followed by the peptide lines that belong to them.
/// </summary>
/// <param name="filein">Reader positioned inside the result file.</param>
/// <param name="peptideMap">
/// Spectra already read, keyed by "LongFileName-Rank-Engine-Tag". A spectrum whose key is
/// already present is replaced by the stored instance; otherwise it is added to the map.
/// </param>
/// <param name="lastLine">
/// Line carried over between calls. On return it holds the line that ended this group
/// (the next protein line, a blank line, or <c>null</c> at end of file).
/// </param>
/// <returns>The group that was read, or <c>null</c> when no further protein line exists.</returns>
/// <exception cref="UserTerminatedException">Cancellation was requested while reading peptides.</exception>
private IIdentifiedProteinGroup ReadNextProteinGroup(StreamReader filein, Dictionary<string, IIdentifiedSpectrum> peptideMap, ref string lastLine)
{
    Progress.SetPosition(filein.BaseStream.Position);

    // Skip forward until the carried-over line is a protein line or the file ends.
    while (!IdentifiedResultUtils.IsProteinLine(lastLine) && (lastLine = filein.ReadLine()) != null)
    {
    }

    if (lastLine == null)
    {
        return null;
    }

    IIdentifiedProteinGroup result = new IdentifiedProteinGroup();
    while (lastLine != null && IdentifiedResultUtils.IsProteinLine(lastLine))
    {
        IIdentifiedProtein protein = ProteinFormat.ParseString(lastLine);
        result.Add(protein);
        protein.GroupIndex = IdentifiedResultUtils.GetGroupIndex(lastLine);

        lastLine = filein.ReadLine();
    }

    // Peptide lines run until the next protein line, a blank line, or end of file.
    // The null/blank checks are part of the loop condition so that a group with no peptide
    // lines (file ends or a blank line follows the protein lines) is not passed to the parser.
    List<IIdentifiedSpectrum> peptides = new List<IIdentifiedSpectrum>();
    while (lastLine != null && lastLine.Trim().Length != 0 && !IdentifiedResultUtils.IsProteinLine(lastLine))
    {
        if (Progress.IsCancellationPending())
        {
            throw new UserTerminatedException();
        }

        IIdentifiedSpectrum mphit = PeptideFormat.ParseString(lastLine);
        string id = string.Format("{0}-{1}-{2}-{3}", mphit.Query.FileScan.LongFileName, mphit.Rank, mphit.Engine, mphit.Tag);

        // Reuse the instance already registered under this id so groups share spectrum objects.
        IIdentifiedSpectrum known;
        if (peptideMap.TryGetValue(id, out known))
        {
            mphit = known;
        }
        else
        {
            peptideMap[id] = mphit;
        }

        peptides.Add(mphit);

        lastLine = filein.ReadLine();
    }

    foreach (IIdentifiedSpectrum hit in peptides)
    {
        result.AddIdentifiedSpectrum(hit);
    }

    return result;
}