/// <summary>
/// Checks that <c>IdentifiedPeptideUtils.GetUniquePeptideCount</c> reports one unique
/// peptide for the sequences "ILLLAR", "LILIAR" and "LIIIAR", whether the peptides share
/// a spectrum or not.
/// </summary>
public void TestUniqueCount()
{
    var sharedSpectrum = new IdentifiedSpectrum();

    // Two peptides on the same spectrum whose sequences differ only in I/L positions.
    var peptides = new List<IIdentifiedPeptide>
    {
        new IdentifiedPeptide(sharedSpectrum) { Sequence = "ILLLAR" },
        new IdentifiedPeptide(sharedSpectrum) { Sequence = "LILIAR" }
    };
    Assert.AreEqual(1, IdentifiedPeptideUtils.GetUniquePeptideCount(peptides));

    // A third I/L variant on its own spectrum must not raise the count.
    // NOTE(review): presumably I and L are treated as equivalent when counting -
    // confirm against GetUniquePeptideCount.
    peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()) { Sequence = "LIIIAR" });
    Assert.AreEqual(1, IdentifiedPeptideUtils.GetUniquePeptideCount(peptides));
}
/// <summary>
/// Parses one protein row with the converter built from the header below, checks the
/// parsed fields, then checks that the converter writes the same row back once the
/// protein holds 84 peptides.
/// </summary>
public void TestNoredundant()
{
    // NOTE(review): the converter is created with '\t' as delimiter, yet the header and
    // row literals below look space-separated apart from the leading "\t" of the row.
    // Confirm the literals still contain the intended tab characters.
    const string expectedName = "IPI:IPI00784154.1|SWISS-PROT:P10809|TREMBL:B2R5M6;Q53QD5;Q53SE2;Q96RI4;Q9UCR6|ENSEMBL:ENSP00000340019;ENSP00000373620|REFSEQ:NP_002147;NP_955472|H-INV:HIT000031088";
    const string expectedDescription = "Tax_Id=9606 Gene_Symbol=HSPD1 60 kDa heat shock protein, mitochondrial";
    const int peptideTotal = 84;

    string columns = " Reference PepCount UniquePepCount CoverPercent MW PI IdentifiedName";
    string row = "\tIPI:IPI00784154.1|SWISS-PROT:P10809|TREMBL:B2R5M6;Q53QD5;Q53SE2;Q96RI4;Q9UCR6|ENSEMBL:ENSP00000340019;ENSP00000373620|REFSEQ:NP_002147;NP_955472|H-INV:HIT000031088 Tax_Id=9606 Gene_Symbol=HSPD1 60 kDa heat shock protein, mitochondrial 84 19 43.46% 61054.43 5.70 IPI:IPI00784154.1|SWISS-PROT:P10809|TREMBL:B2R5M6;Q53QD5;Q53SE2;Q96RI4;Q9UCR6|ENSEMBL:ENSP00000340019;ENSP00000373620|REFSEQ:NP_002147;NP_955472|H-INV:HIT000031088 Tax_Id=9606 Gene_Symbol=HSPD1 60 kDa heat shock protein, mitochondrial";

    IPropertyConverter<IIdentifiedProtein> rowConverter =
        IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(columns, '\t');
    Assert.AreEqual(columns, rowConverter.Name);

    // Parse: text -> protein.
    IIdentifiedProtein parsed = new IdentifiedProtein();
    rowConverter.SetProperty(parsed, row);

    Assert.AreEqual(expectedName, parsed.Name);
    Assert.AreEqual(expectedDescription, parsed.Description);
    Assert.AreEqual(19, parsed.UniquePeptideCount);
    Assert.AreEqual(43.46, parsed.Coverage);
    Assert.AreEqual(61054.43, parsed.MolecularWeight);
    Assert.AreEqual(5.7, parsed.IsoelectricPoint);

    // Format: protein -> text. The row's PepCount column reads 84, so attach 84 peptides
    // before writing the row back.
    int added = 0;
    while (added < peptideTotal)
    {
        parsed.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()));
        added++;
    }

    Assert.AreEqual(row, rowConverter.GetProperty(parsed));
}
/// <summary>
/// Parses one DTASelect-style protein row with the converter built from the header below,
/// checks the parsed fields, then checks that the converter writes the same row back once
/// the protein holds 495 peptides.
/// </summary>
public void TestDtaselect()
{
    // NOTE(review): the converter is created with '\t' as delimiter, yet the header and
    // row literals below look space-separated. Confirm the literals still contain the
    // intended tab characters.
    const string expectedDescription = "TPI1 SGDID:S000002457, Chr IV from 556470-555724, reverse complement, Verified ORF, \"Triose phosphate isomerase, abundant glycolytic enzyme; mRNA half-life is regulated by iron availability; transcription is controlled by activators Reb1p, Gcr1p, and Rap1p through binding sites in the 5' non-coding region\"";
    const int peptideTotal = 495;

    string columns = "Locus Sequence Count Spectrum Count Sequence Coverage Length MolWt pI Validation Status Descriptive Name";
    string row = "YDR050C 495 495 81.10% 249 26795.41 5.74 U YDR050C TPI1 SGDID:S000002457, Chr IV from 556470-555724, reverse complement, Verified ORF, \"Triose phosphate isomerase, abundant glycolytic enzyme; mRNA half-life is regulated by iron availability; transcription is controlled by activators Reb1p, Gcr1p, and Rap1p through binding sites in the 5' non-coding region\"";

    IPropertyConverter<IIdentifiedProtein> rowConverter =
        IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(columns, '\t');
    Assert.AreEqual(columns, rowConverter.Name);

    // Parse: text -> protein.
    IIdentifiedProtein parsed = new IdentifiedProtein();
    rowConverter.SetProperty(parsed, row);

    Assert.AreEqual("YDR050C", parsed.Name);
    Assert.AreEqual(expectedDescription, parsed.Description);
    Assert.AreEqual(495, parsed.UniquePeptideCount);
    Assert.AreEqual(81.1, parsed.Coverage);
    Assert.AreEqual(26795.41, parsed.MolecularWeight);
    Assert.AreEqual(5.74, parsed.IsoelectricPoint);

    // Format: protein -> text. Attach 495 peptides before writing the row back.
    int added = 0;
    while (added < peptideTotal)
    {
        parsed.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()));
        added++;
    }

    Assert.AreEqual(row, rowConverter.GetProperty(parsed));
}
/// <summary>
/// Builds one protein per distinct protein name referenced by the peptides of
/// <paramref name="spectra"/> and attaches the referencing peptides to it.
/// </summary>
/// <typeparam name="T">Spectrum type; must implement <c>IIdentifiedSpectrumBase</c>.</typeparam>
/// <param name="spectra">Spectra whose peptides are distributed to proteins.</param>
/// <returns>
/// A new list with the proteins created, each named after the key found in
/// <c>peptide.Proteins</c>. Within a single spectrum a protein receives at most one
/// peptide: the first one that references it.
/// </returns>
public List<IIdentifiedProtein> Build<T>(IEnumerable<T> spectra) where T : IIdentifiedSpectrumBase
{
    var proteinsByName = new Dictionary<string, IIdentifiedProtein>();

    // Protein names already served by the spectrum currently being processed.
    var seenInSpectrum = new HashSet<string>();

    foreach (var spectrum in spectra)
    {
        seenInSpectrum.Clear();

        foreach (var peptide in spectrum.Peptides)
        {
            foreach (string ac in peptide.Proteins)
            {
                // If several peptides of the same spectrum map to one protein, only the
                // first of them is added. HashSet.Add returns false for a repeat.
                if (!seenInSpectrum.Add(ac))
                {
                    continue;
                }

                // Single lookup instead of ContainsKey followed by two indexer reads.
                IIdentifiedProtein protein;
                if (!proteinsByName.TryGetValue(ac, out protein))
                {
                    protein = new IdentifiedProtein();
                    protein.Name = ac;
                    proteinsByName[ac] = protein;
                }

                protein.Peptides.Add(peptide);
            }
        }
    }

    return new List<IIdentifiedProtein>(proteinsByName.Values);
}
/// <summary>
/// Feeds three proteins with overlapping peptide lists to
/// <c>IdentifiedProteinGroupBuilder.Build</c> and expects two groups back, led by
/// protein1 and protein2 in that order.
/// </summary>
public void TestBuild()
{
    // Six peptides, each on its own spectrum; file name and sequence share a letter.
    var pepA = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("A", 1, 1, 1, ".dta"))) { Sequence = "A" };
    var pepB = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("B", 1, 1, 1, ".dta"))) { Sequence = "B" };
    var pepC = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("C", 1, 1, 1, ".dta"))) { Sequence = "C" };
    var pepD = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("D", 1, 1, 1, ".dta"))) { Sequence = "D" };
    var pepE = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("E", 1, 1, 1, ".dta"))) { Sequence = "E" };
    var pepF = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("F", 1, 1, 1, ".dta"))) { Sequence = "F" };

    var protein1 = new IdentifiedProtein()
    {
        Peptides = new IIdentifiedPeptide[] { pepA, pepC, pepE, pepF }.ToList()
    };

    var protein2 = new IdentifiedProtein()
    {
        Peptides = new IIdentifiedPeptide[] { pepB, pepC, pepD }.ToList()
    };

    // Expected to be dropped from the result: pepA is already in protein1 and pepB in
    // protein2, even though neither of those proteins holds both peptides.
    var protein3 = new IdentifiedProtein()
    {
        Peptides = new IIdentifiedPeptide[] { pepA, pepB }.ToList()
    };

    // Expected to be dropped from the result: both peptides are already in protein1.
    // NOTE(review): protein4 is built but never passed to Build below, so this case is
    // not exercised - confirm whether it was meant to be part of the input.
    var protein4 = new IdentifiedProtein()
    {
        Peptides = new IIdentifiedPeptide[] { pepA, pepE }.ToList()
    };

    var input = new IIdentifiedProtein[] { protein1, protein2, protein3 }.ToList();
    var groups = new IdentifiedProteinGroupBuilder().Build(input);

    Assert.AreEqual(2, groups.Count);
    Assert.AreSame(protein1, groups[0][0]);
    Assert.AreSame(protein2, groups[1][0]);
}
/// <summary>
/// Groups spectra by a peptide-derived key and returns one single-protein group per key.
/// </summary>
/// <param name="spectra">Spectra to distribute. Spectra with no peptides are ignored.</param>
/// <param name="func">
/// Maps a peptide to its grouping key. It is called once per candidate peptide, so it is
/// expected to be a pure function of the peptide.
/// </param>
/// <returns>
/// A result with one group per key, added in the order produced by <c>List.Sort()</c> on
/// the keys. Each group holds one protein named after the key; its description is the
/// '/'-merged protein list of the first peptide stored under that key.
/// </returns>
public static IIdentifiedResult DoBuildGroupByPeptide(List<IIdentifiedSpectrum> spectra, Func<IIdentifiedPeptide, string> func)
{
    IdentifiedResult result = new IdentifiedResult();
    var peptidesByKey = new Dictionary<string, List<IIdentifiedPeptide>>();

    // Pass 1: spectra with exactly one peptide are unambiguous; bucket them by key.
    var unambiguous = spectra
        .Where(s => s.Peptides.Count == 1)
        .GroupBy(s => func(s.Peptide));
    foreach (var bucket in unambiguous)
    {
        peptidesByKey[bucket.Key] = new List<IIdentifiedPeptide>(bucket.Select(s => s.Peptide));
    }

    // Pass 2: a spectrum with several peptides goes to the key that currently holds the
    // most peptides. Ties go to the earliest peptide, matching the stable descending sort
    // followed by First() that this scan replaces.
    foreach (var spectrum in spectra.Where(s => s.Peptides.Count > 1))
    {
        IIdentifiedPeptide chosen = null;
        string chosenKey = null;
        int chosenCount = -1;

        foreach (var candidate in spectrum.Peptides)
        {
            string candidateKey = func(candidate);

            List<IIdentifiedPeptide> existing;
            int count = peptidesByKey.TryGetValue(candidateKey, out existing) ? existing.Count : 0;

            if (count > chosenCount)
            {
                chosen = candidate;
                chosenKey = candidateKey;
                chosenCount = count;
            }
        }

        List<IIdentifiedPeptide> target;
        if (!peptidesByKey.TryGetValue(chosenKey, out target))
        {
            target = new List<IIdentifiedPeptide>();
            peptidesByKey[chosenKey] = target;
        }

        target.Add(chosen);
    }

    // Emit one protein (wrapped in its own group) per key, in sorted key order.
    var keys = new List<string>(peptidesByKey.Keys);
    keys.Sort();

    foreach (var key in keys)
    {
        List<IIdentifiedPeptide> members = peptidesByKey[key];

        IdentifiedProtein protein = new IdentifiedProtein(key);
        protein.Peptides.AddRange(members);
        protein.UniquePeptideCount = 1;
        protein.Description = members[0].Proteins.Merge('/');

        IdentifiedProteinGroup group = new IdentifiedProteinGroup();
        group.Add(protein);
        result.Add(group);
    }

    result.BuildGroupIndex();

    // The original left a result.Sort() call commented out at this point; groups stay in key order.
    return result;
}
/// <summary>
/// Filters a result whose single group holds two proteins sharing one spectrum.
/// Filtering on 'A' must keep only P1; filtering on 'C' afterwards must empty the result.
/// </summary>
public void TestFilter2()
{
    var sharedSpectrum = new IdentifiedSpectrum();
    sharedSpectrum.Query.FileScan.LongFileName = "ABDCDD.12.123.2.dat";

    var proteinA = new IdentifiedProtein("P1");
    proteinA.Peptides.Add(new IdentifiedPeptide(sharedSpectrum) { Sequence = "AAAAAAA" });

    var proteinB = new IdentifiedProtein("P2");
    proteinB.Peptides.Add(new IdentifiedPeptide(sharedSpectrum) { Sequence = "BBBBBBB" });

    var group = new IdentifiedProteinGroup();
    group.Add(proteinA);
    group.Add(proteinB);

    IdentifiedResult ir = new IdentifiedResult();
    ir.Add(group);

    // Before filtering: one group, two proteins, one distinct spectrum.
    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(2, ir[0].Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);

    // Only P1's peptide sequence contains 'A'.
    ir.Filter(m => m.Sequence.Contains('A'));

    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(1, ir[0].Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);
    Assert.AreSame(proteinA, ir[0][0]);

    // No remaining sequence contains 'C', so the group disappears.
    ir.Filter(m => m.Sequence.Contains('C'));

    Assert.AreEqual(0, ir.Count);
}
/// <summary>
/// Filters a two-group result by spectrum charge and checks the surviving groups and
/// spectra after each filtering step.
/// </summary>
public void TestFilter()
{
    // P1 has spectra with charge 1 and 2; P2 has one spectrum with charge 3.
    var pro1 = new IdentifiedProtein("P1");
    pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 1 }));
    pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 2 }));

    var pro2 = new IdentifiedProtein("P2");
    pro2.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 3 }));

    var g1 = new IdentifiedProteinGroup();
    g1.Add(pro1);

    var g2 = new IdentifiedProteinGroup();
    g2.Add(pro2);

    IdentifiedResult ir = new IdentifiedResult();
    ir.Add(g1);
    ir.Add(g2);

    Assert.AreEqual(2, ir.Count);
    Assert.AreEqual(3, ir.GetSpectra().Count);

    // Charge > 1 drops the charge-1 spectrum but keeps both groups.
    ir.Filter(m => m.Spectrum.Query.Charge > 1);

    Assert.AreEqual(2, ir.Count);
    Assert.AreEqual(2, ir.GetSpectra().Count);
    // The All(...) result used to be discarded, so the charge check never ran.
    Assert.IsTrue(ir.GetSpectra().All(m => m.Charge > 1));

    // Charge > 2 leaves only P2's spectrum, and with it only P2's group.
    ir.Filter(m => m.Spectrum.Query.Charge > 2);

    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);
    Assert.IsTrue(ir.GetSpectra().All(m => m.Charge > 2));
    Assert.AreEqual("P2", ir[0][0].Name);
}
/// <summary>
/// Adds three peptides backed by two spectra to a protein and expects two spectra and
/// two distinct peptides to be reported.
/// </summary>
public void TestDistinctPeptides()
{
    var firstSpectrum = new IdentifiedSpectrum();
    var secondSpectrum = new IdentifiedSpectrum();

    var protein = new IdentifiedProtein();

    // Two peptides share the first spectrum; the third has its own.
    protein.Peptides.Add(new IdentifiedPeptide(firstSpectrum));
    protein.Peptides.Add(new IdentifiedPeptide(firstSpectrum));
    protein.Peptides.Add(new IdentifiedPeptide(secondSpectrum));

    Assert.AreEqual(3, protein.Peptides.Count);
    Assert.AreEqual(2, protein.GetSpectra().Count);
    Assert.AreEqual(2, protein.GetDistinctPeptides().Count());
}
/// <summary>
/// Adds peptides to a 30-residue protein one at a time and checks the coverage reported
/// by <c>CalculateCoverage</c> after each addition (tolerance 0.01).
/// </summary>
public void TestCalculateCoverage()
{
    // Peptide sequences in the order they are added.
    // NOTE(review): they look like "prev.PEPTIDE.next" flanking notation, so presumably
    // only the middle part counts towards coverage - confirm against CalculateCoverage.
    string[] peptideSequences = { "B.CDEDF.D", "F.DSESL.K", "L.SLGF.G", "L.SLGFG.D" };

    // Coverage expected after adding the peptide at the same index.
    double[] expectedCoverage = { 16.67, 33.33, 46.67, 50.00 };

    IdentifiedProtein protein = new IdentifiedProtein();

    // Total of 30 amino acids.
    protein.Sequence = "ABCDEDFDEFDSESLKJFDJLSLGFGDDGD";

    for (int step = 0; step < peptideSequences.Length; step++)
    {
        // Each peptide gets its own spectrum.
        IdentifiedPeptide peptide = new IdentifiedPeptide(new IdentifiedSpectrum());
        peptide.Sequence = peptideSequences[step];
        protein.Peptides.Add(peptide);

        protein.CalculateCoverage();

        Assert.AreEqual(expectedCoverage[step], protein.Coverage, 0.01);
    }
}
/// <summary>
/// Reads one protein from <paramref name="reader"/>, consuming nodes until the end
/// element named <c>proteinTag</c> is reached.
/// </summary>
/// <param name="reader">XML reader to consume nodes from.</param>
/// <returns>
/// The protein read. If the reader runs out before the closing <c>proteinTag</c> is seen,
/// the protein is returned without <c>AssignProteinValue</c> having been applied.
/// </returns>
private IdentifiedProtein ReadIndividualProtein(XmlTextReader reader)
{
    IdentifiedProtein protein = new IdentifiedProtein();

    // Text content collected per element name, handed to AssignProteinValue at the end.
    Dictionary<string, string> textByElement = new Dictionary<string, string>();

    // Name of the most recent non-peptide element; text nodes are filed under it.
    string currentElement = "";

    while (reader.Read())
    {
        XmlNodeType nodeType = reader.NodeType;

        if (nodeType == XmlNodeType.Element)
        {
            if (reader.Name.Equals(peptideTag))
            {
                // Peptide elements are parsed by the dedicated reader and attached directly.
                protein.Peptides.Add(ReadIndividualPeptide(reader, protein.Name).Peptide);
            }
            else
            {
                currentElement = reader.Name;
            }
        }
        else if (nodeType == XmlNodeType.Text)
        {
            if (currentElement == "reference")
            {
                // The reference is set straight away instead of going through the dictionary.
                protein.Reference = reader.Value;
            }
            else
            {
                // Add throws if the same element name yields text twice.
                textByElement.Add(currentElement, reader.Value);
            }
        }
        else if (nodeType == XmlNodeType.EndElement && reader.Name.Equals(proteinTag))
        {
            AssignProteinValue(protein, textByElement);
            return protein;
        }
    }

    return protein;
}