// Verifies that IdentifiedResult.Filter drops spectra rejected by the predicate and
// removes protein groups that are left without any spectrum.
public void TestFilter()
{
    // P1 carries a charge-1 and a charge-2 spectrum; P2 carries a single charge-3 spectrum.
    var pro1 = new IdentifiedProtein("P1");
    pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 1 }));
    pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 2 }));

    var pro2 = new IdentifiedProtein("P2");
    pro2.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 3 }));

    var g1 = new IdentifiedProteinGroup();
    g1.Add(pro1);

    var g2 = new IdentifiedProteinGroup();
    g2.Add(pro2);

    IdentifiedResult ir = new IdentifiedResult();
    ir.Add(g1);
    ir.Add(g2);

    Assert.AreEqual(2, ir.Count);
    Assert.AreEqual(3, ir.GetSpectra().Count);

    // Charge > 1 removes one spectrum from P1 but keeps both groups.
    ir.Filter(m => { return m.Spectrum.Query.Charge > 1; });
    Assert.AreEqual(2, ir.Count);
    Assert.AreEqual(2, ir.GetSpectra().Count);
    // The All(...) result used to be discarded; assert it so the check can actually fail.
    Assert.IsTrue(ir.GetSpectra().All(m => { return m.Charge > 1; }));

    // Charge > 2 empties P1's group, so only P2 remains.
    ir.Filter(m => { return m.Spectrum.Query.Charge > 2; });
    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);
    Assert.IsTrue(ir.GetSpectra().All(m => { return m.Charge > 2; }));
    Assert.AreEqual("P2", ir[0][0].Name);
}
/// <summary>
/// Builds proteins from the protein names referenced by the peptides of the given spectra.
/// </summary>
/// <param name="peptides">Spectra whose peptides, and the protein names those peptides list, are walked.</param>
/// <returns>
/// One protein per distinct protein name; each protein holds every peptide that listed its name.
/// </returns>
public static List<IIdentifiedProtein> BuildProteins(IEnumerable<IIdentifiedSpectrum> peptides)
{
    var proteinMap = new Dictionary<string, IIdentifiedProtein>();

    foreach (IIdentifiedSpectrum mph in peptides)
    {
        foreach (IIdentifiedPeptide pep in mph.Peptides)
        {
            foreach (string proteinName in pep.Proteins)
            {
                // Single dictionary lookup instead of ContainsKey followed by the indexer.
                IIdentifiedProtein protein;
                if (!proteinMap.TryGetValue(proteinName, out protein))
                {
                    protein = new IdentifiedProtein();
                    protein.Name = proteinName;
                    proteinMap[proteinName] = protein;
                }

                protein.Peptides.Add(pep);
            }
        }
    }

    return new List<IIdentifiedProtein>(proteinMap.Values);
}
/// <summary>
/// Reads every protein from the SQLite file and indexes it by its ProteinID.
/// </summary>
/// <param name="fileName">Path handed to <c>SQLiteDBHelper</c>.</param>
/// <returns>
/// Map from ProteinID to a protein carrying reference, sequence, score and coverage.
/// </returns>
public Dictionary<int, IIdentifiedProtein> ParseProteinMap(string fileName)
{
    SQLiteDBHelper sqlite = new SQLiteDBHelper(fileName);
    var result = new Dictionary<int, IIdentifiedProtein>();

    string sqlProtein = "select ps.ProteinID, pa.Description, pro.Sequence, ps.ProteinScore, ps.Coverage from ProteinAnnotations as pa, Proteins as pro, ProteinScores as ps where pro.ProteinID=pa.ProteinID and pro.ProteinID=ps.ProteinID";

    // The reader used to be left open; dispose it even when a row fails to convert.
    // NOTE(review): assumes ExecuteReader returns an IDataReader-style (IDisposable) reader - confirm.
    using (var proteinReader = sqlite.ExecuteReader(sqlProtein, null))
    {
        Progress.SetMessage("Parsing proteins ...");

        while (proteinReader.Read())
        {
            var protein = new IdentifiedProtein();
            var proid = proteinReader.GetInt32(0);

            // Strip a leading '>' from the description before using it as the reference.
            var des = proteinReader.GetString(1);
            if (des.Length > 0 && des[0] == '>')
            {
                des = des.Substring(1);
            }

            protein.Reference = des;
            protein.Sequence = proteinReader.GetString(2);
            protein.Score = proteinReader.GetDouble(3);
            protein.Coverage = proteinReader.GetDouble(4);

            result[proid] = protein;
        }
    }

    return result;
}
// Test fixture: three proteins built from two peptides.
// Protein1 holds both peptides, Protein2 only the first, Protein3 only the second.
// The list is returned in the order Protein3, Protein2, Protein1.
private static List<IIdentifiedProtein> InitProteins()
{
    var firstSpectrum = new IdentifiedSpectrum();
    firstSpectrum.Query.FileScan.Experimental = "EXP1";
    var firstPeptide = new IdentifiedPeptide(firstSpectrum);
    firstPeptide.AddProtein("Protein1");
    firstPeptide.AddProtein("Protein2");
    firstPeptide.Sequence = "SEQ1";

    var secondSpectrum = new IdentifiedSpectrum();
    secondSpectrum.Query.FileScan.Experimental = "EXP2";
    var secondPeptide = new IdentifiedPeptide(secondSpectrum);
    secondPeptide.AddProtein("Protein1");
    secondPeptide.AddProtein("Protein3");
    secondPeptide.Sequence = "SEQ2";

    var protein1 = new IdentifiedProtein("Protein1");
    protein1.Peptides.Add(firstPeptide);
    protein1.Peptides.Add(secondPeptide);

    var protein2 = new IdentifiedProtein("Protein2");
    protein2.Peptides.Add(firstPeptide);

    var protein3 = new IdentifiedProtein("Protein3");
    protein3.Peptides.Add(secondPeptide);

    return new List<IIdentifiedProtein> { protein3, protein2, protein1 };
}
// Verifies that List<IdentifiedProtein>.Sort() orders the three proteins as
// Protein1 (two peptides), Protein2, Protein3 when they are inserted in reverse.
public void TestSort()
{
    var firstPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());
    var secondPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());

    var protein1 = new IdentifiedProtein("Protein1");
    protein1.Peptides.Add(firstPeptide);
    protein1.Peptides.Add(secondPeptide);

    var protein2 = new IdentifiedProtein("Protein2");
    protein2.Peptides.Add(firstPeptide);

    var protein3 = new IdentifiedProtein("Protein3");
    protein3.Peptides.Add(secondPeptide);

    // Inserted in reverse of the expected order.
    var proteins = new List<IdentifiedProtein> { protein3, protein2, protein1 };
    proteins.Sort();

    Assert.AreEqual(protein1, proteins[0]);
    Assert.AreEqual(protein2, proteins[1]);
    Assert.AreEqual(protein3, proteins[2]);
}
// Checks the ordering produced by Sort() on a list of IdentifiedProtein:
// the list is filled as Protein3, Protein2, Protein1 and must come back as 1, 2, 3.
public void TestSort()
{
    var spectrumA = new IdentifiedSpectrum();
    var peptideA = new IdentifiedPeptide(spectrumA);
    var spectrumB = new IdentifiedSpectrum();
    var peptideB = new IdentifiedPeptide(spectrumB);

    // Protein1 owns both peptides; Protein2 and Protein3 own one each.
    var p1 = new IdentifiedProtein("Protein1");
    p1.Peptides.Add(peptideA);
    p1.Peptides.Add(peptideB);
    var p2 = new IdentifiedProtein("Protein2");
    p2.Peptides.Add(peptideA);
    var p3 = new IdentifiedProtein("Protein3");
    p3.Peptides.Add(peptideB);

    var sorted = new List<IdentifiedProtein>();
    sorted.Add(p3);
    sorted.Add(p2);
    sorted.Add(p1);
    sorted.Sort();

    var expected = new[] { p1, p2, p3 };
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], sorted[i]);
    }
}
// Verifies DistinctResultDistiller.KeepDistinctPeptideOnly on three single-protein groups:
//   protein1 = { pep1, pep2 }, protein2 = { pep1, pep3 }, protein3 = { pep2, pep4 }.
// The test expects the first group to disappear and the other two to keep only
// the peptide that no other group contains (pep3 and pep4 respectively).
public void TestKeepDistinctPeptideOnly()
{
    var spectrum1 = new IdentifiedSpectrum();
    var spectrum2 = new IdentifiedSpectrum();
    var spectrum3 = new IdentifiedSpectrum();
    var spectrum4 = new IdentifiedSpectrum();

    var pep1 = spectrum1.NewPeptide();
    var pep2 = spectrum2.NewPeptide();
    var pep3 = spectrum3.NewPeptide();
    var pep4 = spectrum4.NewPeptide();

    var protein1 = new IdentifiedProtein();
    protein1.Peptides.Add(pep1);
    protein1.Peptides.Add(pep2);

    var protein2 = new IdentifiedProtein();
    protein2.Peptides.Add(pep1);
    protein2.Peptides.Add(pep3);

    var protein3 = new IdentifiedProtein();
    protein3.Peptides.Add(pep2);
    protein3.Peptides.Add(pep4);

    var g1 = new IdentifiedProteinGroup();
    g1.Add(protein1);
    var g2 = new IdentifiedProteinGroup();
    g2.Add(protein2);
    var g3 = new IdentifiedProteinGroup();
    g3.Add(protein3);

    IIdentifiedResult ir = new IdentifiedResult();
    ir.Add(g1);
    ir.Add(g2);
    ir.Add(g3);

    var distiller = new DistinctResultDistiller();
    distiller.KeepDistinctPeptideOnly(ir);

    // Only the groups of protein2 and protein3 survive, in that order.
    Assert.AreEqual(2, ir.Count);
    Assert.AreEqual(g2, ir[0]);
    Assert.AreEqual(g3, ir[1]);

    Assert.AreEqual(1, ir[0].GetPeptides().Count);
    Assert.AreEqual(spectrum3, ir[0].GetPeptides()[0]);

    Assert.AreEqual(1, ir[1].GetPeptides().Count);
    Assert.AreEqual(spectrum4, ir[1].GetPeptides()[0]);
}
/// <summary>
/// Creates a new protein and populates it from a single text line using this instance's converter.
/// </summary>
/// <param name="line">Text line handed to the converter's <c>SetProperty</c>.</param>
/// <returns>The newly created, populated protein.</returns>
public IIdentifiedProtein ParseString(string line)
{
    IIdentifiedProtein protein = new IdentifiedProtein();

    this.converter.SetProperty(protein, line);

    return protein;
}
/// <summary>
/// Reads the next protein group (its protein lines followed by its peptide lines) from the stream.
/// </summary>
/// <param name="filein">Reader positioned somewhere before or inside the next group.</param>
/// <param name="peptideMap">
/// Spectra already seen, keyed by "long file name-rank"; a spectrum parsed again with a known key
/// is replaced by the instance stored here, and new ones are added.
/// </param>
/// <param name="lastLine">
/// Look-ahead line. On entry it is the last line read by the caller; on exit it is the line that
/// ended this group (a protein line, a blank line, or null at end of stream).
/// </param>
/// <returns>The parsed group, or null when the stream ends before another protein line is found.</returns>
private IIdentifiedProteinGroup ReadNextProteinGroup(StreamReader filein, Dictionary<string, IIdentifiedSpectrum> peptideMap, ref string lastLine)
{
    // Skip forward until the look-ahead line is a protein line or the stream is exhausted.
    while (!IsProteinLine(lastLine) && (lastLine = filein.ReadLine()) != null)
    {
    }

    if (lastLine == null)
    {
        return null;
    }

    // Consecutive protein lines all belong to the same group.
    IIdentifiedProteinGroup result = new IdentifiedProteinGroup();
    while (IsProteinLine(lastLine))
    {
        IIdentifiedProtein protein = new IdentifiedProtein();
        this.proteinConverter.SetProperty(protein, lastLine);
        result.Add(protein);

        lastLine = filein.ReadLine();
    }

    // Peptide lines run until the next protein line, a blank line, or end of stream.
    // The null/blank test is part of the loop condition so that a protein block directly
    // followed by end of stream or a blank line is never handed to the peptide converter.
    var peptides = new List<IIdentifiedSpectrum>();
    while (lastLine != null && lastLine.Trim().Length != 0 && !IsProteinLine(lastLine))
    {
        IIdentifiedSpectrum mphit = new IdentifiedSpectrum();
        this.peptideConverter.SetProperty(mphit, lastLine);

        // Reuse the spectrum instance if the same file/rank was already parsed for another group.
        string id = mphit.Query.FileScan.LongFileName + "-" + mphit.Rank;
        IIdentifiedSpectrum known;
        if (peptideMap.TryGetValue(id, out known))
        {
            mphit = known;
        }
        else
        {
            peptideMap[id] = mphit;
        }

        peptides.Add(mphit);

        lastLine = filein.ReadLine();
    }

    peptides.Sort();
    result.AddIdentifiedSpectra(peptides);

    return result;
}
/// <summary>
/// Builds a protein from the fields that <c>GetProteinInfo</c> extracts from the given text.
/// </summary>
/// <param name="proteinContent">Raw text passed to <c>GetProteinInfo</c>.</param>
/// <returns>
/// A protein whose name, molecular weight and description come from fields 0, 1 and 2.
/// </returns>
private IdentifiedProtein GetProtein(String proteinContent)
{
    List<String> fields = GetProteinInfo(proteinContent);

    return new IdentifiedProtein
    {
        Name = fields[0],
        MolecularWeight = MyConvert.ToDouble(fields[1]),
        Description = fields[2]
    };
}
// Round-trip test for the SILAC protein quantification converters:
// writes a protein's annotation to a comma-separated line, reads that line into a
// second protein through the factory-built converter, and expects the same line back.
public void TestReadWrite()
{
    var quantification = new ProteinQuantificationResult();
    quantification.Items["D1"] = new QuantificationItem()
    {
        Enabled = true,
        Ratio = 1.5,
        SampleIntensity = 150,
        ReferenceIntensity = 100,
        Correlation = 0.9,
        ScanCount = 55,
        Filename = "test1.silac"
    };
    quantification.Items["D2"] = new QuantificationItem()
    {
        Enabled = false,
        Ratio = 3,
        SampleIntensity = 250,
        ReferenceIntensity = 200,
        Correlation = 0.8,
        ScanCount = 77,
        Filename = "test2.silac"
    };

    var ann = new IdentifiedProtein();
    ann.Annotations[SilacQuantificationConstants.SILAC_KEY] = quantification;

    // The base converter plus the per-dataset converters it derives from the annotated item.
    var converter = new SilacProteinQuantificationResultConverter2<IAnnotation>();
    var converters = new List<IPropertyConverter<IAnnotation>>();
    converters.Add(converter);
    converters.AddRange(converter.GetRelativeConverter(new IAnnotation[] { ann }.ToList()));

    var finalConverter = new CompositePropertyConverter<IAnnotation>(converters, ',');
    Assert.AreEqual("S_COUNT,SE_D1,SR_D1,SRC_D1,SSI_D1,SRI_D1,SE_D2,SR_D2,SRC_D2,SSI_D2,SRI_D2", finalConverter.Name);

    var written = finalConverter.GetProperty(ann);
    Assert.AreEqual("2,True,1.5000,0.9000,150.0,100.0,False,3.0000,0.8000,250.0,200.0", written);

    // Read the line back through a converter built from the header alone.
    var protein2 = new IdentifiedProtein();
    var readConverter = IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(finalConverter.Name, ',');
    readConverter.SetProperty(protein2, written);

    var rewritten = finalConverter.GetProperty(protein2);
    Assert.AreEqual(written, rewritten);
}
// Three peptides are added to one protein; two of them share the same spectrum.
// The test expects 3 peptides, 2 spectra and 2 distinct peptides.
public void TestDistinctPeptides()
{
    IdentifiedProtein protein = new IdentifiedProtein();
    IdentifiedSpectrum shared = new IdentifiedSpectrum();
    IdentifiedSpectrum single = new IdentifiedSpectrum();

    foreach (IdentifiedSpectrum spectrum in new[] { shared, shared, single })
    {
        protein.Peptides.Add(new IdentifiedPeptide(spectrum));
    }

    Assert.AreEqual(3, protein.Peptides.Count);
    Assert.AreEqual(2, protein.GetSpectra().Count);
    Assert.AreEqual(2, protein.GetDistinctPeptides().Count());
}
// Write/read round trip for SILAC quantification columns.
// The header and the first data line are pinned to exact strings; the line is then parsed
// into a fresh protein and written again, and both lines must be equal.
public void TestReadWrite()
{
    var ann = new IdentifiedProtein();

    var d1 = new QuantificationItem()
    {
        Enabled = true,
        Ratio = 1.5,
        SampleIntensity = 150,
        ReferenceIntensity = 100,
        Correlation = 0.9,
        ScanCount = 55,
        Filename = "test1.silac"
    };
    var d2 = new QuantificationItem()
    {
        Enabled = false,
        Ratio = 3,
        SampleIntensity = 250,
        ReferenceIntensity = 200,
        Correlation = 0.8,
        ScanCount = 77,
        Filename = "test2.silac"
    };

    var pqr = new ProteinQuantificationResult();
    pqr.Items["D1"] = d1;
    pqr.Items["D2"] = d2;
    ann.Annotations[SilacQuantificationConstants.SILAC_KEY] = pqr;

    var converter = new SilacProteinQuantificationResultConverter2<IAnnotation>();
    List<IPropertyConverter<IAnnotation>> parts = new List<IPropertyConverter<IAnnotation>>();
    parts.Add(converter);
    parts.AddRange(converter.GetRelativeConverter(new IAnnotation[] { ann }.ToList()));

    CompositePropertyConverter<IAnnotation> composite = new CompositePropertyConverter<IAnnotation>(parts, ',');
    Assert.AreEqual("S_COUNT,SE_D1,SR_D1,SRC_D1,SSI_D1,SRI_D1,SE_D2,SR_D2,SRC_D2,SSI_D2,SRI_D2", composite.Name);

    var line1 = composite.GetProperty(ann);
    Assert.AreEqual("2,True,1.5000,0.9000,150.0,100.0,False,3.0000,0.8000,250.0,200.0", line1);

    // Parse with a converter rebuilt from the header, then serialise again.
    var protein2 = new IdentifiedProtein();
    var parser = IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(composite.Name, ',');
    parser.SetProperty(protein2, line1);

    Assert.AreEqual(line1, composite.GetProperty(protein2));
}
// Fixture of three proteins over two peptides ("SEQ1" from "EXP1", "SEQ2" from "EXP2").
// Protein1 contains both peptides, Protein2 only SEQ1, Protein3 only SEQ2.
// Returned order is Protein3, Protein2, Protein1.
private static List<IIdentifiedProtein> InitProteins()
{
    var spectrumOne = new IdentifiedSpectrum();
    spectrumOne.Query.FileScan.Experimental = "EXP1";
    var peptideOne = new IdentifiedPeptide(spectrumOne);
    peptideOne.AddProtein("Protein1");
    peptideOne.AddProtein("Protein2");
    peptideOne.Sequence = "SEQ1";

    var spectrumTwo = new IdentifiedSpectrum();
    spectrumTwo.Query.FileScan.Experimental = "EXP2";
    var peptideTwo = new IdentifiedPeptide(spectrumTwo);
    peptideTwo.AddProtein("Protein1");
    peptideTwo.AddProtein("Protein3");
    peptideTwo.Sequence = "SEQ2";

    var both = new IdentifiedProtein("Protein1");
    both.Peptides.Add(peptideOne);
    both.Peptides.Add(peptideTwo);

    var onlyOne = new IdentifiedProtein("Protein2");
    onlyOne.Peptides.Add(peptideOne);

    var onlyTwo = new IdentifiedProtein("Protein3");
    onlyTwo.Peptides.Add(peptideTwo);

    var proteins = new List<IIdentifiedProtein>();
    proteins.Add(onlyTwo);
    proteins.Add(onlyOne);
    proteins.Add(both);
    return proteins;
}
/// <summary>
/// Extracts one protein from every table matched by <c>GetTableRegex()</c> in the given text.
/// </summary>
/// <param name="redundant">Text to scan for table matches.</param>
/// <returns>
/// The proteins that could be parsed; matches for which <c>GetProtein</c> throws an
/// <see cref="ArgumentException"/> are skipped.
/// </returns>
protected List<IdentifiedProtein> ParseSameMatchProteins(String redundant)
{
    var proteins = new List<IdentifiedProtein>();

    for (Match table = GetTableRegex().Match(redundant); table.Success; table = table.NextMatch())
    {
        try
        {
            proteins.Add(GetProtein(table.Groups[1].Value));
        }
        catch (ArgumentException)
        {
            // Best effort: a table that does not describe a protein is ignored.
        }
    }

    return proteins;
}
// Verifies peptide-level filtering inside one protein group: two proteins share a single
// spectrum but have different peptide sequences. Filtering on the sequence must drop the
// non-matching protein, and a filter matching nothing must empty the result.
public void TestFilter2()
{
    var spectrum = new IdentifiedSpectrum();
    spectrum.Query.FileScan.LongFileName = "ABDCDD.12.123.2.dat";

    var pro1 = new IdentifiedProtein("P1");
    var pepA = new IdentifiedPeptide(spectrum) { Sequence = "AAAAAAA" };
    pro1.Peptides.Add(pepA);

    var pro2 = new IdentifiedProtein("P2");
    var pepB = new IdentifiedPeptide(spectrum) { Sequence = "BBBBBBB" };
    pro2.Peptides.Add(pepB);

    var g1 = new IdentifiedProteinGroup();
    g1.Add(pro1);
    g1.Add(pro2);

    IdentifiedResult ir = new IdentifiedResult();
    ir.Add(g1);

    // One group, two proteins, one shared spectrum.
    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(2, ir[0].Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);

    // Only P1's peptide contains 'A'.
    ir.Filter(m => m.Sequence.Contains('A'));
    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(1, ir[0].Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);
    Assert.AreSame(pro1, ir[0][0]);

    // No remaining peptide contains 'C'.
    ir.Filter(m => m.Sequence.Contains('C'));
    Assert.AreEqual(0, ir.Count);
}
// Verifies that AnnotationUtils.GetAnnotationKeys returns the annotation keys of all
// spectra in a result: two spectra with one distinct key each must yield both keys.
public void TestGetAnnotationKeys()
{
    string key1 = "TEST1";
    string key2 = "TEST2";

    var mph1 = new IdentifiedSpectrum();
    mph1.Annotations.Add(key1, null);
    mph1.Query.FileScan.Experimental = "EXP1";
    // The peptide instance is discarded here; it is reached later through mph1.Peptide.
    new IdentifiedPeptide(mph1);

    var mph2 = new IdentifiedSpectrum();
    mph2.Annotations.Add(key2, null);
    mph2.Query.FileScan.Experimental = "EXP2";
    new IdentifiedPeptide(mph2);

    mph1.Peptide.Sequence = "SEQ1";
    mph2.Peptide.Sequence = "SEQ2";

    var protein = new IdentifiedProtein();
    protein.Peptides.Add(mph1.Peptide);
    protein.Peptides.Add(mph2.Peptide);

    var group = new IdentifiedProteinGroup();
    group.Add(protein);

    var mascotResult = new MascotResult();
    mascotResult.Add(group);

    List<string> keys = AnnotationUtils.GetAnnotationKeys(mascotResult.GetSpectra());

    Assert.AreEqual(2, keys.Count);
    Assert.IsTrue(keys.Contains(key1));
    Assert.IsTrue(keys.Contains(key2));
}
// Verifies IdentifiedProtein.CalculateCoverage as peptides are added one by one.
// Each peptide is written as "X.PEPTIDE.Y"; the expected percentages match the number of
// residues between the dots that are newly covered, out of a 30-residue protein.
public void TestCalculateCoverage()
{
    IdentifiedProtein protein = new IdentifiedProtein();

    //total 30 amino acids
    protein.Sequence = "ABCDEDFDEFDSESLKJFDJLSLGFGDDGD";

    // 5 of 30 residues -> 16.67
    IdentifiedPeptide p1 = new IdentifiedPeptide(new IdentifiedSpectrum());
    p1.Sequence = "B.CDEDF.D";
    protein.Peptides.Add(p1);
    protein.CalculateCoverage();
    Assert.AreEqual(16.67, protein.Coverage, 0.01);

    // 10 of 30 residues -> 33.33
    IdentifiedPeptide p2 = new IdentifiedPeptide(new IdentifiedSpectrum());
    p2.Sequence = "F.DSESL.K";
    protein.Peptides.Add(p2);
    protein.CalculateCoverage();
    Assert.AreEqual(33.33, protein.Coverage, 0.01);

    // 14 of 30 residues -> 46.67
    IdentifiedPeptide p3 = new IdentifiedPeptide(new IdentifiedSpectrum());
    p3.Sequence = "L.SLGF.G";
    protein.Peptides.Add(p3);
    protein.CalculateCoverage();
    Assert.AreEqual(46.67, protein.Coverage, 0.01);

    // Overlaps the previous peptide and adds one residue: 15 of 30 -> 50.00
    IdentifiedPeptide p4 = new IdentifiedPeptide(new IdentifiedSpectrum());
    p4.Sequence = "L.SLGFG.D";
    protein.Peptides.Add(p4);
    protein.CalculateCoverage();
    Assert.AreEqual(50.00, protein.Coverage, 0.01);
}
/// <summary>
/// Reads every protein from the SQLite file and indexes it by its ProteinID, taking scores from
/// either the <c>ProteinScores</c> or the <c>ProteinScores_decoy</c> table.
/// </summary>
/// <param name="fileName">Path handed to <c>SQLiteDBHelper</c>.</param>
/// <param name="isDecoy">
/// When true, scores come from the <c>_decoy</c> table and each protein's sequence and reference
/// are replaced by their reversed forms.
/// </param>
/// <returns>
/// Map from ProteinID to a protein carrying reference, sequence, score and coverage.
/// </returns>
public Dictionary<int, IIdentifiedProtein> ParseProteinMap(string fileName, bool isDecoy)
{
    // The suffix only ever takes these two literal values, so formatting it into the SQL is safe.
    var suffix = isDecoy ? "_decoy" : "";

    SQLiteDBHelper sqlite = new SQLiteDBHelper(fileName);
    var result = new Dictionary<int, IIdentifiedProtein>();

    string sqlProtein = string.Format("select ps.ProteinID, pa.Description, pro.Sequence, ps.ProteinScore, ps.Coverage from ProteinAnnotations as pa, Proteins as pro, ProteinScores{0} as ps where pro.ProteinID=pa.ProteinID and pro.ProteinID=ps.ProteinID", suffix);

    // The reader used to be left open; dispose it even when a row fails to convert.
    // NOTE(review): assumes ExecuteReader returns an IDataReader-style (IDisposable) reader - confirm.
    using (var proteinReader = sqlite.ExecuteReader(sqlProtein, null))
    {
        Progress.SetMessage("Parsing proteins ...");

        while (proteinReader.Read())
        {
            var protein = new IdentifiedProtein();
            var proid = proteinReader.GetInt32(0);

            // Strip a leading '>' from the description before using it as the reference.
            var des = proteinReader.GetString(1);
            if (des.Length > 0 && des[0] == '>')
            {
                des = des.Substring(1);
            }

            protein.Reference = des;
            protein.Sequence = proteinReader.GetString(2);
            protein.Score = proteinReader.GetDouble(3);
            protein.Coverage = proteinReader.GetDouble(4);

            result[proid] = protein;
        }
    }

    if (isDecoy)
    {
        // The query reads sequence and description from the same tables for both modes,
        // so the decoy entries are derived here by reversing them.
        foreach (var v in result.Values)
        {
            v.Sequence = SequenceUtils.GetReversedSequence(v.Sequence);
            v.Reference = GetReversedReference(v.Reference);
        }
    }

    return result;
}
// Round-trip test for a DTASelect-style protein header: the factory builds a converter from
// the header, a data line is parsed into a protein, selected properties are checked, and the
// protein is written back and compared with the original line.
// NOTE(review): the converter is created with '\t' as delimiter - confirm that the column
// separators inside the header/line literals below are tab characters in the real file.
public void TestDtaselect()
{
    const int peptideCount = 495;

    string header = "Locus Sequence Count Spectrum Count Sequence Coverage Length MolWt pI Validation Status Descriptive Name";
    string line = "YDR050C 495 495 81.10% 249 26795.41 5.74 U YDR050C TPI1 SGDID:S000002457, Chr IV from 556470-555724, reverse complement, Verified ORF, \"Triose phosphate isomerase, abundant glycolytic enzyme; mRNA half-life is regulated by iron availability; transcription is controlled by activators Reb1p, Gcr1p, and Rap1p through binding sites in the 5' non-coding region\"";

    IPropertyConverter<IIdentifiedProtein> converter = IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(header, '\t');
    Assert.AreEqual(header, converter.Name);

    IIdentifiedProtein protein = new IdentifiedProtein();
    converter.SetProperty(protein, line);

    Assert.AreEqual("YDR050C", protein.Name);
    Assert.AreEqual("TPI1 SGDID:S000002457, Chr IV from 556470-555724, reverse complement, Verified ORF, \"Triose phosphate isomerase, abundant glycolytic enzyme; mRNA half-life is regulated by iron availability; transcription is controlled by activators Reb1p, Gcr1p, and Rap1p through binding sites in the 5' non-coding region\"", protein.Description);
    Assert.AreEqual(495, protein.UniquePeptideCount);
    Assert.AreEqual(81.1, protein.Coverage);
    Assert.AreEqual(26795.41, protein.MolecularWeight);
    Assert.AreEqual(5.74, protein.IsoelectricPoint);

    // Give the protein as many peptides as the line reports before writing it back.
    int added = 0;
    while (added < peptideCount)
    {
        protein.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()));
        added++;
    }

    Assert.AreEqual(line, converter.GetProperty(protein));
}
// Verifies IdentifiedPeptideUtils.GetUniquePeptideCount on sequences that differ only in
// their I/L residues: the assertions expect them to count as a single unique peptide,
// whether they come from the same spectrum or from a different one.
public void TestUniqueCount()
{
    // The unused IdentifiedProtein local that used to be created here was removed.
    var spectrum = new IdentifiedSpectrum();

    var peptides = new List<IIdentifiedPeptide>();
    peptides.Add(new IdentifiedPeptide(spectrum) { Sequence = "ILLLAR" });
    peptides.Add(new IdentifiedPeptide(spectrum) { Sequence = "LILIAR" });

    Assert.AreEqual(1, IdentifiedPeptideUtils.GetUniquePeptideCount(peptides));

    // Same I/L-only difference, but attached to a separate spectrum.
    peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()) { Sequence = "LIIIAR" });

    Assert.AreEqual(1, IdentifiedPeptideUtils.GetUniquePeptideCount(peptides));
}
/// <summary>
/// Reads the description and sequence of every protein in the SQLite file.
/// </summary>
/// <param name="fileName">Path handed to <c>SQLiteDBHelper</c>.</param>
/// <returns>
/// One <c>Sequence</c> per row, built from the description (leading '>' removed) and the sequence.
/// </returns>
public List<Sequence> ParseProteinSequences(string fileName)
{
    SQLiteDBHelper sqlite = new SQLiteDBHelper(fileName);
    var result = new List<Sequence>();

    string sqlProtein = "select pa.Description, pro.Sequence from ProteinAnnotations as pa, Proteins as pro where pro.ProteinID=pa.ProteinID";

    // The reader used to be left open; dispose it even when a row fails to convert.
    // NOTE(review): assumes ExecuteReader returns an IDataReader-style (IDisposable) reader - confirm.
    using (var proteinReader = sqlite.ExecuteReader(sqlProtein, null))
    {
        Progress.SetMessage("Parsing proteins ...");

        while (proteinReader.Read())
        {
            // No IdentifiedProtein is needed here; the per-row instance that was never used is gone.
            var des = proteinReader.GetString(0);
            if (des.Length > 0 && des[0] == '>')
            {
                des = des.Substring(1);
            }

            result.Add(new Sequence(des, proteinReader.GetString(1)));
        }
    }

    return result;
}
// Verifies IdentifiedProteinGroupBuilder.Build: a protein whose peptides are all contained
// in other proteins is expected to be dropped from the resulting groups.
public void TestBuild()
{
    var pep1 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("A", 1, 1, 1, ".dta"))) { Sequence = "A" };
    var pep2 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("B", 1, 1, 1, ".dta"))) { Sequence = "B" };
    var pep3 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("C", 1, 1, 1, ".dta"))) { Sequence = "C" };
    var pep4 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("D", 1, 1, 1, ".dta"))) { Sequence = "D" };
    var pep5 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("E", 1, 1, 1, ".dta"))) { Sequence = "E" };
    var pep6 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("F", 1, 1, 1, ".dta"))) { Sequence = "F" };

    var protein1 = new IdentifiedProtein()
    {
        Peptides = new IIdentifiedPeptide[] { pep1, pep3, pep5, pep6 }.ToList()
    };

    var protein2 = new IdentifiedProtein()
    {
        Peptides = new IIdentifiedPeptide[] { pep2, pep3, pep4 }.ToList()
    };

    // Should be removed from the final result: all of its peptides are already included in
    // protein1 and protein2, even though no single protein contains both peptides.
    var protein3 = new IdentifiedProtein()
    {
        Peptides = new IIdentifiedPeptide[] { pep1, pep2 }.ToList()
    };

    // Should be removed from the final result: all of its peptides are included in protein1.
    // NOTE(review): protein4 is constructed but never passed to Build below, so this case is
    // not actually exercised - confirm whether it was meant to be part of the input.
    var protein4 = new IdentifiedProtein()
    {
        Peptides = new IIdentifiedPeptide[] { pep1, pep5 }.ToList()
    };

    var input = new IIdentifiedProtein[] { protein1, protein2, protein3 }.ToList();
    var actual = new IdentifiedProteinGroupBuilder().Build(input);

    Assert.AreEqual(2, actual.Count);
    Assert.AreSame(protein1, actual[0][0]);
    Assert.AreSame(protein2, actual[1][0]);
}
// Round-trip test for a "noredundant"-style protein header: the factory builds a converter
// from the header, a data line is parsed into a protein, selected properties are checked,
// and the protein is written back and compared with the original line.
// NOTE(review): the converter is created with '\t' as delimiter - confirm that the column
// separators inside the header/line literals below are tab characters in the real file.
public void TestNoredundant()
{
    const int peptideCount = 84;

    string header = " Reference PepCount UniquePepCount CoverPercent MW PI IdentifiedName";
    string line = "\tIPI:IPI00784154.1|SWISS-PROT:P10809|TREMBL:B2R5M6;Q53QD5;Q53SE2;Q96RI4;Q9UCR6|ENSEMBL:ENSP00000340019;ENSP00000373620|REFSEQ:NP_002147;NP_955472|H-INV:HIT000031088 Tax_Id=9606 Gene_Symbol=HSPD1 60 kDa heat shock protein, mitochondrial 84 19 43.46% 61054.43 5.70 IPI:IPI00784154.1|SWISS-PROT:P10809|TREMBL:B2R5M6;Q53QD5;Q53SE2;Q96RI4;Q9UCR6|ENSEMBL:ENSP00000340019;ENSP00000373620|REFSEQ:NP_002147;NP_955472|H-INV:HIT000031088 Tax_Id=9606 Gene_Symbol=HSPD1 60 kDa heat shock protein, mitochondrial";

    IPropertyConverter<IIdentifiedProtein> converter = IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(header, '\t');
    Assert.AreEqual(header, converter.Name);

    IIdentifiedProtein protein = new IdentifiedProtein();
    converter.SetProperty(protein, line);

    Assert.AreEqual("IPI:IPI00784154.1|SWISS-PROT:P10809|TREMBL:B2R5M6;Q53QD5;Q53SE2;Q96RI4;Q9UCR6|ENSEMBL:ENSP00000340019;ENSP00000373620|REFSEQ:NP_002147;NP_955472|H-INV:HIT000031088", protein.Name);
    Assert.AreEqual("Tax_Id=9606 Gene_Symbol=HSPD1 60 kDa heat shock protein, mitochondrial", protein.Description);
    Assert.AreEqual(19, protein.UniquePeptideCount);
    Assert.AreEqual(43.46, protein.Coverage);
    Assert.AreEqual(61054.43, protein.MolecularWeight);
    Assert.AreEqual(5.7, protein.IsoelectricPoint);

    // Give the protein as many peptides as the line reports before writing it back.
    int added = 0;
    while (added < peptideCount)
    {
        protein.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()));
        added++;
    }

    Assert.AreEqual(line, converter.GetProperty(protein));
}
/// <summary>
/// Tells whether a line can be parsed as a protein record.
/// </summary>
/// <param name="line">Line to test; may be null.</param>
/// <returns>
/// False for null, for a line starting with a tab, or when the protein converter throws while
/// parsing the line; true otherwise.
/// </returns>
public bool IsProteinLine(string line)
{
    if (null == line || line.StartsWith("\t"))
    {
        return false;
    }

    // Trial parse into a throw-away protein: any failure means "not a protein line".
    bool parsed;
    try
    {
        var probe = new IdentifiedProtein();
        this.proteinConverter.SetProperty(probe, line);
        parsed = true;
    }
    catch (Exception)
    {
        parsed = false;
    }

    return parsed;
}
// Round-trip test for the iTRAQ quantification converters: two datasets with two ratio
// channels each are written to a comma-separated line, the line is read into a second
// protein through the factory-built converter, and writing that protein must give the same line.
public void TestReadWrite()
{
    var quantification = new ITraqQuantificationResult();

    quantification.DatasetMap["D1"] = new ITraqQuantificationDatasetItem() { DatasetName = "D1" };
    quantification.DatasetMap["D1"].RatioMap["R114/REF"] = new ITraqQuantificationChannelItem() { ChannelName = "R114/REF", Ratio = 1.5 };
    quantification.DatasetMap["D1"].RatioMap["R115/REF"] = new ITraqQuantificationChannelItem() { ChannelName = "R115/REF", Ratio = 1.8 };

    quantification.DatasetMap["D2"] = new ITraqQuantificationDatasetItem() { DatasetName = "D2" };
    quantification.DatasetMap["D2"].RatioMap["R116/REF"] = new ITraqQuantificationChannelItem() { ChannelName = "R116/REF", Ratio = 2.5 };
    quantification.DatasetMap["D2"].RatioMap["R117/REF"] = new ITraqQuantificationChannelItem() { ChannelName = "R117/REF", Ratio = 3.8 };

    var ann = new IdentifiedProtein();
    ann.Annotations[ITraqConsts.ITRAQ_KEY] = quantification;

    // The base converter plus the per-channel converters it derives from the annotated item.
    var converter = new ITraqQuantificationResultConverter<IAnnotation>();
    var converters = new List<IPropertyConverter<IAnnotation>>();
    converters.Add(converter);
    converters.AddRange(converter.GetRelativeConverter(new IAnnotation[] { ann }.ToList()));

    var finalConverter = new CompositePropertyConverter<IAnnotation>(converters, ',');
    if (exportToConsole)
    {
        Console.WriteLine(finalConverter.Name);
    }
    Assert.AreEqual("ITRAQ_COUNT,IR_D1_R114/REF,IR_D1_R115/REF,IR_D2_R116/REF,IR_D2_R117/REF", finalConverter.Name);

    var written = finalConverter.GetProperty(ann);
    if (exportToConsole)
    {
        Console.WriteLine(written);
    }
    Assert.AreEqual("2,1.5000,1.8000,2.5000,3.8000", written);

    // Read the line back through a converter built from the header alone.
    var protein2 = new IdentifiedProtein();
    var readConverter = IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(finalConverter.Name, ',');
    readConverter.SetProperty(protein2, written);

    var rewritten = finalConverter.GetProperty(protein2);
    Assert.AreEqual(written, rewritten);
}
/// <summary>
/// Parses the full text of a Mascot report into a <see cref="MascotResult"/>.
/// </summary>
/// <remarks>
/// Resets <c>this.modifications</c> and rebuilds <c>this.currentPeptideFilter</c> before the
/// proteins are parsed. Every protein match starts its own group; proteins listed in the text
/// that follows a match under "Proteins matching the same set" are added to that group with
/// the same peptides. Groups whose first protein has no peptides are removed.
/// </remarks>
/// <param name="fileContent">Complete report text.</param>
/// <returns>The populated result.</returns>
public MascotResult ParseContent(String fileContent)
{
    var result = new MascotResult();
    this.modifications = new Dictionary<string, char>();

    Pair<int, double> pValueScore = ParsePValueScore(fileContent);
    result.PValueScore = pValueScore.First;
    result.PValue = pValueScore.Second;

    // An ArgumentException from either parser is ignored; the property then keeps its current value.
    try
    {
        result.PeakIsotopicType = ParsePeakIsotopicType(fileContent);
    }
    catch (ArgumentException)
    {
    }

    try
    {
        result.PeakTolerance = ParsePeakTolerance(fileContent);
    }
    catch (ArgumentException)
    {
    }

    // Combined spectrum filter: optional score/expect-value thresholds, rank 1 only,
    // plus the caller-supplied default filter when there is one.
    var spectrumFilters = new List<IFilter<IIdentifiedSpectrum>>();
    if (this.filterByDefaultScoreAndPvalue)
    {
        spectrumFilters.Add(new IdentifiedSpectrumScoreFilter(pValueScore.First));
        spectrumFilters.Add(new IdentifiedSpectrumExpectValueFilter(pValueScore.Second));
    }
    spectrumFilters.Add(new IdentifiedSpectrumRankFilter(1));
    if (this.defaultPeptideFilter != null)
    {
        spectrumFilters.Add(this.defaultPeptideFilter);
    }
    this.currentPeptideFilter = new AndFilter<IIdentifiedSpectrum>(spectrumFilters);

    // One group per protein match; remember where each match starts and ends in the text.
    var offsets = new List<Offset>();
    for (Match hit = GetProteinRegex().Match(fileContent); hit.Success; hit = hit.NextMatch())
    {
        IdentifiedProtein protein = ParseProtein(hit.Groups[1].Value);

        var group = new IdentifiedProteinGroup();
        group.Add(protein);
        result.Add(group);

        offsets.Add(new Offset(hit.Index, hit.Index + hit.Length, group));
    }

    // The text after the last protein is scanned up to the "not assigned" section,
    // or up to the last character when that section is absent.
    int endIndex = fileContent.IndexOf("Peptide matches not assigned to protein hits");
    if (endIndex == -1)
    {
        endIndex = fileContent.Length - 1;
    }

    int lastOffset = offsets.Count - 1;
    for (int i = 0; i <= lastOffset; i++)
    {
        int start = offsets[i].End;
        int end;
        if (i == lastOffset)
        {
            end = endIndex;
        }
        else
        {
            end = offsets[i + 1].Start;
        }

        // Text between this protein match and the next one (or the end marker).
        String between = fileContent.Substring(start, end - start + 1);
        if (between.Contains("Proteins matching the same set"))
        {
            foreach (IdentifiedProtein mp in ParseSameMatchProteins(between))
            {
                mp.Peptides.AddRange(offsets[i].Mpg[0].Peptides);
                offsets[i].Mpg.Add(mp);
            }
        }
    }

    // Walk backwards so RemoveAt does not shift the indices still to be visited.
    for (int i = result.Count - 1; i >= 0; i--)
    {
        bool firstProteinIsEmpty = result[i][0].Peptides.Count == 0;
        if (firstProteinIsEmpty)
        {
            result.RemoveAt(i);
        }
    }

    RefineModification(result);
    MergePeptides(result);
    result.InitUniquePeptideCount();

    return result;
}
// Two spectra carry one distinct annotation key each; AnnotationUtils.GetAnnotationKeys
// over the result's spectra is expected to return exactly those two keys.
public void TestGetAnnotationKeys()
{
    string key1 = "TEST1";
    string key2 = "TEST2";

    var firstSpectrum = new IdentifiedSpectrum();
    firstSpectrum.Annotations.Add(key1, null);
    firstSpectrum.Query.FileScan.Experimental = "EXP1";
    // The constructed peptide is not kept; it is accessed below via firstSpectrum.Peptide.
    new IdentifiedPeptide(firstSpectrum);

    var secondSpectrum = new IdentifiedSpectrum();
    secondSpectrum.Annotations.Add(key2, null);
    secondSpectrum.Query.FileScan.Experimental = "EXP2";
    new IdentifiedPeptide(secondSpectrum);

    firstSpectrum.Peptide.Sequence = "SEQ1";
    secondSpectrum.Peptide.Sequence = "SEQ2";

    var protein = new IdentifiedProtein();
    protein.Peptides.Add(firstSpectrum.Peptide);
    protein.Peptides.Add(secondSpectrum.Peptide);

    var mpg = new IdentifiedProteinGroup();
    mpg.Add(protein);

    var mr = new MascotResult();
    mr.Add(mpg);

    List<string> annotationKeys = AnnotationUtils.GetAnnotationKeys(mr.GetSpectra());

    Assert.AreEqual(2, annotationKeys.Count);
    foreach (string expectedKey in new[] { key1, key2 })
    {
        Assert.IsTrue(annotationKeys.Contains(expectedKey));
    }
}
/// <summary>
/// Parses a tab-delimited export into proteins with their rank-1 peptides.
/// </summary>
/// <remarks>
/// The first line is the header; columns are located by name. Rows with an empty first column
/// and rows whose "... Score" column is empty are skipped. The first time a protein accession
/// is seen, the parser skips one line and reads the following line as that protein's record.
/// Parsing stops at the first blank line.
/// </remarks>
/// <param name="fileName">Path of the tab-delimited file.</param>
/// <returns>The proteins found; an empty list when the file has no header line.</returns>
public override List<IIdentifiedProtein> ParseProteins(string fileName)
{
    Dictionary<string, IIdentifiedProtein> proteinMap = new Dictionary<string, IIdentifiedProtein>();

    using (StreamReader sr = new StreamReader(fileName))
    {
        string line = sr.ReadLine();
        if (line == null)
        {
            // Empty file: there is no header to split, so there is nothing to parse.
            return new List<IIdentifiedProtein>();
        }

        string[] headerParts = line.Split('\t');

        // Column positions by header name. The unused "XCorr" lookup was removed.
        int seqIndex = Array.FindIndex(headerParts, (m => m == "Sequence"));
        int proIndex = Array.FindIndex(headerParts, (m => m == "Protein Accessions"));
        int modIndex = Array.FindIndex(headerParts, (m => m == "Modifications"));
        int deltaIndex = Array.FindIndex(headerParts, (m => m.EndsWith(" Score")));
        int chargeIndex = Array.FindIndex(headerParts, (m => m == "Charge"));
        int obsIndex = Array.FindIndex(headerParts, (m => m == "m/z [Da]"));
        int mhIndex = Array.FindIndex(headerParts, (m => m == "MH+ [Da]"));
        int fscanIndex = Array.FindIndex(headerParts, (m => m == "First Scan"));
        int lscanIndex = Array.FindIndex(headerParts, (m => m == "Last Scan"));
        int ionIndex = Array.FindIndex(headerParts, (m => m == "Ions Matched"));
        int fileIndex = Array.FindIndex(headerParts, (m => m == "Spectrum File"));

        Progress.SetRange(0, sr.BaseStream.Length);
        Progress.SetMessage("Parsing file ...");

        while ((line = sr.ReadLine()) != null)
        {
            if (line.Trim().Length == 0)
            {
                break;
            }

            string[] parts = line.Split('\t');
            if (parts[0].Length == 0)
            {
                continue;
            }

            Progress.SetPosition(sr.BaseStream.Position);

            string seq = parts[seqIndex];
            string deltaCn = parts[deltaIndex];
            if (deltaCn.Length == 0) //rank > 1
            {
                continue;
            }

            string protein = parts[proIndex];

            // Single lookup instead of ContainsKey followed by the indexer.
            IIdentifiedProtein pro;
            if (!proteinMap.TryGetValue(protein, out pro))
            {
                // Skip one line, then read the protein's own record.
                sr.ReadLine();
                string proLine = sr.ReadLine();
                string[] proParts = proLine.Split('\t');

                var p = new IdentifiedProtein(protein);
                p.Coverage = MyConvert.ToDouble(proParts[2]);
                // NOTE(review): the factor 1000 presumably converts kDa to Da - confirm.
                p.MolecularWeight = MyConvert.ToDouble(proParts[5]) * 1000;
                p.IsoelectricPoint = MyConvert.ToDouble(proParts[6]);
                p.Score = MyConvert.ToDouble(proParts[7]);
                p.Description = proParts[8];

                proteinMap[protein] = p;
                pro = p;
            }

            IdentifiedSpectrum spectrum = new IdentifiedSpectrum();
            IdentifiedPeptide peptide = new IdentifiedPeptide(spectrum);
            peptide.Sequence = seq.ToUpper();
            peptide.AddProtein(protein);

            spectrum.Modifications = parts[modIndex];
            spectrum.DeltaScore = MyConvert.ToDouble(deltaCn);
            spectrum.Charge = Convert.ToInt32(parts[chargeIndex]);
            spectrum.ObservedMz = MyConvert.ToDouble(parts[obsIndex]);
            spectrum.TheoreticalMH = MyConvert.ToDouble(parts[mhIndex]);
            spectrum.Ions = parts[ionIndex];
            spectrum.Query.FileScan.FirstScan = Convert.ToInt32(parts[fscanIndex]);
            spectrum.Query.FileScan.LastScan = Convert.ToInt32(parts[lscanIndex]);
            spectrum.Query.FileScan.Experimental = FileUtils.RemoveAllExtension(parts[fileIndex]);

            pro.Peptides.Add(peptide);
        }
    }

    var proteins = proteinMap.Values.ToList();
    return proteins;
}
/// <summary>
/// Parses the first worksheet of an Excel workbook in which peptide rows are
/// interleaved with protein rows and groups the peptides by protein accession.
/// </summary>
/// <remarks>
/// Data starts at row 2 and ends at the first row whose column B is empty.
/// Peptide rows are read from fixed columns: A sequence, B protein, F
/// modifications, I delta score, K charge, L observed m/z, M theoretical MH+,
/// P first scan, Q last scan, S ions, T spectrum file. The first time a
/// protein is seen, its coverage, molecular weight, isoelectric point, score
/// and description are read from columns C, F, G, H and I two rows below the
/// peptide row.
/// The Excel application started here is always shut down again, including
/// when opening or parsing the workbook fails.
/// </remarks>
/// <param name="fileName">Path of the workbook to open.</param>
/// <returns>One protein per distinct accession, each carrying its peptides.</returns>
public override List<IIdentifiedProtein> ParseProteins(string fileName)
{
    Dictionary<string, IIdentifiedProtein> proteinMap = new Dictionary<string, IIdentifiedProtein>();

    Application xApp = new Application();
    try
    {
        // Obtain the Workbook object; of the two possible ways, this one opens an existing file.
        Workbook xBook = xApp.Workbooks._Open(fileName,
            Missing.Value, Missing.Value, Missing.Value, Missing.Value,
            Missing.Value, Missing.Value, Missing.Value, Missing.Value,
            Missing.Value, Missing.Value, Missing.Value, Missing.Value);
        try
        {
            Worksheet xSheet = (Worksheet)xBook.Sheets[1];

            int fromRow = 2;
            int endRow = fromRow;

            // The row count of the sheet is fixed; read it once instead of on every iteration.
            int maxRow = xSheet.Rows.Count;
            for (; endRow <= maxRow; endRow++)
            {
                string b = xSheet.Value('B', endRow);
                if (null == b)
                {
                    break;
                }
            }
            // Step back onto the last row that still had a value in column B.
            endRow--;

            Progress.SetRange(fromRow, endRow);
            Progress.SetMessage("Parsing file ...");

            for (int i = fromRow; i <= endRow; i++)
            {
                Progress.SetPosition(i);

                string seq = xSheet.Value('A', i);
                if (null == seq)
                {
                    // Protein information row, not a peptide row.
                    continue;
                }

                string deltaCn = xSheet.Value('I', i);
                if (null == deltaCn)
                {
                    // No delta score: the hit has rank > 1 and is ignored.
                    continue;
                }

                string protein = xSheet.Value('B', i);

                IIdentifiedProtein pro;
                if (!proteinMap.TryGetValue(protein, out pro))
                {
                    // The protein values sit two rows below the first peptide row.
                    var p = new IdentifiedProtein(protein);
                    p.Coverage = MyConvert.ToDouble(xSheet.Value('C', i + 2));
                    // Scaled by 1000 -- presumably kDa to Da; confirm against the export format.
                    p.MolecularWeight = MyConvert.ToDouble(xSheet.Value('F', i + 2)) * 1000;
                    p.IsoelectricPoint = MyConvert.ToDouble(xSheet.Value('G', i + 2));
                    p.Score = MyConvert.ToDouble(xSheet.Value('H', i + 2));
                    p.Description = xSheet.Value('I', i + 2);

                    proteinMap[protein] = p;
                    pro = p;
                }

                IdentifiedSpectrum spectrum = new IdentifiedSpectrum();
                IdentifiedPeptide peptide = new IdentifiedPeptide(spectrum);

                peptide.Sequence = seq.ToUpper();
                peptide.AddProtein(protein);

                spectrum.Modifications = xSheet.Value('F', i);
                spectrum.DeltaScore = MyConvert.ToDouble(deltaCn);
                spectrum.Charge = Convert.ToInt32(xSheet.Value('K', i));
                spectrum.ObservedMz = MyConvert.ToDouble(xSheet.Value('L', i));
                spectrum.TheoreticalMH = MyConvert.ToDouble(xSheet.Value('M', i));
                spectrum.Ions = xSheet.Value('S', i);
                spectrum.Query.FileScan.FirstScan = Convert.ToInt32(xSheet.Value('P', i));
                spectrum.Query.FileScan.LastScan = Convert.ToInt32(xSheet.Value('Q', i));
                spectrum.Query.FileScan.Experimental = FileUtils.RemoveAllExtension(xSheet.Value('T', i));

                pro.Peptides.Add(peptide);
            }
        }
        finally
        {
            // Close without saving.
            xBook.Close(false, Type.Missing, Type.Missing);
        }
    }
    finally
    {
        // Without Quit the Excel process started above outlives this call.
        xApp.Quit();
    }

    return proteinMap.Values.ToList();
}
/// <summary>
/// Builds a protein from its report fragment and attaches every peptide hit
/// of that fragment that passes the peptide filter.
/// </summary>
/// <remarks>
/// Each peptide entry is expected to expose its captured fields by position:
/// 0 experimental mass, 1 observed m/z, 2 charge, 3 escaped title, 4 query id,
/// 7 calculated mass, 9 missed cleavages, 10 score, 11 expect value, 12 rank,
/// 13 sequence (optionally followed by "+ modifications"). Fields 5, 6 and 8
/// are not read. Every modification name recognised by
/// <c>modificationReg</c> is registered in <c>modifications</c> with a blank
/// placeholder if it is not already present.
/// </remarks>
/// <param name="proteinContent">Report fragment describing one protein and its peptides.</param>
/// <returns>The protein with its accepted peptides.</returns>
protected IdentifiedProtein ParseProtein(String proteinContent)
{
    IdentifiedProtein protein = GetProtein(proteinContent);

    foreach (String entry in GetPeptideInfoContentList(proteinContent))
    {
        List<String> fields = GetPeptideInfo(entry);
        if (fields.Count != 0)
        {
            IIdentifiedSpectrum hit = new IdentifiedSpectrum();

            // Field 0: peptide mass derived from the observed m/z.
            hit.ExperimentalMass = MyConvert.ToDouble(fields[0]);

            // Field 1: observed m/z.
            hit.Query.ObservedMz = MyConvert.ToDouble(fields[1]);

            // Field 2: charge state.
            int chargeState = int.Parse(fields[2]);
            hit.Query.Charge = chargeState;

            // Field 3: URI-escaped spectrum title.
            String spectrumTitle = Uri.UnescapeDataString(fields[3]).Trim();
            hit.Query.Title = spectrumTitle;

            SequestFilename parsedName = MascotUtils.ParseTitle(spectrumTitle, chargeState);
            if (parsedName != null)
            {
                hit.Query.FileScan.LongFileName = parsedName.LongFileName;
            }

            // Field 4: query id. Fields 5 and 6 repeat fields 1 and 0.
            hit.Query.QueryId = int.Parse(fields[4]);

            // Field 7: calculated peptide mass. Field 8 (mass difference) is skipped.
            hit.TheoreticalMass = MyConvert.ToDouble(fields[7]);

            // Fields 9-12: missed cleavages, score, expect value, rank.
            hit.NumMissedCleavages = int.Parse(fields[9]);
            hit.Score = int.Parse(fields[10]);
            hit.ExpectValue = MyConvert.ToDouble(fields[11]);
            hit.Rank = int.Parse(fields[12]);

            // Field 13: sequence, e.g. "K.YEINVLR.N + Label:18O(2) (C-term)".
            String sequence = fields[13].Replace(" ", "");

            var peptide = new IdentifiedPeptide(hit);

            string[] pieces = Regex.Split(sequence, "\\+");
            if (pieces.Length > 1)
            {
                // Everything after the first '+' separator describes modifications.
                sequence = pieces[0].Trim();

                string modificationText = pieces[1].Trim();
                hit.Modifications = modificationText;

                char[] separators = new[] { ';' };
                foreach (string item in modificationText.Split(separators, StringSplitOptions.RemoveEmptyEntries))
                {
                    string key = this.modificationReg.Match(item.Trim()).Groups[1].Value;
                    if (!this.modifications.ContainsKey(key))
                    {
                        this.modifications[key] = ' ';
                    }
                }
            }

            peptide.Sequence = sequence;

            if (GetPeptideFilter().Accept(hit))
            {
                peptide.AddProtein(protein.Name);
                protein.Peptides.Add(peptide);
            }
        }
    }

    return protein;
}
/// <summary>
/// Round-trip check for the iTRAQ quantification converter: the header and
/// value line produced for an annotated protein must match the expected
/// text, and reading that line into a fresh protein must reproduce it.
/// </summary>
public void TestReadWrite()
{
    // Arrange: two datasets with two ratio channels each.
    var quantification = new ITraqQuantificationResult();

    var firstDataset = new ITraqQuantificationDatasetItem() { DatasetName = "D1" };
    quantification.DatasetMap["D1"] = firstDataset;
    firstDataset.RatioMap["R114/REF"] = new ITraqQuantificationChannelItem() { ChannelName = "R114/REF", Ratio = 1.5 };
    firstDataset.RatioMap["R115/REF"] = new ITraqQuantificationChannelItem() { ChannelName = "R115/REF", Ratio = 1.8 };

    var secondDataset = new ITraqQuantificationDatasetItem() { DatasetName = "D2" };
    quantification.DatasetMap["D2"] = secondDataset;
    secondDataset.RatioMap["R116/REF"] = new ITraqQuantificationChannelItem() { ChannelName = "R116/REF", Ratio = 2.5 };
    secondDataset.RatioMap["R117/REF"] = new ITraqQuantificationChannelItem() { ChannelName = "R117/REF", Ratio = 3.8 };

    var source = new IdentifiedProtein();
    source.Annotations[ITraqConsts.ITRAQ_KEY] = quantification;

    // The base converter comes first, followed by the per-channel converters it derives.
    var baseConverter = new ITraqQuantificationResultConverter<IAnnotation>();
    var parts = new List<IPropertyConverter<IAnnotation>> { baseConverter };
    parts.AddRange(baseConverter.GetRelativeConverter(new List<IAnnotation> { source }));

    var finalConverter = new CompositePropertyConverter<IAnnotation>(parts, ',');

    // Act + assert: header.
    if (exportToConsole)
    {
        Console.WriteLine(finalConverter.Name);
    }
    Assert.AreEqual("ITRAQ_COUNT,IR_D1_R114/REF,IR_D1_R115/REF,IR_D2_R116/REF,IR_D2_R117/REF", finalConverter.Name);

    // Act + assert: value line.
    var written = finalConverter.GetProperty(source);
    if (exportToConsole)
    {
        Console.WriteLine(written);
    }
    Assert.AreEqual("2,1.5000,1.8000,2.5000,3.8000", written);

    // Act + assert: reading the line back into a new protein reproduces it.
    var target = new IdentifiedProtein();
    var reader = IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(finalConverter.Name, ',');
    reader.SetProperty(target, written);

    var rewritten = finalConverter.GetProperty(target);
    Assert.AreEqual(written, rewritten);
}