/// <summary>
/// Parses <paramref name="value"/> into per-peptide protein name groups and stores them on
/// the peptides of <paramref name="t"/>.
/// </summary>
/// <param name="t">Spectrum whose peptides receive the protein names.</param>
/// <param name="value">Text split into peptide groups by <c>reg</c>, each group split into names by <c>chars</c>.</param>
public override void SetProperty(T t, string value)
{
    string[] groups = reg.Split(value);

    if (t.Peptides.Count == groups.Length)
    {
        // Same number of peptides: replace the protein list of each existing peptide in place.
        for (int index = 0; index < groups.Length; index++)
        {
            string[] names = groups[index].Split(chars);
            t.Peptides[index].ClearProteins();
            foreach (string name in names)
            {
                t.Peptides[index].AddProtein(name);
            }
        }

        return;
    }

    // Peptide count differs: discard the current peptides and create one per group.
    t.ClearPeptides();
    foreach (string group in groups)
    {
        IIdentifiedPeptide created = new IdentifiedPeptide(t);
        foreach (string name in group.Split(chars))
        {
            created.AddProtein(name);
        }
    }
}
/// <summary>
/// Fills <paramref name="sph"/> with the peptide, proteins and scores described by one
/// "search_hit" element.
/// </summary>
/// <param name="sph">Spectrum that receives the new peptide and the hit-level values.</param>
/// <param name="searchHit">Element holding the "peptide", "protein" and related attributes.</param>
/// <param name="ppmods">Modification definitions passed through to <c>FindModificationChar</c>.</param>
private void ParseSearchHit(IIdentifiedSpectrum sph, XElement searchHit, PepXmlModifications ppmods)
{
    var peptide = new IdentifiedPeptide(sph);

    var modInfo = searchHit.FindFirstDescendant("modification_info");
    string sequence = searchHit.Attribute("peptide").Value;

    if (modInfo != null)
    {
        var modifiedPeptide = modInfo.Attribute("modified_peptide");
        bool useModifiedPeptide = modifiedPeptide != null && !modReg.Match(modifiedPeptide.Value).Success;

        if (useModifiedPeptide)
        {
            sequence = modifiedPeptide.Value;
        }
        else
        {
            var unmodified = sequence;
            var massMods = PeptideProphetUtils.ParseModificationAminoacidMass(modInfo);
            if (massMods != null && massMods.Count > 0)
            {
                // Reverse first so each insertion leaves the positions still to be processed untouched.
                massMods.Reverse();
                foreach (var massMod in massMods)
                {
                    string marker = FindModificationChar(ppmods, massMod, unmodified);
                    sequence = sequence.Insert(massMod.Position, marker);
                }
            }
        }
    }

    if (searchHit.Attribute("peptide_prev_aa") == null)
    {
        peptide.Sequence = sequence;
    }
    else
    {
        // Flanking residues are written as "prev.SEQUENCE.next".
        peptide.Sequence = searchHit.Attribute("peptide_prev_aa").Value + "." + sequence + "." + searchHit.Attribute("peptide_next_aa").Value;
    }

    sph.NumMissedCleavages = GetAttributeValue(searchHit, "num_missed_cleavages", 0);
    sph.NumProteaseTermini = GetAttributeValue(searchHit, "num_tol_term", 2);

    peptide.AddProtein(searchHit.Attribute("protein").Value);

    int totalProteins = int.Parse(searchHit.Attribute("num_tot_proteins").Value);
    if (totalProteins > 1)
    {
        foreach (var alternative in searchHit.FindDescendants("alternative_protein"))
        {
            peptide.AddProtein(alternative.Attribute("protein").Value);
        }
    }

    ParseScoreAndOtherInformation(sph, searchHit);
}
/// <summary>
/// Inserts the marker character of every non-static modification into the sequence of
/// <paramref name="mp"/>.
/// </summary>
/// <param name="mp">Peptide whose <c>Sequence</c> is rewritten.</param>
/// <param name="modifications">Modification name keyed by position.</param>
/// <param name="mm">Supplies the static modification map; names found there are skipped.</param>
/// <exception cref="Exception">A modification name has no entry in <c>ModificationCharMap</c>.</exception>
protected void ModifySequence(IdentifiedPeptide mp, Dictionary<int, string> modifications, PFindModification mm)
{
    // Highest position first, so an insertion never shifts a position still to be handled.
    var positions = modifications
        .Where(entry => !mm.StaticModification.ModificationMap.ContainsValue(entry.Value))
        .OrderByDescending(entry => entry.Key)
        .Select(entry => entry.Key)
        .ToList();

    foreach (var pos in positions)
    {
        string mod = modifications[pos];
        if (!this.ModificationCharMap.ContainsKey(mod))
        {
            throw new Exception(MyConvert.Format("Cannot find dynamic modification {0} definition", mod));
        }

        string marker = this.ModificationCharMap[mod].ToString();
        mp.Sequence = pos == mp.Sequence.Length
            ? mp.Sequence + marker
            : mp.Sequence.Insert(pos + 1, marker);
    }
}
/// <summary>
/// Reads every "psm" element under the root's "psms" element and converts it into an
/// identified spectrum with a single peptide.
/// </summary>
/// <param name="fileName">Path of the XML file to load.</param>
/// <returns>One spectrum per "psm" element, in document order.</returns>
/// <remarks>
/// Numeric values are parsed with the invariant culture so that a '.' decimal separator in
/// the file is read the same way regardless of the machine's regional settings.
/// </remarks>
public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var result = new List<IIdentifiedSpectrum>();

    XElement root = XElement.Load(fileName);

    // The experiment name is the file name up to its first '.'; it is the same for every psm.
    string experimental = Path.GetFileName(fileName).StringBefore(".");

    var psms = root.FindElement("psms").FindElements("psm");
    foreach (var psm in psms)
    {
        IIdentifiedSpectrum spec = new IdentifiedSpectrum();
        spec.Id = psm.FindAttribute("psm_id").Value.StringAfter("decoy_");
        spec.FromDecoy = psm.FindAttribute("decoy").Value.Equals("true");
        spec.SpScore = double.Parse(psm.FindElement("svm_score").Value, invariant);
        spec.QValue = double.Parse(psm.FindElement("q_value").Value, invariant);
        spec.Score = double.Parse(psm.FindElement("pep").Value, invariant);
        spec.Probability = double.Parse(psm.FindElement("p_value").Value, invariant);
        spec.TheoreticalMH = double.Parse(psm.FindElement("calc_mass").Value, invariant);
        spec.Query.FileScan.Experimental = experimental;

        var pep = new IdentifiedPeptide(spec);
        pep.Sequence = psm.FindElement("peptide_seq").FindAttribute("seq").Value;
        pep.AddProtein(psm.FindElement("protein_id").Value);

        result.Add(spec);
    }

    return result;
}
/// <summary>
/// Splits <paramref name="value"/> with <c>reg</c> and assigns one site probability to each
/// peptide of <paramref name="t"/>.
/// </summary>
/// <param name="t">Spectrum whose peptides are updated.</param>
/// <param name="value">Site probability text; null or empty leaves <paramref name="t"/> untouched.</param>
public override void SetProperty(T t, string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return;
    }

    string[] probabilities = reg.Split(value);

    if (t.Peptides.Count == probabilities.Length)
    {
        // Counts agree: update the existing peptides position by position.
        for (int index = 0; index < probabilities.Length; index++)
        {
            t.Peptides[index].SiteProbability = probabilities[index];
        }

        return;
    }

    // Counts differ: start over with one new peptide per probability entry.
    t.ClearPeptides();
    foreach (string probability in probabilities)
    {
        IIdentifiedPeptide created = new IdentifiedPeptide(t);
        created.SiteProbability = probability;
    }
}
/// <summary>
/// Builds the two reference protein sequences and the six peptide fixtures used by the tests.
/// </summary>
public void SetUp()
{
    seqs = new[]
    {
        new Sequence("Test1", "ADFADJLFASRDLFKJWONNCKAOIWJEFLNC"),
        new Sequence("Test2", "WEUOIRJKJNCJKLSDTORWELSDJF")
    }.ToList();

    identical = new IdentifiedPeptide(new IdentifiedSpectrum());
    identical.Sequence = "JWONNCK";

    singlemutation = new IdentifiedPeptide(new IdentifiedSpectrum());
    singlemutation.Sequence = "LSDAOR";

    misscleavageIdentical = new IdentifiedPeptide(new IdentifiedSpectrum());
    misscleavageIdentical.Sequence = "LFKJWONNCK";

    misscleavageSingleMutation = new IdentifiedPeptide(new IdentifiedSpectrum());
    misscleavageSingleMutation.Sequence = "JKLSDAOR";

    nptIdentical = new IdentifiedPeptide(new IdentifiedSpectrum());
    nptIdentical.Sequence = "JWONNC";

    nptSingleMutation = new IdentifiedPeptide(new IdentifiedSpectrum());
    nptSingleMutation.Sequence = "LSDAO";

    // Validator construction is currently disabled:
    //validator = new SnpDataValidator(null, null, null, new Protease("Trypsin", true, "KR", "P"));
}
/// <summary>
/// Creates three proteins sharing two peptides: Protein1 has both peptides, Protein2 only the
/// first, Protein3 only the second.
/// </summary>
/// <returns>The proteins in the order Protein3, Protein2, Protein1.</returns>
private static List<IIdentifiedProtein> InitProteins()
{
    // First spectrum/peptide, mapped to Protein1 and Protein2.
    var spectrum1 = new IdentifiedSpectrum();
    spectrum1.Query.FileScan.Experimental = "EXP1";
    var peptide1 = new IdentifiedPeptide(spectrum1);
    peptide1.AddProtein("Protein1");
    peptide1.AddProtein("Protein2");
    peptide1.Sequence = "SEQ1";

    // Second spectrum/peptide, mapped to Protein1 and Protein3.
    var spectrum2 = new IdentifiedSpectrum();
    spectrum2.Query.FileScan.Experimental = "EXP2";
    var peptide2 = new IdentifiedPeptide(spectrum2);
    peptide2.AddProtein("Protein1");
    peptide2.AddProtein("Protein3");
    peptide2.Sequence = "SEQ2";

    var protein1 = new IdentifiedProtein("Protein1");
    protein1.Peptides.Add(peptide1);
    protein1.Peptides.Add(peptide2);

    var protein2 = new IdentifiedProtein("Protein2");
    protein2.Peptides.Add(peptide1);

    var protein3 = new IdentifiedProtein("Protein3");
    protein3.Peptides.Add(peptide2);

    return new List<IIdentifiedProtein> { protein3, protein2, protein1 };
}
/// <summary>
/// Sorting a list holding Protein3, Protein2, Protein1 must yield Protein1, Protein2, Protein3.
/// </summary>
public void TestSort()
{
    var firstPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());
    var secondPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());

    // Protein1 carries two peptides, the others one each.
    var protein1 = new IdentifiedProtein("Protein1");
    protein1.Peptides.Add(firstPeptide);
    protein1.Peptides.Add(secondPeptide);

    var protein2 = new IdentifiedProtein("Protein2");
    protein2.Peptides.Add(firstPeptide);

    var protein3 = new IdentifiedProtein("Protein3");
    protein3.Peptides.Add(secondPeptide);

    var proteins = new List<IdentifiedProtein> { protein3, protein2, protein1 };
    proteins.Sort();

    Assert.AreEqual(protein1, proteins[0]);
    Assert.AreEqual(protein2, proteins[1]);
    Assert.AreEqual(protein3, proteins[2]);
}
/// <summary>
/// Verifies that <c>List.Sort</c> orders the proteins as Protein1, Protein2, Protein3 when they
/// are added in the opposite order.
/// </summary>
public void TestSort()
{
    var spectrumA = new IdentifiedSpectrum();
    var peptideA = new IdentifiedPeptide(spectrumA);

    var spectrumB = new IdentifiedSpectrum();
    var peptideB = new IdentifiedPeptide(spectrumB);

    var withBoth = new IdentifiedProtein("Protein1");
    withBoth.Peptides.Add(peptideA);
    withBoth.Peptides.Add(peptideB);

    var withA = new IdentifiedProtein("Protein2");
    withA.Peptides.Add(peptideA);

    var withB = new IdentifiedProtein("Protein3");
    withB.Peptides.Add(peptideB);

    // Deliberately inserted in reverse of the expected order.
    var sorted = new List<IdentifiedProtein> { withB, withA, withBoth };
    sorted.Sort();

    Assert.AreEqual(withBoth, sorted[0]);
    Assert.AreEqual(withA, sorted[1]);
    Assert.AreEqual(withB, sorted[2]);
}
/// <summary>
/// Checks the name, the formatted output and the round-trip parsing of the reference converter.
/// </summary>
public void Test()
{
    IPropertyConverter<IdentifiedSpectrum> converter = new IdentifiedSpectrumReferenceConverter<IdentifiedSpectrum>();

    var spectrum = new IdentifiedSpectrum();

    var single = new IdentifiedPeptide(spectrum);
    single.AddProtein("11111");

    var pair = new IdentifiedPeptide(spectrum);
    pair.AddProtein("22222");
    pair.AddProtein("33333");

    Assert.AreEqual("Reference", converter.Name);
    Assert.AreEqual("11111 ! 22222/33333", converter.GetProperty(spectrum));

    // Same peptide count, different protein counts per peptide.
    converter.SetProperty(spectrum, "44444/55555 ! 66666");

    var first = spectrum.Peptides[0];
    Assert.AreEqual(2, first.Proteins.Count);
    Assert.AreEqual("44444", first.Proteins[0]);
    Assert.AreEqual("55555", first.Proteins[1]);

    var second = spectrum.Peptides[1];
    Assert.AreEqual(1, second.Proteins.Count);
    Assert.AreEqual("66666", second.Proteins[0]);
}
/// <summary>
/// Reads a tab-delimited pNovo result file and returns one identified spectrum per accepted
/// candidate peptide.
/// </summary>
/// <param name="filename">pNovo result file</param>
/// <param name="maxRank">Highest candidate index (first column) that is still accepted</param>
/// <param name="minScore">Lowest candidate score (third column) that is still accepted</param>
/// <returns>List of identified spectra, each with one peptide</returns>
public List<IIdentifiedSpectrum> ParsePeptides(string filename, int maxRank, double minScore)
{
    var result = new List<IIdentifiedSpectrum>();

    // State of the most recent spectrum header line.
    SequestFilename sf = null;
    int charge = 2;
    double expmh = 0;

    using (var reader = new StreamReader(filename))
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            var fields = line.Split('\t');

            if (fields.Length <= 5)
            {
                // Spectrum information; the last field is the number of candidate sequences.
                if (Convert.ToInt32(fields.Last()) != 0)
                {
                    sf = parser.GetValue(fields[0]);
                    expmh = MyConvert.ToDouble(fields[1]);
                    charge = Convert.ToInt32(fields[2]);
                }

                continue;
            }

            int rank = Convert.ToInt32(fields[0]);
            if (rank > maxRank)
            {
                continue;
            }

            var score = MyConvert.ToDouble(fields[2]);
            if (score < minScore)
            {
                continue;
            }

            var spectrum = new IdentifiedSpectrum();
            spectrum.Query.FileScan = sf;
            spectrum.Query.Charge = charge;
            spectrum.ExperimentalMH = expmh;
            spectrum.Score = score;
            result.Add(spectrum);

            IdentifiedPeptide peptide = new IdentifiedPeptide(spectrum);
            peptide.Sequence = ModifySequence(fields[9]);
            peptide.Spectrum.TheoreticalMH = MyConvert.ToDouble(fields[11]);
            peptide.Spectrum.Rank = rank;
        }
    }

    return result;
}
/// <summary>
/// Setting <c>Sequence</c> keeps the text verbatim, while <c>PureSequence</c> yields the bare
/// residues, with or without flanking residues in the input.
/// </summary>
public void TestSetSequence()
{
    var peptide = new IdentifiedPeptide(new IdentifiedSpectrum());

    // First with flanking residues, then without; the pure sequence is the same for both.
    string[] inputs = { "-.MAS*ESETLNPSAR.I", "MAS*ESETLNPSAR" };
    foreach (string input in inputs)
    {
        peptide.Sequence = input;
        Assert.AreEqual(input, peptide.Sequence);
        Assert.AreEqual("MASESETLNPSAR", peptide.PureSequence);
    }
}
/// <summary>
/// Tabs inside a protein name are expected to become spaces, both when adding and when
/// replacing a protein.
/// </summary>
public void TestAddProtein()
{
    var peptide = new IdentifiedPeptide(new IdentifiedSpectrum());

    peptide.AddProtein("AAAAA\tBBBBB");
    Assert.AreEqual(1, peptide.Proteins.Count);
    Assert.AreEqual("AAAAA BBBBB", peptide.Proteins[0]);

    // Replacing entry 0 must not change the number of proteins.
    peptide.SetProtein(0, "CCCCC\tDDDDD");
    Assert.AreEqual(1, peptide.Proteins.Count);
    Assert.AreEqual("CCCCC DDDDD", peptide.Proteins[0]);
}
/// <summary>
/// Two peptides on one spectrum must be rendered as "P1 ! P2".
/// </summary>
public void TestGetSequenceString()
{
    var spectrum = new IdentifiedSpectrum();

    var first = new IdentifiedPeptide(spectrum) { Sequence = "P1" };
    var second = new IdentifiedPeptide(spectrum) { Sequence = "P2" };

    string actual = MascotPeptideHitTextWriter.GetSequenceString(spectrum);
    Assert.AreEqual("P1 ! P2", actual);
}
/// <summary>
/// Proteins of one peptide are joined with "/", peptides with " ! ".
/// </summary>
public void TestGetProteinString()
{
    var spectrum = new IdentifiedSpectrum();

    var first = new IdentifiedPeptide(spectrum);
    first.AddProtein("P1");

    var second = new IdentifiedPeptide(spectrum);
    second.AddProtein("P2");
    second.AddProtein("P3");

    string actual = MascotPeptideHitTextWriter.GetProteinString(spectrum);
    Assert.AreEqual("P1 ! P2/P3", actual);
}
/// <summary>
/// Two spectra referencing three proteins must produce three proteins, with Protein1 holding
/// both peptides and Protein2/Protein3 one peptide each.
/// </summary>
public void TestBuildProteins()
{
    var spectrum1 = new IdentifiedSpectrum();
    spectrum1.Query.FileScan.Experimental = "EXP1";
    var peptide1 = new IdentifiedPeptide(spectrum1);
    peptide1.AddProtein("Protein1");
    peptide1.AddProtein("Protein2");

    var spectrum2 = new IdentifiedSpectrum();
    spectrum2.Query.FileScan.Experimental = "EXP2";
    var peptide2 = new IdentifiedPeptide(spectrum2);
    peptide2.AddProtein("Protein1");
    peptide2.AddProtein("Protein3");

    var spectra = new List<IIdentifiedSpectrum> { spectrum1, spectrum2 };

    List<IIdentifiedProtein> proteins = MascotUtils.BuildProteins(spectra);

    Assert.AreEqual(3, proteins.Count);
    foreach (IdentifiedProtein protein in proteins)
    {
        if (protein.Name.Equals("Protein1"))
        {
            // Shared protein: both peptides.
            Assert.AreEqual(2, protein.Peptides.Count);
        }
        else if (protein.Name.Equals("Protein2"))
        {
            Assert.AreEqual(1, protein.Peptides.Count);
            Assert.AreEqual(peptide1, protein.Peptides[0]);
        }
        else if (protein.Name.Equals("Protein3"))
        {
            Assert.AreEqual(1, protein.Peptides.Count);
            Assert.AreEqual(peptide2, protein.Peptides[0]);
        }
    }
}
/// <summary>
/// Checks that <c>MascotUtils.BuildProteins</c> groups peptides by protein name: Protein1 gets
/// two peptides, Protein2 and Protein3 get one each.
/// </summary>
public void TestBuildProteins()
{
    // Peptide from experiment EXP1, assigned to Protein1 and Protein2.
    var exp1Spectrum = new IdentifiedSpectrum();
    exp1Spectrum.Query.FileScan.Experimental = "EXP1";
    var exp1Peptide = new IdentifiedPeptide(exp1Spectrum);
    exp1Peptide.AddProtein("Protein1");
    exp1Peptide.AddProtein("Protein2");

    // Peptide from experiment EXP2, assigned to Protein1 and Protein3.
    var exp2Spectrum = new IdentifiedSpectrum();
    exp2Spectrum.Query.FileScan.Experimental = "EXP2";
    var exp2Peptide = new IdentifiedPeptide(exp2Spectrum);
    exp2Peptide.AddProtein("Protein1");
    exp2Peptide.AddProtein("Protein3");

    var input = new List<IIdentifiedSpectrum> { exp1Spectrum, exp2Spectrum };
    List<IIdentifiedProtein> built = MascotUtils.BuildProteins(input);

    Assert.AreEqual(3, built.Count);

    foreach (IdentifiedProtein candidate in built)
    {
        if (candidate.Name.Equals("Protein1"))
        {
            Assert.AreEqual(2, candidate.Peptides.Count);
        }
        else if (candidate.Name.Equals("Protein2"))
        {
            Assert.AreEqual(1, candidate.Peptides.Count);
            Assert.AreEqual(exp1Peptide, candidate.Peptides[0]);
        }
        else if (candidate.Name.Equals("Protein3"))
        {
            Assert.AreEqual(1, candidate.Peptides.Count);
            Assert.AreEqual(exp2Peptide, candidate.Peptides[0]);
        }
    }
}
/// <summary>
/// Parses one result line of an out file into <paramref name="entry"/>, replacing its peptides
/// with a single peptide built from the line.
/// </summary>
/// <remarks>
/// Example line:
/// 1. 1 / 1 0 1964.9940 0.0000 5.6970 2133.9 21/30 sw|P02666|CASBBOVIN +1 K.FQSEEQQQTEDELQDK.I
/// </remarks>
/// <param name="line">Raw text line; '/' is treated as a field separator.</param>
/// <param name="entry">Spectrum that receives the parsed values.</param>
/// <returns>False when the line has fewer than <c>itemIndex.MinCount</c> fields, true otherwise.</returns>
protected bool ParseFromOutfileLine(string line, IdentifiedSpectrum entry)
{
    entry.IsProteinFromOutFile = true;

    // Replacing '/' with a blank splits values such as "21/30" into two fields.
    string[] fields = this.reg.Split(line.Trim().Replace('/', ' '));
    if (fields.Length < itemIndex.MinCount)
    {
        return false;
    }

    entry.Rank = int.Parse(fields[itemIndex.RankIndex]);
    entry.SpRank = int.Parse(fields[itemIndex.SpRankIndex]);
    entry.TheoreticalMH = MyConvert.ToDouble(fields[itemIndex.TheoreticalMHIndex]);
    entry.DeltaScore = MyConvert.ToDouble(fields[itemIndex.DeltaScoreIndex]);
    entry.Score = MyConvert.ToDouble(fields[itemIndex.ScoreIndex]);
    entry.SpScore = MyConvert.ToDouble(fields[itemIndex.SpScoreIndex]);
    entry.MatchedIonCount = int.Parse(fields[itemIndex.MatchedIonCountIndex]);
    entry.TheoreticalIonCount = int.Parse(fields[itemIndex.TheoreticalIonCountIndex]);
    entry.ClearPeptides();

    // A leading '+' marks a duplicate count ("+1"); the sequence then sits one field later.
    string token = fields[itemIndex.SequenceIndex];
    string sequence;
    if (token[0] == '+')
    {
        entry.DuplicatedCount = int.Parse(token.Substring(1));
        sequence = fields[itemIndex.SequenceIndex + 1];
    }
    else
    {
        entry.DuplicatedCount = 0;
        sequence = token;
    }

    CheckSequenceValid(ref sequence);

    var peptide = new IdentifiedPeptide(entry) { Sequence = sequence };
    peptide.AddProtein(fields[itemIndex.ProteinIndex]);

    return true;
}
/// <summary>
/// Checks the name, the formatted output and the parsing of the sequence converter, including
/// a change in the number of peptides.
/// </summary>
public void Test()
{
    IPropertyConverter<IdentifiedSpectrum> converter = new IdentifiedSpectrumSequenceConverter<IdentifiedSpectrum>();

    var spectrum = new IdentifiedSpectrum();
    var first = new IdentifiedPeptide(spectrum) { Sequence = "AAAAA" };
    var second = new IdentifiedPeptide(spectrum) { Sequence = "BBBBB" };

    Assert.AreEqual("Sequence", converter.Name);
    Assert.AreEqual("AAAAA ! BBBBB", converter.GetProperty(spectrum));

    // Three sequences replace the two existing peptides.
    converter.SetProperty(spectrum, "CCCCC ! DDDDD ! EEEEE");

    string[] expected = { "CCCCC", "DDDDD", "EEEEE" };
    Assert.AreEqual(3, spectrum.Peptides.Count);
    for (int index = 0; index < expected.Length; index++)
    {
        Assert.AreEqual(expected[index], spectrum.Peptides[index].Sequence);
    }
}
/// <summary>
/// Builds the protein fixture: two peptides (SEQ1 from EXP1, SEQ2 from EXP2) distributed over
/// Protein1 (both), Protein2 (SEQ1) and Protein3 (SEQ2).
/// </summary>
/// <returns>List containing Protein3, Protein2 and Protein1, in that order.</returns>
private static List<IIdentifiedProtein> InitProteins()
{
    var exp1Spectrum = new IdentifiedSpectrum();
    exp1Spectrum.Query.FileScan.Experimental = "EXP1";

    var seq1 = new IdentifiedPeptide(exp1Spectrum);
    seq1.AddProtein("Protein1");
    seq1.AddProtein("Protein2");
    seq1.Sequence = "SEQ1";

    var exp2Spectrum = new IdentifiedSpectrum();
    exp2Spectrum.Query.FileScan.Experimental = "EXP2";

    var seq2 = new IdentifiedPeptide(exp2Spectrum);
    seq2.AddProtein("Protein1");
    seq2.AddProtein("Protein3");
    seq2.Sequence = "SEQ2";

    var shared = new IdentifiedProtein("Protein1");
    shared.Peptides.Add(seq1);
    shared.Peptides.Add(seq2);

    var onlySeq1 = new IdentifiedProtein("Protein2");
    onlySeq1.Peptides.Add(seq1);

    var onlySeq2 = new IdentifiedProtein("Protein3");
    onlySeq2.Peptides.Add(seq2);

    // Returned in descending name order.
    return new List<IIdentifiedProtein> { onlySeq2, onlySeq1, shared };
}
/// <summary>
/// Fills <paramref name="entry"/> from the already split fields of an out-file line that
/// carries an id column, replacing its peptides with a single peptide built from the line.
/// </summary>
/// <param name="sLines">
/// Fields of the line. Fields 1, 2 and 4-9 hold rank, Sp rank, theoretical MH, delta score,
/// score, Sp score, matched ion count and theoretical ion count; field 10 is the protein;
/// field 11 is either the sequence or a "+N" duplicate count followed by the sequence in field 12.
/// </param>
/// <param name="entry">Spectrum that receives the parsed values.</param>
/// <returns>
/// False when the line does not contain enough fields (including the extra field needed after
/// a "+N" marker) or field 11 is empty; true otherwise. <paramref name="entry"/> is not
/// modified when false is returned.
/// </returns>
protected bool ParseFromOutfileLineWithId(List<string> sLines, IdentifiedSpectrum entry)
{
    const int ProteinIndex = 10;
    const int SequenceIndex = 11;

    if (sLines.Count <= SequenceIndex)
    {
        return false;
    }

    string marker = sLines[SequenceIndex];
    if (string.IsNullOrEmpty(marker))
    {
        // Nothing to inspect; reading marker[0] would throw.
        return false;
    }

    bool hasDuplicateCount = marker[0] == '+';
    if (hasDuplicateCount && sLines.Count <= SequenceIndex + 1)
    {
        // "+N" shifts the sequence to field 12, which this line does not have.
        return false;
    }

    //entry.Index = int.Parse(sLines[0].Substring(0, sLines[0].Length - 1));
    entry.Rank = int.Parse(sLines[1]);
    entry.SpRank = int.Parse(sLines[2]);
    //entry.Id = int.Parse(sLines[3]);
    entry.TheoreticalMH = MyConvert.ToDouble(sLines[4]);
    entry.DeltaScore = MyConvert.ToDouble(sLines[5]);
    entry.Score = MyConvert.ToDouble(sLines[6]);
    entry.SpScore = MyConvert.ToDouble(sLines[7]);
    entry.MatchedIonCount = int.Parse(sLines[8]);
    entry.TheoreticalIonCount = int.Parse(sLines[9]);
    entry.ClearPeptides();

    string sequence;
    if (hasDuplicateCount)
    {
        entry.DuplicatedCount = int.Parse(marker.Substring(1));
        sequence = sLines[SequenceIndex + 1];
    }
    else
    {
        entry.DuplicatedCount = 0;
        sequence = marker;
    }

    CheckSequenceValid(ref sequence);

    var sp = new IdentifiedPeptide(entry);
    sp.Sequence = sequence;
    sp.AddProtein(sLines[ProteinIndex]);

    return true;
}
/// <summary>
/// Verifies that the sequence converter reports "Sequence" as its name, joins peptide
/// sequences with " ! " and rebuilds the peptides when given three sequences.
/// </summary>
public void Test()
{
    IPropertyConverter<IdentifiedSpectrum> sequenceConverter = new IdentifiedSpectrumSequenceConverter<IdentifiedSpectrum>();

    var hit = new IdentifiedSpectrum();

    var peptideA = new IdentifiedPeptide(hit);
    peptideA.Sequence = "AAAAA";

    var peptideB = new IdentifiedPeptide(hit);
    peptideB.Sequence = "BBBBB";

    Assert.AreEqual("Sequence", sequenceConverter.Name);
    Assert.AreEqual("AAAAA ! BBBBB", sequenceConverter.GetProperty(hit));

    sequenceConverter.SetProperty(hit, "CCCCC ! DDDDD ! EEEEE");

    var peptides = hit.Peptides;
    Assert.AreEqual(3, peptides.Count);
    Assert.AreEqual("CCCCC", peptides[0].Sequence);
    Assert.AreEqual("DDDDD", peptides[1].Sequence);
    Assert.AreEqual("EEEEE", peptides[2].Sequence);
}
/// <summary>
/// Adds four peptides one at a time to a 30-residue protein and checks the coverage
/// percentage after each addition.
/// </summary>
public void TestCalculateCoverage()
{
    //total 30 amino acids
    var protein = new IdentifiedProtein { Sequence = "ABCDEDFDEFDSESLKJFDJLSLGFGDDGD" };

    // The last two peptides overlap, so the final step only adds one new residue.
    string[] peptideSequences = { "B.CDEDF.D", "F.DSESL.K", "L.SLGF.G", "L.SLGFG.D" };
    double[] expectedCoverage = { 16.67, 33.33, 46.67, 50.00 };

    for (int step = 0; step < peptideSequences.Length; step++)
    {
        var peptide = new IdentifiedPeptide(new IdentifiedSpectrum()) { Sequence = peptideSequences[step] };
        protein.Peptides.Add(peptide);

        protein.CalculateCoverage();

        Assert.AreEqual(expectedCoverage[step], protein.Coverage, 0.01);
    }
}
/// <summary>
/// Reads every "peptideSpectrumMatch" of every "fragSpectrumScan" element in the file and
/// converts it into an identified spectrum with a single peptide.
/// </summary>
/// <param name="fileName">Path of the XML file to load.</param>
/// <returns>One spectrum per "peptideSpectrumMatch" element, in document order.</returns>
/// <remarks>
/// Numeric values are parsed with the invariant culture so that the result does not depend on
/// the regional settings of the machine reading the file.
/// </remarks>
public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var result = new List<IIdentifiedSpectrum>();

    XElement root = XElement.Load(fileName);

    // Locate the position of the missed-cleavage feature among the declared feature descriptions.
    var features = root.FindElement("featureDescriptions");
    var descriptions = features.FindElements("featureDescription");
    var missIndex = FindIndex(fileName, descriptions, "# Missed Cleavages");

    var scans = root.FindElements("fragSpectrumScan");
    foreach (var scan in scans)
    {
        var scanNumber = int.Parse(scan.FindAttribute("scanNumber").Value, invariant);

        var psms = scan.FindElements("peptideSpectrumMatch");
        foreach (var psm in psms)
        {
            IIdentifiedSpectrum spec = new IdentifiedSpectrum();
            spec.Query.QueryId = scanNumber;
            spec.Id = psm.FindAttribute("id").Value.StringAfter("decoy_");
            spec.FromDecoy = psm.FindAttribute("isDecoy").Value.Equals("true");
            spec.TheoreticalMH = double.Parse(psm.FindAttribute("calculatedMassToCharge").Value, invariant);
            spec.ExperimentalMH = double.Parse(psm.FindAttribute("experimentalMassToCharge").Value, invariant);
            spec.Query.Charge = int.Parse(psm.FindAttribute("chargeState").Value, invariant);

            var pep = new IdentifiedPeptide(spec);
            pep.Sequence = psm.FindElement("peptide").FindElement("peptideSequence").Value;
            pep.AddProtein(psm.FindElement("occurence").FindAttribute("proteinId").Value);

            var featureEles = psm.FindElement("features").FindElements("feature");

            //The first one is the score.
            spec.Score = double.Parse(featureEles[0].Value, invariant);
            spec.NumMissedCleavages = int.Parse(featureEles[missIndex].Value, invariant);

            result.Add(spec);
        }
    }

    return result;
}
/// <summary>
/// Round-trips a spectrum through the composite converter built from a non-redundant header:
/// formats a two-peptide spectrum, then parses a different line and formats it again.
/// </summary>
public void TestNoredundant()
{
    string header = "\t\"File, Scan(s)\"\tSequence\tMH+\tDiff(MH+)\tCharge\tRank\tScore\tDeltaScore\tExpectValue\tQuery\tIons\tReference\tDIFF_MODIFIED_CANDIDATE\tPI\tMissCleavage\tModification";

    IPropertyConverter<IIdentifiedSpectrum> composite = IdentifiedSpectrumPropertyConverterFactory.GetInstance().GetConverters(header, '\t');
    Assert.AreEqual(header, composite.Name);

    IIdentifiedSpectrum spectrum = new IdentifiedSpectrum();
    spectrum.Query.FileScan.ShortFileName = "AAA,1-2";

    var firstPeptide = new IdentifiedPeptide(spectrum) { Sequence = "AAAAA" };
    firstPeptide.AddProtein("PROTEIN1");
    firstPeptide.AddProtein("PROTEIN2");

    var secondPeptide = new IdentifiedPeptide(spectrum) { Sequence = "BBBBB" };
    secondPeptide.AddProtein("PROTEIN3");

    spectrum.TheoreticalMH = 1000.00102;
    spectrum.ExperimentalMH = 1000.0;
    spectrum.Query.Charge = 2;
    spectrum.Rank = 1;
    spectrum.Score = 100.2;
    spectrum.DeltaScore = 0.5;
    spectrum.ExpectValue = 1.1e-2;
    spectrum.Query.QueryId = 10;
    spectrum.NumMissedCleavages = 1;
    spectrum.Modifications = "O18(1)";

    string expect = " AAA,1 - 2 AAAAA ! BBBBB 1000.00102 0.00102 2 1 100.2 0.5 1.10E-002 10 0|0 PROTEIN1/PROTEIN2 ! PROTEIN3 0.00 1 O18(1)";
    Assert.AreEqual(expect, composite.GetProperty(spectrum));

    // Parsing a new line must overwrite every value so that formatting reproduces the line.
    string expectNew = " BBB,2 - 3 BBBBB 1002.00783 -0.00200 3 2 200.2 0.6 1.20E-003 20 0|0 PROTEIN2/PROTEIN4 0.00 2 O18(2)";
    composite.SetProperty(spectrum, expectNew);
    Assert.AreEqual(expectNew, composite.GetProperty(spectrum));
}
/// <summary>
/// Builds protein groups from four proteins and expects only the two proteins that carry
/// unique peptides (protein1 and protein2) to remain, in that order.
/// </summary>
public void TestBuild()
{
    var pep1 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("A", 1, 1, 1, ".dta"))) { Sequence = "A" };
    var pep2 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("B", 1, 1, 1, ".dta"))) { Sequence = "B" };
    var pep3 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("C", 1, 1, 1, ".dta"))) { Sequence = "C" };
    var pep4 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("D", 1, 1, 1, ".dta"))) { Sequence = "D" };
    var pep5 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("E", 1, 1, 1, ".dta"))) { Sequence = "E" };
    var pep6 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("F", 1, 1, 1, ".dta"))) { Sequence = "F" };

    var protein1 = new IdentifiedProtein() { Peptides = new IIdentifiedPeptide[] { pep1, pep3, pep5, pep6 }.ToList() };
    var protein2 = new IdentifiedProtein() { Peptides = new IIdentifiedPeptide[] { pep2, pep3, pep4 }.ToList() };

    //should be removed from final result since all peptides has been included in protein1 and protein2, even one protein contains both peptides
    var protein3 = new IdentifiedProtein() { Peptides = new IIdentifiedPeptide[] { pep1, pep2 }.ToList() };

    //should be removed from final result since all peptides has been included in protein1
    var protein4 = new IdentifiedProtein() { Peptides = new IIdentifiedPeptide[] { pep1, pep5 }.ToList() };

    // protein4 was previously constructed but never handed to the builder, so the
    // "subset of a single protein" case described above was not exercised.
    var actual = new IdentifiedProteinGroupBuilder().Build(new IIdentifiedProtein[] { protein1, protein2, protein3, protein4 }.ToList());

    Assert.AreEqual(2, actual.Count);
    Assert.AreSame(protein1, actual[0][0]);
    Assert.AreSame(protein2, actual[1][0]);
}
/// <summary>
/// Reads an annotation file and converts each annotation row into an identified spectrum
/// with a single peptide.
/// </summary>
/// <param name="fileName">Path of the annotation file, read through <c>AnnotationFormat</c>.</param>
/// <returns>One spectrum per annotation, in file order.</returns>
/// <remarks>
/// The "PredictionRetentionTime", "Charge" and "TheoreticalMz" values are parsed with the
/// invariant culture so that the result does not depend on the machine's regional settings.
/// </remarks>
public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var result = new List<IIdentifiedSpectrum>();

    var anns = new AnnotationFormat().ReadFromFile(fileName);
    foreach (var ann in anns)
    {
        // The peptide sequence is the part of "PeptideId" before the first '_'.
        var peptideId = ann.Annotations["PeptideId"] as string;
        var sequence = peptideId.StringBefore("_");

        var spec = new IdentifiedSpectrum();
        var pep = new IdentifiedPeptide(spec);
        pep.Sequence = sequence;

        spec.Query.FileScan.Experimental = ann.Annotations["Sample"] as string;
        spec.Query.FileScan.RetentionTime = double.Parse(ann.Annotations["PredictionRetentionTime"] as string, invariant);
        spec.Query.FileScan.Charge = int.Parse(ann.Annotations["Charge"] as string, invariant);
        spec.IsPrecursorMonoisotopic = true;
        spec.TheoreticalMH = PrecursorUtils.MzToMH(double.Parse(ann.Annotations["TheoreticalMz"] as string, invariant), spec.Query.FileScan.Charge, true);

        result.Add(spec);
    }

    return result;
}
/// <summary>
/// Reads every "peptide" element under the root's "peptides" element and converts it into an
/// identified spectrum with a single peptide.
/// </summary>
/// <param name="fileName">Path of the XML file to load.</param>
/// <returns>One spectrum per "peptide" element, in document order.</returns>
/// <remarks>
/// Numeric values are parsed with the invariant culture so that a '.' decimal separator in
/// the file is read the same way regardless of the machine's regional settings.
/// </remarks>
public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var result = new List<IIdentifiedSpectrum>();

    XElement root = XElement.Load(fileName);
    var peptides = root.FindElement("peptides").FindElements("peptide");
    foreach (var peptide in peptides)
    {
        IIdentifiedSpectrum spec = new IdentifiedSpectrum();

        // The "peptide_id" attribute is used directly as the peptide sequence.
        var pep = new IdentifiedPeptide(spec);
        pep.Sequence = peptide.FindAttribute("peptide_id").Value;

        spec.FromDecoy = peptide.FindAttribute("decoy").Value.Equals("true");
        spec.SpScore = double.Parse(peptide.FindElement("svm_score").Value, invariant);
        spec.QValue = double.Parse(peptide.FindElement("q_value").Value, invariant);
        spec.Score = double.Parse(peptide.FindElement("pep").Value, invariant);
        spec.TheoreticalMass = double.Parse(peptide.FindElement("calc_mass").Value, invariant);
        pep.AddProtein(peptide.FindElement("protein_id").Value);
        spec.Probability = double.Parse(peptide.FindElement("p_value").Value, invariant);

        result.Add(spec);
    }

    return result;
}
/// <summary>
/// Verifies that the reference converter reports "Reference" as its name, joins proteins with
/// "/" and peptides with " ! ", and replaces the proteins when a new value is set.
/// </summary>
public void Test()
{
    IPropertyConverter<IdentifiedSpectrum> referenceConverter = new IdentifiedSpectrumReferenceConverter<IdentifiedSpectrum>();

    var hit = new IdentifiedSpectrum();

    var oneProtein = new IdentifiedPeptide(hit);
    oneProtein.AddProtein("11111");

    var twoProteins = new IdentifiedPeptide(hit);
    twoProteins.AddProtein("22222");
    twoProteins.AddProtein("33333");

    Assert.AreEqual("Reference", referenceConverter.Name);
    Assert.AreEqual("11111 ! 22222/33333", referenceConverter.GetProperty(hit));

    referenceConverter.SetProperty(hit, "44444/55555 ! 66666");

    // First peptide now has two proteins, second has one.
    Assert.AreEqual(2, hit.Peptides[0].Proteins.Count);
    Assert.AreEqual("44444", hit.Peptides[0].Proteins[0]);
    Assert.AreEqual("55555", hit.Peptides[0].Proteins[1]);

    Assert.AreEqual(1, hit.Peptides[1].Proteins.Count);
    Assert.AreEqual("66666", hit.Peptides[1].Proteins[0]);
}
/// <summary>
///
/// Get top one peptide list from xtandem xml file
///
/// </summary>
/// <remarks>
/// One spectrum is produced per root "group" element with type="model" that yields at least
/// one peptide. Within a group, each "protein" element contributes its peptide "domain";
/// domains producing the same annotated sequence share one peptide and only add a protein name.
/// </remarks>
/// <param name="fileName">xtandem xml filename</param>
/// <returns>List of IIdentifiedSpectrum</returns>
public List <IIdentifiedSpectrum> ReadFromFile(string fileName)
{
    string sourceFilename = GetSourceFile(fileName);

    List <IIdentifiedSpectrum> result = new List <IIdentifiedSpectrum>();

    XmlDocument doc = new XmlDocument();
    doc.Load(fileName);

    this.xmlHelper = new XmlHelper(doc);

    XmlNode root = doc.DocumentElement;

    // Strip the extension from the source name: prefer a ".RAW" suffix (case-insensitive),
    // otherwise cut at the first '.'.
    Match mSource = Regex.Match(sourceFilename, @"(.+)\.(?:RAW)", RegexOptions.IgnoreCase);
    if (mSource.Success)
    {
        sourceFilename = mSource.Groups[1].Value;
    }
    else
    {
        mSource = Regex.Match(sourceFilename, @"(.+?)\.");
        if (mSource.Success)
        {
            sourceFilename = mSource.Groups[1].Value;
        }
    }

    XmlNode parameters = xmlHelper.GetFirstChildByNameAndAttribute(root, "group", "label", "input parameters");
    ParseParameters(parameters);

    // NOTE(review): rawFileName is computed here but not read again inside this method.
    int pos = sourceFilename.LastIndexOfAny(new char[] { '/', '\\' });
    string rawFileName;
    if (pos > 0)
    {
        rawFileName = sourceFilename.Substring(pos + 1);
    }
    else
    {
        rawFileName = sourceFilename;
    }
    rawFileName = FileUtils.ChangeExtension(rawFileName, "");

    List <XmlNode> groupNodes = xmlHelper.GetChildrenByNameAndAttribute(root, "group", "type", "model");
    foreach (XmlNode groupNode in groupNodes)
    {
        // Annotated sequence -> peptide, so repeated domains only add protein names.
        Dictionary <string, IIdentifiedPeptide> pepmap = new Dictionary <string, IIdentifiedPeptide>();

        IIdentifiedSpectrum spectrum = new IdentifiedSpectrum();

        List <XmlNode> proteins = xmlHelper.GetChildren(groupNode, "protein");
        foreach (XmlNode proteinNode in proteins)
        {
            XmlNode domainNode = xmlHelper.GetValidChild(xmlHelper.GetValidChild(proteinNode, "peptide"), "domain");

            // NOTE(review): this local is not read afterwards; the attribute is parsed again below.
            int numMissedCleavages = int.Parse(domainNode.Attributes["missed_cleavages"].Value);

            // "[" and "]" in the pre/post attributes are replaced by "-".
            string preSeq = domainNode.Attributes["pre"].Value;
            if (preSeq.Equals("["))
            {
                preSeq = "-";
            }

            string postSeq = domainNode.Attributes["post"].Value;
            if (postSeq.Equals("]"))
            {
                postSeq = "-";
            }

            StringBuilder pepSeqSB = new StringBuilder(domainNode.Attributes["seq"].Value);

            int start = int.Parse(domainNode.Attributes["start"].Value);
            int end = int.Parse(domainNode.Attributes["end"].Value);

            List <XmlNode> modifications = xmlHelper.GetChildren(domainNode, "aa");
            if (modifications.Count > 0)
            {
                // Collect the modifications that lie inside [start, end] and whose type's first
                // character is not a key of staticModifications.
                List <ModificationItem> items = new List <ModificationItem>();
                foreach (XmlNode modification in modifications)
                {
                    int at = int.Parse(modification.Attributes["at"].Value);
                    if (at < start || at > end)
                    {
                        continue;
                    }

                    ModificationItem item = new ModificationItem();
                    item.Type = modification.Attributes["type"].Value;
                    item.At = at;
                    item.Modified = MyConvert.ToDouble(modification.Attributes["modified"].Value);
                    if (!staticModifications.ContainsKey(item.Type[0]))
                    {
                        items.Add(item);
                    }
                }

                spectrum.Modifications = "";
                if (items.Count > 0)
                {
                    // Ascending position order for the textual modification list.
                    items.Sort((m1, m2) => m1.At - m2.At);
                    var mod = "";
                    foreach (ModificationItem item in items)
                    {
                        mod = mod + MyConvert.Format(",{0}({1:0.0000})", item.Type, item.Modified);
                    }
                    // Substring(1) drops the leading ','.
                    spectrum.Modifications = mod.Substring(1);

                    // Descending position order so that inserting a marker character does not
                    // shift the insertion points still to be processed.
                    items.Sort((m1, m2) => m2.At - m1.At);
                    foreach (ModificationItem item in items)
                    {
                        var key = GetModifiedKey(item.Modified);
                        if (!dynamicModificationChars.ContainsKey(key))
                        {
                            AddDynamicModificationChar(key);
                        }
                        char modificationChar = dynamicModificationChars[key];
                        pepSeqSB.Insert(item.At - start + 1, modificationChar.ToString());
                    }

                    // NOTE(review): same value as assigned a few lines above; looks redundant.
                    spectrum.Modifications = mod.Substring(1);
                }
            }

            // Annotated sequence: last character of pre + "." + sequence + "." + first character of post.
            StringBuilder sb = new StringBuilder();
            sb.Append(preSeq.Substring(preSeq.Length - 1));
            sb.Append(".");
            sb.Append(pepSeqSB.ToString());
            sb.Append(".");
            sb.Append(postSeq[0]);

            string pepSeq = sb.ToString();

            if (!pepmap.ContainsKey(pepSeq))
            {
                IdentifiedPeptide pep = new IdentifiedPeptide(spectrum);
                pep.Sequence = pepSeq;
                pepmap[pepSeq] = pep;

                // Spectrum-level values are overwritten each time a new sequence is seen.
                spectrum.TheoreticalMH = MyConvert.ToDouble(domainNode.Attributes["mh"].Value);
                spectrum.Score = MyConvert.ToDouble(domainNode.Attributes["hyperscore"].Value);

                double nextScore = MyConvert.ToDouble(domainNode.Attributes["nextscore"].Value);
                // Relative difference to the next-best score.
                spectrum.DeltaScore = (spectrum.Score - nextScore) / spectrum.Score;

                spectrum.NumMissedCleavages = int.Parse(domainNode.Attributes["missed_cleavages"].Value);
            }

            // Protein name is the note text up to the first blank or tab.
            var noteNode = xmlHelper.GetValidChild(proteinNode, "note");
            string proteinName = noteNode.InnerText.StringBefore(" ").StringBefore("\t");
            pepmap[pepSeq].AddProtein(proteinName);
        }

        if (spectrum.Peptides.Count > 0)
        {
            spectrum.DigestProtease = protease;

            result.Add(spectrum);

            spectrum.Query.QueryId = int.Parse(groupNode.Attributes["id"].Value);
            spectrum.ExperimentalMH = MyConvert.ToDouble(groupNode.Attributes["mh"].Value);
            spectrum.ExpectValue = MyConvert.ToDouble(groupNode.Attributes["expect"].Value);

            XmlNode spectrumNode = xmlHelper.GetFirstChildByNameAndAttribute(groupNode, "group", "label", "fragment ion mass spectrum");
            XmlNode labelNode = xmlHelper.GetFirstChildByNameAndAttribute(spectrumNode, "note", "label", "Description");
            string title = labelNode.InnerText.Trim();

            // A title starting with "RTINSECONDS" carries the retention time after '=';
            // the remainder after the first blank is the actual title.
            if (title.StartsWith("RTINSECONDS"))
            {
                var rtvalue = title.StringAfter("=").StringBefore(" ").StringBefore("-");
                // NOTE(review): double.Parse here uses the current culture, unlike MyConvert.ToDouble elsewhere — confirm intended.
                spectrum.Query.FileScan.RetentionTime = double.Parse(rtvalue);
                title = title.StringAfter(" ").Trim();
            }

            SequestFilename sf = this.TitleParser.GetValue(title);
            // Fall back to the source file name when the title yields no experiment name.
            if (sf.Experimental == null || sf.Experimental.Length == 0)
            {
                sf.Experimental = sourceFilename;
            }
            spectrum.Query.FileScan.LongFileName = sf.LongFileName;
            // Use the title's retention time only when none was set from "RTINSECONDS".
            if (sf.RetentionTime > 0 && spectrum.Query.FileScan.RetentionTime == 0)
            {
                spectrum.Query.FileScan.RetentionTime = sf.RetentionTime;
            }

            spectrum.Query.Charge = int.Parse(groupNode.Attributes["z"].Value);
            spectrum.Query.Title = title;
        }
    }

    return(result);
}
/// <summary>
/// Reads a pNovo result file in which each spectrum starts with a line beginning with "S",
/// followed by tab-delimited candidate lines, and returns the accepted candidates.
/// </summary>
/// <remarks>
/// Candidates whose score equals the score of the previously added spectrum are attached to
/// that spectrum as additional peptides, even beyond <paramref name="maxRank"/>.
/// </remarks>
/// <param name="filename">pNovo result file</param>
/// <param name="maxRank">Highest candidate rank within one spectrum that still creates a new entry</param>
/// <param name="minScore">Lowest candidate score (third column) that is still accepted</param>
/// <returns>List of identified spectra</returns>
public List<IIdentifiedSpectrum> ParsePeptides(string filename, int maxRank = 10, double minScore = 0.0)
{
    var result = new List<IIdentifiedSpectrum>();

    SequestFilename sf = null;
    int rank = 0;

    using (var reader = new StreamReader(filename))
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            if (line.StartsWith("S"))
            {
                // New spectrum header: remember its title and restart the rank counter.
                sf = this.parser.GetValue(line.StringAfter("\t"));
                rank = 0;
                continue;
            }

            var fields = line.Split('\t');
            var score = MyConvert.ToDouble(fields[2]);
            if (score < minScore)
            {
                continue;
            }

            rank++;

            IIdentifiedSpectrum target;
            if (rank != 1 && score == result.Last().Score)
            {
                // Tie with the previous candidate: share its spectrum.
                target = result.Last();
            }
            else if (rank != 1 && rank > maxRank)
            {
                continue;
            }
            else
            {
                target = new IdentifiedSpectrum();
                target.Query.FileScan = sf;
                target.Query.Charge = sf.Charge;
                target.Score = score;
                target.Rank = rank;
                result.Add(target);
            }

            IdentifiedPeptide peptide = new IdentifiedPeptide(target);
            peptide.Sequence = ModifySequence(fields[1]);
        }
    }

    return result;
}
/// <summary>
/// Compares this target peptide with another one by peptide mass, peptide sequence,
/// identified peptide, number of modifications and the value stored for every modification key.
/// </summary>
/// <param name="obj">Peptide to compare with; a null reference is never equal</param>
/// <returns>true when every compared member matches, otherwise false</returns>
public bool Equals(TargetPeptide obj)
{
    // ReferenceEquals avoids going through any user-defined == operator.
    if (object.ReferenceEquals(obj, null))
    {
        return false;
    }

    if (!PeptideMass.Equals(obj.PeptideMass) ||
        !PeptideSequence.Equals(obj.PeptideSequence) ||
        !Modifications.Count.Equals(obj.Modifications.Count) ||
        !IdentifiedPeptide.Equals(obj.IdentifiedPeptide))
    {
        return false;
    }

    foreach (string key in obj.Modifications.Keys)
    {
        // Equal counts do not guarantee equal key sets: a key missing on this side means
        // "not equal" rather than a KeyNotFoundException from the indexer.
        if (!Modifications.ContainsKey(key) || obj.Modifications[key] != Modifications[key])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Reads identified spectra from an mzIdentML-style XML document.
/// </summary>
/// <remarks>
/// The protocol section (modifications, enzymes) and the sequence collection (DBSequence,
/// Peptide, PeptideEvidence) are indexed first; afterwards every SpectrumIdentificationResult
/// with at least one item accepted by FilterItems yields one spectrum. Spectrum-level values
/// (id, charge, masses, ion counts, scores, user parameters) are taken from the first accepted
/// item only, while a peptide is created for every accepted item.
/// </remarks>
/// <param name="fileName">Path of the XML file to read</param>
/// <returns>Identified spectra in document order</returns>
/// <exception cref="Exception">A spectrum ends up with a first scan of 0</exception>
public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
{
    // XML numeric values use '.' as decimal separator regardless of the machine's locale, so
    // floating point attributes must not be parsed with the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    XElement root = XElement.Load(fileName);

    // NOTE(review): the software name is not used below; the lookup is kept because it touches
    // the AnalysisSoftwareList chain exactly like the original did - confirm whether it can go.
    var name = root.FindElement("AnalysisSoftwareList").
               FindElement("AnalysisSoftware").
               FindElement("SoftwareName").
               FindElement("cvParam").Attribute("name").Value;

    // Default experiment name: file name with every matching known extension stripped.
    var defaultExp = Path.GetFileNameWithoutExtension(fileName);
    foreach (var ext in extensions)
    {
        if (defaultExp.ToLower().EndsWith(ext))
        {
            defaultExp = defaultExp.Substring(0, defaultExp.Length - ext.Length);
        }
    }

    //parsing identification protocol first
    var protocols = root.FindElement("AnalysisProtocolCollection");
    var sip = protocols.FindElement("SpectrumIdentificationProtocol");
    var modMap = ParseSearchModificationMap(sip.FindElement("ModificationParams"));
    var proteases = ParseEnzymes(sip.FindElement("Enzymes"));
    var protease = proteases.FirstOrDefault();

    //parsing sequence collection, including protein<->peptide map
    var seqs = root.FindElement("SequenceCollection");

    var proteinMap = (from ele in seqs.FindElements("DBSequence")
                      let id = ele.Attribute("id").Value
                      let accession = ParseAccession(ele.Attribute("accession").Value)
                      let db = ele.Attribute("searchDatabase_ref").Value
                      select new { Id = id, Accession = accession, DB = db }).ToDictionary(m => m.Id);

    // Modifications are ordered by descending location before being handed to GetModifiedSequence.
    var peptideMap = (from ele in seqs.FindElements("Peptide")
                      let id = ele.Attribute("id").Value
                      let seq = ele.FindElement("PeptideSequence").Value
                      let mods = (from modEle in ele.FindElements("Modification")
                                  let mod = ParseModification(modEle, modMap)
                                  where mod != null
                                  orderby mod.Location descending
                                  select mod).ToArray()
                      let numMiss = protease == null ? 0 : protease.GetMissCleavageSiteCount(seq)
                      select new MzIdentPeptideItem()
                      {
                          Id = id,
                          PureSequence = seq,
                          Modifications = mods,
                          Sequence = GetModifiedSequence(seq, mods),
                          NumMissCleavage = numMiss
                      }).ToDictionary(m => m.Id);

    // Duplicate evidence ids are collapsed to their first occurrence.
    var peptideEvidenceMap = (from g in (from ele in seqs.FindElements("PeptideEvidence")
                                         select new MzIdentPeptideEvidenceItem()
                                         {
                                             Id = ele.Attribute("id").Value,
                                             PeptideRef = ele.Attribute("peptide_ref").Value,
                                             DbRef = ele.Attribute("dBSequence_ref").Value,
                                             Pre = ele.Attribute("pre").Value,
                                             Post = ele.Attribute("post").Value
                                         }).GroupBy(m => m.Id)
                              select g.First()).ToDictionary(m => m.Id);

    //now parsing data
    var data = root.FindElement("DataCollection");
    var result = new List<IIdentifiedSpectrum>();
    var analysisData = data.FindElement("AnalysisData");
    var idList = analysisData.FindElement("SpectrumIdentificationList");

    foreach (var sir in idList.FindElements("SpectrumIdentificationResult"))
    {
        var items = FilterItems(sir.FindElements("SpectrumIdentificationItem"), peptideMap, peptideEvidenceMap);
        if (items.Count == 0)
        {
            continue;
        }

        var spectrum = new IdentifiedSpectrum();
        result.Add(spectrum);

        var spectrumId = sir.Attribute("spectrumID").Value;
        var sirCvParams = GetCvParams(sir);

        string value;
        if (sirCvParams.TryGetValue("MS:1000796", out value))
        {
            // The value of this cvParam is interpreted as a spectrum title.
            spectrum.Query.FileScan = TitleParser.GetValue(value);
        }
        else
        {
            if (spectrumId.StartsWith("index=") || spectrumId.StartsWith("scan="))
            {
                spectrum.Query.FileScan.Experimental = defaultExp;
                spectrum.Query.FileScan.FirstScan = int.Parse(spectrumId.StringAfter("="));
                spectrum.Query.FileScan.LastScan = spectrum.Query.FileScan.FirstScan;
            }
            else
            {
                spectrum.Query.FileScan.Experimental = spectrumId;
            }
        }

        // When present, this cvParam overrides the first scan derived above.
        if (sirCvParams.TryGetValue("MS:1001115", out value))
        {
            spectrum.Query.FileScan.FirstScan = int.Parse(value);
        }

        if (spectrum.Query.FileScan.FirstScan == 0)
        {
            throw new Exception(string.Format("Cannot find scan information in file {0}", fileName));
        }

        bool bFirst = true;
        foreach (var sit in items)
        {
            if (bFirst)
            {
                //only parse score once
                spectrum.Id = sit.Attribute("id").Value;
                spectrum.Charge = int.Parse(sit.Attribute("chargeState").Value);
                spectrum.TheoreticalMH = PrecursorUtils.MzToMH(double.Parse(sit.Attribute("calculatedMassToCharge").Value, invariant), spectrum.Charge, true);
                spectrum.ExperimentalMH = PrecursorUtils.MzToMH(double.Parse(sit.Attribute("experimentalMassToCharge").Value, invariant), spectrum.Charge, true);

                var cvParams = GetCvParams(sit);
                if (cvParams.TryGetValue("MS:1001121", out value))
                {
                    spectrum.MatchedIonCount = int.Parse(value);
                }

                // The second count is added on top of the matched ion count.
                if (cvParams.TryGetValue("MS:1001362", out value))
                {
                    spectrum.TheoreticalIonCount = int.Parse(value) + spectrum.MatchedIonCount;
                }

                ParseScore(spectrum, cvParams);

                var userParams = GetUserParams(sit);
                ParseUserParams(spectrum, userParams);

                bFirst = false;
            }

            var peptide = new IdentifiedPeptide(spectrum);
            var pep_ref = sit.Attribute("peptide_ref").Value;
            var pep = peptideMap[pep_ref];

            // Spectrum-level modification text and missed cleavages follow the item processed last.
            spectrum.Modifications = (from m in pep.Modifications
                                      select string.Format("{0}:{1}", m.Location, m.Item.Name)).Reverse().Merge(",");
            spectrum.NumMissedCleavages = pep.NumMissCleavage;

            foreach (var per in sit.FindElements("PeptideEvidenceRef"))
            {
                var pe_ref = per.Attribute("peptideEvidence_ref").Value;
                var pe = peptideEvidenceMap[pe_ref];

                // Flanking residues come from the evidence entry; the last evidence wins.
                peptide.Sequence = pe.Pre + "." + pep.Sequence + "." + pe.Post;

                var protein = proteinMap[pe.DbRef];
                peptide.AddProtein(protein.Accession);
            }
        }
    }

    return result;
}
/// <summary>
/// Builds a map from PeptideID to identified peptide out of a SQLite result database.
/// </summary>
/// <remarks>
/// Steps, in order: find the processing node and its main score id, read the peptides ordered
/// by spectrum and search engine rank, then apply amino acid modifications, terminal
/// modifications and the non-main scores. Only the first peptide of a spectrum and later
/// peptides with exactly the same score enter the map; lower-scoring peptides with a different
/// sequence only update the spectrum's DeltaScore.
/// NOTE(review): the readers are disposed with <c>using</c>, which assumes that
/// SQLiteDBHelper.ExecuteReader returns an IDisposable ADO.NET data reader - confirm.
/// </remarks>
/// <param name="fileName">Path of the SQLite database file</param>
/// <returns>PeptideID to peptide map; empty when no processing node or main score is found</returns>
public Dictionary<int, IIdentifiedPeptide> ParsePeptideMap(string fileName)
{
    SQLiteDBHelper sqlite = new SQLiteDBHelper(fileName);

    Dictionary<int, IIdentifiedPeptide> result = new Dictionary<int, IIdentifiedPeptide>();

    int nodeid;
    using (var pniReader = sqlite.ExecuteReader("select distinct(ProcessingNodeID) from peptidescores", null))
    {
        if (!pniReader.Read())
        {
            return result;
        }

        nodeid = pniReader.GetInt32(0);
    }

    int scoreid;
    using (var pniScore = sqlite.ExecuteReader(string.Format("select scoreid from processingnodescores where processingnodeid={0} and ismainscore=1", nodeid), null))
    {
        if (!pniScore.Read())
        {
            return result;
        }

        scoreid = pniScore.GetInt32(0);
    }

    Dictionary<int, IIdentifiedSpectrum> spectra = ParseSpectrumMap(fileName);

    var aas = ParseAminoacids(fileName);

    // Read the peptide list
    string sqlPeptide = string.Format("select pep.SpectrumID, pep.PeptideID, pep.TotalIonsCount, pep.MatchedIonsCount, pep.ConfidenceLevel, pep.Sequence, pep.MissedCleavages, ps.ScoreValue from Peptides as pep, PeptideScores as ps where pep.PeptideID=ps.PeptideID and ps.ScoreID={0} order by pep.SpectrumID, pep.SearchEngineRank", scoreid);
    using (var peptideReader = sqlite.ExecuteReader(sqlPeptide, null))
    {
        Progress.SetMessage("Parsing peptides ...");
        while (peptideReader.Read())
        {
            var specid = peptideReader.GetInt32(0);

            IIdentifiedSpectrum spectrum;
            if (!spectra.TryGetValue(specid, out spectrum))
            {
                continue;
            }

            var pepid = peptideReader.GetInt32(1);
            var seq = peptideReader.GetString(5);
            var missedCleavage = peptideReader.GetInt32(6);
            var score = peptideReader.GetDouble(7);

            if (spectrum.Peptides.Count == 0)
            {
                // First peptide of the spectrum: it defines the spectrum-level values.
                spectrum.TheoreticalIonCount = peptideReader.GetInt32(2);
                spectrum.MatchedIonCount = peptideReader.GetInt32(3);

                IdentifiedPeptide peptide = new IdentifiedPeptide(spectrum);
                peptide.ConfidenceLevel = peptideReader.GetInt32(4);
                peptide.Sequence = seq;

                spectrum.NumMissedCleavages = missedCleavage;
                spectrum.Score = score;
                spectrum.TheoreticalMass = aas.MonoPeptideMass(peptide.Sequence);
                spectrum.Rank = 1;
                spectrum.DeltaScore = 1.0;

                result[pepid] = peptide;
                continue;
            }

            if (score == spectrum.Score)
            {
                // Same score as the spectrum score: keep it as an additional peptide.
                IIdentifiedPeptide peptide = new IdentifiedPeptide(spectrum);
                peptide.ConfidenceLevel = peptideReader.GetInt32(4);
                peptide.Sequence = seq;

                result[pepid] = peptide;
                continue;
            }

            if (seq == spectrum.Peptide.Sequence)
            {
                continue;
            }

            spectrum.DeltaScore = (spectrum.Score - score) / spectrum.Score;
        }
    }

    // Dynamic amino acid modifications
    var modMap = ParseModifications(fileName);
    string sqlPeptideMod = "select PeptideID, AminoAcidModificationID, Position from PeptidesAminoacidModifications order by Position desc";
    using (var pepModReader = sqlite.ExecuteReader(sqlPeptideMod, null))
    {
        Progress.SetMessage("Parsing peptide modifications ...");
        while (pepModReader.Read())
        {
            var pepid = pepModReader.GetInt32(0);

            IIdentifiedPeptide peptide;
            if (!result.TryGetValue(pepid, out peptide))
            {
                continue;
            }

            var modid = pepModReader.GetInt32(1);
            var position = pepModReader.GetInt32(2);
            var mod = modMap[modid];

            var aminoacid = peptide.Sequence[position];

            if (peptide.IsTopOne())
            {
                var modStr = string.Format("{0} ({1})", mod.SignStr, aminoacid);
                if (string.IsNullOrEmpty(peptide.Spectrum.Modifications))
                {
                    peptide.Spectrum.Modifications = modStr;
                }
                else
                {
                    peptide.Spectrum.Modifications = peptide.Spectrum.Modifications + "; " + modStr;
                }

                peptide.Spectrum.TheoreticalMass += mod.DeltaMass;
            }

            // Rows arrive with descending Position, so inserting the sign character right after
            // the residue does not shift the positions that are still to be processed.
            var modchar = mod.SignChar;
            var seq = peptide.Sequence;
            peptide.Sequence = seq.Insert(position + 1, modchar.ToString());
        }
    }

    // Dynamic terminal modifications
    string sqlTermMod = "select PeptideID, TerminalModificationID from PeptidesTerminalModifications";
    using (var termModReader = sqlite.ExecuteReader(sqlTermMod, null))
    {
        Progress.SetMessage("Parsing terminal modifications ...");
        while (termModReader.Read())
        {
            var pepid = termModReader.GetInt32(0);

            IIdentifiedPeptide peptide;
            if (!result.TryGetValue(pepid, out peptide))
            {
                continue;
            }

            var modid = termModReader.GetInt32(1);
            var mod = modMap[modid];

            if (peptide.IsTopOne())
            {
                if (string.IsNullOrEmpty(peptide.Spectrum.Modifications))
                {
                    peptide.Spectrum.Modifications = mod.SignStr;
                }
                else if (mod.PositionType == 1)
                {
                    peptide.Spectrum.Modifications = mod.SignStr + "; " + peptide.Spectrum.Modifications;
                }
                else
                {
                    peptide.Spectrum.Modifications = peptide.Spectrum.Modifications + "; " + mod.SignStr;
                }

                peptide.Spectrum.TheoreticalMass += mod.DeltaMass;
            }

            // PositionType 1 puts the sign character in front of the sequence, anything else
            // appends it.
            var modchar = mod.SignChar;
            var seq = peptide.Sequence;
            if (mod.PositionType == 1)
            {
                seq = modchar.ToString() + seq;
            }
            else
            {
                seq = seq + modchar.ToString();
            }

            peptide.Sequence = seq;
        }
    }

    // Other (non-main) scores
    using (var dcReader = sqlite.ExecuteReader("select ps.PeptideID, pns.ScoreName, ps.ScoreValue from PeptideScores as ps, ProcessingNodeScores as pns where ps.ScoreID=pns.ScoreID and pns.IsMainScore=0", null))
    {
        while (dcReader.Read())
        {
            var pepid = dcReader.GetInt32(0);

            IIdentifiedPeptide pep;
            if (!result.TryGetValue(pepid, out pep) || !pep.IsTopOne())
            {
                continue;
            }

            var name = dcReader.GetString(1);
            var value = dcReader.GetDouble(2);
            if (name.Equals("SpScore"))
            {
                pep.Spectrum.SpScore = value;
            }
            else if (name.Equals("ProbabilityScore"))
            {
                pep.Spectrum.Probability = value;
            }
        }
    }

    return result;
}
/// <summary>
/// Reads proteins and their peptides from the first sheet of an Excel workbook.
/// </summary>
/// <remarks>
/// Data rows start at row 2 and end before the first row whose column B is empty. A row is used
/// as a peptide row when both column A (sequence) and column I (delta score) are filled. The
/// first time a protein name (column B) is seen, its coverage, weight, pI, score and description
/// are read from the row two below the peptide row.
/// </remarks>
/// <param name="fileName">Path of the workbook to open</param>
/// <returns>The proteins found, each with its peptides attached</returns>
public override List<IIdentifiedProtein> ParseProteins(string fileName)
{
    Dictionary<string, IIdentifiedProtein> proteinMap = new Dictionary<string, IIdentifiedProtein>();

    Application xApp = new Application();
    try
    {
        // Get the Workbook object; one of two ways can be used: this one opens an existing file.
        Workbook xBook = xApp.Workbooks._Open(fileName,
                                              Missing.Value, Missing.Value, Missing.Value, Missing.Value,
                                              Missing.Value, Missing.Value, Missing.Value, Missing.Value,
                                              Missing.Value, Missing.Value, Missing.Value, Missing.Value);
        try
        {
            Worksheet xSheet = (Worksheet)xBook.Sheets[1];

            // Rows.Count is a COM round trip; ask once instead of on every iteration.
            int rowCount = xSheet.Rows.Count;

            int fromRow = 2;
            int endRow = fromRow;
            for (; endRow <= rowCount; endRow++)
            {
                string b = xSheet.Value('B', endRow);
                if (null == b)
                {
                    break;
                }
            }
            endRow--;

            Progress.SetRange(fromRow, endRow);
            Progress.SetMessage("Parsing file ...");
            for (int i = fromRow; i <= endRow; i++)
            {
                Progress.SetPosition(i);

                string seq = xSheet.Value('A', i);
                if (null == seq)
                {
                    // protein information row
                    continue;
                }

                string deltaCn = xSheet.Value('I', i);
                if (null == deltaCn)
                {
                    // rank > 1
                    continue;
                }

                string protein = xSheet.Value('B', i);

                IIdentifiedProtein pro;
                if (!proteinMap.TryGetValue(protein, out pro))
                {
                    var p = new IdentifiedProtein(protein);
                    p.Coverage = MyConvert.ToDouble(xSheet.Value('C', i + 2));
                    p.MolecularWeight = MyConvert.ToDouble(xSheet.Value('F', i + 2)) * 1000;
                    p.IsoelectricPoint = MyConvert.ToDouble(xSheet.Value('G', i + 2));
                    p.Score = MyConvert.ToDouble(xSheet.Value('H', i + 2));
                    p.Description = xSheet.Value('I', i + 2);
                    proteinMap[protein] = p;
                    pro = p;
                }

                IdentifiedSpectrum spectrum = new IdentifiedSpectrum();
                IdentifiedPeptide peptide = new IdentifiedPeptide(spectrum);

                peptide.Sequence = seq.ToUpper();
                peptide.AddProtein(protein);
                spectrum.Modifications = xSheet.Value('F', i);
                spectrum.DeltaScore = MyConvert.ToDouble(deltaCn);
                spectrum.Charge = Convert.ToInt32(xSheet.Value('K', i));
                spectrum.ObservedMz = MyConvert.ToDouble(xSheet.Value('L', i));
                spectrum.TheoreticalMH = MyConvert.ToDouble(xSheet.Value('M', i));
                spectrum.Ions = xSheet.Value('S', i);
                spectrum.Query.FileScan.FirstScan = Convert.ToInt32(xSheet.Value('P', i));
                spectrum.Query.FileScan.LastScan = Convert.ToInt32(xSheet.Value('Q', i));
                spectrum.Query.FileScan.Experimental = FileUtils.RemoveAllExtension(xSheet.Value('T', i));

                pro.Peptides.Add(peptide);
            }
        }
        finally
        {
            xBook.Close(false, Type.Missing, Type.Missing);
        }
    }
    finally
    {
        // Without Quit the Excel instance started above stays alive after every call, also when
        // opening the workbook fails.
        xApp.Quit();
    }

    var proteins = proteinMap.Values.ToList();
    return proteins;
}
/// <summary>
/// Get the query/peptide map from a sectioned search result file.
/// </summary>
/// <remarks>
/// The file is consumed section by section through ParseSection: "Search" (parameters and
/// modifications), "Total" (number of spectra) and one "Spectrum{n}" section per query whose
/// candidates are stored under keys prefixed with "NO{k}".
/// NOTE(review): the previous summary mentioned a mascot dat file while the parameter text
/// mentioned pFind - confirm which format this reader targets.
/// Dynamic modifications not yet known are registered in ModificationCharMap as a side effect.
/// </remarks>
/// <param name="filename">Search result file</param>
/// <param name="minRank">
/// Despite its name this is an upper bound: reading of a spectrum stops as soon as the number of
/// distinct score levels exceeds this value.
/// </param>
/// <param name="minScore">Reading of a spectrum stops at the first candidate scoring below this value (or exactly 0)</param>
/// <returns>Query id to identified spectra map; queries without valid candidates are absent</returns>
public Dictionary<int, List<IIdentifiedSpectrum>> ParsePeptides(string filename, int minRank, double minScore)
{
    var result = new Dictionary<int, List<IIdentifiedSpectrum>>();

    var sourceDir = GetSourceFile(filename);

    using (var reader = new StreamReader(filename))
    {
        var searchSection = ParseSection(reader, "Search");
        var mm = ParseModification(searchSection);

        // Give every dynamic modification seen for the first time the next free character.
        foreach (var dynamicMod in mm.DynamicModification)
        {
            if (this.ModificationCharMap.ContainsKey(dynamicMod.Modification))
            {
                continue;
            }

            this.ModificationCharMap[dynamicMod.Modification] = ModificationConsts.MODIFICATION_CHAR[this.ModificationCharMap.Count + 1];
        }

        var totalSection = ParseSection(reader, "Total");
        var queryCount = int.Parse(totalSection["Spectra"]);

        Progress.SetRange(1, queryCount);
        for (int queryId = 1; queryId <= queryCount; queryId++)
        {
            if (Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }
            Progress.SetPosition(queryId);

            var sectionName = MyConvert.Format("Spectrum{0}", queryId);
            var peptideSection = ParseSection(reader, sectionName);

            int candidateCount = int.Parse(peptideSection["ValidCandidate"]);
            if (candidateCount == 0)
            {
                continue;
            }

            var expMH = MyConvert.ToDouble(peptideSection["MH"]);
            var expMz = MyConvert.ToDouble(peptideSection["MZ"]);
            var charge = int.Parse(peptideSection["Charge"]);

            var hits = new List<IIdentifiedSpectrum>();
            result[queryId] = hits;

            IIdentifiedSpectrum lastHit = null;
            int rank = 0;
            for (int k = 1; k <= candidateCount; k++)
            {
                string prefix = "NO" + k;

                var scoreKey = prefix + "_Score";
                if (!peptideSection.ContainsKey(scoreKey))
                {
                    // No further candidate is stored: the previous hit has no competitor.
                    if (lastHit != null)
                    {
                        lastHit.DeltaScore = 1.0;
                    }
                    break;
                }

                double score = MyConvert.ToDouble(peptideSection[scoreKey]);
                if (score < minScore || score == 0.0)
                {
                    if (lastHit != null)
                    {
                        lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                    }
                    break;
                }

                // A candidate sharing the score of the previous hit becomes another peptide of
                // that hit instead of opening a new rank.
                bool sharesRank = lastHit != null && score == lastHit.Score;

                IIdentifiedSpectrum hit;
                if (sharesRank)
                {
                    hit = lastHit;
                }
                else
                {
                    if (lastHit != null)
                    {
                        lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                    }

                    rank++;
                    if (rank > minRank)
                    {
                        break;
                    }

                    hit = new IdentifiedSpectrum();
                    hit.Rank = rank;
                    hit.Score = score;
                    hit.ExpectValue = MyConvert.ToDouble(peptideSection[prefix + "_EValue"]);

                    // Theoretical mass: the "_MH" entry when available, the "_Mass" entry otherwise.
                    var mhKey = prefix + "_MH";
                    var massKey = peptideSection.ContainsKey(mhKey) ? mhKey : prefix + "_Mass";
                    hit.TheoreticalMH = MyConvert.ToDouble(peptideSection[massKey]);

                    var matchedPeaksKey = prefix + "_Matched_Peaks";
                    if (peptideSection.ContainsKey(matchedPeaksKey))
                    {
                        hit.MatchedIonCount = int.Parse(peptideSection[matchedPeaksKey]);
                        hit.MatchedTIC = MyConvert.ToDouble(peptideSection[prefix + "_Matched_Intensity"]);
                    }

                    var missCleaveKey = prefix + "_MissCleave";
                    if (peptideSection.ContainsKey(missCleaveKey))
                    {
                        hit.NumMissedCleavages = int.Parse(peptideSection[missCleaveKey]);
                    }

                    hit.ExperimentalMH = expMH;
                    hit.DeltaScore = 1.0;
                    hit.Query.QueryId = queryId;
                    hit.Query.ObservedMz = expMz;
                    hit.Query.Charge = charge;

                    lastHit = hit;
                }

                var candidate = new IdentifiedPeptide(hit);
                candidate.Sequence = peptideSection[prefix + "_SQ"];

                string modificationPos = peptideSection[prefix + "_Modify_Pos"];
                string modificationName = peptideSection[prefix + "_Modify_Name"];
                Dictionary<int, string> modifications = GetModifications(modificationPos, modificationName);

                ModifySequence(candidate, modifications, mm);
                AssignModification(hit, modifications, mm);

                // The first comma-separated entry is skipped; the remaining ones are added as proteins.
                string[] proteinParts = peptideSection[prefix + "_Proteins"].Split(',');
                for (int i = 1; i < proteinParts.Length; i++)
                {
                    candidate.AddProtein(proteinParts[i]);
                }

                if (!sharesRank)
                {
                    hits.Add(hit);
                }
            }

            var title = new FileInfo(peptideSection["Input"]).Name;
            SequestFilename sf = this.TitleParser.GetValue(title);
            sf.Charge = charge;
            if (sf.Experimental == null || sf.Experimental.Length == 0)
            {
                sf.Experimental = sourceDir;
            }

            foreach (IIdentifiedSpectrum keptHit in hits)
            {
                keptHit.Query.Title = title;
                keptHit.Query.FileScan.LongFileName = sf.LongFileName;
            }
        }
    }

    return result;
}
/// <summary>
/// Reads identified spectra from an MSSearch XML document (request and response in one file).
/// </summary>
/// <remarks>
/// Every MSHitSet that has an MSHitSet_hits element yields one spectrum. Within a hit set only
/// the hits with the lowest e-value are kept: a hit with a higher e-value is skipped, a hit with
/// a lower e-value discards the peptides collected so far. Masses are divided by the
/// MSResponse_scale value of the response.
/// </remarks>
/// <param name="fileName">Path of the XML file to read</param>
/// <returns>Identified spectra in document order</returns>
public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
{
    // XML numeric values use '.' and exponent notation independent of the machine's locale, so
    // they must not be parsed with the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    XElement root = XElement.Load(fileName);

    XElement request = root.FindElement("MSSearch_request");

    //parsing identification protocol first
    var modMap = ParseSearchModificationMap(request.FindFirstDescendant("MSSearchSettings_variable"));
    var protease = ParseProtease(request.FindFirstDescendant("MSSearchSettings_enzyme"));

    // Without a protease no missed cleavage can be counted.
    Func<string, int> missCalc;
    if (protease == null)
    {
        missCalc = m => 0;
    }
    else
    {
        missCalc = m => protease.GetMissCleavageSiteCount(m);
    }

    //parsing sequence collection, including protein<->peptide map
    var result = new List<IIdentifiedSpectrum>();
    var response = root.FindElement("MSSearch_response");
    var scale = double.Parse(response.FindFirstDescendant("MSResponse_scale").Value, invariant);
    var idList = response.FindFirstDescendant("MSResponse_hitsets");

    foreach (var sir in idList.FindElements("MSHitSet"))
    {
        var hits = sir.FindElement("MSHitSet_hits");
        if (hits == null)
        {
            continue;
        }

        var spectrum = new IdentifiedSpectrum();
        result.Add(spectrum);

        var title = sir.FindElement("MSHitSet_ids").FindElement("MSHitSet_ids_E").Value;
        spectrum.Query.FileScan = this.TitleParser.GetValue(title);

        foreach (var hit in hits.FindElements("MSHits"))
        {
            var evalue = double.Parse(hit.FindElement("MSHits_evalue").Value, invariant);

            if (spectrum.Peptides.Count > 0)
            {
                if (evalue > spectrum.ExpectValue)
                {
                    continue;
                }

                if (evalue < spectrum.ExpectValue)
                {
                    spectrum.ClearPeptides();
                }
            }

            spectrum.ExpectValue = evalue;
            spectrum.Score = -Math.Log(spectrum.ExpectValue);

            // trust the charge from title
            if (spectrum.Query.Charge == 0)
            {
                spectrum.Query.Charge = int.Parse(hit.FindElement("MSHits_charge").Value);
            }

            spectrum.ExperimentalMass = double.Parse(hit.FindElement("MSHits_mass").Value, invariant) / scale;
            spectrum.TheoreticalMass = double.Parse(hit.FindElement("MSHits_theomass").Value, invariant) / scale;

            var peptide = new IdentifiedPeptide(spectrum);
            var seq = hit.FindElement("MSHits_pepstring").Value;
            spectrum.NumMissedCleavages = missCalc(seq);

            var mods = hit.FindElement("MSHits_mods");
            if (mods != null)
            {
                // Highest site first, so an insertion never shifts a site still to be handled.
                var modsloc = (from ele in mods.FindElements("MSModHit")
                               let loc = int.Parse(ele.FindElement("MSModHit_site").Value)
                               let modtype = ele.FindElement("MSModHit_modtype").FindElement("MSMod").Value
                               orderby loc descending
                               select new { Location = loc, ModType = modtype }).ToList();

                foreach (var modloc in modsloc)
                {
                    seq = seq.Insert(modloc.Location + 1, modMap[modloc.ModType]);
                }
            }

            peptide.Sequence = hit.FindElement("MSHits_pepstart").Value + "." + seq + "." + hit.FindElement("MSHits_pepstop").Value;

            foreach (var pep in hit.FindElement("MSHits_pephits").FindElements("MSPepHit"))
            {
                // The protein name is cut from the defline via StringBefore(" ") and StringBefore("\t").
                var proteinName = pep.FindElement("MSPepHit_defline").Value.StringBefore(" ").StringBefore("\t");
                peptide.AddProtein(proteinName);
            }
        }
    }

    return result;
}
/// <summary>
/// Reads proteins and their peptides from a tab-delimited text export.
/// </summary>
/// <remarks>
/// The first line is the header; columns are located by name ("Sequence", "Protein Accessions",
/// "Modifications", the first header ending with " Score", "Charge", "m/z [Da]", "MH+ [Da]",
/// "First Scan", "Last Scan", "Ions Matched", "Spectrum File"). Reading stops at the first blank
/// line. Rows with an empty first column or an empty delta score column are skipped. The first
/// time a protein is seen, the next two lines are consumed and the second one supplies coverage,
/// weight, pI, score and description of that protein.
/// </remarks>
/// <param name="fileName">Path of the text file to read</param>
/// <returns>The proteins found, each with its peptides attached; empty for an empty file</returns>
/// <exception cref="Exception">The protein information line of a new protein is missing or too short</exception>
public override List<IIdentifiedProtein> ParseProteins(string fileName)
{
    Dictionary<string, IIdentifiedProtein> proteinMap = new Dictionary<string, IIdentifiedProtein>();

    using (StreamReader sr = new StreamReader(fileName))
    {
        string line = sr.ReadLine();
        if (line == null)
        {
            // Empty file: nothing to parse (used to fail with a NullReferenceException).
            return proteinMap.Values.ToList();
        }

        string[] headerParts = line.Split('\t');
        int seqIndex = Array.FindIndex(headerParts, (m => m == "Sequence"));
        int proIndex = Array.FindIndex(headerParts, (m => m == "Protein Accessions"));
        int modIndex = Array.FindIndex(headerParts, (m => m == "Modifications"));
        int deltaIndex = Array.FindIndex(headerParts, (m => m.EndsWith(" Score")));
        int chargeIndex = Array.FindIndex(headerParts, (m => m == "Charge"));
        int obsIndex = Array.FindIndex(headerParts, (m => m == "m/z [Da]"));
        int mhIndex = Array.FindIndex(headerParts, (m => m == "MH+ [Da]"));
        int fscanIndex = Array.FindIndex(headerParts, (m => m == "First Scan"));
        int lscanIndex = Array.FindIndex(headerParts, (m => m == "Last Scan"));
        int ionIndex = Array.FindIndex(headerParts, (m => m == "Ions Matched"));
        int fileIndex = Array.FindIndex(headerParts, (m => m == "Spectrum File"));

        Progress.SetRange(0, sr.BaseStream.Length);
        Progress.SetMessage("Parsing file ...");
        while ((line = sr.ReadLine()) != null)
        {
            if (line.Trim().Length == 0)
            {
                break;
            }

            string[] parts = line.Split('\t');
            if (parts[0].Length == 0)
            {
                continue;
            }

            Progress.SetPosition(sr.BaseStream.Position);

            string seq = parts[seqIndex];
            string deltaCn = parts[deltaIndex];
            if (deltaCn.Length == 0)
            {
                // rank > 1
                continue;
            }

            string protein = parts[proIndex];

            IIdentifiedProtein pro;
            if (!proteinMap.TryGetValue(protein, out pro))
            {
                // Skip one line; the line after it carries the protein information.
                sr.ReadLine();
                string proLine = sr.ReadLine();
                string[] proParts = proLine == null ? null : proLine.Split('\t');
                if (proParts == null || proParts.Length < 9)
                {
                    throw new Exception(string.Format("Cannot read information line of protein {0} in file {1}", protein, fileName));
                }

                var p = new IdentifiedProtein(protein);
                p.Coverage = MyConvert.ToDouble(proParts[2]);
                p.MolecularWeight = MyConvert.ToDouble(proParts[5]) * 1000;
                p.IsoelectricPoint = MyConvert.ToDouble(proParts[6]);
                p.Score = MyConvert.ToDouble(proParts[7]);
                p.Description = proParts[8];
                proteinMap[protein] = p;
                pro = p;
            }

            IdentifiedSpectrum spectrum = new IdentifiedSpectrum();
            IdentifiedPeptide peptide = new IdentifiedPeptide(spectrum);

            peptide.Sequence = seq.ToUpper();
            peptide.AddProtein(protein);
            spectrum.Modifications = parts[modIndex];
            spectrum.DeltaScore = MyConvert.ToDouble(deltaCn);
            spectrum.Charge = Convert.ToInt32(parts[chargeIndex]);
            spectrum.ObservedMz = MyConvert.ToDouble(parts[obsIndex]);
            spectrum.TheoreticalMH = MyConvert.ToDouble(parts[mhIndex]);
            spectrum.Ions = parts[ionIndex];
            spectrum.Query.FileScan.FirstScan = Convert.ToInt32(parts[fscanIndex]);
            spectrum.Query.FileScan.LastScan = Convert.ToInt32(parts[lscanIndex]);
            spectrum.Query.FileScan.Experimental = FileUtils.RemoveAllExtension(parts[fileIndex]);

            pro.Peptides.Add(peptide);
        }
    }

    var proteins = proteinMap.Values.ToList();
    return proteins;
}