/// <summary>
/// Makes proteins that list the same peptide share one peptide instance.
/// </summary>
/// <remarks>
/// Peptides are keyed by "<c>QueryId_Sequence</c>". The first peptide seen for a key is kept.
/// Each later peptide with the same key is replaced, in its protein's peptide list, by the
/// kept instance, and the later protein's name is added to that kept instance.
/// </remarks>
/// <param name="result">Result whose protein peptide lists are rewritten in place.</param>
private void MergePeptides(MascotResult result)
{
    var peptideMap = new Dictionary<String, IIdentifiedPeptide>();

    foreach (IIdentifiedProteinGroup proteinGroup in result)
    {
        foreach (IIdentifiedProtein protein in proteinGroup)
        {
            // Index loop (not foreach) because slots of the list are overwritten below.
            for (int i = 0; i < protein.Peptides.Count; i++)
            {
                IIdentifiedPeptide current = protein.Peptides[i];
                String pepid = current.Spectrum.Query.QueryId + "_" + current.Sequence;

                // Single lookup instead of ContainsKey followed by the indexer.
                IIdentifiedPeptide old;
                if (peptideMap.TryGetValue(pepid, out old))
                {
                    old.AddProtein(protein.Name);
                    protein.Peptides[i] = old;
                }
                else
                {
                    peptideMap[pepid] = current;
                }
            }
        }
    }
}
/// <summary>
/// Assigns a marker character to every collected modification name and rewrites each
/// peptide sequence so that <c>&lt;u&gt;…&lt;/u&gt;</c> markup is replaced by those markers.
/// </summary>
/// <remarks>
/// Spectra whose <c>Modifications</c> is null are skipped entirely; their peptide sequences
/// are left untouched, including any markup they may contain.
/// </remarks>
/// <param name="result">Result whose spectra supply the peptides to rewrite in place.</param>
private void RefineModification(MascotResult result)
{
    // Markers are handed out in sorted name order, starting at index 1 of MODIFICATION_CHAR.
    // The keys are copied first so the dictionary can be updated while walking them.
    var mods = new List<string>(this.modifications.Keys);
    mods.Sort();
    for (int i = 0; i < mods.Count; i++)
    {
        this.modifications[mods[i]] = ModificationConsts.MODIFICATION_CHAR[i + 1];
    }

    List<IIdentifiedSpectrum> spectra = result.GetSpectra();
    foreach (IIdentifiedSpectrum spectrum in spectra)
    {
        // The modification text belongs to the spectrum, so it is checked and split once
        // per spectrum rather than once per peptide.
        string mod = spectrum.Modifications;
        if (null == mod)
        {
            continue;
        }

        string[] modStrs = mod.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);

        foreach (IIdentifiedPeptide pep in spectrum.Peptides)
        {
            string seq = pep.Sequence;

            foreach (string modStr in modStrs)
            {
                // Group 1 is used as the key into this.modifications; group 2 is the text
                // expected between the underline tags.
                // NOTE(review): the match result is not checked; an entry the regex does not
                // match yields an empty group 1 and the lookup below throws — confirm intended.
                Match m = this.modificationReg.Match(modStr.Trim());
                char modChar = this.modifications[m.Groups[1].Value];
                string residues = m.Groups[2].Value;

                seq = seq.Replace("<u>" + residues + "</u>", residues + modChar);
                seq = seq.Replace("<U>" + residues + "</U>", residues + modChar);
            }

            // Any markup not consumed above: opening tags are dropped and closing tags
            // become '*'. A '*' that ends up directly after a '.' is then removed.
            seq = seq.Replace("<u>", "");
            seq = seq.Replace("</u>", "*");
            seq = seq.Replace("<U>", "");
            seq = seq.Replace("</U>", "*");
            seq = seq.Replace(".*", ".");

            pep.Sequence = seq;
        }
    }
}
/// <summary>
/// Reads protein groups from a tab-delimited protein file.
/// </summary>
/// <remarks>
/// The header is scanned first: a line starting with "Locus" configures the protein
/// converter, and a line starting with "Unique" configures the peptide converter and ends
/// the header scan. Protein groups are then read with <c>ReadNextProteinGroup</c> until it
/// returns null.
/// </remarks>
/// <param name="proteinFile">Path of the file to read.</param>
/// <returns>A <c>MascotResult</c> holding every protein group that was read.</returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
public IIdentifiedResult ReadFromFile(string proteinFile)
{
    if (!File.Exists(proteinFile))
    {
        throw new FileNotFoundException("File not exist : " + proteinFile);
    }

    var result = new MascotResult();
    var peptideMap = new Dictionary<string, IIdentifiedSpectrum>();

    using (var filein = new StreamReader(proteinFile))
    {
        string lastLine;
        while ((lastLine = filein.ReadLine()) != null)
        {
            // Header tokens are fixed text, so they are matched ordinally rather than with
            // the current culture's comparison rules.
            if (lastLine.StartsWith("Locus", StringComparison.Ordinal))
            {
                this.proteinConverter = IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(lastLine, '\t', "Dtaselect");
            }

            if (lastLine.StartsWith("Unique", StringComparison.Ordinal))
            {
                this.peptideConverter = IdentifiedSpectrumPropertyConverterFactory.GetInstance().GetConverters(lastLine, '\t', "Dtaselect");
                break;
            }
        }

        // The group reader receives the carried-over line by reference; it starts as null.
        IIdentifiedProteinGroup group;
        lastLine = null;
        while ((group = ReadNextProteinGroup(filein, peptideMap, ref lastLine)) != null)
        {
            result.Add(group);
        }
    }

    return result;
}
/// <summary>
/// Checks that <c>AnnotationUtils.GetAnnotationKeys</c> returns the annotation keys of all
/// spectra in a result: two spectra with one distinct key each must yield exactly both keys.
/// </summary>
public void TestGetAnnotationKeys()
{
    const string firstKey = "TEST1";
    const string secondKey = "TEST2";

    IdentifiedSpectrum firstSpectrum = CreateAnnotatedSpectrum(firstKey, "EXP1", "SEQ1");
    IdentifiedSpectrum secondSpectrum = CreateAnnotatedSpectrum(secondKey, "EXP2", "SEQ2");

    // One protein holding both peptides, wrapped in one group, wrapped in one result.
    var protein = new IdentifiedProtein();
    protein.Peptides.Add(firstSpectrum.Peptide);
    protein.Peptides.Add(secondSpectrum.Peptide);

    var proteinGroup = new IdentifiedProteinGroup();
    proteinGroup.Add(protein);

    var mascotResult = new MascotResult();
    mascotResult.Add(proteinGroup);

    List<string> keys = AnnotationUtils.GetAnnotationKeys(mascotResult.GetSpectra());

    Assert.AreEqual(2, keys.Count);
    Assert.IsTrue(keys.Contains(firstKey));
    Assert.IsTrue(keys.Contains(secondKey));
}

/// <summary>
/// Builds a spectrum with one null-valued annotation, an experiment name and one peptide
/// carrying the given sequence.
/// </summary>
private static IdentifiedSpectrum CreateAnnotatedSpectrum(string annotationKey, string experimental, string sequence)
{
    var spectrum = new IdentifiedSpectrum();
    spectrum.Annotations.Add(annotationKey, null);
    spectrum.Query.FileScan.Experimental = experimental;

    // The new peptide is reached through spectrum.Peptide afterwards, so the constructor's
    // return value is not kept.
    new IdentifiedPeptide(spectrum);
    spectrum.Peptide.Sequence = sequence;

    return spectrum;
}
/// <summary>
/// Parses the text of a Mascot report into a <c>MascotResult</c>.
/// </summary>
/// <remarks>
/// Steps, in order: read the p-value/score thresholds and optional peak settings; build the
/// peptide filter; parse every protein match into its own group; attach proteins listed as
/// "Proteins matching the same set" to the preceding group; drop groups whose first protein
/// has no peptides; then refine modifications, merge shared peptides and initialise unique
/// peptide counts.
/// This method resets <c>this.modifications</c> and replaces <c>this.currentPeptideFilter</c>.
/// </remarks>
/// <param name="fileContent">Complete report text.</param>
/// <returns>The populated result.</returns>
public MascotResult ParseContent(String fileContent)
{
    var result = new MascotResult();
    this.modifications = new Dictionary<string, char>();

    Pair<int, double> pValueScore = ParsePValueScore(fileContent);
    result.PValueScore = pValueScore.First;
    result.PValue = pValueScore.Second;

    var offsets = new List<Offset>();

    try
    {
        result.PeakIsotopicType = ParsePeakIsotopicType(fileContent);
    }
    catch (ArgumentException)
    {
        // Best effort: when the parser rejects the content, the property is left unassigned.
    }

    try
    {
        result.PeakTolerance = ParsePeakTolerance(fileContent);
    }
    catch (ArgumentException)
    {
        // Best effort: when the parser rejects the content, the property is left unassigned.
    }

    // All filters are combined with AND: optional score/expect thresholds, rank 1, and the
    // optional caller-supplied filter.
    var filters = new List<IFilter<IIdentifiedSpectrum>>();
    if (this.filterByDefaultScoreAndPvalue)
    {
        filters.Add(new IdentifiedSpectrumScoreFilter(pValueScore.First));
        filters.Add(new IdentifiedSpectrumExpectValueFilter(pValueScore.Second));
    }

    filters.Add(new IdentifiedSpectrumRankFilter(1));

    if (null != this.defaultPeptideFilter)
    {
        filters.Add(this.defaultPeptideFilter);
    }

    this.currentPeptideFilter = new AndFilter<IIdentifiedSpectrum>(filters);

    // One group per protein match; the match's position is remembered so the text between
    // consecutive matches can be inspected below.
    Match proteinMatch = GetProteinRegex().Match(fileContent);
    while (proteinMatch.Success)
    {
        IdentifiedProtein protein = ParseProtein(proteinMatch.Groups[1].Value);

        var group = new IdentifiedProteinGroup();
        group.Add(protein);
        result.Add(group);

        offsets.Add(new Offset(proteinMatch.Index, proteinMatch.Index + proteinMatch.Length, group));
        proteinMatch = proteinMatch.NextMatch();
    }

    // The marker is fixed report text, so it is searched ordinally rather than with the
    // current culture's rules.
    int endIndex = fileContent.IndexOf("Peptide matches not assigned to protein hits", StringComparison.Ordinal);
    if (-1 == endIndex)
    {
        endIndex = fileContent.Length - 1;
    }

    for (int i = 0; i < offsets.Count; i++)
    {
        int start = offsets[i].End;
        int end = i == offsets.Count - 1 ? endIndex : offsets[i + 1].Start;

        // The slice is inclusive of 'end' (hence the + 1). When the marker is absent,
        // endIndex is the last character, so the final slice runs to the end of the content.
        // NOTE(review): a marker located before the last protein match would make the length
        // negative and throw — confirm that cannot occur in real reports.
        String redundant = fileContent.Substring(start, end - start + 1);
        if (!redundant.Contains("Proteins matching the same set"))
        {
            continue;
        }

        // Same-set proteins receive the peptides of the group's first protein and join the group.
        List<IdentifiedProtein> sameMatchProteins = ParseSameMatchProteins(redundant);
        foreach (IdentifiedProtein mp in sameMatchProteins)
        {
            mp.Peptides.AddRange(offsets[i].Mpg[0].Peptides);
            offsets[i].Mpg.Add(mp);
        }
    }

    // Walk backwards so removals do not shift the indices still to be visited.
    for (int i = result.Count - 1; i >= 0; i--)
    {
        if (0 == result[i][0].Peptides.Count)
        {
            result.RemoveAt(i);
        }
    }

    RefineModification(result);
    MergePeptides(result);
    result.InitUniquePeptideCount();

    return result;
}