/// <summary>
/// Get the query/peptide map from pNovo result.
/// </summary>
/// <param name="filename">pNovo proteins file</param>
/// <param name="maxRank">Maximum rank of peptide identified in same spectrum; candidates with a larger rank index are ignored</param>
/// <param name="minScore">Minimum score of peptide identified in same spectrum; candidates scoring below it are ignored</param>
/// <returns>Query/peptide map</returns>
public List<IIdentifiedSpectrum> ParsePeptides(string filename, int maxRank, double minScore)
{
    var result = new List<IIdentifiedSpectrum>();

    // State carried from the most recent spectrum line to the candidate lines that follow it.
    SequestFilename sf = null;
    int charge = 2;
    double expmh = 0;

    using (var sr = new StreamReader(filename))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // Blank (or whitespace-only) lines carry no fields; without this guard the
            // integer conversions below fail with a FormatException on an empty string.
            if (line.Trim().Length == 0)
            {
                continue;
            }

            var parts = line.Split('\t');
            if (parts.Length <= 5)
            {
                // Spectrum information: title, experimental MH, charge, ..., candidate count (last column).
                var seqcount = Convert.ToInt32(parts.Last());
                if (seqcount == 0)
                {
                    // No candidates follow, so the carried state is left untouched.
                    continue;
                }

                sf = parser.GetValue(parts[0]);
                expmh = MyConvert.ToDouble(parts[1]);
                charge = Convert.ToInt32(parts[2]);
            }
            else
            {
                // Candidate line: column 0 is the rank index, column 2 the score,
                // column 9 the sequence and column 11 the theoretical MH.
                int curIndex = Convert.ToInt32(parts[0]);
                if (curIndex <= maxRank)
                {
                    var score = MyConvert.ToDouble(parts[2]);
                    if (score < minScore)
                    {
                        continue;
                    }

                    var curSpectrum = new IdentifiedSpectrum();
                    curSpectrum.Query.FileScan = sf;
                    curSpectrum.Query.Charge = charge;
                    curSpectrum.ExperimentalMH = expmh;
                    curSpectrum.Score = score;
                    result.Add(curSpectrum);

                    // NOTE(review): presumably the IdentifiedPeptide constructor attaches the peptide
                    // to curSpectrum, so pep.Spectrum refers to curSpectrum - confirm.
                    IdentifiedPeptide pep = new IdentifiedPeptide(curSpectrum);
                    pep.Sequence = ModifySequence(parts[9]);
                    pep.Spectrum.TheoreticalMH = MyConvert.ToDouble(parts[11]);
                    pep.Spectrum.Rank = curIndex;
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Parses a title by offering it to each registered parser in list order and returning
/// the first result obtained without an exception. When every parser throws, the title
/// is handed to <c>MascotUtils.ParseTitle</c> instead.
/// </summary>
/// <param name="obj">Title text to parse</param>
/// <returns>The parsed file/scan description</returns>
public SequestFilename GetValue(string obj)
{
    int position = 0;
    while (position < parsers.Count)
    {
        ITitleParser candidate = parsers[position];
        try
        {
            SequestFilename parsed = candidate.GetValue(obj);

            // A parser that succeeded from a non-leading slot is moved to the front so it is
            // tried first next time; this reordering happens at most ten times overall.
            bool promote = position != 0 && moveCount < 10;
            if (promote)
            {
                parsers.Remove(candidate);
                parsers.Insert(0, candidate);
                moveCount++;
            }

            return parsed;
        }
        catch (Exception)
        {
            // This parser could not handle the title; fall through to the next one.
        }

        position++;
    }

    // No registered parser accepted the title.
    return MascotUtils.ParseTitle(obj, 2);
}
/// <summary>
/// Builds the "dta" file description for a peak list. When the peak list carries a title
/// annotation, the title is parsed; otherwise a description is synthesized from the MGF
/// name and a running scan counter.
/// </summary>
/// <param name="pkl">Peak list whose annotations and precursor charge are read</param>
/// <param name="mgfName">Name used when no title annotation is present</param>
/// <returns>File description with extension "dta" and the peak list's precursor charge</returns>
public SequestFilename GetFilename(PeakList<Peak> pkl, String mgfName)
{
    bool hasTitle = pkl.Annotations.ContainsKey(MascotGenericFormatConstants.TITLE_TAG);
    if (!hasTitle)
    {
        // No title to parse: number the spectrum sequentially and use that number
        // for both scan arguments.
        this.scanIndex++;
        return new SequestFilename(mgfName, this.scanIndex, this.scanIndex, pkl.PrecursorCharge, "dta");
    }

    var titleText = (String)pkl.Annotations[MascotGenericFormatConstants.TITLE_TAG];
    SequestFilename parsed = parser.GetValue(titleText);

    // Extension and charge from the title are overridden.
    parsed.Extension = "dta";
    parsed.Charge = pkl.PrecursorCharge;
    return parsed;
}
/// <summary>
/// Reads every peak list from a Mascot generic format file and indexes them by the first
/// scan number (as text) parsed from each peak list's title annotation.
/// </summary>
/// <param name="file">File to read</param>
/// <param name="parser">Parser applied to each title annotation</param>
/// <returns>Peak lists keyed by first scan number</returns>
private static Dictionary<string, PeakList<Peak>> ReadPeakMap(string file, ITitleParser parser)
{
    var mgfReader = new MascotGenericFormatReader<Peak>();
    List<PeakList<Peak>> peakLists = mgfReader.ReadFromFile(file);
    return peakLists.ToDictionary(peakList => ResolveScanKey(peakList, parser));
}

/// <summary>
/// Parses the title annotation of a peak list, copies title-derived values into the peak
/// list where applicable, and returns the first scan number from the title as the key.
/// </summary>
private static string ResolveScanKey(PeakList<Peak> peakList, ITitleParser parser)
{
    var titleInfo = parser.GetValue(peakList.Annotations[MascotGenericFormatConstants.TITLE_TAG] as string);

    // Experimental name and first scan are only filled in when the peak list has none yet.
    if (string.IsNullOrEmpty(peakList.Experimental))
    {
        peakList.Experimental = titleInfo.Experimental;
    }

    if (peakList.FirstScan == 0)
    {
        peakList.FirstScan = titleInfo.FirstScan;
    }

    // A positive charge in the title always replaces the peak list's precursor charge.
    if (titleInfo.Charge > 0)
    {
        peakList.PrecursorCharge = titleInfo.Charge;
    }

    return titleInfo.FirstScan.ToString();
}
/// <summary>
/// Reads all peak lists from a text file made of sections delimited by the lines
/// "peaklist start" and "peaklist end". Inside a section, leading lines that begin with a
/// letter are "key=value" headers; the first line that does not begin with a letter, and
/// every non-empty line after it, is a tab-separated peak line.
/// </summary>
/// <param name="fileName">File to read</param>
/// <returns>One peak list per section that has at least one header and one peak line</returns>
public List<PeakList<Peak>> ReadFromFile(string fileName)
{
    var peakLists = new List<PeakList<Peak>>();

    using (var input = new StreamReader(fileName))
    {
        Progress.SetRange(0, input.BaseStream.Length);

        // Both buffers are reused for every section and cleared when a new one starts.
        var headerFields = new Dictionary<string, string>();
        var peakLines = new List<string>();

        string current;
        while ((current = input.ReadLine()) != null)
        {
            // Anything outside a section is ignored.
            if (current.Trim() != "peaklist start")
            {
                continue;
            }

            Progress.SetPosition(StreamUtils.GetCharpos(input));
            headerFields.Clear();
            peakLines.Clear();

            bool readingHeader = true;
            while ((current = input.ReadLine()) != null)
            {
                string trimmed = current.Trim();
                if (trimmed == "peaklist end")
                {
                    break;
                }

                if (trimmed.Length == 0)
                {
                    continue;
                }

                if (readingHeader && Char.IsLetter(trimmed[0]))
                {
                    int separator = trimmed.IndexOf('=');
                    headerFields[trimmed.Substring(0, separator)] = trimmed.Substring(separator + 1);
                }
                else
                {
                    // Once a peak line has been seen, the rest of the section is peak data.
                    readingHeader = false;
                    peakLines.Add(trimmed);
                }
            }

            // Sections missing either headers or peaks produce nothing.
            if (headerFields.Count == 0 || peakLines.Count == 0)
            {
                continue;
            }

            var peakList = new PeakList<Peak>();
            peakList.PrecursorMZ = MyConvert.ToDouble(headerFields["mz"]);
            peakList.PrecursorCharge = Convert.ToInt32(headerFields["charge"]);
            peakList.MsLevel = 2;
            peakList.ScanMode = headerFields["fragmentation"];

            // The "header" field is parsed as a title to recover scan number and experiment name.
            SequestFilename titleInfo = parser.GetValue(headerFields["header"]);
            peakList.ScanTimes.Add(new ScanTime(titleInfo.FirstScan, 0.0));
            peakList.Experimental = titleInfo.Experimental;
            peakLists.Add(peakList);

            foreach (var peakLine in peakLines)
            {
                // Lines with fewer than two tab-separated columns are dropped.
                var columns = peakLine.Split('\t');
                if (columns.Length > 1)
                {
                    peakList.Add(new Peak(MyConvert.ToDouble(columns[0]), MyConvert.ToDouble(columns[1])));
                }
            }
        }
    }

    return peakLists;
}
/// <summary>
/// Loads the peptides from <paramref name="peptideFile"/>, scans every configured source
/// file for spectrum sections whose scan matches one of those peptides, and copies each
/// matching section to the result file derived for that source file. For every result file
/// written, the matched peptides are also saved next to it with a ".peptides" extension.
/// Peptides that were never matched are saved to "<paramref name="peptideFile"/>.missed".
/// </summary>
/// <param name="peptideFile">Peptide text file to load</param>
/// <returns>
/// The result files that were actually written, plus the ".missed" file when some peptides
/// found no spectrum. The ".peptides" files are written but not listed.
/// </returns>
/// <exception cref="UserTerminatedException">Cancellation was requested while parsing</exception>
public override IEnumerable<string> Process(string peptideFile)
{
    Progress.SetMessage("Loading peptide file {0}...", peptideFile);

    var format = new MascotPeptideTextFormat();
    var peptides = format.ReadFromFile(peptideFile);

    // Peptides still waiting for their spectrum, keyed by scan; entries are removed once matched.
    var map = peptides.ToDictionary(p => GetScan(p.Query.FileScan));
    var pepMap = new Dictionary<string, List<IIdentifiedSpectrum>>();

    // The dot before "msm" is escaped so that only a literal ".msm" extension matches;
    // unescaped it would accept any character in that position.
    Regex silac = new Regex(@"\.((?:iso|sil\d))_\d+\.msm");

    // Result file name -> writer. A null writer means no section has been written to it yet.
    Dictionary<string, StreamWriter> swmap = new Dictionary<string, StreamWriter>();
    try
    {
        int count = 0;
        foreach (var msmFile in sourceFiles)
        {
            string resultFileName = GetResultFilename(silac, msmFile, peptideFile);
            if (!swmap.ContainsKey(resultFileName))
            {
                swmap[resultFileName] = null;
            }

            count++;
            Progress.SetMessage("Parsing {0}/{1} : {2} ...", count, sourceFiles.Length, msmFile);

            using (var sr = new StreamReader(msmFile))
            {
                Progress.SetRange(0, sr.BaseStream.Length);

                MascotGenericFormatSectionReader reader = new MascotGenericFormatSectionReader(sr);

                // Stop early once every peptide has found its spectrum.
                while (reader.HasNext() && map.Count > 0)
                {
                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    string title = reader.GetNextTitle();
                    var scan = GetScan(parser.GetValue(title));
                    if (map.ContainsKey(scan))
                    {
                        var spectrum = map[scan];
                        var section = reader.Next();

                        // The output file is created lazily, on the first matching section.
                        var sw = swmap[resultFileName];
                        if (sw == null)
                        {
                            sw = new StreamWriter(resultFileName);
                            swmap[resultFileName] = sw;
                            pepMap[resultFileName] = new List<IIdentifiedSpectrum>();
                        }

                        section.ForEach(m => sw.WriteLine(m));
                        pepMap[resultFileName].Add(spectrum);
                        map.Remove(scan);
                    }
                    else
                    {
                        reader.SkipNext();
                    }

                    Progress.SetPosition(sr.BaseStream.Position);
                }
            }
        }
    }
    finally
    {
        // Close every writer that was opened, also when parsing failed or was cancelled.
        foreach (var sw in swmap.Values)
        {
            if (sw != null)
            {
                sw.Close();
            }
        }
    }

    var result = new List<string>(from k in swmap where k.Value != null select k.Key);

    foreach (var pep in pepMap)
    {
        var pepFilename = FileUtils.ChangeExtension(pep.Key, ".peptides");
        format.WriteToFile(pepFilename, pep.Value);
    }

    // Whatever is left in the map never matched a spectrum in any source file.
    if (map.Count > 0)
    {
        var missed = peptideFile + ".missed";
        result.Add(missed);
        format.WriteToFile(missed, map.Values.ToList());
    }

    return result;
}
/// <summary>
/// Rewrites each configured source file so that every "&lt;modification_info" line lacking
/// a "modified_peptide" attribute gets one, holding the match sequence of the spectrum
/// named by the most recent "&lt;spectrum_query" line. Output goes to
/// <paramref name="targetDir"/> under the source file's name; when that would be the source
/// file itself, a ".tmp" file is written and then moved over the original.
/// </summary>
/// <param name="targetDir">Directory that receives the rewritten files</param>
/// <returns>Path of the rewritten file for each source file</returns>
public override IEnumerable<string> Process(string targetDir)
{
    var result = new List<string>();

    targetDir = new DirectoryInfo(targetDir).FullName;
    foreach (var sourceFile in sourceFiles)
    {
        var sourceInfo = new FileInfo(sourceFile);
        string outputPath = Path.Combine(targetDir, sourceInfo.Name);

        // Compare full file paths rather than directory strings: this stays correct when
        // targetDir ends with a separator, and the ordinal comparison does not depend on
        // the current culture's casing rules.
        bool isSame = string.Equals(
            Path.GetFullPath(outputPath),
            sourceInfo.FullName,
            System.StringComparison.OrdinalIgnoreCase);

        // Never write onto the file that is being read; go through a temporary file instead.
        string targetFile = isSame ? sourceFile + ".tmp" : outputPath;

        var mparser = new MascotPepXmlParser() { TitleParser = parser };

        // Short file name of each identified spectrum -> its match sequence.
        var spectra = mparser.ReadFromFile(sourceFile).ToDictionary(m => m.Query.FileScan.ShortFileName, m => m.GetMatchSequence());

        // Sequence of the spectrum query currently being read.
        string sequence = string.Empty;
        using (StreamReader sr = new StreamReader(sourceFile))
        using (StreamWriter sw = new StreamWriter(targetFile))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                if (line.Contains("<spectrum_query"))
                {
                    var query = line.StringAfter("spectrum=\"").StringBefore("\"");
                    var sf = parser.GetValue(query);
                    sequence = spectra[sf.ShortFileName];
                }
                else if (line.Contains("<modification_info") && !line.Contains("modified_peptide"))
                {
                    // Insert the attribute just before the first '>' of the line.
                    line = line.StringBefore(">") + " modified_peptide=\"" + sequence + "\">" + line.StringAfter(">");
                }

                // Every line is copied to the output, modified or not.
                sw.WriteLine(line);
            }
        }

        if (isSame)
        {
            // Replace the original with the rewritten temporary file.
            File.Delete(sourceFile);
            File.Move(targetFile, sourceFile);
            result.Add(sourceFile);
        }
        else
        {
            result.Add(targetFile);
        }
    }

    return result;
}