/// <summary>
/// Parses a spectrum title by offering it to each registered title parser in turn.
/// </summary>
/// <param name="obj">Spectrum title to parse.</param>
/// <returns>
/// The result of the first parser that does not throw; when every parser throws,
/// the result of <c>MascotUtils.ParseTitle(obj, 2)</c>.
/// </returns>
public SequestFilename GetValue(string obj)
{
    int index = 0;
    while (index < parsers.Count)
    {
        ITitleParser candidate = parsers[index];
        try
        {
            SequestFilename parsed = candidate.GetValue(obj);

            // A parser that succeeded from a later slot is moved to the head of
            // the list so it is tried first next time; moveCount caps the number
            // of such moves at ten.
            bool promote = index != 0 && moveCount < 10;
            if (promote)
            {
                parsers.Remove(candidate);
                parsers.Insert(0, candidate);
                moveCount++;
            }

            return parsed;
        }
        catch (Exception)
        {
            // A throwing parser means "cannot handle this title": try the next one.
        }

        index++;
    }

    return MascotUtils.ParseTitle(obj, 2);
}
/// <summary>
/// Checks that assigning a quoted "experiment,scan" short file name yields the
/// experiment name and the same scan number for both first and last scan.
/// </summary>
public void TestSetShortFilename3()
{
    const int expectedScan = 13426;

    var filename = new SequestFilename();
    filename.ShortFileName = "\"JWH_SAX_25_050906,13426\"";

    Assert.AreEqual("JWH_SAX_25_050906", filename.Experimental);
    Assert.AreEqual(expectedScan, filename.FirstScan);
    Assert.AreEqual(expectedScan, filename.LastScan);
}
/// <summary>
/// Reads the candidate peptides from a tab-delimited pNovo result file.
/// </summary>
/// <param name="filename">pNovo result file</param>
/// <param name="maxRank">Maximum rank (first column of a candidate line) a candidate may have to be kept</param>
/// <param name="minScore">Minimum score (third column of a candidate line) a candidate must reach to be kept</param>
/// <returns>One identified spectrum per retained candidate, in file order</returns>
public List<IIdentifiedSpectrum> ParsePeptides(string filename, int maxRank, double minScore)
{
    var result = new List<IIdentifiedSpectrum>();

    // State taken from the most recent spectrum line and applied to the
    // candidate lines that follow it.
    SequestFilename sf = null;
    int charge = 2;
    double expmh = 0;

    using (var sr = new StreamReader(filename))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // Blank lines carry no data; without this guard the numeric
            // conversions below would throw on the empty column.
            if (line.Trim().Length == 0)
            {
                continue;
            }

            var parts = line.Split('\t');
            if (parts.Length <= 5)
            {
                // Spectrum information line: the last column is the number of
                // candidate sequences reported for the spectrum.
                var seqcount = Convert.ToInt32(parts[parts.Length - 1]);
                if (seqcount == 0)
                {
                    continue;
                }

                sf = parser.GetValue(parts[0]);
                expmh = MyConvert.ToDouble(parts[1]);
                charge = Convert.ToInt32(parts[2]);
                continue;
            }

            // Candidate line.
            int curIndex = Convert.ToInt32(parts[0]);
            if (curIndex > maxRank)
            {
                continue;
            }

            var score = MyConvert.ToDouble(parts[2]);
            if (score < minScore)
            {
                continue;
            }

            var curSpectrum = new IdentifiedSpectrum();
            curSpectrum.Query.FileScan = sf;
            curSpectrum.Query.Charge = charge;
            curSpectrum.ExperimentalMH = expmh;
            curSpectrum.Score = score;
            result.Add(curSpectrum);

            IdentifiedPeptide pep = new IdentifiedPeptide(curSpectrum);
            pep.Sequence = ModifySequence(parts[9]);
            pep.Spectrum.TheoreticalMH = MyConvert.ToDouble(parts[11]);
            pep.Spectrum.Rank = curIndex;
        }
    }

    return result;
}
/// <summary>
/// Converts one peptide line of a Census result into a <c>CensusPeptideItem</c>.
/// </summary>
/// <param name="line">Tab-delimited line; must start with "S" or "&amp;S" followed by a tab.</param>
/// <returns>
/// The parsed item, or null when the line has another prefix or fewer than 13 columns.
/// </returns>
private static CensusPeptideItem Parse(string line)
{
    var item = new CensusPeptideItem();

    // "S" marks a regular peptide line, "&S" a singleton peptide line.
    if (line.StartsWith("S\t"))
    {
        item.Singleton = false;
    }
    else if (line.StartsWith("&S\t"))
    {
        item.Singleton = true;
    }
    else
    {
        return null;
    }

    string[] columns = line.Split('\t');
    if (columns.Length < 13)
    {
        return null;
    }

    item.Unique = columns[1];
    item.Sequence = columns[2];
    item.Ratio = MyConvert.ToDouble(columns[3]);
    item.RegressionFactor = MyConvert.ToDouble(columns[4]);
    item.DeterminantFactor = MyConvert.ToDouble(columns[5]);
    item.Score = MyConvert.ToDouble(columns[6]);
    item.DeltaScore = MyConvert.ToDouble(columns[7]);
    item.SampleIntensity = MyConvert.ToDouble(columns[8]);
    item.ReferenceIntensity = MyConvert.ToDouble(columns[9]);
    item.AreaRatio = MyConvert.ToDouble(columns[10]);
    item.SingletonScore = MyConvert.ToDouble(columns[11]);

    // The file name is handed to SequestFilename.Parse with a trailing dot,
    // which is appended when the column does not already end with one.
    string fileColumn = columns[12];
    item.Filename = SequestFilename.Parse(fileColumn.EndsWith(".") ? fileColumn : fileColumn + ".");

    return item;
}
/// <summary>
/// Collects the distinct experiment names referenced by the spectrum lines of a result file.
/// </summary>
/// <param name="fileName">File to read.</param>
/// <returns>Set of the <c>Experimental</c> values found.</returns>
public HashSet<string> ReadFromFile(string fileName)
{
    var experiments = new HashSet<string>();
    var scanFile = new SequestFilename();

    using (var reader = new StreamReader(fileName))
    {
        string line;

        // Header lines are skipped: a line counts as header as long as its
        // first field is rejected (with an exception) by ShortFileName.
        while ((line = reader.ReadLine()) != null)
        {
            string[] fields = line.Trim().Split(chars);
            try
            {
                scanFile.ShortFileName = fields[0];
            }
            catch (Exception)
            {
                continue;
            }

            experiments.Add(scanFile.Experimental);
            break;
        }

        // Remaining lines up to the first blank line; protein lines are ignored.
        for (line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            line = line.Trim();
            if (line.Length == 0)
            {
                break;
            }

            if (!IdentifiedResultUtils.IsProteinLine(line))
            {
                scanFile.ShortFileName = line.Split(chars)[0];
                experiments.Add(scanFile.Experimental);
            }
        }
    }

    return experiments;
}
/// <summary>
/// Extracts raw file name, scan number(s) and retention time from a spectrum
/// title using the configured regular expressions. A regular expression that is
/// null is skipped and leaves the corresponding fields at their defaults.
/// </summary>
/// <param name="obj">Spectrum title to parse.</param>
/// <returns>A new <c>SequestFilename</c> with extension "dta".</returns>
/// <exception cref="ArgumentException">A configured regular expression does not match the title.</exception>
public SequestFilename GetValue(string obj)
{
    SequestFilename result = new SequestFilename();

    if (rawFileRegex != null)
    {
        Match m = rawFileRegex.Match(obj);
        if (!m.Success)
        {
            throw new ArgumentException("Cannot parse raw file from title " + obj);
        }

        result.Experimental = m.Groups[1].Value;
    }

    if (scanNumberRegex != null)
    {
        Match m = scanNumberRegex.Match(obj);
        if (!m.Success)
        {
            throw new ArgumentException("Cannot parse scan number from title " + obj);
        }

        result.FirstScan = int.Parse(m.Groups[1].Value);

        // A second capture group, when present AND matched, is the last scan.
        // An optional group that did not participate in the match has an empty
        // value, so it is treated like a missing group instead of being parsed.
        bool hasLastScan = m.Groups.Count > 2 && m.Groups[2].Success;
        result.LastScan = hasLastScan ? int.Parse(m.Groups[2].Value) : result.FirstScan;
    }

    if (rtRegex != null)
    {
        Match m = rtRegex.Match(obj);
        if (!m.Success)
        {
            throw new ArgumentException("Cannot parse retention time from title " + obj);
        }

        // Titles are machine-written with '.' as decimal separator; parse them
        // independently of the current thread culture.
        result.RetentionTime = double.Parse(m.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture);
    }

    result.Extension = "dta";
    return result;
}
/// <summary>
/// Checks that a format with an empty raw file pattern and a "Cmpd N," scan
/// pattern yields N as both first and last scan.
/// </summary>
public void TestCmpd2()
{
    ParserFormat format = new ParserFormat();
    format.FormatName = "TurboRAW2MGF, Cmpd";
    format.Add(new ParserItem("rawFile", ""));
    format.Add(new ParserItem("scanNumber", @"Cmpd\s*(\d+)\s*,"));

    TitleParser parser = new TitleParser(format);
    SequestFilename sf = parser.GetValue("Cmpd 2345, xxxxx");

    Assert.AreEqual(2345, sf.FirstScan);
    Assert.AreEqual(2345, sf.LastScan);
}
/// <summary>
/// Checks that a DTA-style title ("name.first.last.charge.dta") is split into
/// experiment name, first scan and last scan.
/// </summary>
public void TestDta()
{
    ParserFormat format = new ParserFormat();
    format.FormatName = "TurboRAW2MGF, DTA Format";
    format.Add(new ParserItem("rawFile", @"(.+)\.\d+\.\d+\.\d\.(?:dta|DTA)"));
    format.Add(new ParserItem("scanNumber", @".+\.(\d+)\.(\d+)\.\d\.(?:dta|DTA)"));

    TitleParser parser = new TitleParser(format);
    SequestFilename sf = parser.GetValue("TEST.2345.2346.1.dta");

    Assert.AreEqual("TEST", sf.Experimental);
    Assert.AreEqual(2345, sf.FirstScan);
    Assert.AreEqual(2346, sf.LastScan);
}
/// <summary>
/// Builds the DTA-style file name for a peak list read from an MGF file.
/// </summary>
/// <param name="pkl">Peak list whose annotations may contain the MGF title.</param>
/// <param name="mgfName">Name used as experiment when the peak list has no title annotation.</param>
/// <returns>
/// The parsed title with extension "dta" and the precursor charge, or, when
/// there is no title, a name built from <paramref name="mgfName"/> and an
/// internal running scan counter.
/// </returns>
public SequestFilename GetFilename(PeakList<Peak> pkl, String mgfName)
{
    if (!pkl.Annotations.ContainsKey(MascotGenericFormatConstants.TITLE_TAG))
    {
        // No title available: number the spectra consecutively instead.
        this.scanIndex++;
        return new SequestFilename(mgfName, this.scanIndex, this.scanIndex, pkl.PrecursorCharge, "dta");
    }

    var title = (String)pkl.Annotations[MascotGenericFormatConstants.TITLE_TAG];
    SequestFilename parsed = parser.GetValue(title);
    parsed.Extension = "dta";
    parsed.Charge = pkl.PrecursorCharge;
    return parsed;
}
/// <summary>
/// Stores a file name column value on a Census peptide item.
/// </summary>
/// <param name="t">Item to update.</param>
/// <param name="value">
/// Either a long file name, which is parsed, or a bare experiment name, which
/// is stored as the experiment of the item's (possibly newly created) file name.
/// </param>
public override void SetProperty(CensusPeptideItem t, string value)
{
    bool isLongName = SequestFilename.IsLongFilename(value);

    // PureExperiment records which of the two forms was seen last.
    PureExperiment = !isLongName;

    if (isLongName)
    {
        t.Filename = SequestFilename.Parse(value);
        return;
    }

    if (t.Filename == null)
    {
        t.Filename = new SequestFilename();
    }

    t.Filename.Experimental = value;
}
/// <summary>
/// Checks the long file name produced by <c>MascotUtils.ParseTitle</c> (charge 1)
/// for each supported title layout.
/// </summary>
public void TestParseTitle()
{
    // Each entry is { title, expected long file name }.
    string[][] cases =
    {
        new[]
        {
            " Cmpd. 6612. 6612. 1. dta",
            "Cmpd.6612.6612.1.dta"
        },
        new[]
        {
            " FIT_PPN_SAX_Online_1mg_060909_01, Cmpd 7736, +MSn(600.4082), 51.88 min",
            "FIT_PPN_SAX_Online_1mg_060909_01.7736.7736.1.dta"
        },
        new[]
        {
            "spectrumId=6085 Polarity=Positive ScanMode=ProductIon TimeInMinutes=61.875 acqNumber=3712494",
            ".6085.6085.1.dta"
        },
        new[]
        {
            "Elution from: 40.798 to 40.798 period: 0 experiment: 1 cycles: 1 precIntensity: 355220.0 FinneganScanNumber: 6070 MStype: enumIsNormalMS rawFile: KR_mix_lc_090115.raw",
            "KR_mix_lc_090115.6070.6070.1.dta"
        }
    };

    foreach (string[] testCase in cases)
    {
        SequestFilename parsed = MascotUtils.ParseTitle(testCase[0], 1);
        Assert.AreEqual(testCase[1], parsed.LongFileName);
    }
}
/// <summary>
/// Reads the candidate peptides from a pNovo result file in which every
/// spectrum starts with a line beginning with "S" followed by its candidate lines.
/// </summary>
/// <param name="filename">pNovo result file</param>
/// <param name="maxRank">
/// Maximum number of score-passing candidates per spectrum that may open a new
/// entry; the first candidate of a spectrum is always kept, and a candidate whose
/// score equals the previous entry's score is merged into that entry.
/// </param>
/// <param name="minScore">Minimum score (third column) a candidate must reach to be considered</param>
/// <returns>Identified spectra in file order; equal-score candidates share one entry</returns>
public List<IIdentifiedSpectrum> ParsePeptides(string filename, int maxRank = 10, double minScore = 0.0)
{
    var result = new List<IIdentifiedSpectrum>();

    SequestFilename sf = null;
    int curIndex = 0;

    using (var sr = new StreamReader(filename))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            if (line.StartsWith("S"))
            {
                // Spectrum line: everything after the first tab is the title.
                var title = line.StringAfter("\t");
                sf = this.parser.GetValue(title);
                curIndex = 0;
                continue;
            }

            var parts = line.Split('\t');
            var score = MyConvert.ToDouble(parts[2]);
            if (score < minScore)
            {
                continue;
            }

            // Counts the score-passing candidates of the current spectrum.
            curIndex++;

            IIdentifiedSpectrum curSpectrum;
            if (curIndex > 1 && score == result.Last().Score)
            {
                // Same score as the previous entry: add as another peptide of it.
                curSpectrum = result.Last();
            }
            else
            {
                if (curIndex > 1 && curIndex > maxRank)
                {
                    continue;
                }

                curSpectrum = new IdentifiedSpectrum();
                curSpectrum.Query.FileScan = sf;
                curSpectrum.Query.Charge = sf.Charge;
                curSpectrum.Score = score;
                curSpectrum.Rank = curIndex;
                result.Add(curSpectrum);
            }

            IdentifiedPeptide pep = new IdentifiedPeptide(curSpectrum);
            pep.Sequence = ModifySequence(parts[1]);
        }
    }

    return result;
}
/// <summary>
/// Reads the identified spectra from an X!Tandem XML result file.
/// </summary>
/// <param name="fileName">X!Tandem XML result file</param>
/// <returns>One spectrum for every "model" group that yields at least one peptide</returns>
public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
{
    string sourceFilename = GetSourceFile(fileName);
    List<IIdentifiedSpectrum> result = new List<IIdentifiedSpectrum>();

    XmlDocument doc = new XmlDocument();
    doc.Load(fileName);
    this.xmlHelper = new XmlHelper(doc);
    XmlNode root = doc.DocumentElement;

    // Reduce the source file name to its stem: everything before ".raw" when
    // present (case-insensitive), otherwise everything before the first dot.
    Match mSource = Regex.Match(sourceFilename, @"(.+)\.(?:RAW)", RegexOptions.IgnoreCase);
    if (mSource.Success)
    {
        sourceFilename = mSource.Groups[1].Value;
    }
    else
    {
        mSource = Regex.Match(sourceFilename, @"(.+?)\.");
        if (mSource.Success)
        {
            sourceFilename = mSource.Groups[1].Value;
        }
    }

    // NOTE(review): the previous version also derived a directory-less,
    // extension-less "rawFileName" from sourceFilename but never used it.
    // Confirm whether the Experimental fallback below was meant to use that
    // name instead of sourceFilename.

    XmlNode parameters = xmlHelper.GetFirstChildByNameAndAttribute(root, "group", "label", "input parameters");
    ParseParameters(parameters);

    List<XmlNode> groupNodes = xmlHelper.GetChildrenByNameAndAttribute(root, "group", "type", "model");
    foreach (XmlNode groupNode in groupNodes)
    {
        // Peptides of this spectrum keyed by their full (flanked, modified) sequence.
        Dictionary<string, IIdentifiedPeptide> pepmap = new Dictionary<string, IIdentifiedPeptide>();
        IIdentifiedSpectrum spectrum = new IdentifiedSpectrum();

        List<XmlNode> proteins = xmlHelper.GetChildren(groupNode, "protein");
        foreach (XmlNode proteinNode in proteins)
        {
            XmlNode domainNode = xmlHelper.GetValidChild(xmlHelper.GetValidChild(proteinNode, "peptide"), "domain");

            // "[" / "]" are replaced by "-" as flanking residue.
            string preSeq = domainNode.Attributes["pre"].Value;
            if (preSeq.Equals("["))
            {
                preSeq = "-";
            }

            string postSeq = domainNode.Attributes["post"].Value;
            if (postSeq.Equals("]"))
            {
                postSeq = "-";
            }

            StringBuilder pepSeqSB = new StringBuilder(domainNode.Attributes["seq"].Value);
            int start = int.Parse(domainNode.Attributes["start"].Value);
            int end = int.Parse(domainNode.Attributes["end"].Value);

            List<XmlNode> modifications = xmlHelper.GetChildren(domainNode, "aa");
            if (modifications.Count > 0)
            {
                // Keep the modifications located inside the peptide whose residue
                // is not registered as a static modification.
                List<ModificationItem> items = new List<ModificationItem>();
                foreach (XmlNode modification in modifications)
                {
                    int at = int.Parse(modification.Attributes["at"].Value);
                    if (at < start || at > end)
                    {
                        continue;
                    }

                    ModificationItem item = new ModificationItem();
                    item.Type = modification.Attributes["type"].Value;
                    item.At = at;
                    item.Modified = MyConvert.ToDouble(modification.Attributes["modified"].Value);
                    if (!staticModifications.ContainsKey(item.Type[0]))
                    {
                        items.Add(item);
                    }
                }

                spectrum.Modifications = "";
                if (items.Count > 0)
                {
                    // Textual description in ascending position order.
                    items.Sort((m1, m2) => m1.At - m2.At);
                    var mod = "";
                    foreach (ModificationItem item in items)
                    {
                        mod = mod + MyConvert.Format(",{0}({1:0.0000})", item.Type, item.Modified);
                    }

                    spectrum.Modifications = mod.Substring(1);

                    // Marker characters are inserted from the last position to the
                    // first so earlier insert offsets stay valid.
                    items.Sort((m1, m2) => m2.At - m1.At);
                    foreach (ModificationItem item in items)
                    {
                        var key = GetModifiedKey(item.Modified);
                        if (!dynamicModificationChars.ContainsKey(key))
                        {
                            AddDynamicModificationChar(key);
                        }

                        char modificationChar = dynamicModificationChars[key];
                        pepSeqSB.Insert(item.At - start + 1, modificationChar.ToString());
                    }
                }
            }

            // Full sequence: last residue before, ".", peptide, ".", first residue after.
            StringBuilder sb = new StringBuilder();
            sb.Append(preSeq.Substring(preSeq.Length - 1));
            sb.Append(".");
            sb.Append(pepSeqSB.ToString());
            sb.Append(".");
            sb.Append(postSeq[0]);
            string pepSeq = sb.ToString();

            IIdentifiedPeptide peptide;
            if (!pepmap.TryGetValue(pepSeq, out peptide))
            {
                IdentifiedPeptide pep = new IdentifiedPeptide(spectrum);
                pep.Sequence = pepSeq;
                pepmap[pepSeq] = pep;
                peptide = pep;

                spectrum.TheoreticalMH = MyConvert.ToDouble(domainNode.Attributes["mh"].Value);
                spectrum.Score = MyConvert.ToDouble(domainNode.Attributes["hyperscore"].Value);
                double nextScore = MyConvert.ToDouble(domainNode.Attributes["nextscore"].Value);
                spectrum.DeltaScore = (spectrum.Score - nextScore) / spectrum.Score;
                spectrum.NumMissedCleavages = int.Parse(domainNode.Attributes["missed_cleavages"].Value);
            }

            // The protein name is the note text up to the first space or tab.
            var noteNode = xmlHelper.GetValidChild(proteinNode, "note");
            string proteinName = noteNode.InnerText.StringBefore(" ").StringBefore("\t");
            peptide.AddProtein(proteinName);
        }

        if (spectrum.Peptides.Count > 0)
        {
            spectrum.DigestProtease = protease;
            result.Add(spectrum);

            spectrum.Query.QueryId = int.Parse(groupNode.Attributes["id"].Value);
            spectrum.ExperimentalMH = MyConvert.ToDouble(groupNode.Attributes["mh"].Value);
            spectrum.ExpectValue = MyConvert.ToDouble(groupNode.Attributes["expect"].Value);

            XmlNode spectrumNode = xmlHelper.GetFirstChildByNameAndAttribute(groupNode, "group", "label", "fragment ion mass spectrum");
            XmlNode labelNode = xmlHelper.GetFirstChildByNameAndAttribute(spectrumNode, "note", "label", "Description");
            string title = labelNode.InnerText.Trim();

            if (title.StartsWith("RTINSECONDS"))
            {
                // "RTINSECONDS=<value>[-<value>] <title>": keep the first value
                // as retention time and the remainder as title. The value is
                // machine-written, so it is parsed culture-independently.
                var rtvalue = title.StringAfter("=").StringBefore(" ").StringBefore("-");
                spectrum.Query.FileScan.RetentionTime = double.Parse(rtvalue, System.Globalization.CultureInfo.InvariantCulture);
                title = title.StringAfter(" ").Trim();
            }

            SequestFilename sf = this.TitleParser.GetValue(title);
            if (sf.Experimental == null || sf.Experimental.Length == 0)
            {
                sf.Experimental = sourceFilename;
            }

            spectrum.Query.FileScan.LongFileName = sf.LongFileName;

            // A retention time from the title is used only when none is set yet.
            if (sf.RetentionTime > 0 && spectrum.Query.FileScan.RetentionTime == 0)
            {
                spectrum.Query.FileScan.RetentionTime = sf.RetentionTime;
            }

            spectrum.Query.Charge = int.Parse(groupNode.Attributes["z"].Value);
            spectrum.Query.Title = title;
        }
    }

    return result;
}
/// <summary>
/// Reads the query/peptide map from a pFind result file organised in named
/// sections ("Search", "Total", "Spectrum1", "Spectrum2", ...).
/// </summary>
/// <param name="filename">pFind result file</param>
/// <param name="minRank">
/// Highest rank to keep for each spectrum. Despite its name the value is used as
/// an upper bound: reading a spectrum stops once the rank exceeds it. Candidates
/// with identical score share one rank.
/// </param>
/// <param name="minScore">Minimum score; reading a spectrum stops at the first candidate below it (or with score 0)</param>
/// <returns>Map from query id (1-based spectrum index) to the hits kept for that spectrum</returns>
public Dictionary<int, List<IIdentifiedSpectrum>> ParsePeptides(string filename, int minRank, double minScore)
{
    var result = new Dictionary<int, List<IIdentifiedSpectrum>>();
    var sourceDir = GetSourceFile(filename);

    using (var sr = new StreamReader(filename))
    {
        var parameters = ParseSection(sr, "Search");
        var mm = ParseModification(parameters);

        // Assign a marker character to every dynamic modification not seen before.
        foreach (var mod in mm.DynamicModification)
        {
            if (!this.ModificationCharMap.ContainsKey(mod.Modification))
            {
                this.ModificationCharMap[mod.Modification] = ModificationConsts.MODIFICATION_CHAR[this.ModificationCharMap.Count + 1];
            }
        }

        var headers = ParseSection(sr, "Total");
        var queryCount = int.Parse(headers["Spectra"]);

        Progress.SetRange(1, queryCount);
        for (int queryId = 1; queryId <= queryCount; queryId++)
        {
            if (Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            Progress.SetPosition(queryId);

            var speName = MyConvert.Format("Spectrum{0}", queryId);
            var peptideSection = ParseSection(sr, speName);

            int candidateCount = int.Parse(peptideSection["ValidCandidate"]);
            if (candidateCount == 0)
            {
                continue;
            }

            var expMH = MyConvert.ToDouble(peptideSection["MH"]);
            var expMz = MyConvert.ToDouble(peptideSection["MZ"]);
            var charge = int.Parse(peptideSection["Charge"]);

            var iPeps = new List<IIdentifiedSpectrum>();
            result[queryId] = iPeps;

            IIdentifiedSpectrum lastHit = null;
            int rank = 0;
            for (int k = 1; k <= candidateCount; k++)
            {
                string key = "NO" + k.ToString();
                var scoreKey = key + "_Score";
                if (!peptideSection.ContainsKey(scoreKey))
                {
                    // No further candidate: the previous hit has no runner-up.
                    if (null != lastHit)
                    {
                        lastHit.DeltaScore = 1.0;
                    }

                    break;
                }

                double score = MyConvert.ToDouble(peptideSection[scoreKey]);
                if (score < minScore || score == 0.0)
                {
                    if (null != lastHit)
                    {
                        lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                    }

                    break;
                }

                // Candidates with the same score as the previous hit are added
                // to it as additional peptides instead of opening a new rank.
                bool bSameRank = null != lastHit && score == lastHit.Score;
                if (!bSameRank)
                {
                    if (null != lastHit)
                    {
                        lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                    }

                    rank++;
                    if (rank > minRank)
                    {
                        break;
                    }
                }

                IIdentifiedSpectrum mphit;
                if (bSameRank)
                {
                    mphit = lastHit;
                }
                else
                {
                    mphit = new IdentifiedSpectrum();
                    mphit.Rank = rank;
                    mphit.Score = score;
                    mphit.ExpectValue = MyConvert.ToDouble(peptideSection[key + "_EValue"]);

                    // The theoretical mass is read from "_MH" when present, otherwise from "_Mass".
                    var mhkey = key + "_MH";
                    if (peptideSection.ContainsKey(mhkey))
                    {
                        mphit.TheoreticalMH = MyConvert.ToDouble(peptideSection[mhkey]);
                    }
                    else
                    {
                        mphit.TheoreticalMH = MyConvert.ToDouble(peptideSection[key + "_Mass"]);
                    }

                    var micKey = key + "_Matched_Peaks";
                    if (peptideSection.ContainsKey(micKey))
                    {
                        mphit.MatchedIonCount = int.Parse(peptideSection[micKey]);
                        mphit.MatchedTIC = MyConvert.ToDouble(peptideSection[key + "_Matched_Intensity"]);
                    }

                    var misKey = key + "_MissCleave";
                    if (peptideSection.ContainsKey(misKey))
                    {
                        mphit.NumMissedCleavages = int.Parse(peptideSection[misKey]);
                    }

                    mphit.ExperimentalMH = expMH;

                    // Stays 1.0 unless a later candidate with a lower score is read.
                    mphit.DeltaScore = 1.0;
                    mphit.Query.QueryId = queryId;
                    mphit.Query.ObservedMz = expMz;
                    mphit.Query.Charge = charge;

                    lastHit = mphit;
                }

                var mp = new IdentifiedPeptide(mphit);
                mp.Sequence = peptideSection[key + "_SQ"];

                string modificationPos = peptideSection[key + "_Modify_Pos"];
                string modificationName = peptideSection[key + "_Modify_Name"];
                Dictionary<int, string> modifications = GetModifications(modificationPos, modificationName);
                ModifySequence(mp, modifications, mm);
                AssignModification(mphit, modifications, mm);

                // The first comma-separated field is skipped; the remaining
                // fields are added as protein names.
                // NOTE(review): presumably the first field is the protein count - confirm.
                string proteins = peptideSection[key + "_Proteins"];
                var parts = proteins.Split(',');
                for (int i = 1; i < parts.Length; i++)
                {
                    mp.AddProtein(parts[i]);
                }

                if (!bSameRank)
                {
                    iPeps.Add(mphit);
                }
            }

            // The file name part of the "Input" entry serves as spectrum title.
            var title = new FileInfo(peptideSection["Input"]).Name;
            SequestFilename sf = this.TitleParser.GetValue(title);
            sf.Charge = charge;
            if (sf.Experimental == null || sf.Experimental.Length == 0)
            {
                sf.Experimental = sourceDir;
            }

            foreach (IIdentifiedSpectrum mp in iPeps)
            {
                mp.Query.Title = title;
                mp.Query.FileScan.LongFileName = sf.LongFileName;
            }
        }
    }

    return result;
}
/// <summary>
/// Reads all spectra from a text file in which each spectrum is enclosed by the
/// lines "peaklist start" and "peaklist end": "key=value" header lines come
/// first, followed by tab-separated m/z and intensity lines.
/// </summary>
/// <param name="fileName">File to read.</param>
/// <returns>One MS2 peak list per block that has both headers and peaks.</returns>
public List<PeakList<Peak>> ReadFromFile(string fileName)
{
    var result = new List<PeakList<Peak>>();

    using (var sr = new StreamReader(fileName))
    {
        Progress.SetRange(0, sr.BaseStream.Length);

        var headers = new Dictionary<string, string>();
        var peaks = new List<string>();

        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (!line.Trim().Equals("peaklist start"))
            {
                continue;
            }

            Progress.SetPosition(StreamUtils.GetCharpos(sr));
            headers.Clear();
            peaks.Clear();

            // The header part ends at the first non-empty line that does not
            // start with a letter; every later line is kept as a peak line.
            bool inHeader = true;
            while ((line = sr.ReadLine()) != null)
            {
                var tline = line.Trim();
                if (tline.Equals("peaklist end"))
                {
                    break;
                }

                if (tline.Length == 0)
                {
                    continue;
                }

                if (inHeader && Char.IsLetter(tline[0]))
                {
                    var pos = tline.IndexOf('=');
                    if (pos < 0)
                    {
                        // Not a key=value pair: ignore it instead of failing in
                        // Substring with a negative length.
                        continue;
                    }

                    headers[tline.Substring(0, pos)] = tline.Substring(pos + 1);
                }
                else
                {
                    inHeader = false;
                    peaks.Add(tline);
                }
            }

            if (headers.Count == 0 || peaks.Count == 0)
            {
                continue;
            }

            PeakList<Peak> pkl = new PeakList<Peak>();
            pkl.PrecursorMZ = MyConvert.ToDouble(headers["mz"]);
            pkl.PrecursorCharge = Convert.ToInt32(headers["charge"]);
            pkl.MsLevel = 2;
            pkl.ScanMode = headers["fragmentation"];

            SequestFilename sf = parser.GetValue(headers["header"]);
            pkl.ScanTimes.Add(new ScanTime(sf.FirstScan, 0.0));
            pkl.Experimental = sf.Experimental;
            result.Add(pkl);

            // Peak lines with fewer than two tab-separated columns are ignored.
            foreach (var l in peaks)
            {
                var p = l.Split('\t');
                if (p.Length > 1)
                {
                    pkl.Add(new Peak(MyConvert.ToDouble(p[0]), MyConvert.ToDouble(p[1])));
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Converts a spectrum title into a <c>SequestFilename</c>. The layout is
/// recognised from the trimmed title: "Cmpd ..." prefix, "..., Cmpd ..." inside
/// the title, "spectrumId=..." prefix, "Elution from..." prefix, and finally a
/// DTA-style name with or without charge and extension.
/// </summary>
/// <param name="title">Spectrum title.</param>
/// <param name="charge">Charge stored in the result (and used to complete a DTA-style name).</param>
/// <returns>The parsed file name with extension "dta" for the recognised layouts.</returns>
/// <exception cref="ArgumentException">The title matches none of the supported layouts.</exception>
public static SequestFilename ParseTitle(string title, int charge)
{
    string trimTitle = title.Trim();

    if (trimTitle.StartsWith("Cmpd "))
    {
        Match match = cmpdRegex.Match(trimTitle);
        if (!match.Success)
        {
            throw UnknownTitleFormat(title);
        }

        int scan = int.Parse(match.Groups[1].Value);
        return BuildScanFilename("Cmpd", scan, charge);
    }

    if (trimTitle.Contains("Cmpd "))
    {
        Match match = sqhCmpdRegex.Match(trimTitle);
        if (!match.Success)
        {
            throw UnknownTitleFormat(title);
        }

        int scan = int.Parse(match.Groups[2].Value);
        return BuildScanFilename(match.Groups[1].Value.Trim(), scan, charge);
    }

    if (trimTitle.StartsWith("spectrumId="))
    {
        // e.g. spectrumId=6085 Polarity=Positive ScanMode=ProductIon TimeInMinutes=61.875 acqNumber=3712494
        Match match = qtofRegex.Match(trimTitle);
        if (!match.Success)
        {
            throw UnknownTitleFormat(title);
        }

        int scan = int.Parse(match.Groups[1].Value);
        return BuildScanFilename("", scan, charge);
    }

    if (trimTitle.StartsWith("Elution from"))
    {
        // e.g. Elution from: 40.798 to 40.798 period: 0 experiment: 1 cycles: 1 precIntensity: 355220.0
        //      FinneganScanNumber: 6070 MStype: enumIsNormalMS rawFile: KR_mix_lc_090115.raw
        Match match = dtasuperchargeRegex.Match(trimTitle);
        if (!match.Success)
        {
            throw UnknownTitleFormat(title);
        }

        int scan = int.Parse(match.Groups[1].Value);
        string experimental = FileUtils.ChangeExtension(match.Groups[2].Value, "");
        return BuildScanFilename(experimental, scan, charge);
    }

    // Last resort: treat the title as a DTA-style name, completing a missing
    // extension and then a missing charge. SequestFilename.Parse signals a
    // mismatch by throwing, so each failed attempt moves on to the next form.
    string[] candidates =
    {
        trimTitle,
        trimTitle + ".dta",
        trimTitle + "." + charge + ".dta"
    };
    foreach (string candidate in candidates)
    {
        try
        {
            return SequestFilename.Parse(candidate);
        }
        catch (Exception)
        {
        }
    }

    throw UnknownTitleFormat(title);
}

/// <summary>
/// Creates a "dta" file name whose first and last scan are both <paramref name="scan"/>.
/// </summary>
private static SequestFilename BuildScanFilename(string experimental, int scan, int charge)
{
    var result = new SequestFilename();
    result.Experimental = experimental;
    result.FirstScan = scan;
    result.LastScan = scan;
    result.Charge = charge;
    result.Extension = "dta";
    return result;
}

/// <summary>
/// Creates the exception reported for a title that matches no supported layout.
/// </summary>
private static ArgumentException UnknownTitleFormat(string title)
{
    return new ArgumentException("Unknown title format :" + title + " ; contact author please.");
}
/// <summary>
/// Rewrites the "title=" lines of every source file into URL-escaped DTA-style
/// long file names and writes the result into <paramref name="targetDir"/>.
/// A source file that already lives in the target directory is replaced in
/// place (via a temporary ".tmp" file).
/// </summary>
/// <param name="targetDir">Directory receiving the converted files.</param>
/// <returns>Paths of the converted files.</returns>
public override IEnumerable<string> Process(string targetDir)
{
    var result = new List<string>();
    targetDir = new DirectoryInfo(targetDir).FullName;

    // Picks the first run of digits out of a "charge=" line.
    var chargereg = new Regex(@"(\d+)");

    foreach (var sourceFile in sourceFiles)
    {
        var sourceInfo = new FileInfo(sourceFile);

        // Case-insensitive, culture-independent directory comparison.
        bool isSame = string.Equals(sourceInfo.DirectoryName, targetDir, StringComparison.OrdinalIgnoreCase);
        string targetFile = isSame
            ? sourceFile + ".tmp"
            : Path.Combine(targetDir, sourceInfo.Name);

        using (StreamReader sr = new StreamReader(sourceFile))
        using (StreamWriter sw = new StreamWriter(targetFile))
        {
            string line;

            // Experiment name used when a title does not contain one; taken from
            // the "FILE=" line when seen, otherwise from the source file name.
            string filename = null;

            while ((line = sr.ReadLine()) != null)
            {
                if (line.StartsWith("title="))
                {
                    string title = Uri.UnescapeDataString(line.Substring(6));
                    SequestFilename sf = this.parser.GetValue(title);
                    if (string.IsNullOrEmpty(sf.Experimental))
                    {
                        if (string.IsNullOrEmpty(filename))
                        {
                            filename = Path.GetFileNameWithoutExtension(sourceFile);
                        }

                        sf.Experimental = filename;
                    }

                    // The charge follows the title, so the rest of the block (up
                    // to and including the "--" boundary line) is buffered first.
                    List<string> lines = new List<string>();
                    while ((line = sr.ReadLine()) != null)
                    {
                        lines.Add(line);
                        if (line.StartsWith("--"))
                        {
                            break;
                        }
                    }

                    var chargeLine = lines.First(m => m.StartsWith("charge="));
                    sf.Charge = Convert.ToInt32(chargereg.Match(chargeLine).Groups[1].Value);

                    sw.WriteLine("title=" + Uri.EscapeDataString(sf.LongFileName));
                    foreach (string buffered in lines)
                    {
                        sw.WriteLine(buffered);
                    }
                }
                else
                {
                    if (line.StartsWith("FILE="))
                    {
                        filename = Path.GetFileNameWithoutExtension(line.StringAfter("File Name:").Trim());
                    }

                    // Every non-title line is copied unchanged. This includes the
                    // "FILE=" line, which was previously consumed without being
                    // written and therefore vanished from the converted file.
                    sw.WriteLine(line);
                }
            }
        }

        if (isSame)
        {
            File.Delete(sourceFile);
            File.Move(targetFile, sourceFile);
            result.Add(sourceFile);
        }
        else
        {
            result.Add(targetFile);
        }
    }

    return result;
}
/// <summary>
/// Reads a single spectrum from a DTA file. Experiment name and scan numbers
/// come from the file name; precursor mass and charge come from the first
/// non-blank line; every following line is a peak (m/z, intensity, optional charge).
/// </summary>
/// <param name="dtaFilename">Path of the DTA file; its name must be parseable by <c>SequestFilename.Parse</c>.</param>
/// <returns>The peak list, or null when the file contains no non-blank line.</returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
public PeakList<T> ReadFromFile(string dtaFilename)
{
    var fi = new FileInfo(dtaFilename);
    if (!fi.Exists)
    {
        throw new FileNotFoundException("Cannot find the file " + dtaFilename);
    }

    var result = new PeakList<T>();

    SequestFilename sf = SequestFilename.Parse(fi.Name);
    result.Experimental = sf.Experimental;
    result.ScanTimes.Add(new ScanTime(sf.FirstScan, 0.0));
    if (sf.FirstScan != sf.LastScan)
    {
        result.ScanTimes.Add(new ScanTime(sf.LastScan, 0.0));
    }

    result.PrecursorCharge = sf.Charge;

    using (var filein = new StreamReader(new FileStream(dtaFilename, FileMode.Open, FileAccess.Read)))
    {
        // Find the header: the first line that is not blank.
        string lastline;
        while ((lastline = filein.ReadLine()) != null)
        {
            if (lastline.Trim().Length > 0)
            {
                break;
            }
        }

        if (lastline == null)
        {
            return null;
        }

        // Lines are trimmed before splitting so leading or trailing whitespace
        // does not produce an empty first or last column.
        string[] parts = Regex.Split(lastline.Trim(), @"\s+");
        double precursorMass = MyConvert.ToDouble(parts[0]);

        // The charge in the header overrides the one taken from the file name.
        result.PrecursorCharge = int.Parse(parts[1]);
        result.PrecursorMZ = PrecursorUtils.MHToMz(precursorMass, result.PrecursorCharge, true);

        while ((lastline = filein.ReadLine()) != null)
        {
            string peakLine = lastline.Trim();
            if (peakLine.Length == 0)
            {
                continue;
            }

            // A line starting with '>' ends the peak list.
            if (peakLine[0] == '>')
            {
                break;
            }

            string[] peakParts = Regex.Split(peakLine, @"\s+");
            var peak = new T();
            peak.Mz = MyConvert.ToDouble(peakParts[0]);
            peak.Intensity = MyConvert.ToDouble(peakParts[1]);
            if (peakParts.Length > 2)
            {
                peak.Charge = int.Parse(peakParts[2]);
            }

            result.Add(peak);
        }

        return result;
    }
}
/// <summary>
/// Builds the "experiment.firstScan" key of a file name.
/// </summary>
/// <param name="p">File name supplying <c>Experimental</c> and <c>FirstScan</c>.</param>
/// <returns>Both values formatted through <c>MyConvert.Format</c>, joined by a dot.</returns>
private static string GetScan(SequestFilename p)
{
    string scanKey = MyConvert.Format("{0}.{1}", p.Experimental, p.FirstScan);
    return scanKey;
}
/// <summary>
/// Creates a spectrum through the parameterless constructor and then attaches
/// the given file/scan information to its query.
/// </summary>
/// <param name="sf">File/scan information stored as the query's <c>FileScan</c>.</param>
public IdentifiedSpectrum(SequestFilename sf)
    : this()
{
    this.query.FileScan = sf;
}
/// <summary>
/// Parses one protein block together with the peptide hits listed in it.
/// </summary>
/// <param name="proteinContent">Text of the protein block.</param>
/// <returns>
/// The protein returned by <c>GetProtein</c>, with every peptide whose spectrum
/// passes the peptide filter added to its peptide list.
/// </returns>
protected IdentifiedProtein ParseProtein(String proteinContent)
{
    IdentifiedProtein protein = GetProtein(proteinContent);

    foreach (String peptideContent in GetPeptideInfoContentList(proteinContent))
    {
        List<String> fields = GetPeptideInfo(peptideContent);
        if (fields.Count == 0)
        {
            continue;
        }

        IIdentifiedSpectrum spectrum = new IdentifiedSpectrum();

        // Field 0: peptide mass from observed m/z.
        spectrum.ExperimentalMass = MyConvert.ToDouble(fields[0]);

        // Field 1: observed m/z.
        spectrum.Query.ObservedMz = MyConvert.ToDouble(fields[1]);

        // Field 2: charge.
        int charge = int.Parse(fields[2]);
        spectrum.Query.Charge = charge;

        // Field 3: URL-escaped title, also parsed into the file/scan name.
        String title = Uri.UnescapeDataString(fields[3]).Trim();
        spectrum.Query.Title = title;
        SequestFilename scanFile = MascotUtils.ParseTitle(title, charge);
        if (scanFile != null)
        {
            spectrum.Query.FileScan.LongFileName = scanFile.LongFileName;
        }

        // Field 4: query id. Fields 5 and 6 repeat fields 1 and 0.
        spectrum.Query.QueryId = int.Parse(fields[4]);

        // Field 7: calculated peptide mass. Field 8 (difference between observed
        // and calculated peptide mass) is not used.
        spectrum.TheoreticalMass = MyConvert.ToDouble(fields[7]);

        // Field 9: missed cleavages.
        spectrum.NumMissedCleavages = int.Parse(fields[9]);

        // Field 10: score.
        spectrum.Score = int.Parse(fields[10]);

        // Field 11: expect p value.
        spectrum.ExpectValue = MyConvert.ToDouble(fields[11]);

        // Field 12: rank.
        spectrum.Rank = int.Parse(fields[12]);

        // Field 13: peptide sequence, optionally followed by "+ modifications",
        // e.g. K.YEINVLR<u>.</u>N + Label:18O(2) (C-term)
        String sequence = fields[13].Replace(" ", "");
        var peptide = new IdentifiedPeptide(spectrum);

        string[] sequenceParts = Regex.Split(sequence, "\\+");
        if (sequenceParts.Length > 1)
        {
            sequence = sequenceParts[0].Trim();

            string modificationText = sequenceParts[1].Trim();
            spectrum.Modifications = modificationText;

            // Register each modification name not seen before with a blank marker.
            string[] modificationNames = modificationText.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
            foreach (string modificationName in modificationNames)
            {
                Match nameMatch = this.modificationReg.Match(modificationName.Trim());
                string name = nameMatch.Groups[1].Value;
                if (!this.modifications.ContainsKey(name))
                {
                    this.modifications[name] = ' ';
                }
            }
        }

        peptide.Sequence = sequence;

        if (GetPeptideFilter().Accept(spectrum))
        {
            peptide.AddProtein(protein.Name);
            protein.Peptides.Add(peptide);
        }
    }

    return protein;
}
/// <summary>
/// Reads the peptide hits of every query from a Mascot dat file, either from the
/// target ("peptides") or the decoy ("decoy_peptides") section.
/// </summary>
/// <param name="datFilename">Mascot dat file.</param>
/// <param name="minRank">Highest rank kept per query (used as an upper bound); equal scores share a rank.</param>
/// <param name="minScore">Minimum score; reading a query stops at the first hit below it.</param>
/// <param name="isDecoy">True to read the decoy sections and prefix protein names with <c>DECOY_PREFIX</c>.</param>
/// <returns>
/// Map from query id to the hits kept for that query; empty when decoy hits are
/// requested but the parameters do not contain DECOY=1.
/// </returns>
public Dictionary<int, List<IIdentifiedSpectrum>> DoParsePeptides(string datFilename, int minRank, double minScore, bool isDecoy)
{
    var result = new Dictionary<int, List<IIdentifiedSpectrum>>();

    Dictionary<string, string> headers;
    int queryCount;
    Dictionary<int, MascotQueryItem> queryItems;
    Dictionary<string, string> peptideSection;

    var prefix = isDecoy ? "decoy_" : "";

    // First pass: parameters, masses, enzyme, header, query summary and the
    // complete peptide section.
    using (var reader = new StreamReader(datFilename))
    {
        InitializeBoundary(reader);

        CurrentParameters = ParseSection(reader, "parameters");
        var hasDecoy = CurrentParameters.ContainsKey("DECOY") && CurrentParameters["DECOY"].Equals("1");
        if (isDecoy && !hasDecoy)
        {
            return result;
        }

        var masses = ParseSection(reader, "masses");
        CurrentModifications = ParseModification(masses);

        // The reader is rewound to this position once the enzyme has been read.
        long positionBeforeEnzyme = reader.GetCharpos();
        CurrentProtease = ParseEnzyme(reader);
        reader.SetCharpos(positionBeforeEnzyme);

        headers = ParseSection(reader, "header");
        queryCount = int.Parse(headers["queries"]);
        queryItems = ParseQueryItems(reader, queryCount, prefix);
        peptideSection = ParseSection(reader, prefix + "peptides", !isDecoy);
    }

    // Experiment name used for titles that do not carry one.
    string file = CurrentParameters["FILE"];
    if (file.StartsWith("File Name: "))
    {
        file = file.Substring(10).Trim();
    }

    string defaultExperimental = FileUtils.ChangeExtension(new FileInfo(file).Name, "");

    bool isPrecursorMonoisotopic = true;
    if (CurrentParameters.ContainsKey("MASS"))
    {
        isPrecursorMonoisotopic = CurrentParameters["MASS"].Equals("Monoisotopic");
    }

    // Second pass: the per-query sections are read in order to get the titles.
    using (var reader = new StreamReader(datFilename))
    {
        for (int queryId = 1; queryId <= queryCount; queryId++)
        {
            if (Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            MascotQueryItem queryItem = queryItems[queryId];

            var hits = new List<IIdentifiedSpectrum>();
            result[queryId] = hits;

            IIdentifiedSpectrum lastHit = null;
            int rank = 0;
            for (int k = 1; k <= 10; k++)
            {
                string key = "q" + queryId + "_p" + k;

                // A missing entry, a null value or "-1" ends the hit list of the query.
                string line;
                if (!peptideSection.TryGetValue(key, out line) || line == null || line.Equals("-1"))
                {
                    if (lastHit != null)
                    {
                        lastHit.DeltaScore = 1.0;
                    }

                    break;
                }

                Match detail = this.peptideRegex.Match(line);
                if (!detail.Success)
                {
                    throw new Exception("Wrong format of peptides : " + line);
                }

                double score = MyConvert.ToDouble(detail.Groups["Score"].Value);
                if (score < minScore)
                {
                    if (lastHit != null)
                    {
                        lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                    }

                    break;
                }

                // A hit with the same score as the previous one is added to it as
                // a further peptide instead of opening a new rank.
                bool sameRank = lastHit != null && score == lastHit.Score;

                IIdentifiedSpectrum hit;
                if (sameRank)
                {
                    hit = lastHit;
                }
                else
                {
                    if (lastHit != null)
                    {
                        lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                    }

                    rank++;
                    if (rank > minRank)
                    {
                        break;
                    }

                    hit = new IdentifiedSpectrum();
                    hit.IsPrecursorMonoisotopic = isPrecursorMonoisotopic;
                    hit.Rank = rank;
                    hit.NumMissedCleavages = int.Parse(detail.Groups["MissCleavage"].Value);
                    hit.TheoreticalMass = MyConvert.ToDouble(detail.Groups["TheoreticalMass"].Value);
                    hit.ExperimentalMass = queryItem.ExperimentalMass;
                    hit.Score = score;
                    hit.ExpectValue = ExpectValueCalculator.Calc(hit.Score, queryItem.MatchCount, 0.05);
                    hit.Query.QueryId = queryId;
                    hit.Query.ObservedMz = queryItem.Observed;
                    hit.Query.Charge = queryItem.Charge;
                    hit.Query.MatchCount = queryItem.MatchCount;

                    if (queryItem.HomologyScore != 0)
                    {
                        hit.Annotations[HomologyScoreKey] = queryItem.HomologyScore;
                    }

                    if (CurrentProtease.IsSemiSpecific)
                    {
                        hit.NumProteaseTermini = 1;
                    }

                    lastHit = hit;
                }

                var pureSeq = detail.Groups["Sequence"].Value;
                string modification = detail.Groups["Modification"].Value;
                var seq = ModifySequence(pureSeq, modification);
                AssignModification(hit, modification, CurrentModifications);

                string proteins = detail.Groups["ProteinNames"].Value;
                Match proteinNameMatch = this.proteinNameRegex.Match(proteins);

                // The flanking residues of every protein match are stored under "<key>_terms".
                string termsKey = key + "_terms";
                string termsValue;
                if (!peptideSection.TryGetValue(termsKey, out termsValue))
                {
                    throw new Exception("Mascot version is too old. It's not supported.");
                }

                Match termsMatch = this.termsRegex.Match(termsValue);

                // Protein names and flanking residues are walked in parallel.
                int numProteaseTermini = 0;
                while (proteinNameMatch.Success && termsMatch.Success)
                {
                    var fullSeq = MyConvert.Format("{0}.{1}.{2}", termsMatch.Groups[1].Value, seq, termsMatch.Groups[2].Value);

                    var name = proteinNameMatch.Groups[1].Value.Replace("/", "_");
                    if (isDecoy)
                    {
                        name = DECOY_PREFIX + name;
                    }

                    // Reuse the peptide with the same full sequence when the hit has one.
                    bool found = false;
                    for (int i = 0; i < hit.Peptides.Count; i++)
                    {
                        if (hit.Peptides[i].Sequence == fullSeq)
                        {
                            hit.Peptides[i].AddProtein(name);
                            found = true;
                            break;
                        }
                    }

                    if (!found)
                    {
                        var peptide = new IdentifiedPeptide(hit);
                        peptide.Sequence = fullSeq;
                        peptide.AddProtein(name);

                        if (CurrentProtease.IsSemiSpecific)
                        {
                            int position = Convert.ToInt32(proteinNameMatch.Groups[2].Value);
                            int count = CurrentProtease.GetNumProteaseTermini(
                                termsMatch.Groups[1].Value[0],
                                pureSeq,
                                termsMatch.Groups[2].Value[0],
                                '-',
                                position);
                            numProteaseTermini = Math.Max(numProteaseTermini, count);
                        }
                    }

                    proteinNameMatch = proteinNameMatch.NextMatch();
                    termsMatch = termsMatch.NextMatch();
                }

                if (CurrentProtease.IsSemiSpecific)
                {
                    hit.NumProteaseTermini = Math.Max(hit.NumProteaseTermini, numProteaseTermini);
                }

                if (!sameRank)
                {
                    hits.Add(hit);
                }
            }

            // The query section is read even when the query has no hits.
            Dictionary<string, string> querySection = ParseSection(reader, "query" + queryId);
            string title = Uri.UnescapeDataString(querySection["title"]);

            SequestFilename scanFile = this.TitleParser.GetValue(title);
            scanFile.Charge = queryItem.Charge;
            if (scanFile.Experimental == null || scanFile.Experimental.Length == 0)
            {
                scanFile.Experimental = defaultExperimental;
            }

            foreach (IIdentifiedSpectrum hit in hits)
            {
                hit.Query.Title = title;
                hit.Query.FileScan = scanFile;
            }
        }
    }

    return result;
}
/// <summary>
/// Reads the spectra from options.SiteFile, drops those below the configured delta-score or
/// probability thresholds, keeps one spectrum per short file name, optionally inserts SILAC
/// heavy-label markers into the peptide sequences, and writes the result to
/// options.SiteFile + ".peptides".
/// </summary>
/// <returns>A single-element array containing the name of the written peptide file.</returns>
/// <exception cref="InvalidDataException">
/// In SILAC mode, when the MSMS file is empty, lacks the "Raw file" or "Scan number" column,
/// or contains a row too short to hold both columns.
/// </exception>
public override IEnumerable<string> Process()
{
    var spectra = new MaxQuantPeptideTextReader().ReadFromFile(options.SiteFile);
    spectra.RemoveAll(m => m.DeltaScore < options.MinDeltaScore || m.Probability < options.MinProbability);

    // Keep one spectrum per short file name: the last one after an ascending sort by score.
    spectra = (from g in spectra.GroupBy(m => m.Query.FileScan.ShortFileName)
               select g.OrderBy(l => l.Score).Last()).ToList();

    if (options.IsSILAC)
    {
        var spmap = spectra.ToDictionary(m => m.Query.FileScan.ShortFileName);

        // Number of distinct non-letter characters already present in the sequences; the label
        // markers are taken from ModificationConsts.MODIFICATION_CHAR after that position.
        var existModificationChar = (from sp in spectra
                                     from c in sp.Sequence
                                     where !char.IsLetter(c)
                                     select c).Distinct().Count();

        Dictionary<char, char> labelChars = new Dictionary<char, char>();
        foreach (var c in options.SILACAminoacids)
        {
            labelChars[c] = ModificationConsts.MODIFICATION_CHAR[++existModificationChar];
        }

        using (var sr = new StreamReader(options.MSMSFile))
        {
            var headerLine = sr.ReadLine();
            if (headerLine == null)
            {
                throw new InvalidDataException("MSMS file is empty : " + options.MSMSFile);
            }

            var headers = headerLine.Split('\t');
            var rawIndex = Array.IndexOf(headers, "Raw file");
            var scanIndex = Array.IndexOf(headers, "Scan number");
            if (rawIndex < 0 || scanIndex < 0)
            {
                throw new InvalidDataException("Cannot find column \"Raw file\" or \"Scan number\" in MSMS file : " + options.MSMSFile);
            }

            var requiredColumns = Math.Max(rawIndex, scanIndex) + 1;

            // Names of spectra whose peptides already carry the markers. Without this, a scan
            // listed on several MSMS rows would get the markers inserted once per row.
            var labeled = new HashSet<string>();

            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // A blank line ends the data section.
                if (string.IsNullOrWhiteSpace(line))
                {
                    break;
                }

                var parts = line.Split('\t');
                if (parts.Length < requiredColumns)
                {
                    throw new InvalidDataException("Wrong format of MSMS line : " + line);
                }

                var raw = parts[rawIndex];
                var scan = int.Parse(parts[scanIndex]);
                var sf = new SequestFilename(raw, scan, scan, 0, "");
                var name = sf.ShortFileName;

                IIdentifiedSpectrum sp;
                if (!spmap.TryGetValue(name, out sp) || !labeled.Add(name))
                {
                    continue;
                }

                foreach (var pep in sp.Peptides)
                {
                    var seq = pep.Sequence;

                    // Built back to front: the marker is appended before its residue.
                    // NOTE(review): GetReversedSequence presumably restores the original order,
                    // leaving the marker right after the residue - confirm against SequenceUtils.
                    StringBuilder sb = new StringBuilder();
                    for (int i = seq.Length - 1; i >= 0; i--)
                    {
                        char heavyChar;
                        if (labelChars.TryGetValue(seq[i], out heavyChar))
                        {
                            sb.Append(heavyChar);
                        }
                        sb.Append(seq[i]);
                    }
                    pep.Sequence = SequenceUtils.GetReversedSequence(sb.ToString());
                }
            }
        }
    }

    string resultFilename = options.SiteFile + ".peptides";
    new MascotPeptideTextFormat("\t\"File, Scan(s)\"\tSequence\tCharge\tScore\tDeltaScore\tExpectValue\tPValue\tModification").WriteToFile(resultFilename, spectra);
    return(new[] { resultFilename });
}
/// <summary>
/// Copies every file in sourceFiles to <paramref name="targetDir"/>, replacing each "TITLE="
/// line with "TITLE=" followed by the LongFileName of the parsed title (extension "dta",
/// charge taken from the spectrum's "CHARGE=" line).
/// </summary>
/// <param name="targetDir">
/// Directory receiving the converted files. When it is the directory of a source file, the
/// converted content is written to a ".tmp" file which then replaces the source file.
/// </param>
/// <returns>The paths of the converted files, in the order of sourceFiles.</returns>
public override IEnumerable<string> Process(string targetDir)
{
    var result = new List<string>();

    char[] separators = { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar };
    targetDir = new DirectoryInfo(targetDir).FullName;

    // DirectoryInfo.FullName keeps a trailing separator while FileInfo.DirectoryName has none,
    // so both sides are trimmed before comparing. Otherwise "dir\" and "dir" look different
    // and the writer would be opened on the very file that is being read.
    string comparableTargetDir = targetDir.TrimEnd(separators);

    // Extracts the first run of digits from a CHARGE= line; loop-invariant, so built once.
    var chargereg = new Regex(@"(\d+)");

    foreach (var sourceFile in sourceFiles)
    {
        var sourceInfo = new FileInfo(sourceFile);
        var sourceDir = sourceInfo.DirectoryName;

        bool isSame = string.Equals(sourceDir.TrimEnd(separators), comparableTargetDir, StringComparison.OrdinalIgnoreCase);

        string targetFile = isSame
            ? sourceFile + ".tmp"
            : Path.Combine(targetDir, sourceInfo.Name);

        using (StreamReader sr = new StreamReader(sourceFile))
        using (StreamWriter sw = new StreamWriter(targetFile))
        {
            string line;
            int charge = 0;
            while ((line = sr.ReadLine()) != null)
            {
                if (line.StartsWith("BEGIN IONS", StringComparison.Ordinal))
                {
                    // New spectrum: forget the charge of the previous one.
                    charge = 0;
                }
                else if (line.StartsWith("CHARGE=", StringComparison.Ordinal))
                {
                    charge = Convert.ToInt32(chargereg.Match(line).Groups[1].Value);
                }
                else if (line.StartsWith("TITLE=", StringComparison.Ordinal))
                {
                    // The charge is only known here when CHARGE= precedes TITLE= within the
                    // spectrum; otherwise the title is written with charge 0.
                    string title = line.Substring(6);
                    SequestFilename sf = this.parser.GetValue(title);
                    sf.Extension = "dta";
                    sf.Charge = charge;
                    line = "TITLE=" + sf.LongFileName;
                }

                sw.WriteLine(line);
            }
        }

        if (isSame)
        {
            // Replace the source with the converted temporary file.
            File.Delete(sourceFile);
            File.Move(targetFile, sourceFile);
            result.Add(sourceFile);
        }
        else
        {
            result.Add(targetFile);
        }
    }

    return(result);
}