/// <summary>
/// Runs de novo sequencing on a clone of the given spectrum and returns one scored
/// candidate per distinct, non-empty sequence string.
/// </summary>
/// <remarks>
/// Original author's warning: do not use in parallel, as the gappedItems sorting will
/// be corrupted.
/// </remarks>
/// <param name="theMS">Spectrum to sequence. The work is done on a copy made with ObjectCopier.Clone.</param>
/// <returns>
/// For every distinct sequence, the candidate with the highest (rounded) total intensity.
/// </returns>
public List<ScoredSequence> DetailedDenovoSequencing(PatternTools.MSParser.MSFull theMS)
{
    MSFull workingCopy = PatternTools.ObjectCopier.Clone(theMS);
    PrepareMSForDeNovo(workingCopy);

    // Pool the down paths of every node, drop duplicates (default equality),
    // then order the paths from most to least intense.
    List<GraphNode> nodes = GetNodes(workingCopy);
    List<List<GraphNode>> candidatePaths = new List<List<GraphNode>>();
    foreach (GraphNode node in nodes)
    {
        candidatePaths.AddRange(node.DownPaths);
    }
    candidatePaths = candidatePaths.Distinct().ToList();
    candidatePaths.Sort((left, right) => EvaluatePathForIntensity(right).CompareTo(EvaluatePathForIntensity(left)));

    // Score every path.
    List<ScoredSequence> scored = new List<ScoredSequence>();
    foreach (List<GraphNode> path in candidatePaths)
    {
        double meanPpm = EvaluatePathForPPMError(path);
        double pathIntensity = EvaluatePathForIntensity(path);
        int pathLength = path.Count;
        string residues = GetSequence(path);

        scored.Add(new ScoredSequence
        {
            AvgPPMError = Math.Round(meanPpm, 1),
            Sequence = residues,
            TotalIntensity = Math.Round(pathIntensity, 0),
            SequentialScore = pathLength
        });
    }

    // Paths that produced an empty sequence string are discarded.
    int discarded = scored.RemoveAll(candidate => candidate.Sequence.Equals(""));
    Console.WriteLine("Removing bad results: " + discarded);

    // Cluster by sequence text and keep the most intense member of each cluster.
    Dictionary<string, List<ScoredSequence>> bySequence = scored
        .GroupBy(candidate => candidate.Sequence)
        .ToDictionary(cluster => cluster.Key, cluster => cluster.ToList());

    scored.Clear();
    foreach (List<ScoredSequence> sameSequence in bySequence.Values)
    {
        sameSequence.Sort((left, right) => right.TotalIntensity.CompareTo(left.TotalIntensity));
        scored.Add(sameSequence[0]);
    }

    return scored;
}
/// <summary>
/// Formats one spectrum as an MGF (Mascot Generic Format) ion block.
/// </summary>
/// <param name="ms">
/// Spectrum to print. Its scan number and I-lines go into the TITLE line; retention time,
/// charges and charged precursor are written as RTINSECONDS, CHARGE and PEPMASS.
/// </param>
/// <returns>
/// The text of the block, from "BEGIN IONS" through "END IONS", with one "m/z intensity"
/// line per ion in between.
/// </returns>
public static string PrintSpectrumMGF(MSFull ms)
{
    StringBuilder sw = new StringBuilder();

    sw.AppendLine("BEGIN IONS");
    sw.AppendLine("TITLE=Spectrum " + ms.ScanNumber + "; " + string.Join("; ", ms.Ilines));

    // The MGF key is RTINSECONDS; the previous spelling ("RITINSeconds") is not a key
    // that MGF readers recognise. The value is written exactly as stored on the spectrum.
    sw.AppendLine("RTINSECONDS=" + ms.CromatographyRetentionTime);
    sw.AppendLine("CHARGE=" + string.Join(",", ms.Charges));
    sw.AppendLine("PEPMASS=" + ms.ChargedPrecursor);

    // NOTE(review): numbers are converted with the implicit ToString(), i.e. the current
    // culture - confirm the application runs with a '.' decimal separator.
    foreach (Ion i in ms.MSData)
    {
        sw.AppendLine(i.MZ + " " + i.Intensity);
    }

    // Every MGF query block has to be closed, otherwise consecutive spectra run together.
    sw.AppendLine("END IONS");

    return sw.ToString();
}
/// <summary>
/// Formats one spectrum as an MS2-style text record: an "S" line, a set of "I" lines,
/// optional "Z" lines and one "m/z intensity" line per ion.
/// </summary>
/// <param name="ms">Spectrum to print.</param>
/// <returns>The record as a single string; every line is terminated with "\n".</returns>
public static string PrintSpectrumMS2(MSFull ms)
{
    const string lineEnd = "\n";
    StringBuilder text = new StringBuilder();

    // Scan header: the scan number is written twice, followed by the charged precursor.
    text.Append("S\t" + ms.ScanNumber + "\t" + ms.ScanNumber + "\t" + ms.ChargedPrecursor).Append(lineEnd);

    // Fixed information lines.
    text.Append("I\tRetTime\t" + ms.CromatographyRetentionTime).Append(lineEnd);
    text.Append("I\tIonInjectionTime\t" + ms.IonInjectionTime).Append(lineEnd);
    text.Append("I\tActivationType\t" + ms.ActivationType).Append(lineEnd);
    text.Append("I\tInstrumentType\t" + ms.InstrumentType).Append(lineEnd);

    // Free-form information lines carried by the spectrum.
    foreach (string extraInfo in ms.Ilines)
    {
        text.Append("I\tAdditionalInformation:\t" + extraInfo).Append(lineEnd);
    }

    // Z lines are only written for MS2 spectra. Stored Z lines win; otherwise one
    // line is derived per charge state (nothing is written when there are no charges).
    if (ms.isMS2)
    {
        if (ms.ZLines.Count > 0)
        {
            foreach (string storedZLine in ms.ZLines)
            {
                text.Append(storedZLine).Append(lineEnd);
            }
        }
        else
        {
            foreach (int z in ms.Charges)
            {
                text.Append("Z\t" + z + "\t" + PatternTools.pTools.DechargeMSPeakToPlus1(ms.ChargedPrecursor, z)).Append(lineEnd);
            }
        }
    }

    // Peak list.
    foreach (Ion peak in ms.MSData)
    {
        text.Append(peak.MZ + " " + peak.Intensity).Append(lineEnd);
    }

    return text.ToString();
}
/// <summary>
/// Finds the most intense sequence path of the spectrum and returns the m/z range it spans.
/// Optimized for finding the most intense sequence only.
/// </summary>
/// <param name="theMS">Spectrum to analyse. The work is done on a copy made with ObjectCopier.Clone.</param>
/// <returns>
/// Highest m/z minus lowest m/z among the nodes of the path with the greatest
/// EvaluatePathForIntensity value, or 0 when no non-empty path exists.
/// </returns>
public double FastMaxSequentialScoreCoverage(PatternTools.MSParser.MSFull theMS)
{
    MSFull myMS = PatternTools.ObjectCopier.Clone(theMS);
    PrepareMSForDeNovo(myMS);

    // Only the single best path is needed, so scan for the maximum instead of sorting
    // every path; each path is scored exactly once.
    List<GraphNode> bestPath = null;
    double bestIntensity = 0;
    foreach (GraphNode node in GetNodes(myMS))
    {
        foreach (List<GraphNode> path in node.DownPaths)
        {
            double intensity = EvaluatePathForIntensity(path);

            // CompareTo (rather than '>') keeps the ordering the former sort used,
            // including for NaN values.
            if (bestPath == null || intensity.CompareTo(bestIntensity) > 0)
            {
                bestPath = path;
                bestIntensity = intensity;
            }
        }
    }

    // A spectrum that yields no path (or only an empty one) used to throw here.
    if (bestPath == null || bestPath.Count == 0)
    {
        return 0;
    }

    double coverage = bestPath.Max(a => a.MZ) - bestPath.Min(a => a.MZ);
    return coverage;
}
/// <summary>
/// Reads scans from the open reader <c>sr</c> in batches, resuming on each call where the
/// previous call stopped.
/// </summary>
/// <param name="noSpectra">Number of collected scans after which the call stops reading and returns.</param>
/// <returns>
/// The scans completed during this call; scans without any ion line are skipped. When the
/// end of the input is reached the scan still being built is included, the reader is closed,
/// and any later call returns an empty list.
/// </returns>
/// <exception cref="Exception">
/// Thrown when a line containing "RetTime" splits (via <c>tabSeparator</c>) into neither two
/// nor three fields.
/// </exception>
public List<MSFull> GetNext(int noSpectra)
{
    List<MSFull> myScans = new List<MSFull>(noSpectra);
    string thisLine;
    MSFull thisScan;

    // First call: start from an empty scan. Later calls: continue with the scan whose
    // "S" line was already consumed right before the previous call returned.
    if (cycleCounter == 0)
    {
        thisScan = new MSParser.MSFull();
    }
    else
    {
        thisScan = bufferScan;
    }

    while (true)
    {
        try
        {
            thisLine = sr.ReadLine();
        }
        catch (ObjectDisposedException)
        {
            // The reader was closed by an earlier call that hit the end of the input;
            // treat this the same as end of input instead of failing.
            thisLine = null;
        }

        if (thisLine == null)
        {
            break;
        }

        if (thisLine.Length == 0)
        {
            continue;
        }

        if (isNumber.Match(thisLine, 0, 1).Success)
        {
            // Only the first character is tested: this is an ion line.
            string[] ionData = mzSeparator.Split(thisLine);
            Ion ion = new Ion(double.Parse(ionData[0]), double.Parse(ionData[1]), thisScan.CromatographyRetentionTime, thisScan.ScanNumber);
            thisScan.MSData.Add(ion);
        }
        else if (thisLine.StartsWith("Z"))
        {
            // Charge line: field 1 is the charge, field 2 the precursor value.
            string[] theStrings3 = tabSeparator.Split(thisLine);

            if (thisScan.Charges == null)
            {
                thisScan.Charges = new List<int> { int.Parse(theStrings3[1]) };
                thisScan.Precursors = new List<double> { double.Parse(theStrings3[2]) };
            }
            else
            {
                thisScan.Charges.Add(int.Parse(theStrings3[1]));
                if (thisScan.Precursors == null)
                {
                    thisScan.Precursors = new List<double>();
                }
                thisScan.Precursors.Add(double.Parse(theStrings3[2]));
            }
        }
        else if (thisLine.Contains("ActivationType"))
        {
            string[] theStrings2 = tabSeparator.Split(thisLine);
            thisScan.ActivationType = theStrings2[2];
        }
        else if (thisLine.Contains("InstrumentType"))
        {
            string[] theStrings2 = tabSeparator.Split(thisLine);
            thisScan.InstrumentType = theStrings2[2];
        }
        else if (thisLine.Contains("RetTime"))
        {
            string[] theStrings2 = tabSeparator.Split(thisLine);

            if (theStrings2.Length == 2)
            {
                // The retention time is separated with a space, so break the field up further.
                string[] s = Regex.Split(theStrings2[1], @" ");
                thisScan.CromatographyRetentionTime = double.Parse(s[1]);
            }
            else if (theStrings2.Length == 3)
            {
                thisScan.CromatographyRetentionTime = double.Parse(theStrings2[2]);
            }
            else
            {
                throw new Exception("Problems parsing Retention time Line.\n" + thisLine);
            }
        }
        else if (thisLine.StartsWith("S"))
        {
            // Step 1: store the scan that just ended, unless it has no ions.
            if (thisScan.MSData.Count > 0)
            {
                myScans.Add(thisScan);
            }

            // Step 2: start the new scan.
            thisScan = new MSParser.MSFull();
            string[] theStrings = tabSeparator.Split(thisLine);
            thisScan.ScanNumber = int.Parse(theStrings[1]);
            thisScan.MSData = new List<Ion>();

            // A fourth field carries the precursor. Lines without it leave the precursor
            // at its default (previously a line with fewer than three fields indexed
            // past the end of the array here).
            if (theStrings.Length > 3)
            {
                thisScan.ChargedPrecursor = double.Parse(theStrings[3]);
            }

            // Batch complete: park the scan that was just started for the next call.
            if (myScans.Count == noSpectra)
            {
                bufferScan = thisScan;
                break;
            }
        }
    }

    if (thisLine == null)
    {
        // End of input: the scan under construction has no following "S" line to flush
        // it, so it must be stored here or the last scan of the file is lost.
        if (thisScan != null && thisScan.MSData.Count > 0)
        {
            myScans.Add(thisScan);
        }

        // Reset the carry-over so a later call cannot return the same scan again.
        bufferScan = new MSParser.MSFull();
        sr.Close();
    }

    cycleCounter++;
    return myScans;
}
/// <summary>
/// Parses an MGF file into a list of spectra, one per "BEGIN IONS" block.
/// </summary>
/// <param name="file">Path of the MGF file to read.</param>
/// <param name="activationType">
/// Activation type assigned to every spectrum; replaced by "HCD" when the spectrum's TITLE
/// line contains "Fragmentation:hcd".
/// </param>
/// <param name="instrumentType">
/// Instrument type assigned to every spectrum; replaced by "FTMS" when the spectrum's TITLE
/// line contains "Fragmentation:hcd".
/// </param>
/// <returns>The spectra in file order. Content appearing before the first "BEGIN IONS" is discarded.</returns>
/// <exception cref="Exception">
/// Thrown when a line that starts with a digit cannot be read as an ion line; the original
/// error is available as the inner exception.
/// </exception>
public static List<MSFull> ParseMGFFile(string file, string activationType = "CID", string instrumentType = "ITMS")
{
    List<MSFull> theMS2 = new List<MSFull>();

    // Until the first "BEGIN IONS" this object only absorbs stray fields; it is never returned.
    MSFull ms = new MSFull();
    bool insideSpectrum = false;

    // 'using' guarantees the file is closed even when a parse error throws.
    using (System.IO.StreamReader sr = new System.IO.StreamReader(file))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (line.StartsWith("#") || line.StartsWith("_") || line.Equals(""))
            {
                // Header, comment or blank line: nothing to keep.
                continue;
            }

            if (line.Equals("BEGIN IONS"))
            {
                // Store the spectrum that just ended and start a new one.
                if (insideSpectrum)
                {
                    theMS2.Add(ms);
                }

                ms = new MSFull();
                ms.isMS2 = true;
                ms.ActivationType = activationType;
                ms.InstrumentType = instrumentType;
                insideSpectrum = true;
            }
            else if (line.StartsWith("CHARGE"))
            {
                string[] cols = Regex.Split(line, "=");
                string c = cols[1].Replace(@"+", "");

                // Accept a single charge as well as lists such as "2,3" or "2+ and 3+".
                foreach (string chargeText in Regex.Split(c, ",|and", RegexOptions.IgnoreCase))
                {
                    ms.Charges.Add(int.Parse(chargeText));
                }
            }
            else if (line.StartsWith("PEPMASS"))
            {
                string[] cols = Regex.Split(line, "=");
                string[] nums = Regex.Split(cols[1], " ");

                // Use the first value; a leading space yields an empty first token.
                double chargedPrecursor;
                if (nums[0].Equals(""))
                {
                    chargedPrecursor = double.Parse(nums[1]);
                }
                else
                {
                    chargedPrecursor = double.Parse(nums[0]);
                }

                // NOTE(review): this indexes Charges[0], so it relies on a CHARGE line having
                // been read earlier in the same block - confirm whether files with PEPMASS
                // before CHARGE (or without CHARGE) need to be supported.
                ms.ChargedPrecursor = (chargedPrecursor + ((double)ms.Charges[0] * 1.007276466)) / (double)ms.Charges[0];
                ms.ZLines.Add("Z\t" + ms.Charges[0] + "\t" + PatternTools.pTools.DechargeMSPeakToPlus1(ms.ChargedPrecursor, ms.Charges[0]));
            }
            else if (line.StartsWith("RTINSECONDS"))
            {
                string[] cols = Regex.Split(line, "=");
                ms.CromatographyRetentionTime = ParseFirstValueOfMgfRange(cols[1]);
            }
            else if (line.StartsWith("SCANS"))
            {
                string[] cols = Regex.Split(line, "=");
                ms.ScanNumber = (int)ParseFirstValueOfMgfRange(cols[1]);
            }
            else if (line.StartsWith("TITLE"))
            {
                // The whole TITLE line is kept as an I-line.
                ms.Ilines.Add(line);

                if (line.Contains("Fragmentation:hcd"))
                {
                    ms.ActivationType = "HCD";
                    ms.InstrumentType = "FTMS";
                }
            }
            else if (Regex.IsMatch(line, "^[0-9]+"))
            {
                // A line that begins with a digit is an ion line: m/z, then intensity.
                string[] cols = Regex.Split(line, "\t| ");

                try
                {
                    PatternTools.MSParser.Ion i = new PatternTools.MSParser.Ion(double.Parse(cols[0]), double.Parse(cols[1]), ms.CromatographyRetentionTime, ms.ScanNumber);
                    ms.MSData.Add(i);
                }
                catch (Exception ex)
                {
                    // Same message as before; the cause is now preserved as the inner exception.
                    throw new Exception("An inconsistency has been found in file: " + file + "\nThe line reads:\n" + line, ex);
                }
            }
        }
    }

    // Store the last spectrum of the file.
    if (insideSpectrum)
    {
        theMS2.Add(ms);
    }

    return theMS2;
}

// Parses a numeric MGF value that may be written as a "first-last" range, returning the first number.
private static double ParseFirstValueOfMgfRange(string text)
{
    int dash = text.IndexOf('-');
    return double.Parse(dash >= 0 ? text.Substring(0, dash) : text);
}