/// <summary>
/// Splits an LC-IMS-MS Feature into one new LC-IMS-MS Feature per peak (conformation) detected in its drift profile.
/// </summary>
/// <remarks>
/// The drift profile is KDE-smoothed using <c>Settings.SmoothingStDev</c>, sampled at every IMS scan between the
/// smallest and largest IMS scan of the feature's IMS-MS Features, and cut into peaks wherever the smoothed
/// intensity starts rising again after having fallen. Only samples above 5% of the smoothed maximum are used,
/// and a peak is kept only if it has at least 7 points after zero padding.
/// </remarks>
/// <param name="driftProfilePeak">Drift profile in which to look for peaks.</param>
/// <param name="lcimsmsFeature">Feature whose IMS-MS Features define the IMS scan range and supply the MS Features copied into the results.</param>
/// <param name="averageTOFLength">Forwarded to <c>ConvertIMSScanToDriftTime</c> when computing each result's drift time.</param>
/// <param name="framePressure">Forwarded to <c>ConvertIMSScanToDriftTime</c> when computing each result's drift time.</param>
/// <returns>
/// One feature per detected peak that contains at least one MS Feature inside the peak's IMS scan range, in order
/// of detection. <c>MaxMemberCount</c> of every returned feature is the largest member count among them.
/// Empty when no such peak is found.
/// </returns>
public static IEnumerable<LCIMSMSFeature> FindDriftTimePeaks(Peak driftProfilePeak, LCIMSMSFeature lcimsmsFeature, double averageTOFLength, double framePressure)
{
    // Samples at or below this fraction of the smoothed maximum are ignored
    const double minimumRelativeIntensity = 0.05;

    // Number of zero-intensity points requested from PadXYPairsWithZeros for each detected peak
    const int zeroPadding = 2;

    // Peaks with fewer points than this (after padding) are discarded
    const int minimumPointsPerPeak = 7;

    var globalIMSScanMinimum = double.MaxValue;
    var globalIMSScanMaximum = double.MinValue;

    // Find the global minimum and maximum IMS scan over all IMS-MS Features.
    // The result does not depend on iteration order, so the features are not sorted first.
    foreach (var imsmsFeature in lcimsmsFeature.IMSMSFeatureList)
    {
        imsmsFeature.GetMinAndMaxIMSScan(out var localIMSScanMinimum, out var localIMSScanMaximum);

        if (localIMSScanMinimum < globalIMSScanMinimum)
        {
            globalIMSScanMinimum = localIMSScanMinimum;
        }

        if (localIMSScanMaximum > globalIMSScanMaximum)
        {
            globalIMSScanMaximum = localIMSScanMaximum;
        }
    }

    // TODO: Find a good value. 0.15? Less smooth = more conformations!
    var smoothedDriftProfilePeak = PeakUtil.KDESmooth(driftProfilePeak, Settings.SmoothingStDev);
    var smoothedDriftProfileInterpolation = PeakUtil.GetLinearInterpolationMethod(smoothedDriftProfilePeak);

    var minimumIntensityToConsider = smoothedDriftProfilePeak.GetMaximumYValue() * minimumRelativeIntensity;

    var peakList = new List<Peak>();

    // Pads the collected points with zeros and keeps the resulting peak only if it has enough points
    void KeepPeakIfLargeEnough(List<XYPair> points)
    {
        PadXYPairsWithZeros(ref points, zeroPadding);
        var candidatePeak = new Peak(points);

        if (candidatePeak.XYPairList.Count >= minimumPointsPerPeak)
        {
            peakList.Add(candidatePeak);
        }
    }

    var xyPairList = new List<XYPair>();
    var previousIntensity = double.MinValue;
    var movingUp = true;

    // If no IMS-MS Features were seen, minimum > maximum and this loop does not run
    for (var imsScan = globalIMSScanMinimum; imsScan <= globalIMSScanMaximum; imsScan += 1)
    {
        var intensity = smoothedDriftProfileInterpolation.Interpolate(imsScan);

        if (intensity > minimumIntensityToConsider)
        {
            if (intensity > previousIntensity)
            {
                // Rising again after a descent (or after a gap below the threshold): the current peak has ended.
                // NOTE(review): when the very first usable sample follows a below-threshold sample, movingUp stays
                // false, so that single sample is split off as its own (too small) peak on the next rise.
                if (!movingUp && xyPairList.Count > 0)
                {
                    KeepPeakIfLargeEnough(xyPairList);

                    // Start over with a new Peak. A fresh list is allocated (rather than clearing the old one)
                    // so the stored peak cannot be affected even if Peak keeps a reference to the list it was given.
                    xyPairList = new List<XYPair>();
                    movingUp = true;
                }
            }
            else
            {
                movingUp = false;
            }

            xyPairList.Add(new XYPair(imsScan, intensity));
            previousIntensity = intensity;
        }
        else
        {
            movingUp = false;
            previousIntensity = 0;
        }
    }

    // When you get to the end, end the last Peak, but only if it has a value above the threshold
    if (xyPairList.Any(xyPair => xyPair.YValue > minimumIntensityToConsider))
    {
        KeepPeakIfLargeEnough(xyPairList);
    }

    var newLCIMSMSFeatureList = new List<LCIMSMSFeature>();

    foreach (var peak in peakList)
    {
        var repIMSScan = peak.GetQuadraticFit();

        // TODO: Fix this; the theoretical FWHM should be derived from the drift time and the resolving power
        double theoreticalFWHM = 3;

        peak.GetMinAndMaxXValues(out var minimumXValue, out var maximumXValue);

        // Score the detected peak against a theoretical Gaussian centered on the representative IMS scan
        const int numPoints = 100;

        var normalDistributionXYPairList = PeakUtil.CreateTheoreticalGaussianPeak(repIMSScan, theoreticalFWHM, numPoints);
        PadXYPairsWithZeros(ref normalDistributionXYPairList, 5);
        var normalDistributionPeak = new Peak(normalDistributionXYPairList);

        var peakInterpolation = PeakUtil.GetLinearInterpolationMethod(peak);
        var fitScore = PeakUtil.CalculatePeakFit(peak, normalDistributionPeak, 0);

        // Create a new LC-IMS-MS Feature
        var newLCIMSMSFeature = new LCIMSMSFeature(lcimsmsFeature.Charge)
        {
            OriginalIndex = lcimsmsFeature.OriginalIndex,
            IMSScore = (float)fitScore,
            AbundanceMaxRaw = Math.Round(peak.GetMaximumYValue()),
            // Using Math.Floor instead of Math.Round because this used to be cast to an int, which is essentially Math.Floor.
            // The difference is negligible, but results must stay identical when the app is re-run on the same dataset.
            AbundanceSumRaw = Math.Floor(peakInterpolation.Integrate(peak.GetMaximumXValue())),
            DriftTime = ConvertIMSScanToDriftTime(repIMSScan, averageTOFLength, framePressure)
        };

        // Create new IMS-MS Features by grabbing MS Features in each LC Scan that are in the defined window of the detected drift time
        foreach (var imsmsFeature in lcimsmsFeature.IMSMSFeatureList)
        {
            var msFeatures = imsmsFeature.FindMSFeaturesInScanIMSRange(minimumXValue, maximumXValue).ToList();

            if (!msFeatures.Any())
            {
                continue;
            }

            var newIMSMSFeature = new IMSMSFeature(imsmsFeature.ScanLC, imsmsFeature.Charge);
            newIMSMSFeature.AddMSFeatureList(msFeatures);
            newLCIMSMSFeature.AddIMSMSFeature(newIMSMSFeature);
        }

        // TODO: Figure out why a peak can end up without members. It looks like most of these occurrences are due to
        // large gaps in the drift time, which cause a small peak to be found in a gap that has no members.
        // Such peaks are skipped.
        if (newLCIMSMSFeature.IMSMSFeatureList.Count > 0)
        {
            newLCIMSMSFeatureList.Add(newLCIMSMSFeature);

            // TODO: Find LC Peaks
            // TODO: Calculate LC Score
        }
    }

    // Find the Conformation that has the highest member count and store the value into all conformations of this LC-IMS-MS Feature
    if (newLCIMSMSFeatureList.Count > 0)
    {
        var maxMemberCount = newLCIMSMSFeatureList.Max(feature => feature.GetMemberCount());

        foreach (var feature in newLCIMSMSFeatureList)
        {
            feature.MaxMemberCount = maxMemberCount;
        }
    }

    return newLCIMSMSFeatureList;
}
/// <summary>
/// Runs the cross-link search against LC-IMS-TOF data.
/// </summary>
/// <remarks>
/// Besides producing results, this method copies <c>StaticDeltaMass</c>, <c>UseC13</c> and <c>UseN15</c> from
/// <paramref name="settings"/> onto the static <c>CrossLinkUtil</c> members, writes progress to the console,
/// and sorts <paramref name="peakList"/> in place (by LC scan, then IMS scan, then m/z).
/// <paramref name="featureList"/> itself is not reordered; a sorted copy is searched.
/// </remarks>
/// <param name="settings">Settings object controlling the cross-link search (mass tolerance in ppm, missed cleavages, digestion rule, delta mass options).</param>
/// <param name="proteinSequenceEnumerable">Protein sequences, as .NET Bio ISequence objects.</param>
/// <param name="featureList">LC-IMS-MS Features, as LcImsMsFeature.</param>
/// <param name="peakList">Isotopic Peaks, as IsotopicPeak.</param>
/// <returns>
/// One CrossLinkResult per combination of theoretical cross-link, mass-matching feature, and LC scan of that feature.
/// Each result records, for every shifted mass searched, whether an isotopic profile was found.
/// </returns>
public static IList<CrossLinkResult> Execute(
    CrossLinkSettings settings,
    IEnumerable<ISequence> proteinSequenceEnumerable,
    List<LcImsMsFeature> featureList,
    List<IsotopicPeak> peakList)
{
    var ppmTolerance = settings.MassTolerance;
    var missedCleavageLimit = settings.MaxMissedCleavages;
    var trypticType = settings.TrypticType;

    CrossLinkUtil.StaticDeltaMass = settings.StaticDeltaMass;
    CrossLinkUtil.UseC13 = settings.UseC13;
    CrossLinkUtil.UseN15 = settings.UseN15;

    Console.WriteLine();
    Console.WriteLine("Mass Tolerance: " + ppmTolerance + " ppm");
    Console.WriteLine("Max missed cleavages: " + missedCleavageLimit);
    Console.WriteLine("Digestion rule: " + settings.TrypticType);
    Console.WriteLine("Delta mass uses C13: " + settings.UseC13);
    Console.WriteLine("Delta mass uses N15: " + settings.UseN15);
    Console.WriteLine("Static delta mass addon: " + settings.StaticDeltaMass + " Da");

    // Locates Isotopic Profiles in the peak data
    var profileFinder = new BasicTFF();

    // Step 1: digest every protein and collect its theoretical cross-links
    var theoreticalCrossLinks = new List<CrossLink>();
    var lastReport = DateTime.UtcNow;
    var proteinCount = 0;

    foreach (var protein in proteinSequenceEnumerable)
    {
        var residues = new string(protein.Select(symbol => (char)symbol).ToArray());
        var proteinId = protein.ID;

        var peptides = SequenceUtil.DigestProtein(residues, trypticType, missedCleavageLimit);
        theoreticalCrossLinks.AddRange(CrossLinkUtil.GenerateTheoreticalCrossLinks(peptides, residues, proteinId));

        proteinCount++;

        // Report at most once every 15 seconds
        if ((DateTime.UtcNow - lastReport).TotalSeconds < 15)
        {
            continue;
        }

        lastReport = DateTime.UtcNow;
        Console.WriteLine("Creating cross linked peptide list; " + proteinCount + " proteins processed");
    }

    Console.WriteLine("Sorting cross-linked peptides");

    // Step 2: order everything that will be searched.
    // Cross-links are visited by ascending mass so that the results come out in a friendly order (evaluated lazily).
    IEnumerable<CrossLink> crossLinksByMass =
        from candidate in theoreticalCrossLinks
        orderby candidate.Mass
        select candidate;

    // Features are binary-searched by monoisotopic mass, so work on a mass-sorted copy
    var featuresByMass = featureList.OrderBy(item => item.MassMonoisotopic).ToList();

    var featureComparer = new AnonymousComparer<LcImsMsFeature>(
        (left, right) => left.MassMonoisotopic.CompareTo(right.MassMonoisotopic));

    // Peaks are ordered by LC Scan, then IMS Scan, then m/z, to set them up for binary search later on
    var peakComparer = new AnonymousComparer<IsotopicPeak>(
        (left, right) =>
            left.ScanLc != right.ScanLc
                ? left.ScanLc.CompareTo(right.ScanLc)
                : left.ScanIms != right.ScanIms
                    ? left.ScanIms.CompareTo(right.ScanIms)
                    : left.Mz.CompareTo(right.Mz));

    peakList.Sort(peakComparer);

    // Step 3: search the data for evidence of each theoretical cross-link
    var results = new List<CrossLinkResult>();
    var candidateTotal = theoreticalCrossLinks.Count;

    Console.WriteLine("Searching isotopic data vs. " + candidateTotal.ToString("#,##0") + " candidate cross-linked peptides");

    lastReport = DateTime.UtcNow;
    var candidatesDone = 0;

    foreach (var crossLink in crossLinksByMass)
    {
        // Convert the ppm tolerance into an absolute mass window for this cross-link
        var massWindow = ppmTolerance * crossLink.Mass / GeneralConstants.PPM_DIVISOR;

        var lowerProbe = new LcImsMsFeature { MassMonoisotopic = crossLink.Mass - massWindow };
        var upperProbe = new LcImsMsFeature { MassMonoisotopic = crossLink.Mass + massWindow };

        var firstIndex = featuresByMass.BinarySearch(lowerProbe, featureComparer);
        var endIndex = featuresByMass.BinarySearch(upperProbe, featureComparer);

        // A negative result is the bitwise complement of the insertion point
        if (firstIndex < 0)
        {
            firstIndex = ~firstIndex;
        }

        if (endIndex < 0)
        {
            endIndex = ~endIndex;
        }

        // Visit every LC-IMS-MS Feature whose mass matches the unmodified cross-link mass
        for (var featureIndex = firstIndex; featureIndex < endIndex; featureIndex++)
        {
            var feature = featuresByMass[featureIndex];

            // Look for a mass shift in each LC Scan in which the unmodified cross-link mass was found
            for (var scanLc = feature.ScanLcStart; scanLc <= feature.ScanLcEnd; scanLc++)
            {
                var result = new CrossLinkResult(crossLink, feature, scanLc);
                var candidatePeaks = PeakUtil.FindCandidatePeaks(peakList, feature.MzMonoisotopic, scanLc, feature.ScanImsRep);

                // Work out which shifted masses to search for: one shift gives one mass,
                // two shifts give each shift on its own plus both together; any other count gives none
                var shifts = crossLink.MassShiftList;
                var shiftedMasses = new List<double>();

                if (shifts.Count == 1)
                {
                    shiftedMasses.Add(feature.MassMonoisotopic + shifts[0]);
                }
                else if (shifts.Count == 2)
                {
                    var firstOnly = feature.MassMonoisotopic + shifts[0];
                    var secondOnly = feature.MassMonoisotopic + shifts[1];
                    var bothShifts = feature.MassMonoisotopic + shifts[0] + shifts[1];

                    shiftedMasses.Add(firstOnly);
                    shiftedMasses.Add(secondOnly);
                    shiftedMasses.Add(bothShifts);
                }

                // Search the Isotopic Peaks for each shifted mass
                foreach (var shiftedMass in shiftedMasses)
                {
                    var shiftedMz = (shiftedMass / feature.ChargeState) + GeneralConstants.MASS_OF_PROTON;

                    // Theoretical Isotopic Peaks: the mono peak plus three peaks on either side, with decreasing height
                    var theoreticalPeaks = new List<MSPeak>
                    {
                        new MSPeak { XValue = shiftedMz, Height = 1 }
                    };

                    for (double step = 1; step < 4; step++)
                    {
                        var mzOffset = step * 1.003 / feature.ChargeState;
                        var relativeHeight = (float)(1.0 - (step / 4));

                        theoreticalPeaks.Add(new MSPeak { XValue = shiftedMz + mzOffset, Height = relativeHeight });
                        theoreticalPeaks.Add(new MSPeak { XValue = shiftedMz - mzOffset, Height = relativeHeight });
                    }

                    // Lazily sorted by m/z; re-evaluated each time a peak list is materialized
                    var peaksByMz = theoreticalPeaks.OrderBy(peak => peak.XValue);

                    // Theoretical Isotopic Profile for DeconTools to search for
                    var targetProfile = new IsotopicProfile
                    {
                        MonoIsotopicMass = shiftedMass,
                        MonoPeakMZ = shiftedMz,
                        ChargeState = feature.ChargeState,
                        Peaklist = peaksByMz.ToList()
                    };

                    var foundProfile = profileFinder.FindMSFeature(candidatePeaks, targetProfile, ppmTolerance, false);

                    // The mono peak chosen above may really be the right-most peak of the actual distribution.
                    // If nothing was found, slide the theoretical distribution one isotope spacing to the left and retry.
                    if (foundProfile == null)
                    {
                        var isotopeSpacing = 1.003 / feature.ChargeState;

                        foreach (var theoreticalPeak in theoreticalPeaks)
                        {
                            theoreticalPeak.XValue -= isotopeSpacing;
                        }

                        targetProfile = new IsotopicProfile
                        {
                            MonoIsotopicMass = shiftedMass - 1.003,
                            MonoPeakMZ = shiftedMz - isotopeSpacing,
                            ChargeState = feature.ChargeState,
                            Peaklist = peaksByMz.ToList()
                        };

                        foundProfile = profileFinder.FindMSFeature(candidatePeaks, targetProfile, ppmTolerance, false);
                    }

                    // Record the outcome even when the profile was not found
                    result.MassShiftResults.KvpList.Add(new KeyValuePair<double, bool>(shiftedMass, foundProfile != null));
                }

                results.Add(result);
            }
        }

        candidatesDone++;

        // Report at most once every 10 seconds
        if ((DateTime.UtcNow - lastReport).TotalSeconds < 10)
        {
            continue;
        }

        lastReport = DateTime.UtcNow;
        var percentComplete = candidatesDone / (double)candidateTotal * 100;
        Console.WriteLine("Searching isotopic data; " + percentComplete.ToString("0.0") + "% complete");
    }

    return results;
}