/// <summary>
/// Retrieves the parent (MS1) spectrum for the given scan, restricted to an m/z window.
/// </summary>
/// <param name="path">Path to the raw data file.</param>
/// <param name="scan">Scan number to load.</param>
/// <param name="minMz">Minimum m/z to include.</param>
/// <param name="maxMz">Maximum m/z to include.</param>
/// <returns>The peaks within the m/z window, or null if the spectrum could not be loaded.</returns>
public static List<XYData> GetParentSpectrum(string path, int scan, double minMz, double maxMz)
{
    ISpectraProvider provider = GetProvider(path);
    if (provider == null)
    {
        return null;
    }

    List<XYData> spectrum = null;
    try
    {
        var summary = new ScanSummary();
        spectrum = provider.GetRawSpectra(scan, 1, out summary);
    }
    catch
    {
        Logger.PrintMessage("Could not load the raw spectra");
        return null;
    }

    if (spectrum == null)
    {
        return null;
    }

    var data = (from x in spectrum
                where x.X > minMz && x.X < maxMz
                select x).ToList();
    return data;
}
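// Illustrative usage sketch (not part of the original source): shows how the helper above might be
// called. It assumes it lives in the same utility class as GetParentSpectrum; the file path, scan
// number, and m/z window are hypothetical values.
public static void PrintBasePeakExample()
{
    var peaks = GetParentSpectrum(@"C:\data\sample.raw", 2950, 799.5, 800.5);
    if (peaks == null)
    {
        return;
    }

    // Report the most intense peak in the requested window.
    var basePeak = peaks.OrderByDescending(p => p.Y).First();
    Console.WriteLine("Base peak: {0:F4} m/z, intensity {1:E2}", basePeak.X, basePeak.Y);
}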
private MSSpectra GetSpectrum(ISpectraProvider reader, int scan, int group, double mzTolerance = .5)
{
    var summary = new ScanSummary();
    var peaks = reader.GetRawSpectra(scan, 2, out summary);

    var spectrum = new MSSpectra();
    spectrum.Peaks = peaks;

    return spectrum;
}
/// <summary>
/// Creates an XIC from the given set of target features.
/// </summary>
/// <param name="msFeatures">Seed features that provide the targets</param>
/// <param name="massError">Mass error to use when pulling peaks</param>
/// <param name="provider">Object that reads spectra from the raw data source.</param>
/// <returns>The MS features that make up the XIC.</returns>
public IEnumerable<MSFeatureLight> CreateXic(IList<MSFeatureLight> msFeatures, double massError, ISpectraProvider provider)
{
    var newFeatures = new List<MSFeatureLight>();
    if (msFeatures.Count <= 0)
    {
        return newFeatures;
    }

    var minScan = msFeatures[0].Scan;
    var maxScan = msFeatures[msFeatures.Count - 1].Scan;
    minScan -= 100;
    maxScan += 100;
    minScan = Math.Max(0, minScan);

    var min = double.MaxValue;
    var max = double.MinValue;
    double maxIntensity = 0;
    var featureMap = new Dictionary<int, MSFeatureLight>();
    double mz = 0;

    foreach (var chargeFeature in msFeatures)
    {
        min = Math.Min(min, chargeFeature.Mz);
        max = Math.Max(max, chargeFeature.Mz);

        if (chargeFeature.Abundance > maxIntensity)
        {
            maxIntensity = chargeFeature.Abundance;
            mz = chargeFeature.Mz;
        }

        // Map the feature...
        if (!featureMap.ContainsKey(chargeFeature.Scan))
        {
            featureMap.Add(chargeFeature.Scan, chargeFeature);
        }
    }

    var features = CreateXic(mz, massError, minScan, maxScan, provider);
    foreach (var msFeature in features)
    {
        var scan = msFeature.Scan;
        if (featureMap.ContainsKey(msFeature.Scan))
        {
            featureMap[scan].Abundance = msFeature.Abundance;
        }
        newFeatures.Add(msFeature);
    }
    return newFeatures;
}
/// <summary>
/// Reconstructs the MS/MS spectra for a feature.
/// Requires that the feature's LCMS and MS features have already been reconstructed.
/// </summary>
/// <param name="umc">The feature to reconstruct.</param>
/// <param name="provider">Object that reads spectra from the raw data source.</param>
/// <param name="getPeaks">Whether to load the peak data (not used in this method body).</param>
public static void ReconstructUMCMsMs(this UMCLight umc, ISpectraProvider provider, bool getPeaks = true)
{
    foreach (var msFeature in umc.Features)
    {
        var fragmentationSpectra = provider.GetMSMSSpectra(msFeature.Scan, msFeature.Mz, false);
        msFeature.MSnSpectra.AddRange(fragmentationSpectra);
    }
}
public static void LoadMsMs(List<UMCLight> features, ISpectraProvider spectraProvider)
{
    foreach (var feature in features)
    {
        foreach (var msFeature in feature.Features)
        {
            var fragmentationSpectra = spectraProvider.GetMSMSSpectra(msFeature.Scan, msFeature.Mz, false);
            msFeature.MSnSpectra.AddRange(fragmentationSpectra);
        }
    }
}
public static MSSpectra GetSpectrum(ISpectraProvider reader, int scan, int group, double mzTolerance = .5)
{
    var summary = new ScanSummary();
    var spectrum = reader.GetSpectrum(scan, group, 2, out summary, true);

    if (ShouldLogScale)
    {
        foreach (var peak in spectrum.Peaks)
        {
            peak.Y = Math.Log(peak.Y, 2);
        }
    }

    return spectrum;
}
/// <summary>
/// Creates an XIC for the m/z value provided over a range of scans.
/// </summary>
/// <param name="mz">Target m/z value.</param>
/// <param name="massError">Mass error (ppm) used to compute the m/z window.</param>
/// <param name="minScan">First scan of the range.</param>
/// <param name="maxScan">Last scan of the range.</param>
/// <param name="provider">Object that reads spectra from the raw data source.</param>
/// <returns>One MS feature per scan holding the summed intensity within the m/z window.</returns>
public IEnumerable<MSFeatureLight> CreateXic(double mz, double massError, int minScan, int maxScan, ISpectraProvider provider)
{
    var newFeatures = new List<MSFeatureLight>();
    var lower = FeatureLight.ComputeDaDifferenceFromPPM(mz, massError);
    var higher = FeatureLight.ComputeDaDifferenceFromPPM(mz, -massError);

    for (var i = minScan; i < maxScan; i++)
    {
        List<XYData> spectrum = null;
        try
        {
            var summary = new ScanSummary();
            spectrum = provider.GetRawSpectra(i, 0, 1, out summary);
        }
        catch
        {
        }

        if (spectrum == null)
        {
            continue;
        }

        var data = (from x in spectrum
                    where x.X > lower && x.X < higher
                    select x).ToList();

        var summedIntensity = data.Sum(x => x.Y);
        var newFeature = new MSFeatureLight
        {
            Scan = i,
            Net = i,
            Abundance = Convert.ToInt64(summedIntensity)
        };
        newFeatures.Add(newFeature);
    }
    return newFeatures;
}
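// Illustrative usage sketch (not part of the original source): builds an XIC for a hypothetical
// precursor across a scan window and reports the apex scan. It assumes it lives in the same class
// as the CreateXic overload above; the 15 ppm tolerance, m/z, and scan range are made-up values,
// and any concrete ISpectraProvider implementation would do.
public void PrintXicApexExample(ISpectraProvider provider)
{
    // Hypothetical target: m/z 800.40 with a 15 ppm window over scans 2000-2400.
    var xic = CreateXic(800.40, 15, 2000, 2400, provider).ToList();

    // The apex is simply the scan with the largest summed intensity.
    var apex = xic.OrderByDescending(f => f.Abundance).First();
    Console.WriteLine("XIC apex at scan {0} with abundance {1}", apex.Scan, apex.Abundance);
}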
/// <summary>
/// Finds LCMS Features using the PNNL Omics linkage clustering algorithms.
/// </summary>
public List<UMCLight> FindFeatures(List<MSFeatureLight> rawMsFeatures, LCMSFeatureFindingOptions options, ISpectraProvider provider)
{
    const ClusterCentroidRepresentation centroidType = ClusterCentroidRepresentation.Mean;
    List<UMCLight> features = null;

    m_options = options;
    m_minScan = int.MaxValue;
    m_maxScan = int.MinValue;
    foreach (var feature in rawMsFeatures)
    {
        m_minScan = Math.Min(feature.Scan, m_minScan);
        m_maxScan = Math.Max(feature.Scan, m_maxScan);
    }

    var finder = new MSFeatureSingleLinkageClustering<MSFeatureLight, UMCLight>
    {
        Parameters =
        {
            DistanceFunction = WeightedNETDistanceFunction,
            RangeFunction = WithinRange,
            Tolerances =
            {
                Mass = options.ConstraintMonoMass,
                RetentionTime = 100,
                DriftTime = 100
            }
        }
    };
    finder.Parameters.CentroidRepresentation = centroidType;
    m_maxDistance = options.MaxDistance;
    features = finder.Cluster(rawMsFeatures);

    // Remove the short UMC's.
    features.RemoveAll(x => (x.ScanEnd - x.ScanStart + 1) < options.MinUMCLength);

    var id = 0;
    foreach (var feature in features)
    {
        feature.NET = Convert.ToDouble(feature.Scan - m_minScan) / Convert.ToDouble(m_maxScan - m_minScan);
        feature.RetentionTime = feature.NET;
        feature.ID = id++;
    }
    return features;
}
public CachedFeatureSpectraProvider(ISpectraProvider reader, IEnumerable<UMCLight> features)
{
    m_reader = reader;
    m_spectraMap = new Dictionary<int, MSSpectra>();

    // Sort out the features to make a dictionary so we can look up spectra
    // and summary information later on without having to touch the disk again...and
    // this restricts all possible spectra to those that came from deisotoped data.
    foreach (var feature in features)
    {
        foreach (var msFeature in feature.MsFeatures)
        {
            foreach (var spectrum in msFeature.MSnSpectra)
            {
                if (!m_spectraMap.ContainsKey(spectrum.Scan))
                    m_spectraMap.Add(spectrum.Scan, spectrum);
            }
        }
    }
}
protected static SpectralAnalysis MatchDatasets(SpectralComparison comparerType,
    ISpectraProvider readerX,
    ISpectraProvider readerY,
    SpectralOptions options,
    AlignmentDataset datasetX,
    AlignmentDataset datasetY,
    List<string> names)
{
    var peptideReader = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
    var finder = new SpectralAnchorPointFinder();
    var validator = new SpectralAnchorPointValidator();
    var comparer = SpectralComparerFactory.CreateSpectraComparer(comparerType);
    var filter = SpectrumFilterFactory.CreateFilter(SpectraFilters.TopPercent);

    var matches = finder.FindAnchorPoints(readerX, readerY, comparer, filter, options);

    var peptidesX = peptideReader.Read(datasetX.PeptideFile);
    var peptidesY = peptideReader.Read(datasetY.PeptideFile);
    validator.ValidateMatches(matches, peptidesX, peptidesY, options);

    var analysis = new SpectralAnalysis
    {
        DatasetNames = names,
        Matches = matches,
        Options = options
    };
    return analysis;
}
public static MSSpectra GetSpectra(double mzTolerance,
    double percent,
    ISpectraFilter filter,
    ISpectraProvider readerY,
    int scany,
    int numberRequiredPeaks)
{
    var spectrum = GetSpectrum(readerY, scany, 0, mzTolerance);
    if (spectrum.Peaks.Count < numberRequiredPeaks)
    {
        return null;
    }

    spectrum.Peaks = filter.Threshold(spectrum.Peaks, percent);
    spectrum.Peaks = XYData.Bin(spectrum.Peaks, 0, 2000, mzTolerance);
    return spectrum;
}
public static List<XYData> GetDaughterSpectrum(string path, int scan)
{
    ISpectraProvider provider = GetProvider(path);
    if (provider == null)
    {
        return null;
    }

    List<XYData> spectrum = null;
    try
    {
        var summary = new ScanSummary();
        spectrum = provider.GetRawSpectra(scan, 2, out summary);
    }
    catch
    {
        Logger.PrintMessage("Could not load the raw spectra");
        return null;
    }
    return spectrum;
}
public RawLoaderCache(ISpectraProvider provider)
{
    m_summaryMap = new Dictionary<int, Dictionary<int, ScanSummary>>();
    m_provider = provider;
}
/// <summary>
/// Finds features by clustering MS features into LC-MS features.
/// </summary>
/// <returns>The list of LC-MS features found.</returns>
public List<UMCLight> FindFeatures(List<MSFeatureLight> msFeatures, LcmsFeatureFindingOptions options, ISpectraProvider provider)
{
    var clusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>
    {
        Tolerances = new FeatureTolerances
        {
            Mass = options.InstrumentTolerances.Mass,
            Net = options.MaximumNetRange
        },
        ScanTolerance = options.MaximumScanRange,
        SpectraProvider = provider
        //TODO: Make sure we have a mass range for XIC's too....
    };

    OnStatus("Starting cluster definition");
    clusterer.Progress += (sender, args) => OnStatus(args.Message);

    var features = clusterer.Cluster(msFeatures);

    var minScan = int.MaxValue;
    var maxScan = int.MinValue;
    foreach (var feature in msFeatures)
    {
        minScan = Math.Min(feature.Scan, minScan);
        maxScan = Math.Max(feature.Scan, maxScan);
    }

    var id = 0;
    var newFeatures = new List<UMCLight>();
    foreach (var feature in features)
    {
        if (feature.MsFeatures.Count < 1)
            continue;

        feature.Net = Convert.ToDouble(feature.Scan - minScan) / Convert.ToDouble(maxScan - minScan);
        feature.CalculateStatistics(ClusterCentroidRepresentation.Median);
        feature.Id = id++;
        newFeatures.Add(feature);

        // Sets the width of the feature to be the width of the peak, not the width of the tails
        var maxAbundance = double.MinValue;
        var maxAbundanceIndex = 0;
        for (var msFeatureIndex = 0; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
        {
            var msFeature = feature.MsFeatures[msFeatureIndex];
            if (msFeature.Abundance > maxAbundance)
            {
                maxAbundance = msFeature.Abundance;
                maxAbundanceIndex = msFeatureIndex;
            }
        }

        // Walk left from the apex until the abundance drops below 5% of the maximum.
        for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex > 0; msFeatureIndex--)
        {
            if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
            {
                feature.ScanStart = feature.MsFeatures[msFeatureIndex].Scan;
                break;
            }
        }

        // Walk right from the apex until the abundance drops below 5% of the maximum.
        for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
        {
            if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
            {
                feature.ScanEnd = feature.MsFeatures[msFeatureIndex].Scan;
                break;
            }
        }
    }
    return features;
}
/// <summary>
/// Creates SIC's mapped by charge state for the MS Features in the feature.
/// </summary>
/// <param name="feature">Feature whose MS features are used to build the SICs.</param>
/// <param name="provider">Object that can read data from a raw file or data source.</param>
/// <returns>A map from charge state to the SIC points for that charge.</returns>
public static Dictionary<int, List<XYZData>> CreateChargeSIC(this UMCLight feature, ISpectraProvider provider)
{
    var chargeMap = feature.CreateChargeMap();
    var sicMap = new Dictionary<int, List<XYZData>>();

    foreach (var charge in chargeMap.Keys)
    {
        chargeMap[charge].Sort(delegate(MSFeatureLight x, MSFeatureLight y)
        {
            return x.Scan.CompareTo(y.Scan);
        });
        var data = chargeMap[charge].ConvertAll(x => new XYZData(x.Scan, x.Abundance, x.Mz));
        sicMap.Add(charge, data);
    }

    if (provider != null)
    {
        // Creates an SIC map for a given charge state of the feature.
        foreach (var charge in sicMap.Keys)
        {
            var data = sicMap[charge];

            // The data is already sorted.
            var minScan = int.MaxValue;
            var maxScan = int.MinValue;
            var mzValues = new List<double>();
            foreach (var x in data)
            {
                mzValues.Add(x.Z);
                minScan = Math.Min(minScan, Convert.ToInt32(x.X));
                maxScan = Math.Max(maxScan, Convert.ToInt32(x.X));
            }
            mzValues.Sort();
            double mz = 0;
            var mid = Convert.ToInt32(mzValues.Count / 2);
            mz = mzValues[mid];
            minScan -= 20;
            maxScan += 20;

            // Build the SIC
            var intensities = new List<XYZData>();
            for (var scan = minScan; scan < maxScan; scan++)
            {
                var summary = new ScanSummary();
                var spectrum = provider.GetRawSpectra(scan, 1, out summary);

                double intensity = 0;
                var minDistance = double.MaxValue;
                var index = -1;
                for (var i = 0; i < spectrum.Count; i++)
                {
                    // Use the absolute difference so the peak closest to the target m/z wins,
                    // not simply the lowest-m/z peak.
                    var distance = Math.Abs(spectrum[i].X - mz);
                    if (distance < minDistance)
                    {
                        index = i;
                        minDistance = distance;
                    }
                }

                if (index >= 0)
                {
                    intensity = spectrum[index].Y;
                }

                var newPoint = new XYZData(scan, intensity, mz);
                intensities.Add(newPoint);
            }
            sicMap[charge] = intensities;
        }
    }
    return sicMap;
}
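// Illustrative usage sketch (not part of the original source): walks the per-charge SICs produced
// by the extension method above and reports the apex of each. The feature and provider are assumed
// to come from an existing analysis; each XYZData point is taken to be (scan, intensity, m/z),
// matching how the points are constructed above.
public static void DumpChargeSicExample(UMCLight feature, ISpectraProvider provider)
{
    var sicMap = feature.CreateChargeSIC(provider);
    foreach (var charge in sicMap.Keys)
    {
        var points = sicMap[charge];
        var apex = points.OrderByDescending(p => p.Y).First();
        Console.WriteLine("Charge {0}: {1} points, apex intensity {2} at scan {3}",
            charge, points.Count, apex.Y, apex.X);
    }
}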
public IEnumerable<UMCLight> CreateXic(IList<UMCLight> features, double massError, ISpectraProvider provider)
{
    // this algorithm works as follows
    //
    // PART A - Build the XIC target list
    //     For each UMC Light, find the XIC representation
    //         for each charge in a feature
    //             from start scan to end scan
    //                 1. Compute a lower / upper m/z bound
    //                 2. build an XIC chromatogram object
    //                 3. reference the original UMC Feature -- this allows us to easily add
    //                    chromatograms to the corresponding feature
    //                 4. store the chromatogram (with unique ID across all features)
    //
    // PART B - Read Data From File
    //     Sort the list of XIC's by scan
    //     for each scan s = start scan to end scan
    //         1. find all xic's that start before and end after s -
    //             a. cache these xics in a dictionary based on unique id
    //             b. NOTE: this is why we sort so we can do an O(N) search for
    //                all XIC's that need data from this scan s
    //         2. Then for each XIC that needs data
    //             a. Pull intensity data from lower / upper m/z bound
    //             b. create an MS Feature
    //             c. store in original UMC Feature
    //             d. Test to see if the XIC is done building (Intensity < 1 or s > scan end)
    //         3. Remove features that are done building from cache
    //
    // CONCLUSIONS
    //     Building UMC's then takes linear time (well O(N Lg N) time if you consider sort)
    //     and theoretically is only bounded by the time it takes to read an entire raw file
    //
    if (features.Count <= 0)
    {
        throw new Exception("No features were available to create XIC's from");
    }

    var minScan = Math.Max(1, features.Min(x => x.Scan - ScanWindowSize));
    var maxScan = features.Max(x => x.Scan + ScanWindowSize);

    OnProgress("Sorting features for optimized scan partitioning");

    // PART A
    // Map the feature ID to the xic based features
    var xicFeatures = new SortedSet<XicFeature>();
    var allFeatures = CreateXicTargets(features, massError);

    // PART B
    // sort the features...
    var featureCount = allFeatures.Count;
    allFeatures = allFeatures.OrderBy(x => x.StartScan).ToList();

    // This map tracks all possible features to keep
    var msFeatureId = 0;

    // This list stores a temporary amount of parent MS features
    // so that we can link MS/MS spectra to MS Features
    var parentMsList = new List<MSFeatureLight>();

    // Creates a comparison function for building a BST from a spectrum.
    var msmsFeatureId = 0;

    var totalScans = provider.GetTotalScans(0);
    OnProgress(string.Format("Analyzing {0} scans", totalScans));

    // Iterate over all the scans...
    for (var currentScan = minScan; currentScan < maxScan && currentScan <= totalScans; currentScan++)
    {
        // Find any features that need data from this scan
        var featureIndex = 0;
        while (featureIndex < featureCount)
        {
            var xicFeature = allFeatures[featureIndex];

            // This means that no new features were eluting with this scan....
            if (xicFeature.StartScan > currentScan)
            {
                break;
            }

            // This means that there is a new feature...
            if (currentScan <= xicFeature.EndScan)
            {
                if (!xicFeatures.Contains(xicFeature))
                {
                    xicFeatures.Add(xicFeature);
                }
            }
            featureIndex++;
        }

        // Skip pulling the data from the file if there is nothing to pull from.
        if (xicFeatures.Count < 1)
        {
            continue;
        }

        // Here we link the MSMS Spectra to the UMC Features
        ScanSummary summary;
        var spectrum = provider.GetRawSpectra(currentScan, 0, 1, out summary);

        if (summary.MsLevel > 1)
        {
            // If it is an MS 2 spectra... then let's link it to the parent MS Feature
            var matching = parentMsList.Where(
                x => Math.Abs(x.Mz - summary.PrecursorMz) <= FragmentationSizeWindow);

            foreach (var match in matching)
            {
                // We create multiple spectra because this guy is matched to multiple ms features
                var spectraData = new MSSpectra
                {
                    Id = msmsFeatureId,
                    ScanMetaData = summary,
                    CollisionType = summary.CollisionType,
                    Scan = currentScan,
                    MsLevel = summary.MsLevel,
                    PrecursorMz = summary.PrecursorMz,
                    TotalIonCurrent = summary.TotalIonCurrent
                };
                match.MSnSpectra.Add(spectraData);
                spectraData.ParentFeature = match;
            }

            if (spectrum != null)
            {
                spectrum.Clear();
            }
            msmsFeatureId++;

            continue;
        }

        var mzList = new double[spectrum.Count];
        var intensityList = new double[spectrum.Count];
        XYData.XYDataListToArrays(spectrum, mzList, intensityList);
        Array.Sort(mzList, intensityList);

        // Tracks which spectra need to be removed from the cache
        var toRemove = new List<XicFeature>();

        // Tracks which features we need to link to MSMS spectra with
        parentMsList.Clear();

        // now we iterate through all features that need data from this scan
        foreach (var xic in xicFeatures)
        {
            var lower = xic.LowMz;
            var higher = xic.HighMz;

            var startIndex = Array.BinarySearch(mzList, lower);
            // A bitwise complement of the index, so use the bitwise complement
            if (startIndex < 0)
            {
                startIndex = ~startIndex;
            }

            double summedIntensity = 0;

            if (startIndex < mzList.Length && mzList[startIndex] < lower)
            {
                // All data in the list is lighter than lower; nothing to sum
            }
            else
            {
                while (startIndex < mzList.Length && mzList[startIndex] <= higher)
                {
                    summedIntensity += intensityList[startIndex];
                    startIndex++;
                }
            }

            // See if we need to remove this feature
            // We only do so if the intensity has dropped off and we are past the end of the feature.
            if (summedIntensity < 1 && currentScan > xic.EndScan)
            {
                toRemove.Add(xic);
                continue;
            }

            var umc = xic.Feature;

            // otherwise create a new feature here...
            var msFeature = new MSFeatureLight
            {
                ChargeState = xic.ChargeState,
                Mz = xic.Mz,
                MassMonoisotopic = umc.MassMonoisotopic,
                Scan = currentScan,
                Abundance = Convert.ToInt64(summedIntensity),
                Id = msFeatureId++,
                DriftTime = umc.DriftTime,
                Net = currentScan,
                GroupId = umc.GroupId
            };
            parentMsList.Add(msFeature);
            xic.Feature.AddChildFeature(msFeature);
        }

        // Remove features that end their elution prior to the current scan
        toRemove.ForEach(x => xicFeatures.Remove(x));
    }

    OnProgress("Filtering bad features with no data.");
    features = features.Where(x => x.MsFeatures.Count > 0).ToList();

    OnProgress("Refining XIC features.");
    return RefineFeatureXics(features);
}
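// Illustrative usage sketch (not part of the original source): the intended call pattern for the
// XIC builder above, assuming it lives in the same class. The UMC features are expected to have
// their scan ranges already populated; the 10 ppm tolerance is a hypothetical value, and any
// concrete ISpectraProvider implementation would do.
public IList<UMCLight> BuildXicsExample(IList<UMCLight> umcFeatures, ISpectraProvider provider)
{
    // PART A and PART B both happen inside CreateXic; afterwards each surviving UMC feature
    // carries the MS features (one per scan) that make up its chromatogram.
    var refined = CreateXic(umcFeatures, 10, provider).ToList();

    foreach (var umc in refined)
    {
        Console.WriteLine("Feature {0}: {1} XIC points", umc.Id, umc.MsFeatures.Count);
    }
    return refined;
}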
/// <summary>
/// Computes all anchor point matches between two sets of spectra.
/// </summary>
/// <param name="readerX">Spectra provider for the first (X) dataset.</param>
/// <param name="readerY">Spectra provider for the second (Y) dataset.</param>
/// <param name="comparer">Comparer used to score spectral similarity.</param>
/// <param name="filter">Filter applied to spectra before comparison.</param>
/// <param name="options">Tolerances and cutoffs used for matching.</param>
/// <param name="skipComparison">If true, skips loading and comparing the spectra.</param>
/// <returns>The anchor point matches found between the two datasets.</returns>
public IEnumerable<SpectralAnchorPointMatch> FindAnchorPoints(ISpectraProvider readerX,
    ISpectraProvider readerY,
    ISpectralComparer comparer,
    ISpectraFilter filter,
    SpectralOptions options,
    bool skipComparison = false)
{
    if (readerX == null || readerY == null)
    {
        throw new ArgumentNullException();
    }

    var matches = new List<SpectralAnchorPointMatch>();
    var scanDataX = readerX.GetScanSummaries();
    var scanDataY = readerY.GetScanSummaries(0);

    // Determine the scan extrema
    var maxX = scanDataX.Aggregate((l, r) => l.Scan > r.Scan ? l : r).Scan;
    var minX = scanDataX.Aggregate((l, r) => l.Scan < r.Scan ? l : r).Scan;
    var maxY = scanDataY.Aggregate((l, r) => l.Scan > r.Scan ? l : r).Scan;
    var minY = scanDataY.Aggregate((l, r) => l.Scan < r.Scan ? l : r).Scan;

    // Here we sort the summary spectra....so that we can improve run time efficiency
    // and minimize as much memory as possible.
    var ySpectraSummary = scanDataY.Where(summary => summary.MsLevel == 2).ToList();
    var xSpectraSummary = scanDataX.Where(summary => summary.MsLevel == 2).ToList();

    ySpectraSummary.Sort((x, y) => x.PrecursorMz.CompareTo(y.PrecursorMz));
    xSpectraSummary.Sort((x, y) => x.PrecursorMz.CompareTo(y.PrecursorMz));

    var netTolerance = options.NetTolerance;
    var mzTolerance = options.MzTolerance;
    var j = 0;
    var i = 0;
    var yTotal = ySpectraSummary.Count;
    var xTotal = xSpectraSummary.Count;

    var similarities = new List<double>();

    var cache = new Dictionary<int, MSSpectra>();
    var pointsY = new Dictionary<int, SpectralAnchorPoint>();

    while (i < xTotal && j < yTotal)
    {
        var xsum = xSpectraSummary[i];
        var scanx = xsum.Scan;
        var precursorX = xsum.PrecursorMz;

        MSSpectra spectrumX = null;

        while (j < yTotal && ySpectraSummary[j].PrecursorMz < (precursorX - mzTolerance))
        {
            // Here we make sure we aren't caching something
            var scany = ySpectraSummary[j].Scan;
            if (cache.ContainsKey(scany))
            {
                cache.Remove(scany);
                if (pointsY.ContainsKey(scany))
                {
                    if (pointsY[scany].Spectrum.Peaks != null)
                    {
                        pointsY[scany].Spectrum.Peaks.Clear();
                        pointsY[scany].Spectrum.Peaks = null;
                    }
                }
            }
            j++;
        }

        var k = 0;
        var points = new List<SpectralAnchorPoint>();

        while ((j + k) < yTotal && Math.Abs(ySpectraSummary[j + k].PrecursorMz - precursorX) < mzTolerance)
        {
            var ysum = ySpectraSummary[j + k];
            k++;

            var scany = ysum.Scan;

            var netX = Convert.ToDouble(scanx - minX) / Convert.ToDouble(maxX - minX);
            var netY = Convert.ToDouble(scany - minY) / Convert.ToDouble(maxY - minY);
            var net = Convert.ToDouble(netX - netY);

            // Test whether the spectra are within decent range.
            if (Math.Abs(net) < netTolerance)
            {
                // We didn't pull this spectrum before, because we aren't sure
                // if it will be within tolerance....so we just delay this
                // until we have to...after this happens, we only pull it once.
                if (spectrumX == null)
                {
                    if (!skipComparison)
                    {
                        // Grab the first spectra
                        spectrumX = SpectralUtilities.GetSpectra(options.MzBinSize,
                            options.TopIonPercent,
                            filter,
                            readerX,
                            scanx,
                            options.RequiredPeakCount);

                        if (spectrumX != null)
                        {
                            spectrumX.PrecursorMz = xsum.PrecursorMz;
                        }
                        else
                        {
                            // This spectra does not have enough peaks or did not pass our filters, throw it away!
                            break;
                        }
                    }
                }

                MSSpectra spectrumY = null;
                if (!skipComparison)
                {
                    if (cache.ContainsKey(scany))
                    {
                        spectrumY = cache[scany];
                    }
                    else
                    {
                        spectrumY = SpectralUtilities.GetSpectra(options.MzBinSize,
                            options.TopIonPercent,
                            filter,
                            readerY,
                            scany,
                            options.RequiredPeakCount);

                        if (spectrumY != null)
                        {
                            spectrumY.PrecursorMz = ysum.PrecursorMz;
                            cache.Add(scany, spectrumY);
                        }
                        else
                        {
                            // This spectra does not have enough peaks or did not pass our filters, throw it away!
                            continue;
                        }
                    }
                }

                if (spectrumX == null || spectrumY == null)
                {
                    continue;
                }

                // compare the spectra
                double spectralSimilarity = 0;
                if (!skipComparison)
                {
                    spectralSimilarity = comparer.CompareSpectra(spectrumX, spectrumY);
                }

                // similarities.Add(spectralSimilarity);
                File.AppendAllText(@"c:\data\proteomics\test.txt",
                    string.Format("{0}\t{1}\t{2}\n", spectrumX.PrecursorMz, spectrumY.PrecursorMz, spectralSimilarity));

                if (double.IsNaN(spectralSimilarity) || double.IsInfinity(spectralSimilarity))
                {
                    continue;
                }

                if (spectralSimilarity < options.SimilarityCutoff)
                {
                    continue;
                }

                var pointX = new SpectralAnchorPoint
                {
                    Net = netX,
                    Mass = 0,
                    Mz = xsum.PrecursorMz,
                    Scan = scanx,
                    Spectrum = spectrumX
                };

                var pointY = new SpectralAnchorPoint
                {
                    Net = netY,
                    Mass = 0,
                    Mz = ysum.PrecursorMz,
                    Scan = scany,
                    Spectrum = spectrumY
                };

                var match = new SpectralAnchorPointMatch();
                match.AnchorPointX = pointX;
                match.AnchorPointY = pointY;
                match.SimilarityScore = spectralSimilarity;
                match.IsValidMatch = AnchorPointMatchType.FalseMatch;
                matches.Add(match);

                points.Add(pointX);
                if (!pointsY.ContainsKey(scany))
                {
                    pointsY.Add(scany, pointY);
                }
            }
        }

        // Move to the next spectra in the x-list
        i++;

        foreach (var p in points)
        {
            if (p.Spectrum.Peaks != null)
            {
                p.Spectrum.Peaks.Clear();
                p.Spectrum.Peaks = null;
            }
        }
        points.Clear();
    }
    return matches;
}
public IDictionary<int, IList<MSFeatureLight>> CreateXic(UMCLight feature, double massError, ISpectraProvider provider)
{
    var features = new Dictionary<int, IList<MSFeatureLight>>();
    var chargeFeatures = feature.CreateChargeMap();

    // For each UMC...
    foreach (var charge in chargeFeatures.Keys)
    {
        // Find the minimum and maximum features
        var msFeatures = CreateXic(chargeFeatures[charge], massError, provider);

        features.Add(charge, new List<MSFeatureLight>());
        foreach (var newFeature in msFeatures)
        {
            // Here we ask if this is a new MS Feature or old...
            if (!chargeFeatures.ContainsKey(newFeature.Scan))
            {
                // Otherwise add the new feature
                newFeature.MassMonoisotopic = feature.MassMonoisotopic;
                newFeature.DriftTime = feature.DriftTime;
                newFeature.GroupId = feature.GroupId;
            }
            features[charge].Add(newFeature);
        }
    }
    return features;
}
private MSSpectra GetSpectrum(ISpectraProvider reader, int scan, int group, double mzTolerance = .5)
{
    var summary = new ScanSummary();
    var peaks = reader.GetRawSpectra(scan, group, 2, out summary);

    var spectrum = new MSSpectra();
    spectrum.Peaks = peaks;

    return spectrum;
}
/// <summary>
/// Clusters spectra together based on similarity.
/// </summary>
/// <param name="start">Index of the first feature to cluster.</param>
/// <param name="stop">Index one past the last feature to cluster.</param>
/// <param name="features">Features whose MS/MS spectra are clustered.</param>
/// <param name="provider">Object that reads spectra from the raw data source.</param>
/// <param name="similarityTolerance">Minimum similarity score required to merge two clusters.</param>
private List<MsmsCluster> Cluster(int start, int stop, List<MSFeatureLight> features, ISpectraProvider provider, double similarityTolerance)
{
    var massTolerance = MassTolerance;

    // Maps the feature to a cluster ID.
    var featureMap = new Dictionary<MSFeatureLight, int>();

    // Maps the cluster ID to a cluster.
    var clusterMap = new Dictionary<int, MsmsCluster>();
    var clusters = new List<MsmsCluster>();

    // Create singleton clusters.
    var id = 0;
    for (var i = start; i < stop; i++)
    {
        var feature = features[i];
        var cluster = new MsmsCluster();
        cluster.Id = id++;
        cluster.MeanScore = 0;
        cluster.Features.Add(feature);

        featureMap.Add(feature, cluster.Id);
        clusterMap.Add(cluster.Id, cluster);
    }
    var protonMass = AdductMass;

    // Then iterate and cluster.
    for (var i = start; i < stop; i++)
    {
        var featureI = features[i];
        var clusterI = clusterMap[featureMap[featureI]];

        for (var j = i + 1; j < stop; j++)
        {
            var featureJ = features[j];
            var clusterJ = clusterMap[featureMap[featureJ]];

            // Don't cluster the same thing
            if (clusterI.Id == clusterJ.Id)
            {
                continue;
            }

            // Don't cluster from the same dataset. Let the linkage algorithm decide if they
            // belong in the same cluster, and later, go back and determine if the cluster is valid or not.
            if (featureI.GroupId == featureJ.GroupId)
            {
                continue;
            }

            // Check the scan difference. If it fits then we are within range.
            var scanDiff = Math.Abs(featureI.Scan - featureJ.Scan);
            if (scanDiff <= ScanRange)
            {
                // Use the most abundant mass because it had a higher chance of being fragmented.
                var mzI = (featureI.MassMonoisotopicMostAbundant / featureI.ChargeState) + protonMass;
                var mzJ = (featureJ.MassMonoisotopicMostAbundant / featureJ.ChargeState) + protonMass;

                var mzDiff = Math.Abs(mzI - mzJ);
                if (mzDiff <= MzTolerance)
                {
                    var scanSummary = new ScanSummary();
                    if (featureI.MSnSpectra[0].Peaks.Count <= 0)
                    {
                        featureI.MSnSpectra[0].Peaks = provider.GetRawSpectra(featureI.MSnSpectra[0].Scan, featureI.GroupId, out scanSummary);
                        featureI.MSnSpectra[0].Peaks = XYData.Bin(featureI.MSnSpectra[0].Peaks, 0, 2000, MzTolerance);
                    }
                    if (featureJ.MSnSpectra[0].Peaks.Count <= 0)
                    {
                        featureJ.MSnSpectra[0].Peaks = provider.GetRawSpectra(featureJ.MSnSpectra[0].Scan, featureJ.GroupId, out scanSummary);
                        featureJ.MSnSpectra[0].Peaks = XYData.Bin(featureJ.MSnSpectra[0].Peaks, 0, 2000, MzTolerance);
                    }

                    // Compute similarity
                    var score = SpectralComparer.CompareSpectra(featureI.MSnSpectra[0], featureJ.MSnSpectra[0]);
                    if (score >= similarityTolerance)
                    {
                        clusterJ.MeanScore += score;
                        foreach (var xFeature in clusterI.Features)
                        {
                            clusterJ.Features.Add(xFeature);
                            featureMap[xFeature] = clusterJ.Id;
                            clusterMap.Remove(clusterI.Id);
                        }
                    }
                }
            }
        }
    }
    clusters.AddRange(clusterMap.Values);

    for (var i = start; i < stop; i++)
    {
        features[i].MSnSpectra[0].Peaks.Clear();
    }

    foreach (var cluster in clusters)
    {
        cluster.MeanScore /= (cluster.Features.Count - 1);
    }
    return clusters;
}
/// <summary>
/// Clusters features based on MS/MS spectral similarity.
/// </summary>
/// <param name="features">Features whose most abundant MS/MS spectra are clustered.</param>
/// <param name="provider">Object that reads spectra from the raw data source.</param>
public List<MsmsCluster> Cluster(List<UMCLight> features, ISpectraProvider provider)
{
    UpdateStatus("Mapping UMC's to MS/MS spectra using intensity profile.");

    // Step 1: Cluster the spectra
    // Create the collection of samples.
    var msFeatures = new List<MSFeatureLight>();

    // Sort through the features
    foreach (var feature in features)
    {
        // Sort out charge states...?
        var chargeMap = new Dictionary<int, MSFeatureLight>();

        double abundance = int.MinValue;
        MSFeatureLight maxFeature = null;

        // Find the max abundance spectrum. This is the number of features we have to search.
        foreach (var msFeature in feature.MsFeatures)
        {
            if (msFeature.Abundance > abundance && msFeature.MSnSpectra.Count > 0)
            {
                abundance = msFeature.Abundance;
                maxFeature = msFeature;
            }
        }

        if (maxFeature != null)
        {
            msFeatures.Add(maxFeature);
        }
    }

    UpdateStatus(string.Format("Found {0} total spectra for clustering.", msFeatures.Count));

    UpdateStatus("Sorting spectra.");

    // Sort based on mass using the max abundance of the feature.
    msFeatures.Sort(delegate(MSFeatureLight x, MSFeatureLight y)
    {
        return x.MassMonoisotopicMostAbundant.CompareTo(y.MassMonoisotopicMostAbundant);
    });

    // Then cluster the spectra.
    var j = 1;
    var h = 0;
    var N = msFeatures.Count;

    var clusters = new List<MsmsCluster>();
    var tol = MassTolerance;
    var lastTotal = 0;
    UpdateStatus("Clustering spectra.");
    while (j < N)
    {
        var i = j - 1;
        var featureJ = msFeatures[j];
        var featureI = msFeatures[i];
        var diff = FeatureLight.ComputeMassPPMDifference(featureJ.MassMonoisotopicMostAbundant,
            featureI.MassMonoisotopicMostAbundant);

        if (Math.Abs(diff) > tol)
        {
            // We only care to create clusters of size greater than one.
            if ((j - h) > 1)
            {
                var data = Cluster(h, j, msFeatures, provider, SimilarityTolerance);
                clusters.AddRange(data);
            }

            // Reset the count, we're done looking at those clusters.
            h = j;
        }
        if (j - lastTotal > 500)
        {
            lastTotal = j;
            UpdateStatus(string.Format("Processed {0} / {1} total spectra.", lastTotal, N));
        }
        j++;
    }

    UpdateStatus("Finishing last cluster data.");

    // Cluster the rest
    if ((j - h) > 1)
    {
        var data = Cluster(h, j, msFeatures, provider, SimilarityTolerance);
        clusters.AddRange(data);
    }
    UpdateStatus("Finished clustering.");

    var passingClusters = clusters.Where(cluster => cluster.Features.Count >= MinimumClusterSize);
    return passingClusters.ToList();
}
/// <summary>
/// Runs the MultiAlign analysis
/// </summary>
public void AlignDatasets(IEnumerable<UMCLight> baselineFeatures,
    IEnumerable<UMCLight> aligneeFeatures,
    ISpectraProvider providerX,
    ISpectraProvider providerY,
    IFeatureAligner<IEnumerable<UMCLight>, IEnumerable<UMCLight>, AlignmentData> aligner,
    IClusterer<UMCLight, UMCClusterLight> clusterer,
    string matchPath,
    string errorPath)
{
    // cluster before we do anything else....
    var allFeatures = new List<UMCLight>();
    allFeatures.AddRange(baselineFeatures);
    allFeatures.AddRange(aligneeFeatures);

    var maxBaseline = baselineFeatures.Max(x => x.Scan);
    var minBaseline = baselineFeatures.Min(x => x.Scan);
    var maxAlignee = aligneeFeatures.Max(x => x.Scan);
    var minAlignee = aligneeFeatures.Min(x => x.Scan);

    foreach (var feature in aligneeFeatures)
    {
        feature.Net = Convert.ToDouble(feature.Scan - minAlignee) / Convert.ToDouble(maxAlignee - minAlignee);
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    }

    foreach (var feature in baselineFeatures)
    {
        feature.Net = Convert.ToDouble(feature.Scan - minBaseline) / Convert.ToDouble(maxBaseline - minBaseline);
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    }

    // This tells us the differences before we align.
    var clusters = clusterer.Cluster(allFeatures);
    var clusterId = 0;
    foreach (var cluster in clusters)
    {
        cluster.Id = clusterId++;
    }

    var scorer = new GlobalPeptideClusterScorer();
    var preAlignment = scorer.Score(clusters);

    aligner.AligneeSpectraProvider = providerY;
    aligner.BaselineSpectraProvider = providerX;

    UpdateStatus("Aligning data");

    // Aligner data
    var data = aligner.Align(baselineFeatures, aligneeFeatures);
    var matches = data.Matches;

    // create anchor points for LCMSWarp alignment
    var massPoints = new List<RegressionPoint>();
    var netPoints = new List<RegressionPoint>();
    foreach (var match in matches)
    {
        var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz, match.AnchorPointY.Mz);
        var netError = match.AnchorPointX.Net - match.AnchorPointY.Net;

        var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
        massPoints.Add(massPoint);

        var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
        netPoints.Add(netPoint);
    }

    foreach (var feature in allFeatures)
    {
        feature.UmcCluster = null;
        feature.ClusterId = -1;
    }

    // Then cluster after alignment!
    UpdateStatus("clustering data");
    clusters = clusterer.Cluster(allFeatures);
    var postAlignment = scorer.Score(clusters);

    UpdateStatus("Note\tSame\tDifferent");
    UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster, preAlignment.DifferentCluster));
    UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster, postAlignment.DifferentCluster));

    matches = FilterMatches(matches, 40);
    SaveMatches(matchPath, matches);

    DeRegisterProgressNotifier(aligner);
    DeRegisterProgressNotifier(clusterer);
}
public Dictionary<int, int> LinkMSFeaturesToMSn(List<MSFeatureLight> features, List<MSSpectra> fragmentSpectra, ISpectraProvider provider)
{
    return LinkMSFeaturesToMSn(features, fragmentSpectra);
}