/// <summary>
/// Computes errors for mass and retention time given a set of linked and matched features.
/// </summary>
/// <param name="matches">Anchor point matches whose spectra link back to parent features.</param>
/// <returns>
/// A tuple whose first item holds the NET errors and whose second item holds the mass errors (ppm),
/// each with pre-alignment and post-alignment values.
/// </returns>
public Tuple<AlignmentMeasurement<double>, AlignmentMeasurement<double>> MeasureErrors(IEnumerable<SpectralAnchorPointMatch> matches)
{
    var netError = new AlignmentMeasurement<double>();
    var massError = new AlignmentMeasurement<double>();
    var errors = new Tuple<AlignmentMeasurement<double>, AlignmentMeasurement<double>>(netError, massError);

    foreach (var match in matches)
    {
        // An anchor point is not guaranteed to carry a spectrum; skip those
        // matches instead of failing with a NullReferenceException.
        var spectrumX = match.AnchorPointX.Spectrum;
        var spectrumY = match.AnchorPointY.Spectrum;
        if (spectrumX == null || spectrumY == null)
        {
            continue;
        }

        var featureX = spectrumX.ParentFeature;
        var featureY = spectrumY.ParentFeature;
        if (featureX == null || featureY == null)
        {
            continue;
        }

        // The errors are measured on the features that own the MS features.
        var umcX = featureX.ParentFeature;
        var umcY = featureY.ParentFeature;
        if (umcX == null || umcY == null)
        {
            continue;
        }

        netError.PreAlignment.Add(umcX.Net - umcY.Net);
        netError.PostAlignment.Add(umcX.Net - umcY.NetAligned);

        massError.PreAlignment.Add(
            FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopic, umcY.MassMonoisotopic));
        massError.PostAlignment.Add(
            FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopic, umcY.MassMonoisotopicAligned));
    }

    return errors;
}
/// <summary>
/// Builds a mass calibration function by regressing the ppm mass error of each match
/// against the NET of the alignee feature.
/// </summary>
/// <param name="matches">Alignee/baseline feature pairs used as regression points.</param>
/// <returns>An alignment function holding the single fitted regression.</returns>
public LcmsWarpMassAlignmentFunction CalculateCalibration(List<LcmsWarpFeatureMatch> matches)
{
    var regression = new LcmsWarpCombinedRegression();
    regression.SetCentralRegressionOptions(
        this.options.MassCalibNumXSlices,
        this.options.MassCalibNumYSlices,
        this.options.MassCalibMaxJump,
        this.options.MassCalibMaxZScore,
        this.options.RegressionType);

    var points = new List<RegressionPoint>();
    for (var index = 0; index < matches.Count; index++)
    {
        var pair = matches[index];
        var alignee = pair.AligneeFeature;
        var baseline = pair.BaselineFeature;

        var massErrorPpm = FeatureLight.ComputeMassPPMDifference(alignee.MassMonoisotopic, baseline.MassMonoisotopic);
        var netError = baseline.Net - alignee.NetAligned;

        points.Add(new RegressionPoint(alignee.Net, 0, netError, massErrorPpm));
    }

    regression.CalculateRegressionFunction(points, "ScanMassError");

    var alignmentFunction = new LcmsWarpMassAlignmentFunction();
    alignmentFunction.Calibrations = new List<LcmsWarpCombinedRegression> { regression };
    return alignmentFunction;
}
/// <summary>
/// Round-trip check: computes the ppm difference between the two masses, converts that
/// ppm value back with <c>ComputeDaDifferenceFromPPM</c>, and asserts the result equals massY.
/// </summary>
/// <param name="massX">Reference mass.</param>
/// <param name="massY">Mass expected back from the round trip.</param>
public void MassMassCalculations(double massX, double massY)
{
    var ppmBetweenMasses = FeatureLight.ComputeMassPPMDifference(massX, massY);
    var recoveredMassY = FeatureLight.ComputeDaDifferenceFromPPM(massX, ppmBetweenMasses);

    Assert.AreEqual(massY, recoveredMassY);
}
/// <summary>
/// Lazily keeps only the matches whose parent-feature m/z values differ by less than
/// the given ppm threshold (absolute value).
/// </summary>
/// <param name="matches">Candidate anchor point matches.</param>
/// <param name="ppm">Exclusive upper bound on the absolute m/z error in ppm.</param>
/// <returns>The matches that pass the filter.</returns>
private IEnumerable<SpectralAnchorPointMatch> FilterMatches(IEnumerable<SpectralAnchorPointMatch> matches, double ppm)
{
    return matches.Where(match =>
    {
        var mzX = match.AnchorPointX.Spectrum.ParentFeature.Mz;
        var mzY = match.AnchorPointY.Spectrum.ParentFeature.Mz;
        var absoluteError = Math.Abs(FeatureLight.ComputeMassPPMDifference(mzX, mzY));

        return ppm > absoluteError;
    });
}
/// <summary>
/// Round-trip check: converts a ppm offset from massX with <c>ComputeDaDifferenceFromPPM</c>,
/// converts the result back to ppm, and asserts the recovered ppm is within epsilon of the input.
/// </summary>
/// <param name="massX">Reference mass.</param>
/// <param name="ppm">The ppm offset to round-trip.</param>
/// <param name="epsilon">Maximum allowed absolute deviation between input and recovered ppm.</param>
public void MassPPMCalculations(double massX, double ppm, double epsilon)
{
    var massYdelta = FeatureLight.ComputeDaDifferenceFromPPM(massX, ppm);
    var ppmDelta = FeatureLight.ComputeMassPPMDifference(massX, massYdelta);

    // Compare the magnitude of the deviation: the signed difference would let
    // an arbitrarily large negative error pass the check.
    Assert.Less(System.Math.Abs(ppm - ppmDelta), epsilon);
}
/// <summary>
/// Determines whether two features are within the configured mass (ppm), NET,
/// and drift time tolerances of each other.
/// </summary>
/// <param name="x">First feature.</param>
/// <param name="y">Second feature.</param>
/// <returns>True when all three absolute differences are within tolerance.</returns>
private bool WithinRange(T x, T y)
{
    var ppmGap = Math.Abs(FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopicAligned, y.MassMonoisotopicAligned));
    var netGap = Math.Abs(x.Net - y.Net);
    var driftGap = Math.Abs(x.DriftTime - y.DriftTime);

    // Every dimension has to pass; the first failing one decides the outcome.
    if (!(ppmGap <= Tolerances.Mass))
    {
        return false;
    }

    if (!(netGap <= Tolerances.Net))
    {
        return false;
    }

    return driftGap <= Tolerances.DriftTime;
}
/// <summary>
/// Calculates the weighted Euclidean distance based on drift time, aligned mass (ppm), and NET.
/// </summary>
/// <param name="x">Feature x.</param>
/// <param name="y">Feature y.</param>
/// <param name="massWeight">Weight applied to the squared ppm mass difference.</param>
/// <param name="netWeight">Weight applied to the squared NET difference.</param>
/// <param name="driftWeight">Weight applied to the squared drift time difference.</param>
/// <returns>
/// Distance calculated as sqrt(massWeight * dMass^2 + netWeight * dNet^2 + driftWeight * dDrift^2).
/// </returns>
public double EuclideanDistance(T x, T y, double massWeight, double netWeight, double driftWeight)
{
    var massDifference = FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopicAligned, y.MassMonoisotopicAligned);
    var netDifference = x.Net - y.Net;
    var driftDifference = x.DriftTime - y.DriftTime;

    // Each squared difference is scaled by its own weight. The NET term was
    // previously multiplied by netDifference again, which cubed the difference
    // and ignored netWeight entirely.
    var sum = (massDifference * massDifference) * massWeight
              + (netDifference * netDifference) * netWeight
              + (driftDifference * driftDifference) * driftWeight;

    return Math.Sqrt(sum);
}
/// <summary>
/// Writes a tab-delimited table describing each anchor point match and the
/// parent features of the matched spectra.
/// </summary>
/// <param name="path">Path of the text file to create.</param>
/// <param name="matches">Matches to write; matches without spectra or parent features are skipped.</param>
private void SaveMatches(string path, IEnumerable<SpectralAnchorPointMatch> matches)
{
    using (var writer = File.CreateText(path))
    {
        writer.WriteLine(
            "NET-apx\tNET-apy\tNETAligned-apy\tmz-apx\tmzAligned-apx\tmz-apy\tmzAligned-apy\tScan-x\tScan-y\tpmz-x\tpmz-y\tpmonomass-x\tpmonomass-y\tpNET-x\tpNET-y\tpNETa-x\tpNETa-y\tpmonomass-x\tpmonomassyx\tpmonomass-errorppm\tpmz-errorppm");

        foreach (var match in matches)
        {
            if (match.AnchorPointX.Spectrum == null)
            {
                continue;
            }

            if (match.AnchorPointY.Spectrum == null)
            {
                continue;
            }

            var parentFeatureX = match.AnchorPointX.Spectrum.ParentFeature;
            var parentFeatureY = match.AnchorPointY.Spectrum.ParentFeature;
            if (parentFeatureX == null || parentFeatureY == null)
            {
                // A spectrum without a parent feature has nothing to report.
                continue;
            }

            var umcX = parentFeatureX.GetParentFeature();
            var umcY = parentFeatureY.GetParentFeature();
            if (umcX == null || umcY == null)
            {
                continue;
            }

            // The final two values follow the header order: monoisotopic mass
            // error first, then m/z error. They were previously written the
            // other way round, so each landed under the wrong column title.
            // NOTE(review): header columns 18-19 ("pmonomass-x", "pmonomassyx")
            // hold the aligned monoisotopic masses; the titles are kept as-is
            // for compatibility with existing readers.
            var data = string.Format(
                "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}\t{16}\t{17}\t{18}\t{19}\t{20}\t",
                match.AnchorPointX.Net,
                match.AnchorPointY.Net,
                match.AnchorPointY.NetAligned,
                match.AnchorPointX.Mz,
                match.AnchorPointX.MzAligned,
                match.AnchorPointY.Mz,
                match.AnchorPointY.MzAligned,
                parentFeatureX.Scan,
                parentFeatureY.Scan,
                parentFeatureX.Mz,
                parentFeatureY.Mz,
                parentFeatureX.MassMonoisotopic,
                parentFeatureY.MassMonoisotopic,
                umcX.Net,
                umcY.Net,
                umcX.NetAligned,
                umcY.NetAligned,
                umcX.MassMonoisotopicAligned,
                umcY.MassMonoisotopicAligned,
                FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned, umcY.MassMonoisotopicAligned),
                FeatureLight.ComputeMassPPMDifference(parentFeatureX.Mz, parentFeatureY.Mz));

            writer.WriteLine(data);
        }
    }
}
/// <summary>
/// Prints how the weighted Euclidean distance between two initially identical clusters
/// changes while the second cluster is stepped in drift time, then NET, then mass.
/// </summary>
public void TestDistances()
{
    var metric = new WeightedEuclideanDistance<UMCClusterLight>();
    var fixedCluster = CreateCluster(500, .2, 27);
    var movingCluster = CreateCluster(500, .2, 27);

    var stepCount = 50;
    var massStepPpm = .5;
    var netStep = .001;
    var driftStep = .01;

    // Drift time walk: each step moves the second cluster a little further away.
    Console.WriteLine("Walk in drift time");
    var step = 0;
    while (step < stepCount)
    {
        movingCluster.DriftTime += driftStep;
        var driftDistance = metric.EuclideanDistance(fixedCluster, movingCluster);
        Console.WriteLine("{0}, {1}, {3}, {2}",
            movingCluster.DriftTime,
            movingCluster.DriftTime,
            driftDistance,
            movingCluster.DriftTime - fixedCluster.DriftTime);
        step++;
    }

    // NET walk, after putting the drift time back to its starting value.
    Console.WriteLine();
    Console.WriteLine("Walk in net ");
    movingCluster.DriftTime = 27;
    step = 0;
    while (step < stepCount)
    {
        movingCluster.Net += netStep;
        var netDistance = metric.EuclideanDistance(fixedCluster, movingCluster);
        Console.WriteLine("{0}, {1}, {3}, {2}",
            movingCluster.Net,
            movingCluster.Net,
            netDistance,
            movingCluster.Net - fixedCluster.Net);
        step++;
    }

    // Mass walk, after putting the NET back to its starting value. The mass is
    // recomputed from the fixed cluster each step rather than accumulated.
    Console.WriteLine();
    Console.WriteLine("Walk in mass ");
    movingCluster.Net = .2;
    step = 0;
    while (step < stepCount)
    {
        var shiftedMass = FeatureLight.ComputeDaDifferenceFromPPM(fixedCluster.MassMonoisotopic, massStepPpm * step);
        movingCluster.MassMonoisotopic = shiftedMass;
        var massDistance = metric.EuclideanDistance(fixedCluster, movingCluster);
        Console.WriteLine("{0}, {1}, {3}, {2}",
            movingCluster.MassMonoisotopic,
            movingCluster.MassMonoisotopic,
            massDistance,
            FeatureLight.ComputeMassPPMDifference(fixedCluster.MassMonoisotopic, movingCluster.MassMonoisotopic));
        step++;
    }
}
/// <summary>
/// Walks the MS features scan by scan and, within each scan, groups m/z-adjacent features
/// whose ppm difference exceeds 1.
/// </summary>
/// <param name="rawMsFeatures">The unfiltered MS features.</param>
/// <returns>
/// The filtered feature list. NOTE(review): nothing is ever added to this list, and the
/// per-scan m/z map is discarded, so the method currently always returns an empty list.
/// The filtering step looks unfinished - confirm the intended output before relying on it.
/// </returns>
private List<TChildFeature> FilterMsFeatures(List<TChildFeature> rawMsFeatures)
{
    var newFeatures = new List<TChildFeature>();

    // Sort by scan so the features of one scan are contiguous.
    var allFeatures = rawMsFeatures.OrderBy(x => x.Scan).ToList();
    var totalFeatures = allFeatures.Count;
    if (totalFeatures == 0)
    {
        return newFeatures;
    }

    var features = new List<TChildFeature>();

    // Start on the first scan actually present. The scan marker used to stay at
    // 0 forever, so every feature looked like the start of a new scan.
    var currentScan = allFeatures[0].Scan;

    // Iterate one past the end so the final scan group is examined as well.
    for (var i = 0; i <= totalFeatures; i++)
    {
        var atEnd = i == totalFeatures;
        if (atEnd || allFeatures[i].Scan != currentScan)
        {
            var mzFeatures = features.OrderBy(x => x.Mz).ToList();
            var mzMap = new Dictionary<double, List<TChildFeature>>();

            for (var j = 1; j < mzFeatures.Count; j++)
            {
                var featureJ = mzFeatures[j];
                var featurePrev = mzFeatures[j - 1];

                // Compare each feature with its m/z neighbour. This used to compare
                // featureJ with itself, which always gave a difference of zero.
                // NOTE(review): the original comment says non-unique m/z values are the
                // ones needing processing, which suggests "<= 1" - confirm the direction.
                var ppm = FeatureLight.ComputeMassPPMDifference(featureJ.Mz, featurePrev.Mz);
                if (Math.Abs(ppm) > 1)
                {
                    List<TChildFeature> bucket;
                    if (!mzMap.TryGetValue(featureJ.Mz, out bucket))
                    {
                        bucket = new List<TChildFeature>();
                        mzMap.Add(featureJ.Mz, bucket);
                    }

                    bucket.Add(featureJ);
                    bucket.Add(featurePrev);
                }
            }

            features.Clear();

            if (atEnd)
            {
                break;
            }

            currentScan = allFeatures[i].Scan;
        }

        // Every feature belongs to the group of its scan, including the first
        // feature of a new scan, which was previously dropped.
        features.Add(allFeatures[i]);
    }

    return newFeatures;
}
/// <summary>
/// Creates a residual plot of mass error (ppm) against m/z, before and after alignment.
/// </summary>
/// <typeparam name="T">Feature type.</typeparam>
/// <param name="x">First feature collection.</param>
/// <param name="y">Second feature collection; its aligned mass is used for the post-alignment residual.</param>
/// <returns>The plot produced by <c>CreateResidualAlignmentPlot</c>.</returns>
public static PlotBase CreateMassMzResidualAlignmentPlot<T>(IEnumerable<T> x, IEnumerable<T> y)
    where T : FeatureLight
{
    Func<T, double> mzSelector = feature =>
    {
        return feature.Mz;
    };

    Func<T, T, double> residualBefore = (first, second) =>
    {
        return FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopic);
    };

    Func<T, T, double> residualAfter = (first, second) =>
    {
        return FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopicAligned);
    };

    var plot = CreateResidualAlignmentPlot(x, y, mzSelector, residualBefore, residualAfter, "mz", "Mass Residual (ppm)");
    return plot;
}
/// <summary>
/// Compares two features by monoisotopic mass (ppm), charge state, and NET.
/// </summary>
/// <param name="featureX">First feature.</param>
/// <param name="featureY">Second feature.</param>
/// <returns>
/// 0 when the features are within mass tolerance, share a charge state, and are within NET
/// tolerance; 1 when mass and charge agree but NET does not; otherwise -1 or 1 depending on
/// the sign of the ppm mass difference.
/// </returns>
public int CompareMonoisotopic(FeatureLight featureX, FeatureLight featureY)
{
    var ppmDifference = FeatureLight.ComputeMassPPMDifference(featureX.MassMonoisotopic, featureY.MassMonoisotopic);

    var massMatches = Math.Abs(ppmDifference) < Tolerances.Mass;
    if (!massMatches || featureX.ChargeState != featureY.ChargeState)
    {
        // Not the same species: order by the sign of the mass difference.
        return ppmDifference < 0 ? -1 : 1;
    }

    // Mass and charge agree, so the NET difference decides.
    var netDifference = featureX.Net - featureY.Net;
    if (Math.Abs(netDifference) <= Tolerances.Net)
    {
        return 0;
    }

    return 1;
}
/// <summary>
/// Creates a residual plot of mass error (ppm) against scan, before and after alignment.
/// </summary>
/// <typeparam name="T">Feature type.</typeparam>
/// <param name="x">First feature collection.</param>
/// <param name="y">Second feature collection; its aligned mass is used for the post-alignment residual.</param>
/// <returns>The plot produced by <c>CreateResidualAlignmentPlot</c>.</returns>
public static PlotBase CreateMassScanResidualAlignmentPlot<T>(IEnumerable<T> x, IEnumerable<T> y)
    where T : FeatureLight
{
    Func<T, double> scanSelector = feature =>
    {
        return feature.Scan;
    };

    Func<T, T, double> residualBefore = (first, second) =>
    {
        return FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopic);
    };

    Func<T, T, double> residualAfter = (first, second) =>
    {
        return FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopicAligned);
    };

    return CreateResidualAlignmentPlot(x, y, scanSelector, residualBefore, residualAfter, "scan", "Mass Residual (ppm)");
}
/// <summary>
/// Checks whether two clusters are close enough in aligned mass (ppm), NET, and drift time
/// according to the tolerances in <c>Parameters.Tolerances</c>.
/// </summary>
/// <param name="clusterX">First cluster to compare.</param>
/// <param name="clusterY">Second cluster to compare.</param>
/// <returns>True when all three absolute differences are within tolerance, false otherwise.</returns>
protected override bool AreClustersWithinTolerance(U clusterX, U clusterY)
{
    var allowedMass = Parameters.Tolerances.Mass;
    var allowedNet = Parameters.Tolerances.Net;
    var allowedDrift = Parameters.Tolerances.DriftTime;

    var ppmGap = Math.Abs(FeatureLight.ComputeMassPPMDifference(clusterX.MassMonoisotopicAligned, clusterY.MassMonoisotopicAligned));
    var netGap = Math.Abs(clusterX.Net - clusterY.Net);
    var driftGap = Math.Abs(clusterX.DriftTime - clusterY.DriftTime);

    return ppmGap <= allowedMass
           && netGap <= allowedNet
           && driftGap <= allowedDrift;
}
/// <summary>
/// Creates the MS-to-LCMS feature finder and selects the first and second pass clusterers
/// according to the supplied options.
/// </summary>
/// <param name="provider">Scan summary provider; must not be null.</param>
/// <param name="options">Feature finding options; defaults are used when null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="provider"/> is null.</exception>
public MsToLcmsFeatures(IScanSummaryProvider provider, LcmsFeatureFindingOptions options = null)
{
    if (provider == null)
    {
        // Name the offending parameter so the failure is diagnosable.
        throw new ArgumentNullException("provider");
    }

    Comparison<MSFeatureLight> mzSort = (x, y) => x.Mz.CompareTo(y.Mz);
    Comparison<UMCLight> monoSort = (x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic);
    Func<MSFeatureLight, MSFeatureLight, double> mzDiff =
        (x, y) => FeatureLight.ComputeMassPPMDifference(x.Mz, y.Mz);
    Func<UMCLight, UMCLight, double> monoDiff =
        (x, y) => FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopic, y.MassMonoisotopic);

    this.provider = provider;
    this.options = options ?? new LcmsFeatureFindingOptions();

    // First pass: the tree clusterer works on m/z; other algorithms come from the factory.
    if (this.options.FirstPassClusterer == MsFeatureClusteringAlgorithmType.BinarySearchTree)
    {
        this.firstPassClusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>(
            mzSort,
            mzDiff,
            MassComparison.Mz,
            this.options.InstrumentTolerances.Mass);
    }
    else
    {
        this.firstPassClusterer = ClusterFactory.Create(this.options.FirstPassClusterer);
    }

    // Second pass: the tree clusterer works on monoisotopic mass.
    if (this.options.SecondPassClusterer == GenericClusteringAlgorithmType.BinarySearchTree)
    {
        this.secondPassClusterer = new MsFeatureTreeClusterer<UMCLight, UMCLight>(
            monoSort,
            monoDiff,
            MassComparison.Monoisotopic,
            this.options.InstrumentTolerances.Mass);
    }
    else
    {
        var clusterFactory = new GenericClusterFactory<UMCLight, UMCLight>();
        this.secondPassClusterer = clusterFactory.Create(this.options.SecondPassClusterer);
    }
}
/// <summary>
/// Checks whether two clusters are close enough in aligned mass (ppm), NET, and drift time.
/// </summary>
/// <param name="clusterX">First cluster to compare.</param>
/// <param name="clusterY">Second cluster to compare.</param>
/// <param name="massTolerance">Inclusive limit on the absolute ppm mass difference.</param>
/// <param name="netTolerance">Inclusive limit on the absolute NET difference.</param>
/// <param name="driftTolerance">Inclusive limit on the absolute drift time difference.</param>
/// <returns>True when all three differences are within their limits, false otherwise.</returns>
protected virtual bool AreClustersWithinTolerance(UMCLight clusterX, UMCLight clusterY, double massTolerance, double netTolerance, double driftTolerance)
{
    var ppmGap = Math.Abs(FeatureLight.ComputeMassPPMDifference(clusterX.MassMonoisotopicAligned, clusterY.MassMonoisotopicAligned));
    var netGap = Math.Abs(clusterX.Net - clusterY.Net);
    var driftGap = Math.Abs(clusterX.DriftTime - clusterY.DriftTime);

    var massOk = ppmGap <= massTolerance;
    var netOk = netGap <= netTolerance;
    var driftOk = driftGap <= driftTolerance;

    return massOk && netOk && driftOk;
}
/// <summary>
/// Writes a tab-delimited report of the matches: a short header section followed by one
/// data row per match that has a spectrum on both anchor points.
/// </summary>
/// <param name="path">Path of the text file to create.</param>
/// <param name="matches">Matches to write.</param>
private void SaveMatches(string path, IEnumerable<SpectralAnchorPointMatch> matches)
{
    using (var output = File.CreateText(path))
    {
        output.WriteLine("[Header]");
        output.WriteLine("p mz = parentMz - A and B denote dataset A and dataset B");
        output.WriteLine("[Data]");
        output.WriteLine("Net-A\tpMz-A\tScan-A\tNet-B\tpMz-B\tScan-B\tMassErrorPpm\tSimScore");

        foreach (var match in matches)
        {
            // Rows are only written when both anchor points carry a spectrum.
            var spectrumA = match.AnchorPointX.Spectrum;
            if (spectrumA == null)
            {
                continue;
            }

            var spectrumB = match.AnchorPointY.Spectrum;
            if (spectrumB == null)
            {
                continue;
            }

            var msFeatureA = spectrumA.ParentFeature;
            var msFeatureB = spectrumB.ParentFeature;

            var row = string.Format(
                "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}",
                msFeatureA.GetParentFeature().Net,
                msFeatureA.GetParentFeature().Mz,
                msFeatureA.GetParentFeature().Scan,
                msFeatureB.GetParentFeature().Net,
                msFeatureB.GetParentFeature().Mz,
                msFeatureB.GetParentFeature().Scan,
                FeatureLight.ComputeMassPPMDifference(msFeatureA.Mz, msFeatureB.Mz),
                match.SimilarityScore);

            output.WriteLine(row);
        }
    }
}
/// <summary>
/// Compares two features by m/z (ppm), scan, and charge state.
/// </summary>
/// <param name="featureX">First feature.</param>
/// <param name="featureY">Second feature.</param>
/// <returns>
/// 0 when the features are within mass tolerance, within scan tolerance, and share a charge
/// state; 1 when the m/z values agree but scan or charge do not; otherwise -1 or 1 depending
/// on the sign of the ppm m/z difference.
/// </returns>
public int CompareMz(FeatureLight featureX, FeatureLight featureY)
{
    var ppmDifference = FeatureLight.ComputeMassPPMDifference(featureX.Mz, featureY.Mz);

    if (!(Math.Abs(ppmDifference) < Tolerances.Mass))
    {
        // Outside the mass window: order by the sign of the difference.
        return ppmDifference < 0 ? -1 : 1;
    }

    // Inside the mass window: scan distance is checked first, then charge.
    var scanGap = featureX.Scan - featureY.Scan;
    if (Math.Abs(scanGap) > ScanTolerance || featureX.ChargeState != featureY.ChargeState)
    {
        return 1;
    }

    return 0;
}
/// <summary>
/// Writes a tab-delimited table of NET and mass (ppm) errors for each match.
/// </summary>
/// <remarks>
/// Columns, in order: X NET, X m/z, Y NET, Y m/z, Y aligned NET, Y aligned m/z,
/// NET error (X NET minus Y aligned NET), mass error in ppm between X and Y m/z,
/// and the similarity score.
/// </remarks>
/// <param name="errorPath">Path of the text file to create.</param>
/// <param name="matches">Matches to report.</param>
private static void WriteErrors(string errorPath, IEnumerable<SpectralAnchorPointMatch> matches)
{
    using (var writer = File.CreateText(errorPath))
    {
        // The header lists exactly the nine values written per row. It previously
        // named a second NETA/MassA pair that no row ever contained, which shifted
        // the error and score titles off their values.
        writer.WriteLine("NET\tMass\tNET\tMass\tNETA\tMassA\tNetError\tMassError\tScore");

        foreach (var match in matches)
        {
            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz, match.AnchorPointY.Mz);
            var netError = match.AnchorPointX.Net - match.AnchorPointY.NetAligned;

            // NOTE(review): the trailing tab is kept so existing readers of the
            // data rows see the same field layout as before.
            writer.WriteLine("{0:F5}\t{1:F5}\t{2:F5}\t{3:F5}\t{4:F5}\t{5:F5}\t{6:F5}\t{7:F5}\t{8:F5}\t",
                match.AnchorPointX.Net,
                match.AnchorPointX.Mz,
                match.AnchorPointY.Net,
                match.AnchorPointY.Mz,
                match.AnchorPointY.NetAligned,
                match.AnchorPointY.MzAligned,
                netError,
                massError,
                match.SimilarityScore);
        }
    }
}
/// <summary>
/// Clusters MS/MS spectra: picks the most abundant MS feature with MS/MS spectra from each
/// feature, sorts those by mass, partitions them on gaps larger than the mass tolerance,
/// and clusters each partition.
/// </summary>
/// <param name="features">Features whose MS features carry the MS/MS spectra.</param>
/// <param name="provider">Spectra provider passed through to the per-partition clustering.</param>
/// <returns>The clusters containing at least <c>MinimumClusterSize</c> features.</returns>
public List<MsmsCluster> Cluster(List<UMCLight> features, ISpectraProvider provider)
{
    UpdateStatus("Mapping UMC's to MS/MS spectra using intensity profile.");

    // Step 1: collect one representative MS feature per feature - the most
    // abundant one that has at least one MS/MS spectrum.
    var msFeatures = new List<MSFeatureLight>();
    foreach (var feature in features)
    {
        double abundance = int.MinValue;
        MSFeatureLight maxFeature = null;

        foreach (var msFeature in feature.MsFeatures)
        {
            if (msFeature.Abundance > abundance && msFeature.MSnSpectra.Count > 0)
            {
                abundance = msFeature.Abundance;
                maxFeature = msFeature;
            }
        }

        if (maxFeature != null)
        {
            msFeatures.Add(maxFeature);
        }
    }

    UpdateStatus(string.Format("Found {0} total spectra for clustering.", msFeatures.Count));
    UpdateStatus("Sorting spectra.");

    // Step 2: sort by mass so that partitions are contiguous runs.
    msFeatures.Sort((x, y) => x.MassMonoisotopicMostAbundant.CompareTo(y.MassMonoisotopicMostAbundant));

    // Step 3: walk the sorted list; h marks the start of the current partition.
    var j = 1;
    var h = 0;
    var N = msFeatures.Count;
    var clusters = new List<MsmsCluster>();
    var tol = MassTolerance;
    var lastTotal = 0;

    UpdateStatus("Clustering spectra.");
    while (j < N)
    {
        var i = j - 1;
        var featureJ = msFeatures[j];
        var featureI = msFeatures[i];
        var diff = FeatureLight.ComputeMassPPMDifference(featureJ.MassMonoisotopicMostAbundant,
                                                         featureI.MassMonoisotopicMostAbundant);

        if (Math.Abs(diff) > tol)
        {
            // We only care to create clusters of size greater than one.
            if ((j - h) > 1)
            {
                var data = Cluster(h, j, msFeatures, provider, SimilarityTolerance);
                clusters.AddRange(data);
            }

            // Reset the start; we're done looking at that partition.
            h = j;
        }

        if (j - lastTotal > 500)
        {
            lastTotal = j;
            UpdateStatus(string.Format("Processed {0} / {1} total spectra.", lastTotal, N));
        }

        j++;
    }

    UpdateStatus("Finishing last cluster data.");

    // Cluster whatever remains after the last gap.
    if ((j - h) > 1)
    {
        var data = Cluster(h, j, msFeatures, provider, SimilarityTolerance);
        clusters.AddRange(data);
    }

    UpdateStatus("Finished clustering.");

    var passingClusters = clusters.Where(cluster => cluster.Features.Count >= MinimumClusterSize);
    return passingClusters.ToList();
}
/// <summary>
/// Clusters a set of features by partitioning them on mass and linking the features inside
/// each partition.
/// </summary>
/// <param name="data">Features to cluster. The list is sorted in place by mass.</param>
/// <param name="clusters">List that receives the clusters; it is also the return value.</param>
/// <returns>The <paramref name="clusters"/> list with the new clusters appended.</returns>
/// <exception cref="NullReferenceException">
/// Thrown when <paramref name="data"/> is null or contains a null feature.
/// </exception>
public virtual List<U> Cluster(List<T> data, List<U> clusters)
{
    /*
     * This clustering algorithm first sorts the list of input UMC's by mass. It then iterates
     * through this list partitioning the data into blocks of UMC's based on a mass tolerance.
     * When it finds gaps larger or equal to the mass (ppm) tolerance specified by the user,
     * it will process the data before the gap (a block) until the current index of the features in question.
     */

    // Make sure we have data to cluster first.
    if (data == null)
    {
        throw new NullReferenceException("The input feature data list was null.  Cannot process this data.");
    }

    // Make sure there is no null UMC data in the input list. FindIndex returns
    // -1 when nothing matches, so index 0 is a valid hit and must be rejected too.
    var nullIndex = data.FindIndex(x => x == null);
    if (nullIndex >= 0)
    {
        throw new NullReferenceException("The feature at index " + nullIndex + " was null.  Cannot process this data.");
    }

    OnNotify("Sorting cluster mass list");

    // The first thing we do is to sort the features based on mass since we know that has the least variability in the data across runs.
    data.Sort(m_massComparer);

    // Now partition the data based on mass ranges and the parameter values.
    var massTolerance = Parameters.Tolerances.Mass;

    // This is the index of first feature of a given mass partition.
    var startUMCIndex = 0;
    var totalFeatures = data.Count;

    OnNotify("Detecting mass partitions");
    var tenPercent = Convert.ToInt32(totalFeatures * .1);
    var counter = 0;
    var percent = 0;

    for (var i = 0; i < totalFeatures - 1; i++)
    {
        if (counter > tenPercent)
        {
            counter = 0;
            percent += 10;
            OnNotify(string.Format("Clustering Completed...{0}%", percent));
        }
        counter++;

        // Here we compute the ppm mass difference between consecutive features (based on mass).
        // This will determine if we cluster a block of data or not.
        var umcX = data[i];
        var umcY = data[i + 1];
        var ppm = Math.Abs(FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned,
                                                                umcY.MassMonoisotopicAligned));

        // If the difference is greater than the tolerance then we cluster
        //  - we dont check the sign of the ppm because the data should be sorted based on mass.
        if (ppm > massTolerance)
        {
            // If the partition starts at the current index, it holds a single feature:
            // nothing else was within the mass tolerance of it.
            if (startUMCIndex == i)
            {
                var cluster = new U();
                cluster.AmbiguityScore = m_maxDistance;
                umcX.SetParentFeature(cluster);
                cluster.AddChildFeature(umcX);
                clusters.Add(cluster);
            }
            else
            {
                // Otherwise we have more than one feature to consider.
                var distances = CalculatePairWiseDistances(startUMCIndex, i, data);
                var localClusters = CreateSingletonClusters(data, startUMCIndex, i);
                var blockClusters = LinkFeatures(distances, localClusters);
                CalculateAmbiguityScore(blockClusters);
                clusters.AddRange(blockClusters);
            }

            startUMCIndex = i + 1;
        }
    }

    // Make sure that we cluster what is left over.
    if (startUMCIndex < totalFeatures)
    {
        OnNotify(string.Format("Clustering last partition...{0}%", percent));
        var distances = CalculatePairWiseDistances(startUMCIndex, totalFeatures - 1, data);
        var localClusters = CreateSingletonClusters(data, startUMCIndex, totalFeatures - 1);
        var blockClusters = LinkFeatures(distances, localClusters);
        CalculateAmbiguityScore(blockClusters);

        // With fewer than two singleton clusters the singletons themselves are kept.
        if (localClusters.Count < 2)
        {
            clusters.AddRange(localClusters.Values);
        }
        else
        {
            clusters.AddRange(blockClusters);
        }
    }

    OnNotify("Generating cluster statistics");
    foreach (var cluster in clusters)
    {
        cluster.CalculateStatistics(Parameters.CentroidRepresentation);
    }

    return clusters;
}
/// <summary>
/// For each cluster, measures how far every member feature lies from the cluster's mass (ppm),
/// NET, and drift time, and prints z-scores comparing the distributions returned by
/// <c>CalculateAllDistributions</c>.
/// </summary>
/// <param name="clusters">Clusters to analyse.</param>
/// <returns>
/// A new cluster list. NOTE(review): nothing is added to it yet, so it is always empty;
/// the method currently only writes diagnostics to the console.
/// </returns>
public List<U> ProcessClusters(List<U> clusters)
{
    var newClusters = new List<U>();

    // Look for merged clusters that need to be split...
    foreach (var cluster in clusters)
    {
        var medianNet = cluster.Net;
        var medianMass = cluster.MassMonoisotopic;
        var medianDrift = cluster.DriftTime;

        var massDistributions = new Dictionary<T, double>();
        var netDistributions = new Dictionary<T, double>();
        var driftDistributions = new Dictionary<T, double>();

        var massDistances = new List<double>();
        var netDistances = new List<double>();
        var driftDistances = new List<double>();

        // Build distributions of each feature's offset from the cluster value.
        foreach (var feature in cluster.Features)
        {
            var mass = FeatureLight.ComputeMassPPMDifference(feature.MassMonoisotopicAligned, medianMass);
            var net = feature.Net - medianNet;
            var drift = feature.DriftTime - medianDrift;

            // Each per-feature map holds its own dimension; the NET and drift
            // values used to be stored in each other's dictionaries.
            massDistributions.Add(feature, mass);
            netDistributions.Add(feature, net);
            driftDistributions.Add(feature, drift);

            massDistances.Add(mass);
            netDistances.Add(net);
            driftDistances.Add(drift);
        }

        massDistances.Sort();
        netDistances.Sort();
        driftDistances.Sort();

        // NOTE(review): the original comment describes these as sample statistics for the
        // negative and positive sides of the median; the meaning of Item1/Item2/Item3 is
        // defined by CalculateAllDistributions - confirm against that method.
        var massDistribution = CalculateAllDistributions(massDistances);
        var netDistribution = CalculateAllDistributions(netDistances);
        var driftDistribution = CalculateAllDistributions(driftDistances);

        var massZScore = CalculateZScore(massDistribution.Item1, massDistribution.Item2);
        var netZScore = CalculateZScore(netDistribution.Item1, netDistribution.Item2);
        var driftZScore = CalculateZScore(driftDistribution.Item1, driftDistribution.Item2);

        Console.WriteLine(" Neg to Pos ");
        Console.WriteLine("Mass z-score \t{0}", massZScore);
        Console.WriteLine("Net z-score \t{0}", netZScore);
        Console.WriteLine("Drift z-score\t{0}", driftZScore);
        Console.WriteLine();

        massZScore = CalculateZScore(massDistribution.Item1, massDistribution.Item3);
        netZScore = CalculateZScore(netDistribution.Item1, netDistribution.Item3);
        driftZScore = CalculateZScore(driftDistribution.Item1, driftDistribution.Item3);

        Console.WriteLine(" Negative ");
        Console.WriteLine("Mass z-score \t{0}", massZScore);
        Console.WriteLine("Net z-score \t{0}", netZScore);
        Console.WriteLine("Drift z-score\t{0}", driftZScore);
        Console.WriteLine();

        Console.WriteLine(" Positive ");
        massZScore = CalculateZScore(massDistribution.Item2, massDistribution.Item3);
        netZScore = CalculateZScore(netDistribution.Item2, netDistribution.Item3);
        driftZScore = CalculateZScore(driftDistribution.Item2, driftDistribution.Item3);

        Console.WriteLine("Mass z-score \t{0}", massZScore);
        Console.WriteLine("Net z-score \t{0}", netZScore);
        Console.WriteLine("Drift z-score\t{0}", driftZScore);
    }

    return newClusters;
}
/// <summary>
/// Builds the NET and mass LOESS alignment functions from a set of anchor point matches,
/// stores them on this instance, and writes the aligned NET and m/z onto the Y anchor points.
/// </summary>
/// <remarks>
/// Matches are first reduced to the best-scoring match per X scan, then to the best-scoring
/// match per Y scan. The NET fit uses only matches within 20 ppm (absolute) whose parent
/// features share a charge state; the mass fit uses all of the reduced matches.
/// </remarks>
/// <param name="matches">Candidate anchor point matches.</param>
public void CreateAlignmentFunctions(IEnumerable<SpectralAnchorPointMatch> matches)
{
    const double maxMassErrorPpm = 20;

    var netXvalues = new List<double>();
    var netYvalues = new List<double>();
    var massXvalues = new List<double>();
    var massYvalues = new List<double>();

    matches = matches.ToList().OrderBy(x => x.AnchorPointX.Net);

    // 1. Keep the highest scoring match for every scan of X.
    var bestMatches = new Dictionary<int, SpectralAnchorPointMatch>();
    foreach (var match in matches)
    {
        var scan = match.AnchorPointX.Scan;
        SpectralAnchorPointMatch existing;
        if (!bestMatches.TryGetValue(scan, out existing) || existing.SimilarityScore < match.SimilarityScore)
        {
            bestMatches[scan] = match;
        }
    }

    // 2. Of those, keep the highest scoring match for every scan of Y.
    var all = new Dictionary<int, SpectralAnchorPointMatch>();
    foreach (var match in bestMatches.Values)
    {
        var scan = match.AnchorPointY.Scan;
        SpectralAnchorPointMatch existing;
        if (!all.TryGetValue(scan, out existing) || existing.SimilarityScore < match.SimilarityScore)
        {
            all[scan] = match;
        }
    }

    // Then generate the NET alignment. The ppm difference is signed, so the
    // filter must use its magnitude; comparing the signed value let every
    // match with a negative error through regardless of size.
    var anchorPoints = all.Values.OrderBy(x => x.AnchorPointY.Net).ToList();

    matches = anchorPoints.Where(
        x => System.Math.Abs(FeatureLight.ComputeMassPPMDifference(x.AnchorPointX.Mz, x.AnchorPointY.Mz)) < maxMassErrorPpm &&
             x.AnchorPointX.Spectrum.ParentFeature.ChargeState == x.AnchorPointY.Spectrum.ParentFeature.ChargeState
        ).ToList();

    foreach (var match in matches)
    {
        netXvalues.Add(match.AnchorPointX.Net);
        netYvalues.Add(match.AnchorPointY.Net);
    }

    var netInterpolator = new LoessInterpolator(Bandwidth, 5);
    netInterpolator.Smooth(netYvalues, netXvalues, FitFunctionFactory.Create(FitFunctionTypes.TriCubic));

    // Then generate the mass alignment.
    // We also have to re-sort the matches based on m/z now too.
    anchorPoints = all.Values.OrderBy(x => x.AnchorPointY.Mz).ToList();
    foreach (var match in anchorPoints)
    {
        massXvalues.Add(match.AnchorPointX.Mz);
        massYvalues.Add(match.AnchorPointY.Mz);
    }

    var massInterpolator = new LoessInterpolator();
    massInterpolator.Smooth(massYvalues, massXvalues, FitFunctionFactory.Create(FitFunctionTypes.TriCubic));

    m_netInterpolator = netInterpolator;
    m_massInterpolator = massInterpolator;

    // Apply both fits to the Y side of every retained anchor point.
    foreach (var match in anchorPoints)
    {
        match.AnchorPointY.NetAligned = netInterpolator.Predict(match.AnchorPointY.Net);
        match.AnchorPointY.MzAligned = massInterpolator.Predict(match.AnchorPointY.Mz);
    }
}
/// <summary>
/// Clusters features based on their pairwise distances by finding the minimal spanning tree (MST) via Prim's algorithm.
/// </summary>
/// <param name="potentialDistances">Pairwise distances between all features in question.</param>
/// <param name="clusters">Singleton clusters from each feature.</param>
/// <returns>List of features clustered together.</returns>
public override List<U> LinkFeatures(List<Data.PairwiseDistance<T>> potentialDistances, Dictionary<int, U> clusters)
{
    var newClusters = new List<U>();

    // Edge case: a feature in the batch may have no edge to any other feature
    // that is within tolerance. Start with every feature and remove each one
    // that takes part in at least one in-tolerance pair; what remains are the
    // singletons we still need to emit.
    var unlinkedFeatures = new HashSet<T>();
    foreach (var cluster in clusters.Values)
    {
        foreach (var feature in cluster.Features)
        {
            unlinkedFeatures.Add(feature);
        }
    }

    foreach (var distance in potentialDistances)
    {
        if (AreClustersWithinTolerance(distance.FeatureX, distance.FeatureY))
        {
            unlinkedFeatures.Remove(distance.FeatureX);
            unlinkedFeatures.Remove(distance.FeatureY);
        }
    }

    foreach (var feature in unlinkedFeatures)
    {
        var cluster = new U();
        feature.SetParentFeature(cluster);
        cluster.AddChildFeature(feature);
        newClusters.Add(cluster);
    }

    // Build the edge list in order of increasing distance so we do not have
    // to recalculate distances; edge ids follow that order.
    var newDistances = (from element in potentialDistances
                        orderby element.Distance
                        select element).ToList();

    var graph = new FeatureGraph<T>();
    var edges = new List<Edge<T>>();
    var id = 0;
    foreach (var element in newDistances)
    {
        edges.Add(new Edge<T>(id++, element.Distance, element.FeatureX, element.FeatureY));
    }

    graph.CreateGraph(edges);

    var queue = new Queue<Edge<T>>();
    foreach (var edge in edges)
    {
        queue.Enqueue(edge);
    }

    // Tracks edges already consumed by a sub-tree so each is processed once.
    var seenEdge = new HashSet<int>();

    if (DumpLinearRelationship)
    {
        Console.WriteLine("GraphEdgeLength");
    }

    while (queue.Count > 0)
    {
        var startEdge = queue.Dequeue();

        // If we have already seen the edge, ignore it...
        if (seenEdge.Contains(startEdge.ID))
        {
            continue;
        }

        var mstGroup = ConstructSubTree(graph, seenEdge, startEdge);

        // NOTE(review): the tree is filled but never read afterwards; it is kept
        // because Insert is project code whose side effects are not visible here.
        var clusterTree = new MstLrTree<Edge<T>>();

        foreach (var dist in mstGroup.LinearRelationship)
        {
            seenEdge.Add(dist.ID);
            clusterTree.Insert(dist);

            if (DumpLinearRelationship)
            {
                Console.WriteLine("{0}", dist.Length);
            }
        }

        // The cutoff is NSigma itself. The mean and standard deviation of the edge
        // lengths used to be computed here but never fed into the cutoff, so that
        // dead arithmetic has been dropped.
        var cutoff = NSigma;
        var mstClusters = CreateClusters(mstGroup, cutoff);
        newClusters.AddRange(mstClusters);
    }

    return newClusters;
}
/// <summary>
/// Loads the features of one charge state from a feature database, walks them in mass order,
/// and prints the number of singleton partitions followed by the size and pairwise-distance
/// calculation time (in milliseconds) of every multi-feature mass partition that was closed.
/// </summary>
/// <param name="databasePath">Path assigned to the feature database accessor.</param>
/// <param name="chargeState">Charge state used to select the features.</param>
/// <param name="massTolerance">Mass tolerance (compared against a ppm difference) that separates partitions.</param>
/// <param name="netTolerance">NET tolerance forwarded to the pairwise distance calculation.</param>
/// <param name="driftTolerance">Drift time tolerance forwarded to the pairwise distance calculation.</param>
/// <exception cref="NullReferenceException">Thrown when the loaded feature list contains a null entry.</exception>
public void MassPartitionTest(string databasePath, int chargeState, double massTolerance, double netTolerance, double driftTolerance)
{
    var database = new UmcAdoDAO();
    database.DatabasePath = databasePath;

    Logger.PrintMessage("Extracting Features", true);
    var data = database.FindByCharge(chargeState);

    // Make sure there is no null UMC data in the input list.
    // FindIndex returns -1 when nothing matches, so index 0 is a valid hit.
    var nullIndex = data.FindIndex(x => x == null);
    if (nullIndex >= 0)
    {
        throw new NullReferenceException("The feature at index " + nullIndex + " was null. Cannot process this data.");
    }

    // The first thing we do is to sort the features based on mass since we know that
    // has the least variability in the data across runs.
    data.Sort(m_massComparer);

    // This is the index of first feature of a given mass partition.
    var startUMCIndex = 0;
    var totalFeatures = data.Count;
    var singletons = 0;
    var sizes = new List<int>();
    var times = new List<double>();

    // NOTE(review): a partition is only closed when a mass gap is found, so the run of
    // features that ends at the last element is never counted or timed - confirm intent.
    for (var i = 0; i < totalFeatures - 1; i++)
    {
        // Here we compute the ppm mass difference between consecutive features (based on mass).
        // This will determine if we cluster a block of data or not.
        var umcX = data[i];
        var umcY = data[i + 1];
        var ppm = Math.Abs(FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned, umcY.MassMonoisotopicAligned));

        // If the difference is greater than the tolerance then we cluster
        // - we dont check the sign of the ppm because the data should be sorted based on mass.
        if (ppm > massTolerance)
        {
            // If the partition starts at i, the feature at startUMCIndex could not find
            // any other feature near it within the mass tolerance specified.
            if (startUMCIndex == i)
            {
                singletons++;
            }
            else
            {
                // Only the elapsed time matters here; the distances themselves are discarded.
                var startTime = DateTime.Now;
                CalculatePairwiseDistances(data, startUMCIndex, i, massTolerance, netTolerance, driftTolerance);
                var endTime = DateTime.Now;

                sizes.Add(i - startUMCIndex + 1);
                times.Add(endTime.Subtract(startTime).TotalMilliseconds);
            }

            startUMCIndex = i + 1;
        }
    }

    Console.WriteLine("{0}", singletons);
    Console.WriteLine();
    for (var i = 0; i < sizes.Count; i++)
    {
        Console.WriteLine("{0}\t{1}", sizes[i], times[i]);
    }
}
/// <summary>
/// Runs the MultiAlign analysis: finds features in the baseline dataset, then for every alignee dataset
/// finds its features, clusters baseline and alignee features together before and after alignment,
/// reports the cluster analysis of both passes, and writes the alignment errors and matches to disk.
/// </summary>
/// <param name="baselineDataset">Dataset used as the alignment reference.</param>
/// <param name="aligneeDatasets">Datasets that are each aligned to the baseline.</param>
/// <param name="featureFindingOptions">Options handed to the feature finder.</param>
/// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
/// <param name="lcmsFilterOptions">Not used by this method.</param>
/// <param name="peptideOptions">Supplies the FDR and id score used when linking peptides to features.</param>
/// <param name="featureFinder">Feature finder that turns MS features into LCMS features.</param>
/// <param name="aligner">Aligner applied to each baseline/alignee pair.</param>
/// <param name="clusterer">Clusterer run before and after alignment.</param>
/// <param name="matchPath">Path handed to SaveMatches.</param>
/// <param name="errorPath">Path handed to WriteErrors.</param>
public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
    IEnumerable<DatasetInformation> aligneeDatasets,
    LcmsFeatureFindingOptions featureFindingOptions,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    SpectralOptions peptideOptions,
    MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
    IFeatureAligner<IEnumerable<UMCLight>, IEnumerable<UMCLight>, AlignmentData> aligner,
    IClusterer<UMCLight, UMCClusterLight> clusterer,
    string matchPath,
    string errorPath)
{
    UpdateStatus("Loading baseline features.");
    var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
    msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

    // Load the baseline reference set
    using (var rawProviderX = new InformedProteomicsReader())
    {
        rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawProviderX);
        LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr, peptideOptions.IdScore);

        var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

        // Then load the alignee dataset
        foreach (var dataset in aligneeDatasets)
        {
            var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
            aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);

            using (var rawProviderY = new InformedProteomicsReader())
            {
                rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                UpdateStatus("Finding alignee features");
                var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures, featureFindingOptions, rawProviderY);
                LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr, peptideOptions.IdScore);

                var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                // cluster before we do anything else....
                var allFeatures = new List<UMCLight>();
                allFeatures.AddRange(baselineFeatures);
                allFeatures.AddRange(aligneeFeatures);

                // Seed the aligned mass with the raw mass so the pre-alignment clustering has
                // a value to work with. NET is left exactly as the feature finder produced it.
                foreach (var feature in allFeatures)
                {
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                }

                // This tells us the differences before we align.
                var clusters = clusterer.Cluster(allFeatures);
                var preAlignment = AnalyzeClusters(clusters);

                aligner.AligneeSpectraProvider = providerY;
                aligner.BaselineSpectraProvider = providerX;

                UpdateStatus("Aligning data");
                var data = aligner.Align(baselineFeatures, aligneeFeatures);
                var matches = data.Matches;

                WriteErrors(errorPath, matches);

                // Detach every feature from its pre-alignment cluster before re-clustering.
                foreach (var feature in allFeatures)
                {
                    feature.UmcCluster = null;
                    feature.ClusterId = -1;
                }

                // Then cluster after alignment!
                UpdateStatus("clustering data");
                clusters = clusterer.Cluster(allFeatures);
                var postAlignment = AnalyzeClusters(clusters);

                UpdateStatus("Note\tSame\tDifferent");
                UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster, preAlignment.DifferentCluster));
                UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster, postAlignment.DifferentCluster));

                SaveMatches(matchPath, matches);
            }
        }
    }

    DeRegisterProgressNotifier(aligner);
    DeRegisterProgressNotifier(featureFinder);
    DeRegisterProgressNotifier(clusterer);
}
/// <summary>
/// Builds the pairwise distances between every ordered pair of distinct clusters whose mass (ppm),
/// NET and drift time differences all fall within the configured tolerances.
/// </summary>
/// <param name="clusters">Clusters to compare; only the dictionary values are used.</param>
/// <returns>List of cluster-to-cluster distances to consider during clustering.</returns>
protected List<Data.PairwiseDistance<U>> CalculateDistances(Dictionary<int, U> clusters)
{
    var maxMassPpm = Parameters.Tolerances.Mass;
    var maxNet = Parameters.Tolerances.Net;
    var maxDrift = Parameters.Tolerances.DriftTime;
    var requireSameCharge = Parameters.OnlyClusterSameChargeStates;

    var result = new List<Data.PairwiseDistance<U>>();

    foreach (var source in clusters.Values)
    {
        // Read the source cluster's coordinates once per outer iteration.
        var sourceDrift = source.DriftTime;
        var sourceNet = source.Net;
        var sourceMass = source.MassMonoisotopicAligned;
        var sourceCharge = source.ChargeState;

        foreach (var target in clusters.Values)
        {
            // A cluster is never paired with itself.
            if (source == target)
            {
                continue;
            }

            // Box ("cube") test on the three dimensions. The scalar distance is only
            // computed afterwards, for pairs that land inside the box.
            var massDelta = Math.Abs(FeatureLight.ComputeMassPPMDifference(sourceMass, target.MassMonoisotopicAligned));
            var netDelta = Math.Abs(sourceNet - target.Net);
            var driftDelta = Math.Abs(sourceDrift - target.DriftTime);

            var insideCube = massDelta <= maxMassPpm && netDelta <= maxNet && driftDelta <= maxDrift;
            if (!insideCube)
            {
                continue;
            }

            // When configured (IMS or equivalent), only pair clusters of equal charge state.
            var chargeAccepted = !requireSameCharge || sourceCharge == target.ChargeState;
            if (!chargeAccepted)
            {
                continue;
            }

            result.Add(new Data.PairwiseDistance<U>
            {
                FeatureX = source,
                FeatureY = target,
                Distance = GetAverageClusterDistance(source, target, Parameters.DistanceFunction)
            });
        }
    }

    return result;
}
/// <summary>
/// Aligns one alignee feature set to a baseline feature set. NET values are first derived from the
/// scan range of each set, the combined features are clustered and scored before and after the
/// alignment, both scores are reported, and the filtered matches are saved.
/// </summary>
/// <param name="baselineFeatures">Features of the reference dataset. Enumerated several times.</param>
/// <param name="aligneeFeatures">Features of the dataset being aligned. Enumerated several times.</param>
/// <param name="providerX">Spectra provider assigned to the aligner for the baseline.</param>
/// <param name="providerY">Spectra provider assigned to the aligner for the alignee.</param>
/// <param name="aligner">Aligner applied to the two feature sets.</param>
/// <param name="clusterer">Clusterer run before and after alignment.</param>
/// <param name="matchPath">Path handed to SaveMatches.</param>
/// <param name="errorPath">Not used by this method.</param>
public void AlignDatasets(IEnumerable<UMCLight> baselineFeatures,
    IEnumerable<UMCLight> aligneeFeatures,
    ISpectraProvider providerX,
    ISpectraProvider providerY,
    IFeatureAligner<IEnumerable<UMCLight>, IEnumerable<UMCLight>, AlignmentData> aligner,
    IClusterer<UMCLight, UMCClusterLight> clusterer,
    string matchPath,
    string errorPath)
{
    // cluster before we do anything else....
    var allFeatures = new List<UMCLight>();
    allFeatures.AddRange(baselineFeatures);
    allFeatures.AddRange(aligneeFeatures);

    var maxBaseline = baselineFeatures.Max(x => x.Scan);
    var minBaseline = baselineFeatures.Min(x => x.Scan);
    var maxAlignee = aligneeFeatures.Max(x => x.Scan);
    var minAlignee = aligneeFeatures.Min(x => x.Scan);

    // NET is the scan position scaled onto the scan range of its own dataset. When every
    // feature shares one scan the range is zero; use 0 instead of dividing (which gives NaN).
    var aligneeScanRange = Convert.ToDouble(maxAlignee - minAlignee);
    foreach (var feature in aligneeFeatures)
    {
        feature.Net = aligneeScanRange > 0
            ? Convert.ToDouble(feature.Scan - minAlignee) / aligneeScanRange
            : 0;
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    }

    var baselineScanRange = Convert.ToDouble(maxBaseline - minBaseline);
    foreach (var feature in baselineFeatures)
    {
        feature.Net = baselineScanRange > 0
            ? Convert.ToDouble(feature.Scan - minBaseline) / baselineScanRange
            : 0;
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    }

    // This tells us the differences before we align.
    var clusters = clusterer.Cluster(allFeatures);
    var clusterId = 0;
    foreach (var cluster in clusters)
    {
        cluster.Id = clusterId++;
    }

    var scorer = new GlobalPeptideClusterScorer();
    var preAlignment = scorer.Score(clusters);

    aligner.AligneeSpectraProvider = providerY;
    aligner.BaselineSpectraProvider = providerX;

    UpdateStatus("Aligning data");
    var data = aligner.Align(baselineFeatures, aligneeFeatures);
    var matches = data.Matches;

    // Detach every feature from its pre-alignment cluster before re-clustering.
    foreach (var feature in allFeatures)
    {
        feature.UmcCluster = null;
        feature.ClusterId = -1;
    }

    // Then cluster after alignment!
    UpdateStatus("clustering data");
    clusters = clusterer.Cluster(allFeatures);
    var postAlignment = scorer.Score(clusters);

    UpdateStatus("Note\tSame\tDifferent");
    UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster, preAlignment.DifferentCluster));
    UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster, postAlignment.DifferentCluster));

    // NOTE(review): 40 is an unexplained threshold passed to FilterMatches - confirm its meaning.
    matches = FilterMatches(matches, 40);
    SaveMatches(matchPath, matches);

    DeRegisterProgressNotifier(aligner);
    DeRegisterProgressNotifier(clusterer);
}
/// <summary>
/// Finds LCMS Features from MS Features: the child features are first grouped by m/z, features
/// without data are dropped, XICs are built when a spectra provider is available, and the
/// resulting features are finally grouped by monoisotopic mass and given sequential ids.
/// </summary>
/// <param name="rawMsFeatures">MS features to cluster. Their Net value is overwritten from the scan range.</param>
/// <returns>The clustered parent features, with Id assigned in list order starting at 0.</returns>
/// <exception cref="InvalidDataException">Thrown when the first clustering pass returns null.</exception>
public List<TParentFeature> Cluster(List<TChildFeature> rawMsFeatures)
{
    Comparison<TChildFeature> mzSort = (x, y) => x.Mz.CompareTo(y.Mz);
    Comparison<TParentFeature> monoSort = (x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic);
    Func<TChildFeature, TChildFeature, double> mzDiff = (x, y) => FeatureLight.ComputeMassPPMDifference(x.Mz, y.Mz);
    Func<TParentFeature, TParentFeature, double> monoDiff = (x, y) => FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopic, y.MassMonoisotopic);

    var minScan = Convert.ToDouble(rawMsFeatures.Min(x => x.Scan));
    var maxScan = Convert.ToDouble(rawMsFeatures.Max(x => x.Scan));

    // When every feature shares one scan the range is zero; NET is then set to 0
    // instead of dividing by zero (which would yield NaN).
    var scanRange = maxScan - minScan;

    foreach (var msFeature in rawMsFeatures)
    {
        msFeature.Net = scanRange > 0
            ? (Convert.ToDouble(msFeature.Scan) - minScan) / scanRange
            : 0;
    }

    // The ambiguous-feature filter is currently disabled; only the progress message remains.
    OnProgress("Filtering ambiguous features");

    OnProgress("Clustering child features into potential UMC candidates");

    // First cluster based on m/z finding the XIC's
    var features = Cluster<TChildFeature, TParentFeature>(rawMsFeatures, mzSort, mzDiff, CompareMz, Tolerances.Mass);

    // Validate before the first dereference so the caller gets the descriptive exception.
    if (features == null)
    {
        throw new InvalidDataException("No features were found from the input MS Feature list.");
    }

    var n = features.Count();
    OnProgress(string.Format("Found {0} unique child features from {1} total features", n, rawMsFeatures.Count));
    OnProgress("Filtering Features");

    OnProgress("Filtering poor features with no data. Calculating statistics for the good ones.");
    features = features.Where(x => x.MsFeatures.Count > 0).ToList();
    foreach (var feature in features)
    {
        feature.CalculateStatistics(ClusterCentroidRepresentation.Median);

        // Neutral monoisotopic mass from m/z and charge: remove one proton mass per charge.
        feature.MassMonoisotopic = (feature.Mz * feature.ChargeState) -
                                   (SubAtomicParticleLibrary.MASS_PROTON * feature.ChargeState);
    }

    // Here we should merge the XIC data...trying to find the best possible feature.
    // Note that at this point we dont have UMC's. We only have features
    // that are separated by mass, scan, and charge.
    if (SpectraProvider != null)
    {
        OnProgress("Building XIC's from child features");

        var generator = new XicCreator();
        generator.Progress += generator_Progress;
        try
        {
            // NOTE(review): both 'as' casts yield null when TParentFeature is not UMCLight - confirm
            // this path is only reached with UMCLight parent features.
            features = generator.CreateXic(features as List<UMCLight>, Tolerances.Mass, SpectraProvider) as IEnumerable<TParentFeature>;
        }
        finally
        {
            // Always detach the handler, even when XIC creation fails.
            generator.Progress -= generator_Progress;
        }
    }

    OnProgress("Calculating statistics for each feature");
    foreach (var feature in features)
    {
        feature.CalculateStatistics(ClusterCentroidRepresentation.Median);
        feature.Net = scanRange > 0
            ? Convert.ToDouble(feature.Scan - minScan) / scanRange
            : 0;
    }

    // Then we group into UMC's for clustering across charge states...
    OnProgress("Combining child feature charge states");
    features = Cluster<TParentFeature, TParentFeature>(features, monoSort, monoDiff, CompareMonoisotopic, Tolerances.Mass);

    OnProgress("Assigning unique feature id's to each feature.");
    var id = 0;
    var featureList = features.ToList();
    foreach (var feature in featureList)
    {
        feature.Id = id++;
    }

    return featureList;
}