/// <summary>
/// Collects NET and mass (ppm) residuals between the parent features of matched anchor points,
/// once using the un-aligned values of the Y feature and once using its aligned values.
/// </summary>
/// <param name="matches">Anchor point matches to measure. Matches whose spectra have no parent feature are skipped.</param>
/// <returns>A tuple whose first item holds the NET residuals and whose second item holds the mass residuals.</returns>
public Tuple<AlignmentMeasurement<double>, AlignmentMeasurement<double>> MeasureErrors(IEnumerable<SpectralAnchorPointMatch> matches)
{
    var netResiduals = new AlignmentMeasurement<double>();
    var massResiduals = new AlignmentMeasurement<double>();

    foreach (var match in matches)
    {
        var msFeatureX = match.AnchorPointX.Spectrum.ParentFeature;
        var msFeatureY = match.AnchorPointY.Spectrum.ParentFeature;

        // Both sides need an MS feature before the LC-MS level parents can be compared.
        if (msFeatureX != null && msFeatureY != null)
        {
            var parentX = msFeatureX.ParentFeature;
            var parentY = msFeatureY.ParentFeature;

            netResiduals.PreAlignment.Add(parentX.Net - parentY.Net);
            netResiduals.PostAlignment.Add(parentX.Net - parentY.NetAligned);

            massResiduals.PreAlignment.Add(
                FeatureLight.ComputeMassPPMDifference(parentX.MassMonoisotopic, parentY.MassMonoisotopic));
            massResiduals.PostAlignment.Add(
                FeatureLight.ComputeMassPPMDifference(parentX.MassMonoisotopic, parentY.MassMonoisotopicAligned));
        }
    }

    return Tuple.Create(netResiduals, massResiduals);
}
/// <summary>
/// Lazily creates one XIC target per feature, using the feature's own m/z and charge state.
/// </summary>
/// <param name="features">Features to turn into XIC targets.</param>
/// <param name="massError">Mass tolerance (ppm) handed to FeatureLight.ComputeDaDifferenceFromPPM to derive the m/z window.</param>
/// <returns>A deferred sequence of XIC targets with sequential ids starting at zero.</returns>
private IEnumerable<XicFeature> CreateXicTargetsYield(IEnumerable<UMCLight> features, double massError)
{
    var id = 0;

    foreach (var feature in features)
    {
        // Find the scan extent covered by the feature's MS features.
        var minScan = int.MaxValue;
        var maxScan = 0;
        foreach (var msFeature in feature.MsFeatures)
        {
            minScan = Math.Min(minScan, msFeature.Scan);
            maxScan = Math.Max(maxScan, msFeature.Scan);
        }

        yield return new XicFeature
        {
            HighMz = FeatureLight.ComputeDaDifferenceFromPPM(feature.Mz, -massError),
            LowMz = FeatureLight.ComputeDaDifferenceFromPPM(feature.Mz, massError),
            Mz = feature.Mz,
            Feature = feature,
            Id = id++,
            // The window is padded outwards: it starts before the first scan and ends after the last one.
            // (Previously the two extremes were swapped, which inverted the window for wide features.)
            StartScan = minScan - ScanWindowSize,
            EndScan = maxScan + ScanWindowSize,
            ChargeState = feature.ChargeState
        };
    }
}
/// <summary>
/// Keeps only the matches whose parent-feature m/z values differ by less than the given ppm limit.
/// </summary>
/// <param name="matches">Candidate anchor point matches.</param>
/// <param name="ppm">Exclusive upper bound on the absolute ppm difference.</param>
/// <returns>A deferred sequence of the matches that pass the filter.</returns>
private IEnumerable<SpectralAnchorPointMatch> FilterMatches(IEnumerable<SpectralAnchorPointMatch> matches, double ppm)
{
    return from match in matches
           let mzX = match.AnchorPointX.Spectrum.ParentFeature.Mz
           let mzY = match.AnchorPointY.Spectrum.ParentFeature.Mz
           where Math.Abs(FeatureLight.ComputeMassPPMDifference(mzX, mzY)) < ppm
           select match;
}
/// <summary>
/// Fits a combined regression of mass error (ppm) against the alignee feature's NET.
/// </summary>
/// <param name="matches">Alignee/baseline feature pairs used as regression points.</param>
/// <returns>A mass alignment function containing the single fitted regression.</returns>
public LcmsWarpMassAlignmentFunction CalculateCalibration(List<LcmsWarpFeatureMatch> matches)
{
    var regression = new LcmsWarpCombinedRegression();
    regression.SetCentralRegressionOptions(
        this.options.MassCalibNumXSlices,
        this.options.MassCalibNumYSlices,
        this.options.MassCalibMaxJump,
        this.options.MassCalibMaxZScore,
        this.options.RegressionType);

    // One regression point per match: x = alignee NET, plus the NET and mass residuals.
    var points = matches.ConvertAll(match =>
    {
        var alignee = match.AligneeFeature;
        var baseline = match.BaselineFeature;

        var massErrorPpm = FeatureLight.ComputeMassPPMDifference(alignee.MassMonoisotopic, baseline.MassMonoisotopic);
        var netResidual = baseline.Net - alignee.NetAligned;

        return new RegressionPoint(alignee.Net, 0, netResidual, massErrorPpm);
    });

    regression.CalculateRegressionFunction(points, "ScanMassError");

    var alignmentFunction = new LcmsWarpMassAlignmentFunction();
    alignmentFunction.Calibrations = new List<LcmsWarpCombinedRegression> { regression };
    return alignmentFunction;
}
/// <summary>
/// Round-trip check: converts the difference between two masses to ppm and back,
/// and expects the second mass to be reproduced.
/// </summary>
/// <param name="massX">Reference mass.</param>
/// <param name="massY">Mass expected after the round trip.</param>
public void MassMassCalculations(double massX, double massY)
{
    var ppmOffset = FeatureLight.ComputeMassPPMDifference(massX, massY);
    var roundTrippedMass = FeatureLight.ComputeDaDifferenceFromPPM(massX, ppmOffset);

    Assert.AreEqual(massY, roundTrippedMass);
}
/// <summary>
/// Round-trip check: shifts a mass by a ppm value, converts the shift back to ppm,
/// and expects the result to be within <paramref name="epsilon"/> of the original ppm value.
/// </summary>
/// <param name="massX">Reference mass.</param>
/// <param name="ppm">ppm shift applied to the reference mass.</param>
/// <param name="epsilon">Maximum allowed absolute round-trip error, in ppm.</param>
public void MassPPMCalculations(double massX, double ppm, double epsilon)
{
    var shiftedMass = FeatureLight.ComputeDaDifferenceFromPPM(massX, ppm);
    var roundTrippedPpm = FeatureLight.ComputeMassPPMDifference(massX, shiftedMass);

    // Compare the magnitude of the error: a signed comparison would accept
    // any arbitrarily large negative difference.
    Assert.Less(Math.Abs(ppm - roundTrippedPpm), epsilon);
}
/// <summary>
/// Creates XIC targets from a list of UMC features, one target per charge state of each feature.
/// </summary>
/// <param name="features">Features to build targets from. Their MsFeatures lists are cleared as a side effect.</param>
/// <param name="massError">Mass tolerance (ppm) handed to FeatureLight.ComputeDaDifferenceFromPPM to derive the m/z window.</param>
/// <returns>The XIC targets, with sequential ids starting at zero.</returns>
private List<XicFeature> CreateXicTargets(IEnumerable<UMCLight> features, double massError)
{
    var allFeatures = new List<XicFeature>();
    var id = 0;

    foreach (var feature in features)
    {
        var chargeMap = feature.CreateChargeMap();

        foreach (var charge in chargeMap.Keys)
        {
            double maxIntensity = 0;
            double mz = 0;
            var scanStart = int.MaxValue;
            var scanEnd = 0;

            foreach (var chargeFeature in chargeMap[charge])
            {
                scanStart = Math.Min(scanStart, chargeFeature.Scan);

                // Track the LAST scan with Max; the previous Math.Min(scanStart, ...) made
                // scanEnd collapse onto scanStart.
                scanEnd = Math.Max(scanEnd, chargeFeature.Scan);

                // The target m/z is taken from the most abundant MS feature of this charge state.
                if (chargeFeature.Abundance > maxIntensity)
                {
                    maxIntensity = chargeFeature.Abundance;
                    mz = chargeFeature.Mz;
                }
            }

            // Clear the MS feature list...because later we will populate it.
            feature.MsFeatures.Clear();

            allFeatures.Add(new XicFeature
            {
                HighMz = FeatureLight.ComputeDaDifferenceFromPPM(mz, -massError),
                LowMz = FeatureLight.ComputeDaDifferenceFromPPM(mz, massError),
                Mz = mz,
                Feature = feature,
                Id = id++,
                EndScan = scanEnd + ScanWindowSize,
                StartScan = scanStart - ScanWindowSize,
                ChargeState = charge
            });
        }
    }

    return allFeatures;
}
/// <summary>
/// Tests whether two features lie within the configured mass (ppm), NET and drift time tolerances of each other.
/// </summary>
/// <param name="x">First feature.</param>
/// <param name="y">Second feature.</param>
/// <returns>True only when all three differences are within their tolerance.</returns>
private bool WithinRange(T x, T y)
{
    var massPpm = Math.Abs(FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopicAligned, y.MassMonoisotopicAligned));
    var netDelta = Math.Abs(x.Net - y.Net);
    var driftDelta = Math.Abs(x.DriftTime - y.DriftTime);

    // Negated "<=" checks (rather than ">") so that NaN differences are rejected.
    if (!(massPpm <= Tolerances.Mass))
    {
        return false;
    }

    if (!(netDelta <= Tolerances.Net))
    {
        return false;
    }

    return driftDelta <= Tolerances.DriftTime;
}
/// <summary>
/// Weighted Euclidean distance between two features over aligned monoisotopic mass, NET and drift time,
/// using the instance weights MassWeight, NetWeight and DriftWeight.
/// </summary>
/// <param name="x">First feature.</param>
/// <param name="y">Second feature.</param>
/// <returns>The square root of the weighted sum of squared differences.</returns>
public double EuclideanDistance(T x, FeatureLight y)
{
    var deltaMass = x.MassMonoisotopicAligned - y.MassMonoisotopicAligned;
    var deltaNet = x.Net - y.Net;
    var deltaDrift = x.DriftTime - y.DriftTime;

    var massTerm = MassWeight * (deltaMass * deltaMass);
    var netTerm = NetWeight * (deltaNet * deltaNet);
    var driftTerm = DriftWeight * (deltaDrift * deltaDrift);

    return Math.Sqrt(massTerm + netTerm + driftTerm);
}
/// <summary>
/// Calculates the weighted Euclidean distance based on drift time, aligned mass (as a ppm difference), and NET.
/// </summary>
/// <param name="x">Feature x.</param>
/// <param name="y">Feature y.</param>
/// <param name="massWeight">Weight applied to the squared ppm mass difference.</param>
/// <param name="netWeight">Weight applied to the squared NET difference.</param>
/// <param name="driftWeight">Weight applied to the squared drift time difference.</param>
/// <returns>The square root of the weighted sum of squared differences.</returns>
public double EuclideanDistance(T x, T y, double massWeight, double netWeight, double driftWeight)
{
    var massDifference = FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopicAligned, y.MassMonoisotopicAligned);
    var netDifference = x.Net - y.Net;
    var driftDifference = x.DriftTime - y.DriftTime;

    // The NET term must be scaled by netWeight. It was previously multiplied by netDifference,
    // which cubed the difference, ignored the weight and could make the sum negative.
    var sum = (massDifference * massDifference) * massWeight
              + (netDifference * netDifference) * netWeight
              + (driftDifference * driftDifference) * driftWeight;

    return Math.Sqrt(sum);
}
/// <summary>
/// Prints how the Euclidean distance metric responds when mass, NET or drift time is stepped
/// away from a reference cluster while the other two dimensions keep a fixed offset.
/// Output is CSV on the console; nothing is asserted.
/// </summary>
public void TestDistanceChangeEuclidean()
{
    var cluster = new UMCClusterLight();
    cluster.MassMonoisotopic = 500;
    cluster.Net = .5;
    cluster.DriftTime = 20;

    var euclid = new EuclideanDistanceMetric<UMCClusterLight>();
    DistanceFunction<UMCClusterLight> func = euclid.EuclideanDistance;

    var deltaNet = .01;
    double deltaMassPPM = 1;
    double deltaDriftTime = 1;

    Console.WriteLine("Mass Diff, Mass Dist, Net, Net Dist, Drift, Drift Dist");

    for (var i = 0; i < 50; i++)
    {
        // Mass walk: only the mass offset grows with i.
        var clusterM = new UMCClusterLight();
        clusterM.DriftTime = cluster.DriftTime + deltaDriftTime;
        clusterM.Net = cluster.Net + deltaNet;
        clusterM.MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(cluster.MassMonoisotopic, deltaMassPPM * i);

        // NET walk: only the NET offset grows with i.
        var clusterN = new UMCClusterLight();
        clusterN.DriftTime = cluster.DriftTime + deltaDriftTime;
        clusterN.Net = cluster.Net + (deltaNet * i);
        clusterN.MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(cluster.MassMonoisotopic, deltaMassPPM);

        // Drift walk: only the drift time offset grows with i.
        var clusterD = new UMCClusterLight();
        clusterD.DriftTime = cluster.DriftTime + (deltaDriftTime * i);
        clusterD.Net = cluster.Net + deltaNet;
        clusterD.MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(cluster.MassMonoisotopic, deltaMassPPM);

        var distM = func(cluster, clusterM);
        var distN = func(cluster, clusterN);
        var distD = func(cluster, clusterD);

        var output = string.Format("{0},{1},{2},{3},{4},{5}",
            deltaMassPPM * i, distM,
            deltaNet * i, distN,
            deltaDriftTime * i, distD);
        Console.WriteLine(output);
    }
}
/// <summary>
/// Writes one tab-delimited row per anchor point match (anchor point values, parent MS feature values,
/// parent LC-MS feature values and two ppm errors) to a text file, preceded by a header row.
/// </summary>
/// <param name="path">Path of the file to create (an existing file is overwritten).</param>
/// <param name="matches">Matches to write. Matches lacking a spectrum or a parent MS feature on either side are skipped.</param>
private void SaveMatches(string path, IEnumerable<SpectralAnchorPointMatch> matches)
{
    using (var writer = File.CreateText(path))
    {
        // The last two labels follow the order in which the values are written below
        // (m/z ppm error first, then monoisotopic-mass ppm error); they used to be swapped.
        writer.WriteLine(
            "NET-apx\tNET-apy\tNETAligned-apy\tmz-apx\tmzAligned-apx\tmz-apy\tmzAligned-apy\tScan-x\tScan-y\tpmz-x\tpmz-y\tpmonomass-x\tpmonomass-y\tpNET-x\tpNET-y\tpNETa-x\tpNETa-y\tpmonomass-x\tpmonomassyx\tpmz-errorppm\tpmonomass-errorppm");

        foreach (var match in matches)
        {
            var pointX = match.AnchorPointX;
            var pointY = match.AnchorPointY;

            if (pointX.Spectrum == null || pointY.Spectrum == null)
            {
                continue;
            }

            var parentFeatureX = pointX.Spectrum.ParentFeature;
            var parentFeatureY = pointY.Spectrum.ParentFeature;

            // A spectrum without a parent MS feature cannot be reported; skip it like a missing spectrum.
            if (parentFeatureX == null || parentFeatureY == null)
            {
                continue;
            }

            var umcX = parentFeatureX.GetParentFeature();
            var umcY = parentFeatureY.GetParentFeature();

            var data = string.Format(
                "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}\t{16}\t{17}\t{18}\t{19}\t{20}\t",
                pointX.Net,
                pointY.Net,
                pointY.NetAligned,
                pointX.Mz,
                pointX.MzAligned,
                pointY.Mz,
                pointY.MzAligned,
                parentFeatureX.Scan,
                parentFeatureY.Scan,
                parentFeatureX.Mz,
                parentFeatureY.Mz,
                parentFeatureX.MassMonoisotopic,
                parentFeatureY.MassMonoisotopic,
                umcX.Net,
                umcY.Net,
                umcX.NetAligned,
                umcY.NetAligned,
                umcX.MassMonoisotopicAligned,
                umcY.MassMonoisotopicAligned,
                FeatureLight.ComputeMassPPMDifference(parentFeatureX.Mz, parentFeatureY.Mz),
                FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned, umcY.MassMonoisotopicAligned));

            writer.WriteLine(data);
        }
    }
}
/// <summary>
/// Prints the weighted Euclidean distance between a fixed cluster and a second cluster that is
/// walked away from it, first in drift time, then in NET, then in mass. Nothing is asserted.
/// </summary>
public void TestDistances()
{
    const int steps = 50;
    const double massStepPpm = .5;
    const double netStep = .001;
    const double driftStep = .01;

    var metric = new WeightedEuclideanDistance<UMCClusterLight>();
    var reference = CreateCluster(500, .2, 27);
    var walker = CreateCluster(500, .2, 27);

    Console.WriteLine("Walk in drift time");
    for (var step = 0; step < steps; step++)
    {
        walker.DriftTime += driftStep;
        var driftDistance = metric.EuclideanDistance(reference, walker);
        Console.WriteLine("{0}, {1}, {3}, {2}",
            walker.DriftTime, walker.DriftTime, driftDistance, walker.DriftTime - reference.DriftTime);
    }

    Console.WriteLine();
    Console.WriteLine("Walk in net ");

    // Restore drift time so that only NET differs during the next walk.
    walker.DriftTime = 27;
    for (var step = 0; step < steps; step++)
    {
        walker.Net += netStep;
        var netDistance = metric.EuclideanDistance(reference, walker);
        Console.WriteLine("{0}, {1}, {3}, {2}",
            walker.Net, walker.Net, netDistance, walker.Net - reference.Net);
    }

    Console.WriteLine();
    Console.WriteLine("Walk in mass ");

    // Restore NET so that only mass differs during the last walk.
    walker.Net = .2;
    for (var step = 0; step < steps; step++)
    {
        walker.MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(reference.MassMonoisotopic, massStepPpm * step);
        var massDistance = metric.EuclideanDistance(reference, walker);
        Console.WriteLine("{0}, {1}, {3}, {2}",
            walker.MassMonoisotopic,
            walker.MassMonoisotopic,
            massDistance,
            FeatureLight.ComputeMassPPMDifference(reference.MassMonoisotopic, walker.MassMonoisotopic));
    }
}
/// <summary>
/// Collects the identifications recorded for the scans of a feature's MSn spectra.
/// </summary>
/// <param name="feature">Feature whose MSn spectra are looked up; its GroupId selects the identification provider.</param>
/// <returns>All identifications found for those scans (empty when none are found).</returns>
private List<Peptide> GetIdentifications(FeatureLight feature)
{
    var identificationsByScan = this.identificationProviderCache
        .GetProvider(feature.GroupId)
        .GetAllIdentifications();

    var found = new List<Peptide>();
    foreach (var spectrum in feature.MSnSpectra)
    {
        var scan = spectrum.Scan;
        if (!identificationsByScan.ContainsKey(scan))
        {
            continue;
        }

        found.AddRange(identificationsByScan[scan]);
    }

    return found;
}
/// <summary>
/// Builds an extracted ion chromatogram: for every scan in [minScan, maxScan) the intensities of all
/// raw data points whose m/z lies strictly inside the tolerance window are summed into one feature.
/// </summary>
/// <param name="mz">Target m/z.</param>
/// <param name="massError">Mass tolerance (ppm) handed to FeatureLight.ComputeDaDifferenceFromPPM to derive the window bounds.</param>
/// <param name="minScan">First scan to read (inclusive).</param>
/// <param name="maxScan">Scan at which to stop (exclusive).</param>
/// <param name="provider">Source of the raw spectra.</param>
/// <returns>One feature per scan that could be read; scans that fail to load are left out.</returns>
public IEnumerable<MSFeatureLight> CreateXic(double mz, double massError, int minScan, int maxScan, ISpectraProvider provider)
{
    var lowerMz = FeatureLight.ComputeDaDifferenceFromPPM(mz, massError);
    var upperMz = FeatureLight.ComputeDaDifferenceFromPPM(mz, -massError);
    var xic = new List<MSFeatureLight>();

    for (var scan = minScan; scan < maxScan; scan++)
    {
        List<XYData> points;
        try
        {
            ScanSummary summary = new ScanSummary();
            points = provider.GetRawSpectra(scan, 0, 1, out summary);
        }
        catch
        {
            // Best effort: any failure to read a scan is treated as "no spectrum for this scan".
            points = null;
        }

        if (points != null)
        {
            var summedIntensity = points
                .Where(point => point.X > lowerMz && point.X < upperMz)
                .Sum(point => point.Y);

            xic.Add(new MSFeatureLight
            {
                Scan = scan,
                Net = scan,
                Abundance = Convert.ToInt64(summedIntensity)
            });
        }
    }

    return xic;
}
/// <summary>
/// Walks the MS features scan by scan and, inside each scan, compares m/z-adjacent features by their
/// ppm difference, grouping the pairs in a per-scan map keyed by m/z.
/// </summary>
/// <remarks>
/// NOTE(review): this method looks unfinished. The returned list is never populated and the per-scan
/// map is discarded, so callers always receive an empty list. The group belonging to the final scan
/// is also never examined. Confirm the intended output before relying on this method.
/// </remarks>
/// <param name="rawMsFeatures">Features to examine; the list itself is not modified.</param>
/// <returns>Currently always an empty list (see remarks).</returns>
private List<TChildFeature> FilterMsFeatures(List<TChildFeature> rawMsFeatures)
{
    // Sort by scan so that all features of one scan are contiguous.
    var allFeatures = rawMsFeatures.OrderBy(x => x.Scan).ToList();
    var newFeatures = new List<TChildFeature>();
    var features = new List<TChildFeature>();
    var currentScan = 0;

    foreach (var feature in allFeatures)
    {
        if (currentScan != feature.Scan)
        {
            // A new scan starts: examine the features gathered for the previous scan.
            var mzFeatures = features.OrderBy(x => x.Mz).ToList();
            var mzMap = new Dictionary<double, List<TChildFeature>>();

            for (var j = 1; j < mzFeatures.Count; j++)
            {
                var featureJ = mzFeatures[j];
                var featurePrev = mzFeatures[j - 1];

                // Compare each feature with its m/z neighbour. The previous code compared
                // featureJ.Mz with itself, so the difference could never exceed the threshold.
                var ppm = FeatureLight.ComputeMassPPMDifference(featureJ.Mz, featurePrev.Mz);

                // NOTE(review): pairs are grouped when they are MORE than 1 ppm apart; the original
                // comment suggests near-duplicates were the target. Verify the direction of this test.
                if (Math.Abs(ppm) > 1)
                {
                    if (!mzMap.ContainsKey(featureJ.Mz))
                    {
                        mzMap.Add(featureJ.Mz, new List<TChildFeature>());
                    }

                    mzMap[featureJ.Mz].Add(featureJ);
                    mzMap[featureJ.Mz].Add(featurePrev);
                }
            }

            features.Clear();

            // Remember which scan is now being gathered; without this every scan looked "new".
            currentScan = feature.Scan;
        }

        // The feature that opens a new scan belongs to that scan, so it is always kept.
        features.Add(feature);
    }

    return newFeatures;
}
/// <summary>
/// Scores two features by comparing their identifications:
/// +1 for every identification they share, -1 for every identification found on only one side.
/// </summary>
/// <param name="feature1">The first feature.</param>
/// <param name="feature2">The second feature.</param>
/// <returns>Shared count minus the two one-sided counts.</returns>
public double ScoreComparison(FeatureLight feature1, FeatureLight feature2)
{
    var leftIds = this.GetIdentifications(feature1);
    var rightIds = this.GetIdentifications(feature2);

    var shared = leftIds.Intersect(rightIds).ToList();
    var leftOnlyCount = leftIds.Except(shared).Count();
    var rightOnlyCount = rightIds.Except(shared).Count();

    return (double)shared.Count - leftOnlyCount - rightOnlyCount;
}
/// <summary>
/// Orders two features by monoisotopic mass, treating them as equal when they fall within the
/// mass tolerance, share a charge state and lie within the NET tolerance.
/// </summary>
/// <param name="featureX">First feature.</param>
/// <param name="featureY">Second feature.</param>
/// <returns>
/// 0 when the features match on mass, charge and NET; 1 when mass and charge match but NET does not;
/// otherwise -1 when the ppm difference is negative and 1 when it is not.
/// </returns>
public int CompareMonoisotopic(FeatureLight featureX, FeatureLight featureY)
{
    var massPpm = FeatureLight.ComputeMassPPMDifference(featureX.MassMonoisotopic, featureY.MassMonoisotopic);

    var sameMassAndCharge = Math.Abs(massPpm) < Tolerances.Mass
                            && featureX.ChargeState == featureY.ChargeState;
    if (!sameMassAndCharge)
    {
        // Outside the window: order by the sign of the mass difference.
        return massPpm < 0 ? -1 : 1;
    }

    var netDelta = Math.Abs(featureX.Net - featureY.Net);
    if (netDelta <= Tolerances.Net)
    {
        return 0;
    }

    return 1;
}
/// <summary>
/// Creates a residual plot of mass error (ppm) against m/z, with the error computed once from the
/// un-aligned and once from the aligned monoisotopic mass of the second feature.
/// </summary>
/// <param name="x">First set of features.</param>
/// <param name="y">Second set of features.</param>
/// <returns>The plot produced by CreateResidualAlignmentPlot.</returns>
public static PlotBase CreateMassMzResidualAlignmentPlot<T>(IEnumerable<T> x, IEnumerable<T> y)
    where T : FeatureLight
{
    Func<T, double> xAxisValue = feature => feature.Mz;

    Func<T, T, double> residualBefore = (first, second) =>
        FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopic);

    Func<T, T, double> residualAfter = (first, second) =>
        FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopicAligned);

    var plot = CreateResidualAlignmentPlot(x, y, xAxisValue, residualBefore, residualAfter, "mz", "Mass Residual (ppm)");
    return plot;
}
/// <summary>
/// Creates the feature finder and configures its first-pass and second-pass clusterers from the options.
/// </summary>
/// <param name="provider">Scan summary provider; must not be null.</param>
/// <param name="options">Feature finding options; a default LcmsFeatureFindingOptions instance is used when null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="provider"/> is null.</exception>
public MsToLcmsFeatures(IScanSummaryProvider provider, LcmsFeatureFindingOptions options = null)
{
    if (provider == null)
    {
        // Name the offending argument so that the failure is diagnosable from the message alone.
        throw new ArgumentNullException("provider");
    }

    this.provider = provider;
    this.options = options ?? new LcmsFeatureFindingOptions();

    // First pass: MS features -> LC-MS features.
    if (this.options.FirstPassClusterer == MsFeatureClusteringAlgorithmType.BinarySearchTree)
    {
        Comparison<MSFeatureLight> mzSort = (x, y) => x.Mz.CompareTo(y.Mz);
        Func<MSFeatureLight, MSFeatureLight, double> mzDiff =
            (x, y) => FeatureLight.ComputeMassPPMDifference(x.Mz, y.Mz);

        this.firstPassClusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>(
            mzSort,
            mzDiff,
            MassComparison.Mz,
            this.options.InstrumentTolerances.Mass);
    }
    else
    {
        this.firstPassClusterer = ClusterFactory.Create(this.options.FirstPassClusterer);
    }

    // Second pass: LC-MS features -> LC-MS features.
    if (this.options.SecondPassClusterer == GenericClusteringAlgorithmType.BinarySearchTree)
    {
        Comparison<UMCLight> monoSort = (x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic);
        Func<UMCLight, UMCLight, double> monoDiff =
            (x, y) => FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopic, y.MassMonoisotopic);

        this.secondPassClusterer = new MsFeatureTreeClusterer<UMCLight, UMCLight>(
            monoSort,
            monoDiff,
            MassComparison.Monoisotopic,
            this.options.InstrumentTolerances.Mass);
    }
    else
    {
        var clusterFactory = new GenericClusterFactory<UMCLight, UMCLight>();
        this.secondPassClusterer = clusterFactory.Create(this.options.SecondPassClusterer);
    }
}
/// <summary>
/// Creates a residual plot of mass error (ppm) against scan, with the error computed once from the
/// un-aligned and once from the aligned monoisotopic mass of the second feature.
/// </summary>
/// <param name="x">First set of features.</param>
/// <param name="y">Second set of features.</param>
/// <returns>The plot produced by CreateResidualAlignmentPlot.</returns>
public static PlotBase CreateMassScanResidualAlignmentPlot<T>(IEnumerable<T> x, IEnumerable<T> y)
    where T : FeatureLight
{
    Func<T, double> xAxisValue = feature => feature.Scan;

    Func<T, T, double> residualBefore = (first, second) =>
        FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopic);

    Func<T, T, double> residualAfter = (first, second) =>
        FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopicAligned);

    return CreateResidualAlignmentPlot(x, y, xAxisValue, residualBefore, residualAfter, "scan", "Mass Residual (ppm)");
}
/// <summary>
/// Checks two clusters against the mass (ppm), NET and drift time tolerances taken from Parameters.
/// </summary>
/// <param name="clusterX">One of the two clusters to test.</param>
/// <param name="clusterY">The other cluster to test.</param>
/// <returns>True when every difference is within its tolerance, false otherwise.</returns>
protected override bool AreClustersWithinTolerance(U clusterX, U clusterY)
{
    var allowedMass = Parameters.Tolerances.Mass;
    var allowedNet = Parameters.Tolerances.Net;
    var allowedDrift = Parameters.Tolerances.DriftTime;

    var massPpm = Math.Abs(FeatureLight.ComputeMassPPMDifference(clusterX.MassMonoisotopicAligned, clusterY.MassMonoisotopicAligned));
    var netDelta = Math.Abs(clusterX.Net - clusterY.Net);
    var driftDelta = Math.Abs(clusterX.DriftTime - clusterY.DriftTime);

    return massPpm <= allowedMass
           && netDelta <= allowedNet
           && driftDelta <= allowedDrift;
}
/// <summary>
/// Checks two clusters against explicitly supplied mass (ppm), NET and drift time tolerances.
/// </summary>
/// <param name="clusterX">One of the two clusters to test.</param>
/// <param name="clusterY">The other cluster to test.</param>
/// <param name="massTolerance">Largest accepted absolute ppm difference of the aligned monoisotopic masses.</param>
/// <param name="netTolerance">Largest accepted absolute NET difference.</param>
/// <param name="driftTolerance">Largest accepted absolute drift time difference.</param>
/// <returns>True when every difference is within its tolerance, false otherwise.</returns>
protected virtual bool AreClustersWithinTolerance(UMCLight clusterX, UMCLight clusterY, double massTolerance, double netTolerance, double driftTolerance)
{
    var massPpm = Math.Abs(FeatureLight.ComputeMassPPMDifference(clusterX.MassMonoisotopicAligned, clusterY.MassMonoisotopicAligned));
    var netDelta = Math.Abs(clusterX.Net - clusterY.Net);
    var driftDelta = Math.Abs(clusterX.DriftTime - clusterY.DriftTime);

    var massOk = massPpm <= massTolerance;
    var netOk = netDelta <= netTolerance;
    var driftOk = driftDelta <= driftTolerance;

    return massOk && netOk && driftOk;
}
/// <summary>
/// Writes a small sectioned text report of anchor point matches: a header section, then one
/// tab-delimited row per match with NET, parent m/z and scan for both datasets, the ppm mass error
/// and the similarity score.
/// </summary>
/// <param name="path">Path of the file to create (an existing file is overwritten).</param>
/// <param name="matches">Matches to write; matches lacking a spectrum on either side are skipped.</param>
private void SaveMatches(string path, IEnumerable<SpectralAnchorPointMatch> matches)
{
    using (var output = File.CreateText(path))
    {
        output.WriteLine("[Header]");
        output.WriteLine("p mz = parentMz - A and B denote dataset A and dataset B");
        output.WriteLine("[Data]");
        output.WriteLine("Net-A\tpMz-A\tScan-A\tNet-B\tpMz-B\tScan-B\tMassErrorPpm\tSimScore");

        foreach (var match in matches)
        {
            if (match.AnchorPointX.Spectrum == null || match.AnchorPointY.Spectrum == null)
            {
                continue;
            }

            var msFeatureA = match.AnchorPointX.Spectrum.ParentFeature;
            var msFeatureB = match.AnchorPointY.Spectrum.ParentFeature;
            var umcA = msFeatureA.GetParentFeature();
            var umcB = msFeatureB.GetParentFeature();

            // The mass error is taken between the MS features, not between their LC-MS parents.
            var massErrorPpm = FeatureLight.ComputeMassPPMDifference(msFeatureA.Mz, msFeatureB.Mz);

            var row = string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}",
                umcA.Net,
                umcA.Mz,
                umcA.Scan,
                umcB.Net,
                umcB.Mz,
                umcB.Scan,
                massErrorPpm,
                match.SimilarityScore);

            output.WriteLine(row);
        }
    }
}
/// <summary>
/// Scores two features by comparing every MS/MS spectrum of the first with every MS/MS spectrum of the second:
/// +1 for each pair whose comparison score is within tolerance, -1 for each pair that is not.
/// When exactly one of the features has no MS/MS spectra the score is -1.
/// </summary>
/// <param name="feature1">The first feature.</param>
/// <param name="feature2">The second feature.</param>
/// <returns>The accumulated score.</returns>
/// <exception cref="DatasetInformation.MissingRawDataException">
/// Thrown when no spectra provider is available for the dataset of either feature.
/// </exception>
public double ScoreComparison(FeatureLight feature1, FeatureLight feature2)
{
    var leftSpectraProvider = this.spectraProvider.GetScanSummaryProvider(feature1.GroupId) as ISpectraProvider;
    var rightSpectraProvider = this.spectraProvider.GetScanSummaryProvider(feature2.GroupId) as ISpectraProvider;

    if (leftSpectraProvider == null)
    {
        throw new DatasetInformation.MissingRawDataException("Do not have spectra data available for dataset.", feature1.GroupId);
    }

    if (rightSpectraProvider == null)
    {
        throw new DatasetInformation.MissingRawDataException("Do not have spectra data available for dataset.", feature2.GroupId);
    }

    var leftSpectra = leftSpectraProvider.GetMSMSSpectra(feature1.Scan, feature1.Mz, true);
    var rightSpectra = rightSpectraProvider.GetMSMSSpectra(feature2.Scan, feature2.Mz, true);

    double score = 0.0;
    if ((leftSpectra.Count == 0 || rightSpectra.Count == 0) && leftSpectra.Count != rightSpectra.Count)
    {
        // One has MS/MS but the other doesn't.
        score = -1;
    }

    for (int i = 0; i < leftSpectra.Count; i++)
    {
        var leftSpectrum = leftSpectra[i];
        for (int j = 0; j < rightSpectra.Count; j++)
        {
            // Index the right-hand list with the inner loop variable. Using i here compared the
            // wrong pairs and went out of range whenever the left list was the longer one.
            var rightSpectrum = rightSpectra[j];
            var specScore = this.comparer.CompareSpectra(leftSpectrum, rightSpectrum);
            score += this.IsScoreWithinTolerance(specScore) ? 1 : -1;
        }
    }

    return score;
}
/// <summary>
/// Orders two features by m/z, treating them as equal when they fall within the mass tolerance,
/// lie within the scan tolerance and share a charge state.
/// </summary>
/// <param name="featureX">First feature.</param>
/// <param name="featureY">Second feature.</param>
/// <returns>
/// 0 when the features match on m/z, scan and charge; 1 when m/z matches but scan or charge does not;
/// otherwise -1 when the ppm difference is negative and 1 when it is not.
/// </returns>
public int CompareMz(FeatureLight featureX, FeatureLight featureY)
{
    var mzPpm = FeatureLight.ComputeMassPPMDifference(featureX.Mz, featureY.Mz);

    if (!(Math.Abs(mzPpm) < Tolerances.Mass))
    {
        // Outside the mass window: order by the sign of the m/z difference.
        return mzPpm < 0 ? -1 : 1;
    }

    var scanDelta = featureX.Scan - featureY.Scan;
    if (Math.Abs(scanDelta) > ScanTolerance || featureX.ChargeState != featureY.ChargeState)
    {
        return 1;
    }

    return 0;
}
/// <summary>
/// Writes the NET and mass errors of anchor point matches to a tab-delimited text file.
/// </summary>
/// <remarks>
/// Each row holds nine values: NET and m/z of anchor point X, NET and m/z of anchor point Y,
/// aligned NET and aligned m/z of anchor point Y, the NET error, the ppm mass error and the similarity score.
/// </remarks>
/// <param name="errorPath">Path of the file to create (an existing file is overwritten).</param>
/// <param name="matches">Matches to report.</param>
private static void WriteErrors(string errorPath, IEnumerable<SpectralAnchorPointMatch> matches)
{
    using (var writer = File.CreateText(errorPath))
    {
        // Nine labels for the nine values written per row. The header previously listed eleven columns
        // (an extra NETA/MassA pair), which shifted every label after the sixth column.
        writer.WriteLine("NET\tMass\tNET\tMass\tNETA\tMassA\tNetError\tMassError\tScore");

        foreach (var match in matches)
        {
            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz, match.AnchorPointY.Mz);
            var netError = match.AnchorPointX.Net - match.AnchorPointY.NetAligned;

            writer.WriteLine("{0:F5}\t{1:F5}\t{2:F5}\t{3:F5}\t{4:F5}\t{5:F5}\t{6:F5}\t{7:F5}\t{8:F5}\t",
                match.AnchorPointX.Net,
                match.AnchorPointX.Mz,
                match.AnchorPointY.Net,
                match.AnchorPointY.Mz,
                match.AnchorPointY.NetAligned,
                match.AnchorPointY.MzAligned,
                netError,
                massError,
                match.SimilarityScore);
        }
    }
}
/// <summary>
/// Clusters a set of features and appends the resulting clusters to <paramref name="clusters"/>.
/// </summary>
/// <remarks>
/// The input is sorted by mass in place, then split into blocks wherever two consecutive features are
/// more than the mass tolerance (ppm) apart. Each block is clustered on its own: a block holding a
/// single feature becomes a singleton cluster, larger blocks go through pairwise distance calculation
/// and LinkFeatures. Finally statistics are calculated for every cluster in the list.
/// </remarks>
/// <param name="data">Features to cluster. Must not be null or contain null entries; the list is re-ordered.</param>
/// <param name="clusters">List the new clusters are appended to; it is also the return value.</param>
/// <returns>The same list instance that was passed in as <paramref name="clusters"/>.</returns>
/// <exception cref="NullReferenceException">Thrown when <paramref name="data"/> is null or contains a null feature.</exception>
public virtual List<U> Cluster(List<T> data, List<U> clusters)
{
    // Make sure we have data to cluster first.
    // NOTE: the exception type is kept as-is because callers may already catch it.
    if (data == null)
    {
        throw new NullReferenceException("The input feature data list was null. Cannot process this data.");
    }

    // Make sure there is no null feature in the input list. FindIndex returns -1 when nothing matches,
    // so index 0 is a valid hit and has to be included in the test.
    var nullIndex = data.FindIndex(x => x == null);
    if (nullIndex >= 0)
    {
        throw new NullReferenceException("The feature at index " + nullIndex + " was null. Cannot process this data.");
    }

    OnNotify("Sorting cluster mass list");

    // Sort on mass first, since mass has the least variability across runs.
    data.Sort(m_massComparer);

    var massTolerance = Parameters.Tolerances.Mass;

    // Index of the first feature of the mass partition currently being collected.
    var startUMCIndex = 0;
    var totalFeatures = data.Count;

    OnNotify("Detecting mass partitions");

    var tenPercent = Convert.ToInt32(totalFeatures * .1);
    var counter = 0;
    var percent = 0;

    for (var i = 0; i < totalFeatures - 1; i++)
    {
        if (counter > tenPercent)
        {
            counter = 0;
            percent += 10;
            OnNotify(string.Format("Clustering Completed...{0}%", percent));
        }
        counter++;

        // The ppm difference between mass-consecutive features decides where a partition ends.
        var umcX = data[i];
        var umcY = data[i + 1];
        var ppm = Math.Abs(FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned, umcY.MassMonoisotopicAligned));

        // The sign is irrelevant because the data is sorted by mass.
        if (ppm > massTolerance)
        {
            if (startUMCIndex == i)
            {
                // The partition holds just this one feature: nothing lies within the mass tolerance of it.
                var cluster = new U();
                cluster.AmbiguityScore = m_maxDistance;
                umcX.SetParentFeature(cluster);
                cluster.AddChildFeature(umcX);
                clusters.Add(cluster);
            }
            else
            {
                // More than one feature to consider: link them by distance.
                var distances = CalculatePairWiseDistances(startUMCIndex, i, data);
                var localClusters = CreateSingletonClusters(data, startUMCIndex, i);
                var blockClusters = LinkFeatures(distances, localClusters);
                CalculateAmbiguityScore(blockClusters);
                clusters.AddRange(blockClusters);
            }

            startUMCIndex = i + 1;
        }
    }

    // Make sure that we cluster what is left over.
    if (startUMCIndex < totalFeatures)
    {
        OnNotify(string.Format("Clustering last partition...{0}%", percent));

        var distances = CalculatePairWiseDistances(startUMCIndex, totalFeatures - 1, data);
        var localClusters = CreateSingletonClusters(data, startUMCIndex, totalFeatures - 1);
        var blockClusters = LinkFeatures(distances, localClusters);
        CalculateAmbiguityScore(blockClusters);

        // With fewer than two features in the partition the singleton clusters are used directly.
        if (localClusters.Count < 2)
        {
            clusters.AddRange(localClusters.Values);
        }
        else
        {
            clusters.AddRange(blockClusters);
        }
    }

    OnNotify("Generating cluster statistics");
    foreach (var cluster in clusters)
    {
        cluster.CalculateStatistics(Parameters.CentroidRepresentation);
    }

    return clusters;
}
/// <summary>
/// For every cluster, measures how far each member feature lies from the cluster's mass, NET and
/// drift time values and prints z-scores comparing the resulting distributions to the console.
/// </summary>
/// <remarks>
/// NOTE(review): no cluster is ever added to the returned list, so the result is always empty;
/// the method currently only produces diagnostic console output.
/// </remarks>
/// <param name="clusters">Clusters to inspect.</param>
/// <returns>A new, empty list (see remarks).</returns>
public List<U> ProcessClusters(List<U> clusters)
{
    var newClusters = new List<U>();

    // Look for merged clusters that need to be split...
    foreach (var cluster in clusters)
    {
        var medianNet = cluster.Net;
        var medianMass = cluster.MassMonoisotopic;
        var medianDrift = cluster.DriftTime;

        var massDistributions = new Dictionary<T, double>();
        var netDistributions = new Dictionary<T, double>();
        var driftDistributions = new Dictionary<T, double>();

        var massDistances = new List<double>();
        var netDistances = new List<double>();
        var driftDistances = new List<double>();

        // Build distributions.
        foreach (var feature in cluster.Features)
        {
            var mass = FeatureLight.ComputeMassPPMDifference(feature.MassMonoisotopicAligned, medianMass);
            var net = feature.Net - medianNet;
            var drift = feature.DriftTime - medianDrift;

            // Each value goes into the map of its own dimension (NET and drift used to be swapped).
            massDistributions.Add(feature, mass);
            netDistributions.Add(feature, net);
            driftDistributions.Add(feature, drift);

            massDistances.Add(mass);
            netDistances.Add(net);
            driftDistances.Add(drift);
        }

        massDistances.Sort();
        netDistances.Sort();
        driftDistances.Sort();

        var massDistribution = CalculateAllDistributions(massDistances);
        var netDistribution = CalculateAllDistributions(netDistances);
        var driftDistribution = CalculateAllDistributions(driftDistances);

        // Compare the three distributions returned for each dimension pairwise.
        var massZScore = CalculateZScore(massDistribution.Item1, massDistribution.Item2);
        var netZScore = CalculateZScore(netDistribution.Item1, netDistribution.Item2);
        var driftZScore = CalculateZScore(driftDistribution.Item1, driftDistribution.Item2);

        Console.WriteLine(" Neg to Pos ");
        Console.WriteLine("Mass z-score \t{0}", massZScore);
        Console.WriteLine("Net z-score \t{0}", netZScore);
        Console.WriteLine("Drift z-score\t{0}", driftZScore);
        Console.WriteLine();

        massZScore = CalculateZScore(massDistribution.Item1, massDistribution.Item3);
        netZScore = CalculateZScore(netDistribution.Item1, netDistribution.Item3);
        driftZScore = CalculateZScore(driftDistribution.Item1, driftDistribution.Item3);

        Console.WriteLine(" Negative ");
        Console.WriteLine("Mass z-score \t{0}", massZScore);
        Console.WriteLine("Net z-score \t{0}", netZScore);
        Console.WriteLine("Drift z-score\t{0}", driftZScore);
        Console.WriteLine();

        Console.WriteLine(" Positive ");

        massZScore = CalculateZScore(massDistribution.Item2, massDistribution.Item3);
        netZScore = CalculateZScore(netDistribution.Item2, netDistribution.Item3);
        driftZScore = CalculateZScore(driftDistribution.Item2, driftDistribution.Item3);

        Console.WriteLine("Mass z-score \t{0}", massZScore);
        Console.WriteLine("Net z-score \t{0}", netZScore);
        Console.WriteLine("Drift z-score\t{0}", driftZScore);
    }

    return newClusters;
}
/// <summary>
/// Clusters features from their pairwise distances by building spanning-tree groups over the
/// distance graph and cutting each group into clusters.
/// </summary>
/// <remarks>
/// The tree construction itself happens in ConstructSubTree and the cutting in CreateClusters; neither
/// is shown here (earlier documentation describes the approach as a minimal spanning tree built with
/// Prim's algorithm).
/// </remarks>
/// <param name="potentialDistances">Pairwise distances between all features in question.</param>
/// <param name="clusters">Singleton clusters from each feature.</param>
/// <returns>List of features clustered together.</returns>
public override List<U> LinkFeatures(List<Data.PairwiseDistance<T>> potentialDistances, Dictionary<int, U> clusters)
{
    var newClusters = new List<U>();

    // Edge case: a feature can be part of the batch without having any within-tolerance edge to
    // another feature. Start with every feature, then drop those that take part in at least one
    // within-tolerance pair; what remains are the singletons that must be captured explicitly.
    var unlinkedFeatures = new HashSet<T>();
    foreach (var cluster in clusters.Values)
    {
        foreach (var feature in cluster.Features)
        {
            unlinkedFeatures.Add(feature);
        }
    }

    foreach (var distance in potentialDistances)
    {
        if (AreClustersWithinTolerance(distance.FeatureX, distance.FeatureY))
        {
            unlinkedFeatures.Remove(distance.FeatureX);
            unlinkedFeatures.Remove(distance.FeatureY);
        }
    }

    foreach (var feature in unlinkedFeatures)
    {
        var cluster = new U();
        feature.SetParentFeature(cluster);
        cluster.AddChildFeature(feature);
        newClusters.Add(cluster);
    }

    // Turn the distances, shortest first, into graph edges with sequential ids.
    var sortedDistances = potentialDistances.OrderBy(element => element.Distance).ToList();

    var id = 0;
    var edges = new List<Edge<T>>();
    foreach (var distance in sortedDistances)
    {
        edges.Add(new Edge<T>(id++, distance.Distance, distance.FeatureX, distance.FeatureY));
    }

    var graph = new FeatureGraph<T>();
    graph.CreateGraph(edges);

    // Edges are visited shortest first; the set records the ones that already belong to a group.
    var queue = new Queue<Edge<T>>(edges);
    var seenEdge = new HashSet<int>();

    if (DumpLinearRelationship)
    {
        Console.WriteLine("GraphEdgeLength");
    }

    while (queue.Count > 0)
    {
        var startEdge = queue.Dequeue();

        // If we have already seen the edge, ignore it...
        if (seenEdge.Contains(startEdge.ID))
        {
            continue;
        }

        var mstGroup = ConstructSubTree(graph, seenEdge, startEdge);

        // NOTE(review): this tree is filled but never read in this method; kept in case Insert has
        // effects that are not visible from here.
        var clusterTree = new MstLrTree<Edge<T>>();

        foreach (var dist in mstGroup.LinearRelationship)
        {
            seenEdge.Add(dist.ID);
            clusterTree.Insert(dist);

            if (DumpLinearRelationship)
            {
                Console.WriteLine("{0}", dist.Length);
            }
        }

        // The cutoff is NSigma as-is; it is not scaled by the spread of the edge lengths.
        var mstClusters = CreateClusters(mstGroup, NSigma);
        newClusters.AddRange(mstClusters);
    }

    return newClusters;
}