// A cluster that contains exactly one feature must report that feature's aligned
// monoisotopic mass, NET, drift time and charge state as its own values,
// whichever centroid representation is requested.
public void CalculateStatisticsTestSingleUmc(double umcMass, double umcNet, float umcDriftTime, int umcCharge, int umcAbundance, ClusterCentroidRepresentation representation)
{
    // Arrange: an empty cluster and the single feature that will populate it.
    var cluster = new UMCClusterLight();
    cluster.UmcList = new List<UMCLight>();

    var soleFeature = new UMCLight();
    soleFeature.MassMonoisotopicAligned = umcMass;
    soleFeature.Net = umcNet;
    soleFeature.DriftTime = umcDriftTime;
    soleFeature.ChargeState = umcCharge;
    soleFeature.Abundance = umcAbundance;
    cluster.UmcList.Add(soleFeature);

    // Act
    cluster.CalculateStatistics(representation);

    // Assert: the centroid collapses onto the only member.
    Assert.AreEqual(soleFeature.MassMonoisotopicAligned, cluster.MassMonoisotopic, "Monoisotopic Mass");
    Assert.AreEqual(soleFeature.Net, cluster.Net, "NET");
    Assert.AreEqual(soleFeature.DriftTime, cluster.DriftTime, "Drift Time");
    Assert.AreEqual(soleFeature.ChargeState, cluster.ChargeState, "Charge State");
}
// Two features that differ only in NET (100 and 200) must yield a cluster NET of 150.
public void CalculateStatisticsMultipleNet(ClusterCentroidRepresentation representation)
{
    // Arrange
    var cluster = new UMCClusterLight { UmcList = new List<UMCLight>() };

    var lowNetFeature = new UMCLight
    {
        MassMonoisotopicAligned = 100,
        Net = 100,
        DriftTime = 100,
        ChargeState = 2,
        Abundance = 100
    };
    cluster.UmcList.Add(lowNetFeature);

    var highNetFeature = new UMCLight
    {
        MassMonoisotopicAligned = 100,
        Net = 200,
        DriftTime = 100,
        ChargeState = 2,
        Abundance = 100
    };
    cluster.UmcList.Add(highNetFeature);

    // Act
    cluster.CalculateStatistics(representation);

    // Assert: halfway between the two NET values.
    Assert.AreEqual(150, cluster.Net);
}
/// <summary>
/// Finds LCMS Features using the PNNL Omics linkage clustering algorithms.
/// </summary>
/// <param name="rawMsFeatures">
/// MS features to cluster. Their scan numbers also define the scan range
/// (stored in m_minScan / m_maxScan) used to scale each result's NET.
/// </param>
/// <param name="options">
/// Supplies the mass tolerance (ConstraintMonoMass), the maximum distance
/// (MaxDistance) and the minimum feature length in scans (MinUMCLength).
/// </param>
/// <param name="provider">Not referenced by this method body.</param>
/// <returns>
/// The clustered features that span at least MinUMCLength scans, with ID assigned
/// sequentially from 0 and NET / RetentionTime set to the feature's scan position
/// scaled by the scan range of the input.
/// </returns>
public List<UMCLight> FindFeatures(List<MSFeatureLight> rawMsFeatures, LCMSFeatureFindingOptions options, ISpectraProvider provider)
{
    const ClusterCentroidRepresentation centroidType = ClusterCentroidRepresentation.Mean;

    m_options = options;

    // Establish the scan range covered by the raw features.
    m_minScan = int.MaxValue;
    m_maxScan = int.MinValue;
    foreach (var feature in rawMsFeatures)
    {
        m_minScan = Math.Min(feature.Scan, m_minScan);
        m_maxScan = Math.Max(feature.Scan, m_maxScan);
    }

    var finder = new MSFeatureSingleLinkageClustering<MSFeatureLight, UMCLight>
    {
        Parameters =
        {
            DistanceFunction = WeightedNETDistanceFunction,
            RangeFunction = WithinRange,
            Tolerances =
            {
                Mass = options.ConstraintMonoMass,
                RetentionTime = 100,
                DriftTime = 100
            }
        }
    };
    finder.Parameters.CentroidRepresentation = centroidType;
    m_maxDistance = options.MaxDistance;

    var features = finder.Cluster(rawMsFeatures);

    // Remove the short UMC's.
    features.RemoveAll(x => (x.ScanEnd - x.ScanStart + 1) < options.MinUMCLength);

    // When every raw feature sits in the same scan the range is zero; dividing by it
    // would give 0/0 = NaN for every NET. In that case all features get a NET of 0.
    // The subtraction is only performed when the range is positive, which also keeps
    // it away from the int.MinValue - int.MaxValue case left by an empty input.
    var hasScanRange = m_maxScan > m_minScan;
    var scanRange = hasScanRange ? Convert.ToDouble(m_maxScan - m_minScan) : 0.0;

    var id = 0;
    foreach (var feature in features)
    {
        feature.NET = hasScanRange
            ? Convert.ToDouble(feature.Scan - m_minScan) / scanRange
            : 0.0;
        feature.RetentionTime = feature.NET;
        feature.ID = id++;
    }

    return features;
}
// Builds a cluster of numUmCs features whose mass, NET, drift time and abundance
// change linearly with the feature index (step = multiplier), works out the expected
// centroid by hand, and compares it with what CalculateStatistics produces.
public void CalculateStatisticsTestMultipleUmCs(double umcMass, double umcNet, float umcDrifTime, int umcCharge, int umcAbundance, int multiplier, int numUmCs, ClusterCentroidRepresentation representation)
{
    var cluster = new UMCClusterLight { UmcList = new List<UMCLight>() };

    var useMean = representation == ClusterCentroidRepresentation.Mean;
    var oddCount = (numUmCs % 2) == 1;
    var evenCount = (numUmCs % 2) == 0;

    // Index of the middle feature (odd count) or of the upper of the two middle
    // features (even count); indices are zero based.
    var upperMiddle = numUmCs / 2;

    // Expected centroid. For the mean these act as running sums until the final
    // division; otherwise they hold the median picked out by insertion index.
    double expectedMass = 0;
    double expectedNet = 0;
    double expectedDriftTime = 0;

    for (var index = 0; index < numUmCs; index++)
    {
        var offset = multiplier * index;
        var umc = new UMCLight
        {
            MassMonoisotopicAligned = umcMass + offset,
            Net = umcNet + offset,
            DriftTime = umcDrifTime + offset,
            ChargeState = umcCharge,
            Abundance = umcAbundance + offset
        };
        cluster.UmcList.Add(umc);

        if (useMean)
        {
            // Accumulate; divided by the feature count after the loop.
            expectedMass += umc.MassMonoisotopicAligned;
            expectedNet += umc.Net;
            expectedDriftTime += umc.DriftTime;
        }
        else if (oddCount && index == upperMiddle)
        {
            // Odd count: the single middle feature is the median.
            expectedMass = umc.MassMonoisotopicAligned;
            expectedNet = umc.Net;
            expectedDriftTime = umc.DriftTime;
        }
        else if (evenCount && index == upperMiddle - 1)
        {
            // Even count: remember the lower of the two middle features...
            expectedMass = umc.MassMonoisotopicAligned;
            expectedNet = umc.Net;
            expectedDriftTime = umc.DriftTime;
        }
        else if (evenCount && index == upperMiddle)
        {
            // ...and average it with the upper one.
            expectedMass = (expectedMass + umc.MassMonoisotopicAligned) / 2;
            expectedNet = (expectedNet + umc.Net) / 2;
            expectedDriftTime = (expectedDriftTime + umc.DriftTime) / 2;
        }
    }

    // Turn the running sums into the mean.
    if (useMean)
    {
        expectedMass /= numUmCs;
        expectedNet /= numUmCs;
        expectedDriftTime /= numUmCs;
    }

    cluster.CalculateStatistics(representation);

    Assert.AreEqual(expectedMass, cluster.MassMonoisotopic, "Monoisotopic Mass");
    Assert.AreEqual(expectedNet, cluster.Net, "NET");
    Assert.AreEqual(expectedDriftTime, cluster.DriftTime, "Drift Time");
    Assert.AreEqual(umcCharge, cluster.ChargeState, "Charge State");
}
/// <summary>
/// Recomputes the cluster's summary values from the features in UmcList:
/// member counts, centroid (mass, NET, drift time), mass/NET standard deviations,
/// tightness and the representative charge state.
/// </summary>
/// <param name="centroid">
/// How the centroid is derived. Mean and Median are handled; for any other value
/// MassMonoisotopic, Net and DriftTime are left as they were.
/// </param>
public void CalculateStatistics(ClusterCentroidRepresentation centroid)
{
    if (UmcList == null)
    {
        throw new NullReferenceException("The UMC list was not set to an object reference.");
    }

    if (UmcList.Count < 1)
    {
        throw new Exception("No data to compute statistics over.");
    }

    // Per-feature values, kept so the median can be taken after sorting.
    var netValues = new List<double>();
    var massValues = new List<double>();
    var driftValues = new List<double>();

    // Running totals for the mean.
    double netTotal = 0;
    double massTotal = 0;
    double driftTotal = 0;

    // Occurrences per charge state and per GroupId.
    var chargeHistogram = new Dictionary<int, int>();
    var groupHistogram = new Dictionary<int, int>();

    MemberCount = UmcList.Count;

    foreach (var member in UmcList)
    {
        if (member == null)
        {
            throw new NullReferenceException("A UMC was null when trying to calculate cluster statistics.");
        }

        // A missing key leaves the out value at 0, so this both inserts and increments.
        int groupHits;
        groupHistogram.TryGetValue(member.GroupId, out groupHits);
        groupHistogram[member.GroupId] = groupHits + 1;

        netValues.Add(member.Net);
        massValues.Add(member.MassMonoisotopicAligned);
        driftValues.Add(member.DriftTime);

        netTotal += member.Net;
        massTotal += member.MassMonoisotopicAligned;
        driftTotal += member.DriftTime;

        int chargeHits;
        chargeHistogram.TryGetValue(member.ChargeState, out chargeHits);
        chargeHistogram[member.ChargeState] = chargeHits + 1;
    }

    // Number of distinct GroupId values among the members.
    DatasetMemberCount = groupHistogram.Keys.Count;

    var featureCount = UmcList.Count;

    // Centroid of the cluster.
    if (centroid == ClusterCentroidRepresentation.Mean)
    {
        MassMonoisotopic = massTotal / featureCount;
        Net = netTotal / featureCount;
        DriftTime = Convert.ToSingle(driftTotal / featureCount);
    }
    else if (centroid == ClusterCentroidRepresentation.Median)
    {
        netValues.Sort();
        massValues.Sort();
        driftValues.Sort();

        var middle = featureCount / 2;
        if ((featureCount % 2) == 0)
        {
            // Even number of members: average the two middle values.
            MassMonoisotopic = (massValues[middle] + massValues[middle - 1]) / 2;
            Net = (netValues[middle] + netValues[middle - 1]) / 2;
            DriftTime = Convert.ToSingle((driftValues[middle] + driftValues[middle - 1]) / 2);
        }
        else
        {
            // Odd number of members: the middle value is the median.
            MassMonoisotopic = massValues[middle];
            Net = netValues[middle];
            DriftTime = Convert.ToSingle(driftValues[middle]);
        }
    }

    // Spread of the members around the centroid.
    var centroidNet = Net;
    var centroidMass = MassMonoisotopic;
    var centroidDrift = DriftTime;

    var memberDistances = new List<double>();
    double distanceTotal = 0;
    double squaredMassTotal = 0;
    double squaredNetTotal = 0;

    foreach (var member in UmcList)
    {
        var netDelta = centroidNet - member.Net;
        var massDelta = centroidMass - member.MassMonoisotopicAligned;
        var driftDelta = centroidDrift - Convert.ToSingle(member.DriftTime);

        squaredMassTotal += (massDelta * massDelta);
        squaredNetTotal += (netDelta * netDelta);

        // Euclidean distance in (NET, mass, drift time) space.
        var memberDistance = Math.Sqrt((netDelta * netDelta) + (massDelta * massDelta) + (driftDelta * driftDelta));
        memberDistances.Add(memberDistance);
        distanceTotal += memberDistance;
    }

    // Deviations are divided by the member count (not count - 1).
    NetStandardDeviation = Math.Sqrt(squaredNetTotal / Convert.ToDouble(UmcList.Count));
    MassStandardDeviation = Math.Sqrt(squaredMassTotal / Convert.ToDouble(UmcList.Count));

    if (centroid == ClusterCentroidRepresentation.Mean)
    {
        // Mean distance to the centroid.
        Tightness = Convert.ToSingle(distanceTotal / UmcList.Count);
    }
    else
    {
        // Distance at index Count / 2 of the sorted distances.
        memberDistances.Sort();
        Tightness = Convert.ToSingle(memberDistances[memberDistances.Count / 2]);
    }

    // Representative charge state: the most frequent one. int.MinValue marks
    // "nothing chosen yet"; on a tie the first charge enumerated is kept.
    var modeCharge = int.MinValue;
    var modeHits = 0;
    foreach (var entry in chargeHistogram)
    {
        if (modeCharge == int.MinValue || entry.Value > modeHits)
        {
            modeCharge = entry.Key;
            modeHits = entry.Value;
        }
    }

    ChargeState = modeCharge;
}
/// <summary>
/// Calculates the centroid and other statistics about the feature from the
/// MS features in MsFeatures.
/// </summary>
/// <remarks>
/// Sets Scan, ChargeState and Mz from the most abundant MS feature, widens
/// MinCharge / MaxCharge from their current values, and sets Abundance (maximum),
/// AbundanceSum, ScanStart / ScanEnd, NetStart / NetEnd, Net, DriftTime,
/// MassMonoisotopic and Score.
/// MassMonoisotopic is always the median of the members' MassMonoisotopic values,
/// whichever centroid representation is requested.
/// </remarks>
/// <param name="centroid">
/// How Net and DriftTime are derived: Mean and Median set both; Apex sets only Net,
/// taken from the most abundant member, and leaves DriftTime unchanged.
/// </param>
/// <exception cref="NullReferenceException">MsFeatures is null, or contains a null entry.</exception>
/// <exception cref="Exception">MsFeatures is empty.</exception>
public void CalculateStatistics(ClusterCentroidRepresentation centroid = ClusterCentroidRepresentation.Apex)
{
    if (MsFeatures == null)
    {
        throw new NullReferenceException("The UMC list was not set to an object reference.");
    }

    if (MsFeatures.Count < 1)
    {
        throw new Exception("No data in feature to compute statistics over.");
    }

    // Lists for holding onto masses etc.
    var net = new List<double>();
    var mass = new List<double>();
    var driftTime = new List<double>();

    double sumNet = 0;
    double sumDrifttime = 0;
    double sumAbundance = 0;

    var minScan = int.MaxValue;
    var maxScan = int.MinValue;
    var minNet = double.PositiveInfinity;
    var maxNet = 0.0;

    double maxAbundance = int.MinValue;
    double representativeMz = 0;

    foreach (var feature in MsFeatures)
    {
        if (feature == null)
        {
            throw new NullReferenceException("A MS feature was null when trying to calculate cluster statistics.");
        }

        // The most abundant member supplies the representative scan, charge and m/z.
        // Strict comparison: on equal abundance the earlier member is kept.
        if (feature.Abundance > maxAbundance)
        {
            maxAbundance = feature.Abundance;
            Scan = feature.Scan;
            ChargeState = feature.ChargeState;
            representativeMz = feature.Mz;
        }

        this.MinCharge = Math.Min(this.MinCharge, feature.ChargeState);
        this.MaxCharge = Math.Max(this.MaxCharge, feature.ChargeState);

        net.Add(feature.Net);
        mass.Add(feature.MassMonoisotopic);
        driftTime.Add(feature.DriftTime);

        sumAbundance += feature.Abundance;
        sumNet += feature.Net;
        sumDrifttime += feature.DriftTime;

        minScan = Math.Min(feature.Scan, minScan);
        maxScan = Math.Max(feature.Scan, maxScan);
        minNet = Math.Min(feature.Net, minNet);
        maxNet = Math.Max(feature.Net, maxNet);
    }

    Abundance = maxAbundance;
    AbundanceSum = sumAbundance;
    ScanEnd = maxScan;
    ScanStart = minScan;
    NetStart = minNet;
    NetEnd = maxNet;

    var numUmCs = MsFeatures.Count;

    // Calculate the centroid of the cluster (NET and drift time; mass is handled below).
    switch (centroid)
    {
        case ClusterCentroidRepresentation.Mean:
            Net = (sumNet / numUmCs);
            DriftTime = Convert.ToSingle(sumDrifttime / numUmCs);
            break;

        case ClusterCentroidRepresentation.Median:
            net.Sort();
            driftTime.Sort();

            var median = numUmCs / 2;
            if ((numUmCs % 2) == 0)
            {
                // Even number of members: average the two middle values.
                Net = (net[median] + net[median - 1]) / 2;
                DriftTime = Convert.ToSingle((driftTime[median] + driftTime[median - 1]) / 2);
            }
            else
            {
                Net = net[median];
                DriftTime = Convert.ToSingle(driftTime[median]);
            }
            break;

        case ClusterCentroidRepresentation.Apex:
            // NET of the most abundant member; ">=" means the last member wins a tie.
            double apexNet = 0;
            double apexAbundance = 0;
            foreach (var msFeature in this.MsFeatures)
            {
                if (msFeature.Abundance >= apexAbundance)
                {
                    apexNet = msFeature.Net;
                    apexAbundance = msFeature.Abundance;
                }
            }
            this.Net = apexNet;
            break;
    }

    // The mass is always reported as the median. The list has to be sorted first:
    // previously it was only sorted for the Median representation, so Mean and Apex
    // picked the middle element in insertion order instead of the true median.
    mass.Sort();
    var massMiddle = numUmCs / 2;
    if ((numUmCs % 2) == 1)
    {
        MassMonoisotopic = mass[massMiddle];
    }
    else
    {
        MassMonoisotopic = .5 * (mass[massMiddle - 1] + mass[massMiddle]);
    }

    // Distance of every member from the centroid in (NET, mass, drift time) space.
    // NOTE(review): the median above is taken over MassMonoisotopic while the distance
    // uses MassMonoisotopicAligned; confirm that mixing the two is intended.
    var distances = new List<double>();
    double distanceSum = 0;
    foreach (var umc in MsFeatures)
    {
        var netValue = Net - umc.Net;
        var massValue = MassMonoisotopic - umc.MassMonoisotopicAligned;
        var driftValue = DriftTime - umc.DriftTime;

        var distance = Math.Sqrt((netValue * netValue) + (massValue * massValue) + (driftValue * driftValue));
        distances.Add(distance);
        distanceSum += distance;
    }

    if (centroid == ClusterCentroidRepresentation.Mean)
    {
        // Mean distance to the centroid.
        Score = Convert.ToSingle(distanceSum / MsFeatures.Count);
    }
    else
    {
        // Distance at index Count / 2 of the sorted distances.
        var mid = distances.Count / 2;
        distances.Sort();
        Score = Convert.ToSingle(distances[mid]);
    }

    Mz = representativeMz;
}