Beispiel #1
0
        public void CalculateStatisticsTestSingleUmc(double umcMass,
                                                     double umcNet,
                                                     float umcDriftTime,
                                                     int umcCharge,
                                                     int umcAbundance,
                                                     ClusterCentroidRepresentation representation)
        {
            // Arrange: an empty cluster plus the one feature described by the test parameters.
            var singleton = new UMCClusterLight();
            singleton.UmcList = new List<UMCLight>();

            var onlyMember = new UMCLight();
            onlyMember.MassMonoisotopicAligned = umcMass;
            onlyMember.Net                     = umcNet;
            onlyMember.DriftTime               = umcDriftTime;
            onlyMember.ChargeState             = umcCharge;
            onlyMember.Abundance               = umcAbundance;

            singleton.UmcList.Add(onlyMember);

            // Act
            singleton.CalculateStatistics(representation);

            // Assert: with a single member, the cluster centroid must equal that member's values.
            Assert.AreEqual(onlyMember.MassMonoisotopicAligned, singleton.MassMonoisotopic, "Monoisotopic Mass");
            Assert.AreEqual(onlyMember.Net, singleton.Net, "NET");
            Assert.AreEqual(onlyMember.DriftTime, singleton.DriftTime, "Drift Time");
            Assert.AreEqual(onlyMember.ChargeState, singleton.ChargeState, "Charge State");
        }
Beispiel #2
0
        public void CalculateStatisticsMultipleNet(ClusterCentroidRepresentation representation)
        {
            // Two features that are identical except for NET (100 vs. 200).
            var lowNetFeature = new UMCLight
            {
                MassMonoisotopicAligned = 100,
                Net                     = 100,
                DriftTime               = 100,
                ChargeState             = 2,
                Abundance               = 100
            };

            var highNetFeature = new UMCLight
            {
                MassMonoisotopicAligned = 100,
                Net                     = 200,
                DriftTime               = 100,
                ChargeState             = 2,
                Abundance               = 100
            };

            var pair = new UMCClusterLight {
                UmcList = new List<UMCLight>()
            };
            pair.UmcList.Add(lowNetFeature);
            pair.UmcList.Add(highNetFeature);

            pair.CalculateStatistics(representation);

            // The expected cluster NET is the midpoint of the two member NET values.
            Assert.AreEqual(150, pair.Net);
        }
Beispiel #3
0
        /// <summary>
        /// Finds LCMS Features using the PNNL Omics linkage clustering algorithms.
        /// </summary>
        /// <param name="rawMsFeatures">
        /// MS features to cluster. Their scan numbers also define the scan range
        /// (stored in m_minScan / m_maxScan) used to normalize each feature's NET.
        /// </param>
        /// <param name="options">
        /// Feature finding options. ConstraintMonoMass, MaxDistance and MinUMCLength are read here,
        /// and the object is stored in m_options.
        /// </param>
        /// <param name="provider">Not used by this method.</param>
        /// <returns>
        /// The clustered features that span at least MinUMCLength scans, with NET, RetentionTime
        /// and sequential IDs (starting at 0) assigned.
        /// </returns>
        public List <UMCLight> FindFeatures(List <MSFeatureLight> rawMsFeatures,
                                            LCMSFeatureFindingOptions options,
                                            ISpectraProvider provider)
        {
            const ClusterCentroidRepresentation centroidType = ClusterCentroidRepresentation.Mean;

            m_options = options;

            // Determine the scan range covered by the raw features.
            m_minScan = int.MaxValue;
            m_maxScan = int.MinValue;
            foreach (var feature in rawMsFeatures)
            {
                m_minScan = Math.Min(feature.Scan, m_minScan);
                m_maxScan = Math.Max(feature.Scan, m_maxScan);
            }

            var finder = new MSFeatureSingleLinkageClustering <MSFeatureLight, UMCLight>
            {
                Parameters =
                {
                    DistanceFunction = WeightedNETDistanceFunction,
                    RangeFunction    = WithinRange,
                    Tolerances       = { Mass = options.ConstraintMonoMass, RetentionTime = 100, DriftTime = 100 }
                }
            };

            finder.Parameters.CentroidRepresentation = centroidType;
            m_maxDistance = options.MaxDistance;
            var features = finder.Cluster(rawMsFeatures);

            // Remove the short UMC's.
            features.RemoveAll(x => (x.ScanEnd - x.ScanStart + 1) < options.MinUMCLength);

            // Width of the scan range, computed in double so the subtraction cannot overflow.
            var scanRange = Convert.ToDouble(m_maxScan) - Convert.ToDouble(m_minScan);

            var id = 0;

            foreach (var feature in features)
            {
                // When every raw feature shares one scan the range is zero; dividing would
                // produce NaN (0 / 0), so such features are placed at NET 0 instead.
                feature.NET = scanRange > 0
                    ? Convert.ToDouble(feature.Scan - m_minScan) / scanRange
                    : 0.0;
                feature.RetentionTime = feature.NET;
                feature.ID            = id++;
            }

            return(features);
        }
Beispiel #4
0
        public void CalculateStatisticsTestMultipleUmCs(double umcMass,
                                                        double umcNet,
                                                        float umcDrifTime,
                                                        int umcCharge,
                                                        int umcAbundance,
                                                        int multiplier,
                                                        int numUmCs,
                                                        ClusterCentroidRepresentation representation)
        {
            var cluster = new UMCClusterLight();
            cluster.UmcList = new List<UMCLight>();

            var useMean     = representation == ClusterCentroidRepresentation.Mean;
            var isOddCount  = (numUmCs % 2) == 1;
            var isEvenCount = (numUmCs % 2) == 0;

            // Zero-based indices of the middle element(s) of the generated series.
            var upperMiddle = numUmCs / 2;
            var lowerMiddle = upperMiddle - 1;

            // Expected centroid values, built up while the members are generated.
            double expectedMass      = 0;
            double expectedNet       = 0;
            double expectedDriftTime = 0;

            for (var index = 0; index < numUmCs; index++)
            {
                // Each member is shifted from the base values by a multiple of its index.
                var member = new UMCLight();
                member.MassMonoisotopicAligned = umcMass + multiplier * index;
                member.Net                     = umcNet + multiplier * index;
                member.DriftTime               = umcDrifTime + multiplier * index;
                member.ChargeState             = umcCharge;
                member.Abundance               = umcAbundance + multiplier * index;
                cluster.UmcList.Add(member);

                if (useMean)
                {
                    // Running sums; divided by the member count after the loop.
                    expectedMass      += member.MassMonoisotopicAligned;
                    expectedNet       += member.Net;
                    expectedDriftTime += member.DriftTime;
                }
                else if (isOddCount)
                {
                    // Odd count: the single middle member is the expected median.
                    if (index == upperMiddle)
                    {
                        expectedMass      = member.MassMonoisotopicAligned;
                        expectedNet       = member.Net;
                        expectedDriftTime = member.DriftTime;
                    }
                }
                else if (isEvenCount)
                {
                    // Even count: the expected median is the average of the two middle
                    // members, which sit at indices (numUmCs / 2 - 1) and (numUmCs / 2)
                    // because the index is zero-based.
                    if (index == lowerMiddle)
                    {
                        expectedMass      = member.MassMonoisotopicAligned;
                        expectedNet       = member.Net;
                        expectedDriftTime = member.DriftTime;
                    }
                    else if (index == upperMiddle)
                    {
                        expectedMass      = (expectedMass + member.MassMonoisotopicAligned) / 2;
                        expectedNet       = (expectedNet + member.Net) / 2;
                        expectedDriftTime = (expectedDriftTime + member.DriftTime) / 2;
                    }
                }
            }

            // Turn the running sums into means.
            if (useMean)
            {
                expectedMass      /= numUmCs;
                expectedNet       /= numUmCs;
                expectedDriftTime /= numUmCs;
            }

            cluster.CalculateStatistics(representation);

            Assert.AreEqual(expectedMass, cluster.MassMonoisotopic, "Monoisotopic Mass");
            Assert.AreEqual(expectedNet, cluster.Net, "NET");
            Assert.AreEqual(expectedDriftTime, cluster.DriftTime, "Drift Time");
            Assert.AreEqual(umcCharge, cluster.ChargeState, "Charge State");
        }
Beispiel #5
0
        /// <summary>
        /// Recomputes the cluster's summary values from the features in the UMC list:
        /// member counts, centroid (mass, NET, drift time), NET and mass standard deviations,
        /// tightness, and the most frequent charge state.
        /// </summary>
        /// <param name="centroid">
        /// How the centroid is derived. Mean and Median are handled; for any other value the
        /// centroid properties are left as they were before the call.
        /// </param>
        /// <exception cref="NullReferenceException">The UMC list, or one of its entries, is null.</exception>
        /// <exception cref="Exception">The UMC list is empty.</exception>
        public void CalculateStatistics(ClusterCentroidRepresentation centroid)
        {
            if (UmcList == null)
            {
                throw new NullReferenceException("The UMC list was not set to an object reference.");
            }

            if (UmcList.Count < 1)
            {
                throw new Exception("No data to compute statistics over.");
            }

            // Per-member values, kept so they can be sorted for the median.
            var netValues   = new List<double>();
            var massValues  = new List<double>();
            var driftValues = new List<double>();

            // Running totals for the mean.
            double netTotal   = 0;
            double massTotal  = 0;
            double driftTotal = 0;

            // Occurrences per charge state, and per group id.
            var chargeHistogram = new Dictionary<int, int>();
            var groupHistogram  = new Dictionary<int, int>();

            MemberCount = UmcList.Count;

            foreach (var member in UmcList)
            {
                if (member == null)
                {
                    throw new NullReferenceException("A UMC was null when trying to calculate cluster statistics.");
                }

                // A missing key leaves the out value at 0, so this both inserts and increments.
                int groupHits;
                groupHistogram.TryGetValue(member.GroupId, out groupHits);
                groupHistogram[member.GroupId] = groupHits + 1;

                netValues.Add(member.Net);
                massValues.Add(member.MassMonoisotopicAligned);
                driftValues.Add(member.DriftTime);

                netTotal   += member.Net;
                massTotal  += member.MassMonoisotopicAligned;
                driftTotal += member.DriftTime;

                int chargeHits;
                chargeHistogram.TryGetValue(member.ChargeState, out chargeHits);
                chargeHistogram[member.ChargeState] = chargeHits + 1;
            }

            // Number of distinct group ids among the members.
            DatasetMemberCount = groupHistogram.Keys.Count;

            var memberTotal = UmcList.Count;

            // Centroid of the cluster.
            if (centroid == ClusterCentroidRepresentation.Mean)
            {
                MassMonoisotopic = massTotal / memberTotal;
                Net              = netTotal / memberTotal;
                DriftTime        = Convert.ToSingle(driftTotal / memberTotal);
            }
            else if (centroid == ClusterCentroidRepresentation.Median)
            {
                netValues.Sort();
                massValues.Sort();
                driftValues.Sort();

                var upper = memberTotal / 2;
                if ((memberTotal % 2) == 0)
                {
                    // Even member count: average the two middle values.
                    var lower = upper - 1;
                    MassMonoisotopic = (massValues[upper] + massValues[lower]) / 2;
                    Net              = (netValues[upper] + netValues[lower]) / 2;
                    DriftTime        = Convert.ToSingle((driftValues[upper] + driftValues[lower]) / 2);
                }
                else
                {
                    // Odd member count: take the single middle value.
                    MassMonoisotopic = massValues[upper];
                    Net              = netValues[upper];
                    DriftTime        = Convert.ToSingle(driftValues[upper]);
                }
            }

            // Distance of every member from the centroid, plus squared deviations.
            var    memberDistances = new List<double>();
            double distanceTotal   = 0;
            double massSquaredSum  = 0;
            double netSquaredSum   = 0;

            foreach (var member in UmcList)
            {
                var netDelta   = Net - member.Net;
                var massDelta  = MassMonoisotopic - member.MassMonoisotopicAligned;
                var driftDelta = DriftTime - Convert.ToSingle(member.DriftTime);

                massSquaredSum += (massDelta * massDelta);
                netSquaredSum  += (netDelta * netDelta);

                var memberDistance = Math.Sqrt((netDelta * netDelta) + (massDelta * massDelta) + (driftDelta * driftDelta));
                memberDistances.Add(memberDistance);
                distanceTotal += memberDistance;
            }

            // Deviations are divided by the full member count (not count - 1).
            NetStandardDeviation  = Math.Sqrt(netSquaredSum / Convert.ToDouble(UmcList.Count));
            MassStandardDeviation = Math.Sqrt(massSquaredSum / Convert.ToDouble(UmcList.Count));

            if (centroid != ClusterCentroidRepresentation.Mean)
            {
                // Middle element of the sorted distances (the upper one for an even count).
                memberDistances.Sort();
                Tightness = Convert.ToSingle(memberDistances[memberDistances.Count / 2]);
            }
            else
            {
                Tightness = Convert.ToSingle(distanceTotal / UmcList.Count);
            }

            // Representative charge state: the one that occurs most often.
            // int.MinValue marks "nothing chosen yet".
            var modeCharge = int.MinValue;
            var modeHits   = 0;

            foreach (var entry in chargeHistogram)
            {
                if (modeCharge == int.MinValue || entry.Value > modeHits)
                {
                    modeCharge = entry.Key;
                    modeHits   = entry.Value;
                }
            }
            ChargeState = modeCharge;
        }
Beispiel #6
0
        /// <summary>
        /// Calculates the centroid and other statistics about the feature from its MS features:
        /// abundance (maximum and sum), scan and NET extents, charge range, representative
        /// scan / charge / m/z (taken from the most abundant MS feature), centroid NET,
        /// drift time, monoisotopic mass, and a distance-based score.
        /// </summary>
        /// <param name="centroid">
        /// How NET and drift time are derived. Mean and Median set both; Apex sets only NET
        /// (drift time is left unchanged). The monoisotopic mass is always the median of the
        /// members' masses, whatever this value is.
        /// </param>
        /// <exception cref="NullReferenceException">The MS feature list, or one of its entries, is null.</exception>
        /// <exception cref="Exception">The MS feature list is empty.</exception>
        public void CalculateStatistics(ClusterCentroidRepresentation centroid = ClusterCentroidRepresentation.Apex)
        {
            if (MsFeatures == null)
            {
                throw new NullReferenceException("The UMC list was not set to an object reference.");
            }

            if (MsFeatures.Count < 1)
            {
                throw new Exception("No data in feature to compute statistics over.");
            }

            // Lists for holding onto masses etc.
            var net       = new List <double>();
            var mass      = new List <double>();
            var driftTime = new List <double>();

            double sumNet           = 0;
            double sumDrifttime     = 0;
            double sumAbundance     = 0;
            var    minScan          = int.MaxValue;
            var    maxScan          = int.MinValue;
            var    minNet           = double.PositiveInfinity;
            // Start below every possible value so the maximum is correct even when
            // all NET values are negative (a start value of 0 would win in that case).
            var    maxNet           = double.NegativeInfinity;
            double maxAbundance     = int.MinValue;
            double representativeMz = 0;

            foreach (var feature in MsFeatures)
            {
                if (feature == null)
                {
                    throw new NullReferenceException("A MS feature was null when trying to calculate cluster statistics.");
                }

                // The most abundant MS feature supplies the representative scan, charge and m/z.
                if (feature.Abundance > maxAbundance)
                {
                    maxAbundance     = feature.Abundance;
                    Scan             = feature.Scan;
                    ChargeState      = feature.ChargeState;
                    representativeMz = feature.Mz;
                }

                // The charge range widens from whatever MinCharge / MaxCharge already hold.
                this.MinCharge = Math.Min(this.MinCharge, feature.ChargeState);
                this.MaxCharge = Math.Max(this.MaxCharge, feature.ChargeState);

                net.Add(feature.Net);
                // NOTE(review): the median mass uses MassMonoisotopic while the distance
                // calculation below uses MassMonoisotopicAligned - confirm this is intended.
                mass.Add(feature.MassMonoisotopic);
                driftTime.Add(feature.DriftTime);

                sumAbundance += feature.Abundance;
                sumNet       += feature.Net;
                sumDrifttime += feature.DriftTime;
                minScan       = Math.Min(feature.Scan, minScan);
                maxScan       = Math.Max(feature.Scan, maxScan);
                minNet        = Math.Min(feature.Net, minNet);
                maxNet        = Math.Max(feature.Net, maxNet);
            }
            Abundance    = maxAbundance;
            AbundanceSum = sumAbundance;
            ScanEnd      = maxScan;
            ScanStart    = minScan;
            NetStart     = minNet;
            NetEnd       = maxNet;
            var numUmCs = MsFeatures.Count;

            // Calculate the NET / drift time centroid of the cluster.
            switch (centroid)
            {
            case ClusterCentroidRepresentation.Mean:
                Net       = (sumNet / numUmCs);
                DriftTime = Convert.ToSingle(sumDrifttime / numUmCs);
                break;

            case ClusterCentroidRepresentation.Median:
                net.Sort();
                driftTime.Sort();

                // For an even member count, average the two middle values.
                int median;
                if ((numUmCs % 2) == 0)
                {
                    median    = numUmCs / 2;
                    Net       = (net[median] + net[median - 1]) / 2;
                    DriftTime = Convert.ToSingle((driftTime[median] + driftTime[median - 1]) / 2);
                }
                else
                {
                    median    = numUmCs / 2;
                    Net       = net[median];
                    DriftTime = Convert.ToSingle(driftTime[median]);
                }
                break;

            case ClusterCentroidRepresentation.Apex:
                // NET of the most abundant MS feature; on ties the last one wins.
                // Features with negative abundance never become the apex.
                double apexNet       = 0;
                double apexAbundance = 0;
                foreach (var msFeature in this.MsFeatures)
                {
                    if (msFeature.Abundance >= apexAbundance)
                    {
                        apexNet       = msFeature.Net;
                        apexAbundance = msFeature.Abundance;
                    }
                }
                this.Net = apexNet;
                break;
            }

            // The monoisotopic mass is the median member mass for every representation.
            // The list has to be sorted first: indexing the middle of the unsorted list
            // returns an arbitrary member's mass rather than the median.
            mass.Sort();
            if ((numUmCs % 2) == 1)
            {
                MassMonoisotopic = mass[numUmCs / 2];
            }
            else
            {
                MassMonoisotopic = .5 * (mass[numUmCs / 2 - 1] + mass[numUmCs / 2]);
            }

            // Distance of every MS feature from the centroid.
            var    distances   = new List <double>();
            double distanceSum = 0;

            foreach (var umc in MsFeatures)
            {
                var netValue   = Net - umc.Net;
                var massValue  = MassMonoisotopic - umc.MassMonoisotopicAligned;
                var driftValue = DriftTime - umc.DriftTime;
                var distance   = Math.Sqrt((netValue * netValue) + (massValue * massValue) + (driftValue * driftValue));
                distances.Add(distance);
                distanceSum += distance;
            }

            // Score: mean distance for the Mean representation, otherwise the middle
            // element of the sorted distances (the upper one for an even count).
            if (centroid == ClusterCentroidRepresentation.Mean)
            {
                Score = Convert.ToSingle(distanceSum / MsFeatures.Count);
            }
            else
            {
                var mid = distances.Count / 2;

                distances.Sort();
                Score = Convert.ToSingle(distances[mid]);
            }
            Mz = representativeMz;
        }