示例#1
0
        /// <summary>
        /// Collects NET and mass (ppm) differences, before and after alignment, for every matched
        /// anchor point pair whose spectra both have a parent feature.
        /// </summary>
        /// <param name="matches">Anchor point matches to measure.</param>
        /// <returns>
        /// A tuple whose first item holds the NET differences and whose second item holds the
        /// mass differences in ppm.
        /// </returns>
        public Tuple <AlignmentMeasurement <double>, AlignmentMeasurement <double> > MeasureErrors(IEnumerable <SpectralAnchorPointMatch> matches)
        {
            var netMeasurement  = new AlignmentMeasurement <double>();
            var massMeasurement = new AlignmentMeasurement <double>();

            foreach (var match in matches)
            {
                var msFeatureX = match.AnchorPointX.Spectrum.ParentFeature;
                var msFeatureY = match.AnchorPointY.Spectrum.ParentFeature;

                // Skip matches where either spectrum has no parent feature.
                if (msFeatureX == null || msFeatureY == null)
                {
                    continue;
                }

                var parentX = msFeatureX.ParentFeature;
                var parentY = msFeatureY.ParentFeature;

                // Pre-alignment compares against the raw values of Y, post-alignment against its aligned values.
                var netBefore = parentX.Net - parentY.Net;
                netMeasurement.PreAlignment.Add(netBefore);

                var netAfter = parentX.Net - parentY.NetAligned;
                netMeasurement.PostAlignment.Add(netAfter);

                var ppmBefore = FeatureLight.ComputeMassPPMDifference(parentX.MassMonoisotopic, parentY.MassMonoisotopic);
                massMeasurement.PreAlignment.Add(ppmBefore);

                var ppmAfter = FeatureLight.ComputeMassPPMDifference(parentX.MassMonoisotopic, parentY.MassMonoisotopicAligned);
                massMeasurement.PostAlignment.Add(ppmAfter);
            }

            return Tuple.Create(netMeasurement, massMeasurement);
        }
示例#2
0
        /// <summary>
        /// Fits a mass error regression from a set of feature matches.
        /// </summary>
        /// <param name="matches">Matches between alignee and baseline features.</param>
        /// <returns>A mass alignment function whose calibration list contains the single fitted regression.</returns>
        public LcmsWarpMassAlignmentFunction CalculateCalibration(List <LcmsWarpFeatureMatch> matches)
        {
            var regression = new LcmsWarpCombinedRegression();
            regression.SetCentralRegressionOptions(this.options.MassCalibNumXSlices,
                                                   this.options.MassCalibNumYSlices,
                                                   this.options.MassCalibMaxJump,
                                                   this.options.MassCalibMaxZScore,
                                                   this.options.RegressionType);

            // One regression point per match, built from the alignee NET, the NET difference to the
            // baseline (after alignment) and the mass error in ppm.
            var points = new List <RegressionPoint>();
            foreach (var match in matches)
            {
                var alignee  = match.AligneeFeature;
                var baseline = match.BaselineFeature;

                var massErrorPpm = FeatureLight.ComputeMassPPMDifference(alignee.MassMonoisotopic, baseline.MassMonoisotopic);
                var netDelta     = baseline.Net - alignee.NetAligned;
                var point        = new RegressionPoint(alignee.Net, 0, netDelta, massErrorPpm);

                points.Add(point);
            }

            regression.CalculateRegressionFunction(points, "ScanMassError");

            var alignmentFunction = new LcmsWarpMassAlignmentFunction();
            alignmentFunction.Calibrations = new List <LcmsWarpCombinedRegression> { regression };
            return alignmentFunction;
        }
示例#3
0
        public void MassMassCalculations(double massX, double massY)
        {
            // Round trip: convert the mass pair to a ppm difference, then convert that ppm
            // difference back to a mass relative to massX and compare it with massY.
            var ppmDifference = FeatureLight.ComputeMassPPMDifference(massX, massY);
            var recoveredMass = FeatureLight.ComputeDaDifferenceFromPPM(massX, ppmDifference);

            Assert.AreEqual(massY, recoveredMass);
        }
示例#4
0
 /// <summary>
 /// Keeps only the matches whose parent-feature m/z values differ, in absolute ppm, by less than the given limit.
 /// </summary>
 /// <param name="matches">Anchor point matches to filter.</param>
 /// <param name="ppm">Exclusive upper bound on the absolute ppm difference.</param>
 /// <returns>The matches that pass the ppm filter.</returns>
 private IEnumerable <SpectralAnchorPointMatch> FilterMatches(IEnumerable <SpectralAnchorPointMatch> matches, double ppm)
 {
     return from match in matches
            where Math.Abs(FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Spectrum.ParentFeature.Mz,
                                                                 match.AnchorPointY.Spectrum.ParentFeature.Mz)) < ppm
            select match;
 }
示例#5
0
        public void MassPPMCalculations(double massX, double ppm, double epsilon)
        {
            // Round trip: convert the ppm offset to a mass relative to massX, then convert that
            // mass back to a ppm difference; the result should reproduce the input ppm.
            var massYdelta = FeatureLight.ComputeDaDifferenceFromPPM(massX, ppm);
            var ppmDelta   = FeatureLight.ComputeMassPPMDifference(massX, massYdelta);

            // Compare the magnitude of the round-trip error. A signed difference would let an
            // arbitrarily large overshoot (ppmDelta much greater than ppm) pass the check.
            Assert.Less(System.Math.Abs(ppm - ppmDelta), epsilon);
        }
示例#6
0
        /// <summary>
        /// Checks whether two features are within the mass (ppm), NET and drift time tolerances of each other.
        /// </summary>
        /// <param name="x">First feature.</param>
        /// <param name="y">Second feature.</param>
        /// <returns>True when all three absolute differences are at or below their tolerance; false otherwise.</returns>
        private bool WithinRange(T x, T y)
        {
            var ppmGap   = Math.Abs(FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopicAligned, y.MassMonoisotopicAligned));
            var netGap   = Math.Abs(x.Net - y.Net);
            var driftGap = Math.Abs(x.DriftTime - y.DriftTime);

            // Mass and NET are checked first; drift time decides the result once both pass.
            if (ppmGap <= Tolerances.Mass && netGap <= Tolerances.Net)
            {
                return driftGap <= Tolerances.DriftTime;
            }

            return false;
        }
示例#7
0
        /// <summary>
        /// Calculates the weighted Euclidean distance between two features based on aligned
        /// monoisotopic mass (as a ppm difference), NET, and drift time.
        /// </summary>
        /// <param name="x">Feature x.</param>
        /// <param name="y">Feature y.</param>
        /// <param name="massWeight">Weight applied to the squared mass (ppm) difference.</param>
        /// <param name="netWeight">Weight applied to the squared NET difference.</param>
        /// <param name="driftWeight">Weight applied to the squared drift time difference.</param>
        /// <returns>
        /// Distance calculated as
        /// sqrt(massWeight * dMass^2 + netWeight * dNet^2 + driftWeight * dDrift^2).
        /// </returns>
        public double EuclideanDistance(T x, T y, double massWeight, double netWeight, double driftWeight)
        {
            var massDifference  = FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopicAligned, y.MassMonoisotopicAligned);
            var netDifference   = x.Net - y.Net;
            var driftDifference = x.DriftTime - y.DriftTime;

            // Each squared difference is scaled by its own weight. The NET term was previously
            // multiplied by the NET difference itself, which ignored netWeight and could make the
            // sum negative.
            var sum = (massDifference * massDifference) * massWeight +
                      (netDifference * netDifference) * netWeight +
                      (driftDifference * driftDifference) * driftWeight;

            return Math.Sqrt(sum);
        }
示例#8
0
        /// <summary>
        /// Writes one tab-delimited row per anchor point match to a text file: anchor point NET and
        /// m/z values, values of the parent features, and the ppm errors between the parent features.
        /// </summary>
        /// <param name="path">Path of the text file to create.</param>
        /// <param name="matches">Anchor point matches to write. Matches lacking a spectrum or a parent feature are skipped.</param>
        private void SaveMatches(string path, IEnumerable <SpectralAnchorPointMatch> matches)
        {
            using (var writer = File.CreateText(path))
            {
                // The last two labels follow the order in which the values are written below:
                // the m/z ppm error first, then the monoisotopic mass ppm error.
                writer.WriteLine(
                    "NET-apx\tNET-apy\tNETAligned-apy\tmz-apx\tmzAligned-apx\tmz-apy\tmzAligned-apy\tScan-x\tScan-y\tpmz-x\tpmz-y\tpmonomass-x\tpmonomass-y\tpNET-x\tpNET-y\tpNETa-x\tpNETa-y\tpmonomass-x\tpmonomassyx\tpmz-errorppm\tpmonomass-errorppm");
                foreach (var match in matches)
                {
                    if (match.AnchorPointX.Spectrum == null)
                    {
                        continue;
                    }

                    if (match.AnchorPointY.Spectrum == null)
                    {
                        continue;
                    }

                    var parentFeatureX = match.AnchorPointX.Spectrum.ParentFeature;
                    var parentFeatureY = match.AnchorPointY.Spectrum.ParentFeature;

                    // A row cannot be produced without both parent features, so skip the match
                    // instead of failing on a null reference.
                    if (parentFeatureX == null || parentFeatureY == null)
                    {
                        continue;
                    }

                    // Fetch the parents of the parent features once; several columns use them.
                    var umcX = parentFeatureX.GetParentFeature();
                    var umcY = parentFeatureY.GetParentFeature();

                    if (umcX == null || umcY == null)
                    {
                        continue;
                    }

                    var data =
                        string.Format(
                            "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}\t{16}\t{17}\t{18}\t{19}\t{20}\t",
                            match.AnchorPointX.Net,
                            match.AnchorPointY.Net,
                            match.AnchorPointY.NetAligned,
                            match.AnchorPointX.Mz,
                            match.AnchorPointX.MzAligned,
                            match.AnchorPointY.Mz,
                            match.AnchorPointY.MzAligned,
                            parentFeatureX.Scan,
                            parentFeatureY.Scan,
                            parentFeatureX.Mz,
                            parentFeatureY.Mz,
                            parentFeatureX.MassMonoisotopic,
                            parentFeatureY.MassMonoisotopic,
                            umcX.Net,
                            umcY.Net,
                            umcX.NetAligned,
                            umcY.NetAligned,
                            umcX.MassMonoisotopicAligned,
                            umcY.MassMonoisotopicAligned,
                            FeatureLight.ComputeMassPPMDifference(parentFeatureX.Mz, parentFeatureY.Mz),
                            FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned,
                                                                  umcY.MassMonoisotopicAligned)
                            );

                    writer.WriteLine(data);
                }
            }
        }
示例#9
0
        public void TestDistances()
        {
            // Prints how the distance between two initially identical clusters changes as the
            // second cluster is stepped away from the first in drift time, then NET, then mass.
            const int    steps     = 50;
            const double massStep  = .5;
            const double netStep   = .001;
            const double driftStep = .01;

            var metric    = new WeightedEuclideanDistance <UMCClusterLight>();
            var reference = CreateCluster(500, .2, 27);
            var walker    = CreateCluster(500, .2, 27);

            Console.WriteLine("Walk in drift time");
            for (var step = 0; step < steps; step++)
            {
                walker.DriftTime += driftStep;
                var distance  = metric.EuclideanDistance(reference, walker);
                var driftGap  = walker.DriftTime - reference.DriftTime;
                Console.WriteLine("{0}, {1}, {3}, {2}", walker.DriftTime, walker.DriftTime, distance, driftGap);
            }

            // Restore drift time before walking the NET dimension.
            Console.WriteLine();
            Console.WriteLine("Walk in net ");
            walker.DriftTime = 27;

            for (var step = 0; step < steps; step++)
            {
                walker.Net += netStep;
                var distance = metric.EuclideanDistance(reference, walker);
                var netGap   = walker.Net - reference.Net;
                Console.WriteLine("{0}, {1}, {3}, {2}", walker.Net, walker.Net, distance, netGap);
            }

            // Restore NET before walking the mass dimension.
            Console.WriteLine();
            Console.WriteLine("Walk in mass ");
            walker.Net = .2;

            for (var step = 0; step < steps; step++)
            {
                walker.MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(reference.MassMonoisotopic, massStep * step);
                var distance = metric.EuclideanDistance(reference, walker);
                var ppmGap   = FeatureLight.ComputeMassPPMDifference(reference.MassMonoisotopic, walker.MassMonoisotopic);
                Console.WriteLine("{0}, {1}, {3}, {2}", walker.MassMonoisotopic,
                                  walker.MassMonoisotopic,
                                  distance,
                                  ppmGap);
            }
        }
示例#10
0
        /// <summary>
        /// Walks the features in scan order and, when the scan changes, groups the buffered
        /// features that are adjacent in m/z and differ by more than 1 ppm.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this method looks unfinished. The returned list is never populated, the
        /// per-scan m/z map is discarded, <c>currentScan</c> is never advanced, and the feature
        /// that triggers a scan change is not buffered. Only the ppm comparison has been corrected
        /// here; the rest is left as-is until the intended result is confirmed.
        /// </remarks>
        /// <param name="rawMsFeatures">MS features to filter.</param>
        /// <returns>The filtered feature list (currently always empty; see remarks).</returns>
        private List <TChildFeature> FilterMsFeatures(List <TChildFeature> rawMsFeatures)
        {
            // sort by scan...
            var allFeatures = rawMsFeatures.OrderBy(x => x.Scan).ToList();


            var newFeatures   = new List <TChildFeature>();
            var features      = new List <TChildFeature>();
            var totalFeatures = rawMsFeatures.Count;
            var currentScan   = 0;

            for (var i = 0; i < totalFeatures; i++)
            {
                var feature = allFeatures[i];
                // Process the scans...
                if (currentScan != feature.Scan)
                {
                    var mzFeatures = features.OrderBy(x => x.Mz).ToList();
                    var mzMap      = new Dictionary <double, List <TChildFeature> >();
                    for (var j = 1; j < mzFeatures.Count; j++)
                    {
                        var featureJ    = mzFeatures[j];
                        var featurePrev = mzFeatures[j - 1];

                        // Find the mass difference between neighbouring m/z features. This used to
                        // compare featureJ with itself, which always produced 0 ppm.
                        var ppm = FeatureLight.ComputeMassPPMDifference(featureJ.Mz, featurePrev.Mz);
                        if (Math.Abs(ppm) > 1)
                        {
                            if (!mzMap.ContainsKey(featureJ.Mz))
                            {
                                mzMap.Add(featureJ.Mz, new List <TChildFeature>());
                            }
                            mzMap[featureJ.Mz].Add(featureJ);
                            mzMap[featureJ.Mz].Add(featurePrev);
                        }
                    }
                    features.Clear();
                }
                else
                {
                    features.Add(feature);
                }
            }

            return(newFeatures);
        }
示例#11
0
        /// <summary>
        /// Creates a residual alignment plot of mass error (ppm) against m/z.
        /// </summary>
        /// <param name="x">First feature collection passed to the residual plot builder.</param>
        /// <param name="y">Second feature collection passed to the residual plot builder.</param>
        /// <returns>The plot produced by <c>CreateResidualAlignmentPlot</c>.</returns>
        public static PlotBase CreateMassMzResidualAlignmentPlot <T>(IEnumerable <T> x, IEnumerable <T> y)
            where T : FeatureLight
        {
            // Horizontal axis value.
            Func <T, double> mzSelector = feature => feature.Mz;

            // Residuals before alignment (raw mass of both) and after (aligned mass of the second).
            Func <T, T, double> ppmBeforeAlignment =
                (a, b) => FeatureLight.ComputeMassPPMDifference(a.MassMonoisotopic, b.MassMonoisotopic);
            Func <T, T, double> ppmAfterAlignment =
                (a, b) => FeatureLight.ComputeMassPPMDifference(a.MassMonoisotopic, b.MassMonoisotopicAligned);

            var residualPlot = CreateResidualAlignmentPlot(x,
                                                           y,
                                                           mzSelector,
                                                           ppmBeforeAlignment,
                                                           ppmAfterAlignment,
                                                           "mz",
                                                           "Mass Residual (ppm)");

            return residualPlot;
        }
示例#12
0
        /// <summary>
        /// Compares two features by monoisotopic mass (ppm difference), charge state and NET.
        /// </summary>
        /// <param name="featureX">First feature.</param>
        /// <param name="featureY">Second feature.</param>
        /// <returns>
        /// 0 when the features are within the mass tolerance, share a charge state and are within
        /// the NET tolerance; 1 when mass and charge agree but NET does not; otherwise -1 when the
        /// ppm difference is negative and 1 when it is not.
        /// </returns>
        public int CompareMonoisotopic(FeatureLight featureX, FeatureLight featureY)
        {
            var massPpm = FeatureLight.ComputeMassPPMDifference(featureX.MassMonoisotopic, featureY.MassMonoisotopic);

            var sameMassAndCharge = Math.Abs(massPpm) < Tolerances.Mass &&
                                    featureX.ChargeState == featureY.ChargeState;

            if (!sameMassAndCharge)
            {
                // Order by the sign of the ppm difference.
                return massPpm < 0 ? -1 : 1;
            }

            // Mass and charge agree, so NET decides whether the features are equivalent.
            var netGap = Math.Abs(featureX.Net - featureY.Net);
            if (netGap <= Tolerances.Net)
            {
                return 0;
            }

            return 1;
        }
示例#13
0
        /// <summary>
        /// Creates a residual alignment plot of mass error (ppm) against scan.
        /// </summary>
        /// <param name="x">First feature collection passed to the residual plot builder.</param>
        /// <param name="y">Second feature collection passed to the residual plot builder.</param>
        /// <returns>The plot produced by <c>CreateResidualAlignmentPlot</c>.</returns>
        public static PlotBase CreateMassScanResidualAlignmentPlot <T>(IEnumerable <T> x, IEnumerable <T> y)
            where T : FeatureLight
        {
            // Horizontal axis value.
            Func <T, double> scanSelector = feature => feature.Scan;

            // Residuals before alignment (raw mass of both) and after (aligned mass of the second).
            Func <T, T, double> ppmBeforeAlignment =
                (a, b) => FeatureLight.ComputeMassPPMDifference(a.MassMonoisotopic, b.MassMonoisotopic);
            Func <T, T, double> ppmAfterAlignment =
                (a, b) => FeatureLight.ComputeMassPPMDifference(a.MassMonoisotopic, b.MassMonoisotopicAligned);

            return CreateResidualAlignmentPlot(x,
                                               y,
                                               scanSelector,
                                               ppmBeforeAlignment,
                                               ppmAfterAlignment,
                                               "scan",
                                               "Mass Residual (ppm)");
        }
示例#14
0
        /// <summary>
        /// Tests whether two clusters agree in mass (ppm), NET and drift time within the configured tolerances.
        /// </summary>
        /// <param name="clusterX">First cluster to compare.</param>
        /// <param name="clusterY">Second cluster to compare.</param>
        /// <returns>True when every absolute difference is at or below its tolerance; false otherwise.</returns>
        protected override bool AreClustersWithinTolerance(U clusterX, U clusterY)
        {
            // Limits come from the clustering parameters.
            var massLimit  = Parameters.Tolerances.Mass;
            var netLimit   = Parameters.Tolerances.Net;
            var driftLimit = Parameters.Tolerances.DriftTime;

            // Absolute differences between the two clusters.
            var ppmGap   = Math.Abs(FeatureLight.ComputeMassPPMDifference(clusterX.MassMonoisotopicAligned, clusterY.MassMonoisotopicAligned));
            var netGap   = Math.Abs(clusterX.Net - clusterY.Net);
            var driftGap = Math.Abs(clusterX.DriftTime - clusterY.DriftTime);

            return ppmGap <= massLimit && netGap <= netLimit && driftGap <= driftLimit;
        }
示例#15
0
        /// <summary>
        /// Creates the feature finder and selects the first and second pass clusterers based on the options.
        /// </summary>
        /// <param name="provider">Scan summary provider; must not be null.</param>
        /// <param name="options">Feature finding options; a default instance is used when null.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="provider"/> is null.</exception>
        public MsToLcmsFeatures(IScanSummaryProvider provider, LcmsFeatureFindingOptions options = null)
        {
            if (provider == null)
            {
                // Name the offending argument so the failure is diagnosable from the message alone.
                throw new ArgumentNullException("provider");
            }

            // Sort orders and ppm difference functions handed to the tree clusterers below.
            Comparison <MSFeatureLight> mzSort   = (x, y) => x.Mz.CompareTo(y.Mz);
            Comparison <UMCLight>       monoSort = (x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic);
            Func <MSFeatureLight, MSFeatureLight, double> mzDiff   = (x, y) => FeatureLight.ComputeMassPPMDifference(x.Mz, y.Mz);
            Func <UMCLight, UMCLight, double>             monoDiff = (x, y) => FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopic, y.MassMonoisotopic);

            this.provider = provider;
            this.options  = options ?? new LcmsFeatureFindingOptions();

            // First pass: the tree clusterer is built here with the m/z functions; every other
            // algorithm type is created by the factory.
            if (this.options.FirstPassClusterer == MsFeatureClusteringAlgorithmType.BinarySearchTree)
            {
                this.firstPassClusterer = new MsFeatureTreeClusterer <MSFeatureLight, UMCLight>(
                    mzSort,
                    mzDiff,
                    MassComparison.Mz,
                    this.options.InstrumentTolerances.Mass);
            }
            else
            {
                this.firstPassClusterer = ClusterFactory.Create(this.options.FirstPassClusterer);
            }

            // Second pass: same pattern, using the monoisotopic mass functions.
            if (this.options.SecondPassClusterer == GenericClusteringAlgorithmType.BinarySearchTree)
            {
                this.secondPassClusterer = new MsFeatureTreeClusterer <UMCLight, UMCLight>(
                    monoSort,
                    monoDiff,
                    MassComparison.Monoisotopic,
                    this.options.InstrumentTolerances.Mass);
            }
            else
            {
                var clusterFactory = new GenericClusterFactory <UMCLight, UMCLight>();
                this.secondPassClusterer = clusterFactory.Create(this.options.SecondPassClusterer);
            }
        }
示例#16
0
        /// <summary>
        /// Tests whether two clusters agree in mass (ppm), NET and drift time within the supplied tolerances.
        /// </summary>
        /// <param name="clusterX">First cluster to compare.</param>
        /// <param name="clusterY">Second cluster to compare.</param>
        /// <param name="massTolerance">Largest accepted absolute mass difference, in ppm.</param>
        /// <param name="netTolerance">Largest accepted absolute NET difference.</param>
        /// <param name="driftTolerance">Largest accepted absolute drift time difference.</param>
        /// <returns>True when every absolute difference is at or below its tolerance; false otherwise.</returns>
        protected virtual bool AreClustersWithinTolerance(UMCLight clusterX,
                                                          UMCLight clusterY,
                                                          double massTolerance,
                                                          double netTolerance,
                                                          double driftTolerance)
        {
            var ppmSigned = FeatureLight.ComputeMassPPMDifference(clusterX.MassMonoisotopicAligned,
                                                                  clusterY.MassMonoisotopicAligned);
            var ppmGap    = Math.Abs(ppmSigned);
            var netGap    = Math.Abs(clusterX.Net - clusterY.Net);
            var driftGap  = Math.Abs(clusterX.DriftTime - clusterY.DriftTime);

            var withinAll = ppmGap <= massTolerance && netGap <= netTolerance && driftGap <= driftTolerance;
            return withinAll;
        }
示例#17
0
        /// <summary>
        /// Writes a tab-delimited report of anchor point matches: a short header section followed
        /// by one data row per match that has a spectrum on both sides.
        /// </summary>
        /// <param name="path">Path of the text file to create.</param>
        /// <param name="matches">Anchor point matches to write.</param>
        private void SaveMatches(string path, IEnumerable <SpectralAnchorPointMatch> matches)
        {
            using (var output = File.CreateText(path))
            {
                output.WriteLine("[Header]");
                output.WriteLine("p mz = parentMz - A and B denote dataset A and dataset B");
                output.WriteLine("[Data]");
                output.WriteLine("Net-A\tpMz-A\tScan-A\tNet-B\tpMz-B\tScan-B\tMassErrorPpm\tSimScore");

                foreach (var match in matches)
                {
                    // Both anchor points need a spectrum to reach their parent features.
                    if (match.AnchorPointX.Spectrum == null || match.AnchorPointY.Spectrum == null)
                    {
                        continue;
                    }

                    var msFeatureA = match.AnchorPointX.Spectrum.ParentFeature;
                    var msFeatureB = match.AnchorPointY.Spectrum.ParentFeature;

                    // Columns: dataset A (NET, m/z, scan), dataset B (NET, m/z, scan), ppm error, score.
                    output.WriteLine(
                        string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}",
                                      msFeatureA.GetParentFeature().Net,
                                      msFeatureA.GetParentFeature().Mz,
                                      msFeatureA.GetParentFeature().Scan,
                                      msFeatureB.GetParentFeature().Net,
                                      msFeatureB.GetParentFeature().Mz,
                                      msFeatureB.GetParentFeature().Scan,
                                      FeatureLight.ComputeMassPPMDifference(msFeatureA.Mz, msFeatureB.Mz),
                                      match.SimilarityScore));
                }
            }
        }
示例#18
0
        /// <summary>
        /// Compares two features by m/z (ppm difference), scan and charge state.
        /// </summary>
        /// <param name="featureX">First feature.</param>
        /// <param name="featureY">Second feature.</param>
        /// <returns>
        /// 0 when the features are within the mass tolerance, within the scan tolerance and share
        /// a charge state; 1 when the mass agrees but scan or charge does not; otherwise -1 when
        /// the ppm difference is negative and 1 when it is not.
        /// </returns>
        public int CompareMz(FeatureLight featureX, FeatureLight featureY)
        {
            var mzPpm      = FeatureLight.ComputeMassPPMDifference(featureX.Mz, featureY.Mz);
            var withinMass = Math.Abs(mzPpm) < Tolerances.Mass;

            if (!withinMass)
            {
                // Order by the sign of the ppm difference.
                return mzPpm < 0 ? -1 : 1;
            }

            // Mass agrees: the scan gap is checked first, then the charge state.
            var scanGap = Math.Abs(featureX.Scan - featureY.Scan);
            if (scanGap > ScanTolerance || featureX.ChargeState != featureY.ChargeState)
            {
                return 1;
            }

            return 0;
        }
示例#19
0
        /// <summary>
        /// Writes the NET and m/z values of each anchor point match, together with the NET error,
        /// the mass error (ppm) and the similarity score, to a tab-delimited text file.
        /// </summary>
        /// <param name="errorPath">Path of the text file to create.</param>
        /// <param name="matches">Anchor point matches to write.</param>
        private static void WriteErrors(string errorPath, IEnumerable <SpectralAnchorPointMatch> matches)
        {
            using (var writer = File.CreateText(errorPath))
            {
                // Nine labels for the nine values written per row. The header used to list eleven
                // columns (two extra NETA/MassA labels), which shifted every label after the
                // fourth column away from its data.
                writer.WriteLine(
                    "NET\tMass\tNET\tMass\tNETA\tMassA\tNetError\tMassError\tScore");
                foreach (var match in matches)
                {
                    var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz, match.AnchorPointY.Mz);
                    var netError  = match.AnchorPointX.Net - match.AnchorPointY.NetAligned;

                    // Row layout: X (NET, m/z), Y (NET, m/z), Y aligned (NET, m/z), errors, score.
                    writer.WriteLine("{0:F5}\t{1:F5}\t{2:F5}\t{3:F5}\t{4:F5}\t{5:F5}\t{6:F5}\t{7:F5}\t{8:F5}\t",
                                     match.AnchorPointX.Net,
                                     match.AnchorPointX.Mz,
                                     match.AnchorPointY.Net,
                                     match.AnchorPointY.Mz,
                                     match.AnchorPointY.NetAligned,
                                     match.AnchorPointY.MzAligned,
                                     netError,
                                     massError,
                                     match.SimilarityScore);
                }
            }
        }
示例#20
0
        /// <summary>
        /// Clusters MS/MS spectra: picks one MS feature per LC-MS feature, sorts the picks by mass,
        /// splits them into blocks wherever neighbouring masses differ by more than the mass
        /// tolerance (ppm), and clusters each block of two or more features.
        /// </summary>
        /// <param name="features">LC-MS features whose MS features are searched for MS/MS spectra.</param>
        /// <param name="provider">Spectra provider passed on to the per-block clustering call.</param>
        /// <returns>The clusters that contain at least <c>MinimumClusterSize</c> features.</returns>
        public List <MsmsCluster> Cluster(List <UMCLight> features, ISpectraProvider provider)
        {
            UpdateStatus("Mapping UMC's to MS/MS spectra using intensity profile.");

            // Step 1: for every feature keep its most abundant MS feature that has MS/MS spectra.
            var msFeatures = new List <MSFeatureLight>();

            foreach (var feature in features)
            {
                double         abundance  = int.MinValue;
                MSFeatureLight maxFeature = null;

                foreach (var msFeature in feature.MsFeatures)
                {
                    if (msFeature.Abundance > abundance && msFeature.MSnSpectra.Count > 0)
                    {
                        abundance  = msFeature.Abundance;
                        maxFeature = msFeature;
                    }
                }

                if (maxFeature != null)
                {
                    msFeatures.Add(maxFeature);
                }
            }

            UpdateStatus(string.Format("Found {0} total spectra for clustering.", msFeatures.Count));

            UpdateStatus("Sorting spectra.");
            // Sort based on mass using the max abundance of the feature.
            msFeatures.Sort((x, y) => x.MassMonoisotopicMostAbundant.CompareTo(y.MassMonoisotopicMostAbundant));

            // Step 2: walk the sorted list. h is the start of the current block and j the feature
            // being compared with its predecessor.
            var j = 1;
            var h = 0;
            var N = msFeatures.Count;

            var clusters  = new List <MsmsCluster>();
            var tol       = MassTolerance;
            var lastTotal = 0;

            UpdateStatus("Clustering spectra.");
            while (j < N)
            {
                var featureJ = msFeatures[j];
                var featureI = msFeatures[j - 1];
                var diff     = FeatureLight.ComputeMassPPMDifference(featureJ.MassMonoisotopicMostAbundant, featureI.MassMonoisotopicMostAbundant);

                if (Math.Abs(diff) > tol)
                {
                    // We only care to create clusters of size greater than one.
                    if ((j - h) > 1)
                    {
                        var data = Cluster(h,
                                           j,
                                           msFeatures,
                                           provider,
                                           SimilarityTolerance);
                        clusters.AddRange(data);
                    }

                    // The gap closes the current block; the next block starts at j.
                    h = j;
                }

                // Report progress roughly every 500 spectra.
                if (j - lastTotal > 500)
                {
                    lastTotal = j;
                    UpdateStatus(string.Format("Processed {0} / {1} total spectra.", lastTotal, N));
                }
                j++;
            }
            UpdateStatus("Finishing last cluster data.");

            // Cluster whatever remains in the final block.
            if ((j - h) > 1)
            {
                var data = Cluster(h,
                                   j,
                                   msFeatures,
                                   provider,
                                   SimilarityTolerance);
                clusters.AddRange(data);
            }
            UpdateStatus("Finished clustering.");
            var passingClusters = clusters.Where(cluster => cluster.Features.Count >= MinimumClusterSize);

            return passingClusters.ToList();
        }
示例#21
0
        /// <summary>
        /// Clusters a set of features. The input is sorted by mass, partitioned into blocks wherever
        /// consecutive features differ by more than the mass tolerance (ppm), and each block is
        /// clustered on its own.
        /// </summary>
        /// <param name="data">Features to cluster. The list is sorted in place.</param>
        /// <param name="clusters">List that receives the new clusters; the same instance is returned.</param>
        /// <returns>The <paramref name="clusters"/> list with the new clusters added.</returns>
        /// <exception cref="NullReferenceException">
        /// Thrown when <paramref name="data"/> is null or contains a null entry. The exception type
        /// is kept as-is so existing callers that catch it continue to work.
        /// </exception>
        public virtual List <U> Cluster(List <T> data, List <U> clusters)
        {
            /*
             * This clustering algorithm first sorts the list of input UMC's by mass.  It then iterates
             * through this list partitioning the data into blocks of UMC's based on a mass tolerance.
             * When it finds gaps larger or equal to the mass (ppm) tolerance specified by the user,
             * it will process the data before the gap (a block) until the current index of the features in question.
             */

            // Make sure we have data to cluster first.
            if (data == null)
            {
                throw new NullReferenceException("The input feature data list was null.  Cannot process this data.");
            }

            // Make sure there is no null UMC data in the input list.
            // FindIndex returns -1 when nothing matches, so index 0 is a valid hit and must be rejected too.
            var nullIndex = data.FindIndex(x => x == null);

            if (nullIndex >= 0)
            {
                throw new NullReferenceException("The feature at index " + nullIndex + " was null.  Cannot process this data.");
            }

            OnNotify("Sorting cluster mass list");

            // The first thing we do is to sort the features based on mass since we know that has the least variability in the data across runs.
            data.Sort(m_massComparer);

            // Now partition the data based on mass ranges and the parameter values.
            var massTolerance = Parameters.Tolerances.Mass;

            // This is the index of first feature of a given mass partition.
            var startUMCIndex = 0;
            var totalFeatures = data.Count;


            OnNotify("Detecting mass partitions");
            var tenPercent = Convert.ToInt32(totalFeatures * .1);
            var counter    = 0;
            var percent    = 0;

            for (var i = 0; i < totalFeatures - 1; i++)
            {
                // Progress notification roughly every ten percent of the input.
                if (counter > tenPercent)
                {
                    counter  = 0;
                    percent += 10;
                    OnNotify(string.Format("Clustering Completed...{0}%", percent));
                }
                counter++;

                // Here we compute the ppm mass difference between consecutive features (based on mass).
                // This will determine if we cluster a block of data or not.
                var umcX = data[i];
                var umcY = data[i + 1];
                var ppm  = Math.Abs(FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned, umcY.MassMonoisotopicAligned));

                // If the difference is greater than the tolerance then we cluster
                //  - we dont check the sign of the ppm because the data should be sorted based on mass.
                if (ppm > massTolerance)
                {
                    // If the block starts at the current index, the feature at startUMCIndex
                    // could not find any other features near it within the mass tolerance specified.
                    if (startUMCIndex == i)
                    {
                        var cluster = new U();
                        cluster.AmbiguityScore = m_maxDistance;
                        umcX.SetParentFeature(cluster);
                        cluster.AddChildFeature(umcX);
                        clusters.Add(cluster);
                    }
                    else
                    {
                        // Otherwise we have more than one feature to consider.
                        var distances     = CalculatePairWiseDistances(startUMCIndex, i, data);
                        var localClusters = CreateSingletonClusters(data, startUMCIndex, i);
                        var blockClusters = LinkFeatures(distances, localClusters);
                        CalculateAmbiguityScore(blockClusters);
                        clusters.AddRange(blockClusters);
                    }

                    startUMCIndex = i + 1;
                }
            }

            // Make sure that we cluster what is left over.
            if (startUMCIndex < totalFeatures)
            {
                OnNotify(string.Format("Clustering last partition...{0}%", percent));
                var distances     = CalculatePairWiseDistances(startUMCIndex, totalFeatures - 1, data);
                var localClusters = CreateSingletonClusters(data, startUMCIndex, totalFeatures - 1);
                var blockClusters = LinkFeatures(distances, localClusters);
                CalculateAmbiguityScore(blockClusters);

                // With fewer than two singleton clusters the singletons themselves are added.
                if (localClusters.Count < 2)
                {
                    clusters.AddRange(localClusters.Values);
                }
                else
                {
                    clusters.AddRange(blockClusters);
                }
            }


            OnNotify("Generating cluster statistics");
            foreach (var cluster in clusters)
            {
                cluster.CalculateStatistics(Parameters.CentroidRepresentation);
            }

            return(clusters);
        }
示例#22
0
        /// <summary>
        /// Writes diagnostic z-scores to the console describing how the mass (ppm), NET and
        /// drift time of each cluster's features are distributed around the cluster's own values.
        /// </summary>
        /// <remarks>
        /// This method does not split or modify any cluster: nothing is ever added to the
        /// returned list, so the result is always empty.
        /// </remarks>
        /// <param name="clusters">Clusters to examine.</param>
        /// <returns>A new list of clusters; currently always empty.</returns>
        public List<U> ProcessClusters(List<U> clusters)
        {
            var newClusters = new List<U>();

            // Look for merged clusters that need to be split...
            foreach (var cluster in clusters)
            {
                // Reference values of the cluster that every member feature is compared against.
                var medianNet   = cluster.Net;
                var medianMass  = cluster.MassMonoisotopic;
                var medianDrift = cluster.DriftTime;

                // Per-feature deltas, keyed by feature.  These are populated but not read in
                // this method.
                var massDistributions  = new Dictionary<T, double>();
                var netDistributions   = new Dictionary<T, double>();
                var driftDistributions = new Dictionary<T, double>();

                var massDistances  = new List<double>();
                var netDistances   = new List<double>();
                var driftDistances = new List<double>();

                // Build distributions
                foreach (var feature in cluster.Features)
                {
                    var mass  = FeatureLight.ComputeMassPPMDifference(feature.MassMonoisotopicAligned, medianMass);
                    var net   = feature.Net - medianNet;
                    var drift = feature.DriftTime - medianDrift;

                    // Each delta goes into the dictionary of its own dimension (the NET and
                    // drift values were previously stored in each other's dictionary).
                    massDistributions.Add(feature, mass);
                    netDistributions.Add(feature, net);
                    driftDistributions.Add(feature, drift);

                    massDistances.Add(mass);
                    netDistances.Add(net);
                    driftDistances.Add(drift);
                }

                massDistances.Sort();
                netDistances.Sort();
                driftDistances.Sort();

                // Calculates the sample means for positive and negative sides of the median.
                // NOTE(review): judging by the labels printed below, Item1/Item2 appear to be the
                // negative/positive sides and Item3 the whole distribution - confirm against
                // CalculateAllDistributions.
                var massDistribution  = CalculateAllDistributions(massDistances);
                var netDistribution   = CalculateAllDistributions(netDistances);
                var driftDistribution = CalculateAllDistributions(driftDistances);

                // Item1 vs. Item2
                var massZScore  = CalculateZScore(massDistribution.Item1, massDistribution.Item2);
                var netZScore   = CalculateZScore(netDistribution.Item1, netDistribution.Item2);
                var driftZScore = CalculateZScore(driftDistribution.Item1, driftDistribution.Item2);

                // Now that we have data we can test the distributions to see if they are similar or not...
                Console.WriteLine("   Neg to Pos ");
                Console.WriteLine("Mass z-score \t{0}",  massZScore);
                Console.WriteLine("Net z-score  \t{0}",   netZScore);
                Console.WriteLine("Drift z-score\t{0}", driftZScore);
                Console.WriteLine();

                // Item1 vs. Item3
                massZScore  = CalculateZScore(massDistribution.Item1, massDistribution.Item3);
                netZScore   = CalculateZScore(netDistribution.Item1, netDistribution.Item3);
                driftZScore = CalculateZScore(driftDistribution.Item1, driftDistribution.Item3);
                Console.WriteLine("   Negative ");
                Console.WriteLine("Mass z-score \t{0}", massZScore);
                Console.WriteLine("Net z-score  \t{0}", netZScore);
                Console.WriteLine("Drift z-score\t{0}", driftZScore);
                Console.WriteLine();

                // Item2 vs. Item3
                Console.WriteLine("   Positive ");
                massZScore  = CalculateZScore(massDistribution.Item2, massDistribution.Item3);
                netZScore   = CalculateZScore(netDistribution.Item2, netDistribution.Item3);
                driftZScore = CalculateZScore(driftDistribution.Item2, driftDistribution.Item3);
                Console.WriteLine("Mass z-score \t{0}", massZScore);
                Console.WriteLine("Net z-score  \t{0}", netZScore);
                Console.WriteLine("Drift z-score\t{0}", driftZScore);
            }

            return newClusters;
        }
示例#23
0
        /// <summary>
        /// Builds the NET and m/z LOESS alignment functions from spectral anchor point matches,
        /// stores them in <c>m_netInterpolator</c> and <c>m_massInterpolator</c>, and writes the
        /// predicted aligned NET and m/z onto the Y anchor point of every retained match.
        /// </summary>
        /// <param name="matches">Candidate matches between X and Y anchor points.</param>
        public void CreateAlignmentFunctions(IEnumerable <SpectralAnchorPointMatch> matches)
        {
            // Largest absolute m/z error (in ppm) a match may have to be used for the NET fit.
            const double maxMassErrorPpm = 20;

            // 1. For every X scan keep only the highest scoring match.  The matches are visited in
            //    order of X NET, so on a score tie the match with the lower NET is the one kept.
            var bestMatches = new Dictionary <int, SpectralAnchorPointMatch>();

            foreach (var match in matches.OrderBy(x => x.AnchorPointX.Net))
            {
                var scan = match.AnchorPointX.Scan;
                SpectralAnchorPointMatch current;
                if (!bestMatches.TryGetValue(scan, out current) ||
                    current.SimilarityScore < match.SimilarityScore)
                {
                    bestMatches[scan] = match;
                }
            }

            // 2. Do the same for the Y scans so that every Y scan is also used at most once.
            var uniqueMatches = new Dictionary <int, SpectralAnchorPointMatch>();

            foreach (var match in bestMatches.Values)
            {
                var scan = match.AnchorPointY.Scan;
                SpectralAnchorPointMatch current;
                if (!uniqueMatches.TryGetValue(scan, out current) ||
                    current.SimilarityScore < match.SimilarityScore)
                {
                    uniqueMatches[scan] = match;
                }
            }

            // Then generate the NET alignment from the matches that agree in m/z and charge state.
            var netAnchors = uniqueMatches.Values
                             .OrderBy(x => x.AnchorPointY.Net)
                             .Where(x =>
                             {
                                 // The ppm difference is signed, so compare its magnitude; otherwise any
                                 // large negative error would pass the filter.
                                 var ppm = Math.Abs(FeatureLight.ComputeMassPPMDifference(x.AnchorPointX.Mz,
                                                                                          x.AnchorPointY.Mz));
                                 if (!(ppm < maxMassErrorPpm))
                                 {
                                     return false;
                                 }

                                 // A spectrum without a parent feature has no charge state to compare.
                                 var parentX = x.AnchorPointX.Spectrum.ParentFeature;
                                 var parentY = x.AnchorPointY.Spectrum.ParentFeature;
                                 return parentX != null &&
                                        parentY != null &&
                                        parentX.ChargeState == parentY.ChargeState;
                             })
                             .ToList();

            var netXvalues = new List <double>();
            var netYvalues = new List <double>();

            foreach (var match in netAnchors)
            {
                netXvalues.Add(match.AnchorPointX.Net);
                netYvalues.Add(match.AnchorPointY.Net);
            }

            var netInterpolator = new LoessInterpolator(Bandwidth, 5);

            netInterpolator.Smooth(netYvalues, netXvalues, FitFunctionFactory.Create(FitFunctionTypes.TriCubic));

            // Then generate the mass alignment.  We have to resort the matches based on m/z.
            // NOTE(review): unlike the NET fit, this uses every unique match, not only those that
            // passed the ppm / charge state filter above - confirm this is intended.
            var massAnchors = uniqueMatches.Values.OrderBy(x => x.AnchorPointY.Mz).ToList();
            var massXvalues = new List <double>();
            var massYvalues = new List <double>();

            foreach (var match in massAnchors)
            {
                massXvalues.Add(match.AnchorPointX.Mz);
                massYvalues.Add(match.AnchorPointY.Mz);
            }

            var massInterpolator = new LoessInterpolator();

            massInterpolator.Smooth(massYvalues, massXvalues, FitFunctionFactory.Create(FitFunctionTypes.TriCubic));

            m_netInterpolator  = netInterpolator;
            m_massInterpolator = massInterpolator;

            // Apply both fits to the Y side of every unique match.
            foreach (var match in massAnchors)
            {
                match.AnchorPointY.NetAligned = netInterpolator.Predict(match.AnchorPointY.Net);
                match.AnchorPointY.MzAligned  = massInterpolator.Predict(match.AnchorPointY.Mz);
            }
        }
示例#24
0
        /// <summary>
        /// Clusters features based on their pairwise distances by building minimal spanning
        /// trees (MST) over the graph of distances and turning each tree into clusters.
        /// </summary>
        /// <param name="potentialDistances">Pairwise distances between all features in question.</param>
        /// <param name="clusters">Singleton clusters from each feature.</param>
        /// <returns>List of features clustered together.</returns>
        public override List <U> LinkFeatures(List <Data.PairwiseDistance <T> > potentialDistances, Dictionary <int, U> clusters)
        {
            var newClusters = new List <U>();

            // There is an edge case with this setup that a singleton outside of the range
            // of other features made it into the batch of edges, but there is no corresponding edge
            // to the rest of the graph(s).  So here we hash all features, then remove every feature
            // that takes part in at least one within-tolerance pair.  Whatever is left has no
            // usable edge and is captured as a singleton cluster.
            var unlinkedFeatures = new HashSet <T>();

            foreach (var cluster in clusters.Values)
            {
                foreach (var feature in cluster.Features)
                {
                    unlinkedFeatures.Add(feature);
                }
            }

            foreach (var distance in potentialDistances)
            {
                if (AreClustersWithinTolerance(distance.FeatureX, distance.FeatureY))
                {
                    unlinkedFeatures.Remove(distance.FeatureX);
                    unlinkedFeatures.Remove(distance.FeatureY);
                }
            }

            foreach (var feature in unlinkedFeatures)
            {
                var cluster = new U();
                feature.SetParentFeature(cluster);
                cluster.AddChildFeature(feature);
                newClusters.Add(cluster);
            }

            // Turn the distances into graph edges, shortest first, giving each edge a
            // sequential ID so it can be tracked while the trees are built.
            var id    = 0;
            var edges = new List <Edge <T> >();

            foreach (var element in potentialDistances.OrderBy(x => x.Distance))
            {
                edges.Add(new Edge <T>(id++,
                                       element.Distance,
                                       element.FeatureX,
                                       element.FeatureY));
            }

            var graph = new FeatureGraph <T>();
            graph.CreateGraph(edges);

            var queue = new Queue <Edge <T> >(edges);

            // IDs of the edges that already belong to a tree, so no tree is started from them again.
            var seenEdge = new HashSet <int>();

            // Now we start at the MST building
            if (DumpLinearRelationship)
            {
                Console.WriteLine("GraphEdgeLength");
            }
            while (queue.Count > 0)
            {
                var startEdge = queue.Dequeue();

                // If we have already seen the edge, ignore it...
                if (seenEdge.Contains(startEdge.ID))
                {
                    continue;
                }

                var mstGroup = ConstructSubTree(graph,
                                                seenEdge,
                                                startEdge);

                // NOTE(review): this tree is filled but never read in this method; it is kept
                // only because MstLrTree.Insert is not visible here.
                var clusterTree = new MstLrTree <Edge <T> >();

                foreach (var dist in mstGroup.LinearRelationship)
                {
                    seenEdge.Add(dist.ID);
                    clusterTree.Insert(dist);

                    if (DumpLinearRelationship)
                    {
                        Console.WriteLine("{0}", dist.Length);
                    }
                }

                // The cutoff is NSigma itself; it is not scaled by the spread of the edge lengths.
                var cutoff = NSigma;

                var mstClusters = CreateClusters(mstGroup, cutoff);
                newClusters.AddRange(mstClusters);
            }

            return(newClusters);
        }
示例#25
0
        /// <summary>
        /// Loads the features of one charge state from a database, partitions them by mass and
        /// times the pairwise distance calculation of every partition holding more than one feature.
        /// Prints the number of singleton partitions followed by one "size, milliseconds" line per
        /// timed partition.
        /// </summary>
        /// <param name="databasePath">Path of the feature database to read.</param>
        /// <param name="chargeState">Charge state of the features to extract.</param>
        /// <param name="massTolerance">Mass tolerance in ppm; a larger gap between consecutive features starts a new partition.</param>
        /// <param name="netTolerance">NET tolerance passed to the distance calculation.</param>
        /// <param name="driftTolerance">Drift time tolerance passed to the distance calculation.</param>
        /// <exception cref="NullReferenceException">The extracted feature list contains a null entry.</exception>
        public void MassPartitionTest(string databasePath,
                                      int chargeState,
                                      double massTolerance,
                                      double netTolerance,
                                      double driftTolerance)
        {
            var database = new UmcAdoDAO();

            database.DatabasePath = databasePath;

            Logger.PrintMessage("Extracting Features", true);
            var data = database.FindByCharge(chargeState);

            // Make sure there is no null UMC data in the input list.
            // FindIndex returns -1 when nothing matches, so index 0 is a valid hit as well.
            var nullIndex = data.FindIndex(x => x == null);

            if (nullIndex >= 0)
            {
                throw new NullReferenceException("The feature at index " + nullIndex +
                                                 " was null.  Cannot process this data.");
            }

            // The first thing we do is to sort the features based on mass since we know that has the least variability in the data across runs.
            data.Sort(m_massComparer);

            // This is the index of first feature of a given mass partition.
            var startUMCIndex = 0;
            var totalFeatures = data.Count;
            var singletons    = 0;
            var sizes         = new List <int>();
            var times         = new List <double>();

            // NOTE(review): the partition still open when the loop ends is neither counted nor
            // timed - confirm whether it should be.
            for (var i = 0; i < totalFeatures - 1; i++)
            {
                // Here we compute the ppm mass difference between consecutive features (based on mass).
                // This will determine if we cluster a block of data or not.
                var umcX = data[i];
                var umcY = data[i + 1];
                var ppm  =
                    Math.Abs(FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned,
                                                                   umcY.MassMonoisotopicAligned));

                // Features within tolerance of each other stay in the current partition.
                //  - we dont check the sign of the ppm because the data should be sorted based on mass.
                if (!(ppm > massTolerance))
                {
                    continue;
                }

                if (startUMCIndex == i)
                {
                    // The partition holds only the feature at startUMCIndex: it could not find any
                    // other feature near it within the mass tolerance specified.
                    singletons++;
                }
                else
                {
                    // Only the distance calculation is timed; its result is not needed here.
                    var timer = System.Diagnostics.Stopwatch.StartNew();
                    CalculatePairwiseDistances(data,
                                               startUMCIndex,
                                               i,
                                               massTolerance,
                                               netTolerance,
                                               driftTolerance);
                    timer.Stop();

                    sizes.Add(i - startUMCIndex + 1);
                    times.Add(timer.Elapsed.TotalMilliseconds);
                }
                startUMCIndex = i + 1;
            }

            Console.WriteLine("{0}", singletons);
            Console.WriteLine();

            // sizes and times are always filled together, so they share their indices.
            for (var i = 0; i < sizes.Count; i++)
            {
                Console.WriteLine("{0}\t{1}", sizes[i], times[i]);
            }
        }
示例#26
0
        /// <summary>
        ///     Runs the MultiAlign analysis: finds LC-MS features in the baseline dataset and, for every
        ///     alignee dataset, finds its features, clusters baseline and alignee features together,
        ///     aligns the alignee to the baseline, clusters again and reports the cluster analysis of
        ///     both clustering passes through UpdateStatus.
        /// </summary>
        /// <param name="baselineDataset">Dataset whose features, raw file and sequence file form the baseline.</param>
        /// <param name="aligneeDatasets">Datasets that are each aligned against the baseline.</param>
        /// <param name="featureFindingOptions">Options handed to the feature finder.</param>
        /// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
        /// <param name="lcmsFilterOptions">Not used by this method.</param>
        /// <param name="peptideOptions">Supplies the Fdr and IdScore used when linking peptides to features.</param>
        /// <param name="featureFinder">Creates the LC-MS features from the MS features.</param>
        /// <param name="aligner">Aligns the alignee features to the baseline features.</param>
        /// <param name="clusterer">Clusters the combined features before and after the alignment.</param>
        /// <param name="matchPath">Passed to SaveMatches for every alignee dataset.</param>
        /// <param name="errorPath">Passed to WriteErrors for every alignee dataset.</param>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
                                              IEnumerable <DatasetInformation> aligneeDatasets,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
                                              IFeatureAligner <IEnumerable <UMCLight>,
                                                               IEnumerable <UMCLight>,
                                                               AlignmentData> aligner,
                                              IClusterer <UMCLight, UMCClusterLight> clusterer,
                                              string matchPath,
                                              string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = new InformedProteomicsReader())
            {
                rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                                                                  featureFindingOptions,
                                                                  rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                // The baseline provider is created once and shared by every alignee below.
                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load the alignee dataset
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = new InformedProteomicsReader())
                    {
                        rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                                                         featureFindingOptions,
                                                                         rawProviderY);
                        LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr,
                                               peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // cluster before we do anything else....
                        var allFeatures = new List <UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            // NOTE(review): this assigns the property to itself; possibly an aligned
                            // NET property was meant to be initialized here - confirm.
                            feature.Net = feature.Net;
                            // Start from the unaligned mass so the first clustering pass uses raw masses.
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters     = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider  = providerY;
                        aligner.BaselineSpectraProvider = providerX;


                        UpdateStatus("Aligning data");
                        // Aligner data
                        var data    = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;


                        WriteErrors(errorPath, matches);

                        // create anchor points for LCMSWarp alignment
                        // NOTE(review): massPoints and netPoints are filled but never read afterwards
                        // in this method.
                        var massPoints = new List <RegressionPoint>();
                        var netPoints  = new List <RegressionPoint>();
                        foreach (var match in matches)
                        {
                            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                                                                  match.AnchorPointY.Mz);
                            var netError  = match.AnchorPointX.Net - match.AnchorPointY.Net;
                            var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                            massPoints.Add(massPoint);

                            var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                            netPoints.Add(netPoint);
                        }


                        // Detach every feature from the clusters of the first pass before clustering again.
                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId  = -1;
                        }
                        // Then cluster after alignment!
                        // NOTE(review): the same feature objects are clustered again; this only differs
                        // from the first pass if aligner.Align updated them - confirm.
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        // Tab separated summary of both clustering passes.
                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                                   preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                                   postAlignment.DifferentCluster));

                        SaveMatches(matchPath, matches);
                    }
                }
            }

            // Not reached if anything above throws, as there is no try/finally around the analysis.
            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }
示例#27
0
        /// <summary>
        /// Calculates the pairwise distances between all clusters that lie within the configured
        /// mass, NET and drift time tolerances of each other.
        /// </summary>
        /// <remarks>
        /// Every ordered pair is visited, so two clusters within tolerance produce two entries:
        /// one for (I, J) and one for (J, I).
        /// </remarks>
        /// <param name="clusters">Clusters to compute distances over; only the values are used.</param>
        /// <returns>List of cluster distances to consider during clustering.</returns>
        protected List <Data.PairwiseDistance <U> > CalculateDistances(Dictionary <int, U> clusters)
        {
            var massTolerance  = Parameters.Tolerances.Mass;
            var netTolerance   = Parameters.Tolerances.Net;
            var driftTolerance = Parameters.Tolerances.DriftTime;
            var onlyClusterSameChargeStates = Parameters.OnlyClusterSameChargeStates;

            var distances = new List <Data.PairwiseDistance <U> >();

            foreach (var clusterI in clusters.Values)
            {
                var driftTimeX   = clusterI.DriftTime;
                var netAlignedX  = clusterI.Net;
                var massAlignedX = clusterI.MassMonoisotopicAligned;
                var chargeStateX = clusterI.ChargeState;

                foreach (var clusterJ in clusters.Values)
                {
                    // Don't calculate the distance of a cluster to itself
                    if (clusterI == clusterJ)
                    {
                        continue;
                    }

                    // Calculate the distances here (using a cube).  We dont care if we are going to re-compute
                    // these again later, because here we want to fall within the cube, the distance function used
                    // later is more related to determining a scalar value instead.
                    var massDiff = Math.Abs(FeatureLight.ComputeMassPPMDifference(massAlignedX,
                                                                                  clusterJ.MassMonoisotopicAligned));
                    var netDiff   = Math.Abs(netAlignedX - clusterJ.Net);
                    var driftDiff = Math.Abs(driftTimeX - clusterJ.DriftTime);

                    // Make sure we fall within the distance range before computing...
                    var withinTolerance = massDiff <= massTolerance &&
                                          netDiff <= netTolerance &&
                                          driftDiff <= driftTolerance;
                    if (!withinTolerance)
                    {
                        continue;
                    }

                    // If IMS or equivalent only cluster similar charge states; otherwise the
                    // charge state is not looked at.
                    if (!onlyClusterSameChargeStates || chargeStateX == clusterJ.ChargeState)
                    {
                        distances.Add(new Data.PairwiseDistance <U>
                        {
                            FeatureX = clusterI,
                            FeatureY = clusterJ,
                            Distance = GetAverageClusterDistance(clusterI, clusterJ, Parameters.DistanceFunction)
                        });
                    }
                }
            }
            return(distances);
        }
示例#28
0
        /// <summary>
        ///     Runs the MultiAlign analysis
        /// </summary>
        /// <summary>
        /// Normalizes the NET of two feature sets, aligns the alignee to the baseline,
        /// and reports cluster scores computed before and after the alignment.
        /// </summary>
        /// <remarks>
        /// Both inputs are materialized once so that the objects whose NET and aligned mass
        /// are initialized here are the same objects handed to the aligner and the clusterer.
        /// After alignment the matches are filtered with <c>FilterMatches(matches, 40)</c>
        /// and written with <c>SaveMatches</c>.
        /// </remarks>
        /// <param name="baselineFeatures">Features of the baseline dataset.</param>
        /// <param name="aligneeFeatures">Features of the dataset being aligned.</param>
        /// <param name="providerX">Spectra provider assigned to the aligner as the baseline provider.</param>
        /// <param name="providerY">Spectra provider assigned to the aligner as the alignee provider.</param>
        /// <param name="aligner">Aligner used to align the alignee features to the baseline features.</param>
        /// <param name="clusterer">Clusterer run over the combined features before and after alignment.</param>
        /// <param name="matchPath">Path passed to <c>SaveMatches</c> for the filtered matches.</param>
        /// <param name="errorPath">Currently unused; kept so existing callers continue to compile.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="baselineFeatures"/> or <paramref name="aligneeFeatures"/> is null.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown by the LINQ Min/Max calls when either feature set is empty.
        /// </exception>
        public void AlignDatasets(IEnumerable <UMCLight> baselineFeatures,
                                  IEnumerable <UMCLight> aligneeFeatures,
                                  ISpectraProvider providerX,
                                  ISpectraProvider providerY,
                                  IFeatureAligner <IEnumerable <UMCLight>,
                                                   IEnumerable <UMCLight>,
                                                   AlignmentData> aligner,
                                  IClusterer <UMCLight, UMCClusterLight> clusterer,
                                  string matchPath,
                                  string errorPath)
        {
            if (baselineFeatures == null)
            {
                throw new ArgumentNullException("baselineFeatures");
            }
            if (aligneeFeatures == null)
            {
                throw new ArgumentNullException("aligneeFeatures");
            }

            // Materialize once: the inputs are walked several times below, and a lazy
            // sequence could otherwise yield different instances on each pass.
            var baseline = baselineFeatures as IList <UMCLight> ?? baselineFeatures.ToList();
            var alignee  = aligneeFeatures as IList <UMCLight> ?? aligneeFeatures.ToList();

            // cluster before we do anything else....
            var allFeatures = new List <UMCLight>(baseline.Count + alignee.Count);
            allFeatures.AddRange(baseline);
            allFeatures.AddRange(alignee);

            var maxBaseline = baseline.Max(x => x.Scan);
            var minBaseline = baseline.Min(x => x.Scan);

            var maxAlignee = alignee.Max(x => x.Scan);
            var minAlignee = alignee.Min(x => x.Scan);

            AssignNormalizedNet(alignee, minAlignee, maxAlignee);
            AssignNormalizedNet(baseline, minBaseline, maxBaseline);

            // This tells us the differences before we align.
            var clusters  = clusterer.Cluster(allFeatures);
            var clusterId = 0;

            foreach (var cluster in clusters)
            {
                cluster.Id = clusterId++;
            }
            var scorer       = new GlobalPeptideClusterScorer();
            var preAlignment = scorer.Score(clusters);

            aligner.AligneeSpectraProvider  = providerY;
            aligner.BaselineSpectraProvider = providerX;

            UpdateStatus("Aligning data");
            var data    = aligner.Align(baseline, alignee);
            var matches = data.Matches;

            // Drop the pre-alignment cluster assignments so the second clustering starts clean.
            foreach (var feature in allFeatures)
            {
                feature.UmcCluster = null;
                feature.ClusterId  = -1;
            }

            // Then cluster after alignment!
            UpdateStatus("clustering data");
            clusters = clusterer.Cluster(allFeatures);
            var postAlignment = scorer.Score(clusters);

            UpdateStatus("Note\tSame\tDifferent");
            UpdateStatus(string.Format("Pre\t{0}\t{1}",
                                       preAlignment.SameCluster,
                                       preAlignment.DifferentCluster));
            UpdateStatus(string.Format("Post\t{0}\t{1}",
                                       postAlignment.SameCluster,
                                       postAlignment.DifferentCluster));

            matches = FilterMatches(matches, 40);

            SaveMatches(matchPath, matches);
            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(clusterer);
        }

        /// <summary>
        /// Sets each feature's NET to its scan position scaled into the given scan range and
        /// initializes the aligned monoisotopic mass to the unaligned value.
        /// </summary>
        /// <param name="features">Features to update in place.</param>
        /// <param name="minScan">Smallest scan in the feature set.</param>
        /// <param name="maxScan">Largest scan in the feature set.</param>
        private static void AssignNormalizedNet(IEnumerable <UMCLight> features, double minScan, double maxScan)
        {
            var scanRange = maxScan - minScan;

            foreach (var feature in features)
            {
                // A zero-width scan range would otherwise yield NaN (0/0); use 0 instead.
                feature.Net = scanRange > 0
                    ? (Convert.ToDouble(feature.Scan) - minScan) / scanRange
                    : 0;
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
            }
        }
示例#29
0
        /// <summary>
        /// Finds LCMS Features from MS Features.
        /// </summary>
        /// <remarks>
        /// Steps, in order: assign a normalized NET to every MS feature from its scan, cluster
        /// the MS features by m/z, drop clusters without MS features, compute statistics and the
        /// monoisotopic mass, optionally build XICs when <c>SpectraProvider</c> is set, recompute
        /// statistics and NET, combine clusters by monoisotopic mass, and finally assign
        /// sequential ids starting at zero.
        /// </remarks>
        /// <param name="rawMsFeatures">MS features to cluster; their <c>Net</c> is overwritten.</param>
        /// <returns>The resulting parent features, each with a unique id.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="rawMsFeatures"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown by the LINQ Min/Max calls when <paramref name="rawMsFeatures"/> is empty.
        /// </exception>
        /// <exception cref="InvalidDataException">
        /// The m/z clustering returned null, or XIC creation did not return parent features.
        /// </exception>
        public List <TParentFeature> Cluster(List <TChildFeature> rawMsFeatures)
        {
            if (rawMsFeatures == null)
            {
                throw new ArgumentNullException("rawMsFeatures");
            }

            Comparison <TChildFeature>  mzSort   = (x, y) => x.Mz.CompareTo(y.Mz);
            Comparison <TParentFeature> monoSort = (x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic);
            Func <TChildFeature, TChildFeature, double>   mzDiff   = (x, y) => FeatureLight.ComputeMassPPMDifference(x.Mz, y.Mz);
            Func <TParentFeature, TParentFeature, double> monoDiff = (x, y) => FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopic, y.MassMonoisotopic);

            var minScan   = Convert.ToDouble(rawMsFeatures.Min(x => x.Scan));
            var maxScan   = Convert.ToDouble(rawMsFeatures.Max(x => x.Scan));
            var scanRange = maxScan - minScan;

            foreach (var msFeature in rawMsFeatures)
            {
                // A zero-width scan range would otherwise yield NaN (0/0); use 0 instead.
                msFeature.Net = scanRange > 0
                    ? (Convert.ToDouble(msFeature.Scan) - minScan) / scanRange
                    : 0;
            }

            // NOTE: the filtering step is currently disabled; only the progress message is emitted.
            OnProgress("Filtering ambiguous features");
            //rawMsFeatures = FilterMsFeatures(rawMsFeatures);

            OnProgress("Clustering child features into potential UMC candidates");
            // First cluster based on m/z finding the XIC's
            var features = Cluster <TChildFeature, TParentFeature>(rawMsFeatures,
                                                                   mzSort,
                                                                   mzDiff,
                                                                   CompareMz,
                                                                   Tolerances.Mass);

            // Validate before the first use of the result; the check used to come after Count().
            if (features == null)
            {
                throw new InvalidDataException("No features were found from the input MS Feature list.");
            }

            OnProgress(string.Format("Found {0} unique  child features from {1} total features",
                                     features.Count(),
                                     rawMsFeatures.Count));

            OnProgress("Filtering Features");

            // Then we group into UMC's for clustering across charge states...
            OnProgress("Filtering poor features with no data.  Calculating statistics for the good ones.");
            features = features.Where(x => x.MsFeatures.Count > 0).ToList();
            foreach (var feature in features)
            {
                feature.CalculateStatistics(ClusterCentroidRepresentation.Median);
                feature.MassMonoisotopic = (feature.Mz * feature.ChargeState) - (SubAtomicParticleLibrary.MASS_PROTON * feature.ChargeState);
            }

            // Here we should merge the XIC data...trying to find the best possible feature
            // Note that at this point we dont have UMC's.  We only have features
            // that are separated by mass , scan , and charge
            // so this method should interrogate each one of these....
            if (SpectraProvider != null)
            {
                OnProgress("Building XIC's from child features");
                var generator = new XicCreator();
                generator.Progress += generator_Progress;
                features            = generator.CreateXic(features as List <UMCLight>, Tolerances.Mass, SpectraProvider) as IEnumerable <TParentFeature>;
                generator.Progress -= generator_Progress;

                // The "as" cast yields null when the XIC result is not a sequence of parent
                // features; fail with a clear message instead of a NullReferenceException below.
                if (features == null)
                {
                    throw new InvalidDataException("XIC creation did not return features of the expected type.");
                }
            }

            OnProgress("Calculating statistics for each feature");
            foreach (var feature in features)
            {
                feature.CalculateStatistics(ClusterCentroidRepresentation.Median);
                feature.Net = scanRange > 0
                    ? (Convert.ToDouble(feature.Scan) - minScan) / scanRange
                    : 0;
            }

            OnProgress("Combining child feature charge states");
            features = Cluster <TParentFeature, TParentFeature>(features,
                                                                monoSort,
                                                                monoDiff,
                                                                CompareMonoisotopic,
                                                                Tolerances.Mass);

            OnProgress("Assigning unique feature id's to each feature.");
            var featureList = features.ToList();

            var id = 0;
            foreach (var feature in featureList)
            {
                feature.Id = id++;
            }
            return(featureList);
        }