Exemple #1
0
        /// <summary>
        /// Collects the NET and mass differences, before and after alignment, for every anchor
        /// point match whose two spectra both have a parent feature.
        /// </summary>
        /// <param name="matches">Anchor point matches to measure.</param>
        /// <returns>
        /// A tuple whose first item holds the NET differences and whose second item holds the mass
        /// differences as returned by <c>FeatureLight.ComputeMassPPMDifference</c>.
        /// </returns>
        public Tuple<AlignmentMeasurement<double>, AlignmentMeasurement<double>> MeasureErrors(IEnumerable<SpectralAnchorPointMatch> matches)
        {
            var netMeasurement  = new AlignmentMeasurement<double>();
            var massMeasurement = new AlignmentMeasurement<double>();

            foreach (var anchorMatch in matches)
            {
                var msFeatureX = anchorMatch.AnchorPointX.Spectrum.ParentFeature;
                var msFeatureY = anchorMatch.AnchorPointY.Spectrum.ParentFeature;

                // Only matches with a parent feature on both sides can be measured.
                if (msFeatureX != null && msFeatureY != null)
                {
                    var parentX = msFeatureX.ParentFeature;
                    var parentY = msFeatureY.ParentFeature;

                    // X is compared against the raw and then the aligned value of Y.
                    netMeasurement.PreAlignment.Add(parentX.Net - parentY.Net);
                    netMeasurement.PostAlignment.Add(parentX.Net - parentY.NetAligned);

                    massMeasurement.PreAlignment.Add(
                        FeatureLight.ComputeMassPPMDifference(parentX.MassMonoisotopic, parentY.MassMonoisotopic));
                    massMeasurement.PostAlignment.Add(
                        FeatureLight.ComputeMassPPMDifference(parentX.MassMonoisotopic, parentY.MassMonoisotopicAligned));
                }
            }

            return Tuple.Create(netMeasurement, massMeasurement);
        }
Exemple #2
0
        /// <summary>
        /// Lazily creates one XIC target per feature, centered on the feature's m/z and spanning
        /// the scan range of its MS features widened by <c>ScanWindowSize</c> on each side.
        /// </summary>
        /// <param name="features">Features to create targets for.</param>
        /// <param name="massError">
        /// Mass tolerance, passed as the ppm argument to <c>FeatureLight.ComputeDaDifferenceFromPPM</c>
        /// to derive the low and high m/z bounds.
        /// </param>
        /// <returns>A deferred sequence of XIC targets with sequential ids starting at 0.</returns>
        private IEnumerable<XicFeature> CreateXicTargetsYield(IEnumerable<UMCLight> features, double massError)
        {
            var id = 0;

            foreach (var feature in features)
            {
                // Find the first and last scan covered by the feature's MS features.
                // NOTE(review): a feature with no MS features keeps the sentinel values
                // (int.MaxValue / 0) - confirm whether callers can pass such features.
                var minScan = int.MaxValue;
                var maxScan = 0;
                foreach (var msFeature in feature.MsFeatures)
                {
                    minScan = Math.Min(minScan, msFeature.Scan);
                    maxScan = Math.Max(maxScan, msFeature.Scan);
                }

                yield return new XicFeature
                {
                    HighMz      = FeatureLight.ComputeDaDifferenceFromPPM(feature.Mz, -massError),
                    LowMz       = FeatureLight.ComputeDaDifferenceFromPPM(feature.Mz, massError),
                    Mz          = feature.Mz,
                    Feature     = feature,
                    Id          = id++,
                    // The window extends outward from the observed scan range: it ends after the
                    // last scan and starts before the first one.
                    EndScan     = maxScan + ScanWindowSize,
                    StartScan   = minScan - ScanWindowSize,
                    ChargeState = feature.ChargeState
                };
            }
        }
Exemple #3
0
 /// <summary>
 /// Keeps only the matches whose parent-feature m/z values differ by less than
 /// <paramref name="ppm"/> in absolute value.
 /// </summary>
 /// <param name="matches">Candidate anchor point matches.</param>
 /// <param name="ppm">Exclusive upper bound on the absolute ppm difference.</param>
 /// <returns>A deferred sequence of the matches that pass the filter.</returns>
 private IEnumerable<SpectralAnchorPointMatch> FilterMatches(IEnumerable<SpectralAnchorPointMatch> matches, double ppm)
 {
     return matches.Where(candidate =>
     {
         var parentMzX = candidate.AnchorPointX.Spectrum.ParentFeature.Mz;
         var parentMzY = candidate.AnchorPointY.Spectrum.ParentFeature.Mz;
         var ppmError  = Math.Abs(FeatureLight.ComputeMassPPMDifference(parentMzX, parentMzY));

         return ppmError < ppm;
     });
 }
Exemple #4
0
        /// <summary>
        /// Builds a mass alignment function from a combined regression over the supplied feature matches.
        /// </summary>
        /// <remarks>
        /// Each match contributes one regression point built from the alignee feature's NET, the
        /// NET difference (baseline NET minus alignee aligned NET) and the mass difference between
        /// alignee and baseline as returned by <c>FeatureLight.ComputeMassPPMDifference</c>.
        /// </remarks>
        /// <param name="matches">Matched alignee/baseline feature pairs.</param>
        /// <returns>An alignment function holding the single fitted regression.</returns>
        public LcmsWarpMassAlignmentFunction CalculateCalibration(List<LcmsWarpFeatureMatch> matches)
        {
            var regression = new LcmsWarpCombinedRegression();
            regression.SetCentralRegressionOptions(
                this.options.MassCalibNumXSlices,
                this.options.MassCalibNumYSlices,
                this.options.MassCalibMaxJump,
                this.options.MassCalibMaxZScore,
                this.options.RegressionType);

            var points = new List<RegressionPoint>();
            foreach (var pair in matches)
            {
                var alignee      = pair.AligneeFeature;
                var baseline     = pair.BaselineFeature;
                var massErrorPpm = FeatureLight.ComputeMassPPMDifference(alignee.MassMonoisotopic, baseline.MassMonoisotopic);
                var netError     = baseline.Net - alignee.NetAligned;

                points.Add(new RegressionPoint(alignee.Net, 0, netError, massErrorPpm));
            }

            regression.CalculateRegressionFunction(points, "ScanMassError");

            var alignmentFunction = new LcmsWarpMassAlignmentFunction();
            alignmentFunction.Calibrations = new List<LcmsWarpCombinedRegression> { regression };
            return alignmentFunction;
        }
Exemple #5
0
        public void MassMassCalculations(double massX, double massY)
        {
            // Round trip: convert the mass pair to a ppm difference, then apply that ppm
            // difference back to massX; the result should reproduce massY.
            var ppm        = FeatureLight.ComputeMassPPMDifference(massX, massY);
            var massYdelta = FeatureLight.ComputeDaDifferenceFromPPM(massX, ppm);

            // Two floating-point conversions rarely reproduce the input bit-for-bit, so compare
            // within a small absolute tolerance instead of requiring exact equality.
            const double tolerance = 1e-9;
            Assert.AreEqual(massY, massYdelta, tolerance);
        }
Exemple #6
0
        public void MassPPMCalculations(double massX, double ppm, double epsilon)
        {
            // Round trip: apply the ppm offset to massX, then measure the ppm difference between
            // massX and the shifted mass; it should reproduce the requested offset.
            var massYdelta = FeatureLight.ComputeDaDifferenceFromPPM(massX, ppm);
            var ppmDelta   = FeatureLight.ComputeMassPPMDifference(massX, massYdelta);

            // Compare the magnitude of the error: a signed comparison would let an arbitrarily
            // large negative error pass.
            Assert.Less(System.Math.Abs(ppm - ppmDelta), epsilon);
        }
Exemple #7
0
        /// <summary>
        /// Creates XIC targets from a list of UMC features, one target per charge state of each feature.
        /// </summary>
        /// <remarks>
        /// Each target is centered on the m/z of the most abundant MS feature of that charge state
        /// and spans the scan range of the charge state's MS features widened by
        /// <c>ScanWindowSize</c> on each side. The MS feature list of every processed feature is
        /// cleared as a side effect.
        /// </remarks>
        /// <param name="features">Features to create targets for.</param>
        /// <param name="massError">
        /// Mass tolerance, passed as the ppm argument to <c>FeatureLight.ComputeDaDifferenceFromPPM</c>
        /// to derive the low and high m/z bounds.
        /// </param>
        /// <returns>The created targets, with sequential ids starting at 0.</returns>
        private List<XicFeature> CreateXicTargets(IEnumerable<UMCLight> features, double massError)
        {
            var allFeatures = new List<XicFeature>();
            var id          = 0;

            foreach (var feature in features)
            {
                // Group the feature's MS features by charge state; each group becomes one target.
                var chargeMap = feature.CreateChargeMap();
                foreach (var charge in chargeMap.Keys)
                {
                    double maxIntensity = 0;
                    double mz           = 0;

                    var scanStart = int.MaxValue;
                    var scanEnd   = 0;

                    foreach (var chargeFeature in chargeMap[charge])
                    {
                        // Track the first and last scan seen for this charge state.
                        scanStart = Math.Min(scanStart, chargeFeature.Scan);
                        scanEnd   = Math.Max(scanEnd, chargeFeature.Scan);

                        // The target m/z is that of the most abundant MS feature.
                        if (chargeFeature.Abundance > maxIntensity)
                        {
                            maxIntensity = chargeFeature.Abundance;
                            mz           = chargeFeature.Mz;
                        }
                    }

                    // Clear the MS feature list because it is repopulated later.
                    feature.MsFeatures.Clear();

                    var xicFeature = new XicFeature
                    {
                        HighMz      = FeatureLight.ComputeDaDifferenceFromPPM(mz, -massError),
                        LowMz       = FeatureLight.ComputeDaDifferenceFromPPM(mz, massError),
                        Mz          = mz,
                        Feature     = feature,
                        Id          = id++,
                        EndScan     = scanEnd + ScanWindowSize,
                        StartScan   = scanStart - ScanWindowSize,
                        ChargeState = charge
                    };

                    allFeatures.Add(xicFeature);
                }
            }

            return allFeatures;
        }
Exemple #8
0
        /// <summary>
        /// Tests whether two features lie within the configured mass, NET and drift time tolerances
        /// of each other.
        /// </summary>
        /// <param name="x">First feature.</param>
        /// <param name="y">Second feature.</param>
        /// <returns>True when all three absolute differences are within tolerance.</returns>
        private bool WithinRange(T x, T y)
        {
            var ppmGap   = Math.Abs(FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopicAligned, y.MassMonoisotopicAligned));
            var netGap   = Math.Abs(x.Net - y.Net);
            var driftGap = Math.Abs(x.DriftTime - y.DriftTime);

            // Reject as soon as one dimension falls outside its tolerance.
            if (!(ppmGap <= Tolerances.Mass))
            {
                return false;
            }
            if (!(netGap <= Tolerances.Net))
            {
                return false;
            }
            return driftGap <= Tolerances.DriftTime;
        }
Exemple #9
0
        /// <summary>
        /// Computes the weighted Euclidean distance between two features from their aligned
        /// monoisotopic mass, NET and drift time differences.
        /// </summary>
        /// <param name="x">First feature.</param>
        /// <param name="y">Second feature.</param>
        /// <returns>
        /// The square root of the sum of the squared differences, each scaled by
        /// <c>MassWeight</c>, <c>NetWeight</c> or <c>DriftWeight</c>.
        /// </returns>
        public double EuclideanDistance(T x, FeatureLight y)
        {
            var deltaMass  = x.MassMonoisotopicAligned - y.MassMonoisotopicAligned;
            var deltaNet   = x.Net - y.Net;
            var deltaDrift = x.DriftTime - y.DriftTime;

            var massTerm  = MassWeight * (deltaMass * deltaMass);
            var netTerm   = NetWeight * (deltaNet * deltaNet);
            var driftTerm = DriftWeight * (deltaDrift * deltaDrift);

            return Math.Sqrt(massTerm + netTerm + driftTerm);
        }
Exemple #10
0
        /// <summary>
        /// Calculates the weighted Euclidean distance between two features based on aligned mass,
        /// NET and drift time.
        /// </summary>
        /// <param name="x">Feature x.</param>
        /// <param name="y">Feature y.</param>
        /// <param name="massWeight">Weight applied to the squared mass difference.</param>
        /// <param name="netWeight">Weight applied to the squared NET difference.</param>
        /// <param name="driftWeight">Weight applied to the squared drift time difference.</param>
        /// <returns>
        /// The square root of the weighted sum of squared differences, where the mass difference is
        /// the value returned by <c>FeatureLight.ComputeMassPPMDifference</c>.
        /// </returns>
        public double EuclideanDistance(T x, T y, double massWeight, double netWeight, double driftWeight)
        {
            var massDifference  = FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopicAligned, y.MassMonoisotopicAligned);
            var netDifference   = x.Net - y.Net;
            var driftDifference = x.DriftTime - y.DriftTime;

            // Each squared difference is scaled by its own weight.
            var sum = (massDifference * massDifference) * massWeight +
                      (netDifference * netDifference) * netWeight +
                      (driftDifference * driftDifference) * driftWeight;

            return Math.Sqrt(sum);
        }
Exemple #11
0
        public void TestDistanceChangeEuclidean()
        {
            // Prints, as CSV, how the Euclidean distance to a reference cluster changes when one
            // dimension (mass, NET or drift time) is stepped while the other two stay at a single step.
            var cluster = new UMCClusterLight();

            cluster.MassMonoisotopic = 500;
            cluster.Net              = .5;
            cluster.DriftTime        = 20;

            var euclid = new EuclideanDistanceMetric<UMCClusterLight>();
            DistanceFunction<UMCClusterLight> func = euclid.EuclideanDistance;

            var    deltaNet       = .01;
            double deltaMassPPM   = 1;
            double deltaDriftTime = 1;

            Console.WriteLine("Mass Diff, Mass Dist, Net, Net Dist, Drift, Drift Dist");

            for (var i = 0; i < 50; i++)
            {
                // Mass walk: only the mass offset grows with i.
                var clusterM = new UMCClusterLight();
                clusterM.DriftTime        = cluster.DriftTime + deltaDriftTime;
                clusterM.Net              = cluster.Net + deltaNet;
                clusterM.MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(cluster.MassMonoisotopic,
                                                                                    deltaMassPPM * i);

                // NET walk: only the NET offset grows with i.
                var clusterN = new UMCClusterLight();
                clusterN.DriftTime        = cluster.DriftTime + deltaDriftTime;
                clusterN.Net              = cluster.Net + (deltaNet * i);
                clusterN.MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(cluster.MassMonoisotopic,
                                                                                    deltaMassPPM);

                // Drift walk: only the drift time offset grows with i.
                var clusterD = new UMCClusterLight();
                clusterD.DriftTime        = cluster.DriftTime + (deltaDriftTime * i);
                clusterD.Net              = cluster.Net + deltaNet;
                clusterD.MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(cluster.MassMonoisotopic,
                                                                                    deltaMassPPM);

                var distM = func(cluster, clusterM);
                var distN = func(cluster, clusterN);
                var distD = func(cluster, clusterD);

                var output = string.Format("{0},{1},{2},{3},{4},{5}", deltaMassPPM * i, distM, deltaNet * i, distN,
                                           deltaDriftTime * i, distD);
                Console.WriteLine(output);
            }
        }
Exemple #12
0
        /// <summary>
        /// Writes the anchor point matches to a tab-delimited text file: one header row followed
        /// by one row per match.
        /// </summary>
        /// <remarks>
        /// Matches that lack a spectrum or a parent feature on either side are skipped.
        /// </remarks>
        /// <param name="path">Path of the file to create (an existing file is overwritten).</param>
        /// <param name="matches">Matches to write.</param>
        private void SaveMatches(string path, IEnumerable<SpectralAnchorPointMatch> matches)
        {
            using (var writer = File.CreateText(path))
            {
                writer.WriteLine(
                    "NET-apx\tNET-apy\tNETAligned-apy\tmz-apx\tmzAligned-apx\tmz-apy\tmzAligned-apy\tScan-x\tScan-y\tpmz-x\tpmz-y\tpmonomass-x\tpmonomass-y\tpNET-x\tpNET-y\tpNETa-x\tpNETa-y\tpmonomass-x\tpmonomassyx\tpmonomass-errorppm\tpmz-errorppm");
                foreach (var match in matches)
                {
                    if (match.AnchorPointX.Spectrum == null || match.AnchorPointY.Spectrum == null)
                    {
                        continue;
                    }

                    var parentFeatureX = match.AnchorPointX.Spectrum.ParentFeature;
                    var parentFeatureY = match.AnchorPointY.Spectrum.ParentFeature;

                    // A spectrum without a parent feature has no parent columns to report.
                    if (parentFeatureX == null || parentFeatureY == null)
                    {
                        continue;
                    }

                    var umcX = parentFeatureX.GetParentFeature();
                    var umcY = parentFeatureY.GetParentFeature();

                    // The final two values follow the header order: monoisotopic mass error
                    // first, then m/z error.
                    var data =
                        string.Format(
                            "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}\t{16}\t{17}\t{18}\t{19}\t{20}\t",
                            match.AnchorPointX.Net,
                            match.AnchorPointY.Net,
                            match.AnchorPointY.NetAligned,
                            match.AnchorPointX.Mz,
                            match.AnchorPointX.MzAligned,
                            match.AnchorPointY.Mz,
                            match.AnchorPointY.MzAligned,
                            parentFeatureX.Scan,
                            parentFeatureY.Scan,
                            parentFeatureX.Mz,
                            parentFeatureY.Mz,
                            parentFeatureX.MassMonoisotopic,
                            parentFeatureY.MassMonoisotopic,
                            umcX.Net,
                            umcY.Net,
                            umcX.NetAligned,
                            umcY.NetAligned,
                            umcX.MassMonoisotopicAligned,
                            umcY.MassMonoisotopicAligned,
                            FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned,
                                                                  umcY.MassMonoisotopicAligned),
                            FeatureLight.ComputeMassPPMDifference(parentFeatureX.Mz, parentFeatureY.Mz)
                            );

                    writer.WriteLine(data);
                }
            }
        }
Exemple #13
0
        public void TestDistances()
        {
            // Prints how the weighted Euclidean distance between two initially identical clusters
            // grows as the second cluster is walked away in drift time, then NET, then mass.
            var metric = new WeightedEuclideanDistance<UMCClusterLight>();

            var reference = CreateCluster(500, .2, 27);
            var walker    = CreateCluster(500, .2, 27);

            var stepCount = 50;
            var massStep  = .5;
            var netStep   = .001;
            var driftStep = .01;

            Console.WriteLine("Walk in drift time");
            for (var step = 0; step < stepCount; step++)
            {
                walker.DriftTime += driftStep;
                var driftDistance = metric.EuclideanDistance(reference, walker);
                Console.WriteLine("{0}, {1}, {3}, {2}", walker.DriftTime, walker.DriftTime, driftDistance, walker.DriftTime - reference.DriftTime);
            }

            // Put the drift time back before walking the next dimension.
            Console.WriteLine();
            Console.WriteLine("Walk in net ");
            walker.DriftTime = 27;

            for (var step = 0; step < stepCount; step++)
            {
                walker.Net += netStep;
                var netDistance = metric.EuclideanDistance(reference, walker);
                Console.WriteLine("{0}, {1}, {3}, {2}", walker.Net, walker.Net, netDistance, walker.Net - reference.Net);
            }

            // Put the NET back before walking the mass.
            Console.WriteLine();
            Console.WriteLine("Walk in mass ");
            walker.Net = .2;

            for (var step = 0; step < stepCount; step++)
            {
                walker.MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(reference.MassMonoisotopic, massStep * step);
                var massDistance = metric.EuclideanDistance(reference, walker);
                var ppmOffset    = FeatureLight.ComputeMassPPMDifference(reference.MassMonoisotopic, walker.MassMonoisotopic);
                Console.WriteLine("{0}, {1}, {3}, {2}", walker.MassMonoisotopic,
                                  walker.MassMonoisotopic,
                                  massDistance,
                                  ppmOffset);
            }
        }
Exemple #14
0
        /// <summary>
        /// Gathers the identifications recorded for the scans of the feature's MS/MS spectra.
        /// </summary>
        /// <param name="feature">The feature whose MS/MS spectra are looked up.</param>
        /// <returns>
        /// The identifications of every spectrum whose scan has an entry in the provider for the
        /// feature's group; empty when none match.
        /// </returns>
        private List<Peptide> GetIdentifications(FeatureLight feature)
        {
            var identificationsByScan = this.identificationProviderCache
                                            .GetProvider(feature.GroupId)
                                            .GetAllIdentifications();
            var found = new List<Peptide>();

            foreach (var spectrum in feature.MSnSpectra)
            {
                // Scans without identifications contribute nothing.
                if (!identificationsByScan.ContainsKey(spectrum.Scan))
                {
                    continue;
                }

                found.AddRange(identificationsByScan[spectrum.Scan]);
            }

            return found;
        }
Exemple #15
0
        /// <summary>
        /// Builds an extracted ion chromatogram for an m/z value by summing, scan by scan, the
        /// intensity of all peaks inside the mass window.
        /// </summary>
        /// <param name="mz">Target m/z.</param>
        /// <param name="massError">
        /// Mass tolerance, passed as the ppm argument to <c>FeatureLight.ComputeDaDifferenceFromPPM</c>
        /// to derive the window bounds.
        /// </param>
        /// <param name="minScan">First scan to read (inclusive).</param>
        /// <param name="maxScan">Scan at which to stop (exclusive).</param>
        /// <param name="provider">Source of the raw spectra.</param>
        /// <returns>
        /// One MS feature per scan that could be read; scans whose spectrum is null or whose read
        /// throws are skipped.
        /// </returns>
        public IEnumerable<MSFeatureLight> CreateXic(double mz,
                                                     double massError,
                                                     int minScan,
                                                     int maxScan,
                                                     ISpectraProvider provider)
        {
            var windowLow  = FeatureLight.ComputeDaDifferenceFromPPM(mz, massError);
            var windowHigh = FeatureLight.ComputeDaDifferenceFromPPM(mz, -massError);
            var xic        = new List<MSFeatureLight>();

            for (var scan = minScan; scan < maxScan; scan++)
            {
                List<XYData> peaks;

                try
                {
                    var scanSummary = new ScanSummary();
                    peaks = provider.GetRawSpectra(scan, 0, 1, out scanSummary);
                }
                catch
                {
                    // Best effort: a scan that cannot be read is left out of the chromatogram.
                    continue;
                }

                if (peaks == null)
                {
                    continue;
                }

                // Both window bounds are exclusive.
                var intensityInWindow = peaks.Where(peak => peak.X > windowLow && peak.X < windowHigh)
                                             .Sum(peak => peak.Y);

                xic.Add(new MSFeatureLight
                {
                    Scan      = scan,
                    Net       = scan,
                    Abundance = Convert.ToInt64(intensityInWindow)
                });
            }

            return xic;
        }
Exemple #16
0
        /// <summary>
        /// Walks the MS features in scan order and, within a buffered group, looks for
        /// neighbouring m/z values that differ by more than 1 ppm.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this method looks unfinished and currently always returns an empty list.
        /// The result list is never populated, the per-group m/z map is built and then discarded,
        /// <c>currentScan</c> is never advanced (so features are only buffered while their scan is
        /// 0), the feature that triggers a scan change is not buffered, and the last group is never
        /// processed. The intended output cannot be determined from this code; confirm before
        /// relying on it.
        /// </remarks>
        /// <param name="rawMsFeatures">MS features to filter.</param>
        /// <returns>The filtered features (currently always empty - see remarks).</returns>
        private List<TChildFeature> FilterMsFeatures(List<TChildFeature> rawMsFeatures)
        {
            // sort by scan...
            var allFeatures = rawMsFeatures.OrderBy(x => x.Scan).ToList();

            var newFeatures   = new List<TChildFeature>();
            var features      = new List<TChildFeature>();
            var totalFeatures = rawMsFeatures.Count;
            var currentScan   = 0;

            for (var i = 0; i < totalFeatures; i++)
            {
                var feature = allFeatures[i];
                // Process the scans...
                if (currentScan != feature.Scan)
                {
                    var mzFeatures = features.OrderBy(x => x.Mz).ToList();
                    var mzMap      = new Dictionary<double, List<TChildFeature>>();
                    for (var j = 1; j < mzFeatures.Count; j++)
                    {
                        var featureJ    = mzFeatures[j];
                        var featurePrev = mzFeatures[j - 1];

                        // Find the mass difference between m/z-adjacent features; here we are
                        // looking to see if there are unique m/z features or not, if not, then
                        // we need to process them.
                        var ppm = FeatureLight.ComputeMassPPMDifference(featureJ.Mz, featurePrev.Mz);
                        if (Math.Abs(ppm) > 1)
                        {
                            if (!mzMap.ContainsKey(featureJ.Mz))
                            {
                                mzMap.Add(featureJ.Mz, new List<TChildFeature>());
                            }
                            mzMap[featureJ.Mz].Add(featureJ);
                            mzMap[featureJ.Mz].Add(featurePrev);
                        }
                    }
                    features.Clear();
                }
                else
                {
                    features.Add(feature);
                }
            }

            return newFeatures;
        }
Exemple #17
0
        /// <summary>
        /// Scores two features against each other by comparing their identifications.
        /// Each distinct identification present on both features adds 1; each distinct
        /// identification present on only one of them subtracts 1.
        /// </summary>
        /// <param name="feature1">The first feature.</param>
        /// <param name="feature2">The second feature.</param>
        /// <returns>The number of shared identifications minus the number of unshared ones.</returns>
        public double ScoreComparison(FeatureLight feature1, FeatureLight feature2)
        {
            var idsLeft  = this.GetIdentifications(feature1);
            var idsRight = this.GetIdentifications(feature2);

            // Set operations, so duplicates within one side are counted once.
            var shared         = idsLeft.Intersect(idsRight).ToList();
            var unmatchedLeft  = idsLeft.Except(shared).Count();
            var unmatchedRight = idsRight.Except(shared).Count();

            double score = shared.Count;
            return score - unmatchedLeft - unmatchedRight;
        }
Exemple #18
0
        /// <summary>
        /// Compares two features by monoisotopic mass, charge state and NET.
        /// </summary>
        /// <param name="featureX">First feature.</param>
        /// <param name="featureY">Second feature.</param>
        /// <returns>
        /// When the features are within the mass tolerance and share a charge state: 0 if their
        /// NET difference is within tolerance, otherwise 1. In every other case: -1 if the mass
        /// difference returned by <c>FeatureLight.ComputeMassPPMDifference</c> is negative,
        /// otherwise 1.
        /// </returns>
        public int CompareMonoisotopic(FeatureLight featureX, FeatureLight featureY)
        {
            var ppmDifference = FeatureLight.ComputeMassPPMDifference(featureX.MassMonoisotopic, featureY.MassMonoisotopic);

            var closeInMass = Math.Abs(ppmDifference) < Tolerances.Mass;
            if (closeInMass && featureX.ChargeState == featureY.ChargeState)
            {
                // Same mass and charge: the NET decides whether the two are considered equal.
                var netDifference = featureX.Net - featureY.Net;
                if (Math.Abs(netDifference) <= Tolerances.Net)
                {
                    return 0;
                }
                return 1;
            }

            // Otherwise order by the sign of the mass difference.
            return ppmDifference < 0 ? -1 : 1;
        }
Exemple #19
0
        /// <summary>
        /// Creates a residual alignment plot of the mass residual against m/z.
        /// </summary>
        /// <remarks>
        /// The pre-alignment residual compares the monoisotopic masses of both features; the
        /// post-alignment residual compares the first feature's monoisotopic mass with the second
        /// feature's aligned monoisotopic mass.
        /// </remarks>
        /// <param name="x">First feature set, forwarded to <c>CreateResidualAlignmentPlot</c>.</param>
        /// <param name="y">Second feature set, forwarded to <c>CreateResidualAlignmentPlot</c>.</param>
        /// <returns>The plot produced by <c>CreateResidualAlignmentPlot</c>.</returns>
        public static PlotBase CreateMassMzResidualAlignmentPlot<T>(IEnumerable<T> x, IEnumerable<T> y)
            where T : FeatureLight
        {
            Func<T, double> selectMz = feature => feature.Mz;

            Func<T, T, double> residualBefore = (first, second) =>
                FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopic);

            Func<T, T, double> residualAfter = (first, second) =>
                FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopicAligned);

            var residualPlot = CreateResidualAlignmentPlot(x, y, selectMz, residualBefore, residualAfter,
                                                           "mz", "Mass Residual (ppm)");
            return residualPlot;
        }
Exemple #20
0
        /// <summary>
        /// Creates the feature finder and selects the first- and second-pass clusterers named in
        /// the options.
        /// </summary>
        /// <param name="provider">Scan summary provider; must not be null.</param>
        /// <param name="options">
        /// Feature finding options; when null a default <c>LcmsFeatureFindingOptions</c> is used.
        /// </param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="provider"/> is null.</exception>
        public MsToLcmsFeatures(IScanSummaryProvider provider, LcmsFeatureFindingOptions options = null)
        {
            if (provider == null)
            {
                // Name the offending parameter so the failure is diagnosable from the message.
                throw new ArgumentNullException("provider");
            }

            // Sorting and mass-difference delegates handed to the tree clusterers below.
            Comparison<MSFeatureLight> mzSort   = (x, y) => x.Mz.CompareTo(y.Mz);
            Comparison<UMCLight>       monoSort = (x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic);
            Func<MSFeatureLight, MSFeatureLight, double> mzDiff   = (x, y) => FeatureLight.ComputeMassPPMDifference(x.Mz, y.Mz);
            Func<UMCLight, UMCLight, double>             monoDiff = (x, y) => FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopic, y.MassMonoisotopic);

            this.provider = provider;
            this.options  = options ?? new LcmsFeatureFindingOptions();

            // First pass: the binary search tree clusterer is built here from the m/z delegates;
            // any other algorithm comes from the factory.
            if (this.options.FirstPassClusterer == MsFeatureClusteringAlgorithmType.BinarySearchTree)
            {
                this.firstPassClusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>(
                    mzSort,
                    mzDiff,
                    MassComparison.Mz,
                    this.options.InstrumentTolerances.Mass);
            }
            else
            {
                this.firstPassClusterer = ClusterFactory.Create(this.options.FirstPassClusterer);
            }

            // Second pass: same choice, using the monoisotopic mass delegates.
            if (this.options.SecondPassClusterer == GenericClusteringAlgorithmType.BinarySearchTree)
            {
                this.secondPassClusterer = new MsFeatureTreeClusterer<UMCLight, UMCLight>(
                    monoSort,
                    monoDiff,
                    MassComparison.Monoisotopic,
                    this.options.InstrumentTolerances.Mass);
            }
            else
            {
                var clusterFactory = new GenericClusterFactory<UMCLight, UMCLight>();
                this.secondPassClusterer = clusterFactory.Create(this.options.SecondPassClusterer);
            }
        }
Exemple #21
0
        /// <summary>
        /// Creates a residual alignment plot of the mass residual against scan.
        /// </summary>
        /// <remarks>
        /// The pre-alignment residual compares the monoisotopic masses of both features; the
        /// post-alignment residual compares the first feature's monoisotopic mass with the second
        /// feature's aligned monoisotopic mass.
        /// </remarks>
        /// <param name="x">First feature set, forwarded to <c>CreateResidualAlignmentPlot</c>.</param>
        /// <param name="y">Second feature set, forwarded to <c>CreateResidualAlignmentPlot</c>.</param>
        /// <returns>The plot produced by <c>CreateResidualAlignmentPlot</c>.</returns>
        public static PlotBase CreateMassScanResidualAlignmentPlot<T>(IEnumerable<T> x, IEnumerable<T> y)
            where T : FeatureLight
        {
            Func<T, double> selectScan = feature => feature.Scan;

            Func<T, T, double> residualBefore = (first, second) =>
                FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopic);

            Func<T, T, double> residualAfter = (first, second) =>
                FeatureLight.ComputeMassPPMDifference(first.MassMonoisotopic, second.MassMonoisotopicAligned);

            return CreateResidualAlignmentPlot(x, y, selectScan, residualBefore, residualAfter,
                                               "scan", "Mass Residual (ppm)");
        }
Exemple #22
0
        /// <summary>
        /// Checks whether two clusters agree in mass, NET and drift time within the tolerances
        /// configured in <c>Parameters.Tolerances</c>.
        /// </summary>
        /// <param name="clusterX">One of the two clusters to test</param>
        /// <param name="clusterY">The other cluster to test</param>
        /// <returns>True if all three absolute differences are within tolerance, false otherwise</returns>
        protected override bool AreClustersWithinTolerance(U clusterX, U clusterY)
        {
            var allowedMass  = Parameters.Tolerances.Mass;
            var allowedNet   = Parameters.Tolerances.Net;
            var allowedDrift = Parameters.Tolerances.DriftTime;

            var massGap  = Math.Abs(FeatureLight.ComputeMassPPMDifference(clusterX.MassMonoisotopicAligned, clusterY.MassMonoisotopicAligned));
            var netGap   = Math.Abs(clusterX.Net - clusterY.Net);
            var driftGap = Math.Abs(clusterX.DriftTime - clusterY.DriftTime);

            return massGap <= allowedMass
                   && netGap <= allowedNet
                   && driftGap <= allowedDrift;
        }
Exemple #23
0
        /// <summary>
        /// Checks whether two clusters agree in mass, NET and drift time within the supplied tolerances.
        /// </summary>
        /// <param name="clusterX">One of the two clusters to test.</param>
        /// <param name="clusterY">The other cluster to test.</param>
        /// <param name="massTolerance">
        /// Inclusive bound on the absolute value of <c>FeatureLight.ComputeMassPPMDifference</c>
        /// for the aligned monoisotopic masses.
        /// </param>
        /// <param name="netTolerance">Inclusive bound on the absolute NET difference.</param>
        /// <param name="driftTolerance">Inclusive bound on the absolute drift time difference.</param>
        /// <returns>True if all three differences are within tolerance, false otherwise.</returns>
        protected virtual bool AreClustersWithinTolerance(UMCLight clusterX,
                                                          UMCLight clusterY,
                                                          double massTolerance,
                                                          double netTolerance,
                                                          double driftTolerance)
        {
            var ppmGap   = Math.Abs(FeatureLight.ComputeMassPPMDifference(clusterX.MassMonoisotopicAligned,
                                                                          clusterY.MassMonoisotopicAligned));
            var netGap   = Math.Abs(clusterX.Net - clusterY.Net);
            var driftGap = Math.Abs(clusterX.DriftTime - clusterY.DriftTime);

            var massOk  = ppmGap <= massTolerance;
            var netOk   = netGap <= netTolerance;
            var driftOk = driftGap <= driftTolerance;

            return massOk && netOk && driftOk;
        }
Exemple #24
0
        /// <summary>
        /// Writes the anchor point matches to a tab-delimited text file: a short header section
        /// followed by one row per match that has a spectrum on both sides.
        /// </summary>
        /// <param name="path">Path of the file to create (an existing file is overwritten).</param>
        /// <param name="matches">Matches to write.</param>
        private void SaveMatches(string path, IEnumerable<SpectralAnchorPointMatch> matches)
        {
            using (var output = File.CreateText(path))
            {
                output.WriteLine("[Header]");
                output.WriteLine("p mz = parentMz - A and B denote dataset A and dataset B");
                output.WriteLine("[Data]");
                output.WriteLine("Net-A\tpMz-A\tScan-A\tNet-B\tpMz-B\tScan-B\tMassErrorPpm\tSimScore");

                foreach (var anchorMatch in matches)
                {
                    // Rows need a spectrum on both sides.
                    if (anchorMatch.AnchorPointX.Spectrum == null || anchorMatch.AnchorPointY.Spectrum == null)
                    {
                        continue;
                    }

                    var msFeatureA = anchorMatch.AnchorPointX.Spectrum.ParentFeature;
                    var msFeatureB = anchorMatch.AnchorPointY.Spectrum.ParentFeature;
                    var parentA    = msFeatureA.GetParentFeature();
                    var parentB    = msFeatureB.GetParentFeature();

                    // The mass error is taken between the MS-level parent features, not their parents.
                    var row = string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}",
                                            parentA.Net,
                                            parentA.Mz,
                                            parentA.Scan,
                                            parentB.Net,
                                            parentB.Mz,
                                            parentB.Scan,
                                            FeatureLight.ComputeMassPPMDifference(msFeatureA.Mz, msFeatureB.Mz),
                                            anchorMatch.SimilarityScore);

                    output.WriteLine(row);
                }
            }
        }
Exemple #25
0
        /// <summary>
        /// Scores two features by comparing every MS/MS spectrum retrieved for the first feature against
        /// every MS/MS spectrum retrieved for the second feature.
        /// </summary>
        /// <param name="feature1">First feature; its GroupId selects the spectra provider to query.</param>
        /// <param name="feature2">Second feature; its GroupId selects the spectra provider to query.</param>
        /// <returns>
        /// The sum over all spectrum pairs of +1 (comparison score within tolerance) or -1 (outside tolerance).
        /// Returns -1 when exactly one of the features has MS/MS spectra, and 0 when neither has any.
        /// </returns>
        /// <exception cref="DatasetInformation.MissingRawDataException">
        /// Thrown when no spectra provider is available for one of the features' datasets.
        /// </exception>
        public double ScoreComparison(FeatureLight feature1, FeatureLight feature2)
        {
            double score = 0.0;

            var leftSpectraProvider  = this.spectraProvider.GetScanSummaryProvider(feature1.GroupId) as ISpectraProvider;
            var rightSpectraProvider = this.spectraProvider.GetScanSummaryProvider(feature2.GroupId) as ISpectraProvider;

            if (leftSpectraProvider == null)
            {
                throw new DatasetInformation.MissingRawDataException("Do not have spectra data available for dataset.", feature1.GroupId);
            }

            if (rightSpectraProvider == null)
            {
                throw new DatasetInformation.MissingRawDataException("Do not have spectra data available for dataset.", feature2.GroupId);
            }

            var leftSpectra  = leftSpectraProvider.GetMSMSSpectra(feature1.Scan, feature1.Mz, true);
            var rightSpectra = rightSpectraProvider.GetMSMSSpectra(feature2.Scan, feature2.Mz, true);

            if ((leftSpectra.Count == 0 || rightSpectra.Count == 0) && leftSpectra.Count != rightSpectra.Count)
            {   // One has MS/MS but the other doesn't.  Neither loop below executes a comparison in
                // this case, so the penalty is returned unchanged.
                score = -1;
            }

            for (int i = 0; i < leftSpectra.Count; i++)
            {
                var leftSpectrum = leftSpectra[i];

                for (int j = 0; j < rightSpectra.Count; j++)
                {
                    // Index the right-hand list with the inner loop variable so that every left/right
                    // pair is compared exactly once.
                    var rightSpectrum = rightSpectra[j];
                    var specScore     = this.comparer.CompareSpectra(leftSpectrum, rightSpectrum);
                    score += this.IsScoreWithinTolerance(specScore) ? 1 : -1;
                }
            }

            return(score);
        }
Exemple #26
0
        /// <summary>
        /// Compares two features by m/z (as a ppm difference), then by scan and charge state.
        /// </summary>
        /// <param name="featureX">First feature.</param>
        /// <param name="featureY">Second feature.</param>
        /// <returns>
        /// 0 when the m/z values are within the mass tolerance, the scans are within the scan tolerance
        /// and the charge states are equal; -1 when the m/z values are outside the mass tolerance and the
        /// ppm difference is negative; 1 in every other case.
        /// </returns>
        public int CompareMz(FeatureLight featureX, FeatureLight featureY)
        {
            var ppmDelta = FeatureLight.ComputeMassPPMDifference(featureX.Mz, featureY.Mz);

            // Outside the mass tolerance: order by the sign of the ppm difference.
            var isWithinMassTolerance = Math.Abs(ppmDelta) < Tolerances.Mass;
            if (!isWithinMassTolerance)
            {
                return(ppmDelta < 0 ? -1 : 1);
            }

            // Within the mass tolerance: the features only compare equal when the scans are close
            // enough and the charge states agree.
            var isScanOutOfRange = Math.Abs(featureX.Scan - featureY.Scan) > ScanTolerance;
            if (isScanOutOfRange || featureX.ChargeState != featureY.ChargeState)
            {
                return(1);
            }

            return(0);
        }
Exemple #27
0
        /// <summary>
        /// Writes the NET and m/z values of each anchor point match, together with the NET error,
        /// the ppm mass error and the similarity score, to a tab-delimited text file.
        /// </summary>
        /// <param name="errorPath">Path of the file to create.</param>
        /// <param name="matches">Anchor point matches to write, one row per match.</param>
        private static void WriteErrors(string errorPath, IEnumerable <SpectralAnchorPointMatch> matches)
        {
            // NOTE(review): the header names 11 columns while each row holds 9 values followed by a
            // trailing tab.  Left as-is because readers of this file may depend on the layout - confirm.
            const string headerLine = "NET\tMass\tNET\tMass\tNETA\tMassA\tNETA\tMassA\tNetError\tMassError\tScore";
            const string rowFormat  = "{0:F5}\t{1:F5}\t{2:F5}\t{3:F5}\t{4:F5}\t{5:F5}\t{6:F5}\t{7:F5}\t{8:F5}\t";

            using (var errorWriter = File.CreateText(errorPath))
            {
                errorWriter.WriteLine(headerLine);

                foreach (var anchorMatch in matches)
                {
                    var pointX = anchorMatch.AnchorPointX;
                    var pointY = anchorMatch.AnchorPointY;

                    // Mass error uses the unaligned m/z values; NET error uses the aligned NET of point Y.
                    var ppmError = FeatureLight.ComputeMassPPMDifference(pointX.Mz, pointY.Mz);
                    var netDelta = pointX.Net - pointY.NetAligned;

                    errorWriter.WriteLine(rowFormat,
                                          pointX.Net,
                                          pointX.Mz,
                                          pointY.Net,
                                          pointY.Mz,
                                          pointY.NetAligned,
                                          pointY.MzAligned,
                                          netDelta,
                                          ppmError,
                                          anchorMatch.SimilarityScore);
                }
            }
        }
Exemple #28
0
        /// <summary>
        /// Clusters a set of features.  The input is sorted by mass, split into partitions wherever two
        /// consecutive features differ by more than the mass (ppm) tolerance, and each partition is
        /// clustered independently.
        /// </summary>
        /// <param name="data">Features to cluster.  The list is sorted in place.  Must not be null or contain null items.</param>
        /// <param name="clusters">List the new clusters are appended to.  Must not be null.</param>
        /// <returns>The <paramref name="clusters"/> list, after the new clusters have been appended and statistics calculated.</returns>
        /// <exception cref="NullReferenceException">
        /// Thrown when <paramref name="data"/> is null or contains a null item.
        /// </exception>
        public virtual List <U> Cluster(List <T> data, List <U> clusters)
        {
            /*
             * This clustering algorithm first sorts the list of input UMC's by mass.  It then iterates
             * through this list partitioning the data into blocks of UMC's based on a mass tolerance.
             * When it finds a gap larger than the mass (ppm) tolerance specified by the user,
             * it will process the data before the gap (a block) until the current index of the features in question.
             */

            // Make sure we have data to cluster first.
            // The exception type is kept as-is so that existing callers catching it keep working.
            if (data == null)
            {
                throw new NullReferenceException("The input feature data list was null.  Cannot process this data.");
            }

            // Make sure there is no null UMC data in the input list.
            // FindIndex returns -1 when nothing matches, so index 0 is a valid hit and must be rejected too.
            var nullIndex = data.FindIndex(x => x == null);

            if (nullIndex >= 0)
            {
                throw new NullReferenceException("The feature at index " + nullIndex + " was null.  Cannot process this data.");
            }

            OnNotify("Sorting cluster mass list");

            // The first thing we do is to sort the features based on mass since we know that has the least variability in the data across runs.
            data.Sort(m_massComparer);

            // Now partition the data based on mass ranges and the parameter values.
            var massTolerance = Parameters.Tolerances.Mass;

            // This is the index of first feature of a given mass partition.
            var startUMCIndex = 0;
            var totalFeatures = data.Count;


            OnNotify("Detecting mass partitions");

            // Progress is reported each time more than a tenth of the features has been visited.
            var tenPercent = Convert.ToInt32(totalFeatures * .1);
            var counter    = 0;
            var percent    = 0;

            for (var i = 0; i < totalFeatures - 1; i++)
            {
                if (counter > tenPercent)
                {
                    counter  = 0;
                    percent += 10;
                    OnNotify(string.Format("Clustering Completed...{0}%", percent));
                }
                counter++;

                // Here we compute the ppm mass difference between consecutive features (based on mass).
                // This will determine if we cluster a block of data or not.
                var umcX = data[i];
                var umcY = data[i + 1];
                var ppm  = Math.Abs(FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned, umcY.MassMonoisotopicAligned));

                // If the difference is greater than the tolerance then we cluster
                //  - we dont check the sign of the ppm because the data should be sorted based on mass.
                if (ppm > massTolerance)
                {
                    // If the start UMC index is equal to i, then the partition holds a single feature:
                    // the feature at startUMCIndex could not find any other features near it within
                    // the mass tolerance specified.
                    if (startUMCIndex == i)
                    {
                        var cluster = new U();
                        cluster.AmbiguityScore = m_maxDistance;
                        umcX.SetParentFeature(cluster);
                        cluster.AddChildFeature(umcX);
                        clusters.Add(cluster);
                    }
                    else
                    {
                        // Otherwise we have more than one feature to consider.
                        var distances     = CalculatePairWiseDistances(startUMCIndex, i, data);
                        var localClusters = CreateSingletonClusters(data, startUMCIndex, i);
                        var blockClusters = LinkFeatures(distances, localClusters);
                        CalculateAmbiguityScore(blockClusters);
                        clusters.AddRange(blockClusters);
                    }

                    // The next partition starts with the feature after the gap.
                    startUMCIndex = i + 1;
                }
            }

            // Make sure that we cluster what is left over.
            if (startUMCIndex < totalFeatures)
            {
                OnNotify(string.Format("Clustering last partition...{0}%", percent));
                var distances     = CalculatePairWiseDistances(startUMCIndex, totalFeatures - 1, data);
                var localClusters = CreateSingletonClusters(data, startUMCIndex, totalFeatures - 1);
                var blockClusters = LinkFeatures(distances, localClusters);
                CalculateAmbiguityScore(blockClusters);

                // With fewer than two singleton clusters the singletons themselves are kept;
                // otherwise the linked clusters are used.
                if (localClusters.Count < 2)
                {
                    clusters.AddRange(localClusters.Values);
                }
                else
                {
                    clusters.AddRange(blockClusters);
                }
            }


            OnNotify("Generating cluster statistics");
            foreach (var cluster in clusters)
            {
                cluster.CalculateStatistics(Parameters.CentroidRepresentation);
            }

            return(clusters);
        }
Exemple #29
0
        /// <summary>
        /// For each cluster, computes the deviation of every member feature from the cluster's mass, NET
        /// and drift time, and writes z-scores comparing the resulting distributions to the console.
        /// </summary>
        /// <param name="clusters">Clusters to analyze.</param>
        /// <returns>
        /// A new list of clusters.  Nothing is added to it at present, so the returned list is always empty.
        /// </returns>
        public List<U> ProcessClusters(List<U> clusters)
        {
            var newClusters = new List<U>();

            // Look for merged clusters that need to be split...
            foreach (var cluster in clusters)
            {
                var medianNet = cluster.Net;
                var medianMass = cluster.MassMonoisotopic;
                var medianDrift = cluster.DriftTime;

                // Per-feature deviations.  These maps are filled but not read anywhere in this method.
                var massDistributions = new Dictionary<T, double>();
                var netDistributions = new Dictionary<T, double>();
                var driftDistributions = new Dictionary<T, double>();

                var massDistances = new List<double>();
                var netDistances = new List<double>();
                var driftDistances = new List<double>();

                // Build distributions
                foreach (var feature in cluster.Features)
                {
                    var mass = FeatureLight.ComputeMassPPMDifference(feature.MassMonoisotopicAligned, medianMass);
                    var net = feature.Net - medianNet;
                    var drift = feature.DriftTime - medianDrift;

                    // Each deviation goes into the map of its own dimension.
                    massDistributions.Add(feature, mass);
                    netDistributions.Add(feature, net);
                    driftDistributions.Add(feature, drift);

                    massDistances.Add(mass);
                    netDistances.Add(net);
                    driftDistances.Add(drift);
                }

                massDistances.Sort();
                netDistances.Sort();
                driftDistances.Sort();

                // NOTE(review): per the original comment, this calculates the sample means for the positive
                // and negative sides of the median; the meaning of Item1..Item3 is inferred from the labels
                // printed below - confirm against CalculateAllDistributions.
                var massDistribution    = CalculateAllDistributions(massDistances);
                var netDistribution     = CalculateAllDistributions(netDistances);
                var driftDistribution   = CalculateAllDistributions(driftDistances);

                // Now that we have data we can test the distributions to see if they are similar or not...
                var massZScore   = CalculateZScore(massDistribution.Item1, massDistribution.Item2);
                var netZScore    = CalculateZScore(netDistribution.Item1, netDistribution.Item2);
                var driftZScore  = CalculateZScore(driftDistribution.Item1, driftDistribution.Item2);

                Console.WriteLine("   Neg to Pos ");
                Console.WriteLine("Mass z-score \t{0}",  massZScore);
                Console.WriteLine("Net z-score  \t{0}",   netZScore);
                Console.WriteLine("Drift z-score\t{0}", driftZScore);
                Console.WriteLine();

                massZScore = CalculateZScore(massDistribution.Item1, massDistribution.Item3);
                netZScore = CalculateZScore(netDistribution.Item1, netDistribution.Item3);
                driftZScore = CalculateZScore(driftDistribution.Item1, driftDistribution.Item3);
                Console.WriteLine("   Negative ");
                Console.WriteLine("Mass z-score \t{0}", massZScore);
                Console.WriteLine("Net z-score  \t{0}", netZScore);
                Console.WriteLine("Drift z-score\t{0}", driftZScore);
                Console.WriteLine();

                Console.WriteLine("   Positive ");
                massZScore = CalculateZScore(massDistribution.Item2, massDistribution.Item3);
                netZScore = CalculateZScore(netDistribution.Item2, netDistribution.Item3);
                driftZScore = CalculateZScore(driftDistribution.Item2, driftDistribution.Item3);
                Console.WriteLine("Mass z-score \t{0}", massZScore);
                Console.WriteLine("Net z-score  \t{0}", netZScore);
                Console.WriteLine("Drift z-score\t{0}", driftZScore);
            }

            return newClusters;
        }
Exemple #30
0
        /// <summary>
        /// Clusters features based on their pairwise distances by building minimal spanning trees (MST)
        /// over the graph of feature-to-feature edges and turning each tree into clusters.
        /// </summary>
        /// <remarks>
        /// Tree construction is delegated to ConstructSubTree and the split into clusters to CreateClusters.
        /// </remarks>
        /// <param name="potentialDistances">Pairwise distances between all features in question.</param>
        /// <param name="clusters">Singleton clusters from each feature.</param>
        /// <returns>List of features clustered together.</returns>
        public override List <U> LinkFeatures(List <Data.PairwiseDistance <T> > potentialDistances, Dictionary <int, U> clusters)
        {
            var newClusters = new List <U>();

            // There is an edge case with this setup that a singleton outside of the range
            // of other features made it into the batch of edges, but there is no corresponding edge
            // to the rest of the graph(s).  So here we hash all features, then pare that hash down
            // to the set of features that have no within-tolerance edge.  These would ultimately be
            // singletons we want to capture...
            var clusterMap = new HashSet <T>();

            foreach (var cluster in clusters.Values)
            {
                foreach (var feature in cluster.Features)
                {
                    // HashSet.Add ignores items that are already present.
                    clusterMap.Add(feature);
                }
            }

            foreach (var distance in potentialDistances)
            {
                if (AreClustersWithinTolerance(distance.FeatureX, distance.FeatureY))
                {
                    // HashSet.Remove is a no-op for items that are not present.
                    clusterMap.Remove(distance.FeatureX);
                    clusterMap.Remove(distance.FeatureY);
                }
            }

            // Whatever is left has no within-tolerance partner: emit each as its own cluster.
            foreach (var feature in clusterMap)
            {
                var cluster = new U();
                feature.SetParentFeature(cluster);
                cluster.AddChildFeature(feature);
                newClusters.Add(cluster);
            }

            // Build the edges in order of increasing distance; edge IDs follow that order.
            var id    = 0;
            var edges = new List <Edge <T> >();

            foreach (var distance in potentialDistances.OrderBy(x => x.Distance))
            {
                edges.Add(new Edge <T>(id++,
                                       distance.Distance,
                                       distance.FeatureX,
                                       distance.FeatureY));
            }

            var graph = new FeatureGraph <T>();
            graph.CreateGraph(edges);

            var queue = new Queue <Edge <T> >();
            foreach (var edge in edges)
            {
                queue.Enqueue(edge);
            }

            // IDs of edges that already belong to a constructed tree.
            var seenEdge = new HashSet <int>();

            // Now we start at the MST building
            if (DumpLinearRelationship)
            {
                Console.WriteLine("GraphEdgeLength");
            }
            while (queue.Count > 0)
            {
                var startEdge = queue.Dequeue();

                // If we have already seen the edge, ignore it...
                if (seenEdge.Contains(startEdge.ID))
                {
                    continue;
                }

                var mstGroup = ConstructSubTree(graph,
                                                seenEdge,
                                                startEdge);

                // NOTE(review): this tree is populated but never read afterwards; it is kept because
                // the behaviour of MstLrTree.Insert is not visible here - confirm whether it can go.
                var clusterTree = new MstLrTree <Edge <T> >();

                foreach (var dist in mstGroup.LinearRelationship)
                {
                    seenEdge.Add(dist.ID);
                    clusterTree.Insert(dist);

                    if (DumpLinearRelationship)
                    {
                        Console.WriteLine("{0}", dist.Length);
                    }
                }

                // The cutoff is NSigma itself; it is not scaled by any statistic of the edge lengths.
                var cutoff = NSigma;

                var mstClusters = CreateClusters(mstGroup, cutoff);
                newClusters.AddRange(mstClusters);
            }

            return(newClusters);
        }