Пример #1
0
        // Verifies that a cluster containing exactly one feature reports that feature's
        // aligned monoisotopic mass, NET, drift time and charge state after
        // CalculateStatistics has run.
        //   umcMass, umcNet, umcDriftTime, umcCharge, umcAbundance - values assigned to the single feature.
        //   representation - centroid representation passed to CalculateStatistics.
        public void CalculateStatisticsTestSingleUmc(double umcMass,
                                                     double umcNet,
                                                     float umcDriftTime,
                                                     int umcCharge,
                                                     int umcAbundance,
                                                     ClusterCentroidRepresentation representation)
        {
            // Arrange: an empty cluster and the one feature that will populate it.
            var target = new UMCClusterLight();
            target.UmcList = new List<UMCLight>();

            var onlyFeature = new UMCLight();
            onlyFeature.MassMonoisotopicAligned = umcMass;
            onlyFeature.Net                     = umcNet;
            onlyFeature.DriftTime               = umcDriftTime;
            onlyFeature.ChargeState             = umcCharge;
            onlyFeature.Abundance               = umcAbundance;

            target.UmcList.Add(onlyFeature);

            // Act
            target.CalculateStatistics(representation);

            // Assert: the cluster values must match the feature's values.
            Assert.AreEqual(onlyFeature.MassMonoisotopicAligned, target.MassMonoisotopic, "Monoisotopic Mass");
            Assert.AreEqual(onlyFeature.Net, target.Net, "NET");
            Assert.AreEqual(onlyFeature.DriftTime, target.DriftTime, "Drift Time");
            Assert.AreEqual(onlyFeature.ChargeState, target.ChargeState, "Charge State");
        }
Пример #2
0
        // Expects CalculateStatistics to throw a NullReferenceException when the
        // cluster's feature list is null.
        public void CalculateStatisticsTestNullUMC()
        {
            var target = new UMCClusterLight { UmcList = null };

            Assert.Throws<NullReferenceException>(
                () => { target.CalculateStatistics(ClusterCentroidRepresentation.Median); });
        }
Пример #3
0
        // Expects CalculateStatistics to throw an Exception when the cluster's
        // feature list exists but holds no features.
        public void CalculateStatisticsTestEmptyUMC()
        {
            var target = new UMCClusterLight { UmcList = new List<UMCLight>() };

            Assert.Throws<Exception>(
                () => { target.CalculateStatistics(ClusterCentroidRepresentation.Median); });
        }
Пример #4
0
        // Runs CalculateStatistics (median representation) on a cluster whose feature
        // list is empty. The body itself makes no assertion.
        // NOTE(review): any expected-exception declaration would sit on an attribute
        // that is not visible in this excerpt - confirm against the full file.
        public void CalculateStatisticsTestEmptyUMC()
        {
            var target = new UMCClusterLight { UmcList = new List<UMCLight>() };

            target.CalculateStatistics(ClusterCentroidRepresentation.Median);
        }
Пример #5
0
        // Builds a cluster of two features that differ only in NET (100 and 200) and
        // checks that CalculateStatistics yields a cluster NET of 150 for the given
        // centroid representation.
        public void CalculateStatisticsMultipleNet(ClusterCentroidRepresentation representation)
        {
            var target = new UMCClusterLight { UmcList = new List<UMCLight>() };

            // Both features share every value except NET.
            foreach (var net in new[] { 100, 200 })
            {
                var member = new UMCLight
                {
                    MassMonoisotopicAligned = 100,
                    Net                     = net,
                    DriftTime               = 100,
                    ChargeState             = 2,
                    Abundance               = 100
                };
                target.UmcList.Add(member);
            }

            target.CalculateStatistics(representation);

            Assert.AreEqual(150, target.Net);
        }
Пример #6
0
        // Runs CalculateStatistics (median representation) on a cluster whose feature
        // list is null. The body itself makes no assertion.
        // NOTE(review): any expected-exception declaration would sit on an attribute
        // that is not visible in this excerpt - confirm against the full file.
        public void CalculateStatisticsTestNullUMC()
        {
            var target = new UMCClusterLight { UmcList = null };

            target.CalculateStatistics(ClusterCentroidRepresentation.Median);
        }
Пример #7
0
        // Loads features from a file under the test directory, puts them all into one
        // cluster, prints the cluster's mass/NET standard deviations, and then prints
        // the Euclidean distance from each feature to the cluster.
        //   path - file path relative to TestPathSingleton.TestDirectory.
        public void TestTwoClusters(string path)
        {
            Console.WriteLine("Test: " + path);

            var fullPath = Path.Combine(TestPathSingleton.TestDirectory, path);
            var features = GetClusterData(fullPath);

            Assert.IsNotEmpty(features);

            // The cluster takes the id of the first feature and adopts every feature as a child.
            var cluster = new UMCClusterLight { Id = features[0].Id };
            foreach (var feature in features)
            {
                cluster.AddChildFeature(feature);
            }

            cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);

            Console.WriteLine("Cluster\tMass\tNET");
            Console.WriteLine("{0}\t{1}\t{2}\t", cluster.Id, cluster.MassStandardDeviation, cluster.NetStandardDeviation);
            Console.WriteLine();

            // One distance per line, in feature order.
            var metric = new EuclideanDistanceMetric<FeatureLight>();
            foreach (var feature in features)
            {
                Console.WriteLine(metric.EuclideanDistance(feature, cluster));
            }
        }
Пример #8
0
        // Loads features from a file under the test directory, puts them all into one
        // cluster, prints the cluster's mass/NET standard deviations, and passes the
        // cluster to a MedianSplitReprocessor.
        //   path - file path relative to TestPathSingleton.TestDirectory.
        public void TestReprocessing(string path)
        {
            Console.WriteLine("Test: " + path);

            var fullPath = Path.Combine(TestPathSingleton.TestDirectory, path);
            var features = GetClusterData(fullPath);

            Assert.IsNotEmpty(features);

            // The cluster takes the id of the first feature and adopts every feature as a child.
            var cluster = new UMCClusterLight { Id = features[0].Id };
            foreach (var feature in features)
            {
                cluster.AddChildFeature(feature);
            }

            cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);

            Console.WriteLine("Cluster\tMass\tNET");
            Console.WriteLine("{0}\t{1}\t{2}\t", cluster.Id, cluster.MassStandardDeviation, cluster.NetStandardDeviation);
            Console.WriteLine();

            // The result of ProcessClusters is not inspected.
            IClusterReprocessor<UMCLight, UMCClusterLight> reprocessor = new MedianSplitReprocessor<UMCLight, UMCClusterLight>();
            var input = new List<UMCClusterLight>();
            input.Add(cluster);
            reprocessor.ProcessClusters(input);
        }
Пример #9
0
        // Creates a fresh database at databasePath, fills it with 10 dummy datasets and
        // 100 randomly generated clusters (each with 1-9 features drawn from distinct
        // datasets), reads the clusters back by charge and passes them to WriteClusters.
        //   databasePath       - database file; deleted first if it already exists.
        //   crossPath          - forwarded unchanged to WriteClusters.
        //   charge             - charge state given to every cluster and feature, and used for the lookup.
        //   minimumClusterSize - forwarded unchanged to WriteClusters.
        public void TestClusterGeneration(string databasePath,
            string crossPath,
            int charge,
            int minimumClusterSize)
        {
            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO datasetCache = new DatasetDAOHibernate();
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List<DatasetInformation>();
            var total = 10;
            for (var i = 0; i < total; i++)
            {
                var dataset = new DatasetInformation();
                dataset.DatasetId = i;
                dataset.DatasetName = "test" + i;
                datasets.Add(dataset);
            }
            datasetCache.AddAll(datasets);
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features = new List<UMCLight>();
            var clusters = new List<UMCClusterLight>();
            var x = new Random();
            var featureId = 0;
            for (var i = 0; i < 100; i++)
            {
                var cluster = new UMCClusterLight();
                cluster.Id = i;
                cluster.AmbiguityScore = i;
                cluster.Tightness = i;
                cluster.ChargeState = charge;

                // Random.Next's upper bound is exclusive, so N is in [1, total - 1].
                var N = x.Next(1, total);
                var hash = new HashSet<int>();

                for (var j = 0; j < N; j++)
                {
                    // Draw dataset ids until one not yet used by this cluster comes up.
                    // HashSet.Add returns false for a duplicate; since N < total an
                    // unused id always exists, so the loop terminates.
                    int did;
                    do
                    {
                        did = x.Next(0, total);
                    } while (!hash.Add(did));

                    var feature = new UMCLight();
                    feature.GroupId = did;
                    feature.Id = featureId++;
                    feature.ChargeState = charge;
                    feature.MassMonoisotopic = x.NextDouble();
                    feature.Net = x.NextDouble();

                    // The original assigned Abundance to itself (a no-op), leaving it at its
                    // default. Both abundance fields now receive the same generated value.
                    var abundance = x.Next(100, 200);
                    feature.AbundanceSum = abundance;
                    feature.Abundance = abundance;
                    feature.ClusterId = cluster.Id;

                    cluster.AddChildFeature(feature);
                    features.Add(feature);
                }
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                clusters.Add(cluster);
            }
            featureCache.AddAll(features);
            clusterCache.AddAll(clusters);

            // This result is replaced by the charge-filtered lookup below; the call is
            // kept so FindAll is still run against the freshly written data.
            clusters = clusterCache.FindAll();

            Console.WriteLine("Find all clusters");
            clusters = clusterCache.FindByCharge(charge);

            WriteClusters(datasets,
                clusters,
                minimumClusterSize,
                charge,
                crossPath,
                databasePath,
                300000);
        }
Пример #10
0
        // Builds a cluster of numUmCs features whose mass, NET, drift time and abundance
        // each grow by `multiplier` per feature, computes the expected centroid by hand
        // (mean or median, depending on `representation`) and checks that
        // CalculateStatistics produces the same mass, NET, drift time and charge.
        //   umcMass, umcNet, umcDrifTime, umcCharge, umcAbundance - values of the first feature.
        //   multiplier     - per-feature increment applied to mass, NET, drift time and abundance.
        //   numUmCs        - number of features to generate.
        //   representation - centroid representation passed to CalculateStatistics.
        public void CalculateStatisticsTestMultipleUmCs(double umcMass,
                                                        double umcNet,
                                                        float umcDrifTime,
                                                        int umcCharge,
                                                        int umcAbundance,
                                                        int multiplier,
                                                        int numUmCs,
                                                        ClusterCentroidRepresentation representation)
        {
            var cluster = new UMCClusterLight();
            cluster.UmcList = new List<UMCLight>();

            var useMean   = representation == ClusterCentroidRepresentation.Mean;
            var oddCount  = numUmCs % 2 == 1;
            var evenCount = (numUmCs % 2) == 0;
            var middle    = numUmCs / 2;

            double expectedMass      = 0;
            double expectedNet       = 0;
            double expectedDriftTime = 0;

            for (var index = 0; index < numUmCs; index++)
            {
                var offset = multiplier * index;

                var umc = new UMCLight();
                umc.MassMonoisotopicAligned = umcMass + offset;
                umc.Net                     = umcNet + offset;
                umc.DriftTime               = umcDrifTime + offset;
                umc.ChargeState             = umcCharge;
                umc.Abundance               = umcAbundance + offset;
                cluster.UmcList.Add(umc);

                if (useMean)
                {
                    // Mean: accumulate sums here, divide by the count after the loop.
                    expectedMass      += umc.MassMonoisotopicAligned;
                    expectedNet       += umc.Net;
                    expectedDriftTime += umc.DriftTime;
                    continue;
                }

                if (oddCount)
                {
                    // Odd count: the median is the single middle feature.
                    if (index == middle)
                    {
                        expectedMass      = umc.MassMonoisotopicAligned;
                        expectedNet       = umc.Net;
                        expectedDriftTime = umc.DriftTime;
                    }
                    continue;
                }

                if (!evenCount)
                {
                    continue;
                }

                // Even count: the median is the average of the two middle features.
                // Remember that index is zero based, so those are (middle - 1) and middle.
                if (index == middle - 1)
                {
                    expectedMass      = umc.MassMonoisotopicAligned;
                    expectedNet       = umc.Net;
                    expectedDriftTime = umc.DriftTime;
                }
                else if (index == middle)
                {
                    expectedMass      = (expectedMass + umc.MassMonoisotopicAligned) / 2;
                    expectedNet       = (expectedNet + umc.Net) / 2;
                    expectedDriftTime = (expectedDriftTime + umc.DriftTime) / 2;
                }
            }

            // Turn the accumulated sums into means.
            if (useMean)
            {
                expectedMass      = expectedMass / numUmCs;
                expectedNet       = expectedNet / numUmCs;
                expectedDriftTime = expectedDriftTime / numUmCs;
            }

            cluster.CalculateStatistics(representation);

            Assert.AreEqual(expectedMass, cluster.MassMonoisotopic, "Monoisotopic Mass");
            Assert.AreEqual(expectedNet, cluster.Net, "NET");
            Assert.AreEqual(expectedDriftTime, cluster.DriftTime, "Drift Time");
            Assert.AreEqual(umcCharge, cluster.ChargeState, "Charge State");
        }
Пример #11
0
        // Creates a fresh database at databasePath, fills it with 10 dummy datasets and
        // 100 randomly generated clusters (each with 1-9 features drawn from distinct
        // datasets), reads the clusters back by charge and passes them to WriteClusters.
        //   databasePath       - database file; deleted first if it already exists.
        //   crossPath          - forwarded unchanged to WriteClusters.
        //   charge             - charge state given to every cluster and feature, and used for the lookup.
        //   minimumClusterSize - forwarded unchanged to WriteClusters.
        public void TestClusterGeneration(string databasePath,
                                          string crossPath,
                                          int charge,
                                          int minimumClusterSize)
        {
            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO    datasetCache = new DatasetDAOHibernate();
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO        featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List<DatasetInformation>();
            var total    = 10;

            for (var i = 0; i < total; i++)
            {
                var dataset = new DatasetInformation();
                dataset.DatasetId   = i;
                dataset.DatasetName = "test" + i;
                datasets.Add(dataset);
            }
            datasetCache.AddAll(datasets);
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features  = new List<UMCLight>();
            var clusters  = new List<UMCClusterLight>();
            var x         = new Random();
            var featureId = 0;

            for (var i = 0; i < 100; i++)
            {
                var cluster = new UMCClusterLight();
                cluster.Id             = i;
                cluster.AmbiguityScore = i;
                cluster.Tightness      = i;
                cluster.ChargeState    = charge;

                // Random.Next's upper bound is exclusive, so N is in [1, total - 1].
                var N    = x.Next(1, total);
                var hash = new HashSet<int>();

                for (var j = 0; j < N; j++)
                {
                    // Draw dataset ids until one not yet used by this cluster comes up.
                    // HashSet.Add returns false for a duplicate; since N < total an
                    // unused id always exists, so the loop terminates.
                    int did;
                    do
                    {
                        did = x.Next(0, total);
                    } while (!hash.Add(did));

                    var feature = new UMCLight();
                    feature.GroupId          = did;
                    feature.Id               = featureId++;
                    feature.ChargeState      = charge;
                    feature.MassMonoisotopic = x.NextDouble();
                    feature.Net              = x.NextDouble();

                    // The original assigned Abundance to itself (a no-op), leaving it at its
                    // default. Both abundance fields now receive the same generated value.
                    var abundance = x.Next(100, 200);
                    feature.AbundanceSum     = abundance;
                    feature.Abundance        = abundance;
                    feature.ClusterId        = cluster.Id;

                    cluster.AddChildFeature(feature);
                    features.Add(feature);
                }
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                clusters.Add(cluster);
            }
            featureCache.AddAll(features);
            clusterCache.AddAll(clusters);

            // This result is replaced by the charge-filtered lookup below; the call is
            // kept so FindAll is still run against the freshly written data.
            clusters = clusterCache.FindAll();

            Console.WriteLine("Find all clusters");
            clusters = clusterCache.FindByCharge(charge);

            WriteClusters(datasets,
                          clusters,
                          minimumClusterSize,
                          charge,
                          crossPath,
                          databasePath,
                          300000);
        }
Пример #12
0
        // Compares, cluster by cluster, the identifications obtained from MS1 data (STAC
        // peak matching of clusters against the mass tags loaded from the LIQUID results)
        // with those obtained from MS/MS scans (scan -> ID lookup from the same results
        // file). Prints the names found by only one of the two routes for each cluster,
        // followed by the total number of names found by both.
        //   liquidResultsPath - LIQUID results file; source of both the mass tags and the scan IDs.
        //   isosFile          - isos file the MS features are read from.
        //   rawFile           - raw data file used to obtain the spectra provider.
        public void CompareMs2IdsToMs1Ids(string liquidResultsPath, string isosFile, string rawFile)
        {
            // Read mass tags.
            var massTagReader = new LiquidResultsFileLoader(liquidResultsPath);
            var massTags      = massTagReader.LoadDatabase();

            // Get identifications - this rereads the liquid results file, which is left
            // that way for now because this is just a test.
            var scansToIds = this.GetIds(liquidResultsPath);

            // Read raw data file.
            var spectraProviderCache = new SpectraProviderCache();
            var spectraProvider      = spectraProviderCache.GetSpectraProvider(rawFile);

            // Read isos features
            var isosReader = new MsFeatureLightFileReader();

            isosReader.IsosFilteroptions = new DeconToolsIsosFilterOptions {
                MaximumIsotopicFit = 0.15
            };
            var msFeatures = isosReader.ReadFile(isosFile).ToList();

            // Get LCMS features
            var msFeatureClusterer = new MsToLcmsFeatures(spectraProvider);
            var lcmsFeatures       = msFeatureClusterer.Convert(msFeatures);

            // No alignment is performed here, so the aligned values are copied from the raw ones.
            lcmsFeatures.ForEach(feature => { feature.NetAligned = feature.Net; feature.MassMonoisotopicAligned = feature.MassMonoisotopic; });

            // Create clusters - Since this is only working on a single dataset, there should be a 1:1 mapping
            // between LCMS features and clusters.
            var clusters = new List<UMCClusterLight>(lcmsFeatures.Count);

            foreach (var lcmsFeature in lcmsFeatures)
            {
                var cluster = new UMCClusterLight(lcmsFeature);
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);
                clusters.Add(cluster);
            }

            // Do STAC AMT matching
            var stacAdapter = new STACAdapter<UMCClusterLight>
            {
                Options = new FeatureMatcherParameters
                {
                    ShouldCalculateShiftFDR = false,
                    UsePriors           = true,
                    UseEllipsoid        = true,
                    UseDriftTime        = false,
                    ShouldCalculateSTAC = true,
                }
            };
            var amtMatches = stacAdapter.PerformPeakMatching(clusters, massTags);

            // Group AMT matches by cluster, convert MassTags to Protein objects (represents lipid ID,
            // rather than Protein ID here) for simplicity in comparing them to the MS/MS IDs.
            var ms1Matches = clusters.ToDictionary(cluster => cluster, cluster => new List<Protein>());

            foreach (var amtMatch in amtMatches)
            {
                var cluster = amtMatch.Observed;
                var massTag = amtMatch.Target;
                ms1Matches[cluster].Add(new Protein
                {
                    Name            = massTag.ProteinName,
                    Sequence        = massTag.PeptideSequence,
                    ChemicalFormula = massTag.PeptideSequence
                });
            }

            // Now we need to backtrack MS/MS identifications -> clusters
            var ms2Matches = new Dictionary<UMCClusterLight, List<Protein>>();

            foreach (var cluster in clusters)
            {
                var clusterMs2Ids = new List<Protein>();
                ms2Matches.Add(cluster, clusterMs2Ids);

                foreach (var lcmsFeature in cluster.UmcList)
                {
                    foreach (var msFeature in lcmsFeature.MsFeatures)
                    {
                        foreach (var msmsFeature in msFeature.MSnSpectra)
                        {
                            if (scansToIds.ContainsKey(msmsFeature.Scan))
                            {
                                clusterMs2Ids.AddRange(scansToIds[msmsFeature.Scan]);
                            }
                        }
                    }
                }
            }

            int overlapCount = 0; // Number of IDs that overlapped between MS1 and MS2 identifications.

            // Finally compare the MS1 IDs to the MS2 IDs.
            foreach (var cluster in clusters)
            {
                // For now only comparing by name. The name lists are materialized once so the
                // queries below do not re-run the projections on every enumeration.
                var ms1Lipids = ms1Matches[cluster].Select(id => id.Name).ToList();
                var ms2Lipids = ms2Matches[cluster].Select(id => id.Name).ToList();

                // Compare MS1 IDs for the cluster vs MS2 IDs for the cluster.
                var ms1OnlyIds = ms1Lipids.Where(lipid => !ms2Lipids.Contains(lipid)).ToList();
                var ms2OnlyIds = ms2Lipids.Where(lipid => !ms1Lipids.Contains(lipid)).ToList();

                // The overlap is the set of names present in BOTH lists. The original intersected
                // the two "only" lists, which share no element by construction, so it was always 0.
                overlapCount += ms1Lipids.Intersect(ms2Lipids).Count();

                // Write Results
                if (ms1OnlyIds.Count == 0 && ms2OnlyIds.Count == 0)
                {
                    continue;
                }

                Console.WriteLine("Cluster {0}:", cluster.Id);
                if (ms1OnlyIds.Count > 0)
                {
                    Console.WriteLine("\tMs1 Only IDs:");
                    foreach (var id in ms1OnlyIds)
                    {
                        Console.WriteLine("\t\t{0}", id);
                    }
                }

                if (ms2OnlyIds.Count > 0)
                {
                    Console.WriteLine("\tMs2 Only IDs:");
                    foreach (var id in ms2OnlyIds)
                    {
                        Console.WriteLine("\t\t{0}", id);
                    }
                }
            }

            Console.WriteLine("Overlap: {0}", overlapCount);
        }