예제 #1
0
        /// <summary>
        ///     Builds one single-feature cluster for every feature contained in the supplied clusters.
        /// </summary>
        /// <param name="clusters">Clusters whose child features are re-wrapped individually.</param>
        /// <returns>
        ///     A new list holding one cluster per feature. Cluster IDs are assigned sequentially,
        ///     starting at 0, in enumeration order.
        /// </returns>
        public List <UMCClusterLight> CreateSingletonClustersFromClusteredFeatures(List <UMCClusterLight> clusters)
        {
            var singletons = new List <UMCClusterLight>();
            var nextId     = 0;

            foreach (var parent in clusters)
            {
                foreach (var child in parent.Features)
                {
                    var singleton = new UMCClusterLight();
                    singleton.MassMonoisotopic = child.MassMonoisotopic;

                    // Net is assigned twice: first the raw value, then the aligned value.
                    // The second assignment is the one that remains.
                    singleton.Net = child.Net;
                    singleton.Net = child.NetAligned;

                    singleton.DriftTime   = child.DriftTime;
                    singleton.ChargeState = child.ChargeState;

                    singleton.Id = nextId;
                    nextId++;

                    singleton.AddChildFeature(child);
                    singletons.Add(singleton);
                }
            }

            return singletons;
        }
예제 #2
0
        /// <summary>
        ///     Loads the cluster data file, places every feature into a single cluster, groups the
        ///     features by their GroupId and prints the cluster's mass and NET standard deviations.
        /// </summary>
        /// <param name="path">Feature file path; it is combined with TestPathSingleton.TestDirectory.</param>
        public void TestDatasets(string path)
        {
            Console.WriteLine("Test: " + path);
            var features = GetClusterData(Path.Combine(TestPathSingleton.TestDirectory, path));

            Assert.IsNotEmpty(features);

            var cluster = new UMCClusterLight();

            cluster.Id = features[0].Id;
            features.ForEach(x => cluster.AddChildFeature(x));

            // Map the features by group, using a single dictionary lookup per feature.
            // NOTE(review): the map is filled but not read afterwards in this method.
            var mapFeatures = new Dictionary <int, List <UMCLight> >();

            foreach (var feature in features)
            {
                List <UMCLight> groupFeatures;
                if (!mapFeatures.TryGetValue(feature.GroupId, out groupFeatures))
                {
                    groupFeatures = new List <UMCLight>();
                    mapFeatures.Add(feature.GroupId, groupFeatures);
                }
                groupFeatures.Add(feature);
            }

            // NOTE(review): the standard deviations are printed without a preceding
            // CalculateStatistics call - confirm the values are populated at this point.
            Console.WriteLine("Cluster\tMass\tNET");
            Console.WriteLine("{0}\t{1}\t{2}\t", cluster.Id, cluster.MassStandardDeviation, cluster.NetStandardDeviation);
            Console.WriteLine();

            // The metric is only needed by the per-feature distance dump below, which is disabled.
            var distance = new EuclideanDistanceMetric <FeatureLight>();

            //features.ForEach(x => Console.WriteLine(distance.EuclideanDistance(x, cluster)));
        }
예제 #3
0
        /// <summary>
        ///     Loads the cluster data file, places every feature into a single cluster and walks over
        ///     every ordered pair of distinct features. The pairwise distance output is disabled.
        /// </summary>
        /// <param name="path">Feature file path; it is combined with TestPathSingleton.TestDirectory.</param>
        public void TestPairwise(string path)
        {
            Console.WriteLine("Test: " + path);

            var dataFile = Path.Combine(TestPathSingleton.TestDirectory, path);
            var features = GetClusterData(dataFile);
            Assert.IsNotEmpty(features);

            var cluster = new UMCClusterLight();
            cluster.Id = features[0].Id;
            foreach (var member in features)
            {
                cluster.AddChildFeature(member);
            }

            var distance = new EuclideanDistanceMetric <FeatureLight>();

            for (var row = 0; row < features.Count; row++)
            {
                var featureX = features[row];
                for (var column = 0; column < features.Count; column++)
                {
                    // A feature is never paired with itself.
                    if (row == column)
                    {
                        continue;
                    }

                    var featureY = features[column];
                    // Console.WriteLine(distance.EuclideanDistance(featureX, featureY));
                }
            }
        }
예제 #4
0
        /// <summary>
        ///     Repopulates a cluster from the data providers: optionally its child features (including
        ///     MS/MS spectra counts) and optionally its matched mass tags.
        /// </summary>
        /// <param name="cluster">Cluster to repopulate; the lists being reloaded are cleared first.</param>
        /// <param name="providers">Providers used to look up features, mass tag matches and mass tags.</param>
        /// <param name="getUmcs">When true, the features are reloaded by cluster ID and the spectra counts are updated.</param>
        /// <param name="getMatches">When true, the mass tags are reloaded from the cluster's mass tag matches.</param>
        /// <param name="getMsFeature">
        ///     When true (and <paramref name="getUmcs"/> is set), each feature is reconstructed and its
        ///     MS/MS spectra are counted; otherwise both counts end up as zero.
        /// </param>
        /// <param name="getMsMs">Passed through to the reconstruction of each feature.</param>
        /// <remarks>
        ///     When features are requested but none are found, the method returns right away: the
        ///     spectra counts are left untouched and the mass tag matches are not reloaded even if
        ///     <paramref name="getMatches"/> is true.
        /// </remarks>
        public static void ReconstructUMCCluster(this UMCClusterLight cluster, FeatureDataAccessProviders providers,
                                                 bool getUmcs, bool getMatches, bool getMsFeature, bool getMsMs)
        {
            if (getUmcs)
            {
                // Rebuild the child feature list from the feature cache.
                cluster.Features.Clear();

                var clusterFeatures = providers.FeatureCache.FindByClusterID(cluster.Id);
                var hasFeatures     = clusterFeatures != null && clusterFeatures.Count != 0;
                if (!hasFeatures)
                {
                    return;
                }

                var spectraCount    = 0;
                var identifiedCount = 0;
                foreach (var umc in clusterFeatures)
                {
                    cluster.AddChildFeature(umc);

                    if (!getMsFeature)
                    {
                        continue;
                    }

                    umc.ReconstructUMC(providers, getMsMs);

                    foreach (var msFeature in umc.MsFeatures)
                    {
                        spectraCount += msFeature.MSnSpectra.Count;
                        foreach (var spectrum in msFeature.MSnSpectra)
                        {
                            // A spectrum counts as identified once it carries at least one peptide.
                            if (spectrum.Peptides.Count <= 0)
                            {
                                continue;
                            }
                            identifiedCount++;
                        }
                    }
                }

                cluster.IdentifiedSpectraCount = identifiedCount;
                cluster.MsMsCount = spectraCount;
            }

            if (!getMatches)
            {
                return;
            }

            // Rebuild the mass tag list from the stored matches.
            cluster.MassTags.Clear();
            var matches = providers.MassTagMatches.FindByClusterId(cluster.Id);
            if (matches == null || !matches.Any())
            {
                return;
            }

            var massTagIds = matches.Select(match => match.MassTagId).ToList();
            var massTags   = providers.MassTags.FindMassTags(massTagIds);
            cluster.MassTags.AddRange(massTags);
        }
예제 #5
0
        /// <summary>
        ///     Loads the cluster data file, places every feature into a single cluster, computes
        ///     median-based statistics and prints the standard deviations followed by the distance of
        ///     each feature to the cluster.
        /// </summary>
        /// <param name="path">Feature file path; it is combined with TestPathSingleton.TestDirectory.</param>
        public void TestTwoClusters(string path)
        {
            Console.WriteLine("Test: " + path);

            var dataFile = Path.Combine(TestPathSingleton.TestDirectory, path);
            var features = GetClusterData(dataFile);
            Assert.IsNotEmpty(features);

            var cluster = new UMCClusterLight();
            cluster.Id = features[0].Id;
            foreach (var member in features)
            {
                cluster.AddChildFeature(member);
            }

            cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);

            Console.WriteLine("Cluster\tMass\tNET");
            Console.WriteLine("{0}\t{1}\t{2}\t", cluster.Id, cluster.MassStandardDeviation, cluster.NetStandardDeviation);
            Console.WriteLine();

            // One output line per feature: its distance to the combined cluster.
            var distance = new EuclideanDistanceMetric <FeatureLight>();
            foreach (var member in features)
            {
                Console.WriteLine(distance.EuclideanDistance(member, cluster));
            }
        }
예제 #6
0
        //[TestCase(@"ClusterData\clusterData-merged-nodelin.txt")]
        /// <summary>
        ///     Loads the cluster data file, clusters the features with the average linkage clusterer
        ///     (mean centroid, default tolerances, weighted Euclidean distance) and prints every
        ///     clustered feature as a comma-separated row.
        /// </summary>
        /// <param name="path">Feature file path; it is combined with TestPaths.TestFilesDirectory.</param>
        public void TestWeightedAverageLinkage(string path)
        {
            Console.WriteLine("Test: " + path);
            var features = GetClusterData(Path.Combine(TestPaths.TestFilesDirectory, path));

            Assert.IsNotEmpty(features);

            // All features are also added to one combined cluster before clustering runs.
            var cluster = new UMCClusterLight();

            cluster.Id = features[0].Id;
            features.ForEach(x => cluster.AddChildFeature(x));

            var average = new UMCAverageLinkageClusterer <UMCLight, UMCClusterLight>();

            average.Parameters = new FeatureClusterParameters <UMCLight>();
            average.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
            average.Parameters.Tolerances             = new Algorithms.FeatureTolerances();

            var distance = new WeightedEuclideanDistance <UMCLight>();

            average.Parameters.DistanceFunction = distance.EuclideanDistance;
            var clusters = average.Cluster(features);

            // The header uses the same delimiter and column order as the rows below:
            // group id, feature id, NET, aligned monoisotopic mass, drift time.
            Console.WriteLine("dataset,feature,net,mass,drift");
            foreach (var newCluster in clusters)
            {
                foreach (var feature in newCluster.Features)
                {
                    Console.WriteLine("{0},{1},{2},{3},{4}", feature.GroupId,
                                      feature.Id,
                                      feature.Net,
                                      feature.MassMonoisotopicAligned,
                                      feature.DriftTime);
                }
            }
        }
예제 #7
0
        /// <summary>
        ///     Loads the cluster data file, places every feature into a single cluster, prints its
        ///     median-based standard deviations and hands the cluster to a MedianSplitReprocessor.
        /// </summary>
        /// <param name="path">Feature file path; it is combined with TestPathSingleton.TestDirectory.</param>
        public void TestReprocessing(string path)
        {
            Console.WriteLine("Test: " + path);

            var dataFile = Path.Combine(TestPathSingleton.TestDirectory, path);
            var features = GetClusterData(dataFile);
            Assert.IsNotEmpty(features);

            var cluster = new UMCClusterLight();
            cluster.Id = features[0].Id;
            foreach (var member in features)
            {
                cluster.AddChildFeature(member);
            }

            cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);

            Console.WriteLine("Cluster\tMass\tNET");
            Console.WriteLine("{0}\t{1}\t{2}\t", cluster.Id, cluster.MassStandardDeviation, cluster.NetStandardDeviation);
            Console.WriteLine();

            // The reprocessor is driven through its interface; its result is not inspected here.
            IClusterReprocessor <UMCLight, UMCClusterLight> reprocessor = new MedianSplitReprocessor <UMCLight, UMCClusterLight>();

            var input = new List <UMCClusterLight>();
            input.Add(cluster);
            reprocessor.ProcessClusters(input);
        }
예제 #8
0
        /// <summary>
        ///     Re-wraps every feature of the given clusters in its own one-member cluster.
        /// </summary>
        /// <param name="clusters">Clusters whose child features are split out.</param>
        /// <returns>
        ///     A new list with one cluster per feature, numbered from 0 upward in enumeration order.
        /// </returns>
        public List<UMCClusterLight> CreateSingletonClustersFromClusteredFeatures(List<UMCClusterLight> clusters)
        {
            var result = new List<UMCClusterLight>();
            var id = 0;

            foreach (var source in clusters)
            {
                foreach (var member in source.Features)
                {
                    var single = new UMCClusterLight();

                    single.MassMonoisotopic = member.MassMonoisotopic;
                    // The raw NET is written first and then replaced by the aligned NET.
                    single.Net = member.Net;
                    single.Net = member.NetAligned;
                    single.DriftTime = member.DriftTime;
                    single.ChargeState = member.ChargeState;
                    single.Id = id;
                    id += 1;

                    single.AddChildFeature(member);
                    result.Add(single);
                }
            }

            return result;
        }
예제 #9
0
        /// <summary>
        ///     Creates a fresh database with 10 dummy datasets and 100 randomly populated clusters,
        ///     stores them through the Hibernate DAOs, reads back the clusters of the requested charge
        ///     and passes them to WriteClusters.
        /// </summary>
        /// <param name="databasePath">Database file; any existing file at this path is deleted first.</param>
        /// <param name="crossPath">Forwarded unchanged to WriteClusters.</param>
        /// <param name="charge">Charge state assigned to every generated cluster and feature.</param>
        /// <param name="minimumClusterSize">Forwarded unchanged to WriteClusters.</param>
        public void TestClusterGeneration(string databasePath,
            string crossPath,
            int charge,
            int minimumClusterSize)
        {
            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO datasetCache = new DatasetDAOHibernate();
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List<DatasetInformation>();
            var total = 10;
            for (var i = 0; i < total; i++)
            {
                var dataset = new DatasetInformation();
                dataset.DatasetId = i;
                dataset.DatasetName = "test" + i;
                datasets.Add(dataset);
            }
            datasetCache.AddAll(datasets);
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features = new List<UMCLight>();
            var clusters = new List<UMCClusterLight>();
            var x = new Random();
            var featureId = 0;
            for (var i = 0; i < 100; i++)
            {
                var cluster = new UMCClusterLight();
                cluster.Id = i;
                cluster.AmbiguityScore = i;
                cluster.Tightness = i;
                cluster.ChargeState = charge;

                // Next(1, total) excludes its upper bound, so at most total - 1 distinct dataset
                // ids are needed per cluster and the retry loop below always terminates.
                var featureCount = x.Next(1, total);
                var usedDatasetIds = new HashSet<int>();

                for (var j = 0; j < featureCount; j++)
                {
                    // Draw dataset ids until one not yet used by this cluster comes up;
                    // HashSet.Add returns false for an id that is already present.
                    int did;
                    do
                    {
                        did = x.Next(0, total);
                    } while (!usedDatasetIds.Add(did));

                    var feature = new UMCLight();
                    feature.GroupId = did;
                    feature.Id = featureId++;
                    feature.ChargeState = charge;
                    feature.MassMonoisotopic = x.NextDouble();
                    feature.Net = x.NextDouble();
                    feature.AbundanceSum = x.Next(100, 200);
                    // NOTE(review): Abundance was previously assigned to itself (a no-op).
                    // It was presumably meant to mirror AbundanceSum - confirm before setting it.
                    feature.ClusterId = cluster.Id;

                    cluster.AddChildFeature(feature);
                    features.Add(feature);
                }
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                clusters.Add(cluster);
            }
            featureCache.AddAll(features);
            clusterCache.AddAll(clusters);

            // FindAll is still called although its result is replaced by the charge query below.
            clusters = clusterCache.FindAll();

            Console.WriteLine("Find all clusters");
            clusters = clusterCache.FindByCharge(charge);

            WriteClusters(datasets,
                clusters,
                minimumClusterSize,
                charge,
                crossPath,
                databasePath,
                300000);
        }
예제 #10
0
        //[TestCase(@"ClusterData\clusterData-single-1500.txt")]
        /// <summary>
        ///     Loads the cluster data file, clusters the features with the average linkage clusterer
        ///     (median centroid, explicit tolerances, unweighted Euclidean distance) and prints a
        ///     sample of the features of every resulting cluster.
        /// </summary>
        /// <param name="path">Feature file path; it is combined with TestPaths.TestFilesDirectory.</param>
        public void TestAverageLinkage(string path)
        {
            Console.WriteLine("Average Linkage Test: " + path);
            var features = GetClusterData(Path.Combine(TestPaths.TestFilesDirectory, path));

            Assert.IsNotEmpty(features);

            // All features are also added to one combined cluster before clustering runs.
            var cluster = new UMCClusterLight();

            cluster.Id = features[0].Id;
            features.ForEach(x => cluster.AddChildFeature(x));

            var average = new UMCAverageLinkageClusterer <UMCLight, UMCClusterLight>();

            average.Parameters = new FeatureClusterParameters <UMCLight>();
            average.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Median;
            average.Parameters.Tolerances             = new Algorithms.FeatureTolerances();
            average.Parameters.Tolerances.Net         = .02;
            average.Parameters.Tolerances.Mass        = 6;
            average.Parameters.Tolerances.DriftTime   = .3;

            // The clusterer runs with the unweighted metric. The weighted metric is only used
            // further down to measure each feature against its cluster.
            // NOTE(review): confirm that mixing the two metrics is intended.
            var distance = new WeightedEuclideanDistance <UMCLight>();
            var euclid   = new EuclideanDistanceMetric <UMCLight>();

            average.Parameters.DistanceFunction = euclid.EuclideanDistance;
            var clusters = average.Cluster(features);

            Console.WriteLine("Clusters = {0}", clusters.Count);
            var id = 1;

            foreach (var testCluster in clusters)
            {
                testCluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                var distances = new List <double>();

                // Show a sampling of 5 results: print every threshold-th feature.
                var threshold = (int)(testCluster.Features.Count / (double)5);
                if (threshold < 1)
                {
                    threshold = 1;
                }

                testCluster.Id = id++;
                var featureID = 0;

                foreach (var feature in testCluster.Features)
                {
                    featureID++;
                    if (featureID % threshold == 0)
                    {
                        Console.WriteLine("{0},{1},{2},{3}",
                                          feature.Net,
                                          feature.MassMonoisotopicAligned,
                                          feature.DriftTime,
                                          testCluster.Id);
                    }

                    var newDistance = distance.EuclideanDistance(feature, testCluster);
                    distances.Add(newDistance);
                }
                // The collected distances are only consumed by the disabled dump below.
                //Console.WriteLine();
                //Console.WriteLine("Distances");
                //distances.ForEach(x => Console.WriteLine(x));
                //Console.WriteLine();
            }
        }
예제 #11
0
        //[TestCase(@"ClusterData\clusterData-merged-nodelin.txt")]
        /// <summary>
        ///     Loads the cluster data file, clusters the features with the Prim's-based clusterer
        ///     (mean centroid, default tolerances) and reports how often each feature, keyed by group
        ///     id and feature id, appears in the resulting clusters, flagging duplicates.
        /// </summary>
        /// <param name="path">Feature file path; it is combined with TestPathSingleton.TestDirectory.</param>
        public void TestPrims(string path)
        {
            Console.WriteLine("Test: " + path);
            var features = GetClusterData(Path.Combine(TestPathSingleton.TestDirectory, path));

            Assert.IsNotEmpty(features);

            // All features are also added to one combined cluster before clustering runs.
            var cluster = new UMCClusterLight();

            cluster.Id = features[0].Id;
            features.ForEach(x => cluster.AddChildFeature(x));

            var prims = new UMCPrimsClustering <UMCLight, UMCClusterLight>();

            prims.Parameters = new FeatureClusterParameters <UMCLight>();
            prims.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
            prims.Parameters.Tolerances             = new FeatureTolerances();

            var clusters = prims.Cluster(features);

            // group id -> (feature id -> number of clusters the feature was seen in)
            var counts = new Dictionary <int, Dictionary <int, int> >();
            var cid    = 0;

            foreach (var clusterx in clusters)
            {
                clusterx.Id = cid++;
                foreach (var feature in clusterx.Features)
                {
                    Dictionary <int, int> groupCounts;
                    if (!counts.TryGetValue(feature.GroupId, out groupCounts))
                    {
                        groupCounts = new Dictionary <int, int>();
                        counts.Add(feature.GroupId, groupCounts);
                    }

                    // Debug trace for two specific feature ids.
                    if (feature.Id == 51 || feature.Id == 37)
                    {
                        Console.WriteLine("Found it {0} cluster {1}", feature.Id, clusterx.Id);
                    }

                    // TryGetValue leaves the count at 0 when the feature has not been seen yet.
                    int seen;
                    groupCounts.TryGetValue(feature.Id, out seen);
                    seen++;
                    groupCounts[feature.Id] = seen;

                    Console.WriteLine("Found {0}", clusterx.Id);
                    if (seen > 1)
                    {
                        Console.WriteLine("Duplicate!!!! cluster {0}  feature {1}", clusterx.Id, feature.Id);
                    }
                }
            }

            Console.WriteLine("Group\tFeature\tCount");
            foreach (var groupEntry in counts)
            {
                foreach (var featureEntry in groupEntry.Value)
                {
                    Console.WriteLine("{0}\t{1}\t{2}", groupEntry.Key, featureEntry.Key, featureEntry.Value);
                }
            }

            Console.WriteLine("Clusters = {0}", clusters.Count);
        }
예제 #12
0
        //[TestCase(@"ClusterData\clusterData-single-1500.txt", 4)]
        //[TestCase(@"ClusterData\clusterData-single-1500-two.txt", 4)]
        /// <summary>
        ///     Loads the cluster data file, clusters the features with the Prim's-based clusterer using
        ///     the supplied sigma cutoff and a weighted Euclidean distance, then prints a sample of the
        ///     features of every resulting cluster.
        /// </summary>
        /// <param name="path">Feature file path; it is combined with TestPathSingleton.TestDirectory.</param>
        /// <param name="sigma">
        ///     Sigma cutoff handed to the UMCPrimsClustering constructor. The value is used as passed;
        ///     it was previously overwritten with 1 at the top of the method.
        /// </param>
        public void TestPrimsWeighted(string path, double sigma)
        {
            Console.WriteLine();
            Console.WriteLine("Tests: " + path);
            Console.WriteLine("Sigma Cutoff: {0}", sigma);
            var features = GetClusterData(Path.Combine(TestPathSingleton.TestDirectory, path));

            Assert.IsNotEmpty(features);

            // All features are also added to one combined cluster before clustering runs.
            var cluster = new UMCClusterLight();

            cluster.Id = features[0].Id;
            features.ForEach(x => cluster.AddChildFeature(x));

            var prims = new UMCPrimsClustering <UMCLight, UMCClusterLight>(sigma);

            prims.Parameters = new FeatureClusterParameters <UMCLight>();
            prims.Parameters.CentroidRepresentation      = ClusterCentroidRepresentation.Mean;
            prims.Parameters.Tolerances                  = new FeatureTolerances();
            prims.Parameters.OnlyClusterSameChargeStates = false;
            prims.Parameters.Tolerances.DriftTime        = .3;
            prims.Parameters.Tolerances.Mass             = 15;
            prims.Parameters.Tolerances.Net              = .02;
            prims.DumpLinearRelationship                 = false;

            var distance = new WeightedEuclideanDistance <UMCLight>();

            prims.Parameters.DistanceFunction = distance.EuclideanDistance;
            var clusters = prims.Cluster(features);


            Console.WriteLine();
            Console.WriteLine("Clusters = {0}", clusters.Count);

            var id = 1;

            foreach (var testCluster in clusters)
            {
                testCluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);

                var distances = new List <double>();

                // Show a sampling of 15 results: print every threshold-th feature.
                var threshold = (int)(testCluster.Features.Count / (double)15);
                if (threshold < 1)
                {
                    threshold = 1;
                }

                testCluster.Id = id++;
                var featureID = 0;

                foreach (var feature in testCluster.Features)
                {
                    featureID++;
                    if (featureID % threshold == 0)
                    {
                        Console.WriteLine("{0},{1},{2},{3}",
                                          feature.Net,
                                          feature.MassMonoisotopicAligned,
                                          feature.DriftTime,
                                          testCluster.Id);
                    }

                    var newDistance = distance.EuclideanDistance(feature, testCluster);
                    distances.Add(newDistance);
                }
                // The collected distances are only consumed by the disabled dump below.
                //Console.WriteLine();
                //Console.WriteLine("Distances");
                //distances.ForEach(x => Console.WriteLine(x));
                //Console.WriteLine();
            }
            Console.WriteLine();
            Console.WriteLine("Test Done:");
            Console.WriteLine();
        }
예제 #13
0
        /// <summary>
        ///     Creates a fresh database with 10 dummy datasets and 100 randomly populated clusters,
        ///     stores them through the Hibernate DAOs, reads back the clusters of the requested charge
        ///     and passes them to WriteClusters.
        /// </summary>
        /// <param name="databasePath">Database file; any existing file at this path is deleted first.</param>
        /// <param name="crossPath">Forwarded unchanged to WriteClusters.</param>
        /// <param name="charge">Charge state assigned to every generated cluster and feature.</param>
        /// <param name="minimumClusterSize">Forwarded unchanged to WriteClusters.</param>
        public void TestClusterGeneration(string databasePath,
                                          string crossPath,
                                          int charge,
                                          int minimumClusterSize)
        {
            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO    datasetCache = new DatasetDAOHibernate();
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO        featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List <DatasetInformation>();
            var total    = 10;

            for (var i = 0; i < total; i++)
            {
                var dataset = new DatasetInformation();
                dataset.DatasetId   = i;
                dataset.DatasetName = "test" + i;
                datasets.Add(dataset);
            }
            datasetCache.AddAll(datasets);
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features  = new List <UMCLight>();
            var clusters  = new List <UMCClusterLight>();
            var x         = new Random();
            var featureId = 0;

            for (var i = 0; i < 100; i++)
            {
                var cluster = new UMCClusterLight();
                cluster.Id             = i;
                cluster.AmbiguityScore = i;
                cluster.Tightness      = i;
                cluster.ChargeState    = charge;

                // Next(1, total) excludes its upper bound, so at most total - 1 distinct dataset
                // ids are needed per cluster and the retry loop below always terminates.
                var featureCount   = x.Next(1, total);
                var usedDatasetIds = new HashSet <int>();

                for (var j = 0; j < featureCount; j++)
                {
                    // Draw dataset ids until one not yet used by this cluster comes up;
                    // HashSet.Add returns false for an id that is already present.
                    int did;
                    do
                    {
                        did = x.Next(0, total);
                    } while (!usedDatasetIds.Add(did));

                    var feature = new UMCLight();
                    feature.GroupId          = did;
                    feature.Id               = featureId++;
                    feature.ChargeState      = charge;
                    feature.MassMonoisotopic = x.NextDouble();
                    feature.Net              = x.NextDouble();
                    feature.AbundanceSum     = x.Next(100, 200);
                    // NOTE(review): Abundance was previously assigned to itself (a no-op).
                    // It was presumably meant to mirror AbundanceSum - confirm before setting it.
                    feature.ClusterId        = cluster.Id;

                    cluster.AddChildFeature(feature);
                    features.Add(feature);
                }
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                clusters.Add(cluster);
            }
            featureCache.AddAll(features);
            clusterCache.AddAll(clusters);

            // FindAll is still called although its result is replaced by the charge query below.
            clusters = clusterCache.FindAll();

            Console.WriteLine("Find all clusters");
            clusters = clusterCache.FindByCharge(charge);

            WriteClusters(datasets,
                          clusters,
                          minimumClusterSize,
                          charge,
                          crossPath,
                          databasePath,
                          300000);
        }