/// <summary>
/// Breaks every child feature of the supplied clusters out into its own single-member cluster.
/// </summary>
/// <param name="clusters">Clusters whose child features are each turned into a singleton cluster.</param>
/// <returns>
/// One new cluster per child feature, in iteration order, with IDs assigned sequentially from zero.
/// </returns>
public List<UMCClusterLight> CreateSingletonClustersFromClusteredFeatures(List<UMCClusterLight> clusters)
{
    var newClusters = new List<UMCClusterLight>();
    var nextId = 0;

    foreach (var cluster in clusters)
    {
        foreach (var feature in cluster.Features)
        {
            // The singleton is positioned at the feature's aligned NET (NetAligned), not its raw Net.
            var singleton = new UMCClusterLight
            {
                MassMonoisotopic = feature.MassMonoisotopic,
                Net = feature.NetAligned,
                DriftTime = feature.DriftTime,
                ChargeState = feature.ChargeState,
                Id = nextId++
            };

            singleton.AddChildFeature(feature);
            newClusters.Add(singleton);
        }
    }

    return newClusters;
}
/// <summary>
/// Loads cluster data from a test file, gathers every feature into a single cluster,
/// groups the features by their GroupId, and prints the cluster's mass and NET
/// standard deviations.
/// </summary>
/// <param name="path">Cluster data file, relative to the test directory.</param>
public void TestDatasets(string path)
{
    Console.WriteLine("Test: " + path);

    var features = GetClusterData(Path.Combine(TestPathSingleton.TestDirectory, path));
    Assert.IsNotEmpty(features);

    var cluster = new UMCClusterLight();
    cluster.Id = features[0].Id;
    features.ForEach(x => cluster.AddChildFeature(x));

    // Map the features by the group they belong to (single lookup per feature).
    var mapFeatures = new Dictionary<int, List<UMCLight>>();
    foreach (var feature in features)
    {
        List<UMCLight> groupFeatures;
        if (!mapFeatures.TryGetValue(feature.GroupId, out groupFeatures))
        {
            groupFeatures = new List<UMCLight>();
            mapFeatures.Add(feature.GroupId, groupFeatures);
        }

        groupFeatures.Add(feature);
    }

    // The standard deviations printed below are only meaningful once the cluster
    // statistics have been computed; the sibling tests do this with the median centroid.
    cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);

    Console.WriteLine("Cluster\tMass\tNET");
    Console.WriteLine("{0}\t{1}\t{2}\t", cluster.Id, cluster.MassStandardDeviation, cluster.NetStandardDeviation);
    Console.WriteLine();
}
/// <summary>
/// Loads cluster data, places every feature into one cluster, and visits every ordered
/// pair of distinct features. The per-pair distance output is currently commented out,
/// so the pair walk produces no output.
/// </summary>
/// <param name="path">Cluster data file, relative to the test directory.</param>
public void TestPairwise(string path)
{
    Console.WriteLine("Test: " + path);

    var dataFile = Path.Combine(TestPathSingleton.TestDirectory, path);
    var features = GetClusterData(dataFile);
    Assert.IsNotEmpty(features);

    var cluster = new UMCClusterLight { Id = features[0].Id };
    foreach (var member in features)
    {
        cluster.AddChildFeature(member);
    }

    var distance = new EuclideanDistanceMetric<FeatureLight>();

    for (var outer = 0; outer < features.Count; outer++)
    {
        var featureX = features[outer];

        for (var inner = 0; inner < features.Count; inner++)
        {
            // Never pair a feature with itself.
            if (outer == inner)
            {
                continue;
            }

            var featureY = features[inner];
            // Console.WriteLine(distance.EuclideanDistance(featureX, featureY));
        }
    }
}
/// <summary>
/// Repopulates a cluster from the data providers: optionally its child features
/// (and, through them, MS features and MS/MS spectra tallies) and optionally its
/// matched mass tags.
/// </summary>
/// <param name="cluster">Cluster to reconstruct; its Id is used for every lookup.</param>
/// <param name="providers">Providers supplying the feature cache, the mass tag matches and the mass tags.</param>
/// <param name="getUmcs">
/// If true, the cluster's feature list is cleared and reloaded from the feature cache, and
/// the cluster's MS/MS and identified-spectra counts are overwritten.
/// </param>
/// <param name="getMatches">If true, the cluster's mass tag list is cleared and reloaded from its matches.</param>
/// <param name="getMsFeature">
/// Only consulted when <paramref name="getUmcs"/> is true. If true, each feature is itself
/// reconstructed and its MS/MS spectra are counted; otherwise both counts are set to zero.
/// </param>
/// <param name="getMsMs">Passed through to each feature's reconstruction.</param>
/// <remarks>
/// When <paramref name="getUmcs"/> is true and the feature cache returns nothing for the cluster,
/// the method returns immediately: the counts are left untouched and mass tag matches are NOT
/// reconstructed, even if <paramref name="getMatches"/> is true.
/// </remarks>
public static void ReconstructUMCCluster(this UMCClusterLight cluster, FeatureDataAccessProviders providers, bool getUmcs, bool getMatches, bool getMsFeature, bool getMsMs)
{
    if (getUmcs)
    {
        // Reconstruct UMCs
        cluster.Features.Clear();

        var features = providers.FeatureCache.FindByClusterID(cluster.Id);
        if (features == null || features.Count == 0)
        {
            // NOTE(review): this also skips the match reconstruction below - confirm that is intended.
            return;
        }

        var totalSpectra = 0;
        var totalIdentified = 0;

        foreach (var feature in features)
        {
            cluster.AddChildFeature(feature);

            if (!getMsFeature)
            {
                continue;
            }

            feature.ReconstructUMC(providers, getMsMs);

            foreach (var msFeature in feature.MsFeatures)
            {
                totalSpectra += msFeature.MSnSpectra.Count;

                // A spectrum counts as identified when it has at least one peptide attached.
                foreach (var spectrum in msFeature.MSnSpectra)
                {
                    if (spectrum.Peptides.Count > 0)
                    {
                        totalIdentified++;
                    }
                }
            }
        }

        cluster.IdentifiedSpectraCount = totalIdentified;
        cluster.MsMsCount = totalSpectra;
    }

    if (getMatches)
    {
        // Reconstruct matches
        cluster.MassTags.Clear();

        var matches = providers.MassTagMatches.FindByClusterId(cluster.Id);
        if (matches != null && matches.Any())
        {
            var massTagIds = matches.Select(match => match.MassTagId).ToList();
            var massTags = providers.MassTags.FindMassTags(massTagIds);
            cluster.MassTags.AddRange(massTags);
        }
    }
}
/// <summary>
/// Loads cluster data, gathers every feature into one cluster, computes median-based
/// statistics, prints the mass and NET standard deviations, and then prints the
/// Euclidean distance from each feature to the cluster.
/// </summary>
/// <param name="path">Cluster data file, relative to the test directory.</param>
public void TestTwoClusters(string path)
{
    Console.WriteLine("Test: " + path);

    var dataFile = Path.Combine(TestPathSingleton.TestDirectory, path);
    var features = GetClusterData(dataFile);
    Assert.IsNotEmpty(features);

    var cluster = new UMCClusterLight { Id = features[0].Id };
    foreach (var member in features)
    {
        cluster.AddChildFeature(member);
    }

    cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);

    Console.WriteLine("Cluster\tMass\tNET");
    Console.WriteLine("{0}\t{1}\t{2}\t", cluster.Id, cluster.MassStandardDeviation, cluster.NetStandardDeviation);
    Console.WriteLine();

    // One line per feature: its distance to the combined cluster.
    var distance = new EuclideanDistanceMetric<FeatureLight>();
    foreach (var member in features)
    {
        Console.WriteLine(distance.EuclideanDistance(member, cluster));
    }
}
/// <summary>
/// Clusters the features of a test file with average linkage, using the weighted
/// Euclidean distance and mean centroids, and prints every clustered feature as a
/// comma-separated row.
/// </summary>
/// <param name="path">Cluster data file, relative to the test files directory.</param>
//[TestCase(@"ClusterData\clusterData-merged-nodelin.txt")]
public void TestWeightedAverageLinkage(string path)
{
    Console.WriteLine("Test: " + path);

    var features = GetClusterData(Path.Combine(TestPaths.TestFilesDirectory, path));
    Assert.IsNotEmpty(features);

    // Every feature is first attached to one all-encompassing cluster.
    var cluster = new UMCClusterLight();
    cluster.Id = features[0].Id;
    features.ForEach(x => cluster.AddChildFeature(x));

    var average = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
    average.Parameters = new FeatureClusterParameters<UMCLight>();
    average.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
    average.Parameters.Tolerances = new Algorithms.FeatureTolerances();

    var distance = new WeightedEuclideanDistance<UMCLight>();
    average.Parameters.DistanceFunction = distance.EuclideanDistance;

    var clusters = average.Cluster(features);

    // Header uses the same delimiter and column order as the rows below
    // (group id, feature id, NET, aligned monoisotopic mass, drift time).
    Console.WriteLine("dataset,feature,net,mass,drift");
    foreach (var newCluster in clusters)
    {
        foreach (var feature in newCluster.Features)
        {
            Console.WriteLine("{0},{1},{2},{3},{4}",
                feature.GroupId,
                feature.Id,
                feature.Net,
                feature.MassMonoisotopicAligned,
                feature.DriftTime);
        }
    }
}
/// <summary>
/// Loads cluster data, gathers every feature into one cluster, computes median-based
/// statistics, prints the mass and NET standard deviations, and runs the cluster
/// through a median-split reprocessor.
/// </summary>
/// <param name="path">Cluster data file, relative to the test directory.</param>
public void TestReprocessing(string path)
{
    Console.WriteLine("Test: " + path);

    var dataFile = Path.Combine(TestPathSingleton.TestDirectory, path);
    var features = GetClusterData(dataFile);
    Assert.IsNotEmpty(features);

    var cluster = new UMCClusterLight { Id = features[0].Id };
    foreach (var member in features)
    {
        cluster.AddChildFeature(member);
    }

    cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);

    Console.WriteLine("Cluster\tMass\tNET");
    Console.WriteLine("{0}\t{1}\t{2}\t", cluster.Id, cluster.MassStandardDeviation, cluster.NetStandardDeviation);
    Console.WriteLine();

    // The reprocessor is driven through its interface with a one-element cluster list.
    var clustersToProcess = new List<UMCClusterLight>();
    clustersToProcess.Add(cluster);

    IClusterReprocessor<UMCLight, UMCClusterLight> reprocessor = new MedianSplitReprocessor<UMCLight, UMCClusterLight>();
    reprocessor.ProcessClusters(clustersToProcess);
}
/// <summary>
/// Converts each child feature of the given clusters into a cluster of its own.
/// </summary>
/// <param name="clusters">Source clusters; every feature they contain yields one singleton cluster.</param>
/// <returns>
/// The singleton clusters, in the order the features were visited, with IDs running from zero upward.
/// </returns>
public List<UMCClusterLight> CreateSingletonClustersFromClusteredFeatures(List<UMCClusterLight> clusters)
{
    var singletons = new List<UMCClusterLight>();
    var id = 0;

    foreach (var cluster in clusters)
    {
        foreach (var feature in cluster.Features)
        {
            // Net is taken from the feature's aligned NET; the raw Net is not used.
            var single = new UMCClusterLight
            {
                MassMonoisotopic = feature.MassMonoisotopic,
                Net = feature.NetAligned,
                DriftTime = feature.DriftTime,
                ChargeState = feature.ChargeState,
                Id = id++
            };

            single.AddChildFeature(feature);
            singletons.Add(single);
        }
    }

    return singletons;
}
/// <summary>
/// Creates a fresh database at <paramref name="databasePath"/>, fills it with ten dummy
/// datasets and one hundred randomly generated clusters (each with features from distinct
/// datasets), reads the clusters of the requested charge back, and passes them to WriteClusters.
/// </summary>
/// <param name="databasePath">Database file to create; any existing file at this path is deleted first.</param>
/// <param name="crossPath">Forwarded to WriteClusters.</param>
/// <param name="charge">Charge state given to every generated cluster and feature, and used for the final lookup.</param>
/// <param name="minimumClusterSize">Forwarded to WriteClusters.</param>
public void TestClusterGeneration(string databasePath, string crossPath, int charge, int minimumClusterSize)
{
    File.Delete(databasePath);
    NHibernateUtil.ConnectToDatabase(databasePath, true);

    IDatasetDAO datasetCache = new DatasetDAOHibernate();
    IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
    IUmcDAO featureCache = new UmcDAOHibernate();

    // Creating a dataset
    Console.WriteLine("Creating dummy datasets");
    var datasets = new List<DatasetInformation>();
    var total = 10;
    for (var i = 0; i < total; i++)
    {
        var dataset = new DatasetInformation();
        dataset.DatasetId = i;
        dataset.DatasetName = "test" + i;
        datasets.Add(dataset);
    }
    datasetCache.AddAll(datasets);
    datasets.Clear();
    datasets = datasetCache.FindAll();

    // Create features
    Console.WriteLine("Creating features");
    var features = new List<UMCLight>();
    var clusters = new List<UMCClusterLight>();
    var x = new Random();
    var featureId = 0;

    for (var i = 0; i < 100; i++)
    {
        var cluster = new UMCClusterLight();
        cluster.Id = i;
        cluster.AmbiguityScore = i;
        cluster.Tightness = i;

        // Between 1 and total - 1 features per cluster, so a free dataset id always exists below.
        var N = x.Next(1, total);
        cluster.ChargeState = charge;

        var hash = new HashSet<int>();
        for (var j = 0; j < N; j++)
        {
            // Draw dataset ids until one is found that this cluster has not used yet;
            // HashSet.Add returns false for an id that is already present.
            int did;
            do
            {
                did = x.Next(0, total);
            } while (!hash.Add(did));

            var feature = new UMCLight();
            feature.GroupId = did;
            feature.Id = featureId++;
            feature.ChargeState = charge;
            feature.MassMonoisotopic = x.NextDouble();
            feature.Net = x.NextDouble();

            // NOTE(review): Abundance used to be assigned to itself (a no-op). It now receives the
            // same random value as AbundanceSum, which is the presumed intent - confirm.
            var abundance = x.Next(100, 200);
            feature.AbundanceSum = abundance;
            feature.Abundance = abundance;

            feature.ClusterId = cluster.Id;

            cluster.AddChildFeature(feature);
            features.Add(feature);
        }

        cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
        clusters.Add(cluster);
    }

    featureCache.AddAll(features);
    clusterCache.AddAll(clusters);

    // FindAll is still exercised against the database; only the charge-filtered result is used.
    clusters = clusterCache.FindAll();

    Console.WriteLine("Find all clusters");
    clusters = clusterCache.FindByCharge(charge);

    WriteClusters(datasets,
        clusters,
        minimumClusterSize,
        charge,
        crossPath,
        databasePath,
        300000);
}
/// <summary>
/// Clusters the features of a test file with average linkage (median centroids, explicit
/// NET/mass/drift tolerances), then prints the cluster count and a sample of features
/// from each resulting cluster.
/// </summary>
/// <remarks>
/// The clusterer's distance function is assigned twice; the plain Euclidean metric assigned
/// last is the one in effect when clustering runs. The weighted metric is still used to
/// measure each feature's distance to its cluster afterwards.
/// </remarks>
/// <param name="path">Cluster data file, relative to the test files directory.</param>
//[TestCase(@"ClusterData\clusterData-single-1500.txt")]
public void TestAverageLinkage(string path)
{
    Console.WriteLine("Average Linkage Test: " + path);

    var dataFile = Path.Combine(TestPaths.TestFilesDirectory, path);
    var features = GetClusterData(dataFile);
    Assert.IsNotEmpty(features);

    var cluster = new UMCClusterLight { Id = features[0].Id };
    foreach (var member in features)
    {
        cluster.AddChildFeature(member);
    }

    var maps = new Dictionary<int, UMCClusterLight>();

    var average = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
    average.Parameters = new FeatureClusterParameters<UMCLight>();
    average.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Median;
    average.Parameters.Tolerances = new Algorithms.FeatureTolerances();
    average.Parameters.Tolerances.Net = .02;
    average.Parameters.Tolerances.Mass = 6;
    average.Parameters.Tolerances.DriftTime = .3;

    var distance = new WeightedEuclideanDistance<UMCLight>();
    average.Parameters.DistanceFunction = distance.EuclideanDistance;

    var euclid = new EuclideanDistanceMetric<UMCLight>();
    average.Parameters.DistanceFunction = euclid.EuclideanDistance;

    var clusters = average.Cluster(features);
    Console.WriteLine("Clusters = {0}", clusters.Count);

    var nextClusterId = 1;
    foreach (var testCluster in clusters)
    {
        testCluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);

        var distances = new List<double>();

        // Show a sampling of 5 results: print every stride-th feature, never less than every one.
        var rawStride = (int)(testCluster.Features.Count / (double)5);
        var threshold = rawStride < 1 ? 1 : rawStride;

        testCluster.Id = nextClusterId++;

        var position = 0;
        foreach (var feature in testCluster.Features)
        {
            position += 1;

            var isSampled = position % threshold == 0;
            if (isSampled)
            {
                Console.WriteLine("{0},{1},{2},{3}",
                    feature.Net,
                    feature.MassMonoisotopicAligned,
                    feature.DriftTime,
                    testCluster.Id);
            }

            distances.Add(distance.EuclideanDistance(feature, testCluster));
        }

        //Console.WriteLine();
        //Console.WriteLine("Distances");
        //distances.ForEach(x => Console.WriteLine(x));
        //Console.WriteLine();
    }
}
/// <summary>
/// Clusters the features of a test file with Prim's-based clustering (mean centroids,
/// default tolerances) and reports how many times each feature, keyed by group and
/// feature id, appears across the resulting clusters, flagging any duplicates.
/// </summary>
/// <param name="path">Cluster data file, relative to the test directory.</param>
//[TestCase(@"ClusterData\clusterData-merged-nodelin.txt")]
public void TestPrims(string path)
{
    Console.WriteLine("Test: " + path);

    var dataFile = Path.Combine(TestPathSingleton.TestDirectory, path);
    var features = GetClusterData(dataFile);
    Assert.IsNotEmpty(features);

    var cluster = new UMCClusterLight { Id = features[0].Id };
    foreach (var member in features)
    {
        cluster.AddChildFeature(member);
    }

    var maps = new Dictionary<int, UMCClusterLight>();

    var prims = new UMCPrimsClustering<UMCLight, UMCClusterLight>();
    prims.Parameters = new FeatureClusterParameters<UMCLight>();
    prims.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
    prims.Parameters.Tolerances = new FeatureTolerances();

    var clusters = prims.Cluster(features);

    // group id -> (feature id -> number of clusters the feature was seen in)
    var counts = new Dictionary<int, Dictionary<int, int>>();
    var nextClusterId = 0;

    foreach (var found in clusters)
    {
        found.Id = nextClusterId++;

        foreach (var feature in found.Features)
        {
            Dictionary<int, int> perFeature;
            if (!counts.TryGetValue(feature.GroupId, out perFeature))
            {
                perFeature = new Dictionary<int, int>();
                counts.Add(feature.GroupId, perFeature);
            }

            if (feature.Id == 51 || feature.Id == 37)
            {
                Console.WriteLine("Found it {0} cluster {1}", feature.Id, found.Id);
            }

            // A missing key leaves 'seen' at zero, so the first sighting is stored as 1.
            int seen;
            perFeature.TryGetValue(feature.Id, out seen);
            seen += 1;
            perFeature[feature.Id] = seen;

            Console.WriteLine("Found {0}", found.Id);

            if (seen > 1)
            {
                Console.WriteLine("Duplicate!!!! cluster {0} feature {1}", found.Id, feature.Id);
            }
        }
    }

    Console.WriteLine("Group\tFeature\tCount");
    foreach (var groupEntry in counts)
    {
        foreach (var featureEntry in groupEntry.Value)
        {
            Console.WriteLine("{0}\t{1}\t{2}", groupEntry.Key, featureEntry.Key, featureEntry.Value);
        }
    }

    Console.WriteLine("Clusters = {0}", clusters.Count);
}
/// <summary>
/// Clusters the features of a test file with Prim's-based clustering using the weighted
/// Euclidean distance and explicit tolerances, then prints the cluster count and a sample
/// of features from each resulting cluster.
/// </summary>
/// <param name="path">Cluster data file, relative to the test directory.</param>
/// <param name="sigma">
/// Sigma cutoff for the clusterer. NOTE(review): the value passed in is overwritten with 1
/// on entry, so the argument currently has no effect - confirm this is intended.
/// </param>
//[TestCase(@"ClusterData\clusterData-single-1500.txt", 4)]
//[TestCase(@"ClusterData\clusterData-single-1500-two.txt", 4)]
public void TestPrimsWeighted(string path, double sigma)
{
    sigma = 1;

    Console.WriteLine();
    Console.WriteLine("Tests: " + path);
    Console.WriteLine("Sigma Cutoff: {0}", sigma);

    var dataFile = Path.Combine(TestPathSingleton.TestDirectory, path);
    var features = GetClusterData(dataFile);
    Assert.IsNotEmpty(features);

    var cluster = new UMCClusterLight { Id = features[0].Id };
    foreach (var member in features)
    {
        cluster.AddChildFeature(member);
    }

    var maps = new Dictionary<int, UMCClusterLight>();

    var prims = new UMCPrimsClustering<UMCLight, UMCClusterLight>(sigma);
    prims.Parameters = new FeatureClusterParameters<UMCLight>();
    prims.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
    prims.Parameters.Tolerances = new FeatureTolerances();
    prims.Parameters.OnlyClusterSameChargeStates = false;
    prims.Parameters.Tolerances.DriftTime = .3;
    prims.Parameters.Tolerances.Mass = 15;
    prims.Parameters.Tolerances.Net = .02;
    prims.DumpLinearRelationship = false;

    var distance = new WeightedEuclideanDistance<UMCLight>();
    prims.Parameters.DistanceFunction = distance.EuclideanDistance;

    var clusters = prims.Cluster(features);

    Console.WriteLine();
    Console.WriteLine("Clusters = {0}", clusters.Count);

    var nextClusterId = 1;
    foreach (var testCluster in clusters)
    {
        testCluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);

        var distances = new List<double>();

        // Show a sampling of 15 results: print every stride-th feature, never less than every one.
        var rawStride = (int)(testCluster.Features.Count / (double)15);
        var threshold = rawStride < 1 ? 1 : rawStride;

        testCluster.Id = nextClusterId++;

        var position = 0;
        foreach (var feature in testCluster.Features)
        {
            position += 1;

            var isSampled = position % threshold == 0;
            if (isSampled)
            {
                Console.WriteLine("{0},{1},{2},{3}",
                    feature.Net,
                    feature.MassMonoisotopicAligned,
                    feature.DriftTime,
                    testCluster.Id);
            }

            distances.Add(distance.EuclideanDistance(feature, testCluster));
        }

        //Console.WriteLine();
        //Console.WriteLine("Distances");
        //distances.ForEach(x => Console.WriteLine(x));
        //Console.WriteLine();
    }

    Console.WriteLine();
    Console.WriteLine("Test Done:");
    Console.WriteLine();
}
/// <summary>
/// Creates a fresh database at <paramref name="databasePath"/>, populates it with ten dummy
/// datasets and one hundred randomly generated clusters (each holding features from distinct
/// datasets), reloads the clusters of the requested charge, and passes them to WriteClusters.
/// </summary>
/// <param name="databasePath">Database file to create; any existing file at this path is deleted first.</param>
/// <param name="crossPath">Forwarded to WriteClusters.</param>
/// <param name="charge">Charge state given to every generated cluster and feature, and used for the final lookup.</param>
/// <param name="minimumClusterSize">Forwarded to WriteClusters.</param>
public void TestClusterGeneration(string databasePath, string crossPath, int charge, int minimumClusterSize)
{
    File.Delete(databasePath);
    NHibernateUtil.ConnectToDatabase(databasePath, true);

    IDatasetDAO datasetCache = new DatasetDAOHibernate();
    IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
    IUmcDAO featureCache = new UmcDAOHibernate();

    // Creating a dataset
    Console.WriteLine("Creating dummy datasets");
    var datasets = new List<DatasetInformation>();
    var total = 10;
    for (var i = 0; i < total; i++)
    {
        var dataset = new DatasetInformation();
        dataset.DatasetId = i;
        dataset.DatasetName = "test" + i;
        datasets.Add(dataset);
    }
    datasetCache.AddAll(datasets);
    datasets.Clear();
    datasets = datasetCache.FindAll();

    // Create features
    Console.WriteLine("Creating features");
    var features = new List<UMCLight>();
    var clusters = new List<UMCClusterLight>();
    var x = new Random();
    var featureId = 0;

    for (var i = 0; i < 100; i++)
    {
        var cluster = new UMCClusterLight();
        cluster.Id = i;
        cluster.AmbiguityScore = i;
        cluster.Tightness = i;

        // The feature count stays below the dataset count, so the unique-id draw below always terminates.
        var N = x.Next(1, total);
        cluster.ChargeState = charge;

        var hash = new HashSet<int>();
        for (var j = 0; j < N; j++)
        {
            // Keep drawing until the dataset id is new for this cluster;
            // HashSet.Add reports false when the id was already taken.
            int did;
            do
            {
                did = x.Next(0, total);
            } while (!hash.Add(did));

            var feature = new UMCLight();
            feature.GroupId = did;
            feature.Id = featureId++;
            feature.ChargeState = charge;
            feature.MassMonoisotopic = x.NextDouble();
            feature.Net = x.NextDouble();

            // NOTE(review): Abundance was previously assigned to itself (a no-op). It is now set to
            // the same random value as AbundanceSum, the presumed intent - confirm.
            var abundance = x.Next(100, 200);
            feature.AbundanceSum = abundance;
            feature.Abundance = abundance;

            feature.ClusterId = cluster.Id;

            cluster.AddChildFeature(feature);
            features.Add(feature);
        }

        cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
        clusters.Add(cluster);
    }

    featureCache.AddAll(features);
    clusterCache.AddAll(clusters);

    // FindAll is still run against the database; only the charge-filtered result is used afterwards.
    clusters = clusterCache.FindAll();

    Console.WriteLine("Find all clusters");
    clusters = clusterCache.FindByCharge(charge);

    WriteClusters(datasets,
        clusters,
        minimumClusterSize,
        charge,
        crossPath,
        databasePath,
        300000);
}