/// <summary>
/// Clusters the features read from <paramref name="path"/> with an average linkage clusterer,
/// numbers the resulting clusters sequentially starting at zero, and hands them to WriteClusters.
/// </summary>
/// <param name="path">Location passed to ReadFeatures to load the input features.</param>
/// <param name="dist">Distance metric used to create the clusterer's distance function.</param>
/// <param name="useBoxMethod">Value assigned to the clusterer's ShouldTestClustersWithinTolerance flag.</param>
public void TestRestrictiveBoxMethod(string path, DistanceMetric dist, bool useBoxMethod)
{
    var features = ReadFeatures(path);

    // Configure the clusterer step by step rather than through a nested initializer.
    var clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
    clusterer.ShouldTestClustersWithinTolerance = useBoxMethod;
    clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
    clusterer.Parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(dist);
    clusterer.Parameters.OnlyClusterSameChargeStates = true;
    clusterer.Parameters.Tolerances.Mass = 10;
    clusterer.Parameters.Tolerances.DriftTime = .3;
    clusterer.Parameters.Tolerances.Net = .03;

    var clusters = clusterer.Cluster(features);

    // Assign ids in the order the clusterer returned the clusters.
    var nextId = 0;
    foreach (var cluster in clusters)
    {
        cluster.Id = nextId++;
    }

    WriteClusters(clusters);
}
/// <summary>
/// Creates the clusterer implementation that corresponds to the requested algorithm type.
/// </summary>
/// <param name="clusterType">Algorithm whose clusterer should be instantiated.</param>
/// <returns>A new clusterer instance for <paramref name="clusterType"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="clusterType"/> is not one of the algorithm types handled below.
/// </exception>
public IClusterer<T, U> Create(GenericClusteringAlgorithmType clusterType)
{
    switch (clusterType)
    {
        case GenericClusteringAlgorithmType.AverageLinkage:
            return new UMCAverageLinkageClusterer<T, U>();

        case GenericClusteringAlgorithmType.Centroid:
            return new UMCCentroidClusterer<T, U>();

        case GenericClusteringAlgorithmType.SingleLinkage:
            return new UMCSingleLinkageClusterer<T, U>();

        case GenericClusteringAlgorithmType.Prims:
            return new UMCPrimsClustering<T, U>();

        case GenericClusteringAlgorithmType.BinarySearchTree:
            return new Clustering.MsFeatureTreeClusterer<T, U>();

        default:
            // The single-string constructor interprets its argument as the parameter name,
            // so the descriptive text must go through the (paramName, actualValue, message) overload.
            throw new ArgumentOutOfRangeException(
                "clusterType",
                clusterType,
                string.Format("Cannot create generic {0} clusterer.", clusterType));
    }
}
/// <summary>
/// Clusters the features read from <paramref name="path"/> and prints the sorted
/// feature-to-centroid distances, each paired with its one-based position in the sorted list.
/// </summary>
/// <param name="path">Location passed to ReadFeatures to load the input features.</param>
/// <param name="dist">Distance metric used to create the clusterer's distance function.</param>
public void TestDistanceDistributions(string path, DistanceMetric dist)
{
    var features = ReadFeatures(path);

    var clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
    clusterer.ShouldTestClustersWithinTolerance = false;
    clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
    clusterer.Parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(dist);
    clusterer.Parameters.OnlyClusterSameChargeStates = true;
    clusterer.Parameters.Tolerances.Mass = 10;
    clusterer.Parameters.Tolerances.DriftTime = .3;
    clusterer.Parameters.Tolerances.Net = .03;

    var clusters = clusterer.Cluster(features);

    // NOTE(review): this list is shared by every cluster, so each pass below re-sorts and
    // re-prints all distances gathered so far - confirm the cumulative output is intended.
    var distances = new List<double>();

    foreach (var cluster in clusters)
    {
        // Stand-in feature carrying the cluster's mass, NET and drift time.
        var centroid = new UMCLight
        {
            MassMonoisotopicAligned = cluster.MassMonoisotopic,
            Net = cluster.Net,
            DriftTime = cluster.DriftTime
        };

        var measure = clusterer.Parameters.DistanceFunction;
        foreach (var member in cluster.Features)
        {
            distances.Add(measure(member, centroid));
        }

        distances.Sort();

        for (var position = 0; position < distances.Count; position++)
        {
            Console.WriteLine("{0},{1}", distances[position], position + 1);
        }
    }
}
/// <summary>
/// Creates the LC-MS feature clusterer that matches the requested algorithm type.
/// </summary>
/// <param name="clusterType">Algorithm whose clusterer should be instantiated.</param>
/// <returns>
/// A new clusterer for <paramref name="clusterType"/>, or null when the value is not
/// one of the algorithm types handled below.
/// </returns>
public static IClusterer<UMCLight, UMCClusterLight> Create(LcmsFeatureClusteringAlgorithmType clusterType)
{
    switch (clusterType)
    {
        case LcmsFeatureClusteringAlgorithmType.AverageLinkage:
            return new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

        case LcmsFeatureClusteringAlgorithmType.Centroid:
            return new UMCCentroidClusterer<UMCLight, UMCClusterLight>();

        case LcmsFeatureClusteringAlgorithmType.SingleLinkage:
            return new UMCSingleLinkageClusterer<UMCLight, UMCClusterLight>();

        case LcmsFeatureClusteringAlgorithmType.Prims:
            return new UMCPrimsClustering<UMCLight, UMCClusterLight>();

        default:
            // Unhandled algorithm types yield no clusterer; callers receive null.
            return null;
    }
}
/// <summary>
/// Clusters the features loaded from <paramref name="path"/> using average linkage with a
/// weighted Euclidean distance and prints every clustered feature as a comma-separated row.
/// </summary>
/// <param name="path">Data file path, relative to TestPaths.TestFilesDirectory.</param>
//[TestCase(@"ClusterData\clusterData-merged-nodelin.txt")]
public void TestWeightedAverageLinkage(string path)
{
    Console.WriteLine("Test: " + path);
    var features = GetClusterData(Path.Combine(TestPaths.TestFilesDirectory, path));

    Assert.IsNotEmpty(features);

    // NOTE(review): this cluster is never read afterwards; it is kept because AddChildFeature
    // may alter the features themselves - confirm before removing.
    var cluster = new UMCClusterLight();
    cluster.Id = features[0].Id;
    features.ForEach(x => cluster.AddChildFeature(x));

    var average = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
    average.Parameters = new FeatureClusterParameters<UMCLight>();
    average.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
    average.Parameters.Tolerances = new Algorithms.FeatureTolerances();

    var distance = new WeightedEuclideanDistance<UMCLight>();
    average.Parameters.DistanceFunction = distance.EuclideanDistance;

    var clusters = average.Cluster(features);

    // Header uses the same delimiter and column order as the rows written below
    // (previously it was tab-delimited and listed mass before net).
    Console.WriteLine("dataset,feature,net,mass,drift");
    foreach (var newCluster in clusters)
    {
        foreach (var feature in newCluster.Features)
        {
            Console.WriteLine("{0},{1},{2},{3},{4}",
                feature.GroupId,
                feature.Id,
                feature.Net,
                feature.MassMonoisotopicAligned,
                feature.DriftTime);
        }
    }
}
/// <summary>
/// The main entry point for the application. Loads the features of one charge state from the
/// database, optionally restricts them to a list of datasets, clusters them with average
/// linkage, and writes the clusters to the cross-tab file.
/// </summary>
/// <param name="args">
/// databasePath, chargeState, crossTabPath and, optionally, the path of a text file whose
/// lines name the datasets to keep.
/// </param>
/// <returns>0 when the analysis succeeds; 1 on invalid arguments or any failure.</returns>
static int Main(string[] args)
{
    // NOTE(review): the main window handle is what gets passed to SetConsoleMode - confirm this is the handle it expects.
    var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
    SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

    try
    {
        // databasePath, chargeState and crossTabPath are all read below, so three
        // arguments are required (the previous "< 2" check let args[2] go out of range).
        if (args.Length < 3)
        {
            Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
            // Regular (non-verbatim) literal so that \t is emitted as a tab character.
            Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
            return 1;
        }

        // Setup the analysis processing
        var databasePath = args[0];
        var databaseName = Path.GetFileNameWithoutExtension(databasePath);
        var path = Path.GetDirectoryName(databasePath);
        var crossPath = args[2];
        var chargeState = Convert.ToInt32(args[1]);

        List<string> datasetList = null;
        if (args.Length == 4)
        {
            datasetList = File.ReadAllLines(args[3]).ToList();
        }

        if (path == null)
        {
            Console.WriteLine(@"The directory path is invalid");
            return 1;
        }

        NHibernateUtil.ConnectToDatabase(databasePath, false);

        IDatasetDAO datasetCache = new DatasetDAOHibernate();
        var dateSuffix = AnalysisPathUtils.BuildDateSuffix();
        Logger.LogPath = Path.Combine(path, string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

        Logger.PrintMessage("Find all datasets", true);
        var datasets = datasetCache.FindAll();
        Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

        // Create the clustering algorithm - average linkage
        IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

        // Create the DAO object to extract the features
        var database = new UmcAdoDAO { DatabasePath = databasePath };
        IUmcDAO featureDao = database;

        Logger.PrintMessage("Extracting Features", true);
        var tempFeatures = featureDao.FindByCharge(chargeState);
        Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

        var features = new List<UMCLight>();
        if (datasetList != null)
        {
            // Dataset names are matched case-insensitively by lower-casing both sides.
            var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());
            var focusedDatasetList = new Dictionary<int, DatasetInformation>();

            foreach (var name in datasetList)
            {
                var key = name.ToLower();
                if (!featuremap.ContainsKey(key))
                {
                    throw new Exception("Didn't find the dataset required..." + name);
                }

                Logger.PrintMessage("Using dataset: " + name);
                var dataset = featuremap[key];
                focusedDatasetList.Add(dataset.DatasetId, dataset);
            }

            // Keep only the features that belong to one of the requested datasets.
            features.AddRange(tempFeatures.Where(feature => focusedDatasetList.ContainsKey(feature.GroupId)));
            Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
        }
        else
        {
            features = tempFeatures;
        }

        // Handle logging progress.
        clusterer.Progress += clusterer_Progress;

        clusterer.Parameters.Tolerances.DriftTime = .3;
        clusterer.Parameters.Tolerances.Mass = 16;
        clusterer.Parameters.Tolerances.Net = .014;
        clusterer.Parameters.OnlyClusterSameChargeStates = true;
        clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
        clusterer.Parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(DistanceMetric.WeightedEuclidean);

        // Then cluster
        var clusterWriter = new UmcClusterWriter();
        IClusterWriter<UMCClusterLight> writer = clusterWriter; //new UMCClusterDummyWriter();

        try
        {
            clusterWriter.Open(crossPath);
            clusterWriter.WriteHeader(datasets);

            clusterer.ClusterAndProcess(features, writer);
            Logger.PrintMessage("", true);
            Logger.PrintMessage("ANALYSIS SUCCESS", true);
            return 0;
        }
        catch (Exception ex)
        {
            Logger.PrintMessage("Unhandled Error: " + ex.Message);
            for (var inner = ex.InnerException; inner != null; inner = inner.InnerException)
            {
                Logger.PrintMessage("Inner Exception: " + inner.Message);
            }
            Logger.PrintMessage("Stack: " + ex.StackTrace);
            Logger.PrintMessage("");
            Logger.PrintMessage("ANALYSIS FAILED");
            return 1;
        }
        finally
        {
            // The writer is closed whether clustering succeeded or failed.
            clusterWriter.Close();
        }
    }
    catch (Exception ex)
    {
        Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
        for (var inner = ex.InnerException; inner != null; inner = inner.InnerException)
        {
            Logger.PrintMessage("Inner Exception: " + inner.Message);
        }
        Logger.PrintMessage("Stack: " + ex.StackTrace, true);
        Logger.PrintMessage("");
        Logger.PrintMessage("ANALYSIS FAILED");
        return 1;
    }
}
/// <summary>
/// Clusters the features loaded from <paramref name="path"/> using average linkage with a
/// Euclidean distance, prints the cluster count, and prints a sample of the features of
/// each cluster together with the id assigned to that cluster.
/// </summary>
/// <param name="path">Data file path, relative to TestPaths.TestFilesDirectory.</param>
//[TestCase(@"ClusterData\clusterData-single-1500.txt")]
public void TestAverageLinkage(string path)
{
    Console.WriteLine("Average Linkage Test: " + path);
    var features = GetClusterData(Path.Combine(TestPaths.TestFilesDirectory, path));

    Assert.IsNotEmpty(features);

    // NOTE(review): this cluster is never read afterwards; it is kept because AddChildFeature
    // may alter the features themselves - confirm before removing.
    var cluster = new UMCClusterLight();
    cluster.Id = features[0].Id;
    features.ForEach(x => cluster.AddChildFeature(x));

    var average = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
    average.Parameters = new FeatureClusterParameters<UMCLight>();
    average.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Median;
    average.Parameters.Tolerances = new Algorithms.FeatureTolerances();
    average.Parameters.Tolerances.Net = .02;
    average.Parameters.Tolerances.Mass = 6;
    average.Parameters.Tolerances.DriftTime = .3;

    // Clustering runs with the plain Euclidean metric. The weighted metric was previously
    // assigned first and immediately overwritten; it is now used only for the
    // per-feature distances computed below.
    var distance = new WeightedEuclideanDistance<UMCLight>();
    var euclid = new EuclideanDistanceMetric<UMCLight>();
    average.Parameters.DistanceFunction = euclid.EuclideanDistance;

    var clusters = average.Cluster(features);
    Console.WriteLine("Clusters = {0}", clusters.Count);

    var id = 1;
    foreach (var testCluster in clusters)
    {
        testCluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);

        var distances = new List<double>();

        // Show a sampling of 5 results: print every threshold-th feature (at least every one).
        var threshold = Math.Max(1, testCluster.Features.Count / 5);

        testCluster.Id = id++;

        var featureID = 0;
        foreach (var feature in testCluster.Features)
        {
            featureID++;
            if (featureID % threshold == 0)
            {
                Console.WriteLine("{0},{1},{2},{3}",
                    feature.Net,
                    feature.MassMonoisotopicAligned,
                    feature.DriftTime,
                    testCluster.Id);
            }

            var newDistance = distance.EuclideanDistance(feature, testCluster);
            distances.Add(newDistance);
        }

        //Console.WriteLine();
        //Console.WriteLine("Distances");
        //distances.ForEach(x => Console.WriteLine(x));
        //Console.WriteLine();
    }
}
/// <summary>
/// Runs average linkage clustering over the features found at <paramref name="path"/>,
/// gives each resulting cluster a sequential zero-based id, and writes the clusters out.
/// </summary>
/// <param name="path">Location passed to ReadFeatures to load the input features.</param>
/// <param name="dist">Distance metric from which the clusterer's distance function is created.</param>
/// <param name="useBoxMethod">Value stored in the clusterer's ShouldTestClustersWithinTolerance flag.</param>
public void TestRestrictiveBoxMethod(string path, DistanceMetric dist, bool useBoxMethod)
{
    var features = ReadFeatures(path);

    var clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
    clusterer.ShouldTestClustersWithinTolerance = useBoxMethod;

    // Clustering parameters.
    clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
    clusterer.Parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(dist);
    clusterer.Parameters.OnlyClusterSameChargeStates = true;

    // Tolerances.
    clusterer.Parameters.Tolerances.Mass = 10;
    clusterer.Parameters.Tolerances.DriftTime = .3;
    clusterer.Parameters.Tolerances.Net = .03;

    var clusters = clusterer.Cluster(features);

    // Ids follow the order in which the clusterer returned the clusters.
    var sequence = 0;
    foreach (var item in clusters)
    {
        item.Id = sequence;
        sequence++;
    }

    WriteClusters(clusters);
}
/// <summary>
/// Clusters the features found at <paramref name="path"/> and writes the sorted distances
/// between features and their cluster centroid to the console, one "distance,rank" pair per line.
/// </summary>
/// <param name="path">Location passed to ReadFeatures to load the input features.</param>
/// <param name="dist">Distance metric from which the clusterer's distance function is created.</param>
public void TestDistanceDistributions(string path, DistanceMetric dist)
{
    var features = ReadFeatures(path);

    var clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
    clusterer.ShouldTestClustersWithinTolerance = false;
    clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
    clusterer.Parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(dist);
    clusterer.Parameters.OnlyClusterSameChargeStates = true;
    clusterer.Parameters.Tolerances.Mass = 10;
    clusterer.Parameters.Tolerances.DriftTime = .3;
    clusterer.Parameters.Tolerances.Net = .03;

    var clusters = clusterer.Cluster(features);

    // NOTE(review): the list is created once for all clusters, so every iteration below
    // re-sorts and re-prints the distances accumulated so far - confirm this is intended.
    var allDistances = new List<double>();

    foreach (var cluster in clusters)
    {
        // Represent the cluster as a feature so it can be fed to the distance function.
        var center = new UMCLight
        {
            MassMonoisotopicAligned = cluster.MassMonoisotopic,
            Net = cluster.Net,
            DriftTime = cluster.DriftTime
        };

        var distanceTo = clusterer.Parameters.DistanceFunction;
        foreach (var child in cluster.Features)
        {
            allDistances.Add(distanceTo(child, center));
        }

        allDistances.Sort();

        // The rank is the one-based position within the sorted list.
        for (var index = 0; index < allDistances.Count; index++)
        {
            var rank = index + 1;
            Console.WriteLine("{0},{1}", allDistances[index], rank);
        }
    }
}
/// <summary>
/// The main entry point for the application. Loads the features of one charge state from the
/// database, optionally restricts them to a list of datasets, clusters them with average
/// linkage, and writes the clusters to the cross-tab file.
/// </summary>
/// <param name="args">
/// databasePath, chargeState, crossTabPath and, optionally, the path of a text file whose
/// lines name the datasets to keep.
/// </param>
/// <returns>0 when the analysis succeeds; 1 on invalid arguments or any failure.</returns>
static int Main(string[] args)
{
    // NOTE(review): the main window handle is what gets passed to SetConsoleMode - confirm this is the handle it expects.
    var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
    SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

    try
    {
        // Three positional arguments are consumed below (args[0], args[1], args[2]);
        // the previous "< 2" check allowed args[2] to be read out of range.
        if (args.Length < 3)
        {
            Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
            // Regular (non-verbatim) literal so that \t is emitted as a tab character.
            Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
            return 1;
        }

        // Setup the analysis processing
        var databasePath = args[0];
        var databaseName = Path.GetFileNameWithoutExtension(databasePath);
        var path = Path.GetDirectoryName(databasePath);
        var crossPath = args[2];
        var chargeState = Convert.ToInt32(args[1]);

        List<string> datasetList = null;
        if (args.Length == 4)
        {
            datasetList = File.ReadAllLines(args[3]).ToList();
        }

        if (path == null)
        {
            Console.WriteLine(@"The directory path is invalid");
            return 1;
        }

        NHibernateUtil.ConnectToDatabase(databasePath, false);

        IDatasetDAO datasetCache = new DatasetDAOHibernate();
        var dateSuffix = AnalysisPathUtils.BuildDateSuffix();
        Logger.LogPath = Path.Combine(path, string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

        Logger.PrintMessage("Find all datasets", true);
        var datasets = datasetCache.FindAll();
        Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

        // Create the clustering algorithm - average linkage
        IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

        // Create the DAO object to extract the features
        var database = new UmcAdoDAO { DatabasePath = databasePath };
        IUmcDAO featureDao = database;

        Logger.PrintMessage("Extracting Features", true);
        var tempFeatures = featureDao.FindByCharge(chargeState);
        Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

        var features = new List<UMCLight>();
        if (datasetList != null)
        {
            // Dataset names are matched case-insensitively by lower-casing both sides.
            var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());
            var focusedDatasetList = new Dictionary<int, DatasetInformation>();

            foreach (var name in datasetList)
            {
                var key = name.ToLower();
                if (!featuremap.ContainsKey(key))
                {
                    throw new Exception("Didn't find the dataset required..." + name);
                }

                Logger.PrintMessage("Using dataset: " + name);
                var dataset = featuremap[key];
                focusedDatasetList.Add(dataset.DatasetId, dataset);
            }

            // Keep only the features that belong to one of the requested datasets.
            features.AddRange(tempFeatures.Where(feature => focusedDatasetList.ContainsKey(feature.GroupId)));
            Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
        }
        else
        {
            features = tempFeatures;
        }

        // Handle logging progress.
        clusterer.Progress += clusterer_Progress;

        clusterer.Parameters.Tolerances.DriftTime = .3;
        clusterer.Parameters.Tolerances.Mass = 16;
        clusterer.Parameters.Tolerances.Net = .014;
        clusterer.Parameters.OnlyClusterSameChargeStates = true;
        clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
        clusterer.Parameters.DistanceFunction =
            PNNLOmics.Algorithms.Distance.DistanceFactory<UMCLight>.CreateDistanceFunction(
                PNNLOmics.Algorithms.Distance.DistanceMetric.WeightedEuclidean);

        // Then cluster
        var clusterWriter = new UmcClusterWriter();
        IClusterWriter<UMCClusterLight> writer = clusterWriter; //new UMCClusterDummyWriter();

        try
        {
            clusterWriter.Open(crossPath);
            clusterWriter.WriteHeader(datasets);

            clusterer.ClusterAndProcess(features, writer);
            Logger.PrintMessage("", true);
            Logger.PrintMessage("ANALYSIS SUCCESS", true);
            return 0;
        }
        catch (Exception ex)
        {
            Logger.PrintMessage("Unhandled Error: " + ex.Message);
            for (var inner = ex.InnerException; inner != null; inner = inner.InnerException)
            {
                Logger.PrintMessage("Inner Exception: " + inner.Message);
            }
            Logger.PrintMessage("Stack: " + ex.StackTrace);
            Logger.PrintMessage("");
            Logger.PrintMessage("ANALYSIS FAILED");
            return 1;
        }
        finally
        {
            // The writer is closed whether clustering succeeded or failed.
            clusterWriter.Close();
        }
    }
    catch (Exception ex)
    {
        Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
        for (var inner = ex.InnerException; inner != null; inner = inner.InnerException)
        {
            Logger.PrintMessage("Inner Exception: " + inner.Message);
        }
        Logger.PrintMessage("Stack: " + ex.StackTrace, true);
        Logger.PrintMessage("");
        Logger.PrintMessage("ANALYSIS FAILED");
        return 1;
    }
}