/// <summary>
/// Connects to the database at <paramref name="databasePath"/>, loads all datasets
/// and the clusters found for <paramref name="charge"/>, and hands them to
/// <c>WriteClusters</c>.
/// </summary>
/// <param name="databasePath">Path passed to <c>NHibernateUtil.ConnectToDatabase</c> and on to <c>WriteClusters</c>.</param>
/// <param name="crossPath">Path forwarded unchanged to <c>WriteClusters</c>.</param>
/// <param name="charge">Charge state used for the cluster lookup.</param>
/// <param name="minimumClusterSize">Forwarded unchanged to <c>WriteClusters</c>.</param>
public void TestClusterWriting(string databasePath, string crossPath, int charge, int minimumClusterSize)
{
    NHibernateUtil.ConnectToDatabase(databasePath, false);

    // All three DAOs are constructed in the same order as before; the feature DAO
    // is never queried in this method.
    IDatasetDAO datasetDao = new DatasetDAOHibernate();
    IUmcClusterDAO clusterDao = new UmcClusterDAOHibernate();
    IUmcDAO featureDao = new UmcDAOHibernate();

    Console.WriteLine("Find all datasets");
    var allDatasets = datasetDao.FindAll();

    Console.WriteLine("Find all clusters");
    var chargeClusters = clusterDao.FindByCharge(charge);

    WriteClusters(
        allDatasets,
        chargeClusters,
        minimumClusterSize,
        charge,
        crossPath,
        databasePath,
        300000);
}
/// <summary>
/// Connects to the database at <paramref name="databasePath"/>, loads all datasets
/// and the clusters found for <paramref name="charge"/>, and passes them to
/// <c>WriteClusters</c> with 50000 as the final argument.
/// </summary>
/// <param name="databasePath">Path passed to <c>NHibernateUtil.ConnectToDatabase</c> and on to <c>WriteClusters</c>.</param>
/// <param name="crossPath">Path forwarded unchanged to <c>WriteClusters</c>.</param>
/// <param name="charge">Charge state used for the cluster lookup.</param>
/// <param name="minimumClusterSize">Forwarded unchanged to <c>WriteClusters</c>.</param>
public void CreateCrossTab(string databasePath, string crossPath, int charge, int minimumClusterSize)
{
    NHibernateUtil.ConnectToDatabase(databasePath, false);

    // Construction order of the DAOs is kept; the feature DAO is not used afterwards.
    IDatasetDAO datasetDao = new DatasetDAOHibernate();
    IUmcClusterDAO clusterDao = new UmcClusterDAOHibernate();
    IUmcDAO featureDao = new UmcDAOHibernate();

    Console.WriteLine("Find all datasets");
    var allDatasets = datasetDao.FindAll();

    Console.WriteLine("Find all clusters");
    var chargeClusters = clusterDao.FindByCharge(charge);

    WriteClusters(
        allDatasets,
        chargeClusters,
        minimumClusterSize,
        charge,
        crossPath,
        databasePath,
        50000);
}
/// <summary>
/// Writes a two-column listing (dataset name, dataset id) of every dataset in the
/// database to the file <paramref name="crossPath"/> + ".csv".
/// </summary>
/// <param name="databasePath">Path passed to <c>NHibernateUtil.ConnectToDatabase</c>.</param>
/// <param name="crossPath">Output path without extension; ".csv" is appended.</param>
/// <param name="charge">Not used by this method.</param>
/// <param name="minimumClusterSize">Not used by this method.</param>
public void CreateDatasetMap(string databasePath, string crossPath, int charge, int minimumClusterSize)
{
    NHibernateUtil.ConnectToDatabase(databasePath, false);

    // The cluster and feature DAOs are constructed but never queried here.
    IDatasetDAO datasetDao = new DatasetDAOHibernate();
    IUmcClusterDAO clusterDao = new UmcClusterDAOHibernate();
    IUmcDAO featureDao = new UmcDAOHibernate();

    Console.WriteLine("Find all datasets");
    var allDatasets = datasetDao.FindAll();

    var outputFile = crossPath + ".csv";
    using (TextWriter output = File.CreateText(outputFile))
    {
        // Header row, followed by one row per dataset.
        output.WriteLine("Dataset, Dataset Id");
        foreach (var dataset in allDatasets)
        {
            output.WriteLine("{0},{1}", dataset.DatasetName, dataset.DatasetId);
        }
    }
}
/// <summary>
/// The main entry point for the application.
/// </summary>
/// <param name="args">
/// Command line arguments: <c>databasePath chargeState crossTabPath [datasetListFile]</c>.
/// When exactly four arguments are given, the fourth is read as a text file with one
/// dataset name per line; only features whose <c>GroupId</c> belongs to one of those
/// datasets are clustered. Names are matched after lower-casing.
/// </param>
/// <returns>
/// 0 when clustering and writing finish without an exception; 1 when the arguments are
/// insufficient, the database directory cannot be determined, or any exception is caught.
/// </returns>
static int Main(string[] args)
{
    // NOTE(review): this passes the process main-window handle to SetConsoleMode;
    // confirm that is the handle the call expects.
    var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
    SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

    try
    {
        // args[0], args[1] and args[2] are all read below, so anything shorter than
        // three arguments must print the usage text instead of indexing past the end.
        if (args.Length < 3)
        {
            Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
            // Regular (non-verbatim) string so that \t is emitted as a tab character.
            Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
            return 1;
        }

        // Setup the analysis processing
        var databasePath = args[0];
        var databaseName = Path.GetFileNameWithoutExtension(databasePath);
        var path = Path.GetDirectoryName(databasePath);
        var crossPath = args[2];
        var chargeState = Convert.ToInt32(args[1]);

        List<string> datasetList = null;
        if (args.Length == 4)
        {
            datasetList = File.ReadAllLines(args[3]).ToList();
        }

        if (path == null)
        {
            Console.WriteLine(@"The directory path is invalid");
            return 1;
        }

        NHibernateUtil.ConnectToDatabase(databasePath, false);
        IDatasetDAO datasetCache = new DatasetDAOHibernate();

        // The log file is placed in the database directory and named after the
        // database, the charge state and a date suffix.
        var dateSuffix = AnalysisPathUtils.BuildDateSuffix();
        Logger.LogPath = Path.Combine(
            path,
            string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

        Logger.PrintMessage("Find all datasets", true);
        var datasets = datasetCache.FindAll();
        Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

        // Create the clustering algorithm - average linkage
        IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

        // Create the DAO object to extract the features
        var database = new UmcAdoDAO { DatabasePath = databasePath };
        IUmcDAO featureDao = database;

        Logger.PrintMessage("Extracting Features", true);
        var tempFeatures = featureDao.FindByCharge(chargeState);
        Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

        var features = new List<UMCLight>();
        if (datasetList != null)
        {
            // Index the known datasets by lower-cased name, then resolve each requested name.
            var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());
            var focusedDatasetList = new Dictionary<int, DatasetInformation>();
            foreach (var name in datasetList)
            {
                DatasetInformation requested;
                if (!featuremap.TryGetValue(name.ToLower(), out requested))
                {
                    throw new Exception("Didn't find the dataset required..." + name);
                }

                Logger.PrintMessage("Using dataset: " + name);
                focusedDatasetList.Add(requested.DatasetId, requested);
            }

            // Keep only the features that belong to one of the requested datasets.
            features.AddRange(tempFeatures.Where(feature => focusedDatasetList.ContainsKey(feature.GroupId)));
            Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
        }
        else
        {
            features = tempFeatures;
        }

        // Handle logging progress.
        clusterer.Progress += clusterer_Progress;

        clusterer.Parameters.Tolerances.DriftTime = .3;
        clusterer.Parameters.Tolerances.Mass = 16;
        clusterer.Parameters.Tolerances.Net = .014;
        clusterer.Parameters.OnlyClusterSameChargeStates = true;
        clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
        clusterer.Parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(DistanceMetric.WeightedEuclidean);

        // Then cluster
        var clusterWriter = new UmcClusterWriter();
        IClusterWriter<UMCClusterLight> writer = clusterWriter;
        try
        {
            clusterWriter.Open(crossPath);
            clusterWriter.WriteHeader(datasets);
            clusterer.ClusterAndProcess(features, writer);

            Logger.PrintMessage("", true);
            Logger.PrintMessage("ANALYSIS SUCCESS", true);
            return 0;
        }
        catch (Exception ex)
        {
            Logger.PrintMessage("Unhandled Error: " + ex.Message);
            for (var innerEx = ex.InnerException; innerEx != null; innerEx = innerEx.InnerException)
            {
                Logger.PrintMessage("Inner Exception: " + innerEx.Message);
            }
            Logger.PrintMessage("Stack: " + ex.StackTrace);
            Logger.PrintMessage("");
            Logger.PrintMessage("ANALYSIS FAILED");
            return 1;
        }
        finally
        {
            // Runs on both the success and the failure path.
            clusterWriter.Close();
        }
    }
    catch (Exception ex)
    {
        Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
        for (var innerEx = ex.InnerException; innerEx != null; innerEx = innerEx.InnerException)
        {
            Logger.PrintMessage("Inner Exception: " + innerEx.Message);
        }
        Logger.PrintMessage("Stack: " + ex.StackTrace, true);
        Logger.PrintMessage("");
        Logger.PrintMessage("ANALYSIS FAILED");
        return 1;
    }
}
/// <summary>
/// Deletes and recreates the database at <paramref name="databasePath"/>, fills it with
/// 10 dummy datasets and 100 randomly populated clusters of the given charge, then reads
/// the clusters back by charge and passes them to <c>WriteClusters</c>.
/// </summary>
/// <param name="databasePath">Database file; deleted before connecting.</param>
/// <param name="crossPath">Path forwarded unchanged to <c>WriteClusters</c>.</param>
/// <param name="charge">Charge state assigned to every generated cluster and feature.</param>
/// <param name="minimumClusterSize">Forwarded unchanged to <c>WriteClusters</c>.</param>
public void TestClusterGeneration(string databasePath, string crossPath, int charge, int minimumClusterSize)
{
    const int datasetCount = 10;
    const int clusterCount = 100;

    File.Delete(databasePath);
    NHibernateUtil.ConnectToDatabase(databasePath, true);

    IDatasetDAO datasetDao = new DatasetDAOHibernate();
    IUmcClusterDAO clusterDao = new UmcClusterDAOHibernate();
    IUmcDAO featureDao = new UmcDAOHibernate();

    // Creating a dataset
    Console.WriteLine("Creating dummy datasets");
    var datasets = new List<DatasetInformation>();
    for (var datasetIndex = 0; datasetIndex < datasetCount; datasetIndex++)
    {
        datasets.Add(new DatasetInformation
        {
            DatasetId = datasetIndex,
            DatasetName = "test" + datasetIndex
        });
    }

    // Store the datasets, then replace the local list with what the DAO returns.
    datasetDao.AddAll(datasets);
    datasets.Clear();
    datasets = datasetDao.FindAll();

    // Create features
    Console.WriteLine("Creating features");
    var allFeatures = new List<UMCLight>();
    var clusters = new List<UMCClusterLight>();
    var rng = new Random();
    var nextFeatureId = 0;

    for (var clusterIndex = 0; clusterIndex < clusterCount; clusterIndex++)
    {
        var cluster = new UMCClusterLight
        {
            Id = clusterIndex,
            AmbiguityScore = clusterIndex,
            Tightness = clusterIndex
        };

        // Between 1 and datasetCount - 1 members, each from a different dataset.
        var memberCount = rng.Next(1, datasetCount);

        // NOTE(review): Id is assigned a second time here; redundant unless the setter has side effects.
        cluster.Id = clusterIndex;
        cluster.ChargeState = charge;

        var usedDatasetIds = new HashSet<int>();
        for (var member = 0; member < memberCount; member++)
        {
            // Draw dataset ids until one is found that this cluster has not used yet.
            int datasetId;
            do
            {
                datasetId = rng.Next(0, datasetCount);
            }
            while (!usedDatasetIds.Add(datasetId));

            var feature = new UMCLight
            {
                GroupId = datasetId,
                Id = nextFeatureId++,
                ChargeState = charge,
                MassMonoisotopic = rng.NextDouble(),
                Net = rng.NextDouble(),
                AbundanceSum = rng.Next(100, 200)
            };

            // NOTE(review): self-assignment; presumably AbundanceSum was intended - confirm.
            feature.Abundance = feature.Abundance;
            feature.ClusterId = cluster.Id;

            cluster.AddChildFeature(feature);
            allFeatures.Add(feature);
        }

        cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
        clusters.Add(cluster);
    }

    featureDao.AddAll(allFeatures);
    clusterDao.AddAll(clusters);

    // The FindAll result is immediately replaced by the per-charge query below.
    clusters = clusterDao.FindAll();

    Console.WriteLine("Find all clusters");
    clusters = clusterDao.FindByCharge(charge);

    WriteClusters(datasets, clusters, minimumClusterSize, charge, crossPath, databasePath, 300000);
}
/// <summary>
/// Deletes and recreates the database at <paramref name="databasePath"/>, stores
/// <paramref name="totalDatasets"/> dummy datasets, and stores randomly generated,
/// unclustered features (ClusterId = -1) arranged in <paramref name="totalClusters"/> groups
/// that share a charge, a base NET, a base mass and a drift time.
/// </summary>
/// <param name="databasePath">Database file; deleted before connecting.</param>
/// <param name="totalDatasets">Number of dummy datasets to create.</param>
/// <param name="totalClusters">Number of feature groups to generate.</param>
public void TestCreateDummyDatabase(string databasePath, int totalDatasets, int totalClusters)
{
    File.Delete(databasePath);
    NHibernateUtil.ConnectToDatabase(databasePath, true);

    // The cluster DAO is constructed but not used in this method.
    IDatasetDAO datasetDao = new DatasetDAOHibernate();
    IUmcClusterDAO clusterDao = new UmcClusterDAOHibernate();
    IUmcDAO featureDao = new UmcDAOHibernate();

    // Creating a dataset
    Console.WriteLine("Creating dummy datasets");
    var datasets = new List<DatasetInformation>();
    var datasetCount = totalDatasets;
    for (var datasetIndex = 0; datasetIndex < datasetCount; datasetIndex++)
    {
        var dummy = new DatasetInformation();
        dummy.DatasetId = datasetIndex;
        dummy.DatasetName = "test" + datasetIndex;
        datasets.Add(dummy);
    }

    // Store the datasets, then replace the local list with what the DAO returns.
    datasetDao.AddAll(datasets);
    datasets.Clear();
    datasets = datasetDao.FindAll();

    // Create features
    Console.WriteLine("Creating features");
    var allFeatures = new List<UMCLight>();
    var clusters = new List<UMCClusterLight>();
    var rng = new Random();
    var nextFeatureId = 0;

    for (var groupIndex = 0; groupIndex < totalClusters; groupIndex++)
    {
        // Per-group values; the order of the random draws is significant.
        var memberCount = rng.Next(1, datasetCount);
        var groupCharge = rng.Next(1, 10);
        var usedDatasetIds = new HashSet<int>();
        var baseNet = rng.NextDouble();
        var baseMass = 400 + (1600 * rng.NextDouble());
        var driftTime = 60 * rng.NextDouble();

        for (var member = 0; member < memberCount; member++)
        {
            // Draw dataset ids until one is found that this group has not used yet.
            int datasetId;
            do
            {
                datasetId = rng.Next(0, datasetCount);
            }
            while (!usedDatasetIds.Add(datasetId));

            var feature = new UMCLight();
            feature.GroupId = datasetId;
            feature.Id = nextFeatureId++;
            feature.ChargeState = groupCharge;
            feature.MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(baseMass, 3);
            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;

            // NET is the group's base value plus a random offset in [0, 0.03).
            feature.Net = baseNet + 0.03 * rng.NextDouble();
            feature.NetAligned = feature.Net;
            // NOTE(review): self-assignment; no effect unless the property has side effects.
            feature.Net = feature.Net;
            feature.DriftTime = driftTime;
            feature.AbundanceSum = rng.Next(100, 200);
            // NOTE(review): self-assignment; presumably AbundanceSum was intended - confirm.
            feature.Abundance = feature.Abundance;
            feature.ClusterId = -1;

            allFeatures.Add(feature);
        }
    }

    featureDao.AddAll(allFeatures);
}
/// <summary>
/// Recreates the database at <paramref name="databasePath"/> from scratch and populates it
/// with <paramref name="totalDatasets"/> dummy datasets plus randomly generated features
/// that are not assigned to any cluster (ClusterId = -1). Features are generated in
/// <paramref name="totalClusters"/> groups sharing charge, base NET, base mass and drift time.
/// </summary>
/// <param name="databasePath">Database file; deleted before connecting.</param>
/// <param name="totalDatasets">Number of dummy datasets to create.</param>
/// <param name="totalClusters">Number of feature groups to generate.</param>
public void TestCreateDummyDatabase(string databasePath, int totalDatasets, int totalClusters)
{
    File.Delete(databasePath);
    NHibernateUtil.ConnectToDatabase(databasePath, true);

    // The cluster DAO is constructed but not used in this method.
    IDatasetDAO datasetStore = new DatasetDAOHibernate();
    IUmcClusterDAO clusterStore = new UmcClusterDAOHibernate();
    IUmcDAO featureStore = new UmcDAOHibernate();

    // Creating a dataset
    Console.WriteLine("Creating dummy datasets");
    var datasets = new List<DatasetInformation>();
    var total = totalDatasets;
    for (var id = 0; id < total; id++)
    {
        datasets.Add(new DatasetInformation { DatasetId = id, DatasetName = "test" + id });
    }

    // Store the datasets, then replace the local list with what the DAO returns.
    datasetStore.AddAll(datasets);
    datasets.Clear();
    datasets = datasetStore.FindAll();

    // Create features
    Console.WriteLine("Creating features");
    var generated = new List<UMCLight>();
    var clusters = new List<UMCClusterLight>();
    var random = new Random();
    var featureCounter = 0;

    for (var group = 0; group < totalClusters; group++)
    {
        // Per-group values; the order of the random draws is significant.
        var size = random.Next(1, total);
        var charge = random.Next(1, 10);
        var seen = new HashSet<int>();
        var net = random.NextDouble();
        var mass = 400 + (1600 * random.NextDouble());
        var dt = 60 * random.NextDouble();

        for (var k = 0; k < size; k++)
        {
            // Repeat the draw until HashSet.Add reports a dataset id not yet used by this group.
            int did;
            do
            {
                did = random.Next(0, total);
            }
            while (!seen.Add(did));

            var feature = new UMCLight
            {
                GroupId = did,
                Id = featureCounter++,
                ChargeState = charge,
                MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(mass, 3)
            };
            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;

            // NET is the group's base value plus a random offset in [0, 0.03).
            feature.Net = net + .03 * random.NextDouble();
            feature.NetAligned = feature.Net;
            // NOTE(review): self-assignment; no effect unless the property has side effects.
            feature.Net = feature.Net;
            feature.DriftTime = dt;
            feature.AbundanceSum = random.Next(100, 200);
            // NOTE(review): self-assignment; presumably AbundanceSum was intended - confirm.
            feature.Abundance = feature.Abundance;
            feature.ClusterId = -1;

            generated.Add(feature);
        }
    }

    featureStore.AddAll(generated);
}
/// <summary>
/// Recreates the database at <paramref name="databasePath"/>, stores 10 dummy datasets and
/// 100 clusters with randomly generated member features, then queries the clusters for
/// <paramref name="charge"/> and passes them to <c>WriteClusters</c>.
/// </summary>
/// <param name="databasePath">Database file; deleted before connecting.</param>
/// <param name="crossPath">Path forwarded unchanged to <c>WriteClusters</c>.</param>
/// <param name="charge">Charge state assigned to every generated cluster and feature.</param>
/// <param name="minimumClusterSize">Forwarded unchanged to <c>WriteClusters</c>.</param>
public void TestClusterGeneration(string databasePath, string crossPath, int charge, int minimumClusterSize)
{
    File.Delete(databasePath);
    NHibernateUtil.ConnectToDatabase(databasePath, true);

    IDatasetDAO datasetStore = new DatasetDAOHibernate();
    IUmcClusterDAO clusterStore = new UmcClusterDAOHibernate();
    IUmcDAO featureStore = new UmcDAOHibernate();

    // Creating a dataset
    Console.WriteLine("Creating dummy datasets");
    var datasets = new List<DatasetInformation>();
    var total = 10;
    for (var id = 0; id < total; id++)
    {
        datasets.Add(new DatasetInformation { DatasetId = id, DatasetName = "test" + id });
    }

    // Store the datasets, then replace the local list with what the DAO returns.
    datasetStore.AddAll(datasets);
    datasets.Clear();
    datasets = datasetStore.FindAll();

    // Create features
    Console.WriteLine("Creating features");
    var generated = new List<UMCLight>();
    var clusters = new List<UMCClusterLight>();
    var random = new Random();
    var featureCounter = 0;

    for (var c = 0; c < 100; c++)
    {
        var cluster = new UMCClusterLight { Id = c, AmbiguityScore = c, Tightness = c };

        // Number of member features: at least 1 and fewer than the number of datasets.
        var size = random.Next(1, total);

        // NOTE(review): Id is assigned a second time here; redundant unless the setter has side effects.
        cluster.Id = c;
        cluster.ChargeState = charge;

        var seen = new HashSet<int>();
        for (var k = 0; k < size; k++)
        {
            // Repeat the draw until HashSet.Add reports a dataset id not yet used by this cluster.
            int did;
            do
            {
                did = random.Next(0, total);
            }
            while (!seen.Add(did));

            var feature = new UMCLight
            {
                GroupId = did,
                Id = featureCounter++,
                ChargeState = charge,
                MassMonoisotopic = random.NextDouble(),
                Net = random.NextDouble(),
                AbundanceSum = random.Next(100, 200)
            };

            // NOTE(review): self-assignment; presumably AbundanceSum was intended - confirm.
            feature.Abundance = feature.Abundance;
            feature.ClusterId = cluster.Id;

            cluster.AddChildFeature(feature);
            generated.Add(feature);
        }

        cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
        clusters.Add(cluster);
    }

    featureStore.AddAll(generated);
    clusterStore.AddAll(clusters);

    // The FindAll result is immediately replaced by the per-charge query below.
    clusters = clusterStore.FindAll();

    Console.WriteLine("Find all clusters");
    clusters = clusterStore.FindByCharge(charge);

    WriteClusters(datasets, clusters, minimumClusterSize, charge, crossPath, databasePath, 300000);
}
/// <summary>
/// The main entry point for the application.
/// </summary>
/// <param name="args">
/// Command line arguments: <c>databasePath chargeState crossTabPath [datasetListFile]</c>.
/// When exactly four arguments are given, the fourth is read as a text file with one
/// dataset name per line; only features whose <c>GroupId</c> belongs to one of those
/// datasets are clustered. Names are matched after lower-casing.
/// </param>
/// <returns>
/// 0 when clustering and writing finish without an exception; 1 when the arguments are
/// insufficient, the database directory cannot be determined, or any exception is caught.
/// </returns>
static int Main(string[] args)
{
    // NOTE(review): this passes the process main-window handle to SetConsoleMode;
    // confirm that is the handle the call expects.
    var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
    SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

    try
    {
        // Three positional arguments are indexed below (args[0]..args[2]); reject
        // shorter command lines here rather than failing on args[2].
        if (args.Length < 3)
        {
            Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
            // Regular (non-verbatim) string so that \t is emitted as a tab character.
            Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
            return 1;
        }

        // Setup the analysis processing
        var databasePath = args[0];
        var databaseName = Path.GetFileNameWithoutExtension(databasePath);
        var path = Path.GetDirectoryName(databasePath);
        var crossPath = args[2];
        var chargeState = Convert.ToInt32(args[1]);

        List<string> datasetList = null;
        if (args.Length == 4)
        {
            datasetList = File.ReadAllLines(args[3]).ToList();
        }

        if (path == null)
        {
            Console.WriteLine(@"The directory path is invalid");
            return 1;
        }

        NHibernateUtil.ConnectToDatabase(databasePath, false);
        IDatasetDAO datasetCache = new DatasetDAOHibernate();

        // The log file is placed in the database directory and named after the
        // database, the charge state and a date suffix.
        var dateSuffix = AnalysisPathUtils.BuildDateSuffix();
        Logger.LogPath = Path.Combine(
            path,
            string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

        Logger.PrintMessage("Find all datasets", true);
        var datasets = datasetCache.FindAll();
        Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

        // Create the clustering algorithm - average linkage
        IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

        // Create the DAO object to extract the features
        var database = new UmcAdoDAO { DatabasePath = databasePath };
        IUmcDAO featureDao = database;

        Logger.PrintMessage("Extracting Features", true);
        var tempFeatures = featureDao.FindByCharge(chargeState);
        Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

        var features = new List<UMCLight>();
        if (datasetList != null)
        {
            // Index the known datasets by lower-cased name, then resolve each requested name.
            var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());
            var focusedDatasetList = new Dictionary<int, DatasetInformation>();
            foreach (var name in datasetList)
            {
                DatasetInformation requested;
                if (!featuremap.TryGetValue(name.ToLower(), out requested))
                {
                    throw new Exception("Didn't find the dataset required..." + name);
                }

                Logger.PrintMessage("Using dataset: " + name);
                focusedDatasetList.Add(requested.DatasetId, requested);
            }

            // Keep only the features that belong to one of the requested datasets.
            features.AddRange(tempFeatures.Where(feature => focusedDatasetList.ContainsKey(feature.GroupId)));
            Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
        }
        else
        {
            features = tempFeatures;
        }

        // Handle logging progress.
        clusterer.Progress += clusterer_Progress;

        clusterer.Parameters.Tolerances.DriftTime = .3;
        clusterer.Parameters.Tolerances.Mass = 16;
        clusterer.Parameters.Tolerances.Net = .014;
        clusterer.Parameters.OnlyClusterSameChargeStates = true;
        clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
        clusterer.Parameters.DistanceFunction =
            PNNLOmics.Algorithms.Distance.DistanceFactory<UMCLight>.CreateDistanceFunction(
                PNNLOmics.Algorithms.Distance.DistanceMetric.WeightedEuclidean);

        // Then cluster
        var clusterWriter = new UmcClusterWriter();
        IClusterWriter<UMCClusterLight> writer = clusterWriter;
        try
        {
            clusterWriter.Open(crossPath);
            clusterWriter.WriteHeader(datasets);
            clusterer.ClusterAndProcess(features, writer);

            Logger.PrintMessage("", true);
            Logger.PrintMessage("ANALYSIS SUCCESS", true);
            return 0;
        }
        catch (Exception ex)
        {
            Logger.PrintMessage("Unhandled Error: " + ex.Message);
            for (var innerEx = ex.InnerException; innerEx != null; innerEx = innerEx.InnerException)
            {
                Logger.PrintMessage("Inner Exception: " + innerEx.Message);
            }
            Logger.PrintMessage("Stack: " + ex.StackTrace);
            Logger.PrintMessage("");
            Logger.PrintMessage("ANALYSIS FAILED");
            return 1;
        }
        finally
        {
            // Runs on both the success and the failure path.
            clusterWriter.Close();
        }
    }
    catch (Exception ex)
    {
        Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
        for (var innerEx = ex.InnerException; innerEx != null; innerEx = innerEx.InnerException)
        {
            Logger.PrintMessage("Inner Exception: " + innerEx.Message);
        }
        Logger.PrintMessage("Stack: " + ex.StackTrace, true);
        Logger.PrintMessage("");
        Logger.PrintMessage("ANALYSIS FAILED");
        return 1;
    }
}