/// <summary>
/// Loads the features of one charge state from a database, sorts them with
/// <c>m_massComparer</c>, splits the sorted list into partitions wherever the
/// ppm mass difference between consecutive features exceeds
/// <paramref name="massTolerance"/>, and times <c>CalculatePairwiseDistances</c>
/// on every partition that holds more than one feature.
/// Prints the number of single-feature partitions, then one "size TAB milliseconds"
/// line per timed partition, to the console.
/// </summary>
/// <param name="databasePath">Path assigned to the DAO's <c>DatabasePath</c>.</param>
/// <param name="chargeState">Charge state passed to <c>FindByCharge</c>.</param>
/// <param name="massTolerance">Maximum ppm difference between consecutive features of one partition.</param>
/// <param name="netTolerance">Forwarded unchanged to <c>CalculatePairwiseDistances</c>.</param>
/// <param name="driftTolerance">Forwarded unchanged to <c>CalculatePairwiseDistances</c>.</param>
/// <exception cref="NullReferenceException">A feature in the loaded list is null.</exception>
public void MassPartitionTest(string databasePath, int chargeState, double massTolerance, double netTolerance, double driftTolerance)
{
    var database = new UmcAdoDAO { DatabasePath = databasePath };

    Logger.PrintMessage("Extracting Features", true);
    var data = database.FindByCharge(chargeState);

    // Make sure there is no null UMC data in the input list.
    // FindIndex returns -1 when nothing matches, so every index >= 0 (including 0) is a hit.
    var nullIndex = data.FindIndex(x => x == null);
    if (nullIndex >= 0)
    {
        // Exception type kept as-is so existing callers that catch it keep working.
        throw new NullReferenceException("The feature at index " + nullIndex + " was null. Cannot process this data.");
    }

    // Sort on mass first: partitions are defined by gaps between mass-adjacent features.
    data.Sort(m_massComparer);

    // Index of the first feature of the partition currently being grown.
    var startUMCIndex = 0;
    var totalFeatures = data.Count;
    var singletons = 0;
    var sizes = new List<int>();
    var times = new List<double>();

    for (var i = 0; i < totalFeatures; i++)
    {
        // The last feature always closes the open partition; otherwise a partition is
        // closed when the ppm gap to the next feature exceeds the tolerance.
        // The sign of the difference is ignored because the data is sorted by mass.
        var closesPartition = i == totalFeatures - 1;
        if (!closesPartition)
        {
            var ppm = Math.Abs(FeatureLight.ComputeMassPPMDifference(data[i].MassMonoisotopicAligned,
                                                                     data[i + 1].MassMonoisotopicAligned));
            closesPartition = ppm > massTolerance;
        }

        if (!closesPartition)
        {
            continue;
        }

        if (startUMCIndex == i)
        {
            // The partition is the single feature at startUMCIndex: nothing else was
            // within the mass tolerance of it.
            singletons++;
        }
        else
        {
            // Only the elapsed time is of interest here; the returned distances are discarded.
            var timer = System.Diagnostics.Stopwatch.StartNew();
            CalculatePairwiseDistances(data, startUMCIndex, i, massTolerance, netTolerance, driftTolerance);
            timer.Stop();

            sizes.Add(i - startUMCIndex + 1);
            times.Add(timer.Elapsed.TotalMilliseconds);
        }

        startUMCIndex = i + 1;
    }

    Console.WriteLine("{0}", singletons);
    Console.WriteLine();

    for (var i = 0; i < sizes.Count; i++)
    {
        Console.WriteLine("{0}\t{1}", sizes[i], times[i]);
    }
}
/// <summary>
/// The main entry point for the application.
/// Loads the features of one charge state from the database, optionally restricts them
/// to the datasets named in a list file, clusters them, and writes the clusters to the
/// cross-tab file.
/// </summary>
/// <param name="args">
/// <c>databasePath chargeState crossTabPath [datasetListFile]</c>. The optional fourth
/// argument is a text file with one dataset name per line; it is only read when exactly
/// four arguments are supplied.
/// </param>
/// <returns>0 when the analysis succeeds; 1 on invalid arguments or any failure.</returns>
static int Main(string[] args)
{
    var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
    SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

    try
    {
        // Three positional arguments are mandatory (args[2] is read below).
        if (args.Length < 3)
        {
            Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
            Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
            return 1;
        }

        // Setup the analysis processing
        var databasePath = args[0];
        var databaseName = Path.GetFileNameWithoutExtension(databasePath);
        var path = Path.GetDirectoryName(databasePath);
        var chargeState = Convert.ToInt32(args[1]);
        var crossPath = args[2];

        List<string> datasetList = null;
        if (args.Length == 4)
        {
            datasetList = File.ReadAllLines(args[3]).ToList();
        }

        if (path == null)
        {
            Console.WriteLine(@"The directory path is invalid");
            return 1;
        }

        NHibernateUtil.ConnectToDatabase(databasePath, false);
        IDatasetDAO datasetCache = new DatasetDAOHibernate();

        var dateSuffix = AnalysisPathUtils.BuildDateSuffix();
        Logger.LogPath = Path.Combine(path,
            string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

        Logger.PrintMessage("Find all datasets", true);
        var datasets = datasetCache.FindAll();
        Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

        // Create the clustering algorithm - average linkage
        IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

        // Create the DAO object to extract the features
        var database = new UmcAdoDAO { DatabasePath = databasePath };
        IUmcDAO featureDao = database;

        Logger.PrintMessage("Extracting Features", true);
        var tempFeatures = featureDao.FindByCharge(chargeState);
        Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

        var features = new List<UMCLight>();
        if (datasetList != null)
        {
            // Case-insensitive lookup by dataset name, independent of the current culture.
            var datasetsByName = datasets.ToDictionary(info => info.DatasetName, StringComparer.OrdinalIgnoreCase);

            // Ids of the requested datasets; a set tolerates a name that is listed twice.
            var focusedDatasetIds = new HashSet<int>();
            foreach (var name in datasetList)
            {
                // Blank lines in the list file carry no dataset name.
                if (string.IsNullOrWhiteSpace(name))
                {
                    continue;
                }

                DatasetInformation dataset;
                if (!datasetsByName.TryGetValue(name, out dataset))
                {
                    throw new InvalidOperationException("Didn't find the dataset required..." + name);
                }

                Logger.PrintMessage("Using dataset: " + name);
                focusedDatasetIds.Add(dataset.DatasetId);
            }

            // Keep only the features whose group id belongs to a requested dataset.
            features.AddRange(tempFeatures.Where(feature => focusedDatasetIds.Contains(feature.GroupId)));
            Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
        }
        else
        {
            features = tempFeatures;
        }

        // Handle logging progress.
        clusterer.Progress += clusterer_Progress;
        clusterer.Parameters.Tolerances.DriftTime = .3;
        clusterer.Parameters.Tolerances.Mass = 16;
        clusterer.Parameters.Tolerances.Net = .014;
        clusterer.Parameters.OnlyClusterSameChargeStates = true;
        clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
        clusterer.Parameters.DistanceFunction =
            PNNLOmics.Algorithms.Distance.DistanceFactory<UMCLight>.CreateDistanceFunction(
                PNNLOmics.Algorithms.Distance.DistanceMetric.WeightedEuclidean);

        // Then cluster
        var clusterWriter = new UmcClusterWriter();
        IClusterWriter<UMCClusterLight> writer = clusterWriter;
        try
        {
            clusterWriter.Open(crossPath);
            clusterWriter.WriteHeader(datasets);

            clusterer.ClusterAndProcess(features, writer);
            Logger.PrintMessage("", true);
            Logger.PrintMessage("ANALYSIS SUCCESS", true);
            return 0;
        }
        catch (Exception ex)
        {
            Logger.PrintMessage("Unhandled Error: " + ex.Message);
            LogInnerExceptions(ex);
            Logger.PrintMessage("Stack: " + ex.StackTrace);
            Logger.PrintMessage("");
            Logger.PrintMessage("ANALYSIS FAILED");
            return 1;
        }
        finally
        {
            // Runs on success and on failure, including the early returns above.
            clusterWriter.Close();
        }
    }
    catch (Exception ex)
    {
        Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
        LogInnerExceptions(ex);
        Logger.PrintMessage("Stack: " + ex.StackTrace, true);
        Logger.PrintMessage("");
        Logger.PrintMessage("ANALYSIS FAILED");
        return 1;
    }
}

/// <summary>
/// Logs the message of every exception in the <c>InnerException</c> chain of
/// <paramref name="ex"/>, outermost first. The exception itself is not logged.
/// </summary>
private static void LogInnerExceptions(Exception ex)
{
    for (var inner = ex.InnerException; inner != null; inner = inner.InnerException)
    {
        Logger.PrintMessage("Inner Exception: " + inner.Message);
    }
}