/// <summary>
/// Constructor.  Wires every data access provider (DAO) used by an analysis
/// into the corresponding cache property.
/// </summary>
/// <param name="featureCache">LCMS feature data access.</param>
/// <param name="clusterCache">LCMS feature cluster data access.</param>
/// <param name="msFeatureCache">MS feature data access.</param>
/// <param name="msnFeatureCache">MS/MS (MSn) feature data access.</param>
/// <param name="msnFeatureMap">MSn feature to MS feature map data access.
/// NOTE(review): this is stored in <c>MSFeatureToMSnFeatureCache</c>; the
/// parameter and property names disagree on direction — confirm intended.</param>
/// <param name="datasetCache">Dataset metadata data access.</param>
/// <param name="massTagMatches">Mass tag match data access.</param>
/// <param name="massTags">Mass tag data access.</param>
/// <param name="factorCache">Factor data access.</param>
/// <param name="factorAssignmentCache">Dataset-to-factor map data access.</param>
/// <param name="msmsClusterCache">MS/MS cluster map data access.</param>
/// <param name="sequenceCache">Database search sequence data access.</param>
/// <param name="sequenceMapCache">Sequence-to-MSn feature map data access.</param>
public FeatureDataAccessProviders(IUmcDAO featureCache,
    IUmcClusterDAO clusterCache,
    IMSFeatureDAO msFeatureCache,
    IMSnFeatureDAO msnFeatureCache,
    IMsnFeatureToMSFeatureDAO msnFeatureMap,
    IDatasetDAO datasetCache,
    IMassTagMatchDAO massTagMatches,
    IMassTagDAO massTags,
    IFactorDao factorCache,
    IDatasetToFactorMapDAO factorAssignmentCache,
    IMSMSClusterMapDAO msmsClusterCache,
    IDatabaseSearchSequenceDAO sequenceCache,
    ISequenceToMsnFeatureDAO sequenceMapCache) : this()
{
    ClusterCache = clusterCache;
    FeatureCache = featureCache;
    MSFeatureCache = msFeatureCache;
    MSnFeatureCache = msnFeatureCache;
    MSFeatureToMSnFeatureCache = msnFeatureMap;
    DatasetCache = datasetCache;
    MassTagMatches = massTagMatches;
    MassTags = massTags;
    FactorAssignmentCache = factorAssignmentCache;
    FactorCache = factorCache;
    MSMSClusterCache = msmsClusterCache;
    DatabaseSequenceCache = sequenceCache;
    SequenceMsnMapCache = sequenceMapCache;
}
/// <summary>
/// The main entry point for the application.  Clusters LCMS features of a
/// single charge state from a MultiAlign database and writes a cross-tab file.
/// </summary>
/// <param name="args">databasePath chargeState crossTabPath [datasetListFile]</param>
/// <returns>0 on success; 1 on usage error or analysis failure.</returns>
static int Main(string[] args)
{
    var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
    SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

    try
    {
        // BUG FIX: three arguments are required (databasePath, chargeState,
        // crossTabPath) — the old guard (args.Length < 2) let exactly two
        // arguments through and then crashed reading args[2] below.
        if (args.Length < 3)
        {
            Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
            Console.WriteLine(@"\tThe cross-tab file will be placed in the same directory as the database path");
            return 1;
        }

        // Setup the analysis processing
        var databasePath = args[0];
        var databaseName = Path.GetFileNameWithoutExtension(databasePath);
        var path = Path.GetDirectoryName(databasePath);
        var crossPath = args[2];
        var chargeState = Convert.ToInt32(args[1]);

        // Optional fourth argument: a file listing the dataset names to include.
        List<string> datasetList = null;
        if (args.Length == 4)
        {
            datasetList = File.ReadAllLines(args[3]).ToList();
        }

        if (path == null)
        {
            Console.WriteLine(@"The directory path is invalid");
            return 1;
        }

        NHibernateUtil.ConnectToDatabase(databasePath, false);
        IDatasetDAO datasetCache = new DatasetDAOHibernate();
        var dateSuffix = AnalysisPathUtils.BuildDateSuffix();
        Logger.LogPath = Path.Combine(path, string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

        Logger.PrintMessage("Find all datasets", true);
        var datasets = datasetCache.FindAll();
        Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

        // Create the clustering algorithm - average linkage
        IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

        // Create the DAO object to extract the features
        var database = new UmcAdoDAO { DatabasePath = databasePath };
        IUmcDAO featureDao = database;

        Logger.PrintMessage("Extracting Features", true);
        var tempFeatures = featureDao.FindByCharge(chargeState);
        Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

        var features = new List<UMCLight>();
        if (datasetList != null)
        {
            // Restrict the features to only the datasets named in the list file.
            var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());
            var focusedDatasetList = new Dictionary<int, DatasetInformation>();
            foreach (var name in datasetList)
            {
                var key = name.ToLower();
                if (featuremap.ContainsKey(key))
                {
                    Logger.PrintMessage("Using dataset: " + name);
                    focusedDatasetList.Add(featuremap[key].DatasetId, featuremap[key]);
                }
                else
                {
                    throw new Exception("Didn't find the dataset required..." + name);
                }
            }

            features.AddRange(from feature in tempFeatures
                              let use = focusedDatasetList.ContainsKey(feature.GroupId)
                              where use
                              select feature);
            Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
        }
        else
        {
            features = tempFeatures;
        }

        // Handle logging progress.
        clusterer.Progress += clusterer_Progress;
        clusterer.Parameters.Tolerances.DriftTime = .3;
        clusterer.Parameters.Tolerances.Mass = 16;
        clusterer.Parameters.Tolerances.Net = .014;
        clusterer.Parameters.OnlyClusterSameChargeStates = true;
        clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
        clusterer.Parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(DistanceMetric.WeightedEuclidean);

        // Then cluster
        var clusterWriter = new UmcClusterWriter();
        IClusterWriter<UMCClusterLight> writer = clusterWriter;
        try
        {
            clusterWriter.Open(crossPath);
            clusterWriter.WriteHeader(datasets);
            clusterer.ClusterAndProcess(features, writer);
            Logger.PrintMessage("", true);
            Logger.PrintMessage("ANALYSIS SUCCESS", true);
            return 0;
        }
        catch (Exception ex)
        {
            // Log the full exception chain before failing.
            Logger.PrintMessage("Unhandled Error: " + ex.Message);
            var innerEx = ex.InnerException;
            while (innerEx != null)
            {
                Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                innerEx = innerEx.InnerException;
            }

            Logger.PrintMessage("Stack: " + ex.StackTrace);
            Logger.PrintMessage("");
            Logger.PrintMessage("ANALYSIS FAILED");
            return 1;
        }
        finally
        {
            clusterWriter.Close();
        }
    }
    catch (Exception ex)
    {
        // Catch-all for failures before clustering starts (bad args, DB connect, etc.).
        Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
        var innerEx = ex.InnerException;
        while (innerEx != null)
        {
            Logger.PrintMessage("Inner Exception: " + innerEx.Message);
            innerEx = innerEx.InnerException;
        }

        Logger.PrintMessage("Stack: " + ex.StackTrace, true);
        Logger.PrintMessage("");
        Logger.PrintMessage("ANALYSIS FAILED");
        return 1;
    }
}
/// <summary>
/// Loads feature data from the files provided.
/// </summary>
/// <param name="path">Path to the feature file or feature database.</param>
/// <param name="datasetId">Dataset id used when loading from the feature cache.</param>
/// <param name="featureCache">Feature cache (database) accessor.</param>
/// <returns>The loaded features; an empty list when the path has no extension.</returns>
public static IList<UMCLight> LoadUmcFeatureData(string path, int datasetId, IUmcDAO featureCache)
{
    var features = new List<UMCLight>();
    var extension = Path.GetExtension(path);
    if (extension == null)
        return features;

    // BUG FIX: use the invariant culture so extension matching is not affected
    // by the user's locale casing rules (e.g. Turkish dotted/dotless 'i').
    extension = extension.ToUpperInvariant();
    switch (extension)
    {
        case ".TXT":
            // Plain-text LCMS feature file.
            var umcReader = new LCMSFeatureFileReader(path);
            features = umcReader.GetUmcList();
            break;
        case ".DB3":
            features = featureCache.FindByDatasetId(datasetId);
            break;
        default:
            // Was reconstructing features from scratch even when they were already
            // cached because the file extension was ".csv" not ".db3".
            features = featureCache.FindByDatasetId(datasetId);
            break;
    }
    return features;
}
/// <summary>
/// Loads feature data from the files provided.
/// </summary>
/// <param name="dataset">Dataset whose feature file path and id are used.</param>
/// <param name="featureCache">Feature cache (database) accessor.</param>
/// <param name="provider">Optional scan summary provider; required for
/// LC-IMS and Promex feature files, and used to attach MS/MS spectra when it
/// is also an <see cref="ISpectraProvider"/>.</param>
/// <returns>The loaded features; an empty list when the path has no extension.</returns>
public static IList<UMCLight> LoadUmcFeatureData(DatasetInformation dataset, IUmcDAO featureCache, IScanSummaryProvider provider = null)
{
    var features = new List<UMCLight>();
    var extension = Path.GetExtension(dataset.Features.Path);
    if (extension == null)
    {
        return features;
    }

    // BUG FIX: use the invariant culture so extension matching is not affected
    // by the user's locale casing rules (e.g. Turkish dotted/dotless 'i').
    extension = extension.ToUpperInvariant();
    switch (extension)
    {
        case ".TXT":
            // BUG FIX: ordinal comparison — the default EndsWith overload is
            // culture-sensitive, which can misclassify the file under some locales.
            if (dataset.Features.Path.EndsWith("_LCMSFeatures.txt", StringComparison.Ordinal))
            {
                var reader = new LcImsFeatureFileReader(provider, dataset.DatasetId);
                features = reader.ReadFile(dataset.Features.Path).ToList();
            }
            else
            {
                var umcReader = new LCMSFeatureFileReader(dataset.Features.Path);
                features = umcReader.GetUmcList();
            }
            break;
        case ".DB3":
            features = featureCache.FindByDatasetId(dataset.DatasetId);
            break;
        case ".MS1FT":
            // Promex results require an InformedProteomicsReader; silently
            // returns an empty list otherwise (preserved original behavior).
            if (provider != null && provider is InformedProteomicsReader)
            {
                var promexReader = new PromexFileReader(provider as InformedProteomicsReader, dataset.DatasetId);
                features = promexReader.ReadFile(dataset.Features.Path).ToList();
            }
            break;
        default:
            // Was reconstructing features from scratch even when they were already
            // cached because the file extension was ".csv" not ".db3".
            features = featureCache.FindByDatasetId(dataset.DatasetId);
            break;
    }

    // Attach MS/MS spectra when the provider can supply them.
    if (features != null && provider is ISpectraProvider)
    {
        var spectraProvider = provider as ISpectraProvider;
        LoadMsMs(features, spectraProvider);
    }
    return features;
}
/// <summary>
/// Clusters all LCMS features into clusters, assigning sequential cluster ids,
/// accumulating MS/MS statistics, and persisting clusters and updated features
/// back to the data providers.
/// </summary>
public void ClusterFeatures()
{
    this.algorithms = this.builder.GetAlgorithmProvider(this.options);

    var clusterer = this.algorithms.Clusterer;
    clusterer.Parameters = LcmsClusteringOptions.ConvertToOmics(this.options.LcmsClusteringOptions);
    this.featureCache = this.analysis.DataProviders.FeatureCache;

    // This just tells us whether we are using mammoth memory partitions or not.
    var clusterCount = 0;
    var providers = this.analysis.DataProviders;

    // Here we see if we need to separate the charge...
    // IMS is said to require charge separation
    if (!this.analysis.Options.LcmsClusteringOptions.ShouldSeparateCharge)
    {
        var features = this.featureCache.FindAll();
        var clusters = new List<UMCClusterLight>();
        clusters = clusterer.Cluster(features, clusters);
        foreach (var cluster in clusters)
        {
            cluster.Id = clusterCount++;
            cluster.UmcList.ForEach(x => x.ClusterId = cluster.Id);

            // Updates the cluster with statistics
            foreach (var feature in cluster.UmcList)
            {
                cluster.MsMsCount += feature.MsMsCount;
                cluster.IdentifiedSpectraCount += feature.IdentifiedSpectraCount;
            }
        }

        providers.ClusterCache.AddAll(clusters);
        providers.FeatureCache.UpdateAll(features);
        this.analysis.Clusters = clusters;
    }
    else
    {
        var maxChargeState = this.featureCache.FindMaxCharge();

        // Here we cluster all charge states separately. Probably IMS Data.
        for (var chargeState = 1; chargeState <= maxChargeState; chargeState++)
        {
            var features = this.featureCache.FindByCharge(chargeState);

            // BUG FIX: was "break", which silently skipped every remaining
            // charge state as soon as one charge had no features.  The sibling
            // progress-reporting implementation correctly uses "continue".
            if (features.Count < 1)
            {
                continue;
            }

            var clusters = clusterer.Cluster(features);
            foreach (var cluster in clusters)
            {
                cluster.Id = clusterCount++;
                cluster.UmcList.ForEach(x => x.ClusterId = cluster.Id);

                // Updates the cluster with statistics.
                // NOTE(review): this loop reads cluster.Features while the
                // non-charge-separated branch reads cluster.UmcList — confirm
                // both refer to the same underlying collection.
                foreach (var feature in cluster.Features)
                {
                    cluster.MsMsCount += feature.MsMsCount;
                    cluster.IdentifiedSpectraCount += feature.IdentifiedSpectraCount;
                }
            }

            this.analysis.DataProviders.ClusterCache.AddAll(clusters);
            this.analysis.DataProviders.FeatureCache.UpdateAll(features);
        }

        this.analysis.Clusters = this.analysis.DataProviders.ClusterCache.FindAll();
    }

    // BUG FIX: removed leftover debug dialog MessageBox.Show("Working Command"),
    // which blocked this processing method on an interactive popup.
}
/// <summary>
/// Clusters LCMS features into clusters while reporting progress to the local
/// progress bar, the Windows task bar, and an optional workflow reporter.
/// Drops and rebuilds cluster database indexes around the clustering work,
/// updates dataset view-model states, and writes a cross-tab results file.
/// </summary>
/// <param name="workflowProgress">Optional progress sink; a no-op
/// <see cref="Progress{T}"/> is substituted when null.</param>
internal void ClusterFeatures(IProgress <ProgressData> workflowProgress = null)
{
    var taskBarProgress = TaskBarProgress.GetInstance();
    taskBarProgress.ShowProgress(this, true);

    // Fan each progress report out to all three sinks.
    workflowProgress = workflowProgress ?? new Progress <ProgressData>();
    IProgress <ProgressData> internalProgress = new Progress <ProgressData>(pd =>
    {
        this.progress.Report((int)pd.Percent);
        this.ProgressPercent = pd.Percent;
        taskBarProgress.SetProgress(this, pd.Percent);
        workflowProgress.Report(pd);
    });

    this.algorithms = this.builder.GetAlgorithmProvider(this.options);
    var clusterer = this.algorithms.Clusterer;
    clusterer.Parameters = LcmsClusteringOptions.ConvertToOmics(this.options.LcmsClusteringOptions);
    this.featureCache = this.analysis.DataProviders.FeatureCache;

    // The Promex clusterer additionally needs the scan summary providers.
    if (clusterer is PromexClusterer)
    {
        var promexClusterer = clusterer as PromexClusterer;
        promexClusterer.Readers = this.analysis.DataProviders.ScanSummaryProviderCache;
    }

    // Mark every dataset that has features as entering the clustering state.
    foreach (var dataset in this.Datasets)
    {
        if (dataset.FeaturesFound)
        {
            dataset.DatasetState = DatasetInformationViewModel.DatasetStates.Clustering;
        }
    }

    ThreadSafeDispatcher.Invoke(this.ClusterFeaturesCommand.RaiseCanExecuteChanged);
    ThreadSafeDispatcher.Invoke(this.DisplayClustersCommand.RaiseCanExecuteChanged);

    this.ShouldShowProgress = true;
    var progData = new ProgressData(internalProgress);
    var clusterProgress = new Progress <ProgressData>(pd => progData.Report(pd.Percent));

    // Drop cluster indexes and clear previous clusters under the write lock
    // before re-clustering.
    this.analysis.DataProviders.DatabaseLock.EnterWriteLock();
    DatabaseIndexer.IndexClustersDrop(NHibernateUtil.Path);
    this.analysis.DataProviders.ClusterCache.ClearAllClusters();
    this.analysis.DataProviders.DatabaseLock.ExitWriteLock();

    // The id for a cluster - keep track here to avoid duplicates when separating by charge.
    var clusterCount = 0;

    // Here we see if we need to separate the charge...
    // IMS is said to require charge separation
    if (!this.analysis.Options.LcmsClusteringOptions.ShouldSeparateCharge)
    {
        // Loading features from all datasets accounts for the first 45% of progress.
        progData.StepRange(45);
        var features = new List <UMCLight>();
        var i = 0;
        var datasets = this.Datasets.Where(ds => ds.FeaturesFound).ToList();
        foreach (var dataset in datasets)
        {
            this.analysis.DataProviders.DatabaseLock.EnterReadLock();
            features.AddRange(this.featureCache.FindByDatasetId(dataset.DatasetId));
            this.analysis.DataProviders.DatabaseLock.ExitReadLock();
            progData.Report(++i, datasets.Count);
        }

        // Clustering itself accounts for the remaining progress up to 100%.
        progData.StepRange(100);
        ClusterGroupOfFeatures(clusterer, features, ref clusterCount, clusterProgress);
    }
    else
    {
        var maxChargeState = this.featureCache.FindMaxCharge();

        // Here we cluster all charge states separately. Probably IMS Data.
        for (var chargeState = 1; chargeState <= maxChargeState; chargeState++)
        {
            // Each charge state owns an equal slice of the progress range.
            var maxPercent = ((100.0 * chargeState) / maxChargeState);

            // TODO: Add restriction by selected dataset ids?
            var features = this.featureCache.FindByCharge(chargeState);
            if (features.Count < 1)
            {
                continue;
            }

            progData.StepRange(maxPercent);
            ClusterGroupOfFeatures(clusterer, features, ref clusterCount, clusterProgress);
        }

        this.analysis.Clusters = this.analysis.DataProviders.ClusterCache.FindAll();
    }

    // Rebuild the cluster indexes now that clustering has persisted its results.
    this.analysis.DataProviders.DatabaseLock.EnterWriteLock();
    DatabaseIndexer.IndexClusters(NHibernateUtil.Path);
    this.analysis.DataProviders.DatabaseLock.ExitWriteLock();

    // Advance datasets that finished persisting clusters to the Clustered state.
    foreach (var dataset in this.Datasets)
    {
        if (dataset.DatasetState == DatasetInformationViewModel.DatasetStates.PersistingClusters)
        {
            dataset.DatasetState = DatasetInformationViewModel.DatasetStates.Clustered;
        }
    }

    try
    {
        // Write to file
        this.WriteClusterData(string.Format("{0}_crosstab.tsv", this.analysis.AnalysisName), this.analysis.Clusters);
    }
    catch (Exception ex)
    {
        // A failure writing the cross-tab is reported but does not fail the run.
        var errMsg = "Error writing results to text file: " + ex.Message;
        Logger.PrintMessage(errMsg);

        // Todo: Add this: if (!GlobalSettings.AutomatedAnalysisMode)
        MessageBox.Show(errMsg);
    }

    ThreadSafeDispatcher.Invoke(this.ClusterFeaturesCommand.RaiseCanExecuteChanged);
    ThreadSafeDispatcher.Invoke(this.DisplayClustersCommand.RaiseCanExecuteChanged);
    taskBarProgress.ShowProgress(this, false);
    this.ShouldShowProgress = false;
}