/// <summary>
/// Clusters LC-MS features across the loaded datasets, optionally separating by charge state,
/// and persists the resulting clusters to the analysis database.
/// </summary>
internal void ClusterFeatures(IProgress<ProgressData> workflowProgress = null)
{
    var taskBarProgress = TaskBarProgress.GetInstance();
    taskBarProgress.ShowProgress(this, true);

    workflowProgress = workflowProgress ?? new Progress<ProgressData>();

    // Relay progress to the local progress bar, the task bar, and the workflow caller.
    IProgress<ProgressData> internalProgress = new Progress<ProgressData>(pd =>
    {
        this.progress.Report((int)pd.Percent);
        this.ProgressPercent = pd.Percent;
        taskBarProgress.SetProgress(this, pd.Percent);
        workflowProgress.Report(pd);
    });

    this.algorithms = this.builder.GetAlgorithmProvider(this.options);

    var clusterer = this.algorithms.Clusterer;
    clusterer.Parameters = LcmsClusteringOptions.ConvertToOmics(this.options.LcmsClusteringOptions);

    this.featureCache = this.analysis.DataProviders.FeatureCache;

    if (clusterer is PromexClusterer)
    {
        var promexClusterer = clusterer as PromexClusterer;
        promexClusterer.Readers = this.analysis.DataProviders.ScanSummaryProviderCache;
    }

    foreach (var dataset in this.Datasets)
    {
        if (dataset.FeaturesFound)
        {
            dataset.DatasetState = DatasetInformationViewModel.DatasetStates.Clustering;
        }
    }

    ThreadSafeDispatcher.Invoke(this.ClusterFeaturesCommand.RaiseCanExecuteChanged);
    ThreadSafeDispatcher.Invoke(this.DisplayClustersCommand.RaiseCanExecuteChanged);

    this.ShouldShowProgress = true;
    var progData = new ProgressData(internalProgress);
    var clusterProgress = new Progress<ProgressData>(pd => progData.Report(pd.Percent));

    // Drop existing cluster indices and cached clusters before re-clustering.
    this.analysis.DataProviders.DatabaseLock.EnterWriteLock();
    DatabaseIndexer.IndexClustersDrop(NHibernateUtil.Path);
    this.analysis.DataProviders.ClusterCache.ClearAllClusters();
    this.analysis.DataProviders.DatabaseLock.ExitWriteLock();

    // The id for a cluster - keep track here to avoid duplicates when separating by charge.
    var clusterCount = 0;

    // Determine whether charge states must be clustered separately.
    // IMS data typically requires charge separation.
    if (!this.analysis.Options.LcmsClusteringOptions.ShouldSeparateCharge)
    {
        progData.StepRange(45);

        var features = new List<UMCLight>();
        var i = 0;
        var datasets = this.Datasets.Where(ds => ds.FeaturesFound).ToList();
        foreach (var dataset in datasets)
        {
            this.analysis.DataProviders.DatabaseLock.EnterReadLock();
            features.AddRange(this.featureCache.FindByDatasetId(dataset.DatasetId));
            this.analysis.DataProviders.DatabaseLock.ExitReadLock();
            progData.Report(++i, datasets.Count);
        }

        progData.StepRange(100);
        ClusterGroupOfFeatures(clusterer, features, ref clusterCount, clusterProgress);
    }
    else
    {
        var maxChargeState = this.featureCache.FindMaxCharge();

        // Cluster each charge state separately. Probably IMS data.
        for (var chargeState = 1; chargeState <= maxChargeState; chargeState++)
        {
            var maxPercent = (100.0 * chargeState) / maxChargeState;

            // TODO: Add restriction by selected dataset ids?
            var features = this.featureCache.FindByCharge(chargeState);
            if (features.Count < 1)
            {
                continue;
            }

            progData.StepRange(maxPercent);
            ClusterGroupOfFeatures(clusterer, features, ref clusterCount, clusterProgress);
        }

        this.analysis.Clusters = this.analysis.DataProviders.ClusterCache.FindAll();
    }

    // Rebuild the cluster indices now that clustering is complete.
    this.analysis.DataProviders.DatabaseLock.EnterWriteLock();
    DatabaseIndexer.IndexClusters(NHibernateUtil.Path);
    this.analysis.DataProviders.DatabaseLock.ExitWriteLock();

    foreach (var dataset in this.Datasets)
    {
        if (dataset.DatasetState == DatasetInformationViewModel.DatasetStates.PersistingClusters)
        {
            dataset.DatasetState = DatasetInformationViewModel.DatasetStates.Clustered;
        }
    }

    try
    {
        // Write the cluster cross-tab to file.
        this.WriteClusterData(string.Format("{0}_crosstab.tsv", this.analysis.AnalysisName), this.analysis.Clusters);
    }
    catch (Exception ex)
    {
        var errMsg = "Error writing results to text file: " + ex.Message;
        Logger.PrintMessage(errMsg);

        // Todo: Add this: if (!GlobalSettings.AutomatedAnalysisMode) MessageBox.Show(errMsg);
    }

    ThreadSafeDispatcher.Invoke(this.ClusterFeaturesCommand.RaiseCanExecuteChanged);
    ThreadSafeDispatcher.Invoke(this.DisplayClustersCommand.RaiseCanExecuteChanged);

    taskBarProgress.ShowProgress(this, false);
    this.ShouldShowProgress = false;
}
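
// ClusterGroupOfFeatures is called above but its body is not shown in this excerpt. The method
// below is a minimal, hypothetical sketch of what such a helper might look like, inferred from the
// call sites above and from the persistence logic in PerformLcmsFeatureClustering (assign
// sequential cluster ids, propagate them to member features, and write both back through the data
// providers). The signature, the IClusterer<UMCLight, UMCClusterLight> type, and the use of
// AddAll/UpdateAllStateless on this class's providers are assumptions; the real implementation
// likely also updates dataset states and reports progress through clusterProgress.
private void ClusterGroupOfFeatures(
    IClusterer<UMCLight, UMCClusterLight> clusterer,   // assumed clusterer type
    List<UMCLight> features,
    ref int clusterCount,
    IProgress<ProgressData> clusterProgress)           // assumed to be forwarded to the clusterer
{
    // Cluster the supplied batch of features.
    var clusters = clusterer.Cluster(features);

    foreach (var cluster in clusters)
    {
        // Keep ids unique across per-charge clustering passes.
        cluster.Id = clusterCount++;
        cluster.UmcList.ForEach(x => x.ClusterId = cluster.Id);
    }

    // Persist the clusters and the updated feature-to-cluster assignments.
    this.analysis.DataProviders.ClusterCache.AddAll(clusters);
    this.analysis.DataProviders.FeatureCache.UpdateAllStateless(features);
}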
/// <summary>
/// Performs clustering of LC-MS features.
/// </summary>
/// <param name="config">Analysis configuration providing options, data providers, and result storage.</param>
public void PerformLcmsFeatureClustering(AnalysisConfig config)
{
    var analysis = config.Analysis;
    var clusterer = m_algorithms.Clusterer;

    RegisterProgressNotifier(clusterer);
    UpdateStatus("Using Cluster Algorithm: " + clusterer);

    clusterer.Parameters = LcmsClusteringOptions.ConvertToOmics(analysis.Options.LcmsClusteringOptions);

    // This just tells us whether we are using mammoth memory partitions or not.
    var featureCache = config.Analysis.DataProviders.FeatureCache;
    var clusterCount = 0;
    var providers = config.Analysis.DataProviders;

    // Determine whether charge states must be clustered separately.
    // IMS data typically requires charge separation.
    if (!analysis.Options.LcmsClusteringOptions.ShouldSeparateCharge)
    {
        UpdateStatus("Clustering features from all charge states.");
        UpdateStatus("Retrieving features for clustering from cache.");

        var features = featureCache.FindAll();
        UpdateStatus(string.Format("Clustering {0} features.", features.Count));

        var clusters = new List<UMCClusterLight>();
        clusters = clusterer.Cluster(features, clusters);

        foreach (var cluster in clusters)
        {
            cluster.Id = clusterCount++;
            cluster.UmcList.ForEach(x => x.ClusterId = cluster.Id);

            // Update the cluster with statistics from its member features.
            foreach (var feature in cluster.UmcList)
            {
                cluster.MsMsCount += feature.MsMsCount;
                cluster.IdentifiedSpectraCount += feature.IdentifiedSpectraCount;
            }
        }

        providers.ClusterCache.AddAll(clusters);
        providers.FeatureCache.UpdateAllStateless(features);
        config.Analysis.Clusters = clusters;

        UpdateStatus(string.Format("Found {0} clusters.", clusters.Count));

        if (FeaturesClustered != null)
        {
            FeaturesClustered(this, new FeaturesClusteredEventArgs(clusters));
        }
    }
    else
    {
        var maxChargeState = featureCache.FindMaxCharge();

        // Cluster each charge state separately. Probably IMS data.
        UpdateStatus("Clustering charge states individually.");
        for (var chargeState = 1; chargeState <= maxChargeState; chargeState++)
        {
            var features = featureCache.FindByCharge(chargeState);
            if (features.Count < 1)
            {
                UpdateStatus(string.Format("No features found for charge state {0}. Stopping clustering.", chargeState));
                break;
            }

            UpdateStatus(string.Format("Retrieved {0} features for charge state {1}; clustering.", features.Count, chargeState));

            var clusters = clusterer.Cluster(features);
            foreach (var cluster in clusters)
            {
                cluster.Id = clusterCount++;
                cluster.UmcList.ForEach(x => x.ClusterId = cluster.Id);

                // Update the cluster with statistics from its member features.
                foreach (var feature in cluster.Features)
                {
                    cluster.MsMsCount += feature.MsMsCount;
                    cluster.IdentifiedSpectraCount += feature.IdentifiedSpectraCount;
                }
            }

            config.Analysis.DataProviders.ClusterCache.AddAll(clusters);
            config.Analysis.DataProviders.FeatureCache.UpdateAllStateless(features);

            UpdateStatus(string.Format("Found {0} clusters.", clusters.Count));

            if (FeaturesClustered != null)
            {
                FeaturesClustered(this, new FeaturesClusteredEventArgs(clusters, chargeState));
            }
        }

        config.Analysis.Clusters = config.Analysis.DataProviders.ClusterCache.FindAll();
    }

    DeRegisterProgressNotifier(clusterer);
    UpdateStatus(string.Format("Finished clustering. Found {0} total clusters.", clusterCount));
}
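
// Usage sketch (hypothetical): a caller could subscribe to the FeaturesClustered event raised by
// PerformLcmsFeatureClustering to observe each clustering pass. "AnalysisProcessor" stands in for
// whatever class actually contains the method, and the "Clusters" property on
// FeaturesClusteredEventArgs is assumed from the constructor arguments used above; verify both
// against the real types before relying on this.
public static void RunClusteringWithLogging(AnalysisProcessor processor, AnalysisConfig config)
{
    processor.FeaturesClustered += (sender, args) =>
    {
        // One event per charge state when charges are clustered separately, otherwise a single event.
        Console.WriteLine("Clustering pass produced {0} clusters.", args.Clusters.Count);
    };

    processor.PerformLcmsFeatureClustering(config);
}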