internal void LoadFeatures(List<DatasetInformationViewModel> workFlowDatasets = null, IProgress<ProgressData> workflowProgress = null)
{
    // Loads the features of every selected dataset through a FeatureLoader and persists them
    // (together with file-backed scan summaries) to the analysis database.
    //
    // workFlowDatasets: datasets to process; when null, the datasets in this.Datasets that are
    //                   selected and not already doing work are used.
    // workflowProgress: receives the overall progress; when null, a Progress<ProgressData>
    //                   without a handler is substituted.
    var featureCache = new FeatureLoader { Providers = this.analysis.DataProviders };
    var taskBarProgress = TaskBarProgress.GetInstance();

    this.ShouldShowProgress = true;
    try
    {
        var selectedFiles = workFlowDatasets ??
                            this.Datasets.Where(file => !file.DoingWork).Where(ds => ds.IsSelected).ToList();

        foreach (var file in selectedFiles)
        {
            file.DatasetState = DatasetInformationViewModel.DatasetStates.FindingFeatures;
            ThreadSafeDispatcher.Invoke(() => this.PlotMsFeaturesCommand.RaiseCanExecuteChanged());
            ThreadSafeDispatcher.Invoke(() => this.FindMsFeaturesCommand.RaiseCanExecuteChanged());
        }

        taskBarProgress.ShowProgress(this, true);

        workflowProgress = workflowProgress ?? new Progress<ProgressData>();
        IProgress<ProgressData> totalProgressRpt = new Progress<ProgressData>(pd =>
        {
            this.TotalProgress = pd.Percent;
            taskBarProgress.SetProgress(this, pd.Percent);
            workflowProgress.Report(pd);
        });
        var totalProgressData = new ProgressData(totalProgressRpt);

        DatabaseIndexer.IndexClustersDrop(NHibernateUtil.Path);
        DatabaseIndexer.IndexFeaturesDrop(NHibernateUtil.Path);

        var i = 1;
        foreach (var file in selectedFiles)
        {
            // Set range based on file
            totalProgressData.StepRange((i++ * 100.0) / selectedFiles.Count);

            var fileInstance = file;
            var progData = new ProgressData(new Progress<ProgressData>(pd =>
            {
                fileInstance.Progress = pd.Percent;

                // Report file progress
                totalProgressData.Report(fileInstance.Progress);
            }));
            var progressRpt = new Progress<ProgressData>(pd => progData.Report(pd.Percent));

            progData.StepRange(30);

            IList<UMCLight> features;

            // Load features from the database.
            // The lock is taken BEFORE the try block: if acquisition itself fails, the finally
            // block must not try to release a lock that was never obtained (that would replace
            // the real error with a SynchronizationLockException).
            this.analysis.DataProviders.DatabaseLock.EnterReadLock();
            try
            {
                features = featureCache.LoadDataset(
                    file.Dataset,
                    this.analysis.Options.MsFilteringOptions,
                    this.analysis.Options.LcmsFindingOptions,
                    this.analysis.Options.LcmsFilteringOptions,
                    this.analysis.Options.DataLoadOptions,
                    this.analysis.DataProviders.ScanSummaryProviderCache,
                    this.analysis.DataProviders.IdentificationProviderCache,
                    progressRpt);
            }
            finally
            {
                // Always close read lock, even during failure condition so we don't have a recursive lock error.
                this.analysis.DataProviders.DatabaseLock.ExitReadLock();
            }

            // The indexer adds the entry when it is missing and replaces it otherwise.
            this.featuresByDataset[file.Dataset] = features;

            file.DatasetState = DatasetInformationViewModel.DatasetStates.PersistingFeatures;
            ThreadSafeDispatcher.Invoke(() => this.PlotMsFeaturesCommand.RaiseCanExecuteChanged());

            // TODO: We were using this log file to track speed changes for writing the database. We probably don't need it anymore.
            using (var logger = new StreamWriter("nhibernate_stats.txt", true))
            {
                logger.WriteLine();
                var stopWatch = new Stopwatch();
                stopWatch.Start();

                var scanSumProvider =
                    this.analysis.DataProviders.ScanSummaryProviderCache.GetScanSummaryProvider(
                        file.Dataset.DatasetId);
                if (scanSumProvider.IsBackedByFile)
                {
                    var ssDao = this.analysis.DataProviders.ScanSummaryDao;
                    ssDao.DeleteByDatasetId(file.Dataset.DatasetId);

                    // Add all of the Scan Summaries for this dataset to the database, but first properly set the dataset ID
                    ssDao.AddAllStateless(
                        scanSumProvider.GetScanSummaries().Select(
                            summ =>
                            {
                                summ.DatasetId = file.Dataset.DatasetId;
                                return summ;
                            }).ToList());
                }

                progData.StepRange(100);

                // Cache features to database.
                // As above, acquire the lock outside the try so the finally only ever releases a held lock.
                this.analysis.DataProviders.DatabaseLock.EnterWriteLock();
                try
                {
                    featureCache.CacheFeatures(features, progressRpt);
                }
                catch (NonUniqueObjectException ex)
                {
                    // Partial persistence: report it and carry on with this dataset.
                    MessageBox.Show("Could not completely persist features: " + ex.Message);
                }
                catch (Exception ex) // TODO: Figure out which exception should actually be caught here
                {
                    MessageBox.Show("Could not persist features to database: " + ex.Message);
                    file.DatasetState = DatasetInformationViewModel.DatasetStates.Loaded;
                    continue;
                }
                finally
                {
                    // Always close write lock, even during failure condition so we don't have a recursive lock error.
                    this.analysis.DataProviders.DatabaseLock.ExitWriteLock();
                }

                stopWatch.Stop();
                logger.WriteLine("Writing: {0}s", stopWatch.Elapsed.TotalSeconds);
            }

            file.DatasetState = DatasetInformationViewModel.DatasetStates.FeaturesFound;
            ThreadSafeDispatcher.Invoke(() => this.FindMsFeaturesCommand.RaiseCanExecuteChanged());
            file.Progress = 0;
        }

        DatabaseIndexer.IndexFeatures(NHibernateUtil.Path);
    }
    finally
    {
        // Hide the progress indicators even when loading fails part-way through.
        taskBarProgress.ShowProgress(this, false);
        this.ShouldShowProgress = false;
    }
}
/// <summary>
/// Finds features in every valid dataset located directly inside a directory and
/// stores them, followed by the dataset records, in a newly created database.
/// </summary>
/// <param name="directoryPath">Directory to search for datasets; resolved through GetPath.</param>
/// <param name="databasePath">Location of the database to create; resolved through GetPath.</param>
public void CreateFeatureDatabase(string directoryPath, string databasePath)
{
    var sourceDirectory = GetPath(directoryPath);
    var targetDatabase = GetPath(databasePath);

    // One wildcard search pattern per supported MultiAlign file type.
    var searchPatterns = new List<string>();
    foreach (var fileType in DatasetLoader.SupportedFileTypes)
    {
        searchPatterns.Add("*" + fileType.Extension);
    }

    // Locate the datasets (top directory only, no recursion).
    var loader = new DatasetLoader();
    var foundDatasets = loader.GetValidDatasets(sourceDirectory, searchPatterns, SearchOption.TopDirectoryOnly);

    // Feature finding / filtering configuration, derived from the LTQ Orbitrap preset.
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);

    var tolerances = new FeatureTolerances
    {
        Mass = preset.Mass + 6,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };

    var findingOptions = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = 0.002,
        MaximumScanRange = 50
    };

    var lcmsFilterOptions = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRangeScans = new FilterRange(50, 300)
    };

    var msFilters = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    var spectralSettings = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = 0.01,
        IdScore = 1e-09,
        MzBinSize = 0.5,
        MzTolerance = 0.5,
        NetTolerance = 0.1,
        RequiredPeakCount = 32,
        SimilarityCutoff = 0.75,
        TopIonPercent = 0.8
    };

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    NHibernateUtil.CreateDatabase(targetDatabase);

    // Synchronization and IO for serializing all data to the database.
    var accessProviders = DataAccessFactory.CreateDataAccessProviders(targetDatabase, true);
    var featureLoader = new FeatureLoader { Providers = accessProviders };

    // Dataset ids are assigned sequentially, starting at zero, in enumeration order.
    var nextDatasetId = 0;
    foreach (var current in foundDatasets)
    {
        current.DatasetId = nextDatasetId;
        nextDatasetId++;

        var foundFeatures = FindFeatures(current,
                                         findingOptions,
                                         msFilters,
                                         lcmsFilterOptions,
                                         spectralSettings,
                                         featureFinder);
        featureLoader.CacheFeatures(foundFeatures);
    }

    accessProviders.DatasetCache.AddAll(foundDatasets);
}
/// <summary>
/// Loads baseline data for alignment.
/// </summary>
/// <param name="baselineInfo">Baseline dataset; required when the mass tag database is not the baseline.</param>
/// <param name="msFilterOptions">MS feature filtering options forwarded to the feature loader.</param>
/// <param name="lcmsFindingOptions">LC-MS feature finding options forwarded to the feature loader.</param>
/// <param name="lcmsFilterOptions">LC-MS feature filtering options forwarded to the feature loader.</param>
/// <param name="dataLoadOptions">Data loading options forwarded to the feature loader.</param>
/// <param name="dataProviders">Data access providers used by the feature loader.</param>
/// <param name="database">Mass tag database; required when it is used as the baseline.</param>
/// <param name="shouldUseMassTagDbAsBaseline">True to derive the baseline from <paramref name="database"/> instead of <paramref name="baselineInfo"/>.</param>
/// <returns>
/// The loaded baseline features when a baseline dataset is used. When the mass tag database is used,
/// the converted mass tags if nothing is subscribed to BaselineFeaturesLoaded, otherwise null.
/// </returns>
private IList<UMCLight> LoadBaselineData(DatasetInformation baselineInfo,
                                         MsFeatureFilteringOptions msFilterOptions,
                                         LcmsFeatureFindingOptions lcmsFindingOptions,
                                         LcmsFeatureFilteringOptions lcmsFilterOptions,
                                         DataLoadingOptions dataLoadOptions,
                                         FeatureDataAccessProviders dataProviders,
                                         MassTagDatabase database,
                                         bool shouldUseMassTagDbAsBaseline)
{
    IList<UMCLight> baselineFeatures = null;

    UpdateStatus("Loading baseline features.");
    if (!shouldUseMassTagDbAsBaseline)
    {
        if (baselineInfo == null)
        {
            throw new Exception("The baseline dataset was never set.");
        }

        var cache = new FeatureLoader { Providers = dataProviders };

        RegisterProgressNotifier(cache);
        try
        {
            UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment.");

            baselineFeatures = cache.LoadDataset(baselineInfo,
                                                 msFilterOptions,
                                                 lcmsFindingOptions,
                                                 lcmsFilterOptions,
                                                 dataLoadOptions,
                                                 m_scanSummaryProviderCache,
                                                 this.m_identificationsProvider);
            cache.CacheFeatures(baselineFeatures);

            // Snapshot the delegate so a concurrent unsubscribe cannot null it between check and call.
            var loadedHandler = BaselineFeaturesLoaded;
            if (loadedHandler != null)
            {
                loadedHandler(this, new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList()));
            }
        }
        finally
        {
            // Deregister even when loading or caching throws, so the notifier is not left attached.
            DeRegisterProgressNotifier(cache);
        }
    }
    else
    {
        if (database == null)
        {
            throw new NullReferenceException(
                "The mass tag database has to have data in it if it's being used for drift time alignment.");
        }

        UpdateStatus("Setting baseline features for post drift time alignment from mass tag database.");
        var tags = FeatureDataConverters.ConvertToUMC(database.MassTags);

        // Snapshot the delegate so a concurrent unsubscribe cannot null it between check and call.
        var tagsHandler = BaselineFeaturesLoaded;
        if (tagsHandler == null)
        {
            return tags;
        }

        if (tags != null)
        {
            tagsHandler(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database));
        }

        // NOTE(review): with a subscriber attached this branch falls through and returns null,
        // whereas without one it returns the converted tags. Behavior is preserved as-is;
        // confirm which result callers actually expect.
    }

    return baselineFeatures;
}
/// <summary>
/// Perform alignment.
/// </summary>
/// <param name="workFlowDatasets">Datasets to run on when being called externally from this view model.</param>
/// <param name="workflowProgress">The progress reporter for when this method is called externally from this view model.</param>
internal void AlignToBaseline(List<DatasetInformationViewModel> workFlowDatasets = null, IProgress<ProgressData> workflowProgress = null)
{
    // Use Promiscuous points when aligning to an AMT tag database
    // Do not use Promiscuous points when aligning to a baseline dataset
    this.analysis.Options.AlignmentOptions.UsePromiscuousPoints = !this.ShouldAlignToBaseline;

    // Flag whether we are aligning to an AMT tag database
    this.analysis.Options.AlignmentOptions.LCMSWarpOptions.AlignToMassTagDatabase = !this.ShouldAlignToBaseline;

    var taskBarProgress = TaskBarProgress.GetInstance();

    // Show the progress bar
    this.ShowAlignmentProgress = true;
    try
    {
        taskBarProgress.ShowProgress(this, true);

        // Update algorithms and providers
        var featureCache = new FeatureLoader { Providers = this.analysis.DataProviders };
        this.algorithms = this.builder.GetAlgorithmProvider(this.analysis.Options);
        this.aligner.m_algorithms = this.algorithms;

        var baselineFeatures = new List<UMCLight>();
        if (this.ShouldAlignToBaseline)
        {
            baselineFeatures = featureCache.Providers.FeatureCache.FindByDatasetId(this.selectedBaseline.DatasetId);
            this.SelectedBaseline.DatasetState = DatasetInformationViewModel.DatasetStates.Baseline;

            // Drop any alignment previously recorded for the baseline dataset itself.
            var baselineAlignment = (from x in this.alignmentInformation
                                     where x.DatasetID == this.selectedBaseline.DatasetId
                                     select x).ToList();
            if (baselineAlignment.Any())
            {
                this.alignmentInformation.Remove(baselineAlignment.Single());
            }
        }

        var alignmentData = new AlignmentDAOHibernate();
        alignmentData.ClearAll();

        var selectedFiles = workFlowDatasets ??
                            this.Datasets.Where(file => file.IsSelected && !file.DoingWork &&
                                                        (this.ShouldAlignToAMT || !file.IsBaseline)).ToList();
        foreach (var file in selectedFiles)
        {
            file.DatasetState = DatasetInformationViewModel.DatasetStates.Aligning;
        }

        workflowProgress = workflowProgress ?? new Progress<ProgressData>();
        IProgress<ProgressData> totalProgress = new Progress<ProgressData>(pd =>
        {
            this.AlignmentProgress = pd.Percent;
            workflowProgress.Report(pd);
            taskBarProgress.SetProgress(this, pd.Percent);
        });
        var totalProgressData = new ProgressData(totalProgress);

        DatabaseIndexer.IndexClustersDrop(NHibernateUtil.Path);
        DatabaseIndexer.IndexFeaturesDrop(NHibernateUtil.Path);

        var i = 1;
        foreach (var file in selectedFiles)
        {
            ThreadSafeDispatcher.Invoke(() => this.AlignCommand.RaiseCanExecuteChanged());
            ThreadSafeDispatcher.Invoke(() => this.DisplayAlignmentCommand.RaiseCanExecuteChanged());
            ThreadSafeDispatcher.Invoke(() => this.SaveAlignmentPlotsCommand.RaiseCanExecuteChanged());

            // When aligning to a baseline dataset, the baseline itself and datasets without
            // features are marked as aligned without doing any work.
            if ((file.Dataset.IsBaseline || !file.FeaturesFound) && this.ShouldAlignToBaseline)
            {
                file.DatasetState = DatasetInformationViewModel.DatasetStates.Aligned;
                continue;
            }

            // Release the read lock in a finally block so a failed query cannot leave it held.
            IList<UMCLight> features;
            this.analysis.DataProviders.DatabaseLock.EnterReadLock();
            try
            {
                features = featureCache.Providers.FeatureCache.FindByDatasetId(file.DatasetId);
            }
            finally
            {
                this.analysis.DataProviders.DatabaseLock.ExitReadLock();
            }

            AlignmentData alignment;

            totalProgressData.StepRange((100.0 * i++) / selectedFiles.Count);

            var fileInstance = file;
            var datasetProgress = new Progress<ProgressData>(pd =>
            {
                fileInstance.Progress = pd.Percent;
                totalProgressData.Report(pd.Percent);
            });

            if (this.ShouldAlignToBaseline)
            {
                // Aligning to a baseline dataset
                alignment = this.aligner.AlignToDataset(ref features, file.Dataset, baselineFeatures, datasetProgress);
                alignment.BaselineIsAmtDB = false;
            }
            else
            {
                // Aligning to a database
                alignment = this.aligner.AlignToDatabase(ref features, file.Dataset, this.analysis.MassTagDatabase, datasetProgress);
                alignment.BaselineIsAmtDB = true;
            }

            // Check if there is information from a previous alignment for this dataset.
            // If so, replace it. If not, just add the new one.
            var previousAlignment = this.alignmentInformation.Where(x => x.DatasetID == alignment.DatasetID).ToList();
            if (previousAlignment.Any())
            {
                this.alignmentInformation.Remove(previousAlignment.Single());
            }

            this.alignmentInformation.Add(alignment);
            file.Dataset.AlignmentData = alignment;

            // Release the write lock in a finally block so a failed write cannot leave it held.
            this.analysis.DataProviders.DatabaseLock.EnterWriteLock();
            try
            {
                featureCache.CacheFeatures(features);
            }
            finally
            {
                this.analysis.DataProviders.DatabaseLock.ExitWriteLock();
            }

            file.DatasetState = DatasetInformationViewModel.DatasetStates.Aligned;
            ThreadSafeDispatcher.Invoke(() => this.AlignCommand.RaiseCanExecuteChanged());
            ThreadSafeDispatcher.Invoke(() => this.DisplayAlignmentCommand.RaiseCanExecuteChanged());
            ThreadSafeDispatcher.Invoke(() => this.SaveAlignmentPlotsCommand.RaiseCanExecuteChanged());
            file.Progress = 0;
        }

        if (this.ShouldAlignToBaseline)
        {
            this.SelectedBaseline.DatasetState = DatasetInformationViewModel.DatasetStates.Aligned;
        }

        DatabaseIndexer.IndexFeatures(NHibernateUtil.Path);
    }
    finally
    {
        // Hide and reset the progress indicators even when alignment fails part-way through.
        taskBarProgress.ShowProgress(this, false);
        this.ShowAlignmentProgress = false;
        this.AlignmentProgress = 0;
    }
}
/// <summary>
/// Load the data from the dataset information objects to the cache at the analysis Path
/// </summary>
/// <param name="config">Analysis configuration supplying the options, datasets, data providers and mass tag database.</param>
private void PerformDataLoadAndAlignment(AnalysisConfig config)
{
    UmcLoaderFactory.Status += UMCLoaderFactory_Status;
    try
    {
        UpdateStatus("Loading data.");

        var analysisOptions = config.Analysis.Options;
        var datasets = config.Analysis.MetaData.Datasets.ToList();
        var lcmsFindingOptions = analysisOptions.LcmsFindingOptions;
        var lcmsFilterOptions = analysisOptions.LcmsFilteringOptions;
        var msFilterOptions = analysisOptions.MsFilteringOptions;
        var dataLoadOptions = analysisOptions.DataLoadOptions;
        var baselineDataset = config.Analysis.MetaData.BaselineDataset;

        var baselineFeatures = LoadBaselineData(baselineDataset,
                                                msFilterOptions,
                                                lcmsFindingOptions,
                                                lcmsFilterOptions,
                                                dataLoadOptions,
                                                config.Analysis.DataProviders,
                                                config.Analysis.MassTagDatabase,
                                                analysisOptions.AlignmentOptions.IsAlignmentBaselineAMasstagDB);

        var alignmentData = new AlignmentDAOHibernate();
        alignmentData.ClearAll();

        var providers = config.Analysis.DataProviders;
        var featureCache = new FeatureLoader { Providers = providers };

        RegisterProgressNotifier(featureCache);
        try
        {
            MassTagDatabase database = null;
            if (config.Analysis.MassTagDatabase != null)
            {
                database = new MassTagDatabase(config.Analysis.MassTagDatabase,
                                               analysisOptions.AlignmentOptions.MassTagObservationCount);
            }

            SingletonDataProviders.Providers = config.Analysis.DataProviders;

            foreach (var dataset in datasets)
            {
                // The baseline dataset is handled by LoadBaselineData above.
                if (dataset.IsBaseline)
                {
                    continue;
                }

                var features = featureCache.LoadDataset(dataset,
                                                        msFilterOptions,
                                                        lcmsFindingOptions,
                                                        lcmsFilterOptions,
                                                        dataLoadOptions,
                                                        m_scanSummaryProviderCache,
                                                        m_identificationsProvider);
                features = AlignDataset(features, baselineFeatures, database, dataset, baselineDataset);
                featureCache.CacheFeatures(features);
            }
        }
        finally
        {
            // Mirror LoadBaselineData: detach the progress notifier once this loader is done.
            DeRegisterProgressNotifier(featureCache);
        }
    }
    finally
    {
        // UmcLoaderFactory.Status is a static event; always unsubscribe so a failure
        // does not leave this instance attached to it.
        UmcLoaderFactory.Status -= UMCLoaderFactory_Status;
    }
}