/// <summary> /// Load the data from the dataset information objects to the cache at the analysis Path /// </summary> private void PerformDataLoadAndAlignment(AnalysisConfig config) { UmcLoaderFactory.Status += UMCLoaderFactory_Status; UpdateStatus("Loading data."); var analysisOptions = config.Analysis.Options; var datasets = config.Analysis.MetaData.Datasets.ToList(); var lcmsFilterOptions = analysisOptions.LcmsFilteringOptions; var msFilterOptions = analysisOptions.MsFilteringOptions; var baselineDataset = config.Analysis.MetaData.BaselineDataset; var baselineFeatures = LoadBaselineData(baselineDataset, msFilterOptions, analysisOptions.LcmsFindingOptions, lcmsFilterOptions, config.Analysis.DataProviders, config.Analysis.MassTagDatabase, config.Analysis.Options.AlignmentOptions.IsAlignmentBaselineAMasstagDB); var alignmentData = new AlignmentDAOHibernate(); alignmentData.ClearAll(); var providers = config.Analysis.DataProviders; var featureCache = new FeatureLoader { Providers = providers }; RegisterProgressNotifier(featureCache); MassTagDatabase database = null; if (config.Analysis.MassTagDatabase != null) database = new MassTagDatabase(config.Analysis.MassTagDatabase, config.Analysis.Options.AlignmentOptions.MassTagObservationCount); SingletonDataProviders.Providers = config.Analysis.DataProviders; foreach (var dataset in datasets) { if (dataset.IsBaseline) continue; var features = featureCache.LoadDataset(dataset, analysisOptions.MsFilteringOptions, analysisOptions.LcmsFindingOptions, analysisOptions.LcmsFilteringOptions); features = AlignDataset(features, baselineFeatures, database, dataset, baselineDataset); featureCache.CacheFeatures(features); } UmcLoaderFactory.Status -= UMCLoaderFactory_Status; }
public void CreateFeatureDatabase(string directoryPath, string databasePath) { var directory = GetPath(directoryPath); databasePath = GetPath(databasePath); // Loads the supported MultiAlign types var supportedTypes = DatasetInformation.SupportedFileTypes; var extensions = new List<string>(); supportedTypes.ForEach(x => extensions.Add("*" + x.Extension)); // Find our datasets var inputFiles = DatasetSearcher.FindDatasets(directory, extensions, SearchOption.TopDirectoryOnly); var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles); // Options setup var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap); var featureTolerances = new FeatureTolerances { Mass = instrumentOptions.Mass + 6, Net = instrumentOptions.NetTolerance, DriftTime = instrumentOptions.DriftTimeTolerance }; var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances) { MaximumNetRange = .002, MaximumScanRange = 50 }; var lcmsFilters = new LcmsFeatureFilteringOptions { FeatureLengthRange = new FilterRange(50, 300) }; var msFilterOptions = new MsFeatureFilteringOptions { MinimumIntensity = 5000, ChargeRange = new FilterRange(1, 6), ShouldUseChargeFilter = true, ShouldUseDeisotopingFilter = true, ShouldUseIntensityFilter = true }; var spectralOptions = new SpectralOptions { ComparerType = SpectralComparison.CosineDotProduct, Fdr = .01, IdScore = 1e-09, MzBinSize = .5, MzTolerance = .5, NetTolerance = .1, RequiredPeakCount = 32, SimilarityCutoff = .75, TopIonPercent = .8 }; var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased); NHibernateUtil.CreateDatabase(databasePath); // Synchronization and IO for serializing all data to the database. var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, true); var cache = new FeatureLoader { Providers = providers }; var datasetId = 0; foreach(var dataset in datasets) { dataset.DatasetId = datasetId++; var features = FindFeatures(dataset, featureFindingOptions, msFilterOptions, lcmsFilters, spectralOptions, finder); cache.CacheFeatures(features); } providers.DatasetCache.AddAll(datasets); }
/// <summary> /// Loads baseline data for alignment. /// </summary> private IList<UMCLight> LoadBaselineData(DatasetInformation baselineInfo, MsFeatureFilteringOptions msFilterOptions, LcmsFeatureFindingOptions lcmsFindingOptions, LcmsFeatureFilteringOptions lcmsFilterOptions, FeatureDataAccessProviders dataProviders, MassTagDatabase database, bool shouldUseMassTagDbAsBaseline) { IList<UMCLight> baselineFeatures = null; UpdateStatus("Loading baseline features."); if (!shouldUseMassTagDbAsBaseline) { if (baselineInfo == null) { throw new Exception("The baseline dataset was never set."); } var cache = new FeatureLoader { Providers = dataProviders }; RegisterProgressNotifier(cache); UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment."); baselineFeatures = cache.LoadDataset(baselineInfo, msFilterOptions, lcmsFindingOptions, lcmsFilterOptions); cache.CacheFeatures(baselineFeatures); if (BaselineFeaturesLoaded != null) { BaselineFeaturesLoaded(this, new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList())); } DeRegisterProgressNotifier(cache); } else { if (database == null) throw new NullReferenceException( "The mass tag database has to have data in it if it's being used for drift time alignment."); UpdateStatus("Setting baseline features for post drift time alignment from mass tag database."); var tags = FeatureDataConverters.ConvertToUMC(database.MassTags); if (BaselineFeaturesLoaded == null) return tags; if (tags != null) BaselineFeaturesLoaded(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database)); } return baselineFeatures; }