Пример #1
0
 /// <summary>
 /// Sets up the NHibernate caches for storing and retrieving data.
 /// </summary>
 /// <param name="path">Database path handed to <c>DataAccessFactory.CreateDataAccessProviders</c>.</param>
 /// <param name="createNew">
 /// Forwarded unchanged to the factory; presumably requests a fresh database
 /// (confirm against <c>DataAccessFactory</c>).
 /// </param>
 /// <returns>The providers object returned by the factory.</returns>
 /// <exception cref="IOException">
 /// Propagated untouched from the factory, with its original stack trace.
 /// </exception>
 private static FeatureDataAccessProviders SetupDataProviders(string path, bool createNew)
 {
     // No try/catch here on purpose: the previous handler only did "throw ex;",
     // which reset the stack trace without adding any handling. Letting the
     // exception propagate keeps the same exception type for callers and
     // preserves where it was actually raised.
     return DataAccessFactory.CreateDataAccessProviders(path, createNew);
 }
Пример #2
0
 /// <summary>
 /// Creates the data access providers for the given database, logging a hint
 /// when the database cannot be accessed.
 /// </summary>
 /// <param name="path">Database path handed to <c>DataAccessFactory.CreateDataAccessProviders</c>.</param>
 /// <param name="createNew">
 /// Forwarded unchanged to the factory; presumably requests a fresh database
 /// (confirm against <c>DataAccessFactory</c>).
 /// </param>
 /// <returns>The providers object returned by the factory.</returns>
 /// <exception cref="IOException">
 /// Rethrown after being logged; the original stack trace is preserved.
 /// </exception>
 private FeatureDataAccessProviders SetupDataProviders(string path, bool createNew)
 {
     try
     {
         return DataAccessFactory.CreateDataAccessProviders(path, createNew);
     }
     catch (IOException ex)
     {
         // Trailing space keeps the hint from running straight into the exception text.
         Logger.PrintMessage("Could not access the database.  Is it opened somewhere else? " + ex.Message);
         throw;
     }
 }
Пример #3
0
        /// <summary>
        ///     Loads an analysis on a background task and raises <c>AnalysisLoaded</c>
        ///     through <c>ThreadSafeDispatcher</c> once the analysis object is populated.
        /// </summary>
        /// <param name="recentAnalysis">
        ///     Supplies the path and name of the analysis database. When null, a status
        ///     message is reported and nothing is loaded.
        /// </param>
        public void LoadAnalysis(RecentAnalysis recentAnalysis)
        {
            if (recentAnalysis == null)
            {
                OnStatus("Cannot open analysis file.");
                return;
            }

            Action loadAnalysis = delegate
            {
                var filename = Path.Combine(recentAnalysis.Path, recentAnalysis.Name);

                OnStatus("Gaining access to the analysis database...");
                var providers = DataAccessFactory.CreateDataAccessProviders(filename, false);
                var analysis  = new MultiAlignAnalysis();
                analysis.MetaData.AnalysisPath      = recentAnalysis.Path;
                analysis.MetaData.AnalysisName      = recentAnalysis.Name;
                analysis.MetaData.AnalysisSetupInfo = null;
                analysis.DataProviders = providers;

                OnStatus("Detecting your clusters...");
                analysis.Clusters = providers.ClusterCache.FindAll();

                OnStatus("Updating your datasets...");
                analysis.MetaData.Datasets = providers.DatasetCache.FindAll().ToObservableCollection();

                OnStatus("Securing mass tags...");
                var provider = new MassTagDatabaseLoaderCache();
                provider.Provider        = analysis.DataProviders.MassTags;
                analysis.MassTagDatabase = provider.LoadDatabase();

                OnStatus("Analysis Loaded...");
                ThreadSafeDispatcher.Invoke(() =>
                {
                    // Copy the delegate first: a subscriber removing itself between the
                    // null check and the call would otherwise cause a NullReferenceException.
                    var handler = AnalysisLoaded;
                    if (handler != null)
                    {
                        handler(this, new AnalysisStatusArgs(analysis));
                    }
                });
            };

            // NOTE(review): nothing visible here observes exceptions thrown inside the
            // task (e.g. a database that cannot be opened) - confirm that the owner of
            // m_loadingTask inspects its result or fault state.
            m_loadingTask = new Task(loadAnalysis);
            m_loadingTask.Start();
        }
Пример #4
0
        /// <summary>
        ///     Prints a tab-delimited table to the console with one row per cluster that
        ///     has at least one peptide identification: cluster id, number of features in
        ///     the cluster, MS/MS count and the total number of datasets observed.
        /// </summary>
        /// <param name="databasetPath">Path of the database to open (existing database; not created).</param>
        /// <param name="minMsMsCount">
        ///     Only clusters whose <c>MsMsCount</c> exceeds this value are reconstructed
        ///     before their features are inspected.
        /// </param>
        public void TestClusters(string databasetPath, int minMsMsCount)
        {
            var providers = DataAccessFactory.CreateDataAccessProviders(databasetPath, false);
            var clusters  = providers.ClusterCache.FindAll();

            // Regular (non-verbatim) literal so that \t is emitted as a real tab,
            // matching the delimiter used by the data rows below.
            Console.WriteLine("Cluster ID\tCluster Size\tMs Ms Total\tMatching");
            foreach (var cluster in clusters)
            {
                var clusterPeptideMap = new ClusterIdentificationStatistic();

                if (cluster.MsMsCount > minMsMsCount)
                {
                    cluster.ReconstructUMCCluster(providers, true, false, true, true);
                }

                // NOTE(review): clusters that were not reconstructed above are still walked
                // here - presumably UmcList is an empty (non-null) list for them; confirm.
                var hasIdentifications = false;
                foreach (var umc in cluster.UmcList)
                {
                    foreach (var feature in umc.Features)
                    {
                        foreach (var spectrum in feature.MSnSpectra)
                        {
                            foreach (var peptide in spectrum.Peptides)
                            {
                                var sequence = peptide.Sequence;

                                // First sighting of this sequence: create its counter and dataset list.
                                if (!clusterPeptideMap.Peptides.ContainsKey(sequence))
                                {
                                    clusterPeptideMap.Peptides.Add(sequence, 0);
                                    clusterPeptideMap.PeptideDatasets.Add(sequence, new List <int>());
                                }
                                clusterPeptideMap.PeptideDatasets[sequence].Add(umc.GroupId);
                                clusterPeptideMap.Peptides[sequence]++;
                                hasIdentifications = true;
                            }
                        }
                    }
                }

                if (hasIdentifications)
                {
                    Console.WriteLine("{0}\t{1}\t{2}\t{3}", cluster.Id, cluster.UmcList.Count, cluster.MsMsCount,
                                      clusterPeptideMap.TotalDatasetsObserved);
                }
            }
        }
Пример #5
0
        /// <summary>
        ///     Creates a new database under <c>m_basePath</c> and stores ten clusters
        ///     (ids 0 through 9, all in group 0) through the cluster cache.
        /// </summary>
        /// <param name="name">File name of the database, combined with <c>m_basePath</c>.</param>
        public void TestAddClusters(string name)
        {
            var databasePath = Path.Combine(m_basePath, name);

            // This is a factory based method that creates a set of data access providers used throughout MultiAlign
            var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, true);

            var clusters = new List <UMCClusterLight>();

            for (var i = 0; i < 10; i++)
            {
                var cluster = new UMCClusterLight();
                cluster.Id      = i;
                cluster.GroupId = 0;

                // Without this the list stays empty and AddAll below persists nothing.
                clusters.Add(cluster);
            }
            providers.ClusterCache.AddAll(clusters);
        }
Пример #6
0
 /// <summary>
 ///     Sets up the NHibernate caches for storing and retrieving data.
 /// </summary>
 /// <param name="path">Database path handed to <c>DataAccessFactory.CreateDataAccessProviders</c>.</param>
 /// <param name="createNew">
 ///     Forwarded unchanged to the factory; presumably requests a fresh database
 ///     (confirm against <c>DataAccessFactory</c>).
 /// </param>
 /// <remarks>
 ///     The value returned by the factory is discarded, so this method is only
 ///     useful for whatever side effects the factory call has.
 /// </remarks>
 private static void SetupDataProviders(string path, bool createNew)
 {
     DataAccessFactory.CreateDataAccessProviders(path, createNew);
 }
Пример #7
0
        /// <summary>
        ///     Example walk-through: opens an existing database, reconstructs every
        ///     cluster and iterates down to the MS/MS spectrum metadata of each MS
        ///     feature. Reading the raw spectra is switched off by a local flag.
        /// </summary>
        /// <param name="databasePath">Path of the database to open.</param>
        /// <param name="indexDatabase">
        ///     When true, clusters and features are indexed before the database is opened.
        /// </param>
        public void CreateUMCClusterLight(string databasePath, bool indexDatabase)
        {
            // Indexing has to happen before the session to the database is opened.
            if (indexDatabase)
            {
                DatabaseIndexer.IndexClusters(databasePath);
                DatabaseIndexer.IndexFeatures(databasePath);
            }

            // Factory call that builds the set of data access providers used throughout MultiAlign.
            // (If only the clusters were needed, connecting with NHibernateUtil.ConnectToDatabase
            // and calling FindAll on a UmcClusterDAOHibernate would be enough.)
            var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, false);

            var allClusters       = providers.ClusterCache.FindAll();
            var includeMsFeatures = true;
            var includeMsMs       = true;
            var loadRawData       = false;

            // Dataset lookup by id; only needed when the raw data is read.
            var allDatasets  = providers.DatasetCache.FindAll();
            var datasetsById = new Dictionary <int, DatasetInformation>();
            foreach (var dataset in allDatasets)
            {
                datasetsById.Add(dataset.DatasetId, dataset);
            }

            foreach (var cluster in allClusters)
            {
                cluster.ReconstructUMCCluster(providers, true, false, includeMsFeatures, includeMsMs);

                foreach (var lcmsFeature in cluster.Features)
                {
                    foreach (var msFeature in lcmsFeature.Features)
                    {
                        foreach (var spectrumInfo in msFeature.MSnSpectra)
                        {
                            // This is where the MS/MS spectrum metadata can be used.
                            // With the path to the raw file a reader can pull the actual
                            // MS/MS spectrum (mzXML and Thermo .RAW, chosen by file extension).
                            if (!loadRawData)
                            {
                                continue;
                            }

                            DatasetInformation datasetInfo = null;
                            if (!datasetsById.TryGetValue(spectrumInfo.GroupId, out datasetInfo))
                            {
                                continue;
                            }

                            if (datasetInfo.RawFile == null)
                            {
                                continue;
                            }

                            // The reader acts as a bridge: spectra are reachable from PNNLOmics
                            // without referencing the Thermo DLLs, and they are fetched one at a
                            // time instead of being loaded in bulk for large datasets.
                            var reader = new InformedProteomicsReader(spectrumInfo.GroupId, datasetInfo.RawFile.Path);

                            // Fetch the actual spectrum; what happens with it is up to the caller of this example.
                            var scanSummary = new ScanSummary();
                            var rawSpectrum = reader.GetRawSpectra(spectrumInfo.Scan, 2, out scanSummary);
                        }
                    }
                }
            }
        }
Пример #8
0
        /// <summary>
        ///     Aligns each dataset in the database against the dataset that precedes it
        ///     and hands the result to <c>AlignDatasets</c>, which receives one match file
        ///     path and one error file path per pair.
        /// </summary>
        /// <param name="relativeDatabasePath">Database location, resolved through <c>GetPath</c>.</param>
        /// <param name="relativeName">Output folder, resolved through <c>GetOutputPath</c>; created when missing.</param>
        /// <param name="name">Prefix of the generated match/error file names.</param>
        /// <param name="alignmentType">Alignment algorithm requested from <c>FeatureAlignerFactory</c>.</param>
        /// <param name="clusterType">Clustering algorithm requested from <c>ClusterFactory</c>.</param>
        public void GenerateClusterAlignmentStatistics(string relativeDatabasePath,
                                                       string relativeName,
                                                       string name,
                                                       FeatureAlignmentType alignmentType,
                                                       LcmsFeatureClusteringAlgorithmType clusterType)
        {
            var databaseFile = GetPath(relativeDatabasePath);
            var outputFolder = GetOutputPath(relativeName);

            if (!Directory.Exists(outputFolder))
            {
                Directory.CreateDirectory(outputFolder);
            }

            // Open the NHibernate-backed database (existing file, not created).
            var providers = DataAccessFactory.CreateDataAccessProviders(databaseFile, false);

            // Alignment and spectral comparison settings.
            var alignmentSettings = new AlignmentOptions();
            var spectralSettings = new SpectralOptions
            {
                ComparerType = SpectralComparison.CosineDotProduct,
                Fdr = .01,
                IdScore = 1e-09,
                MzBinSize = .5,
                MzTolerance = .5,
                NetTolerance = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff = .75,
                TopIonPercent = .8
            };

            // Tolerances derived from the LTQ Orbitrap instrument preset.
            var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var tolerances = new FeatureTolerances
            {
                Mass = preset.Mass + 6,
                Net = preset.NetTolerance,
                DriftTime = preset.DriftTimeTolerance
            };

            UpdateStatus("Retrieving all datasets for test.");
            var allDatasets = providers.DatasetCache.FindAll();

            // Build the algorithms and hook them up to progress reporting.
            var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
                                                                     alignmentSettings.LCMSWarpOptions,
                                                                     spectralSettings);
            var clusterer = ClusterFactory.Create(clusterType);
            clusterer.Parameters = new FeatureClusterParameters <UMCLight>
            {
                Tolerances = tolerances
            };

            RegisterProgressNotifier(aligner);
            RegisterProgressNotifier(clusterer);

            // Each iteration pairs dataset [pair] (baseline) with dataset [pair + 1] (alignee).
            for (var pair = 0; pair < allDatasets.Count - 1; pair++)
            {
                var matchFile = Path.Combine(outputFolder, string.Format("{0}-{1}-matches.txt", name, pair));
                var errorFile = Path.Combine(outputFolder, string.Format("{0}-{1}-errors.txt", name, pair));

                var baseline = allDatasets[pair];
                var alignee  = allDatasets[pair + 1];

                // Raw reader for the baseline dataset.
                using (var baselineReader = new InformedProteomicsReader())
                {
                    baselineReader.AddDataFile(baseline.RawFile.Path, 0);

                    // Raw reader for the dataset being aligned.
                    using (var aligneeReader = new InformedProteomicsReader())
                    {
                        aligneeReader.AddDataFile(alignee.RawFile.Path, 0);

                        var baselineFeatures = RetrieveFeatures(baseline.DatasetId, providers);
                        var aligneeFeatures  = RetrieveFeatures(alignee.DatasetId, providers);
                        var baselineSpectra  = new CachedFeatureSpectraProvider(baselineReader, baselineFeatures);
                        var aligneeSpectra   = new CachedFeatureSpectraProvider(aligneeReader, aligneeFeatures);

                        AlignDatasets(baselineFeatures, aligneeFeatures,
                                      baselineSpectra, aligneeSpectra,
                                      aligner, clusterer,
                                      matchFile, errorFile);
                    }
                }
            }
        }
Пример #9
0
        /// <summary>
        ///     Finds the supported dataset files directly inside a directory, runs
        ///     feature finding on each one, caches the features and finally stores the
        ///     dataset records in a newly created database.
        /// </summary>
        /// <param name="directoryPath">Folder holding the dataset files, resolved through <c>GetPath</c>.</param>
        /// <param name="databasePath">Location of the database to create, resolved through <c>GetPath</c>.</param>
        public void CreateFeatureDatabase(string directoryPath, string databasePath)
        {
            var sourceDirectory = GetPath(directoryPath);

            databasePath = GetPath(databasePath);

            // One wildcard pattern per file type MultiAlign supports.
            var fileTypes = DatasetLoader.SupportedFileTypes;
            var patterns  = new List <string>();
            foreach (var fileType in fileTypes)
            {
                patterns.Add("*" + fileType.Extension);
            }

            // Collect the datasets (top directory only, no recursion).
            var loader   = new DatasetLoader();
            var datasets = loader.GetValidDatasets(sourceDirectory, patterns, SearchOption.TopDirectoryOnly);

            // Tolerances derived from the LTQ Orbitrap instrument preset.
            var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var tolerances = new FeatureTolerances
            {
                Mass = preset.Mass + 6,
                Net = preset.NetTolerance,
                DriftTime = preset.DriftTimeTolerance
            };
            var findingOptions = new LcmsFeatureFindingOptions(tolerances)
            {
                MaximumNetRange = .002,
                MaximumScanRange = 50
            };
            var lcmsFilterOptions = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRangeScans = new FilterRange(50, 300)
            };
            var msFilters = new MsFeatureFilteringOptions
            {
                MinimumIntensity = 5000,
                ChargeRange = new FilterRange(1, 6),
                ShouldUseChargeFilter = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter = true
            };
            var spectralSettings = new SpectralOptions
            {
                ComparerType = SpectralComparison.CosineDotProduct,
                Fdr = .01,
                IdScore = 1e-09,
                MzBinSize = .5,
                MzTolerance = .5,
                NetTolerance = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff = .75,
                TopIonPercent = .8
            };
            var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            NHibernateUtil.CreateDatabase(databasePath);

            // Providers plus the loader that serializes the features to the database.
            var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
            var featureCache = new FeatureLoader
            {
                Providers = providers
            };

            // Dataset ids are handed out sequentially, starting at zero.
            var nextDatasetId = 0;
            foreach (var dataset in datasets)
            {
                dataset.DatasetId = nextDatasetId;
                nextDatasetId++;

                var features = FindFeatures(dataset, findingOptions, msFilters,
                                            lcmsFilterOptions, spectralSettings, featureFinder);

                featureCache.CacheFeatures(features);
            }

            providers.DatasetCache.AddAll(datasets);
        }