Пример #1
0
        /// <summary>
        ///     Creates LCMS Features
        /// </summary>
        public List <UMCLight> CreateLcmsFeatures(
            DatasetInformation information,
            List <MSFeatureLight> msFeatures,
            LcmsFeatureFindingOptions options,
            LcmsFeatureFilteringOptions filterOptions,
            IScanSummaryProvider provider,
            IProgress <ProgressData> progress = null)
        {
            // Make features
            if (msFeatures.Count < 1)
            {
                throw new Exception("No features were found in the feature files provided.");
            }

            UpdateStatus("Finding features.");

            ValidateFeatureFinderMaxScanLength(information, options, filterOptions);

            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            finder.Progress += (sender, args) => UpdateStatus(args.Message);
            var features = finder.FindFeatures(msFeatures, options, provider, progress);

            UpdateStatus("Filtering features.");
            List <UMCLight> filteredFeatures = LcmsFeatureFilters.FilterFeatures(features, filterOptions, provider);

            UpdateStatus(string.Format("Filtered features from: {0} to {1}.", features.Count, filteredFeatures.Count));
            return(filteredFeatures);
        }
Пример #2
0
        /// <summary>
        ///  Finds features given a dataset
        /// </summary>
        private IList <UMCLight> FindFeatures(DatasetInformation information,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder)

        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = RawLoaderFactory.CreateFileReader(information.RawFile.Path))
            {
                rawProviderX.AddDataFile(information.RawFile.Path, 0);
                UpdateStatus("Creating LCMS Features.");
                var features = featureFinder.FindFeatures(msFeatures,
                                                          featureFindingOptions,
                                                          rawProviderX);
                features = LcmsFeatureFilters.FilterFeatures(features, lcmsFilterOptions, information.ScanTimes);

                var datasetId = information.DatasetId;
                foreach (var feature in features)
                {
                    var lightEntry = new List <MSFeatureLight>();
                    feature.GroupId = datasetId;
                    foreach (var msFeature in feature.MsFeatures)
                    {
                        msFeature.GroupId = datasetId;
                        foreach (var msmsFeature in msFeature.MSnSpectra)
                        {
                            msmsFeature.GroupId = datasetId;
                            foreach (var peptide in msmsFeature.Peptides)
                            {
                                peptide.GroupId = datasetId;
                            }
                        }

                        if (msFeature.MSnSpectra.Count > 0)
                        {
                            lightEntry.Add(msFeature);
                        }
                    }

                    // We are doing this so that we dont have a ton of MS features in the database
                    feature.MsFeatures.Clear();
                    feature.MsFeatures.AddRange(lightEntry);
                }

                LinkPeptidesToFeatures(information.SequenceFile.Path,
                                       features,
                                       peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                DeRegisterProgressNotifier(featureFinder);
                return(features);
            }
        }
Пример #3
0
 /// <summary>
 ///     Constructor
 /// </summary>
 public MultiAlignAnalysisOptions()
 {
     InstrumentTolerances = new FeatureTolerances();
     MassTagDatabaseOptions = new MassTagDatabaseOptions();
     MsFilteringOptions = new MsFeatureFilteringOptions();
     LcmsFindingOptions = new LcmsFeatureFindingOptions(InstrumentTolerances);
     LcmsFilteringOptions = new LcmsFeatureFilteringOptions();
     LcmsFilteringOptions.TreatAsTimeNotScan = true;
     LcmsFilteringOptions.FeatureLengthRange = new FilterRange(0, 20);
     AlignmentOptions = new AlignmentOptions();
     LcmsClusteringOptions = new LcmsClusteringOptions(InstrumentTolerances);
     StacOptions = new StacOptions();
     HasMsMs = false;
     UsedIonMobility = false;
 }
Пример #4
0
        /// <summary>
        ///     Constructor
        /// </summary>
        public MultiAlignAnalysisOptions()
        {
            DataLoadOptions        = new DataLoadingOptions();
            InstrumentTolerances   = new FeatureTolerances();
            MassTagDatabaseOptions = new MassTagDatabaseOptions();
            MsFilteringOptions     = new MsFeatureFilteringOptions();
            LcmsFindingOptions     = new LcmsFeatureFindingOptions(InstrumentTolerances);
            LcmsFilteringOptions   = new LcmsFeatureFilteringOptions
            {
                FilterOnMinutes           = true,
                FeatureLengthRangeMinutes = new FilterRange(0, 20),
                MinimumDataPoints         = 3,
                FeatureLengthRangeScans   = new FilterRange(0, 2000)
            };

            AlignmentOptions      = new AlignmentOptions();
            LcmsClusteringOptions = new LcmsClusteringOptions(InstrumentTolerances);
            StacOptions           = new StacOptions();
            HasMsMs         = false;
            UsedIonMobility = false;
            this.ClusterPostProcessingoptions = new ClusterPostProcessingOptions();
        }
Пример #5
0
 /// <summary>
 /// Make sure the value for options.MaximumScanRange, which is used by the Feature Finder,
 /// is at least as large as the filterOptions.FeatureLengthRange.Maximum value,
 /// which is used for filtering the features by length
 /// </summary>
 /// <param name="information"></param>
 /// <param name="options"></param>
 /// <param name="filterOptions"></param>
 private static void ValidateFeatureFinderMaxScanLength(
     DatasetInformation information,
     LcmsFeatureFindingOptions options,
     LcmsFeatureFilteringOptions filterOptions)
 {
     if (!filterOptions.FilterOnMinutes)
     {
         if (options.MaximumScanRange < filterOptions.FeatureLengthRangeMinutes.Maximum)
         {
             // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
             options.MaximumScanRange = (int)filterOptions.FeatureLengthRangeMinutes.Maximum;
         }
     }
     else
     {
         if (options.MaximumScanRange < filterOptions.FeatureLengthRangeScans.Maximum)
         {
             // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
             options.MaximumScanRange = (int)filterOptions.FeatureLengthRangeScans.Maximum;
         }
     }
 }
Пример #6
0
        /// <summary>
        ///     Runs the MultiAlign analysis
        /// </summary>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
                                              IEnumerable <DatasetInformation> aligneeDatasets,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
                                              IFeatureAligner <IEnumerable <UMCLight>,
                                                               IEnumerable <UMCLight>,
                                                               AlignmentData> aligner,
                                              IClusterer <UMCLight, UMCClusterLight> clusterer,
                                              string matchPath,
                                              string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = new InformedProteomicsReader())
            {
                rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                                                                  featureFindingOptions,
                                                                  rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load the alignee dataset
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = new InformedProteomicsReader())
                    {
                        rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                                                         featureFindingOptions,
                                                                         rawProviderY);
                        LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr,
                                               peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // cluster before we do anything else....
                        var allFeatures = new List <UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            feature.Net = feature.Net;
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters     = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider  = providerY;
                        aligner.BaselineSpectraProvider = providerX;


                        UpdateStatus("Aligning data");
                        // Aligner data
                        var data    = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;


                        WriteErrors(errorPath, matches);

                        // create anchor points for LCMSWarp alignment
                        var massPoints = new List <RegressionPoint>();
                        var netPoints  = new List <RegressionPoint>();
                        foreach (var match in matches)
                        {
                            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                                                                  match.AnchorPointY.Mz);
                            var netError  = match.AnchorPointX.Net - match.AnchorPointY.Net;
                            var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                            massPoints.Add(massPoint);

                            var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                            netPoints.Add(netPoint);
                        }


                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId  = -1;
                        }
                        // Then cluster after alignment!
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                                   preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                                   postAlignment.DifferentCluster));

                        SaveMatches(matchPath, matches);
                    }
                }
            }

            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }
Пример #7
0
        public void TestClustering(
            string directory,
            string outputPath,
            FeatureAlignmentType alignmentType,
            LcmsFeatureClusteringAlgorithmType clusterType)
        {
            var matchPath = string.Format("{0}.txt", outputPath);
            var errorPath = string.Format("{0}-errors.txt", outputPath);

            // Loads the supported MultiAlign types
            var supportedTypes = DatasetLoader.SupportedFileTypes;
            var extensions     = new List <string>();

            supportedTypes.ForEach(x => extensions.Add("*" + x.Extension));

            // Find our datasets
            var datasetLoader = new DatasetLoader();
            var datasets      = datasetLoader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

            // Setup our alignment options
            var alignmentOptions = new AlignmentOptions();
            var spectralOptions  = new SpectralOptions
            {
                ComparerType      = SpectralComparison.CosineDotProduct,
                Fdr               = .01,
                IdScore           = 1e-09,
                MzBinSize         = .5,
                MzTolerance       = .5,
                NetTolerance      = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff  = .75,
                TopIonPercent     = .8
            };


            // Options setup
            var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass      = instrumentOptions.Mass + 6,
                Net       = instrumentOptions.NetTolerance,
                DriftTime = instrumentOptions.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange  = .002,
                MaximumScanRange = 50
            };

            // Create our algorithms
            var finder  = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
                                                                     alignmentOptions.LCMSWarpOptions,
                                                                     spectralOptions);
            var clusterer = ClusterFactory.Create(clusterType);

            clusterer.Parameters = new FeatureClusterParameters <UMCLight>
            {
                Tolerances = featureTolerances
            };

            RegisterProgressNotifier(aligner);
            RegisterProgressNotifier(finder);
            RegisterProgressNotifier(clusterer);

            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRangeScans = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity           = 5000,
                ChargeRange                = new FilterRange(1, 6),
                ShouldUseChargeFilter      = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter   = true
            };

            for (var i = 0; i < 1; i++)
            {
                var aligneeDatasets = datasets.Where((t, j) => j != i).ToList();
                PerformMultiAlignAnalysis(datasets[0],
                                          aligneeDatasets,
                                          featureFindingOptions,
                                          msFilterOptions,
                                          lcmsFilters,
                                          spectralOptions,
                                          finder,
                                          aligner,
                                          clusterer,
                                          matchPath,
                                          errorPath);
            }
        }
Пример #8
0
        public void CreateFeatureDatabase(string directoryPath, string databasePath)
        {
            var directory  = GetPath(directoryPath);
            databasePath   = GetPath(databasePath);

            // Loads the supported MultiAlign types
            var supportedTypes = DatasetInformation.SupportedFileTypes;
            var extensions = new List<string>();
            supportedTypes.ForEach(x => extensions.Add("*" + x.Extension));

            // Find our datasets
            var inputFiles = DatasetSearcher.FindDatasets(directory,
                extensions,
                SearchOption.TopDirectoryOnly);
            var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

            // Options setup
            var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass = instrumentOptions.Mass + 6,
                Net = instrumentOptions.NetTolerance,
                DriftTime = instrumentOptions.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange = .002,
                MaximumScanRange = 50
            };
            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRange = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity = 5000,
                ChargeRange = new FilterRange(1, 6),
                ShouldUseChargeFilter = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter = true
            };
            var spectralOptions = new SpectralOptions
            {
                ComparerType = SpectralComparison.CosineDotProduct,
                Fdr = .01,
                IdScore = 1e-09,
                MzBinSize = .5,
                MzTolerance = .5,
                NetTolerance = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff = .75,
                TopIonPercent = .8
            };
            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            NHibernateUtil.CreateDatabase(databasePath);
            // Synchronization and IO for serializing all data to the database.
            var providers   = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
            var cache       = new FeatureLoader
            {
               Providers = providers
            };

            var datasetId = 0;
            foreach(var dataset in datasets)
            {
                dataset.DatasetId = datasetId++;
                var features = FindFeatures(dataset,
                                            featureFindingOptions,
                                            msFilterOptions,
                                            lcmsFilters,
                                            spectralOptions,
                                            finder);

                cache.CacheFeatures(features);
            }
            providers.DatasetCache.AddAll(datasets);
        }
Пример #9
0
        /// <summary>
        ///  Finds features given a dataset
        /// </summary>
        private IList<UMCLight> FindFeatures(  DatasetInformation               information,
            LcmsFeatureFindingOptions   featureFindingOptions,
            MsFeatureFilteringOptions   msFilterOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            SpectralOptions             peptideOptions,
            IFeatureFinder              featureFinder)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures  = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);
            msFeatures      = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX  = RawLoaderFactory.CreateFileReader(information.RawPath))
            {
                rawProviderX.AddDataFile(information.RawPath, 0);
                UpdateStatus("Creating LCMS Features.");
                var features    = featureFinder.FindFeatures(msFeatures,
                                                             featureFindingOptions,
                                                             rawProviderX);
                features        = LcmsFeatureFilters.FilterFeatures(features, lcmsFilterOptions);

                var datasetId = information.DatasetId;
                foreach (var feature in features)
                {
                    var lightEntry = new List<MSFeatureLight>();
                    feature.GroupId = datasetId;
                    foreach (var msFeature in feature.MsFeatures)
                    {
                        msFeature.GroupId = datasetId;
                        foreach (var msmsFeature in msFeature.MSnSpectra)
                        {
                            msmsFeature.GroupId = datasetId;
                            foreach (var peptide in msmsFeature.Peptides)
                            {
                                peptide.GroupId = datasetId;
                            }

                        }

                        if (msFeature.MSnSpectra.Count > 0)
                            lightEntry.Add(msFeature);
                    }

                    // We are doing this so that we dont have a ton of MS features in the database
                    feature.MsFeatures.Clear();
                    feature.MsFeatures.AddRange(lightEntry);
                }

                LinkPeptidesToFeatures(information.SequencePath,
                                        features,
                                        peptideOptions.Fdr,
                                        peptideOptions.IdScore);

                DeRegisterProgressNotifier(featureFinder);
                return features;
            }
        }
Пример #10
0
        /// <summary>
        ///     Load a single dataset from the provider.
        /// </summary>
        /// <returns></returns>
        public IList <UMCLight> LoadDataset(DatasetInformation dataset,
                                            MsFeatureFilteringOptions msFilteringOptions,
                                            LcmsFeatureFindingOptions lcmsFindingOptions,
                                            LcmsFeatureFilteringOptions lcmsFilteringOptions,
                                            DataLoadingOptions dataLoadOptions,
                                            ScanSummaryProviderCache providerCache,
                                            IdentificationProviderCache identificationProviders,
                                            IProgress <ProgressData> progress = null)
        {
            var progData = new ProgressData(progress);
            IScanSummaryProvider provider = null;

            if (!string.IsNullOrWhiteSpace(dataset.RawFile.Path))
            {
                UpdateStatus("Using raw data to create better features.");
                provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
            }

            progData.StepRange(1);
            progData.Status = "Looking for existing features in the database.";
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
            var datasetId     = dataset.DatasetId;
            var features      = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);
            var hasMsFeatures = features.Any(f => f.MsFeatures.Any());

            var msFeatures = new List <MSFeatureLight>();

            if (!hasMsFeatures)
            {
                progData.StepRange(2);
                progData.Status = "Loading MS Feature Data.";
                UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                                           dataset.DatasetName));

                var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
                msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
            }

            progData.StepRange(3);
            progData.Status = "Loading scan summaries.";
            ////var scansInfo = UmcLoaderFactory.LoadScanSummaries(dataset.Scans.Path);
            ////dataset.BuildScanTimes(scansInfo);

            progData.StepRange(100);

            var msnSpectra = new List <MSSpectra>();

            // If we don't have any features, then we have to create some from the MS features
            // provided to us.
            if (features.Count < 1)
            {
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, provider, ref dataset);

                progData.Status = "Creating LCMS features.";
                features        = CreateLcmsFeatures(dataset,
                                                     msFeatures,
                                                     lcmsFindingOptions,
                                                     lcmsFilteringOptions,
                                                     provider,
                                                     new Progress <ProgressData>(pd => progData.Report(pd.Percent)));

                //var maxScan = Convert.ToDouble(features.Max(feature => feature.Scan));
                //var minScan = Convert.ToDouble(features.Min(feature => feature.Scan));
                var maxScan   = features.Max(feature => feature.Scan);
                var minScan   = features.Min(feature => feature.Scan);
                var id        = 0;
                var scanTimes = dataset.ScanTimes;

                foreach (var feature in features)
                {
                    feature.Id = id++;
                    //feature.Net = (Convert.ToDouble(feature.Scan) - minScan) / (maxScan - minScan);
                    feature.Net = (Convert.ToDouble(scanTimes[feature.Scan]) - scanTimes[minScan]) / (scanTimes[maxScan] - scanTimes[minScan]);
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                    feature.NetAligned    = feature.Net;
                    feature.GroupId       = datasetId;
                    feature.SpectralCount = feature.MsFeatures.Count;

                    foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                    {
                        msFeature.UmcId   = feature.Id;
                        msFeature.GroupId = datasetId;
                        msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                        msnSpectra.AddRange(msFeature.MSnSpectra);
                    }
                }
            }
            else
            {
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id      = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);
                    foreach (var feature in
                             from feature in msFeatures
                             let doesFeatureExists = map.ContainsKey(feature.UmcId)
                                                     where doesFeatureExists
                                                     select feature)
                    {
                        map[feature.UmcId].AddChildFeature(feature);
                    }
                }
            }

            //if (provider is ISpectraProvider)
            //{
            //    var spectraProvider = provider as ISpectraProvider;
            //    UmcLoaderFactory.LoadMsMs(features.ToList(), spectraProvider);
            //}

            // Process the MS/MS data with peptides
            UpdateStatus("Reading List of Peptides");
            if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
            {
                UpdateStatus("Reading List of Peptides");
                var idProvider  = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
                var peptideList = idProvider.GetAllIdentifications();

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }

            progData.Report(100);

            return(features);
        }
Пример #11
0
 public UmcTreeFeatureFinder()
 {
     FilteringOptions = new LcmsFeatureFilteringOptions();
 }
Пример #12
0
        /// <summary>
        ///     Load a single dataset from the provider.
        /// </summary>
        /// <returns></returns>
        public IList<UMCLight> LoadDataset(DatasetInformation dataset,
            MsFeatureFilteringOptions msFilteringOptions,
            LcmsFeatureFindingOptions lcmsFindingOptions,
            LcmsFeatureFilteringOptions lcmsFilteringOptions)
        {
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
            var datasetId = dataset.DatasetId;
            var features = UmcLoaderFactory.LoadUmcFeatureData(dataset.Features.Path, dataset.DatasetId,
                Providers.FeatureCache);

            UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                dataset.DatasetName));
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
            var scansInfo = UmcLoaderFactory.LoadScanSummaries(dataset.Scans.Path);
            dataset.BuildScanTimes(scansInfo);

            var msnSpectra = new List<MSSpectra>();

            // If we don't have any features, then we have to create some from the MS features
            // provided to us.
            if (features.Count < 1)
            {
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, ref dataset);

                features = CreateLcmsFeatures(dataset,
                    msFeatures,
                    lcmsFindingOptions,
                    lcmsFilteringOptions);

                //var maxScan = Convert.ToDouble(features.Max(feature => feature.Scan));
                //var minScan = Convert.ToDouble(features.Min(feature => feature.Scan));
                var maxScan = features.Max(feature => feature.Scan);
                var minScan = features.Min(feature => feature.Scan);
                var id = 0;
                var scanTimes = dataset.ScanTimes;

                foreach (var feature in features)
                {
                    feature.Id = id++;
                    //feature.Net = (Convert.ToDouble(feature.Scan) - minScan) / (maxScan - minScan);
                    feature.Net = (Convert.ToDouble(scanTimes[feature.Scan]) - scanTimes[minScan]) / (scanTimes[maxScan] - scanTimes[minScan]);
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                    feature.NetAligned = feature.Net;
                    feature.GroupId = datasetId;
                    feature.SpectralCount = feature.MsFeatures.Count;

                    foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                    {
                        msFeature.UmcId = feature.Id;
                        msFeature.GroupId = datasetId;
                        msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                        msnSpectra.AddRange(msFeature.MSnSpectra);
                    }
                }
            }
            else
            {
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);
                    foreach (var feature in
                        from feature in msFeatures
                        let doesFeatureExists = map.ContainsKey(feature.UmcId)
                        where doesFeatureExists
                        select feature)
                    {
                        map[feature.UmcId].AddChildFeature(feature);
                    }
                }
            }

            // Process the MS/MS data with peptides
            UpdateStatus("Reading List of Peptides");
            var sequenceProvider = PeptideReaderFactory.CreateReader(dataset.SequencePath);
            if (sequenceProvider != null)
            {
                UpdateStatus("Reading List of Peptides");
                var peptides = sequenceProvider.Read(dataset.SequencePath);
                var count = 0;
                var peptideList = peptides.ToList();
                peptideList.ForEach(x => x.Id = count++);

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }
            return features;
        }
Пример #13
0
        public void CreateFeaturesTest(string relativePath, string outputPath)
        {
            var path = GetPath(relativePath);

            var tolerances  = new FeatureTolerances
            {
                Mass =  13,
                Net = .01,
                DriftTime =  30,
                FragmentationWindowSize = .5
            };

            var reader      = new MsFeatureLightFileReader();
            var rawFeatures = reader.ReadFile(path);
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                ChargeRange =  new FilterRange(1,6),
                MinimumIntensity = 200000,
                ShouldUseDeisotopingFilter =  true,
                ShouldUseIntensityFilter   =  true
            };
            rawFeatures = LcmsFeatureFilters.FilterMsFeatures(rawFeatures, msFilterOptions);

            var finder      = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight> {Tolerances = tolerances};
            finder.Progress += (sender, args) => Console.WriteLine(args.Message);
            var features    = finder.Cluster(rawFeatures.ToList());

            var filterOptions = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRange = new FilterRange
                {
                    Maximum = 30,
                    Minimum = 10
                }
            };
            features        = LcmsFeatureFilters.FilterFeatures(features, filterOptions);

            Console.WriteLine(@"Found - {0} features", features.Count);
            using (var writer = File.CreateText(GetPath(outputPath)))
            {
                var index = 0;
                foreach (var feature in features)
                {
                    feature.Id = index++;
                    feature.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                    writer.WriteLine("{1}{0}{2}{0}{3}{0}{4}{0}{5}{0}{6}{0}{7}{0}{8}{0}{9}{0}{10}",
                        TextDelimiter,
                        feature.Net,
                        feature.ChargeState,
                        feature.Mz,
                        feature.Scan,
                        feature.MassMonoisotopic,
                        feature.MassMonoisotopicAligned,
                        feature.Id,
                        feature.ScanStart,
                        feature.ScanEnd,
                        feature.ScanAligned
                        );
                }
            }
        }
        /// <summary>
        ///     Loads baseline data for alignment.
        /// </summary>
        private IList<UMCLight> LoadBaselineData(DatasetInformation baselineInfo,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFindingOptions lcmsFindingOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            FeatureDataAccessProviders dataProviders,
            MassTagDatabase database,
            bool shouldUseMassTagDbAsBaseline)
        {
            IList<UMCLight> baselineFeatures = null;

            UpdateStatus("Loading baseline features.");
            if (!shouldUseMassTagDbAsBaseline)
            {
                if (baselineInfo == null)
                {
                    throw new Exception("The baseline dataset was never set.");
                }

                var cache = new FeatureLoader
                {
                    Providers = dataProviders
                };

                RegisterProgressNotifier(cache);

                UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment.");

                baselineFeatures = cache.LoadDataset(baselineInfo,
                    msFilterOptions,
                    lcmsFindingOptions,
                    lcmsFilterOptions);

                cache.CacheFeatures(baselineFeatures);
                if (BaselineFeaturesLoaded != null)
                {
                    BaselineFeaturesLoaded(this,
                        new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList()));
                }

                DeRegisterProgressNotifier(cache);
            }
            else
            {
                if (database == null)
                    throw new NullReferenceException(
                        "The mass tag database has to have data in it if it's being used for drift time alignment.");

                UpdateStatus("Setting baseline features for post drift time alignment from mass tag database.");
                var tags = FeatureDataConverters.ConvertToUMC(database.MassTags);

                if (BaselineFeaturesLoaded == null)
                    return tags;

                if (tags != null)
                    BaselineFeaturesLoaded(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database));
            }
            return baselineFeatures;
        }
Пример #15
0
        /// <summary>
        ///     Runs the MultiAlign analysis
        /// </summary>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
            IEnumerable<DatasetInformation> aligneeDatasets,
            LcmsFeatureFindingOptions featureFindingOptions,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            SpectralOptions peptideOptions,
            IFeatureFinder featureFinder,
            IFeatureAligner<IEnumerable<UMCLight>,
            IEnumerable<UMCLight>,
            classAlignmentData> aligner,
            IClusterer<UMCLight, UMCClusterLight> clusterer,
            string matchPath,
            string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
            {
                rawProviderX.AddDataFile(baselineDataset.RawPath, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                    featureFindingOptions,
                    rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.SequencePath, baselineFeatures, peptideOptions.Fdr,
                    peptideOptions.IdScore);

                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load the alignee dataset
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = RawLoaderFactory.CreateFileReader(dataset.RawPath))
                    {
                        rawProviderY.AddDataFile(dataset.RawPath, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                            featureFindingOptions,
                            rawProviderY);
                        LinkPeptidesToFeatures(dataset.SequencePath, aligneeFeatures, peptideOptions.Fdr,
                            peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // cluster before we do anything else....
                        var allFeatures = new List<UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            feature.Net = feature.Net;
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider = providerY;
                        aligner.BaselineSpectraProvider = providerX;

                        UpdateStatus("Aligning data");
                        // Aligner data
                        var data = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;

                        WriteErrors(errorPath, matches);

                        // create anchor points for LCMSWarp alignment
                        var massPoints = new List<RegressionPoint>();
                        var netPoints = new List<RegressionPoint>();
                        foreach (var match in matches)
                        {
                            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                match.AnchorPointY.Mz);
                            var netError = match.AnchorPointX.Net - match.AnchorPointY.Net;
                            var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                            massPoints.Add(massPoint);

                            var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                            netPoints.Add(netPoint);
                        }

                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId = -1;
                        }
                        // Then cluster after alignment!
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                            preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                            postAlignment.DifferentCluster));

                        SaveMatches(matchPath, matches);
                    }
                }
            }

            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }
Пример #16
0
        public void TestClustering(
            string directory,
            string outputPath,
            FeatureAlignmentType alignmentType,
            LcmsFeatureClusteringAlgorithmType clusterType)
        {
            var matchPath = string.Format("{0}.txt", outputPath);
            var errorPath = string.Format("{0}-errors.txt", outputPath);

            // Loads the supported MultiAlign types
            var supportedTypes = DatasetInformation.SupportedFileTypes;
            var extensions = new List<string>();
            supportedTypes.ForEach(x => extensions.Add("*" + x.Extension));

            // Find our datasets
            var inputFiles = DatasetSearcher.FindDatasets(directory,
                extensions,
                SearchOption.TopDirectoryOnly);
            var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

            // Setup our alignment options
            var alignmentOptions = new AlignmentOptions();
            var spectralOptions = new SpectralOptions
            {
                ComparerType = SpectralComparison.CosineDotProduct,
                Fdr = .01,
                IdScore = 1e-09,
                MzBinSize = .5,
                MzTolerance = .5,
                NetTolerance = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff = .75,
                TopIonPercent = .8
            };

            // Options setup
            var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass = instrumentOptions.Mass + 6,
                Net = instrumentOptions.NetTolerance,
                DriftTime = instrumentOptions.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange = .002,
                MaximumScanRange = 50
            };

            // Create our algorithms
            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
                alignmentOptions.LCMSWarpOptions,
                spectralOptions);
            var clusterer = ClusterFactory.Create(clusterType);
            clusterer.Parameters = new FeatureClusterParameters<UMCLight>
            {
                Tolerances = featureTolerances
            };

            RegisterProgressNotifier(aligner);
            RegisterProgressNotifier(finder);
            RegisterProgressNotifier(clusterer);

            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRange = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity = 5000,
                ChargeRange = new FilterRange(1, 6),
                ShouldUseChargeFilter = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter = true
            };

            for (var i = 0; i < 1; i++)
            {
                var aligneeDatasets = datasets.Where((t, j) => j != i).ToList();
                PerformMultiAlignAnalysis(datasets[0],
                    aligneeDatasets,
                    featureFindingOptions,
                    msFilterOptions,
                    lcmsFilters,
                    spectralOptions,
                    finder,
                    aligner,
                    clusterer,
                    matchPath,
                    errorPath);
            }
        }
Пример #17
0
        /// <summary>
        ///     Loads baseline data for alignment.
        /// </summary>
        private IList <UMCLight> LoadBaselineData(DatasetInformation baselineInfo,
                                                  MsFeatureFilteringOptions msFilterOptions,
                                                  LcmsFeatureFindingOptions lcmsFindingOptions,
                                                  LcmsFeatureFilteringOptions lcmsFilterOptions,
                                                  DataLoadingOptions dataLoadOptions,
                                                  FeatureDataAccessProviders dataProviders,
                                                  MassTagDatabase database,
                                                  bool shouldUseMassTagDbAsBaseline)
        {
            IList <UMCLight> baselineFeatures = null;

            UpdateStatus("Loading baseline features.");
            if (!shouldUseMassTagDbAsBaseline)
            {
                if (baselineInfo == null)
                {
                    throw new Exception("The baseline dataset was never set.");
                }

                var cache = new FeatureLoader
                {
                    Providers = dataProviders
                };

                RegisterProgressNotifier(cache);

                UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment.");

                baselineFeatures = cache.LoadDataset(baselineInfo,
                                                     msFilterOptions,
                                                     lcmsFindingOptions,
                                                     lcmsFilterOptions,
                                                     dataLoadOptions,
                                                     m_scanSummaryProviderCache,
                                                     this.m_identificationsProvider);

                cache.CacheFeatures(baselineFeatures);
                if (BaselineFeaturesLoaded != null)
                {
                    BaselineFeaturesLoaded(this,
                                           new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList()));
                }

                DeRegisterProgressNotifier(cache);
            }
            else
            {
                if (database == null)
                {
                    throw new NullReferenceException(
                              "The mass tag database has to have data in it if it's being used for drift time alignment.");
                }

                UpdateStatus("Setting baseline features for post drift time alignment from mass tag database.");
                var tags = FeatureDataConverters.ConvertToUMC(database.MassTags);

                if (BaselineFeaturesLoaded == null)
                {
                    return(tags);
                }

                if (tags != null)
                {
                    BaselineFeaturesLoaded(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database));
                }
            }
            return(baselineFeatures);
        }
Пример #18
0
 /// <summary>
 /// Constructor
 /// </summary>
 public MsFeatureTreeClusterer()
 {
     Tolerances       = new FeatureTolerances();
     ScanTolerance    = CONST_SCAN_TOLERANCE;
     FilteringOptions = new LcmsFeatureFilteringOptions();
 }
Пример #19
0
        /// <summary>
        /// Make sure the value for options.MaximumScanRange, which is used by the Feature Finder, 
        /// is at least as large as the filterOptions.FeatureLengthRange.Maximum value, 
        /// which is used for filtering the features by length
        /// </summary>
        /// <param name="information"></param>
        /// <param name="options"></param>
        /// <param name="filterOptions"></param>
        private static void ValidateFeatureFinderMaxScanLength(
            DatasetInformation information,
            LcmsFeatureFindingOptions options,
            LcmsFeatureFilteringOptions filterOptions)
        {
            if (!filterOptions.TreatAsTimeNotScan)
            {
                if (options.MaximumScanRange < filterOptions.FeatureLengthRange.Maximum)
                {
                    // Bump up the scan range used by the LCMS Feature Finder to allow for longer featuers
                    options.MaximumScanRange = (int)filterOptions.FeatureLengthRange.Maximum;
                }
                return;
            }

            int maxScanLength;

            if (information.ScanTimes.Count == 0)
            {
                // FeatureLengthRange.Maximum is in minutes
                // Assume 3 scans/second (ballpark estimate)
                maxScanLength = (int)filterOptions.FeatureLengthRange.Maximum * 60 * 3;
            }
            else
            {
                // Find the average number of scans that spans FeatureLengthRange.Maximum minutes

                // Step through the dictionary to find the average number of scans per minute
                var minuteThreshold = 1;
                var scanCountCurrent = 0;
                var scanCountsPerMinute = new List<int>();

                foreach (var entry in information.ScanTimes)
                {
                    if (entry.Value < minuteThreshold)
                    {
                        scanCountCurrent++;
                    }
                    else
                    {
                        if (scanCountCurrent > 0)
                        {
                            scanCountsPerMinute.Add(scanCountCurrent);
                        }
                        scanCountCurrent = 0;
                        minuteThreshold++;
                    }
                }

                int averageScansPerMinute;
                if (scanCountsPerMinute.Count > 0)
                {
                    averageScansPerMinute = (int)scanCountsPerMinute.Average();
                }
                else
                {
                    averageScansPerMinute = 180;
                }

                maxScanLength = (int)(filterOptions.FeatureLengthRange.Maximum * averageScansPerMinute * 1.25);
            }

            if (options.MaximumScanRange < maxScanLength)
            {
                // Bump up the scan range used by the LCMS Feature Finder to allow for longer featuers
                options.MaximumScanRange = maxScanLength;
            }
        }
Пример #20
0
        public void CreateFeatureDatabase(string directoryPath, string databasePath)
        {
            var directory = GetPath(directoryPath);

            databasePath = GetPath(databasePath);

            // Loads the supported MultiAlign types
            var supportedTypes = DatasetLoader.SupportedFileTypes;
            var extensions     = new List <string>();

            supportedTypes.ForEach(x => extensions.Add("*" + x.Extension));

            // Find our datasets
            var datasetLoader = new DatasetLoader();
            var datasets      = datasetLoader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

            // Options setup
            var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass      = instrumentOptions.Mass + 6,
                Net       = instrumentOptions.NetTolerance,
                DriftTime = instrumentOptions.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange  = .002,
                MaximumScanRange = 50
            };
            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRangeScans = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity           = 5000,
                ChargeRange                = new FilterRange(1, 6),
                ShouldUseChargeFilter      = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter   = true
            };
            var spectralOptions = new SpectralOptions
            {
                ComparerType      = SpectralComparison.CosineDotProduct,
                Fdr               = .01,
                IdScore           = 1e-09,
                MzBinSize         = .5,
                MzTolerance       = .5,
                NetTolerance      = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff  = .75,
                TopIonPercent     = .8
            };
            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            NHibernateUtil.CreateDatabase(databasePath);
            // Synchronization and IO for serializing all data to the database.
            var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
            var cache     = new FeatureLoader
            {
                Providers = providers
            };

            var datasetId = 0;

            foreach (var dataset in datasets)
            {
                dataset.DatasetId = datasetId++;
                var features = FindFeatures(dataset,
                                            featureFindingOptions,
                                            msFilterOptions,
                                            lcmsFilters,
                                            spectralOptions,
                                            finder);

                cache.CacheFeatures(features);
            }
            providers.DatasetCache.AddAll(datasets);
        }
Пример #21
0
        public void CreateFeaturesTest(string relativePath, string outputPath)
        {
            var path = GetPath(relativePath);

            var tolerances = new FeatureTolerances
            {
                Mass      = 13,
                Net       = .01,
                DriftTime = 30,
                FragmentationWindowSize = .5
            };

            var reader          = new MsFeatureLightFileReader();
            var rawFeatures     = reader.ReadFile(path);
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                ChargeRange                = new FilterRange(1, 6),
                MinimumIntensity           = 200000,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter   = true
            };

            rawFeatures = LcmsFeatureFilters.FilterMsFeatures(rawFeatures, msFilterOptions);

            var finder = new MsFeatureTreeClusterer <MSFeatureLight, UMCLight> {
                Tolerances = tolerances
            };

            finder.Progress += (sender, args) => Console.WriteLine(args.Message);
            var features = finder.Cluster(rawFeatures.ToList());

            var filterOptions = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRangeScans = new FilterRange
                {
                    Maximum = 30,
                    Minimum = 10
                }
            };

            features = LcmsFeatureFilters.FilterFeatures(features, filterOptions);

            Console.WriteLine(@"Found - {0} features", features.Count);
            using (var writer = File.CreateText(GetPath(outputPath)))
            {
                var index = 0;
                foreach (var feature in features)
                {
                    feature.Id = index++;
                    feature.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                    writer.WriteLine("{1}{0}{2}{0}{3}{0}{4}{0}{5}{0}{6}{0}{7}{0}{8}{0}{9}{0}{10}",
                                     TextDelimiter,
                                     feature.Net,
                                     feature.ChargeState,
                                     feature.Mz,
                                     feature.Scan,
                                     feature.MassMonoisotopic,
                                     feature.MassMonoisotopicAligned,
                                     feature.Id,
                                     feature.ScanStart,
                                     feature.ScanEnd,
                                     feature.ScanAligned
                                     );
                }
            }
        }
Пример #22
0
 public UmcTreeFeatureFinder()
 {
     FilteringOptions = new LcmsFeatureFilteringOptions();
 }
Пример #23
0
        /// <summary>
        ///     Creates LCMS Features
        /// </summary>
        public List<UMCLight> CreateLcmsFeatures(
            DatasetInformation information,
            List<MSFeatureLight> msFeatures,
            LcmsFeatureFindingOptions options,
            LcmsFeatureFilteringOptions filterOptions)
        {
            // Make features
            if (msFeatures.Count < 1)
                throw new Exception("No features were found in the feature files provided.");

            UpdateStatus("Finding features.");
            ISpectraProvider provider = null;
            if (information.RawPath != null && !string.IsNullOrWhiteSpace(information.RawPath))
            {
                UpdateStatus("Using raw data to create better features.");
                provider = RawLoaderFactory.CreateFileReader(information.RawPath);
                provider.AddDataFile(information.RawPath, 0);
            }

            ValidateFeatureFinderMaxScanLength(information, options, filterOptions);

            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            finder.Progress += (sender, args) => UpdateStatus(args.Message);
            var features = finder.FindFeatures(msFeatures, options, provider);

            UpdateStatus("Filtering features.");
            List<UMCLight> filteredFeatures;
            if (filterOptions.TreatAsTimeNotScan) //Feature length determined based on time (mins)
            {
                filteredFeatures = LcmsFeatureFilters.FilterFeatures(features,
                    filterOptions, information.ScanTimes);
            }
            else //Feature length determined based on scans
            {
                filteredFeatures = LcmsFeatureFilters.FilterFeatures(features, filterOptions);
            }

            UpdateStatus(string.Format("Filtered features from: {0} to {1}.", features.Count, filteredFeatures.Count));
            return filteredFeatures;
        }