Пример #1
0
        /// <summary>
        /// Aligns the dataset to the data stored in the alignment processor.
        /// </summary>
        /// <param name="alignmentProcessor">Aligner</param>
        /// <param name="features">LC-MS Features to align to the baseline</param>
        /// <param name="alignmentOptions">Options</param>
        /// <param name="progress"></param>
        /// <returns></returns>
        private AlignmentData AlignFeatures(LcmsWarpAlignmentProcessor alignmentProcessor,
                                            IEnumerable <UMCLight> features,
                                            LcmsWarpAlignmentOptions alignmentOptions,
                                            IProgress <ProgressData> progress = null)
        {
            var progData      = new ProgressData(progress);
            var localProgress = new Progress <ProgressData>(p => progData.Report(p.Percent, p.Status));
            var alignmentData = new AlignmentData();

            OnStatus("Starting alignment of features.");

            // Set minMtdbnet and maxMtdbnet to 0
            alignmentData.MinMTDBNET = 0;
            alignmentData.MaxMTDBNET = 0;

            var umcLights = features as List <UMCLight> ?? features.ToList();

            progData.StepRange(5, "Starting alignment of features.");
            var filteredFeatures = FilterFeaturesByAbundance(umcLights, alignmentOptions);

            // Convert the features, and make a map, so that we can re-adjust the aligned values later.
            var map = FeatureDataConverters.MapFeature(umcLights);

            progData.StepRange(10, "Setting alignee features.");
            // Set features
            OnStatus("Setting alignee features.");
            alignmentProcessor.SetAligneeDatasetFeatures(filteredFeatures);

            progData.StepRange(90, "Performing alignment warping.");
            // Find alignment
            OnStatus("Performing alignment warping.");
            alignmentProcessor.PerformAlignmentToMsFeatures(localProgress);

            progData.StepRange(95);
            // Extract alignment function
            alignmentData.AlignmentFunction = alignmentProcessor.GetAlignmentFunction();

            progData.StepRange(100);
            // Extract the NET value for every scan
            _scanToNETMap = alignmentProcessor.GetScanToNETMapping();

            // Correct the features (updates NetAligned and MassMonoisotopicAligned)
            OnStatus("Applying alignment function to all features.");
            progData.Status = "Applying alignment function to all features.";
            umcLights       = alignmentProcessor.ApplyNetMassFunctionToAligneeDatasetFeatures(umcLights);
            progData.Report(100);

            // Find min/max scan for meta-data
            var minScanBaseline = int.MaxValue;
            var maxScanBaseline = int.MinValue;

            foreach (var feature in umcLights)
            {
                maxScanBaseline = Math.Max(maxScanBaseline, feature.Scan);
                minScanBaseline = Math.Min(minScanBaseline, feature.Scan);
            }

            // Update the scan and NET ranges
            alignmentData.MinScanBaseline = minScanBaseline;
            alignmentData.MaxScanBaseline = maxScanBaseline;
            alignmentData.MinMTDBNET      = (float)alignmentProcessor.MinReferenceNet;
            alignmentData.MaxMTDBNET      = (float)alignmentProcessor.MaxReferenceNet;

            // Cache the matching features
            alignmentData.FeatureMatches = alignmentProcessor.FeatureMatches;

            // Pull out the heat maps...
            OnStatus("Retrieving alignment data.");
            progData.Status          = "Retrieving alignment data.";
            alignmentData.HeatScores = alignmentProcessor.GetAlignmentHeatMap(alignmentOptions.StandardizeHeatScores);

            // Mass and net error histograms!
            alignmentData.MassErrorHistogram  = alignmentProcessor.GetMassErrorHistogram(alignmentOptions.MassBinSize);
            alignmentData.NetErrorHistogram   = alignmentProcessor.GetNetErrorHistogram(alignmentOptions.NetBinSize);
            alignmentData.DriftErrorHistogram = alignmentProcessor.GetDriftErrorHistogram(alignmentOptions.DriftTimeBinSize);

            // Get the residual data from the warp.
            alignmentData.ResidualData = alignmentProcessor.GetResidualData();

            alignmentData.NETIntercept          = alignmentProcessor.NetIntercept;
            alignmentData.NETRsquared           = alignmentProcessor.NetRsquared;
            alignmentData.NETSlope              = alignmentProcessor.NetSlope;
            alignmentData.MassMean              = alignmentProcessor.MassMu;
            alignmentData.MassStandardDeviation = alignmentProcessor.MassStd;
            alignmentData.NETMean = alignmentProcessor.NetMu;
            alignmentData.NETStandardDeviation = alignmentProcessor.NetStd;
            alignmentData.BaselineIsAmtDB      = _options.AlignToMassTagDatabase;

            return(alignmentData);
        }
Пример #2
0
        /// <summary>
        ///     Load a single dataset from the provider.
        /// </summary>
        /// <returns></returns>
        public IList <UMCLight> LoadDataset(DatasetInformation dataset,
                                            MsFeatureFilteringOptions msFilteringOptions,
                                            LcmsFeatureFindingOptions lcmsFindingOptions,
                                            LcmsFeatureFilteringOptions lcmsFilteringOptions,
                                            DataLoadingOptions dataLoadOptions,
                                            ScanSummaryProviderCache providerCache,
                                            IdentificationProviderCache identificationProviders,
                                            IProgress <ProgressData> progress = null)
        {
            var progData = new ProgressData(progress);
            IScanSummaryProvider provider = null;

            if (!string.IsNullOrWhiteSpace(dataset.RawFile.Path))
            {
                UpdateStatus("Using raw data to create better features.");
                provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
            }

            progData.StepRange(1);
            progData.Status = "Looking for existing features in the database.";
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
            var datasetId     = dataset.DatasetId;
            var features      = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);
            var hasMsFeatures = features.Any(f => f.MsFeatures.Any());

            var msFeatures = new List <MSFeatureLight>();

            if (!hasMsFeatures)
            {
                progData.StepRange(2);
                progData.Status = "Loading MS Feature Data.";
                UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                                           dataset.DatasetName));

                var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
                msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
            }

            progData.StepRange(3);
            progData.Status = "Loading scan summaries.";
            ////var scansInfo = UmcLoaderFactory.LoadScanSummaries(dataset.Scans.Path);
            ////dataset.BuildScanTimes(scansInfo);

            progData.StepRange(100);

            var msnSpectra = new List <MSSpectra>();

            // If we don't have any features, then we have to create some from the MS features
            // provided to us.
            if (features.Count < 1)
            {
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, provider, ref dataset);

                progData.Status = "Creating LCMS features.";
                features        = CreateLcmsFeatures(dataset,
                                                     msFeatures,
                                                     lcmsFindingOptions,
                                                     lcmsFilteringOptions,
                                                     provider,
                                                     new Progress <ProgressData>(pd => progData.Report(pd.Percent)));

                //var maxScan = Convert.ToDouble(features.Max(feature => feature.Scan));
                //var minScan = Convert.ToDouble(features.Min(feature => feature.Scan));
                var maxScan   = features.Max(feature => feature.Scan);
                var minScan   = features.Min(feature => feature.Scan);
                var id        = 0;
                var scanTimes = dataset.ScanTimes;

                foreach (var feature in features)
                {
                    feature.Id = id++;
                    //feature.Net = (Convert.ToDouble(feature.Scan) - minScan) / (maxScan - minScan);
                    feature.Net = (Convert.ToDouble(scanTimes[feature.Scan]) - scanTimes[minScan]) / (scanTimes[maxScan] - scanTimes[minScan]);
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                    feature.NetAligned    = feature.Net;
                    feature.GroupId       = datasetId;
                    feature.SpectralCount = feature.MsFeatures.Count;

                    foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                    {
                        msFeature.UmcId   = feature.Id;
                        msFeature.GroupId = datasetId;
                        msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                        msnSpectra.AddRange(msFeature.MSnSpectra);
                    }
                }
            }
            else
            {
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id      = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);
                    foreach (var feature in
                             from feature in msFeatures
                             let doesFeatureExists = map.ContainsKey(feature.UmcId)
                                                     where doesFeatureExists
                                                     select feature)
                    {
                        map[feature.UmcId].AddChildFeature(feature);
                    }
                }
            }

            //if (provider is ISpectraProvider)
            //{
            //    var spectraProvider = provider as ISpectraProvider;
            //    UmcLoaderFactory.LoadMsMs(features.ToList(), spectraProvider);
            //}

            // Process the MS/MS data with peptides
            UpdateStatus("Reading List of Peptides");
            if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
            {
                UpdateStatus("Reading List of Peptides");
                var idProvider  = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
                var peptideList = idProvider.GetAllIdentifications();

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }

            progData.Report(100);

            return(features);
        }
Пример #3
0
        /// <summary>
        ///     Loads baseline data for alignment.
        /// </summary>
        private IList <UMCLight> LoadBaselineData(DatasetInformation baselineInfo,
                                                  MsFeatureFilteringOptions msFilterOptions,
                                                  LcmsFeatureFindingOptions lcmsFindingOptions,
                                                  LcmsFeatureFilteringOptions lcmsFilterOptions,
                                                  DataLoadingOptions dataLoadOptions,
                                                  FeatureDataAccessProviders dataProviders,
                                                  MassTagDatabase database,
                                                  bool shouldUseMassTagDbAsBaseline)
        {
            IList <UMCLight> baselineFeatures = null;

            UpdateStatus("Loading baseline features.");
            if (!shouldUseMassTagDbAsBaseline)
            {
                if (baselineInfo == null)
                {
                    throw new Exception("The baseline dataset was never set.");
                }

                var cache = new FeatureLoader
                {
                    Providers = dataProviders
                };

                RegisterProgressNotifier(cache);

                UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment.");

                baselineFeatures = cache.LoadDataset(baselineInfo,
                                                     msFilterOptions,
                                                     lcmsFindingOptions,
                                                     lcmsFilterOptions,
                                                     dataLoadOptions,
                                                     m_scanSummaryProviderCache,
                                                     this.m_identificationsProvider);

                cache.CacheFeatures(baselineFeatures);
                if (BaselineFeaturesLoaded != null)
                {
                    BaselineFeaturesLoaded(this,
                                           new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList()));
                }

                DeRegisterProgressNotifier(cache);
            }
            else
            {
                if (database == null)
                {
                    throw new NullReferenceException(
                              "The mass tag database has to have data in it if it's being used for drift time alignment.");
                }

                UpdateStatus("Setting baseline features for post drift time alignment from mass tag database.");
                var tags = FeatureDataConverters.ConvertToUMC(database.MassTags);

                if (BaselineFeaturesLoaded == null)
                {
                    return(tags);
                }

                if (tags != null)
                {
                    BaselineFeaturesLoaded(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database));
                }
            }
            return(baselineFeatures);
        }