Exemple #1
0
        /// <summary>
        ///     Links the peptides read from a sequence file to the MS/MS spectra attached to the given features.
        ///     Returns without doing anything when no peptide reader can be created for the sequence file.
        /// </summary>
        /// <param name="sequencePath">Path of the sequence file; used to select the reader and as the file to read.</param>
        /// <param name="aligneeFeatures">Features whose MS features supply the MS/MS spectra to link against.</param>
        /// <param name="fdr">Cutoff handed to <c>PeptideUtility.PassesCutoff</c> together with <paramref name="idScore" />.</param>
        /// <param name="idScore">Cutoff handed to <c>PeptideUtility.PassesCutoff</c> together with <paramref name="fdr" />.</param>
        private void LinkPeptidesToFeatures(string sequencePath, List<UMCLight> aligneeFeatures, double fdr,
                                            double idScore)
        {
            // Get the reader for the peptides associated with this feature set.
            var peptideReaderY = PeptideReaderFactory.CreateReader(sequencePath);

            if (peptideReaderY == null)
            {
                return;
            }

            UpdateStatus("Linking peptides to ms/ms");
            var linker = new PeptideMsMsLinker();

            // Filter while reading and materialize exactly once; the intermediate
            // full copies of the peptide list were never used for anything else.
            var filteredPeptides = peptideReaderY.Read(sequencePath)
                                                 .Where(x => PeptideUtility.PassesCutoff(x, idScore, fdr))
                                                 .ToList();

            // Flatten feature -> MS feature -> MS/MS spectra, preserving the original order.
            var msnSpectra = aligneeFeatures.SelectMany(feature => feature.MsFeatures)
                                            .SelectMany(msFeature => msFeature.MSnSpectra)
                                            .ToList();

            linker.LinkPeptidesToSpectra(msnSpectra, filteredPeptides);
        }
Exemple #2
0
        /// <summary>
        ///     Load a single dataset from the provider. Existing LCMS features are loaded first; when none
        ///     exist, MS features are filtered and clustered into new LCMS features. If the dataset has a
        ///     sequence file, its identifications are then linked to the collected MS/MS spectra.
        /// </summary>
        /// <param name="dataset">Dataset to load; its raw, feature and sequence file paths and scan times are read.</param>
        /// <param name="msFilteringOptions">Options passed to <c>LcmsFeatureFilters.FilterMsFeatures</c>.</param>
        /// <param name="lcmsFindingOptions">Options passed to <c>CreateLcmsFeatures</c>.</param>
        /// <param name="lcmsFilteringOptions">Options passed to <c>CreateLcmsFeatures</c>.</param>
        /// <param name="dataLoadOptions">Supplies the isos filter options used when loading MS feature data.</param>
        /// <param name="providerCache">Supplies the scan summary provider when the dataset has a raw file path.</param>
        /// <param name="identificationProviders">Supplies the identification provider for the dataset's sequence file.</param>
        /// <param name="progress">Optional progress sink; wrapped in a <c>ProgressData</c> for reporting.</param>
        /// <returns>The LCMS features that were loaded or created for the dataset (may be empty).</returns>
        public IList<UMCLight> LoadDataset(DatasetInformation dataset,
                                           MsFeatureFilteringOptions msFilteringOptions,
                                           LcmsFeatureFindingOptions lcmsFindingOptions,
                                           LcmsFeatureFilteringOptions lcmsFilteringOptions,
                                           DataLoadingOptions dataLoadOptions,
                                           ScanSummaryProviderCache providerCache,
                                           IdentificationProviderCache identificationProviders,
                                           IProgress<ProgressData> progress = null)
        {
            var progData = new ProgressData(progress);
            IScanSummaryProvider provider = null;

            // Guard RawFile itself the same way SequenceFile is guarded further down.
            if (dataset.RawFile != null && !string.IsNullOrWhiteSpace(dataset.RawFile.Path))
            {
                UpdateStatus("Using raw data to create better features.");
                provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
            }

            progData.StepRange(1);
            progData.Status = "Looking for existing features in the database.";
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
            var datasetId     = dataset.DatasetId;
            var features      = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);
            var hasMsFeatures = features.Any(f => f.MsFeatures.Any());

            var msFeatures = new List<MSFeatureLight>();

            // Only read the MS feature file when the loaded features carry no MS features of their own.
            if (!hasMsFeatures)
            {
                progData.StepRange(2);
                progData.Status = "Loading MS Feature Data.";
                UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                                           dataset.DatasetName));

                var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
                msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
            }

            progData.StepRange(3);
            progData.Status = "Loading scan summaries.";
            ////var scansInfo = UmcLoaderFactory.LoadScanSummaries(dataset.Scans.Path);
            ////dataset.BuildScanTimes(scansInfo);

            progData.StepRange(100);

            // NOTE(review): this list is only filled in the feature-creation branch below, so features
            // loaded from an existing source are linked against an empty spectra list - confirm intended.
            var msnSpectra = new List<MSSpectra>();

            // If we don't have any features, then we have to create some from the MS features
            // provided to us.
            if (features.Count < 1)
            {
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, provider, ref dataset);

                progData.Status = "Creating LCMS features.";
                features        = CreateLcmsFeatures(dataset,
                                                     msFeatures,
                                                     lcmsFindingOptions,
                                                     lcmsFilteringOptions,
                                                     provider,
                                                     new Progress<ProgressData>(pd => progData.Report(pd.Percent)));

                // Max/Min throw on an empty sequence, so skip the NET computation entirely
                // when feature finding produced nothing.
                if (features.Count > 0)
                {
                    var maxScan   = features.Max(feature => feature.Scan);
                    var minScan   = features.Min(feature => feature.Scan);
                    var scanTimes = dataset.ScanTimes;

                    // Loop-invariant scan times, looked up once.
                    var minScanTime   = Convert.ToDouble(scanTimes[minScan]);
                    var scanTimeRange = Convert.ToDouble(scanTimes[maxScan]) - minScanTime;
                    var id            = 0;

                    foreach (var feature in features)
                    {
                        feature.Id = id++;

                        // NET is the feature's scan time scaled by the dataset's scan time range.
                        // A zero range (all features at the same scan time) would yield NaN, so use 0.
                        feature.Net = scanTimeRange != 0.0
                            ? (Convert.ToDouble(scanTimes[feature.Scan]) - minScanTime) / scanTimeRange
                            : 0.0;
                        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        feature.NetAligned    = feature.Net;
                        feature.GroupId       = datasetId;
                        feature.SpectralCount = feature.MsFeatures.Count;

                        foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                        {
                            msFeature.UmcId   = feature.Id;
                            msFeature.GroupId = datasetId;
                            msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                            msnSpectra.AddRange(msFeature.MSnSpectra);
                        }
                    }
                }
            }
            else
            {
                // Features that did not come from an existing feature source get fresh ids and the dataset's group id.
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id      = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);

                    // MS features whose parent id is not in the map are ignored.
                    foreach (var msFeature in msFeatures.Where(msFeature => map.ContainsKey(msFeature.UmcId)))
                    {
                        map[msFeature.UmcId].AddChildFeature(msFeature);
                    }
                }
            }

            //if (provider is ISpectraProvider)
            //{
            //    var spectraProvider = provider as ISpectraProvider;
            //    UmcLoaderFactory.LoadMsMs(features.ToList(), spectraProvider);
            //}

            // Process the MS/MS data with peptides
            UpdateStatus("Reading List of Peptides");
            if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
            {
                var idProvider  = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
                var peptideList = idProvider.GetAllIdentifications();

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }

            progData.Report(100);

            return features;
        }